######################################################################
######################################################################
# Function long.DEM rearranges 2010 raw DEM data for the specified country, 
# components and measurement periods (read and stored by function read.DEM) 
# to long format.
#
# Input: 
# country (e.g. "ro" for Romania), 
# vector of components names cp_name (e.g. c("o3","pm10")),
# vector of corresponding components codes cp_code (e.g. c("7","5")),
# vector of corresponding measurement periods cp_period (e.g. c("hour","day"))
#
# Output: 
# saves per component dataframes "country_period_component_long" 
# (e.g. ro_hour_o3_long, ro_day_pm10_long)
# to the corresponding country directory (e.g. ro) in ./Data/2010 DEM Data/ 
######################################################################
# author: M. Rehr
# LaMo: 2012-11-12
######################################################################
long.DEM <- function(country, cp_name, cp_code, cp_period){
  # Rearranges the raw DEM tables of one country to long format
  # (Station - Time - Value) and saves one data.frame per component.
  #
  # Args:
  #   country:   country directory / file prefix (e.g. "ro").
  #   cp_name:   vector of component names (e.g. c("o3", "pm10")).
  #   cp_code:   vector of component codes. Kept for interface compatibility;
  #              the file names are built from cp_name only.
  #   cp_period: vector of measurement periods ("hour" or "day"), parallel
  #              to cp_name. Entries with any other value are ignored.
  #
  # Side effects:
  #   For every component, loads "<country>_<period>_<name>" from
  #   ./Data/2010 DEM Data/<country>/ and saves the object `long` to
  #   "<country>_<period>_<name>_long" in the same directory.
  #
  # Returns: NULL, invisibly.
  #
  # NOTE(review): the raw tables are addressed by column position. The code
  # expects 8 leading columns (with `sn_code` 2nd and `rd_startdate` 4th)
  # followed by alternating value/flag columns, as described by the original
  # comments of this function. Confirm against read.DEM.

  data_dir <- "./Data/2010 DEM Data"
  # codes that mark missing measurements in the raw data
  na_codes <- c(-999, -9999)

  names_hour <- cp_name[which(cp_period == "hour")]
  names_day <- cp_name[which(cp_period == "day")]

  # Loads `path` into a scratch environment and returns the object called
  # `object`, so a file with unexpected content fails with a clear message
  # instead of silently picking up an unrelated object of the same name.
  load_object <- function(path, object){
    env <- new.env()
    load(path, envir = env)
    if (!exists(object, envir = env, inherits = FALSE)) {
      stop("file '", path, "' does not contain an object named '",
           object, "'", call. = FALSE)
    }
    get(object, envir = env, inherits = FALSE)
  }

  # Maps national station codes to European station codes via `loc`; the
  # factor levels follow the order of first appearance.
  eu_station <- function(sn_code){
    eu <- loc$sn_eu_code[match(as.factor(sn_code), loc$sn_code)]
    factor(eu, levels = unique(as.character(eu)))
  }

  # Flattens a table row by row (all values of row 1, then row 2, ...).
  flatten_rows <- function(df){
    as.vector(t(df))
  }

  # Builds the long data.frame, recodes the missing-value codes to NA and
  # saves it as object `long` next to the raw file.
  store_long <- function(Station, Time, Value, name){
    long <- data.frame(Station, Time, Value)
    long$Value[long$Value %in% na_codes] <- NA
    save(long, file = file.path(data_dir, country, paste(name, "long", sep = "_")))
  }

  # loc (for European station name coding)
  loc <- load_object(file.path(data_dir, "loc"), "loc")

  # hourly data
  ##############
  # seq_along() skips the loop entirely when no hourly component is requested
  for (i in seq_along(names_hour)) {

    name <- paste(country, "hour", names_hour[i], sep = "_")
    raw <- load_object(file.path(data_dir, country, name), "data")

    # keep: sn_code - rd_startdate - value1 - flag1 - ... - value24 - flag24
    d <- raw[, -c(1, 3, 5, 6, 7, 8)]

    # one row holds the 24 hourly values of one station and day
    Station <- rep(eu_station(d$sn_code), each = 24)

    # Time in format "%Y-%m-%d %H:%M:%S" (Date & Hour)
    # assumption: times are given in GMT format, not in local formats.
    days <- as.POSIXct(d$rd_startdate, tz = "GMT")
    Time <- rep(days, each = 24) + rep(seq(0, 23 * 3600, 3600), length(days))

    # value columns are at positions 3, 5, ..., 49 (flags in between)
    Value <- flatten_rows(d[, 1 + 2 * (1:24)])

    store_long(Station, Time, Value, name)
  }

  # daily data
  ############
  for (i in seq_along(names_day)) {

    name <- paste(country, "day", names_day[i], sep = "_")
    raw <- load_object(file.path(data_dir, country, name), "data")

    # keep: sn_code - rd_startdate - value1 - flag1 - ... - value31 - flag31
    d <- raw[, -c(1, 3, 5, 6, 7, 8)]
    # 6th raw column; used as the number of values (days) per row
    number <- raw[, 6]
    n_stations <- length(unique(d$sn_code))

    Station <- rep(eu_station(d$sn_code), number)

    # NOTE(review): assumes every station covers the same 365 consecutive
    # days starting at the first start date, with rows ordered by station
    # and month - confirm for stations with incomplete years.
    Time <- seq(d$rd_startdate[1], by = "DSTday", length.out = 365)
    Time <- rep(Time, n_stations)

    # every row holds 31 value slots; slots of non-existing dates
    # (Feb 29th-31st, Apr 31st, ...) are discarded below
    Value <- flatten_rows(d[, 1 + 2 * (1:31)])

    # full 12 x 31 grid of date labels, one grid per station
    slots <- sprintf("2010-%02d-%02d", rep(1:12, each = 31), rep(1:31, times = 12))
    slots <- rep(slots, n_stations)
    Value <- Value[which(slots %in% as.character(Time))]

    store_long(Station, Time, Value, name)
  }

  invisible(NULL)
}
######################################################################  
######################################################################

