######################################################################
######################################################################
# Function read.DEM reads 2010 data from DEM database for the specified country, components 
# and measurement periods.
# ODBC drivers must be set up beforehand as described in data.R
#
# Input: 
# country (e.g. "ro" for Romania), 
# vector of component names cp_name (e.g. c("o3","pm10")),
# vector of corresponding component codes cp_code (e.g. c("7","5")),
# vector of corresponding measurement periods cp_period (e.g. c("hour","day"))
#
# Output: 
# saves one file per component, named "country_period_component" (e.g. ro_hour_o3, ro_day_pm10),
# to a country directory (e.g. ro) in ./Data/2010 DEM Data/ ;
# each file holds the component's data frame stored under the object name "data"
# ! directories have to be created beforehand
######################################################################
# author: M. Rehr
# LaMo: 2012-11-10
######################################################################

# for reading data from Databases
library(RODBC)

##
# helper function sort.date
sort.date <- function(data){
  rk <- rank(data$rd_startdate)
  rk_s <- rank(sort(data$rd_startdate))
  m <- match(rk_s,rk)
  data <- data[m,]
}
##

######################################################################
read.DEM <- function(country, cp_name, cp_code, cp_period){
  
  # load data
  dsn <- paste(country, "dem", sep="-")
  channel <- odbcConnect(dsn=dsn)
  hour <- sqlFetch(channel, "raw_data_hour")
  day <- sqlFetch(channel, "raw_data_day_report")
  close(channel)
  
  # select components
  cp_hour <- cp_code[which(cp_period=="hour")]
  cp_day <- cp_code[which(cp_period=="day")]
  hour <- subset(hour, cp_number %in% cp_hour)
  day <- subset(day, cp_number %in% cp_day)
  
  # split by component
  hour_cp <- split(hour, hour$cp_number)
  day_cp <- split(day, day$cp_number)
  
  # data.frames per component: assign and save
  names_hour <- cp_name[which(cp_period=="hour")]
  names_day <- cp_name[which(cp_period=="day")]
    
    # hourly data
    ##############
    for(i in 1:length(cp_hour)){
      name <- paste(country, "hour", names_hour[i], sep="_")
      idx <- cp_hour[i]
      data <- hour_cp[[idx]]

      # sort data according to date (as possibly confused when reading from DEM) 
      data_sn <- by(data, data$sn_code, sort.date)
      data <- data_sn[[1]]
      if(length(data_sn)>1){
        for(j in 2:length(data_sn)){
          data <- rbind(data, data_sn[[j]])
        }
      }
      save(data, file=paste("./Data/2010 DEM Data", country, name, sep="/"))  
    }
    ############# 
  
  
    # daily data
    #############
    for(i in 1:length(cp_day)){
      name <- paste(country, "day", names_day[i], sep="_")
      idx <- cp_day[i]
      data <- day_cp[[idx]]
    
      data_sn <- by(data, data$sn_code, sort.date)
      data <- data_sn[[1]]
      if(length(data_sn)>1){
        for(j in 2:length(data_sn)){
          data <- rbind(data, data_sn[[j]])
        }
      }
      save(data, file=paste("./Data/2010 DEM Data", country, name, sep="/"))
    }
    #############
  
}
######################################################################

######################################################################
######################################################################