######################################################################
######################################################################
# Function read.AirBase reads 2000-2010 data from AirBase data for the specified country, components 
# and measurement periods.
# AirBase_country_v6 directories have to be downloaded beforehand as described in data.R
#
# Input: 
# country (e.g. "DE" for Germany), 
# vector of components names cp_name (e.g. c("o3","pm10")),
# vector of corresponding components codes cp_code (e.g. c("00007","00005")),
# vector of corresponding measurement periods cp_period (e.g. c("hour","day"))
#
# Output: 
# saves per component dataframes "country_period_component" (e.g. DE_hour_o3, DE_day_pm10)
# to a country directory (e.g. DE) in ./Data/AirBase Data/ 
# ! directories have to be created beforehand
######################################################################
# author: M. Rehr
# LaMo: 2013-03-11
# this code for reading AirBase data is based on code by Benedikt Gräler
######################################################################

# example of AirBase file naming: DEBB0420000500600day.1-1-2001.31-12-2009
# 
# Position  1 -  7 station_european_code (here: DEBB042)          -> sn      
# Position  8 - 12 component_code (here: 00005, PM10)             -> code    ^=cp_code  
# Position 13 - 17 measurement_european_group_code (here: 00600)  -> mc      (irrelevant)   
# Position 18 until first dot: measurement period (here: day)     -> period  ^=cp_period
# followed by start date and end date of the time series

##
# helper function read.filenames: lists the raw data files of one country
# that belong to the given component and measurement period
#
# Input:
# code     component code, positions 8-12 of the file name (e.g. "00005")
# period   measurement period, position 18 up to the first dot (e.g. "day")
# country  country code used in the directory name (e.g. "DE")
# code and period are inserted into regular expressions
#
# Output:
# character vector of matching file names (without path); character(0) if
# nothing matches
read.filenames <- function(code, period, country){

  loc <- paste("./Data/AirBase", country, "v6/AirBase", country, "v6_rawdata", sep="_")

  # anchor at the start of the name so that the code is only looked for at
  # positions 8-12; the following "00" (start of the group code) is required
  # as before
  files <- dir(path=loc, pattern=paste0("^.{7}", code, "00"))

  # the period starts at position 18 and runs until the first dot; matching
  # against the whole remainder (instead of a fixed 6 character window) finds
  # periods of any length, and the "^" anchor rejects periods that merely end
  # in the requested one
  pattern <- paste0("^(", period, ")\\.")
  keep <- grepl(pattern, substring(files, 18))

  files[keep]
}
##

##
# main function import.AirBase: reads the 2000-2010 part of all AirBase raw
# files of one country, component and measurement period, puts every station
# on a complete time grid (missing entries are NA) and saves the stacked
# result
#
# Input:
# cp_name  component name, only used in the name of the output file (e.g. "o3")
# code     component code as used in the file names (e.g. "00007")
# period   measurement period, "hour" or "day"
# country  country code (e.g. "DE")
#
# Output:
# saves the data.frame "data" with the columns sn (station code and last three
# digits of the group code), date and V1..V24 (hourly data, one row per day)
# or V1..V31 (daily data, one row per month) to
# ./Data/AirBase Data/<country>/<country>_<period>_<cp_name>
# ! this directory has to exist beforehand
# returns invisible NULL; stops for unsupported periods and warns (without
# saving anything) if no file matches
import.AirBase <- function(cp_name, code, period, country) {

  # fail early and with a clear message; the time grid below is only defined
  # for these two periods
  period <- as.character(period)
  if (length(period) != 1 || !(period %in% c("hour", "day"))) {
    stop("method not available for period = ", period)
  }

  files <- read.filenames(code, period, country)

  # import only ts which do not end before year 2000
  # (the end date is the last dash separated date of the file name)
  files <- files[grepl("-.{1,2}-.{8,10}-20", files)]

  if (length(files) == 0) {
    warning("no AirBase files found for country = ", country,
            ", code = ", code, ", period = ", period)
    return(invisible(NULL))
  }

  # import data from files into the list "raw", one element per file, named
  # by station code and the last three digits of the group code
  loc <- paste("./Data/AirBase", country, "v6/AirBase", country, "v6_rawdata", sep="_")
  raw <- list()
  for (file in files) {
    newData <- read.csv(file=file.path(loc, file), header=FALSE, sep="\t")

    # keep only part of ts from 2000-01-01 onwards
    newData <- newData[grepl("20.{8}", newData$V1), , drop=FALSE]
    raw[[paste(substr(file,1,7), substr(file,15,17), sep="-")]] <- newData
  }
  sn_names <- names(raw)

  if (period == "hour") {
    # hourly data
    #############
    # ts from 2000-01-01 to 2010-12-31 (in days), 24 values per row
    date <- as.Date(0:4017, origin="2000-01-01")
    cols <- 24
  } else {
    # daily data
    ############
    # ts from 2000-01-01 to 2010-12-01 (in months), 31 values per row
    year <- rep(2000:2010, each=12)
    month <- rep(1:12, times=11)
    date <- as.Date(paste(year, month, "01", sep="-"))
    cols <- 31
  }

  # column 1 holds the date; every second column from column 2 on holds a
  # value, the columns in between (the flags) are discarded
  value_cols <- 2 * seq_len(cols)

  # add variable sn to data, discard flags, complete ts and fill missings with NAs
  newList <- vector("list", length(raw))
  for (i in seq_along(raw)) {
    obs <- raw[[i]]
    # values
    v <- obs[, value_cols]
    # complete ts: empty grid, then copy each observed row to the grid row of
    # its own date; indexing v by the match result keeps rows and dates
    # aligned even if the file is unsorted or contains dates outside the grid
    values <- as.data.frame(matrix(nrow=length(date), ncol=cols))
    idx <- match(date, as.Date(obs[, 1]))
    found <- !is.na(idx)
    if (any(found)) {
      values[found, ] <- v[idx[found], ]
    }
    # stations
    sn <- rep(sn_names[i], times=length(date))
    # combine variables into data.frame
    newList[[i]] <- cbind(sn, date, values)
  }

  # convert list to data.frame "data" in one step
  data <- do.call(rbind, newList)

  # save data (the object inside the file is called "data")
  name <- paste(country, period, cp_name, sep="_")
  save(data, file=paste("./Data/AirBase Data", country, name, sep="/"))
  invisible(NULL)
}
##

##
# wrapper read.AirBase: calls import.AirBase once per component of interest
#
# Input:
# country    country code (e.g. "DE")
# cp_name    vector of component names (e.g. c("o3","pm10"))
# cp_code    vector of corresponding component codes (e.g. c("00007","00005"))
# cp_period  vector of corresponding measurement periods (e.g. c("hour","day"))
# the cp_ vectors are matched by position; cp_code and cp_period need at
# least as many entries as cp_name
#
# Output:
# none (invisible NULL); the data is saved by import.AirBase
read.AirBase <- function(country, cp_name, cp_code, cp_period){
  # a shorter cp_code / cp_period would hand NA to import.AirBase
  if (length(cp_code) < length(cp_name) || length(cp_period) < length(cp_name)) {
    stop("cp_code and cp_period need one entry per element of cp_name")
  }
  # seq_along instead of 1:length() so that empty input runs zero iterations
  for (i in seq_along(cp_name)){
    import.AirBase(cp_name=cp_name[i], code=cp_code[i], period=cp_period[i], country=country)
  }
  invisible(NULL)
}
##

######################################################################
######################################################################