######################################################################
# author: M. Rehr
# last modified: 2012-03-04
######################################################################

############################################## Settings and Data
##
## Settings
##    settings
##
## DEM data
##    load_DEM
##    DEM_pm10
##    DEM_cy
##
## AirBase data
##    load_NL (not shown in the report)
##    load_CZ
##    AirBase_pm10_CZ
##
##############################################

######################################################################
# Settings
######################################################################

###########################
## @knitr settings


# set path

#getwd()
#"C:/Users/mrehr_01/Documents/ETC-ACM/subtask 1.0.1.2-5b"
#set to "C:/Users/mrehr_01/Documents/ETC-ACM/subtask 1.0.1.2-5b/Scripts"
#path <- './Scripts'
#setwd(path)


# load scripts and packages

# preliminary data analysis:
# Helper scripts and packages are loaded in a fixed order, grouped by the
# analysis step that needs them.

# preliminary data analysis
source("./helper_functions/helper_settings.R")
source("./helper_functions/helper_prelim.R")
library(Hmisc)         # describe
library(lattice)       # xyplot
library(latticeExtra)  # xyplot.stl, as.layer
library(zoo)           # zoo ts objects

# outlier detection (additionally)
source("./helper_functions/helper_out.R")
library(caTools)       # runquantile

# break detection (additionally)
source("./helper_functions/helper_break.R")
library(kza)           # Kolmogorov-Zurbenko (adaptive) filter


# tune details of lattice graphics

# non-graphical defaults applied to lattice calls
lattice.options(default.args = list(as.table = TRUE, aspect = "xy"))

# scales parameter for lattice graphics
scales <- list(tck = c(0.8, 0), alternating = c(1, 0), cex = 1.2)

# graphical options: chunk hook "par_lattice" sets the trellis parameters
# before the chunk is evaluated (before == TRUE) and does nothing afterwards
knit_hooks$set(par_lattice = function(before, options, envir) {
  if (before) {
    trellis.par.set(list(
      strip.background = list(col = grey(0.95)),
      par.xlab.text = list(cex = 1.3),
      par.ylab.text = list(cex = 1.3)
    ))
  }
})
###########################

######################################################################
# DEM data
######################################################################

# read DEM (component-wise: pm10, co, so2, no2, o3)
#cp_name <- "pm10"
# "co"
# "so2"
# "no2"
# "o3"

###########################
## @knitr load_DEM

# Load the DEM data for the component named in `cp_name`; `cp_name` has to be
# set by the calling chunk before this chunk is run.
long <- load.allctrs.DEM(cp_name)
#str(long)

# consider also (log10, sqrt) transformed variables
long <- add.transforms(long)

# The transformation can produce non-finite values. NaN already counts as
# missing for is.na(); to inspect the rows that became missing only through
# the transformation:
#nans_sqrt <- long[which(sapply(long$sqrtValue,is.na)!=sapply(long$Value,is.na)),]
#nans_log10 <- long[which(sapply(long$log10Value,is.na)!=sapply(long$Value,is.na)),]

# Infinite values are treated as additional NAs here. is.infinite() is
# vectorised and never returns NA, so it can be used directly as a logical
# index; this also works for a column of length zero.
long$sqrtValue[is.infinite(long$sqrtValue)] <- NA
long$log10Value[is.infinite(long$log10Value)] <- NA
#head(long)

# data frames per country
ro <- subset(long, Country == "ro")
ch <- subset(long, Country == "ch")
cy <- subset(long, Country == "cy")

# number of stations per country
n_ro <- length(unique(ro$Station))
n_ch <- length(unique(ch$Station))
n_cy <- length(unique(cy$Station))

# Indices of stations, numbered consecutively over ro, ch, cy.
# seq_len() returns an empty vector for a country without stations, whereas
# seq(1:n) would return 1:2 for n == 0.
sns_ro <- seq_len(n_ro)
sns_ch <- seq_len(n_ch) + n_ro
sns_cy <- seq_len(n_cy) + n_ro + n_ch

# set colors (one per country, in the order ro, ch, cy)
ro_col <- rgb(0, 0, 1)
ch_col <- rgb(0, 1, 0)
cy_col <- rgb(.1, .7, .8)
cols <- c(ro_col, ch_col, cy_col)
###########################

###########################
## @knitr DEM_pm10
# select the PM10 component, build the axis labels for it, then run the
# generic DEM loading chunk, which reads `cp_name`
cp_name <- "pm10"
xlab <- make.xlab.DEM(cp_name)
ylab <- make.ylab(cp_name)
run_chunk("load_DEM")
###########################

###########################
## @knitr DEM_cy
# select the country "cy" for the following chunks:
# its data frame, its station indices and a panel layout for its stations
country <- "cy"
data <- cy
sns <- sns_cy
layout <- make.layout(n_cy)
###########################

######################################################################
# AirBase data
######################################################################

# read AirBase country-wise (specifying component and period: pm10 daily, pm10 hourly, so2 hourly, o3 hourly)
# selected stations are: 
# NL: 107, 235, 411, 633 (O3 and NO2)
# CZ: AREP, MBEL, TNUJ, TOPO, AVYC/AVYN, UTEM/UTPM (TOPO only for PM10)

#cp_name <- "pm10"   # "so2"  # "o3"
#cp_period <- "day"  # "hour"

###########################
## @knitr load_NL

# load NL data
# AirBase data for NL; component and period are taken from `cp_name` and
# `cp_period`, which the calling chunk has to set beforehand
NL <- load.AirBase(country = "NL", cp_name, cp_period)
# number of stations
#n_NL <-length(unique(NL$Station)) 
# number of station locations
#n_loc_NL <- length(unique(substr(NL$Station, 1, 7)))

# consider only subset of interest at a time because of size for hourly data
# NL: 107, 235, 411, 633 (O3 and NO2)
# a station is selected by characters 5 to 7 of its Station string
NL_stations <- c("107", "235", "411", "633")
NL_sub <- subset(NL, is.element(substr(NL$Station, 5, 7), NL_stations))

# no change dates are given for the NL stations
changes <- NA

# axis labels
xlab <- make.xlab.AirBase(cp_period)
ylab <- make.ylab(cp_name)

# panel layout for the selected stations
n_sub_NL <- length(unique(NL_sub$Station))
layout <- make.layout(n_sub_NL)

data <- NL_sub
###########################

###########################
## @knitr load_CZ

# load CZ data
# AirBase data for CZ; component and period are taken from `cp_name` and
# `cp_period`, which the calling chunk has to set beforehand
CZ <- load.AirBase(country="CZ", cp_name, cp_period)

# consider only subset of interest at a time because of size for hourly data
# CZ: AREP, MBEL, TNUJ, TOPO, AVYC/AVYN, UTEM/UTPM (TOPO only for PM10)

# NOTE(review): the row-wise replacements below use the NA pattern of one
# station to index the rows of the other one. This assumes that both station
# subsets have the same number of rows in the same order - confirm.

# join AVYC and AVYN
AVYC <- CZ[CZ$Station=="CZ0AVYC-500",]
AVYN <- CZ[CZ$Station=="CZ0AVYN-100",]
AVY <- AVYC
# rows without an AVYC value are taken from AVYN ...
AVY[is.na(AVYC$Value),] <- AVYN[is.na(AVYC$Value),]
# ... and rows without an AVYN value are taken from AVYC
AVY[is.na(AVYN$Value),] <- AVYC[is.na(AVYN$Value),]
#AVY$Time[which.min(is.na(AVYN$Value))]
# the joined series gets one common station label
AVY$Station <- factor("CZ0AVYC-500_2004-01-01_CZ0AVYN-100")
# join UTEM and UTPM (same procedure)
UTEM <- CZ[CZ$Station=="CZ0UTEM-500",]
UTPM <- CZ[CZ$Station=="CZ0UTPM-100",]
UTM <- UTEM
UTM[is.na(UTEM$Value),] <- UTPM[is.na(UTEM$Value),]
UTM[is.na(UTPM$Value),] <- UTEM[is.na(UTPM$Value),]
#UTM$Time[which.min(is.na(UTPM$Value))]
UTM$Station <- factor("CZ0UTEM-500_2008-01-01_CZ0UTPM-100")

# stations used as they are (selected by characters 4 to 7 of the Station
# string), followed by the two joined series
CZ_stations <- c("AREP", "MBEL", "TNUJ", "TOPO")
CZ_sub <- subset(CZ, substr(CZ$Station, 4, 7)%in%CZ_stations)
CZ_sub <- rbind(CZ_sub, AVY, UTM)

# change dates, converted from character to Date; NA means no date
# presumably one list entry per station of CZ_sub - verify the order against
# the code that uses `changes`
changes <- list(NA, "2007-08-01", c("2006-06-01", "2007-08-01", "2008-11-01"), "2009-01-01", "2004-01-01", "2008-01-01")
changes <- lapply(changes, as.Date)

n_sub_CZ <- length(unique(CZ_sub$Station))
layout <- make.layout(n_sub_CZ)
xlab <- make.xlab.AirBase(cp_period)
ylab <- make.ylab(cp_name)

data <- CZ_sub
# rebuild the factor so that it only has the levels that occur in the subset;
# exclude=NULL keeps NA as a level if it occurs
data$Station <- factor(data$Station, exclude=NULL)
###########################

###########################
## @knitr AirBase_pm10_CZ
# daily PM10 for CZ: set component and period, then run the CZ loading chunk
cp_name <- "pm10"
cp_period <- "day"
run_chunk("load_CZ")
# set k and run the chunk labelled "runs"
k <- 10
run_chunk("runs")
###########################

######################################################################
######################################################################