######################################################################
# author: M. Rehr
# LaMo: 2012-03-04
######################################################################

############################################## Preliminary Data Analysis
##
## DEM data
##    ts_cy
##    descr (not shown in the report)
##    bp
##    sum_bp
##    xy_DEM
##    qq
##
## STL ts decomposition
##    sim
##    stl
##    stl_plot
##    decomposed
##
## AirBase data
##    runs
##    xy_AirBase
##
##############################################

######################################################################
# DEM data
######################################################################

###########################
## @knitr ts_cy
# simple time series plot
xyplot(Value ~ Time | Station, data=data, 
       layout=layout, scales=scales, 
       ylab=parse(text=ylab), xlab=xlab,
       panel = function(x, y) {
         panel.abline(v=x[which(is.na(y))], col=grey(.85), lwd=0.5)
         panel.xyplot(x, y, type="l")
       } )
###########################

###########################
## @knitr descr
# some summaries and percentages (in all and by country)
descr_all <- descr(long)
descr_country <- by(long, long$Country, descr)
###########################

###########################
## @knitr bp
# boxplots ordered by median
bymedian <- reorder(long$Station, long$Value, median, na.rm=TRUE)
ylim <- c(min(long$Value, na.rm=TRUE)-2, max(long$Value, na.rm=TRUE)+2)
cx <- 0.8
cx.a <- 0.9
b.ro <- boxplot(Value ~ bymedian, data=long, xaxt="n",  
                ylab = parse(text=ylab), cex.lab=1,
                cex=cx, cex.axis=cx.a, 
                subset= long$Country == "ro", border=ro_col, ylim=ylim)
b.ch <- boxplot(Value ~ bymedian, data=long, xaxt="n", add=TRUE, 
                cex=cx, cex.axis=cx.a, 
                subset= long$Country == "ch", border=ch_col)
b.cy <- boxplot(Value ~ bymedian, data=long, xaxt="n", add=TRUE,
                cex=cx, cex.axis=cx.a, 
                subset= long$Country == "cy", border=cy_col)
legend("topleft", inset=.01, legend=c("Romania", "Switzerland", "Cyprus"), 
       text.col=cols, box.col="transparent", cex=0.9)
bp_med <- list(bymedian, b.ro, b.ch, b.cy)
names(bp_med) <- c("bymedian", "b.ro", "b.ch", "b.cy")
###########################

###########################
## @knitr sum_bp 
# summaries of boxplot outliers
bp_data <- boxplot(Value ~ Station, data = long, plot=FALSE, subset=long$Country==country)
sum.bp.out(bp_data=bp_data, sns=sns, cp_name=cp_name)
###########################

###########################
## @knitr xy_DEM
# flag bp outliers in data and highlight them in xyplot
out <- out.bp(data)
data <- add.out.bp(data, out)
low <- bp_data$stats[1,]
low <- low[which(is.na(low)==FALSE)]
high <- bp_data$stats[5,]
high <- high[which(is.na(high)==FALSE)]
plot(xyplot(Value ~ Time | Station, data=data, pnts=FALSE, low=low, high=high,
       layout=layout, scales=scales,
       ylab=parse(text=ylab), xlab=xlab,
       panel = function(x, y, subscripts, pnts, low, high, ...) {
         panel.fct.bp.out(x, y, subscripts, pnts, low, high)
       } ) )
###########################

###########################
## @knitr qq
# normal-quantile plots
qqmath(~Value+sqrtValue+log10Value, groups=Country, data=long, 
       col=cols, ylab="", layout=c(3,1), xlab=NULL, key =
         list(x =0.12, y=0.9, corner = c(1,1), border = FALSE,
              text = list(c("Romania", "Switzerland", "Cyprus"),
                          col=cols, cex=0.9)),
       scales=list(tck=c(0.8,0), alternating=c(1,0), x="same", 
                   y=list(relation="free", rot=0)), 
       panel = function(x, y, ...) {     
         panel.qqmath(x, ...)
         panel.qqmathline(x, ...)
       })
###########################

######################################################################
# STL ts decomposition
######################################################################

###########################
## @knitr sim
# simulated time series (with break) for stl demonstration
my_trend <- c(110-0.5*(1:60),60-0.25*c(1:60))
my_seasonal <- c(16.2,16,17.1,16.8,16)*sin(c(1:20,21:30,31:60,61:80,81:120))
set.seed(43623)
my_remainder <- rnorm(120,0,5)
my_ts <- my_trend+my_seasonal+my_remainder
ts <- ts(data=my_ts, frequency=6)
###########################

# another example, longer ts, more breaks (clearly distinguishable)
#trend <- c(40+0.05*(1:200),rep(100,100),rep(75,100),75+0.75*c(1:100),
#           rep(50,300),50+0.5*c(1:50),75-0.1*c(1:150))
#seasonal <- rep(2*c(15.2,15,15.1,14.8,15)*sin(c(1:20,21:30,31:60,61:80,81:100)), times=10)
#error <- rnorm(1000,0,5)
#data <- trend+seasonal+error
#ts <- ts(data=data, frequency=6)

###########################
## @knitr stl
# decompose and build plot of components
stl <- stl(ts, s.window=7)
xy_stl <- xyplot(stl, col=1)
###########################

###########################
## @knitr stl_plot
# build plot of true components
sim <- data.frame(cbind(my_ts, my_seasonal, my_trend, my_remainder))
sim <- as.zoo(sim)
index(sim) <- xy_stl$panel.args[[1]]$x
xy_sim <- xyplot(sim, type=list("l","l","l","h"))
# combine plots
xy_stl+as.layer(xy_sim)+as.layer(xy_stl)
###########################

###########################
## @knitr decomposed
# plot ts and decomposed ts
trend <- stl$time.series[,2]
remainder <- stl$time.series[,3]
# plot
par(mfrow=c(2,1), mar=c(4.2,5.7,2.2,.7), cex=1.2, lwd=1.8, cex.lab=1, cex.axis=0.7, mgp=c(1.7,0.4,0), tcl=-.3, las=1)
plot(ts, ylim=c(0,140))
plot(trend+remainder, ylim=c(0,140))
###########################

######################################################################
# AirBase data
######################################################################

###########################
## @knitr runs
# find runs of equal values (length>=k)
names <- as.character(unique(data$Station))
times <- vector("list", length(names))
for (j in seq(along=names)){
  ts <- subset(data$Value, data$Station==names[j])
  # runs of equal values
  runs <- rle(ts)
  #table(runs$lengths)
  end_all <- cumsum(runs$length)
  start_all <- end_all-runs$length+1
  # runs >=k of equal values 
  start_k <- start_all[which(runs$lengths>=k)]
  end_k <- end_all[which(runs$lengths>=k)]
  #runs$values[start_k]
  #table(runs$values[start_k])
  # indices 
  idx <- vector("list", length(start_k))
  for (i in seq(along=start_k)) {
    idx[[i]] <- seq(start_k[i], end_k[i])
  }
  # time points
  times[[j]] <- data$Time[unlist(idx)]
}
###########################

###########################
## @knitr xy_AirBase
# highlight series of constant values in xyplot
plot(xyplot(Value ~ Time | Station, data=data, times=times, changes=changes,
            layout=layout, ylab=parse(text=ylab), xlab=xlab,
            scales=list(tck=c(0.8,0), cex=1.2, y=list(relation='free', rot=0)),
            panel = function(x, y, times, ...) {
              panel.abline(v=x[which(is.na(y))], col=grey(.85), lwd=0.5)
              panel.abline(v=x[which(y==0)], col='yellow', lwd=0.5)
              panel.abline(v=x[which(y<0)], col='orange', lwd=0.5)
              pn <- packet.number()
              panel.abline(v=unlist(times[pn]), col='green', lwd=0.5, lty=2)
              panel.xyplot(x, y, type="l")
              if (length(changes)==length(unique(data$Station))){
                panel.points(x=unlist(changes[pn]), y=0, pch=17, col=1, cex=1.3)
              }
            }  ) )
###########################

######################################################################
######################################################################