######################################################################
# author: M. Rehr
# LaMo: 2013-03-04
######################################################################

##############################################
##
## Summaries
##    descr(data)
##    sum.bp.out(bp_data, sns, cp_name)
##
## Boxplot Outliers 
##    out.bp(data)
##    add.out.bp(data, out)
##
## Panel Functions
##    panel.fct.bp.out(x, y, subscripts, pnts, low, high)
##
##############################################


######################################################################
# Summaries
######################################################################

###########################
# summaries from Hmisc::describe plus percentages
descr <- function(data){
  #require(Hmisc)
  d <- describe(data$Value)
  counts <- as.numeric(d$counts)
  names(counts) <- names(d$counts)
  #summary <- counts[1:4]
  n_stations <- length(unique(data$Station))
  names(n_stations) <- 'n_stations'
  n_measurements <- 365
  if(cp_name!='pm10'){n_measurements <- 24*n_measurements}
  names(n_measurements) <- 'n_measurements'
  missing.p <- 100*counts[2]/(counts[1]+counts[2])
  names(missing.p) <- 'missing%'
  unique.p <- counts[3]/(counts[1]+counts[2])
  names(unique.p) <- 'unique%'
  mean <- counts[4]
  names(mean) <- 'mean'
  summary <- list(n_measurements, n_stations, missing.p, unique.p, mean)
  summary <- unlist(summary)
  quantiles <- counts[5:11]
  lowest <- as.numeric(d$values[1:5])           
  highest <- as.numeric(d$values[6:10])
  descr <- list(summary, quantiles, lowest, highest)
  names(descr) <- c('summary', 'quantiles', 'lowest', 'highest')
  return(descr)
}
###########################

###########################
# summaries for boxplot outliers
sum.bp.out <- function(bp_data, sns, cp_name){
  if (cp_name=='pm10') {l <- 365} else {l <- 365*24}
  n <- table(bp_data$group)
  p <- table(bp_data$group)/l*100
  low <- bp_data$stats[1,]
  low <- low[which(is.na(low)==FALSE)]
  high <- bp_data$stats[5,]  
  high <- high[which(is.na(high)==FALSE)]
  
  names(low) <- names(high) <- factor(bp_data$names[sns])
  names(n) <- names(p) <- factor(bp_data$names[as.numeric(names(n))])
  
  d1 <- t(data.frame(low,high))
  d2 <- t(data.frame(n,p)[,-c(1,3)])
  colnames(d2) <- names(n)
  rownames(d2) <- c('number', 'percentage')
  
  missing <- which(colnames(d1)%in%colnames(d2)==F)
  if(length(missing)>0) {
    d2missings <- data.frame(rbind(number=rep(0,length(missing)), rep(0,length(missing))))
    colnames(d2missings) <- colnames(d1)[missing]
    d2 <- cbind(d2, d2missings)
  }
  bp_out <- rbind(d1, d2)
  
  return(bp_out)
}
###########################


######################################################################
# Boxplot Outliers
######################################################################

###########################
# data.frame out of boxplot outliers
out.bp <- function(data){
  bp <- boxplot(Value ~ Station, data = data, plot=FALSE)
  out <- list()  
  out$Value <- bp$out
  out$Station <- factor(bp$names[bp$group])
  out$bpFlag <- rep(TRUE, times=length(out$Value))
  out <- data.frame(out)
  return(out)
}
###########################

###########################
# add bpFlag to data
add.out.bp <- function(data, out){
  data <- merge(data, out, by=c('Station','Value'), all.x=TRUE, sort=FALSE)
  data <- data[order(data$Station, data$Time), ]
  data$bpFlag <- sapply(data$bpFlag,isTRUE)
  data$bpFlag <- as.factor(data$bpFlag)
  is.na(data$bpFlag) <- is.na(data$Value)
  return(data)
}
###########################


######################################################################
# Panel Functions
######################################################################

###########################
# panel.function for xy_DEM
panel.fct.bp.out <- function(x, y, subscripts, pnts, low, high){
  # NAs        
  panel.abline(v=x[which(is.na(y))], col=grey(.85), lwd=0.5)
  # time series
  panel.xyplot(x, y, type="l")
  # boxplot outliers 
  if (pnts==TRUE){
    idx <- which(data$bpFlag[subscripts]==TRUE)
    panel.points(x[idx], y[idx], col='orange', pch=20, cex=1.1)
  }
  # boxplot thresholds
  pn <- packet.number()
  panel.abline(h=low[pn], col='orange', lwd=0.5)
  panel.abline(h=high[pn], col='orange', lwd=0.5)
}
###########################

######################################################################
######################################################################