#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2010 Clay Baenziger.  All rights reserved.
#


# office location code for every team member, keyed by the member's name
locations <- c(
	"Person 1"  = "CO", "Person 2"  = "MA", "Person 3"  = "CO", "Person 4"  = "CO",
	"Person 5"  = "GA", "Person 6"  = "CO", "Person 7"  = "CO", "Person 8"  = "MA",
	"Person 9"  = "OR", "Person 10" = "CA", "Person 11" = "CO", "Person 12" = "CA",
	"Person 13" = "CO", "Person 14" = "CA", "Person 15" = "CO", "Person 16" = "CO",
	"Clay"      = "CO", "Person 18" = "CA", "Person 19" = "CA", "Person 20" = "CA",
	"Person 21" = "CA", "Person 22" = "CA", "Person 23" = "CA", "Person 24" = "CA")

# require graphics so that plots can be done when run as a script
require(graphics)
# grid allows us to rotate the viewport
require(grid)
# maximum number of observations (e-mail times) read per person
maxObs <- 500
# read.table needs one column name per field; the names themselves don't
# mean much, but their count puts an upper bound on the number of mails
colNames <- c("name", seq_len(maxObs))
# first read in data; fill=TRUE pads rows that have fewer fields than
# colNames so that every row ends up with the same number of columns
rawData <- read.table("./publishable.csv", sep = ",", header = FALSE,
					col.names = colNames, fill = TRUE)
# next apply row names, taken from the "name" column
row.names(rawData) <- as.character(rawData[["name"]])
# truncate off people names: drop only the first column so that all
# maxObs observation columns are kept (indexing 2:maxObs would stop one
# column short, because the table has maxObs + 1 columns)
rawData <- rawData[-1]
# convert to a matrix
matrixData <- as.matrix(rawData)

# Convert date-times into 24-hour clock readings expressed as numbers.
#   input:  a "POSIXlt" vector (anything as.POSIXct() accepts),
#           e.g. one holding 20:39:40
#   output: a numeric vector of HHMM values rounded to the nearest
#           minute, e.g. 2040; leading zeros vanish (08:05:10 -> 805)
#           and NA inputs stay NA
posixToNumeric<-function(a) {
	# snap every timestamp to its nearest whole minute
	nearestMinute <- round(as.POSIXct(a), "mins")
	# render as a zero-padded 24-hour HourMinute string
	hourMinute <- format(nearestMinute, format="%H%M")
	# hand the clock reading back as a plain number
	as.numeric(hourMinute)
}
# Parse "%H:%M:%S" clock strings (e.g. "20:39:40") into a "POSIXlt"
# object; strings that do not match the format come back as NA
strToPosix<-function(a) {
	strptime(a, format = "%H:%M:%S")
}

# parse each person's row of time strings; strToPosix returns a
# list-based "POSIXlt" object, so the result is a list with one entry
# per row of matrixData, named after the row names
posixData <- apply(X = matrixData, MARGIN = 1, FUN = strToPosix)
# turn each person's parsed times into numeric HHMM values
finalData <- lapply(X = posixData, FUN = posixToNumeric)

# names of everyone in the data set, in row order
personNames <- row.names(matrixData)

# total number of e-mails per person: the count of non-NA observations,
# computed in one pass and stored as a one-column matrix with one row
# per person
totals <- matrix(
	vapply(personNames,
		   function(nm) sum(!is.na(finalData[[nm]])),
		   integer(1), USE.NAMES = FALSE),
	ncol = 1)

# summary statistics for everyone: one character row per person holding
# the name followed by the summary() of that person's non-NA times.
# finalData already holds the rounded HHMM values, so they are reused
# here instead of repeating the conversion, and the rows are bound
# together once rather than growing the matrix inside a loop
sumStat <- do.call(rbind, lapply(personNames, function(nm) {
	times <- finalData[[nm]]
	c(nm, summary(times[!is.na(times)]))
}))

# build a data frame out of the summary statistics: every column after
# the first holds one statistic, and the first column holds the names,
# which become the row names (drop = FALSE keeps the matrix shape even
# when there is only a single person)
sumStat <- data.frame(
	sumStat[, -1, drop = FALSE],
	row.names = sumStat[, 1])

# the statistics arrived as text, so data.frame() stores them either as
# factors or as character strings depending on the stringsAsFactors
# default of the running R version; as.numeric(as.character(.)) recovers
# the numbers in both cases, whereas indexing levels() only works for
# factors and yields NA for plain character columns
ss <- sumStat
ss[] <- lapply(sumStat, function(statColumn) {
	as.numeric(as.character(statColumn))
})

#
# Produce graphics here
#
# statistic the boxplots are sorted by; it must name a column of ss, so
# check that before a graphics device is opened
sortKey <- "Median"
stopifnot(sortKey %in% names(ss))

# set graph output to png file
png("/tmp/hists.png", width = 1024, height = 768)

	# set the background color
	par(bg = "transparent")
	axesColor <- "lavender"
	# box colors should be muted to not distract the eye
	boxColors <- as.matrix(c(
		"lightblue", "lightgreen", "orange", "brown", "red"))
	# labels for the boxes should be readable
	textColors <- as.matrix(c("blue", "green", "orange", "brown", "red"))

	# get the order of the sorted statistics; sort on the column vector
	# itself rather than on a one-column data frame, which order() does
	# not reliably support
	orderSS <- order(ss[[sortKey]])
	# get the sorted list of statistics
	sortedSS <- ss[orderSS, ]
	# get the sorted list of names
	people <- row.names(sortedSS)
	# to access sorted times by people, store it
	finalData <- finalData[orderSS]
	# keep the e-mail totals in the same sorted order as people
	totals <- totals[orderSS]

	#
	# colorize by office
	#
	# count the people per office and list the offices from most to
	# least populated
	officeCounts <- sort(summary(factor(locations)), decreasing=TRUE)
	ranks <- names(officeCounts)
	# label each color row with the office it stands for
	row.names(textColors) <- ranks
	row.names(boxColors) <- ranks

	# first pass: draw every box uncolored so that the plot region is
	# sized for the whole group before the per-office boxes are added
	par(mar = c(5, 5, 4, 2) + 0.1)
	boxplot(finalData,
			xlim = c(0.5, length(people) + 0.5),
			ylab = "",
			axes = FALSE,
			frame.plot = FALSE,
			names = FALSE,
			varwidth = FALSE,
			# plotting symbol used for outliers
			pch = 25)
	# heading for the row of per-person e-mail totals along the x-axis;
	# xpd lets it be drawn outside the plot region
	text(x = -1.85, y = par("usr")[3] + 1.1,
		 labels = "E-Mails:",
		 adj = c(0, 0), pos = 1, offset = 3.9,
		 xpd = TRUE, col = "black", cex = 0.9, font = 2)
	
	# boxes are drawn one office at a time so that each office gets its
	# own color and its own legend line
	for (loc in ranks) {
		# position of this office in the ranking; picks its colors and
		# its legend line
		slot <- match(loc, ranks)
		# people working at this office and their x-axis positions
		members <- names(locations)[locations %in% loc]
		indices <- match(members, people)
		col <- boxColors[slot]
		boxplot(finalData[members],
				xlim = c(0.5, length(people) + 0.5),
				at = indices,
				width = rep(1, length(indices)),
				col = col,
				add = TRUE,
				axes = FALSE,
				frame.plot = FALSE,
				names = FALSE,
				ylab = "",
				# plotting symbol used for outliers
				pch = 25)
		# person names under the axis, drawn with text() so they can be
		# rotated by 45 degrees
		text(x = indices - 1, y = par("usr")[3] - 0.2,
			 labels = people[indices],
			 pos = 1, offset = 1.5, srt = 45, xpd = TRUE,
			 col = textColors[slot], cex = 1.3)
		# e-mail totals for the same people
		text(x = indices - 2, y = par("usr")[3] + 1.1,
			 labels = totals[indices],
			 pos = 1, offset = 3.9, srt = 45, xpd = TRUE,
			 col = "black", cex = 0.9)
		# legend entry: the office code in its text color, one line
		# lower for each step down the ranking
		mtext(text = paste(loc, "\n", sep=""), side = 3,
			 line = -slot - 2, adj = 0,
			 col = textColors[slot], cex = 1.3)
	}
	# y-axis: one tick every 200 HHMM units (two hours), labelled as
	# clock times
	hourLabels <- c(
		"00:00 MST", "02:00 MST", "04:00 MST", "06:00 MST", "08:00 MST",
		"10:00 MST", "12:00 MST", "14:00 MST", "16:00 MST", "18:00 MST",
		"20:00 MST", "22:00 MST", "24:00 MST")
	axis(side = 2, at = seq(0, 2400, by=200), labels = hourLabels,
		 col = axesColor, las = 2, cex.axis = 0.95)
	# heading above the per-office legend lines
	mtext(text = "Locations:\n", side = 3, line = -2, adj = 0,
		 cex = 1.3, font = 2)
	# dashed reference lines at both ends of the day (0 and 2400)
	abline(h = c(0, 2400), lty = 2, col = axesColor)
	title(main = "E-Mail Hour-of-the-Day Times", cex.main = 2, font.main = 2)
# close the png device so the file is written out
dev.off()