#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2010 Clay Baenziger.  All rights reserved.
#

# define office location for team members
# (the n-th location belongs to the n-th name below)
locations <- c(
  "CO", "MA", "CO", "CO", "GA", "CO", "CO", "MA",
  "OR", "CA", "CO", "CA", "CO", "CA", "CO", "CO",
  "CO", "CA", "CA", "CA", "CA", "CA", "CA", "CA"
)
names(locations) <- c(
  "Person 1", "Person 2", "Person 3", "Person 4",
  "Person 5", "Person 6", "Person 7", "Person 8",
  "Person 9", "Person 10", "Person 11", "Person 12",
  "Person 13", "Person 14", "Person 15", "Person 16",
  "Clay", "Person 18", "Person 19", "Person 20",
  "Person 21", "Person 22", "Person 23", "Person 24"
)

# load graphics so that plots can be done when run as a script
# (library() stops with an error if a package is missing, whereas
# require() would only return FALSE and let the script carry on)
library(graphics)
# grid allows us to rotate the viewport
library(grid)

# maximum observations (e-mail time stamps) read per person
maxObs <- 500

# read.table needs a fixed set of column names because the rows are
# ragged; the names themselves don't mean much, but they put an upper
# bound on the number of mails read per person
colNames <- c("name", seq_len(maxObs))

# first read in data: one row per person, the name in the first field,
# followed by that person's time stamps; short rows are padded
rawData <- read.table("./publishable.csv",
                      sep = ",",
                      header = FALSE,
                      col.names = colNames,
                      fill = TRUE)

# next apply row names (as.character copes with both factor and
# character name columns)
row.names(rawData) <- as.character(rawData[["name"]])

# truncate off people names: drop the name column and keep ALL maxObs
# observation columns (selecting 2:maxObs would lose the last one,
# because the frame has maxObs + 1 columns)
rawData <- rawData[-1]

# convert to a matrix
matrixData <- as.matrix(rawData)

# Convert date-times into an hour/minute clock reading stored as a number.
#   a:       date-time values (this script passes the "POSIXlt" values
#            produced by strToPosix)
#   returns: numeric %H%M after rounding to the nearest minute, e.g.
#            "20:39:40" (8:39:40PM) becomes 2040; NA inputs stay NA
posixToNumeric <- function(a) {
  nearestMinute <- round(as.POSIXct(a), "mins")
  as.numeric(format(nearestMinute, format = "%H%M"))
}

# Convert "%H:%M:%S" character strings to "POSIXlt".
#   a:       character vector such as "20:39:40"; anything that does not
#            parse (including the NA / empty padding of short rows)
#            becomes NA
#   returns: "POSIXlt" vector of the same length
# Only the clock time matters here.  strptime fills in today's date, so
# the zone is pinned to UTC: in a zone with daylight saving a clock time
# can be nonexistent on the day the script happens to run.
strToPosix <- function(a) {
  strptime(a, "%H:%M:%S", tz = "UTC")
}

# apply conversion function to every row (person); the rest of the
# script relies on the result being a list indexed by person name
posixData <- apply(matrixData, 1, strToPosix)

# apply conversion function: one numeric HHMM vector per person
finalData <- lapply(posixData, posixToNumeric)

# NOTE(review): HHMM values are not a linear time scale (there is no
# 0960..0999), so means/quartiles computed on them are approximations.
# Kept as in the original so the y-axis encoding stays unchanged.

# observations actually present for each person (NA padding removed)
observed <- lapply(finalData, function(x) x[!is.na(x)])

# total number of e-mails per person
totals <- vapply(observed, length, integer(1))

# summary statistics for everyone: one row per person, kept numeric
# throughout.  (Round-tripping through character/factor columns and
# converting back with as.numeric(levels(x))[x] yields only NA when the
# columns are not factors, which is the data.frame default since R 4.0.)
sumStat <- do.call(rbind,
                   lapply(observed, function(x) unclass(summary(x))))

# data.frame() makes the column names syntactic:
# Min., X1st.Qu., Median, Mean, X3rd.Qu., Max.
ss <- data.frame(sumStat)

#
# Produce graphics here
#

# draw a graph of boxplots showing everyone's times sorted by sortKey
sortKey <- "Median"

axesColor <- "lavender"

# box colors should be muted to not distract the eye
boxColors <- c("lightblue", "lightgreen", "orange", "brown", "red")
# labels for the boxes should be readable
textColors <- c("blue", "green", "orange", "brown", "red")

# get the order of the sorted statistics ([[ ]] extracts the numeric
# column; ordering a one-column data frame is not supported)
orderSS <- order(ss[[sortKey]])
# get the sorted list of statistics
sortedSS <- ss[orderSS, ]
# get the sorted list of names
people <- row.names(sortedSS)
# to access sorted times by people, store it
finalData <- finalData[orderSS]
# to access sorted totals by people, store it
totals <- totals[orderSS]

#
# colorize by office
#

# get ranks of each office (most populated office first)
ranks <- names(sort(summary(factor(locations)), decreasing = TRUE))

# setup location to color mapping; checked before the output device is
# opened so a too-short palette cannot leave a half-written file behind
stopifnot(length(ranks) <= length(boxColors),
          length(ranks) <= length(textColors))
boxColors <- setNames(boxColors[seq_along(ranks)], ranks)
textColors <- setNames(textColors[seq_along(ranks)], ranks)

# set graph output to png file
png("/tmp/hists.png", width = 1024, height = 768)

# set the background color
par(bg = "transparent")

# every boxplot call shares the same horizontal extent
xRange <- c(0.5, length(people) + 0.5)

# plot boxes so that we have a plot sized right for doing each
# group
par(mar = c(5, 5, 4, 2) + 0.1)
boxplot(finalData,
        xlim = xRange,
        axes = FALSE,
        frame.plot = FALSE,
        ylab = "",
        # outliers use plotting symbol
        pch = 25,
        varwidth = FALSE,
        names = FALSE)

# bottom edge of the plot region in user coordinates; the labels below
# the axis are positioned relative to it
yBottom <- par("usr")[3]

# draw x-axis e-mail totals label
text(-1.85, yBottom + 1.1,
     offset = 3.9,
     adj = c(0, 0),
     labels = "E-Mails:",
     pos = 1,
     xpd = TRUE,
     col = "black",
     cex = 0.9,
     font = 2)

# plot boxes (need to do it by office for legend to group right)
for (i in seq_along(ranks)) {
  loc <- ranks[[i]]

  # team members of this office that actually occur in the data; a
  # person listed in `locations` but missing from the CSV would
  # otherwise produce NA positions
  members <- names(locations)[locations %in% loc]
  members <- members[members %in% people]

  if (length(members) > 0) {
    # hold indices (x positions in the sorted plot) for this run
    indices <- match(members, people)

    boxplot(finalData[members],
            xlim = xRange,
            axes = FALSE,
            width = rep(1, length(indices)),
            col = boxColors[[loc]],
            at = indices,
            add = TRUE,
            ylab = "",
            # outliers use plotting symbol
            pch = 25,
            frame.plot = FALSE,
            names = FALSE)

    # draw x-axis label (so that we can put it at 45deg angle)
    text(indices - 1, yBottom - 0.2,
         offset = 1.5,
         labels = people[indices],
         srt = 45,
         pos = 1,
         xpd = TRUE,
         col = textColors[[loc]],
         cex = 1.3)

    # draw x-axis e-mail totals
    text(indices - 2, yBottom + 1.1,
         offset = 3.9,
         labels = totals[indices],
         srt = 45,
         pos = 1,
         xpd = TRUE,
         col = "black",
         cex = 0.9)
  }

  # fill in legend text, one line per office below the legend heading
  mtext(paste0(loc, "\n"),
        side = 3,
        adj = 0,
        line = -i - 2,
        col = textColors[[loc]],
        cex = 1.3)
}

# draw the y-axis: a tick every two hours on the HHMM scale
axis(2,
     at = seq(0, 2400, by = 200),
     labels = sprintf("%02d:00 MST", seq(0, 24, by = 2)),
     col = axesColor,
     las = 2,
     cex.axis = 0.95)

# legend heading
mtext("Locations:\n", side = 3, adj = 0, line = -2, cex = 1.3, font = 2)

# dashed guide lines at midnight (0 and 2400)
abline(h = c(0, 2400), lty = 2, col = axesColor)

title("E-Mail Hour-of-the-Day Times", cex.main = 2, font.main = 2)

dev.off()