## marathon paces by race date given race length ## (C) Dirk Eddelbuettel 2005 - 2008 ## GNU GPL ## we need lattice and MASS suppressMessages(library(lattice)) suppressMessages(library(MASS)) ## this gets the data from the embedded text ## and does some simple transformations getData <- function() { Lines <- " Date Race Pace Dist Group 2001-07-13 5k 8.13 3.1 Short 2001-10-21 10k 7.40 6.2 Mid 2002-08-08 CC 7.237 3.5 Short 2002-10-05 5k 7.01 3.1 Short 2003-01-01 5k 7.23 3.1 Short 2003-04-06 5k 6.47 3.1 Short 2003-04-26 10m 8.10 10 Mid 2003-05-11 5k 7.33 3.1 Short 2003-08-03 Half 7.59 12.4 Long 2003-08-07 CC 6.5829 3.5 Short 2003-09-07 Half 7.499 13.1 Long 2003-09-28 8k 7.208 5 Mid 2003-11-27 10k 7.089 6.2 Mid 2004-03-28 8k 7.2773 5 Mid 2004-04-04 5k 6.462 3.1 Short 2004-04-25 10m 7.378 10 Mid 2004-05-08 Half 7.385 13.1 Long 2004-05-29 10m 7.119 10 Mid 2004-06-03 CC 6.3839 3.5 Short 2004-06-06 Half 7.3756 13.1 Long 2004-06-20 8k 6.5232 5 Mid 2004-08-01 Half 7.3179 13.1 Long 2004-09-19 Half 7.1979 13.1 Long 2004-10-10 Marathon 7.5840 26.2 Long 2004-10-24 10k 7.1147 6.2 Mid 2004-11-25 10k 7.2337 6.2 Mid 2005-01-01 5k 6.4762 3.1 Short 2005-03-20 Half 7.3211 13.1 Long 2005-04-10 5k 6.3509 3.1 Short 2005-05-26 CC 6.1903 3.5 Short 2005-05-30 Marathon 7.5706 26.2 Long 2005-09-13 10k 7.1824 6.2 Mid 2005-09-25 Half 7.1271 13.1 Long 2005-10-09 Marathon 7.41 26.2 Long 2005-11-24 10k 6.3934 6.2 Mid 2006-01-01 5k 6.2144 3.1 Short 2006-03-19 Half 7.1900 13.1 Long 2006-04-02 8k 6.4080 5 Mid 2006-04-09 5k 6.1900 3.1 Short 2006-05-25 CC 6.3691 3.5 Short 2006-06-03 Marathon 7.3298 26.2 Long 2006-07-22 10m 7.1146 10 Mid 2006-08-13 Half 7.1191 13.1 Long 2006-10-01 Half 7.0395 13.1 Long 2006-10-08 10k 6.3758 6.2 Mid 2006-10-14 5k 6.14706 3.1 Short 2006-10-22 Marathon 7.3557 26.2 Long 2006-11-05 5k 6.2202 3.1 Short 2006-11-23 10k 6.4565 6.2 Mid 2007-01-01 5k 6.3355 3.1 Short 2007-03-18 Half 6.5911 13.1 Long 2007-04-17 Marathon 7.57 26.2 Long 2007-05-24 CC 6.2226 3.5 Short 2007-09-09 Half 7.1188 13.1 Long 2007-10-07 Marathon 8.276 26.2 Long 2007-10-13 5k 6.2294 3.1 Short 2007-10-21 10k 6.3925 6.2 Mid 2007-11-04 Marathon 7.35 26.2 Long 2008-03-16 Half 7.2215 13.1 Long 2008-04-13 Marathon 7.4545 26.2 Long 2008-05-10 10m 7.13 10 Mid 2008-05-22 CC 5.5619 3.5 Short 2008-09-15 Half 6.56 13.1 Long 2008-09-28 Marathon 7.2233 26.2 Long " con <- textConnection(Lines) Races <- read.delim(con, sep=" ") close(con) Races$Date <- as.Date(Races$Date) ## seconds as fractions of 100 Races$ModPace <- (Races$Pace - floor(Races$Pace)) / 0.6 + floor(Races$Pace) Races$Race <- ordered(Races$Race, levels=c("5k", "CC", "8k", "10k", "10m", "Half", "Marathon")) Races$Group <- ordered(Races$Group, levels=c("Short", "Mid", "Long")) Races$ModRace <- Races$Race ## assign the JPM Chase Corp Challenges (3.5 miles) to 5k Races[which(Races$Race=='CC'),"ModRace"] <- "5k" ## and the Shamrock Shuffle etc 5 miler to the 10k group Races[which(Races$Race=='8k'),"ModRace"] <- "10k" with(Races, table(Dist)) with(Races, table(Race)) with(Races, table(Group)) Races } ## custom panel.line function to only act on at least four points ## and to use robust lm panel.myline <- function(x, y, ...) { if (length(x) >= 4) { ##panel.abline(lm(as.numeric(y) ~ as.numeric(x)), ...) panel.abline(rlm(as.numeric(y) ~ as.numeric(x)), ...) ##panel.loess(y, x, ...) } } ## convert imperial to metric miles2km <- function(x) { min <- floor(x) sec <- (x - min)*60# fractional seconds to actual, ie '7.5' is 7 min 30 seconds totsec <- (min*60 + sec) kmtot <- totsec/1.609344 kmmin <- floor(kmtot/60) kmrawsec <- (kmtot - kmmin*60) kmsec <- kmrawsec/60# actual seconds to fraction, ie 15 sec becomes 0.25 return(kmmin+kmsec) } ## and metric to imperial km2miles <- function(x) { min <- floor(x) sec <- (x - min)*60# fractional seconds to actual, ie '7.5' is 7 min 30 seconds totsec <- (min*60 + sec) miletot <- totsec*1.609344 milemin <- floor(miletot/60) milerawsec <- (miletot - milemin*60) milesec <- milerawsec/60# actual seconds to fraction, ie 15 sec becomes 0.25 return(milemin+milesec) } ## see ?axis.default in the lattice docs for this yscale.components.edd <- function(...) { ans <- yscale.components.default(...) ans$right <- ans$left lowval <- floor(miles2km(ans$num.limit[1])) hival <- ceiling(miles2km(ans$num.limit[2])) kmseq <- seq(lowval,hival,by=0.25) mileseq <- sapply(kmseq, km2miles) ans$right$ticks$at <- mileseq ans$right$labels$at <- mileseq ans$right$labels$labels <- format(kmseq) ans } ## preferred plot showPlot <- function(Races) { ## lattice setup lattice.options(default.theme = canonical.theme(color = TRUE)) trellis.par.set("strip.border" = list(col="white")) trellis.par.set("background" = list(col="white")) trellis.par.set("axis.line" = list(col="darkgrey")) trellis.par.set("strip.background" = list(col="lightgrey")) trellis.par.set("par.main.text"=list(cex=1.25, col='black')) trellis.par.set("par.sub.text"=list(cex=0.9, col='darkgrey', font=1, cex=0.8)) trellis.par.set("par.xlab.text"=list(cex=0.9, col='darkgrey')) trellis.par.set("par.ylab.text"=list(cex=0.8, col='darkgrey')) trellis.par.set("plot.symbol" = list(col = "mediumblue", pch=16, cex=0.9)) ## myColors <- c( "#377EB8") # "#E41A1C") print(xyplot(ModPace ~ Date | ModRace, data=Races, layout=c(5,1), scales=list(y=list(alternating=3)), yscale.components = yscale.components.edd, main="Running performance by race distance from 2001 to 2008", sub=paste("Dotted line from robust linear regression, solid line from nonparametrics Loess regression.\n", "Seven 3.5 mile races folded into 5k category; four 5 mile races folded into 10k."), ylab="Pace in minutes/mile, and minutes/km on other axis", xlab="Date of race", panel=function(x,y) { panel.xyplot(x,y); panel.loess(x, y, span=1.5, col="darkblue") panel.myline(x, y, col="mediumblue", lty='dotted') })) invisible(NULL) } Races <- getData() if (exists("doPDF") && doPDF) { pdf("/tmp/marathonPaceByDistance.pdf", width=10, heigh=5) showPlot(Races) dev.off() } if (exists("doPNG") && doPNG){ png("/tmp/marathonPaceByDistance.png", width=800, heigh=400, pointsize=9) showPlot(Races) dev.off() } showPlot(Races)