#!/usr/bin/env r
##
## Plot the average pace of each 5k segment for a number of marathon races.
##
## Split times are screen-scraped, eg for Chi 2007
#Dirk Eddelbuettel | Bib # 4367 | River Forest, IL - CAN | Age 41 | M
#START   5K      10K     15K     20K     HALF    25K     30K     35K     40K     FINISH
#0:01:05 0:22:34 0:45:30 1:09:00 1:34:27 1:39:47 2:00:05 2:29:30 2:58:45 3:30:49 3:41:39
#
#TIME Chip Time: 3:41:39 Clock: 3:42:44 Pace: 8:27
#Placement Overall: 1822 Gender: 1461 Division: 259

## Each vector holds the cumulative times at the 5k, 10k, ..., 40k markers
## followed by the finish time (the start and half-marathon splits are not
## used).

# 2008
# London under bib #53208
london08 <- c("0:22:12", "0:45:51", "1:09:42", "1:33:48", "1:57:57",
              "2:22:57", "2:49:07", "3:14:27", "3:24:41")

# 2007 ny 25k imputed
boston07 <- c("0:24:11", "0:48:34", "1:12:25", "1:37:17", "2:02:44",
              "2:28:28", "2:53:45", "3:17:59", "3:28:24")
chicago07 <- c("0:22:34", "0:45:30", "1:09:00", "1:34:27", "2:00:05",
               "2:29:30", "2:58:45", "3:30:49", "3:41:39")
newyork07 <- c("0:23:15", "0:46:48", "1:10:09", "1:33:38", "1:57:38",
               "2:21:39", "2:45:31", "3:09:17", "3:18:47")

# 2006
chicago06 <- c("0:23:04", "0:46:25", "1:09:30", "1:32:39", "1:56:11",
               "2:19:51", "2:44:15", "3:08:24", "3:18:49")

# 2005 15k 'imputed'
chicago05 <- c("0:23:38", "0:47:25", "1:10:30", "1:33:39", "1:57:42",
               "2:21:23", "2:46:13", "3:11:19", "3:21:36")

# 2004 5k guessed as half of 10k
chicago04 <- c("0:23:52", "0:47:43", "1:11:11", "1:34:56", "1:59:06",
               "2:24:33", "2:51:02", "3:18:05", "3:28:56")

## two quick helper functions

## Turn time strings of the form "0:22:33" into seconds.
##
## x: character vector of "h:mm:ss" strings; "mm:ss" and plain "ss" are
##    accepted as well since the fields are read right-aligned.
## Returns a numeric vector of the same length as x.  Strings with more
## than three ':'-separated fields raise an error.
totsec <- function(x) {
  vapply(strsplit(x, ":", fixed = TRUE), function(fields) {
    if (length(fields) > 3) {
      stop("time string has more than three ':'-separated fields",
           call. = FALSE)
    }
    ## base-60 fold: ((h * 60) + m) * 60 + s, whatever the number of fields
    Reduce(function(acc, value) acc * 60 + value, as.numeric(fields), 0)
  }, numeric(1))
}

## Turn total seconds into a pace (minutes per mile) as a real number,
## so that 7:30 becomes 7.5 and 7:45 comes out as 7.75 (ie 7 3/4).
##
## totsec:   numeric vector of seconds needed to cover 'distance'.
## distance: distance covered, in miles; the default 3.1 is roughly 5 km.
## Returns a numeric vector of decimal minutes per mile.
totsec2pace <- function(totsec, distance = 3.1) {
  ## seconds per mile, expressed in minutes; the whole-minute / remainder
  ## split of the earlier version reduces to exactly this
  totsec / distance / 60
}

## convert all time-vectors to pace vectors
## forms ad-hoc list of data and uses lapply to loop over the list
## for each of the data vectors, we transform to seconds, then take differences
## to get the per-5k-segment time and convert those into paces
## note how the vector is pre-padded with a zero to not drop the 1st in differencing
## and how the last segment from 40km to 42.195km is scaled up to 5km equivalent
paces <- lapply(
  list(chicago04, chicago05, chicago06, boston07, chicago07, newyork07,
       london08),
  function(x) {
    segment_sec <- diff(c(0, totsec(x)))
    totsec2pace(segment_sec * c(rep(1, 8), 5 / 2.195))
  }
)

xvec <- c(seq(5, 40, by = 5), 42.195)

## combine into matrix, set row and col names
pacemat <- do.call(cbind, paces)
colnames(pacemat) <- c(paste("Chicago", 2004:2006), "Boston 2007",
                       "Chicago 2007", "New York 2007", "London 2008")
rownames(pacemat) <- xvec

colvec <- c("darkblue", "mediumblue", "blue", "darkgreen", "lightblue",
            "orange", "yellow")

## Draw the pace-per-segment chart on the current graphics device.
##
## Reads the globals xvec, pacemat and colvec.  Graphical parameters are
## changed for the duration of the call and restored on exit, also when
## plotting fails.  Returns NULL invisibly.
plotPaces <- function() {
  ## plot with some formatting and eye-candy
  opar <- par(bg = "gray95", mar = c(4.25, 4.5, 3, 1), las = 1, lwd = 1,
              cex.axis = 0.9, cex.lab = 0.9)
  ## restore plot defaults however we leave the function
  on.exit(par(opar), add = TRUE)

  races <- seq_len(ncol(pacemat))
  ## one plotting symbol per race, shared by the lines and the legend so
  ## that the two cannot disagree
  symbols <- 14 + races
  halfmin <- seq(6.5, 10, by = 0.5)

  ## set up plot region, titles, ... but don't actually plot (i.e. type='n')
  plot(xvec, pacemat[, 1], ylim = range(pacemat), ylab = "Pace in min/mile",
       type = "n", yaxt = "n", xaxt = "n",
       xlab = "kilometer marker corresponding to endpoint of segment for which its average pace is shown",
       main = "Pace per 5k segment in different marathon races")

  ## put some lightgray horizontal gridlines on the 30sec marks
  abline(h = halfmin, col = "lightgray")

  ## plot all races in a loop
  for (i in races) {
    lines(xvec, pacemat[, i], type = "b", pch = symbols[i], col = colvec[i],
          lwd = 2)
  }

  ## pretty-print y-axis as usual "min:sec" paces, x-axis with km markers
  axis(2, at = halfmin,
       labels = c("6:30", "7:00", "7:30", "8:00", "8:30", "9:00", "9:30",
                  "10:00"))
  axis(1, at = xvec, labels = paste(round(xvec, digits = 1), "k"))

  ## show a legend
  legend("topleft", legend = colnames(pacemat), bty = "n", lwd = 2,
         pch = symbols, col = colvec)

  invisible(NULL)
}

## Write the chart to a PNG file.
##
## filename: path of the PNG file to create.
## The device is closed on exit even if plotting fails.  Returns the
## filename invisibly.
pngPlot <- function(filename = "/tmp/london2008.png") {
  png(filename, width = 800, height = 600, pointsize = 9)
  on.exit(dev.off(), add = TRUE)
  plotPaces()
  invisible(filename)
}

plotPaces()