Thinking inside the box

Introduction to High-Performance Computing with R · full

2002-02-25  Dirk Eddelbuettel  <edd@debian.org>

    * Initial 0.1.0 release
#!/bin/bash

## adjust as needed
svn=${HOME}/svn/r-devel

rev=$(cd ${svn} && svn info --show-item revision)
today=$(date +%Y-%m-%d)

echo -n "Extracting ${rev} revisions at ${today} ... "
(cd ${svn} && svn log --limit ${rev} ) | gzip -9 > svn-log-${today}.txt.gz
echo "done"
logfile <- "svn-log-2021-03-20.txt.gz"

## cf http://dirk.eddelbuettel.com/blog/2007/08/11/
x <- readLines(logfile)
rx <- x[grep("^r",x)]
who <- gsub(" ","",sapply(strsplit(rx,"\\|"),"[",2))
twho <- table(who)
twho["ripley"]/sum(twho)
tod <- unlist(sapply(rx,function(x)strsplit(x,split=" ")[[1]][6]))
tod <- tod[who=="ripley"]

tz <- sub(pattern=".*(-[0-9]{4}).*",replacement="\\1",x=rx)
tz <- tz[who=="ripley"]
tz <- as.numeric(tz)/100
offset <- 3600*tz

z <- strptime(tod,format="%H:%M:%S")
hist(z,"hours",main="Ripley Commit Times in SVN TZ")

h <- z - offset
h <- format(h,format="%H")
h <- factor(as.numeric(h), levels=0:23)
## added as.vector() here to suppress a warning
dotchart(as.vector(table(h)), main="Ripley Commit Times, By Hour in GMT",
         labels=paste(0:23,1:24,sep=":"))
## rather extract both  date and time
dat <- unlist(sapply(rx, function(x) {
  txt <- strsplit(x,split=" ")[[1]]
  paste(txt[5], txt[6])
}))
## subset on Prof Ripley
dat <- dat[who == "ripley"]
## and convert to POSIXct, correcting by tz as well
datpt <- as.POSIXct(strptime(dat,format="%Y-%m-%d %H:%M:%S")) - offset

## turn into zoo -- we use a constant series of ones as each
## committ is taken as a timestamped event
library(zoo)
datzoo <- zoo(1, order.by=datpt)
## and use zoo to aggregate into commits per date
daily <- aggregate(datzoo, as.Date(index(datzoo)), sum)

## now plot as grey bars
plot(daily, col='darkgrey', type='h', lwd=2,
     ylab="Nb of SVN commits, three-week median",
     xlab="R release dates 2.5.0 and 2.5.1 shown in orange",
     main="The amazing Prof. Ripley")
## mark the two R releases of 2007
abline(v=c(as.Date("2007-04-24"),as.Date("2007-06-28")),col='orange',lwd=1.5)
## and do a quick centered rolling median
lines(rollmedian(daily, 21, align="center"), lwd=3)
    o Package compiler is now provided as a standard package.  See
      ?compiler::compile for information on how to use the compiler.
      This package implements a byte code compiler for R: by default
      the compiler is not used in this release.  See the 'R
      Installation and Administration Manual' for how to compile the
      base and recommended packages.

> ## cf http://dirk.eddelbuettel.com/blog/2010/09/07#straight_curly_or_compiled
> f <- function(n, x=1) for (i in 1:n) x=1/(1+x)
> g <- function(n, x=1) for (i in 1:n) x=(1/(1+x))
> h <- function(n, x=1) for (i in 1:n) x=(1+x)^(-1)
> j <- function(n, x=1) for (i in 1:n) x={1/{1+x}}
> k <- function(n, x=1) for (i in 1:n) x=1/{1+x}
> ## now load some tools
> library(rbenchmark)
> ## now run the benchmark
> N <- 1e6
> benchmark(f(N,1), g(N,1), h(N,1), j(N,1), k(N,1),
+           columns=c("test", "replications",
+           "elapsed", "relative"),
+           order="relative", replications=10)
     test replications elapsed relative
5 k(N, 1)           10   9.764  1.00000
1 f(N, 1)           10   9.998  1.02397
4 j(N, 1)           10  11.019  1.12853
2 g(N, 1)           10  11.822  1.21077
3 h(N, 1)           10  14.560  1.49119

> ## R 2.13.0 brings this toy
> library(compiler)
> lf <- cmpfun(f)
> lg <- cmpfun(g)
> lh <- cmpfun(h)
> lj <- cmpfun(j)
> lk <- cmpfun(k)
> # now run the benchmark
> N <- 1e6
> benchmark(f(N,1), g(N,1), h(N,1), j(N,1), k(N,1),
+           lf(N,1), lg(N,1), lh(N,1), lj(N,1), lk(N,1),
+           columns=c("test", "replications",
+           "elapsed", "relative"),
+           order="relative", replications=10)
       test replications elapsed relative
9  lj(N, 1)           10   2.971  1.00000
10 lk(N, 1)           10   2.980  1.00303
6  lf(N, 1)           10   2.998  1.00909
7  lg(N, 1)           10   3.007  1.01212
8  lh(N, 1)           10   4.024  1.35443
1   f(N, 1)           10   9.479  3.19051
5   k(N, 1)           10   9.526  3.20633
4   j(N, 1)           10  10.775  3.62673
2   g(N, 1)           10  11.299  3.80310
3   h(N, 1)           10  14.049  4.72871

> ## now with Rcpp and C++
> library(inline)
> ## and define our version in C++
> src <- 'int n = as<int>(ns);
+         double x = as<double>(xs);
+         for (int i=0; i<n; i++) x=1/(1+x);
+         return wrap(x); '
> l <- cxxfunction(signature(ns="integer",
+                            xs="numeric"),
+                  body=src, plugin="Rcpp")
> ## now run the benchmark again
> benchmark(f(N,1), g(N,1), h(N,1), j(N,1), k(N,1),
+           l(N,1),
+           lf(N,1), lg(N,1), lh(N,1), lj(N,1), lk(N,1),
+           columns=c("test", "replications",
+           "elapsed", "relative"),
+           order="relative", replications=10)
       test replications elapsed relative
6   l(N, 1)           10   0.120   1.0000
11 lk(N, 1)           10   2.961  24.6750
7  lf(N, 1)           10   3.128  26.0667
8  lg(N, 1)           10   3.140  26.1667
10 lj(N, 1)           10   3.161  26.3417
9  lh(N, 1)           10   4.212  35.1000
5   k(N, 1)           10   9.500  79.1667
1   f(N, 1)           10   9.621  80.1750
4   j(N, 1)           10  10.868  90.5667
2   g(N, 1)           10  11.409  95.0750
3   h(N, 1)           10  14.077 117.3083

# Xian's code, using <- for assignments and passing x down
f <- function(n, x=1) for (i in 1:n) x=1/(1+x)
g <- function(n, x=1) for (i in 1:n) x=(1/(1+x))
h <- function(n, x=1) for (i in 1:n) x=(1+x)^(-1)
j <- function(n, x=1) for (i in 1:n) x={1/{1+x}}
k <- function(n, x=1) for (i in 1:n) x=1/{1+x}

# now load some tools
library(Rcpp)
library(inline)

# and define our version in C++
l <- cxxfunction(signature(ns="integer", xs="numeric"),
                 'int n = as<int>(ns); double x=as<double>(xs);
                  for (int i=0; i<n; i++) x=1/(1+x);
                  return wrap(x); ',
                 plugin="Rcpp")

# more tools
library(rbenchmark)

# now run the benchmark
N <- 1e6
benchmark(f(N, 1), g(N, 1), h(N, 1), j(N, 1), k(N, 1), l(N, 1),
          columns=c("test", "replications", "elapsed", "relative"),
          order="relative", replications=10)

/tmp$ Rscript xian.R
Loading required package: methods
     test replications elapsed relative
6 l(N, 1)           10   0.122    1.000
5 k(N, 1)           10   9.880   80.984
1 f(N, 1)           10   9.978   81.787
4 j(N, 1)           10  11.293   92.566
2 g(N, 1)           10  12.027   98.582
3 h(N, 1)           10  15.372  126.000
/tmp$ 

R> library(sudoku)
R> s <- readSudoku("/tmp/sudoku.txt")
R> s
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
 [1,]    8    0    0    0    0    1    2    0    0
 [2,]    0    7    5    0    0    0    0    0    0
 [3,]    0    0    0    0    5    0    0    6    4
 [4,]    0    0    7    0    0    0    0    0    6
 [5,]    9    0    0    7    0    0    0    0    0
 [6,]    5    2    0    0    0    9    0    4    7
 [7,]    2    3    1    0    0    0    0    0    0
 [8,]    0    0    6    0    2    0    1    0    9
 [9,]    0    0    0    0    0    0    0    0    0
R> system.time(solveSudoku(s))
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
 [1,]    8    4    9    6    7    1    2    5    3
 [2,]    6    7    5    2    4    3    9    1    8
 [3,]    3    1    2    9    5    8    7    6    4
 [4,]    1    8    7    4    3    2    5    9    6
 [5,]    9    6    4    7    8    5    3    2    1
 [6,]    5    2    3    1    6    9    8    4    7
 [7,]    2    3    1    8    9    4    6    7    5
 [8,]    4    5    6    3    2    7    1    8    9
 [9,]    7    9    8    5    1    6    4    3    2
   user  system elapsed
  5.288   0.004   5.951
R>

gant
gant
20.09.2009 101112.TCX
C
20090920-101112.TCX
#!/usr/bin/r
files <- dir(".", pattern=".*\\.TCX$")
res <- lapply(files, function(f) {
    pt <- strptime(f, "%d.%m.%Y %H%M%S.TCX")  # parsed time
    ft <- strftime(pt, "%Y%m%d-%H%M%S.TCX")   # formatted time
    file.rename(f, ft)
})

patterns
dir()
files
lapply()
f
strptime
strftime
/usr/bin/r
apt-get
i386
amd64
  deb http://debian.cran.r-project.org/cran2deb/debian-i386 testing/

  deb http://debian.cran.r-project.org/cran2deb/debian-amd64 testing/

r-sig-debian
R CMD javareconf
available.packages()
static SEXP from_datetime_kobject(K x) 
{
	SEXP result;
	int i, length = x->n;
	if (scalar(x)) {
		result = PROTECT(allocVector(REALSXP, 1));
		REAL(result)[0] = (kF(x)[0] + 10957) * 86400;
	} else {
		result = PROTECT(allocVector(REALSXP, length));
		for(i = 0; i < length; i++) {
		    	REAL(result)[i] = (kF(x)[i] + 10957) * 86400;
		}
	}
	SEXP datetimeclass = PROTECT(allocVector(STRSXP,2));
	SET_STRING_ELT(datetimeclass, 0, mkChar("POSIXt"));
	SET_STRING_ELT(datetimeclass, 1, mkChar("POSIXct"));
	setAttrib(result, R_ClassSymbol, datetimeclass);
	UNPROTECT(2); 
        return result; 
}

x <- readLines("http://developer.r-project.org/R.svnlog.2007")
rx <- x[grep("^r",x)]
who <- gsub(" ","",sapply(strsplit(rx,"\\|"),"[",2))
twho <- table(who)
twho["ripley"]/sum(twho)

tod <- unlist(sapply(rx,function(x)strsplit(x,split=" ")[[1]][6]))
tod <- tod[who=="ripley"]

tz <- sub(pattern=".*(-[0-9]{4}).*",replacement="\\1",x=rx)
tz <- tz[who=="ripley"]
tz <- as.numeric(tz)/100
offset <- 3600*tz

z <- strptime(tod,format="%H:%M:%S")
hist(z,"hours",main="Ripley Commit Times in SVN TZ")

h <- z - offset
h <- format(h,format="%H")
h <- factor(as.numeric(h), levels=0:23)
dotchart(table(h), main="Ripley Commit Times, By Hour in GMT",
         labels=paste(0:23,1:24,sep=":"))

## rather extract both  date and time
dat <- unlist(sapply(rx, function(x) {
  txt <- strsplit(x,split=" ")[[1]]
  paste(txt[5], txt[6])
}))
## subset on Prof Ripley
dat <- dat[who == "ripley"]
## and convert to POSIXct, correcting by tz as well
datpt <- as.POSIXct(strptime(dat,format="%Y-%m-%d %H:%M:%S")) - offset

## turn into zoo -- we use a constant series of ones as each
## committ is taken as a timestamped event
datzoo <- zoo(1, order.by=datpt)
## and use zoo to aggregate into commits per date
daily <- aggregate(datzoo, as.Date(index(datzoo)), sum)

## now plot as grey bars
plot(daily, col='darkgrey', type='h', lwd=2,
     ylab="Nb of SVN commits, three-week median",
     xlab="R release dates 2.5.0 and 2.5.1 shown in orange",
     main="The amazing Prof. Ripley")
## mark the two R releases of 2007
abline(v=c(as.Date("2007-04-24"),as.Date("2007-06-28")),col='orange',lwd=1.5)
## and do a quick centered rolling median
lines(rollmedian(daily, 21, align="center"), lwd=3)

Fri, 25 Feb 2022

Sat, 20 Mar 2021

Sat, 08 Dec 2018

Mon, 11 Jun 2018

Overview

Debian

Ubuntu

Others

Closing

Wed, 08 Nov 2017

Call for Papers

Wed, 11 Jan 2017

Call for Papers

Thu, 15 Oct 2015

Call for Papers

Tue, 31 Mar 2015

Registration for R/Finance 2015 is now open!

Mon, 24 Nov 2014

Tue, 18 Nov 2014

Call for Papers:

Thu, 25 Sep 2014

Sat, 29 Mar 2014

Now open for registrations:

Mon, 27 May 2013

Fri, 29 Mar 2013

Now open for registrations:

Mon, 17 Dec 2012

Call for Papers:

Mon, 19 Mar 2012

Now open for registrations:

Thu, 15 Dec 2011

Call for Papers:

Wed, 25 May 2011

Tue, 12 Apr 2011

Thu, 07 Apr 2011

Fri, 31 Dec 2010

Call for Papers:

R/Finance 2011: Applied Finance with R

Fri, 17 Dec 2010

Wed, 27 Oct 2010

Fri, 24 Sep 2010

Call for Papers:

Tue, 07 Sep 2010

Sat, 24 Jul 2010

Thu, 27 May 2010

Tue, 20 Apr 2010

Wed, 07 Apr 2010

Sun, 04 Apr 2010

Thu, 25 Feb 2010

Thu, 18 Feb 2010

Fri, 05 Feb 2010

Now open for registrations:

Thu, 07 Jan 2010

Tue, 01 Dec 2009

Mon, 09 Nov 2009

Call for Papers:

Sat, 10 Oct 2009

Tue, 04 Aug 2009

Mon, 13 Jul 2009

Sun, 12 Jul 2009

Sat, 27 Jun 2009

Wed, 29 Apr 2009

Mon, 27 Apr 2009

Sun, 26 Apr 2009

Thu, 05 Mar 2009

Wed, 25 Feb 2009

Mon, 23 Feb 2009

Tue, 03 Feb 2009

Fri, 30 Jan 2009

Sat, 24 Jan 2009

Wed, 07 Jan 2009

Thu, 01 Jan 2009

Tue, 23 Dec 2008

Mon, 01 Dec 2008

Tue, 19 Aug 2008

Tue, 12 Aug 2008

Sat, 16 Feb 2008

Sat, 11 Aug 2007

Mon, 09 Jul 2007

Mon, 02 Jul 2007