|
Rcpp Version 0.9.10
|
00001 ## Simple Gibbs Sampler Example 00002 ## Adapted from Darren Wilkinson's post at 00003 ## http://darrenjw.wordpress.com/2010/04/28/mcmc-programming-in-r-python-java-and-c/ 00004 ## 00005 ## Sanjog Misra and Dirk Eddelbuettel, June-July 2011 00006 00007 suppressMessages(library(Rcpp)) 00008 suppressMessages(library(inline)) 00009 suppressMessages(library(compiler)) 00010 suppressMessages(library(rbenchmark)) 00011 00012 00013 ## Actual joint density -- the code which follow implements 00014 ## a Gibbs sampler to draw from the following joint density f(x,y) 00015 fun <- function(x,y) { 00016 x*x * exp(-x*y*y - y*y + 2*y - 4*x) 00017 } 00018 00019 ## Note that the full conditionals are propotional to 00020 ## f(x|y) = (x^2)*exp(-x*(4+y*y)) : a Gamma density kernel 00021 ## f(y|x) = exp(-0.5*2*(x+1)*(y^2 - 2*y/(x+1)) : Normal Kernel 00022 00023 ## There is a small typo in Darrens code. 00024 ## The full conditional for the normal has the wrong variance 00025 ## It should be 1/sqrt(2*(x+1)) not 1/sqrt(1+x) 00026 ## This we can verify ... 00027 ## The actual conditional (say for x=3) can be computed as follows 00028 ## First - Construct the Unnormalized Conditional 00029 fy.unnorm <- function(y) fun(3,y) 00030 00031 ## Then - Find the appropriate Normalizing Constant 00032 K <- integrate(fy.unnorm,-Inf,Inf) 00033 00034 ## Finally - Construct Actual Conditional 00035 fy <- function(y) fy.unnorm(y)/K$val 00036 00037 ## Now - The corresponding Normal should be 00038 fy.dnorm <- function(y) { 00039 x <- 3 00040 dnorm(y,1/(1+x),sqrt(1/(2*(1+x)))) 00041 } 00042 00043 ## and not ... 00044 fy.dnorm.wrong <- function(y) { 00045 x <- 3 00046 dnorm(y,1/(1+x),sqrt(1/((1+x)))) 00047 } 00048 00049 if (interactive()) { 00050 ## Graphical check 00051 ## Actual (gray thick line) 00052 curve(fy,-2,2,col='grey',lwd=5) 00053 00054 ## Correct Normal conditional (blue dotted line) 00055 curve(fy.dnorm,-2,2,col='blue',add=T,lty=3) 00056 00057 ## Wrong Normal (Red line) 00058 curve(fy.dnorm.wrong,-2,2,col='red',add=T) 00059 } 00060 00061 ## Here is the actual Gibbs Sampler 00062 ## This is Darren Wilkinsons R code (with the corrected variance) 00063 ## But we are returning only his columns 2 and 3 as the 1:N sequence 00064 ## is never used below 00065 Rgibbs <- function(N,thin) { 00066 mat <- matrix(0,ncol=2,nrow=N) 00067 x <- 0 00068 y <- 0 00069 for (i in 1:N) { 00070 for (j in 1:thin) { 00071 x <- rgamma(1,3,y*y+4) 00072 y <- rnorm(1,1/(x+1),1/sqrt(2*(x+1))) 00073 } 00074 mat[i,] <- c(x,y) 00075 } 00076 mat 00077 } 00078 00079 ## We can also try the R compiler on this R function 00080 RCgibbs <- cmpfun(Rgibbs) 00081 00082 ## For example 00083 ## mat <- Rgibbs(10000,10); dim(mat) 00084 ## would give: [1] 10000 2 00085 00086 00087 ## Now for the Rcpp version -- Notice how easy it is to code up! 00088 gibbscode <- ' 00089 00090 using namespace Rcpp; // inline does that for us already 00091 00092 // n and thin are SEXPs which the Rcpp::as function maps to C++ vars 00093 int N = as<int>(n); 00094 int thn = as<int>(thin); 00095 00096 int i,j; 00097 NumericMatrix mat(N, 2); 00098 00099 RNGScope scope; // Initialize Random number generator 00100 00101 // The rest of the code follows the R version 00102 double x=0, y=0; 00103 00104 for (i=0; i<N; i++) { 00105 for (j=0; j<thn; j++) { 00106 x = ::Rf_rgamma(3.0,1.0/(y*y+4)); 00107 y = ::Rf_rnorm(1.0/(x+1),1.0/sqrt(2*x+2)); 00108 } 00109 mat(i,0) = x; 00110 mat(i,1) = y; 00111 } 00112 00113 return mat; // Return to R 00114 ' 00115 00116 # Compile and Load 00117 RcppGibbs <- cxxfunction(signature(n="int", thin = "int"), 00118 gibbscode, plugin="Rcpp") 00119 00120 00121 gslgibbsincl <- ' 00122 #include <gsl/gsl_rng.h> 00123 #include <gsl/gsl_randist.h> 00124 00125 using namespace Rcpp; // just to be explicit 00126 ' 00127 00128 gslgibbscode <- ' 00129 int N = as<int>(ns); 00130 int thin = as<int>(thns); 00131 int i, j; 00132 gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937); 00133 double x=0, y=0; 00134 NumericMatrix mat(N, 2); 00135 for (i=0; i<N; i++) { 00136 for (j=0; j<thin; j++) { 00137 x = gsl_ran_gamma(r,3.0,1.0/(y*y+4)); 00138 y = 1.0/(x+1)+gsl_ran_gaussian(r,1.0/sqrt(2*x+2)); 00139 } 00140 mat(i,0) = x; 00141 mat(i,1) = y; 00142 } 00143 gsl_rng_free(r); 00144 00145 return mat; // Return to R 00146 ' 00147 00148 ## Compile and Load 00149 GSLGibbs <- cxxfunction(signature(ns="int", thns = "int"), 00150 body=gslgibbscode, includes=gslgibbsincl, 00151 plugin="RcppGSL") 00152 00153 ## without RcppGSL, using cfunction() 00154 #GSLGibbs <- cfunction(signature(ns="int", thns = "int"), 00155 # body=gslgibbscode, includes=gslgibbsincl, 00156 # Rcpp=TRUE, 00157 # cppargs="-I/usr/include", 00158 # libargs="-lgsl -lgslcblas") 00159 00160 00161 ## Now for some tests 00162 ## You can try other values if you like 00163 ## Note that the total number of interations are N*thin! 00164 Ns <- c(1000,5000,10000,20000) 00165 thins <- c(10,50,100,200) 00166 tim_R <- rep(0,4) 00167 tim_RC <- rep(0,4) 00168 tim_Rgsl <- rep(0,4) 00169 tim_Rcpp <- rep(0,4) 00170 00171 for (i in seq_along(Ns)) { 00172 tim_R[i] <- system.time(mat <- Rgibbs(Ns[i],thins[i]))[3] 00173 tim_RC[i] <- system.time(cmat <- RCgibbs(Ns[i],thins[i]))[3] 00174 tim_Rgsl[i] <- system.time(gslmat <- GSLGibbs(Ns[i],thins[i]))[3] 00175 tim_Rcpp[i] <- system.time(rcppmat <- RcppGibbs(Ns[i],thins[i]))[3] 00176 cat("Replication #", i, "complete \n") 00177 } 00178 00179 ## Comparison 00180 speedup <- round(tim_R/tim_Rcpp,2); 00181 speedup2 <- round(tim_R/tim_Rgsl,2); 00182 speedup3 <- round(tim_R/tim_RC,2); 00183 summtab <- round(rbind(tim_R,tim_RC, tim_Rcpp,tim_Rgsl,speedup3,speedup,speedup2),3) 00184 colnames(summtab) <- c("N=1000","N=5000","N=10000","N=20000") 00185 rownames(summtab) <- c("Elasped Time (R)","Elasped Time (RC)","Elapsed Time (Rcpp)", "Elapsed Time (Rgsl)", 00186 "SpeedUp Rcomp.","SpeedUp Rcpp", "SpeedUp GSL") 00187 00188 print(summtab) 00189 00190 ## Contour Plots -- based on Darren's example 00191 if (interactive() && require(KernSmooth)) { 00192 op <- par(mfrow=c(4,1),mar=c(3,3,3,1)) 00193 x <- seq(0,4,0.01) 00194 y <- seq(-2,4,0.01) 00195 z <- outer(x,y,fun) 00196 contour(x,y,z,main="Contours of actual distribution",xlim=c(0,2), ylim=c(-2,4)) 00197 fit <- bkde2D(as.matrix(mat),c(0.1,0.1)) 00198 contour(drawlabels=T, fit$x1, fit$x2, fit$fhat, xlim=c(0,2), ylim=c(-2,4), 00199 main=paste("Contours of empirical distribution:",round(tim_R[4],2)," seconds")) 00200 fitc <- bkde2D(as.matrix(rcppmat),c(0.1,0.1)) 00201 contour(fitc$x1,fitc$x2,fitc$fhat,xlim=c(0,2), ylim=c(-2,4), 00202 main=paste("Contours of Rcpp based empirical distribution:",round(tim_Rcpp[4],2)," seconds")) 00203 fitg <- bkde2D(as.matrix(gslmat),c(0.1,0.1)) 00204 contour(fitg$x1,fitg$x2,fitg$fhat,xlim=c(0,2), ylim=c(-2,4), 00205 main=paste("Contours of GSL based empirical distribution:",round(tim_Rgsl[4],2)," seconds")) 00206 par(op) 00207 } 00208 00209 00210 ## also use rbenchmark package 00211 N <- 20000 00212 thn <- 200 00213 res <- benchmark(Rgibbs(N, thn), 00214 RCgibbs(N, thn), 00215 RcppGibbs(N, thn), 00216 GSLGibbs(N, thn), 00217 columns=c("test", "replications", "elapsed", 00218 "relative", "user.self", "sys.self"), 00219 order="relative", 00220 replications=10) 00221 print(res) 00222 00223 00224 ## And we are done 00225 00226