Rcpp Version 1.0.0
RcppGibbs.R
Go to the documentation of this file.
1 ## Simple Gibbs Sampler Example
2 ## Adapted from Darren Wilkinson's post at
3 ## http://darrenjw.wordpress.com/2010/04/28/mcmc-programming-in-r-python-java-and-c/
4 ##
5 ## Sanjog Misra and Dirk Eddelbuettel, June-July 2011
6
7 suppressMessages(library(Rcpp))
8 suppressMessages(library(inline))
9 suppressMessages(library(compiler))
10 suppressMessages(library(rbenchmark))
11
12
13 ## Actual joint density -- the code which follow implements
14 ## a Gibbs sampler to draw from the following joint density f(x,y)
15 fun <- function(x,y) {
16  x*x * exp(-x*y*y - y*y + 2*y - 4*x)
17 }
18
19 ## Note that the full conditionals are propotional to
20 ## f(x|y) = (x^2)*exp(-x*(4+y*y)) : a Gamma density kernel
21 ## f(y|x) = exp(-0.5*2*(x+1)*(y^2 - 2*y/(x+1)) : Normal Kernel
22
23 ## There is a small typo in Darrens code.
24 ## The full conditional for the normal has the wrong variance
25 ## It should be 1/sqrt(2*(x+1)) not 1/sqrt(1+x)
26 ## This we can verify ...
27 ## The actual conditional (say for x=3) can be computed as follows
28 ## First - Construct the Unnormalized Conditional
29 fy.unnorm <- function(y) fun(3,y)
30
31 ## Then - Find the appropriate Normalizing Constant
32 K <- integrate(fy.unnorm,-Inf,Inf)
33
34 ## Finally - Construct Actual Conditional
35 fy <- function(y) fy.unnorm(y)/K\$val
36
37 ## Now - The corresponding Normal should be
38 fy.dnorm <- function(y) {
39  x <- 3
40  dnorm(y,1/(1+x),sqrt(1/(2*(1+x))))
41 }
42
43 ## and not ...
44 fy.dnorm.wrong <- function(y) {
45  x <- 3
46  dnorm(y,1/(1+x),sqrt(1/((1+x))))
47 }
48
49 if (interactive()) {
50  ## Graphical check
51  ## Actual (gray thick line)
52  curve(fy,-2,2,col='grey',lwd=5)
53
54  ## Correct Normal conditional (blue dotted line)
56
57  ## Wrong Normal (Red line)
59 }
60
61 ## Here is the actual Gibbs Sampler
62 ## This is Darren Wilkinsons R code (with the corrected variance)
63 ## But we are returning only his columns 2 and 3 as the 1:N sequence
64 ## is never used below
65 Rgibbs <- function(N,thin) {
66  mat <- matrix(0,ncol=2,nrow=N)
67  x <- 0
68  y <- 0
69  for (i in 1:N) {
70  for (j in 1:thin) {
71  x <- rgamma(1,3,y*y+4)
72  y <- rnorm(1,1/(x+1),1/sqrt(2*(x+1)))
73  }
74  mat[i,] <- c(x,y)
75  }
76  mat
77 }
78
79 ## We can also try the R compiler on this R function
80 RCgibbs <- cmpfun(Rgibbs)
81
82 ## For example
83 ## mat <- Rgibbs(10000,10); dim(mat)
84 ## would give: [1] 10000 2
85
86 ## Now for the Rcpp version -- Notice how easy it is to code up!
87
88 ## NOTE: This is the old way to compile Rcpp code inline.
89 ## The code here has left as a historical artifact and tribute to the old way.
90 ## Please use the code under the "new" inline compilation section.
91
92 gibbscode <- '
93
94  using namespace Rcpp; // inline does that for us already
95
96  // n and thin are SEXPs which the Rcpp::as function maps to C++ vars
97  int N = as<int>(n);
98  int thn = as<int>(thin);
99
100  int i,j;
101  NumericMatrix mat(N, 2);
102
103  RNGScope scope; // Initialize Random number generator
104
105  // The rest of the code follows the R version
106  double x=0, y=0;
107
108  for (i=0; i<N; i++) {
109  for (j=0; j<thn; j++) {
110  x = ::Rf_rgamma(3.0,1.0/(y*y+4));
111  y = ::Rf_rnorm(1.0/(x+1),1.0/sqrt(2*x+2));
112  }
113  mat(i,0) = x;
114  mat(i,1) = y;
115  }
116
118 '
119
121 RcppGibbs_old <- cxxfunction(signature(n="int", thin = "int"),
122  gibbscode, plugin="Rcpp")
123
124
125 gslgibbsincl <- '
126  #include <gsl/gsl_rng.h>
127  #include <gsl/gsl_randist.h>
128
129  using namespace Rcpp; // just to be explicit
130 '
131
132 gslgibbscode <- '
133  int N = as<int>(ns);
134  int thin = as<int>(thns);
135  int i, j;
136  gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
137  double x=0, y=0;
138  NumericMatrix mat(N, 2);
139  for (i=0; i<N; i++) {
140  for (j=0; j<thin; j++) {
141  x = gsl_ran_gamma(r,3.0,1.0/(y*y+4));
142  y = 1.0/(x+1)+gsl_ran_gaussian(r,1.0/sqrt(2*x+2));
143  }
144  mat(i,0) = x;
145  mat(i,1) = y;
146  }
147  gsl_rng_free(r);
148
150 '
151
153 GSLGibbs_old <- cxxfunction(signature(ns="int", thns = "int"),
154  body=gslgibbscode, includes=gslgibbsincl,
155  plugin="RcppGSL")
156
157 ## without RcppGSL, using cfunction()
158 #GSLGibbs <- cfunction(signature(ns="int", thns = "int"),
159 # body=gslgibbscode, includes=gslgibbsincl,
160 # Rcpp=TRUE,
161 # cppargs="-I/usr/include",
162 # libargs="-lgsl -lgslcblas")
163
164
165 ## NOTE: Within this section, the new way to compile Rcpp code inline has been
166 ## written. Please use the code next as a template for your own project.
167
168 ## Use of the cppFunction() gives the ability to immediately compile embed C++
169 ## without having to worry about header specification or Rcpp attributes.
170
171 cppFunction('
172 NumericMatrix RcppGibbs(int N, int thn){
173  // Note: n and thin are SEXPs which the Rcpp automatically converts to ints
174
175  // Setup storage matrix
176  NumericMatrix mat(N, 2);
177
178  // The rest of the code follows the R version
179  double x = 0, y = 0;
180
181  for (int i = 0; i < N; i++) {
182  for (int j = 0; j < thn; j++) {
183  x = R::rgamma(3.0,1.0/(y*y+4));
184  y = R::rnorm(1.0/(x+1),1.0/sqrt(2*x+2));
185  }
186  mat(i,0) = x;
187  mat(i,1) = y;
188  }
189
191 }')
192
193
194 ## Use of the sourceCpp() is preferred for users who wish to source external
195 ## files or specify their headers and Rcpp attributes within their code.
196 ## Code here is able to easily be extracted and placed into its own C++ file.
197
199 sourceCpp(code="
200 #include <RcppGSL.h>
201 #include <gsl/gsl_rng.h>
202 #include <gsl/gsl_randist.h>
203
204 using namespace Rcpp; // just to be explicit
205
206 // [[Rcpp::depends(RcppGSL)]]
207
208 // [[Rcpp::export]]
209 NumericMatrix GSLGibbs(int N, int thin){
210  gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
211  double x = 0, y = 0;
212  NumericMatrix mat(N, 2);
213  for (int i = 0; i < N; i++) {
214  for (int j = 0; j < thin; j++) {
215  x = gsl_ran_gamma(r,3.0,1.0/(y*y+4));
216  y = 1.0/(x+1)+gsl_ran_gaussian(r,1.0/sqrt(2*x+2));
217  }
218  mat(i,0) = x;
219  mat(i,1) = y;
220  }
221  gsl_rng_free(r);
222
224 }")
225
226
227
228 ## Now for some tests
229 ## You can try other values if you like
230 ## Note that the total number of interations are N*thin!
231 Ns <- c(1000,5000,10000,20000)
232 thins <- c(10,50,100,200)
233 tim_R <- rep(0,4)
234 tim_RC <- rep(0,4)
235 tim_Rgsl <- rep(0,4)
236 tim_Rcpp <- rep(0,4)
237
238 for (i in seq_along(Ns)) {
239  tim_R[i] <- system.time(mat <- Rgibbs(Ns[i],thins[i]))[3]
240  tim_RC[i] <- system.time(cmat <- RCgibbs(Ns[i],thins[i]))[3]
241  tim_Rgsl[i] <- system.time(gslmat <- GSLGibbs(Ns[i],thins[i]))[3]
242  tim_Rcpp[i] <- system.time(rcppmat <- RcppGibbs(Ns[i],thins[i]))[3]
243  cat("Replication #", i, "complete \n")
244 }
245
246 ## Comparison
247 speedup <- round(tim_R/tim_Rcpp,2);
248 speedup2 <- round(tim_R/tim_Rgsl,2);
249 speedup3 <- round(tim_R/tim_RC,2);
250 summtab <- round(rbind(tim_R,tim_RC, tim_Rcpp,tim_Rgsl,speedup3,speedup,speedup2),3)
251 colnames(summtab) <- c("N=1000","N=5000","N=10000","N=20000")
252 rownames(summtab) <- c("Elasped Time (R)","Elasped Time (RC)","Elapsed Time (Rcpp)", "Elapsed Time (Rgsl)",
253  "SpeedUp Rcomp.","SpeedUp Rcpp", "SpeedUp GSL")
254
255 print(summtab)
256
257 ## Contour Plots -- based on Darren's example
258 if (interactive() && require(KernSmooth)) {
259  op <- par(mfrow=c(4,1),mar=c(3,3,3,1))
260  x <- seq(0,4,0.01)
261  y <- seq(-2,4,0.01)
262  z <- outer(x,y,fun)
263  contour(x,y,z,main="Contours of actual distribution",xlim=c(0,2), ylim=c(-2,4))
264  fit <- bkde2D(as.matrix(mat),c(0.1,0.1))
265  contour(drawlabels=T, fit\$x1, fit\$x2, fit\$fhat, xlim=c(0,2), ylim=c(-2,4),
266  main=paste("Contours of empirical distribution:",round(tim_R[4],2)," seconds"))
267  fitc <- bkde2D(as.matrix(rcppmat),c(0.1,0.1))
268  contour(fitc\$x1,fitc\$x2,fitc\$fhat,xlim=c(0,2), ylim=c(-2,4),
269  main=paste("Contours of Rcpp based empirical distribution:",round(tim_Rcpp[4],2)," seconds"))
270  fitg <- bkde2D(as.matrix(gslmat),c(0.1,0.1))
271  contour(fitg\$x1,fitg\$x2,fitg\$fhat,xlim=c(0,2), ylim=c(-2,4),
272  main=paste("Contours of GSL based empirical distribution:",round(tim_Rgsl[4],2)," seconds"))
273  par(op)
274 }
275
276
277 ## also use rbenchmark package
278 N <- 20000
279 thn <- 200
280 res <- benchmark(Rgibbs(N, thn),
281  RCgibbs(N, thn),
282  RcppGibbs(N, thn),
283  GSLGibbs(N, thn),
284  columns=c("test", "replications", "elapsed",
285  "relative", "user.self", "sys.self"),
286  order="relative",
287  replications=10)
288 print(res)
289
290
291 ## And we are done
292
293