Rcpp Version 1.0.9
sample.h
Go to the documentation of this file.
1 // -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 //
3 // sample.h: Rcpp R/C++ interface class library -- sample
4 //
5 // Copyright (C) 2016 Nathan Russell
6 //
7 // This file is part of Rcpp.
8 //
9 // Rcpp is free software: you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License as published by
11 // the Free Software Foundation, either version 2 of the License, or
12 // (at your option) any later version.
13 //
14 // Rcpp is distributed in the hope that it will be useful, but
15 // WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
21 
22 // The sample() in RcppArmadillo came first, but is opt-in. In case someone did
23 // in fact load it, we need to skip the declarations here to avoid a conflict
24 #ifndef RCPPARMADILLO__EXTENSIONS__SAMPLE_H
25 
26 #ifndef Rcpp__sugar__sample_h
27 #define Rcpp__sugar__sample_h
28 
29 #include <vector>
30 
31 // In order to mirror the behavior of `base::sample`
32 // as closely as possible, this file contains adaptations
33 // of several functions in R/src/main/random.c:
34 //
35 // * do_sample - general logic as well as the empirical sampling routine.
36 //
37 // * FixupProb - an auxiliary function.
38 //
39 // * walker_ProbSampleReplace, ProbSampleReplace, and ProbSampleNoReplace -
40 // algorithms for sampling according to a supplied probability vector.
41 //
42 // For each of the sampling routines, two signatures are provided:
43 //
44 // * A version that returns an integer vector, which can be used to
45 // generate 0-based indices (one_based = false) or 1-based indices
46 // (one_based = true) -- where the latter corresponds to the
47 // bahavior of `base::sample.int`.
48 //
49 // * A version which takes an input Vector<> (rather than an integer 'n'),
50 // and samples its elements -- this corresponds to `base::sample`.
51 
52 namespace Rcpp {
53 namespace sugar {
54 
55 // Adapted from `FixupProb`
56 // Normalizes a probability vector 'p' S.T. sum(p) == 1
57 inline void Normalize(Vector<REALSXP>& p, int require_k, bool replace)
58 {
59  double sum = 0.0;
60  R_xlen_t npos = 0, i = 0, n = p.size();
61 
62  for ( ; i < n; i++) {
63  if (!R_FINITE(p[i]) || (p[i] < 0)) {
64  stop("Probabilities must be finite and non-negative!");
65  }
66  npos += (p[i] > 0.0);
67  sum += p[i];
68  }
69 
70  if ((!npos) || (!replace && (require_k > npos))) {
71  stop("Too few positive probabilities!");
72  }
73 
74  for (i = 0; i < n; i++) {
75  p[i] /= sum;
76  }
77 }
78 
79 // Adapted from `ProbSampleReplace`
80 // Index version
81 inline Vector<INTSXP> SampleReplace(Vector<REALSXP>& p, int n, int k, bool one_based)
82 {
83  Vector<INTSXP> perm = no_init(n), ans = no_init(k);
84  double rU = 0.0;
85  int i = 0, j = 0, nm1 = n - 1;
86 
87  int adj = one_based ? 0 : 1;
88 
89  for ( ; i < n; i++) {
90  perm[i] = i + 1;
91  }
92 
93  Rf_revsort(p.begin(), perm.begin(), n);
94 
95  for (i = 1; i < n; i++) {
96  p[i] += p[i - 1];
97  }
98 
99  for (i = 0; i < k; i++) {
100  rU = unif_rand();
101  for (j = 0; j < nm1; j++) {
102  if (rU <= p[j]) {
103  break;
104  }
105  }
106  ans[i] = perm[j] - adj;
107  }
108 
109  return ans;
110 }
111 
112 // Element version
113 template <int RTYPE>
115 {
116  int n = ref.size();
117 
118  Vector<INTSXP> perm = no_init(n);
119  Vector<RTYPE> ans = no_init(k);
120 
121  double rU = 0.0;
122  int i = 0, j = 0, nm1 = n - 1;
123 
124  for ( ; i < n; i++) {
125  perm[i] = i + 1;
126  }
127 
128  Rf_revsort(p.begin(), perm.begin(), n);
129 
130  for (i = 1; i < n; i++) {
131  p[i] += p[i - 1];
132  }
133 
134  for (i = 0; i < k; i++) {
135  rU = unif_rand();
136  for (j = 0; j < nm1; j++) {
137  if (rU <= p[j]) {
138  break;
139  }
140  }
141  ans[i] = ref[perm[j] - 1];
142  }
143 
144  return ans;
145 }
146 
147 // Adapted from `walker_ProbSampleReplace`
148 // Index version
149 inline Vector<INTSXP> WalkerSample(const Vector<REALSXP>& p, int n, int nans, bool one_based)
150 {
151  Vector<INTSXP> a = no_init(n), ans = no_init(nans);
152  int i, j, k;
153  std::vector<double> q(n);
154  double rU;
155 
156  std::vector<int> HL(n);
157  std::vector<int>::iterator H, L;
158 
159  int adj = one_based ? 1 : 0;
160 
161  H = HL.begin() - 1; L = HL.begin() + n;
162  for (i = 0; i < n; i++) {
163  q[i] = p[i] * n;
164  if (q[i] < 1.0) {
165  *++H = i;
166  } else {
167  *--L = i;
168  }
169  }
170 
171  if (H >= HL.begin() && L < HL.begin() + n) {
172  for (k = 0; k < n - 1; k++) {
173  i = HL[k];
174  j = *L;
175  a[i] = j;
176  q[j] += q[i] - 1;
177 
178  L += (q[j] < 1.0);
179 
180  if (L >= HL.begin() + n) {
181  break;
182  }
183  }
184  }
185 
186  for (i = 0; i < n; i++) {
187  q[i] += i;
188  }
189 
190  for (i = 0; i < nans; i++) {
191  rU = unif_rand() * n;
192  k = static_cast<int>(rU);
193  ans[i] = (rU < q[k]) ? k + adj : a[k] + adj;
194  }
195 
196  return ans;
197 }
198 
199 // Element version
200 template <int RTYPE>
201 inline Vector<RTYPE> WalkerSample(const Vector<REALSXP>& p, int nans, const Vector<RTYPE>& ref)
202 {
203  int n = ref.size();
204 
205  Vector<INTSXP> a = no_init(n);
206  Vector<RTYPE> ans = no_init(nans);
207 
208  int i, j, k;
209  std::vector<double> q(n);
210  double rU;
211 
212  std::vector<int> HL(n);
213  std::vector<int>::iterator H, L;
214 
215  H = HL.begin() - 1; L = HL.begin() + n;
216  for (i = 0; i < n; i++) {
217  q[i] = p[i] * n;
218  if (q[i] < 1.0) {
219  *++H = i;
220  } else {
221  *--L = i;
222  }
223  }
224 
225  if (H >= HL.begin() && L < HL.begin() + n) {
226  for (k = 0; k < n - 1; k++) {
227  i = HL[k];
228  j = *L;
229  a[i] = j;
230  q[j] += q[i] - 1;
231 
232  L += (q[j] < 1.0);
233 
234  if (L >= HL.begin() + n) {
235  break;
236  }
237  }
238  }
239 
240  for (i = 0; i < n; i++) {
241  q[i] += i;
242  }
243 
244  for (i = 0; i < nans; i++) {
245  rU = unif_rand() * n;
246  k = static_cast<int>(rU);
247  ans[i] = (rU < q[k]) ? ref[k] : ref[a[k]];
248  }
249 
250  return ans;
251 }
252 
253 // Adapted from `ProbSampleNoReplace`
254 // Index version
255 inline Vector<INTSXP> SampleNoReplace(Vector<REALSXP>& p, int n, int nans, bool one_based)
256 {
257  Vector<INTSXP> perm = no_init(n), ans = no_init(nans);
258  double rT, mass, totalmass;
259  int i, j, k, n1;
260 
261  int adj = one_based ? 0 : 1;
262 
263  for (i = 0; i < n; i++) {
264  perm[i] = i + 1;
265  }
266 
267  Rf_revsort(p.begin(), perm.begin(), n);
268 
269  totalmass = 1.0;
270  for (i = 0, n1 = n - 1; i < nans; i++, n1--) {
271  rT = totalmass * unif_rand();
272  mass = 0.0;
273 
274  for (j = 0; j < n1; j++) {
275  mass += p[j];
276  if (rT <= mass) {
277  break;
278  }
279  }
280 
281  ans[i] = perm[j] - adj;
282  totalmass -= p[j];
283 
284  for (k = j; k < n1; k++) {
285  p[k] = p[k + 1];
286  perm[k] = perm[k + 1];
287  }
288  }
289 
290  return ans;
291 }
292 
293 // Element version
294 template <int RTYPE>
296 {
297  int n = ref.size();
298 
299  Vector<INTSXP> perm = no_init(n);
300  Vector<RTYPE> ans = no_init(nans);
301 
302  double rT, mass, totalmass;
303  int i, j, k, n1;
304 
305  for (i = 0; i < n; i++) {
306  perm[i] = i + 1;
307  }
308 
309  Rf_revsort(p.begin(), perm.begin(), n);
310 
311  totalmass = 1.0;
312  for (i = 0, n1 = n - 1; i < nans; i++, n1--) {
313  rT = totalmass * unif_rand();
314  mass = 0.0;
315 
316  for (j = 0; j < n1; j++) {
317  mass += p[j];
318  if (rT <= mass) {
319  break;
320  }
321  }
322 
323  ans[i] = ref[perm[j] - 1];
324  totalmass -= p[j];
325 
326  for (k = j; k < n1; k++) {
327  p[k] = p[k + 1];
328  perm[k] = perm[k + 1];
329  }
330  }
331 
332  return ans;
333 }
334 
335 // Adapted from segment of `do_sample`
336 // Index version
337 inline Vector<INTSXP> EmpiricalSample(int n, int size, bool replace, bool one_based)
338 {
339  Vector<INTSXP> ans = no_init(size);
340  Vector<INTSXP>::iterator ians = ans.begin(), eans = ans.end();
341 
342  int adj = one_based ? 1 : 0;
343 
344  if (replace || size < 2) {
345  for ( ; ians != eans; ++ians) {
346  *ians = static_cast<int>(n * unif_rand() + adj);
347  }
348  return ans;
349  }
350 
351  IntegerVector x = no_init(n);
352  for (int i = 0; i < n; i++) {
353  x[i] = i;
354  }
355 
356  for ( ; ians != eans; ++ians) {
357  int j = static_cast<int>(n * unif_rand());
358  *ians = x[j] + adj;
359  x[j] = x[--n];
360  }
361 
362  return ans;
363 }
364 
365 // Element version
366 template <int RTYPE>
367 inline Vector<RTYPE> EmpiricalSample(int size, bool replace, const Vector<RTYPE>& ref)
368 {
369  int n = ref.size();
370 
371  Vector<RTYPE> ans = no_init(size);
372  typename Vector<RTYPE>::iterator ians = ans.begin(), eans = ans.end();
373 
374  if (replace || size < 2) {
375  for ( ; ians != eans; ++ians) {
376  *ians = ref[static_cast<int>(n * unif_rand())];
377  }
378  return ans;
379  }
380 
381  IntegerVector x = no_init(n);
382  for (int i = 0; i < n; i++) {
383  x[i] = i;
384  }
385 
386  for ( ; ians != eans; ++ians) {
387  int j = static_cast<int>(n * unif_rand());
388  *ians = ref[x[j]];
389  x[j] = x[--n];
390  }
391 
392  return ans;
393 }
394 
396 
397 } // sugar
398 
399 // Adapted from `do_sample`
400 inline Vector<INTSXP>
401 sample(int n, int size, bool replace = false, sugar::probs_t probs = R_NilValue, bool one_based = true)
402 {
403  if (probs.isNotNull()) {
404  Vector<REALSXP> p = clone(probs.get());
405  if (static_cast<int>(p.size()) != n) {
406  stop("probs.size() != n!");
407  }
408 
409  sugar::Normalize(p, size, replace);
410 
411  if (replace) {
412  int i = 0, nc = 0;
413  for ( ; i < n; i++) {
414  nc += (n * p[i] > 0.1);
415  }
416 
417  return nc > 200 ? sugar::WalkerSample(p, n, size, one_based) :
418  sugar::SampleReplace(p, n, size, one_based);
419  }
420 
421  if (size > n) {
422  stop("Sample size must be <= n when not using replacement!");
423  }
424 
425  return sugar::SampleNoReplace(p, n, size, one_based);
426  }
427 
428  if (!replace && size > n) {
429  stop("Sample size must be <= n when not using replacement!");
430  }
431 
432  return sugar::EmpiricalSample(n, size, replace, one_based);
433 }
434 
435 template <int RTYPE>
436 inline Vector<RTYPE>
437 sample(const Vector<RTYPE>& x, int size, bool replace = false, sugar::probs_t probs = R_NilValue)
438 {
439  int n = x.size();
440 
441  if (probs.isNotNull()) {
442  Vector<REALSXP> p = clone(probs.get());
443  if (static_cast<int>(p.size()) != n) {
444  stop("probs.size() != n!");
445  }
446 
447  sugar::Normalize(p, size, replace);
448 
449  if (replace) {
450  int i = 0, nc = 0;
451  for ( ; i < n; i++) {
452  nc += (n * p[i] > 0.1);
453  }
454 
455  return nc > 200 ? sugar::WalkerSample(p, size, x) :
456  sugar::SampleReplace(p, size, x);
457  }
458 
459  if (size > n) {
460  stop("Sample size must be <= n when not using replacement!");
461  }
462 
463  return sugar::SampleNoReplace(p, size, x);
464  }
465 
466  if (!replace && size > n) {
467  stop("Sample size must be <= n when not using replacement!");
468  }
469 
470  return sugar::EmpiricalSample(size, replace, x);
471 }
472 
473 } // Rcpp
474 
475 #endif // Rcpp__sugar__sample_h
476 #endif // RCPPARMADILLO__EXTENSIONS__SAMPLE_H
R_xlen_t size() const
Definition: Vector.h:276
iterator end()
Definition: Vector.h:335
traits::r_vector_iterator< RTYPE, StoragePolicy >::type iterator
Definition: Vector.h:46
iterator begin()
Definition: Vector.h:334
double unif_rand(void)
Definition: Rmath.h:31
void Normalize(Vector< REALSXP > &p, int require_k, bool replace)
Definition: sample.h:57
Vector< INTSXP > SampleReplace(Vector< REALSXP > &p, int n, int k, bool one_based)
Definition: sample.h:81
Nullable< Vector< REALSXP > > probs_t
Definition: sample.h:395
Vector< INTSXP > SampleNoReplace(Vector< REALSXP > &p, int n, int nans, bool one_based)
Definition: sample.h:255
Vector< INTSXP > EmpiricalSample(int n, int size, bool replace, bool one_based)
Definition: sample.h:337
Vector< INTSXP > WalkerSample(const Vector< REALSXP > &p, int n, int nans, bool one_based)
Definition: sample.h:149
Rcpp API.
Definition: algo.h:28
sugar::Sum< INTSXP, NA, T > sum(const VectorBase< INTSXP, NA, T > &t)
Definition: sum.h:98
no_init_vector no_init(R_xlen_t size)
Definition: no_init.h:77
void NORET stop(const char *fmt, Args &&... args)
Definition: exceptions.h:51
Vector< INTSXP > sample(int n, int size, bool replace=false, sugar::probs_t probs=R_NilValue, bool one_based=true)
Definition: sample.h:401
T clone(const T &object)
Definition: clone.h:33