Rcpp Version 1.0.14
Loading...
Searching...
No Matches
sample.h
Go to the documentation of this file.
1// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
2//
3// sample.h: Rcpp R/C++ interface class library -- sample
4//
5// Copyright (C) 2016 Nathan Russell
6//
7// This file is part of Rcpp.
8//
9// Rcpp is free software: you can redistribute it and/or modify it
10// under the terms of the GNU General Public License as published by
11// the Free Software Foundation, either version 2 of the License, or
12// (at your option) any later version.
13//
14// Rcpp is distributed in the hope that it will be useful, but
15// WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17// GNU General Public License for more details.
18//
19// You should have received a copy of the GNU General Public License
20// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
21
22// The sample() in RcppArmadillo came first, but is opt-in. In case someone did
23// in fact load it, we need to skip the declarations here to avoid a conflict
24#ifndef RCPPARMADILLO__EXTENSIONS__SAMPLE_H
25
26#ifndef Rcpp__sugar__sample_h
27#define Rcpp__sugar__sample_h
28
29#include <vector>
30
31// In order to mirror the behavior of `base::sample`
32// as closely as possible, this file contains adaptations
33// of several functions in R/src/main/random.c:
34//
35// * do_sample - general logic as well as the empirical sampling routine.
36//
37// * FixupProb - an auxiliary function.
38//
39// * walker_ProbSampleReplace, ProbSampleReplace, and ProbSampleNoReplace -
40// algorithms for sampling according to a supplied probability vector.
41//
42// For each of the sampling routines, two signatures are provided:
43//
44// * A version that returns an integer vector, which can be used to
45// generate 0-based indices (one_based = false) or 1-based indices
46// (one_based = true) -- where the latter corresponds to the
47// bahavior of `base::sample.int`.
48//
49// * A version which takes an input Vector<> (rather than an integer 'n'),
50// and samples its elements -- this corresponds to `base::sample`.
51
52namespace Rcpp {
53namespace sugar {
54
55// Adapted from `FixupProb`
56// Normalizes a probability vector 'p' S.T. sum(p) == 1
57inline void Normalize(Vector<REALSXP>& p, int require_k, bool replace)
58{
59 double sum = 0.0;
60 R_xlen_t npos = 0, i = 0, n = p.size();
61
62 for ( ; i < n; i++) {
63 if (!R_FINITE(p[i]) || (p[i] < 0)) {
64 stop("Probabilities must be finite and non-negative!");
65 }
66 npos += (p[i] > 0.0);
67 sum += p[i];
68 }
69
70 if ((!npos) || (!replace && (require_k > npos))) {
71 stop("Too few positive probabilities!");
72 }
73
74 for (i = 0; i < n; i++) {
75 p[i] /= sum;
76 }
77}
78
79// Adapted from `ProbSampleReplace`
80// Index version
82{
84 double rU = 0.0;
85 int i = 0, j = 0, nm1 = n - 1;
86
87 int adj = one_based ? 0 : 1;
88
89 for ( ; i < n; i++) {
90 perm[i] = i + 1;
91 }
92
93 Rf_revsort(p.begin(), perm.begin(), n);
94
95 for (i = 1; i < n; i++) {
96 p[i] += p[i - 1];
97 }
98
99 for (i = 0; i < k; i++) {
100 rU = unif_rand();
101 for (j = 0; j < nm1; j++) {
102 if (rU <= p[j]) {
103 break;
104 }
105 }
106 ans[i] = perm[j] - adj;
107 }
108
109 return ans;
110}
111
112// Element version
113template <int RTYPE>
115{
116 int n = ref.size();
117
120
121 double rU = 0.0;
122 int i = 0, j = 0, nm1 = n - 1;
123
124 for ( ; i < n; i++) {
125 perm[i] = i + 1;
126 }
127
128 Rf_revsort(p.begin(), perm.begin(), n);
129
130 for (i = 1; i < n; i++) {
131 p[i] += p[i - 1];
132 }
133
134 for (i = 0; i < k; i++) {
135 rU = unif_rand();
136 for (j = 0; j < nm1; j++) {
137 if (rU <= p[j]) {
138 break;
139 }
140 }
141 ans[i] = ref[perm[j] - 1];
142 }
143
144 return ans;
145}
146
147// Adapted from `walker_ProbSampleReplace`
148// Index version
149inline Vector<INTSXP> WalkerSample(const Vector<REALSXP>& p, int n, int nans, bool one_based)
150{
152 int i, j, k;
153 std::vector<double> q(n);
154 double rU;
155
156 std::vector<int> HL(n);
157 std::vector<int>::iterator H, L;
158
159 int adj = one_based ? 1 : 0;
160
161 H = HL.begin() - 1; L = HL.begin() + n;
162 for (i = 0; i < n; i++) {
163 q[i] = p[i] * n;
164 if (q[i] < 1.0) {
165 *++H = i;
166 } else {
167 *--L = i;
168 }
169 }
170
171 if (H >= HL.begin() && L < HL.begin() + n) {
172 for (k = 0; k < n - 1; k++) {
173 i = HL[k];
174 j = *L;
175 a[i] = j;
176 q[j] += q[i] - 1;
177
178 L += (q[j] < 1.0);
179
180 if (L >= HL.begin() + n) {
181 break;
182 }
183 }
184 }
185
186 for (i = 0; i < n; i++) {
187 q[i] += i;
188 }
189
190 for (i = 0; i < nans; i++) {
191 rU = unif_rand() * n;
192 k = static_cast<int>(rU);
193 ans[i] = (rU < q[k]) ? k + adj : a[k] + adj;
194 }
195
196 return ans;
197}
198
199// Element version
200template <int RTYPE>
202{
203 int n = ref.size();
204
205 Vector<INTSXP> a = no_init(n);
207
208 int i, j, k;
209 std::vector<double> q(n);
210 double rU;
211
212 std::vector<int> HL(n);
213 std::vector<int>::iterator H, L;
214
215 H = HL.begin() - 1; L = HL.begin() + n;
216 for (i = 0; i < n; i++) {
217 q[i] = p[i] * n;
218 if (q[i] < 1.0) {
219 *++H = i;
220 } else {
221 *--L = i;
222 }
223 }
224
225 if (H >= HL.begin() && L < HL.begin() + n) {
226 for (k = 0; k < n - 1; k++) {
227 i = HL[k];
228 j = *L;
229 a[i] = j;
230 q[j] += q[i] - 1;
231
232 L += (q[j] < 1.0);
233
234 if (L >= HL.begin() + n) {
235 break;
236 }
237 }
238 }
239
240 for (i = 0; i < n; i++) {
241 q[i] += i;
242 }
243
244 for (i = 0; i < nans; i++) {
245 rU = unif_rand() * n;
246 k = static_cast<int>(rU);
247 ans[i] = (rU < q[k]) ? ref[k] : ref[a[k]];
248 }
249
250 return ans;
251}
252
253// Adapted from `ProbSampleNoReplace`
254// Index version
256{
258 double rT, mass, totalmass;
259 int i, j, k, n1;
260
261 int adj = one_based ? 0 : 1;
262
263 for (i = 0; i < n; i++) {
264 perm[i] = i + 1;
265 }
266
267 Rf_revsort(p.begin(), perm.begin(), n);
268
269 totalmass = 1.0;
270 for (i = 0, n1 = n - 1; i < nans; i++, n1--) {
271 rT = totalmass * unif_rand();
272 mass = 0.0;
273
274 for (j = 0; j < n1; j++) {
275 mass += p[j];
276 if (rT <= mass) {
277 break;
278 }
279 }
280
281 ans[i] = perm[j] - adj;
282 totalmass -= p[j];
283
284 for (k = j; k < n1; k++) {
285 p[k] = p[k + 1];
286 perm[k] = perm[k + 1];
287 }
288 }
289
290 return ans;
291}
292
293// Element version
294template <int RTYPE>
296{
297 int n = ref.size();
298
301
302 double rT, mass, totalmass;
303 int i, j, k, n1;
304
305 for (i = 0; i < n; i++) {
306 perm[i] = i + 1;
307 }
308
309 Rf_revsort(p.begin(), perm.begin(), n);
310
311 totalmass = 1.0;
312 for (i = 0, n1 = n - 1; i < nans; i++, n1--) {
313 rT = totalmass * unif_rand();
314 mass = 0.0;
315
316 for (j = 0; j < n1; j++) {
317 mass += p[j];
318 if (rT <= mass) {
319 break;
320 }
321 }
322
323 ans[i] = ref[perm[j] - 1];
324 totalmass -= p[j];
325
326 for (k = j; k < n1; k++) {
327 p[k] = p[k + 1];
328 perm[k] = perm[k + 1];
329 }
330 }
331
332 return ans;
333}
334
335// Adapted from segment of `do_sample`
336// Index version
337inline Vector<INTSXP> EmpiricalSample(int n, int size, bool replace, bool one_based)
338{
339 Vector<INTSXP> ans = no_init(size);
340 Vector<INTSXP>::iterator ians = ans.begin(), eans = ans.end();
341
342 int adj = one_based ? 1 : 0;
343
344 if (replace || size < 2) {
345 for ( ; ians != eans; ++ians) {
346 *ians = static_cast<int>(n * unif_rand() + adj);
347 }
348 return ans;
349 }
350
351 IntegerVector x = no_init(n);
352 for (int i = 0; i < n; i++) {
353 x[i] = i;
354 }
355
356 for ( ; ians != eans; ++ians) {
357 int j = static_cast<int>(n * unif_rand());
358 *ians = x[j] + adj;
359 x[j] = x[--n];
360 }
361
362 return ans;
363}
364
365// Element version
366template <int RTYPE>
367inline Vector<RTYPE> EmpiricalSample(int size, bool replace, const Vector<RTYPE>& ref)
368{
369 int n = ref.size();
370
371 Vector<RTYPE> ans = no_init(size);
372 typename Vector<RTYPE>::iterator ians = ans.begin(), eans = ans.end();
373
374 if (replace || size < 2) {
375 for ( ; ians != eans; ++ians) {
376 *ians = ref[static_cast<int>(n * unif_rand())];
377 }
378 return ans;
379 }
380
381 IntegerVector x = no_init(n);
382 for (int i = 0; i < n; i++) {
383 x[i] = i;
384 }
385
386 for ( ; ians != eans; ++ians) {
387 int j = static_cast<int>(n * unif_rand());
388 *ians = ref[x[j]];
389 x[j] = x[--n];
390 }
391
392 return ans;
393}
394
396
397} // sugar
398
399// Adapted from `do_sample`
400inline Vector<INTSXP>
401sample(int n, int size, bool replace = false, sugar::probs_t probs = R_NilValue, bool one_based = true)
402{
403 if (probs.isNotNull()) {
404 Vector<REALSXP> p = clone(probs.get());
405 if (static_cast<int>(p.size()) != n) {
406 stop("probs.size() != n!");
407 }
408
409 sugar::Normalize(p, size, replace);
410
411 if (replace) {
412 int i = 0, nc = 0;
413 for ( ; i < n; i++) {
414 nc += (n * p[i] > 0.1);
415 }
416
417 return nc > 200 ? sugar::WalkerSample(p, n, size, one_based) :
418 sugar::SampleReplace(p, n, size, one_based);
419 }
420
421 if (size > n) {
422 stop("Sample size must be <= n when not using replacement!");
423 }
424
425 return sugar::SampleNoReplace(p, n, size, one_based);
426 }
427
428 if (!replace && size > n) {
429 stop("Sample size must be <= n when not using replacement!");
430 }
431
432 return sugar::EmpiricalSample(n, size, replace, one_based);
433}
434
435template <int RTYPE>
436inline Vector<RTYPE>
437sample(const Vector<RTYPE>& x, int size, bool replace = false, sugar::probs_t probs = R_NilValue)
438{
439 int n = x.size();
440
441 if (probs.isNotNull()) {
442 Vector<REALSXP> p = clone(probs.get());
443 if (static_cast<int>(p.size()) != n) {
444 stop("probs.size() != n!");
445 }
446
447 sugar::Normalize(p, size, replace);
448
449 if (replace) {
450 int i = 0, nc = 0;
451 for ( ; i < n; i++) {
452 nc += (n * p[i] > 0.1);
453 }
454
455 return nc > 200 ? sugar::WalkerSample(p, size, x) :
456 sugar::SampleReplace(p, size, x);
457 }
458
459 if (size > n) {
460 stop("Sample size must be <= n when not using replacement!");
461 }
462
463 return sugar::SampleNoReplace(p, size, x);
464 }
465
466 if (!replace && size > n) {
467 stop("Sample size must be <= n when not using replacement!");
468 }
469
470 return sugar::EmpiricalSample(size, replace, x);
471}
472
473} // Rcpp
474
475#endif // Rcpp__sugar__sample_h
476#endif // RCPPARMADILLO__EXTENSIONS__SAMPLE_H
R_xlen_t size() const
Definition Vector.h:275
iterator begin()
Definition Vector.h:333
traits::r_vector_iterator< RTYPE, StoragePolicy >::type iterator
Definition Vector.h:45
void Normalize(Vector< REALSXP > &p, int require_k, bool replace)
Definition sample.h:57
Vector< INTSXP > SampleNoReplace(Vector< REALSXP > &p, int n, int nans, bool one_based)
Definition sample.h:255
Nullable< Vector< REALSXP > > probs_t
Definition sample.h:395
Vector< INTSXP > SampleReplace(Vector< REALSXP > &p, int n, int k, bool one_based)
Definition sample.h:81
Vector< INTSXP > EmpiricalSample(int n, int size, bool replace, bool one_based)
Definition sample.h:337
Vector< INTSXP > WalkerSample(const Vector< REALSXP > &p, int n, int nans, bool one_based)
Definition sample.h:149
Rcpp API.
Definition algo.h:28
void NORET stop(const std::string &message)
Definition exceptions.h:117
no_init_vector no_init(R_xlen_t size)
Definition no_init.h:77
Vector< INTSXP > sample(int n, int size, bool replace=false, sugar::probs_t probs=R_NilValue, bool one_based=true)
Definition sample.h:401
T as(SEXP x)
Definition as.h:151
T clone(const T &object)
Definition clone.h:33
sugar::Sum< INTSXP, NA, T > sum(const VectorBase< INTSXP, NA, T > &t)
Definition sum.h:98