Rcpp Version 1.0.9
IndexHash.h
Go to the documentation of this file.
1 
2 // IndexHash.h: Rcpp R/C++ interface class library -- hashing utility, inspired
3 // from Simon's fastmatch package
4 //
5 // Copyright (C) 2010, 2011 Simon Urbanek
6 // Copyright (C) 2012 - 2013 Dirk Eddelbuettel and Romain Francois
7 // Copyright (C) 2014 - 2021 Dirk Eddelbuettel, Romain Francois and Kevin Ushey
8 //
9 // This file is part of Rcpp.
10 //
11 // Rcpp is free software: you can redistribute it and/or modify it
12 // under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 2 of the License, or
14 // (at your option) any later version.
15 //
16 // Rcpp is distributed in the hope that it will be useful, but
17 // WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
23 
24 #ifndef RCPP__HASH__INDEX_HASH_H
25 #define RCPP__HASH__INDEX_HASH_H
26 
27 #if ( defined(HASH_PROFILE) && defined(__APPLE__) )
28  // only mac version for now
29  #include <mach/mach_time.h>
30  #define ABSOLUTE_TIME mach_absolute_time
31  #define RCPP_PROFILE_TIC start = ABSOLUTE_TIME() ;
32  #define RCPP_PROFILE_TOC end = ABSOLUTE_TIME() ;
33  #define RCPP_PROFILE_RECORD(name) profile_data[#name] = end - start ;
34 #else
35  #define RCPP_PROFILE_TIC
36  #define RCPP_PROFILE_TOC
37  #define RCPP_PROFILE_RECORD(name)
38 #endif
39 #define RCPP_USE_CACHE_HASH
40 
41 namespace Rcpp{
42  namespace sugar{
43 
44  #ifndef RCPP_HASH
45  #define RCPP_HASH(X) (3141592653U * ((uint32_t)(X)) >> (32 - k))
46  #endif
47 
48  template <int RTYPE>
49  class IndexHash {
50  public:
53 
54  IndexHash( SEXP table ) : n(Rf_length(table)), m(2), k(1), src( (STORAGE*)dataptr(table) ), size_(0)
55  , data()
56  #ifdef HASH_PROFILE
57  , profile_data()
58  #endif
59  {
61  int desired = n*2 ;
62  while( m < desired ){ m *= 2 ; k++ ; }
63  #ifdef RCPP_USE_CACHE_HASH
64  data = get_cache(m) ;
65  #else
66  data.resize( m ) ;
67  #endif
69  RCPP_PROFILE_RECORD(ctor_body)
70 
71  }
72 
73  inline IndexHash& fill(){
75 
76  for( int i=0; i<n; i++) add_value(i) ;
77 
80 
81  return *this ;
82  }
83 
86  int* res = LOGICAL(result) ;
87  for( int i=0; i<n; i++) res[i] = ! add_value(i) ;
88  return result ;
89  }
90 
91  template <typename T>
92  inline SEXP lookup(const T& vec) const {
93  return lookup__impl(vec, vec.size() ) ;
94  }
95 
96  // use the pointers for actual (non sugar expression vectors)
97  inline SEXP lookup(const VECTOR& vec) const {
98  return lookup__impl(vec.begin(), vec.size() ) ;
99  }
100 
101  inline bool contains(STORAGE val) const {
102  return get_index(val) != static_cast<uint32_t>(NA_INTEGER);
103  }
104 
105  inline int size() const {
106  return size_ ;
107  }
108 
109  // keys, in the order they appear in the data
110  inline Vector<RTYPE> keys() const{
111  Vector<RTYPE> res = no_init(size_) ;
112  for( int i=0, j=0; j<size_; i++){
113  if( data[i] ) res[j++] = src[data[i]-1] ;
114  }
115  return res ;
116  }
117 
118  int n, m, k ;
120  int size_ ;
121  #ifdef RCPP_USE_CACHE_HASH
122  int* data ;
123  #else
124  std::vector<int> data ;
125  #endif
126 
127  #ifdef HASH_PROFILE
128  mutable std::map<std::string,int> profile_data ;
129  mutable uint64_t start ;
130  mutable uint64_t end ;
131  #endif
132 
133  template <typename T>
134  SEXP lookup__impl(const T& vec, int n_) const {
136 
137  SEXP res = Rf_allocVector(INTSXP, n_) ;
138 
140  RCPP_PROFILE_RECORD(allocVector)
141 
142  int *v = INTEGER(res) ;
143 
145 
146  for( int i=0; i<n_; i++) v[i] = get_index( vec[i] ) ;
147 
150 
151  return res ;
152  }
153 
155  #ifdef HASH_PROFILE
156  return wrap( profile_data ) ;
157  #else
158  return R_NilValue ;
159  #endif
160  }
161 
162  inline bool not_equal(const STORAGE& lhs, const STORAGE& rhs) {
163  return ! internal::NAEquals<STORAGE>()(lhs, rhs);
164  }
165 
166  bool add_value(int i){
167  RCPP_DEBUG_2( "%s::add_value(%d)", DEMANGLE(IndexHash), i )
168  STORAGE val = src[i++] ;
169  uint32_t addr = get_addr(val) ;
170  while (data[addr] && not_equal( src[data[addr] - 1], val)) {
171  addr++;
172  if (addr == static_cast<uint32_t>(m)) {
173  addr = 0;
174  }
175  }
176 
177  if (!data[addr]){
178  data[addr] = i ;
179  size_++ ;
180 
181  return true ;
182  }
183  return false;
184  }
185 
186  /* NOTE: we are returning a 1-based index ! */
187  inline uint32_t get_index(STORAGE value) const {
188  uint32_t addr = get_addr(value) ;
189  while (data[addr]) {
190  if (src[data[addr] - 1] == value)
191  return data[addr];
192  addr++;
193  if (addr == static_cast<uint32_t>(m)) addr = 0;
194  }
195  return NA_INTEGER;
196  }
197 
198  // defined below
199  uint32_t get_addr(STORAGE value) const ;
200  } ;
201 
202  template <>
203  inline uint32_t IndexHash<INTSXP>::get_addr(int value) const {
204  return RCPP_HASH(value) ;
205  }
206  template <>
207  inline uint32_t IndexHash<REALSXP>::get_addr(double val) const {
208  uint32_t addr;
209  union dint_u {
210  double d;
211  uint32_t u[2];
212  };
213  union dint_u val_u;
214  /* double is a bit tricky - we nave to normalize 0.0, NA and NaN */
215  if (val == 0.0) val = 0.0;
216  if (internal::Rcpp_IsNA(val)) val = NA_REAL;
217  else if (internal::Rcpp_IsNaN(val)) val = R_NaN;
218  val_u.d = val;
219  addr = RCPP_HASH(val_u.u[0] + val_u.u[1]);
220  return addr ;
221  }
222 
223  template <>
224  inline uint32_t IndexHash<STRSXP>::get_addr(SEXP value) const {
225  intptr_t val = (intptr_t) value;
226  uint32_t addr;
227  #if (defined _LP64) || (defined __LP64__) || (defined WIN64)
228  addr = RCPP_HASH((val & 0xffffffff) ^ (val >> 32));
229  #else
230  addr = RCPP_HASH(val);
231  #endif
232  return addr ;
233  }
234 
235 
236 } // sugar
237 } // Rcpp
238 
239 #endif
#define RCPP_PROFILE_TIC
Definition: IndexHash.h:35
#define RCPP_PROFILE_TOC
Definition: IndexHash.h:36
#define RCPP_PROFILE_RECORD(name)
Definition: IndexHash.h:37
#define RCPP_HASH(X)
Definition: IndexHash.h:45
void * dataptr(SEXP)
Definition: routines.h:264
R_xlen_t size() const
Definition: Vector.h:276
iterator begin()
Definition: Vector.h:334
SEXP lookup(const VECTOR &vec) const
Definition: IndexHash.h:97
bool contains(STORAGE val) const
Definition: IndexHash.h:101
bool not_equal(const STORAGE &lhs, const STORAGE &rhs)
Definition: IndexHash.h:162
bool add_value(int i)
Definition: IndexHash.h:166
Vector< RTYPE > keys() const
Definition: IndexHash.h:110
uint32_t get_addr(STORAGE value) const
SEXP lookup__impl(const T &vec, int n_) const
Definition: IndexHash.h:134
Vector< RTYPE > VECTOR
Definition: IndexHash.h:52
traits::storage_type< RTYPE >::type STORAGE
Definition: IndexHash.h:51
SEXP lookup(const T &vec) const
Definition: IndexHash.h:92
IndexHash(SEXP table)
Definition: IndexHash.h:54
LogicalVector fill_and_get_duplicated()
Definition: IndexHash.h:84
uint32_t get_index(STORAGE value) const
Definition: IndexHash.h:187
IndexHash & fill()
Definition: IndexHash.h:73
#define RCPP_DEBUG_2(fmt, M1, M2)
Definition: debug.h:45
#define DEMANGLE(__TYPE__)
Definition: exceptions.h:382
Rcpp API.
Definition: algo.h:28
no_init_vector no_init(R_xlen_t size)
Definition: no_init.h:77
IntegerVector table(const VectorBase< RTYPE, NA, T > &x)
Definition: table.h:126
SEXP wrap(const Date &date)
Definition: Date.h:38
attribute_hidden int * get_cache(int n)
Definition: routines.h:282