Rcpp Version 0.12.14
IndexHash.h
Go to the documentation of this file.
1 // -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 4 -*-
2 //
3 // IndexHash.h: Rcpp R/C++ interface class library -- hashing utility, inspired
4 // from Simon's fastmatch package
5 //
6 // Copyright (C) 2010, 2011 Simon Urbanek
7 // Copyright (C) 2012 Dirk Eddelbuettel and Romain Francois
8 // Copyright (C) 2014 Dirk Eddelbuettel, Romain Francois and Kevin Ushey
9 //
10 // This file is part of Rcpp.
11 //
12 // Rcpp is free software: you can redistribute it and/or modify it
13 // under the terms of the GNU General Public License as published by
14 // the Free Software Foundation, either version 2 of the License, or
15 // (at your option) any later version.
16 //
17 // Rcpp is distributed in the hope that it will be useful, but
18 // WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU General Public License for more details.
21 //
22 // You should have received a copy of the GNU General Public License
23 // along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
24 
25 #ifndef RCPP__HASH__INDEX_HASH_H
26 #define RCPP__HASH__INDEX_HASH_H
27 
28 #if ( defined(HASH_PROFILE) && defined(__APPLE__) )
29  // only mac version for now
30  #include <mach/mach_time.h>
31  #define ABSOLUTE_TIME mach_absolute_time
32  #define RCPP_PROFILE_TIC start = ABSOLUTE_TIME() ;
33  #define RCPP_PROFILE_TOC end = ABSOLUTE_TIME() ;
34  #define RCPP_PROFILE_RECORD(name) profile_data[#name] = end - start ;
35 #else
36  #define RCPP_PROFILE_TIC
37  #define RCPP_PROFILE_TOC
38  #define RCPP_PROFILE_RECORD(name)
39 #endif
40 #define RCPP_USE_CACHE_HASH
41 
42 namespace Rcpp{
43  namespace sugar{
44 
45  #ifndef RCPP_HASH
46  #define RCPP_HASH(X) (3141592653U * ((unsigned int)(X)) >> (32 - k))
47  #endif
48 
49  template <int RTYPE>
50  class IndexHash {
51  public:
54 
55  IndexHash( SEXP table ) : n(Rf_length(table)), m(2), k(1), src( (STORAGE*)dataptr(table) ), size_(0)
56  , data()
57  #ifdef HASH_PROFILE
58  , profile_data()
59  #endif
60  {
62  int desired = n*2 ;
63  while( m < desired ){ m *= 2 ; k++ ; }
64  #ifdef RCPP_USE_CACHE_HASH
65  data = get_cache(m) ;
66  #else
67  data.resize( m ) ;
68  #endif
70  RCPP_PROFILE_RECORD(ctor_body)
71 
72  }
73 
74  inline IndexHash& fill(){
76 
77  for( int i=0; i<n; i++) add_value(i) ;
78 
81 
82  return *this ;
83  }
84 
87  int* res = LOGICAL(result) ;
88  for( int i=0; i<n; i++) res[i] = ! add_value(i) ;
89  return result ;
90  }
91 
92  template <typename T>
93  inline SEXP lookup(const T& vec) const {
94  return lookup__impl(vec, vec.size() ) ;
95  }
96 
97  // use the pointers for actual (non sugar expression vectors)
98  inline SEXP lookup(const VECTOR& vec) const {
99  return lookup__impl(vec.begin(), vec.size() ) ;
100  }
101 
102  inline bool contains(STORAGE val) const {
103  return get_index(val) != NA_INTEGER ;
104  }
105 
106  inline int size() const {
107  return size_ ;
108  }
109 
110  // keys, in the order they appear in the data
111  inline Vector<RTYPE> keys() const{
112  Vector<RTYPE> res = no_init(size_) ;
113  for( int i=0, j=0; j<size_; i++){
114  if( data[i] ) res[j++] = src[data[i]-1] ;
115  }
116  return res ;
117  }
118 
119  int n, m, k ;
120  STORAGE* src ;
121  int size_ ;
122  #ifdef RCPP_USE_CACHE_HASH
123  int* data ;
124  #else
125  std::vector<int> data ;
126  #endif
127 
128  #ifdef HASH_PROFILE
129  mutable std::map<std::string,int> profile_data ;
130  mutable uint64_t start ;
131  mutable uint64_t end ;
132  #endif
133 
134  template <typename T>
135  SEXP lookup__impl(const T& vec, int n_) const {
137 
138  SEXP res = Rf_allocVector(INTSXP, n_) ;
139 
141  RCPP_PROFILE_RECORD(allocVector)
142 
143  int *v = INTEGER(res) ;
144 
146 
147  for( int i=0; i<n_; i++) v[i] = get_index( vec[i] ) ;
148 
151 
152  return res ;
153  }
154 
156  #ifdef HASH_PROFILE
157  return wrap( profile_data ) ;
158  #else
159  return R_NilValue ;
160  #endif
161  }
162 
163  inline bool not_equal(const STORAGE& lhs, const STORAGE& rhs) {
164  return ! internal::NAEquals<STORAGE>()(lhs, rhs);
165  }
166 
167  bool add_value(int i){
168  RCPP_DEBUG_2( "%s::add_value(%d)", DEMANGLE(IndexHash), i )
169  STORAGE val = src[i++] ;
170  unsigned int addr = get_addr(val) ;
171  while (data[addr] && not_equal( src[data[addr] - 1], val)) {
172  addr++;
173  if (addr == static_cast<unsigned int>(m)) {
174  addr = 0;
175  }
176  }
177 
178  if (!data[addr]){
179  data[addr] = i ;
180  size_++ ;
181 
182  return true ;
183  }
184  return false;
185  }
186 
187  /* NOTE: we are returning a 1-based index ! */
188  inline unsigned int get_index(STORAGE value) const {
189  unsigned int addr = get_addr(value) ;
190  while (data[addr]) {
191  if (src[data[addr] - 1] == value)
192  return data[addr];
193  addr++;
194  if (addr == static_cast<unsigned int>(m)) addr = 0;
195  }
196  return NA_INTEGER;
197  }
198 
199  // defined below
200  unsigned int get_addr(STORAGE value) const ;
201  } ;
202 
203  template <>
204  inline unsigned int IndexHash<INTSXP>::get_addr(int value) const {
205  return RCPP_HASH(value) ;
206  }
207  template <>
208  inline unsigned int IndexHash<REALSXP>::get_addr(double val) const {
209  unsigned int addr;
210  union dint_u {
211  double d;
212  unsigned int u[2];
213  };
214  union dint_u val_u;
215  /* double is a bit tricky - we nave to normalize 0.0, NA and NaN */
216  if (val == 0.0) val = 0.0;
217  if (internal::Rcpp_IsNA(val)) val = NA_REAL;
218  else if (internal::Rcpp_IsNaN(val)) val = R_NaN;
219  val_u.d = val;
220  addr = RCPP_HASH(val_u.u[0] + val_u.u[1]);
221  return addr ;
222  }
223 
224  template <>
225  inline unsigned int IndexHash<STRSXP>::get_addr(SEXP value) const {
226  intptr_t val = (intptr_t) value;
227  unsigned int addr;
228  #if (defined _LP64) || (defined __LP64__) || (defined WIN64)
229  addr = RCPP_HASH((val & 0xffffffff) ^ (val >> 32));
230  #else
231  addr = RCPP_HASH(val);
232  #endif
233  return addr ;
234  }
235 
236 
237 } // sugar
238 } // Rcpp
239 
240 #endif
241 
bool contains(STORAGE val) const
Definition: IndexHash.h:102
LogicalVector fill_and_get_duplicated()
Definition: IndexHash.h:85
no_init_vector no_init(R_xlen_t size)
Definition: no_init.h:69
#define DEMANGLE(__TYPE__)
Definition: exceptions.h:315
IndexHash & fill()
Definition: IndexHash.h:74
R_xlen_t size() const
Definition: Vector.h:274
unsigned int get_index(STORAGE value) const
Definition: IndexHash.h:188
IntegerVector table(const VectorBase< RTYPE, NA, T > &x)
Definition: table.h:126
void * dataptr(SEXP)
Definition: routines.h:206
Vector< RTYPE > keys() const
Definition: IndexHash.h:111
#define RCPP_HASH(X)
Definition: IndexHash.h:46
Vector< RTYPE > VECTOR
Definition: IndexHash.h:53
bool not_equal(const STORAGE &lhs, const STORAGE &rhs)
Definition: IndexHash.h:163
SEXP lookup__impl(const T &vec, int n_) const
Definition: IndexHash.h:135
#define RCPP_PROFILE_TIC
Definition: IndexHash.h:36
traits::storage_type< RTYPE >::type STORAGE
Definition: IndexHash.h:52
#define RCPP_PROFILE_TOC
Definition: IndexHash.h:37
#define RCPP_PROFILE_RECORD(name)
Definition: IndexHash.h:38
IndexHash(SEXP table)
Definition: IndexHash.h:55
#define RCPP_DEBUG_2(fmt, M1, M2)
Definition: debug.h:45
attribute_hidden int * get_cache(int n)
Definition: routines.h:224
SEXP lookup(const T &vec) const
Definition: IndexHash.h:93
unsigned int get_addr(STORAGE value) const
SEXP wrap(const Date &date)
Definition: Date.h:38
Rcpp API.
Definition: algo.h:28
iterator begin()
Definition: Vector.h:332
bool add_value(int i)
Definition: IndexHash.h:167
SEXP lookup(const VECTOR &vec) const
Definition: IndexHash.h:98