Rcpp Version 1.0.9
DataFrame.h
Go to the documentation of this file.
1 // -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 //
3 // DataFrame.h: Rcpp R/C++ interface class library -- data frames
4 //
5 // Copyright (C) 2010 - 2021 Dirk Eddelbuettel and Romain Francois
6 //
7 // This file is part of Rcpp.
8 //
9 // Rcpp is free software: you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License as published by
11 // the Free Software Foundation, either version 2 of the License, or
12 // (at your option) any later version.
13 //
14 // Rcpp is distributed in the hope that it will be useful, but
15 // WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
21 
22 #ifndef Rcpp__DataFrame_h
23 #define Rcpp__DataFrame_h
24 
25 namespace Rcpp{
26 
27  namespace internal{
28  inline SEXP empty_data_frame(){
29  Shield<SEXP> df( Rf_allocVector(VECSXP, 0) );
30  Rf_setAttrib(df, R_NamesSymbol, Rf_allocVector(STRSXP, 0));
31  Rf_setAttrib(df, R_RowNamesSymbol, Rf_allocVector(INTSXP, 0));
32  Rf_setAttrib(df, R_ClassSymbol, Rf_mkString("data.frame"));
33  return df;
34  }
35  }
36 
37  template <template <class> class StoragePolicy>
38  class DataFrame_Impl : public Vector<VECSXP, StoragePolicy> {
39  public:
41 
42  DataFrame_Impl() : Parent( internal::empty_data_frame() ){}
43  DataFrame_Impl(SEXP x) : Parent(x) {
44  set__(x);
45  }
46  DataFrame_Impl( const DataFrame_Impl& other) : Parent() {
47  set__(other) ;
48  }
49 
50  template <typename T>
51  DataFrame_Impl( const T& obj ) ;
52 
54  if (*this != other) set__(other);
55  return *this;
56  }
57 
59  set__(x) ;
60  return *this ;
61  }
62 
63  // By definition, the number of rows in a data.frame is contained
64  // in its row.names attribute. If it has row names of the form 1:n,
65  // they will be stored as {NA_INTEGER, -<nrow>}. Unfortunately,
66  // getAttrib(df, R_RowNamesSymbol) will force an expansion of that
67  // compact form thereby allocating a huge vector when we just want
68  // the row.names. Hence this workaround.
69  inline int nrow() const {
70  SEXP rn = R_NilValue ;
71  SEXP att = ATTRIB( Parent::get__() ) ;
72  while( att != R_NilValue ){
73  if( TAG(att) == R_RowNamesSymbol ) {
74  rn = CAR(att) ;
75  break ;
76  }
77  att = CDR(att) ;
78  }
79  if (Rf_isNull(rn))
80  return 0;
81  if (TYPEOF(rn) == INTSXP && LENGTH(rn) == 2 && INTEGER(rn)[0] == NA_INTEGER)
82  return std::abs(INTEGER(rn)[1]);
83  return LENGTH(rn);
84  }
85 
86  template <typename T>
87  void push_back( const T& object){
88  Parent::push_back(object);
90  }
91 
92  template <typename T>
93  void push_back( const T& object, const std::string& name ){
94  Parent::push_back(object, name);
96  }
97 
98  template <typename T>
99  void push_front( const T& object){
100  Parent::push_front(object);
102  }
103 
104  template <typename T>
105  void push_front( const T& object, const std::string& name){
106  Parent::push_front(object, name);
108  }
109 
110  // Offer multiple variants to accomodate both old interface here and signatures in other classes
111  inline int nrows() const { return DataFrame_Impl::nrow(); }
112  inline int rows() const { return DataFrame_Impl::nrow(); }
113 
114  inline R_xlen_t ncol() const { return DataFrame_Impl::length(); }
115  inline R_xlen_t cols() const { return DataFrame_Impl::length(); }
116 
118  return DataFrame_Impl() ;
119  }
120 
121  #include <Rcpp/generated/DataFrame_generated.h>
122 
123  private:
124  void set__(SEXP x){
125  if( ::Rf_inherits( x, "data.frame" )){
126  Parent::set__( x ) ;
127  } else{
128  Shield<SEXP> y(internal::convert_using_rfunction( x, "as.data.frame" )) ;
129  Parent::set__( y ) ;
130  }
131  }
132 
134  int max_rows = 0;
135  bool invalid_column_size = false;
136  List::iterator it;
137  // Get the maximum number of rows
138  for (it = Parent::begin(); it != Parent::end(); ++it) {
139  if (Rf_xlength(*it) > max_rows) {
140  max_rows = Rf_xlength(*it);
141  }
142  }
143  for (it = Parent::begin(); it != Parent::end(); ++it) {
144  if (Rf_xlength(*it) == 0 || ( Rf_xlength(*it) > 1 && max_rows % Rf_xlength(*it) != 0 )) {
145  // We have a column that is not an integer fraction of the largest
146  invalid_column_size = true;
147  }
148  }
149  if (invalid_column_size) {
150  warning("Column sizes are not equal in DataFrame::push_back, object degrading to List\n");
151  } else {
152  set__(Parent::get__());
153  }
154  }
155 
157  bool use_default_strings_as_factors = true ;
158  bool strings_as_factors = true ;
159  int strings_as_factors_index = -1 ;
160  R_xlen_t n = obj.size() ;
161  CharacterVector names = obj.attr( "names" ) ;
162  if( !names.isNULL() ){
163  for( int i=0; i<n; i++){
164  if( names[i] == "stringsAsFactors" ){
165  strings_as_factors_index = i ;
166  use_default_strings_as_factors = false ;
167  if( !as<bool>(obj[i]) ) strings_as_factors = false ;
168  break ;
169  }
170  }
171  }
172  if( use_default_strings_as_factors )
173  return DataFrame_Impl(obj) ;
174  SEXP as_df_symb = Rf_install("as.data.frame");
175  SEXP strings_as_factors_symb = Rf_install("stringsAsFactors");
176 
177  obj.erase(strings_as_factors_index) ;
178  names.erase(strings_as_factors_index) ;
179  obj.attr( "names") = names ;
180  Shield<SEXP> call( Rf_lang3(as_df_symb, obj, Rf_ScalarLogical(strings_as_factors) ) ) ;
181  SET_TAG( CDDR(call), strings_as_factors_symb ) ;
182  Shield<SEXP> res(Rcpp_fast_eval(call, R_GlobalEnv));
183  DataFrame_Impl out( res ) ;
184  return out ;
185 
186  }
187 
188  } ;
189 
191 }
192 
193 #endif
AttributeProxy attr(const std::string &name)
static DataFrame_Impl from_list(Parent obj)
Definition: DataFrame.h:156
void push_back(const T &object)
Definition: DataFrame.h:87
int rows() const
Definition: DataFrame.h:112
R_xlen_t cols() const
Definition: DataFrame.h:115
void push_front(const T &object)
Definition: DataFrame.h:99
int nrow() const
Definition: DataFrame.h:69
DataFrame_Impl & operator=(DataFrame_Impl &other)
Definition: DataFrame.h:53
DataFrame_Impl(const DataFrame_Impl &other)
Definition: DataFrame.h:46
DataFrame_Impl(SEXP x)
Definition: DataFrame.h:43
void set_type_after_push()
Definition: DataFrame.h:133
Vector< VECSXP, StoragePolicy > Parent
Definition: DataFrame.h:40
int nrows() const
Definition: DataFrame.h:111
void push_front(const T &object, const std::string &name)
Definition: DataFrame.h:105
static DataFrame_Impl create()
Definition: DataFrame.h:117
void push_back(const T &object, const std::string &name)
Definition: DataFrame.h:93
void set__(SEXP x)
Definition: DataFrame.h:124
R_xlen_t ncol() const
Definition: DataFrame.h:114
DataFrame_Impl & operator=(SEXP x)
Definition: DataFrame.h:58
NamesProxy names()
Definition: NamesProxy.h:82
iterator erase(int position)
Definition: Vector.h:492
R_xlen_t size() const
Definition: Vector.h:276
traits::r_vector_iterator< RTYPE, PreserveStorage >::type iterator
Definition: Vector.h:46
void push_back(const T &object)
Definition: Vector.h:452
void push_front(const T &object)
Definition: Vector.h:466
double df(double x, double df1, double df2, int lg)
Definition: Rmath.h:84
SEXP empty_data_frame()
Definition: DataFrame.h:28
SEXP convert_using_rfunction(SEXP x, const char *const fun)
Definition: r_cast.h:30
Rcpp API.
Definition: algo.h:28
void warning(const char *fmt, Args &&... args)
Definition: exceptions.h:46
SEXP Rcpp_fast_eval(SEXP expr, SEXP env)
Definition: Rcpp_eval.h:68
DataFrame_Impl< PreserveStorage > DataFrame
Definition: DataFrame.h:190