aGrUM  0.16.0
rawDatabaseTable_tpl.h
Go to the documentation of this file.
1 
29 
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 
32 namespace gum {
33 
34  namespace learning {
35 
36 
37  // Constructor taking the missing-value symbols, the variable names and an
    // allocator. The three arguments are forwarded unchanged to the
    // IDatabaseTable< DBCell, ALLOC > base class, and the vector of ignored
    // columns is constructed (empty) with the same allocator.
    // NOTE(review): listing line 42 -- presumably the
    // "RawDatabaseTable< ALLOC >::RawDatabaseTable(" declarator -- is absent
    // from this extract; confirm against the original file.
38  template < template < typename > class ALLOC >
39  template < template < typename > class VARALLOC,
40  template < typename >
41  class MISSALLOC >
43  const typename RawDatabaseTable< ALLOC >::template MissingValType<
44  MISSALLOC >& missing_symbols,
45  const std::vector< std::string, VARALLOC< std::string > >& var_names,
46  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
47  IDatabaseTable< DBCell, ALLOC >(missing_symbols, var_names, alloc),
48  __ignored_cols(alloc) {
49  GUM_CONSTRUCTOR(RawDatabaseTable);
50  }
51 
52 
53  // Constructor taking only the missing-value symbols and an allocator.
    // The base class receives the missing symbols, an empty vector of
    // variable names and the allocator; the vector of ignored columns is
    // constructed (empty) with the same allocator.
    // NOTE(review): listing line 56 -- presumably the constructor
    // declarator -- is absent from this extract; confirm against the
    // original file.
54  template < template < typename > class ALLOC >
55  template < template < typename > class MISSALLOC >
57  const typename RawDatabaseTable< ALLOC >::template MissingValType<
58  MISSALLOC >& missing_symbols,
59  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
60  IDatabaseTable< DBCell, ALLOC >(
61  missing_symbols,
    // no variable name is known yet
62  std::vector< std::string, ALLOC< std::string > >(),
63  alloc),
64  __ignored_cols(alloc) {
65  GUM_CONSTRUCTOR(RawDatabaseTable);
66  }
67 
68 
69  // Constructor taking only an allocator. The base class receives two empty
    // string vectors (its first two parameters are the missing symbols and
    // the variable names) together with the allocator; the vector of ignored
    // columns is constructed (empty) with the same allocator.
    // NOTE(review): listing line 71 -- presumably the constructor
    // declarator -- is absent from this extract; confirm against the
    // original file.
70  template < template < typename > class ALLOC >
72  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
73  IDatabaseTable< DBCell, ALLOC >(
    // empty set of missing symbols
74  std::vector< std::string, ALLOC< std::string > >(),
    // empty set of variable names
75  std::vector< std::string, ALLOC< std::string > >(),
76  alloc),
77  __ignored_cols(alloc) {
78  GUM_CONSTRUCTOR(RawDatabaseTable);
79  }
80 
81 
82  // Copy constructor with a given allocator: the base class is
    // copy-constructed from `from` with `alloc`, and the ignored columns of
    // `from` are copied into a vector using `alloc`.
    // NOTE(review): listing line 84 -- presumably the constructor
    // declarator -- is absent from this extract; confirm against the
    // original file.
83  template < template < typename > class ALLOC >
85  const RawDatabaseTable< ALLOC >& from,
86  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
87  IDatabaseTable< DBCell, ALLOC >(from, alloc),
88  __ignored_cols(from.__ignored_cols, alloc) {
89  GUM_CONS_CPY(RawDatabaseTable);
90  }
91 
92  // Copy constructor: delegates to the copy constructor with allocator,
    // passing the allocator returned by from.getAllocator().
    // NOTE(review): listing line 94 -- presumably the constructor
    // declarator -- is absent from this extract; confirm against the
    // original file.
93  template < template < typename > class ALLOC >
95  const RawDatabaseTable< ALLOC >& from) :
96  RawDatabaseTable< ALLOC >(from, from.getAllocator()) {}
97 
98 
99  // Move constructor with a given allocator: the base class is
    // move-constructed from `from` with `alloc`, then the ignored columns of
    // `from` are moved into this object. Note that `alloc` is only forwarded
    // to the base class: __ignored_cols is move-constructed without it.
    // NOTE(review): listing line 101 -- presumably the constructor
    // declarator -- is absent from this extract; confirm against the
    // original file.
100  template < template < typename > class ALLOC >
102  RawDatabaseTable< ALLOC >&& from,
103  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
104  IDatabaseTable< DBCell, ALLOC >(std::move(from), alloc),
105  __ignored_cols(std::move(from.__ignored_cols)) {
106  GUM_CONS_MOV(RawDatabaseTable);
107  }
108 
109 
110  // Move constructor: delegates to the move constructor with allocator,
     // passing the allocator returned by from.getAllocator().
     // NOTE(review): listing line 112 -- presumably the constructor
     // declarator -- is absent from this extract; confirm against the
     // original file.
111  template < template < typename > class ALLOC >
113  RawDatabaseTable< ALLOC >&& from) :
114  RawDatabaseTable< ALLOC >(std::move(from), from.getAllocator()) {}
115 
116 
117  // virtual copy constructor
118  template < template < typename > class ALLOC >
119  RawDatabaseTable< ALLOC >* RawDatabaseTable< ALLOC >::clone(
120  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) const {
121  ALLOC< RawDatabaseTable< ALLOC > > allocator(alloc);
122  RawDatabaseTable< ALLOC >* new_db = allocator.allocate(1);
123  try {
124  allocator.construct(new_db, *this, alloc);
125  } catch (...) {
126  allocator.deallocate(new_db, 1);
127  throw;
128  }
129 
130  return new_db;
131  }
132 
133 
134  // virtual copy constructor
135  template < template < typename > class ALLOC >
136  RawDatabaseTable< ALLOC >* RawDatabaseTable< ALLOC >::clone() const {
137  return clone(this->getAllocator());
138  }
139 
140 
141  // Destructor: the body only invokes the GUM_DESTRUCTOR macro; the members
     // are released by their own destructors.
     // NOTE(review): listing line 143 -- presumably the
     // "RawDatabaseTable< ALLOC >::~RawDatabaseTable() {" declarator -- is
     // absent from this extract; confirm against the original file.
142  template < template < typename > class ALLOC >
144  GUM_DESTRUCTOR(RawDatabaseTable);
145  }
146 
147 
148  // copy operator
149  template < template < typename > class ALLOC >
150  RawDatabaseTable< ALLOC >& RawDatabaseTable< ALLOC >::
151  operator=(const RawDatabaseTable< ALLOC >& from) {
152  if (this != &from) {
154  __ignored_cols = from.__ignored_cols;
155  }
156  return *this;
157  }
158 
159 
160  // move constructor
161  template < template < typename > class ALLOC >
162  RawDatabaseTable< ALLOC >& RawDatabaseTable< ALLOC >::
163  operator=(RawDatabaseTable< ALLOC >&& from) {
164  if (this != &from) {
166  __ignored_cols = std::move(from.__ignored_cols);
167  }
168  return *this;
169  }
170 
171 
172  // Sets the names of the variables.
     //
     // `names` holds the new names. When `from_external_object` is true and
     // some columns are ignored, the names located at the ignored column
     // indices are skipped; otherwise `names` is stored as is. A SizeError is
     // raised (through GUM_ERROR) when the table already has rows and the
     // number of names kept does not match the size of the first row.
     // NOTE(review): listing line 174 -- presumably the
     // "void RawDatabaseTable< ALLOC >::setVariableNames(" declarator -- is
     // absent from this extract; confirm against the original file.
173  template < template < typename > class ALLOC >
175  const std::vector< std::string, ALLOC< std::string > >& names,
176  const bool from_external_object) {
177  const std::size_t size = names.size();
178  const std::size_t ignored_cols_size = __ignored_cols.size();
179 
     // case 1: the names already match the stored columns one to one
180  if (!from_external_object || !ignored_cols_size) {
181  if (this->_rows.empty() || (size == this->_rows[0].size())) {
182  this->_variable_names = names;
183  } else {
184  GUM_ERROR(
185  SizeError,
186  "the number of variable's names (i.e., "
187  << size << ") does not correspond to the number of columns of the "
188  << "raw database table (i.e.," << this->_rows[0].size() << ")");
189  }
190  } else {
191  // check that the size of the names vector (after removing the ignored
192  // columns) is the same as the rest of the database
193  std::size_t ignored_size = std::size_t(0);
194 
195  // find the number of ignored cols
     // (walk __ignored_cols backwards and count the indices that are >= size;
     // the subtraction below then yields the number of ignored indices that
     // fall inside `names`. This relies on __ignored_cols being sorted in
     // increasing order.)
196  for (auto iter = __ignored_cols.rbegin(), rend = __ignored_cols.rend();
197  iter != rend;
198  ++iter, ++ignored_size) {
199  if (*iter < size) { break; }
200  }
201  ignored_size = ignored_cols_size - ignored_size;
202 
203  if (this->_rows.empty()
204  || (size == this->_rows[0].size() + ignored_size)) {
205  DBVector< std::string > new_names;
     // i parses names, j parses the ignored columns
206  for (std::size_t i = std::size_t(0), j = std::size_t(0); i < size; ++i) {
207  if (i != __ignored_cols[j]) {
208  new_names.push_back(names[i]);
209  } else {
     // all the ignored columns have been skipped: the remaining names are
     // copied in one go, which also terminates the outer loop
210  if (++j == ignored_cols_size) {
211  for (++i; i < size; ++i) {
212  new_names.push_back(names[i]);
213  }
214  }
215  }
216  }
217  this->_variable_names = std::move(new_names);
218  return;
219  } else {
220  GUM_ERROR(SizeError,
221  "the number of variable's names excluding the ignored "
222  << "columns (i.e., " << (size - ignored_size)
223  << ") does not correspond to the number of columns of the "
224  << "raw database table (i.e.," << this->_rows[0].size()
225  << ")");
226  }
227  }
228  }
229 
230 
232  template < template < typename > class ALLOC >
233  void RawDatabaseTable< ALLOC >::ignoreColumn(const std::size_t k,
234  const bool from_external_object) {
235  // first, compute the value that k would have in an external database
236  // and compute where the new value should be inserted
237  std::size_t i; // where to insert the new k into the ignored colums
238  std::size_t kk = k; // kk = k value for an external database
239  const std::size_t size = __ignored_cols.size();
240 
241  if (from_external_object) {
242  for (i = std::size_t(0); i < size; ++i) {
243  if (k <= __ignored_cols[i]) {
244  if (k == __ignored_cols[i]) return;
245  break;
246  }
247  }
248  } else {
249  for (i = std::size_t(0); i < size; ++i, ++kk) {
250  if (kk <= __ignored_cols[i]) {
251  if (kk == __ignored_cols[i]) return;
252  break;
253  }
254  }
255  }
256 
257  // the column of __rows and _variable_names impacted by the ignoreColumn
258  // operation is therefore equal to kk-i. So, we should check that such
259  // a column exists and, if so, we should remove the column from __rows
260  // and from _variable_names. Note that if there is no more variable,
261  // __rows should become empty
262  const std::size_t col = kk - i;
263  if (col < this->_variable_names.size()) {
264  this->_variable_names.erase(this->_variable_names.begin() + col);
265  if (this->_variable_names.empty()) {
267  } else {
268  const std::size_t nb_rows = this->_rows.size();
269  if (nb_rows != std::size_t(0)) {
270  const std::size_t nb_cols = this->_rows[0].size();
271  for (std::size_t i = std::size_t(0); i < nb_rows; ++i) {
272  auto& row = this->_rows[i].row();
273  if (this->_has_row_missing_val[i] == IsMissing::True) {
274  bool has_missing_val = false;
275  for (std::size_t j = std::size_t(0); j < nb_cols; ++j) {
276  if ((j != col) && row[j].isMissing()) {
277  has_missing_val = true;
278  break;
279  }
280  }
281  if (!has_missing_val)
282  this->_has_row_missing_val[i] = IsMissing::False;
283  }
284  row.erase(row.begin() + col);
285  }
286  }
287  }
288  }
289 
290  // here, we know that we should insert kk at the ith index of __ignored_cols
291  __ignored_cols.push_back(std::size_t(0));
292  for (std::size_t j = size; j > i; --j)
293  __ignored_cols[j] = __ignored_cols[j - 1];
294  __ignored_cols[i] = kk;
295  }
296 
297 
// Returns the vector of ignored columns (__ignored_cols). The declared
// return type is a const DBVector< std::size_t > value, i.e., not a
// reference.
// NOTE(review): listing line 302 -- presumably the
// "RawDatabaseTable< ALLOC >::ignoredColumns() const {" declarator -- is
// absent from this extract; confirm against the original file.
299  template < template < typename > class ALLOC >
300  INLINE const typename RawDatabaseTable< ALLOC >::template DBVector<
301  std::size_t >
303  return __ignored_cols;
304  }
305 
306 
// Returns, for each column stored in the table, an index obtained by
// enumerating the consecutive integers 0, 1, 2, ... and skipping those that
// belong to __ignored_cols. The result has as many entries as the first row
// has cells, and is empty when the table contains no row.
// NOTE(review): listing line 310 -- presumably the
// "RawDatabaseTable< ALLOC >::inputColumns() const {" declarator -- is
// absent from this extract; confirm against the original file.
308  template < template < typename > class ALLOC >
309  const typename RawDatabaseTable< ALLOC >::template DBVector< std::size_t >
311  const auto& data = IDatabaseTable< DBCell, ALLOC >::content();
312  if (data.empty()) { return DBVector< std::size_t >(); }
313 
314  const std::size_t size = data[0].size();
315  const std::size_t ignored_cols_size = __ignored_cols.size();
316  DBVector< std::size_t > cols(size);
317 
     // without ignored columns, the result is simply 0, 1, ..., size - 1
318  if (!ignored_cols_size) {
319  for (std::size_t i = std::size_t(0); i < size; ++i) {
320  cols[i] = i;
321  }
322  } else {
323  // fill the cols vector with consecutive values, excluding the
324  // ignored columns
325  std::size_t i = std::size_t(0); // the consecutive values
326  std::size_t k = std::size_t(0); // the index in col where we save values
327  std::size_t j = std::size_t(0); // the index to parse the ignored columns
328  while (true) {
329  if (i != __ignored_cols[j]) {
330  cols[k] = i;
     // stop as soon as cols is completely filled
331  if (++k == size) break;
332  } else {
     // once all the ignored columns have been skipped, the remaining entries
     // are filled with the values that follow
333  if (++j == ignored_cols_size) {
334  for (++i; k < size; ++i, ++k) {
335  cols[k] = i;
336  }
337  break;
338  }
339  }
340  ++i;
341  }
342  }
343 
344  return cols;
345  }
346 
347 
348  // translates a string into a DBCell and returns it
349  template < template < typename > class ALLOC >
350  INLINE DBCell
351  RawDatabaseTable< ALLOC >::__convert(const std::string& elt) const {
352  return DBCell::bestDBCell(elt, this->_missing_symbols);
353  }
354 
355 
356  // Inserts a new row at the end of the database.
     //
     // `new_row` holds one string per column; the strings located at the
     // ignored column indices are skipped, the others are converted into
     // DBCells with __convert. A SizeError is raised (through GUM_ERROR) when
     // _isRowSizeOK rejects the number of non-ignored elements.
     // NOTE(review): listing lines 358 (presumably the
     // "void RawDatabaseTable< ALLOC >::insertRow(" declarator) and 414
     // (presumably the head of the base-class insertRow call whose arguments
     // appear on line 415) are absent from this extract; confirm against the
     // original file.
357  template < template < typename > class ALLOC >
359  const std::vector< std::string, ALLOC< std::string > >& new_row) {
360  // check that the size of the row (after removing the ignored columns) is
361  // the same as the rest of the database
362  const std::size_t row_size = new_row.size();
363  const std::size_t ignored_cols_size = __ignored_cols.size();
364  std::size_t ignored_size = std::size_t(0);
365  if (ignored_cols_size) {
366  // find the number of ignored cols
     // (walk __ignored_cols backwards and count the indices that are
     // >= row_size; the subtraction below then yields the number of ignored
     // indices that fall inside new_row. This relies on __ignored_cols being
     // sorted in increasing order.)
367  for (auto iter = __ignored_cols.rbegin(), rend = __ignored_cols.rend();
368  iter != rend;
369  ++iter, ++ignored_size) {
370  if (*iter < row_size) { break; }
371  }
372  ignored_size = ignored_cols_size - ignored_size;
373  }
374 
375  if (!this->_isRowSizeOK(row_size - ignored_size)) {
376  GUM_ERROR(SizeError,
377  "the new row has "
378  << (row_size - ignored_size)
379  << " elements whereas the raw database table has "
380  << this->_variable_names.size() << " columns");
381  }
382 
383  // create the dbrow that will contain the new data
384  Row< DBCell > dbrow;
385  dbrow.reserve(row_size - ignored_size);
386  bool has_missing_val = false;
387 
388  // translate the row into T_data and put them into the newly created dbrow
389  if (ignored_size == 0) {
     // no element of new_row is ignored: convert them all
390  for (const auto& elt : new_row) {
391  const DBCell new_cell(this->__convert(elt));
392  if (new_cell.isMissing()) has_missing_val = true;
393  dbrow.pushBack(new_cell);
394  }
395  } else {
     // i parses new_row, j parses the ignored columns
396  for (std::size_t i = std::size_t(0), j = std::size_t(0); i < row_size;
397  ++i) {
398  if (i != __ignored_cols[j]) {
399  const DBCell new_cell(this->__convert(new_row[i]));
400  if (new_cell.isMissing()) has_missing_val = true;
401  dbrow.pushBack(new_cell);
402  } else {
     // all the ignored columns lying inside new_row have been skipped: the
     // remaining elements are converted in one go, which also terminates the
     // outer loop
403  if (++j == ignored_size) {
404  for (++i; i < row_size; ++i) {
405  const DBCell new_cell(this->__convert(new_row[i]));
406  if (new_cell.isMissing()) has_missing_val = true;
407  dbrow.pushBack(new_cell);
408  }
409  }
410  }
411  }
412  }
413 
415  std::move(dbrow), has_missing_val ? IsMissing::True : IsMissing::False);
416  }
417 
418 
419  // Erases the content of the database, including the names of the
     // variables. The visible part of the body empties the vector of ignored
     // columns.
     // NOTE(review): listing lines 421 (presumably the
     // "void RawDatabaseTable< ALLOC >::clear() {" declarator) and 423
     // (presumably a call to the base-class clear()) are absent from this
     // extract; confirm against the original file.
420  template < template < typename > class ALLOC >
422  __ignored_cols.clear();
424  }
425 
426 
427  } /* namespace learning */
428 
429 } /* namespace gum */
430 
431 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
void insertRow(const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row)
insert a new row at the end of the database
ALLOC< DBCell > getAllocator() const
returns the allocator of the database
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true) final
makes the database table ignore from now on the kth column
std::size_t size() const noexcept
returns the number of records (rows) in the database
STL namespace.
IDatabaseTable< T_DATA, ALLOC > & operator=(const IDatabaseTable< T_DATA, ALLOC > &from)
copy operator
virtual const DBVector< std::size_t > ignoredColumns() const final
returns the set of columns of the original dataset that are ignored
void eraseAllRows()
erase all the rows
virtual const DBVector< std::size_t > inputColumns() const final
returns the set of columns of the original dataset that are present in the RawDatabaseTable ...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
static DBCell bestDBCell(const std::string &str, const std::vector< std::string, ALLOC< std::string > > &missingVals)
returns the DBCell with the best type for an element encoded as a string
bool _isRowSizeOK(const std::size_t size) const
checks whether a size corresponds to the number of columns of the database
const Matrix< T_DATA > & content() const noexcept
returns the content (the records) of the database
IDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const ALLOC< DBCell > &alloc)
default constructor
ALLOC< DBCell > allocator_type
Types for STL compliance.
DBVector< std::string > _variable_names
the names of the variables for each column
std::vector< TX_DATA, ALLOC< TX_DATA > > DBVector
the type for the vectors used in the RawDatabaseTable
virtual void insertRow(const std::vector< std::string, ALLOC< std::string > > &new_row) final
insert a new row at the end of the database
RawDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const allocator_type &alloc=allocator_type())
default constructor
virtual ~RawDatabaseTable()
destructor
std::vector< std::string, XALLOC< std::string > > MissingValType
virtual RawDatabaseTable< ALLOC > * clone() const final
virtual copy constructor
virtual void clear() final
erase the content of the database, including the names of the variables
RawDatabaseTable< ALLOC > & operator=(const RawDatabaseTable< ALLOC > &from)
copy operator
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55
virtual void clear()
erase the content of the database, including the names of the variables
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.