aGrUM  0.14.2
rawDatabaseTable_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 namespace gum {
30 
31  namespace learning {
32 
33 
34  // default constructor taking the missing symbols and the variable names
    // missing_symbols, var_names and alloc are forwarded unchanged to the
    // IDatabaseTable< DBCell, ALLOC > base class; __ignored_cols is built from
    // alloc only, so no column is ignored initially.
    // (the line holding the constructor's name is absent from this listing)
35  template < template < typename > class ALLOC >
36  template < template < typename > class VARALLOC,
37  template < typename >
38  class MISSALLOC >
40  const typename RawDatabaseTable< ALLOC >::template MissingValType<
41  MISSALLOC >& missing_symbols,
42  const std::vector< std::string, VARALLOC< std::string > >& var_names,
43  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
44  IDatabaseTable< DBCell, ALLOC >(missing_symbols, var_names, alloc),
45  __ignored_cols(alloc) {
46  GUM_CONSTRUCTOR(RawDatabaseTable);
47  }
48 
49 
50  // default constructor taking only the missing symbols
    // The base class receives missing_symbols, an empty vector of strings in
    // place of the variable names, and alloc; __ignored_cols is built from
    // alloc only.
    // (the line holding the constructor's name is absent from this listing)
51  template < template < typename > class ALLOC >
52  template < template < typename > class MISSALLOC >
54  const typename RawDatabaseTable< ALLOC >::template MissingValType<
55  MISSALLOC >& missing_symbols,
56  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
57  IDatabaseTable< DBCell, ALLOC >(
58  missing_symbols,
59  std::vector< std::string, ALLOC< std::string > >(),
60  alloc),
61  __ignored_cols(alloc) {
62  GUM_CONSTRUCTOR(RawDatabaseTable);
63  }
64 
65 
66  // default constructor taking only an allocator
    // The base class receives two empty vectors of strings and alloc.
    // NOTE(review): by analogy with the other constructors these presumably
    // stand for the missing symbols and the variable names -- confirm against
    // the IDatabaseTable constructor.
    // (the line holding the constructor's name is absent from this listing)
67  template < template < typename > class ALLOC >
69  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
70  IDatabaseTable< DBCell, ALLOC >(
71  std::vector< std::string, ALLOC< std::string > >(),
72  std::vector< std::string, ALLOC< std::string > >(),
73  alloc),
74  __ignored_cols(alloc) {
75  GUM_CONSTRUCTOR(RawDatabaseTable);
76  }
77 
78 
79  // copy constructor with a given allocator
    // Both the IDatabaseTable base part and the vector of ignored columns are
    // copied from `from`, each of them being given alloc.
    // (the line holding the constructor's name is absent from this listing)
80  template < template < typename > class ALLOC >
82  const RawDatabaseTable< ALLOC >& from,
83  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
84  IDatabaseTable< DBCell, ALLOC >(from, alloc),
85  __ignored_cols(from.__ignored_cols, alloc) {
86  GUM_CONS_CPY(RawDatabaseTable);
87  }
88 
89  // copy constructor
    // Delegates to the copy constructor with a given allocator, passing
    // from.getAllocator() as the allocator.
    // (the line holding the constructor's name is absent from this listing)
90  template < template < typename > class ALLOC >
92  const RawDatabaseTable< ALLOC >& from) :
93  RawDatabaseTable< ALLOC >(from, from.getAllocator()) {}
94 
95 
96  // move constructor with a given allocator
    // The base part is move-constructed from `from` with alloc, then the
    // vector of ignored columns is moved out of `from`.
    // NOTE(review): alloc is not forwarded to __ignored_cols here, whereas the
    // copy constructor with a given allocator does forward it -- confirm
    // whether the ignored columns are meant to keep from's allocator.
    // (the line holding the constructor's name is absent from this listing)
97  template < template < typename > class ALLOC >
99  RawDatabaseTable< ALLOC >&& from,
100  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
101  IDatabaseTable< DBCell, ALLOC >(std::move(from), alloc),
102  __ignored_cols(std::move(from.__ignored_cols)) {
103  GUM_CONS_MOV(RawDatabaseTable);
104  }
105 
106 
107  // move constructor
     // Delegates to the move constructor with a given allocator, passing
     // from.getAllocator() as the allocator.
     // (the line holding the constructor's name is absent from this listing)
108  template < template < typename > class ALLOC >
110  RawDatabaseTable< ALLOC >&& from) :
111  RawDatabaseTable< ALLOC >(std::move(from), from.getAllocator()) {}
112 
113 
114  // virtual copy constructor using a given allocator
     // Storage for one RawDatabaseTable is obtained from an
     // ALLOC< RawDatabaseTable< ALLOC > > built from alloc, and *this is then
     // copy-constructed into that storage with alloc. If the construction
     // throws, the storage is deallocated and the exception is rethrown.
     // Returns the pointer to the newly built table.
     // NOTE(review): presumably the caller has to destroy and deallocate the
     // returned table with a matching allocator -- confirm with the callers.
     // NOTE(review): allocator.construct is called directly; std::allocator
     // no longer has this member since C++20, std::allocator_traits being the
     // portable way to spell the call.
115  template < template < typename > class ALLOC >
116  RawDatabaseTable< ALLOC >* RawDatabaseTable< ALLOC >::clone(
117  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) const {
118  ALLOC< RawDatabaseTable< ALLOC > > allocator(alloc);
119  RawDatabaseTable< ALLOC >* new_db = allocator.allocate(1);
120  try {
121  allocator.construct(new_db, *this, alloc);
122  } catch (...) {
123  allocator.deallocate(new_db, 1);
124  throw;
125  }
126 
127  return new_db;
128  }
129 
130 
131  // virtual copy constructor
     // Same as clone(alloc), called with the allocator returned by
     // this->getAllocator().
132  template < template < typename > class ALLOC >
133  RawDatabaseTable< ALLOC >* RawDatabaseTable< ALLOC >::clone() const {
134  return clone(this->getAllocator());
135  }
136 
137 
138  // destructor
     // The only visible statement invokes the GUM_DESTRUCTOR macro.
     // (the destructor's signature line is absent from this listing)
139  template < template < typename > class ALLOC >
141  GUM_DESTRUCTOR(RawDatabaseTable);
142  }
143 
144 
145  // copy operator
     // Self-assignment is a no-op. Otherwise the vector of ignored columns is
     // copied from `from`. Returns *this.
     // (listing line 150, the first statement of the guarded block, is absent
     // from this view)
146  template < template < typename > class ALLOC >
147  RawDatabaseTable< ALLOC >& RawDatabaseTable< ALLOC >::
148  operator=(const RawDatabaseTable< ALLOC >& from) {
149  if (this != &from) {
151  __ignored_cols = from.__ignored_cols;
152  }
153  return *this;
154  }
155 
156 
157  // move operator
     // Self-assignment is a no-op. Otherwise the vector of ignored columns is
     // moved out of `from`. Returns *this.
     // (listing line 162, the first statement of the guarded block, is absent
     // from this view)
158  template < template < typename > class ALLOC >
159  RawDatabaseTable< ALLOC >& RawDatabaseTable< ALLOC >::
160  operator=(RawDatabaseTable< ALLOC >&& from) {
161  if (this != &from) {
163  __ignored_cols = std::move(from.__ignored_cols);
164  }
165  return *this;
166  }
167 
168 
169  // sets the names of the variables
     // names: the new names. from_external_object: when true and some columns
     // are ignored, names is indexed like the external dataset and the entries
     // located at ignored column indices are dropped before being stored;
     // otherwise names is stored as is.
     // In both cases, if the table already has rows, the number of names kept
     // must be equal to the size of the first row, else a SizeError is raised
     // through GUM_ERROR.
     // (the line holding the function's name is absent from this listing)
170  template < template < typename > class ALLOC >
172  const std::vector< std::string, ALLOC< std::string > >& names,
173  const bool from_external_object) {
174  const std::size_t size = names.size();
175  const std::size_t ignored_cols_size = __ignored_cols.size();
176 
177  if (!from_external_object || !ignored_cols_size) {
178  if (this->_rows.empty() || (size == this->_rows[0].size())) {
179  this->_variable_names = names;
180  } else {
181  GUM_ERROR(
182  SizeError,
183  "the number of variable's names (i.e., "
184  << size << ") does not correspond to the number of columns of the "
185  << "raw database table (i.e.," << this->_rows[0].size() << ")");
186  }
187  } else {
188  // check that the size of the names vector (after removing the ignored
189  // columns) is the same as the rest of the database
190  std::size_t ignored_size = std::size_t(0);
191 
192  // find the number of ignored cols that fall within names: the reverse
     // scan counts the trailing ignored indices that are >= size, and this
     // count is then subtracted from the total number of ignored columns
     // NOTE(review): this relies on __ignored_cols being sorted in
     // increasing order, which ignoreColumn's insertion appears to maintain
193  for (auto iter = __ignored_cols.rbegin(), rend = __ignored_cols.rend();
194  iter != rend;
195  ++iter, ++ignored_size) {
196  if (*iter < size) { break; }
197  }
198  ignored_size = ignored_cols_size - ignored_size;
199 
200  if (this->_rows.empty()
201  || (size == this->_rows[0].size() + ignored_size)) {
     // copy the names, skipping those located at ignored indices; once all
     // the ignored columns have been consumed, the remaining names are
     // copied by the inner loop, which also terminates the outer one
202  DBVector< std::string > new_names;
203  for (std::size_t i = std::size_t(0), j = std::size_t(0); i < size; ++i) {
204  if (i != __ignored_cols[j]) {
205  new_names.push_back(names[i]);
206  } else {
207  if (++j == ignored_cols_size) {
208  for (++i; i < size; ++i) {
209  new_names.push_back(names[i]);
210  }
211  }
212  }
213  }
214  this->_variable_names = std::move(new_names);
215  return;
216  } else {
217  GUM_ERROR(SizeError,
218  "the number of variable's names excluding the ignored "
219  << "columns (i.e., " << (size - ignored_size)
220  << ") does not correspond to the number of columns of the "
221  << "raw database table (i.e.," << this->_rows[0].size()
222  << ")");
223  }
224  }
225  }
226 
227 
// makes the table ignore column k from now on
// k: index of the column to ignore. from_external_object: when true, k is
// compared directly with the stored ignored indices; when false, k is first
// shifted by one for each stored ignored index that precedes it.
// If the resulting index is already ignored, nothing is done. Otherwise the
// corresponding column (if it exists) is removed from the variable names and
// from every row, the per-row missing-value flags are updated, and the index
// is inserted into __ignored_cols at the position found by the scan.
229  template < template < typename > class ALLOC >
230  void RawDatabaseTable< ALLOC >::ignoreColumn(const std::size_t k,
231  const bool from_external_object) {
232  // first, compute the value that k would have in an external database
233  // and compute where the new value should be inserted
234  std::size_t i; // where to insert the new k into the ignored columns
235  std::size_t kk = k; // kk = k value for an external database
236  const std::size_t size = __ignored_cols.size();
237 
238  if (from_external_object) {
239  for (i = std::size_t(0); i < size; ++i) {
240  if (k <= __ignored_cols[i]) {
241  if (k == __ignored_cols[i]) return;
242  break;
243  }
244  }
245  } else {
246  for (i = std::size_t(0); i < size; ++i, ++kk) {
247  if (kk <= __ignored_cols[i]) {
248  if (kk == __ignored_cols[i]) return;
249  break;
250  }
251  }
252  }
253 
254  // the column of _rows and _variable_names impacted by the ignoreColumn
255  // operation is therefore equal to kk-i. So, we should check that such
256  // a column exists and, if so, we should remove the column from _rows
257  // and from _variable_names. Note that if there is no more variable,
258  // _rows should become empty
259  const std::size_t col = kk - i;
260  if (col < this->_variable_names.size()) {
261  this->_variable_names.erase(this->_variable_names.begin() + col);
262  if (this->_variable_names.empty()) {
     // (listing line 263, the body of this branch, is absent from this view)
264  } else {
265  const std::size_t nb_rows = this->_rows.size();
266  if (nb_rows != std::size_t(0)) {
267  const std::size_t nb_cols = this->_rows[0].size();
     // note: this loop index shadows the insertion position i declared above
268  for (std::size_t i = std::size_t(0); i < nb_rows; ++i) {
269  auto& row = this->_rows[i].row();
     // a row flagged as having a missing value keeps the flag only if a
     // column other than the removed one is missing
270  if (this->_has_row_missing_val[i] == IsMissing::True) {
271  bool has_missing_val = false;
272  for (std::size_t j = std::size_t(0); j < nb_cols; ++j) {
273  if ((j != col) && row[j].isMissing()) {
274  has_missing_val = true;
275  break;
276  }
277  }
278  if (!has_missing_val)
279  this->_has_row_missing_val[i] = IsMissing::False;
280  }
281  row.erase(row.begin() + col);
282  }
283  }
284  }
285  }
286 
287  // here, we know that we should insert kk at the ith index of __ignored_cols:
     // grow the vector by one element, shift the elements from index i onwards
     // one position to the right, then store kk at index i
288  __ignored_cols.push_back(std::size_t(0));
289  for (std::size_t j = size; j > i; --j)
290  __ignored_cols[j] = __ignored_cols[j - 1];
291  __ignored_cols[i] = kk;
292  }
293 
294 
// returns the content of __ignored_cols, i.e., the indices of the columns
// that are currently ignored
// (the line holding the function's name is absent from this listing)
296  template < template < typename > class ALLOC >
297  INLINE const typename RawDatabaseTable< ALLOC >::template DBVector<
298  std::size_t >
300  return __ignored_cols;
301  }
302 
303 
// returns, for each column stored in the table, an index computed by
// enumerating 0, 1, 2, ... and skipping the values found in __ignored_cols
// The result is empty when the table holds no record; otherwise it has as
// many elements as the first record.
// (the line holding the function's name is absent from this listing)
305  template < template < typename > class ALLOC >
306  const typename RawDatabaseTable< ALLOC >::template DBVector< std::size_t >
308  const auto& data = IDatabaseTable< DBCell, ALLOC >::content();
309  if (data.empty()) { return DBVector< std::size_t >(); }
310 
311  const std::size_t size = data[0].size();
312  const std::size_t ignored_cols_size = __ignored_cols.size();
313  DBVector< std::size_t > cols(size);
314 
315  if (!ignored_cols_size) {
     // no ignored column: the result is simply 0, 1, ..., size - 1
316  for (std::size_t i = std::size_t(0); i < size; ++i) {
317  cols[i] = i;
318  }
319  } else {
320  // fill the cols vector with consecutive values, excluding the
321  // ignored columns
322  std::size_t i = std::size_t(0); // the consecutive values
323  std::size_t k = std::size_t(0); // the index in col where we save values
324  std::size_t j = std::size_t(0); // the index to parse the ignored columns
325  while (true) {
326  if (i != __ignored_cols[j]) {
327  cols[k] = i;
328  if (++k == size) break;
329  } else {
     // i is ignored: move to the next ignored index; once they are all
     // consumed, the remaining slots receive consecutive values
330  if (++j == ignored_cols_size) {
331  for (++i; k < size; ++i, ++k) {
332  cols[k] = i;
333  }
334  break;
335  }
336  }
337  ++i;
338  }
339  }
340 
341  return cols;
342  }
343 
344 
345  // translates a string into a DBCell and returns it
     // The conversion is delegated to DBCell::bestDBCell, which is given the
     // string and the table's _missing_symbols.
346  template < template < typename > class ALLOC >
347  INLINE DBCell
348  RawDatabaseTable< ALLOC >::__convert(const std::string& elt) const {
349  return DBCell::bestDBCell(elt, this->_missing_symbols);
350  }
351 
352 
353  // insert a new row at the end of the database
     // new_row holds one string per column, ignored columns included. The
     // entries located at ignored column indices are skipped, the others are
     // converted with __convert and appended to a Row< DBCell >. A SizeError
     // is raised through GUM_ERROR when _isRowSizeOK rejects the number of
     // entries remaining after the ignored columns are removed.
     // (listing lines 355 and 411, i.e., the line holding the function's name
     // and the start of the final call receiving the new row, are absent from
     // this view)
354  template < template < typename > class ALLOC >
356  const std::vector< std::string, ALLOC< std::string > >& new_row) {
357  // check that the size of the row (after removing the ignored columns) is
358  // the same as the rest of the database
359  const std::size_t row_size = new_row.size();
360  const std::size_t ignored_cols_size = __ignored_cols.size();
361  std::size_t ignored_size = std::size_t(0);
362  if (ignored_cols_size) {
363  // find the number of ignored cols that fall within new_row: the reverse
     // scan counts the trailing ignored indices that are >= row_size, and
     // this count is then subtracted from the total
364  for (auto iter = __ignored_cols.rbegin(), rend = __ignored_cols.rend();
365  iter != rend;
366  ++iter, ++ignored_size) {
367  if (*iter < row_size) { break; }
368  }
369  ignored_size = ignored_cols_size - ignored_size;
370  }
371 
372  if (!this->_isRowSizeOK(row_size - ignored_size)) {
373  GUM_ERROR(SizeError,
374  "the new row has "
375  << (row_size - ignored_size)
376  << " elements whereas the raw database table has "
377  << this->_variable_names.size() << " columns");
378  }
379 
380  // create the dbrow that will contain the new data
381  Row< DBCell > dbrow;
382  dbrow.reserve(row_size - ignored_size);
383  bool has_missing_val = false;
384 
385  // translate the row into DBCells and put them into the newly created dbrow
386  if (ignored_size == 0) {
387  for (const auto& elt : new_row) {
388  const DBCell new_cell(this->__convert(elt));
389  if (new_cell.isMissing()) has_missing_val = true;
390  dbrow.pushBack(new_cell);
391  }
392  } else {
393  for (std::size_t i = std::size_t(0), j = std::size_t(0); i < row_size;
394  ++i) {
395  if (i != __ignored_cols[j]) {
396  const DBCell new_cell(this->__convert(new_row[i]));
397  if (new_cell.isMissing()) has_missing_val = true;
398  dbrow.pushBack(new_cell);
399  } else {
     // i is ignored: once every ignored index below row_size has been
     // consumed, the rest of the row is converted by the inner loop, which
     // also terminates the outer one
400  if (++j == ignored_size) {
401  for (++i; i < row_size; ++i) {
402  const DBCell new_cell(this->__convert(new_row[i]));
403  if (new_cell.isMissing()) has_missing_val = true;
404  dbrow.pushBack(new_cell);
405  }
406  }
407  }
408  }
409  }
410 
412  std::move(dbrow), has_missing_val ? IsMissing::True : IsMissing::False);
413  }
414 
415 
416  // erase the content of the database, including the names of the variables
     // The visible statement empties the vector of ignored columns.
     // (listing lines 418 and 420, i.e., the signature and the statement that
     // follows, are absent from this view)
417  template < template < typename > class ALLOC >
419  __ignored_cols.clear();
421  }
422 
423 
424  } /* namespace learning */
425 
426 } /* namespace gum */
427 
428 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
void insertRow(const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row)
insert a new row at the end of the database
ALLOC< DBCell > getAllocator() const
returns the allocator of the database
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true) final
makes the database table ignore from now on the kth column
std::size_t size() const noexcept
returns the number of records (rows) in the database
STL namespace.
IDatabaseTable< T_DATA, ALLOC > & operator=(const IDatabaseTable< T_DATA, ALLOC > &from)
copy operator
virtual const DBVector< std::size_t > ignoredColumns() const final
returns the set of columns of the original dataset that are ignored
void eraseAllRows()
erase all the rows
virtual const DBVector< std::size_t > inputColumns() const final
returns the set of columns of the original dataset that are present in the RawDatabaseTable ...
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
static DBCell bestDBCell(const std::string &str, const std::vector< std::string, ALLOC< std::string > > &missingVals)
returns the DBCell with the best type for an element encoded as a string
bool _isRowSizeOK(const std::size_t size) const
checks whether a size corresponds to the number of columns of the database
const Matrix< T_DATA > & content() const noexcept
returns the content (the records) of the database
IDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const ALLOC< DBCell > &alloc)
default constructor
ALLOC< DBCell > allocator_type
Types for STL compliance.
DBVector< std::string > _variable_names
the names of the variables for each column
std::vector< TX_DATA, ALLOC< TX_DATA > > DBVector
the type for the vectors used in the RawDatabaseTable
virtual void insertRow(const std::vector< std::string, ALLOC< std::string > > &new_row) final
insert a new row at the end of the database
RawDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const allocator_type &alloc=allocator_type())
default constructor
virtual ~RawDatabaseTable()
destructor
std::vector< std::string, XALLOC< std::string > > MissingValType
virtual RawDatabaseTable< ALLOC > * clone() const final
virtual copy constructor
virtual void clear() final
erase the content of the database, including the names of the variables
RawDatabaseTable< ALLOC > & operator=(const RawDatabaseTable< ALLOC > &from)
copy operator
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
virtual void clear()
erase the content of the database, including the names of the variables
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
The table containing the raw/original data of a database.