aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
rawDatabaseTable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The implementation of raw tabular databases stored in memory (RAM)
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #include <agrum/tools/database/rawDatabaseTable.h>
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 
31 namespace gum {
32 
33  namespace learning {
34 
35 
36  // constructor taking the missing symbols and the names of the variables
37  template < template < typename > class ALLOC >
38  template < template < typename > class VARALLOC,
39  template < typename >
40  class MISSALLOC >
41  INLINE RawDatabaseTable< ALLOC >::RawDatabaseTable(
42  const typename RawDatabaseTable< ALLOC >::template MissingValType<
44  const std::vector< std::string, VARALLOC< std::string > >& var_names,
45  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
49  }
50 
51 
52  // constructor taking the missing symbols (no variable names)
53  template < template < typename > class ALLOC >
54  template < template < typename > class MISSALLOC >
56  const typename RawDatabaseTable< ALLOC >::template MissingValType<
58  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
61  std::vector< std::string, ALLOC< std::string > >(),
62  alloc),
65  }
66 
67 
68  // default constructor
69  template < template < typename > class ALLOC >
71  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
73  std::vector< std::string, ALLOC< std::string > >(),
74  std::vector< std::string, ALLOC< std::string > >(),
75  alloc),
78  }
79 
80 
81  // copy constructor with a given allocator
82  template < template < typename > class ALLOC >
84  const RawDatabaseTable< ALLOC >& from,
85  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
89  }
90 
91  // copy constructor
92  template < template < typename > class ALLOC >
94  const RawDatabaseTable< ALLOC >& from) :
96 
97 
98  // move constructor with a given allocator
99  template < template < typename > class ALLOC >
102  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
106  }
107 
108 
109  // move constructor
110  template < template < typename > class ALLOC >
112  RawDatabaseTable< ALLOC >&& from) :
114 
115 
116  // virtual copy constructor
117  template < template < typename > class ALLOC >
119  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) const {
122  try {
123  allocator.construct(new_db, *this, alloc);
124  } catch (...) {
126  throw;
127  }
128 
129  return new_db;
130  }
131 
132 
133  // virtual copy constructor
134  template < template < typename > class ALLOC >
136  return clone(this->getAllocator());
137  }
138 
139 
140  // destructor
141  template < template < typename > class ALLOC >
144  }
145 
146 
147  // copy operator
148  template < template < typename > class ALLOC >
150  const RawDatabaseTable< ALLOC >& from) {
151  if (this != &from) {
154  }
155  return *this;
156  }
157 
158 
159  // move operator
160  template < template < typename > class ALLOC >
163  if (this != &from) {
166  }
167  return *this;
168  }
169 
170 
171  // sets the names of the variables
172  template < template < typename > class ALLOC >
174  const std::vector< std::string, ALLOC< std::string > >& names,
175  const bool from_external_object) {
176  const std::size_t size = names.size();
178 
180  if (this->rows_.empty() || (size == this->rows_[0].size())) {
181  this->variable_names_ = names;
182  } else {
183  GUM_ERROR(
184  SizeError,
185  "the number of variable's names (i.e., "
186  << size << ") does not correspond to the number of columns of the "
187  << "raw database table (i.e.," << this->rows_[0].size() << ")");
188  }
189  } else {
190  // check that the size of the names vector (after removing the ignored
191  // columns) is the same as the rest of the database
193 
194  // find the number of ignored cols
195  for (auto iter = ignored_cols__.rbegin(), rend = ignored_cols__.rend();
196  iter != rend;
197  ++iter, ++ignored_size) {
198  if (*iter < size) { break; }
199  }
201 
202  if (this->rows_.empty()
203  || (size == this->rows_[0].size() + ignored_size)) {
205  for (std::size_t i = std::size_t(0), j = std::size_t(0); i < size; ++i) {
206  if (i != ignored_cols__[j]) {
208  } else {
209  if (++j == ignored_cols_size) {
210  for (++i; i < size; ++i) {
212  }
213  }
214  }
215  }
216  this->variable_names_ = std::move(new_names);
217  return;
218  } else {
220  "the number of variable's names excluding the ignored "
221  << "columns (i.e., " << (size - ignored_size)
222  << ") does not correspond to the number of columns of the "
223  << "raw database table (i.e.," << this->rows_[0].size()
224  << ")");
225  }
226  }
227  }
228 
229 
230  /// makes the database table ignore from now on the kth column
231  template < template < typename > class ALLOC >
232  void RawDatabaseTable< ALLOC >::ignoreColumn(const std::size_t k,
233  const bool from_external_object) {
234  // first, compute the value that k would have in an external database
235  // and compute where the new value should be inserted
236  std::size_t i; // where to insert the new k into the ignored colums
237  std::size_t kk = k; // kk = k value for an external database
238  const std::size_t size = ignored_cols__.size();
239 
240  if (from_external_object) {
241  for (i = std::size_t(0); i < size; ++i) {
242  if (k <= ignored_cols__[i]) {
243  if (k == ignored_cols__[i]) return;
244  break;
245  }
246  }
247  } else {
248  for (i = std::size_t(0); i < size; ++i, ++kk) {
249  if (kk <= ignored_cols__[i]) {
250  if (kk == ignored_cols__[i]) return;
251  break;
252  }
253  }
254  }
255 
256  // the column of rows__ and variable_names_ impacted by the ignoreColumn
257  // operation is therefore equal to kk-i. So, we should check that such
258  // a column exists and, if so, we should remove the column from rows__
259  // and from variable_names_. Note that if there is no more variable,
260  // rows__ should become empty
261  const std::size_t col = kk - i;
262  if (col < this->variable_names_.size()) {
263  this->variable_names_.erase(this->variable_names_.begin() + col);
264  if (this->variable_names_.empty()) {
266  } else {
267  const std::size_t nb_rows = this->rows_.size();
268  if (nb_rows != std::size_t(0)) {
269  const std::size_t nb_cols = this->rows_[0].size();
270  for (std::size_t i = std::size_t(0); i < nb_rows; ++i) {
271  auto& row = this->rows_[i].row();
272  if (this->has_row_missing_val_[i] == IsMissing::True) {
273  bool has_missing_val = false;
274  for (std::size_t j = std::size_t(0); j < nb_cols; ++j) {
275  if ((j != col) && row[j].isMissing()) {
276  has_missing_val = true;
277  break;
278  }
279  }
280  if (!has_missing_val)
282  }
283  row.erase(row.begin() + col);
284  }
285  }
286  }
287  }
288 
289  // here, we know that we should insert kk at the ith index of ignored_cols__
291  for (std::size_t j = size; j > i; --j)
293  ignored_cols__[i] = kk;
294  }
295 
296 
297  /// returns the set of ignored columns
298  template < template < typename > class ALLOC >
299  INLINE const typename RawDatabaseTable< ALLOC >::template DBVector<
300  std::size_t >
302  return ignored_cols__;
303  }
304 
305 
306  /// returns the set of columns parsed
307  template < template < typename > class ALLOC >
308  const typename RawDatabaseTable< ALLOC >::template DBVector< std::size_t >
309  RawDatabaseTable< ALLOC >::inputColumns() const {
310  const auto& data = IDatabaseTable< DBCell, ALLOC >::content();
311  if (data.empty()) { return DBVector< std::size_t >(); }
312 
313  const std::size_t size = data[0].size();
315  DBVector< std::size_t > cols(size);
316 
317  if (!ignored_cols_size) {
318  for (std::size_t i = std::size_t(0); i < size; ++i) {
319  cols[i] = i;
320  }
321  } else {
322  // fill the cols vector with consecutive values, excluding the
323  // ignored columns
324  std::size_t i = std::size_t(0); // the consecutive values
325  std::size_t k = std::size_t(0); // the index in col where we save values
326  std::size_t j = std::size_t(0); // the index to parse the ignored columns
327  while (true) {
328  if (i != ignored_cols__[j]) {
329  cols[k] = i;
330  if (++k == size) break;
331  } else {
332  if (++j == ignored_cols_size) {
333  for (++i; k < size; ++i, ++k) {
334  cols[k] = i;
335  }
336  break;
337  }
338  }
339  ++i;
340  }
341  }
342 
343  return cols;
344  }
345 
346 
347  // translates a string into a DBCell and returns it
348  template < template < typename > class ALLOC >
349  INLINE DBCell
350  RawDatabaseTable< ALLOC >::convert__(const std::string& elt) const {
351  return DBCell::bestDBCell(elt, this->missing_symbols_);
352  }
353 
354 
355  // insert a new row at the end of the database
356  template < template < typename > class ALLOC >
358  const std::vector< std::string, ALLOC< std::string > >& new_row) {
359  // check that the size of the row (after removing the ignored columns) is
360  // the same as the rest of the database
361  const std::size_t row_size = new_row.size();
364  if (ignored_cols_size) {
365  // find the number of ignored cols
366  for (auto iter = ignored_cols__.rbegin(), rend = ignored_cols__.rend();
367  iter != rend;
368  ++iter, ++ignored_size) {
369  if (*iter < row_size) { break; }
370  }
372  }
373 
374  if (!this->isRowSizeOK_(row_size - ignored_size)) {
376  "the new row has "
377  << (row_size - ignored_size)
378  << " elements whereas the raw database table has "
379  << this->variable_names_.size() << " columns");
380  }
381 
382  // create the dbrow that will contain the new data
383  Row< DBCell > dbrow;
385  bool has_missing_val = false;
386 
387  // translate the row into T_data and put them into the newly created dbrow
388  if (ignored_size == 0) {
389  for (const auto& elt: new_row) {
390  const DBCell new_cell(this->convert__(elt));
391  if (new_cell.isMissing()) has_missing_val = true;
393  }
394  } else {
395  for (std::size_t i = std::size_t(0), j = std::size_t(0); i < row_size;
396  ++i) {
397  if (i != ignored_cols__[j]) {
398  const DBCell new_cell(this->convert__(new_row[i]));
399  if (new_cell.isMissing()) has_missing_val = true;
401  } else {
402  if (++j == ignored_size) {
403  for (++i; i < row_size; ++i) {
404  const DBCell new_cell(this->convert__(new_row[i]));
405  if (new_cell.isMissing()) has_missing_val = true;
407  }
408  }
409  }
410  }
411  }
412 
414  std::move(dbrow),
416  }
417 
418 
419  // erase the content of the database, including the names of the variables
420  template < template < typename > class ALLOC >
421  void RawDatabaseTable< ALLOC >::clear() {
424  }
425 
426 
427  } /* namespace learning */
428 
429 } /* namespace gum */
430 
431 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)