aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
rawDatabaseTable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The implementation of raw tabular databases stored in memory (RAM)
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #include <agrum/tools/database/rawDatabaseTable.h>
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 
31 namespace gum {
32 
33  namespace learning {
34 
35 
36  // default constructor
37  template < template < typename > class ALLOC >
38  template < template < typename > class VARALLOC, template < typename > class MISSALLOC >
39  INLINE RawDatabaseTable< ALLOC >::RawDatabaseTable(
40  const typename RawDatabaseTable< ALLOC >::template MissingValType< MISSALLOC >&
42  const std::vector< std::string, VARALLOC< std::string > >& var_names,
43  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
47  }
48 
49 
50  // default constructor
51  template < template < typename > class ALLOC >
52  template < template < typename > class MISSALLOC >
54  const typename RawDatabaseTable< ALLOC >::template MissingValType< MISSALLOC >&
56  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
58  std::vector< std::string, ALLOC< std::string > >(),
59  alloc),
62  }
63 
64 
65  // default constructor
66  template < template < typename > class ALLOC >
68  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
70  std::vector< std::string, ALLOC< std::string > >(),
71  alloc),
74  }
75 
76 
77  // copy constructor with a given allocator
78  template < template < typename > class ALLOC >
80  const RawDatabaseTable< ALLOC >& from,
81  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
85  }
86 
87  // copy constructor
88  template < template < typename > class ALLOC >
91 
92 
93  // move constructor with a given allocator
94  template < template < typename > class ALLOC >
97  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) :
101  }
102 
103 
104  // move constructor
105  template < template < typename > class ALLOC >
108 
109 
110  // virtual copy constructor
     // Returns new_db once it has been constructed, through
     // allocator.construct, as a copy of *this that receives alloc as its
     // allocator argument.
     // NOTE(review): this listing skips source lines 112, 114, 115 and 119,
     // i.e., the line holding the return type and function name, the
     // declarations of allocator and new_db, and the first statement of the
     // catch block. Presumably new_db is allocated at 114-115 and released
     // at 119 -- confirm against the original file.
111  template < template < typename > class ALLOC >
113  const typename RawDatabaseTable< ALLOC >::allocator_type& alloc) const {
116  try {
117  allocator.construct(new_db, *this, alloc);
118  } catch (...) {
     // whatever was thrown during the construction is propagated unchanged
120  throw;
121  }
122 
123  return new_db;
124  }
125 
126 
127  // virtual copy constructor
128  template < template < typename > class ALLOC >
130  return clone(this->getAllocator());
131  }
132 
133 
134  // destructor
135  template < template < typename > class ALLOC >
138  }
139 
140 
141  // copy operator
142  template < template < typename > class ALLOC >
145  if (this != &from) {
148  }
149  return *this;
150  }
151 
152 
153  // move operator
154  template < template < typename > class ALLOC >
157  if (this != &from) {
160  }
161  return *this;
162  }
163 
164 
165  // sets the names of the variables
     // @param names the new names. Whenever rows_ is not empty, their number
     //        is checked against the number of cells of the first stored row
     //        (this->rows_[0]).
     // @param from_external_object NOTE(review): the test on this flag sits
     //        on a source line skipped by this listing (171 or 173). Judging
     //        from the comments below, the second branch is the one in which
     //        the names of the ignored columns are removed from names --
     //        confirm against the original file.
     // NOTE(review): this listing also skips source lines 167, 177, 185,
     // 192, 195, 198, 202 and 210. The function name, the statements that
     // emit the two error messages, the declarations of ignored_size,
     // ignored_cols_size and new_names, and the statements filling new_names
     // are therefore not visible here.
166  template < template < typename > class ALLOC >
168  const std::vector< std::string, ALLOC< std::string > >& names,
169  const bool from_external_object) {
170  const std::size_t size = names.size();
172 
     // names is stored as is when no row is stored yet or when it contains
     // exactly one name per cell of the first row
174  if (this->rows_.empty() || (size == this->rows_[0].size())) {
175  this->variable_names_ = names;
176  } else {
178  "the number of variable's names (i.e., "
179  << size << ") does not correspond to the number of columns of the "
180  << "raw database table (i.e.," << this->rows_[0].size() << ")");
181  }
182  } else {
183  // check that the size of the names vector (after removing the ignored
184  // columns) is the same as the rest of the database
186 
187  // find the number of ignored cols
     // _ignored_cols_ is walked from its last entry backwards: ignored_size
     // is incremented for each entry that is >= size and the walk stops at
     // the first entry that is < size
188  for (auto iter = _ignored_cols_.rbegin(), rend = _ignored_cols_.rend(); iter != rend;
189  ++iter, ++ignored_size) {
190  if (*iter < size) { break; }
191  }
193 
     // in this branch, names may exceed the size of a stored row by
     // ignored_size entries
194  if (this->rows_.empty() || (size == this->rows_[0].size() + ignored_size)) {
     // i scans names and j scans _ignored_cols_: j advances each time i
     // equals _ignored_cols_[j]; once j reaches ignored_cols_size, the inner
     // loop runs i up to size
196  for (std::size_t i = std::size_t(0), j = std::size_t(0); i < size; ++i) {
197  if (i != _ignored_cols_[j]) {
199  } else {
200  if (++j == ignored_cols_size) {
201  for (++i; i < size; ++i) {
203  }
204  }
205  }
206  }
207  this->variable_names_ = std::move(new_names);
208  return;
209  } else {
211  "the number of variable's names excluding the ignored "
212  << "columns (i.e., " << (size - ignored_size)
213  << ") does not correspond to the number of columns of the "
214  << "raw database table (i.e.," << this->rows_[0].size() << ")");
215  }
216  }
217  }
218 
219 
220  /// makes the database table ignore from now on the kth column
     ///
     /// @param k the index of the column to ignore. When from_external_object
     ///        is true, k is compared as is with the entries of
     ///        _ignored_cols_. Otherwise a copy kk of k is incremented once
     ///        per entry of _ignored_cols_ stepped over before the comparison
     ///        succeeds.
     /// @param from_external_object selects which of the two interpretations
     ///        of k described above is applied.
     ///
     /// If the resulting index is already stored in _ignored_cols_, the method
     /// returns without modifying anything.
     /// NOTE(review): this listing skips source lines 255, 270, 279 and 281,
     /// so the statements located there are not documented here.
221  template < template < typename > class ALLOC >
222  void RawDatabaseTable< ALLOC >::ignoreColumn(const std::size_t k,
223  const bool from_external_object) {
224  // first, compute the value that k would have in an external database
225  // and compute where the new value should be inserted
226  std::size_t i; // where to insert the new k into the ignored columns
227  std::size_t kk = k; // kk = k value for an external database
228  const std::size_t size = _ignored_cols_.size();
229 
230  if (from_external_object) {
     // k is used unchanged: stop at the first ignored index that is >= k
231  for (i = std::size_t(0); i < size; ++i) {
232  if (k <= _ignored_cols_[i]) {
233  if (k == _ignored_cols_[i]) return;
234  break;
235  }
236  }
237  } else {
     // kk grows by one for each ignored index that is stepped over
238  for (i = std::size_t(0); i < size; ++i, ++kk) {
239  if (kk <= _ignored_cols_[i]) {
240  if (kk == _ignored_cols_[i]) return;
241  break;
242  }
243  }
244  }
245 
246  // the column of rows_ and variable_names_ impacted by the ignoreColumn
247  // operation is therefore equal to kk-i. So, we should check that such
248  // a column exists and, if so, we should remove the column from rows_
249  // and from variable_names_. Note that if there is no more variable,
250  // rows_ should become empty
251  const std::size_t col = kk - i;
252  if (col < this->variable_names_.size()) {
253  this->variable_names_.erase(this->variable_names_.begin() + col);
254  if (this->variable_names_.empty()) {
     // NOTE(review): source line 255 is missing from this listing; according
     // to the comment above, it presumably empties rows_ -- confirm
256  } else {
257  const std::size_t nb_rows = this->rows_.size();
258  if (nb_rows != std::size_t(0)) {
259  const std::size_t nb_cols = this->rows_[0].size();
     // this loop index shadows the insertion index i declared at the top of
     // the function; the outer i is used again after the loop
260  for (std::size_t i = std::size_t(0); i < nb_rows; ++i) {
261  auto& row = this->rows_[i].row();
262  if (this->has_row_missing_val_[i] == IsMissing::True) {
     // look for a missing value in the cells other than the removed one
263  bool has_missing_val = false;
264  for (std::size_t j = std::size_t(0); j < nb_cols; ++j) {
265  if ((j != col) && row[j].isMissing()) {
266  has_missing_val = true;
267  break;
268  }
269  }
     // NOTE(review): source line 270 is missing from this listing;
     // presumably it updates has_row_missing_val_[i] from has_missing_val --
     // confirm
271  }
272  row.erase(row.begin() + col);
273  }
274  }
275  }
276  }
277 
278  // here, we know that we should insert kk at the ith index of _ignored_cols_
     // NOTE(review): source lines 279 and 281 are missing from this listing;
     // presumably they enlarge _ignored_cols_ and hold the body of the loop
     // below, which runs j from size down to i+1 -- confirm
280  for (std::size_t j = size; j > i; --j)
282  _ignored_cols_[i] = kk;
283  }
284 
285 
286  /// returns the set of ignored columns
287  template < template < typename > class ALLOC >
288  INLINE const typename RawDatabaseTable< ALLOC >::template DBVector< std::size_t >
290  return _ignored_cols_;
291  }
292 
293 
294  /// returns the set of columns parsed
     ///
     /// @return a vector with one entry per cell of the first stored row
     ///         (data[0].size()). It is filled with increasing values of a
     ///         counter starting at 0, skipping each value that equals the
     ///         current entry of _ignored_cols_. An empty vector is returned
     ///         when the content of the table is empty.
     /// NOTE(review): the scan below only skips all the ignored indices if
     /// _ignored_cols_ is sorted increasingly; presumably ignoreColumn
     /// guarantees this -- confirm.
     /// NOTE(review): this listing skips source line 302, which declares
     /// ignored_cols_size; presumably it is _ignored_cols_.size() -- confirm
     /// against the original file.
295  template < template < typename > class ALLOC >
296  const typename RawDatabaseTable< ALLOC >::template DBVector< std::size_t >
297  RawDatabaseTable< ALLOC >::inputColumns() const {
298  const auto& data = IDatabaseTable< DBCell, ALLOC >::content();
     // no row is stored: there is no column to report
299  if (data.empty()) { return DBVector< std::size_t >(); }
300 
301  const std::size_t size = data[0].size();
303  DBVector< std::size_t > cols(size);
304 
305  if (!ignored_cols_size) {
     // ignored_cols_size is zero: the columns are simply 0, 1, ..., size-1
306  for (std::size_t i = std::size_t(0); i < size; ++i) {
307  cols[i] = i;
308  }
309  } else {
310  // fill the cols vector with consecutive values, excluding the
311  // ignored columns
312  std::size_t i = std::size_t(0); // the consecutive values
313  std::size_t k = std::size_t(0); // the index in col where we save values
314  std::size_t j = std::size_t(0); // the index to parse the ignored columns
315  while (true) {
316  if (i != _ignored_cols_[j]) {
317  cols[k] = i;
     // cols is full: nothing more to compute
318  if (++k == size) break;
319  } else {
     // i is skipped; once every ignored index has been consumed, the
     // remaining entries of cols are the values following i
320  if (++j == ignored_cols_size) {
321  for (++i; k < size; ++i, ++k) {
322  cols[k] = i;
323  }
324  break;
325  }
326  }
327  ++i;
328  }
329  }
330 
331  return cols;
332  }
333 
334 
335  // translates a string into a DBCell and returns it
336  template < template < typename > class ALLOC >
338  return DBCell::bestDBCell(elt, this->missing_symbols_);
339  }
340 
341 
342  // insert a new row at the end of the database
     // @param new_row the strings of the row. Each string that is kept is
     //        converted into a DBCell through _convert_, and has_missing_val
     //        is set to true as soon as one converted cell reports
     //        isMissing().
     // The size check is delegated to isRowSizeOK_, which receives the size
     // of new_row minus ignored_size.
     // NOTE(review): this listing skips source lines 344, 349, 350, 357, 361,
     // 369, 377, 384, 390, 397 and 398. The function name, the declarations
     // of ignored_size and ignored_cols_size, the statement that emits the
     // error message, and the statements storing the cells and the row are
     // therefore not visible here.
343  template < template < typename > class ALLOC >
345  const std::vector< std::string, ALLOC< std::string > >& new_row) {
346  // check that the size of the row (after removing the ignored columns) is
347  // the same as the rest of the database
348  const std::size_t row_size = new_row.size();
351  if (ignored_cols_size) {
352  // find the number of ignored cols
     // _ignored_cols_ is walked from its last entry backwards: ignored_size
     // is incremented for each entry that is >= row_size and the walk stops
     // at the first entry that is < row_size
353  for (auto iter = _ignored_cols_.rbegin(), rend = _ignored_cols_.rend(); iter != rend;
354  ++iter, ++ignored_size) {
355  if (*iter < row_size) { break; }
356  }
358  }
359 
360  if (!this->isRowSizeOK_(row_size - ignored_size)) {
362  "the new row has " << (row_size - ignored_size)
363  << " elements whereas the raw database table has "
364  << this->variable_names_.size() << " columns");
365  }
366 
367  // create the dbrow that will contain the new data
368  Row< DBCell > dbrow;
370  bool has_missing_val = false;
371 
372  // translate the row into T_data and put them into the newly created dbrow
373  if (ignored_size == 0) {
     // no column to skip: every string of new_row is converted
374  for (const auto& elt: new_row) {
375  const DBCell new_cell(this->_convert_(elt));
376  if (new_cell.isMissing()) has_missing_val = true;
378  }
379  } else {
     // i scans new_row and j scans _ignored_cols_: the strings whose index
     // equals _ignored_cols_[j] are not converted; once j reaches
     // ignored_size, all the remaining strings are converted
380  for (std::size_t i = std::size_t(0), j = std::size_t(0); i < row_size; ++i) {
381  if (i != _ignored_cols_[j]) {
382  const DBCell new_cell(this->_convert_(new_row[i]));
383  if (new_cell.isMissing()) has_missing_val = true;
385  } else {
386  if (++j == ignored_size) {
387  for (++i; i < row_size; ++i) {
388  const DBCell new_cell(this->_convert_(new_row[i]));
389  if (new_cell.isMissing()) has_missing_val = true;
391  }
392  }
393  }
394  }
395  }
396 
399  : IsMissing::False);
400  }
401 
402 
403  // erase the content of the database, including the names of the variables
404  template < template < typename > class ALLOC >
405  void RawDatabaseTable< ALLOC >::clear() {
408  }
409 
410 
411  } /* namespace learning */
412 
413 } /* namespace gum */
414 
415 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)