aGrUM  0.14.2
databaseTable_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 namespace gum {
30 
31  namespace learning {
32 
33 
34  // default constructor
35  template < template < typename > class ALLOC >
36  template < template < typename > class XALLOC >
38  const typename DatabaseTable< ALLOC >::template MissingValType< XALLOC >&
39  missing_symbols,
40  const DBTranslatorSet< ALLOC >& translators,
41  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
42  IDatabaseTable< DBTranslatedValue, ALLOC >(
43  missing_symbols,
44  std::vector< std::string, ALLOC< std::string > >(),
45  alloc),
46  __translators(translators, alloc) {
47  if (translators.size()) {
48  // set the variables names according to those of the translators
49  std::vector< std::string, ALLOC< std::string > > var_names(
50  translators.size());
51  for (std::size_t i = std::size_t(0), size = translators.size(); i < size;
52  ++i) {
53  var_names[i] = __translators.translator(i).variable()->name();
54  }
55  setVariableNames(var_names, false);
56  }
57 
58  GUM_CONSTRUCTOR(DatabaseTable);
59  }
60 
61 
62  // default constructor
63  template < template < typename > class ALLOC >
65  const DBTranslatorSet< ALLOC >& translators,
66  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
67  IDatabaseTable< DBTranslatedValue, ALLOC >(
68  std::vector< std::string, ALLOC< std::string > >(),
69  std::vector< std::string, ALLOC< std::string > >(),
70  alloc),
71  __translators(translators, alloc) {
72  if (translators.size()) {
73  // set the variables names according to those of the translators
74  std::vector< std::string, ALLOC< std::string > > var_names(
75  translators.size());
76  for (std::size_t i = std::size_t(0), size = translators.size(); i < size;
77  ++i) {
78  var_names[i] = __translators.translator(i).variable()->name();
79  }
80  setVariableNames(var_names, false);
81  }
82 
83  GUM_CONSTRUCTOR(DatabaseTable);
84  }
85 
86 
87  // copy constructor with a given allocator
88  template < template < typename > class ALLOC >
90  const DatabaseTable< ALLOC >& from,
91  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
92  IDatabaseTable< DBTranslatedValue, ALLOC >(from, alloc),
93  __translators(from.__translators, alloc),
94  __ignored_cols(from.__ignored_cols) {
95  GUM_CONS_CPY(DatabaseTable);
96  }
97 
98 
99  // copy constructor
100  template < template < typename > class ALLOC >
101  INLINE
102  DatabaseTable< ALLOC >::DatabaseTable(const DatabaseTable< ALLOC >& from) :
103  DatabaseTable< ALLOC >(from, from.getAllocator()) {}
104 
105 
106  // move constructor with a given allocator
107  template < template < typename > class ALLOC >
109  DatabaseTable< ALLOC >&& from,
110  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
111  IDatabaseTable< DBTranslatedValue, ALLOC >(std::move(from), alloc),
112  __translators(std::move(from.__translators), alloc),
113  __ignored_cols(std::move(from.__ignored_cols)) {
114  GUM_CONS_MOV(DatabaseTable);
115  }
116 
117 
118  // move constructor
119  template < template < typename > class ALLOC >
120  INLINE DatabaseTable< ALLOC >::DatabaseTable(DatabaseTable< ALLOC >&& from) :
121  DatabaseTable< ALLOC >(std::move(from), from.getAllocator()) {}
122 
123 
124  // virtual copy constructor with a given allocator
125  template < template < typename > class ALLOC >
126  DatabaseTable< ALLOC >* DatabaseTable< ALLOC >::clone(
127  const typename DatabaseTable< ALLOC >::allocator_type& alloc) const {
128  ALLOC< DatabaseTable< ALLOC > > allocator(alloc);
129  DatabaseTable< ALLOC >* new_db = allocator.allocate(1);
130  try {
131  allocator.construct(new_db, *this, alloc);
132  } catch (...) {
133  allocator.deallocate(new_db, 1);
134  throw;
135  }
136 
137  return new_db;
138  }
139 
140 
141  // virtual copy constructor
142  template < template < typename > class ALLOC >
143  DatabaseTable< ALLOC >* DatabaseTable< ALLOC >::clone() const {
144  return clone(this->getAllocator());
145  }
146 
147 
148  // destructor
149  template < template < typename > class ALLOC >
151  GUM_DESTRUCTOR(DatabaseTable);
152  }
153 
154 
155  // copy operator
156  template < template < typename > class ALLOC >
157  DatabaseTable< ALLOC >& DatabaseTable< ALLOC >::
158  operator=(const DatabaseTable< ALLOC >& from) {
159  if (this != &from) {
161  __translators = from.__translators;
162  __ignored_cols = from.__ignored_cols;
163  }
164 
165  return *this;
166  }
167 
168 
169  // move operator
170  template < template < typename > class ALLOC >
171  DatabaseTable< ALLOC >& DatabaseTable< ALLOC >::
172  operator=(DatabaseTable< ALLOC >&& from) {
173  if (this != &from) {
175  __translators = std::move(from.__translators);
176  __ignored_cols = std::move(from.__ignored_cols);
177  }
178 
179  return *this;
180  }
181 
182 
183  // a method to process the rows of the database in multithreading
184  template < template < typename > class ALLOC >
185  template < typename Functor1, typename Functor2 >
186  void DatabaseTable< ALLOC >::__threadProcessDatabase(Functor1& exec_func,
187  Functor2& undo_func) {
188  // compute the number of threads to execute the code, the number N of
189  // rows that each thread should process and the number of rows that
190  // would remain after each thread has processed its N rows. For instance,
191  // if the database has 105 rows and there are 10 threads, each thread
192  // should process 10 rows and there would remain 5 rows
193  const std::size_t db_size = this->_rows.size();
194  std::size_t nb_threads = db_size / this->_min_nb_rows_per_thread;
195  if (nb_threads < 1)
196  nb_threads = 1;
197  else if (nb_threads > this->_max_nb_threads)
198  nb_threads = this->_max_nb_threads;
199  std::size_t nb_rows_par_thread = db_size / nb_threads;
200  std::size_t rest_rows = db_size - nb_rows_par_thread * nb_threads;
201 
202  // if there is just one thread, let it process all the rows
203  if (nb_threads == 1) {
204  exec_func(std::size_t(0), db_size);
205  return;
206  }
207 
208  // here, we shall create the threads, but also one std::exception_ptr
209  // for each thread. This will allow us to catch the exception raised
210  // by the threads
211  std::vector< std::thread > threads;
212  threads.reserve(nb_threads);
213  std::vector< std::exception_ptr > func_exceptions(nb_threads, nullptr);
214 
215  // create a lambda that will execute exec_func while catching its exceptions
216  auto real_exec_func = [&exec_func](std::size_t begin,
217  std::size_t end,
218  std::exception_ptr& exc) -> void {
219  try {
220  exec_func(begin, end);
221  } catch (...) { exc = std::current_exception(); }
222  };
223 
224  // launch the threads
225  std::size_t begin_index = std::size_t(0);
226  for (std::size_t i = std::size_t(0); i < nb_threads; ++i) {
227  std::size_t end_index = begin_index + nb_rows_par_thread;
228  if (rest_rows != std::size_t(0)) {
229  ++end_index;
230  --rest_rows;
231  }
232  threads.push_back(std::thread(std::ref(real_exec_func),
233  begin_index,
234  end_index,
235  std::ref(func_exceptions[i])));
236  begin_index = end_index;
237  }
238 
239  // wait for the threads to complete their executions
240  std::for_each(
241  threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
242 
243  // now, check if one exception has been raised
244  bool exception_raised = false;
245  for (const auto& exc : func_exceptions) {
246  if (exc != nullptr) {
247  exception_raised = true;
248  break;
249  }
250  }
251 
252  if (exception_raised) {
253  // create a lambda that will execute undo_func while catching
254  // its exceptions
255  auto real_undo_func = [&undo_func](std::size_t begin,
256  std::size_t end,
257  std::exception_ptr& exc) -> void {
258  try {
259  undo_func(begin, end);
260  } catch (...) { exc = std::current_exception(); }
261  };
262 
263  // launch the repair threads
264  threads.clear();
265  begin_index = std::size_t(0);
266  std::vector< std::exception_ptr > undo_func_exceptions(nb_threads,
267  nullptr);
268  for (std::size_t i = std::size_t(0); i < nb_threads; ++i) {
269  std::size_t end_index = begin_index + nb_rows_par_thread;
270  if (rest_rows != std::size_t(0)) {
271  ++end_index;
272  --rest_rows;
273  }
274  // we just need to repair the threads that did not raise exceptions
275  if (func_exceptions[i] == nullptr)
276  threads.push_back(std::thread(std::ref(real_undo_func),
277  begin_index,
278  end_index,
279  std::ref(undo_func_exceptions[i])));
280  begin_index = end_index;
281  }
282 
283  // wait for the threads to complete their executions
284  std::for_each(
285  threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
286 
287  // rethrow the exception
288  for (const auto& exc : func_exceptions) {
289  if (exc != nullptr) { std::rethrow_exception(exc); }
290  }
291  }
292  }
293 
294 
296  template < template < typename > class ALLOC >
298  const DBTranslator< ALLOC >& translator,
299  const std::size_t input_column,
300  const bool unique_column) {
301  // check that there is no ignored_column corresponding to column
302  if (__ignored_cols.exists(input_column))
303  GUM_ERROR(
304  OperationNotAllowed,
305  "Column "
306  << input_column << " is marked as being ignored. "
307  << "So it is forbidden to create a translator for that column.");
308 
309  // reserve some place for the new column in the records of the database
310  const std::size_t new_size = this->nbVariables() + 1;
311 
312  // create the lambda for reserving some memory for the new column
313  // and the one that undoes what it performed if some thread executing
314  // it raised an exception
315  auto reserve_lambda = [this, new_size](std::size_t begin,
316  std::size_t end) -> void {
317  for (std::size_t i = begin; i < end; ++i)
318  this->_rows[i].row().reserve(new_size);
319  };
320 
321  auto undo_reserve_lambda = [](std::size_t begin, std::size_t end) -> void {};
322 
323  // launch the threads executing the lambdas
324  this->__threadProcessDatabase(reserve_lambda, undo_reserve_lambda);
325 
326  // insert the translator into the translator set
327  const std::size_t pos =
328  __translators.insertTranslator(translator, input_column, unique_column);
329 
330  // insert the name of the translator's variable to the set of variable names
331  try {
332  this->_variable_names.push_back(translator.variable()->name());
333  } catch (...) {
334  __translators.eraseTranslator(pos);
335  throw;
336  }
337 
338  // if the databaseTable is not empty, fill the column of the database
339  // corresponding to the translator with missing values
341  const DBTranslatedValue missing = __translators[pos].missingValue();
342 
343  // create the lambda for adding a new column filled wih a missing value
344  auto fill_lambda = [this, missing](std::size_t begin,
345  std::size_t end) -> void {
346  std::size_t i = begin;
347  try {
348  for (; i < end; ++i) {
349  this->_rows[i].row().push_back(missing);
350  this->_has_row_missing_val[i] = IsMissing::True;
351  }
352  } catch (...) {
353  for (std::size_t j = begin; j < i; ++j)
354  this->_rows[i].row().pop_back();
355  throw;
356  }
357  };
358 
359  auto undo_fill_lambda = [this](std::size_t begin,
360  std::size_t end) -> void {
361  for (std::size_t i = begin; i < end; ++i)
362  this->_rows[i].row().pop_back();
363  };
364 
365  // launch the threads executing the lambdas
366  this->__threadProcessDatabase(fill_lambda, undo_fill_lambda);
367  }
368 
369  return pos;
370  }
371 
372 
374  template < template < typename > class ALLOC >
375  std::size_t
376  DatabaseTable< ALLOC >::insertTranslator(const Variable& var,
377  const std::size_t input_column,
378  const bool unique_column) {
379  // check that there is no ignored_column corresponding to column
380  if (__ignored_cols.exists(input_column))
381  GUM_ERROR(
382  OperationNotAllowed,
383  "Column "
384  << input_column << " is marked as being ignored. "
385  << "So it is forbidden to create a translator for that column.");
386 
387  // if the databaseTable is not empty, we should fill the column of the
388  // database corresponding to the new translator with missing values. But, the
389  // current method assumes that the list of missing values is empty. Hence, it
390  // should raise an exception
392  GUM_ERROR(
393  MissingValueInDatabase,
394  "inserting a new translator into a database creates a new column "
395  << "with missing values. However, you did not define any symbol for "
396  << "such values.");
397  }
398 
399  // reserve some place for the new column in the records of the database
400  const std::size_t new_size = this->nbVariables() + 1;
401 
402  // create the lambda for reserving some memory for the new column
403  // and the one that undoes what it performed if some thread executing
404  // it raised an exception
405  auto reserve_lambda = [this, new_size](std::size_t begin,
406  std::size_t end) -> void {
407  for (std::size_t i = begin; i < end; ++i)
408  this->_rows[i].row().reserve(new_size);
409  };
410 
411  auto undo_reserve_lambda = [](std::size_t begin, std::size_t end) -> void {};
412 
413  // launch the threads executing the lambdas
414  this->__threadProcessDatabase(reserve_lambda, undo_reserve_lambda);
415 
416  // insert the translator into the translator set
417  const std::size_t pos =
418  __translators.insertTranslator(var, input_column, unique_column);
419 
420  // insert the name of the translator's variable to the set of variable names
421  try {
422  this->_variable_names.push_back(var.name());
423  } catch (...) {
424  __translators.eraseTranslator(pos);
425  throw;
426  }
427 
428  return pos;
429  }
430 
431 
433  template < template < typename > class ALLOC >
434  template < template < typename > class XALLOC >
436  const Variable& var,
437  const std::size_t input_column,
438  std::vector< std::string, XALLOC< std::string > > missing_symbols,
439  const bool unique_column) {
440  // check that there is no ignored_column corresponding to column
441  if (__ignored_cols.exists(input_column))
442  GUM_ERROR(
443  OperationNotAllowed,
444  "Column "
445  << input_column << " is marked as being ignored. "
446  << "So it is forbidden to create a translator for that column.");
447 
448  // reserve some place for the new column in the records of the database
449  const std::size_t new_size = this->nbVariables() + 1;
450 
451  // create the lambda for reserving some memory for the new column
452  // and the one that undoes what it performed if some thread executing
453  // it raised an exception
454  auto reserve_lambda = [this, new_size](std::size_t begin,
455  std::size_t end) -> void {
456  for (std::size_t i = begin; i < end; ++i)
457  this->_rows[i].row().reserve(new_size);
458  };
459 
460  auto undo_reserve_lambda = [](std::size_t begin, std::size_t end) -> void {};
461 
462  // launch the threads executing the lambdas
463  this->__threadProcessDatabase(reserve_lambda, undo_reserve_lambda);
464 
465  // insert the translator into the translator set
466  const std::size_t pos = __translators.insertTranslator(
467  var, input_column, missing_symbols, unique_column);
468 
469  // insert the name of the translator's variable to the set of variable names
470  try {
471  this->_variable_names.push_back(var.name());
472  } catch (...) {
473  __translators.eraseTranslator(pos);
474  throw;
475  }
476 
477  // if the databaseTable is not empty, fill the column of the database
478  // corresponding to the translator with missing values
480  const DBTranslatedValue missing = __translators[pos].missingValue();
481 
482  // create the lambda for adding a new column filled wih a missing value
483  auto fill_lambda = [this, missing](std::size_t begin,
484  std::size_t end) -> void {
485  std::size_t i = begin;
486  try {
487  for (; i < end; ++i) {
488  this->_rows[i].row().push_back(missing);
489  this->_has_row_missing_val[i] = IsMissing::True;
490  }
491  } catch (...) {
492  for (std::size_t j = begin; j < i; ++j)
493  this->_rows[i].row().pop_back();
494  throw;
495  }
496  };
497 
498  auto undo_fill_lambda = [this](std::size_t begin,
499  std::size_t end) -> void {
500  for (std::size_t i = begin; i < end; ++i)
501  this->_rows[i].row().pop_back();
502  };
503 
504  // launch the threads executing the lambdas
505  this->__threadProcessDatabase(fill_lambda, undo_fill_lambda);
506  }
507 
508  return pos;
509  }
510 
511 
516  template < template < typename > class ALLOC >
517  INLINE typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
518  DatabaseTable< ALLOC >::__getKthIndices(const std::size_t k,
519  const bool k_is_input_col) const {
520  const std::size_t nb_trans = __translators.size();
521  if (!k_is_input_col) {
522  if (k < nb_trans)
523  return DBVector< std::size_t >{k};
524  else
525  return DBVector< std::size_t >();
526  } else {
527  DBVector< std::size_t > trans;
528  for (std::size_t i = std::size_t(0), kk = nb_trans - 1; i < nb_trans;
529  ++i, --kk) {
530  if (__translators.inputColumn(kk) == k) trans.push_back(kk);
531  }
532  return trans;
533  }
534  }
535 
536 
537  // erases the kth translator or all those parsing the kth column of
538  // the input dataset
539  template < template < typename > class ALLOC >
540  void DatabaseTable< ALLOC >::eraseTranslators(const std::size_t k,
541  const bool k_is_input_col) {
542  for (const auto kk : __getKthIndices(k, k_is_input_col)) {
543  // erase the translator of index kk and the corresponding variable
544  // name. If there remains no more translator in the translator set,
545  // _rows should become empty
546  this->_variable_names.erase(this->_variable_names.begin() + kk);
547  if (this->_variable_names.empty()) {
549  } else {
550  const std::size_t nb_trans = __translators.size();
551 
552  auto erase_lambda = [this, nb_trans, kk](std::size_t begin,
553  std::size_t end) -> void {
554  for (std::size_t i = begin; i < end; ++i) {
555  auto& row = this->_rows[i].row();
556  if (this->__translators.isMissingValue(row[kk], kk)) {
557  bool has_missing_val = false;
558  for (std::size_t j = std::size_t(0); j < nb_trans; ++j) {
559  if ((j != kk) && this->__translators.isMissingValue(row[j], j)) {
560  has_missing_val = true;
561  break;
562  }
563  }
564  if (!has_missing_val)
565  this->_has_row_missing_val[i] = IsMissing::False;
566  }
567  row.erase(row.begin() + kk);
568  }
569  };
570 
571  auto undo_erase_lambda = [](std::size_t begin, std::size_t end) -> void {
572  };
573 
574  // launch the threads executing the lambdas
575  this->__threadProcessDatabase(erase_lambda, undo_erase_lambda);
576  }
577  __translators.eraseTranslator(kk);
578  }
579  }
580 
581 
583  template < template < typename > class ALLOC >
584  INLINE const DBTranslatorSet< ALLOC >&
586  return __translators;
587  }
588 
589 
593  template < template < typename > class ALLOC >
594  INLINE std::size_t
595  DatabaseTable< ALLOC >::__getKthIndex(const std::size_t k,
596  const bool k_is_input_col) const {
597  if (k_is_input_col) {
598  const std::size_t nb_trans = __translators.size();
599  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
600  if (__translators.inputColumn(i) == k) { return i; }
601  }
602  return nb_trans + 1;
603  } else {
604  return k;
605  }
606  }
607 
608 
610  template < template < typename > class ALLOC >
611  const DBTranslator< ALLOC >&
612  DatabaseTable< ALLOC >::translator(const std::size_t k,
613  const bool k_is_input_col) const {
614  // find the position of the translator that we look for. This
615  // is variable kk below
616  const std::size_t nb_trans = __translators.size();
617  const std::size_t kk = __getKthIndex(k, k_is_input_col);
618 
619  // check if the translator exists
620  if (nb_trans <= kk) {
621  if (k_is_input_col) {
622  GUM_ERROR(UndefinedElement,
623  "there is no translator in the database table that "
624  << "parses Column " << k);
625  } else {
626  GUM_ERROR(UndefinedElement,
627  "the database has " << nb_trans
628  << " translators, so Translator #" << k
629  << " does not exist");
630  }
631  }
632 
633  return __translators.translator(kk);
634  }
635 
636 
638  template < template < typename > class ALLOC >
639  const Variable&
640  DatabaseTable< ALLOC >::variable(const std::size_t k,
641  const bool k_is_input_col) const {
642  // find the position of the translator that contains the variable.
643  // This is variable kk below
644  const std::size_t nb_trans = __translators.size();
645  const std::size_t kk = __getKthIndex(k, k_is_input_col);
646 
647  // check if the translator exists
648  if (nb_trans <= kk) {
649  if (k_is_input_col) {
650  GUM_ERROR(UndefinedElement,
651  "there is no variable in the database table that "
652  << "corresponds to Column " << k);
653  } else {
654  GUM_ERROR(UndefinedElement,
655  "the database has " << nb_trans << " variables, so Variable #"
656  << k << " does not exist");
657  }
658  }
659 
660  return __translators.variable(kk);
661  }
662 
663 
665  template < template < typename > class ALLOC >
667  const std::vector< std::string, ALLOC< std::string > >& names,
668  const bool from_external_object) {
669  const std::size_t size = names.size();
670  const std::size_t nb_trans = __translators.size();
671  if (!from_external_object) {
672  if (nb_trans != size) {
673  GUM_ERROR(SizeError,
674  "the number of variable's names (i.e., "
675  << size
676  << ") does not correspond to the number of columns of the "
677  << "database table (i.e.," << nb_trans << ")");
678  }
679 
680  // update the translator names
681  for (std::size_t i = std::size_t(0); i < size; ++i) {
682  __translators.translator(i).setVariableName(names[i]);
683  }
684  } else {
685  if (nb_trans && (__translators.highestInputColumn() >= size)) {
686  GUM_ERROR(SizeError,
687  "the names vector has "
688  << size << " elements whereas it should have at least "
689  << (__translators.highestInputColumn() + 1)
690  << "elements so that each translator is assigned a name");
691  }
692 
693  // update the translator names
694  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
695  __translators.translator(i).setVariableName(
696  names[__translators.inputColumn(i)]);
697  }
698  }
699 
700  // update _variable_names using the newly assigned translators names
701  this->_variable_names.resize(nb_trans);
702  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
703  this->_variable_names[i] = __translators.variable(i).name();
704  }
705  }
706 
707 
710  template < template < typename > class ALLOC >
711  void DatabaseTable< ALLOC >::ignoreColumn(const std::size_t k,
712  const bool k_is_input_col) {
713  // indicate that the column will be forbidden. If the column is already
714  // forbidden, do nothing. But if the column is assigned to a translator
715  // that does not exist, raise an UndefinedElement exception
716  const std::size_t nb_trans = __translators.size();
717  if (k_is_input_col) {
718  if (__ignored_cols.exists(k)) return;
719  __ignored_cols.insert(k);
720  } else {
721  if (k < nb_trans) {
722  __ignored_cols.insert(__translators.inputColumn(k));
723  } else {
724  GUM_ERROR(UndefinedElement,
725  "It is impossible to ignore the column parsed by Translator #"
726  << k << "because there exist only " << nb_trans
727  << " translators");
728  }
729  }
730 
731  // remove all the translators corresponding to k
732  eraseTranslators(k, k_is_input_col);
733  }
734 
735 
737  template < template < typename > class ALLOC >
738  const typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
740  const std::size_t nb_trans = __translators.size();
741 
742  if (nb_trans == std::size_t(0)) {
743  return DBVector< std::size_t >{std::size_t(0)};
744  }
745 
746  // get the columns handled by the translators, sorted by increasing order
747  DBVector< std::size_t > cols(nb_trans);
748  for (std::size_t i = std::size_t(0); i < nb_trans; ++i)
749  cols[i] = __translators.inputColumn(i);
750  std::sort(cols.begin(), cols.end());
751 
752  // create a vector with all the possible input columns
753  const std::size_t highest = __translators.highestInputColumn() + 1;
754  DBVector< std::size_t > ignored_cols(highest);
755  std::iota(ignored_cols.begin(), ignored_cols.end(), 0);
756 
757  // remove from ignored_cols the elements of cols
758  for (std::size_t i = std::size_t(0),
759  ii = highest - 1,
760  k = std::size_t(0),
761  kk = nb_trans - 1;
762  i < highest;
763  ++i, --ii) {
764  if (cols[kk] == ii) {
765  ignored_cols.erase(ignored_cols.begin() + ii);
766  while ((k < nb_trans) && (cols[kk] == ii)) {
767  --kk;
768  ++k;
769  }
770  if (k == nb_trans) break;
771  }
772  }
773 
774  // add the column past the last translator
775  ignored_cols.push_back(highest);
776 
777  return ignored_cols;
778  }
779 
780 
782  template < template < typename > class ALLOC >
783  const typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
785  const std::size_t nb_trans = __translators.size();
786  if (nb_trans == std::size_t(0)) { return DBVector< std::size_t >(); }
787 
788  DBVector< std::size_t > input_cols(nb_trans);
789  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
790  input_cols[i] = __translators.inputColumn(i);
791  }
792  return input_cols;
793  }
794 
795 
797  template < template < typename > class ALLOC >
798  std::size_t
799  DatabaseTable< ALLOC >::domainSize(const std::size_t k,
800  const bool k_is_input_col) const {
801  // find the position kk of the translator that contains the variable
802  const std::size_t nb_trans = __translators.size();
803  const std::size_t kk = __getKthIndex(k, k_is_input_col);
804 
805  // check if the translator exists
806  if (nb_trans <= kk) {
807  if (k_is_input_col) {
808  GUM_ERROR(UndefinedElement,
809  "there is no variable in the database table that "
810  << "corresponds to Column " << k);
811  } else {
812  GUM_ERROR(UndefinedElement,
813  "the database has " << nb_trans << " variables, so Variable #"
814  << k << " does not exist");
815  }
816  }
817 
818  return __translators.domainSize(kk);
819  }
820 
821 
823  template < template < typename > class ALLOC >
824  INLINE std::vector< std::size_t, ALLOC< std::size_t > >
826  const std::size_t nb_trans = __translators.size();
827  std::vector< std::size_t > dom(nb_trans);
828  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
829  dom[i] = __translators.domainSize(i);
830  }
831  return dom;
832  }
833 
834 
835  // indicates whether a reordering is needed to make the kth
836  // translator sorted by lexicographical order
837  template < template < typename > class ALLOC >
838  bool DatabaseTable< ALLOC >::needsReordering(const std::size_t k,
839  const bool k_is_input_col) const {
840  // find the position kk of the translator that contains the variable
841  const std::size_t nb_trans = __translators.size();
842  const std::size_t kk = __getKthIndex(k, k_is_input_col);
843 
844  // check if the translator exists
845  if (nb_trans <= kk) {
846  if (k_is_input_col) {
847  GUM_ERROR(UndefinedElement,
848  "there is no translator in the database table that "
849  << "parses Column " << k);
850  } else {
851  GUM_ERROR(UndefinedElement,
852  "the database has " << nb_trans
853  << " translators, so Translator #" << k
854  << " does not exist");
855  }
856  }
857 
858  return __translators.needsReordering(kk);
859  }
860 
861 
862  // performs a reordering of the kth translator or of the first
863  // translator corresponding to the kth column of the input database
864  template < template < typename > class ALLOC >
865  void DatabaseTable< ALLOC >::reorder(const std::size_t k,
866  const bool k_is_input_col) {
867  // find the position kk of the translator that contains the variable
868  const std::size_t nb_trans = __translators.size();
869  const std::size_t kk = __getKthIndex(k, k_is_input_col);
870 
871  // check if the translator exists
872  if (nb_trans <= kk) {
873  if (k_is_input_col) {
874  GUM_ERROR(UndefinedElement,
875  "there is no translator in the database table that "
876  << "parses Column " << k);
877  } else {
878  GUM_ERROR(UndefinedElement,
879  "the database has " << nb_trans
880  << " translators, so Translator #" << k
881  << " does not exist");
882  }
883  }
884 
885  // if the translator is not designed for a discrete variable, there
886  // is no reordering to apply
887  if (__translators.translator(kk).getValType()
889  return;
890 
891  // get the translation to perform
892  auto updates = __translators.reorder(kk);
893  if (updates.empty()) return;
894 
895  std::size_t size = updates.size();
896  std::vector< std::size_t, ALLOC< std::size_t > > new_values(size);
897  for (const auto& update : updates) {
898  if (update.first >= size) {
899  size = update.first + 1;
900  new_values.resize(size);
901  }
902  new_values[update.first] = update.second;
903  }
904 
905  // apply the translations
906  auto newtrans_lambda = [this, kk, &new_values](std::size_t begin,
907  std::size_t end) -> void {
908  for (std::size_t i = begin; i < end; ++i) {
909  auto& elt = this->_rows[i][kk].discr_val;
910  if (elt != std::numeric_limits< std::size_t >::max())
911  elt = new_values[elt];
912  }
913  };
914 
915  auto undo_newtrans_lambda = [](std::size_t begin, std::size_t end) -> void {
916  };
917 
918  // launch the threads executing the lambdas
919  this->__threadProcessDatabase(newtrans_lambda, undo_newtrans_lambda);
920  }
921 
922 
924  template < template < typename > class ALLOC >
925  INLINE void DatabaseTable< ALLOC >::reorder() {
926  const std::size_t nb_trans = __translators.size();
927  for (std::size_t i = std::size_t(0); i < nb_trans; ++i)
928  reorder(i, false);
929  }
930 
931 
// Inserts one row given as raw strings: every translator converts its part of
// new_row into a DBTranslatedValue, and the resulting row is handed to
// this->insertRow() together with a flag telling whether any translated value
// is a missing value.
//  - new_row: the strings of the row to insert; an empty row is ignored.
//  - reports a SizeError through GUM_ERROR when new_row has too few columns,
//    i.e. when __translators.highestInputColumn() >= new_row.size().
// NOTE(review): the line holding the function name (gutter line 934) is
// absent from this listing; the cross-reference index at the end of the page
// lists an `insertRow(const std::vector< std::string, ... > &new_row)`.
933  template < template < typename > class ALLOC >
935  const std::vector< std::string, ALLOC< std::string > >& new_row) {
936  // check that the row can be fully translated, i.e., it contains enough
937  // columns to be translated
938  const std::size_t row_size = new_row.size();
     // nothing to insert: silently return instead of raising an error
939  if (row_size == std::size_t(0)) return;
940 
     // highestInputColumn() is used as a 0-based index here, hence ">=" and
     // the "+ 1" in the message below
941  if (__translators.highestInputColumn() >= row_size) {
942  GUM_ERROR(SizeError,
943  "the new row has "
944  << row_size
945  << " columns whereas the database requires at least "
946  << (__translators.highestInputColumn() + 1) << " columns");
947  }
948 
949  // convert the new_row into a row of DBTranslatedValue
950  const std::size_t nb_trans = __translators.size();
951  Row< DBTranslatedValue > dbrow;
     // one translated value per translator, so reserve exactly nb_trans slots
952  dbrow.reserve(nb_trans);
953  bool has_missing_val = false;
954  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
955  const DBTranslatedValue new_val(__translators.translate(new_row, i));
     // a single missing value is enough to flag the whole row
956  if (__translators.isMissingValue(new_val, i)) has_missing_val = true;
     // NOTE(review): new_val is const, so std::move yields a const rvalue
     // and cannot trigger a real move; the value is effectively copied
957  dbrow.pushBack(std::move(new_val));
958  }
959 
     // delegate the actual storage to the overload taking a translated row
960  this->insertRow(std::move(dbrow),
961  has_missing_val ? IsMissing::True : IsMissing::False);
962  }
963 
964 
// Checks whether a row of translated values fits the current translators.
// Returns false when the row size differs from the number of translators or
// when one of its values fails the per-type range test below; returns true
// otherwise. Reports a NotImplementedYet error through GUM_ERROR when a
// translator's value type is handled by none of the cases.
// NOTE(review): the two `case` labels (gutter lines 978 and 984) are absent
// from this listing; the fields read (discr_val, then cont_val) suggest a
// discrete case followed by a continuous one -- confirm against the file.
967  template < template < typename > class ALLOC >
968  bool DatabaseTable< ALLOC >::__isRowCompatible(
969  const typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >&
970  row) const {
971  // check that the size of the row corresponds to that of the translators
972  const std::size_t row_size = row.size();
973  if (row_size != __translators.size()) return false;
974 
975  const auto& translators = __translators.translators();
976  for (std::size_t i = std::size_t(0); i < row_size; ++i) {
977  switch (translators[i]->getValType()) {
     // first case (label missing): the index must be below the translator's
     // domain size; std::size_t's max value is exempt from the test
     // (presumably the encoding of a missing value -- confirm)
979  if ((row[i].discr_val >= translators[i]->domainSize())
980  && (row[i].discr_val != std::numeric_limits< std::size_t >::max()))
981  return false;
982  break;
983 
     // second case (label missing; the brace closed at gutter line 993 is
     // evidently opened there): the value must lie within the variable's
     // [lower bound, upper bound]; float's max value is exempt from the test
     // (presumably the encoding of a missing value -- confirm)
985  const IContinuousVariable& var =
986  static_cast< const IContinuousVariable& >(
987  *(translators[i]->variable()));
988  if (((var.lowerBoundAsDouble() > (double)row[i].cont_val)
989  || (var.upperBoundAsDouble() < (double)row[i].cont_val))
990  && (row[i].cont_val != std::numeric_limits< float >::max()))
991  return false;
992  break;
993  }
994 
     // any other value type is reported as an error rather than as "false"
995  default:
996  GUM_ERROR(NotImplementedYet,
997  "Translated value type not supported yet");
998  }
999  }
1000 
1001  return true;
1002  }
1003 
1004 
// Inserts an already translated row received as an rvalue reference, after
// checking it with __isRowCompatible().
//  - new_row: the row of DBTranslatedValue to insert.
//  - contains_missing_data: passed on unchanged to the final call.
//  - reports a SizeError through GUM_ERROR when the row size differs from the
//    number of translators, and an InvalidArgument error when the size is
//    right but the row is still rejected by __isRowCompatible().
// NOTE(review): gutter lines 1007 (function name) and 1026 (start of the
// final call) are absent from this listing.
1006  template < template < typename > class ALLOC >
1008  typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >&&
1009  new_row,
1010  const typename DatabaseTable< ALLOC >::IsMissing contains_missing_data) {
1011  // check that the new rows values are compatible with the values of
1012  // the variables stored within the translators
1013  if (!__isRowCompatible(new_row)) {
      // the size is tested again only to select the more precise error
1014  if (new_row.size() != __translators.size()) {
1015  GUM_ERROR(SizeError,
1016  "The new row has "
1017  << new_row.size()
1018  << " elements whereas the database table has "
1019  << __translators.size() << " columns");
1020  } else {
1021  GUM_ERROR(InvalidArgument,
1022  "the new row is not compatible with the current translators");
1023  }
1024  }
1025 
      // tail of a call whose first line is missing from the listing;
      // NOTE(review): presumably forwards std::move(new_row) to the
      // base-class insertRow -- confirm against the original file
1027  contains_missing_data);
1028  }
1029 
1030 
// Inserts an already translated row received by const reference, after
// checking it with __isRowCompatible().
//  - new_row: the row of DBTranslatedValue to insert.
//  - contains_missing_data: passed on unchanged to the final call.
//  - reports a SizeError through GUM_ERROR when the row size differs from the
//    number of translators, and an InvalidArgument error when the size is
//    right but the row is still rejected by __isRowCompatible().
// NOTE(review): gutter lines 1033 (function name) and 1052 (start of the
// final call) are absent from this listing.
1032  template < template < typename > class ALLOC >
1034  const typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >&
1035  new_row,
1036  const typename DatabaseTable< ALLOC >::IsMissing contains_missing_data) {
1037  // check that the new rows values are compatible with the values of
1038  // the variables stored within the translators
1039  if (!__isRowCompatible(new_row)) {
      // the size is tested again only to select the more precise error
1040  if (new_row.size() != __translators.size()) {
1041  GUM_ERROR(SizeError,
1042  "The new row has "
1043  << new_row.size()
1044  << " elements whereas the database table has "
1045  << __translators.size() << " columns");
1046  } else {
1047  GUM_ERROR(InvalidArgument,
1048  "the new row is not compatible with the current translators");
1049  }
1050  }
1051 
      // tail of a call whose first line is missing from the listing;
      // NOTE(review): presumably forwards new_row to the base-class
      // insertRow -- confirm against the original file
1053  contains_missing_data);
1054  }
1055 
1056 
1057  // insert a new DBRow of DBCells at the end of the database
      // This variant takes the row of DBCell by const reference and is a stub:
      // new_row is never read and the body only reports a NotImplementedYet
      // error through GUM_ERROR.
      // NOTE(review): the line holding the function name (gutter line 1059) is
      // absent from this listing.
1058  template < template < typename > class ALLOC >
1060  const typename DatabaseTable< ALLOC >::template Row< DBCell >& new_row) {
1061  GUM_ERROR(NotImplementedYet, "not implemented yet");
1062  }
1063 
1064  // insert a new DBRow of DBCells at the end of the database
      // This variant takes the row of DBCell by rvalue reference and is a stub:
      // new_row is never read and the body only reports a NotImplementedYet
      // error through GUM_ERROR.
      // NOTE(review): the line holding the function name (gutter line 1066) is
      // absent from this listing.
1065  template < template < typename > class ALLOC >
1067  typename DatabaseTable< ALLOC >::template Row< DBCell >&& new_row) {
1068  GUM_ERROR(NotImplementedYet, "not implemented yet");
1069  }
1070 
1071 
// Inserts a set of already translated rows received as an rvalue reference.
// Every row is checked with __isRowCompatible() before the final call is
// reached, so an incompatible row raises its error ahead of that call.
//  - rows: the matrix of DBTranslatedValue rows to insert.
//  - rows_have_missing_vals: passed on unchanged to the final call.
//  - reports a SizeError through GUM_ERROR for a row whose size differs from
//    the number of translators, and an InvalidArgument error for a row of the
//    right size that is still rejected by __isRowCompatible().
// NOTE(review): gutter lines 1074 (function name) and 1097 (start of the
// final call) are absent from this listing.
1073  template < template < typename > class ALLOC >
1075  typename DatabaseTable< ALLOC >::template Matrix< DBTranslatedValue >&&
1076  rows,
1077  const typename DatabaseTable< ALLOC >::template DBVector< IsMissing >&
1078  rows_have_missing_vals) {
1079  // check that the new rows values are compatible with the values of
1080  // the variables stored within the translators
1081  for (const auto& new_row : rows) {
1082  if (!__isRowCompatible(new_row)) {
      // the size is tested again only to select the more precise error
1083  if (new_row.size() != __translators.size()) {
1084  GUM_ERROR(SizeError,
1085  "The new row has "
1086  << new_row.size()
1087  << " elements whereas the database table has "
1088  << __translators.size() << " columns");
1089  } else {
1090  GUM_ERROR(
1091  InvalidArgument,
1092  "the new row is not compatible with the current translators");
1093  }
1094  }
1095  }
1096 
      // tail of a call whose first line is missing from the listing;
      // NOTE(review): presumably the base-class insertRows -- confirm against
      // the original file
1098  std::move(rows), rows_have_missing_vals);
1099  }
1100 
1101 
// Inserts a set of already translated rows received by const reference.
// Every row is checked with __isRowCompatible() before the final call is
// reached, so an incompatible row raises its error ahead of that call.
//  - new_rows: the matrix of DBTranslatedValue rows to insert.
//  - rows_have_missing_vals: passed on unchanged to the final call.
//  - reports a SizeError through GUM_ERROR for a row whose size differs from
//    the number of translators, and an InvalidArgument error for a row of the
//    right size that is still rejected by __isRowCompatible().
// NOTE(review): gutter lines 1104 (function name) and 1127 (start of the
// final call) are absent from this listing.
1103  template < template < typename > class ALLOC >
1105  const typename DatabaseTable< ALLOC >::template Matrix< DBTranslatedValue >&
1106  new_rows,
1107  const typename DatabaseTable< ALLOC >::template DBVector< IsMissing >&
1108  rows_have_missing_vals) {
1109  // check that the new rows values are compatible with the values of
1110  // the variables stored within the translators
1111  for (const auto& new_row : new_rows) {
1112  if (!__isRowCompatible(new_row)) {
      // the size is tested again only to select the more precise error
1113  if (new_row.size() != __translators.size()) {
1114  GUM_ERROR(SizeError,
1115  "The new row has "
1116  << new_row.size()
1117  << " elements whereas the database table has "
1118  << __translators.size() << " columns");
1119  } else {
1120  GUM_ERROR(
1121  InvalidArgument,
1122  "the new row is not compatible with the current translators");
1123  }
1124  }
1125  }
1126 
      // tail of a call whose first line is missing from the listing;
      // NOTE(review): presumably the base-class insertRows -- confirm against
      // the original file
1128  new_rows, rows_have_missing_vals);
1129  }
1130 
1131 
// Stub taking a matrix of DBCell rows by rvalue reference: new_rows is never
// read and the body only reports a NotImplementedYet error through GUM_ERROR.
// NOTE(review): the line holding the function name (gutter line 1134) is
// absent from this listing.
1133  template < template < typename > class ALLOC >
1135  typename DatabaseTable< ALLOC >::template Matrix< DBCell >&& new_rows) {
1136  GUM_ERROR(NotImplementedYet, "not implemented yet");
1137  }
1138 
1139 
// Stub taking a matrix of DBCell rows by const reference: new_rows is never
// read and the body only reports a NotImplementedYet error through GUM_ERROR.
// NOTE(review): the line holding the function name (gutter line 1142) is
// absent from this listing.
1141  template < template < typename > class ALLOC >
1143  const typename DatabaseTable< ALLOC >::template Matrix< DBCell >&
1144  new_rows) {
1145  GUM_ERROR(NotImplementedYet, "not implemented yet");
1146  }
1147 
1148 
// Empties the members owned by DatabaseTable: the translator set and the
// list of ignored columns.
// NOTE(review): gutter lines 1151 (function name) and 1154 (a further
// statement of the body) are absent from this listing; the cross-reference
// index at the end of the page describes a `clear()` that erases the content
// of the database, so the missing statement presumably clears the base-class
// part -- confirm against the original file.
1150  template < template < typename > class ALLOC >
1152  __translators.clear();
1153  __ignored_cols.clear();
1155  }
1156 
1157 
1158  } /* namespace learning */
1159 
1160 } /* namespace gum */
1161 
1162 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
virtual ~DatabaseTable()
destructor
void insertRow(const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row)
insert a new row at the end of the database
The class representing a tabular database stored in RAM.
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true) final
makes the database table ignore from now on the kth column of the input dataset or the column parsed ...
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
ALLOC< DBTranslatedValue > getAllocator() const
returns the allocator of the database
const DBTranslatorSet< ALLOC > & translatorSet() const
returns the set of translators
std::size_t size() const noexcept
returns the number of records (rows) in the database
virtual void insertRow(const std::vector< std::string, ALLOC< std::string > > &new_row) final
insert a new row at the end of the database
STL namespace.
IDatabaseTable< T_DATA, ALLOC > & operator=(const IDatabaseTable< T_DATA, ALLOC > &from)
copy operator
void eraseAllRows()
erase all the rows
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
ALLOC< DBTranslatedValue > allocator_type
Types for STL compliance.
bool needsReordering(const std::size_t k, const bool k_is_input_col=false) const
indicates whether a reordering is needed to sort the translations of the kth translator or those of t...
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
IDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const ALLOC< DBTranslatedValue > &alloc)
default constructor
virtual DatabaseTable< ALLOC > * clone() const final
virtual copy constructor
DBVector< std::string > _variable_names
the names of the variables for each column
bool empty() const noexcept
indicates whether the database contains some records or not
virtual const DBVector< std::size_t > inputColumns() const final
returns the set of columns of the original dataset that are present in the DatabaseTable ...
const iterator & end() const noexcept
returns a new unsafe handler pointing to the end of the database
const DBTranslator< ALLOC > & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
std::size_t domainSize(const std::size_t k, const bool k_is_input_col=false) const
returns the domain size of the kth variable of the database table or of that of the first one corresp...
virtual const DBVector< std::size_t > ignoredColumns() const final
returns the set of columns of the original dataset that are ignored
DatabaseTable< ALLOC > & operator=(const DatabaseTable< ALLOC > &from)
copy operator
virtual void insertRows(Matrix< T_DATA > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
insert a set of new DBRows at the end of the database
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
virtual void insertRows(Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) final
insert a set of new DBRows at the end of the database
virtual void clear() final
erase the content of the database, including the names of the variables
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
typename IDatabaseTable< DBTranslatedValue, ALLOC >::IsMissing IsMissing
iterator begin() const
returns a new unsafe handler pointing to the beginning of the database
DatabaseTable(const MissingValType< XALLOC > &missing_symbols, const DBTranslatorSet< ALLOC > &translators=DBTranslatorSet< ALLOC >(), const allocator_type &alloc=allocator_type())
default constructor
void eraseTranslators(const std::size_t k, const bool k_is_input_col=false)
erases either the kth translator or all those parsing the kth column of the input dataset ...
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
virtual void clear()
erase the content of the database, including the names of the variables
void reorder()
performs a reordering of all the columns