aGrUM  0.16.0
databaseTable_tpl.h
Go to the documentation of this file.
1 
29 
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 
32 namespace gum {
33 
34  namespace learning {
35 
36 
37  // default constructor
// Constructor taking the symbols that denote missing values, a set of
// translators and an allocator.
// The base IDatabaseTable is initialised with the missing symbols, an empty
// vector of variable names and the allocator; __translators is built from
// `translators` and `alloc`. When the translator set is not empty, the
// variable names are then set from the names of the translators' variables.
// NOTE(review): the embedded listing numbers jump from 39 to 41, so the line
// holding the qualified constructor name is not visible in this listing.
38  template < template < typename > class ALLOC >
39  template < template < typename > class XALLOC >
41  const typename DatabaseTable< ALLOC >::template MissingValType< XALLOC >&
42  missing_symbols,
43  const DBTranslatorSet< ALLOC >& translators,
44  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
45  IDatabaseTable< DBTranslatedValue, ALLOC >(
46  missing_symbols,
47  std::vector< std::string, ALLOC< std::string > >(),
48  alloc),
49  __translators(translators, alloc) {
50  if (translators.size()) {
51  // set the variables names according to those of the translators
52  std::vector< std::string, ALLOC< std::string > > var_names(
53  translators.size());
54  for (std::size_t i = std::size_t(0), size = translators.size(); i < size;
55  ++i) {
// the names are read from the member copy (__translators), not from the
// `translators` argument
56  var_names[i] = __translators.translator(i).variable()->name();
57  }
// `false`: the names are indexed by translator position (see the
// from_external_object parameter of setVariableNames)
58  setVariableNames(var_names, false);
59  }
60 
61  GUM_CONSTRUCTOR(DatabaseTable);
62  }
63 
64 
65  // default constructor
// Constructor taking only a set of translators and an allocator.
// The base IDatabaseTable receives two empty vectors of strings (first and
// second arguments) and the allocator; __translators is built from
// `translators` and `alloc`. When the translator set is not empty, the
// variable names are then set from the names of the translators' variables.
// NOTE(review): the embedded listing numbers jump from 66 to 68, so the line
// holding the qualified constructor name is not visible in this listing.
66  template < template < typename > class ALLOC >
68  const DBTranslatorSet< ALLOC >& translators,
69  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
70  IDatabaseTable< DBTranslatedValue, ALLOC >(
71  std::vector< std::string, ALLOC< std::string > >(),
72  std::vector< std::string, ALLOC< std::string > >(),
73  alloc),
74  __translators(translators, alloc) {
75  if (translators.size()) {
76  // set the variables names according to those of the translators
77  std::vector< std::string, ALLOC< std::string > > var_names(
78  translators.size());
79  for (std::size_t i = std::size_t(0), size = translators.size(); i < size;
80  ++i) {
// the names are read from the member copy (__translators), not from the
// `translators` argument
81  var_names[i] = __translators.translator(i).variable()->name();
82  }
// `false`: the names are indexed by translator position (see the
// from_external_object parameter of setVariableNames)
83  setVariableNames(var_names, false);
84  }
85 
86  GUM_CONSTRUCTOR(DatabaseTable);
87  }
88 
89 
90  // copy constructor with a given allocator
// Copy constructor with a given allocator: copies the IDatabaseTable base and
// the translator set of `from` using `alloc`, and copies the set of ignored
// columns of `from`.
// NOTE(review): the embedded listing numbers jump from 91 to 93, so the line
// holding the qualified constructor name is not visible in this listing.
91  template < template < typename > class ALLOC >
93  const DatabaseTable< ALLOC >& from,
94  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
95  IDatabaseTable< DBTranslatedValue, ALLOC >(from, alloc),
96  __translators(from.__translators, alloc),
// __ignored_cols is copied without passing `alloc`
97  __ignored_cols(from.__ignored_cols) {
98  GUM_CONS_CPY(DatabaseTable);
99  }
100 
101 
102  // copy constructor
103  template < template < typename > class ALLOC >
104  INLINE
105  DatabaseTable< ALLOC >::DatabaseTable(const DatabaseTable< ALLOC >& from) :
106  DatabaseTable< ALLOC >(from, from.getAllocator()) {}
107 
108 
109  // move constructor with a given allocator
// Move constructor with a given allocator: moves the IDatabaseTable base, the
// translator set and the set of ignored columns out of `from`.
// NOTE(review): the embedded listing numbers jump from 110 to 112, so the line
// holding the qualified constructor name is not visible in this listing.
110  template < template < typename > class ALLOC >
112  DatabaseTable< ALLOC >&& from,
113  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
// `from` is passed to the base first; the two members below are then moved
// out of the same object individually
114  IDatabaseTable< DBTranslatedValue, ALLOC >(std::move(from), alloc),
115  __translators(std::move(from.__translators), alloc),
116  __ignored_cols(std::move(from.__ignored_cols)) {
117  GUM_CONS_MOV(DatabaseTable);
118  }
119 
120 
121  // move constructor
122  template < template < typename > class ALLOC >
123  INLINE DatabaseTable< ALLOC >::DatabaseTable(DatabaseTable< ALLOC >&& from) :
124  DatabaseTable< ALLOC >(std::move(from), from.getAllocator()) {}
125 
126 
127  // virtual copy constructor with a given allocator
128  template < template < typename > class ALLOC >
129  DatabaseTable< ALLOC >* DatabaseTable< ALLOC >::clone(
130  const typename DatabaseTable< ALLOC >::allocator_type& alloc) const {
131  ALLOC< DatabaseTable< ALLOC > > allocator(alloc);
132  DatabaseTable< ALLOC >* new_db = allocator.allocate(1);
133  try {
134  allocator.construct(new_db, *this, alloc);
135  } catch (...) {
136  allocator.deallocate(new_db, 1);
137  throw;
138  }
139 
140  return new_db;
141  }
142 
143 
144  // virtual copy constructor
145  template < template < typename > class ALLOC >
146  DatabaseTable< ALLOC >* DatabaseTable< ALLOC >::clone() const {
147  return clone(this->getAllocator());
148  }
149 
150 
151  // destructor
// Destructor: its visible body only invokes the GUM_DESTRUCTOR macro.
// NOTE(review): the embedded listing numbers jump from 152 to 154, so the line
// holding the destructor's declarator is not visible in this listing.
152  template < template < typename > class ALLOC >
154  GUM_DESTRUCTOR(DatabaseTable);
155  }
156 
157 
158  // copy operator
// Copy assignment: when `from` is another object, copies its translator set
// and its set of ignored columns into *this, then returns *this.
// NOTE(review): the embedded listing numbers jump from 162 to 164; the
// statement on the missing line (presumably the assignment of the
// IDatabaseTable base part — confirm against the real source) is not visible.
159  template < template < typename > class ALLOC >
160  DatabaseTable< ALLOC >& DatabaseTable< ALLOC >::
161  operator=(const DatabaseTable< ALLOC >& from) {
// self-assignment is a no-op
162  if (this != &from) {
164  __translators = from.__translators;
165  __ignored_cols = from.__ignored_cols;
166  }
167 
168  return *this;
169  }
170 
171 
// Move assignment: when `from` is another object, moves its translator set
// and its set of ignored columns into *this, then returns *this.
// NOTE(review): the embedded listing numbers jump from 176 to 178; the
// statement on the missing line (presumably the move-assignment of the
// IDatabaseTable base part — confirm against the real source) is not visible.
172  // move operator
173  template < template < typename > class ALLOC >
174  DatabaseTable< ALLOC >& DatabaseTable< ALLOC >::
175  operator=(DatabaseTable< ALLOC >&& from) {
// self-assignment is a no-op
176  if (this != &from) {
178  __translators = std::move(from.__translators);
179  __ignored_cols = std::move(from.__ignored_cols);
180  }
181 
182  return *this;
183  }
184 
185 
186  // a method to process the rows of the database in multithreading
187  template < template < typename > class ALLOC >
188  template < typename Functor1, typename Functor2 >
189  void DatabaseTable< ALLOC >::__threadProcessDatabase(Functor1& exec_func,
190  Functor2& undo_func) {
191  // compute the number of threads to execute the code, the number N of
192  // rows that each thread should process and the number of rows that
193  // would remain after each thread has processed its N rows. For instance,
194  // if the database has 105 rows and there are 10 threads, each thread
195  // should process 10 rows and there would remain 5 rows
196  const std::size_t db_size = this->_rows.size();
197  std::size_t nb_threads = db_size / this->_min_nb_rows_per_thread;
198  if (nb_threads < 1)
199  nb_threads = 1;
200  else if (nb_threads > this->_max_nb_threads)
201  nb_threads = this->_max_nb_threads;
202  std::size_t nb_rows_par_thread = db_size / nb_threads;
203  std::size_t rest_rows = db_size - nb_rows_par_thread * nb_threads;
204 
205  // if there is just one thread, let it process all the rows
206  if (nb_threads == 1) {
207  exec_func(std::size_t(0), db_size);
208  return;
209  }
210 
211  // here, we shall create the threads, but also one std::exception_ptr
212  // for each thread. This will allow us to catch the exception raised
213  // by the threads
214  std::vector< std::thread > threads;
215  threads.reserve(nb_threads);
216  std::vector< std::exception_ptr > func_exceptions(nb_threads, nullptr);
217 
218  // create a lambda that will execute exec_func while catching its exceptions
219  auto real_exec_func = [&exec_func](std::size_t begin,
220  std::size_t end,
221  std::exception_ptr& exc) -> void {
222  try {
223  exec_func(begin, end);
224  } catch (...) { exc = std::current_exception(); }
225  };
226 
227  // launch the threads
228  std::size_t begin_index = std::size_t(0);
229  for (std::size_t i = std::size_t(0); i < nb_threads; ++i) {
230  std::size_t end_index = begin_index + nb_rows_par_thread;
231  if (rest_rows != std::size_t(0)) {
232  ++end_index;
233  --rest_rows;
234  }
235  threads.push_back(std::thread(std::ref(real_exec_func),
236  begin_index,
237  end_index,
238  std::ref(func_exceptions[i])));
239  begin_index = end_index;
240  }
241 
242  // wait for the threads to complete their executions
243  std::for_each(
244  threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
245 
246  // now, check if one exception has been raised
247  bool exception_raised = false;
248  for (const auto& exc : func_exceptions) {
249  if (exc != nullptr) {
250  exception_raised = true;
251  break;
252  }
253  }
254 
255  if (exception_raised) {
256  // create a lambda that will execute undo_func while catching
257  // its exceptions
258  auto real_undo_func = [&undo_func](std::size_t begin,
259  std::size_t end,
260  std::exception_ptr& exc) -> void {
261  try {
262  undo_func(begin, end);
263  } catch (...) { exc = std::current_exception(); }
264  };
265 
266  // launch the repair threads
267  threads.clear();
268  begin_index = std::size_t(0);
269  std::vector< std::exception_ptr > undo_func_exceptions(nb_threads,
270  nullptr);
271  for (std::size_t i = std::size_t(0); i < nb_threads; ++i) {
272  std::size_t end_index = begin_index + nb_rows_par_thread;
273  if (rest_rows != std::size_t(0)) {
274  ++end_index;
275  --rest_rows;
276  }
277  // we just need to repair the threads that did not raise exceptions
278  if (func_exceptions[i] == nullptr)
279  threads.push_back(std::thread(std::ref(real_undo_func),
280  begin_index,
281  end_index,
282  std::ref(undo_func_exceptions[i])));
283  begin_index = end_index;
284  }
285 
286  // wait for the threads to complete their executions
287  std::for_each(
288  threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
289 
290  // rethrow the exception
291  for (const auto& exc : func_exceptions) {
292  if (exc != nullptr) { std::rethrow_exception(exc); }
293  }
294  }
295  }
296 
297 
299  template < template < typename > class ALLOC >
301  const DBTranslator< ALLOC >& translator,
302  const std::size_t input_column,
303  const bool unique_column) {
304  // check that there is no ignored_column corresponding to column
305  if (__ignored_cols.exists(input_column))
306  GUM_ERROR(
307  OperationNotAllowed,
308  "Column "
309  << input_column << " is marked as being ignored. "
310  << "So it is forbidden to create a translator for that column.");
311 
312  // reserve some place for the new column in the records of the database
313  const std::size_t new_size = this->nbVariables() + 1;
314 
315  // create the lambda for reserving some memory for the new column
316  // and the one that undoes what it performed if some thread executing
317  // it raised an exception
318  auto reserve_lambda = [this, new_size](std::size_t begin,
319  std::size_t end) -> void {
320  for (std::size_t i = begin; i < end; ++i)
321  this->_rows[i].row().reserve(new_size);
322  };
323 
324  auto undo_reserve_lambda = [](std::size_t begin, std::size_t end) -> void {};
325 
326  // launch the threads executing the lambdas
327  this->__threadProcessDatabase(reserve_lambda, undo_reserve_lambda);
328 
329  // insert the translator into the translator set
330  const std::size_t pos =
331  __translators.insertTranslator(translator, input_column, unique_column);
332 
333  // insert the name of the translator's variable to the set of variable names
334  try {
335  this->_variable_names.push_back(translator.variable()->name());
336  } catch (...) {
337  __translators.eraseTranslator(pos);
338  throw;
339  }
340 
341  // if the databaseTable is not empty, fill the column of the database
342  // corresponding to the translator with missing values
344  const DBTranslatedValue missing = __translators[pos].missingValue();
345 
346  // create the lambda for adding a new column filled wih a missing value
347  auto fill_lambda = [this, missing](std::size_t begin,
348  std::size_t end) -> void {
349  std::size_t i = begin;
350  try {
351  for (; i < end; ++i) {
352  this->_rows[i].row().push_back(missing);
353  this->_has_row_missing_val[i] = IsMissing::True;
354  }
355  } catch (...) {
356  for (std::size_t j = begin; j < i; ++j)
357  this->_rows[i].row().pop_back();
358  throw;
359  }
360  };
361 
362  auto undo_fill_lambda = [this](std::size_t begin,
363  std::size_t end) -> void {
364  for (std::size_t i = begin; i < end; ++i)
365  this->_rows[i].row().pop_back();
366  };
367 
368  // launch the threads executing the lambdas
369  this->__threadProcessDatabase(fill_lambda, undo_fill_lambda);
370  }
371 
372  return pos;
373  }
374 
375 
// Inserts a translator for variable `var`, reading column `input_column` of
// the input dataset, at the end of the translator set, and returns the
// position given by the translator set's insertTranslator.
// Throws OperationNotAllowed when input_column is marked as ignored. The
// second GUM_ERROR below raises MissingValueInDatabase.
377  template < template < typename > class ALLOC >
378  std::size_t
379  DatabaseTable< ALLOC >::insertTranslator(const Variable& var,
380  const std::size_t input_column,
381  const bool unique_column) {
382  // check that there is no ignored_column corresponding to column
383  if (__ignored_cols.exists(input_column))
384  GUM_ERROR(
385  OperationNotAllowed,
386  "Column "
387  << input_column << " is marked as being ignored. "
388  << "So it is forbidden to create a translator for that column.");
389 
390  // if the databaseTable is not empty, we should fill the column of the
391  // database corresponding to the new translator with missing values. But, the
392  // current method assumes that the list of missing values is empty. Hence, it
393  // should raise an exception
// NOTE(review): the embedded listing numbers jump from 393 to 395; the
// condition guarding the error below (presumably "the table is not empty",
// per the comment above) is not visible in this listing.
395  GUM_ERROR(
396  MissingValueInDatabase,
397  "inserting a new translator into a database creates a new column "
398  << "with missing values. However, you did not define any symbol for "
399  << "such values.");
400  }
401 
402  // reserve some place for the new column in the records of the database
403  const std::size_t new_size = this->nbVariables() + 1;
404 
405  // create the lambda for reserving some memory for the new column
406  // and the one that undoes what it performed if some thread executing
407  // it raised an exception
408  auto reserve_lambda = [this, new_size](std::size_t begin,
409  std::size_t end) -> void {
410  for (std::size_t i = begin; i < end; ++i)
411  this->_rows[i].row().reserve(new_size);
412  };
413 
// the undo functor is empty: reserved capacity is left as it is
414  auto undo_reserve_lambda = [](std::size_t begin, std::size_t end) -> void {};
415 
416  // launch the threads executing the lambdas
417  this->__threadProcessDatabase(reserve_lambda, undo_reserve_lambda);
418 
419  // insert the translator into the translator set
420  const std::size_t pos =
421  __translators.insertTranslator(var, input_column, unique_column);
422 
423  // insert the name of the translator's variable to the set of variable names
424  try {
425  this->_variable_names.push_back(var.name());
426  } catch (...) {
// keep the translator set and the variable names consistent
427  __translators.eraseTranslator(pos);
428  throw;
429  }
430 
431  return pos;
432  }
433 
434 
436  template < template < typename > class ALLOC >
437  template < template < typename > class XALLOC >
439  const Variable& var,
440  const std::size_t input_column,
441  std::vector< std::string, XALLOC< std::string > > missing_symbols,
442  const bool unique_column) {
443  // check that there is no ignored_column corresponding to column
444  if (__ignored_cols.exists(input_column))
445  GUM_ERROR(
446  OperationNotAllowed,
447  "Column "
448  << input_column << " is marked as being ignored. "
449  << "So it is forbidden to create a translator for that column.");
450 
451  // reserve some place for the new column in the records of the database
452  const std::size_t new_size = this->nbVariables() + 1;
453 
454  // create the lambda for reserving some memory for the new column
455  // and the one that undoes what it performed if some thread executing
456  // it raised an exception
457  auto reserve_lambda = [this, new_size](std::size_t begin,
458  std::size_t end) -> void {
459  for (std::size_t i = begin; i < end; ++i)
460  this->_rows[i].row().reserve(new_size);
461  };
462 
463  auto undo_reserve_lambda = [](std::size_t begin, std::size_t end) -> void {};
464 
465  // launch the threads executing the lambdas
466  this->__threadProcessDatabase(reserve_lambda, undo_reserve_lambda);
467 
468  // insert the translator into the translator set
469  const std::size_t pos = __translators.insertTranslator(
470  var, input_column, missing_symbols, unique_column);
471 
472  // insert the name of the translator's variable to the set of variable names
473  try {
474  this->_variable_names.push_back(var.name());
475  } catch (...) {
476  __translators.eraseTranslator(pos);
477  throw;
478  }
479 
480  // if the databaseTable is not empty, fill the column of the database
481  // corresponding to the translator with missing values
483  const DBTranslatedValue missing = __translators[pos].missingValue();
484 
485  // create the lambda for adding a new column filled wih a missing value
486  auto fill_lambda = [this, missing](std::size_t begin,
487  std::size_t end) -> void {
488  std::size_t i = begin;
489  try {
490  for (; i < end; ++i) {
491  this->_rows[i].row().push_back(missing);
492  this->_has_row_missing_val[i] = IsMissing::True;
493  }
494  } catch (...) {
495  for (std::size_t j = begin; j < i; ++j)
496  this->_rows[i].row().pop_back();
497  throw;
498  }
499  };
500 
501  auto undo_fill_lambda = [this](std::size_t begin,
502  std::size_t end) -> void {
503  for (std::size_t i = begin; i < end; ++i)
504  this->_rows[i].row().pop_back();
505  };
506 
507  // launch the threads executing the lambdas
508  this->__threadProcessDatabase(fill_lambda, undo_fill_lambda);
509  }
510 
511  return pos;
512  }
513 
514 
519  template < template < typename > class ALLOC >
520  INLINE typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
521  DatabaseTable< ALLOC >::__getKthIndices(const std::size_t k,
522  const bool k_is_input_col) const {
523  const std::size_t nb_trans = __translators.size();
524  if (!k_is_input_col) {
525  if (k < nb_trans)
526  return DBVector< std::size_t >{k};
527  else
528  return DBVector< std::size_t >();
529  } else {
530  DBVector< std::size_t > trans;
531  for (std::size_t i = std::size_t(0), kk = nb_trans - 1; i < nb_trans;
532  ++i, --kk) {
533  if (__translators.inputColumn(kk) == k) trans.push_back(kk);
534  }
535  return trans;
536  }
537  }
538 
539 
540  // erases the kth translator or all those parsing the kth column of
541  // the input dataset
// For every translator index kk returned by __getKthIndices(k,
// k_is_input_col): removes the kk-th variable name, removes column kk from
// all the rows (updating their missing-value flags) and finally erases
// translator kk from the translator set.
542  template < template < typename > class ALLOC >
543  void DatabaseTable< ALLOC >::eraseTranslators(const std::size_t k,
544  const bool k_is_input_col) {
545  for (const auto kk : __getKthIndices(k, k_is_input_col)) {
546  // erase the translator of index kk and the corresponding variable
547  // name. If there remains no more translator in the translator set,
548  // _rows should become empty
549  this->_variable_names.erase(this->_variable_names.begin() + kk);
550  if (this->_variable_names.empty()) {
// NOTE(review): the embedded listing numbers jump from 550 to 552; the
// statement of this branch (presumably the one emptying _rows, per the
// comment above) is not visible in this listing.
552  } else {
553  const std::size_t nb_trans = __translators.size();
554 
555  auto erase_lambda = [this, nb_trans, kk](std::size_t begin,
556  std::size_t end) -> void {
557  for (std::size_t i = begin; i < end; ++i) {
558  auto& row = this->_rows[i].row();
// if the erased cell was a missing value, the row's flag is reset to
// IsMissing::False when no other cell of the row is missing
559  if (this->__translators.isMissingValue(row[kk], kk)) {
560  bool has_missing_val = false;
561  for (std::size_t j = std::size_t(0); j < nb_trans; ++j) {
562  if ((j != kk) && this->__translators.isMissingValue(row[j], j)) {
563  has_missing_val = true;
564  break;
565  }
566  }
567  if (!has_missing_val)
568  this->_has_row_missing_val[i] = IsMissing::False;
569  }
570  row.erase(row.begin() + kk);
571  }
572  };
573 
// the undo functor is empty: an erased column is not restored
574  auto undo_erase_lambda = [](std::size_t begin, std::size_t end) -> void {
575  };
576 
577  // launch the threads executing the lambdas
578  this->__threadProcessDatabase(erase_lambda, undo_erase_lambda);
579  }
// the translator is erased last: erase_lambda above still queries it
580  __translators.eraseTranslator(kk);
581  }
582  }
583 
584 
// Accessor returning a const reference to the translator set __translators.
// NOTE(review): the embedded listing numbers jump from 587 to 589, so the line
// holding the qualified function name is not visible in this listing.
586  template < template < typename > class ALLOC >
587  INLINE const DBTranslatorSet< ALLOC >&
589  return __translators;
590  }
591 
592 
596  template < template < typename > class ALLOC >
597  INLINE std::size_t
598  DatabaseTable< ALLOC >::__getKthIndex(const std::size_t k,
599  const bool k_is_input_col) const {
600  if (k_is_input_col) {
601  const std::size_t nb_trans = __translators.size();
602  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
603  if (__translators.inputColumn(i) == k) { return i; }
604  }
605  return nb_trans + 1;
606  } else {
607  return k;
608  }
609  }
610 
611 
613  template < template < typename > class ALLOC >
614  const DBTranslator< ALLOC >&
615  DatabaseTable< ALLOC >::translator(const std::size_t k,
616  const bool k_is_input_col) const {
617  // find the position of the translator that we look for. This
618  // is variable kk below
619  const std::size_t nb_trans = __translators.size();
620  const std::size_t kk = __getKthIndex(k, k_is_input_col);
621 
622  // check if the translator exists
623  if (nb_trans <= kk) {
624  if (k_is_input_col) {
625  GUM_ERROR(UndefinedElement,
626  "there is no translator in the database table that "
627  << "parses Column " << k);
628  } else {
629  GUM_ERROR(UndefinedElement,
630  "the database has " << nb_trans
631  << " translators, so Translator #" << k
632  << " does not exist");
633  }
634  }
635 
636  return __translators.translator(kk);
637  }
638 
639 
641  template < template < typename > class ALLOC >
642  const Variable&
643  DatabaseTable< ALLOC >::variable(const std::size_t k,
644  const bool k_is_input_col) const {
645  // find the position of the translator that contains the variable.
646  // This is variable kk below
647  const std::size_t nb_trans = __translators.size();
648  const std::size_t kk = __getKthIndex(k, k_is_input_col);
649 
650  // check if the translator exists
651  if (nb_trans <= kk) {
652  if (k_is_input_col) {
653  GUM_ERROR(UndefinedElement,
654  "there is no variable in the database table that "
655  << "corresponds to Column " << k);
656  } else {
657  GUM_ERROR(UndefinedElement,
658  "the database has " << nb_trans << " variables, so Variable #"
659  << k << " does not exist");
660  }
661  }
662 
663  return __translators.variable(kk);
664  }
665 
666 
668  template < template < typename > class ALLOC >
670  const std::vector< std::string, ALLOC< std::string > >& names,
671  const bool from_external_object) {
672  const std::size_t size = names.size();
673  const std::size_t nb_trans = __translators.size();
674  if (!from_external_object) {
675  if (nb_trans != size) {
676  GUM_ERROR(SizeError,
677  "the number of variable's names (i.e., "
678  << size
679  << ") does not correspond to the number of columns of the "
680  << "database table (i.e.," << nb_trans << ")");
681  }
682 
683  // update the translator names
684  for (std::size_t i = std::size_t(0); i < size; ++i) {
685  __translators.translator(i).setVariableName(names[i]);
686  }
687  } else {
688  if (nb_trans && (__translators.highestInputColumn() >= size)) {
689  GUM_ERROR(SizeError,
690  "the names vector has "
691  << size << " elements whereas it should have at least "
692  << (__translators.highestInputColumn() + 1)
693  << "elements so that each translator is assigned a name");
694  }
695 
696  // update the translator names
697  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
698  __translators.translator(i).setVariableName(
699  names[__translators.inputColumn(i)]);
700  }
701  }
702 
703  // update _variable_names using the newly assigned translators names
704  this->_variable_names.resize(nb_trans);
705  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
706  this->_variable_names[i] = __translators.variable(i).name();
707  }
708  }
709 
710 
713  template < template < typename > class ALLOC >
714  void DatabaseTable< ALLOC >::ignoreColumn(const std::size_t k,
715  const bool k_is_input_col) {
716  // indicate that the column will be forbidden. If the column is already
717  // forbidden, do nothing. But if the column is assigned to a translator
718  // that does not exist, raise an UndefinedElement exception
719  const std::size_t nb_trans = __translators.size();
720  if (k_is_input_col) {
721  if (__ignored_cols.exists(k)) return;
722  __ignored_cols.insert(k);
723  } else {
724  if (k < nb_trans) {
725  __ignored_cols.insert(__translators.inputColumn(k));
726  } else {
727  GUM_ERROR(UndefinedElement,
728  "It is impossible to ignore the column parsed by Translator #"
729  << k << "because there exist only " << nb_trans
730  << " translators");
731  }
732  }
733 
734  // remove all the translators corresponding to k
735  eraseTranslators(k, k_is_input_col);
736  }
737 
738 
// Returns the input columns that no translator reads: every column index
// from 0 to highestInputColumn() that is not the input column of a
// translator, followed by highestInputColumn() + 1. When there is no
// translator, the result is the single index 0.
// NOTE(review): the embedded listing numbers jump from 741 to 743, so the line
// holding the qualified function name is not visible in this listing.
740  template < template < typename > class ALLOC >
741  const typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
743  const std::size_t nb_trans = __translators.size();
744 
745  if (nb_trans == std::size_t(0)) {
746  return DBVector< std::size_t >{std::size_t(0)};
747  }
748 
749  // get the columns handled by the translators, sorted by increasing order
750  DBVector< std::size_t > cols(nb_trans);
751  for (std::size_t i = std::size_t(0); i < nb_trans; ++i)
752  cols[i] = __translators.inputColumn(i);
753  std::sort(cols.begin(), cols.end());
754 
755  // create a vector with all the possible input columns
756  const std::size_t highest = __translators.highestInputColumn() + 1;
757  DBVector< std::size_t > ignored_cols(highest);
758  std::iota(ignored_cols.begin(), ignored_cols.end(), 0);
759 
760  // remove from ignored_cols the elements of cols
// ii and kk walk ignored_cols and cols from their last element downward,
// so that erasing position ii does not shift the positions still to visit;
// i and k count the visited columns / consumed translators and stop the
// loops before the unsigned ii / kk are used after wrapping around
761  for (std::size_t i = std::size_t(0),
762  ii = highest - 1,
763  k = std::size_t(0),
764  kk = nb_trans - 1;
765  i < highest;
766  ++i, --ii) {
767  if (cols[kk] == ii) {
768  ignored_cols.erase(ignored_cols.begin() + ii);
// skip all the translators reading the same input column ii
769  while ((k < nb_trans) && (cols[kk] == ii)) {
770  --kk;
771  ++k;
772  }
773  if (k == nb_trans) break;
774  }
775  }
776 
777  // add the column past the last translator
778  ignored_cols.push_back(highest);
779 
780  return ignored_cols;
781  }
782 
783 
// Returns a vector whose i-th element is the input column of the i-th
// translator (an empty vector when there is no translator).
// NOTE(review): the embedded listing numbers jump from 786 to 788, so the line
// holding the qualified function name is not visible in this listing.
785  template < template < typename > class ALLOC >
786  const typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
788  const std::size_t nb_trans = __translators.size();
789  if (nb_trans == std::size_t(0)) { return DBVector< std::size_t >(); }
790 
791  DBVector< std::size_t > input_cols(nb_trans);
792  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
793  input_cols[i] = __translators.inputColumn(i);
794  }
795  return input_cols;
796  }
797 
798 
800  template < template < typename > class ALLOC >
801  std::size_t
802  DatabaseTable< ALLOC >::domainSize(const std::size_t k,
803  const bool k_is_input_col) const {
804  // find the position kk of the translator that contains the variable
805  const std::size_t nb_trans = __translators.size();
806  const std::size_t kk = __getKthIndex(k, k_is_input_col);
807 
808  // check if the translator exists
809  if (nb_trans <= kk) {
810  if (k_is_input_col) {
811  GUM_ERROR(UndefinedElement,
812  "there is no variable in the database table that "
813  << "corresponds to Column " << k);
814  } else {
815  GUM_ERROR(UndefinedElement,
816  "the database has " << nb_trans << " variables, so Variable #"
817  << k << " does not exist");
818  }
819  }
820 
821  return __translators.domainSize(kk);
822  }
823 
824 
826  template < template < typename > class ALLOC >
827  INLINE std::vector< std::size_t, ALLOC< std::size_t > >
829  const std::size_t nb_trans = __translators.size();
830  std::vector< std::size_t > dom(nb_trans);
831  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
832  dom[i] = __translators.domainSize(i);
833  }
834  return dom;
835  }
836 
837 
838  // indicates whether a reordering is needed to make the kth
839  // translator sorted by lexicographical order
840  template < template < typename > class ALLOC >
841  bool DatabaseTable< ALLOC >::needsReordering(const std::size_t k,
842  const bool k_is_input_col) const {
843  // find the position kk of the translator that contains the variable
844  const std::size_t nb_trans = __translators.size();
845  const std::size_t kk = __getKthIndex(k, k_is_input_col);
846 
847  // check if the translator exists
848  if (nb_trans <= kk) {
849  if (k_is_input_col) {
850  GUM_ERROR(UndefinedElement,
851  "there is no translator in the database table that "
852  << "parses Column " << k);
853  } else {
854  GUM_ERROR(UndefinedElement,
855  "the database has " << nb_trans
856  << " translators, so Translator #" << k
857  << " does not exist");
858  }
859  }
860 
861  return __translators.needsReordering(kk);
862  }
863 
864 
865  // performs a reordering of the kth translator or of the first
866  // translator corresponding to the kth column of the input database
// Reorders the values of the translator designated by k and rewrites the
// corresponding column of every row accordingly.
// Throws UndefinedElement when the index computed by __getKthIndex does not
// designate an existing translator.
867  template < template < typename > class ALLOC >
868  void DatabaseTable< ALLOC >::reorder(const std::size_t k,
869  const bool k_is_input_col) {
870  // find the position kk of the translator that contains the variable
871  const std::size_t nb_trans = __translators.size();
872  const std::size_t kk = __getKthIndex(k, k_is_input_col);
873 
874  // check if the translator exists
875  if (nb_trans <= kk) {
876  if (k_is_input_col) {
877  GUM_ERROR(UndefinedElement,
878  "there is no translator in the database table that "
879  << "parses Column " << k);
880  } else {
881  GUM_ERROR(UndefinedElement,
882  "the database has " << nb_trans
883  << " translators, so Translator #" << k
884  << " does not exist");
885  }
886  }
887 
888  // if the translator is not designed for a discrete variable, there
889  // is no reordering to apply
// NOTE(review): the embedded listing numbers jump from 890 to 892; the second
// half of this condition (the value type compared against) is not visible.
890  if (__translators.translator(kk).getValType()
892  return;
893 
894  // get the translation to perform
895  auto updates = __translators.reorder(kk);
896  if (updates.empty()) return;
897 
// build a lookup table: new_values[old value] = new value. The table is
// enlarged whenever an old value exceeds its current size.
898  std::size_t size = updates.size();
899  std::vector< std::size_t, ALLOC< std::size_t > > new_values(size);
900  for (const auto& update : updates) {
901  if (update.first >= size) {
902  size = update.first + 1;
903  new_values.resize(size);
904  }
905  new_values[update.first] = update.second;
906  }
907 
908  // apply the translations
909  auto newtrans_lambda = [this, kk, &new_values](std::size_t begin,
910  std::size_t end) -> void {
911  for (std::size_t i = begin; i < end; ++i) {
912  auto& elt = this->_rows[i][kk].discr_val;
// the maximal std::size_t value is left untouched (it is the value
// __isRowCompatible also exempts from the domain check)
913  if (elt != std::numeric_limits< std::size_t >::max())
914  elt = new_values[elt];
915  }
916  };
917 
// the undo functor is empty: applied translations are not reverted
918  auto undo_newtrans_lambda = [](std::size_t begin, std::size_t end) -> void {
919  };
920 
921  // launch the threads executing the lambdas
922  this->__threadProcessDatabase(newtrans_lambda, undo_newtrans_lambda);
923  }
924 
925 
927  template < template < typename > class ALLOC >
928  INLINE void DatabaseTable< ALLOC >::reorder() {
929  const std::size_t nb_trans = __translators.size();
930  for (std::size_t i = std::size_t(0); i < nb_trans; ++i)
931  reorder(i, false);
932  }
933 
934 
// Translates a row given as strings, one translated value per translator,
// and inserts the result through insertRow, flagged IsMissing::True when at
// least one translated value is a missing value.
// An empty new_row is silently ignored. Throws SizeError when new_row has
// too few columns for the translators' highest input column.
// NOTE(review): the embedded listing numbers jump from 936 to 938, so the line
// holding the qualified function name is not visible in this listing.
936  template < template < typename > class ALLOC >
938  const std::vector< std::string, ALLOC< std::string > >& new_row) {
939  // check that the row can be fully translated, i.e., it contains enough
940  // columns to be translated
941  const std::size_t row_size = new_row.size();
942  if (row_size == std::size_t(0)) return;
943 
944  if (__translators.highestInputColumn() >= row_size) {
945  GUM_ERROR(SizeError,
946  "the new row has "
947  << row_size
948  << " columns whereas the database requires at least "
949  << (__translators.highestInputColumn() + 1) << " columns");
950  }
951 
952  // convert the new_row into a row of DBTranslatedValue
953  const std::size_t nb_trans = __translators.size();
954  Row< DBTranslatedValue > dbrow;
955  dbrow.reserve(nb_trans);
956  bool has_missing_val = false;
957  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
958  const DBTranslatedValue new_val(__translators.translate(new_row, i));
959  if (__translators.isMissingValue(new_val, i)) has_missing_val = true;
// NOTE(review): new_val is const, so std::move yields a const rvalue here
960  dbrow.pushBack(std::move(new_val));
961  }
962 
963  this->insertRow(std::move(dbrow),
964  has_missing_val ? IsMissing::True : IsMissing::False);
965  }
966 
967 
// Checks whether a row of translated values is compatible with the
// translators: it must contain exactly one value per translator and each
// value must be acceptable for the variable of the corresponding translator.
// Returns true when the row is compatible, false otherwise. A translator
// whose value type falls into the default branch raises NotImplementedYet.
970  template < template < typename > class ALLOC >
971  bool DatabaseTable< ALLOC >::__isRowCompatible(
972  const typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >&
973  row) const {
974  // check that the size of the row corresponds to that of the translators
975  const std::size_t row_size = row.size();
976  if (row_size != __translators.size()) return false;
977 
978  const auto& translators = __translators.translators();
979  for (std::size_t i = std::size_t(0); i < row_size; ++i) {
980  switch (translators[i]->getValType()) {
// NOTE(review): the embedded listing numbers jump from 980 to 982 and from
// 986 to 988; the two case labels of this switch are not visible. Judging
// from the fields read, the first branch handles discrete values (discr_val)
// and the second continuous values (cont_val) — confirm against the source.
// first branch: the value must be smaller than the domain size, unless it
// is the maximal std::size_t value
982  if ((row[i].discr_val >= translators[i]->domainSize())
983  && (row[i].discr_val != std::numeric_limits< std::size_t >::max()))
984  return false;
985  break;
986 
// second branch: the value must lie within the bounds of the variable,
// unless it is the maximal float value
988  const IContinuousVariable& var =
989  static_cast< const IContinuousVariable& >(
990  *(translators[i]->variable()));
991  if (((var.lowerBoundAsDouble() > (double)row[i].cont_val)
992  || (var.upperBoundAsDouble() < (double)row[i].cont_val))
993  && (row[i].cont_val != std::numeric_limits< float >::max()))
994  return false;
995  break;
996  }
997 
998  default:
999  GUM_ERROR(NotImplementedYet,
1000  "Translated value type not supported yet");
1001  }
1002  }
1003 
1004  return true;
1005  }
1006 
1007 
// Validates a row of DBTranslatedValue received by rvalue reference and then
// hands it over to a further call together with contains_missing_data.
//
// @param new_row the row to insert (rvalue reference).
// @param contains_missing_data forwarded unchanged as the last argument of
// the trailing call.
//
// When __isRowCompatible(new_row) is false, GUM_ERROR is invoked with
// SizeError if the row size differs from the number of translators, and with
// InvalidArgument otherwise.
//
// NOTE(review): the line holding the function name (listing line 1010) and
// the beginning of the trailing call (listing line 1029) are absent from this
// listing; the parameters suggest an insertRow overload that forwards to the
// base-class insertion -- confirm against the original file.
1009  template < template < typename > class ALLOC >
1011  typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >&&
1012  new_row,
1013  const typename DatabaseTable< ALLOC >::IsMissing contains_missing_data) {
1014  // check that the new row's values are compatible with the values of
1015  // the variables stored within the translators
1016  if (!__isRowCompatible(new_row)) {
// distinguish a wrong number of elements from out-of-domain values so that
// the reported error type and message match the actual problem
1017  if (new_row.size() != __translators.size()) {
1018  GUM_ERROR(SizeError,
1019  "The new row has "
1020  << new_row.size()
1021  << " elements whereas the database table has "
1022  << __translators.size() << " columns");
1023  } else {
1024  GUM_ERROR(InvalidArgument,
1025  "the new row is not compatible with the current translators");
1026  }
1027  }
1028 
// tail of the forwarding call (its first line is not part of this listing)
1030  contains_missing_data);
1031  }
1032 
1033 
// Validates a row of DBTranslatedValue received by const reference and then
// hands it over to a further call together with contains_missing_data.
//
// @param new_row the row to insert (const reference).
// @param contains_missing_data forwarded unchanged as the last argument of
// the trailing call.
//
// When __isRowCompatible(new_row) is false, GUM_ERROR is invoked with
// SizeError if the row size differs from the number of translators, and with
// InvalidArgument otherwise.
//
// NOTE(review): the line holding the function name (listing line 1036) and
// the beginning of the trailing call (listing line 1055) are absent from this
// listing; the parameters suggest an insertRow overload that forwards to the
// base-class insertion -- confirm against the original file.
1035  template < template < typename > class ALLOC >
1037  const typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >&
1038  new_row,
1039  const typename DatabaseTable< ALLOC >::IsMissing contains_missing_data) {
1040  // check that the new row's values are compatible with the values of
1041  // the variables stored within the translators
1042  if (!__isRowCompatible(new_row)) {
// distinguish a wrong number of elements from out-of-domain values so that
// the reported error type and message match the actual problem
1043  if (new_row.size() != __translators.size()) {
1044  GUM_ERROR(SizeError,
1045  "The new row has "
1046  << new_row.size()
1047  << " elements whereas the database table has "
1048  << __translators.size() << " columns");
1049  } else {
1050  GUM_ERROR(InvalidArgument,
1051  "the new row is not compatible with the current translators");
1052  }
1053  }
1054 
// tail of the forwarding call (its first line is not part of this listing)
1056  contains_missing_data);
1057  }
1058 
1059 
1060  // insert a new DBRow of DBCells at the end of the database
// This overload (row of DBCell received by const reference) is not
// supported: its body only invokes GUM_ERROR(NotImplementedYet, ...) and
// never reads new_row.
// NOTE(review): the line holding the function name (listing line 1062) is
// absent from this listing; presumably an insertRow overload -- confirm.
1061  template < template < typename > class ALLOC >
1063  const typename DatabaseTable< ALLOC >::template Row< DBCell >& new_row) {
1064  GUM_ERROR(NotImplementedYet, "not implemented yet");
1065  }
1066 
1067  // insert a new DBRow of DBCells at the end of the database
// This overload (row of DBCell received by rvalue reference) is not
// supported: its body only invokes GUM_ERROR(NotImplementedYet, ...) and
// never reads new_row.
// NOTE(review): the line holding the function name (listing line 1069) is
// absent from this listing; presumably an insertRow overload -- confirm.
1068  template < template < typename > class ALLOC >
1070  typename DatabaseTable< ALLOC >::template Row< DBCell >&& new_row) {
1071  GUM_ERROR(NotImplementedYet, "not implemented yet");
1072  }
1073 
1074 
// Validates a whole matrix of DBTranslatedValue rows received by rvalue
// reference and then hands it over, moved, to a further call together with
// rows_have_missing_vals.
//
// @param rows the rows to insert (rvalue reference).
// @param rows_have_missing_vals forwarded unchanged as the last argument of
// the trailing call.
//
// Every row is passed to __isRowCompatible before the trailing call is
// reached. For the first row that fails, GUM_ERROR is invoked with SizeError
// if the row size differs from the number of translators, and with
// InvalidArgument otherwise.
//
// NOTE(review): the line holding the function name (listing line 1077) and
// the beginning of the trailing call (listing line 1100) are absent from this
// listing; the parameters suggest an insertRows overload that forwards to the
// base-class insertion -- confirm against the original file.
1076  template < template < typename > class ALLOC >
1078  typename DatabaseTable< ALLOC >::template Matrix< DBTranslatedValue >&&
1079  rows,
1080  const typename DatabaseTable< ALLOC >::template DBVector< IsMissing >&
1081  rows_have_missing_vals) {
1082  // check that the new rows' values are compatible with the values of
1083  // the variables stored within the translators
1084  for (const auto& new_row : rows) {
1085  if (!__isRowCompatible(new_row)) {
// distinguish a wrong number of elements from out-of-domain values so that
// the reported error type and message match the actual problem
1086  if (new_row.size() != __translators.size()) {
1087  GUM_ERROR(SizeError,
1088  "The new row has "
1089  << new_row.size()
1090  << " elements whereas the database table has "
1091  << __translators.size() << " columns");
1092  } else {
1093  GUM_ERROR(
1094  InvalidArgument,
1095  "the new row is not compatible with the current translators");
1096  }
1097  }
1098  }
1099 
// tail of the forwarding call (its first line is not part of this listing);
// the matrix is moved into it
1101  std::move(rows), rows_have_missing_vals);
1102  }
1103 
1104 
// Validates a whole matrix of DBTranslatedValue rows received by const
// reference and then hands it over to a further call together with
// rows_have_missing_vals.
//
// @param new_rows the rows to insert (const reference).
// @param rows_have_missing_vals forwarded unchanged as the last argument of
// the trailing call.
//
// Every row is passed to __isRowCompatible before the trailing call is
// reached. For the first row that fails, GUM_ERROR is invoked with SizeError
// if the row size differs from the number of translators, and with
// InvalidArgument otherwise.
//
// NOTE(review): the line holding the function name (listing line 1107) and
// the beginning of the trailing call (listing line 1130) are absent from this
// listing; the parameters suggest an insertRows overload that forwards to the
// base-class insertion -- confirm against the original file.
1106  template < template < typename > class ALLOC >
1108  const typename DatabaseTable< ALLOC >::template Matrix< DBTranslatedValue >&
1109  new_rows,
1110  const typename DatabaseTable< ALLOC >::template DBVector< IsMissing >&
1111  rows_have_missing_vals) {
1112  // check that the new rows' values are compatible with the values of
1113  // the variables stored within the translators
1114  for (const auto& new_row : new_rows) {
1115  if (!__isRowCompatible(new_row)) {
// distinguish a wrong number of elements from out-of-domain values so that
// the reported error type and message match the actual problem
1116  if (new_row.size() != __translators.size()) {
1117  GUM_ERROR(SizeError,
1118  "The new row has "
1119  << new_row.size()
1120  << " elements whereas the database table has "
1121  << __translators.size() << " columns");
1122  } else {
1123  GUM_ERROR(
1124  InvalidArgument,
1125  "the new row is not compatible with the current translators");
1126  }
1127  }
1128  }
1129 
// tail of the forwarding call (its first line is not part of this listing)
1131  new_rows, rows_have_missing_vals);
1132  }
1133 
1134 
// Overload taking a matrix of DBCell by rvalue reference. It is not
// supported: its body only invokes GUM_ERROR(NotImplementedYet, ...) and
// never reads new_rows.
// NOTE(review): the line holding the function name (listing line 1137) is
// absent from this listing; presumably an insertRows overload -- confirm.
1136  template < template < typename > class ALLOC >
1138  typename DatabaseTable< ALLOC >::template Matrix< DBCell >&& new_rows) {
1139  GUM_ERROR(NotImplementedYet, "not implemented yet");
1140  }
1141 
1142 
// Overload taking a matrix of DBCell by const reference. It is not
// supported: its body only invokes GUM_ERROR(NotImplementedYet, ...) and
// never reads new_rows.
// NOTE(review): the line holding the function name (listing line 1145) is
// absent from this listing; presumably an insertRows overload -- confirm.
1144  template < template < typename > class ALLOC >
1146  const typename DatabaseTable< ALLOC >::template Matrix< DBCell >&
1147  new_rows) {
1148  GUM_ERROR(NotImplementedYet, "not implemented yet");
1149  }
1150 
1151 
// Empties the translator set and the container of ignored columns.
// NOTE(review): the signature line (listing line 1154) and one statement
// (listing line 1157) are absent from this listing; this is presumably
// DatabaseTable< ALLOC >::clear() and the missing statement presumably clears
// the base-class content as well -- confirm against the original file.
1153  template < template < typename > class ALLOC >
1155  __translators.clear();
1156  __ignored_cols.clear();
1158  }
1159 
1160 
1161  } /* namespace learning */
1162 
1163 } /* namespace gum */
1164 
1165 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
virtual ~DatabaseTable()
destructor
void insertRow(const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row)
insert a new row at the end of the database
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true) final
makes the database table ignore from now on the kth column of the input dataset or the column parsed ...
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
ALLOC< DBTranslatedValue > getAllocator() const
returns the allocator of the database
const DBTranslatorSet< ALLOC > & translatorSet() const
returns the set of translators
std::size_t size() const noexcept
returns the number of records (rows) in the database
virtual void insertRow(const std::vector< std::string, ALLOC< std::string > > &new_row) final
insert a new row at the end of the database
STL namespace.
IDatabaseTable< T_DATA, ALLOC > & operator=(const IDatabaseTable< T_DATA, ALLOC > &from)
copy operator
void eraseAllRows()
erase all the rows
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
ALLOC< DBTranslatedValue > allocator_type
Types for STL compliance.
bool needsReordering(const std::size_t k, const bool k_is_input_col=false) const
indicates whether a reordering is needed to sort the translations of the kth translator or those of t...
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
IDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const ALLOC< DBTranslatedValue > &alloc)
default constructor
virtual DatabaseTable< ALLOC > * clone() const final
virtual copy constructor
DBVector< std::string > _variable_names
the names of the variables for each column
bool empty() const noexcept
indicates whether the database contains some records or not
virtual const DBVector< std::size_t > inputColumns() const final
returns the set of columns of the original dataset that are present in the DatabaseTable ...
const iterator & end() const noexcept
returns a new unsafe handler pointing to the end of the database
const DBTranslator< ALLOC > & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
std::size_t domainSize(const std::size_t k, const bool k_is_input_col=false) const
returns the domain size of the kth variable of the database table or of that of the first one corresp...
virtual const DBVector< std::size_t > ignoredColumns() const final
returns the set of columns of the original dataset that are ignored
DatabaseTable< ALLOC > & operator=(const DatabaseTable< ALLOC > &from)
copy operator
virtual void insertRows(Matrix< T_DATA > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
insert a set of new DBRows at the end of the database
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
virtual void insertRows(Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) final
insert a set of new DBRows at the end of the database
virtual void clear() final
erase the content of the database, including the names of the variables
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
typename IDatabaseTable< DBTranslatedValue, ALLOC >::IsMissing IsMissing
iterator begin() const
returns a new unsafe handler pointing to the beginning of the database
DatabaseTable(const MissingValType< XALLOC > &missing_symbols, const DBTranslatorSet< ALLOC > &translators=DBTranslatorSet< ALLOC >(), const allocator_type &alloc=allocator_type())
default constructor
void eraseTranslators(const std::size_t k, const bool k_is_input_col=false)
erases either the kth translator or all those parsing the kth column of the input dataset ...
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55
virtual void clear()
erase the content of the database, including the names of the variables
void reorder()
performs a reordering of all the columns