aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
databaseTable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The implementation of tabular databases stored in memory (RAM)
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #include <agrum/tools/database/databaseTable.h>
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 
31 namespace gum {
32 
33  namespace learning {
34 
35 
36  // default constructor
37  template < template < typename > class ALLOC >
38  template < template < typename > class XALLOC >
39  DatabaseTable< ALLOC >::DatabaseTable(
40  const typename DatabaseTable< ALLOC >::template MissingValType< XALLOC >& missing_symbols,
41  const DBTranslatorSet< ALLOC >& translators,
42  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
43  IDatabaseTable< DBTranslatedValue, ALLOC >(
44  missing_symbols,
45  std::vector< std::string, ALLOC< std::string > >(),
46  alloc),
47  _translators_(translators, alloc) {
48  if (translators.size()) {
49  // set the variables names according to those of the translators
50  std::vector< std::string, ALLOC< std::string > > var_names(translators.size());
51  for (std::size_t i = std::size_t(0), size = translators.size(); i < size; ++i) {
52  var_names[i] = _translators_.translator(i).variable()->name();
53  }
54  setVariableNames(var_names, false);
55  }
56 
57  GUM_CONSTRUCTOR(DatabaseTable);
58  }
59 
60 
61  // default constructor
62  template < template < typename > class ALLOC >
65  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
67  std::vector< std::string, ALLOC< std::string > >(),
68  std::vector< std::string, ALLOC< std::string > >(),
69  alloc),
71  if (translators.size()) {
72  // set the variables names according to those of the translators
74  for (std::size_t i = std::size_t(0), size = translators.size(); i < size; ++i) {
76  }
78  }
79 
81  }
82 
83 
84  // copy constructor with a given allocator
85  template < template < typename > class ALLOC >
87  const DatabaseTable< ALLOC >& from,
88  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
92  }
93 
94 
95  // copy constructor
96  template < template < typename > class ALLOC >
99 
100 
101  // move constructor with a given allocator
102  template < template < typename > class ALLOC >
104  DatabaseTable< ALLOC >&& from,
105  const typename DatabaseTable< ALLOC >::allocator_type& alloc) :
110  }
111 
112 
113  // move constructor
114  template < template < typename > class ALLOC >
117 
118 
119  // virtual copy constructor with a given allocator
120  template < template < typename > class ALLOC >
122  const typename DatabaseTable< ALLOC >::allocator_type& alloc) const {
125  try {
126  allocator.construct(new_db, *this, alloc);
127  } catch (...) {
129  throw;
130  }
131 
132  return new_db;
133  }
134 
135 
136  // virtual copy constructor
137  template < template < typename > class ALLOC >
138  DatabaseTable< ALLOC >* DatabaseTable< ALLOC >::clone() const {
139  return clone(this->getAllocator());
140  }
141 
142 
143  // destructor
144  template < template < typename > class ALLOC >
147  }
148 
149 
150  // copy operator
151  template < template < typename > class ALLOC >
153  if (this != &from) {
157  }
158 
159  return *this;
160  }
161 
162 
163  // move operator
164  template < template < typename > class ALLOC >
166  if (this != &from) {
170  }
171 
172  return *this;
173  }
174 
175 
176  // a method to process the rows of the database in multithreading
177  template < template < typename > class ALLOC >
178  template < typename Functor1, typename Functor2 >
180  // compute the number of threads to execute the code
181  const std::size_t nb_threads = this->nbProcessingThreads_();
182 
183  // if there is just one thread, let it process all the rows
184  if (nb_threads == 1) {
185  const std::size_t db_size = this->nbRows();
186  exec_func(std::size_t(0), db_size, 0);
187  return;
188  }
189 
190  // if there are multiple threads, compute the ranges of rows they should process
191  const std::vector< std::pair< std::size_t, std::size_t > > ranges =
193 
194  // here, we shall create the threads, but also one std::exception_ptr
195  // for each thread. This will allow us to catch the exception raised
196  // by the threads
197  std::vector< std::thread > threads;
200 
201  // create a lambda that will execute exec_func while catching its exceptions
202  auto real_exec_func
203  = [&exec_func](std::size_t begin,
204  std::size_t end,
205  std::size_t index,
206  std::exception_ptr& exc) -> void {
207  try {
209  } catch (...) { exc = std::current_exception(); }
210  };
211 
212  // launch the threads
213  for (std::size_t i = std::size_t(0); i < nb_threads; ++i) {
215  ranges[i].first,
216  ranges[i].second,
217  i,
218  std::ref(func_exceptions[i])));
219  }
220 
221  // wait for the threads to complete their executions
223 
224  // now, check if one exception has been raised
225  bool exception_raised = false;
226  for (const auto& exc: func_exceptions) {
227  if (exc != nullptr) {
228  exception_raised = true;
229  break;
230  }
231  }
232 
233  if (exception_raised) {
234  // create a lambda that will execute undo_func while catching
235  // its exceptions
236  auto real_undo_func
237  = [&undo_func](std::size_t begin,
238  std::size_t end,
239  std::size_t index,
240  std::exception_ptr& exc) -> void {
241  try {
243  } catch (...) { exc = std::current_exception(); }
244  };
245 
246  // launch the repair threads
247  threads.clear();
249  for (std::size_t i = std::size_t(0); i < nb_threads; ++i) {
250  // we just need to repair the threads that did not raise exceptions
251  if (func_exceptions[i] == nullptr)
253  ranges[i].first,
254  ranges[i].second,
255  i,
257  }
258 
259  // wait for the threads to complete their executions
261 
262  // rethrow the exception
263  for (const auto& exc: func_exceptions) {
264  if (exc != nullptr) { std::rethrow_exception(exc); }
265  }
266  }
267  }
268 
269 
270  /// insert a new translator into the database
271  template < template < typename > class ALLOC >
273  const std::size_t input_column,
274  const bool unique_column) {
275  // check that there is no ignored_column corresponding to column
278  "Column " << input_column << " is marked as being ignored. "
279  << "So it is forbidden to create a translator for that column.")
280 
281  // reserve some place for the new column in the records of the database
282  const std::size_t new_size = this->nbVariables() + 1;
283 
284  // create the lambda for reserving some memory for the new column
285  // and the one that undoes what it performed if some thread executing
286  // it raised an exception
287  auto reserve_lambda = [this, new_size](std::size_t begin,
288  std::size_t end,
289  std::size_t index) -> void {
290  for (std::size_t i = begin; i < end; ++i)
291  this->rows_[i].row().reserve(new_size);
292  };
293 
294  auto undo_reserve_lambda = [](std::size_t begin,
295  std::size_t end,
296  std::size_t index) -> void {
297  };
298 
299  // launch the threads executing the lambdas
301 
302  // insert the translator into the translator set
303  const std::size_t pos
305 
306  // insert the name of the translator's variable to the set of variable names
307  try {
309  } catch (...) {
311  throw;
312  }
313 
314  // if the databaseTable is not empty, fill the column of the database
315  // corresponding to the translator with missing values
318 
319  // create the lambda for adding a new column filled wih a missing value
320  auto fill_lambda = [this, missing](std::size_t begin,
321  std::size_t end,
322  std::size_t index) -> void {
323  std::size_t i = begin;
324  try {
325  for (; i < end; ++i) {
326  this->rows_[i].row().push_back(missing);
328  }
329  } catch (...) {
330  for (std::size_t j = begin; j < i; ++j)
331  this->rows_[i].row().pop_back();
332  throw;
333  }
334  };
335 
336  auto undo_fill_lambda = [this](std::size_t begin,
337  std::size_t end,
338  std::size_t index) -> void {
339  for (std::size_t i = begin; i < end; ++i)
340  this->rows_[i].row().pop_back();
341  };
342 
343  // launch the threads executing the lambdas
345  }
346 
347  return pos;
348  }
349 
350 
351  /// insert a new translator into the database
352  template < template < typename > class ALLOC >
354  const std::size_t input_column,
355  const bool unique_column) {
356  // check that there is no ignored_column corresponding to column
359  "Column " << input_column << " is marked as being ignored. "
360  << "So it is forbidden to create a translator for that column.")
361 
362  // if the databaseTable is not empty, we should fill the column of the
363  // database corresponding to the new translator with missing values. But, the
364  // current method assumes that the list of missing values is empty. Hence, it
365  // should raise an exception
368  "inserting a new translator into a database creates a new column "
369  << "with missing values. However, you did not define any symbol for "
370  << "such values.")
371  }
372 
373  // reserve some place for the new column in the records of the database
374  const std::size_t new_size = this->nbVariables() + 1;
375 
376  // create the lambda for reserving some memory for the new column
377  // and the one that undoes what it performed if some thread executing
378  // it raised an exception
379  auto reserve_lambda = [this, new_size](std::size_t begin,
380  std::size_t end,
381  std::size_t index) -> void {
382  for (std::size_t i = begin; i < end; ++i)
383  this->rows_[i].row().reserve(new_size);
384  };
385 
386  auto undo_reserve_lambda = [](std::size_t begin,
387  std::size_t end,
388  std::size_t index) -> void {
389  };
390 
391  // launch the threads executing the lambdas
393 
394  // insert the translator into the translator set
396 
397  // insert the name of the translator's variable to the set of variable names
398  try {
400  } catch (...) {
402  throw;
403  }
404 
405  return pos;
406  }
407 
408 
409  /// insert a new translator into the database
410  template < template < typename > class ALLOC >
411  template < template < typename > class XALLOC >
413  const Variable& var,
414  const std::size_t input_column,
416  const bool unique_column) {
417  // check that there is no ignored_column corresponding to column
420  "Column " << input_column << " is marked as being ignored. "
421  << "So it is forbidden to create a translator for that column.")
422 
423  // reserve some place for the new column in the records of the database
424  const std::size_t new_size = this->nbVariables() + 1;
425 
426  // create the lambda for reserving some memory for the new column
427  // and the one that undoes what it performed if some thread executing
428  // it raised an exception
429  auto reserve_lambda = [this, new_size](std::size_t begin,
430  std::size_t end,
431  std::size_t index) -> void {
432  for (std::size_t i = begin; i < end; ++i)
433  this->rows_[i].row().reserve(new_size);
434  };
435 
436  auto undo_reserve_lambda = [](std::size_t begin,
437  std::size_t end,
438  std::size_t index) -> void {
439  };
440 
441  // launch the threads executing the lambdas
443 
444  // insert the translator into the translator set
445  const std::size_t pos
447 
448  // insert the name of the translator's variable to the set of variable names
449  try {
451  } catch (...) {
453  throw;
454  }
455 
456  // if the databaseTable is not empty, fill the column of the database
457  // corresponding to the translator with missing values
460 
461  // create the lambda for adding a new column filled wih a missing value
462  auto fill_lambda = [this, missing](std::size_t begin,
463  std::size_t end,
464  std::size_t index) -> void {
465  std::size_t i = begin;
466  try {
467  for (; i < end; ++i) {
468  this->rows_[i].row().push_back(missing);
470  }
471  } catch (...) {
472  for (std::size_t j = begin; j < i; ++j)
473  this->rows_[i].row().pop_back();
474  throw;
475  }
476  };
477 
478  auto undo_fill_lambda = [this](std::size_t begin,
479  std::size_t end,
480  std::size_t index) -> void {
481  for (std::size_t i = begin; i < end; ++i)
482  this->rows_[i].row().pop_back();
483  };
484 
485  // launch the threads executing the lambdas
487  }
488 
489  return pos;
490  }
491 
492 
493  /** @brief returns the indices corresponding either to the kth translator
494  * or to all those that parse the kth column of the input dataset
495  *
496  * @warning the indices are sorted by decreasing order */
497  template < template < typename > class ALLOC >
498  INLINE typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
500  const bool k_is_input_col) const {
501  const std::size_t nb_trans = _translators_.size();
502  if (!k_is_input_col) {
503  if (k < nb_trans)
504  return DBVector< std::size_t >{k};
505  else
506  return DBVector< std::size_t >();
507  } else {
508  DBVector< std::size_t > trans;
509  for (std::size_t i = std::size_t(0), kk = nb_trans - 1; i < nb_trans; ++i, --kk) {
511  }
512  return trans;
513  }
514  }
515 
516 
517  // erases the kth translator or all those parsing the kth column of
518  // the input dataset
519  template < template < typename > class ALLOC >
520  void DatabaseTable< ALLOC >::eraseTranslators(const std::size_t k, const bool k_is_input_col) {
521  for (const auto kk: _getKthIndices_(k, k_is_input_col)) {
522  // erase the translator of index kk and the corresponding variable
523  // name. If there remains no more translator in the translator set,
524  // rows_ should become empty
525  this->variable_names_.erase(this->variable_names_.begin() + kk);
526  if (this->variable_names_.empty()) {
528  } else {
529  const std::size_t nb_trans = _translators_.size();
530 
531  auto erase_lambda = [this, nb_trans, kk](std::size_t begin,
532  std::size_t end,
533  std::size_t index) -> void {
534  for (std::size_t i = begin; i < end; ++i) {
535  auto& row = this->rows_[i].row();
536  if (this->_translators_.isMissingValue(row[kk], kk)) {
537  bool has_missing_val = false;
538  for (std::size_t j = std::size_t(0); j < nb_trans; ++j) {
539  if ((j != kk) && this->_translators_.isMissingValue(row[j], j)) {
540  has_missing_val = true;
541  break;
542  }
543  }
545  }
546  row.erase(row.begin() + kk);
547  }
548  };
549 
550  auto undo_erase_lambda = [](std::size_t begin,
551  std::size_t end,
552  std::size_t index) -> void {
553  };
554 
555  // launch the threads executing the lambdas
557  }
559  }
560  }
561 
562 
563  /// change the translator of a database column
// Substitutes the translator of one database column by another one.
//
// NOTE(review): many lines of this definition, including its signature
// line, are absent from this listing; the comments below only describe the
// statements that are visible.
//
// Visible outline:
//  1. locate the column (db_k) and report an error if it cannot be found;
//  2. if the table is empty, perform the substitution directly and return;
//  3. otherwise refuse to substitute a lossy translator;
//  4. scan the rows, in multithreading, for missing values (case 1/) and
//     for values the new translator cannot translate (case 2/), and report
//     an error if the substitution would be ambiguous or impossible;
//  5. rewrite column db_k of every row, then update the name and the
//     translator set.
//
// @param k index of a translator or of an input column, depending on
//        k_is_input_col
// @param k_is_input_col indicates how k should be interpreted
564  template < template < typename > class ALLOC >
566  const std::size_t k,
567  const bool k_is_input_col) {
568  // get the index of the column in the database. If it is not found, indicate that
569  // the substitution is impossible
// NOTE(review): elsewhere in this file _getKthIndices_ is documented as
// returning "the indices" and is iterated over as a container, whereas db_k
// is compared with size() here and used as a subscript (row[db_k]) below.
// The single-index lookup was presumably intended - confirm.
570  const auto db_k = _getKthIndices_(k, k_is_input_col);
571  if (db_k >= _translators_.size()) {
// NOTE(review): "cannot be found." has no leading space, so it is appended
// directly after the value of db_k in the message.
573  "the translator at position " << k << '/' << db_k <<
574  "cannot be found.");
575  }
576 
577  // if the dataset does not contain any data, we can safely substitute the old translator
578  // by the new one
579  if (this->empty()) {
580  // keep into account the name of the new translator
582 
583  // substitute in the translator's set the old translator by the new one
585 
586  return;
587  }
588 
589  // get the translator and check that it is not lossy: as, here, there are some data,
590  // we cannot always ensure that there won't be some loss of information substituting
591  // one translator by another
593  if (!old_translator.isLossless()) {
594  // for the moment, we consider that it is impossible to substitute lossy translators
595  // because we may have already lost information that are necessary for the new
596  // translator
598  "Lossy translators cannot yet be substituted by other translators");
599  }
600 
601  const std::size_t nb_threads = this->nbProcessingThreads_();
602 
603  // how missing values will be translated
606 
607  // Now, we should compute the mapping from the values and missing symbols of the old
608  // translator to those of the new one.
609 
610  // When the database already contains some data, we must ensure that we will be able to
611  // substitute the old translator by the new one without losing any information. Possible
612  // loss of information may occur in the following cases:
613  // 1/ if the set of missing symbols of the old translator is not a singleton and some of its
614  // missing symbols do not belong to the set of missing symbols of the new translator.
615  // In this case, the translation of this symbol by the new translator should either raise
616  // an exception because the new translator does not know how to handle it, or should
617  // produce a DBTranslatedValue if the new translator thinks this is an observed value.
618  // Now, the problem is that when observing a missing symbol in the database, we have no
619  // way to determine to which above case this should correspond. Hence the substitution
620  // cannot be made unambiguously.
621  // 2/ if the set of (non-missing) values of the old translator is not included in the one
622  // of the new translator
623  // If one of these cases occur, before performing the translation, we must parse the content
624  // of the database: if case 1/ obtains and if the database contains some missing symbols,
625  // then we cannot unambiguously substitute the old translator by the new one, hence an error.
626  // If case 2/ obtains, we must check that all the observed values currently stored into the
627  // database also belong to the set of values the new translator is capable of translating.
// NOTE(review): the empty case already returned above, so this condition is
// always true when reached.
628  if (!this->empty()) {
629  // to test case 1, we first determine whether the dataset contains some
630  // missing values
631  bool has_missing_value = false;
632  {
// one flag per thread: each thread only writes the slot given by its index
633  std::vector< int > missing_values(nb_threads, 0);
634 
635  // a lambda to parse all the translated values for missing symbols
636  auto get_lambda = [this, db_k, &missing_values](std::size_t begin,
637  std::size_t end,
638  std::size_t index) -> void {
639  for (std::size_t i = begin; i < end; ++i) {
640  auto& row = this->rows_[i].row();
641  if (this->_translators_.isMissingValue(row[db_k], db_k)) {
642  missing_values[index] = 1;
643  return;
644  }
645  }
646  };
647 
// the scan only reads the rows, hence there is nothing to undo
648  auto undo_get_lambda = [](std::size_t begin, std::size_t end, std::size_t index) -> void {
649  };
650 
651  // launch the threads executing the lambdas
653 
654  // if has_missing_values has at least one value 1, there are missing values
655  for (const auto x: missing_values) {
656  if (x) {
657  has_missing_value = true;
658  break;
659  }
660  }
661  }
662 
663  // test for case 1/
669  // here, we know that the database contains missing values
670  // and we cannot unambiguously perform the translator's substitution
672  "it is impossible to substitute the translator because "
673  "the database contains some missing values that cannot be "
674  "substituted unambiguously");
675  }
676 
677  // if the database contains some missing values, two cases can obtain:
678  // a/ old_miss_included is true, in which case all the old missing values
679  // will be translated as missing values in the new translator.
680  // In this case, there is no translation problem.
681  // b/ old_miss_included is false. In this case, we know that there is only
682  // one old missing symbol, which is not included in the set of missing
683  // symbols of the new translator. If we can translate its symbol as a
684  // "proper" value in the new translator, that's ok, otherwise we cannot
685  // perform the substitution.
687  try {
689  }
690  catch (Exception&) {
692  "it is impossible to substitute the translator because "
693  "the database contains some missing values that cannot be "
694  "substituted");
695  }
696  }
697 
698  // compute the mapping of the missing symbol if this one does not correspond
699  // to a missing value in the new translator
703  }
704 
705  // test for case 2/ (if the set of (non-missing) values of the old translator is
706  // not included in the one of the new translator)
707 
708  // now, parse the database and check that all the values contained in the
709  // database can be translated
// one flag per thread: each thread only writes the slot given by its index
710  std::vector< int > unmapped(nb_threads, 0);
711 
712  // a lambda to parse all the translated values
715  std::size_t end, std::size_t index) -> void {
717  for (std::size_t i = begin; i < end; ++i) {
718  auto& row = this->rows_[i].row();
719  if (row[db_k] != old_miss) {
720  try {
722  } catch (Exception&) {
723  // ok, here, the translation is impossible
724  unmapped[index] = 1;
725  return;
726  }
727  }
728  }
729  };
730 
// the check only reads the rows, hence there is nothing to undo
731  auto undo_check_lambda = [](std::size_t begin,
732  std::size_t end,
733  std::size_t index) -> void {};
734 
735  // launch the threads executing the lambdas
737 
738  // if unmapped has at least one value 1, there are values that we don't know how to translate
739  for (const auto x: unmapped) {
740  if (x) {
742  "The database contains some values that cannot be translated "
743  "using the new translator");
744  }
745  }
746  }
747 
748  // here, we know that we can perform the translator's substitution, so
749  // let's do it
752  std::size_t end,
753  std::size_t index) -> void {
755  for (std::size_t i = begin; i < end; ++i) {
756  auto& row = this->rows_[i].row();
757  if (row[db_k] == old_miss) {
759  } else {
761  }
762  }
763  };
764 
// NOTE(review): the undo lambda is empty, so a failure while rewriting the
// column is not rolled back by this lambda.
765  auto undo_change_lambda = [](std::size_t begin,
766  std::size_t end,
767  std::size_t index) -> void {};
768 
769  // launch the threads executing the lambdas
771 
772  // keep into account the name of the new translator
774 
775  // substitute in the translator's set the old translator by the new one
777  }
778 
779 
780  /// change the translator of a database column
781  template < template < typename > class ALLOC >
783  const std::size_t k,
784  const bool k_is_input_col) {
785 
786  }
787 
788 
789 
790 
791 
792 
793 
794 
795 
796 
797  /// returns the set of translators
798  template < template < typename > class ALLOC >
800  return _translators_;
801  }
802 
803 
804  /** @brief returns the index corresponding either to the kth translator or
805  * to that of the first translator parsing the kth column of the
806  * input dataset */
807  template < template < typename > class ALLOC >
809  const bool k_is_input_col) const {
810  if (k_is_input_col) {
811  const std::size_t nb_trans = _translators_.size();
812  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
813  if (_translators_.inputColumn(i) == k) { return i; }
814  }
815  return nb_trans + 1;
816  } else {
817  return k;
818  }
819  }
820 
821 
822  /// returns the kth translator of the database
823  template < template < typename > class ALLOC >
824  const DBTranslator< ALLOC >&
825  DatabaseTable< ALLOC >::translator(const std::size_t k, const bool k_is_input_col) const {
826  // find the position of the translator that we look for. This
827  // is variable kk below
828  const std::size_t nb_trans = _translators_.size();
830 
831  // check if the translator exists
832  if (nb_trans <= kk) {
833  if (k_is_input_col) {
835  "there is no translator in the database table that "
836  << "parses Column " << k)
837  } else {
839  "the database has " << nb_trans << " translators, so Translator #" << k
840  << " does not exist")
841  }
842  }
843 
844  return _translators_.translator(kk);
845  }
846 
847 
848  /// returns the kth variable of the database
849  template < template < typename > class ALLOC >
850  const Variable& DatabaseTable< ALLOC >::variable(const std::size_t k,
851  const bool k_is_input_col) const {
852  // find the position of the translator that contains the variable.
853  // This is variable kk below
854  const std::size_t nb_trans = _translators_.size();
856 
857  // check if the translator exists
858  if (nb_trans <= kk) {
859  if (k_is_input_col) {
861  "there is no variable in the database table that "
862  << "corresponds to Column " << k)
863  } else {
865  "the database has " << nb_trans << " variables, so Variable #" << k
866  << " does not exist")
867  }
868  }
869 
870  return _translators_.variable(kk);
871  }
872 
873 
874  /// sets the names of the variables
875  template < template < typename > class ALLOC >
877  const std::vector< std::string, ALLOC< std::string > >& names,
878  const bool from_external_object) {
879  const std::size_t size = names.size();
880  const std::size_t nb_trans = _translators_.size();
881  if (!from_external_object) {
882  if (nb_trans != size) {
884  "the number of variable's names (i.e., "
885  << size << ") does not correspond to the number of columns of the "
886  << "database table (i.e.," << nb_trans << ")")
887  }
888 
889  // update the translator names
890  for (std::size_t i = std::size_t(0); i < size; ++i) {
892  }
893  } else {
896  "the names vector has "
897  << size << " elements whereas it should have at least "
899  << "elements so that each translator is assigned a name")
900  }
901 
902  // update the translator names
903  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
905  }
906  }
907 
908  // update variable_names_ using the newly assigned translators names
910  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
912  }
913  }
914 
915 
916  /** @brief indicates that we should ignore the kth column of the original
917  * database when inserting new rows */
// Marks a column of the original dataset as ignored and removes the
// translators that handle it.
//
// NOTE(review): some statements of this definition are absent from this
// listing; the comments below only describe the visible ones.
//
// @param k index of an input column (k_is_input_col true) or of a
//        translator (k_is_input_col false)
// @param k_is_input_col indicates how k should be interpreted
918  template < template < typename > class ALLOC >
919  void DatabaseTable< ALLOC >::ignoreColumn(const std::size_t k, const bool k_is_input_col) {
920  // indicate that the column will be forbidden. If the column is already
921  // forbidden, do nothing. But if the column is assigned to a translator
922  // that does not exist, raise an UndefinedElement exception
923  const std::size_t nb_trans = _translators_.size();
924  if (k_is_input_col) {
// already ignored: nothing to do
925  if (_ignored_cols_.exists(k)) return;
927  } else {
// k designates a translator: it must be a valid translator index
928  if (k < nb_trans) {
930  } else {
// NOTE(review): "because there exist only " has no leading space, so it is
// appended directly after the value of k in the message.
932  "It is impossible to ignore the column parsed by Translator #"
933  << k << "because there exist only " << nb_trans << " translators")
934  }
935  }
936 
937  // remove all the translators corresponding to k
939  }
940 
941 
942  /// returns the set of ignored columns
943  template < template < typename > class ALLOC >
944  const typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
945  DatabaseTable< ALLOC >::ignoredColumns() const {
946  const std::size_t nb_trans = _translators_.size();
947 
948  if (nb_trans == std::size_t(0)) { return DBVector< std::size_t >{std::size_t(0)}; }
949 
950  // get the columns handled by the translators, sorted by increasing order
952  for (std::size_t i = std::size_t(0); i < nb_trans; ++i)
954  std::sort(cols.begin(), cols.end());
955 
956  // create a vector with all the possible input columns
960 
961  // remove from ignored_cols the elements of cols
962  for (std::size_t i = std::size_t(0), ii = highest - 1, k = std::size_t(0), kk = nb_trans - 1;
963  i < highest;
964  ++i, --ii) {
965  if (cols[kk] == ii) {
967  while ((k < nb_trans) && (cols[kk] == ii)) {
968  --kk;
969  ++k;
970  }
971  if (k == nb_trans) break;
972  }
973  }
974 
975  // add the column past the last translator
977 
978  return ignored_cols;
979  }
980 
981 
982  /// returns the set of columns parsed
983  template < template < typename > class ALLOC >
984  const typename DatabaseTable< ALLOC >::template DBVector< std::size_t >
985  DatabaseTable< ALLOC >::inputColumns() const {
986  const std::size_t nb_trans = _translators_.size();
987  if (nb_trans == std::size_t(0)) { return DBVector< std::size_t >(); }
988 
990  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
992  }
993  return input_cols;
994  }
995 
996 
997  /// returns the domain size of the kth variable
998  template < template < typename > class ALLOC >
1000  const bool k_is_input_col) const {
1001  // find the position kk of the translator that contains the variable
1002  const std::size_t nb_trans = _translators_.size();
1004 
1005  // check if the translator exists
1006  if (nb_trans <= kk) {
1007  if (k_is_input_col) {
1009  "there is no variable in the database table that "
1010  << "corresponds to Column " << k)
1011  } else {
1013  "the database has " << nb_trans << " variables, so Variable #" << k
1014  << " does not exist")
1015  }
1016  }
1017 
1018  return _translators_.domainSize(kk);
1019  }
1020 
1021 
1022  /// returns the domain sizes of all the variables in the database table
1023  template < template < typename > class ALLOC >
1025  DatabaseTable< ALLOC >::domainSizes() const {
1026  const std::size_t nb_trans = _translators_.size();
1027  std::vector< std::size_t > dom(nb_trans);
1028  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
1030  }
1031  return dom;
1032  }
1033 
1034 
1035  // indicates whether a reordering is needed to make the kth
1036  // translator sorted by lexicographical order
1037  template < template < typename > class ALLOC >
1038  bool DatabaseTable< ALLOC >::needsReordering(const std::size_t k,
1039  const bool k_is_input_col) const {
1040  // find the position kk of the translator that contains the variable
1041  const std::size_t nb_trans = _translators_.size();
1043 
1044  // check if the translator exists
1045  if (nb_trans <= kk) {
1046  if (k_is_input_col) {
1048  "there is no translator in the database table that "
1049  << "parses Column " << k)
1050  } else {
1052  "the database has " << nb_trans << " translators, so Translator #" << k
1053  << " does not exist")
1054  }
1055  }
1056 
1058  }
1059 
1060 
1061  // performs a reordering of the kth translator or of the first
1062  // translator corresponding to the kth column of the input database
      //
      // k_is_input_col selects how k is interpreted: the error messages below
      // speak of "Column k" when it is true and of "Translator #k" otherwise.
      //
      // NOTE(review): the listing numbers embedded in this copy skip 1067,
      // 1072, 1076, 1084, 1091, 1095, 1097 and 1116, so some statements of
      // this function are not visible here. In particular, no declaration of
      // `kk` or `new_values` appears, and the two error expressions below
      // have no visible opening. Presumably they must be restored from the
      // original file -- TODO confirm.
1063  template < template < typename > class ALLOC >
1064  void DatabaseTable< ALLOC >::reorder(const std::size_t k, const bool k_is_input_col) {
1065  // find the position kk of the translator that contains the variable
1066  const std::size_t nb_trans = _translators_.size();
1068
1069  // check if the translator exists, i.e., that kk is a valid index among
      // the nb_trans translators
1070  if (nb_trans <= kk) {
1071  if (k_is_input_col) {
1073  "there is no translator in the database table that "
1074  << "parses Column " << k)
1075  } else {
1077  "the database has " << nb_trans << " translators, so Translator #" << k
1078  << " does not exist")
1079  }
1080  }
1081
1082  // if the translator is not designed for a discrete variable, there
1083  // is no reordering to apply
      // NOTE(review): the statement performing this check (listing line 1084)
      // is not visible in this copy.
1085
1086  // get the translation to perform; an empty set of updates means that
      // there is nothing to change in the rows
1087  auto updates = _translators_.reorder(kk);
1088  if (updates.empty()) return;
1089
      // size starts at the number of updates and is enlarged whenever the
      // first element of an update lies at or beyond it
1090  std::size_t size = updates.size();
1092  for (const auto& update: updates) {
1093  if (update.first >= size) {
1094  size = update.first + 1;
1096  }
1098  }
1099
1100  // apply the translations: for the rows in [begin, end), replace the
      // discr_val stored at position kk by new_values[discr_val]. Cells equal
      // to the maximal std::size_t are left untouched (presumably the encoding
      // of a missing value -- TODO confirm). The index parameter is not used.
1101  auto newtrans_lambda = [this, kk, &new_values](std::size_t begin,
1102  std::size_t end,
1103  std::size_t index) -> void {
1104  for (std::size_t i = begin; i < end; ++i) {
1105  auto& elt = this->rows_[i][kk].discr_val;
1106  if (elt != std::numeric_limits< std::size_t >::max()) elt = new_values[elt];
1107  }
1108  };
1109
      // the undo lambda has an empty body: it performs no action
1110  auto undo_newtrans_lambda = [](std::size_t begin,
1111  std::size_t end,
1112  std::size_t index) -> void {
1113  };
1114
1115  // launch the threads executing the lambdas
      // NOTE(review): the launching statement (listing line 1116) is not
      // visible in this copy.
1117  }
1118 
1119 
1120  /// performs a reordering of all the columns
1121  template < template < typename > class ALLOC >
1122  INLINE void DatabaseTable< ALLOC >::reorder() {
1123  const std::size_t nb_trans = _translators_.size();
1124  for (std::size_t i = std::size_t(0); i < nb_trans; ++i)
1125  reorder(i, false);
1126  }
1127 
1128 
1129  /// insert a new row at the end of the database
      ///
      /// The row is given as strings; an empty row is silently ignored.
      ///
      /// NOTE(review): the listing numbers embedded in this copy skip 1138,
      /// 1139, 1147, 1148, 1151-1153 and 1156, so the size test guarding the
      /// error message, the body of the translation loop and the final
      /// insertion are not visible here -- TODO restore them from the
      /// original file.
1130  template < template < typename > class ALLOC >
1131  void DatabaseTable< ALLOC >::insertRow(
1132  const std::vector< std::string, ALLOC< std::string > >& new_row) {
1133  // check that the row can be fully translated, i.e., it contains enough
1134  // columns to be translated
1135  const std::size_t row_size = new_row.size();
      // nothing to insert for an empty row
1136  if (row_size == std::size_t(0)) return;
1137
      // the message below reports that the row has fewer columns than the
      // highest input column used by the translators, plus one
1140  "the new row has " << row_size
1141  << " columns whereas the database requires at least "
1142  << (_translators_.highestInputColumn() + 1) << " columns")
1143  }
1144
1145  // convert the new_row into a row of DBTranslatedValue
1146  const std::size_t nb_trans = _translators_.size();
      // no missing value recorded so far; the code that may set this flag is
      // not visible in this copy
1149  bool has_missing_val = false;
1150  for (std::size_t i = std::size_t(0); i < nb_trans; ++i) {
1154  }
1155
1157  }
1158 
1159 
1160  /** @brief check that a row's values are compatible with those of the
1161  * translators' variables
      *
      * Returns false as soon as the size of the row differs from the number
      * of translators or one of its values fails the test made for its
      * translator; returns true otherwise. A value type handled by none of
      * the cases ends in a NotImplementedYet error.
      *
      * NOTE(review): the listing numbers embedded in this copy skip 1163,
      * 1172 and 1178: the line carrying the name of the function and the two
      * case labels of the switch are not visible here -- TODO restore them
      * from the original file. */
1162  template < template < typename > class ALLOC >
1164  const typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >& row) const {
1165  // check that the size of the row corresponds to that of the translators
1166  const std::size_t row_size = row.size();
1167  if (row_size != _translators_.size()) return false;
1168
      // the ith value of the row is checked against the ith translator
1169  const auto& translators = _translators_.translators();
1170  for (std::size_t i = std::size_t(0); i < row_size; ++i) {
1171  switch (translators[i]->getValType()) {
      // NOTE(review): case label not visible (listing line 1172). The test
      // below rejects a discr_val that is neither below the translator's
      // domainSize() nor equal to the maximal std::size_t.
1173  if ((row[i].discr_val >= translators[i]->domainSize())
1174  && (row[i].discr_val != std::numeric_limits< std::size_t >::max()))
1175  return false;
1176  break;
1177
      // NOTE(review): case label and opening brace not visible (listing line
      // 1178). The test below rejects a cont_val lying outside
      // [lowerBoundAsDouble(), upperBoundAsDouble()] unless it is equal to
      // the maximal float.
1179  const IContinuousVariable& var
1180  = static_cast< const IContinuousVariable& >(*(translators[i]->variable()));
1181  if (((var.lowerBoundAsDouble() > (double)row[i].cont_val)
1182  || (var.upperBoundAsDouble() < (double)row[i].cont_val))
1183  && (row[i].cont_val != std::numeric_limits< float >::max()))
1184  return false;
1185  break;
1186  }
1187
      // any other value type is reported as not supported
1188  default:
1189  GUM_ERROR(NotImplementedYet, "Translated value type not supported yet")
1190  }
1191  }
1192
      // every value passed its test
1193  return true;
1194  }
1195 
1196 
1197  /// insert a new DBRow at the end of the database
      ///
      /// This overload receives the row as an rvalue reference.
      ///
      /// NOTE(review): the listing numbers embedded in this copy skip 1199,
      /// 1206, 1215 and 1216: the line carrying the name of the function, the
      /// opening of the first error expression and the statements following
      /// the check are not visible here -- TODO restore them from the
      /// original file.
1198  template < template < typename > class ALLOC >
1200  typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >&& new_row,
1201  const typename DatabaseTable< ALLOC >::IsMissing contains_missing_data) {
1202  // check that the new rows values are compatible with the values of
1203  // the variables stored within the translators
1204  if (!_isRowCompatible_(new_row)) {
      // a size mismatch gets its own message; any other incompatibility is
      // reported as an InvalidArgument
1205  if (new_row.size() != _translators_.size()) {
1207  "The new row has " << new_row.size()
1208  << " elements whereas the database table has "
1209  << _translators_.size() << " columns")
1210  } else {
1211  GUM_ERROR(InvalidArgument, "the new row is not compatible with the current translators")
1212  }
1213  }
1214
1217  }
1218 
1219 
1220  /// insert a new row at the end of the database
      ///
      /// This overload receives the row as a const reference.
      ///
      /// NOTE(review): the listing numbers embedded in this copy skip 1222,
      /// 1229 and 1238: the line carrying the name of the function, the
      /// opening of the first error expression and the statement following
      /// the check are not visible here -- TODO restore them from the
      /// original file.
1221  template < template < typename > class ALLOC >
1223  const typename DatabaseTable< ALLOC >::template Row< DBTranslatedValue >& new_row,
1224  const typename DatabaseTable< ALLOC >::IsMissing contains_missing_data) {
1225  // check that the new rows values are compatible with the values of
1226  // the variables stored within the translators
1227  if (!_isRowCompatible_(new_row)) {
      // a size mismatch gets its own message; any other incompatibility is
      // reported as an InvalidArgument
1228  if (new_row.size() != _translators_.size()) {
1230  "The new row has " << new_row.size()
1231  << " elements whereas the database table has "
1232  << _translators_.size() << " columns")
1233  } else {
1234  GUM_ERROR(InvalidArgument, "the new row is not compatible with the current translators")
1235  }
1236  }
1237
1239  }
1240 
1241 
1242  // insert a new DBRow of DBCells at the end of the database
1243  template < template < typename > class ALLOC >
1244  void DatabaseTable< ALLOC >::insertRow(
1245  const typename DatabaseTable< ALLOC >::template Row< DBCell >& new_row) {
1246  GUM_ERROR(NotImplementedYet, "not implemented yet")
1247  }
1248 
1249  // insert a new DBRow of DBCells at the end of the database
1250  template < template < typename > class ALLOC >
1251  void DatabaseTable< ALLOC >::insertRow(
1252  typename DatabaseTable< ALLOC >::template Row< DBCell >&& new_row) {
1253  GUM_ERROR(NotImplementedYet, "not implemented yet")
1254  }
1255 
1256 
1257  /// insert a set of new DBRows at the end of the database
      ///
      /// This overload receives the rows as an rvalue reference.
      ///
      /// NOTE(review): the listing numbers embedded in this copy skip 1262,
      /// 1268, 1278 and 1279: the name of the second parameter, the opening of
      /// the first error expression and the statements following the checks
      /// are not visible here -- TODO restore them from the original file.
1258  template < template < typename > class ALLOC >
1259  void DatabaseTable< ALLOC >::insertRows(
1260  typename DatabaseTable< ALLOC >::template Matrix< DBTranslatedValue >&& rows,
1261  const typename DatabaseTable< ALLOC >::template DBVector< IsMissing >&
1263  // check that the new rows values are compatible with the values of
1264  // the variables stored within the translators
      // every row is checked in turn; the first incompatible one is reported
1265  for (const auto& new_row: rows) {
1266  if (!_isRowCompatible_(new_row)) {
      // a size mismatch gets its own message; any other incompatibility is
      // reported as an InvalidArgument
1267  if (new_row.size() != _translators_.size()) {
1269  "The new row has " << new_row.size()
1270  << " elements whereas the database table has "
1271  << _translators_.size() << " columns")
1272  } else {
1273  GUM_ERROR(InvalidArgument, "the new row is not compatible with the current translators")
1274  }
1275  }
1276  }
1277
1280  }
1281 
1282 
1283  /// insert a set of new DBRows at the end of the database
      ///
      /// This overload receives the rows as a const reference.
      ///
      /// NOTE(review): the listing numbers embedded in this copy skip 1288,
      /// 1294 and 1304: the name of the second parameter, the opening of the
      /// first error expression and the statement following the checks are
      /// not visible here -- TODO restore them from the original file.
1284  template < template < typename > class ALLOC >
1285  void DatabaseTable< ALLOC >::insertRows(
1286  const typename DatabaseTable< ALLOC >::template Matrix< DBTranslatedValue >& new_rows,
1287  const typename DatabaseTable< ALLOC >::template DBVector< IsMissing >&
1289  // check that the new rows values are compatible with the values of
1290  // the variables stored within the translators
      // every row is checked in turn; the first incompatible one is reported
1291  for (const auto& new_row: new_rows) {
1292  if (!_isRowCompatible_(new_row)) {
      // a size mismatch gets its own message; any other incompatibility is
      // reported as an InvalidArgument
1293  if (new_row.size() != _translators_.size()) {
1295  "The new row has " << new_row.size()
1296  << " elements whereas the database table has "
1297  << _translators_.size() << " columns")
1298  } else {
1299  GUM_ERROR(InvalidArgument, "the new row is not compatible with the current translators")
1300  }
1301  }
1302  }
1303
1305  }
1306 
1307 
1308  /// insert a set of new DBRows at the end of the database
1309  template < template < typename > class ALLOC >
1310  void DatabaseTable< ALLOC >::insertRows(
1311  typename DatabaseTable< ALLOC >::template Matrix< DBCell >&& new_rows) {
1312  GUM_ERROR(NotImplementedYet, "not implemented yet")
1313  }
1314 
1315 
1316  /// insert a set of new DBRows at the end of the database
1317  template < template < typename > class ALLOC >
1318  void DatabaseTable< ALLOC >::insertRows(
1319  const typename DatabaseTable< ALLOC >::template Matrix< DBCell >& new_rows) {
1320  GUM_ERROR(NotImplementedYet, "not implemented yet")
1321  }
1322 
1323 
1324  /// erase the content of the database, including the names of the variables
      ///
      /// NOTE(review): the listing numbers embedded in this copy skip 1328 and
      /// 1329, so only the clearing of the translators is visible here; the
      /// remaining statements must be taken from the original file -- TODO
      /// confirm.
1325  template < template < typename > class ALLOC >
1326  void DatabaseTable< ALLOC >::clear() {
      // empty the set of translators
1327  _translators_.clear();
1330  }
1331 
1332 
1333  } /* namespace learning */
1334 
1335 } /* namespace gum */
1336 
1337 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)