aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
IDatabaseTable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The implementation of the common class for tabular databases
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #include <agrum/tools/database/IDatabaseTable.h>
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 
31 namespace gum {
32 
33  namespace learning {
34 
35  // ===========================================================================
36  // Unsafe handlers
37  // ===========================================================================
38 
39  // default constructor
40  template < typename T_DATA, template < typename > class ALLOC >
41  INLINE IDatabaseTable< T_DATA, ALLOC >::Handler::Handler(
42  const IDatabaseTable< T_DATA, ALLOC >& db) :
43  DBHandler< T_DATA, ALLOC >(),
44  db__(&db), row__(&(db.content())),
47  }
48 
49 
50  // copy constructor
51  template < typename T_DATA, template < typename > class ALLOC >
53  const typename IDatabaseTable< T_DATA, ALLOC >::Handler& h) :
54  DBHandler< T_DATA, ALLOC >(),
58  }
59 
60 
61  // move constructor
62  template < typename T_DATA, template < typename > class ALLOC >
64  typename IDatabaseTable< T_DATA, ALLOC >::Handler&& h) :
65  DBHandler< T_DATA, ALLOC >(),
69  }
70 
71 
72  // destructor
73  template < typename T_DATA, template < typename > class ALLOC >
76  }
77 
78 
79  // copy operator
80  template < typename T_DATA, template < typename > class ALLOC >
83  const typename IDatabaseTable< T_DATA, ALLOC >::Handler& h) {
84  db__ = h.db__;
85  row__ = h.row__;
86  index__ = h.index__;
89  return *this;
90  }
91 
92 
93  // move operator
94  template < typename T_DATA, template < typename > class ALLOC >
97  typename IDatabaseTable< T_DATA, ALLOC >::Handler&& h) {
98  db__ = h.db__;
99  row__ = h.row__;
100  index__ = h.index__;
103  return *this;
104  }
105 
106 
107  // returns the current row pointed to by the handler
108  template < typename T_DATA, template < typename > class ALLOC >
110  IDatabaseTable< T_DATA, ALLOC >::Handler::operator*() const {
111  return row__->operator[](index__);
112  }
113 
114 
115  // Dereferences the value pointed to by the handler (unsafe version)
116  template < typename T_DATA, template < typename > class ALLOC >
118  IDatabaseTable< T_DATA, ALLOC >::Handler::operator->() const {
119  return &(row__->operator[](index__));
120  }
121 
122 
123  // makes the handler point to the next row
124  template < typename T_DATA, template < typename > class ALLOC >
125  INLINE typename IDatabaseTable< T_DATA, ALLOC >::Handler&
127  ++index__;
128  return *this;
129  }
130 
131 
132  // makes the handler point to the previous row
133  template < typename T_DATA, template < typename > class ALLOC >
134  INLINE typename IDatabaseTable< T_DATA, ALLOC >::Handler&
136  if (index__ > begin_index__) --index__;
137  return *this;
138  }
139 
140 
141  // moves the handler by i rows
142  template < typename T_DATA, template < typename > class ALLOC >
143  INLINE typename IDatabaseTable< T_DATA, ALLOC >::Handler&
145  index__ += i;
146  return *this;
147  }
148 
149 
150  // moves back the handler by i rows
151  template < typename T_DATA, template < typename > class ALLOC >
152  INLINE typename IDatabaseTable< T_DATA, ALLOC >::Handler&
154  if (index__ >= begin_index__ + i)
155  index__ -= i;
156  else
158  return *this;
159  }
160 
161 
162  // checks whether two handlers point on the same row
163  template < typename T_DATA, template < typename > class ALLOC >
165  const Handler& handler) const {
166  return index__ == handler.index__;
167  }
168 
169 
170  // checks whether two handlers point to different rows
171  template < typename T_DATA, template < typename > class ALLOC >
173  const Handler& handler) const {
174  return index__ != handler.index__;
175  }
176 
177 
178  // returns the number of rows managed by the handler
179  template < typename T_DATA, template < typename > class ALLOC >
181  return end_index__ - begin_index__;
182  }
183 
184 
185  // return the number of rows of the whole database
186  template < typename T_DATA, template < typename > class ALLOC >
188  if (row__ != nullptr)
189  return row__->size();
190  else
191  return std::size_t(0);
192  }
193 
194 
195  // returns the current row pointed to by the handler
196  template < typename T_DATA, template < typename > class ALLOC >
198  IDatabaseTable< T_DATA, ALLOC >::Handler::rowSafe() const {
199  if (index__ >= end_index__) {
200  GUM_ERROR(OutOfBounds, "the handler has reached its end");
201  }
202 
203  return row__->operator[](index__);
204  }
205 
206 
207  // returns the current row pointed to by the handler
208  template < typename T_DATA, template < typename > class ALLOC >
211  if (index__ >= end_index__) {
212  GUM_ERROR(OutOfBounds, "the handler has reached its end");
213  }
214 
215  return const_cast< Matrix< T_DATA >* >(row__)->operator[](index__);
216  }
217 
218 
219  // returns the current row pointed to by the handler (unsafe version)
220  template < typename T_DATA, template < typename > class ALLOC >
222  IDatabaseTable< T_DATA, ALLOC >::Handler::row() const {
223  return row__->operator[](index__);
224  }
225 
226 
227  // returns the current row pointed to by the handler (unsafe version)
228  template < typename T_DATA, template < typename > class ALLOC >
231  return const_cast< Matrix< T_DATA >* >(row__)->operator[](index__);
232  }
233 
234 
235  // makes the handler point to the next row
236  template < typename T_DATA, template < typename > class ALLOC >
238  ++index__;
239  }
240 
241 
242  // returns the number of the current row
243  template < typename T_DATA, template < typename > class ALLOC >
245  return (index__ >= begin_index__) ? index__ - begin_index__ : 0;
246  }
247 
248  // indicates whether the handler has reached its end or not
249  template < typename T_DATA, template < typename > class ALLOC >
250  INLINE bool IDatabaseTable< T_DATA, ALLOC >::Handler::hasRows() const {
251  return (index__ < end_index__);
252  }
253 
254  // puts the handler to the beginning of the database area it handles
255  template < typename T_DATA, template < typename > class ALLOC >
258  }
259 
260 
261  // returns a new handler that points to the beginning of the
262  // database area of the current handler
263  template < typename T_DATA, template < typename > class ALLOC >
265  IDatabaseTable< T_DATA, ALLOC >::Handler::begin() const {
266  Handler handler(*this);
267  handler.reset();
268  return handler;
269  }
270 
271 
272  // returns a new handler that points to the end of the
273  // database area of the current handler
274  template < typename T_DATA, template < typename > class ALLOC >
276  IDatabaseTable< T_DATA, ALLOC >::Handler::end() const {
277  Handler handler(*this);
279  return handler;
280  }
281 
282 
283  // sets the area in the database the handler will handle
284  template < typename T_DATA, template < typename > class ALLOC >
285  INLINE void
287  std::size_t end) {
288  if (begin > end) std::swap(begin, end);
289 
290  // check that the end belongs to the database, else raise an exception
291  if (row__ == nullptr) {
292  GUM_ERROR(NullElement, "the handler does not point to any database");
293  }
294  if (end > row__->size()) {
296  "the database has fewer rows ("
297  << row__->size() << ") than the upper range (" << end
298  << ") specified to the handler");
299  }
300 
302  end_index__ = end;
303  index__ = begin;
304  }
305 
306 
307  // returns the current range of the handler
308  template < typename T_DATA, template < typename > class ALLOC >
310  IDatabaseTable< T_DATA, ALLOC >::Handler::range() const {
312  }
313 
314 
315  // returns the names of the variables
316  template < typename T_DATA, template < typename > class ALLOC >
317  INLINE const typename IDatabaseTable< T_DATA, ALLOC >::Handler::
318  template DBVector< std::string >&
320  return db__->variableNames();
321  }
322 
323 
324  // returns the number of variables (columns) of the database
325  template < typename T_DATA, template < typename > class ALLOC >
326  INLINE std::size_t
328  if (db__ != nullptr)
329  return db__->variableNames().size();
330  else
331  return 0;
332  }
333 
334 
335  // returns a reference to the database
336  template < typename T_DATA, template < typename > class ALLOC >
337  INLINE const IDatabaseTable< T_DATA, ALLOC >&
338  IDatabaseTable< T_DATA, ALLOC >::Handler::database() const {
339  if (db__ == nullptr) {
341  "The database handler does not point toward a database");
342  }
343  return *db__;
344  }
345 
346 
347  // ===========================================================================
348  // Safe handlers
349  // ===========================================================================
350 
351  // attach a new handler to the database
352  template < typename T_DATA, template < typename > class ALLOC >
354  if (this->db__ != nullptr) { this->db__->attachHandler__(this); }
355  }
356 
357 
358  // detach a handler
359  template < typename T_DATA, template < typename > class ALLOC >
361  if (this->db__ != nullptr) { this->db__->detachHandler__(this); }
362  }
363 
364 
365  // default constructor
366  template < typename T_DATA, template < typename > class ALLOC >
368  const IDatabaseTable< T_DATA, ALLOC >& db) :
370  attachHandler__();
372  }
373 
374 
375  // copy constructor
376  template < typename T_DATA, template < typename > class ALLOC >
378  const typename IDatabaseTable< T_DATA, ALLOC >::HandlerSafe& h) :
380  attachHandler__();
382  }
383 
384 
385  // move constructor
386  template < typename T_DATA, template < typename > class ALLOC >
388  typename IDatabaseTable< T_DATA, ALLOC >::HandlerSafe&& h) :
390  attachHandler__();
392  }
393 
394 
395  // destructor
396  template < typename T_DATA, template < typename > class ALLOC >
398  detachHandler__();
400  }
401 
402 
403  // copy operator
404  template < typename T_DATA, template < typename > class ALLOC >
407  const typename IDatabaseTable< T_DATA, ALLOC >::HandlerSafe& h) {
408  if (this->db__ != h.db__) {
409  detachHandler__();
410  this->db__ = h.db__;
411  attachHandler__();
412  }
413 
415  return *this;
416  }
417 
418 
419  // copy operator
420  template < typename T_DATA, template < typename > class ALLOC >
423  const typename IDatabaseTable< T_DATA, ALLOC >::Handler& h) {
424  return this->operator=(
425  dynamic_cast< const IDatabaseTable< T_DATA, ALLOC >::HandlerSafe& >(h));
426  }
427 
428 
429  // move operator
430  template < typename T_DATA, template < typename > class ALLOC >
433  typename IDatabaseTable< T_DATA, ALLOC >::HandlerSafe&& h) {
434  if (this->db__ != h.db__) {
435  detachHandler__();
436  this->db__ = h.db__;
437  attachHandler__();
438  }
439 
441  return *this;
442  }
443 
444 
445  // move operator
446  template < typename T_DATA, template < typename > class ALLOC >
449  typename IDatabaseTable< T_DATA, ALLOC >::Handler&& h) {
450  return this->operator=(std::move(
451  dynamic_cast< IDatabaseTable< T_DATA, ALLOC >::HandlerSafe& >(h)));
452  }
453 
454 
455  // ===========================================================================
456  // Database Tables
457  // ===========================================================================
458 
459  // returns the allocator of the database
460  template < typename T_DATA, template < typename > class ALLOC >
462  return ALLOC< T_DATA >(*this);
463  }
464 
465 
466  // create the end iterators
467  template < typename T_DATA, template < typename > class ALLOC >
469  const IDatabaseTable< T_DATA, ALLOC >& db = *this;
470  ALLOC< iterator > allocator1(*this);
472  try {
474  } catch (...) {
476  throw;
477  }
478 
479  ALLOC< iterator_safe > allocator2(*this);
480  try {
482  try {
484  } catch (...) {
486  throw;
487  }
488  } catch (...) {
491  throw;
492  }
493  }
494 
495 
496  // default constructor
497  template < typename T_DATA, template < typename > class ALLOC >
498  template < template < typename > class VARALLOC,
499  template < typename >
500  class MISSALLOC >
502  const typename IDatabaseTable< T_DATA, ALLOC >::template MissingValType<
504  const std::vector< std::string, VARALLOC< std::string > >& var_names,
505  const ALLOC< T_DATA >& alloc) :
506  ALLOC< T_DATA >(alloc),
509  // copy the names
511  for (const auto& name: var_names)
513 
514  // copy the missing symbols
516  for (const auto& missing_symbol: missing_symbols)
518 
519  // create the end iterators
521 
523  }
524 
525 
526  // copy constructor with a given allocator
527  template < typename T_DATA, template < typename > class ALLOC >
529  const IDatabaseTable< T_DATA, ALLOC >& from,
530  const typename IDatabaseTable< T_DATA, ALLOC >::allocator_type& alloc) :
531  ALLOC< T_DATA >(alloc),
538  // create the end iterators
540 
542  }
543 
544 
545  // copy constructor
546  template < typename T_DATA, template < typename > class ALLOC >
548  const IDatabaseTable< T_DATA, ALLOC >& from) :
550 
551 
552  // move constructor with a given allocator
553  template < typename T_DATA, template < typename > class ALLOC >
556  const typename IDatabaseTable< T_DATA, ALLOC >::allocator_type& alloc) :
557  ALLOC< T_DATA >(alloc),
565  // create the end iterators
567 
569  }
570 
571 
572  // move constructor
573  template < typename T_DATA, template < typename > class ALLOC >
577 
578 
579  // destructor
580  template < typename T_DATA, template < typename > class ALLOC >
582  // indicate to all the handlers that we are destructing the database
584  for (auto handler: list_of_safe_handlers__) {
585  handler->db__ = nullptr;
586  handler->row__ = nullptr;
587  handler->end_index__ = 0;
588  handler->index__ = 0;
589  }
591 
595 
599 
601  }
602 
603 
604  // copy operator
605  template < typename T_DATA, template < typename > class ALLOC >
607  const IDatabaseTable< T_DATA, ALLOC >& from) {
608  if (this != &from) {
609  // invalidate the current handlers
611  for (auto handler: list_of_safe_handlers__) {
612  handler->db__ = nullptr;
613  handler->row__ = nullptr;
614  handler->end_index__ = 0;
615  handler->index__ = 0;
616  }
619 
620  rows_ = from.rows_;
626 
627  // update the end iterators
628  const std::size_t db_size = rows_.size();
629  end__->index__ = db_size;
633  }
634 
635  return *this;
636  }
637 
638 
639  // move operator
640  template < typename T_DATA, template < typename > class ALLOC >
643  if (this != &from) {
644  // invalidate the current handlers
646  for (auto handler: list_of_safe_handlers__) {
647  handler->db__ = nullptr;
648  handler->row__ = nullptr;
649  handler->end_index__ = 0;
650  handler->index__ = 0;
651  }
653 
654  rows_ = std::move(from.rows_);
660 
661  // update the end iterators
662  const std::size_t db_size = rows_.size();
663  end__->index__ = db_size;
667  }
668 
669  return *this;
670  }
671 
672 
673  // returns a new unsafe handler pointing to the beginning of the database
674  template < typename T_DATA, template < typename > class ALLOC >
676  IDatabaseTable< T_DATA, ALLOC >::begin() const {
677  return Handler(*this);
678  }
679 
680 
681  // returns a new safe handler pointing to the beginning of the database
682  template < typename T_DATA, template < typename > class ALLOC >
684  IDatabaseTable< T_DATA, ALLOC >::beginSafe() const {
685  return HandlerSafe(*this);
686  }
687 
688 
689  // returns a new unsafe handler pointing to the end of the database
690  template < typename T_DATA, template < typename > class ALLOC >
691  INLINE const typename IDatabaseTable< T_DATA, ALLOC >::Handler&
692  IDatabaseTable< T_DATA, ALLOC >::end() const noexcept {
693  return *end__;
694  }
695 
696 
697  /// returns a new safe handler pointing to the beginning of the database
698  template < typename T_DATA, template < typename > class ALLOC >
699  INLINE const typename IDatabaseTable< T_DATA, ALLOC >::HandlerSafe&
700  IDatabaseTable< T_DATA, ALLOC >::endSafe() const noexcept {
701  return *end_safe__;
702  }
703 
704 
705  // returns a new unsafe handler on the database
706  template < typename T_DATA, template < typename > class ALLOC >
708  IDatabaseTable< T_DATA, ALLOC >::handler() const {
709  return Handler(*this);
710  }
711 
712 
713  // returns a new safe handler on the database
714  template < typename T_DATA, template < typename > class ALLOC >
716  IDatabaseTable< T_DATA, ALLOC >::handlerSafe() const {
717  return HandlerSafe(*this);
718  }
719 
720 
721  // returns the content of the database
722  template < typename T_DATA, template < typename > class ALLOC >
723  INLINE const typename IDatabaseTable< T_DATA,
724  ALLOC >::template Matrix< T_DATA >&
725  IDatabaseTable< T_DATA, ALLOC >::content() const noexcept {
726  return rows_;
727  }
728 
729 
730  /// indicates whether the database contains some missing values
731  template < typename T_DATA, template < typename > class ALLOC >
732  bool IDatabaseTable< T_DATA, ALLOC >::hasMissingValues() const {
733  for (const auto& status: has_row_missing_val_)
734  if (status == IsMissing::True) return true;
735  return false;
736  }
737 
738 
739  /// indicates whether the kth row contains some missing values
740  template < typename T_DATA, template < typename > class ALLOC >
742  const std::size_t k) const {
743  return has_row_missing_val_[k] == IsMissing::True;
744  }
745 
746 
747  // returns the variable names for all the columns
748  template < typename T_DATA, template < typename > class ALLOC >
749  INLINE const std::vector< std::string, ALLOC< std::string > >&
750  IDatabaseTable< T_DATA, ALLOC >::variableNames() const noexcept {
751  return variable_names_;
752  }
753 
754 
755  // sets the names of the variables
756  template < typename T_DATA, template < typename > class ALLOC >
757  template < template < typename > class OTHER_ALLOC >
759  const std::vector< std::string, OTHER_ALLOC< std::string > >& names,
760  const bool from_external_object) {
761  // copy the variable names into a vector allocated with the allocator
762  // used by the database
763  const std::size_t size = names.size();
765  for (std::size_t i = 0; i < size; ++i)
766  variable_names[i] = names[i];
767 
769  }
770 
771 
772  /// returns the name of the kth column of the database
773  template < typename T_DATA, template < typename > class ALLOC >
774  INLINE const std::string&
775  IDatabaseTable< T_DATA, ALLOC >::variableName(const std::size_t k) const {
776  if (variable_names_.size() <= k)
777  GUM_ERROR(OutOfBounds, "the database does not contain Column #" << k);
778  return variable_names_[k];
779  }
780 
781 
782  /// returns the index of the column whose name is passed in argument
783  template < typename T_DATA, template < typename > class ALLOC >
785  const std::string& name) const {
786  const std::size_t size = variable_names_.size();
787  for (std::size_t i = 0; i < size; ++i)
788  if (variable_names_[i] == name) return i;
789 
791  "the database contains no column whose name is " << name);
792  }
793 
794 
795  /// returns the indices of the columns whose name is passed in argument
796  template < typename T_DATA, template < typename > class ALLOC >
797  INLINE
798  typename IDatabaseTable< T_DATA, ALLOC >::template DBVector< std::size_t >
800  const std::string& name) const {
801  const std::size_t size = variable_names_.size();
802  DBVector< std::size_t > cols;
803  for (std::size_t i = 0; i < size; ++i)
804  if (variable_names_[i] == name) cols.push_back(i);
805 
806  if (cols.empty())
808  "the database contains no column whose name is " << name);
809 
810  return cols;
811  }
812 
813 
814  // returns the number of variables (columns) of the database
815  template < typename T_DATA, template < typename > class ALLOC >
816  INLINE std::size_t
817  IDatabaseTable< T_DATA, ALLOC >::nbVariables() const noexcept {
818  return variable_names_.size();
819  }
820 
821 
822  // returns the number of records in the database
823  template < typename T_DATA, template < typename > class ALLOC >
824  INLINE std::size_t IDatabaseTable< T_DATA, ALLOC >::size() const noexcept {
825  return rows_.size();
826  }
827 
828 
829  // returns the number of records in the database
830  template < typename T_DATA, template < typename > class ALLOC >
831  INLINE std::size_t IDatabaseTable< T_DATA, ALLOC >::nbRows() const noexcept {
832  return rows_.size();
833  }
834 
835 
836  // indicates whether the database contains some records or not
837  template < typename T_DATA, template < typename > class ALLOC >
838  INLINE bool IDatabaseTable< T_DATA, ALLOC >::empty() const noexcept {
839  return rows_.empty();
840  }
841 
842 
843  // update the handlers when the size of the database changes
844  template < typename T_DATA, template < typename > class ALLOC >
846  std::size_t new_size) const {
847  const std::size_t db_size = rows_.size();
848 
850  for (auto handler: list_of_safe_handlers__) {
851  if ((handler->end_index__ == db_size)
852  || (handler->end_index__ > new_size)) {
854  // there is no need to update the index because, in safe handlers,
855  // we always check that the index is less than end_index when trying
856  // to access the rows
857  }
858  }
860 
861  // update the end iterators
866  }
867 
868 
869  // attach a new handler to the database
870  template < typename T_DATA, template < typename > class ALLOC >
872  HandlerSafe* handler) const {
876  }
877 
878 
879  // detach a handler
880  template < typename T_DATA, template < typename > class ALLOC >
882  HandlerSafe* handler) const {
884 
885  for (auto iter = list_of_safe_handlers__.rbegin();
887  ++iter) {
888  if (*iter == handler) {
891  break;
892  }
893  }
894 
896  }
897 
898 
899  // checks whether a new row has the same size as the rest of the database
900  template < typename T_DATA, template < typename > class ALLOC >
902  const std::size_t size) const {
903  return (size == variable_names_.size());
904  }
905 
906 
907  // insert a new row at the end of the database
908  template < typename T_DATA, template < typename > class ALLOC >
909  template < template < typename > class OTHER_ALLOC >
911  const std::vector< std::string, OTHER_ALLOC< std::string > >& new_row) {
912  const std::size_t size = new_row.size();
914  for (std::size_t i = 0; i < size; ++i)
916  this->insertRow(good_typed_row);
917  }
918 
919 
920  // insert a new DBRow at the end of the database
921  template < typename T_DATA, template < typename > class ALLOC >
923  typename IDatabaseTable< T_DATA, ALLOC >::template Row< T_DATA >&& new_row,
924  const typename IDatabaseTable< T_DATA, ALLOC >::IsMissing
926  // check that the size of the row is the same as the rest of the database
927  if (!isRowSizeOK_(new_row.size()))
929  "the new row is of size "
930  << new_row.size()
931  << ", which is different from the number of columns "
932  << "of the database, i.e., " << variable_names_.size());
933 
934  updateHandlers__(rows_.size() + 1);
936  try {
938  } catch (...) {
939  rows_.pop_back();
940  throw;
941  }
942  }
943 
944 
945  // insert a new DBRow at the end of the database
946  template < typename T_DATA, template < typename > class ALLOC >
948  const typename IDatabaseTable< T_DATA, ALLOC >::template Row< T_DATA >& row,
949  const typename IDatabaseTable< T_DATA, ALLOC >::IsMissing
951  this->insertRow(
952  typename IDatabaseTable< T_DATA, ALLOC >::template Row< T_DATA >(row),
954  }
955 
956 
957  // insert a set of new DBRow at the end of the database
958  template < typename T_DATA, template < typename > class ALLOC >
960  typename IDatabaseTable< T_DATA, ALLOC >::template Matrix< T_DATA >&&
961  new_rows,
962  const typename IDatabaseTable< T_DATA, ALLOC >::template DBVector<
963  typename IDatabaseTable< T_DATA, ALLOC >::IsMissing >&
965  if (new_rows.empty()) return;
966 
967  // check that the missing values indicators vector has the same size
968  // as the new rows
970  GUM_ERROR(
971  SizeError,
972  "the number of new rows (i.e., "
973  << new_rows.size()
974  << ") is different from the number of missing values indicators ("
976 
977  // check that all the rows have the same size
978  const std::size_t new_size = new_rows[0].size();
979 
980  for (const auto& row: new_rows) {
981  if (row.size() != new_size) {
983  "all the new rows do not have the same number of columns");
984  }
985  }
986 
987  // check that the sizes of the new rows are the same as the rest of
988  // the database
989  if (!isRowSizeOK_(new_size)) {
991  "the new rows have "
992  << new_size
993  << " columns, which is different from the number of columns "
994  << "of the database, i.e., " << variable_names_.size());
995  }
996 
997  const std::size_t nb_new_rows = new_rows.size();
999 
1002 
1003  for (std::size_t i = std::size_t(0); i < nb_new_rows; ++i) {
1006  }
1007 
1009  }
1010 
1011 
1012  // insert a set of new DBRow at the end of the database
1013  template < typename T_DATA, template < typename > class ALLOC >
1015  const typename IDatabaseTable< T_DATA, ALLOC >::template Matrix< T_DATA >&
1016  new_rows,
1017  const typename IDatabaseTable< T_DATA, ALLOC >::template DBVector<
1018  typename IDatabaseTable< T_DATA, ALLOC >::IsMissing >&
1020  if (new_rows.empty()) return;
1021 
1022  // check that the missing values indicators vector has the same size
1023  // as the new rows
1025  GUM_ERROR(
1026  SizeError,
1027  "the number of new rows (i.e., "
1028  << new_rows.size()
1029  << ") is different from the number of missing values indicators ("
1031 
1032  // check that all the rows have the same size
1033  const std::size_t new_size = new_rows[0].size();
1034 
1035  for (const auto& row: new_rows) {
1036  if (row.size() != new_size) {
1038  "all the new rows do not have the same number of columns");
1039  }
1040  }
1041 
1042  // check that the sizes of the new rows are the same as the rest of
1043  // the database
1044  std::size_t db_size = rows_.size();
1045 
1046  if (!isRowSizeOK_(new_size)) {
1048  "the new rows have "
1049  << new_size
1050  << " columns, which is different from the number of columns "
1051  << "of the database, i.e., " << variable_names_.size());
1052  }
1053 
1054  const std::size_t nb_new_rows = new_rows.size();
1055  const std::size_t new_db_size = rows_.size() + nb_new_rows;
1056 
1059 
1060  for (std::size_t i = std::size_t(0); i < nb_new_rows; ++i) {
1063  }
1064 
1066  }
1067 
1068 
1069  // erase a given row
1070  template < typename T_DATA, template < typename > class ALLOC >
1072  const std::size_t db_size = rows_.size();
1073 
1074  if (index < db_size) {
1076  rows_.erase(rows_.begin() + index);
1078  }
1079  }
1080 
1081 
1082  // erase the last row
1083  template < typename T_DATA, template < typename > class ALLOC >
1085  const std::size_t db_size = rows_.size();
1086 
1087  if (db_size) {
1089  rows_.pop_back();
1091  }
1092  }
1093 
1094 
1095  // erase the first row
1096  template < typename T_DATA, template < typename > class ALLOC >
1098  const std::size_t db_size = rows_.size();
1099 
1100  if (db_size) {
1102  rows_.erase(rows_.begin());
1104  }
1105  }
1106 
1107 
1108  // erase all the rows
1109  template < typename T_DATA, template < typename > class ALLOC >
1111  updateHandlers__(0);
1112  rows_.clear();
1114  }
1115 
1116 
1117  // erase the k first rows
1118  template < typename T_DATA, template < typename > class ALLOC >
1119  INLINE void
1121  const std::size_t db_size = rows_.size();
1122 
1123  if (nb_rows >= db_size) {
1124  eraseAllRows();
1125  } else {
1130  }
1131  }
1132 
1133 
1134  // erase the k last rows
1135  template < typename T_DATA, template < typename > class ALLOC >
1136  INLINE void
1138  const std::size_t db_size = rows_.size();
1139 
1140  if (nb_rows >= db_size) {
1141  eraseAllRows();
1142  } else {
1146  + (db_size - nb_rows),
1148  }
1149  }
1150 
1151 
1152  // erase the rows from index deb (included) to index end (not included)
1153  template < typename T_DATA, template < typename > class ALLOC >
1155  std::size_t end) {
1156  if (deb > end) std::swap(deb, end);
1157 
1158  const std::size_t db_size = rows_.size();
1159 
1160  if (end >= db_size) {
1161  if (deb >= db_size) {
1162  return;
1163  } else {
1165  }
1166  } else {
1168  rows_.erase(rows_.begin() + deb, rows_.begin() + end);
1171  }
1172  }
1173 
1174 
1175  // erase the content of the database, including the names of the variables
1176  template < typename T_DATA, template < typename > class ALLOC >
1177  INLINE void IDatabaseTable< T_DATA, ALLOC >::clear() {
1178  updateHandlers__(0);
1179  rows_.clear();
1182  }
1183 
1184 
1185  // returns the set of symbols for the missing values
1186  template < typename T_DATA, template < typename > class ALLOC >
1187  INLINE const std::vector< std::string, ALLOC< std::string > >&
1188  IDatabaseTable< T_DATA, ALLOC >::missingSymbols() const {
1189  return missing_symbols_;
1190  }
1191 
1192 
1193  /// changes the max number of threads that a database can use
1194  template < typename T_DATA, template < typename > class ALLOC >
1196  const std::size_t nb) const {
1197  if (nb == std::size_t(0))
1198  max_nb_threads_ = std::size_t(1);
1199  else
1200  max_nb_threads_ = nb;
1201  }
1202 
1203 
1204  /// returns the number of threads used to parse the database
1205  template < typename T_DATA, template < typename > class ALLOC >
1207  return max_nb_threads_;
1208  }
1209 
1210 
1211  /** @brief changes the minimum number of rows a thread should process in a
1212  * multithreading context */
1213  template < typename T_DATA, template < typename > class ALLOC >
1215  const std::size_t nb) const {
1216  if (nb == std::size_t(0))
1218  else
1220  }
1221 
1222 
1223  /// returns the minimum of rows that each thread should process
1224  template < typename T_DATA, template < typename > class ALLOC >
1225  INLINE std::size_t
1227  return min_nb_rows_per_thread_;
1228  }
1229 
1230 
1231  /// insert new rows at the end of the database
1232  template < template < typename > class ALLOC >
1234  const typename IDatabaseTableInsert4DBCell< ALLOC, true >::
1235  template DBVector< DBVector< std::string > >& new_rows) {
1236  for (const auto& new_row: new_rows)
1237  this->insertRow(new_row);
1238  }
1239 
1240 
1241  /// insert new rows at the end of the database
1242  template < template < typename > class ALLOC >
1244  const typename IDatabaseTableInsert4DBCell< ALLOC, false >::
1245  template DBVector< DBVector< std::string > >& new_rows) {
1246  for (const auto& new_row: new_rows)
1247  this->insertRow(new_row);
1248  }
1249 
1250 
1251  /// assign a given weight to all the rows of the database
1252  template < typename T_DATA, template < typename > class ALLOC >
1253  void
1255  // determine the number of threads to use and the number of rows
1256  // they should process
1257  std::vector< std::pair< std::size_t, std::size_t > > ranges;
1258  const std::size_t db_size = nbRows();
1260  if (nb_threads < 1)
1261  nb_threads = 1;
1262  else if (nb_threads > max_nb_threads_)
1266 
1267  // assign to threads the ranges over which they should change the
1268  // rows weights
1269  std::size_t begin_index = std::size_t(0);
1270  for (std::size_t i = std::size_t(0); i < nb_threads; ++i) {
1272  if (rest_rows != std::size_t(0)) {
1273  ++end_index;
1274  --rest_rows;
1275  }
1276  ranges.push_back(
1279  }
1280 
1281  // perform the assignment:
1282  // launch the threads
1283  // here we use openMP for launching the threads because, experimentally,
1284  // it seems to provide results that are twice as fast as the results
1285  // with the std::thread
1286 # pragma omp parallel num_threads(int(nb_threads))
1287  {
1288  // get the number of the thread
1292 
1293  for (std::size_t i = begin_index; i < end_index; ++i) {
1295  }
1296  }
1297  }
1298 
1299  /// assigns a given weight to the ith row of the database
1300  template < typename T_DATA, template < typename > class ALLOC >
1301  void IDatabaseTable< T_DATA, ALLOC >::setWeight(const std::size_t i,
1302  const double weight) {
1303  // check that i is less than the number of rows
1304  const std::size_t dbsize = nbRows();
1305  if (i >= dbsize) {
1306  std::string str;
1307  switch (i) {
1308  case 1:
1309  str = "st";
1310  break;
1311  case 2:
1312  str = "nd";
1313  break;
1314  default:
1315  str = "th";
1316  }
1318  "it is impossible to set the weight of the "
1319  << i << str << " record because the database contains only "
1320  << nbRows() << " records");
1321  }
1322 
1323  // check that the weight is not negative
1324  if (weight < 0) {
1325  std::string str;
1326  switch (i) {
1327  case 1:
1328  str = "st";
1329  break;
1330  case 2:
1331  str = "nd";
1332  break;
1333  default:
1334  str = "th";
1335  }
1337  "it is impossible to set "
1338  << weight << " as a weight of the " << i << str
1339  << " record because this weight is negative");
1340  }
1341 
1342  rows_[i].setWeight(weight);
1343  }
1344 
1345 
1346  /// returns the weight of the ith record
1347  template < typename T_DATA, template < typename > class ALLOC >
1348  double IDatabaseTable< T_DATA, ALLOC >::weight(const std::size_t i) const {
1349  // check that i is less than the number of rows
1350  const std::size_t dbsize = nbRows();
1351  if (i >= dbsize) {
1352  std::string str;
1353  switch (i) {
1354  case 1:
1355  str = "st";
1356  break;
1357  case 2:
1358  str = "nd";
1359  break;
1360  default:
1361  str = "th";
1362  }
1364  "it is impossible to get the weight of the "
1365  << i << str << " record because the database contains only "
1366  << nbRows() << " records");
1367  }
1368 
1369  return rows_[i].weight();
1370  }
1371 
1372 
1373  /// returns the weight of the whole database
1374  template < typename T_DATA, template < typename > class ALLOC >
1375  double IDatabaseTable< T_DATA, ALLOC >::weight() const {
1376  double w = 0.0;
1377  for (const auto& row: rows_)
1378  w += row.weight();
1379  return w;
1380  }
1381 
1382 
1383  } /* namespace learning */
1384 
1385 } /* namespace gum */
1386 
1387 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)