aGrUM  0.14.2
IDatabaseTable.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
28 #ifndef GUM_IDATABASE_TABLE_H
29 #define GUM_IDATABASE_TABLE_H
30 
31 #include <cstddef>
32 #include <utility>
33 #include <string>
34 #include <cstring>
35 #include <memory>
36 #include <vector>
37 #include <mutex>
38 
39 #include <agrum/agrum.h>
40 #include <agrum/core/thread.h>
41 #include <agrum/core/OMPThreads.h>
46 
47 
48 namespace gum {
49 
50  namespace learning {
51 
52  template < template < typename > class ALLOC, bool ENABLE_INSERT >
54 
// Specialization of IDatabaseTableInsert4DBCell for ENABLE_INSERT == true.
// On top of the string-based insertions it declares insertRow / insertRows
// overloads that take rows and matrices of DBCell.
55  template < template < typename > class ALLOC >
56  struct IDatabaseTableInsert4DBCell< ALLOC, true > {
// the type for the allocated vectors: a std::vector using allocator ALLOC
57  template < typename TX_DATA >
58  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
59 
// NOTE(review): the alias that followed this template header was dropped from
// this listing (presumably Row, which the insertRow overloads below use) --
// confirm against the original header.
60  template < typename TX_DATA >
62 
// the type for the matrices: a vector of DBRow< TX_DATA, ALLOC >
63  template < typename TX_DATA >
64  using Matrix =
65  std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > >;
66 
67 
69 
// pure virtual: insert one row of DBCell (rvalue-reference overload)
71  virtual void insertRow(Row< DBCell >&& new_row) = 0;
72 
74 
// pure virtual: insert one row of DBCell (const-reference overload)
76  virtual void insertRow(const Row< DBCell >& new_row) = 0;
77 
79 
// pure virtual: insert a matrix of DBCell rows (rvalue-reference overload)
81  virtual void insertRows(Matrix< DBCell >&& new_rows) = 0;
82 
84 
// pure virtual: insert a matrix of DBCell rows (const-reference overload)
86  virtual void insertRows(const Matrix< DBCell >& new_rows) = 0;
87 
89 
// pure virtual: insert one row given as a vector of strings
91  virtual void insertRow(
92  const std::vector< std::string, ALLOC< std::string > >& new_row) = 0;
93 
95 
// insert several rows, each given as a vector of strings; unlike the
// overloads above this one is virtual but not pure (no "= 0"), so a
// definition is expected elsewhere (not visible in this listing)
97  virtual void insertRows(const DBVector< DBVector< std::string > >& new_rows);
98  };
99 
100 
// Specialization of IDatabaseTableInsert4DBCell for ENABLE_INSERT == false.
// It declares only the string-based insertions; the DBCell-based insertRow /
// insertRows overloads of the "true" specialization are absent here.
101  template < template < typename > class ALLOC >
102  struct IDatabaseTableInsert4DBCell< ALLOC, false > {
// the type for the allocated vectors: a std::vector using allocator ALLOC
103  template < typename TX_DATA >
104  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
105 
// NOTE(review): the alias that followed this template header was dropped from
// this listing (presumably Row, as in the sibling specialization) -- confirm
// against the original header.
106  template < typename TX_DATA >
108 
// the type for the matrices: a vector of DBRow< TX_DATA, ALLOC >
109  template < typename TX_DATA >
110  using Matrix =
111  std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > >;
112 
114 
// pure virtual: insert one row given as a vector of strings
116  virtual void insertRow(
117  const std::vector< std::string, ALLOC< std::string > >& new_row) = 0;
118 
120 
// insert several rows, each given as a vector of strings; virtual but not
// pure (no "= 0"), so a definition is expected elsewhere (not visible in
// this listing)
122  virtual void insertRows(const DBVector< DBVector< std::string > >& new_rows);
123  };
124 
125 
// The common class for the tabular database tables, templated on the cell
// type T_DATA and on an allocator template ALLOC (std::allocator by default).
// NOTE(review): the listing jumps from 253 to 256, so the line naming the
// class and opening its first base class is missing. The surviving fragment
// shows a base parameterized by ALLOC and by
// !std::is_same< T_DATA, DBCell >::value (presumably
// IDatabaseTableInsert4DBCell, whose second parameter is a bool) -- confirm
// against the original header. The class also inherits privately from
// ALLOC< T_DATA >.
252  template < typename T_DATA,
253  template < typename > class ALLOC = std::allocator >
256  ALLOC,
257  !std::is_same< T_DATA, DBCell >::value >
258  , private ALLOC< T_DATA > {
259  public:
// the type for the vectors used in the IDatabaseTable
261  template < typename TX_DATA >
262  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
263 
// NOTE(review): the alias that followed this template header was dropped from
// this listing (presumably Row, which later member declarations use) --
// confirm against the original header.
265  template < typename TX_DATA >
267 
// the type for the matrices stored into the database
269  template < typename TX_DATA >
270  using Matrix =
271  std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > >;
272 
// a vector of strings using the caller-chosen allocator XALLOC; used as the
// type of the missing_symbols constructor parameter
273  template < template < typename > class XALLOC >
274  using MissingValType = std::vector< std::string, XALLOC< std::string > >;
275 
276 
// char-sized flag type with enumerators False (0) and True (1); it is the
// type of the "contains missing data" arguments of insertRow / insertRows
277  enum IsMissing : char { False, True };
278 
279 
// The (unsafe) handler for the tabular databases: a nested class deriving
// from DBHandler< T_DATA, ALLOC > that exposes an iterator-like interface
// (increment/decrement, dereference, begin/end, range selection).
371  class Handler : public DBHandler< T_DATA, ALLOC > {
372  public:
// Types for STL compliance.
// NOTE(review): the listing jumps from 375 to 378; the value_type and
// reference aliases used below are presumably declared on the dropped lines
// -- confirm against the original header.
375  using iterator_category = std::random_access_iterator_tag;
378  using const_reference = const value_type&;
379  using pointer = value_type*;
380  using const_pointer = const value_type*;
381  using difference_type = std::ptrdiff_t;
382  using allocator_type = ALLOC< T_DATA >;
384 
385 
// same vector alias as in the enclosing class, redeclared locally
386  template < typename TX_DATA >
387  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
388 
// NOTE(review): the alias that followed this template header was dropped from
// this listing (presumably Row) -- confirm against the original header.
389  template < typename TX_DATA >
391 
// same matrix alias as in the enclosing class, redeclared locally
392  template < typename TX_DATA >
393  using Matrix = std::vector< DBRow< TX_DATA, ALLOC >,
394  ALLOC< DBRow< TX_DATA, ALLOC > > >;
395 
396 
397  // ########################################################################
399  // ########################################################################
401 
403 
406 
408 
// Constructors / destructor section.
// NOTE(review): numbering gaps before this point suggest at least one other
// constructor declaration was dropped from the listing -- confirm.
// copy constructor
409  Handler(const Handler& h);
410 
412 
// move constructor
413  Handler(Handler&& h);
414 
// virtual destructor
416  virtual ~Handler();
417 
419 
420  // ########################################################################
422  // ########################################################################
424 
// Operators section. The two assignment operators are virtual and NOT final
// (HandlerSafe declares its own assignment operators); every other operator
// below is declared final.
// copy assignment
426  virtual Handler& operator=(const Handler&);
427 
// move assignment
429  virtual Handler& operator=(Handler&&);
430 
432 
// pre-increment
434  virtual Handler& operator++() final;
435 
437 
// pre-decrement
440  virtual Handler& operator--() final;
441 
443 
// compound addition of an unsigned offset i
446  virtual Handler& operator+=(const std::size_t i) final;
447 
449 
// compound subtraction of an unsigned offset i
452  virtual Handler& operator-=(const std::size_t i) final;
453 
// equality comparison with another handler
455  virtual bool operator==(const Handler& handler) const final;
456 
// inequality comparison with another handler
458  virtual bool operator!=(const Handler& handler) const final;
459 
461 
// dereference: yields a const reference to a value_type
465  virtual const_reference operator*() const final;
466 
468 
// member access: yields a const pointer to a value_type
472  virtual const_pointer operator->() const final;
473 
475 
476 
477  // ########################################################################
479  // ########################################################################
481 
483 
// Accessors / modifiers section. All members are final except begin(),
// end() and database().
490  virtual std::size_t size() const final;
491 
493  virtual std::size_t DBSize() const final;
494 
// rowSafe() / row() come in const and non-const flavours.
// NOTE(review): the names suggest rowSafe() is a checked variant of row();
// the difference is not visible in this listing -- confirm in the
// implementation file.
496 
497  virtual const_reference rowSafe() const final;
498 
500 
501  virtual reference rowSafe() final;
502 
504 
508  virtual const_reference row() const final;
509 
511 
515  virtual reference row() final;
516 
518  virtual void nextRow() final;
519 
521  virtual std::size_t numRow() const final;
522 
524  virtual bool hasRows() const final;
525 
527  virtual void reset() final;
528 
// begin() and end() return a Handler by value
534  virtual Handler begin() const;
535 
541  virtual Handler end() const;
542 
544 
// sets the (begin, end) pair of indices that range() reports back;
// presumably stored in __begin_index / __end_index -- confirm
554  virtual void setRange(std::size_t begin, std::size_t end) final;
555 
557  virtual std::pair< std::size_t, std::size_t > range() const final;
558 
560  virtual const DBVector< std::string >& variableNames() const final;
561 
563  virtual std::size_t nbVariables() const final;
564 
566 
// returns a const reference to an IDatabaseTable< T_DATA, ALLOC >
568  virtual const IDatabaseTable< T_DATA, ALLOC >& database() const;
569 
571 
572 
573 #ifndef DOXYGEN_SHOULD_SKIP_THIS
574 
575  protected:
578 
580 
// pointer to a const matrix of rows; it has no default member initializer,
// unlike the three indices below
582  const Matrix< T_DATA >* __row;
583 
// index, initialized to 0 (presumably the current row -- confirm)
585  std::size_t __index{std::size_t(0)};
586 
// begin index, initialized to 0
588  std::size_t __begin_index{std::size_t(0)};
589 
// end index, initialized to 0
591  std::size_t __end_index{std::size_t(0)};
592 
// the enclosing database class may access the handler internals
593  friend class IDatabaseTable< T_DATA, ALLOC >;
594 
595 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
596  };
597 
598 
// The safe handler of the tabular databases: a nested class deriving from
// Handler that adds private attach/detach hooks and assignment operators
// accepting both HandlerSafe and plain Handler sources.
690  class HandlerSafe : public Handler {
691  public:
// Types for STL compliance; value_type is taken from Handler.
// NOTE(review): the listing jumps from 695 to 697; a reference alias is
// presumably declared on the dropped line -- confirm.
694  using iterator_category = std::random_access_iterator_tag;
695  using value_type = typename Handler::value_type;
697  using const_reference = const value_type&;
698  using pointer = value_type*;
699  using const_pointer = const value_type*;
700  using difference_type = std::ptrdiff_t;
701  using allocator_type = ALLOC< T_DATA >;
703 
704  // ########################################################################
706  // ########################################################################
708 
710 
713 
// Constructors / destructor section.
// NOTE(review): numbering gaps around this point suggest that other
// constructor declarations were dropped from the listing -- confirm.
// copy constructor
715  HandlerSafe(const HandlerSafe& h);
716 
719 
// virtual destructor
721  virtual ~HandlerSafe();
722 
724 
725  // ########################################################################
727  // ########################################################################
729 
// Assignment operators: copy and move, each from a HandlerSafe and from a
// plain Handler.
731  virtual HandlerSafe& operator=(const HandlerSafe&);
732 
734  virtual HandlerSafe& operator=(const Handler&);
735 
737  virtual HandlerSafe& operator=(HandlerSafe&&);
738 
740  virtual HandlerSafe& operator=(Handler&&);
741 
743 
744 
745 #ifndef DOXYGEN_SHOULD_SKIP_THIS
746 
747  private:
// NOTE(review): presumably register / unregister this handler with its
// database (the enclosing class declares __attachHandler(HandlerSafe*) and
// __detachHandler(HandlerSafe*)) -- confirm in the implementation file.
749  void __attachHandler();
750 
752  void __detachHandler();
753 
// the enclosing database class may access the handler internals
754  friend class IDatabaseTable< T_DATA, ALLOC >;
755 
756 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
757  };
758 
759 
// Types for STL compliance of the database class itself.
// NOTE(review): the listing starts this group at 764; the value_type and
// reference aliases used below are presumably declared on dropped lines --
// confirm against the original header.
764  using const_reference = const value_type&;
765  using pointer = value_type*;
766  using const_pointer = const value_type*;
767  using size_type = std::size_t;
768  using difference_type = std::ptrdiff_t;
// the iterators of the database are its handlers: Handler is the plain
// iterator, HandlerSafe the safe one; the const_ variants are const-qualified
// versions of the same classes
769  using iterator = Handler;
770  using iterator_safe = HandlerSafe;
771  using const_iterator = const Handler;
772  using const_iterator_safe = const HandlerSafe;
773  using allocator_type = ALLOC< T_DATA >;
775 
776 
777  // ##########################################################################
779  // ##########################################################################
781 
// Constructors, clone and destructor.
// NOTE(review): the lines carrying the constructor names (and the first
// parameter of the second and third declarations) were dropped from this
// listing; only the trailing parameters survive. Confirm the full signatures
// against the original header.
//
// First declaration: a template taking the symbols denoting missing values
// (a vector of strings with allocator MISSALLOC), the variable names (a
// vector of strings with allocator VARALLOC) and an allocator for T_DATA.
783  template < template < typename > class VARALLOC,
784  template < typename >
785  class MISSALLOC >
787  const MissingValType< MISSALLOC >& missing_symbols,
788  const std::vector< std::string, VARALLOC< std::string > >& var_names,
789  const ALLOC< T_DATA >& alloc);
790 
793 
// tail of a declaration whose last parameter is an allocator
// (presumably a copy constructor with a given allocator -- confirm)
796  const allocator_type& alloc);
797 
800 
// tail of a declaration whose last parameter is an allocator
// (presumably a move constructor with a given allocator -- confirm)
803  const allocator_type& alloc);
804 
// pure virtual: returns a pointer to an IDatabaseTable; the caller
// presumably owns the returned object -- confirm
806  virtual IDatabaseTable< T_DATA, ALLOC >* clone() const = 0;
807 
// pure virtual clone overload taking the allocator to use; the line holding
// its return type was dropped from this listing
810  clone(const allocator_type& alloc) const = 0;
811 
// virtual destructor
813  virtual ~IDatabaseTable();
814 
816 
817 
818  // ##########################################################################
820  // ##########################################################################
822 
// Iterator access. begin() / beginSafe() return a handler by value, whereas
// end() / endSafe() are noexcept and return a const reference to a handler
// (presumably the objects pointed to by the private __end / __end_safe
// members -- confirm in the implementation file).
824  iterator begin() const;
825 
827  iterator_safe beginSafe() const;
828 
830  const iterator& end() const noexcept;
831 
833  const iterator_safe& endSafe() const noexcept;
834 
836 
837 
838  // ##########################################################################
840  // ##########################################################################
842 
// returns a const reference to a matrix of rows (presumably the protected
// _rows member -- confirm)
844  const Matrix< T_DATA >& content() const noexcept;
845 
// returns a (plain) handler by value
847  iterator handler() const;
848 
// returns a safe handler by value
850  iterator_safe handlerSafe() const;
851 
853 
// returns a const reference to the vector of variable names
854  const DBVector< std::string >& variableNames() const noexcept;
855 
857 
// pure virtual: sets the variable names from a vector using the class
// allocator ALLOC. from_external_object defaults to true; its exact meaning
// is not visible in this listing.
882  virtual void setVariableNames(
883  const std::vector< std::string, ALLOC< std::string > >& names,
884  const bool from_external_object = true) = 0;
885 
887 
// non-virtual template overload accepting a vector of names built with any
// other allocator OTHER_ALLOC
912  template < template < typename > class OTHER_ALLOC >
913  void setVariableNames(
914  const std::vector< std::string, OTHER_ALLOC< std::string > >& names,
915  const bool from_external_object = true);
916 
918 
// returns the name associated with index k
920  const std::string& variableName(const std::size_t k) const;
921 
923 
// returns a single index for the given variable name
// NOTE(review): the behaviour when the name is absent or appears several
// times is not visible here -- confirm in the implementation file.
928  std::size_t columnFromVariableName(const std::string& name) const;
929 
931 
// returns a vector of indices for the given variable name
934  DBVector< std::size_t >
935  columnsFromVariableName(const std::string& name) const;
936 
// noexcept size queries
938  std::size_t nbVariables() const noexcept;
939 
941  std::size_t nbRows() const noexcept;
942 
944  std::size_t size() const noexcept;
945 
947  bool empty() const noexcept;
948 
950 
// pure virtual: marks column k as ignored; from_external_object defaults to
// true (its exact meaning is not visible in this listing)
979  virtual void ignoreColumn(const std::size_t k,
980  const bool from_external_object = true) = 0;
981 
// pure virtual: returns, by value, a vector of column indices
983  virtual const DBVector< std::size_t > ignoredColumns() const = 0;
984 
// pure virtual: returns, by value, a vector of column indices
987  virtual const DBVector< std::size_t > inputColumns() const = 0;
988 
// NOTE(review): tail of a declaration ending in "::insertRow;" whose first
// line was dropped from this listing; presumably a using-declaration that
// re-exposes the base class insertRow overloads -- confirm against the
// original header.
990  ALLOC,
991  !std::is_same< T_DATA, DBCell >::value >::insertRow;
992 
994 
// non-virtual template overload: insert a row given as a vector of strings
// built with any other allocator OTHER_ALLOC
999  template < template < typename > class OTHER_ALLOC >
1000  void insertRow(
1001  const std::vector< std::string, OTHER_ALLOC< std::string > >& new_row);
1002 
1004 
// insert a row of T_DATA (rvalue-reference overload); the caller states
// through contains_missing_data whether the row holds missing values
1010  virtual void insertRow(Row< T_DATA >&& new_row,
1011  const IsMissing contains_missing_data);
1012 
1014 
// same as above, const-reference overload
1020  virtual void insertRow(const Row< T_DATA >& new_row,
1021  const IsMissing contains_missing_data);
1022 
// NOTE(review): tail of a declaration ending in "::insertRows;" whose first
// line was dropped from this listing; presumably a using-declaration that
// re-exposes the base class insertRows overloads -- confirm.
1024  ALLOC,
1025  !std::is_same< T_DATA, DBCell >::value >::insertRows;
1026 
1028 
// insert a matrix of rows (rvalue-reference overload) together with a vector
// of IsMissing flags (presumably one flag per inserted row -- confirm)
1040  virtual void insertRows(Matrix< T_DATA >&& new_rows,
1041  const DBVector< IsMissing >& rows_have_missing_vals);
1042 
1044 
// same as above, const-reference overload
1056  virtual void insertRows(const Matrix< T_DATA >& new_rows,
1057  const DBVector< IsMissing >& rows_have_missing_vals);
1058 
1060 
// Row erasure, all non-virtual.
// erase the row at the given index
1063  void eraseRow(std::size_t index);
1064 
1066 
1068  void eraseFirstRow();
1069 
1071 
1073  void eraseLastRow();
1074 
1076 
// erase k rows at the front
1078  void eraseFirstRows(const std::size_t k);
1079 
1081 
// erase k rows at the back
1083  void eraseLastRows(const std::size_t k);
1084 
1086 
// erase a range of rows delimited by deb and end
// NOTE(review): whether end is inclusive is not visible here -- confirm.
1087  void eraseRows(std::size_t deb, std::size_t end);
1088 
1090  void eraseAllRows();
1091 
// virtual, so derived tables can extend what gets cleared
1093  virtual void clear();
1094 
// returns an allocator by value
1096  ALLOC< T_DATA > getAllocator() const;
1097 
// returns a const reference to the vector of missing-value symbols
1099  const DBVector< std::string >& missingSymbols() const;
1100 
// missing-value queries: without argument, and for a given index k
// NOTE(review): whether k designates a column or a row is not visible in
// this listing -- confirm.
1102  bool hasMissingValues() const;
1103 
1105  bool hasMissingValues(const std::size_t k) const;
1106 
1108 
// Threading knobs. Both setters are declared const; the corresponding data
// members (_max_nb_threads, _min_nb_rows_per_thread) are declared mutable in
// this class.
1111  void setMaxNbThreads(const std::size_t nb) const;
1112 
1114  std::size_t nbThreads() const;
1115 
1125  void setMinNbRowsPerThread(const std::size_t nb) const;
1126 
1128  std::size_t minNbRowsPerThread() const;
1129 
// assigns new_weight to all rows (per the name; the implementation is not
// visible in this listing)
1131  void setAllRowsWeight(const double new_weight);
1132 
1134 
1135 
1136  protected:
// the names of the variables, stored as a vector of strings
1138  DBVector< std::string > _variable_names;
1139 
1140  // the vector of DBRows containing all the raw data
1141  Matrix< T_DATA > _rows;
1142 
1143  // the set of strings corresponding to missing values
1144  DBVector< std::string > _missing_symbols;
1145 
1146  // a vector indicating which rows have missing values (char != 0)
1147  DBVector< IsMissing > _has_row_missing_val;
1148 
1149  // the maximal number of threads that the database can use
// (mutable, and initialized from thread::getMaxNumberOfThreads())
1150  mutable std::size_t _max_nb_threads{
1151  std::size_t(thread::getMaxNumberOfThreads())};
1152 
1153  // the min number of rows that a thread should process in a
1154  // multithreading context
// (mutable, default value 100)
1155  mutable std::size_t _min_nb_rows_per_thread{100};
1156 
1157 
// checks a row size; returns a bool (presumably true when size matches the
// number of variables -- confirm in the implementation file)
1160  bool _isRowSizeOK(const std::size_t size) const;
1161 
// protected copy assignment; the line holding its return type was dropped
// from this listing
1164  operator=(const IDatabaseTable< T_DATA, ALLOC >& from);
1165 
// protected move assignment; the line holding its return type was dropped
// from this listing
1168  operator=(IDatabaseTable< T_DATA, ALLOC >&& from);
1169 
1170 
// Private bookkeeping for the safe handlers and the end iterators; hidden
// from Doxygen through DOXYGEN_SHOULD_SKIP_THIS.
1171 #ifndef DOXYGEN_SHOULD_SKIP_THIS
1172 
1173  private:
1174  // the list of handlers currently attached to the database
1175  /* this is useful when the database is resized */
// (mutable: the const attach/detach/update members below operate on it)
1176  mutable DBVector< HandlerSafe* > __list_of_safe_handlers;
1177 
1178  // a mutex to safely add/remove handlers in __list_of_safe_handlers
1179  mutable std::mutex __safe_handlers_mutex;
1180 
1181  // the end iterator for the database
// (raw pointer, null by default; see __createEndIterators below)
1182  Handler* __end{nullptr};
1183 
1184  // the safe end iterator for the database
// (raw pointer, null by default; see __createEndIterators below)
1185  iterator_safe* __end_safe{nullptr};
1186 
// attach a safe handler to the database; const, relying on the mutable
// members above
1188  void __attachHandler(HandlerSafe* handler) const;
1189 
// detach a safe handler from the database
1191  void __detachHandler(HandlerSafe* handler) const;
1192 
// notify the attached handlers of a new size (presumably the new number of
// rows -- confirm in the implementation file)
1194  void __updateHandlers(std::size_t new_size) const;
1195 
1196  // create the end iterators
1197  void __createEndIterators();
1198 
1199 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
1200 
1201 
// both nested handler classes may access the database internals
1203  friend class Handler;
1204  friend class HandlerSafe;
1205  };
1206 
1207  } /* namespace learning */
1208 
1209 } /* namespace gum */
1210 
1213 
1214 #endif /* GUM_IDATABASE_TABLE_H */
The common class for the tabular database tables.
std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > > Matrix
std::vector< TX_DATA, ALLOC< TX_DATA > > DBVector
the type for the vectors used in the IDatabaseTable
The base class for all database handlers.
std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > > Matrix
The class representing the original values of the cells of databases.
Definition: DBCell.h:69
std::ptrdiff_t difference_type
Types for STL compliance.
Definition: DBHandler.h:129
the (unsafe) handler for the tabular databases
std::vector< TX_DATA, ALLOC< TX_DATA > > DBVector
std::vector< TX_DATA, ALLOC< TX_DATA > > DBVector
STL namespace.
std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > > Matrix
std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > > Matrix
the type for the matrices stored into the database
std::vector< std::string, XALLOC< std::string > > MissingValType
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
std::vector< TX_DATA, ALLOC< TX_DATA > > DBVector
the type for the allocated vectors in IDatabases
Definition: DBHandler.h:135
C++11 threads convenience utilities for agrum.
The class representing a record stored in a tabular database.
The class representing the original values of the cells of databases.
The base class for all database handlers.
Definition: DBHandler.h:118
std::random_access_iterator_tag iterator_category
Types for STL compliance.
Definition: DBHandler.h:122
the safe handler of the tabular databases
The class for storing a record in a database.
Definition: DBRow.h:53
Wrappers for OpenMP.
typename DBHandler< T_DATA, ALLOC >::value_type value_type
Types for STL compliance.
ALLOC< void > allocator_type
Types for STL compliance.
Definition: DBHandler.h:130
The base class for all the tabular databases' cell translators.
unsigned int getMaxNumberOfThreads()
returns the maximum number of threads possible
Definition: thread_inl.h:38
The implementation of the common class for tabular databases.