aGrUM  0.16.0
databaseTable.h
Go to the documentation of this file.
1 
32 #ifndef GUM_DATABASE_TABLE_H
33 #define GUM_DATABASE_TABLE_H
34 
35 #include <numeric>
36 #include <algorithm>
37 #include <functional>
38 #include <exception>
39 #include <vector>
40 
41 #include <agrum/agrum.h>
42 #include <agrum/core/math/math.h>
43 #include <agrum/core/set.h>
44 #include <agrum/core/thread.h>
51 
52 namespace gum {
53 
54  namespace learning {
55 
/// The class representing a tabular database as used by learning tasks.
///
/// It derives from IDatabaseTable< DBTranslatedValue, ALLOC >; ALLOC is the
/// allocator template used by the container aliases declared below.
// NOTE(review): the number at the start of each line is the line number of
// the original header. Gaps in that numbering mark lines that are absent from
// this listing (presumably documentation blocks, but also parts of some
// declarations, which are flagged individually below).
186  template < template < typename > class ALLOC = std::allocator >
187  class DatabaseTable : public IDatabaseTable< DBTranslatedValue, ALLOC > {
188  public:
/// the type for the vectors used in the DatabaseTable
190  template < typename TX_DATA >
191  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
192 
// NOTE(review): the alias that followed this template header (listing line
// 195) is absent from this listing.
194  template < typename TX_DATA >
196 
/// the type for the matrices stored into the database
198  template < typename TX_DATA >
199  using Matrix =
200  std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > >;
201 
/// a vector of strings allocated with XALLOC; used below as the type of the
/// constructor's missing_symbols parameter
202  template < template < typename > class XALLOC >
203  using MissingValType = std::vector< std::string, XALLOC< std::string > >;
204 
207 
/// the safe handler type
// NOTE(review): the right-hand side of this alias (listing line 210) is
// absent from this listing.
209  using HandlerSafe =
211 
// NOTE(review): the right-hand side of this alias (listing line 213) is
// absent from this listing.
212  using IsMissing =
214 
/// Types for STL compliance.
219  using const_reference = const value_type&;
220  using pointer = value_type*;
221  using const_pointer = const value_type*;
222  using size_type = std::size_t;
223  using difference_type = std::ptrdiff_t;
224  using iterator = Handler;
226  using allocator_type = ALLOC< DBTranslatedValue >;
228 
229 
230  // ##########################################################################
232  // ##########################################################################
234 
/// default constructor
// NOTE(review): the line carrying the constructor name (listing line 237) is
// absent from this listing; only the parameter list is visible.
236  template < template < typename > class XALLOC >
238  const MissingValType< XALLOC >& missing_symbols,
239  const DBTranslatorSet< ALLOC >& translators = DBTranslatorSet< ALLOC >(),
240  const allocator_type& alloc = allocator_type());
241 
// NOTE(review): parameter list of a declaration whose first line (listing
// line 243) is absent; presumably a constructor without missing symbols.
244  const DBTranslatorSet< ALLOC >& translators = DBTranslatorSet< ALLOC >(),
245  const allocator_type& alloc = allocator_type());
246 
249 
// NOTE(review): tail of a declaration whose beginning is absent from this
// listing; presumably a copy constructor taking an allocator - TODO confirm.
252  const allocator_type& alloc);
253 
256 
259 
/// virtual copy constructor
261  virtual DatabaseTable< ALLOC >* clone() const final;
262 
/// overload of clone() that additionally receives an allocator
264  virtual DatabaseTable< ALLOC >*
265  clone(const allocator_type& alloc) const final;
266 
/// destructor
268  virtual ~DatabaseTable();
269 
271 
272  // ##########################################################################
274  // ##########################################################################
276 
/// copy assignment operator
278  DatabaseTable< ALLOC >& operator=(const DatabaseTable< ALLOC >& from);
279 
/// move assignment operator
281  DatabaseTable< ALLOC >& operator=(DatabaseTable< ALLOC >&& from);
282 
284 
285 
286  // ##########################################################################
288  // ##########################################################################
290 
292 
/// insert a new translator into the database table
// NOTE(review): the returned std::size_t is presumably the position of the
// inserted translator - TODO confirm against the implementation.
306  std::size_t insertTranslator(const DBTranslator< ALLOC >& translator,
307  const std::size_t input_column,
308  const bool unique_column = true);
309 
311 
/// overload of insertTranslator taking a Variable instead of a translator
331  std::size_t insertTranslator(const Variable& var,
332  const std::size_t input_column,
333  const bool unique_column = true);
334 
336 
/// overload of insertTranslator taking a Variable and a vector of
/// missing symbols (passed by value)
352  template < template < typename > class XALLOC >
353  std::size_t insertTranslator(
354  const Variable& var,
355  const std::size_t input_column,
356  std::vector< std::string, XALLOC< std::string > > missing_symbols,
357  const bool unique_column = true);
358 
/// erases either the kth translator or all those parsing the kth column of
/// the input dataset
// NOTE(review): k_is_input_col presumably selects between these two
// readings of k - TODO confirm.
373  void eraseTranslators(const std::size_t k,
374  const bool k_is_input_col = false);
375 
/// returns the set of translators
377  const DBTranslatorSet< ALLOC >& translatorSet() const;
378 
/// returns either the kth translator of the database table or the first one
/// reading the kth column of the input dataset (reference text is truncated
/// at this point - TODO confirm the wording)
392  const DBTranslator< ALLOC >&
393  translator(const std::size_t k, const bool k_is_input_col = false) const;
394 
/// returns either the kth variable of the database table or the first one
/// corresponding to the kth column (reference text is truncated after
/// "colum" - TODO confirm the rest)
408  const Variable& variable(const std::size_t k,
409  const bool k_is_input_col = false) const;
410 
413 
415 
/// sets the names of the variables
440  virtual void setVariableNames(
441  const std::vector< std::string, ALLOC< std::string > >& names,
442  const bool from_external_object = true) final;
443 
/// makes the database table ignore from now on the kth column of the input
/// dataset or the column parsed ... (reference text is truncated here)
476  virtual void ignoreColumn(const std::size_t k,
477  const bool from_external_object = true) final;
478 
480 
/// returns the set of columns of the original dataset that are ignored
482  virtual const DBVector< std::size_t > ignoredColumns() const final;
483 
/// returns the set of columns of the original dataset that are present in
/// the DatabaseTable
486  virtual const DBVector< std::size_t > inputColumns() const final;
487 
/// returns the domain size of the kth variable of the database table or of
/// that of the first one corresponding ... (reference text is truncated)
502  std::size_t domainSize(const std::size_t k,
503  const bool k_is_input_col = false) const;
504 
/// returns the domain sizes of all the variables in the database table
506  DBVector< std::size_t > domainSizes() const;
507 
/// indicates whether a reordering is needed to sort the translations of the
/// kth translator or those of ... (reference text is truncated)
540  bool needsReordering(const std::size_t k,
541  const bool k_is_input_col = false) const;
542 
/// reordering restricted by k / k_is_input_col
// NOTE(review): presumably reorders what needsReordering(k, k_is_input_col)
// reports on - TODO confirm.
562  void reorder(const std::size_t k, const bool k_is_input_col = false);
563 
565 
/// performs a reordering of all the columns
570  void reorder();
571 
/// makes the insertRow overloads of the base class visible alongside the
/// ones declared below
573  using IDatabaseTable< DBTranslatedValue, ALLOC >::insertRow;
574 
576 
/// insert a new row at the end of the database
598  virtual void insertRow(
599  const std::vector< std::string, ALLOC< std::string > >& new_row) final;
600 
602 
/// insertRow overload taking a row of DBTranslatedValue by rvalue reference
/// together with an IsMissing indicator
611  virtual void insertRow(Row< DBTranslatedValue >&& new_row,
612  const IsMissing contains_missing_data) final;
613 
615 
/// insertRow overload taking a row of DBTranslatedValue by const reference
/// together with an IsMissing indicator
624  virtual void insertRow(const Row< DBTranslatedValue >& new_row,
625  const IsMissing contains_missing_data) final;
626 
628 
/// insertRow overload taking a row of DBCell by const reference
634  virtual void insertRow(const Row< DBCell >& new_row) final;
635 
637 
/// insertRow overload taking a row of DBCell by rvalue reference
643  virtual void insertRow(Row< DBCell >&& new_row) final;
644 
646 
/// insert a set of new DBRows at the end of the database
655  virtual void
656  insertRows(Matrix< DBTranslatedValue >&& new_rows,
657  const DBVector< IsMissing >& rows_have_missing_vals) final;
658 
660 
/// insertRows overload taking the matrix of DBTranslatedValue rows by const
/// reference
668  virtual void
669  insertRows(const Matrix< DBTranslatedValue >& new_rows,
670  const DBVector< IsMissing >& rows_have_missing_vals) final;
671 
673 
/// insertRows overload taking a matrix of DBCell rows by rvalue reference
679  virtual void insertRows(Matrix< DBCell >&& new_rows) final;
680 
682 
/// insertRows overload taking a matrix of DBCell rows by const reference
688  virtual void insertRows(const Matrix< DBCell >& new_rows) final;
689 
/// erase the content of the database, including the names of the variables
691  virtual void clear() final;
692 
// NOTE(review): the comment below describes a translator-substitution method,
// but no matching declaration is visible in this listing.
693  // substitutes the kth translator by another one
694  /* The method checks that:
695  * 1/ it is possible to get back the original values of the database
696  * for the rows already translated.
697  * 2/ that the new translator is capable of translating these values.
698  *
699  * If both checks pass, then it replaces the kth translator
700  * by the one passed in arguments and retranslates with it the kth
701  * cell of all the rows already contained in the database */
702 
704 
705 
706 #ifndef DOXYGEN_SHOULD_SKIP_THIS
707 
708  private:
/// the DBTranslatorSet held by this table
710  DBTranslatorSet< ALLOC > __translators;
711 
714 
// NOTE(review): presumably checks a translated row against the current
// translators before insertion - TODO confirm.
717  bool __isRowCompatible(const Row< DBTranslatedValue >& row) const;
718 
// NOTE(review): presumably maps k (interpreted according to k_is_input_col)
// to a single translator index - TODO confirm.
724  std::size_t __getKthIndex(const std::size_t k,
725  const bool k_is_input_col) const;
726 
// NOTE(review): same parameters as __getKthIndex, but returns a vector of
// indices; presumably all the matching translators - TODO confirm.
731  DBVector< std::size_t > __getKthIndices(const std::size_t k,
732  const bool k_is_input_col) const;
733 
735 
// NOTE(review): takes two functors by reference; judging by the names,
// exec_func is presumably run over the database by several threads and
// undo_func rolls its effects back - TODO confirm.
753  template < typename Functor1, typename Functor2 >
754  void __threadProcessDatabase(Functor1& exec_func, Functor2& undo_func);
755 
756 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
757  };
758 
759  } /* namespace learning */
760 
761 } /* namespace gum */
762 
765 
766 #endif /* GUM_DATABASE_TABLE_H */
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
The common class for the tabular database tables.
Base class for every random variable.
Definition: variable.h:66
std::size_t size_type
Types for STL compliance.
std::vector< TX_DATA, ALLOC< TX_DATA > > DBVector
the type for the vectors used in the DatabaseTable
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true) final
makes the database table ignore from now on the kth column of the input dataset or the column parsed ...
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
typename IDatabaseTable< DBTranslatedValue, ALLOC >::HandlerSafe HandlerSafe
the safe handler type
const DBTranslatorSet< ALLOC > & translatorSet() const
returns the set of translators
The class representing the original values of the cells of databases.
Definition: DBCell.h:72
virtual void insertRow(const std::vector< std::string, ALLOC< std::string > > &new_row) final
insert a new row at the end of the database
HandlerSafe iterator_safe
Types for STL compliance.
STL namespace.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
std::vector< std::string, XALLOC< std::string > > MissingValType
std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > > Matrix
the type for the matrices stored into the database
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
The union class for storing the translated values in learning databases.
ALLOC< DBTranslatedValue > allocator_type
Types for STL compliance.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Representation of a set. A Set is a structure that contains arbitrary elements.
Definition: set.h:165
bool needsReordering(const std::size_t k, const bool k_is_input_col=false) const
indicates whether a reordering is needed to sort the translations of the kth translator or those of t...
virtual DatabaseTable< ALLOC > * clone() const final
virtual copy constructor
Handler iterator
Types for STL compliance.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
typename IDatabaseTable< DBTranslatedValue, ALLOC >::Handler Handler
the unsafe handler type
virtual const DBVector< std::size_t > inputColumns() const final
returns the set of columns of the original dataset that are present in the DatabaseTable ...
The base class for all the tabular database cell translators.
Definition: DBTranslator.h:117
const DBTranslator< ALLOC > & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
std::size_t domainSize(const std::size_t k, const bool k_is_input_col=false) const
returns the domain size of the kth variable of the database table or of that of the first one corresp...
virtual const DBVector< std::size_t > ignoredColumns() const final
returns the set of columns of the original dataset that are ignored
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
The class for storing a record in a database.
Definition: DBRow.h:56
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
virtual void insertRows(Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) final
insert a set of new DBRows at the end of the database
The class representing a tabular database as used by learning tasks.
virtual void clear() final
erase the content of the database, including the names of the variables
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
std::ptrdiff_t difference_type
Types for STL compliance.
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
typename IDatabaseTable< DBTranslatedValue, ALLOC >::IsMissing IsMissing
the class for packing together the translators used to preprocess the datasets
DatabaseTable(const MissingValType< XALLOC > &missing_symbols, const DBTranslatorSet< ALLOC > &translators=DBTranslatorSet< ALLOC >(), const allocator_type &alloc=allocator_type())
default constructor
void eraseTranslators(const std::size_t k, const bool k_is_input_col=false)
erases either the kth translator or all those parsing the kth column of the input dataset ...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void reorder()
performs a reordering of all the columns