aGrUM  0.14.2
databaseTable.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
29 #ifndef GUM_DATABASE_TABLE_H
30 #define GUM_DATABASE_TABLE_H
31 
32 #include <numeric>
33 #include <algorithm>
34 #include <functional>
35 #include <exception>
36 #include <vector>
37 
38 #include <agrum/agrum.h>
39 #include <agrum/core/math/math.h>
40 #include <agrum/core/set.h>
41 #include <agrum/core/thread.h>
48 
49 namespace gum {
50 
51  namespace learning {
52 
// The class representing a tabular database as used by learning tasks.
// It derives from IDatabaseTable< DBTranslatedValue, ALLOC > and is
// parameterized by the allocator template ALLOC (std::allocator by default).
// NOTE(review): this listing is an extract; the embedded line numbers jump
// wherever the original documentation comments or declarations were dropped.
183  template < template < typename > class ALLOC = std::allocator >
184  class DatabaseTable : public IDatabaseTable< DBTranslatedValue, ALLOC > {
185  public:
     // the type for the vectors used in the DatabaseTable
187  template < typename TX_DATA >
188  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
189 
     // NOTE(review): the alias introduced by this template header (listing
     // line 192) is missing from this extract; presumably the Row alias used
     // by the insertRow overloads below -- confirm against the real header.
191  template < typename TX_DATA >
193 
     // the type for the matrices stored into the database
195  template < typename TX_DATA >
196  using Matrix =
197  std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > >;
198 
     // vector of strings built with a caller-chosen allocator XALLOC; it is
     // the type of the missing_symbols constructor argument below
199  template < template < typename > class XALLOC >
200  using MissingValType = std::vector< std::string, XALLOC< std::string > >;
201 
204 
     // the safe handler type
     // NOTE(review): the right-hand side of this alias (listing line 207) is
     // missing from this extract.
206  using HandlerSafe =
208 
     // NOTE(review): the right-hand side of this alias (listing line 210) is
     // missing from this extract.
209  using IsMissing =
211 
     // Types for STL compliance.
     // NOTE(review): value_type and Handler are used here, but their
     // declarations are not visible in this extract.
216  using const_reference = const value_type&;
217  using pointer = value_type*;
218  using const_pointer = const value_type*;
219  using size_type = std::size_t;
220  using difference_type = std::ptrdiff_t;
221  using iterator = Handler;
223  using allocator_type = ALLOC< DBTranslatedValue >;
225 
226 
227  // ##########################################################################
229  // ##########################################################################
231 
     // default constructor, taking the set of missing symbols, an optional set
     // of translators and an optional allocator
     // NOTE(review): the line holding the constructor name (listing line 234)
     // is missing from this extract.
233  template < template < typename > class XALLOC >
235  const MissingValType< XALLOC >& missing_symbols,
236  const DBTranslatorSet< ALLOC >& translators = DBTranslatorSet< ALLOC >(),
237  const allocator_type& alloc = allocator_type());
238 
     // NOTE(review): the heads of the following declarations (listing lines
     // 240, 245, 248, 252, 255) are missing from this extract; only trailing
     // parameter lines survive.
241  const DBTranslatorSet< ALLOC >& translators = DBTranslatorSet< ALLOC >(),
242  const allocator_type& alloc = allocator_type());
243 
246 
249  const allocator_type& alloc);
250 
253 
256 
     // virtual copy constructor
258  virtual DatabaseTable< ALLOC >* clone() const final;
259 
     // same as above, with the allocator passed explicitly
261  virtual DatabaseTable< ALLOC >*
262  clone(const allocator_type& alloc) const final;
263 
     // destructor
265  virtual ~DatabaseTable();
266 
268 
269  // ##########################################################################
271  // ##########################################################################
273 
     // copy assignment operator
275  DatabaseTable< ALLOC >& operator=(const DatabaseTable< ALLOC >& from);
276 
     // move assignment operator
278  DatabaseTable< ALLOC >& operator=(DatabaseTable< ALLOC >&& from);
279 
281 
282 
283  // ##########################################################################
285  // ##########################################################################
287 
289 
     // insert a new translator into the database table
     // NOTE(review): the meaning of the returned std::size_t and of
     // unique_column is not shown in this extract -- confirm before relying
     // on it.
303  std::size_t insertTranslator(const DBTranslator< ALLOC >& translator,
304  const std::size_t input_column,
305  const bool unique_column = true);
306 
308 
     // overload taking a Variable instead of a ready-made translator
328  std::size_t insertTranslator(const Variable& var,
329  const std::size_t input_column,
330  const bool unique_column = true);
331 
333 
     // overload taking a Variable plus a vector of missing symbols (the vector
     // is passed by value)
349  template < template < typename > class XALLOC >
350  std::size_t insertTranslator(
351  const Variable& var,
352  const std::size_t input_column,
353  std::vector< std::string, XALLOC< std::string > > missing_symbols,
354  const bool unique_column = true);
355 
     // erases either the kth translator or all those parsing the kth column of
     // the input dataset (selected through k_is_input_col)
370  void eraseTranslators(const std::size_t k,
371  const bool k_is_input_col = false);
372 
     // returns the set of translators
374  const DBTranslatorSet< ALLOC >& translatorSet() const;
375 
     // returns either the kth translator of the database table or the first
     // one reading the kth column of the input dataset
389  const DBTranslator< ALLOC >&
390  translator(const std::size_t k, const bool k_is_input_col = false) const;
391 
     // returns either the kth variable of the database table or the first one
     // corresponding to the kth column of the input dataset
405  const Variable& variable(const std::size_t k,
406  const bool k_is_input_col = false) const;
407 
410 
412 
     // sets the names of the variables
437  virtual void setVariableNames(
438  const std::vector< std::string, ALLOC< std::string > >& names,
439  const bool from_external_object = true) final;
440 
     // makes the database table ignore from now on the kth column of the
     // input dataset or of the parsed columns (presumably selected through
     // from_external_object -- confirm)
473  virtual void ignoreColumn(const std::size_t k,
474  const bool from_external_object = true) final;
475 
477 
     // returns the set of columns of the original dataset that are ignored
479  virtual const DBVector< std::size_t > ignoredColumns() const final;
480 
     // returns the set of columns of the original dataset that are present in
     // the DatabaseTable
483  virtual const DBVector< std::size_t > inputColumns() const final;
484 
     // returns the domain size of the kth variable of the database table or of
     // that of the first one corresponding to the kth input column
499  std::size_t domainSize(const std::size_t k,
500  const bool k_is_input_col = false) const;
501 
     // returns the domain sizes of all the variables in the database table
503  DBVector< std::size_t > domainSizes() const;
504 
     // indicates whether a reordering is needed to sort the translations of
     // the kth translator or of those parsing the kth input column
537  bool needsReordering(const std::size_t k,
538  const bool k_is_input_col = false) const;
539 
     // single-column counterpart of reorder() below
     // NOTE(review): exact semantics not shown in this extract.
559  void reorder(const std::size_t k, const bool k_is_input_col = false);
560 
562 
     // performs a reordering of all the columns
567  void reorder();
568 
     // keeps the base class insertRow overloads visible alongside the ones
     // declared below
570  using IDatabaseTable< DBTranslatedValue, ALLOC >::insertRow;
571 
573 
     // insert a new row at the end of the database
595  virtual void insertRow(
596  const std::vector< std::string, ALLOC< std::string > >& new_row) final;
597 
599 
     // insertRow overloads for rows of DBTranslatedValue; the caller states
     // through contains_missing_data whether the row has missing values
608  virtual void insertRow(Row< DBTranslatedValue >&& new_row,
609  const IsMissing contains_missing_data) final;
610 
612 
621  virtual void insertRow(const Row< DBTranslatedValue >& new_row,
622  const IsMissing contains_missing_data) final;
623 
625 
     // insertRow overloads for rows of DBCell
631  virtual void insertRow(const Row< DBCell >& new_row) final;
632 
634 
640  virtual void insertRow(Row< DBCell >&& new_row) final;
641 
643 
     // insert a set of new DBRows at the end of the database
652  virtual void
653  insertRows(Matrix< DBTranslatedValue >&& new_rows,
654  const DBVector< IsMissing >& rows_have_missing_vals) final;
655 
657 
665  virtual void
666  insertRows(const Matrix< DBTranslatedValue >& new_rows,
667  const DBVector< IsMissing >& rows_have_missing_vals) final;
668 
670 
     // insertRows overloads for matrices of DBCell
676  virtual void insertRows(Matrix< DBCell >&& new_rows) final;
677 
679 
685  virtual void insertRows(const Matrix< DBCell >& new_rows) final;
686 
     // erase the content of the database, including the names of the variables
688  virtual void clear() final;
689 
     // NOTE(review): the comment below describes a translator-substitution
     // method, but no such declaration follows it in this extract.
690  // substitutes the kth translator by another one
691  /* The method checks that:
692  * 1/ it is possible to get back the original values of the database
693  * for the rows already translated.
694  * 2/ that the new translator is capable of translating these values.
695  *
696  * If both checks passed, then it replaces the kth translator
697  * by the one passed in arguments and retranslates with it the kth
698  * cell of all the rows already contained in the database */
699 
701 
702 
703 #ifndef DOXYGEN_SHOULD_SKIP_THIS
704 
705  private:
     // the translators held by this table (presumably what translatorSet()
     // returns -- confirm)
707  DBTranslatorSet< ALLOC > __translators;
708 
711 
     // NOTE(review): presumably checks a translated row against the current
     // translators before insertion -- confirm in the implementation.
714  bool __isRowCompatible(const Row< DBTranslatedValue >& row) const;
715 
     // NOTE(review): presumably resolves k (translator index or input column,
     // depending on k_is_input_col) to a translator index -- confirm.
721  std::size_t __getKthIndex(const std::size_t k,
722  const bool k_is_input_col) const;
723 
     // vector-returning counterpart of __getKthIndex
728  DBVector< std::size_t > __getKthIndices(const std::size_t k,
729  const bool k_is_input_col) const;
730 
732 
     // NOTE(review): judging by the names, runs exec_func over the database
     // using threads, with undo_func as a rollback -- confirm in the
     // implementation.
750  template < typename Functor1, typename Functor2 >
751  void __threadProcessDatabase(Functor1& exec_func, Functor2& undo_func);
752 
753 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
754  };
755 
756  } /* namespace learning */
757 
758 } /* namespace gum */
759 
762 
763 #endif /* GUM_DATABASE_TABLE_H */
The union class for storing the translated values in learning databases.
Useful macros for maths.
The implementation of tabular databases stored in memory (RAM)
The common class for the tabular database tables.
Base class for every random variable.
Definition: variable.h:63
std::size_t size_type
Types for STL compliance.
std::vector< TX_DATA, ALLOC< TX_DATA > > DBVector
the type for the vectors used in the DatabaseTable
Sets of elements (i.e.
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true) final
makes the database table ignore from now on the kth column of the input dataset or the column parsed ...
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
typename IDatabaseTable< DBTranslatedValue, ALLOC >::HandlerSafe HandlerSafe
the safe handler type
const DBTranslatorSet< ALLOC > & translatorSet() const
returns the set of translators
The class representing the original values of the cells of databases.
Definition: DBCell.h:69
virtual void insertRow(const std::vector< std::string, ALLOC< std::string > > &new_row) final
insert a new row at the end of the database
HandlerSafe iterator_safe
Types for STL compliance.
STL namespace.
The databases' cell translators for continuous variables.
std::vector< std::string, XALLOC< std::string > > MissingValType
std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > > Matrix
the type for the matrices stored into the database
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
The union class for storing the translated values in learning databases.
ALLOC< DBTranslatedValue > allocator_type
Types for STL compliance.
C++11 threads convenience utilities for agrum.
The class representing a record stored in a tabular database.
Representation of a set. A Set is a structure that contains arbitrary elements.
Definition: set.h:162
bool needsReordering(const std::size_t k, const bool k_is_input_col=false) const
indicates whether a reordering is needed to sort the translations of the kth translator or those of t...
virtual DatabaseTable< ALLOC > * clone() const final
virtual copy constructor
Handler iterator
Types for STL compliance.
The class representing the original values of the cells of databases.
typename IDatabaseTable< DBTranslatedValue, ALLOC >::Handler Handler
the unsafe handler type
virtual const DBVector< std::size_t > inputColumns() const final
returns the set of columns of the original dataset that are present in the DatabaseTable ...
The base class for all the tabular database cell translators.
Definition: DBTranslator.h:114
const DBTranslator< ALLOC > & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
std::size_t domainSize(const std::size_t k, const bool k_is_input_col=false) const
returns the domain size of the kth variable of the database table or of that of the first one corresp...
virtual const DBVector< std::size_t > ignoredColumns() const final
returns the set of columns of the original dataset that are ignored
A class for storing several translators.
The class for storing a record in a database.
Definition: DBRow.h:53
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
virtual void insertRows(Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) final
insert a set of new DBRows at the end of the database
The class representing a tabular database as used by learning tasks.
virtual void clear() final
erase the content of the database, including the names of the variables
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
std::ptrdiff_t difference_type
Types for STL compliance.
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
typename IDatabaseTable< DBTranslatedValue, ALLOC >::IsMissing IsMissing
the class for packing together the translators used to preprocess the datasets
DatabaseTable(const MissingValType< XALLOC > &missing_symbols, const DBTranslatorSet< ALLOC > &translators=DBTranslatorSet< ALLOC >(), const allocator_type &alloc=allocator_type())
default constructor
void eraseTranslators(const std::size_t k, const bool k_is_input_col=false)
erases either the kth translator or all those parsing the kth column of the input dataset ...
The common class for the tabular database tables.
void reorder()
performs a reordering of all the columns