aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
IDBInitializer.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The base class for initializing DatabaseTable and RawDatabaseTable
24  * instances from CSV files or SQL databases
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 #ifndef GUM_LEARNING_IDB_INITILIALIZER_H
29 #define GUM_LEARNING_IDB_INITILIALIZER_H
30 
31 #include <vector>
32 #include <string>
33 
34 #include <agrum/agrum.h>
35 #include <agrum/tools/database/DBCell.h>
36 #include <agrum/tools/database/DBRow.h>
37 
38 
39 namespace gum {
40 
41  namespace learning {
42 
43  /** @class IDBInitializer
44  * @headerfile IDBInitializer.h <agrum/tools/database/IDBInitializer.h>
45  * @ingroup learning_database
46  * @brief The base class for initializing DatabaseTable and
47  * RawDatabaseTable instances from CSV files or SQL databases
48  *
49  * @par Usage example:
50  * @code
51  * // 1/ use the initializer to parse all the columns/rows of a CSV file
52  * // the DBInitializerFromCSV class inherits from IDBInitializer<>
53  * gum::learning::DBInitializerFromCSV<> initializer ( "asia.csv" );
54  * const auto& var_names = initializer.variableNames ();
55  * const std::size_t nb_vars = var_names.size ();
56  *
57  * // we create as many translators as there are variables
58  * gum::learning::DBTranslator4LabelizedVariable<> translator;
59  * gum::learning::DBTranslatorSet<> translator_set;
60  * for ( std::size_t i = 0; i < nb_vars; ++i )
61  * translator_set.insertTranslator ( translator, i );
62  *
63  * // create a DatabaseTable with these translators. For the moment, the
64  * // DatabaseTable will be empty, i.e., it will contain no row
65  * gum::learning::DatabaseTable<> database ( translator_set );
66  * database.setVariableNames( initializer.variableNames () );
67  *
68  * // use the DBInitializerFromCSV to fill the rows:
69  * initializer.fillDatabase ( database );
70  * // now, the database contains all the content of the CSV file
71  *
72  *
73  * // 2/ use an IDBInitializer to initialize a DatabaseTable, but ignore
74  * // some columns.
75  * gum::learning::DBInitializerFromCSV<> initializer2 ( "asia.csv" );
76  * gum::learning::DatabaseTable<> database2; // empty database
77  *
78  * // indicate which columns of the CSV file should be read
79  * database2.insertTranslator ( translator, 1 );
80  * database2.insertTranslator ( translator, 3 );
81  * database2.insertTranslator ( translator, 4 );
82  *
83  * // sets the names of the columns correctly
84  * database2.setVariableNames( initializer2.variableNames () );
85  *
86  * // fill the rows:
87  * initializer2.fillDatabase ( database2 );
88  * // now all the rows of the CSV file have been transferred into database2,
89  * // but only columns 1, 3 and 4 of the CSV file have been kept.
90  *
91  *
92  * // 3/ another possibility to initialize a DatabaseTable, ignoring
93  * // some columns:
94  * gum::learning::DBInitializerFromCSV<> initializer3 ( "asia.csv" );
95  * gum::learning::DatabaseTable<> database3 ( translator_set );
96  * // here, database3 is an empty database but it contains already
97  * // translators for all the columns of the CSV file. We shall now remove
98  * // the columns/translators that are not wanted anymore
99  * database3.ignoreColumn ( 0 );
100  * database3.ignoreColumn ( 2 );
101  * database3.ignoreColumn ( 5 );
102  * database3.ignoreColumn ( 6 );
103  * database3.ignoreColumn ( 7 );
104  * // asia contains 8 columns. The above ignoreColumns keep only columns
105  * // 1, 3 and 4.
106  *
107  * // sets the names of the columns correctly
108  * database3.setVariableNames( initializer3.variableNames () );
109  * // fill the rows:
110  * initializer3.fillDatabase ( database3 );
111  * // now all the rows of the CSV file have been transferred into database3,
112  * // but only columns 1, 3 and 4 of the CSV file have been kept.
113  * @endcode
114  */
115  template < template < typename > class ALLOC >
117  public:
118  /** @brief the enumeration indicating the type of the data the
119  * IDBInitializer expects as input data */
120  enum class InputType : char
121  {
122  STRING,
123  DBCELL
124  };
125 
126  /// type for the allocators passed in arguments of methods
127  using allocator_type = ALLOC< std::string >;
128 
129  // ##########################################################################
130  /// @name Constructors / Destructors
131  // ##########################################################################
132  /// @{
133 
134  /// default constructor
135  /** @param type indicates what type of data will be read by the
136  * IDBInitializer when it will try to fill the database.
137  * @param alloc The allocator that will be used by all methods */
138  IDBInitializer(const InputType type, const allocator_type& alloc);
139 
140  /// copy constructor
141  IDBInitializer(const IDBInitializer< ALLOC >& from);
142 
143  /// copy constructor with a given allocator
144  IDBInitializer(const IDBInitializer< ALLOC >& from, const allocator_type& alloc);
145 
146  /// move constructor
147  IDBInitializer(IDBInitializer< ALLOC >&& from);
148 
149  /// move constructor with a given allocator
150  IDBInitializer(IDBInitializer< ALLOC >&& from, const allocator_type& alloc);
151 
152  /// virtual copy constructor
153  virtual IDBInitializer< ALLOC >* clone() const = 0;
154 
155  /// virtual copy constructor with a given allocator
156  virtual IDBInitializer< ALLOC >* clone(const allocator_type& alloc) const = 0;
157 
158  /// destructor
159  virtual ~IDBInitializer();
160 
161  /// @}
162 
163 
164  // ##########################################################################
165  /// @name Accessors / Modifiers
166  // ##########################################################################
167  /// @{
168 
169  /// returns the names of the variables in the input dataset
170  const std::vector< std::string, ALLOC< std::string > >& variableNames();
171 
172  /// fills the rows of the database table
173  /** This method may raise exceptions when trying to insert new rows
174  * into the database table. See Method insertRow() of the database table. */
175  template < template < template < typename > class > class DATABASE >
176  void fillDatabase(DATABASE< ALLOC >& database, const bool retry_insertion = false);
177 
178  /** @brief This method indicates which column filling raised an exception,
179  * if any, during the execution of fillDatabase */
180  std::size_t throwingColumn() const;
181 
182  /// returns the allocator used
183  allocator_type getAllocator() const;
184 
185  /// @}
186 
187 
188  protected:
189  /// copy operator
191 
192  /// move operator
194 
195  /// ask the child class for the names of the variables
196  virtual std::vector< std::string, ALLOC< std::string > > variableNames_() = 0;
197 
198  /// asks the child class for the content of the current row using strings
199  /** If the child class parses strings, this method should be overloaded */
200  virtual const std::vector< std::string, ALLOC< std::string > >& currentStringRow_();
201 
202  /// asks the child class for the content of the current row using dbcells
203  /** If the child class parses DBRows, this method should be overloaded */
204  virtual const DBRow< DBCell, ALLOC >& currentDBCellRow_();
205 
206  /// indicates whether there is a next row to read (and point on it)
207  virtual bool nextRow_() = 0;
208 
209 
210 #ifndef DOXYGEN_SHOULD_SKIP_THIS
211 
212  private:
213  // the names of the variables
214  std::vector< std::string, ALLOC< std::string > > _var_names_;
215 
216  // the types of the input data read to fill the database
217  InputType _input_type_;
218 
219  // indicates whether an exception was raised when adding the last row
220  // into the database. If so, when filling again the database, we may
221  // try to insert again the same row
222  bool _last_insertion_failed_{false};
223 
224 
225  /// fills the rows of the database using string inputs
226  template < template < template < typename > class > class DATABASE >
227  void _fillDatabaseFromStrings_(DATABASE< ALLOC >& database, const bool retry_insertion);
228 
229  /// fills the rows of the database using DBCell inputs
230  template < template < template < typename > class > class DATABASE >
231  void _fillDatabaseFromDBCells_(DATABASE< ALLOC >& database, const bool retry_insertion);
232 
233 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
234  };
235 
236  } /* namespace learning */
237 
238 } /* namespace gum */
239 
240 // always include the template implementation
241 #include <agrum/tools/database/IDBInitializer_tpl.h>
242 
243 #endif /* GUM_LEARNING_IDB_INITILIALIZER_H */
const std::vector< std::string, ALLOC< std::string > > & variableNames()
returns the names of the variables in the input dataset
IDBInitializer(IDBInitializer< ALLOC > &&from)
move constructor
virtual std::vector< std::string, ALLOC< std::string > > variableNames_()=0
ask the child class for the names of the variables
virtual IDBInitializer< ALLOC > * clone(const allocator_type &alloc) const =0
virtual copy constructor with a given allocator
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
allocator_type getAllocator() const
returns the allocator used
virtual bool nextRow_()=0
indicates whether there is a next row to read (and point on it)
IDBInitializer(const IDBInitializer< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
virtual const DBRow< DBCell, ALLOC > & currentDBCellRow_()
asks the child class for the content of the current row using dbcells
IDBInitializer< ALLOC > & operator=(const IDBInitializer< ALLOC > &from)
copy operator
IDBInitializer(IDBInitializer< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
void fillDatabase(DATABASE< ALLOC > &database, const bool retry_insertion=false)
fills the rows of the database table
InputType
the enumeration indicating the type of the data the IDBInitializer expects as input data ...
IDBInitializer< ALLOC > & operator=(IDBInitializer< ALLOC > &&from)
move operator
virtual const std::vector< std::string, ALLOC< std::string > > & currentStringRow_()
asks the child class for the content of the current row using strings
IDBInitializer(const InputType type, const allocator_type &alloc)
default constructor
std::size_t throwingColumn() const
This method indicates which column filling raised an exception, if any, during the execution of fillDatabase
IDBInitializer(const IDBInitializer< ALLOC > &from)
copy constructor
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
virtual ~IDBInitializer()
destructor
The base class for initializing DatabaseTable and RawDatabaseTable instances from CSV files or SQL databases
virtual IDBInitializer< ALLOC > * clone() const =0
virtual copy constructor