aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
IDBInitializer.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The base class for initializing DatabaseTable and RawDatabaseTable
24  * instances from CSV files or SQL databases
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 #ifndef GUM_LEARNING_IDB_INITILIALIZER_H
29 #define GUM_LEARNING_IDB_INITILIALIZER_H
30 
31 #include <vector>
32 #include <string>
33 
34 #include <agrum/agrum.h>
35 #include <agrum/tools/database/DBCell.h>
36 #include <agrum/tools/database/DBRow.h>
37 
38 
39 namespace gum {
40 
41  namespace learning {
42 
43  /** @class IDBInitializer
44  * @headerfile IDBInitializer.h <agrum/tools/database/IDBInitializer.h>
45  * @ingroup learning_database
46  * @brief The base class for initializing DatabaseTable and
47  * RawDatabaseTable instances from CSV files or SQL databases
48  *
49  * @par Usage example:
50  * @code
51  * // 1/ use the initializer to parse all the columns/rows of a CSV file
52  * // the DBInitializerFromCSV class inherits from IDBInitializer<>
53  * gum::learning::DBInitializerFromCSV<> initializer ( "asia.csv" );
54  * const auto& var_names = initializer.variableNames ();
55  * const std::size_t nb_vars = var_names.size ();
56  *
57  * // we create as many translators as there are variables
58  * gum::learning::DBTranslator4LabelizedVariable<> translator;
59  * gum::learning::DBTranslatorSet<> translator_set;
60  * for ( std::size_t i = 0; i < nb_vars; ++i )
61  * translator_set.insertTranslator ( translator, i );
62  *
63  * // create a DatabaseTable with these translators. For the moment, the
64  * // DatabaseTable will be empty, i.e., it will contain no row
65  * gum::learning::DatabaseTable<> database ( translator_set );
66  * database.setVariableNames( initializer.variableNames () );
67  *
68  * // use the DBInitializerFromCSV to fill the rows:
69  * initializer.fillDatabase ( database );
70  * // now, the database contains all the content of the CSV file
71  *
72  *
73  * // 2/ use an IDBInitializer to initialize a DatabaseTable, but ignore
74  * // some columns.
75  * gum::learning::DBInitializerFromCSV<> initializer2 ( "asia.csv" );
76  * gum::learning::DatabaseTable<> database2; // empty database
77  *
78  * // indicate which columns of the CSV file should be read
79  * database2.insertTranslator ( translator, 1 );
80  * database2.insertTranslator ( translator, 3 );
81  * database2.insertTranslator ( translator, 4 );
82  *
83  * // sets the names of the columns correctly
84  * database2.setVariableNames( initializer2.variableNames () );
85  *
86  * // fill the rows:
87  * initializer2.fillDatabase ( database2 );
88  * // now all the rows of the CSV file have been transferred into database2,
89  * // but only columns 1, 3 and 4 of the CSV file have been kept.
90  *
91  *
92  * // 3/ another possibility to initialize a DatabaseTable, ignoring
93  * // some columns:
94  * gum::learning::DBInitializerFromCSV<> initializer3 ( "asia.csv" );
95  * gum::learning::DatabaseTable<> database3 ( translator_set );
96  * // here, database3 is an empty database but it already contains
97  * // translators for all the columns of the CSV file. We shall now remove
98  * // the columns/translators that are not wanted anymore
99  * database3.ignoreColumn ( 0 );
100  * database3.ignoreColumn ( 2 );
101  * database3.ignoreColumn ( 5 );
102  * database3.ignoreColumn ( 6 );
103  * database3.ignoreColumn ( 7 );
104  * // asia contains 8 columns. The above calls to ignoreColumn keep only
105  * // columns 1, 3 and 4.
106  *
107  * // sets the names of the columns correctly
108  * database3.setVariableNames( initializer3.variableNames () );
109  * // fill the rows:
110  * initializer3.fillDatabase ( database3 );
111  * // now all the rows of the CSV file have been transferred into database3,
112  * // but only columns 1, 3 and 4 of the CSV file have been kept.
113  * @endcode
114  */
115  template < template < typename > class ALLOC >
117  public:
118  /** @brief the enumeration indicating the type of the data the
119  * IDBInitializer expects as input data */
120  enum class InputType : char
121  {
122  STRING,
123  DBCELL
124  };
125 
126  /// type for the allocators passed in arguments of methods
127  using allocator_type = ALLOC< std::string >;
128 
129  // ##########################################################################
130  /// @name Constructors / Destructors
131  // ##########################################################################
132  /// @{
133 
134  /// default constructor
135  /** @param type indicates what type of data will be read by the
136  * IDBInitializer when it tries to fill the database.
137  * @param alloc The allocator that will be used by all methods */
138  IDBInitializer(const InputType type, const allocator_type& alloc);
139 
140  /// copy constructor
141  IDBInitializer(const IDBInitializer< ALLOC >& from);
142 
143  /// copy constructor with a given allocator
144  IDBInitializer(const IDBInitializer< ALLOC >& from,
145  const allocator_type& alloc);
146 
147  /// move constructor
148  IDBInitializer(IDBInitializer< ALLOC >&& from);
149 
150  /// move constructor with a given allocator
151  IDBInitializer(IDBInitializer< ALLOC >&& from, const allocator_type& alloc);
152 
153  /// virtual copy constructor
154  virtual IDBInitializer< ALLOC >* clone() const = 0;
155 
156  /// virtual copy constructor with a given allocator
157  virtual IDBInitializer< ALLOC >*
158  clone(const allocator_type& alloc) const = 0;
159 
160  /// destructor
161  virtual ~IDBInitializer();
162 
163  /// @}
164 
165 
166  // ##########################################################################
167  /// @name Accessors / Modifiers
168  // ##########################################################################
169  /// @{
170 
171  /// returns the names of the variables in the input dataset
172  const std::vector< std::string, ALLOC< std::string > >& variableNames();
173 
174  /// fills the rows of the database table
175  /** This method may raise exceptions when trying to insert new rows
176  * into the database table. See Method insertRow() of the database table. */
177  template < template < template < typename > class > class DATABASE >
178  void fillDatabase(DATABASE< ALLOC >& database,
179  const bool retry_insertion = false);
180 
181  /** @brief This method indicates which column filling raised an exception,
182  * if any, during the execution of fillDatabase */
183  std::size_t throwingColumn() const;
184 
185  /// returns the allocator used
186  allocator_type getAllocator() const;
187 
188  /// @}
189 
190 
191  protected:
192  /// copy operator
194 
195  /// move operator
197 
198  /// asks the child class for the names of the variables
199  virtual std::vector< std::string, ALLOC< std::string > > variableNames_()
200  = 0;
201 
202  /// asks the child class for the content of the current row using strings
203  /** If the child class parses strings, this method should be overridden */
204  virtual const std::vector< std::string, ALLOC< std::string > >&
206 
207  /// asks the child class for the content of the current row using dbcells
208  /** If the child class parses DBRows, this method should be overridden */
209  virtual const DBRow< DBCell, ALLOC >& currentDBCellRow_();
210 
211  /// indicates whether there is a next row to read (and points to it)
212  virtual bool nextRow_() = 0;
213 
214 
215 #ifndef DOXYGEN_SHOULD_SKIP_THIS
216 
217  private:
218  // the names of the variables
219  std::vector< std::string, ALLOC< std::string > > var_names__;
220 
221  // the type of the input data read to fill the database
222  InputType input_type__;
223 
224  // indicates whether an exception was raised when adding the last row
225  // into the database. If so, when filling the database again, we may
226  // try to insert the same row again
227  bool last_insertion_failed__{false};
228 
229 
230  /// fills the rows of the database using string inputs
231  template < template < template < typename > class > class DATABASE >
232  void fillDatabaseFromStrings__(DATABASE< ALLOC >& database,
233  const bool retry_insertion);
234 
235  /// fills the rows of the database using DBCell inputs
236  template < template < template < typename > class > class DATABASE >
237  void fillDatabaseFromDBCells__(DATABASE< ALLOC >& database,
238  const bool retry_insertion);
239 
240 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
241  };
242 
243  } /* namespace learning */
244 
245 } /* namespace gum */
246 
247 // always include the template implementation
248 #include <agrum/tools/database/IDBInitializer_tpl.h>
249 
250 #endif /* GUM_LEARNING_IDB_INITILIALIZER_H */
const std::vector< std::string, ALLOC< std::string > > & variableNames()
returns the names of the variables in the input dataset
IDBInitializer(IDBInitializer< ALLOC > &&from)
move constructor
virtual std::vector< std::string, ALLOC< std::string > > variableNames_()=0
ask the child class for the names of the variables
virtual IDBInitializer< ALLOC > * clone(const allocator_type &alloc) const =0
virtual copy constructor with a given allocator
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
allocator_type getAllocator() const
returns the allocator used
virtual bool nextRow_()=0
indicates whether there is a next row to read (and point on it)
IDBInitializer(const IDBInitializer< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
virtual const DBRow< DBCell, ALLOC > & currentDBCellRow_()
asks the child class for the content of the current row using dbcells
IDBInitializer< ALLOC > & operator=(const IDBInitializer< ALLOC > &from)
copy operator
IDBInitializer(IDBInitializer< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
void fillDatabase(DATABASE< ALLOC > &database, const bool retry_insertion=false)
fills the rows of the database table
InputType
the enumeration indicating the type of the data the IDBInitializer expects as input data ...
IDBInitializer< ALLOC > & operator=(IDBInitializer< ALLOC > &&from)
move operator
virtual const std::vector< std::string, ALLOC< std::string > > & currentStringRow_()
asks the child class for the content of the current row using strings
IDBInitializer(const InputType type, const allocator_type &alloc)
default constructor
std::size_t throwingColumn() const
This method indicates which column filling raised an exception, if any, during the execution of fillD...
IDBInitializer(const IDBInitializer< ALLOC > &from)
copy constructor
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
virtual ~IDBInitializer()
destructor
The base class for initializing DatabaseTable and RawDatabaseTable instances from CSV files or SQL da...
virtual IDBInitializer< ALLOC > * clone() const =0
virtual copy constructor