aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBInitializerFromSQL.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The class for initializing DatabaseTable and RawDatabaseTable
24  * instances from SQL databases
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 #ifdef ODBC_
29 
30 # ifndef GUM_LEARNING_DB_INITILIALIZER_FROM_SQL_H
31 # define GUM_LEARNING_DB_INITILIALIZER_FROM_SQL_H
32 
33 # include <agrum/agrum.h>
34 # include <agrum/tools/database/IDBInitializer.h>
35 # include <agrum/tools/database/nanodbcParser.h>
36 
37 namespace gum {
38 
39  namespace learning {
40 
41  /** @class DBInitializerFromSQL
42  * @headerfile DBInitializerFromSQL.h <agrum/tools/database/DBInitializerFromSQL.h>
43  * @ingroup learning_database
44  * @brief The class for initializing DatabaseTable and RawDatabaseTable
45  * instances from SQL databases
46  *
47  * In aGrUM, the usual way to create DatabaseTable instances used by learning
48  * algorithms is to use the 4-step process below:
49  * -# Create an IDBInitializer instance (either a DBInitializerFromCSV or a
50  * DBInitializerFromSQL). This will enable to get the variables
51  * corresponding to the columns of the DatabaseTable.
52  * -# Knowing these variables, create a DBTranslatorSet for encoding the
53  * lines of the CSV file or those of the SQL result into the appropriate
54  * values for the learning algorithms.
55  * -# Create the DatabaseTable, passing it the DBTranslatorSet created
56  * in the preceding step. Use the IDBInitializer to provide the variables'
57  * names to the DatabaseTable.
58  * -# Use the IDBInitializer to add the lines of the CSV file or those of the
59  * SQL result into the DatabaseTable.
60  *
61  * @par The following codes show the details of this processe:
62  * @code
63  * // 1/ use the initializer to parse all the columns/rows of a SQL result:
64  * const std::string dataSource = "PostgreSQL";
65  * const std::string login = "gonzales";
66  * const std::string password = "agrum";
67  * const std::string query = "SELECT * FROM testunit.asia";
68  * gum::learning::DBInitializerFromSQL<>
69  * initializer ( dataSource, login, password, query );
70  * const auto& var_names = initializer.variableNames ();
71  * const std::size_t nb_vars = var_names.size ();
72  *
73  * // we create as many translators as there are variables
74  * gum::learning::DBTranslator4LabelizedVariable<> translator;
75  * gum::learning::DBTranslatorSet<> translator_set;
76  * for ( std::size_t i = 0; i < nb_vars; ++i )
77  * translator_set.insertTranslator ( translator, i );
78 
79  * // create a DatabaseTable with these translators. For the moment, the
80  * // DatabaseTable will be empty, i.e., it will contain no row
81  * gum::learning::DatabaseTable<> database ( translator_set );
82  * database.setVariableNames( initializer.variableNames () );
83  *
84  * // use the DBInitializerFromSQL to fill the rows:
85  * initializer.fillDatabase ( database );
86  * // now, the database contains all the content of the ASIA database
87  *
88  *
89  * // 2/ use an IDBInitializer to initialize a DatabaseTable, but ignore
90  * // some columns.
91  * gum::learning::DBInitializerFromSQL<>
92  * initializer2 ( dataSource, login, password, query );
93  * gum::learning::DatabaseTable<> database2; // empty database
94  *
95  * // indicate which columns of the SQL result should be read
96  * database2.insertTranslator ( translator, 1 );
97  * database2.insertTranslator ( translator, 3 );
98  * database2.insertTranslator ( translator, 4 );
99  *
100  * // sets the names of the columns correctly
101  * database2.setVariableNames( initializer2.variableNames () );
102  *
103  * // fill the rows:
104  * initializer2.fillDatabase ( database2 );
105  * // now all the rows of the SQL result have been transferred into
106  * // database2, but only columns 1, 3 and 4 of the SQL result have
107  * // been kept.
108  *
109  *
110  * // 3/ another possibility to initialize a DatabaseTable, ignoring
111  * // some columns:
112  * gum::learning::DBInitializerFromSQL<>
113  * initializer3 ( dataSource, login, password, query );
114  * gum::learning::DatabaseTable<> database3 ( translator_set );
115  * // here, database3 is an empty database but it contains already
116  * // translators for all the columns of the SQL result. We shall now
117  * // remove the columns/translators that are not wanted anymore
118  * database3.ignoreColumn ( 0 );
119  * database3.ignoreColumn ( 2 );
120  * database3.ignoreColumn ( 5 );
121  * database3.ignoreColumn ( 6 );
122  * database3.ignoreColumn ( 7 );
123  * // asia contains 8 columns. The above ignoreColumns keep only columns
124  * // 1, 3 and 4.
125  *
126  * // sets the names of the columns correctly
127  * database3.setVariableNames( initializer3.variableNames () );
128  * // fill the rows:
129  * initializer3.fillDatabase ( database3 );
130  * // now all the rows of the SQL result have been transferred into
131  * // database3, but only columns 1, 3 and 4 of the SQL result have
132  * // been kept.
133  * @endcode
134  */
135  template < template < typename > class ALLOC = std::allocator >
136  class DBInitializerFromSQL: public IDBInitializer< ALLOC > {
137  public:
138  /// type for the allocators passed in arguments of methods
139  using allocator_type = ALLOC< std::string >;
140 
141  // ##########################################################################
142  /// @name Constructors / Destructors
143  // ##########################################################################
144  /// @{
145 
146  /// default constructor, especially for postgresql databases
147  /** This will read the result of query and load it in memory.
148  *
149  * @param dataSource A declared dataSource in your odbc configuration
150  * file (usually /etc/odbc.ini).
151  * @param login The dataSource login.
152  * @param password The dataSource password.
153  * @param query The SQL query used as a database.
154  * @param timeout Defines a timeout for accessing the SQL database, if 0
155  * then no timeout is set.
156  * @param alloc the allocator used to allocate all the data structures
157  */
158  DBInitializerFromSQL(const std::string& dataSource,
159  const std::string& login,
160  const std::string& password,
161  const std::string& query,
162  long timeout = 0L,
163  const allocator_type& alloc = allocator_type());
164 
165  /// default constructor, especially for sqlite databases
166  /** This will read the result of query and load it in memory.
167  *
168  * @param connection_string a string specifying to nanODBC how to connect
169  * to a SQL database
170  * @param query The SQL query used as a database.
171  * @param timeout Defines a timeout for accessing the SQL database, if 0
172  * then no timeout is set.
173  * @param alloc the allocator used to allocate all the data structures
174  */
175  DBInitializerFromSQL(const std::string& connection_string,
176  const std::string& query,
177  long timeout = 0L,
178  const allocator_type& alloc = allocator_type());
179 
180  /// copy constructor
181  /** the new initializer points to the same SQL query as from, but
182  * it reparses the result it from scratch. */
183  DBInitializerFromSQL(const DBInitializerFromSQL< ALLOC >& from);
184 
185  /// copy constructor with a given allocator
186  /** the new initializer points to the same SQL query as from, but
187  * it reparses the result from scratch. */
188  DBInitializerFromSQL(const DBInitializerFromSQL< ALLOC >& from, const allocator_type& alloc);
189 
190  /// move constructor
191  DBInitializerFromSQL(DBInitializerFromSQL< ALLOC >&& from);
192 
193  /// move constructor with a given allocator
194  DBInitializerFromSQL(DBInitializerFromSQL< ALLOC >&& from, const allocator_type& alloc);
195 
196  /// virtual copy constructor
197  virtual DBInitializerFromSQL< ALLOC >* clone() const;
198 
199  /// virtual copy constructor with a given allocator
200  virtual DBInitializerFromSQL< ALLOC >* clone(const allocator_type& alloc) const;
201 
202  /// destructor
203  virtual ~DBInitializerFromSQL();
204 
205  /// @}
206 
207 
208  // ##########################################################################
209  /// @name Operators
210  // ##########################################################################
211 
212  /// @{
213 
214  /// copy operator
215  /** the new initializer points to the same SQL query as from, but
216  * it reparses the result from scratch. */
217  DBInitializerFromSQL< ALLOC >& operator=(const DBInitializerFromSQL< ALLOC >& from);
218 
219  /// move operator
220  /** the new initializer points to the same SQL query as from, but
221  * it reparses the result from scratch. */
222  DBInitializerFromSQL< ALLOC >& operator=(DBInitializerFromSQL< ALLOC >&& from);
223 
224  /// @}
225 
226 
227  protected:
228  /// returns the names of the variables
229  virtual std::vector< std::string, ALLOC< std::string > > variableNames_() final;
230 
231  /// returns the content of the current row using strings
232  virtual const std::vector< std::string, ALLOC< std::string > >& currentStringRow_() final;
233 
234  /// indicates whether there is a next row to read (and point on it)
235  virtual bool nextRow_() final;
236 
237 
238 # ifndef DOXYGEN_SHOULD_SKIP_THIS
239 
240  private:
241  // the string specifying how to connect to the database
242  std::string _connection_string_;
243 
244  // the current query
245  std::string _query_;
246 
247  // the timeout for executing the queries
248  long _timeout_;
249 
250  // the names of the columns in the query result
251  std::vector< std::string, ALLOC< std::string > > _var_names_;
252 
253  // the nanodbc connection to the database
254  nanodbc::connection _connection_;
255 
256  // the parser used for parsing the query results
257  NanodbcParser< ALLOC > _parser_;
258 
259  /// perform a connection from a connection string
260  void _connect_(const std::string& connection_string, long timeout);
261 
262 
263 # endif /* DOXYGEN_SHOULD_SKIP_THIS */
264  };
265 
266  } /* namespace learning */
267 
268 } /* namespace gum */
269 
270 // always include the template implementation
271 # include <agrum/tools/database/DBInitializerFromSQL_tpl.h>
272 
273 # endif /* GUM_LEARNING_DB_INITILIALIZER_FROM_SQL_H */
274 
275 #endif /* ODBC_ */