aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
DBInitializerFromSQL.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The class for initializing DatabaseTable and RawDatabaseTable
24  * instances from SQL databases
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 #ifdef ODBC_
29 
30 # ifndef GUM_LEARNING_DB_INITILIALIZER_FROM_SQL_H
31 # define GUM_LEARNING_DB_INITILIALIZER_FROM_SQL_H
32 
33 # include <agrum/agrum.h>
34 # include <agrum/tools/database/IDBInitializer.h>
35 # include <agrum/tools/database/nanodbcParser.h>
36 
37 namespace gum {
38 
39  namespace learning {
40 
41  /** @class DBInitializerFromSQL
42  * @headerfile DBInitializerFromSQL.h <agrum/tools/database/DBInitializerFromSQL.h>
43  * @ingroup learning_database
44  * @brief The class for initializing DatabaseTable and RawDatabaseTable
45  * instances from SQL databases
46  *
47  * In aGrUM, the usual way to create DatabaseTable instances used by learning
48  * algorithms is to use the 4-step process below:
49  * -# Create an IDBInitializer instance (either a DBInitializerFromCSV or a
50  * DBInitializerFromSQL). This will enable to get the variables
51  * corresponding to the columns of the DatabaseTable.
52  * -# Knowing these variables, create a DBTranslatorSet for encoding the
53  * lines of the CSV file or those of the SQL result into the appropriate
54  * values for the learning algorithms.
55  * -# Create the DatabaseTable, passing it the DBTranslatorSet created
56  * in the preceding step. Use the IDBInitializer to provide the variables'
57  * names to the DatabaseTable.
58  * -# Use the IDBInitializer to add the lines of the CSV file or those of the
59  * SQL result into the DatabaseTable.
60  *
61  * @par The following codes show the details of this processe:
62  * @code
63  * // 1/ use the initializer to parse all the columns/rows of a SQL result:
64  * const std::string dataSource = "PostgreSQL";
65  * const std::string login = "gonzales";
66  * const std::string password = "agrum";
67  * const std::string query = "SELECT * FROM testunit.asia";
68  * gum::learning::DBInitializerFromSQL<>
69  * initializer ( dataSource, login, password, query );
70  * const auto& var_names = initializer.variableNames ();
71  * const std::size_t nb_vars = var_names.size ();
72  *
73  * // we create as many translators as there are variables
74  * gum::learning::DBTranslator4LabelizedVariable<> translator;
75  * gum::learning::DBTranslatorSet<> translator_set;
76  * for ( std::size_t i = 0; i < nb_vars; ++i )
77  * translator_set.insertTranslator ( translator, i );
78 
79  * // create a DatabaseTable with these translators. For the moment, the
80  * // DatabaseTable will be empty, i.e., it will contain no row
81  * gum::learning::DatabaseTable<> database ( translator_set );
82  * database.setVariableNames( initializer.variableNames () );
83  *
84  * // use the DBInitializerFromSQL to fill the rows:
85  * initializer.fillDatabase ( database );
86  * // now, the database contains all the content of the ASIA database
87  *
88  *
89  * // 2/ use an IDBInitializer to initialize a DatabaseTable, but ignore
90  * // some columns.
91  * gum::learning::DBInitializerFromSQL<>
92  * initializer2 ( dataSource, login, password, query );
93  * gum::learning::DatabaseTable<> database2; // empty database
94  *
95  * // indicate which columns of the SQL result should be read
96  * database2.insertTranslator ( translator, 1 );
97  * database2.insertTranslator ( translator, 3 );
98  * database2.insertTranslator ( translator, 4 );
99  *
100  * // sets the names of the columns correctly
101  * database2.setVariableNames( initializer2.variableNames () );
102  *
103  * // fill the rows:
104  * initializer2.fillDatabase ( database2 );
105  * // now all the rows of the SQL result have been transferred into
106  * // database2, but only columns 1, 3 and 4 of the SQL result have
107  * // been kept.
108  *
109  *
110  * // 3/ another possibility to initialize a DatabaseTable, ignoring
111  * // some columns:
112  * gum::learning::DBInitializerFromSQL<>
113  * initializer3 ( dataSource, login, password, query );
114  * gum::learning::DatabaseTable<> database3 ( translator_set );
115  * // here, database3 is an empty database but it contains already
116  * // translators for all the columns of the SQL result. We shall now
117  * // remove the columns/translators that are not wanted anymore
118  * database3.ignoreColumn ( 0 );
119  * database3.ignoreColumn ( 2 );
120  * database3.ignoreColumn ( 5 );
121  * database3.ignoreColumn ( 6 );
122  * database3.ignoreColumn ( 7 );
123  * // asia contains 8 columns. The above ignoreColumns keep only columns
124  * // 1, 3 and 4.
125  *
126  * // sets the names of the columns correctly
127  * database3.setVariableNames( initializer3.variableNames () );
128  * // fill the rows:
129  * initializer3.fillDatabase ( database3 );
130  * // now all the rows of the SQL result have been transferred into
131  * // database3, but only columns 1, 3 and 4 of the SQL result have
132  * // been kept.
133  * @endcode
134  */
135  template < template < typename > class ALLOC = std::allocator >
136  class DBInitializerFromSQL: public IDBInitializer< ALLOC > {
137  public:
138  /// type for the allocators passed in arguments of methods
139  using allocator_type = ALLOC< std::string >;
140 
141  // ##########################################################################
142  /// @name Constructors / Destructors
143  // ##########################################################################
144  /// @{
145 
146  /// default constructor, especially for postgresql databases
147  /** This will read the result of query and load it in memory.
148  *
149  * @param dataSource A declared dataSource in your odbc configuration
150  * file (usually /etc/odbc.ini).
151  * @param login The dataSource login.
152  * @param password The dataSource password.
153  * @param query The SQL query used as a database.
154  * @param timeout Defines a timeout for accessing the SQL database, if 0
155  * then no timeout is set.
156  * @param alloc the allocator used to allocate all the data structures
157  */
158  DBInitializerFromSQL(const std::string& dataSource,
159  const std::string& login,
160  const std::string& password,
161  const std::string& query,
162  long timeout = 0L,
163  const allocator_type& alloc = allocator_type());
164 
165  /// default constructor, especially for sqlite databases
166  /** This will read the result of query and load it in memory.
167  *
168  * @param connection_string a string specifying to nanODBC how to connect
169  * to a SQL database
170  * @param query The SQL query used as a database.
171  * @param timeout Defines a timeout for accessing the SQL database, if 0
172  * then no timeout is set.
173  * @param alloc the allocator used to allocate all the data structures
174  */
175  DBInitializerFromSQL(const std::string& connection_string,
176  const std::string& query,
177  long timeout = 0L,
178  const allocator_type& alloc = allocator_type());
179 
180  /// copy constructor
181  /** the new initializer points to the same SQL query as from, but
182  * it reparses the result it from scratch. */
183  DBInitializerFromSQL(const DBInitializerFromSQL< ALLOC >& from);
184 
185  /// copy constructor with a given allocator
186  /** the new initializer points to the same SQL query as from, but
187  * it reparses the result from scratch. */
188  DBInitializerFromSQL(const DBInitializerFromSQL< ALLOC >& from,
189  const allocator_type& alloc);
190 
191  /// move constructor
192  DBInitializerFromSQL(DBInitializerFromSQL< ALLOC >&& from);
193 
194  /// move constructor with a given allocator
195  DBInitializerFromSQL(DBInitializerFromSQL< ALLOC >&& from,
196  const allocator_type& alloc);
197 
198  /// virtual copy constructor
199  virtual DBInitializerFromSQL< ALLOC >* clone() const;
200 
201  /// virtual copy constructor with a given allocator
202  virtual DBInitializerFromSQL< ALLOC >*
203  clone(const allocator_type& alloc) const;
204 
205  /// destructor
206  virtual ~DBInitializerFromSQL();
207 
208  /// @}
209 
210 
211  // ##########################################################################
212  /// @name Operators
213  // ##########################################################################
214 
215  /// @{
216 
217  /// copy operator
218  /** the new initializer points to the same SQL query as from, but
219  * it reparses the result from scratch. */
220  DBInitializerFromSQL< ALLOC >&
221  operator=(const DBInitializerFromSQL< ALLOC >& from);
222 
223  /// move operator
224  /** the new initializer points to the same SQL query as from, but
225  * it reparses the result from scratch. */
226  DBInitializerFromSQL< ALLOC >&
227  operator=(DBInitializerFromSQL< ALLOC >&& from);
228 
229  /// @}
230 
231 
232  protected:
233  /// returns the names of the variables
234  virtual std::vector< std::string, ALLOC< std::string > >
235  variableNames_() final;
236 
237  /// returns the content of the current row using strings
238  virtual const std::vector< std::string, ALLOC< std::string > >&
239  currentStringRow_() final;
240 
241  /// indicates whether there is a next row to read (and point on it)
242  virtual bool nextRow_() final;
243 
244 
245 # ifndef DOXYGEN_SHOULD_SKIP_THIS
246 
247  private:
248  // the string specifying how to connect to the database
249  std::string connection_string__;
250 
251  // the current query
252  std::string query__;
253 
254  // the timeout for executing the queries
255  long timeout__;
256 
257  // the names of the columns in the query result
258  std::vector< std::string, ALLOC< std::string > > var_names__;
259 
260  // the nanodbc connection to the database
261  nanodbc::connection connection__;
262 
263  // the parser used for parsing the query results
264  NanodbcParser< ALLOC > parser__;
265 
266  /// perform a connection from a connection string
267  void connect__(const std::string& connection_string, long timeout);
268 
269 
270 # endif /* DOXYGEN_SHOULD_SKIP_THIS */
271  };
272 
273  } /* namespace learning */
274 
275 } /* namespace gum */
276 
277 // always include the template implementation
278 # include <agrum/tools/database/DBInitializerFromSQL_tpl.h>
279 
280 # endif /* GUM_LEARNING_DB_INITILIALIZER_FROM_SQL_H */
281 
282 #endif /* ODBC_ */