aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
DBHandler.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The base class for all database handlers
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef GUM_LEARNING_DB_HANDLER_H
28 #define GUM_LEARNING_DB_HANDLER_H
29 
30 #include <cstddef>
31 #include <utility>
32 #include <memory>
33 #include <vector>
34 #include <string>
35 
36 #include <agrum/agrum.h>
37 #include <agrum/tools/database/DBRow.h>
38 
39 namespace gum {
40 
41  namespace learning {
42 
43  /** @class DBHandler
44  * @brief The base class for all database handlers
45  * @headerfile DBHandler.h <agrum/tools/database/DBHandler.h>
46  *
47  * Here is an example of how to use this class, illustrated on handlers
48  * for a RawDatabaseTable:
49  * @code
50  * // create the database
51  * gum::learning::RawDatabaseTable<> database;
52  * database.setVariableNames( std::vector<std::string> { "v1", "v2", "v3" } );
53  *
54  * // add one row to the database
55  * gum::learning::DBRow<gum::learning::DBCell>
56  * row( 3, gum::learning::DBCell(2) );
57  * database.insertRow( row );
58  *
59  * // create a safe and an unsafe handler. Those inherit from DBHandler
60  * typename gum::learning::RawDatabaseTable<>::HandlerSafe handler( database );
61  * typename gum::learning::RawDatabaseTable<>::Handler uhandler( database );
62  * // by default, the handlers range over the whole database, which
63  * // currently contains only one row
64  *
65  * // here, we add 5 new rows into the database
66  * for ( int i = 0; i < 5; ++i ) database.insertRow( row );
67  *
68  * // due to the addition of the rows, the safe handler is aware that there
69  * // are now 6 rows. The unsafe handler still thinks there is only one row
70  * std::cout << handler.range().second; // displays 6 (the end of the handler's area, i.e., the first index outside it)
71  * std::cout << handler.size (); // displays 6 (handler's range)
72  * std::cout << handler.DBSize (); // displays 6 (database's size)
73  * std::cout << uhandler.size (); // displays 1 (handler's range)
74  * std::cout << uhandler.DBSize (); // displays 6 (database's size)
75  *
76  * // change the range of rows handled by the DBHandler
77  * handler.setRange ( 1, 4 ); // now parses rows [1,4)
78  * std::cout << handler.size (); // displays 3: rows 1, 2, and 3
79  * std::cout << handler.DBSize (); // displays 6: database's size
80  * std::cout << handler.numRow (); // displays 0: the handler currently
81  * // points on the first row of its managed area [1,4)
82  *
83  * uhandler.setRange ( 1, 4 ); // uhandler now parses rows [1,4)
84  * std::cout << uhandler.size (); // displays 3: rows 1, 2, and 3
85  * std::cout << uhandler.DBSize (); // displays 6: database's size
86  * std::cout << uhandler.numRow (); // displays 0: the handler currently
87  * // points on the first row of its managed area [1,4)
88  *
89  * // move the handler to the next row
90  * handler.nextRow();
91  * std::cout << handler.numRow (); // displays 1: the handler points now
92  * // on the second row of its managed area. This corresponds to the third
93  * // DBRow of the database since the range of handler is [1,4)
94  *
95  * // get the DBRow pointed to by the handler: this is the 3rd DBRow
96  * // of the database
97  * auto& xrow2 = handler.row (); // get the DBRow, unsafe version
98  * auto& yrow2 = handler.rowSafe (); // get the DBRow, safe version
99  * const std::vector<gum::learning::DBCell>& xrow = xrow2.row ();
100  * const double xweight = xrow2.weight ();
101  *
102  * // check whether there exist other rows managed by the handler after
103  * // the current row
104  * bool has_rows = handler.hasRows (); // true: there is still the 4th row
105  * handler.nextRow();
106  * bool has_rows2 = handler.hasRows (); // false: the 4th row is the last one
107  *
108  * // makes the handler point again on the 2nd row of the database
109  * handler.reset ();
110  * std::cout << handler.numRow (); // displays 0: the handler currently
111  * // points on the first row of its managed area [1,4)
112  *
113  * // see the variables' names, i.e., the names of the database's columns
114  * const auto& vars = handler.variableNames();
115  * @endcode
116  * @ingroup learning_database
117  */
118  template < typename T_DATA,
119  template < typename > class ALLOC = std::allocator >
120  class DBHandler {
121  public:
122  /// Types for STL compliance.
123  /// @{
124  using iterator_category = std::random_access_iterator_tag;
127  using const_reference = const value_type&;
128  using pointer = value_type*;
129  using const_pointer = const value_type*;
130  using size_type = std::size_t;
131  using difference_type = std::ptrdiff_t;
132  using allocator_type = ALLOC< void >;
133  /// @}
134 
135  /// the type for the allocated vectors in IDatabases
136  template < typename TX_DATA >
137  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
138 
139 
140  // ##########################################################################
141  /// @name Accessors / Modifiers
142  // ##########################################################################
143 
144  /// @{
145 
146  /// returns the number of rows managed by the handler
147  /** A handler needs not necessarily handle all the rows of the database.
148  * For instance, RecordCounters cut the database into several pieces and
149  * assign each piece to a handler. Then each handler is used in parallel
150  * to perform countings only on a subset of the database */
151  virtual std::size_t size() const = 0;
152 
153  /// the number of rows in the whole database
154  virtual std::size_t DBSize() const = 0;
155 
156  /// returns the current row of the database (safe version)
157  /** @throws OutOfBounds is raised if the handler points outside of
158  * its area */
159  virtual const_reference rowSafe() const = 0;
160 
161  /// returns the current row of the database (safe version)
162  /** @throws OutOfBounds is raised if the handler points outside of
163  * its area */
164  virtual reference rowSafe() = 0;
165 
166  /// returns the current row pointed to by the handler (unsafe version)
167  /** @warning The method does not check whether the handler already points
168  * to the end of its area. It is thus faster than method rowSafe () but,
169  * when you call it, you must be sure that the row actually exists, i.e.,
170  * that the handler has not reached its end. */
171  virtual const_reference row() const = 0;
172 
173  /// returns the current row pointed to by the handler (unsafe version)
174  /** @warning The method does not check whether the handler already points
175  * to the end of its area. It is thus faster than method rowSafe () but,
176  * when you call it, you must be sure that the row actually exists, i.e.,
177  * that the handler has not reached its end. */
178  virtual reference row() = 0;
179 
180  /// go to the next row in the database
181  /** @warning If there is no more row, i.e., you are already on the last
182  * DBRow managed or you point already to the end of the area, then the
183  * handler will point on the end of the area. In particular, this will
184  * not raise any exception. */
185  virtual void nextRow() = 0;
186 
187  /// number of row the handler points to (from the beginning of the area)
188  /** This method assigns 0 to the first row in the area handled by the
189  * handler. So the number returned is the number of rows between the
190  * currently pointed one to the beginning of the area handled. */
191  virtual std::size_t numRow() const = 0;
192 
193  /// indicates wether there are still rows to parse in the database
194  /** Remember that the handler manages only a specified area of the
195  * database, so the method just indicates whether there still remains
196  * rows in the area. */
197  virtual bool hasRows() const = 0;
198 
199  /// puts the handler to the beginning of the database area it handles
200  virtual void reset() = 0;
201 
202  /// sets the range of rows in the database that the handler will parse
203  /** The range provided in arguments specifies that area [begin,end) is
204  * the one managed by the DBHandler, i.e., the first row of the area
205  * has index begin in the whole database, and the last row of the area
206  * has index end-1 in the whole database. The endth row is the first one
207  * outside the area.
208  * @param begin the number of the row in the whole database that will
209  * be the first one in the area managed by the DBHandler.
210  * @param end the number of the row in the whole database from which
211  * the DBHandler considers it is outside of its area. */
212  virtual void setRange(std::size_t begin, std::size_t end) = 0;
213 
214  /// returns the current range of rows of the handler
215  /** The range returned is of type [begin,end), i.e., the first row of the
216  * range managed by the DBHandler has index begin in the whole database,
217  * and the last row of the range has index end-1 in the whole database.
218  * The endth row is therefore the first one outside the range.*/
219  virtual std::pair< std::size_t, std::size_t > range() const = 0;
220 
221  /// returns the names of the variables corresponding to the rows
222  virtual const DBVector< std::string >& variableNames() const = 0;
223 
224  /// returns the number of variables (columns) of the database
225  virtual std::size_t nbVariables() const = 0;
226 
227  /// @}
228 
229 
230 #ifndef DOXYGEN_SHOULD_SKIP_THIS
231 
232  protected:
233  // the cache used to avoid cacheline problems due to parallelism
234  static constexpr int cache_size_{128};
235 
236  // a buffer to avoid cacheline problems due to parallelism
237  char align_[cache_size_];
238 
239 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
240  };
241 
242  } /* namespace learning */
243 
244 } /* namespace gum */
245 
246 #endif /* GUM_LEARNING_DB_HANDLER_H */
virtual void setRange(std::size_t begin, std::size_t end)=0
sets the range of rows in the database that the handler will parse
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
virtual std::size_t nbVariables() const =0
returns the number of variables (columns) of the database
virtual void nextRow()=0
go to the next row in the database
virtual reference rowSafe()=0
returns the current row of the database (safe version)
The base class for all database handlers.
Definition: DBHandler.h:120
virtual std::size_t size() const =0
returns the number of rows managed by the handler
virtual bool hasRows() const =0
indicates whether there are still rows to parse in the database
virtual const DBVector< std::string > & variableNames() const =0
returns the names of the variables corresponding to the rows
virtual const_reference rowSafe() const =0
returns the current row of the database (safe version)
virtual reference row()=0
returns the current row pointed to by the handler (unsafe version)
virtual void reset()=0
puts the handler to the beginning of the database area it handles
virtual std::size_t numRow() const =0
number of row the handler points to (from the beginning of the area)
virtual std::size_t DBSize() const =0
the number of rows in the whole database
virtual std::pair< std::size_t, std::size_t > range() const =0
returns the current range of rows of the handler
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
virtual const_reference row() const =0
returns the current row pointed to by the handler (unsafe version)