aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBHandler.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The base class for all database handlers
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef GUM_LEARNING_DB_HANDLER_H
28 #define GUM_LEARNING_DB_HANDLER_H
29 
30 #include <cstddef>
31 #include <utility>
32 #include <memory>
33 #include <vector>
34 #include <string>
35 
36 #include <agrum/agrum.h>
37 #include <agrum/tools/database/DBRow.h>
38 
39 namespace gum {
40 
41  namespace learning {
42 
43  /** @class DBHandler
44  * @brief The base class for all database handlers
45  * @headerfile DBHandler.h <agrum/tools/database/DBHandler.h>
46  *
47  * Here is an example of how to use this class, illustrated on handlers
48  * for a RawDatabaseTable:
49  * @code
50  * // create the database
51  * gum::learning::RawDatabaseTable<> database;
52  * database.setVariableNames( std::vector<std::string> { "v1", "v2", "v3" } );
53  *
54  * // add one row to the database
55  * gum::learning::DBRow<gum::learning::DBCell>
56  * row( 3, gum::learning::DBCell(2) );
57  * database.insertRow( row );
58  *
59  * // create a safe and an unsafe handler. Those inherit from DBHandler
60  * typename gum::learning::RawDatabaseTable<>::HandlerSafe handler( database );
61  * typename gum::learning::RawDatabaseTable<>::Handler uhandler( database );
62  * // by default, the handlers range over the whole database, which
63  * // currently contains only one row
64  *
65  * // here, we add 5 new rows into the database
66  * for ( int i = 0; i < 5; ++i ) database.insertRow( row );
67  *
68  * // due to the addition of the rows, the safe handler is aware that there
69  * // are now 6 rows. The unsafe handler still thinks there is only one row
70  * std::cout << handler.range().second; // displays 6 (the end of the handled area)
71  * std::cout << handler.size (); // displays 6 (handler's range)
72  * std::cout << handler.DBSize (); // displays 6 (database's size)
73  * std::cout << uhandler.size (); // displays 1 (handler's range)
74  * std::cout << uhandler.DBSize (); // displays 6 (database's size)
75  *
76  * // change the range of rows handled by the DBHandler
77  * handler.setRange ( 1, 4 ); // handler now parses rows [1,4)
78  * std::cout << handler.size (); // displays 3: rows 1, 2, and 3
79  * std::cout << handler.DBSize (); // displays 6: database's size
80  * std::cout << handler.numRow (); // displays 0: the handler currently
81  * // points on the first row of its managed area [1,4)
82  *
83  * uhandler.setRange ( 1, 4 ); // uhandler now parses rows [1,4)
84  * std::cout << uhandler.size (); // displays 3: rows 1, 2, and 3
85  * std::cout << uhandler.DBSize (); // displays 6: database's size
86  * std::cout << uhandler.numRow (); // displays 0: the handler currently
87  * // points on the first row of its managed area [1,4)
88  *
89  * // move the handler to the next row
90  * handler.nextRow();
91  * std::cout << handler.numRow (); // displays 1: the handler points now
92  * // on the second row of its managed area. This corresponds to the third
93  * // DBRow of the database since the range of handler is [1,4)
94  *
95  * // get the DBRow pointed to by the handler: this is the 3rd DBRow
96  * // of the database
97  * auto& xrow2 = handler.row (); // get the DBRow, unsafe version
98  * auto& yrow2 = handler.rowSafe (); // get the DBRow, safe version
99  * const std::vector<gum::learning::DBCell>& xrow = xrow2.row ();
100  * const double xweight = xrow2.weight ();
101  *
102  * // check whether there exist other rows managed by the handler after
103  * // the current row
104  * bool has_rows = handler.hasRows (); // true: there is still the 4th row
105  * handler.nextRow();
106  * bool has_rows2 = handler.hasRows (); // false: the 4th row is the last one
107  *
108  * // makes the handler point again on the 2nd row of the database
109  * handler.reset ();
110  * std::cout << handler.numRow (); // displays 0: the handler currently
111  * // points on the first row of its managed area [1,4)
112  *
113  * // see the variables' names, i.e., the names of the database's columns
114  * const auto& vars = handler.variableNames();
115  * @endcode
116  * @ingroup learning_database
117  */
118  template < typename T_DATA, template < typename > class ALLOC = std::allocator >
119  class DBHandler {
120  public:
121  /// Types for STL compliance.
122  /// @{
123  using iterator_category = std::random_access_iterator_tag;
 // NOTE(review): listing lines 124-125 are absent from this view; value_type
 // and reference (used below) are presumably declared there -- confirm
 // against the original header.
126  using const_reference = const value_type&;
127  using pointer = value_type*;
128  using const_pointer = const value_type*;
129  using size_type = std::size_t;
130  using difference_type = std::ptrdiff_t;
131  using allocator_type = ALLOC< void >;
132  /// @}
133 
134  /// the type for the allocated vectors in IDatabases
135  template < typename TX_DATA >
136  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
137 
138 
139  // ##########################################################################
140  /// @name Accessors / Modifiers
141  // ##########################################################################
142 
143  /// @{
144 
145  /// returns the number of rows managed by the handler
146  /** A handler need not necessarily handle all the rows of the database.
147  * For instance, RecordCounters cut the database into several pieces and
148  * assign each piece to a handler. Then each handler is used in parallel
149  * to perform counts only on a subset of the database */
150  virtual std::size_t size() const = 0;
151 
152  /// the number of rows in the whole database
153  virtual std::size_t DBSize() const = 0;
154 
155  /// returns a const reference to the current row of the database (safe version)
156  /** @throws OutOfBounds is raised if the handler points outside of
157  * its area */
158  virtual const_reference rowSafe() const = 0;
159 
160  /// returns a reference to the current row of the database (safe version)
161  /** @throws OutOfBounds is raised if the handler points outside of
162  * its area */
163  virtual reference rowSafe() = 0;
164 
165  /// returns a const reference to the current row pointed to by the handler (unsafe version)
166  /** @warning The method does not check whether the handler already points
167  * to the end of its area. It is thus faster than method rowSafe () but,
168  * when you call it, you must be sure that the row actually exists, i.e.,
169  * that the handler has not reached its end. */
170  virtual const_reference row() const = 0;
171 
172  /// returns a reference to the current row pointed to by the handler (unsafe version)
173  /** @warning The method does not check whether the handler already points
174  * to the end of its area. It is thus faster than method rowSafe () but,
175  * when you call it, you must be sure that the row actually exists, i.e.,
176  * that the handler has not reached its end. */
177  virtual reference row() = 0;
178 
179  /// go to the next row in the database
180  /** @warning If there is no more row, i.e., you are already on the last
181  * DBRow managed or you point already to the end of the area, then the
182  * handler will point on the end of the area. In particular, this will
183  * not raise any exception. */
184  virtual void nextRow() = 0;
185 
186  /// index of the row the handler points to (from the beginning of the area)
187  /** This method assigns 0 to the first row in the area handled by the
188  * handler. So the number returned is the number of rows between the
189  * beginning of the area handled and the currently pointed row. */
190  virtual std::size_t numRow() const = 0;
191 
192  /// indicates whether there are still rows to parse in the database
193  /** Remember that the handler manages only a specified area of the
194  * database, so the method just indicates whether there still remain
195  * rows in the area. */
196  virtual bool hasRows() const = 0;
197 
198  /// puts the handler to the beginning of the database area it handles
199  virtual void reset() = 0;
200 
201  /// sets the range of rows in the database that the handler will parse
202  /** The range provided in arguments specifies that area [begin,end) is
203  * the one managed by the DBHandler, i.e., the first row of the area
204  * has index begin in the whole database, and the last row of the area
205  * has index end-1 in the whole database. The endth row is the first one
206  * outside the area.
207  * @param begin the number of the row in the whole database that will
208  * be the first one in the area managed by the DBHandler.
209  * @param end the number of the row in the whole database from which
210  * the DBHandler considers it is outside of its area. */
211  virtual void setRange(std::size_t begin, std::size_t end) = 0;
212 
213  /// returns the current range of rows of the handler
214  /** The range returned is of type [begin,end), i.e., the first row of the
215  * range managed by the DBHandler has index begin in the whole database,
216  * and the last row of the range has index end-1 in the whole database.
217  * The endth row is therefore the first one outside the range.*/
218  virtual std::pair< std::size_t, std::size_t > range() const = 0;
219 
220  /// returns the names of the variables corresponding to the rows
221  virtual const DBVector< std::string >& variableNames() const = 0;
222 
223  /// returns the number of variables (columns) of the database
224  virtual std::size_t nbVariables() const = 0;
225 
226  /// @}
227 
228 
229 #ifndef DOXYGEN_SHOULD_SKIP_THIS
230 
231  protected:
232  // the size (in bytes) of the buffer used to avoid cacheline problems due to parallelism
233  static constexpr int cache_size_{128};
234 
235  // a buffer to avoid cacheline problems due to parallelism
236  char align_[cache_size_];
237 
238 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
239  };
240 
241  } /* namespace learning */
242 
243 } /* namespace gum */
244 
245 #endif /* GUM_LEARNING_DB_HANDLER_H */
virtual void setRange(std::size_t begin, std::size_t end)=0
sets the range of rows in the database that the handler will parse
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
virtual std::size_t nbVariables() const =0
returns the number of variables (columns) of the database
virtual void nextRow()=0
go to the next row in the database
virtual reference rowSafe()=0
returns the current row of the database (safe version)
The base class for all database handlers.
Definition: DBHandler.h:119
virtual std::size_t size() const =0
returns the number of rows managed by the handler
virtual bool hasRows() const =0
indicates whether there are still rows to parse in the database
virtual const DBVector< std::string > & variableNames() const =0
returns the names of the variables corresponding to the rows
virtual const_reference rowSafe() const =0
returns the current row of the database (safe version)
virtual reference row()=0
returns the current row pointed to by the handler (unsafe version)
virtual void reset()=0
puts the handler to the beginning of the database area it handles
virtual std::size_t numRow() const =0
number of row the handler points to (from the beginning of the area)
virtual std::size_t DBSize() const =0
the number of rows in the whole database
virtual std::pair< std::size_t, std::size_t > range() const =0
returns the current range of rows of the handler
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
virtual const_reference row() const =0
returns the current row pointed to by the handler (unsafe version)