aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
IDatabaseTable.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The common class for the tabular database tables
24  *
25  * IDatabases are not intended to be created as is but should be created through
26  * the RawDatabaseTable and DatabaseTable classes.
27  *
28  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
29  */
30 #ifndef GUM_IDATABASE_TABLE_H
31 #define GUM_IDATABASE_TABLE_H
32 
33 #include <cstddef>
34 #include <utility>
35 #include <string>
36 #include <cstring>
37 #include <memory>
38 #include <vector>
39 #include <mutex>
40 
41 #include <agrum/agrum.h>
42 #include <agrum/tools/core/thread.h>
43 #include <agrum/tools/core/OMPThreads.h>
44 #include <agrum/tools/database/DBCell.h>
45 #include <agrum/tools/database/DBRow.h>
46 #include <agrum/tools/database/DBHandler.h>
47 #include <agrum/tools/database/DBTranslator.h>
48 
49 
50 namespace gum {
51 
52  namespace learning {
53 
// Forward declaration of the helper struct, parameterized by the allocator
// template ALLOC and the compile-time flag ENABLE_INSERT.
// NOTE(review): ENABLE_INSERT presumably selects whether the DBCell-based
// insertRow/insertRows overloads are declared -- confirm against the specializations.
54  template < template < typename > class ALLOC, bool ENABLE_INSERT >
55  struct IDatabaseTableInsert4DBCell;
56 
57  template < template < typename > class ALLOC >
59  template < typename TX_DATA >
61 
62  template < typename TX_DATA >
63  using Row = DBRow< TX_DATA, ALLOC >;
64 
65  template < typename TX_DATA >
67 
68 
69  /// insert a new DBRow at the end of the database
70  /** The new row passed in argument is supposed to come from an external
71  * database. So it must contain data for the ignored columns. */
72  virtual void insertRow(Row< DBCell >&& new_row) = 0;
73 
74  /// insert a new row at the end of the database
75  /** The new row passed in argument is supposed to come from an external
76  * database. So it must contain data for the ignored columns. */
77  virtual void insertRow(const Row< DBCell >& new_row) = 0;
78 
79  /// insert a set of new DBRows at the end of the database
80  /** The new rows passed in argument are supposed to come from an external
81  * database. So they must contain data for the ignored columns. */
82  virtual void insertRows(Matrix< DBCell >&& new_rows) = 0;
83 
84  /// insert a set of new DBRows at the end of the database
85  /** The new rows passed in argument are supposed to come from an external
86  * database. So they must contain data for the ignored columns. */
87  virtual void insertRows(const Matrix< DBCell >& new_rows) = 0;
88 
89  /// insert a new row at the end of the database
90  /** The new row passed in argument is supposed to come from an external
91  * database. So it must contain data for the ignored columns. */
92  virtual void insertRow(const std::vector< std::string, ALLOC< std::string > >& new_row) = 0;
93 
94  /// insert new rows at the end of the database
95  /** The new rows passed in argument are supposed to come from an external
96  * database. So they must contain data for the ignored columns. */
97  virtual void insertRows(const DBVector< DBVector< std::string > >& new_rows);
98  };
99 
100 
101  template < template < typename > class ALLOC >
103  template < typename TX_DATA >
105 
106  template < typename TX_DATA >
107  using Row = DBRow< TX_DATA, ALLOC >;
108 
109  template < typename TX_DATA >
111 
112  /// insert a new row at the end of the database
113  /** The new row passed in argument is supposed to come from an external
114  * database. So it must contain data for the ignored columns. */
115  virtual void insertRow(const std::vector< std::string, ALLOC< std::string > >& new_row) = 0;
116 
117  /// insert new rows at the end of the database
118  /** The new rows passed in argument are supposed to come from an external
119  * database. So they must contain data for the ignored columns. */
120  virtual void insertRows(const DBVector< DBVector< std::string > >& new_rows);
121  };
122 
123 
124  /** @class IDatabaseTable
125  * @headerfile IDatabaseTable.h <agrum/tools/database/IDatabaseTable.h>
126  * @brief The common class for the tabular database tables
127  *
128  * IDatabases are not intended to be created as is but should be created
129  * through the RawDatabaseTable and DatabaseTable classes. They represent
130  * the structures shared by these latter classes.
131  *
132  * Here is an example of how to use the class, illustrated with the
133  * DatabaseTable class (in this case, the T_DATA type is just equal to
134  * DBTranslatedValue):
135  * @code
136  * // create the database from a CSV. This is not compulsory for
137  * // IDatabaseTable instances, but this is how we usually create
138  * // DatabaseTable instances
139  * gum::learning::DBInitializerFromCSV<> initializer ( "asia.csv" );
140  * const auto& var_names = initializer.variableNames ();
141  * gum::learning::DBTranslatorSet<> translator_set;
142  * gum::learning::DBTranslator4LabelizedVariable<> translator;
143  * for ( std::size_t i = 0; i < var_names.size(); ++i )
144  * translator_set.insertTranslator ( translator, i );
145  * gum::learning::DatabaseTable<> database ( translator_set );
146  * database.setVariableNames( initializer.variableNames () );
147  *
148  * // here, database contains the content of the asia.csv file.
149  * // determine how many columns and rows the database contains
150  * std::size_t nb_rows = database.nbRows();
151  * std::size_t nb_cols = database.nbVariables ();
152  *
153  * // manually add a new row into the database
154  * std::vector<std::string> row( 8, "toto" ); // asia has 8 columns
155  * database.insertRow ( row );
156  * gum::learning::DBRow<gum::learning::DBTranslatedValue>
157  * dbrow ( 8, gum::learning::DBTranslatedValue { std::size_t(0) } );
158  * database.insertRow ( dbrow );
159  * // insert 4 rows in a single call
160  * database.insertRows(
161  * std::vector<gum::learning::DBRow<gum::learning::DBTranslatedValue>>
162  * ( 4, dbrow ) );
163  *
164  * // erase some rows
165  * database.eraseRow ( 12 ); // erase the 13th row of the database
166  * database.eraseFirstRow (); // erase the first row of the database
167  * database.eraseLastRow (); // erase the last row of the database
168  * database.eraseFirstRows ( 2 ); // erase the first two rows
169  * database.eraseLastRows ( 3 ); // erase the last three rows
170  * database.eraseRows ( 2,4 ); // erase rows indexed from 2 to 4 (excluded)
171  *
172  * // parse the content of the database, the usual way
173  * for ( const auto& dbrow : database )
174  * std::cout << dbrow.row() << " weight: " << dbrow.weight() << std::endl;
175  *
176  * // ignore some columns of the database, i.e., remove them
177  * database.ignoreColumn ( 3 ); // remove the column X3 of the CSV file
178  * // now, the database contains columns 0, 1, 2, 4, 5, 6, 7 of the
179  * // CSV file. If we wish to remove Column X5 of the CSV file:
180  * database.ignoreColumn ( 5 ); // remove the column X5 of the CSV file
181  * // now, the database contains columns 0, 1, 2, 4, 6, 7 of the CSV file.
182  * // if we wish to remove the 5th column of the IDatabaseTable, i.e.,
183  * // its column #4, either we determine that this actually corresponds
184  * // to column X6 of the CSV and we use database.ignoreColumn ( 6 ) or
185  * // we call:
186  * database.ignoreColumn ( 4, false ); // false => 4 = the 5th column of
187  * // the IDatabaseTable, not the 5th column/variable of the CSV file
188  * // (remember that all column numbers start from 0).
189  *
190  * // display the columns of the CSV that were ignored and those that
191  * // were kept:
192  * std::vector<std::size_t> ignored_cols = database.ignoredColumns ();
193  * std::vector<std::size_t> kept_cols = database.inputColumns ();
194  *
195  * // parse the content of the database using handlers
196  * typename gum::learning::DatabaseTable<>::HandlerSafe handler( database );
197  * typename gum::learning::DatabaseTable<>::Handler uhandler( database );
198  * // by default, the handlers range over the whole database
199  *
200  * // change the range of rows handled by the DBHandler
201  * handler.setRange ( 1, 40 ); // now parses rows [1,40)
202  * std::cout << handler.size (); // displays 39: rows 1,...,39
203  * std::cout << handler.DBSize (); // shows the number of rows in the database
204  * std::cout << handler.numRow (); // displays 0: the handler currently
205  * // points on the first row of its managed area [1,40)
206  *
207  * // move the handler to the next row
208  * handler.nextRow();
209  * std::cout << handler.numRow (); // displays 1: the handler points now
210  * // on the second row of its managed area. This corresponds to the third
211  * // DBRow of the database since the range of handler is [1,40)
212  * ++handler; // move again to the next row
213  * std::cout << handler.numRow (); // displays 2
214  * handler += 4; // advances the pointer by 4 rows
215  * std::cout << handler.numRow (); // displays 6
216  *
217  * // get the DBRow pointed to by the handler: this is the 7th row of the
218  * // handler's area, i.e., the 8th DBRow of the database (index 7)
219  * const auto& xrow7 = handler.row (); // get the DBRow, unsafe version
220  * const auto& yrow7 = handler.rowSafe (); // get the DBRow, safe version
221  * const std::vector<gum::learning::DBTranslatedValue>& xrow = xrow7.row ();
222  * const double xweight = xrow7.weight ();
223  *
224  * // another way to access the row
225  * const auto& zrow7 = *handler; // get the DBRow, unsafe version
226  *
227  * // check whether there exist other rows managed by the handler after
228  * // the current row
229  * bool has_rows = handler.hasRows (); // true: there remains 33 rows
230  *
231  * // makes the handler point again on the 2nd row of the database
232  * handler.reset (); // the handler points to the beginning of its area
233  * std::cout << handler.numRow (); // displays 0: the handler currently
234  * // points on the first row of its managed area [1,40)
235  *
236  * // see the variables' names, i.e., the names of the database's columns
237  * const auto& vars = handler.variableNames();
238  *
239  * // parse all the rows managed
240  * handler.reset ();
241  * for ( auto end = handler.end (); handler != end; ++handler )
242  * std::cout << handler.row ().weight () << std::endl;
243  *
244  * // another possibility:
245  * for ( const auto& row : handler )
246  * std::cout << row.weight () << std::endl;
247  * @endcode
248  * @ingroup learning_database
249  */
250  template < typename T_DATA, template < typename > class ALLOC = std::allocator >
253  private ALLOC< T_DATA > {
254  public:
255  /// the type for the vectors used in the IDatabaseTable
256  template < typename TX_DATA >
257  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
258 
259  /// a row of the database
260  template < typename TX_DATA >
261  using Row = DBRow< TX_DATA, ALLOC >;
262 
263  /// the type for the matrices stored into the database
264  template < typename TX_DATA >
266 
267  template < template < typename > class XALLOC >
268  using MissingValType = std::vector< std::string, XALLOC< std::string > >;
269 
270 
271  enum IsMissing : char
272  {
275  };
276 
277 
278  /** @class Handler
279  * @headerfile IDatabaseTable.h <agrum/tools/database/IDatabaseTable.h>
280  * @brief the (unsafe) handler for the tabular databases
281  *
282  * The IDatabaseTable class is provided with two types of handlers: unsafe
283  * handlers and safe ones. Compared to the former, the safe handlers
284  * incur a small overhead during their creation. But safe handlers
285  * are informed by their associated database when the structure of
286  * this one changes, i.e., when the number of rows/columns changes or
287  * when rows are added/removed, whereas unsafe handlers are not aware
288  * of such changes. For databases that are not affected by this kind of
289  * change, unsafe handlers should be used instead of safe ones because
290  * they are slightly faster. Both types of handlers are designed to be
291  * created in parallel by several threads.
292  *
293  * Here is an example of how to use this class, illustrated on handlers
294  * for a RawDatabaseTable:
295  * @code
296  * // create the database
297  * gum::learning::RawDatabaseTable<> database;
298  * database.setVariableNames( std::vector<std::string> {"v1","v2","v3"} );
299  *
300  * // add one row to the database
301  * gum::learning::DBRow<gum::learning::DBCell>
302  * row( 3, gum::learning::DBCell(2) );
303  * database.insertRow( row );
304  *
305  * // create a handler.
306  * typename gum::learning::RawDatabaseTable<>::Handler handler( database );
307  * // by default, the handlers range over the whole database, which
308  * // currently contains only one row
309  *
310  * // here, we add 95 new rows into the database
311  * for ( int i = 0; i < 95; ++i ) database.insertRow( row );
312  *
313  * // despite the addition of the rows, the (unsafe) handler still thinks
314  * // there is only one row
315  * std::cout << handler.size (); // displays 1 (handler's range)
316  * std::cout << handler.DBSize (); // displays 96 (database's size)
317  *
318  * // change the range of rows handled by the DBHandler
319  * handler.setRange ( 1, 40 ); // now parses rows [1,40)
320  * std::cout << handler.size (); // displays 39: rows 1,...,39
321  * std::cout << handler.DBSize (); // displays 96: database's size
322  * std::cout << handler.numRow (); // displays 0: the handler currently
323  * // points on the first row of its managed area [1,40)
324  *
325  * // move the handler to the next row
326  * handler.nextRow();
327  * std::cout << handler.numRow (); // displays 1: the handler points now
328  * // on the second row of its managed area. This corresponds to the third
329  * // DBRow of the database since the range of handler is [1,40)
330  * ++handler; // move again to the next row
331  * std::cout << handler.numRow (); // displays 2
332  * handler += 4; // advances the pointer by 4 rows
333  * std::cout << handler.numRow (); // displays 6
334  *
335  * // get the DBRow pointed to by the handler: this is the 7th row of the
336  * // handler's area, i.e., the 8th DBRow of the database (index 7)
337  * const auto& xrow7 = handler.row (); // get the DBRow, unsafe version
338  * const auto& yrow7 = handler.rowSafe (); // get the DBRow, safe version
339  * const std::vector<gum::learning::DBCell>& xrow = xrow7.row ();
340  * const double xweight = xrow7.weight ();
341  *
342  * // another way to access the row
343  * const auto& zrow7 = *handler; // get the DBRow, unsafe version
344  *
345  * // check whether there exist other rows managed by the handler after
346  * // the current row
347  * bool has_rows = handler.hasRows (); // true: there remains 33 rows
348  *
349  * // makes the handler point again on the 2nd row of the database
350  * handler.reset (); // the handler points to the beginning of its area
351  * std::cout << handler.numRow (); // displays 0: the handler currently
352  * // points on the first row of its managed area [1,40)
353  *
354  * // see the variables' names, i.e., the names of the database's columns
355  * const auto& vars = handler.variableNames();
356  *
357  * // parse all the rows managed
358  * handler.reset ();
359  * for ( auto end = handler.end (); handler != end; ++handler )
360  * std::cout << handler.row ().weight () << std::endl;
361  *
362  * // another possibility:
363  * for ( const auto& row : handler )
364  * std::cout << row.weight () << std::endl;
365  * @endcode
366  *
367  * @ingroup learning_database
368  */
369  class Handler: public DBHandler< T_DATA, ALLOC > {
370  public:
371  /// Types for STL compliance.
372  /// @{
373  using iterator_category = std::random_access_iterator_tag;
374  using value_type = typename DBHandler< T_DATA, ALLOC >::value_type;
376  using const_reference = const value_type&;
377  using pointer = value_type*;
378  using const_pointer = const value_type*;
379  using difference_type = std::ptrdiff_t;
380  using allocator_type = ALLOC< T_DATA >;
381  /// @}
382 
383 
384  template < typename TX_DATA >
385  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
386 
387  template < typename TX_DATA >
388  using Row = DBRow< TX_DATA, ALLOC >;
389 
390  template < typename TX_DATA >
392 
393 
394  // ########################################################################
395  /// @name Constructors / Destructors
396  // ########################################################################
397  /// @{
398 
399  /// default constructor
400  /** @param db the database on which the handler will point to.
401  * By default, the range of the handler is the whole database. */
402  Handler(const IDatabaseTable< T_DATA, ALLOC >& db);
403 
404  /// copy constructor
405  /** @param h the handler we wish to copy */
406  Handler(const Handler& h);
407 
408  /// move constructor
409  /** @param h the handler we wish to move */
410  Handler(Handler&& h);
411 
412  /// destructor
413  virtual ~Handler();
414 
415  /// @}
416 
417  // ########################################################################
418  /// @name Operators
419  // ########################################################################
420  /// @{
421 
422  /// copy operator
423  virtual Handler& operator=(const Handler&);
424 
425  /// move operator
426  virtual Handler& operator=(Handler&&);
427 
428  /// makes the handler point to the next row in the database
429  /** if the pointer has already reached the end of the area managed by the
430  * handler, nothing happens. In particular, no exception is raised */
431  virtual Handler& operator++() final;
432 
433  /// makes the handler point to the previous row in the database
434  /** if the pointer is already at the beginning of the area managed
435  * by the handler, nothing happens. In particular, no exception
436  * is raised */
437  virtual Handler& operator--() final;
438 
439  /// advances the handler by i rows in the database
440  /** if, applying this move would make the handler reach the end of
441  * the area managed by the handler, then the handler is kept at the
442  * end of the area, i.e., after the last element of the area. */
443  virtual Handler& operator+=(const std::size_t i) final;
444 
445  /// moves back the handler by i rows in the database
446  /** if, applying this move would make the handler reach the beginning of
447  * the area managed by the handler, then the handler is kept at the
448  * beginning of the area, i.e., at the first element of the area. */
449  virtual Handler& operator-=(const std::size_t i) final;
450 
451  /// checks whether two handlers point to the same row in the database
452  virtual bool operator==(const Handler& handler) const final;
453 
454  /// checks whether two handlers point to different rows in the database
455  virtual bool operator!=(const Handler& handler) const final;
456 
457  /// returns the current row pointed to by the handler (unsafe version)
458  /** @warning The method does not check whether the handler already
459  * points to the end of the area it manages. It is thus faster than
460  * method rowSafe () but, when you call it, you must be sure that the row
461  * actually exists, i.e., that the handler has not reached its end. */
462  virtual const_reference operator*() const final;
463 
464  /// Dereferences the value pointed to by the handler (unsafe version)
465  /** @warning The method does not check whether the handler already
466  * points to the end of its area. It is thus faster than method
467  * rowSafe () but, when you call it, you must be sure that the row
468  * actually exists, i.e., that the handler has not reached its end. */
469  virtual const_pointer operator->() const final;
470 
471  /// @}
472 
473 
474  // ########################################################################
475  /// @name Accessors / Modifiers
476  // ########################################################################
477  /// @{
478 
479  /// returns the number of rows managed by the handler
480  /** A handler needs not necessarily handle all the rows of the database.
481  * For instance, RecordCounters cut the database into several pieces and
482  * assign each piece to a handler. Then each handler is used in parallel
483  * to perform countings only on their subset of the database. The size
484  * reported by method "size" is therefore the number of rows managed
485  * by the handler. If you wish to retrieve the size of the whole database,
486  * then use method DBSize instead. */
487  virtual std::size_t size() const final;
488 
489  /// returns the number of rows of the whole database
490  virtual std::size_t DBSize() const final;
491 
492  /// returns the current row pointed to by the handler (safe version)
493  /** @throws OutOfBounds if the handler points to the end of its area */
494  virtual const_reference rowSafe() const final;
495 
496  /// returns the current row pointed to by the handler (safe version)
497  /** @throws OutOfBounds if the handler points to the end of its area */
498  virtual reference rowSafe() final;
499 
500  /// returns the current row pointed to by the handler (unsafe version)
501  /** @warning The method does not check whether the handler already
502  * points to the end of its area. It is thus faster than method
503  * rowSafe () but, when you call it, you must be sure that the row
504  * actually exists, i.e., that the handler has not reached its end. */
505  virtual const_reference row() const final;
506 
507  /// returns the current row pointed to by the handler (unsafe version)
508  /** @warning The method does not check whether the handler already
509  * points to the end of its area. It is thus faster than method
510  * rowSafe () but, when you call it, you must be sure that the row
511  * actually exists, i.e., that the handler has not reached its end. */
512  virtual reference row() final;
513 
514  /// makes the handler point to the next row, equivalent to operator++
515  virtual void nextRow() final;
516 
517  /// the number of the current row (0 = the 1st row managed by the handler)
518  virtual std::size_t numRow() const final;
519 
520  /// indicates whether the handler has reached its end or not
521  virtual bool hasRows() const final;
522 
523  /// puts the handler to the beginning of the database's area it handles
524  virtual void reset() final;
525 
526  /** @brief returns a new handler that points to the beginning of the
527  * database's area of the current handler
528  *
529  * @warning The handler returned manages precisely the same area
530  * as the handler on which begin() is called. */
531  virtual Handler begin() const;
532 
533  /** @brief returns a new handler that points to the end of the
534  * database's area of the current handler
535  *
536  * @warning The handler returned manages precisely the same area
537  * as the handler on which end() is called. */
538  virtual Handler end() const;
539 
540  /// sets the area in the database the handler will handle
541  /** In addition to setting the area that will be parsed by the handler,
542  * this method makes the handler point to the beginning of the area.
543  * @param first the first row to be handled
544  * @param last the handler handles rows in interval [first,last). Thus,
545  * the row of index last is not included in the set of rows handled.
546  * @warning if first is greater than last, these values are swapped.
547  * @throw NullElement is raised if the handler does not point to
548  * any database
549  * @throw SizeError is raised if last is greater than the number of
550  * rows of the database */
551  virtual void setRange(std::size_t first, std::size_t last) final;
552 
553  /// returns the current range of the handler [begin,end)
554  virtual std::pair< std::size_t, std::size_t > range() const final;
555 
556  /// returns the names of the variables
557  virtual const DBVector< std::string >& variableNames() const final;
558 
559  /// returns the number of variables (columns) of the database
560  virtual std::size_t nbVariables() const final;
561 
562  /// returns a reference to the database
563  /** @throw NullElement is raised if the handler does not point toward
564  * any database. */
565  virtual const IDatabaseTable< T_DATA, ALLOC >& database() const;
566 
567  /// @}
568 
569 
570 #ifndef DOXYGEN_SHOULD_SKIP_THIS
571 
572  protected:
573  /// a pointer to the whole database, including variable names
574  const IDatabaseTable< T_DATA, ALLOC >* _db_;
575 
576  /// a pointer to the database's records pointed to by the handler
577  /** this data could be retrieved from _db_ but we prefer using a
578  * specific variable here for speed-up reasons. */
579  const Matrix< T_DATA >* _row_;
580 
581  /// the index of the row currently pointed to by the handler
582  std::size_t _index_{std::size_t(0)};
583 
584  /// the first row managed by the handler
585  std::size_t _begin_index_{std::size_t(0)};
586 
587  /// the row just after the last one managed by the handler
588  std::size_t _end_index_{std::size_t(0)};
589 
590  friend class IDatabaseTable< T_DATA, ALLOC >;
591 
592 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
593  };
594 
595 
596  /** @class HandlerSafe
597  * @headerfile IDatabaseTable.h <agrum/tools/database/IDatabaseTable.h>
598  * @brief the safe handler of the tabular databases
599  *
600  * The IDatabaseTable class is provided with two types of handlers: unsafe
601  * handlers and safe ones. Compared to the former, the safe handlers
602  * incur a small overhead during their creation. But safe handlers
603  * are informed by their associated database when the structure of
604  * this one changes, i.e., when the number of rows/columns changes or
605  * when rows are added/removed, whereas unsafe handlers are not aware
606  * of such changes. For databases that are not affected by this kind of
607  * change, unsafe handlers should be used instead of safe ones because
608  * they are slightly faster. Both types of handlers are designed to be
609  * created in parallel by several threads.
610  *
611  * Here is an example of how to use this class, illustrated on handlers
612  * for a RawDatabaseTable:
613  * @code
614  * // create the database
615  * gum::learning::RawDatabaseTable<> database;
616  * database.setVariableNames( std::vector<std::string> {"v1","v2","v3"} );
617  *
618  * // add one row to the database
619  * gum::learning::DBRow<gum::learning::DBCell>
620  * row( 3, gum::learning::DBCell(2) );
621  * database.insertRow( row );
622  *
623  * // create a handler.
624  * typename gum::learning::RawDatabaseTable<>::HandlerSafe handler(database);
625  * // by default, the handlers range over the whole database, which
626  * // currently contains only one row
627  *
628  * // here, we add 95 new rows into the database
629  * for ( int i = 0; i < 95; ++i ) database.insertRow( row );
630  *
631  * // due to the addition of the rows, the safe handler updates its range
632  * // and its area is now [0,96)
633  * std::cout << handler.size (); // displays 96 (handler's range)
634  * std::cout << handler.DBSize (); // displays 96 (database's size)
635  *
636  * // change the range of rows handled by the DBHandler
637  * handler.setRange ( 1, 40 ); // now parses rows [1,40)
638  * std::cout << handler.size (); // displays 39: rows 1,...,39
639  * std::cout << handler.DBSize (); // displays 96: database's size
640  * std::cout << handler.numRow (); // displays 0: the handler currently
641  * // points on the first row of its managed area [1,40)
642  *
643  * // move the handler to the next row
644  * handler.nextRow();
645  * std::cout << handler.numRow (); // displays 1: the handler points now
646  * // on the second row of its managed area. This corresponds to the third
647  * // DBRow of the database since the range of handler is [1,40)
648  * ++handler; // move again to the next row
649  * std::cout << handler.numRow (); // displays 2
650  * handler += 4; // advances the pointer by 4 rows
651  * std::cout << handler.numRow (); // displays 6
652  *
653  * // get the DBRow pointed to by the handler: this is the 7th row of the
654  * // handler's area, i.e., the 8th DBRow of the database (index 7)
655  * const auto& xrow7 = handler.row (); // get the DBRow, unsafe version
656  * const auto& yrow7 = handler.rowSafe (); // get the DBRow, safe version
657  * const std::vector<gum::learning::DBCell>& xrow = xrow7.row ();
658  * const double xweight = xrow7.weight ();
659  *
660  * // another way to access the row
661  * const auto& zrow7 = *handler; // get the DBRow, unsafe version
662  *
663  * // check whether there exist other rows managed by the handler after
664  * // the current row
665  * bool has_rows = handler.hasRows (); // true: there remains 33 rows
666  *
667  * // makes the handler point again on the 2nd row of the database
668  * handler.reset (); // the handler points to the beginning of its area
669  * std::cout << handler.numRow (); // displays 0: the handler currently
670  * // points on the first row of its managed area [1,40)
671  *
672  * // see the variables' names, i.e., the names of the database's columns
673  * const auto& vars = handler.variableNames();
674  *
675  * // parse all the rows managed
676  * handler.reset ();
677  * for ( auto end = handler.end (); handler != end; ++handler )
678  * std::cout << handler.row ().weight () << std::endl;
679  *
680  * // another possibility:
681  * for ( const auto& row : handler )
682  * std::cout << row.weight () << std::endl;
683  * @endcode
684  *
685  * @ingroup learning_database
686  */
687  class HandlerSafe: public Handler {
688  public:
689  /// Types for STL compliance.
690  /// @{
691  using iterator_category = std::random_access_iterator_tag;
692  using value_type = typename Handler::value_type;
693  using reference = value_type&;
694  using const_reference = const value_type&;
695  using pointer = value_type*;
696  using const_pointer = const value_type*;
697  using difference_type = std::ptrdiff_t;
698  using allocator_type = ALLOC< T_DATA >;
699  /// @}
700 
701  // ########################################################################
702  /// @name Constructors / Destructors
703  // ########################################################################
704  /// @{
705 
706  /// default constructor
707  /** @param db the database on which the handler will point to.
708  * By default, the range of the handler is the whole database. */
709  HandlerSafe(const IDatabaseTable< T_DATA, ALLOC >& db);
710 
711  /// copy constructor
712  HandlerSafe(const HandlerSafe& h);
713 
714  /// move constructor
716 
717  /// destructor
718  virtual ~HandlerSafe();
719 
720  /// @}
721 
722  // ########################################################################
723  /// @name Operators
724  // ########################################################################
725  /// @{
726 
727  /// copy operator
728  virtual HandlerSafe& operator=(const HandlerSafe&);
729 
730  /// copy operator from a Handler (the base class of HandlerSafe)
731  virtual HandlerSafe& operator=(const Handler&);
732 
733  /// move operator
734  virtual HandlerSafe& operator=(HandlerSafe&&);
735 
736  /// move operator from a Handler (the base class of HandlerSafe)
737  virtual HandlerSafe& operator=(Handler&&);
738 
739  /// @}
740 
741 
742 #ifndef DOXYGEN_SHOULD_SKIP_THIS
743 
744  private:
745  /// attach a new handler to the database
746  void _attachHandler_();
747 
748  /// detach a handler
749  void _detachHandler_();
750 
751  friend class IDatabaseTable< T_DATA, ALLOC >;
752 
753 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
754  };
755 
756 
757  /// Types for STL compliance.
758  /// @{
759  using value_type = Row< T_DATA >;
761  using const_reference = const value_type&;
762  using pointer = value_type*;
763  using const_pointer = const value_type*;
764  using size_type = std::size_t;
765  using difference_type = std::ptrdiff_t;
766  using iterator = Handler;
767  using iterator_safe = HandlerSafe;
768  using const_iterator = const Handler;
769  using const_iterator_safe = const HandlerSafe;
770  using allocator_type = ALLOC< T_DATA >;
771  /// @}
772 
773 
774  // ##########################################################################
775  /// @name Constructors / Destructors
776  // ##########################################################################
777  /// @{
778 
779  /// default constructor
780  template < template < typename > class VARALLOC, template < typename > class MISSALLOC >
781  IDatabaseTable(const MissingValType< MISSALLOC >& missing_symbols,
782  const std::vector< std::string, VARALLOC< std::string > >& var_names,
783  const ALLOC< T_DATA >& alloc);
784 
785  /// copy constructor
786  IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC >& from);
787 
788  /// copy constructor with a given allocator
789  IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC >& from, const allocator_type& alloc);
790 
791  /// move constructor
792  IDatabaseTable(IDatabaseTable< T_DATA, ALLOC >&& from);
793 
794  /// move constructor with a given allocator
795  IDatabaseTable(IDatabaseTable< T_DATA, ALLOC >&& from, const allocator_type& alloc);
796 
797  /// virtual copy constructor
798  virtual IDatabaseTable< T_DATA, ALLOC >* clone() const = 0;
799 
800  /// virtual copy constructor with a given allocator
801  virtual IDatabaseTable< T_DATA, ALLOC >* clone(const allocator_type& alloc) const = 0;
802 
803  /// destructor
804  virtual ~IDatabaseTable();
805 
806  /// @}
807 
808 
809  // ##########################################################################
810  /// @name Iterators
811  // ##########################################################################
812  /// @{
813 
814  /// returns a new unsafe handler pointing to the beginning of the database
815  iterator begin() const;
816 
817  /// returns a new safe handler pointing to the beginning of the database
818  iterator_safe beginSafe() const;
819 
820  /// returns a const reference to the unsafe handler pointing to the end of the database
821  const iterator& end() const noexcept;
822 
823  /// returns a const reference to the safe handler pointing to the end of the database
824  const iterator_safe& endSafe() const noexcept;
825 
826  /// @}
827 
828 
829  // ##########################################################################
830  /// @name Accessors / Modifiers
831  // ##########################################################################
832  /// @{
833 
834  /// returns the content (the records) of the database
835  const Matrix< T_DATA >& content() const noexcept;
836 
837  /// returns a new unsafe handler pointing to the 1st record of the database
838  iterator handler() const;
839 
840  /// returns a new safe handler pointing to the 1st record of the database
841  iterator_safe handlerSafe() const;
842 
843  /// returns the variable names for all the columns of the database
844  /** The names do not include the ignored columns. */
845  const DBVector< std::string >& variableNames() const noexcept;
846 
847  /// sets the names of the variables
848  /** This method can be called in two different ways: either the names
849  * correspond precisely to the columns stored into the database table
850  * (in this case, parameter from_external_object is equal to false),
851  * or they correspond to the columns of an external database (e.g., a
852  * CSV file) from which we potentially excluded some columns and,
853  * consequently, the latter should not be taken into account (in this
854  * case, parameter from_external_object is equal to true). As an
855  * example, imagine that the database table is created from a CSV file
856  * with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that
857  * we asked the database table to ignore columns X1 and X3. Then
858  * setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will
859  * set the columns of the database table as { "X0", "X2", "X4" }. The
860  * same result could be obtained by executing
861  * setVariableNames( { "X0", "X2", "X4" }, false ), which specifies
862  * directly the set of names to retain in the database table.
863  * @param names the names of all the columns, including the ignored
864  * columns if from_external_object is set to true, else excluding
865  * them (i.e., this should precisely correspond to the columns stored
866  * into the database table).
867  * @param from_external_object a Boolean indicating whether parameter
868  * names includes the columns ignored by the database table (true) or
869  * not (false).
870  * @throw SizeError is raised if the names passed in arguments cannot be
871  * assigned to the columns of the IDatabaseTable because the size of their
872  * vector is inadequate. */
873  virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > >& names,
874  const bool from_external_object = true)
875  = 0;
876 
877  /// sets the names of the variables
878  /** This method can be called in two different ways: either the names
879  * correspond precisely to the columns stored into the database table
880  * (in this case, parameter from_external_object is equal to false),
881  * or they correspond to the columns of an external database (e.g., a
882  * CSV file) from which we potentially excluded some columns and,
883  * consequently, the latter should not be taken into account (in this
884  * case, parameter from_external_object is equal to true). As an
885  * example, imagine that the database table is created from a CSV file
886  * with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that
887  * we asked the database table to ignore columns X1 and X3. Then
888  * setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will
889  * set the columns of the database table as { "X0", "X2", "X4" }. The
890  * same result could be obtained by executing
891  * setVariableNames( { "X0", "X2", "X4" }, false ), which specifies
892  * directly the set of names to retain in the database table.
893  * @param names the names of all the columns, including the ignored
894  * columns if from_external_object is set to true, else excluding
895  * them (i.e., this should precisely correspond to the columns stored
896  * into the database table).
897  * @param from_external_object a Boolean indicating whether parameter
898  * names includes the columns ignored by the database table (true) or
899  * not (false).
900  * @throw SizeError is raised if the names passed in arguments cannot be
901  * assigned to the columns of the IDatabaseTable because the size of their
902  * vector is inadequate. */
903  template < template < typename > class OTHER_ALLOC >
904  void setVariableNames(const std::vector< std::string, OTHER_ALLOC< std::string > >& names,
905  const bool from_external_object = true);
906 
907  /// returns the name of the kth column of the IDatabaseTable
908  /** @throw OutOfBounds is raised if the IDatabaseTable contains fewer
909  * than k columns. */
910  const std::string& variableName(const std::size_t k) const;
911 
912  /// returns the index of the column whose name is passed in argument
913  /** @warning If several columns correspond to the name, only the
914  * column with the lowest index is returned. If you wish to retrieve all
915  * the columns, use method columnsFromVariableName
916  * @throw UndefinedElement is raised if there exists no column with
917  * the given name*/
918  std::size_t columnFromVariableName(const std::string& name) const;
919 
920  /// returns the indices of all the columns whose name is passed in argument
921  /** It may happen that several columns correspond to a given variable
922  * name. In this case, the function returns the indices of all the
923  * columns of the IDatabaseTable that match the name. */
924  DBVector< std::size_t > columnsFromVariableName(const std::string& name) const;
925 
926  /// returns the number of variables (columns) of the database
927  std::size_t nbVariables() const noexcept;
928 
929  /// returns the number of records (rows) in the database
930  std::size_t nbRows() const noexcept;
931 
932  /// returns the number of records (rows) in the database
933  std::size_t size() const noexcept;
934 
935  /// indicates whether the database contains some records or not
936  bool empty() const noexcept;
937 
938  /// makes the database table ignore from now on the kth column
939  /** This method can be called in two different ways: either k refers to
940  * the current kth column of the database table (in this case, parameter
941  * from_external_object is set to false), or k corresponds to the kth
942  * column of an original dataset used to fill the database table
943  * (in this case from_external_object is set to true). Depending on
944  * from_external_object's value, the ignored columns may differ. As an
945  * example, imagine that the database table is created from a CSV file
946  * with 5 columns named X0, X1, X2, X3 and X4 respectively. Then a call to
947  * ignoreColumn ( 1, true ) will exclude column X1 from the database table.
948  * As a result, the database table columns are X0, X2, X3 and X4.
949  * Therefore, subsequently calling ignoreColumn ( 1, false ) will result
950  * in excluding X2 since X2 is the 2nd column (columns are indexed
951  * starting from 0). So, now the database table's columns are
952  * X0, X3 and X4. If, now, we call ignoreColumn ( 3, true ), this will
953  * remove column X3 because, in the original database, X3 was the 4th
954  * column.
955  *
956  * @warning If the database table was not empty, then the kth column is
957  * removed from all the rows currently stored.
958  * @warning If the kth column does not exist (i.e., the original dataset
959  * does not contain the kth column when from_external_object is set to
960  * true, or the IDatabaseTable has no kth column when from_external_object
961  * is set to false), column k is marked as to be ignored and nothing is
962  * done on the content of the IDatabaseTable. No exception is raised.
963  * @param k the column to remove. See the above detailed description on
964  * how k is computed.
965  * @param from_external_object indicates whether k refers to the kth
966  * column of an original external database or to the current kth column
967  * of the database table. */
968  virtual void ignoreColumn(const std::size_t k, const bool from_external_object = true) = 0;
969 
970  /// returns the set of columns of the original dataset that are ignored
971  virtual const DBVector< std::size_t > ignoredColumns() const = 0;
972 
973  /** @brief returns the set of columns of the original dataset that are
974  * present in the IDatabaseTable */
975  virtual const DBVector< std::size_t > inputColumns() const = 0;
976 
978 
979  /// insert a new row at the end of the database
980  /** The new_row passed in argument is supposed to come from an external
981  * database. So it must contain data for the ignored columns.
982  * @throw SizeError is raised if the vector of strings cannot be inserted
983  * in the IDatabaseTable because its size does not allow a matching with the
984  * columns of the IDatabaseTable (taking into account the ignored columns) */
985  template < template < typename > class OTHER_ALLOC >
986  void insertRow(const std::vector< std::string, OTHER_ALLOC< std::string > >& new_row);
987 
988  /// insert a new DBRow at the end of the database
989  /** Unlike methods insertRow for data whose type is different from T_DATA,
990  * this method assumes that the new row passed in argument does not contain
991  * any data of the ignored columns. So, basically, it could be copied
992  * as is into the database table.
993  * @throw SizeError is raised if the size of the new_row is not equal to
994  * the number of columns retained in the IDatabaseTable */
995  virtual void insertRow(Row< T_DATA >&& new_row, const IsMissing contains_missing_data);
996 
997  /// insert a new row at the end of the database
998  /** Unlike methods insertRow for data whose type is different from T_DATA,
999  * this method assumes that the new row passed in argument does not contain
1000  * any data of the ignored columns. So, basically, it could be copied
1001  * as is into the database table.
1002  * @throw SizeError is raised if the size of the new_row is not equal to
1003  * the number of columns retained in the IDatabaseTable */
1004  virtual void insertRow(const Row< T_DATA >& new_row, const IsMissing contains_missing_data);
1005 
1008 
1009  /// insert a set of new DBRows at the end of the database
1010  /** Unlike methods insertRows for data whose type is different from T_DATA,
1011  * this method assumes that the new rows passed in argument do not contain
1012  * any data of the ignored columns. So, basically, these rows could be
1013  * copied as is into the database table.
1014  * @param new_rows the new set of rows to be copied as is
1015  * @param rows_have_missing_vals a vector of the same size as new_rows
1016  * that indicates, for each new row, whether it contains some missing
1017  * value or not
1018  * @throw SizeError is raised if the size of at least one row in new_rows
1019  * is not equal to the number of columns retained in the IDatabaseTable.
1020  * A SizeError exception will also be raised if the number of new rows
1021  * is not identical to the size of vector rows_have_missing_vals. */
1022  virtual void insertRows(Matrix< T_DATA >&& new_rows,
1023  const DBVector< IsMissing >& rows_have_missing_vals);
1024 
1025  /// insert a set of new DBRows at the end of the database
1026  /** Unlike methods insertRows for data whose type is different from T_DATA,
1027  * this method assumes that the new rows passed in argument do not contain
1028  * any data of the ignored columns. So, basically, these rows could be
1029  * copied as is into the database table.
1030  * @param new_rows the new set of rows to be copied as is
1031  * @param rows_have_missing_vals a vector of the same size as new_rows
1032  * that indicates, for each new row, whether it contains some missing
1033  * value or not
1034  * @throw SizeError is raised if the size of at least one row in new_rows
1035  * is not equal to the number of columns retained in the IDatabaseTable.
1036  * A SizeError exception will also be raised if the number of new rows
1037  * is not identical to the size of vector rows_have_missing_vals. */
1038  virtual void insertRows(const Matrix< T_DATA >& new_rows,
1039  const DBVector< IsMissing >& rows_have_missing_vals);
1040 
1041  /// erase a given row specified by its index in the table
1042  /** In the database, rows are indexed, starting from 0.
1043  * @warning If the row does not exist, nothing is done. In particular,
1044  * no exception is raised. */
1045  void eraseRow(std::size_t index);
1046 
1047  /// erase the first row
1048  /** @warning if the row does not exist, nothing is done. In particular, no
1049  * exception is raised. */
1050  void eraseFirstRow();
1051 
1052  /// erase the last row
1053  /** @warning if the row does not exist, nothing is done. In particular, no
1054  * exception is raised. */
1055  void eraseLastRow();
1056 
1057  /// erase the k first rows
1058  /** @warning if there are fewer than k rows in the database, the database is
1059  * completely emptied */
1060  void eraseFirstRows(const std::size_t k);
1061 
1062  /// erase the k last rows
1063  /** @warning if there are fewer than k rows in the database, the database is
1064  * completely emptied */
1065  void eraseLastRows(const std::size_t k);
1066 
1067  /// erase the rows from index deb (included) to index end (not included)
1068  /** In the database, rows are indexed, starting from 0. */
1069  void eraseRows(std::size_t deb, std::size_t end);
1070 
1071  /// erase all the rows
1072  void eraseAllRows();
1073 
1074  /// erase the content of the database, including the names of the variables
1075  virtual void clear();
1076 
1077  /// returns the allocator of the database
1078  ALLOC< T_DATA > getAllocator() const;
1079 
1080  /// returns the set of missing symbols
1081  const DBVector< std::string >& missingSymbols() const;
1082 
1083  /// indicates whether the database contains some missing values
1084  bool hasMissingValues() const;
1085 
1086  /// indicates whether the kth row contains some missing values
1087  bool hasMissingValues(const std::size_t k) const;
1088 
1089  /// changes the max number of threads that a database can use
1090  /** Within databases, some methods can be processed in a parallel fashion.
1091  * This method indicates the maximum number of threads that can be run
1092  * in parallel at the same time. */
1093  void setMaxNbThreads(const std::size_t nb) const;
1094 
1095  /// returns the number of threads used to parse the database
1096  std::size_t nbThreads() const;
1097 
1098  /** @brief changes the minimum number of rows a thread should process in a
1099  * multithreading context
1100  *
1101  * When a method executes several threads to perform actions on the rows
1102  * of the database, the MinNbRowsPerThread indicates how many rows each
1103  * thread should at least process. This is used to compute the number of
1104  * threads actually run. This number is equal to the min between the max
1105  * number of threads allowed and the number of records in the database
1106  * divided by nb. */
1107  void setMinNbRowsPerThread(const std::size_t nb) const;
1108 
1109  /// returns the minimum number of rows that each thread should process
1110  std::size_t minNbRowsPerThread() const;
1111 
1112  /// assign a given weight to all the rows of the database
1113  void setAllRowsWeight(const double new_weight);
1114 
1115  /// assigns a given weight to the ith row of the database
1116  /** @throws OutOfBounds if i is outside the set of indices of the
1117  * records or if the weight is negative */
1118  void setWeight(const std::size_t i, const double weight);
1119 
1120  /// returns the weight of the ith record
1121  /** @throws OutOfBounds if i is outside the set of indices of the
1122  * records */
1123  double weight(const std::size_t i) const;
1124 
1125  /// returns the weight of the whole database
1126  double weight() const;
1127 
1128  /// @}
1129 
1130 
1131  protected:
1132  /// the names of the variables for each column
1133  DBVector< std::string > variable_names_;
1134 
1135  // the vector of DBRows containing all the raw data
1137 
1138  // the set of strings corresponding to missing values
1139  DBVector< std::string > missing_symbols_;
1140 
1141  // a vector indicating which rows have missing values (char != 0)
1142  DBVector< IsMissing > has_row_missing_val_;
1143 
1144  // the maximal number of threads that the database can use
1146 
1147  // the min number of rows that a thread should process in a
1148  // multithreading context
1149  mutable std::size_t min_nb_rows_per_thread_{100};
1150 
1151 
1152  /** @brief checks whether a size corresponds to the number of columns
1153  * of the database */
1154  bool isRowSizeOK_(const std::size_t size) const;
1155 
1156  /// copy operator
1157  IDatabaseTable< T_DATA, ALLOC >& operator=(const IDatabaseTable< T_DATA, ALLOC >& from);
1158 
1159  /// move operator
1160  IDatabaseTable< T_DATA, ALLOC >& operator=(IDatabaseTable< T_DATA, ALLOC >&& from);
1161 
1162 
1163 #ifndef DOXYGEN_SHOULD_SKIP_THIS
1164 
1165  private:
1166  // the list of handlers currently attached to the database
1167  /* this is useful when the database is resized */
1168  mutable DBVector< HandlerSafe* > _list_of_safe_handlers_;
1169 
1170  // a mutex to safely add/remove handlers in _list_of_safe_handlers_
1171  mutable std::mutex _safe_handlers_mutex_;
1172 
1173  // the end iterator for the database
1174  Handler* _end_{nullptr};
1175 
1176  // the safe end iterator for the database
1177  iterator_safe* _end_safe_{nullptr};
1178 
1179  /// add a new safe handler to the list of attached handlers
1180  void _attachHandler_(HandlerSafe* handler) const;
1181 
1182  /// detach a safe handler from the list of attached handlers
1183  void _detachHandler_(HandlerSafe* handler) const;
1184 
1185  /// update the handlers when the size of the database changes
1186  void _updateHandlers_(std::size_t new_size) const;
1187 
1188  // create the end iterators
1189  void _createEndIterators_();
1190 
1191 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
1192 
1193 
1194  /// allow the handlers to access the database directly
1195  friend class Handler;
1196  friend class HandlerSafe;
1197  };
1198 
1199  } /* namespace learning */
1200 
1201 } /* namespace gum */
1202 
1203 /// always include the templated implementations
1204 #include <agrum/tools/database/IDatabaseTable_tpl.h>
1205 
1206 #endif /* GUM_IDATABASE_TABLE_H */
virtual void insertRow(const Row< T_DATA > &new_row, const IsMissing contains_missing_data)
insert a new row at the end of the database
virtual HandlerSafe & operator=(const Handler &)
copy operator
virtual Handler & operator--() final
makes the operator point to the previous row in the database
HandlerSafe(const IDatabaseTable< T_DATA, ALLOC > &db)
default constructor
void eraseFirstRows(const std::size_t k)
erase the k first rows
void insertRow(const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row)
insert a new row at the end of the database
virtual const_reference row() const final
returns the current row pointed to by the handler (unsafe version)
virtual bool hasRows() const final
indicates whether the handler has reached its end or not
IDatabaseTable(IDatabaseTable< T_DATA, ALLOC > &&from)
move constructor
virtual reference rowSafe() final
returns the current row pointed to by the handler (safe version)
void setMaxNbThreads(const std::size_t nb) const
changes the max number of threads that a database can use
virtual IDatabaseTable< T_DATA, ALLOC > * clone(const allocator_type &alloc) const =0
virtual copy constructor with a given allocator
std::size_t columnFromVariableName(const std::string &name) const
returns the index of the column whose name is passed in argument
virtual std::size_t DBSize() const final
returns the number of rows of the whole database
ALLOC< T_DATA > getAllocator() const
returns the allocator of the database
DBVector< std::string > variable_names_
the names of the variables for each column
DBVector< std::size_t > columnsFromVariableName(const std::string &name) const
returns the indices of all the columns whose name is passed in argument
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
void eraseRow(std::size_t index)
erase a given row specified by its index in the table
std::size_t nbThreads() const
returns the number of threads used to parse the database
virtual void insertRows(const Matrix< T_DATA > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
insert a set of new DBRows at the end of the database
the (unsafe) handler for the tabular databases
std::size_t size() const noexcept
returns the number of records (rows) in the database
virtual const_pointer operator->() const final
Dereferences the value pointed to by the handler (unsafe version)
virtual reference row() final
returns the current row pointed to by the handler (unsafe version)
IDatabaseTable< T_DATA, ALLOC > & operator=(const IDatabaseTable< T_DATA, ALLOC > &from)
copy operator
virtual void setRange(std::size_t first, std::size_t last) final
sets the area in the database the handler will handle
virtual const_reference operator*() const final
returns the current row pointed to by the handler (unsafe version)
void setMinNbRowsPerThread(const std::size_t nb) const
changes the minimum number of rows a thread should process in a multithreading context ...
const DBVector< std::string > & missingSymbols() const
returns the set of missing symbols
void eraseRows(std::size_t deb, std::size_t end)
erase the rows from index deb (included) to index end (not included)
virtual Handler begin() const
returns a new handler that points to the beginning of the database's area of the current handler ...
virtual std::size_t nbVariables() const final
returns the number of variables (columns) of the database
void setVariableNames(const std::vector< std::string, OTHER_ALLOC< std::string > > &names, const bool from_external_object=true)
sets the names of the variables
virtual Handler & operator-=(const std::size_t i) final
moves back the handler by i rows in the database
void eraseLastRow()
erase the last row
virtual const DBVector< std::size_t > ignoredColumns() const =0
returns the set of columns of the original dataset that are ignored
void eraseAllRows()
erase all the rows
Handler(Handler &&h)
move constructor
Handler(const Handler &h)
copy constructor
IDatabaseTable< T_DATA, ALLOC > & operator=(IDatabaseTable< T_DATA, ALLOC > &&from)
move operator
iterator_safe beginSafe() const
returns a new safe handler pointing to the beginning of the database
bool isRowSizeOK_(const std::size_t size) const
checks whether a size corresponds to the number of columns of the database
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true)=0
sets the names of the variables
IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC > &from)
copy constructor
virtual HandlerSafe & operator=(HandlerSafe &&)
move operator
void eraseLastRows(const std::size_t k)
erase the k last rows
const Matrix< T_DATA > & content() const noexcept
returns the content (the records) of the database
DBVector< IsMissing > has_row_missing_val_
virtual Handler end() const
returns a new handler that points to the end of the database's area of the current handler ...
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
virtual bool operator==(const Handler &handler) const final
checks whether two handlers point to the same row in the database
virtual HandlerSafe & operator=(Handler &&)
move operator
DBVector< std::string > missing_symbols_
friend class Handler
allow the handlers to access the database directly
IDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const ALLOC< T_DATA > &alloc)
default constructor
virtual void nextRow() final
makes the handler point to the next row, equivalent to operator++
virtual std::size_t numRow() const final
the number of the current row (0 = the 1st row managed by the handler)
const iterator_safe & endSafe() const noexcept
returns a new safe handler pointing to the end of the database
virtual bool operator!=(const Handler &handler) const final
checks whether two handlers point to different rows in the database
bool hasMissingValues() const
indicates whether the database contains some missing values
bool empty() const noexcept
indicates whether the database contains some records or not
double weight() const
returns the weight of the whole database
const iterator & end() const noexcept
returns a new unsafe handler pointing to the end of the database
virtual const_reference rowSafe() const final
returns the current row pointed to by the handler (safe version)
virtual IDatabaseTable< T_DATA, ALLOC > * clone() const =0
virtual copy constructor
std::size_t nbRows() const noexcept
returns the number of records (rows) in the database
virtual const IDatabaseTable< T_DATA, ALLOC > & database() const
returns a reference to the database
virtual Handler & operator=(Handler &&)
move operator
virtual HandlerSafe & operator=(const HandlerSafe &)
copy operator
the safe handler of the tabular databases
virtual Handler & operator+=(const std::size_t i) final
advances the handler by i rows in the database
double weight(const std::size_t i) const
returns the weight of the ith record
virtual void insertRows(Matrix< T_DATA > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
insert a set of new DBRows at the end of the database
virtual void insertRow(Row< T_DATA > &&new_row, const IsMissing contains_missing_data)
insert a new DBRow at the end of the database
Handler(const IDatabaseTable< T_DATA, ALLOC > &db)
default constructor
IDatabaseTable(IDatabaseTable< T_DATA, ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
virtual Handler & operator=(const Handler &)
copy operator
const std::string & variableName(const std::size_t k) const
returns the name of the kth column of the IDatabaseTable
iterator_safe handlerSafe() const
returns a new safe handler pointing to the 1st record of the database
HandlerSafe(HandlerSafe &&h)
move constructor
virtual const DBVector< std::string > & variableNames() const final
returns the names of the variables
iterator begin() const
returns a new unsafe handler pointing to the beginning of the database
void setAllRowsWeight(const double new_weight)
assign a given weight to all the rows of the database
virtual const DBVector< std::size_t > inputColumns() const =0
returns the set of columns of the original dataset that are present in the IDatabaseTable ...
bool hasMissingValues(const std::size_t k) const
indicates whether the kth row contains some missing values
virtual std::size_t size() const final
returns the number of rows managed by the handler
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
std::size_t minNbRowsPerThread() const
returns the minimum number of rows that each thread should process
HandlerSafe(const HandlerSafe &h)
copy constructor
virtual ~IDatabaseTable()
destructor
const DBVector< std::string > & variableNames() const noexcept
returns the variable names for all the columns of the database
virtual std::pair< std::size_t, std::size_t > range() const final
returns the current range of the handler [begin,end)
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true)=0
makes the database table ignore from now on the kth column
IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
virtual Handler & operator++() final
makes the operator point to the next row in the database
iterator handler() const
returns a new unsafe handler pointing to the 1st record of the database
void eraseFirstRow()
erase the first row
virtual void clear()
erase the content of the database, including the names of the variables
virtual void reset() final
puts the handler to the beginning of the database's area it handles
void setWeight(const std::size_t i, const double weight)
assigns a given weight to the ith row of the database