aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
IDatabaseTable.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The common class for the tabular database tables
24  *
25  * IDatabases are not intended to be created as is but should be created through
26  * the RawDatabaseTable and DatabaseTable classes.
27  *
28  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
29  */
30 #ifndef GUM_IDATABASE_TABLE_H
31 #define GUM_IDATABASE_TABLE_H
32 
33 #include <cstddef>
34 #include <utility>
35 #include <string>
36 #include <cstring>
37 #include <memory>
38 #include <vector>
39 #include <mutex>
40 
41 #include <agrum/agrum.h>
42 #include <agrum/tools/core/thread.h>
43 #include <agrum/tools/core/OMPThreads.h>
44 #include <agrum/tools/database/DBCell.h>
45 #include <agrum/tools/database/DBRow.h>
46 #include <agrum/tools/database/DBHandler.h>
47 #include <agrum/tools/database/DBTranslator.h>
48 
49 
50 namespace gum {
51 
52  namespace learning {
53 
54  template < template < typename > class ALLOC, bool ENABLE_INSERT >
55  struct IDatabaseTableInsert4DBCell;
56 
57  template < template < typename > class ALLOC >
59  template < typename TX_DATA >
61 
62  template < typename TX_DATA >
63  using Row = DBRow< TX_DATA, ALLOC >;
64 
65  template < typename TX_DATA >
66  using Matrix = std::vector< DBRow< TX_DATA, ALLOC >,
68 
69 
70  /// insert a new DBRow at the end of the database
71  /** The new row passed in argument is supposed to come from an external
72  * database. So it must contain data for the ignored columns. */
73  virtual void insertRow(Row< DBCell >&& new_row) = 0;
74 
75  /// insert a new row at the end of the database
76  /** The new row passed in argument is supposed to come from an external
77  * database. So it must contain data for the ignored columns. */
78  virtual void insertRow(const Row< DBCell >& new_row) = 0;
79 
80  /// insert a set of new DBRows at the end of the database
81  /** The new rows passed in argument are supposed to come from an external
82  * database. So they must contain data for the ignored columns. */
83  virtual void insertRows(Matrix< DBCell >&& new_rows) = 0;
84 
85  /// insert a set of new DBRows at the end of the database
86  /** The new rows passed in argument are supposed to come from an external
87  * database. So they must contain data for the ignored columns. */
88  virtual void insertRows(const Matrix< DBCell >& new_rows) = 0;
89 
90  /// insert a new row at the end of the database
91  /** The new row passed in argument is supposed to come from an external
92  * database. So it must contain data for the ignored columns. */
93  virtual void
94  insertRow(const std::vector< std::string, ALLOC< std::string > >& new_row)
95  = 0;
96 
97  /// insert new rows at the end of the database
98  /** The new rows passed in argument are supposed to come from an external
99  * database. So they must contain data for the ignored columns. */
100  virtual void insertRows(const DBVector< DBVector< std::string > >& new_rows);
101  };
102 
103 
104  template < template < typename > class ALLOC >
106  template < typename TX_DATA >
108 
109  template < typename TX_DATA >
110  using Row = DBRow< TX_DATA, ALLOC >;
111 
112  template < typename TX_DATA >
113  using Matrix = std::vector< DBRow< TX_DATA, ALLOC >,
115 
116  /// insert a new row at the end of the database
117  /** The new row passed in argument is supposed to come from an external
118  * database. So it must contain data for the ignored columns. */
119  virtual void
120  insertRow(const std::vector< std::string, ALLOC< std::string > >& new_row)
121  = 0;
122 
123  /// insert new rows at the end of the database
124  /** The new rows passed in argument are supposed to come from an external
125  * database. So they must contain data for the ignored columns. */
126  virtual void insertRows(const DBVector< DBVector< std::string > >& new_rows);
127  };
128 
129 
130  /** @class IDatabaseTable
131  * @headerfile IDatabaseTable.h <agrum/BN/learning/IDatabaseTable.h>
132  * @brief The common class for the tabular database tables
133  *
134  * IDatabases are not intended to be created as is but should be created
135  * through the RawDatabaseTable and DatabaseTable classes. They represent
136  * the structures shared by these latter classes.
137  *
138  * Here is an example of how to use the class, illustrated with the
139  * DatabaseTable class (in this case, the T_DATA type is just equal to
140  * DBTranslatedValue):
141  * @code
142  * // create the database from a CSV. This is not compulsory for
143  * // IDatabaseTable instances, but this is how we usually create
144  * // DatabaseTable instances
145  * gum::learning::DBInitializerFromCSV<> initializer ( "asia.csv" );
146  * const auto& var_names = initializer.variableNames ();
147  * gum::learning::DBTranslatorSet<> translator_set;
148  * gum::learning::DBTranslator4LabelizedVariable<> translator;
149  * for ( std::size_t i = 0; i < var_names.size(); ++i )
150  * translator_set.insertTranslator ( translator, i );
151  * gum::learning::DatabaseTable<> database ( translator_set );
152  * database.setVariableNames( initializer.variableNames () );
153  *
154  * // here, database contains the content of the asia.csv file.
155  * // determine how many columns and rows the database contains
156  * std::size_t nb_rows = database.nbRows();
157  * std::size_t nb_cols = database.nbVariables ();
158  *
159  * // manually add a new row into the database
160  * std::vector<std::string> row( 8, "toto" ); // asia has 8 columns
161  * database.insertRow ( row );
162  * gum::learning::DBRow<gum::learning::DBTranslatedValue>
163  * dbrow ( 8, gum::learning::DBTranslatedValue { std::size_t(0) } );
164  * database.insertRow ( dbrow );
165  * // insert 4 rows in a single call
166  * database.insertRows(
167  * std::vector<gum::learning::DBRow<gum::learning::DBTranslatedValue>>
168  * ( 4, dbrow ) );
169  *
170  * // erase some rows
171  * database.eraseRow ( 12 ); // erase the 13th row of the database
172  * database.eraseFirstRow (); // erase the first row of the database
173  * database.eraseLastRow (); // erase the last row of the database
174  * database.eraseFirstRows ( 2 ); // erase the first two rows
175  * database.eraseLastRows ( 3 ); // erase the last three rows
176  * database.eraseRows ( 2,4 ); // erase rows indexed from 2 to 4 (excluded)
177  *
178  * // parse the content of the database, the usual way
179  * for ( const auto& dbrow : database )
180  * std::cout << dbrow.row() << " weight: " << dbrow.weight() << std::endl;
181  *
182  * // ignore some columns of the database, i.e., remove them
183  * database.ignoreColumn ( 3 ); // remove the column X3 of the CSV file
184  * // now, the database contains columns 0, 1, 2, 4, 5, 6, 7 of the
185  * // CSV file. If we wish to remove Column X5 of the CSV file:
186  * database.ignoreColumn ( 5 ); // remove the column X5 of the CSV file
187  * // now, the database contains columns 0, 1, 2, 4, 6, 7 of the CSV file.
188  * // if we wish to remove the 5th column of the IDatabaseTable, i.e.,
189  * // column #4 of the table, either we determine that this actually corresponds
190  * // to column X6 of the CSV and we use database.ignoreColumn ( 6 ) or
191  * // we call:
192  * database.ignoreColumn ( 4, false ); // false => 4 = the 5th column of
193  * // the IDatabaseTable, not the 5th column/variable of the CSV file
194  * // (remember that all column numbers start from 0).
195  *
196  * // display the columns of the CSV that were ignored and those that
197  * // were kept:
198  * std::vector<std::size_t> ignored_cols = database.ignoredColumns ();
199  * std::vector<std::size_t> kept_cols = database.inputColumns ();
200  *
201  * // parse the content of the database using handlers
202  * typename gum::learning::DatabaseTable<>::HandlerSafe handler( database );
203  * typename gum::learning::DatabaseTable<>::Handler uhandler( database );
204  * // by default, the handlers range over the whole database
205  *
206  * // change the range of rows handled by the DBHandler
207  * handler.setRange ( 1, 40 ); // now parses rows [1,40)
208  * std::cout << handler.size (); // displays 39: rows 1,...,39
209  * std::cout << handler.DBSize (); // shows the number of rows in the database
210  * std::cout << handler.numRow (); // displays 0: the handler currently
211  * // points on the first row of its managed area [1,40)
212  *
213  * // move the handler to the next row
214  * handler.nextRow();
215  * std::cout << handler.numRow (); // displays 1: the handler points now
216  * // on the second row of its managed area. This corresponds to the third
217  * // DBRow of the database since the range of handler is [1,40)
218  * ++handler; // move again to the next row
219  * std::cout << handler.numRow (); // displays 2
220  * handler += 4; // advances the pointer by 4 rows
221  * std::cout << handler.numRow (); // displays 6
222  *
223  * // get the DBRow pointed to by the handler: this is the 7th DBRow
224  * // of the handler's area [1,40), i.e., the 8th DBRow of the database
225  * const auto& xrow7 = handler.row (); // get the DBRow, unsafe version
226  * const auto& yrow7 = handler.rowSafe (); // get the DBRow, safe version
227  * const std::vector<gum::learning::DBCell>& xrow = xrow7.row ();
228  * const double xweight = xrow7.weight ();
229  *
230  * // another way to access the row
231  * const auto& zrow7 = *handler; // get the DBRow, unsafe version
232  *
233  * // check whether there exist other rows managed by the handler after
234  * // the current row
235  * bool has_rows = handler.hasRows (); // true: there remain 33 rows
236  *
237  * // makes the handler point again on the 2nd row of the database
238  * handler.reset (); // the handler points to the beginning of its area
239  * std::cout << handler.numRow (); // displays 0: the handler currently
240  * // points on the first row of its managed area [1,40)
241  *
242  * // see the variables' names, i.e., the names of the database's columns
243  * const auto& vars = handler.variableNames();
244  *
245  * // parse all the rows managed
246  * handler.reset ();
247  * for ( auto end = handler.end (); handler != end; ++handler )
248  * std::cout << handler.row ().weight () << std::endl;
249  *
250  * // another possibility:
251  * for ( const auto& row : handler )
252  * std::cout << row.weight () << std::endl;
253  * @endcode
254  * @ingroup learning_database
255  */
256  template < typename T_DATA,
257  template < typename > class ALLOC = std::allocator >
260  ALLOC,
261  !std::is_same< T_DATA, DBCell >::value >,
262  private ALLOC< T_DATA > {
263  public:
264  /// the type for the vectors used in the IDatabaseTable
265  template < typename TX_DATA >
266  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
267 
268  /// a row of the database
269  template < typename TX_DATA >
270  using Row = DBRow< TX_DATA, ALLOC >;
271 
272  /// the type for the matrices stored into the database
273  template < typename TX_DATA >
274  using Matrix = std::vector< DBRow< TX_DATA, ALLOC >,
276 
277  template < template < typename > class XALLOC >
278  using MissingValType = std::vector< std::string, XALLOC< std::string > >;
279 
280 
281  enum IsMissing : char
282  {
285  };
286 
287 
288  /** @class Handler
289  * @headerfile IDatabaseTable.h <agrum/BN/learning/IDatabaseTable.h>
290  * @brief the (unsafe) handler for the tabular databases
291  *
292  * The IDatabaseTable class is provided with two types of handlers: unsafe
293  * handlers and safe ones. Compared to the former, the safe handlers
294  * incur a small overhead during their creation. But safe handlers
295  * are informed by their associated database when the structure of
296  * this one changes, i.e., when the number of rows/columns changes or
297  * when rows are added/removed, whereas unsafe handlers are not aware
298  * of such changes. For databases that are not affected by this kind of
299  * change, unsafe handlers should be used instead of safe ones because
300  * they are slightly faster. Both types of handlers are designed to be
301  * created in parallel by several threads.
302  *
303  * Here is an example of how to use this class, illustrated on handlers
304  * for a RawDatabaseTable:
305  * @code
306  * // create the database
307  * gum::learning::RawDatabaseTable<> database;
308  * database.setVariableNames( std::vector<std::string> {"v1","v2","v3"} );
309  *
310  * // add one row to the database
311  * gum::learning::DBRow<gum::learning::DBCell>
312  * row( 3, gum::learning::DBCell(2) );
313  * database.insertRow( row );
314  *
315  * // create a handler.
316  * typename gum::learning::RawDatabaseTable<>::Handler handler( database );
317  * // by default, the handlers range over the whole database, which
318  * // currently contains only one row
319  *
320  * // here, we add 95 new rows into the database
321  * for ( int i = 0; i < 95; ++i ) database.insertRow( row );
322  *
323  * // due to the addition of the rows, the (unsafe) handler still thinks
324  * // there is only one row
325  * std::cout << handler.size (); // displays 1 (handler's range)
326  * std::cout << handler.DBSize (); // displays 96 (database's size)
327  *
328  * // change the range of rows handled by the DBHandler
329  * handler.setRange ( 1, 40 ); // now parses rows [1,40)
330  * std::cout << handler.size (); // displays 39: rows 1,...,39
331  * std::cout << handler.DBSize (); // displays 96: database's size
332  * std::cout << handler.numRow (); // displays 0: the handler currently
333  * // points on the first row of its managed area [1,40)
334  *
335  * // move the handler to the next row
336  * handler.nextRow();
337  * std::cout << handler.numRow (); // displays 1: the handler points now
338  * // on the second row of its managed area. This corresponds to the third
339  * // DBRow of the database since the range of handler is [1,40)
340  * ++handler; // move again to the next row
341  * std::cout << handler.numRow (); // displays 2
342  * handler += 4; // advances the pointer by 4 rows
343  * std::cout << handler.numRow (); // displays 6
344  *
345  * // get the DBRow pointed to by the handler: this is the 7th DBRow
346  * // of the handler's area [1,40), i.e., the 8th DBRow of the database
347  * const auto& xrow7 = handler.row (); // get the DBRow, unsafe version
348  * const auto& yrow7 = handler.rowSafe (); // get the DBRow, safe version
349  * const std::vector<gum::learning::DBCell>& xrow = xrow7.row ();
350  * const double xweight = xrow7.weight ();
351  *
352  * // another way to access the row
353  * const auto& zrow7 = *handler; // get the DBRow, unsafe version
354  *
355  * // check whether there exist other rows managed by the handler after
356  * // the current row
357  * bool has_rows = handler.hasRows (); // true: there remain 33 rows
358  *
359  * // makes the handler point again on the 2nd row of the database
360  * handler.reset (); // the handler points to the beginning of its area
361  * std::cout << handler.numRow (); // displays 0: the handler currently
362  * // points on the first row of its managed area [1,40)
363  *
364  * // see the variables' names, i.e., the names of the database's columns
365  * const auto& vars = handler.variableNames();
366  *
367  * // parse all the rows managed
368  * handler.reset ();
369  * for ( auto end = handler.end (); handler != end; ++handler )
370  * std::cout << handler.row ().weight () << std::endl;
371  *
372  * // another possibility:
373  * for ( const auto& row : handler )
374  * std::cout << row.weight () << std::endl;
375  * @endcode
376  *
377  * @ingroup learning_database
378  */
379  class Handler: public DBHandler< T_DATA, ALLOC > {
380  public:
381  /// Types for STL compliance.
382  /// @{
383  using iterator_category = std::random_access_iterator_tag;
384  using value_type = typename DBHandler< T_DATA, ALLOC >::value_type;
386  using const_reference = const value_type&;
387  using pointer = value_type*;
388  using const_pointer = const value_type*;
389  using difference_type = std::ptrdiff_t;
390  using allocator_type = ALLOC< T_DATA >;
391  /// @}
392 
393 
394  template < typename TX_DATA >
395  using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
396 
397  template < typename TX_DATA >
398  using Row = DBRow< TX_DATA, ALLOC >;
399 
400  template < typename TX_DATA >
401  using Matrix = std::vector< DBRow< TX_DATA, ALLOC >,
403 
404 
405  // ########################################################################
406  /// @name Constructors / Destructors
407  // ########################################################################
408  /// @{
409 
410  /// default constructor
411  /** @param db the database the handler will point to.
412  * By default, the range of the handler is the whole database. */
413  Handler(const IDatabaseTable< T_DATA, ALLOC >& db);
414 
415  /// copy constructor
416  /** @param h the handler we wish to copy */
417  Handler(const Handler& h);
418 
419  /// move constructor
420  /** @param h the handler we wish to move */
421  Handler(Handler&& h);
422 
423  /// destructor
424  virtual ~Handler();
425 
426  /// @}
427 
428  // ########################################################################
429  /// @name Operators
430  // ########################################################################
431  /// @{
432 
433  /// copy operator
434  virtual Handler& operator=(const Handler&);
435 
436  /// move operator
437  virtual Handler& operator=(Handler&&);
438 
439  /// makes the handler point to the next row in the database
440  /** if the pointer has already reached the end of the area managed by the
441  * handler, nothing happens. In particular, no exception is raised */
442  virtual Handler& operator++() final;
443 
444  /// makes the handler point to the previous row in the database
445  /** if the pointer is already at the beginning of the area managed
446  * by the handler, nothing happens. In particular, no exception
447  * is raised */
448  virtual Handler& operator--() final;
449 
450  /// advances the handler by i rows in the database
451  /** if, applying this move would make the handler reach the end of
452  * the area managed by the handler, then the handler is kept at the
453  * end of the area, i.e., after the last element of the area. */
454  virtual Handler& operator+=(const std::size_t i) final;
455 
456  /// moves back the handler by i rows in the database
457  /** if, applying this move would make the handler reach the beginning of
458  * the area managed by the handler, then the handler is kept at the
459  * beginning of the area, i.e., at the first element of the area. */
460  virtual Handler& operator-=(const std::size_t i) final;
461 
462  /// checks whether two handlers point to the same row in the database
463  virtual bool operator==(const Handler& handler) const final;
464 
465  /// checks whether two handlers point to different rows in the database
466  virtual bool operator!=(const Handler& handler) const final;
467 
468  /// returns the current row pointed to by the handler (unsafe version)
469  /** @warning The method does not check whether the handler already
470  * points to the end of the area it manages. It is thus faster than
471  * method rowSafe () but, when you call it, you must be sure that the row
472  * actually exists, i.e., that the handler has not reached its end. */
473  virtual const_reference operator*() const final;
474 
475  /// Dereferences the value pointed to by the handler (unsafe version)
476  /** @warning The method does not check whether the handler already
477  * points to the end of its area. It is thus faster than method
478  * rowSafe () but, when you call it, you must be sure that the row
479  * actually exists, i.e., that the handler has not reached its end. */
480  virtual const_pointer operator->() const final;
481 
482  /// @}
483 
484 
485  // ########################################################################
486  /// @name Accessors / Modifiers
487  // ########################################################################
488  /// @{
489 
490  /// returns the number of rows managed by the handler
491  /** A handler need not necessarily handle all the rows of the database.
492  * For instance, RecordCounters cut the database into several pieces and
493  * assign each piece to a handler. Then each handler is used in parallel
494  * to perform countings only on their subset of the database. The size
495  * reported by method "size" is therefore the number of rows managed
496  * by the handler. If you wish to retrieve the size of the whole database,
497  * then use method DBSize instead. */
498  virtual std::size_t size() const final;
499 
500  /// returns the number of rows of the whole database
501  virtual std::size_t DBSize() const final;
502 
503  /// returns the current row pointed to by the handler (safe version)
504  /** @throws OutOfBounds if the handler points to the end of its area */
505  virtual const_reference rowSafe() const final;
506 
507  /// returns the current row pointed to by the handler (safe version)
508  /** @throws OutOfBounds if the handler points to the end of its area */
509  virtual reference rowSafe() final;
510 
511  /// returns the current row pointed to by the handler (unsafe version)
512  /** @warning The method does not check whether the handler already
513  * points to the end of its area. It is thus faster than method
514  * rowSafe () but, when you call it, you must be sure that the row
515  * actually exists, i.e., that the handler has not reached its end. */
516  virtual const_reference row() const final;
517 
518  /// returns the current row pointed to by the handler (unsafe version)
519  /** @warning The method does not check whether the handler already
520  * points to the end of its area. It is thus faster than method
521  * rowSafe () but, when you call it, you must be sure that the row
522  * actually exists, i.e., that the handler has not reached its end. */
523  virtual reference row() final;
524 
525  /// makes the handler point to the next row, equivalent to operator++
526  virtual void nextRow() final;
527 
528  /// the number of the current row (0 = the 1st row managed by the handler)
529  virtual std::size_t numRow() const final;
530 
531  /// indicates whether the handler has reached its end or not
532  virtual bool hasRows() const final;
533 
534  /// puts the handler to the beginning of the database's area it handles
535  virtual void reset() final;
536 
537  /** @brief returns a new handler that points to the beginning of the
538  * database's area of the current handler
539  *
540  * @warning The handler returned manages precisely the same area
541  * as the handler on which begin() is called. */
542  virtual Handler begin() const;
543 
544  /** @brief returns a new handler that points to the end of the
545  * database's area of the current handler
546  *
547  * @warning The handler returned manages precisely the same area
548  * as the handler on which end() is called. */
549  virtual Handler end() const;
550 
551  /// sets the area in the database the handler will handle
552  /** In addition to setting the area that will be parsed by the handler,
553  * this method makes the handler point to the beginning of the area.
554  * @param begin the first row to be handled
555  * @param end the handler handles rows in interval [begin,end). Thus,
556  * the endth row is not included in the set of rows handled.
557  * @warning if begin is greater than end, these values are swapped.
558  * @throw NullElement is raised if the handler does not point to
559  * any database
560  * @throw SizeError is raised if end is greater than the number of
561  * rows of the database */
562  virtual void setRange(std::size_t begin, std::size_t end) final;
563 
564  /// returns the current range of the handler [begin,end)
565  virtual std::pair< std::size_t, std::size_t > range() const final;
566 
567  /// returns the names of the variables
568  virtual const DBVector< std::string >& variableNames() const final;
569 
570  /// returns the number of variables (columns) of the database
571  virtual std::size_t nbVariables() const final;
572 
573  /// returns a reference to the database
574  /** @throw NullElement is raised if the handler does not point toward
575  * any database. */
576  virtual const IDatabaseTable< T_DATA, ALLOC >& database() const;
577 
578  /// @}
579 
580 
581 #ifndef DOXYGEN_SHOULD_SKIP_THIS
582 
583  protected:
584  /// a pointer to the whole database, including variable names
585  const IDatabaseTable< T_DATA, ALLOC >* db__;
586 
587  /// a pointer to the database's records pointed to by the handler
588  /** this data could be retrieved from db__ but we prefer using a
589  * specific variable here for speed-up reasons. */
590  const Matrix< T_DATA >* row__;
591 
592  /// the index of the row currently pointed to by the handler
593  std::size_t index__{std::size_t(0)};
594 
595  /// the first row managed by the handler
596  std::size_t begin_index__{std::size_t(0)};
597 
598  /// the row just after the last one managed by the handler
599  std::size_t end_index__{std::size_t(0)};
600 
601  friend class IDatabaseTable< T_DATA, ALLOC >;
602 
603 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
604  };
605 
606 
607  /** @class HandlerSafe
608  * @headerfile IDatabaseTable.h <agrum/BN/learning/IDatabaseTable.h>
609  * @brief the safe handler of the tabular databases
610  *
611  * The IDatabaseTable class is provided with two types of handlers: unsafe
612  * handlers and safe ones. Compared to the former, the safe handlers
613  * incur a small overhead during their creation. But safe handlers
614  * are informed by their associated database when the structure of
615  * this one changes, i.e., when the number of rows/columns changes or
616  * when rows are added/removed, whereas unsafe handlers are not aware
617  * of such changes. For databases that are not affected by this kind of
618  * change, unsafe handlers should be used instead of safe ones because
619  * they are slightly faster. Both types of handlers are designed to be
620  * created in parallel by several threads.
621  *
622  * Here is an example of how to use this class, illustrated on handlers
623  * for a RawDatabaseTable:
624  * @code
625  * // create the database
626  * gum::learning::RawDatabaseTable<> database;
627  * database.setVariableNames( std::vector<std::string> {"v1","v2","v3"} );
628  *
629  * // add one row to the database
630  * gum::learning::DBRow<gum::learning::DBCell>
631  * row( 3, gum::learning::DBCell(2) );
632  * database.insertRow( row );
633  *
634  * // create a handler.
635  * typename gum::learning::RawDatabaseTable<>::HandlerSafe handler(database);
636  * // by default, the handlers range over the whole database, which
637  * // currently contains only one row
638  *
639  * // here, we add 95 new rows into the database
640  * for ( int i = 0; i < 95; ++i ) database.insertRow( row );
641  *
642  * // due to the addition of the rows, the safe handler updates its range
643  * // and its area is now [0,96)
644  * std::cout << handler.size (); // displays 96 (handler's range)
645  * std::cout << handler.DBSize (); // displays 96 (database's size)
646  *
647  * // change the range of rows handled by the DBHandler
648  * handler.setRange ( 1, 40 ); // now parses rows [1,40)
649  * std::cout << handler.size (); // displays 39: rows 1,...,39
650  * std::cout << handler.DBSize (); // displays 96: database's size
651  * std::cout << handler.numRow (); // displays 0: the handler currently
652  * // points on the first row of its managed area [1,40)
653  *
654  * // move the handler to the next row
655  * handler.nextRow();
656  * std::cout << handler.numRow (); // displays 1: the handler points now
657  * // on the second row of its managed area. This corresponds to the third
658  * // DBRow of the database since the range of handler is [1,40)
659  * ++handler; // move again to the next row
660  * std::cout << handler.numRow (); // displays 2
661  * handler += 4; // advances the pointer by 4 rows
662  * std::cout << handler.numRow (); // displays 6
663  *
664  * // get the DBRow pointed to by the handler: this is the 7th DBRow
665  * // of the handler's area [1,40), i.e., the 8th DBRow of the database
666  * const auto& xrow7 = handler.row (); // get the DBRow, unsafe version
667  * const auto& yrow7 = handler.rowSafe (); // get the DBRow, safe version
668  * const std::vector<gum::learning::DBCell>& xrow = xrow7.row ();
669  * const double xweight = xrow7.weight ();
670  *
671  * // another way to access the row
672  * const auto& zrow7 = *handler; // get the DBRow, unsafe version
673  *
674  * // check whether there exist other rows managed by the handler after
675  * // the current row
676  * bool has_rows = handler.hasRows (); // true: there remain 33 rows
677  *
678  * // makes the handler point again on the 2nd row of the database
679  * handler.reset (); // the handler points to the beginning of its area
680  * std::cout << handler.numRow (); // displays 0: the handler currently
681  * // points on the first row of its managed area [1,40)
682  *
683  * // see the variables' names, i.e., the names of the database's columns
684  * const auto& vars = handler.variableNames();
685  *
686  * // parse all the rows managed
687  * handler.reset ();
688  * for ( auto end = handler.end (); handler != end; ++handler )
689  * std::cout << handler.row ().weight () << std::endl;
690  *
691  * // another possibility:
692  * for ( const auto& row : handler )
693  * std::cout << row.weight () << std::endl;
694  * @endcode
695  *
696  * @ingroup learning_database
697  */
698  class HandlerSafe: public Handler {
699  public:
700  /// Types for STL compliance.
701  /// @{
702  using iterator_category = std::random_access_iterator_tag;
703  using value_type = typename Handler::value_type;
704  using reference = value_type&;
705  using const_reference = const value_type&;
706  using pointer = value_type*;
707  using const_pointer = const value_type*;
708  using difference_type = std::ptrdiff_t;
709  using allocator_type = ALLOC< T_DATA >;
710  /// @}
711 
712  // ########################################################################
713  /// @name Constructors / Destructors
714  // ########################################################################
715  /// @{
716 
717  /// default constructor
718  /** @param db the database the handler will point to.
719  * By default, the range of the handler is the whole database. */
720  HandlerSafe(const IDatabaseTable< T_DATA, ALLOC >& db);
721 
722  /// copy constructor
723  HandlerSafe(const HandlerSafe& h);
724 
725  /// move constructor
727 
728  /// destructor
729  virtual ~HandlerSafe();
730 
731  /// @}
732 
733  // ########################################################################
734  /// @name Operators
735  // ########################################################################
736  /// @{
737 
738  /// copy operator
739  virtual HandlerSafe& operator=(const HandlerSafe&);
740 
741  /// copy operator
742  virtual HandlerSafe& operator=(const Handler&);
743 
744  /// move operator
745  virtual HandlerSafe& operator=(HandlerSafe&&);
746 
747  /// move operator
748  virtual HandlerSafe& operator=(Handler&&);
749 
750  /// @}
751 
752 
753 #ifndef DOXYGEN_SHOULD_SKIP_THIS
754 
755  private:
756  /// attach a new handler to the database
757  void attachHandler__();
758 
759  /// detach a handler
760  void detachHandler__();
761 
762  friend class IDatabaseTable< T_DATA, ALLOC >;
763 
764 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
765  };
766 
767 
768  /// Types for STL compliance.
769  /// @{
770  using value_type = Row< T_DATA >;
771  using reference = value_type&;
772  using const_reference = const value_type&;
773  using pointer = value_type*;
774  using const_pointer = const value_type*;
775  using size_type = std::size_t;
776  using difference_type = std::ptrdiff_t;
777  using iterator = Handler;
778  using iterator_safe = HandlerSafe;
779  using const_iterator = const Handler;
780  using const_iterator_safe = const HandlerSafe;
781  using allocator_type = ALLOC< T_DATA >;
782  /// @}
783 
784 
785  // ##########################################################################
786  /// @name Constructors / Destructors
787  // ##########################################################################
788  /// @{
789 
790  /// default constructor, given the missing symbols, the variable names and the allocator
791  template < template < typename > class VARALLOC,
792  template < typename >
793  class MISSALLOC >
794  IDatabaseTable(
795  const MissingValType< MISSALLOC >& missing_symbols,
796  const std::vector< std::string, VARALLOC< std::string > >& var_names,
797  const ALLOC< T_DATA >& alloc);
798 
799  /// copy constructor
800  IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC >& from);
801 
802  /// copy constructor with a given allocator
803  IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC >& from,
804  const allocator_type& alloc);
805 
806  /// move constructor
807  IDatabaseTable(IDatabaseTable< T_DATA, ALLOC >&& from);
808 
809  /// move constructor with a given allocator
810  IDatabaseTable(IDatabaseTable< T_DATA, ALLOC >&& from,
811  const allocator_type& alloc);
812 
813  /// virtual copy constructor
814  virtual IDatabaseTable< T_DATA, ALLOC >* clone() const = 0;
815 
816  /// virtual copy constructor with a given allocator
817  virtual IDatabaseTable< T_DATA, ALLOC >*
818  clone(const allocator_type& alloc) const = 0;
819 
820  /// destructor
821  virtual ~IDatabaseTable();
822 
823  /// @}
824 
825 
826  // ##########################################################################
827  /// @name Iterators
828  // ##########################################################################
829  /// @{
830 
831  /// returns a new unsafe handler pointing to the beginning of the database
832  iterator begin() const;
833 
834  /// returns a new safe handler pointing to the beginning of the database
835  iterator_safe beginSafe() const;
836 
837  /// returns a new unsafe handler pointing to the end of the database
838  const iterator& end() const noexcept;
839 
840  /// returns a new safe handler pointing to the end of the database
841  const iterator_safe& endSafe() const noexcept;
842 
843  /// @}
844 
845 
846  // ##########################################################################
847  /// @name Accessors / Modifiers
848  // ##########################################################################
849  /// @{
850 
851  /// returns the content (the records) of the database
852  const Matrix< T_DATA >& content() const noexcept;
853 
854  /// returns a new unsafe handler pointing to the 1st record of the database
855  iterator handler() const;
856 
857  /// returns a new safe handler pointing to the 1st record of the database
858  iterator_safe handlerSafe() const;
859 
860  /// returns the variable names for all the columns of the database
861  /** The names do not include the ignored columns. */
862  const DBVector< std::string >& variableNames() const noexcept;
863 
864  /// sets the names of the variables
865  /** This method can be called in two different ways: either the names
866  * correspond precisely to the columns stored into the database table
867  * (in this case, parameter from_external_object is equal to false),
868  * or they correspond to the columns of an external database (e.g., a
869  * CSV file) from which we potentially excluded some columns and,
870  * consequently, the latter should not be taken into account (in this
871  * case, parameter from_external_object is equal to true). As an
872  * example, imagine that the database table is created from a CSV file
873  * with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that
874  * we asked the database table to ignore columns X1 and X3. Then
875  * setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will
876  * set the columns of the database table as { "X0", "X2", "X4" }. The
877  * same result could be obtained by executing
878  * setVariableNames( { "X0", "X2", "X4" }, false ), which specifies
879  * directly the set of names to retain in the database table.
880  * @param names the names of all the columns, including the ignored
881  * columns if from_external_object is set to true, else excluding
882  * them (i.e., this should precisely correspond to the columns stored
883  * into the database table).
884  * @param from_external_object a Boolean indicating whether parameter
885  * names includes the columns ignored by the database table (true) or
886  * not (false).
887  * @throw SizeError is raised if the names passed in arguments cannot be
888  * assigned to the columns of the IDatabaseTable because the size of their
889  * vector is inadequate. */
890  virtual void setVariableNames(
891  const std::vector< std::string, ALLOC< std::string > >& names,
892  const bool from_external_object = true)
893  = 0;
894 
895  /// sets the names of the variables
896  /** This method can be called in two different ways: either the names
897  * correspond precisely to the columns stored into the database table
898  * (in this case, parameter from_external_object is equal to false),
899  * or they correspond to the columns of an external database (e.g., a
900  * CSV file) from which we potentially excluded some columns and,
901  * consequently, the latter should not be taken into account (in this
902  * case, parameter from_external_object is equal to true). As an
903  * example, imagine that the database table is created from a CSV file
904  * with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that
905  * we asked the database table to ignore columns X1 and X3. Then
906  * setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will
907  * set the columns of the database table as { "X0", "X2", "X4" }. The
908  * same result could be obtained by executing
909  * setVariableNames( { "X0", "X2", "X4" }, false ), which specifies
910  * directly the set of names to retain in the database table.
911  * @param names the names of all the columns, including the ignored
912  * columns if from_external_object is set to true, else excluding
913  * them (i.e., this should precisely correspond to the columns stored
914  * into the database table).
915  * @param from_external_object a Boolean indicating whether parameter
916  * names includes the columns ignored by the database table (true) or
917  * not (false).
918  * @throw SizeError is raised if the names passed in arguments cannot be
919  * assigned to the columns of the IDatabaseTable because the size of their
920  * vector is inadequate. */
921  template < template < typename > class OTHER_ALLOC >
922  void setVariableNames(
923  const std::vector< std::string, OTHER_ALLOC< std::string > >& names,
924  const bool from_external_object = true);
925 
926  /// returns the name of the kth column of the IDatabaseTable
927  /** @throw OutOfBounds is raised if the IDatabaseTable contains fewer
928  * than k columns. */
929  const std::string& variableName(const std::size_t k) const;
930 
931  /// returns the index of the column whose name is passed in argument
932  /** @warning If several columns correspond to the name, only the
933  * column with the lowest index is returned. If you wish to retrieve all
934  * the columns, use method columnsFromVariableName
935  * @throw UndefinedElement is raised if there exists no column with
936  * the given name*/
937  std::size_t columnFromVariableName(const std::string& name) const;
938 
939  /// returns the indices of all the columns whose name is passed in argument
940  /** It may happen that several columns correspond to a given variable
941  * name. In this case, the function returns the indices of all the
942  * columns of the IDatabase that match the name. */
943  DBVector< std::size_t >
944  columnsFromVariableName(const std::string& name) const;
945 
946  /// returns the number of variables (columns) of the database
947  std::size_t nbVariables() const noexcept;
948 
949  /// returns the number of records (rows) in the database
950  std::size_t nbRows() const noexcept;
951 
952  /// returns the number of records (rows) in the database
953  std::size_t size() const noexcept;
954 
955  /// indicates whether the database contains some records or not
956  bool empty() const noexcept;
957 
958  /// makes the database table ignore from now on the kth column
959  /** This method can be called in two different ways: either k refers to
960  * the current kth column of the database table (in this case, parameter
961  * from_external_object is set to false), or k corresponds to the kth
962  * column of an original dataset used to fill the database table
963  * (in this case from_external_object is set to true). Depending on
964  * from_external_object's value, the ignored columns may differ. As an
965  * example, imagine that the database table is created from a CSV file
966  * with 5 columns named X0, X1, X2, X3 and X4 respectively. Then a call to
967  * ignoreColumn ( 1, true ) will exclude column X1 from the database table.
968  * As a result, the database table columns are X0, X2, X3 and X4.
969  * Therefore, subsequently calling ignoreColumn ( 1, false ) will result
970  * in excluding X2 since X2 is the 2nd column (columns are indexed
971  * starting from 0). So, now the database table's columns are
972  * X0, X3 and X4. If, now, we call ignoreColumn ( 3, true ), this will
973  * remove column X3 because, in the original database, X3 was the 4th
974  * column.
975  *
976  * @warning If the database table was not empty, then the kth column is
977  * removed from all the rows currently stored.
978  * @warning If the kth column does not exist (i.e., the original dataset
979  * does not contain the kth column when from_external_object is set to
980  * true, or the IDatabaseTable has no kth column when from_external_object
981  * is set to false), column k is marked as to be ignored and nothing is
982  * done on the content of the IDatabaseTable. No exception is raised.
983  * @param k the column to remove. See the above detailed description on
984  * how k is computed.
985  * @param from_external_object indicates whether k refers to the kth
986  * column of an original external database or to the current kth column
987  * of the database table. */
988  virtual void ignoreColumn(const std::size_t k,
989  const bool from_external_object = true)
990  = 0;
991 
992  /// returns the set of columns of the original dataset that are ignored
993  virtual const DBVector< std::size_t > ignoredColumns() const = 0;
994 
995  /** @brief returns the set of columns of the original dataset that are
996  * present in the IDatabaseTable */
997  virtual const DBVector< std::size_t > inputColumns() const = 0;
998 
999  using IDatabaseTableInsert4DBCell<
1000  ALLOC,
1001  !std::is_same< T_DATA, DBCell >::value >::insertRow;
1002 
1003  /// insert a new row at the end of the database
1004  /** The new_row passed in argument is supposed to come from an external
1005  * database. So it must contain data for the ignored columns.
1006  * @throw SizeError is raised if the vector of string cannot be inserted
1007  * in the IDatabaseTable because its size does not allow a matching with the
1008  * columns of the IDatabaseTable (taking into account the ignored columns) */
1009  template < template < typename > class OTHER_ALLOC >
1010  void insertRow(
1011  const std::vector< std::string, OTHER_ALLOC< std::string > >& new_row);
1012 
1013  /// insert a new DBRow at the end of the database
1014  /** Unlike methods insertRow for data whose type is different from T_DATA,
1015  * this method assumes that the new row passed in argument does not contain
1016  * any data of the ignored columns. So, basically, it could be copied
1017  * as is into the database table.
1018  * @throw SizeError is raised if the size of the new_row is not equal to
1019  * the number of columns retained in the IDatabaseTable */
1020  virtual void insertRow(Row< T_DATA >&& new_row,
1021  const IsMissing contains_missing_data);
1022 
1023  /// insert a new row at the end of the database
1024  /** Unlike methods insertRow for data whose type is different from T_DATA,
1025  * this method assumes that the new row passed in argument does not contain
1026  * any data of the ignored columns. So, basically, it could be copied
1027  * as is into the database table.
1028  * @throw SizeError is raised if the size of the new_row is not equal to
1029  * the number of columns retained in the IDatabaseTable */
1030  virtual void insertRow(const Row< T_DATA >& new_row,
1031  const IsMissing contains_missing_data);
1032 
1033  using IDatabaseTableInsert4DBCell<
1034  ALLOC,
1035  !std::is_same< T_DATA, DBCell >::value >::insertRows;
1036 
1037  /// insert a set of new DBRows at the end of the database
1038  /** Unlike methods insertRows for data whose type is different from T_DATA,
1039  * this method assumes that the new rows passed in argument do not contain
1040  * any data of the ignored columns. So, basically, these rows could be
1041  * copied as is into the database table.
1042  * @param new_rows the new set of rows to be copied as is
1043  * @param rows_have_missing_vals a vector of the same size as new_rows
1044  * that indicates, for each new row, whether it contains some missing
1045  * value or not
1046  * @throw SizeError is raised if the size of at least one row in new_rows
1047  * is not equal to the number of columns retained in the IDatabaseTable.
1048  * A SizeError exception will also be raised if the number of new rows
1049  * is not identical to the size of vector rows_have_missing_vals. */
1050  virtual void insertRows(Matrix< T_DATA >&& new_rows,
1051  const DBVector< IsMissing >& rows_have_missing_vals);
1052 
1053  /// insert a set of new DBRows at the end of the database
1054  /** Unlike methods insertRows for data whose type is different from T_DATA,
1055  * this method assumes that the new rows passed in argument do not contain
1056  * any data of the ignored columns. So, basically, these rows could be
1057  * copied as is into the database table.
1058  * @param new_rows the new set of rows to be copied as is
1059  * @param rows_have_missing_vals a vector of the same size as new_rows
1060  * that indicates, for each new row, whether it contains some missing
1061  * value or not
1062  * @throw SizeError is raised if the size of at least one row in new_rows
1063  * is not equal to the number of columns retained in the IDatabaseTable.
1064  * A SizeError exception will also be raised if the number of new rows
1065  * is not identical to the size of vector rows_have_missing_vals. */
1066  virtual void insertRows(const Matrix< T_DATA >& new_rows,
1067  const DBVector< IsMissing >& rows_have_missing_vals);
1068 
1069  /// erase a given row specified by its index in the table
1070  /** In the database, rows are indexed, starting from 0.
1071  * @warning If the row does not exist, nothing is done. In particular,
1072  * no exception is raised. */
1073  void eraseRow(std::size_t index);
1074 
1075  /// erase the first row
1076  /** @warning if the row does not exist, nothing is done. In particular, no
1077  * exception is raised. */
1078  void eraseFirstRow();
1079 
1080  /// erase the last row
1081  /** @warning if the row does not exist, nothing is done. In particular, no
1082  * exception is raised. */
1083  void eraseLastRow();
1084 
1085  /// erase the k first rows
1086  /** @warning if there are fewer than k rows in the database, the database is
1087  * completely emptied */
1088  void eraseFirstRows(const std::size_t k);
1089 
1090  /// erase the k last rows
1091  /** @warning if there are fewer than k rows in the database, the database is
1092  * completely emptied */
1093  void eraseLastRows(const std::size_t k);
1094 
1095  /// erase the rows from the debth to the endth (not included)
1096  /** In the database, rows are indexed, starting from 0. */
1097  void eraseRows(std::size_t deb, std::size_t end);
1098 
1099  /// erase all the rows
1100  void eraseAllRows();
1101 
1102  /// erase the content of the database, including the names of the variables
1103  virtual void clear();
1104 
1105  /// returns the allocator of the database
1106  ALLOC< T_DATA > getAllocator() const;
1107 
1108  /// returns the set of missing symbols
1109  const DBVector< std::string >& missingSymbols() const;
1110 
1111  /// indicates whether the database contains some missing values
1112  bool hasMissingValues() const;
1113 
1114  /// indicates whether the kth row contains some missing values
1115  bool hasMissingValues(const std::size_t k) const;
1116 
1117  /// changes the max number of threads that a database can use
1118  /** Within databases, some methods can be processed in a parallel fashion.
1119  * This method indicates the maximum number of threads that can be run
1120  * in parallel at the same time. */
1121  void setMaxNbThreads(const std::size_t nb) const;
1122 
1123  /// returns the number of threads used to parse the database
1124  std::size_t nbThreads() const;
1125 
1126  /** @brief changes the minimum number of rows a thread should process in a
1127  * multithreading context
1128  *
1129  * When a method executes several threads to perform actions on the rows
1130  * of the database, the MinNbRowsPerThread indicates how many rows each
1131  * thread should at least process. This is used to compute the number of
1132  * threads actually run. This number is equal to the min between the max
1133  * number of threads allowed and the number of records in the database
1134  * divided by nb. */
1135  void setMinNbRowsPerThread(const std::size_t nb) const;
1136 
1137  /// returns the minimum of rows that each thread should process
1138  std::size_t minNbRowsPerThread() const;
1139 
1140  /// assign a given weight to all the rows of the database
1141  void setAllRowsWeight(const double new_weight);
1142 
1143  /// assigns a given weight to the ith row of the database
1144  /** @throws OutOfBounds if i is outside the set of indices of the
1145  * records or if the weight is negative */
1146  void setWeight(const std::size_t i, const double weight);
1147 
1148  /// returns the weight of the ith record
1149  /** @throws OutOfBounds if i is outside the set of indices of the
1150  * records */
1151  double weight(const std::size_t i) const;
1152 
1153  /// returns the weight of the whole database
1154  double weight() const;
1155 
1156  /// @}
1157 
1158 
1159  protected:
1160  /// the names of the variables for each column
1161  DBVector< std::string > variable_names_;
1162 
1163  // the vector of DBRows containing all the raw data
1165 
1166  // the set of strings corresponding to missing values
1167  DBVector< std::string > missing_symbols_;
1168 
1169  // a vector indicating which rows have missing values
1170  DBVector< IsMissing > has_row_missing_val_;
1171 
1172  // the maximal number of threads that the database can use
1173  mutable std::size_t max_nb_threads_{
1175 
1176  // the min number of rows that a thread should process in a
1177  // multithreading context
1178  mutable std::size_t min_nb_rows_per_thread_{100};
1179 
1180 
1181  /** @brief checks whether a size corresponds to the number of columns
1182  * of the database */
1183  bool isRowSizeOK_(const std::size_t size) const;
1184 
1185  /// copy operator
1186  IDatabaseTable< T_DATA, ALLOC >&
1187  operator=(const IDatabaseTable< T_DATA, ALLOC >& from);
1188 
1189  /// move operator
1190  IDatabaseTable< T_DATA, ALLOC >&
1191  operator=(IDatabaseTable< T_DATA, ALLOC >&& from);
1192 
1193 
1194 #ifndef DOXYGEN_SHOULD_SKIP_THIS
1195 
1196  private:
1197  // the list of handlers currently attached to the database
1198  /* this is useful when the database is resized */
1199  mutable DBVector< HandlerSafe* > list_of_safe_handlers__;
1200 
1201  // a mutex to safely add/remove handlers in list_of_safe_handlers__
1202  mutable std::mutex safe_handlers_mutex__;
1203 
1204  // the end iterator for the database
1205  Handler* end__{nullptr};
1206 
1207  // the safe end iterator for the database
1208  iterator_safe* end_safe__{nullptr};
1209 
1210  /// add a new safe handler to the list of attached handlers
1211  void attachHandler__(HandlerSafe* handler) const;
1212 
1213  /// detach a safe handler from the list of attached handlers
1214  void detachHandler__(HandlerSafe* handler) const;
1215 
1216  /// update the handlers when the size of the database changes
1217  void updateHandlers__(std::size_t new_size) const;
1218 
1219  // create the end iterators
1220  void createEndIterators__();
1221 
1222 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
1223 
1224 
1225  /// allow the handlers to access the database directly
1226  friend class Handler;
1227  friend class HandlerSafe;
1228  };
1229 
1230  } /* namespace learning */
1231 
1232 } /* namespace gum */
1233 
1234 /// always include the templated implementations
1235 #include <agrum/tools/database/IDatabaseTable_tpl.h>
1236 
1237 #endif /* GUM_IDATABASE_TABLE_H */
virtual void insertRow(const Row< T_DATA > &new_row, const IsMissing contains_missing_data)
insert a new row at the end of the database
virtual HandlerSafe & operator=(const Handler &)
copy operator
virtual Handler & operator--() final
makes the operator point to the previous row in the database
HandlerSafe(const IDatabaseTable< T_DATA, ALLOC > &db)
default constructor
void eraseFirstRows(const std::size_t k)
erase the k first rows
void insertRow(const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row)
insert a new row at the end of the database
virtual const_reference row() const final
returns the current row pointed to by the handler (unsafe version)
virtual bool hasRows() const final
indicates whether the handler has reached its end or not
IDatabaseTable(IDatabaseTable< T_DATA, ALLOC > &&from)
move constructor
virtual reference rowSafe() final
returns the current row pointed to by the handler (safe version)
void setMaxNbThreads(const std::size_t nb) const
changes the max number of threads that a database can use
virtual IDatabaseTable< T_DATA, ALLOC > * clone(const allocator_type &alloc) const =0
virtual copy constructor with a given allocator
std::size_t columnFromVariableName(const std::string &name) const
returns the index of the column whose name is passed in argument
virtual std::size_t DBSize() const final
returns the number of rows of the whole database
virtual void setRange(std::size_t begin, std::size_t end) final
sets the area in the database the handler will handle
ALLOC< T_DATA > getAllocator() const
returns the allocator of the database
DBVector< std::string > variable_names_
the names of the variables for each column
DBVector< std::size_t > columnsFromVariableName(const std::string &name) const
returns the indices of all the columns whose name is passed in argument
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
void eraseRow(std::size_t index)
erase a given row specified by its index in the table
std::size_t nbThreads() const
returns the number of threads used to parse the database
virtual void insertRows(const Matrix< T_DATA > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
insert a set of new DBRows at the end of the database
the (unsafe) handler for the tabular databases
std::size_t size() const noexcept
returns the number of records (rows) in the database
virtual const_pointer operator->() const final
Dereferences the value pointed to by the handler (unsafe version)
virtual reference row() final
returns the current row pointed to by the handler (unsafe version)
IDatabaseTable< T_DATA, ALLOC > & operator=(const IDatabaseTable< T_DATA, ALLOC > &from)
copy operator
virtual const_reference operator*() const final
returns the current row pointed to by the handler (unsafe version)
void setMinNbRowsPerThread(const std::size_t nb) const
changes the number min of rows a thread should process in a multithreading context ...
const DBVector< std::string > & missingSymbols() const
returns the set of missing symbols
void eraseRows(std::size_t deb, std::size_t end)
erase the rows from the debth to the endth (not included)
virtual Handler begin() const
returns a new handler that points to the beginning of the database's area of the current handler ...
virtual std::size_t nbVariables() const final
returns the number of variables (columns) of the database
void setVariableNames(const std::vector< std::string, OTHER_ALLOC< std::string > > &names, const bool from_external_object=true)
sets the names of the variables
virtual Handler & operator-=(const std::size_t i) final
moves back the handler by i rows in the database
void eraseLastRow()
erase the last row
virtual const DBVector< std::size_t > ignoredColumns() const =0
returns the set of columns of the original dataset that are ignored
void eraseAllRows()
erase all the rows
Handler(Handler &&h)
move constructor
Handler(const Handler &h)
copy constructor
IDatabaseTable< T_DATA, ALLOC > & operator=(IDatabaseTable< T_DATA, ALLOC > &&from)
move operator
iterator_safe beginSafe() const
returns a new safe handler pointing to the beginning of the database
bool isRowSizeOK_(const std::size_t size) const
checks whether a size corresponds to the number of columns of the database
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true)=0
sets the names of the variables
IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC > &from)
copy constructor
virtual HandlerSafe & operator=(HandlerSafe &&)
move operator
void eraseLastRows(const std::size_t k)
erase the k last rows
const Matrix< T_DATA > & content() const noexcept
returns the content (the records) of the database
DBVector< IsMissing > has_row_missing_val_
virtual Handler end() const
returns a new handler that points to the end of the database's area of the current handler ...
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
virtual bool operator==(const Handler &handler) const final
checks whether two handlers point to the same row in the database
virtual HandlerSafe & operator=(Handler &&)
move operator
DBVector< std::string > missing_symbols_
friend class Handler
allow the handlers to access the database directly
IDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const ALLOC< T_DATA > &alloc)
default constructor
virtual void nextRow() final
makes the handler point to the next row, equivalent to operator++
virtual std::size_t numRow() const final
the number of the current row (0 = the 1st row managed by the handler)
const iterator_safe & endSafe() const noexcept
returns a new safe handler pointing to the end of the database
virtual bool operator!=(const Handler &handler) const final
checks whether two handlers point to different rows in the database
bool hasMissingValues() const
indicates whether the database contains some missing values
bool empty() const noexcept
indicates whether the database contains some records or not
double weight() const
returns the weight of the whole database
const iterator & end() const noexcept
returns a new unsafe handler pointing to the end of the database
virtual const_reference rowSafe() const final
returns the current row pointed to by the handler (safe version)
virtual IDatabaseTable< T_DATA, ALLOC > * clone() const =0
virtual copy constructor
std::size_t nbRows() const noexcept
returns the number of records (rows) in the database
virtual const IDatabaseTable< T_DATA, ALLOC > & database() const
returns a pointer on the database
virtual Handler & operator=(Handler &&)
move operator
virtual HandlerSafe & operator=(const HandlerSafe &)
copy operator
the safe handler of the tabular databases
virtual Handler & operator+=(const std::size_t i) final
advances the handler by i rows in the database
double weight(const std::size_t i) const
returns the weight of the ith record
virtual void insertRows(Matrix< T_DATA > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
insert a set of new DBRows at the end of the database
virtual void insertRow(Row< T_DATA > &&new_row, const IsMissing contains_missing_data)
insert a new DBRow at the end of the database
Handler(const IDatabaseTable< T_DATA, ALLOC > &db)
default constructor
IDatabaseTable(IDatabaseTable< T_DATA, ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
virtual Handler & operator=(const Handler &)
copy operator
const std::string & variableName(const std::size_t k) const
returns the name of the kth column of the IDatabaseTable
iterator_safe handlerSafe() const
returns a new safe handler pointing to the 1st record of the database
HandlerSafe(HandlerSafe &&h)
move constructor
virtual const DBVector< std::string > & variableNames() const final
returns the names of the variables
iterator begin() const
returns a new unsafe handler pointing to the beginning of the database
void setAllRowsWeight(const double new_weight)
assign a given weight to all the rows of the database
virtual const DBVector< std::size_t > inputColumns() const =0
returns the set of columns of the original dataset that are present in the IDatabaseTable ...
bool hasMissingValues(const std::size_t k) const
indicates whether the kth row contains some missing values
virtual std::size_t size() const final
returns the number of rows managed by the handler
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
std::size_t minNbRowsPerThread() const
returns the minimum of rows that each thread should process
HandlerSafe(const HandlerSafe &h)
copy constructor
virtual ~IDatabaseTable()
destructor
const DBVector< std::string > & variableNames() const noexcept
returns the variable names for all the columns of the database
virtual std::pair< std::size_t, std::size_t > range() const final
returns the current range of the handler [begin,end)
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true)=0
makes the database table ignore from now on the kth column
IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
virtual Handler & operator++() final
makes the operator point to the next row in the database
iterator handler() const
returns a new unsafe handler pointing to the 1st record of the database
void eraseFirstRow()
erase the first row
virtual void clear()
erase the content of the database, including the names of the variables
virtual void reset() final
puts the handler to the beginning of the database's area it handles
void setWeight(const std::size_t i, const double weight)
assigns a given weight to the ith row of the database