aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBRowGenerator.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The base class for all DBRow generators
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef GUM_LEARNING_DBROW_GENERATOR_H
28 #define GUM_LEARNING_DBROW_GENERATOR_H
29 
30 #include <vector>
31 
32 #include <agrum/agrum.h>
33 #include <agrum/tools/database/DBRow.h>
34 #include <agrum/tools/database/DBTranslatedValue.h>
35 
36 namespace gum {
37 
38  namespace learning {
39 
40  /** @enum DBRowGeneratorGoal
41  * @headerfile DBRowGenerator.h <agrum/tools/database/DBRowGenerator.h>
42  * @brief the type of things that a DBRowGenerator is designed for
43  *
44  * @ingroup learning_database
45  */
46  enum class DBRowGeneratorGoal : char
47  {
48  // the generator's only goal is to remove all missing values
50 
51  // the generator does something other than just removing missing values
53  };
54 
55 
56  /** @class DBRowGenerator
57  * @headerfile DBRowGenerator.h <agrum/tools/database/DBRowGenerator.h>
58  * @ingroup learning_database
59  * @brief The base class for all DBRow generators
60  *
61  * A DBRowGenerator instance takes as input a DBRow containing
62  * DBTranslatedValue instances provided directly by a DatabaseTable or
63  * resulting from a DBRow generation by another DBRowGenerator. Then,
64  * it produces from 0 to several instances of DBRow of DBTranslatedValue.
65  * This is essentially useful to deal with missing values: during learning,
66  * when a DBRow contains some missing values, what should we do with it?
67  * Should we discard it? Should we use an EM algorithm to produce several
68  * DBRows weighted by their probability of occurrence? Should we use a
69  * K-means algorithm to produce only one DBRow of highest probability of
70  * occurrence? Using the appropriate DBRowGenerator, you can apply any
71  * of these rules when your learning algorithm parses the DatabaseTable.
72  * You just need to indicate which DBRowGenerator to use, no line of
73  * code needs be changed in your high-level learning algorithm.
74  *
75  * As an example of how a DBRowGenerator works, an "Identity" DBRowGenerator
76  * takes as input a DBRow and returns it without any further processing, so
77  * it "produces" only one output DBRow. An EM DBRowGenerator takes as input
78  * a DBRow in which some cells may be missing. In this case, it produces all
79  * the possible combinations of values that these missing values may take and
80  * it assigns to these combinations a weight proportional to their probability
81  * of occurrence according to a given model. As such, it may most often produce
82  * several output DBRows.
83  *
84  * The standard usage of a DBRowGenerator is the following:
85  * @code
86  * // create a DatabaseTable and fill it
87  * gum::learning::DBTranslatorSet<> set;
88  * for ( int i = 0; i < 10; ++i )
89  * set.insertTranslator(gum::learning::DBTranslator4LabelizedVariable<>(),i);
90  * gum::learning::DatabaseTable<> database ( set );
91  * // fill the database
92  *
93  * // keep in a vector the types of the columns in the database
94  * const std::vector<gum::learning::DBTranslatedValueType>
95  * column_types ( 10, gum::learning::DBTranslatedValueType::DISCRETE );
96  *
97  * // create the generator
98  * gum::learning::DBRowGeneratorIdentity<> generator ( column_types );
99  *
100  * // parse the database and produce output rows
101  * for ( auto dbrow : database ) {
102  * generator.setInputRow ( dbrow );
103  * while ( generator.hasRows () ) {
104  * const auto& output_dbrow = generator.generate ();
105  * // do something with the output dbrow
106  * }
107  * }
108  * @endcode
109  *
110  * All DBRowGenerator classes should derive from this class. It takes care
111  * of the interaction with the RecordCounter / Score classes. The user
112  * who wishes to create a new DBRowGenerator, say for instance, one that
113  * outputs k times the input row, just has to define the following class
114  * (not all the constructors/destructors are required, but we provide them
115  * for self-consistency), the important part of which is located from the
116  * "Accessors / Modifiers" section on:
117  * @code
118  * template <template<typename> class ALLOC = std::allocator>
119  * class DuplicateGenerator : public DBRowGenerator<ALLOC> {
120  * public:
121  *
122  * /// type for the allocators passed in arguments of methods
123  * using allocator_type = ALLOC<DBTranslatedValue>;
124  *
125  * // ######################################################################
126  * // Constructors / Destructors
127  * // ######################################################################
128  *
129  * /// default constructor
130  * DuplicateGenerator( const std::vector<DBTranslatedValueType,
131  * ALLOC<DBTranslatedValueType>> column_types,
132  * const std::size_t nb_duplicates,
133  * const allocator_type& alloc = allocator_type () )
134  * : DBRowGenerator<ALLOC> ( column_types, alloc )
135  * , _nb_duplicates_ ( nb_duplicates ) {}
136  *
137  * /// copy constructor with a given allocator
138  * DuplicateGenerator( const DuplicateGenerator<ALLOC>& from,
139  * const allocator_type& alloc )
140  * : DBRowGenerator<ALLOC>( from, alloc )
141  * , _input_row_( from. _input_row_ )
142  * , _nb_duplicates_ ( from. _nb_duplicates_ ) {}
143  *
144  * /// copy constructor
145  * DuplicateGenerator( const DuplicateGenerator<ALLOC>& from )
146  * : DuplicateGenerator<ALLOC> ( from, from.getAllocator () ) {}
147  *
148  * /// move constructor with a given allocator
149  * DuplicateGenerator( DuplicateGenerator<ALLOC>&& from,
150  * const allocator_type& alloc )
151  * : DBRowGenerator<ALLOC> ( std::move( from ), alloc )
152  * , _input_row_( from. _input_row_ )
153  * , _nb_duplicates_ ( from. _nb_duplicates_ ) {}
154  *
155  * /// move constructor
156  * DuplicateGenerator( DuplicateGenerator<ALLOC>&& from )
157  * : DuplicateGenerator<ALLOC> ( std::move(from), from.getAllocator() ) {}
158  *
159  * /// virtual copy constructor with a given allocator
160  * virtual DuplicateGenerator<ALLOC>*
161  * clone ( const allocator_type& alloc ) const {
162  * ALLOC<DuplicateGenerator<ALLOC>> allocator ( alloc );
163  * DuplicateGenerator<ALLOC>* generator = allocator.allocate(1);
164  * try { allocator.construct ( generator, *this, alloc ); }
165  * catch ( ... ) {
166  * allocator.deallocate ( generator, 1 );
167  * throw;
168  * }
169  * return generator;
170  * }
171  *
172  *
173  * /// virtual copy constructor
174  * virtual DuplicateGenerator<ALLOC>* clone () const {
175  * return clone ( this->getAllocator () );
176  * }
177  *
178  * /// destructor
179  * ~DuplicateGenerator() {}
180  *
181  *
182  * // ######################################################################
183  * // Operators
184  * // ######################################################################
185  *
186  * /// copy operator
187  * DuplicateGenerator<ALLOC>&
188  * operator=( const DuplicateGenerator<ALLOC>& from ) {
189  * DBRowGenerator<ALLOC>::operator=( from );
190  * _input_row_ = from. _input_row_;
191  * _nb_duplicates_ = from. _nb_duplicates_;
192  * return *this;
193  * }
194  *
195  * /// move operator
196  * DuplicateGenerator<ALLOC>& operator=( DuplicateGenerator<ALLOC>&& from ) {
197  * DBRowGenerator<ALLOC>::operator=( std::move( from ) );
198  * _input_row_ = from. _input_row_;
199  * _nb_duplicates_ = from. _nb_duplicates_;
200  * return *this;
201  * }
202  *
203  *
204  * // ######################################################################
205  * // Accessors / Modifiers
206  * // ######################################################################
207  *
208  * /// generates new lines from those the generator gets in input
209  * virtual const DBRow<DBTranslatedValue,ALLOC>& generate() final {
210  * this->decreaseRemainingRows();
211  * return * _input_row_;
212  * }
213  *
214  *
215  * protected:
216  *
217  * /// computes the rows it will provide in output
218  * virtual std::size_t
219  * computeRows_( const DBRow<DBTranslatedValue,ALLOC>& row ) final {
220  * _input_row_ = &row;
221  * return _nb_duplicates_;
222  * }
223  *
224  *
225  * private:
226  * /// the row used as input to generate the output DBRows
227  * const DBRow<DBTranslatedValue,ALLOC>* _input_row_ { nullptr };
228  *
229  * /// the number of times we return each input row
230  * std::size_t _nb_duplicates_ { std::size_t(1) };
231  * };
232  * @endcode
233  */
234  template < template < typename > class ALLOC = std::allocator >
236  public:
237  /// type for the allocators passed in arguments of methods
239 
240  // ##########################################################################
241  /// @name Constructors / Destructors
242  // ##########################################################################
243 
244  /// @{
245 
246  /// default constructor
247  /** @param column_types indicates, for each column, whether it is continuous or discrete
248  * @param goal indicates whether the generator only removes missing values or does something else
249  * @param alloc the allocator used by all the methods */
252  const DBRowGeneratorGoal goal,
253  const allocator_type& alloc = allocator_type());
254 
255  /// copy constructor
256  DBRowGenerator(const DBRowGenerator< ALLOC >& from);
257 
258  /// copy constructor with a given allocator
259  DBRowGenerator(const DBRowGenerator< ALLOC >& from, const allocator_type& alloc);
260 
261  /// move constructor
262  DBRowGenerator(DBRowGenerator< ALLOC >&& from);
263 
264  /// move constructor with a given allocator
265  DBRowGenerator(DBRowGenerator< ALLOC >&& from, const allocator_type& alloc);
266 
267  /// virtual copy constructor
268  virtual DBRowGenerator< ALLOC >* clone() const = 0;
269 
270  /// virtual copy constructor with a given allocator
271  virtual DBRowGenerator< ALLOC >* clone(const allocator_type& alloc) const = 0;
272 
273  /// destructor
274  virtual ~DBRowGenerator();
275 
276  /// @}
277 
278 
279  // ##########################################################################
280  /// @name Accessors / Modifiers
281  // ##########################################################################
282 
283  /// @{
284 
285  /** @brief returns true if there are still rows that can be output by
286  * the DBRowGenerator */
287  bool hasRows();
288 
289  /// sets the input row from which the generator will create its output rows
290  /** @param row the input DBRow from which the output rows will be generated
291  * @return a Boolean indicating whether the DBRowGenerator is capable of outputting at least one row from this input DBRow */
292  bool setInputRow(const DBRow< DBTranslatedValue, ALLOC >& row);
293 
294  /// generates a new output row from the input row (one DBRow is returned per call)
295  virtual const DBRow< DBTranslatedValue, ALLOC >& generate() = 0;
296 
297  /// decrease the number of remaining output rows
298  /** When method setInputRow is performed, the DBRowGenerator knows how
299  * many output rows it will be able to generate. Each time method
300  * decreaseRemainingRows is called, we decrement this number. When the
301  * number becomes equal to 0, then there remains no new output row to
302  * generate. */
303  void decreaseRemainingRows();
304 
305  /// resets the generator: afterwards, there are no more output rows to generate
306  virtual void reset();
307 
308  /** @brief sets the columns of interest: the output DBRow needs only
309  * contain correct values for these columns
310  *
311  * This method is useful, e.g., for EM-like algorithms that need to know
312  * which unobserved variables/values need be filled. In this case, the
313  * DBRowGenerator still outputs DBRows with the same columns as the
314  * DatabaseTable, but only the columns of these DBRows corresponding to
315  * those passed in argument to Method setColumnsOfInterest are meaningful.
316  * For instance, if a DatabaseTable contains 10 columns and Method
317  * setColumnsOfInterest() is applied with vector<> { 0, 3, 4 }, then the
318  * DBRowGenerator will output DBRows with 10 columns, in which only
319  * columns 0, 3 and 4 are guaranteed to have correct values (columns are
320  * always indexed, starting from 0).
321  * @param cols_of_interest the indices of the columns of interest */
322  virtual void setColumnsOfInterest(
323  const std::vector< std::size_t, ALLOC< std::size_t > >& cols_of_interest);
324 
325  /** @brief sets the columns of interest: the output DBRow needs only
326  * contain correct values for these columns
327  *
328  * This method is useful, e.g., for EM-like algorithms that need to know
329  * which unobserved variables/values need be filled. In this case, the
330  * DBRowGenerator still outputs DBRows with the same columns as the
331  * DatabaseTable, but only the columns of these DBRows corresponding to
332  * those passed in argument to Method setColumnsOfInterest are meaningful.
333  * For instance, if a DatabaseTable contains 10 columns and Method
334  * setColumnsOfInterest() is applied with vector<> { 0, 3, 4 }, then the
335  * DBRowGenerator will output DBRows with 10 columns, in which only
336  * columns 0, 3 and 4 are guaranteed to have correct values (columns are
337  * always indexed, starting from 0).
338  * @param cols_of_interest the indices of the columns of interest, passed as an rvalue reference */
339  virtual void
340  setColumnsOfInterest(std::vector< std::size_t, ALLOC< std::size_t > >&& cols_of_interest);
341 
342  /// returns the current set of columns of interest
343  const std::vector< std::size_t, ALLOC< std::size_t > >& columnsOfInterest() const;
344 
345  /// returns the allocator used
347 
348  /// returns the goal of the DBRowGenerator
349  DBRowGeneratorGoal goal() const;
350 
351  /// @}
352 
353 
354  protected:
355  /// the number of output rows still to retrieve through the generate method
356  std::size_t nb_remaining_output_rows_{std::size_t(0)};
357 
358  /// the types of the columns in the DatabaseTable
359  /** This is useful to determine whether we need to use the .discr_val
360  * field or the .cont_val field in DBTranslatedValue instances. */
362 
363  /// the set of columns of interest
364  std::vector< std::size_t, ALLOC< std::size_t > > columns_of_interest_;
365 
366  /// the goal of the DBRowGenerator (just remove missing values or not)
368 
369 
370  /// copy operator
372 
373  /// move operator
375 
376  /** @brief the method that computes the set of DBRow instances to output after method setInputRow has been called
377  * @return presumably the number of rows to output for this input row (cf. the DuplicateGenerator example in the class documentation) */
378  virtual std::size_t computeRows_(const DBRow< DBTranslatedValue, ALLOC >& row) = 0;
379  };
380 
381  } /* namespace learning */
382 
383 } /* namespace gum */
384 
385 
386 // always include the template implementation
387 #include <agrum/tools/database/DBRowGenerator_tpl.h>
388 
389 #endif /* GUM_LEARNING_DBROW_GENERATOR_H */
bool setInputRow(const DBRow< DBTranslatedValue, ALLOC > &row)
sets the input row from which the generator will create its output rows
DBRowGeneratorGoal
the type of things that a DBRowGenerator is designed for
DBRowGenerator(const DBRowGenerator< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
DBRowGenerator(const DBRowGenerator< ALLOC > &from)
copy constructor
DBRowGenerator< ALLOC > & operator=(const DBRowGenerator< ALLOC > &)
copy operator
void decreaseRemainingRows()
decrease the number of remaining output rows
virtual ~DBRowGenerator()
destructor
virtual DBRowGenerator< ALLOC > * clone() const =0
virtual copy constructor
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
virtual void setColumnsOfInterest(const std::vector< std::size_t, ALLOC< std::size_t > > &cols_of_interest)
sets the columns of interest: the output DBRow needs only contain correct values for these columns ...
DBRowGenerator(DBRowGenerator< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
virtual DBRowGenerator< ALLOC > * clone(const allocator_type &alloc) const =0
virtual copy constructor with a given allocator
virtual void setColumnsOfInterest(std::vector< std::size_t, ALLOC< std::size_t > > &&cols_of_interest)
sets the columns of interest: the output DBRow needs only contain correct values for these columns ...
virtual void reset()
resets the generator: afterwards, there are no more output rows to generate
DBRowGeneratorGoal goal() const
returns the goal of the DBRowGenerator
DBRowGenerator< ALLOC > & operator=(DBRowGenerator< ALLOC > &&)
move operator
DBRowGenerator(DBRowGenerator< ALLOC > &&from)
move constructor
bool hasRows()
returns true if there are still rows that can be output by the DBRowGenerator
DBRowGeneratorGoal goal_
the goal of the DBRowGenerator (just remove missing values or not)
allocator_type getAllocator() const
returns the allocator used
std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types_
the types of the columns in the DatabaseTable
DBRowGenerator(const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types, const DBRowGeneratorGoal goal, const allocator_type &alloc=allocator_type())
default constructor
const std::vector< std::size_t, ALLOC< std::size_t > > & columnsOfInterest() const
returns the current set of columns of interest
std::vector< std::size_t, ALLOC< std::size_t > > columns_of_interest_
the set of columns of interest
virtual std::size_t computeRows_(const DBRow< DBTranslatedValue, ALLOC > &row)=0
the method that computes the set of DBRow instances to output after method setInputRow has been calle...
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
std::size_t nb_remaining_output_rows_
the number of output rows still to retrieve through the generate method
virtual const DBRow< DBTranslatedValue, ALLOC > & generate()=0
generate new rows from the input row