aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBRowGeneratorParser.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (c) 2005-2020 by Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6) *
3  * info_at_agrum_dot_org *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
20 /** @file
21  * @brief The class for parsing DatabaseTable rows and generating output rows
22  *
23  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
24  */
25 #ifndef GUM_LEARNING_DB_ROW_GENERATOR_PARSER_H
26 #define GUM_LEARNING_DB_ROW_GENERATOR_PARSER_H
27 
28 #include <limits>
29 
30 #include <agrum/agrum.h>
31 #include <agrum/tools/database/DBHandler.h>
32 #include <agrum/tools/database/databaseTable.h>
33 #include <agrum/tools/database/DBRowGeneratorSet.h>
34 
35 namespace gum {
36 
37  namespace learning {
38 
39  /** @class DBRowGeneratorParser
40  * @headerfile DBRowGeneratorParser.h <agrum/tools/database/DBRowGeneratorParser.h>
41  * @ingroup learning_database
42  * @brief the class used to read a row in the database and to transform it
43  * into a set of DBRow instances that can be used for learning.
44  *
45  * A DBRowGeneratorParser contains a handler on a DatabaseTable that enables
46  * it to parse DBRows contained in the DatabaseTable. It also contains a
47  * DBRowGeneratorSet that is used to create output rows for each parsed
48  * DBRow. Note that if the DBRowGeneratorSet is empty, then
49  * DBRowGeneratorParser simply outputs each parsed DBRow without additional
50  * processing. To understand the difference between a DBRowGeneratorParser
51  * and a DBRowGeneratorSet, the latter is designed to take as input only
52  * one DBRow instance and to produce some output DBRow instances, whereas
53  * the former is designed to parse the content of a DatabaseTable and to
54  * produce from them some output DBRow instances.
55  *
56  * @par Usage example:
57  * @code
58  * // create and fill a database
59  * gum::learning::DatabaseTable<> database ( ... );
60  * .....
61  *
62  * // create a vector with the types of the columns of database
63  * const std::vector<gum::learning::DBTranslatedValueType>
64  * col_types ( 10, gum::learning::DBTranslatedValueType::DISCRETE );
65  *
66  * // create a generator set
67  * gum::learning::MyGenerator<> generator1 ( col_types, 6 );
68  * gum::learning::MyGenerator2<> generator2 ( col_types, 4 );
69  * gum::learning::DBRowGeneratorSet<> genset;
70  * genset.insertGenerator ( generator1 );
71  * genset.insertGenerator ( generator2 );
72  *
73  * // create the DBRowGeneratorParser
74  * gum::learning::DBRowGeneratorParser<>
75  * parser ( database.handler (), genset );
76  *
77  * // use the parser to parse all the database and to apply all the
78  * // transformations induced by generator1 and generator2
79  * while ( parser.hasRows () ) {
80  * const auto& dbrow = parser.row();
81  * // do something with dbrow
82  * }
83  * @endcode
84  */
85  template <template<typename> class ALLOC = std::allocator>
87  public:
88 
89  /// type for the allocators passed in arguments of methods
91 
92  // ##########################################################################
93  /// @name Constructors / Destructors
94  // ##########################################################################
95 
96  /// @{
97 
98  /// default constructor: creates a parser reading the database through handler
99  DBRowGeneratorParser( const typename DatabaseTable<ALLOC>::Handler& handler,
101  const allocator_type& alloc = allocator_type () );
102 
103  /// copy constructor
104  DBRowGeneratorParser( const DBRowGeneratorParser<ALLOC>& from );
105 
106  /// copy constructor with a given allocator
107  DBRowGeneratorParser( const DBRowGeneratorParser<ALLOC>& from,
108  const allocator_type& alloc );
109 
110  /// move constructor
111  DBRowGeneratorParser(DBRowGeneratorParser<ALLOC>&& filter);
112 
113  /// move constructor with a given allocator
114  DBRowGeneratorParser(DBRowGeneratorParser<ALLOC>&& filter,
115  const allocator_type& alloc );
116 
117  /// virtual copy constructor
118  virtual DBRowGeneratorParser<ALLOC>* clone () const;
119 
120  /// virtual copy constructor with a given allocator
121  virtual DBRowGeneratorParser<ALLOC>*
122  clone (const allocator_type& alloc) const;
123 
124  /// destructor
125  virtual ~DBRowGeneratorParser();
126 
127  /// @}
128 
129  // ##########################################################################
130  /// @name Operators
131  // ##########################################################################
132 
133  /// @{
134 
135  /// copy operator
138 
139  /// move operator
142 
143  /// @}
144 
145  // ##########################################################################
146  /// @name Accessors / Modifiers
147  // ##########################################################################
148 
149  /// @{
150 
151  /** @brief returns true if there are still rows that can be output by the
152  * DBRowGeneratorParser
153  *
154  * The usual way of calling this method is to use it as the condition of a
155  * while loop, which stops when the handler has no more rows. This loop
156  * should be placed inside a try-catch statement, so that it can be stopped
157  * properly if a NotFound exception is raised. In most practical cases,
158  * this exception is never raised. However, if you use a row generator
159  * that may return 0 rows (for instance, an intelligent EM that does not
160  * return any row when there are too many missing data) and if the last
161  * rows of the database are such that this generator returns no row, then
162  * the exception is raised. Parsing the whole database beforehand to
163  * detect such a case would be inefficient, especially because this
164  * situation is very unlikely to occur. So, correct code using methods
165  * hasRows () and row () looks like:
166  *
167  * @code
168  * try {
169  * while ( parser.hasRows () ) {
170  * const auto& row = parser.row ();
171  * do_whatever_you_want_with_the_row... ;
172  * }
173  * }
174  * catch ( NotFound& ) { } // stop: there are no more rows to process
175  * @endcode
176  */
177  bool hasRows();
178 
179  /// returns a new output row with its corresponding weight
180  /** The usual way of calling this method is inside a while loop whose
181  * condition is hasRows (), i.e., the loop stops when the handler has no
182  * more rows. This loop should be placed inside a try-catch statement, so
183  * that it can be stopped properly if a NotFound exception is raised. In
184  * most practical cases, this exception is never raised. However, if you
185  * use a row generator that may return 0 rows (for instance, an
186  * intelligent EM that does not return any row when there are too many
187  * missing data) and if the last rows of the database are such that this
188  * generator returns no row, then the exception is raised. Parsing the
189  * whole database beforehand to detect such a case would be inefficient,
190  * especially because this situation is very unlikely to occur. So,
191  * correct code using method row () looks like:
192  *
193  * @code
194  * try {
195  * while ( parser.hasRows () ) {
196  * const auto& row = parser.row ();
197  * do_whatever_you_want_with_the_row... ;
198  * }
199  * }
200  * catch ( NotFound& ) { } // stop: there are no more rows to process
201  * @endcode
202  */
203  const DBRow<DBTranslatedValue,ALLOC>& row ();
204 
205  /// resets the parser
206  void reset();
207 
208  /// returns the handler used by the parser
209  typename DatabaseTable<ALLOC>::Handler& handler();
210 
211  /// returns the handler used by the parser (read-only access)
212  const typename DatabaseTable<ALLOC>::Handler& handler() const;
213 
214  /// returns a reference on the database
215  const DatabaseTable<ALLOC>& database () const;
216 
217  /// returns the generator set that is actually used
219 
220  /// returns the generator set that is actually used (read-only access)
221  const DBRowGeneratorSet<ALLOC>& generatorSet() const;
222 
223  /// sets the area in the database the handler will handle
224  /** In addition to setting the area that will be parsed by the handler,
225  * this method makes the handler point to the beginning of the area.
226  * @param begin the first row to be handled
227  * @param end the handler handles rows in interval [begin,end). Thus,
228  * the row of index end is not included in the set of rows handled.
229  * @warning if begin is greater than end, these values are swapped.
230  * @throw NullElement is raised if the handler does not point to
231  * any database
232  * @throw SizeError is raised if end is greater than the number of
233  * rows of the database */
234  void setRange(std::size_t begin, std::size_t end);
235 
236  /** @brief sets the columns of interest: the output DBRow needs only
237  * contain values for these columns
238  *
239  * This method is useful, e.g., for EM-like algorithms that need to know
240  * which unobserved variables/values need to be filled.
241  *
242  * @throw OperationNotAllowed is raised if the generator set has already
243  * started generating output rows and is currently in a state where the
244  * generation is not completed yet (i.e., we still need to call the
245  * generate() method to complete it). */
246  void setColumnsOfInterest (
247  const std::vector<std::size_t,ALLOC<std::size_t>>& cols_of_interest );
248 
249  /** @brief sets the columns of interest: the output DBRow needs only
250  * contain values for these columns (overload taking an rvalue reference)
251  *
252  * This method is useful, e.g., for EM-like algorithms that need to know
253  * which unobserved variables/values need to be filled.
254  *
255  * @throw OperationNotAllowed is raised if the generator set has already
256  * started generating output rows and is currently in a state where the
257  * generation is not completed yet (i.e., we still need to call the
258  * generate() method to complete it). */
259  void setColumnsOfInterest (
260  std::vector<std::size_t,ALLOC<std::size_t>>&& cols_of_interest );
261 
262  /// assigns a new Bayes net to all the generators that depend on a BN
263  /** Typically, generators based on EM or K-means depend on a model to
264  * compute their outputs correctly. Method setBayesNet makes it possible
265  * to update their BN model.
266  * @warning if one generator that relies on Bayes nets cannot be assigned
267  * new_bn, then no generator is updated and an exception is raised. */
268  template < typename GUM_SCALAR >
269  void setBayesNet (const BayesNet<GUM_SCALAR>& new_bn);
270 
271  /// returns the allocator used
272  allocator_type getAllocator () const;
273 
274  /// @}
275 
276 
277  private:
278 
279  /// the handler that is really used to parse the database
281 
282  /// the set of DBRow generators (might be empty)
284 
285  /// the size of the generator set
286  std::size_t _generator_size_;
287 
288  };
289 
290  } /* namespace learning */
291 
292 } /* namespace gum */
293 
294 // always include the template implementation
295 #include <agrum/tools/database/DBRowGeneratorParser_tpl.h>
296 
297 #endif /* GUM_LEARNING_DB_ROW_GENERATOR_PARSER_H */
virtual DBRowGeneratorParser< ALLOC > * clone() const
virtual copy constructor
DBRowGeneratorSet< ALLOC > _generator_set_
the set of DBRow generators (might be empty)
void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to all the generators that depend on a BN
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
const DatabaseTable< ALLOC >::Handler & handler() const
returns the handler used by the parser
DBRowGeneratorParser(const DBRowGeneratorParser< ALLOC > &from)
copy constructor
DBRowGeneratorParser< ALLOC > & operator=(DBRowGeneratorParser< ALLOC > &&from)
move operator
DBRowGeneratorParser(const DBRowGeneratorParser< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
std::size_t _generator_size_
the size of the generator set
DBRowGeneratorParser< ALLOC > & operator=(const DBRowGeneratorParser< ALLOC > &from)
copy operator
void setColumnsOfInterest(const std::vector< std::size_t, ALLOC< std::size_t >> &cols_of_interest)
sets the columns of interest: the output DBRow needs only contain values for these columns ...
allocator_type getAllocator() const
returns the allocator used
virtual ~DBRowGeneratorParser()
destructor
const DBRowGeneratorSet< ALLOC > & generatorSet() const
returns the generator set that is actually used
const DBRow< DBTranslatedValue, ALLOC > & row()
returns a new output row with its corresponding weight
DBRowGeneratorParser(DBRowGeneratorParser< ALLOC > &&filter, const allocator_type &alloc)
move constructor with a given allocator
bool hasRows()
returns true if there are still rows that can be output by the DBRowGeneratorParser ...
virtual DBRowGeneratorParser< ALLOC > * clone(const allocator_type &alloc) const
virtual copy constructor with a given allocator
DBRowGeneratorSet< ALLOC > & generatorSet()
returns the generator set that is actually used
const DatabaseTable< ALLOC > & database() const
returns a reference on the database
void setColumnsOfInterest(std::vector< std::size_t, ALLOC< std::size_t >> &&cols_of_interest)
sets the columns of interest: the output DBRow needs only contain values for these columns ...
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
DBRowGeneratorParser(DBRowGeneratorParser< ALLOC > &&filter)
move constructor
void reset()
resets the parser
void setRange(std::size_t begin, std::size_t end)
sets the area in the database the handler will handle
DBRowGeneratorParser(const typename DatabaseTable< ALLOC >::Handler &handler, const DBRowGeneratorSet< ALLOC > &generator_set, const allocator_type &alloc=allocator_type())
default constructor