aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBTranslator4ContinuousVariable.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for continuous variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef GUM_LEARNING_DB_TRANSLATOR_4_CONTINUOUS_VARIABLE_H
28 #define GUM_LEARNING_DB_TRANSLATOR_4_CONTINUOUS_VARIABLE_H
29 
30 #include <string>
31 
32 #include <agrum/agrum.h>
33 #include <agrum/tools/core/hashTable.h>
34 #include <agrum/tools/database/DBTranslator.h>
35 #include <agrum/tools/variables/continuousVariable.h>
36 
37 
38 namespace gum {
39 
40  namespace learning {
41 
42 
43  /** @class DBTranslator4ContinuousVariable
44  * @headerfile DBTranslator4ContinuousVariable.h <agrum/tools/database/DBTranslator4ContinuousVariable.h>
45  * @brief The databases' cell translators for continuous variables
46  *
47  * Translators are used by DatabaseTable instances to transform datasets'
48  * strings into DBTranslatedValue instances. The point is that strings are
49  * not adequate for fast learning, they need to be preprocessed into a type
50  * that can be analyzed quickly (the so-called DBTranslatedValue type).
51  *
52  * A DBTranslator4ContinuousVariable is a translator that contains and
53  * exploits a ContinuousVariable for translations. Each time a string needs
53  * to be translated, we ask the ContinuousVariable whether it belongs to its
55  * domain (which is supposed to be of type [x_min,x_max]). If this is the
56  * case, then the DBTranslatedValue corresponding to the translation of the
57  * string contains the floating point number specified in the string.
58  *
59  * @par Here is an example of how to use this class:
60  * @code
61  * // create the translator, with possible missing symbols: "N/A" and "???"
62  * // i.e., each time the translator reads a "N/A" or a "???" string, it
63  * // won't translate it into a number but into a missing value.
64  * std::vector<std::string> missing { "N/A", "???" };
65  * gum::learning::DBTranslator4ContinuousVariable<> translator ( missing );
66  *
67  * // gets the DBTranslatedValue corresponding to some strings
68  * auto val1 = translator.translate("5"); // val1 = DBTranslatedValue {5.0f}
68  * auto val2 = translator.translate("4.2"); // val2 = DBTranslatedValue {4.2f}
70  * auto val3 = translator << "3.4"; // val3 = DBTranslatedValue {3.4f}
71  *
72  * // add the numbers assigned to val1, val2, val3
73  * float sum = val1.cont_val + val2.cont_val + val3.cont_val;
74  *
75  * // translate missing values: val4 and val5 will be equal to:
76  * // DBTranslatedValue { std::numeric_limits<float>::max () }
77  * auto val4 = translator << "N/A";
78  * auto val5 = translator.translate ( "???" );
79  *
80  * // the following instructions raise TypeError exceptions because the
81  * // strings cannot be translated into real numbers
82  * auto val6 = translator << "4.22x";
83  * auto val7 = translator.translate ( "xxx" );
84  *
85  * // given a DBTranslatedValue that is supposed to contain a float, get
86  * // the corresponding string. The strings should be equivalent to those
87  * // indicated below (maybe they could contain more zeroes after the dot).
88  * std::string str;
89  * str = translator.translateBack ( val1 ); // str ~ "5.0"
90  * str = translator >> val2; // str ~ "4.2"
91  * str = translator >> gum::learning::DBTranslatedValue {7.2e3f};
92  * // str ~ "7.2 e3"
93  *
94  * // translate back missing values: the string will correspond to one of
95  * // the missing symbols known to the translator
96  * str = translator >> val4; // str = "N/A" or "???"
97  * str = translator >> val5; // str = "N/A" or "???"
98  *
99  * // get the variable stored within the translator
100  * const gum::ContinuousVariable<float>* var_ptr =
101  * dynamic_cast<const gum::ContinuousVariable<float>*>
102  * ( translator.variable () );
103  *
104  * // it is possible to create a translator for an already known variable.
105  * // In this case, by default, the translator is not in editable mode, but
106  * // this behavior can be changed passing the right arguments to the
107  * // constructor of the translator, or using the setEditableDictionaryMode
108  * // method. Here, we create a continuous variable whose domain is [-2,10]
109  * gum::ContinuousVariable<float> var ( "X", "", -2, 10 );
110  * gum::learning::DBTranslator4ContinuousVariable<> translator2 (var,missing);
111  *
112  * float xval1 = translator2.translate ( "-1.4" ).cont_val; // xval1 = -1.4
113  * float xval2 = translator2.translate ( "7" ).cont_val; // xval2 = 7
114  * float xval3 = translator2.translate ( "N/A" ).cont_val;
115  * // here xval3 corresponds to a missing value, hence it is equal to
116  * // std::numeric_limits<float>::max ()
117  *
118  * // trying to translate a string which is outside the domain of var will
119  * // raise Exception NotFound
120  * translator2.translate ( "20" ); // NotFound
121  * @endcode
122  *
123  * @ingroup learning_database
124  */
125  template < template < typename > class ALLOC = std::allocator >
127  public:
128  /// type for the allocators passed in arguments of methods
130 
131 
132  // ##########################################################################
133  /// @name Constructors / Destructors
134  // ##########################################################################
135 
136  /// @{
137 
138  /// default constructor without any initial variable
139  /** When using this constructor, it is assumed implicitly that the
140  * continuous variable has a range from minus infinity to plus infinity.
141  * If the fit_range parameter is on, the range of the variable is updated
142  * so that it precisely fits the range of the observed values in the
143  * database.
144  * @param missing_symbols the set of symbols in the database
145  * representing missing values
146  * @param fit_range if true, the range of the variable is updated
147  * so that it precisely fits the range of the observed values in the
148  * database, else the range is kept to (-inf,inf)
149  * @param alloc The allocator used to allocate memory for all the
150  * fields of the DBTranslator4ContinuousVariable
151  */
152  template < template < typename > class XALLOC >
154  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
155  const bool fit_range = false,
156  const allocator_type& alloc = allocator_type());
157 
158  /// default constructor without any initial variable nor missing symbol
159  /** When using this constructor, it is assumed implicitly that the
160  * continuous variable has a range from minus infinity to plus infinity.
161  * If the fit_range parameter is on, the range of the variable is updated
162  * so that it precisely fits the range of the observed values in the
163  * database.
164  * @param fit_range if true, the range of the variable is updated
165  * so that it precisely fits the range of the observed values in the
166  * database, else the range is kept to (-inf,inf)
167  * @param alloc The allocator used to allocate memory for all the
168  * fields of the DBTranslator4ContinuousVariable
169  */
170  DBTranslator4ContinuousVariable(const bool fit_range = false,
171  const allocator_type& alloc = allocator_type());
172 
173  /// default constructor with a continuous variable as translator
174  /** @param var a continuous variable that will be used for
175  * translations. The translator keeps a copy of this variable
176  * @param missing_symbols the set of symbols in the database
177  * representing missing values
178  * @param fit_range if true, the range of the variable is updated
179  * so that it precisely fits the range of the observed values in the
180  * database, else the range is kept to (-inf,inf)
181  * @param alloc The allocator used to allocate memory for all the
182  * fields of the DBTranslator4ContinuousVariable
183  * @warning If a missing value symbol is a number included in the range
184  * of the continuous variable, it will be discarded. If the fit_range
185  * parameter is on, the range of the variable is updated so that it
186  * can contain the range of the observed values in the database. */
187  template < typename GUM_SCALAR, template < typename > class XALLOC >
189  const ContinuousVariable< GUM_SCALAR >& var,
190  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
191  const bool fit_range = false,
192  const allocator_type& alloc = allocator_type());
193 
194  /** @brief default constructor with a continuous variable as translator
195  * but without missing symbol
196  *
197  * @param var a continuous variable that will be used for
198  * translations. The translator keeps a copy of this variable
199  * @param fit_range if true, the range of the variable is updated
200  * so that it precisely fits the range of the observed values in the
201  * database, else the range is kept to (-inf,inf)
202  * @param alloc The allocator used to allocate memory for all the
203  * fields of the DBTranslator4ContinuousVariable
204  * @warning If a missing value symbol is a number included in the range
205  * of the continuous variable, it will be discarded. If the fit_range
206  * parameter is on, the range of the variable is updated so that it
207  * can contain the range of the observed values in the database. */
208  template < typename GUM_SCALAR >
209  DBTranslator4ContinuousVariable(const ContinuousVariable< GUM_SCALAR >& var,
210  const bool fit_range = false,
211  const allocator_type& alloc = allocator_type());
212 
213  /// default constructor with an IContinuousVariable as translator
214  /** @param var an IContinuousVariable that will be used for
215  * translations. The translator keeps a copy of this variable
216  * @param missing_symbols the set of symbols in the database
217  * representing missing values
218  * @param fit_range if true, the range of the variable is updated
219  * so that it precisely fits the range of the observed values in the
220  * database, else the range is kept to (-inf,inf)
221  * @param alloc The allocator used to allocate memory for all the
222  * fields of the DBTranslator4ContinuousVariable
223  * @warning If a missing value symbol is a number included in the range
224  * of the continuous variable, it will be discarded. If the fit_range
225  * parameter is on, the range of the variable is updated so that it
226  * can contain the range of the observed values in the database. */
227  template < template < typename > class XALLOC >
229  const IContinuousVariable& var,
230  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
231  const bool fit_range = false,
232  const allocator_type& alloc = allocator_type());
233 
234  /** @brief default constructor with an IContinuousVariable as translator
235  * but without missing symbol
236  *
237  * @param var an IContinuousVariable that will be used for
238  * translations. The translator keeps a copy of this variable
239  * @param fit_range if true, the range of the variable is updated
240  * so that it precisely fits the range of the observed values in the
241  * database, else the range is kept to (-inf,inf)
242  * @param alloc The allocator used to allocate memory for all the
243  * fields of the DBTranslator4ContinuousVariable
244  * @warning If a missing value symbol is a number included in the range
245  * of the continuous variable, it will be discarded. If the fit_range
246  * parameter is on, the range of the variable is updated so that it
247  * can contain the range of the observed values in the database. */
248  DBTranslator4ContinuousVariable(const IContinuousVariable& var,
249  const bool fit_range = false,
250  const allocator_type& alloc = allocator_type());
251 
252  /// copy constructor
253  DBTranslator4ContinuousVariable(const DBTranslator4ContinuousVariable< ALLOC >& from);
254 
255  /// copy constructor with a given allocator
256  DBTranslator4ContinuousVariable(const DBTranslator4ContinuousVariable< ALLOC >& from,
257  const allocator_type& alloc);
258 
259  /// move constructor
260  DBTranslator4ContinuousVariable(DBTranslator4ContinuousVariable< ALLOC >&& from);
261 
262  /// move constructor with a given allocator
263  DBTranslator4ContinuousVariable(DBTranslator4ContinuousVariable< ALLOC >&& from,
264  const allocator_type& alloc);
265 
266  /// virtual copy constructor
267  virtual DBTranslator4ContinuousVariable< ALLOC >* clone() const;
268 
269  /// virtual copy constructor with a given allocator
271 
272  /// destructor
274 
275  /// @}
276 
277 
278  // ##########################################################################
279  /// @name Operators
280  // ##########################################################################
281 
282  /// @{
283 
284  /// copy operator
287 
288  /// move operator
291 
292  /// @}
293 
294 
295  // ##########################################################################
296  /// @name Accessors / Modifiers
297  // ##########################################################################
298 
299  /// @{
300 
301  /// returns the translation of a string
302  /** This method tries to translate a given string into the
303  * DBTranslatedValue that should be stored into a DatabaseTable. If the
304  * translator cannot find the translation in its current dictionary, then
305  * two situations can arise:
306  * -# if the translator is not in an editable dictionary mode, then the
307  * translator raises a NotFound exception.
308  * -# if the translator is in an editable dictionary mode, i.e., it is
309  * allowed to update its dictionary, then it tries to update the range
310  * of its dictionary to include the new value. Upon success, it returns
311  * the translated value, otherwise, it raises either:
312  * - a TypeError exception if the string cannot be converted into a
313  * value that can be inserted into the dictionary
314  * - an OperationNotAllowed exception if the translation would induce
315  * incoherent behavior (e.g., a DBTranslator4ContinuousVariable that
316  * contains a variable whose domain is [x,y] as well as a missing
317  * value symbol z \f$\in\f$ [x,y]).
318 
319  * @warning Note that missing values (i.e., string encoded as missing
320  * symbols) are translated as std::numeric_limits<float>::max ().
321  * @warning If the variable contained into the translator has a value in
322  * its domain equal to a missing value symbol, this value will be taken
323  * into account in the translations, not the missing value.
324  * @return the translated value of the string to be stored into a
325  * DatabaseTable
326  * @throws UnknownLabelInDatabase is raised if the number represented by
327  * the string is out of the range of the continuous variable and the
328  * translator is not in an editable dictionary mode.
329  * @throws OperationNotAllowed exception is raised if the translation
330  * cannot be found and the insertion of the string into the translator's
331  * dictionary fails because it would induce incoherent behavior (e.g.,
332  * a DBTranslator4ContinuousVariable that contains a variable whose domain
333  * is [x,y] as well as a missing value symbol z \f$\in\f$ [x,y]).
334  * @throws TypeError is raised if the translation cannot be found and
335  * the insertion of the string into the translator's dictionary fails
336  * due to str being impossible to be converted into an appropriate type. */
337  virtual DBTranslatedValue translate(const std::string& str) final;
338 
339  /// returns the original value for a given translation
340  /** @return the string that was translated into a given DBTranslatedValue.
341  * @throws UnknownLabelInDatabase is raised if this original value is
342  * outside the domain of the continuous variable stored within the
343  * translator */
344  virtual std::string translateBack(const DBTranslatedValue translated_val) const final;
345 
346  /// returns std::numeric_limits<std::size_t>::max ()
347  virtual std::size_t domainSize() const final;
348 
349  /// indicates that the translations should never be reordered
350  virtual bool needsReordering() const final;
351 
352  /** @brief returns an empty mapping, indicating that old translations
353  * are equal to the newly reordered ones. */
354  virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > >
355  reorder() final;
356 
357  /// returns the variable stored into the translator
358  virtual const IContinuousVariable* variable() const final;
359 
360  /// returns the translation of a missing value
361  virtual DBTranslatedValue missingValue() const final;
362 
363  /// @}
364 
365 
366 #ifndef DOXYGEN_SHOULD_SKIP_THIS
367 
368  private:
369  // the ContinuousVariable really used by the translator. As its values
370  // are floats, this speeds-up translations
371  ContinuousVariable< float > _variable_;
372 
373  // the ContinuousVariable returned by method variable ()
374  // We must return an IContinuousVariable because the user may have
375  // saved into the translator a ContinuousVariable<X>, with X != float
376  IContinuousVariable* _real_variable_;
377 
378  // assign to each float missing symbol a Boolean indicating whether
379  // we already translated it or not. If we translated it, then we cannot
380  // change the range of the variable so that this range contains the symbol.
381  HashTable< std::string, bool, ALLOC< std::pair< float, bool > > >
382  _status_float_missing_symbols_;
383 
384  // a string containing a non real missing symbol
385  // (useful for back translations)
386  std::string _nonfloat_missing_symbol_;
387 
388  // indicates whether we should fit the range of the observed values
389  bool _fit_range_;
390 
391 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
392  };
393 
394  } /* namespace learning */
395 
396 } /* namespace gum */
397 
398 
399 // always include the template implementation
400 #include <agrum/tools/database/DBTranslator4ContinuousVariable_tpl.h>
401 
402 #endif /* GUM_LEARNING_DB_TRANSLATOR_4_CONTINUOUS_VARIABLE_H */
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
DBTranslator4ContinuousVariable(const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor without any initial variable nor missing symbol
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
virtual DBTranslator4ContinuousVariable< ALLOC > * clone() const
virtual copy constructor
DBTranslator4ContinuousVariable(const DBTranslator4ContinuousVariable< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
DBTranslator4ContinuousVariable(const DBTranslator4ContinuousVariable< ALLOC > &from)
copy constructor
DBTranslator4ContinuousVariable(const ContinuousVariable< GUM_SCALAR > &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor with a continuous variable as translator
DBTranslator4ContinuousVariable(const ContinuousVariable< GUM_SCALAR > &var, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor with a continuous variable as translator but without missing symbol ...
DBTranslator4ContinuousVariable(const IContinuousVariable &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor with a IContinuous variable as translator
virtual DBTranslator4ContinuousVariable< ALLOC > * clone(const allocator_type &alloc) const
virtual copy constructor with a given allocator
DBTranslator4ContinuousVariable< ALLOC > & operator=(const DBTranslator4ContinuousVariable< ALLOC > &from)
copy operator
virtual std::size_t domainSize() const final
returns std::numeric_limits<std::size_t>::max ()
DBTranslator4ContinuousVariable(const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor without any initial variable
virtual bool needsReordering() const final
indicates that the translations should never be reordered
DBTranslator4ContinuousVariable(DBTranslator4ContinuousVariable< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
DBTranslator4ContinuousVariable(DBTranslator4ContinuousVariable< ALLOC > &&from)
move constructor
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
returns an empty mapping, indicating that old translations are equal to the newly reordered ones...
DBTranslator4ContinuousVariable< ALLOC > & operator=(DBTranslator4ContinuousVariable< ALLOC > &&from)
move operator
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
The databases' cell translators for continuous variables.
DBTranslator4ContinuousVariable(const IContinuousVariable &var, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor with a IContinuous variable as translator but without missing symbol ...
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
virtual const IContinuousVariable * variable() const final
returns the variable stored into the translator
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value