aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
DBTranslator4ContinuousVariable.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for continuous variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef GUM_LEARNING_DB_TRANSLATOR_4_CONTINUOUS_VARIABLE_H
28 #define GUM_LEARNING_DB_TRANSLATOR_4_CONTINUOUS_VARIABLE_H
29 
30 #include <string>
31 
32 #include <agrum/agrum.h>
33 #include <agrum/tools/core/hashTable.h>
34 #include <agrum/tools/database/DBTranslator.h>
35 #include <agrum/tools/variables/continuousVariable.h>
36 
37 
38 namespace gum {
39 
40  namespace learning {
41 
42 
43  /** @class DBTranslator4ContinuousVariable
44  * @headerfile DBTranslator4ContinuousVariable.h <agrum/tools/database/DBTranslator4ContinuousVariable.h>
45  * @brief The databases' cell translators for continuous variables
46  *
47  * Translators are used by DatabaseTable instances to transform datasets'
48  * strings into DBTranslatedValue instances. The point is that strings are
49  * not adequate for fast learning, they need to be preprocessed into a type
50  * that can be analyzed quickly (the so-called DBTranslatedValue type).
51  *
52  * A DBTranslator4ContinuousVariable is a translator that contains and
53  * exploits a ContinuousVariable for translations. Each time a string needs
54  * to be translated, we ask the ContinuousVariable whether it belongs to its
55  * domain (which is supposed to be of type [x_min,x_max]). If this is the
56  * case, then the DBTranslatedValue corresponding to the translation of the
57  * string contains the floating point number specified in the string.
58  *
59  * @par Here is an example of how to use this class:
60  * @code
61  * // create the translator, with possible missing symbols: "N/A" and "???"
62  * // i.e., each time the translator reads a "N/A" or a "???" string, it
63  * // won't translate it into a number but into a missing value.
64  * std::vector<std::string> missing { "N/A", "???" };
65  * gum::learning::DBTranslator4ContinuousVariable<> translator ( missing );
66  *
67  * // gets the DBTranslatedValue corresponding to some strings
68  * auto val1 = translator.translate("5"); // val1 = DBTranslatedValue {5.0f}
69  * auto val2 = translator.translate("4.2"); // val2 = DBTranslatedValue {4.2f}
70  * auto val3 = translator << "3.4"; // val3 = DBTranslatedValue {3.4f}
71  *
72  * // add the numbers assigned to val1, val2, val3
73  * float sum = val1.cont_val + val2.cont_val + val3.cont_val;
74  *
75  * // translate missing values: val4 and val5 will be equal to:
76  * // DBTranslatedValue { std::numeric_limits<float>::max () }
77  * auto val4 = translator << "N/A";
78  * auto val5 = translator.translate ( "???" );
79  *
80  * // the following instructions raise TypeError exceptions because the
81  * // strings cannot be translated into real numbers
82  * auto val6 = translator << "4.22x";
83  * auto val7 = translator.translate ( "xxx" );
84  *
85  * // given a DBTranslatedValue that is supposed to contain a float, get
86  * // the corresponding string. The strings should be equivalent to those
87  * // indicated below (maybe they could contain more zeroes after the dot).
88  * std::string str;
89  * str = translator.translateBack ( val1 ); // str ~ "5.0"
90  * str = translator >> val2; // str ~ "4.2"
91  * str = translator >> gum::learning::DBTranslatedValue {7.2e3f};
92  * // str ~ "7.2 e3"
93  *
94  * // translate back missing values: the string will correspond to one of
95  * // the missing symbols known to the translator
96  * str = translator >> val4; // str = "N/A" or "???"
97  * str = translator >> val5; // str = "N/A" or "???"
98  *
99  * // get the variable stored within the translator
100  * const gum::ContinuousVariable<float>* var =
101  * dynamic_cast<const gum::ContinuousVariable<float>*>
102  * ( translator.variable () );
103  *
104  * // it is possible to create a translator for an already known variable.
105  * // In this case, by default, the translator is not in editable mode, but
106  * // this behavior can be changed passing the right arguments to the
107  * // constructor of the translator, or using the setEditableDictionaryMode
108  * // method. Here, we create a continuous variable whose domain is [-2,10]
109  * gum::ContinuousVariable<float> var ( "X", "", -2, 10 );
110  * gum::learning::DBTranslator4ContinuousVariable<> translator2 (var,missing);
111  *
112  * float xval1 = translator2.translate ( "-1.4" ).cont_val; // xval1 = -1.4
113  * float xval2 = translator2.translate ( "7" ).cont_val; // xval2 = 7
114  * float xval3 = translator2.translate ( "N/A" ).cont_val;
115  * // here xval3 corresponds to a missing value, hence it is equal to
116  * // std::numeric_limits<float>::max ()
117  *
118  * // trying to translate a string which is outside the domain of var will
119  * // raise Exception NotFound
120  * translator2.translate ( "20" ); // NotFound
121  * @endcode
122  *
123  * @ingroup learning_database
124  */
125  template < template < typename > class ALLOC = std::allocator >
127  public:
128  /// type for the allocators passed in arguments of methods
130 
131 
132  // ##########################################################################
133  /// @name Constructors / Destructors
134  // ##########################################################################
135 
136  /// @{
137 
138  /// default constructor without any initial variable
139  /** When using this constructor, it is assumed implicitly that the
140  * continuous variable has a range from minus infinity to plus infinity.
141  * If the fit_range parameter is on, the range of the variable is updated
142  * so that it precisely fits the range of the observed values in the
143  * database.
144  * @param missing_symbols the set of symbols in the database
145  * representing missing values
146  * @param fit_range if true, the range of the variable is updated
147  * so that it precisely fits the range of the observed values in the
148  * database, else the range is kept to (-inf,inf)
149  * @param alloc The allocator used to allocate memory for all the
150  * fields of the DBTranslator4ContinuousVariable
151  */
152  template < template < typename > class XALLOC >
154  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
155  const bool fit_range = false,
156  const allocator_type& alloc = allocator_type());
157 
158  /// default constructor without any initial variable nor missing symbol
159  /** When using this constructor, it is assumed implicitly that the
160  * continuous variable has a range from minus infinity to plus infinity.
161  * If the fit_range parameter is on, the range of the variable is updated
162  * so that it precisely fits the range of the observed values in the
163  * database.
164  * @param fit_range if true, the range of the variable is updated
165  * so that it precisely fits the range of the observed values in the
166  * database, else the range is kept to (-inf,inf)
167  * @param alloc The allocator used to allocate memory for all the
168  * fields of the DBTranslator4ContinuousVariable
169  */
170  DBTranslator4ContinuousVariable(const bool fit_range = false,
171  const allocator_type& alloc
172  = allocator_type());
173 
174  /// default constructor with a continuous variable as translator
175  /** @param var a continuous variable that will be used for
176  * translations. The translator keeps a copy of this variable
177  * @param missing_symbols the set of symbols in the database
178  * representing missing values
179  * @param fit_range if true, the range of the variable is updated
180  * so that it precisely fits the range of the observed values in the
181  * database, else the range is kept to (-inf,inf)
182  * @param alloc The allocator used to allocate memory for all the
183  * fields of the DBTranslator4ContinuousVariable
184  * @warning If a missing value symbol is a number included in the range
185  * of the continuous variable, it will be discarded. If the fit_range
186  * parameter is on, the range of the variable is updated so that it
187  * can contain the range of the observed values in the database. */
188  template < typename GUM_SCALAR, template < typename > class XALLOC >
190  const ContinuousVariable< GUM_SCALAR >& var,
191  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
192  const bool fit_range = false,
193  const allocator_type& alloc = allocator_type());
194 
195  /** @brief default constructor with a continuous variable as translator
196  * but without missing symbol
197  *
198  * @param var a continuous variable that will be used for
199  * translations. The translator keeps a copy of this variable
200  * @param fit_range if true, the range of the variable is updated
201  * so that it precisely fits the range of the observed values in the
202  * database, else the range is kept to (-inf,inf)
203  * @param alloc The allocator used to allocate memory for all the
204  * fields of the DBTranslator4ContinuousVariable
205  * @warning If a missing value symbol is a number included in the range
206  * of the continuous variable, it will be discarded. If the fit_range
207  * parameter is on, the range of the variable is updated so that it
208  * can contain the range of the observed values in the database. */
209  template < typename GUM_SCALAR >
210  DBTranslator4ContinuousVariable(const ContinuousVariable< GUM_SCALAR >& var,
211  const bool fit_range = false,
212  const allocator_type& alloc
213  = allocator_type());
214 
215  /// default constructor with an IContinuousVariable as translator
216  /** @param var an IContinuousVariable that will be used for
217  * translations. The translator keeps a copy of this variable
218  * @param missing_symbols the set of symbols in the database
219  * representing missing values
220  * @param fit_range if true, the range of the variable is updated
221  * so that it precisely fits the range of the observed values in the
222  * database, else the range is kept to (-inf,inf)
223  * @param alloc The allocator used to allocate memory for all the
224  * fields of the DBTranslator4ContinuousVariable
225  * @warning If a missing value symbol is a number included in the range
226  * of the continuous variable, it will be discarded. If the fit_range
227  * parameter is on, the range of the variable is updated so that it
228  * can contain the range of the observed values in the database. */
229  template < template < typename > class XALLOC >
231  const IContinuousVariable& var,
232  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
233  const bool fit_range = false,
234  const allocator_type& alloc = allocator_type());
235 
236  /** @brief default constructor with an IContinuousVariable as translator
237  * but without missing symbol
238  *
239  * @param var an IContinuousVariable that will be used for
240  * translations. The translator keeps a copy of this variable
241  * @param fit_range if true, the range of the variable is updated
242  * so that it precisely fits the range of the observed values in the
243  * database, else the range is kept to (-inf,inf)
244  * @param alloc The allocator used to allocate memory for all the
245  * fields of the DBTranslator4ContinuousVariable
246  * @warning If a missing value symbol is a number included in the range
247  * of the continuous variable, it will be discarded. If the fit_range
248  * parameter is on, the range of the variable is updated so that it
249  * can contain the range of the observed values in the database. */
250  DBTranslator4ContinuousVariable(const IContinuousVariable& var,
251  const bool fit_range = false,
252  const allocator_type& alloc
253  = allocator_type());
254 
255  /// copy constructor
257  const DBTranslator4ContinuousVariable< ALLOC >& from);
258 
259  /// copy constructor with a given allocator
261  const DBTranslator4ContinuousVariable< ALLOC >& from,
262  const allocator_type& alloc);
263 
264  /// move constructor
266  DBTranslator4ContinuousVariable< ALLOC >&& from);
267 
268  /// move constructor with a given allocator
270  DBTranslator4ContinuousVariable< ALLOC >&& from,
271  const allocator_type& alloc);
272 
273  /// virtual copy constructor
274  virtual DBTranslator4ContinuousVariable< ALLOC >* clone() const;
275 
276  /// virtual copy constructor with a given allocator
278  clone(const allocator_type& alloc) const;
279 
280  /// destructor
282 
283  /// @}
284 
285 
286  // ##########################################################################
287  /// @name Operators
288  // ##########################################################################
289 
290  /// @{
291 
292  /// copy operator
295 
296  /// move operator
299 
300  /// @}
301 
302 
303  // ##########################################################################
304  /// @name Accessors / Modifiers
305  // ##########################################################################
306 
307  /// @{
308 
309  /// returns the translation of a string
310  /** This method tries to translate a given string into the
311  * DBTranslatedValue that should be stored into a DatabaseTable. If the
312  * translator cannot find the translation in its current dictionary, then
313  * two situations can arise:
314  * -# if the translator is not in an editable dictionary mode, then the
315  * translator raises a NotFound exception.
316  * -# if the translator is in an editable dictionary mode, i.e., it is
317  * allowed to update its dictionary, then it tries to update the range
318  * of its dictionary to include the new value. Upon success, it returns
319  * the translated value, otherwise, it raises either:
320  * - a TypeError exception if the string cannot be converted into a
321  * value that can be inserted into the dictionary
322  * - an OperationNotAllowed exception if the translation would induce
323  * incoherent behavior (e.g., a DBTranslator4ContinuousVariable that
324  * contains a variable whose domain is [x,y] as well as a missing
325  * value symbol z \f$\in\f$ [x,y]).
326 
327  * @warning Note that missing values (i.e., string encoded as missing
328  * symbols) are translated as std::numeric_limits<float>::max ().
329  * @warning If the variable contained into the translator has a value in
330  * its domain equal to a missing value symbol, this value will be taken
331  * into account in the translations, not the missing value.
332  * @return the translated value of the string to be stored into a
333  * DatabaseTable
334  * @throws UnknownLabelInDatabase is raised if the number represented by
335  * the string is out of the range of the continuous variable and the
336  * translator is not in an editable dictionary mode.
337  * @throws OperationNotAllowed exception is raised if the translation
338  * cannot be found and the insertion of the string into the translator's
339  * dictionary fails because it would induce incoherent behavior (e.g.,
340  * a DBTranslator4ContinuousVariable that contains a variable whose domain
341  * is [x,y] as well as a missing value symbol z \f$\in\f$ [x,y]).
342  * @throws TypeError is raised if the translation cannot be found and
343  * the insertion of the string into the translator's dictionary fails
344  * due to str being impossible to be converted into an appropriate type. */
345  virtual DBTranslatedValue translate(const std::string& str) final;
346 
347  /// returns the original value for a given translation
348  /** @return the string that was translated into a given DBTranslatedValue.
349  * @throws UnknownLabelInDatabase is raised if this original value is
350  * outside the domain of the continuous variable stored within the
351  * translator */
352  virtual std::string
353  translateBack(const DBTranslatedValue translated_val) const final;
354 
355  /// returns std::numeric_limits<std::size_t>::max ()
356  virtual std::size_t domainSize() const final;
357 
358  /// indicates that the translations should never be reordered
359  virtual bool needsReordering() const final;
360 
361  /** @brief returns an empty mapping, indicating that old translations
362  * are equal to the newly reordered ones. */
363  virtual HashTable< std::size_t,
364  std::size_t,
365  ALLOC< std::pair< std::size_t, std::size_t > > >
366  reorder() final;
367 
368  /// returns the variable stored into the translator
369  virtual const IContinuousVariable* variable() const final;
370 
371  /// returns the translation of a missing value
372  virtual DBTranslatedValue missingValue() const final;
373 
374  /// @}
375 
376 
377 #ifndef DOXYGEN_SHOULD_SKIP_THIS
378 
379  private:
380  // the ContinuousVariable really used by the translator. As its values
381  // are floats, this speeds-up translations
382  ContinuousVariable< float > variable__;
383 
384  // the ContinuousVariable returned by method variable ()
385  // We must return an IContinuousVariable because the user may have
386  // saved into the translator a ContinuousVariable<X>, with X != float
387  IContinuousVariable* real_variable__;
388 
389  // assign to each float missing symbol a Boolean indicating whether
390  // we already translated it or not. If we translated it, then we cannot
391  // change the range of the variable so that this range contains the symbol.
392  HashTable< std::string, bool, ALLOC< std::pair< float, bool > > >
393  status_float_missing_symbols__;
394 
395  // a string containing a non real missing symbol
396  // (useful for back translations)
397  std::string nonfloat_missing_symbol__;
398 
399  // indicates whether we should fit the range of the observed values
400  bool fit_range__;
401 
402 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
403  };
404 
405  } /* namespace learning */
406 
407 } /* namespace gum */
408 
409 
410 // always include the template implementation
411 #include <agrum/tools/database/DBTranslator4ContinuousVariable_tpl.h>
412 
413 #endif /* GUM_LEARNING_DB_TRANSLATOR_4_CONTINUOUS_VARIABLE_H */
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
DBTranslator4ContinuousVariable(const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor without any initial variable nor missing symbol
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
virtual DBTranslator4ContinuousVariable< ALLOC > * clone() const
virtual copy constructor
DBTranslator4ContinuousVariable(const DBTranslator4ContinuousVariable< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
DBTranslator4ContinuousVariable(const DBTranslator4ContinuousVariable< ALLOC > &from)
copy constructor
DBTranslator4ContinuousVariable(const ContinuousVariable< GUM_SCALAR > &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor with a continuous variable as translator
DBTranslator4ContinuousVariable(const ContinuousVariable< GUM_SCALAR > &var, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor with a continuous variable as translator but without missing symbol ...
DBTranslator4ContinuousVariable(const IContinuousVariable &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor with a IContinuous variable as translator
virtual DBTranslator4ContinuousVariable< ALLOC > * clone(const allocator_type &alloc) const
virtual copy constructor with a given allocator
DBTranslator4ContinuousVariable< ALLOC > & operator=(const DBTranslator4ContinuousVariable< ALLOC > &from)
copy operator
virtual std::size_t domainSize() const final
returns std::numeric_limits<std::size_t>::max ()
DBTranslator4ContinuousVariable(const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor without any initial variable
virtual bool needsReordering() const final
indicates that the translations should never be reordered
DBTranslator4ContinuousVariable(DBTranslator4ContinuousVariable< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
DBTranslator4ContinuousVariable(DBTranslator4ContinuousVariable< ALLOC > &&from)
move constructor
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
returns an empty mapping, indicating that old translations are equal to the newly reordered ones...
DBTranslator4ContinuousVariable< ALLOC > & operator=(DBTranslator4ContinuousVariable< ALLOC > &&from)
move operator
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
The databases' cell translators for continuous variables.
DBTranslator4ContinuousVariable(const IContinuousVariable &var, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor with a IContinuous variable as translator but without missing symbol ...
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
virtual const IContinuousVariable * variable() const final
returns the variable stored into the translator
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value