aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
DBTranslator4IntegerVariable.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for integer variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H
28 #define GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H
29 
30 #include <agrum/agrum.h>
31 #include <agrum/tools/database/DBTranslator.h>
32 #include <agrum/tools/variables/integerVariable.h>
33 
34 
35 namespace gum {
36 
37  namespace learning {
38 
39 
40  /** @class DBTranslator4IntegerVariable
41  * @headerfile DBTranslator4IntegerVariable.h <agrum/tools/database/DBTranslator4IntegerVariable.h>
42  * @brief The databases' cell translators for integer variables
43  *
44  * Translators are used by DatabaseTable instances to transform datasets'
45  * strings into DBTranslatedValue instances. The point is that strings are
46  * not adequate for fast learning, they need to be preprocessed into a type
47  * that can be analyzed quickly (the so-called DBTranslatedValue type).
48  *
49  * A DBTranslator4IntegerVariable is a translator that contains and
50  * exploits an IntegerVariable for translations. Each time a string needs
51  * to be translated, we ask the IntegerVariable for the index of the number
52  * represented by the string. The DBTranslatedValue corresponding to the translation
53  * of the string contains this index in its discr_val field.
54  *
55  * @warning Translators for integer variables are not editable, that is,
56  * you must provide the const variable that will be used for translations.
57  * Enabling the editable mode would not make much sense because, during the
58  * translation, the DBTranslatedValue of an integer may change after translating
59  * another integer.
60  *
61  * @par Here is an example of how to use this class:
62  * @code
63  * // create the translator, with possible missing symbols: "N/A" and "???"
64  * // i.e., each time the translator reads a "N/A" or a "???" string, it
65  * // won't translate it into a number but into a missing value.
66  * std::vector<std::string> missing { "N/A", "???" };
67  * gum::IntegerVariable var ( "X1", "" );
68  * var.addValue( 1 );
69  * var.addValue( 3 );
70  * var.addValue( 10 );
71  * gum::learning::DBTranslator4IntegerVariable<> translator(var, missing);
72  *
73  * // gets the DBTranslatedValue corresponding to some strings
74  * auto val1 = translator.translate("3");
75  * auto val2 = translator << "1";
76  * // at this point, val1 and val2 are equal to
77  * // gum::learning::DBTranslatedValue { std::size_t(1) } and
78  * // gum::learning::DBTranslatedValue { std::size_t(0) } respectively
79  *
80  * // if the string contains a number outside the domain of the
81  * // IntegerVariable, then a gum::UnknownLabelInDatabase exception is raised:
82  * auto val3 = translator << "17"; // UnknownLabelInDatabase raised
83  *
84  * // add the numbers assigned to val1, val2
85  * std::size_t sum = val1.discr_val + val2.discr_val;
86  *
87  * // translate missing values: val4 and val5 will be equal to:
88  * // DBTranslatedValue { std::numeric_limits<std::size_t>::max () }
89  * auto val4 = translator << "N/A";
90  * auto val5 = translator.translate ( "???" );
91  *
92  * // the following instructions raise TypeError exceptions because the
93  * // strings are not integers
94  * auto val6 = translator << "422.5";
95  * auto val7 = translator.translate ( "xxx" );
96  *
97  * // given a DBTranslatedValue that is supposed to contain the index of
98  * // an integer, get the string representing this integer.
99  * std::string str;
100  * str = translator.translateBack ( val1 ); // str = "3"
101  * str = translator >> val2; // str = "1"
102  * str = translator >> gum::learning::DBTranslatedValue {std::size_t(1)};
103  * // str = "3"
104  *
105  * // translate back missing values: the string will correspond to one of
106  * // the missing symbols known to the translator
107  * str = translator >> val4; // str = "N/A" or "???"
108  * str = translator >> val5; // str = "N/A" or "???"
109  *
110  * // get the variable stored within the translator
111  * const gum::IntegerVariable* stored_var =
112  * dynamic_cast<const gum::IntegerVariable*>(translator.variable());
113  * @endcode
114  *
115  * @ingroup learning_database
116  */
117  template < template < typename > class ALLOC = std::allocator >
119  public:
120  /// type for the allocators passed in arguments of methods
122 
123 
124  // ##########################################################################
125  /// @name Constructors / Destructors
126  // ##########################################################################
127 
128  /// @{
129 
130  /// default constructor with an integer variable as translator
131  /** @param var an integer variable which will be used for translations.
132  * The translator keeps a copy of this variable
133  * @param missing_symbols the set of symbols in the dataset
134  * representing missing values
135  * @param max_dico_entries the max number of entries that the dictionary
136  * can contain. During the construction, we check that the integer
137  * variable passed in argument has fewer values than
138  * the admissible dictionary size
139  * @param alloc The allocator used to allocate memory for all the
140  * fields of the DBTranslator4IntegerVariable */
141  template < template < typename > class XALLOC >
143  const IntegerVariable& var,
144  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
146  const allocator_type& alloc = allocator_type());
147 
148  /** @brief default constructor with an integer variable as translator
149  * but without missing symbols
150  *
151  * @param var an integer variable which will be used for translations.
152  * The translator keeps a copy of this variable
153  * @param max_dico_entries the max number of entries that the dictionary
154  * can contain. During the construction, we check that the integer
155  * variable passed in argument has a domain size not larger than
156  * the admissible dictionary size
157  * @param alloc The allocator used to allocate memory for all the
158  * fields of the DBTranslator4IntegerVariable */
159  DBTranslator4IntegerVariable(const IntegerVariable& var,
160  std::size_t max_dico_entries
161  = std::numeric_limits< std::size_t >::max(),
162  const allocator_type& alloc = allocator_type());
163 
164  /// copy constructor
165  DBTranslator4IntegerVariable(const DBTranslator4IntegerVariable< ALLOC >& from);
166 
167  /// copy constructor with a given allocator
168  DBTranslator4IntegerVariable(const DBTranslator4IntegerVariable< ALLOC >& from,
169  const allocator_type& alloc);
170 
171  /// move constructor
172  DBTranslator4IntegerVariable(DBTranslator4IntegerVariable< ALLOC >&& from);
173 
174  /// move constructor with a given allocator
175  DBTranslator4IntegerVariable(DBTranslator4IntegerVariable< ALLOC >&& from,
176  const allocator_type& alloc);
177 
178  /// virtual copy constructor
179  virtual DBTranslator4IntegerVariable< ALLOC >* clone() const;
180 
181  /// virtual copy constructor with a given allocator
182  virtual DBTranslator4IntegerVariable< ALLOC >* clone(const allocator_type& alloc) const;
183 
184  /// destructor
185  virtual ~DBTranslator4IntegerVariable();
186 
187  /// @}
188 
189 
190  // ##########################################################################
191  /// @name Operators
192  // ##########################################################################
193 
194  /// @{
195 
196  /// copy operator
199 
200  /// move operator
203 
204  /// @}
205 
206 
207  // ##########################################################################
208  /// @name Accessors / Modifiers
209  // ##########################################################################
210 
211  /// @{
212 
213  /// returns the translation of a string
214  /** This method tries to translate a given string into the
215  * DBTranslatedValue that should be stored into a DatabaseTable. If the
216  * translator cannot find the translation in its current dictionary, then
217  * the translator raises either a TypeError if the string is not a number
218  * or an UnknownLabelInDatabase exception.
219  *
220  * @warning Note that missing values (i.e., strings encoded as missing
221  * symbols) are translated as std::numeric_limits<std::size_t>::max ().
222  * @warning If the variable contained in the translator has an integer
223  * that corresponds to a missing value symbol, the integer will be taken into
224  * account in the translation, not the missing symbol.
225  * @return the translated value of the string to be stored into a
226  * DatabaseTable
227  * @throws UnknownLabelInDatabase is raised if the translation cannot be
228  * found.
229  * @throws TypeError is raised if the translation cannot be found in
230  * the translator and the string does not correspond to a number. */
231  virtual DBTranslatedValue translate(const std::string& str) final;
232 
233  /// returns the original value for a given translation
234  /** @return the string that was translated into a given DBTranslatedValue.
235  * @throws UnknownLabelInDatabase is raised if this original value
236  * cannot be found */
237  virtual std::string translateBack(const DBTranslatedValue translated_val) const final;
238 
239  /// returns the domain size of the variable used for translations
240  /** @warning Note that missing values are encoded as
241  * std::numeric_limits<std::size_t>::max () and are not taken into account in the
242  * domain sizes. */
243  virtual std::size_t domainSize() const final;
244 
245  /// indicates that the translator is never in editable dictionary mode
246  virtual bool hasEditableDictionary() const final;
247 
248  /// sets/unsets the editable dictionary mode
249  virtual void setEditableDictionaryMode(bool new_mode) final;
250 
251  /// indicates that the translations should never be reordered
252  virtual bool needsReordering() const final;
253 
254  /** @brief returns an empty HashTable to indicate that no reordering
255  * is needed. */
256  virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > >
257  reorder() final;
258 
259  /// returns the variable stored into the translator
260  virtual const IntegerVariable* variable() const final;
261 
262  /// returns the translation of a missing value
263  virtual DBTranslatedValue missingValue() const final;
264 
265  /// @}
266 
267 
268 #ifndef DOXYGEN_SHOULD_SKIP_THIS
269 
270  private:
271  // the IntegerVariable used for translations
272  IntegerVariable _variable_;
273 
274 
275 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
276  };
277 
278 
279  } /* namespace learning */
280 
281 } /* namespace gum */
282 
283 
284 // always include the template implementation
285 #include <agrum/tools/database/DBTranslator4IntegerVariable_tpl.h>
286 
287 #endif /* GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H */
virtual ~DBTranslator4IntegerVariable()
destructor
virtual const IntegerVariable * variable() const final
returns the variable stored into the translator
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
DBTranslator4IntegerVariable< ALLOC > & operator=(const DBTranslator4IntegerVariable< ALLOC > &from)
copy operator
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
DBTranslator4IntegerVariable(const DBTranslator4IntegerVariable< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
virtual DBTranslator4IntegerVariable< ALLOC > * clone() const
virtual copy constructor
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value
virtual void setEditableDictionaryMode(bool new_mode) final
sets/unset the editable dictionary mode
DBTranslator4IntegerVariable(DBTranslator4IntegerVariable< ALLOC > &&from)
move constructor
DBTranslator4IntegerVariable< ALLOC > & operator=(DBTranslator4IntegerVariable< ALLOC > &&from)
move operator
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
virtual bool hasEditableDictionary() const final
indicates that the translator is never in editable dictionary mode
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
returns an empty HashTable to indicate that no reordering is needed.
DBTranslator4IntegerVariable(const IntegerVariable &var, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor with an integer variable as translator but without missing symbols ...
virtual std::size_t domainSize() const final
returns the domain size of the variable used for translations
virtual DBTranslator4IntegerVariable< ALLOC > * clone(const allocator_type &alloc) const
virtual copy constructor with a given allocator
virtual bool needsReordering() const final
indicates that the translations should never be reordered
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
DBTranslator4IntegerVariable(const DBTranslator4IntegerVariable< ALLOC > &from)
copy constructor
DBTranslator4IntegerVariable(const IntegerVariable &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor with an integer variable as translator
DBTranslator4IntegerVariable(DBTranslator4IntegerVariable< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator