aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBTranslator4ContinuousVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for continuous variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 # include <utility>
30 # include <vector>
31 # include <limits>
32 
33 # include <agrum/tools/database/DBTranslator4ContinuousVariable.h>
34 # include <agrum/tools/database/DBCell.h>
35 
36 namespace gum {
37 
38  namespace learning {
39 
40 
41  /// constructor without a user-supplied variable: the translator creates its own variable named "var"
42  template < template < typename > class ALLOC >
43  template < template < typename > class XALLOC >
44  DBTranslator4ContinuousVariable< ALLOC >::DBTranslator4ContinuousVariable(
45  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
46  const bool fit_range,
47  const typename DBTranslator4ContinuousVariable< ALLOC >::allocator_type& alloc) :
48  DBTranslator< ALLOC >(DBTranslatedValueType::CONTINUOUS,
49  missing_symbols,
50  fit_range,
51  1,
52  alloc),
53  _variable_("var", ""), _fit_range_(fit_range) {
54  // If fit_range is false, the range of the random variable remains
55  // (-inf,+inf), so all the missing symbols that are numbers should be
56  // discarded since they lie in the domain of the variable.
57  // On the other hand, if fit_range is true, each newly
58  // observed value will update the range of the variable, so that, again,
59  // all the missing symbols that are numbers should be discarded since
60  // they always end up lying in the domain of the variable.
61  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
62  ++iter) {   // safe iterators are used here: elements are erased during the traversal
63  if (DBCell::isReal(*iter)) { this->missing_symbols_.erase(iter); }
64  }
65 
66  // The remaining symbols are not numbers. Take the first one as
67  // the default missing symbol for back translations.
68  if (!this->missing_symbols_.empty()) {
69  _nonfloat_missing_symbol_ = *(this->missing_symbols_.begin());
70  }
71 
72  // If fit_range is true, we must be able to update the range of
73  // the continuous variable. To indicate that we have not encountered any
74  // value yet in the database, we set the lower bound of _variable_ to +infinity.
75  if (_fit_range_) _variable_.setLowerBound(std::numeric_limits< float >::infinity());
76 
77  // store a clone of the variable, to be used by method variable ()
78  _real_variable_ = _variable_.clone();
79 
80  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
81  }
82 
83 
84  /// default constructor
85  template < template < typename > class ALLOC >
87  const bool fit_range,
90  _variable_("var", ""), _fit_range_(fit_range) {
91  // if fit_range is true, we shall be able to update the ranges of
92  // the continuous variable. To indicate that we did not encountered any
93  // value yet in the database, we fix the lower bound of _variable_ to +max
95 
96  // store a copy of the variable, that should be used by method variable ()
98 
100  }
101 
102 
103  /// constructor taking a continuous variable (bounds read through lowerBound()/upperBound()) and missing symbols
104  template < template < typename > class ALLOC >
105  template < typename GUM_SCALAR, template < typename > class XALLOC >
108  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
109  const bool fit_range,
113  fit_range,
114  1,
115  alloc),
117  // get the bounds of the range variable, converted to float
118  const float lower_bound = float(var.lowerBound());
119  const float upper_bound = float(var.upperBound());
122 
123  // remove all the missing symbols corresponding to a number lying within
124  // [lower_bound, upper_bound] (both bounds included)
125  bool non_float_symbol_found = false;
126  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
127  ++iter) {   // safe iterators are used here: elements are erased during the traversal
128  if (DBCell::isReal(*iter)) {
129  const float missing_val = std::stof(*iter);
130  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
131  this->missing_symbols_.erase(iter);
132  } else
134  } else if (!non_float_symbol_found) {   // only the first non-numeric symbol enters this branch
135  non_float_symbol_found = true;
137  }
138  }
139 
140  // store a copy of the variable, to be used by method variable ()
142 
144  }
145 
146 
147  /// default constructor with a continuous variable as translator
148  template < template < typename > class ALLOC >
149  template < typename GUM_SCALAR >
152  const bool fit_range,
156  // get the bounds of the range variable
157  const float lower_bound = float(var.lowerBound());
158  const float upper_bound = float(var.upperBound());
161 
162  // store a copy of the variable, that should be used by method variable ()
164 
166  }
167 
168 
169  /// constructor taking an IContinuousVariable (bounds read as doubles) and a list of missing symbols
170  template < template < typename > class ALLOC >
171  template < template < typename > class XALLOC >
173  const IContinuousVariable& var,
174  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
175  const bool fit_range,
179  fit_range,
180  1,
181  alloc),
183  // get the bounds of the range variable, converted to float
184  const float lower_bound = float(var.lowerBoundAsDouble());
185  const float upper_bound = float(var.upperBoundAsDouble());
188 
189  // remove all the missing symbols corresponding to a number lying within
190  // [lower_bound, upper_bound] (both bounds included)
191  bool non_float_symbol_found = false;
192  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
193  ++iter) {   // safe iterators are used here: elements are erased during the traversal
194  if (DBCell::isReal(*iter)) {
195  const float missing_val = std::stof(*iter);
196  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
197  this->missing_symbols_.erase(iter);
198  } else
200  } else if (!non_float_symbol_found) {   // only the first non-numeric symbol enters this branch
201  non_float_symbol_found = true;
203  }
204  }
205 
206  // store a copy of the variable, to be used by method variable ()
208 
210  }
211 
212 
213  /// constructor taking an IContinuousVariable, whose bounds are read as doubles and stored as floats
214  template < template < typename > class ALLOC >
216  const IContinuousVariable& var,
217  const bool fit_range,
221  // get the bounds of the range variable; the double -> float narrowing is made explicit
222  const float lower_bound = float(var.lowerBoundAsDouble());
223  const float upper_bound = float(var.upperBoundAsDouble());
226 
227  // store a copy of the variable, to be used by method variable ()
229 
231  }
232 
233 
234  /// copy constructor with a given allocator
235  template < template < typename > class ALLOC >
243  // store a copy of the variable, that should be used by method variable ()
245 
247  }
248 
249 
250  /// copy constructor
251  template < template < typename > class ALLOC >
255 
256 
257  /// move constructor with a given allocator
258  template < template < typename > class ALLOC >
267  // store a copy of the variable, that should be used by method variable ()
269  from._real_variable_ = nullptr;
270 
272  }
273 
274 
275  /// move constructor
276  template < template < typename > class ALLOC >
280 
281 
282  /// virtual copy constructor
283  template < template < typename > class ALLOC >
285  const typename DBTranslator4ContinuousVariable< ALLOC >::allocator_type& alloc) const {
288  try {
290  } catch (...) {
292  throw;
293  }
294  return translator;
295  }
296 
297 
298  /// virtual copy constructor
299  template < template < typename > class ALLOC >
302  return clone(this->getAllocator());
303  }
304 
305 
306  /// destructor
307  template < template < typename > class ALLOC >
309  if (_real_variable_ != nullptr) delete _real_variable_;
310 
312  }
313 
314 
315  /// copy operator
316  template < template < typename > class ALLOC >
319  if (this != &from) {
325 
326  if (_real_variable_ != nullptr) delete _real_variable_;
328  }
329 
330  return *this;
331  }
332 
333 
334  /// move operator
335  template < template < typename > class ALLOC >
338  if (this != &from) {
344 
345  if (_real_variable_ != nullptr) delete _real_variable_;
347  from._real_variable_ = nullptr;
348  }
349 
350  return *this;
351  }
352 
353 
354  /// returns the translation of a string; missing symbols are translated into std::numeric_limits< float >::max()
355  template < template < typename > class ALLOC >
357  // check whether the string is actually a number
358  if (!DBCell::isReal(str)) {
359  if (this->isMissingSymbol(str)) {
360  return DBTranslatedValue{std::numeric_limits< float >::max()};
361  } else
363  "String \"" << str << "\" cannot be translated because it is not a number");
364  }
365 
366  // here we know that the string is a number
367  const float number = std::stof(str);
368 
369  // if we are in the range of the variable, return the number
371 
372  // check that this is not a missing value
373  if (this->isMissingSymbol(str)) {
375  return DBTranslatedValue{std::numeric_limits< float >::max()};
376  }
377 
378  // check whether we are allowed to update the domain of the variable
379  if (!_fit_range_) {
381  "String \"" << str
382  << "\" cannot be translated because it is "
383  "out of the domain of the continuous variable");
384  }
385 
386  // now, we can try to add str as a new bound of the range variable
387  // if possible
388 
389  // if no value has been stored yet (the lower bound is still +infinity),
390  // set the min and max ranges. Here, there is no need to check whether the
391  // new range would contain an already translated missing symbol because
392  // this was already tested in the above test.
393  if (_variable_.lowerBound() == std::numeric_limits< float >::infinity()) {
396  return DBTranslatedValue{number};
397  }
398 
399  // here, the domain is not empty. So we should update either the
400  // lower bound or the upper bound of the variable, unless
401  // a missing symbol lies within the new bounds and we have already
402  // translated it.
403  const float lower_bound = _variable_.lowerBound();
404  const float upper_bound = _variable_.upperBound();
405  if (number < lower_bound) {
406  // check that there does not already exist a translated missing
407  // value within the new bounds [number, upper_bound] of the variable
408  for (const auto& missing: _status_float_missing_symbols_) {
409  if (missing.second) {
410  const float miss_val = std::stof(missing.first);
411  if ((miss_val >= number) && (miss_val <= upper_bound)) {
413  "String \"" << str << "\" cannot be translated because "
414  << "it would induce a new domain containing an already "
415  << "translated missing symbol");
416  }
417  }
418  }
419 
420  // remove all the missing symbols that were not translated yet and
421  // that lie within the new bounds [number, upper_bound] of the variable
424  ++iter) {
425  if (iter.val() == false) {
426  const float miss_val = std::stof(iter.key());
427  if ((miss_val >= number) && (miss_val <= upper_bound)) {
428  this->missing_symbols_.erase(iter.key());
430  }
431  }
432  }
433 
434  // update the domain of the continuous variable
436 
437  return DBTranslatedValue{number};
438  } else {
439  // check that there does not already exist a translated missing
440  // value within the new bounds [lower_bound, number] of the variable
441  for (const auto& missing: _status_float_missing_symbols_) {
442  if (missing.second) {
443  const float miss_val = std::stof(missing.first);
444  if ((miss_val >= lower_bound) && (miss_val <= number)) {
446  "String \"" << str << "\" cannot be translated because "
447  << "it would induce a new domain containing an already "
448  << "translated missing symbol");
449  }
450  }
451  }
452 
453  // remove all the missing symbols that were not translated yet and
454  // that lie within the new bounds [lower_bound, number] of the variable
457  ++iter) {
458  if (iter.val() == false) {
459  const float miss_val = std::stof(iter.key());
460  if ((miss_val >= lower_bound) && (miss_val <= number)) {
461  this->missing_symbols_.erase(iter.key());
463  }
464  }
465  }
466 
467  // update the domain of the continuous variable
469 
470  return DBTranslatedValue{number};
471  }
472  }
473 
474 
475  /// returns the original string for a given translation (std::numeric_limits< float >::max() encodes a missing value)
476  template < template < typename > class ALLOC >
478  const DBTranslatedValue translated_val) const {
479  if (translated_val.cont_val == std::numeric_limits< float >::max()) {
481  if (!this->missing_symbols_.empty()) return *(this->missing_symbols_.begin());   // only dereference begin() when a symbol exists
482  }
483 
487  "The back translation of "
489  << " could not be found because the value is outside the "
490  << "domain of the continuous variable");
491  }
492 
493  char buffer[100];
495  return std::string(buffer);
496  }
497 
498 
499  /// indicates whether the translations should be reordered
500  template < template < typename > class ALLOC >
502  return false;
503  }
504 
505 
506  /// returns a mapping to reorder the current dictionary and updates it
507  template < template < typename > class ALLOC >
510  return HashTable< std::size_t,
511  std::size_t,
512  ALLOC< std::pair< std::size_t, std::size_t > > >();
513  }
514 
515 
516  /// returns the domain size of a variable corresponding to the translations
517  template < template < typename > class ALLOC >
519  return std::numeric_limits< std::size_t >::max();
520  }
521 
522 
523  /// returns the variable stored into the translator
524  template < template < typename > class ALLOC >
528  return _real_variable_;
529  }
530 
531 
532  /// returns the translation of a missing value
533  template < template < typename > class ALLOC >
535  return DBTranslatedValue{std::numeric_limits< float >::max()};
536  }
537 
538 
539  } /* namespace learning */
540 
541 } /* namespace gum */
542 
543 
544 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)