aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
DBTranslator4ContinuousVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for continuous variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 # include <utility>
30 # include <vector>
31 # include <limits>
32 
33 # include <agrum/tools/database/DBTranslator4ContinuousVariable.h>
34 # include <agrum/tools/database/DBCell.h>
35 
36 namespace gum {
37 
38  namespace learning {
39 
40 
41  /// default constructor
    ///
    /// Builds a translator around an internal continuous variable constructed
    /// from ("var", "") -- presumably its name and description; confirm against
    /// the continuous variable class.
    ///
    /// @param missing_symbols the strings that denote missing values. They are
    /// forwarded to the DBTranslator base class; afterwards every entry of
    /// this->missing_symbols_ that DBCell::isReal() recognizes as a number is
    /// erased (see the rationale in the body).
    /// @param fit_range forwarded to the base class and stored in _fit_range_.
    /// When true, the lower bound of _variable_ is set to +infinity to mark
    /// that no value has been observed yet.
    /// @param alloc the allocator, forwarded to the DBTranslator base class.
42  template < template < typename > class ALLOC >
43  template < template < typename > class XALLOC >
44  DBTranslator4ContinuousVariable< ALLOC >::DBTranslator4ContinuousVariable(
45  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
46  const bool fit_range,
47  const typename DBTranslator4ContinuousVariable< ALLOC >::allocator_type& alloc) :
    // NOTE(review): the meaning of the literal arguments `true` and `1` is
    // defined by DBTranslator's constructor, which is not visible here.
48  DBTranslator< ALLOC >(DBTranslatedValueType::CONTINUOUS,
49  true,
50  missing_symbols,
51  fit_range,
52  1,
53  alloc),
54  _variable_("var", ""), _fit_range_(fit_range) {
55  // If fit_range is false, the range of the random variable remains
56  // (-inf,+inf), so every missing symbol that is a number lies in the
57  // domain of the variable and must be discarded.
58  // Conversely, if fit_range is true, each newly
59  // observed value will update the range of the variable, so that, again,
60  // all the missing symbols that are numbers should be discarded since
61  // they always end up lying in the domain of the variable.
    // Elements are erased while iterating, hence beginSafe()/endSafe() --
    // presumably these iterators tolerate erasure; confirm against the
    // container's documentation.
62  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
63  ++iter) {
64  if (DBCell::isReal(*iter)) { this->missing_symbols_.erase(iter); }
65  }
66 
67  // the remaining symbols are not numbers. Take the first one (in the
68  // container's iteration order) as the default missing symbol for back translations.
69  if (!this->missing_symbols_.empty()) {
70  _nonfloat_missing_symbol_ = *(this->missing_symbols_.begin());
71  }
72 
73  // if fit_range is true, we shall be able to update the range of
74  // the continuous variable. To indicate that we have not encountered any
75  // value yet in the database, we set the lower bound of _variable_ to +infinity
76  if (_fit_range_) _variable_.setLowerBound(std::numeric_limits< float >::infinity());
77 
78  // store a clone of the variable: this is the object returned by method variable ()
79  _real_variable_ = _variable_.clone();
80 
    // NOTE(review): GUM_CONSTRUCTOR is a project macro, presumably
    // object-creation bookkeeping -- confirm against its definition.
81  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
82  }
83 
84 
85  /// default constructor
86  template < template < typename > class ALLOC >
88  const bool fit_range,
91  _variable_("var", ""), _fit_range_(fit_range) {
92  // if fit_range is true, we shall be able to update the ranges of
93  // the continuous variable. To indicate that we have not encountered any
94  // value yet in the database, we fix the lower bound of _variable_ to +max
96 
97  // store a copy of the variable, that should be used by method variable ()
99 
101  }
102 
103 
104  /// default constructor with a continuous variable as translator
105  template < template < typename > class ALLOC >
106  template < typename GUM_SCALAR, template < typename > class XALLOC >
109  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
110  const bool fit_range,
113  true,
115  fit_range,
116  1,
117  alloc),
119  // get the bounds of the range variable
120  const float lower_bound = float(var.lowerBound());
121  const float upper_bound = float(var.upperBound());
124 
125  // remove all the missing symbols corresponding to a number between
126  // lower_bound and upper_bound
127  bool non_float_symbol_found = false;
128  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
129  ++iter) {
130  if (DBCell::isReal(*iter)) {
131  const float missing_val = std::stof(*iter);
132  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
133  this->missing_symbols_.erase(iter);
134  } else
136  } else if (!non_float_symbol_found) {
137  non_float_symbol_found = true;
139  }
140  }
141 
142  // store a copy of the variable, that should be used by method variable ()
144 
146  }
147 
148 
149  /// default constructor with a continuous variable as translator
150  template < template < typename > class ALLOC >
151  template < typename GUM_SCALAR >
154  const bool fit_range,
158  // get the bounds of the range variable
159  const float lower_bound = float(var.lowerBound());
160  const float upper_bound = float(var.upperBound());
163 
164  // store a copy of the variable, that should be used by method variable ()
166 
168  }
169 
170 
171  /// default constructor with an IContinuousVariable as translator
172  template < template < typename > class ALLOC >
173  template < template < typename > class XALLOC >
175  const IContinuousVariable& var,
176  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
177  const bool fit_range,
180  true,
182  fit_range,
183  1,
184  alloc),
186  // get the bounds of the range variable
187  const float lower_bound = float(var.lowerBoundAsDouble());
188  const float upper_bound = float(var.upperBoundAsDouble());
191 
192  // remove all the missing symbols corresponding to a number between
193  // lower_bound and upper_bound
194  bool non_float_symbol_found = false;
195  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
196  ++iter) {
197  if (DBCell::isReal(*iter)) {
198  const float missing_val = std::stof(*iter);
199  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
200  this->missing_symbols_.erase(iter);
201  } else
203  } else if (!non_float_symbol_found) {
204  non_float_symbol_found = true;
206  }
207  }
208 
209  // store a copy of the variable, that should be used by method variable ()
211 
213  }
214 
215 
216  /// default constructor with an IContinuousVariable as translator
217  template < template < typename > class ALLOC >
219  const IContinuousVariable& var,
220  const bool fit_range,
224  // get the bounds of the range variable
225  const float lower_bound = var.lowerBoundAsDouble();
226  const float upper_bound = var.upperBoundAsDouble();
229 
230  // store a copy of the variable, that should be used by method variable ()
232 
234  }
235 
236 
237  /// copy constructor with a given allocator
238  template < template < typename > class ALLOC >
246  // store a copy of the variable, that should be used by method variable ()
248 
250  }
251 
252 
253  /// copy constructor
254  template < template < typename > class ALLOC >
258 
259 
260  /// move constructor with a given allocator
261  template < template < typename > class ALLOC >
270  // store a copy of the variable, that should be used by method variable ()
272  from._real_variable_ = nullptr;
273 
275  }
276 
277 
278  /// move constructor
279  template < template < typename > class ALLOC >
283 
284 
285  /// virtual copy constructor
286  template < template < typename > class ALLOC >
288  const typename DBTranslator4ContinuousVariable< ALLOC >::allocator_type& alloc) const {
291  try {
293  } catch (...) {
295  throw;
296  }
297  return translator;
298  }
299 
300 
301  /// virtual copy constructor
302  template < template < typename > class ALLOC >
305  return clone(this->getAllocator());
306  }
307 
308 
309  /// destructor
310  template < template < typename > class ALLOC >
312  if (_real_variable_ != nullptr) delete _real_variable_;
313 
315  }
316 
317 
318  /// copy operator
319  template < template < typename > class ALLOC >
322  if (this != &from) {
328 
329  if (_real_variable_ != nullptr) delete _real_variable_;
331  }
332 
333  return *this;
334  }
335 
336 
337  /// move operator
338  template < template < typename > class ALLOC >
341  if (this != &from) {
347 
348  if (_real_variable_ != nullptr) delete _real_variable_;
350  from._real_variable_ = nullptr;
351  }
352 
353  return *this;
354  }
355 
356 
357  /// returns the translation of a string, as found in the current dictionary
358  template < template < typename > class ALLOC >
360  // check if the string is actually a number
361  if (!DBCell::isReal(str)) {
362  if (this->isMissingSymbol(str)) {
363  return DBTranslatedValue{std::numeric_limits< float >::max()};
364  } else
366  "String \"" << str << "\" cannot be translated because it is not a number");
367  }
368 
369  // here we know that the string is a number
370  const float number = std::stof(str);
371 
372  // if we are in the range of the variable, return the number
374 
375  // check that this is not a missing value
376  if (this->isMissingSymbol(str)) {
378  return DBTranslatedValue{std::numeric_limits< float >::max()};
379  }
380 
381  // check if we are allowed to update the domain of the variable
382  if (!_fit_range_) {
384  "String \"" << str
385  << "\" cannot be translated because it is "
386  "out of the domain of the continuous variable");
387  }
388 
389  // now, we can try to add str as a new bound of the range variable
390  // if possible
391 
392  // if the variable is empty, set the min and max ranges. Here,
393  // there is no need to check whether the new range would contain an
394  // already translated missing symbol because this was already tested
395  // in the above test.
396  if (_variable_.lowerBound() == std::numeric_limits< float >::infinity()) {
399  return DBTranslatedValue{number};
400  }
401 
402  // here, the domain is not empty. So we should update either the
403  // lower bound or the upper bound of the variable, unless
404  // a missing symbol lies within the new bounds and we have already
405  // translated it.
406  const float lower_bound = _variable_.lowerBound();
407  const float upper_bound = _variable_.upperBound();
408  if (number < lower_bound) {
409  // check that there does not already exist a translated missing
410  // value within the new bounds of the variable
411  for (const auto& missing: _status_float_missing_symbols_) {
412  if (missing.second) {
413  const float miss_val = std::stof(missing.first);
414  if ((miss_val >= number) && (miss_val <= upper_bound)) {
416  "String \"" << str << "\" cannot be translated because "
417  << "it would induce a new domain containing an already "
418  << "translated missing symbol");
419  }
420  }
421  }
422 
423  // remove all the missing symbols that were not translated yet and
424  // that lie within the new bounds of the variable
427  ++iter) {
428  if (iter.val() == false) {
429  const float miss_val = std::stof(iter.key());
430  if ((miss_val >= number) && (miss_val <= upper_bound)) {
431  this->missing_symbols_.erase(iter.key());
433  }
434  }
435  }
436 
437  // update the domain of the continuous variable
439 
440  return DBTranslatedValue{number};
441  } else {
442  // check that there does not already exist a translated missing
443  // value within the new bounds of the variable
444  for (const auto& missing: _status_float_missing_symbols_) {
445  if (missing.second) {
446  const float miss_val = std::stof(missing.first);
447  if ((miss_val >= lower_bound) && (miss_val <= number)) {
449  "String \"" << str << "\" cannot be translated because "
450  << "it would induce a new domain containing an already "
451  << "translated missing symbol");
452  }
453  }
454  }
455 
456  // remove all the missing symbols that were not translated yet and
457  // that lie within the new bounds of the variable
460  ++iter) {
461  if (iter.val() == false) {
462  const float miss_val = std::stof(iter.key());
463  if ((miss_val >= lower_bound) && (miss_val <= number)) {
464  this->missing_symbols_.erase(iter.key());
466  }
467  }
468  }
469 
470  // update the domain of the continuous variable
472 
473  return DBTranslatedValue{number};
474  }
475  }
476 
477 
478  /// returns the original value for a given translation
479  template < template < typename > class ALLOC >
481  const DBTranslatedValue translated_val) const {
482  if (translated_val.cont_val == std::numeric_limits< float >::max()) {
484  if (this->missing_symbols_.empty()) return *(this->missing_symbols_.begin());
485  }
486 
490  "The back translation of "
492  << " could not be found because the value is outside the "
493  << "domain of the continuous variable");
494  }
495 
496  char buffer[100];
498  return std::string(buffer);
499  }
500 
501 
502  /// indicates whether the translations should be reordered
503  template < template < typename > class ALLOC >
505  return false;
506  }
507 
508 
509  /// returns a mapping to reorder the current dictionary and updates it
510  template < template < typename > class ALLOC >
513  return HashTable< std::size_t,
514  std::size_t,
515  ALLOC< std::pair< std::size_t, std::size_t > > >();
516  }
517 
518 
519  /// returns the domain size of a variable corresponding to the translations
520  template < template < typename > class ALLOC >
522  return std::numeric_limits< std::size_t >::max();
523  }
524 
525 
526  /// returns the variable stored into the translator
527  template < template < typename > class ALLOC >
531  return _real_variable_;
532  }
533 
534 
535  /// returns the translation of a missing value
536  template < template < typename > class ALLOC >
538  return DBTranslatedValue{std::numeric_limits< float >::max()};
539  }
540 
541 
542  } /* namespace learning */
543 
544 } /* namespace gum */
545 
546 
547 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)