aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
DBTranslator4ContinuousVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for continuous variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 # include <utility>
30 # include <vector>
31 # include <limits>
32 
33 # include <agrum/tools/database/DBTranslator4ContinuousVariable.h>
34 # include <agrum/tools/database/DBCell.h>
35 
36 namespace gum {
37 
38  namespace learning {
39 
40 
41  /// default constructor: builds the translator from a list of missing symbols
 ///
 /// The embedded variable__ is constructed with the arguments ("var", "").
 /// @param missing_symbols the strings denoting missing values; they are
 /// forwarded to the DBTranslator base class, and those recognized as numbers
 /// by DBCell::isReal() are then erased from missing_symbols_
 /// @param fit_range copied into fit_range__ and forwarded to the base class;
 /// when true, the lower bound of variable__ is initialized to +infinity
 /// @param alloc the allocator forwarded to the DBTranslator base class
42  template < template < typename > class ALLOC >
43  template < template < typename > class XALLOC >
44  DBTranslator4ContinuousVariable< ALLOC >::DBTranslator4ContinuousVariable(
45  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
46  const bool fit_range,
47  const typename DBTranslator4ContinuousVariable< ALLOC >::allocator_type&
48  alloc) :
49  DBTranslator< ALLOC >(DBTranslatedValueType::CONTINUOUS,
50  missing_symbols,
51  fit_range,
52  1,
53  alloc),
54  variable__("var", ""), fit_range__(fit_range) {
55  // Here, if fit_range is set to false, the range of the
56  // random variable will remain (-inf,+inf). So all the missing symbols
57  // that are numbers should be discarded since they lie in the domain
58  // of the variable. On the other hand, if fit_range is true, each newly
59  // observed value will update the range of the variable, so that, again,
60  // all the missing symbols that are numbers should be discarded since
61  // they always end up lying in the domain of the variable.
 // NOTE(review): elements are erased while iterating; presumably the safe
 // iterators (beginSafe/endSafe) are what makes this valid -- confirm
62  for (auto iter = this->missing_symbols_.beginSafe();
63  iter != this->missing_symbols_.endSafe();
64  ++iter) {
65  if (DBCell::isReal(*iter)) { this->missing_symbols_.erase(iter); }
66  }
67 
68  // the remaining symbols are not numbers. Take the first one as
69  // the default missing symbol for back translations.
70  if (!this->missing_symbols_.empty()) {
71  nonfloat_missing_symbol__ = *(this->missing_symbols_.begin());
72  }
73 
74  // if fit_range is true, we shall be able to update the range of
75  // the continuous variable. To indicate that we have not encountered any
76  // value yet in the database, we set the lower bound of variable__ to +infinity
77  if (fit_range__)
78  variable__.setLowerBound(std::numeric_limits< float >::infinity());
79 
80  // store a clone of variable__, that should be used by method variable ()
81  real_variable__ = variable__.clone();
82 
83  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
84  }
85 
86 
87  /// default constructor without missing symbols
88  template < template < typename > class ALLOC >
90  const bool fit_range,
92  alloc) :
94  fit_range,
95  1,
96  alloc),
97  variable__("var", ""), fit_range__(fit_range) {
98  // if fit_range is true, we shall be able to update the range of
99  // the continuous variable. To indicate that we have not encountered any
100  // value yet in the database, we set the lower bound of variable__ to +infinity
101  if (fit_range__)
103 
104  // store a copy of the variable, that should be used by method variable ()
106 
108  }
109 
110 
111  /// default constructor with a continuous variable as translator
112  template < template < typename > class ALLOC >
113  template < typename GUM_SCALAR, template < typename > class XALLOC >
116  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
117  const bool fit_range,
119  alloc) :
122  fit_range,
123  1,
124  alloc),
126  // get the bounds of the range variable
127  const float lower_bound = float(var.lowerBound());
128  const float upper_bound = float(var.upperBound());
131 
132  // remove all the missing symbols corresponding to a number between
133  // lower_bound and upper_bound
134  bool non_float_symbol_found = false;
135  for (auto iter = this->missing_symbols_.beginSafe();
136  iter != this->missing_symbols_.endSafe();
137  ++iter) {
138  if (DBCell::isReal(*iter)) {
139  const float missing_val = std::stof(*iter);
140  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
141  this->missing_symbols_.erase(iter);
142  } else
144  } else if (!non_float_symbol_found) {
145  non_float_symbol_found = true;
147  }
148  }
149 
150  // store a copy of the variable, that should be used by method variable ()
152 
154  }
155 
156 
157  /// default constructor with a continuous variable as translator
158  template < template < typename > class ALLOC >
159  template < typename GUM_SCALAR >
162  const bool fit_range,
164  alloc) :
166  fit_range,
167  1,
168  alloc),
170  // get the bounds of the range variable
171  const float lower_bound = float(var.lowerBound());
172  const float upper_bound = float(var.upperBound());
175 
176  // store a copy of the variable, that should be used by method variable ()
178 
180  }
181 
182 
183  /// default constructor with an IContinuousVariable as translator
184  template < template < typename > class ALLOC >
185  template < template < typename > class XALLOC >
187  const IContinuousVariable& var,
188  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
189  const bool fit_range,
191  alloc) :
194  fit_range,
195  1,
196  alloc),
198  // get the bounds of the range variable
199  const float lower_bound = float(var.lowerBoundAsDouble());
200  const float upper_bound = float(var.upperBoundAsDouble());
203 
204  // remove all the missing symbols corresponding to a number between
205  // lower_bound and upper_bound
206  bool non_float_symbol_found = false;
207  for (auto iter = this->missing_symbols_.beginSafe();
208  iter != this->missing_symbols_.endSafe();
209  ++iter) {
210  if (DBCell::isReal(*iter)) {
211  const float missing_val = std::stof(*iter);
212  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
213  this->missing_symbols_.erase(iter);
214  } else
216  } else if (!non_float_symbol_found) {
217  non_float_symbol_found = true;
219  }
220  }
221 
222  // store a copy of the variable, that should be used by method variable ()
224 
226  }
227 
228 
229  /// default constructor with an IContinuousVariable as translator, without missing symbols
230  template < template < typename > class ALLOC >
232  const IContinuousVariable& var,
233  const bool fit_range,
235  alloc) :
237  fit_range,
238  1,
239  alloc),
241  // get the bounds of the range variable
242  const float lower_bound = var.lowerBoundAsDouble();
243  const float upper_bound = var.upperBoundAsDouble();
246 
247  // store a copy of the variable, that should be used by method variable ()
249 
251  }
252 
253 
254  /// copy constructor with a given allocator
255  template < template < typename > class ALLOC >
259  alloc) :
265  // store a copy of the variable, that should be used by method variable ()
267 
269  }
270 
271 
272  /// copy constructor
273  template < template < typename > class ALLOC >
277 
278 
279  /// move constructor with a given allocator
280  template < template < typename > class ALLOC >
284  alloc) :
291  // store a copy of the variable, that should be used by method variable ()
293  from.real_variable__ = nullptr;
294 
296  }
297 
298 
299  /// move constructor
300  template < template < typename > class ALLOC >
304  from.getAllocator()) {}
305 
306 
307  /// virtual copy constructor
308  template < template < typename > class ALLOC >
312  alloc) const {
315  try {
317  } catch (...) {
319  throw;
320  }
321  return translator;
322  }
323 
324 
325  /// virtual copy constructor
326  template < template < typename > class ALLOC >
329  return clone(this->getAllocator());
330  }
331 
332 
333  /// destructor
334  template < template < typename > class ALLOC >
337  if (real_variable__ != nullptr) delete real_variable__;
338 
340  }
341 
342 
343  /// copy operator
344  template < template < typename > class ALLOC >
348  if (this != &from) {
354 
355  if (real_variable__ != nullptr) delete real_variable__;
357  }
358 
359  return *this;
360  }
361 
362 
363  /// move operator
364  template < template < typename > class ALLOC >
368  if (this != &from) {
375 
376  if (real_variable__ != nullptr) delete real_variable__;
378  from.real_variable__ = nullptr;
379  }
380 
381  return *this;
382  }
383 
384 
385  /// returns the translation of a string, as found in the current dictionary
386  template < template < typename > class ALLOC >
388  const std::string& str) {
389  // check if the string is actually a number
390  if (!DBCell::isReal(str)) {
391  if (this->isMissingSymbol(str)) {
392  return DBTranslatedValue{std::numeric_limits< float >::max()};
393  } else
395  "String \""
396  << str
397  << "\" cannot be translated because it is not a number");
398  }
399 
400  // here we know that the string is a number
401  const float number = std::stof(str);
402 
403  // if we are in the range of the variable, return the number
405 
406  // check that this is not a missing value
407  if (this->isMissingSymbol(str)) {
410  }
411  return DBTranslatedValue{std::numeric_limits< float >::max()};
412  }
413 
414  // check if we are allowed to update the domain of the variable
415  if (!fit_range__) {
417  "String \"" << str
418  << "\" cannot be translated because it is "
419  "out of the domain of the continuous variable");
420  }
421 
422  // now, we can try to add str as a new bound of the range variable
423  // if possible
424 
425  // if the variable is empty, set the min and max ranges. Here,
426  // there is no need to check whether the new range would contain an
427  // already translated missing symbol because this was already tested
428  // in the above test.
429  if (variable__.lowerBound() == std::numeric_limits< float >::infinity()) {
432  return DBTranslatedValue{number};
433  }
434 
435  // here, the domain is not empty. So we should update either the
436  // lower bound or the upper bound of the variable, unless
437  // a missing symbol lies within the new bounds and we have already
438  // translated it.
439  const float lower_bound = variable__.lowerBound();
440  const float upper_bound = variable__.upperBound();
441  if (number < lower_bound) {
442  // check that there does not already exist a translated missing
443  // value within the new bounds of the variable
444  for (const auto& missing: status_float_missing_symbols__) {
445  if (missing.second) {
446  const float miss_val = std::stof(missing.first);
447  if ((miss_val >= number) && (miss_val <= upper_bound)) {
449  "String \""
450  << str << "\" cannot be translated because "
451  << "it would induce a new domain containing an already "
452  << "translated missing symbol");
453  }
454  }
455  }
456 
457  // remove all the missing symbols that were not translated yet and
458  // that lie within the new bounds of the variable
461  ++iter) {
462  if (iter.val() == false) {
463  const float miss_val = std::stof(iter.key());
464  if ((miss_val >= number) && (miss_val <= upper_bound)) {
465  this->missing_symbols_.erase(iter.key());
467  }
468  }
469  }
470 
471  // update the domain of the continuous variable
473 
474  return DBTranslatedValue{number};
475  } else {
476  // check that there does not already exist a translated missing
477  // value within the new bounds of the variable
478  for (const auto& missing: status_float_missing_symbols__) {
479  if (missing.second) {
480  const float miss_val = std::stof(missing.first);
481  if ((miss_val >= lower_bound) && (miss_val <= number)) {
483  "String \""
484  << str << "\" cannot be translated because "
485  << "it would induce a new domain containing an already "
486  << "translated missing symbol");
487  }
488  }
489  }
490 
491  // remove all the missing symbols that were not translated yet and
492  // that lie within the new bounds of the variable
495  ++iter) {
496  if (iter.val() == false) {
497  const float miss_val = std::stof(iter.key());
498  if ((miss_val >= lower_bound) && (miss_val <= number)) {
499  this->missing_symbols_.erase(iter.key());
501  }
502  }
503  }
504 
505  // update the domain of the continuous variable
507 
508  return DBTranslatedValue{number};
509  }
510  }
511 
512 
513  /// returns the original value for a given translation
 ///
 /// @param translated_val the translated value; its cont_val field is the one
 /// that is inspected here
 /// NOTE(review): several lines of this function (including its signature)
 /// are not visible in this listing, so the comments below only describe the
 /// statements that are shown.
514  template < template < typename > class ALLOC >
516  const DBTranslatedValue translated_val) const {
 // std::numeric_limits< float >::max() is the value this file returns as the
 // translation of a missing value
517  if (translated_val.cont_val == std::numeric_limits< float >::max()) {
 // NOTE(review): as written, *begin() is returned only when missing_symbols_
 // is empty, i.e., when there is no element to return. This test looks
 // inverted; presumably it should be !empty() -- confirm against the
 // complete source before changing it.
519  if (this->missing_symbols_.empty())
520  return *(this->missing_symbols_.begin());
521  }
522 
526  "The back translation of "
528  << " could not be found because the value is outside the "
529  << "domain of the continuous variable");
530  }
531 
 // the result is built in a fixed-size local buffer and returned as a
 // std::string (the statement filling the buffer is not shown in this listing)
532  char buffer[100];
534  return std::string(buffer);
535  }
536 
537 
538  /// indicates whether the translations should be reordered
539  template < template < typename > class ALLOC >
541  return false;
542  }
543 
544 
545  /// returns a mapping to reorder the current dictionary and updates it
546  template < template < typename > class ALLOC >
548  std::size_t,
549  ALLOC< std::pair< std::size_t, std::size_t > > >
551  return HashTable< std::size_t,
552  std::size_t,
553  ALLOC< std::pair< std::size_t, std::size_t > > >();
554  }
555 
556 
557  /// returns the domain size of a variable corresponding to the translations
558  template < template < typename > class ALLOC >
559  INLINE std::size_t
561  return std::numeric_limits< std::size_t >::max();
562  }
563 
564 
565  /// returns the variable stored into the translator
566  template < template < typename > class ALLOC >
571  return real_variable__;
572  }
573 
574 
575  /// returns the translation of a missing value
576  template < template < typename > class ALLOC >
579  return DBTranslatedValue{std::numeric_limits< float >::max()};
580  }
581 
582 
583  } /* namespace learning */
584 
585 } /* namespace gum */
586 
587 
588 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)