aGrUM  0.14.2
DBTranslator4ContinuousVariable_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 
#  include <cstdio>
#  include <limits>
#  include <memory>
#  include <string>
#  include <utility>
#  include <vector>
30 
33 
34 namespace gum {
35 
36  namespace learning {
37 
38 
40  template < template < typename > class ALLOC >
41  template < template < typename > class XALLOC >
43  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
44  const bool fit_range,
46  alloc) :
47  DBTranslator< ALLOC >(DBTranslatedValueType::CONTINUOUS,
48  missing_symbols,
49  fit_range,
50  1,
51  alloc),
52  __variable("var", ""), __fit_range(fit_range) {
53  // Here, if fit_range is set to false, and the range of the
54  // random variable will remain (-inf,+inf). So all the missing symbols
55  // that are numbers should be discarded since they lie in the domain
56  // of the variable. On the other hand, if fit_range is true, each newly
57  // observed value will update the range of the variable, so that, again,
58  // all the missing symbols that are numbers should be discarded since
59  // they always end up lying in the domain of the variable.
60  for (auto iter = this->_missing_symbols.beginSafe();
61  iter != this->_missing_symbols.endSafe();
62  ++iter) {
63  if (DBCell::isReal(*iter)) { this->_missing_symbols.erase(iter); }
64  }
65 
66  // the remaining symbols are not numbers. Take the first one as
67  // the default missing symbols for back translations.
68  if (!this->_missing_symbols.empty()) {
69  __nonfloat_missing_symbol = *(this->_missing_symbols.begin());
70  }
71 
72  // if fit_range is true, we shall be able to update the ranges of
73  // the continuous variable. To indicate that we did not encountered any
74  // value yet in the database, we fix the lower bound of __variable to +max
75  if (__fit_range)
76  __variable.setLowerBound(std::numeric_limits< float >::infinity());
77 
78  // store a copy of the variable, that should be used by method variable ()
79  __real_variable = __variable.clone();
80 
81  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
82  }
83 
84 
86  template < template < typename > class ALLOC >
88  const bool fit_range,
90  alloc) :
91  DBTranslator< ALLOC >(
92  DBTranslatedValueType::CONTINUOUS, fit_range, 1, alloc),
93  __variable("var", ""), __fit_range(fit_range) {
94  // if fit_range is true, we shall be able to update the ranges of
95  // the continuous variable. To indicate that we did not encountered any
96  // value yet in the database, we fix the lower bound of __variable to +max
97  if (__fit_range)
98  __variable.setLowerBound(std::numeric_limits< float >::infinity());
99 
100  // store a copy of the variable, that should be used by method variable ()
101  __real_variable = __variable.clone();
102 
103  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
104  }
105 
106 
108  template < template < typename > class ALLOC >
109  template < typename GUM_SCALAR, template < typename > class XALLOC >
111  const ContinuousVariable< GUM_SCALAR >& var,
112  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
113  const bool fit_range,
115  alloc) :
117  missing_symbols,
118  fit_range,
119  1,
120  alloc),
121  __variable(var.name(), var.description()), __fit_range(fit_range) {
122  // get the bounds of the range variable
123  const float lower_bound = float(var.lowerBound());
124  const float upper_bound = float(var.upperBound());
125  __variable.setLowerBound(lower_bound);
126  __variable.setUpperBound(upper_bound);
127 
128  // remove all the missing symbols corresponding to a number between
129  // lower_bound and upper_bound
130  bool non_float_symbol_found = false;
131  for (auto iter = this->_missing_symbols.beginSafe();
132  iter != this->_missing_symbols.endSafe();
133  ++iter) {
134  if (DBCell::isReal(*iter)) {
135  const float missing_val = std::stof(*iter);
136  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
137  this->_missing_symbols.erase(iter);
138  } else
139  __status_float_missing_symbols.insert(*iter, false);
140  } else if (!non_float_symbol_found) {
141  non_float_symbol_found = true;
142  __nonfloat_missing_symbol = *iter;
143  }
144  }
145 
146  // store a copy of the variable, that should be used by method variable ()
147  __real_variable = var.clone();
148 
149  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
150  }
151 
152 
154  template < template < typename > class ALLOC >
155  template < typename GUM_SCALAR >
157  const ContinuousVariable< GUM_SCALAR >& var,
158  const bool fit_range,
160  alloc) :
161  DBTranslator< ALLOC >(
162  DBTranslatedValueType::CONTINUOUS, fit_range, 1, alloc),
163  __variable(var.name(), var.description()), __fit_range(fit_range) {
164  // get the bounds of the range variable
165  const float lower_bound = float(var.lowerBound());
166  const float upper_bound = float(var.upperBound());
167  __variable.setLowerBound(lower_bound);
168  __variable.setUpperBound(upper_bound);
169 
170  // store a copy of the variable, that should be used by method variable ()
171  __real_variable = var.clone();
172 
173  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
174  }
175 
176 
178  template < template < typename > class ALLOC >
179  template < template < typename > class XALLOC >
181  const IContinuousVariable& var,
182  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
183  const bool fit_range,
185  alloc) :
187  missing_symbols,
188  fit_range,
189  1,
190  alloc),
191  __variable(var.name(), var.description()), __fit_range(fit_range) {
192  // get the bounds of the range variable
193  const float lower_bound = float(var.lowerBoundAsDouble());
194  const float upper_bound = float(var.upperBoundAsDouble());
195  __variable.setLowerBound(lower_bound);
196  __variable.setUpperBound(upper_bound);
197 
198  // remove all the missing symbols corresponding to a number between
199  // lower_bound and upper_bound
200  bool non_float_symbol_found = false;
201  for (auto iter = this->_missing_symbols.beginSafe();
202  iter != this->_missing_symbols.endSafe();
203  ++iter) {
204  if (DBCell::isReal(*iter)) {
205  const float missing_val = std::stof(*iter);
206  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
207  this->_missing_symbols.erase(iter);
208  } else
209  __status_float_missing_symbols.insert(*iter, false);
210  } else if (!non_float_symbol_found) {
211  non_float_symbol_found = true;
212  __nonfloat_missing_symbol = *iter;
213  }
214  }
215 
216  // store a copy of the variable, that should be used by method variable ()
217  __real_variable = var.clone();
218 
219  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
220  }
221 
222 
224  template < template < typename > class ALLOC >
226  const IContinuousVariable& var,
227  const bool fit_range,
229  alloc) :
230  DBTranslator< ALLOC >(
231  DBTranslatedValueType::CONTINUOUS, fit_range, 1, alloc),
232  __variable(var.name(), var.description()), __fit_range(fit_range) {
233  // get the bounds of the range variable
234  const float lower_bound = var.lowerBoundAsDouble();
235  const float upper_bound = var.upperBoundAsDouble();
236  __variable.setLowerBound(lower_bound);
237  __variable.setUpperBound(upper_bound);
238 
239  // store a copy of the variable, that should be used by method variable ()
240  __real_variable = var.clone();
241 
242  GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
243  }
244 
245 
247  template < template < typename > class ALLOC >
249  const DBTranslator4ContinuousVariable< ALLOC >& from,
251  alloc) :
252  DBTranslator< ALLOC >(from, alloc),
253  __variable(from.__variable),
254  __status_float_missing_symbols(from.__status_float_missing_symbols),
255  __nonfloat_missing_symbol(from.__nonfloat_missing_symbol),
256  __fit_range(from.__fit_range) {
257  // store a copy of the variable, that should be used by method variable ()
258  __real_variable = from.__real_variable->clone();
259 
260  GUM_CONS_CPY(DBTranslator4ContinuousVariable);
261  }
262 
263 
265  template < template < typename > class ALLOC >
267  const DBTranslator4ContinuousVariable< ALLOC >& from) :
268  DBTranslator4ContinuousVariable< ALLOC >(from, from.getAllocator()) {}
269 
270 
272  template < template < typename > class ALLOC >
274  DBTranslator4ContinuousVariable< ALLOC >&& from,
276  alloc) :
277  DBTranslator< ALLOC >(std::move(from), alloc),
278  __variable(std::move(from.__variable)),
279  __status_float_missing_symbols(
280  std::move(from.__status_float_missing_symbols)),
281  __nonfloat_missing_symbol(std::move(from.__nonfloat_missing_symbol)),
282  __fit_range(from.__fit_range) {
283  // store a copy of the variable, that should be used by method variable ()
284  __real_variable = from.__real_variable;
285  from.__real_variable = nullptr;
286 
287  GUM_CONS_MOV(DBTranslator4ContinuousVariable);
288  }
289 
290 
292  template < template < typename > class ALLOC >
294  DBTranslator4ContinuousVariable< ALLOC >&& from) :
295  DBTranslator4ContinuousVariable< ALLOC >(std::move(from),
296  from.getAllocator()) {}
297 
298 
300  template < template < typename > class ALLOC >
301  DBTranslator4ContinuousVariable< ALLOC >*
304  alloc) const {
305  ALLOC< DBTranslator4ContinuousVariable< ALLOC > > allocator(alloc);
306  DBTranslator4ContinuousVariable< ALLOC >* translator = allocator.allocate(1);
307  try {
308  allocator.construct(translator, *this, alloc);
309  } catch (...) {
310  allocator.deallocate(translator, 1);
311  throw;
312  }
313  return translator;
314  }
315 
316 
318  template < template < typename > class ALLOC >
319  INLINE DBTranslator4ContinuousVariable< ALLOC >*
321  return clone(this->getAllocator());
322  }
323 
324 
326  template < template < typename > class ALLOC >
329  if (__real_variable != nullptr) delete __real_variable;
330 
331  GUM_DESTRUCTOR(DBTranslator4ContinuousVariable);
332  }
333 
334 
336  template < template < typename > class ALLOC >
337  DBTranslator4ContinuousVariable< ALLOC >&
339  operator=(const DBTranslator4ContinuousVariable< ALLOC >& from) {
340  if (this != &from) {
342  __variable = from.__variable;
343  __status_float_missing_symbols = from.__status_float_missing_symbols;
344  __nonfloat_missing_symbol = from.__nonfloat_missing_symbol;
345  __fit_range = from.__fit_range;
346 
347  if (__real_variable != nullptr) delete __real_variable;
348  __real_variable = from.__real_variable->clone();
349  }
350 
351  return *this;
352  }
353 
354 
356  template < template < typename > class ALLOC >
357  DBTranslator4ContinuousVariable< ALLOC >&
359  operator=(DBTranslator4ContinuousVariable< ALLOC >&& from) {
360  if (this != &from) {
361  DBTranslator< ALLOC >::operator=(std::move(from));
362  __variable = std::move(from.__variable);
363  __status_float_missing_symbols =
364  std::move(from.__status_float_missing_symbols);
365  __nonfloat_missing_symbol = std::move(from.__nonfloat_missing_symbol);
366  __fit_range = from.__fit_range;
367 
368  if (__real_variable != nullptr) delete __real_variable;
369  __real_variable = from.__real_variable;
370  from.__real_variable = nullptr;
371  }
372 
373  return *this;
374  }
375 
376 
378  template < template < typename > class ALLOC >
380  const std::string& str) {
381  // check if the string is actually a number
382  if (!DBCell::isReal(str)) {
383  if (this->isMissingSymbol(str)) {
384  return DBTranslatedValue{std::numeric_limits< float >::max()};
385  } else
386  GUM_ERROR(TypeError,
387  "String \""
388  << str
389  << "\" cannot be translated because it is not a number");
390  }
391 
392  // here we know that the string is a number
393  const float number = std::stof(str);
394 
395  // if we are in the range of the variable, return the number
396  if (__variable.belongs(number)) return DBTranslatedValue{number};
397 
398  // check that this is not a missing value
399  if (this->isMissingSymbol(str)) {
400  if (!__status_float_missing_symbols[str]) {
401  __status_float_missing_symbols[str] = true;
402  }
403  return DBTranslatedValue{std::numeric_limits< float >::max()};
404  }
405 
406  // check if we are allowed to update the domain of the variable
407  if (!__fit_range) {
408  GUM_ERROR(UnknownLabelInDatabase,
409  "String \"" << str
410  << "\" cannot be translated because it is "
411  "out of the domain of the continuous variable");
412  }
413 
414  // now, we can try to add str as a new bound of the range variable
415  // if possible
416 
417  // if the variable is empty, set the min and max ranges. Here,
418  // there is no need to check whether the new range would contain an
419  // already translated missing symbol because this was already tested
420  // in the above test.
421  if (__variable.lowerBound() == std::numeric_limits< float >::infinity()) {
422  __variable.setLowerBound(number);
423  __variable.setUpperBound(number);
424  return DBTranslatedValue{number};
425  }
426 
427  // here, the domain is not empty. So we should update either the
428  // lower bound or the upper bound of the variable, unless
429  // a missing symbol lies within the new bounds and we have already
430  // translated it.
431  const float lower_bound = __variable.lowerBound();
432  const float upper_bound = __variable.upperBound();
433  if (number < lower_bound) {
434  // check that there does not already exist a translated missing
435  // value within the new bounds of the variable
436  for (const auto& missing : __status_float_missing_symbols) {
437  if (missing.second) {
438  const float miss_val = std::stof(missing.first);
439  if ((miss_val >= number) && (miss_val <= upper_bound)) {
440  GUM_ERROR(OperationNotAllowed,
441  "String \""
442  << str << "\" cannot be translated because "
443  << "it would induce a new domain containing an already "
444  << "translated missing symbol");
445  }
446  }
447  }
448 
449  // remove all the missing symbols that were not translated yet and
450  // that lie within the new bounds of the variable
451  for (auto iter = __status_float_missing_symbols.beginSafe();
452  iter != __status_float_missing_symbols.endSafe();
453  ++iter) {
454  if (iter.val() == false) {
455  const float miss_val = std::stof(iter.key());
456  if ((miss_val >= number) && (miss_val <= upper_bound)) {
457  this->_missing_symbols.erase(iter.key());
458  __status_float_missing_symbols.erase(iter);
459  }
460  }
461  }
462 
463  // update the domain of the continuous variable
464  __variable.setLowerBound(number);
465 
466  return DBTranslatedValue{number};
467  } else {
468  // check that there does not already exist a translated missing
469  // value within the new bounds of the variable
470  for (const auto& missing : __status_float_missing_symbols) {
471  if (missing.second) {
472  const float miss_val = std::stof(missing.first);
473  if ((miss_val >= lower_bound) && (miss_val <= number)) {
474  GUM_ERROR(OperationNotAllowed,
475  "String \""
476  << str << "\" cannot be translated because "
477  << "it would induce a new domain containing an already "
478  << "translated missing symbol");
479  }
480  }
481  }
482 
483  // remove all the missing symbols that were not translated yet and
484  // that lie within the new bounds of the variable
485  for (auto iter = __status_float_missing_symbols.beginSafe();
486  iter != __status_float_missing_symbols.endSafe();
487  ++iter) {
488  if (iter.val() == false) {
489  const float miss_val = std::stof(iter.key());
490  if ((miss_val >= lower_bound) && (miss_val <= number)) {
491  this->_missing_symbols.erase(iter.key());
492  __status_float_missing_symbols.erase(iter);
493  }
494  }
495  }
496 
497  // update the domain of the continuous variable
498  __variable.setUpperBound(number);
499 
500  return DBTranslatedValue{number};
501  }
502  }
503 
504 
506  template < template < typename > class ALLOC >
508  const DBTranslatedValue translated_val) const {
509  if (translated_val.cont_val == std::numeric_limits< float >::max()) {
510  if (!__nonfloat_missing_symbol.empty()) return __nonfloat_missing_symbol;
511  if (this->_missing_symbols.empty())
512  return *(this->_missing_symbols.begin());
513  }
514 
515  if ((translated_val.cont_val < __variable.lowerBound())
516  || (translated_val.cont_val > __variable.upperBound())) {
517  GUM_ERROR(UnknownLabelInDatabase,
518  "The back translation of "
519  << translated_val.cont_val
520  << " could not be found because the value is outside the "
521  << "domain of the continuous variable");
522  }
523 
524  char buffer[100];
525  sprintf(buffer, "%g", translated_val.cont_val);
526  return std::string(buffer);
527  }
528 
529 
531  template < template < typename > class ALLOC >
533  return false;
534  }
535 
536 
538  template < template < typename > class ALLOC >
539  INLINE HashTable< std::size_t,
540  std::size_t,
541  ALLOC< std::pair< std::size_t, std::size_t > > >
543  return HashTable< std::size_t,
544  std::size_t,
545  ALLOC< std::pair< std::size_t, std::size_t > > >();
546  }
547 
548 
550  template < template < typename > class ALLOC >
551  INLINE std::size_t
553  return std::numeric_limits< std::size_t >::max();
554  }
555 
556 
558  template < template < typename > class ALLOC >
559  INLINE const IContinuousVariable*
561  __real_variable->setLowerBoundFromDouble(__variable.lowerBound());
562  __real_variable->setUpperBoundFromDouble(__variable.upperBound());
563  return __real_variable;
564  }
565 
566 
568  template < template < typename > class ALLOC >
569  INLINE DBTranslatedValue
571  return DBTranslatedValue{std::numeric_limits< float >::max()};
572  }
573 
574 
575  } /* namespace learning */
576 
577 } /* namespace gum */
578 
579 
580 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
virtual void setLowerBoundFromDouble(const double new_bound)=0
updates the lower bound of the domain of the variable
bool empty() const noexcept
Indicates whether the set is the empty set.
Definition: set_tpl.h:704
typename DBTranslator< ALLOC >::allocator_type allocator_type
type for the allocators passed in arguments of methods
virtual DBTranslator4ContinuousVariable< ALLOC > * clone() const
virtual copy constructor
STL namespace.
The databases' cell translators for continuous variables.
void erase(const Key &k)
Erases an element from the set.
Definition: set_tpl.h:653
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
DBTranslator(DBTranslatedValueType val_type, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool editable_dictionary=true, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition: set_tpl.h:514
DBTranslator4ContinuousVariable< ALLOC > & operator=(const DBTranslator4ContinuousVariable< ALLOC > &from)
copy operator
const iterator_safe & endSafe() const noexcept
The usual safe end iterator to parse the set.
Definition: set_tpl.h:499
virtual std::size_t domainSize() const final
returns std::numeric_limits<std::size_t>::max ()
DBTranslator4ContinuousVariable(const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool fit_range=false, const allocator_type &alloc=allocator_type())
default constructor without any initial variable
virtual bool needsReordering() const final
indicates that the translations should never be reordered
allocator_type getAllocator() const
returns the allocator used by the translator
The class representing the original values of the cells of databases.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
static bool isReal(const std::string &str)
determine whether a string corresponds precisely to a real number
Set< std::string, ALLOC< std::string > > _missing_symbols
the set of missing symbols
Definition: DBTranslator.h:382
DBTranslator< ALLOC > & operator=(const DBTranslator< ALLOC > &from)
copy operator
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
returns an empty mapping, indicating that old translations are equal to the newly reordered ones...
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
iterator_safe beginSafe() const
The usual safe begin iterator to parse the set.
Definition: set_tpl.h:485
bool isMissingSymbol(const std::string &str) const
indicates whether a string corresponds to a missing symbol
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
virtual const IContinuousVariable * variable() const final
returns the variable stored into the translator
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value