aGrUM  0.16.0
DBTranslator4DiscretizedVariable_tpl.h
Go to the documentation of this file.
1 
29 #include <utility>
30 #include <vector>
31 #include <sstream>
32 
35 
36 #ifndef DOXYGEN_SHOULD_SKIP_THIS
37 
38 namespace gum {
39 
40  namespace learning {
41 
42 
// Builds a translator from a DiscretizedVariable< GUM_SCALAR > and a list
// of missing symbols: the ticks of var are copied (cast to float) into
// __variable, the missing symbols that collide with the range or with the
// labels of var are dropped, the labels are stored into the back
// dictionary and a clone of var is kept in __real_variable.
// A SizeError is reported through GUM_ERROR when var.domainSize() exceeds
// max_dico_entries.
// NOTE(review): listing lines 46 and 50 (presumably the qualified
// constructor name and the type of `alloc`) are absent from this listing.
44  template < template < typename > class ALLOC >
45  template < typename GUM_SCALAR, template < typename > class XALLOC >
47  const DiscretizedVariable< GUM_SCALAR >& var,
48  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
49  std::size_t max_dico_entries,
51  alloc) :
52  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
53  missing_symbols,
54  false,
55  max_dico_entries,
56  alloc),
57  __variable(var.name(), var.description()) {
58  // check that the variable has not too many entries
59  if (var.domainSize() > max_dico_entries) {
60  GUM_ERROR(SizeError,
61  "the dictionary induced by the variable is too large");
62  }
63 
64  // copy the ticks of var into our internal variable
65  const auto& ticks = var.ticks();
66  for (const auto tick : ticks) {
67  __variable.addTick((float)tick);
68  }
69 
70  // get the bounds of the discretized variable
    // NOTE(review): ticks[0] and ticks.back() are read unconditionally, so
    // this assumes var has at least one tick -- TODO confirm
71  const float lower_bound = (float)ticks[0];
72  const float upper_bound = (float)ticks.back();
73 
74  // remove all the missing symbols corresponding to a number between
75  // lower_bound and upper_bound
    // NOTE(review): elements are erased while the set is being traversed;
    // presumably the "safe" iterators are what makes this legal -- confirm
    // against the documentation of Set
76  for (auto iter = this->_missing_symbols.beginSafe();
77  iter != this->_missing_symbols.endSafe();
78  ++iter) {
79  if (DBCell::isReal(*iter)) {
80  const float missing_val = std::stof(*iter);
81  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
82  this->_missing_symbols.erase(iter);
83  }
84  }
85  }
86 
87  // add the content of the variable into the back dictionary
    // (the i-th label of var is associated with index i)
88  std::size_t size = 0;
89  for (const auto& label : var.labels()) {
90  // if the label is also listed as a missing symbol, remove it from
91  // the set of missing symbols, so that the string is handled as a
92  // label of the variable rather than as a missing value.
93  // NOTE(review): only _missing_symbols is modified here; no entry is
94  // ever erased from _back_dico by this loop.
95  if (this->_missing_symbols.exists(label)) {
96  this->_missing_symbols.erase(label);
97  }
98 
99  this->_back_dico.insert(size, label);
100  ++size;
101  }
102 
103  // store a copy of the variable, that should be used by method variable ()
104  __real_variable = var.clone();
105 
106  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
107  }
108 
109 
// Builds a translator from an IDiscretizedVariable and a list of missing
// symbols: the ticks returned by var.ticksAsDoubles() are copied (cast to
// float) into __variable, the missing symbols that collide with the range
// or with the labels of var are dropped, the labels are stored into the
// back dictionary and a clone of var is kept in __real_variable.
// A SizeError is reported through GUM_ERROR when var.domainSize() exceeds
// max_dico_entries.
// NOTE(review): listing lines 113, 117 and 119 (presumably the qualified
// constructor name, the type of `alloc` and the start of the DBTranslator
// base initializer) are absent from this listing.
111  template < template < typename > class ALLOC >
112  template < template < typename > class XALLOC >
114  const IDiscretizedVariable& var,
115  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
116  std::size_t max_dico_entries,
118  alloc) :
120  missing_symbols,
121  false,
122  max_dico_entries,
123  alloc),
124  __variable(var.name(), var.description()) {
125  // check that the variable has not too many entries
126  if (var.domainSize() > max_dico_entries) {
127  GUM_ERROR(SizeError,
128  "the dictionary induced by the variable is too large");
129  }
130 
131  // copy the ticks of var into our internal variable
132  const auto ticks = var.ticksAsDoubles();
133  for (const auto tick : ticks) {
134  __variable.addTick((float)tick);
135  }
136 
137  // get the bounds of the discretized variable
    // NOTE(review): ticks[0] and ticks.back() are read unconditionally, so
    // this assumes var has at least one tick -- TODO confirm
138  const float lower_bound = float(ticks[0]);
139  const float upper_bound = float(ticks.back());
140 
141  // remove all the missing symbols corresponding to a number between
142  // lower_bound and upper_bound
    // NOTE(review): elements are erased while the set is being traversed;
    // presumably the "safe" iterators are what makes this legal -- confirm
    // against the documentation of Set
143  for (auto iter = this->_missing_symbols.beginSafe();
144  iter != this->_missing_symbols.endSafe();
145  ++iter) {
146  if (DBCell::isReal(*iter)) {
147  const float missing_val = std::stof(*iter);
148  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
149  this->_missing_symbols.erase(iter);
150  }
151  }
152  }
153 
154  // add the content of the variable into the back dictionary
    // (the i-th label of var is associated with index i)
155  std::size_t size = 0;
156  for (const auto& label : var.labels()) {
157  // if the label is also listed as a missing symbol, remove it from
158  // the set of missing symbols, so that the string is handled as a
159  // label of the variable rather than as a missing value.
160  // NOTE(review): only _missing_symbols is modified here; no entry is
161  // ever erased from _back_dico by this loop.
162  if (this->_missing_symbols.exists(label)) {
163  this->_missing_symbols.erase(label);
164  }
165 
166  this->_back_dico.insert(size, label);
167  ++size;
168  }
169 
170  // store a copy of the variable, that should be used by method variable ()
171  __real_variable = var.clone();
172 
173  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
174  }
175 
176 
// Builds a translator from a DiscretizedVariable< GUM_SCALAR > without any
// user-supplied missing symbol: the ticks of var are copied (cast to
// float) into __variable, the labels of var are stored into the back
// dictionary and a clone of var is kept in __real_variable.
// A SizeError is reported through GUM_ERROR when var.domainSize() exceeds
// max_dico_entries.
// NOTE(review): listing lines 180 and 183 (presumably the qualified
// constructor name and the type of `alloc`) are absent from this listing.
178  template < template < typename > class ALLOC >
179  template < typename GUM_SCALAR >
181  const DiscretizedVariable< GUM_SCALAR >& var,
182  std::size_t max_dico_entries,
184  alloc) :
185  DBTranslator< ALLOC >(
186  DBTranslatedValueType::DISCRETE, false, max_dico_entries, alloc),
187  __variable(var.name(), var.description()) {
188  // check that the variable has not too many entries
189  if (var.domainSize() > max_dico_entries) {
190  GUM_ERROR(SizeError,
191  "the dictionary induced by the variable is too large");
192  }
193 
194  // copy the ticks of var into our internal variable
195  const auto& ticks = var.ticks();
196  for (const auto tick : ticks) {
197  __variable.addTick((float)tick);
198  }
199 
200  // add the content of the variable into the back dictionary
    // (the i-th label of var is associated with index i)
201  std::size_t size = 0;
202  for (const auto& label : var.labels()) {
203  this->_back_dico.insert(size, label);
204  ++size;
205  }
206 
207  // store a copy of the variable, that should be used by method variable ()
208  __real_variable = var.clone();
209 
210  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
211  }
212 
213 
// Builds a translator from an IDiscretizedVariable without any
// user-supplied missing symbol: the ticks returned by var.ticksAsDoubles()
// are copied (cast to float) into __variable, the labels of var are stored
// into the back dictionary and a clone of var is kept in __real_variable.
// A SizeError is reported through GUM_ERROR when var.domainSize() exceeds
// max_dico_entries.
// NOTE(review): listing lines 216 and 219 (presumably the qualified
// constructor name and the type of `alloc`) are absent from this listing.
215  template < template < typename > class ALLOC >
217  const IDiscretizedVariable& var,
218  std::size_t max_dico_entries,
220  alloc) :
221  DBTranslator< ALLOC >(
222  DBTranslatedValueType::DISCRETE, false, max_dico_entries, alloc),
223  __variable(var.name(), var.description()) {
224  // check that the variable has not too many entries
225  if (var.domainSize() > max_dico_entries) {
226  GUM_ERROR(SizeError,
227  "the dictionary induced by the variable is too large");
228  }
229 
230  // copy the ticks of var into our internal variable
231  const auto ticks = var.ticksAsDoubles();
232  for (const auto tick : ticks) {
233  __variable.addTick((float)tick);
234  }
235 
236  // add the content of the variable into the back dictionary
    // (the i-th label of var is associated with index i)
237  std::size_t size = 0;
238  for (const auto& label : var.labels()) {
239  this->_back_dico.insert(size, label);
240  ++size;
241  }
242 
243  // store a copy of the variable, that should be used by method variable ()
244  __real_variable = var.clone();
245 
246  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
247  }
248 
249 
// Copy constructor with a given allocator: the DBTranslator base part is
// copy constructed from `from` with `alloc`, __variable is copied and
// __real_variable receives its own clone of from.__real_variable.
// NOTE(review): listing lines 252 and 254 (presumably the qualified
// constructor name and the type of `alloc`) are absent from this listing.
251  template < template < typename > class ALLOC >
253  const DBTranslator4DiscretizedVariable< ALLOC >& from,
255  alloc) :
256  DBTranslator< ALLOC >(from, alloc),
257  __variable(from.__variable) {
258  // store a copy of the variable, that should be used by method variable ()
    // NOTE(review): from.__real_variable is dereferenced without a null
    // check, whereas the move constructor/operator of this file leave a
    // nullptr in their source -- confirm moved-from objects are never copied
259  __real_variable = from.__real_variable->clone();
260 
261  GUM_CONS_CPY(DBTranslator4DiscretizedVariable);
262  }
263 
264 
// Copy constructor: delegates to the allocator-aware copy constructor,
// passing the allocator returned by from.getAllocator().
// NOTE(review): listing line 267 (presumably the qualified constructor
// name) is absent from this listing.
266  template < template < typename > class ALLOC >
268  const DBTranslator4DiscretizedVariable< ALLOC >& from) :
269  DBTranslator4DiscretizedVariable< ALLOC >(from, from.getAllocator()) {}
270 
271 
// Move constructor with a given allocator: the DBTranslator base part and
// __variable are move constructed from `from`, and the __real_variable
// pointer is taken over from `from`, which is left with a nullptr.
// NOTE(review): listing lines 274 and 276 (presumably the qualified
// constructor name and the type of `alloc`) are absent from this listing.
273  template < template < typename > class ALLOC >
275  DBTranslator4DiscretizedVariable< ALLOC >&& from,
277  alloc) :
278  DBTranslator< ALLOC >(std::move(from), alloc),
279  __variable(std::move(from.__variable)) {
280  // moves the copy of the variable, that should be used by method variable ()
    // (pointer hand-over: no clone is made, and `from` no longer refers to
    // the variable afterwards)
281  __real_variable = from.__real_variable;
282  from.__real_variable = nullptr;
283 
284  GUM_CONS_MOV(DBTranslator4DiscretizedVariable);
285  }
286 
287 
// Move constructor: delegates to the allocator-aware move constructor,
// passing the allocator returned by from.getAllocator().
// std::move(from) only casts `from` to an rvalue that is bound to the
// delegate's reference parameter, so from.getAllocator() is evaluated on a
// not-yet-moved object whatever the evaluation order of the arguments.
// NOTE(review): listing line 290 (presumably the qualified constructor
// name) is absent from this listing.
289  template < template < typename > class ALLOC >
291  DBTranslator4DiscretizedVariable< ALLOC >&& from) :
292  DBTranslator4DiscretizedVariable< ALLOC >(std::move(from),
293  from.getAllocator()) {}
294 
295 
// Virtual copy constructor with a given allocator: returns a pointer to a
// new translator that is a copy of *this, built with `alloc`.
// NOTE(review): listing lines 299 and 300 (presumably the qualified name
// clone and the type of `alloc`) are absent from this listing.
// NOTE(review): the storage comes from `allocator`, so the returned object
// presumably has to be destroyed and released through a compatible
// allocator rather than with a plain delete -- confirm with the callers.
297  template < template < typename > class ALLOC >
298  DBTranslator4DiscretizedVariable< ALLOC >*
301  alloc) const {
    // allocator for translators, constructed from alloc
302  ALLOC< DBTranslator4DiscretizedVariable< ALLOC > > allocator(alloc);
    // raw storage for exactly one translator
303  DBTranslator4DiscretizedVariable< ALLOC >* translator =
304  allocator.allocate(1);
305  try {
    // copy-construct *this into the storage just allocated
306  allocator.construct(translator, *this, alloc);
307  } catch (...) {
    // the construction failed: give the storage back before propagating
    // the exception unchanged
308  allocator.deallocate(translator, 1);
309  throw;
310  }
311  return translator;
312  }
313 
314 
// Virtual copy constructor: forwards to clone(alloc) with the allocator
// returned by this->getAllocator().
// NOTE(review): listing line 318 (presumably the qualified name clone and
// its const qualifier) is absent from this listing.
316  template < template < typename > class ALLOC >
317  INLINE DBTranslator4DiscretizedVariable< ALLOC >*
319  return clone(this->getAllocator());
320  }
321 
322 
// Destructor: releases the variable pointed to by __real_variable.
// NOTE(review): listing lines 325 and 326 (presumably the qualified
// destructor name) are absent from this listing.
324  template < template < typename > class ALLOC >
    // __real_variable is nullptr after the object has been moved from; the
    // test is only a guard, since deleting a null pointer has no effect
327  if (__real_variable != nullptr) delete __real_variable;
328 
329  GUM_DESTRUCTOR(DBTranslator4DiscretizedVariable);
330  }
331 
332 
// Copy operator: copies __variable and replaces the variable pointed to by
// __real_variable with a clone of from.__real_variable. Self-assignment
// is a no-op. Returns *this.
// NOTE(review): listing lines 336 and 339 are absent from this listing;
// line 339 presumably assigns the DBTranslator base part, as line 356 does
// in the move operator -- confirm against the original file.
334  template < template < typename > class ALLOC >
335  DBTranslator4DiscretizedVariable< ALLOC >&
337  operator=(const DBTranslator4DiscretizedVariable< ALLOC >& from) {
338  if (this != &from) {
340  __variable = from.__variable;
341 
    // release the variable currently held before cloning the one of from
    // NOTE(review): from.__real_variable is dereferenced without a null
    // check -- confirm moved-from translators are never used as a source
342  if (__real_variable != nullptr) delete __real_variable;
343  __real_variable = from.__real_variable->clone();
344  }
345 
346  return *this;
347  }
348 
349 
// Move operator: move-assigns the DBTranslator base part and __variable,
// deletes the variable currently held and takes over the pointer
// from.__real_variable, leaving a nullptr in `from`. Self-assignment is a
// no-op. Returns *this.
// NOTE(review): listing line 353 (presumably the class qualification of
// operator=) is absent from this listing.
351  template < template < typename > class ALLOC >
352  DBTranslator4DiscretizedVariable< ALLOC >&
354  operator=(DBTranslator4DiscretizedVariable< ALLOC >&& from) {
355  if (this != &from) {
356  DBTranslator< ALLOC >::operator=(std::move(from));
357  __variable = std::move(from.__variable);
358 
    // pointer hand-over: no clone is made
359  if (__real_variable != nullptr) delete __real_variable;
360  __real_variable = from.__real_variable;
361  from.__real_variable = nullptr;
362  }
363 
364  return *this;
365  }
366 
367 
// Translates a string read from the database into a DBTranslatedValue.
// The lookups are tried in this order: 1) the index of str within
// __variable, 2) the missing symbols, which all translate to
// std::numeric_limits< std::size_t >::max(), 3) the back dictionary.
// When the three of them fail, GUM_ERROR reports a TypeError if str is not
// a real number, and an UnknownLabelInDatabase otherwise.
// NOTE(review): listing line 370 (presumably the return type and the
// qualified name translate) is absent from this listing.
369  template < template < typename > class ALLOC >
371  const std::string& str) {
372  // try to get the index of str within the discretized variable.
373  try {
374  return DBTranslatedValue{std::size_t(__variable[str])};
375  } catch (gum::Exception&) {
376  // check for a missing symbol
377  if (this->isMissingSymbol(str))
378  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
379 
380  // fall back on the back dictionary: str may be one of the labels
381  // stored in it. This enables to execute
382  // translate ( translateBack ( translate ( str ) ) ) without an exception
383  try {
384  return DBTranslatedValue{this->_back_dico.first(str)};
385  } catch (gum::Exception&) {
    // str is unknown: pick the error type according to whether str is a
    // real number or not
386  if (!DBCell::isReal(str)) {
387  GUM_ERROR(TypeError,
388  "String \""
389  << str
390  << "\" cannot be translated because it is not a number");
391  } else {
392  GUM_ERROR(UnknownLabelInDatabase,
393  "The translation of \"" << str << "\" could not be found");
394  }
395  }
396  }
397  }
398 
399 
// Returns the string associated with a translated value: the entry of the
// back dictionary whose index is translated_val.discr_val if there is one.
// Otherwise, if the value is std::numeric_limits< std::size_t >::max() and
// at least one missing symbol is known, the first element of the set of
// missing symbols is returned. In all the other cases, GUM_ERROR reports
// an UnknownLabelInDatabase.
// NOTE(review): listing line 402 (presumably the return type and the
// qualified name translateBack) is absent from this listing.
401  template < template < typename > class ALLOC >
403  const DBTranslatedValue translated_val) const {
404  try {
405  return this->_back_dico.second(translated_val.discr_val);
406  } catch (Exception&) {
407  // check if this is a missing value
    // (max() is the value produced by translate for the missing symbols)
408  if ((translated_val.discr_val == std::numeric_limits< std::size_t >::max())
409  && !this->_missing_symbols.empty())
410  return *(this->_missing_symbols.begin());
411  else
412  GUM_ERROR(UnknownLabelInDatabase,
413  "The back translation of \"" << translated_val.discr_val
414  << "\" could not be found");
415  }
416  }
417 
418 
// Always returns false.
// NOTE(review): listing line 422 with the qualified function name is
// absent from this listing; presumably this is hasEditableDictionary(),
// i.e., the dictionary of this translator is never editable -- confirm.
420  template < template < typename > class ALLOC >
421  INLINE bool
423  return false;
424  }
425 
426 
// Does nothing: the body is empty.
// NOTE(review): listing line 430 with the qualified function name and its
// parameter is absent from this listing; presumably this is
// setEditableDictionaryMode(bool), whose argument is ignored -- confirm.
428  template < template < typename > class ALLOC >
429  INLINE void
431  }
432 
433 
// Always returns false.
// NOTE(review): listing line 436 with the return type and the qualified
// function name is absent from this listing; presumably this is
// needsReordering(), i.e., the translations never need to be reordered --
// confirm.
435  template < template < typename > class ALLOC >
437  return false;
438  }
439 
440 
// Returns a default-constructed HashTable from std::size_t to std::size_t.
// NOTE(review): listing line 446 with the qualified function name is
// absent from this listing; presumably this is reorder(), the empty table
// meaning that no translation has to be changed -- confirm.
442  template < template < typename > class ALLOC >
443  INLINE HashTable< std::size_t,
444  std::size_t,
445  ALLOC< std::pair< std::size_t, std::size_t > > >
447  return HashTable< std::size_t,
448  std::size_t,
449  ALLOC< std::pair< std::size_t, std::size_t > > >();
450  }
451 
452 
// Returns the domain size of the internal variable __variable.
// NOTE(review): listing line 456 with the qualified function name is
// absent from this listing; presumably this is domainSize() -- confirm.
454  template < template < typename > class ALLOC >
455  INLINE std::size_t
457  return __variable.domainSize();
458  }
459 
460 
// Returns the pointer __real_variable, i.e., the clone of the variable
// that was given to the constructor. No copy is made.
// The pointer is nullptr once the translator has been moved from.
// NOTE(review): listing line 464 with the qualified function name is
// absent from this listing; presumably this is variable() -- confirm.
462  template < template < typename > class ALLOC >
463  INLINE const IDiscretizedVariable*
465  return __real_variable;
466  }
467 
468 
// Returns a DBTranslatedValue holding
// std::numeric_limits< std::size_t >::max(), which is also the value that
// translate returns for the missing symbols.
// NOTE(review): listing line 472 with the qualified function name is
// absent from this listing; presumably this is missingValue() -- confirm.
470  template < template < typename > class ALLOC >
471  INLINE DBTranslatedValue
473  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
474  }
475 
476 
477  } /* namespace learning */
478 
479 } /* namespace gum */
480 
481 
482 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > _back_dico
the bijection relating back translated values and their original strings.
Definition: DBTranslator.h:396
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
DBTranslator4DiscretizedVariable< ALLOC > & operator=(const DBTranslator4DiscretizedVariable< ALLOC > &from)
copy operator
virtual DBTranslator4DiscretizedVariable< ALLOC > * clone() const
virtual copy constructor
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
STL namespace.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
void erase(const Key &k)
Erases an element from the set.
Definition: set_tpl.h:656
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
DBTranslator(DBTranslatedValueType val_type, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool editable_dictionary=true, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition: set_tpl.h:517
virtual const IDiscretizedVariable * variable() const final
returns the variable stored into the translator
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value
const iterator_safe & endSafe() const noexcept
The usual safe end iterator to parse the set.
Definition: set_tpl.h:502
virtual bool hasEditableDictionary() const final
indicates that the translator is never in editable dictionary mode
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition: set_tpl.h:607
virtual bool needsReordering() const final
indicates that the translations should never be reordered
allocator_type getAllocator() const
returns the allocator used by the translator
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
static bool isReal(const std::string &str)
determine whether a string corresponds precisely to a real number
Set< std::string, ALLOC< std::string > > _missing_symbols
the set of missing symbols
Definition: DBTranslator.h:385
DBTranslator< ALLOC > & operator=(const DBTranslator< ALLOC > &from)
copy operator
DBTranslator4DiscretizedVariable(const DiscretizedVariable< GUM_SCALAR > &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor with a discretized variable as translator
Base class for all aGrUM's exceptions.
Definition: exceptions.h:106
iterator_safe beginSafe() const
The usual safe begin iterator to parse the set.
Definition: set_tpl.h:488
virtual std::size_t domainSize() const final
returns the number of discretization intervals used for translations
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
returns an empty HashTable to indicate that no reordering is needed.
bool isMissingSymbol(const std::string &str) const
indicates whether a string corresponds to a missing symbol
typename DBTranslator< ALLOC >::allocator_type allocator_type
type for the allocators passed in arguments of methods
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55
virtual void setEditableDictionaryMode(bool new_mode) final
sets/unset the editable dictionary mode