aGrUM  0.14.2
DBTranslator4DiscretizedVariable_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 #include <utility>
27 #include <vector>
28 #include <sstream>
29 
32 
33 #ifndef DOXYGEN_SHOULD_SKIP_THIS
34 
35 namespace gum {
36 
37  namespace learning {
38 
39 
// Constructor building the translator from a DiscretizedVariable< GUM_SCALAR >
// and a vector of missing-value symbols.
// NOTE(review): this listing skips its own lines 43 and 47 (the declarator
// and the type of `alloc`), so part of the signature is not visible here.
//   var              variable whose name, description, ticks and labels are
//                    copied into the translator
//   missing_symbols  strings forwarded to the DBTranslator base class
//   max_dico_entries upper bound checked against var.domainSize()
//   alloc            allocator forwarded to the DBTranslator base class
// Raises SizeError when var.domainSize() exceeds max_dico_entries.
41  template < template < typename > class ALLOC >
42  template < typename GUM_SCALAR, template < typename > class XALLOC >
44  const DiscretizedVariable< GUM_SCALAR >& var,
45  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
46  std::size_t max_dico_entries,
48  alloc) :
49  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
50  missing_symbols,
// third base-class argument: the dictionary is not editable
51  false,
52  max_dico_entries,
53  alloc),
54  __variable(var.name(), var.description()) {
55  // reject variables whose domain exceeds the allowed dictionary size
56  if (var.domainSize() > max_dico_entries) {
57  GUM_ERROR(SizeError,
58  "the dictionary induced by the variable is too large");
59  }
60 
61  // copy the ticks of var, converted to float, into our internal variable
62  const auto& ticks = var.ticks();
63  for (const auto tick : ticks) {
64  __variable.addTick((float)tick);
65  }
66 
67  // get the bounds of the discretized variable
// NOTE(review): ticks[0] and ticks.back() presume var has at least one
// tick -- confirm that callers never pass a variable without ticks
68  const float lower_bound = (float)ticks[0];
69  const float upper_bound = (float)ticks.back();
70 
71  // remove all the missing symbols corresponding to a number between
72  // lower_bound and upper_bound (both bounds included)
// NOTE(review): elements are erased while the set is being traversed;
// presumably the safe iterators (beginSafe/endSafe) tolerate this -- confirm
73  for (auto iter = this->_missing_symbols.beginSafe();
74  iter != this->_missing_symbols.endSafe();
75  ++iter) {
76  if (DBCell::isReal(*iter)) {
77  const float missing_val = std::stof(*iter);
78  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
79  this->_missing_symbols.erase(iter);
80  }
81  }
82  }
83 
84  // add the content of the variable into the back dictionary
// `size` is the index assigned to the next label
85  std::size_t size = 0;
86  for (const auto& label : var.labels()) {
87  // if the label is also registered as a missing symbol, remove it
88  // from the set of missing symbols, so that the string is handled
89  // as a regular label of the variable. The label is then inserted
90  // into the back dictionary in all cases (nothing is ever removed
91  // from the back dictionary here).
92  if (this->_missing_symbols.exists(label)) {
93  this->_missing_symbols.erase(label);
94  }
95 
96  this->_back_dico.insert(size, label);
97  ++size;
98  }
99 
100  // store a copy of the variable, that should be used by method variable ()
101  __real_variable = var.clone();
102 
103  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
104  }
105 
106 
// Constructor building the translator from an IDiscretizedVariable and a
// vector of missing-value symbols.
// NOTE(review): this listing skips its own lines 110, 114 and 116 (the
// declarator, the type of `alloc` and the start of the base-class
// initializer), so those parts are not visible here.
//   var              variable whose name, description, ticks and labels are
//                    copied into the translator
//   missing_symbols  candidate missing-value strings
//   max_dico_entries upper bound checked against var.domainSize()
//   alloc            allocator argument
// Raises SizeError when var.domainSize() exceeds max_dico_entries.
108  template < template < typename > class ALLOC >
109  template < template < typename > class XALLOC >
111  const IDiscretizedVariable& var,
112  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
113  std::size_t max_dico_entries,
115  alloc) :
117  missing_symbols,
118  false,
119  max_dico_entries,
120  alloc),
121  __variable(var.name(), var.description()) {
122  // reject variables whose domain exceeds the allowed dictionary size
123  if (var.domainSize() > max_dico_entries) {
124  GUM_ERROR(SizeError,
125  "the dictionary induced by the variable is too large");
126  }
127 
128  // copy the ticks of var, converted to float, into our internal variable
// `ticks` is held by value here (no reference), unlike in the
// DiscretizedVariable< GUM_SCALAR > overload
129  const auto ticks = var.ticksAsDoubles();
130  for (const auto tick : ticks) {
131  __variable.addTick((float)tick);
132  }
133 
134  // get the bounds of the discretized variable
// NOTE(review): ticks[0] and ticks.back() presume var has at least one
// tick -- confirm that callers never pass a variable without ticks
135  const float lower_bound = float(ticks[0]);
136  const float upper_bound = float(ticks.back());
137 
138  // remove all the missing symbols corresponding to a number between
139  // lower_bound and upper_bound (both bounds included)
// NOTE(review): elements are erased while the set is being traversed;
// presumably the safe iterators (beginSafe/endSafe) tolerate this -- confirm
140  for (auto iter = this->_missing_symbols.beginSafe();
141  iter != this->_missing_symbols.endSafe();
142  ++iter) {
143  if (DBCell::isReal(*iter)) {
144  const float missing_val = std::stof(*iter);
145  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
146  this->_missing_symbols.erase(iter);
147  }
148  }
149  }
150 
151  // add the content of the variable into the back dictionary
// `size` is the index assigned to the next label
152  std::size_t size = 0;
153  for (const auto& label : var.labels()) {
154  // if the label is also registered as a missing symbol, remove it
155  // from the set of missing symbols, so that the string is handled
156  // as a regular label of the variable. The label is then inserted
157  // into the back dictionary in all cases (nothing is ever removed
158  // from the back dictionary here).
159  if (this->_missing_symbols.exists(label)) {
160  this->_missing_symbols.erase(label);
161  }
162 
163  this->_back_dico.insert(size, label);
164  ++size;
165  }
166 
167  // store a copy of the variable, that should be used by method variable ()
168  __real_variable = var.clone();
169 
170  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
171  }
172 
173 
// Constructor building the translator from a DiscretizedVariable< GUM_SCALAR >
// without any missing symbol.
// NOTE(review): this listing skips its own lines 177 and 180 (the declarator
// and the type of `alloc`), so part of the signature is not visible here.
//   var              variable whose name, description, ticks and labels are
//                    copied into the translator
//   max_dico_entries upper bound checked against var.domainSize()
//   alloc            allocator forwarded to the DBTranslator base class
// Raises SizeError when var.domainSize() exceeds max_dico_entries.
175  template < template < typename > class ALLOC >
176  template < typename GUM_SCALAR >
178  const DiscretizedVariable< GUM_SCALAR >& var,
179  std::size_t max_dico_entries,
181  alloc) :
// NOTE(review): `false` is presumably the editable-dictionary flag of the
// base class -- confirm against this DBTranslator constructor overload
182  DBTranslator< ALLOC >(
183  DBTranslatedValueType::DISCRETE, false, max_dico_entries, alloc),
184  __variable(var.name(), var.description()) {
185  // reject variables whose domain exceeds the allowed dictionary size
186  if (var.domainSize() > max_dico_entries) {
187  GUM_ERROR(SizeError,
188  "the dictionary induced by the variable is too large");
189  }
190 
191  // copy the ticks of var, converted to float, into our internal variable
192  const auto& ticks = var.ticks();
193  for (const auto tick : ticks) {
194  __variable.addTick((float)tick);
195  }
196 
197  // add the content of the variable into the back dictionary:
// each label is stored at the index given by its rank in var.labels()
198  std::size_t size = 0;
199  for (const auto& label : var.labels()) {
200  this->_back_dico.insert(size, label);
201  ++size;
202  }
203 
204  // store a copy of the variable, that should be used by method variable ()
205  __real_variable = var.clone();
206 
207  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
208  }
209 
210 
// Constructor building the translator from an IDiscretizedVariable without
// any missing symbol.
// NOTE(review): this listing skips its own lines 213 and 216 (the declarator
// and the type of `alloc`), so part of the signature is not visible here.
//   var              variable whose name, description, ticks and labels are
//                    copied into the translator
//   max_dico_entries upper bound checked against var.domainSize()
//   alloc            allocator forwarded to the DBTranslator base class
// Raises SizeError when var.domainSize() exceeds max_dico_entries.
212  template < template < typename > class ALLOC >
214  const IDiscretizedVariable& var,
215  std::size_t max_dico_entries,
217  alloc) :
// NOTE(review): `false` is presumably the editable-dictionary flag of the
// base class -- confirm against this DBTranslator constructor overload
218  DBTranslator< ALLOC >(
219  DBTranslatedValueType::DISCRETE, false, max_dico_entries, alloc),
220  __variable(var.name(), var.description()) {
221  // reject variables whose domain exceeds the allowed dictionary size
222  if (var.domainSize() > max_dico_entries) {
223  GUM_ERROR(SizeError,
224  "the dictionary induced by the variable is too large");
225  }
226 
227  // copy the ticks of var, converted to float, into our internal variable
228  const auto ticks = var.ticksAsDoubles();
229  for (const auto tick : ticks) {
230  __variable.addTick((float)tick);
231  }
232 
233  // add the content of the variable into the back dictionary:
// each label is stored at the index given by its rank in var.labels()
234  std::size_t size = 0;
235  for (const auto& label : var.labels()) {
236  this->_back_dico.insert(size, label);
237  ++size;
238  }
239 
240  // store a copy of the variable, that should be used by method variable ()
241  __real_variable = var.clone();
242 
243  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
244  }
245 
246 
// Copy constructor with a given allocator: copies the base-class part and
// the internal variable of `from`, and clones the variable `from` owns.
// NOTE(review): this listing skips its own lines 249 and 251 (the declarator
// and the type of `alloc`).
248  template < template < typename > class ALLOC >
250  const DBTranslator4DiscretizedVariable< ALLOC >& from,
252  alloc) :
253  DBTranslator< ALLOC >(from, alloc),
254  __variable(from.__variable) {
255  // store a copy of the variable, that should be used by method variable ()
// NOTE(review): from.__real_variable is dereferenced without a null check,
// while the move constructor and move operator leave nullptr in their
// source -- copying a moved-from translator looks unsafe; confirm
256  __real_variable = from.__real_variable->clone();
257 
258  GUM_CONS_CPY(DBTranslator4DiscretizedVariable);
259  }
260 
261 
// Copy constructor: delegates to the allocator-aware copy constructor,
// passing the allocator returned by from.getAllocator().
// NOTE(review): this listing skips its own line 264 (the declarator).
263  template < template < typename > class ALLOC >
265  const DBTranslator4DiscretizedVariable< ALLOC >& from) :
266  DBTranslator4DiscretizedVariable< ALLOC >(from, from.getAllocator()) {}
267 
268 
// Move constructor with a given allocator: moves the base-class part and the
// internal variable out of `from`, and takes over the variable `from` owns.
// NOTE(review): this listing skips its own lines 271 and 273 (the declarator
// and the type of `alloc`).
270  template < template < typename > class ALLOC >
272  DBTranslator4DiscretizedVariable< ALLOC >&& from,
274  alloc) :
275  DBTranslator< ALLOC >(std::move(from), alloc),
276  __variable(std::move(from.__variable)) {
277  // take over the pointer to the variable used by method variable (), and
// reset the pointer of `from` so that the object is no longer reachable
// through `from`
278  __real_variable = from.__real_variable;
279  from.__real_variable = nullptr;
280 
281  GUM_CONS_MOV(DBTranslator4DiscretizedVariable);
282  }
283 
284 
// Move constructor: delegates to the allocator-aware move constructor,
// passing the allocator returned by from.getAllocator().
// std::move is only a cast, so `from` is still intact when getAllocator()
// is evaluated, whatever the evaluation order of the two arguments.
// NOTE(review): this listing skips its own line 287 (the declarator).
286  template < template < typename > class ALLOC >
288  DBTranslator4DiscretizedVariable< ALLOC >&& from) :
289  DBTranslator4DiscretizedVariable< ALLOC >(std::move(from),
290  from.getAllocator()) {}
291 
292 
// Virtual copy constructor with a given allocator: allocates room for one
// translator through an ALLOC built from `alloc`, copy-constructs *this into
// it and returns the new object.
// NOTE(review): this listing skips its own lines 296 and 297 (the declarator
// and the type of `alloc`); the name clone is inferred from the call made by
// the allocator-less overload.
294  template < template < typename > class ALLOC >
295  DBTranslator4DiscretizedVariable< ALLOC >*
298  alloc) const {
299  ALLOC< DBTranslator4DiscretizedVariable< ALLOC > > allocator(alloc);
300  DBTranslator4DiscretizedVariable< ALLOC >* translator =
301  allocator.allocate(1);
// if the construction throws, give the raw memory back before rethrowing
302  try {
303  allocator.construct(translator, *this, alloc);
304  } catch (...) {
305  allocator.deallocate(translator, 1);
306  throw;
307  }
308  return translator;
309  }
310 
311 
// Virtual copy constructor: forwards to clone(alloc) with the allocator
// returned by getAllocator().
// NOTE(review): this listing skips its own line 315 (the declarator).
313  template < template < typename > class ALLOC >
314  INLINE DBTranslator4DiscretizedVariable< ALLOC >*
316  return clone(this->getAllocator());
317  }
318 
319 
// Destructor (presumably: lines 322-323 of the listing, holding the
// declarator, are missing): deletes the variable pointed to by
// __real_variable.
321  template < template < typename > class ALLOC >
// the pointer is nullptr after this object has been moved from
324  if (__real_variable != nullptr) delete __real_variable;
325 
326  GUM_DESTRUCTOR(DBTranslator4DiscretizedVariable);
327  }
328 
329 
// Copy operator: copies the internal variable of `from` and replaces the
// variable currently pointed to by __real_variable with a clone of the one
// held by `from`. Self-assignment is a no-op. Returns *this.
// NOTE(review): this listing skips its own lines 333 and 336; line 336
// presumably held the base-class assignment, as in the move operator.
331  template < template < typename > class ALLOC >
332  DBTranslator4DiscretizedVariable< ALLOC >&
334  operator=(const DBTranslator4DiscretizedVariable< ALLOC >& from) {
335  if (this != &from) {
337  __variable = from.__variable;
338 
// NOTE(review): the old variable is deleted before the clone is made; if
// clone() throws, __real_variable keeps pointing to freed memory. Also,
// from.__real_variable is dereferenced without a null check, although a
// moved-from translator holds nullptr -- confirm both cases
339  if (__real_variable != nullptr) delete __real_variable;
340  __real_variable = from.__real_variable->clone();
341  }
342 
343  return *this;
344  }
345 
346 
// Move operator: moves the base-class part and the internal variable out of
// `from`, deletes the variable currently pointed to by __real_variable and
// takes over the one held by `from`. Self-assignment is a no-op.
// Returns *this.
// NOTE(review): this listing skips its own line 350.
348  template < template < typename > class ALLOC >
349  DBTranslator4DiscretizedVariable< ALLOC >&
351  operator=(DBTranslator4DiscretizedVariable< ALLOC >&& from) {
352  if (this != &from) {
353  DBTranslator< ALLOC >::operator=(std::move(from));
354  __variable = std::move(from.__variable);
355 
// release our own variable, then take the pointer of `from` and reset it
// there so that the object is no longer reachable through `from`
356  if (__real_variable != nullptr) delete __real_variable;
357  __real_variable = from.__real_variable;
358  from.__real_variable = nullptr;
359  }
360 
361  return *this;
362  }
363 
364 
// Returns the translation of string `str` (presumably method translate():
// line 367 of the listing, holding the declarator, is missing).
// Lookup order:
//   1. the index of str within the internal discretized variable;
//   2. if that raises a gum::Exception and str is a missing symbol, the
//      value std::numeric_limits< std::size_t >::max();
//   3. otherwise the index paired with str in the back dictionary.
// Raises TypeError when all lookups fail and str is not a real number,
// UnknownLabelInDatabase when all lookups fail and str is a real number.
366  template < template < typename > class ALLOC >
368  const std::string& str) {
369  // try to get the index of str within the discretized variable.
370  try {
371  return DBTranslatedValue{std::size_t(__variable[str])};
372  } catch (gum::Exception&) {
373  // check for a missing symbol
374  if (this->isMissingSymbol(str))
375  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
376 
377  // check whether the back_dictionary contains str. This enables
378  // to execute translate ( translateBack ( translate ( str ) ) )
379  // without raising an exception
380  try {
381  return DBTranslatedValue{this->_back_dico.first(str)};
382  } catch (gum::Exception&) {
// nothing matched: the error type depends on whether str is a number
383  if (!DBCell::isReal(str)) {
384  GUM_ERROR(TypeError,
385  "String \""
386  << str
387  << "\" cannot be translated because it is not a number");
388  } else {
389  GUM_ERROR(UnknownLabelInDatabase,
390  "The translation of \"" << str << "\" could not be found");
391  }
392  }
393  }
394  }
395 
396 
// Returns the string associated with a translated value (presumably method
// translateBack(): line 399 of the listing, holding the declarator, is
// missing).
// The string paired with translated_val.discr_val in the back dictionary is
// returned when it exists. Otherwise, if discr_val equals
// std::numeric_limits< std::size_t >::max() and the set of missing symbols is
// not empty, the element designated by _missing_symbols.begin() is returned.
// Raises UnknownLabelInDatabase in all the other cases.
398  template < template < typename > class ALLOC >
400  const DBTranslatedValue translated_val) const {
401  try {
402  return this->_back_dico.second(translated_val.discr_val);
403  } catch (Exception&) {
404  // check if this is a missing value
405  if ((translated_val.discr_val == std::numeric_limits< std::size_t >::max())
406  && !this->_missing_symbols.empty())
407  return *(this->_missing_symbols.begin());
408  else
409  GUM_ERROR(UnknownLabelInDatabase,
410  "The back translation of \"" << translated_val.discr_val
411  << "\" could not be found");
412  }
413  }
414 
415 
// Always returns false.
// NOTE(review): line 419 of the listing (the declarator) is missing; this is
// presumably hasEditableDictionary() -- confirm against the class header.
417  template < template < typename > class ALLOC >
418  INLINE bool
420  return false;
421  }
422 
423 
// Deliberately empty body: calling this method changes nothing.
// NOTE(review): line 427 of the listing (the declarator) is missing; this is
// presumably setEditableDictionaryMode(bool) -- confirm against the class
// header.
425  template < template < typename > class ALLOC >
426  INLINE void
428  }
429 
430 
// Always returns false.
// NOTE(review): line 433 of the listing (the declarator) is missing; this is
// presumably needsReordering() -- confirm against the class header.
432  template < template < typename > class ALLOC >
434  return false;
435  }
436 
437 
// Returns a default-constructed (hence empty) HashTable mapping std::size_t
// to std::size_t.
// NOTE(review): line 443 of the listing (the declarator) is missing; the
// return type matches method reorder(), for which an empty table presumably
// means that no reordering is needed.
439  template < template < typename > class ALLOC >
440  INLINE HashTable< std::size_t,
441  std::size_t,
442  ALLOC< std::pair< std::size_t, std::size_t > > >
444  return HashTable< std::size_t,
445  std::size_t,
446  ALLOC< std::pair< std::size_t, std::size_t > > >();
447  }
448 
449 
// Returns the domain size of the internal variable (__variable).
// NOTE(review): line 453 of the listing (the declarator) is missing; this is
// presumably domainSize().
451  template < template < typename > class ALLOC >
452  INLINE std::size_t
454  return __variable.domainSize();
455  }
456 
457 
// Returns the pointer stored in __real_variable, i.e. the clone made by the
// constructors; it is nullptr once the translator has been moved from.
// NOTE(review): line 461 of the listing (the declarator) is missing; this is
// presumably variable().
459  template < template < typename > class ALLOC >
460  INLINE const IDiscretizedVariable*
462  return __real_variable;
463  }
464 
465 
// Returns a DBTranslatedValue built from
// std::numeric_limits< std::size_t >::max(), the value the translation
// method also returns for missing symbols.
// NOTE(review): line 469 of the listing (the declarator) is missing; this is
// presumably missingValue().
467  template < template < typename > class ALLOC >
468  INLINE DBTranslatedValue
470  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
471  }
472 
473 
474  } /* namespace learning */
475 
476 } /* namespace gum */
477 
478 
479 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > _back_dico
the bijection relating back translated values and their original strings.
Definition: DBTranslator.h:393
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
DBTranslator4DiscretizedVariable< ALLOC > & operator=(const DBTranslator4DiscretizedVariable< ALLOC > &from)
copy operator
virtual DBTranslator4DiscretizedVariable< ALLOC > * clone() const
virtual copy constructor
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
STL namespace.
The databases' cell translators for discretized variables.
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
void erase(const Key &k)
Erases an element from the set.
Definition: set_tpl.h:653
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
DBTranslator(DBTranslatedValueType val_type, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool editable_dictionary=true, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition: set_tpl.h:514
virtual const IDiscretizedVariable * variable() const final
returns the variable stored into the translator
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value
const iterator_safe & endSafe() const noexcept
The usual safe end iterator to parse the set.
Definition: set_tpl.h:499
virtual bool hasEditableDictionary() const final
indicates that the translator is never in editable dictionary mode
bool exists(const Key &k) const
Indicates whether a given element belongs to the set.
Definition: set_tpl.h:604
virtual bool needsReordering() const final
indicates that the translations should never be reordered
allocator_type getAllocator() const
returns the allocator used by the translator
The class representing the original values of the cells of databases.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
static bool isReal(const std::string &str)
determine whether a string corresponds precisely to a real number
Set< std::string, ALLOC< std::string > > _missing_symbols
the set of missing symbols
Definition: DBTranslator.h:382
DBTranslator< ALLOC > & operator=(const DBTranslator< ALLOC > &from)
copy operator
DBTranslator4DiscretizedVariable(const DiscretizedVariable< GUM_SCALAR > &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor with a discretized variable as translator
Base class for all aGrUM's exceptions.
Definition: exceptions.h:103
iterator_safe beginSafe() const
The usual safe begin iterator to parse the set.
Definition: set_tpl.h:485
virtual std::size_t domainSize() const final
returns the number of discretization intervals used for translations
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
returns an empty HashTable to indicate that no reordering is needed.
bool isMissingSymbol(const std::string &str) const
indicates whether a string corresponds to a missing symbol
typename DBTranslator< ALLOC >::allocator_type allocator_type
type for the allocators passed in arguments of methods
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
virtual void setEditableDictionaryMode(bool new_mode) final
sets/unsets the editable dictionary mode