aGrUM  0.17.2
a C++ library for (probabilistic) graphical models
DBTranslator4RangeVariable_tpl.h
Go to the documentation of this file.
1 
29 #include <utility>
30 #include <vector>
31 #include <limits>
32 #include <cstdio>
33 
36 
37 #ifndef DOXYGEN_SHOULD_SKIP_THIS
38 
39 namespace gum {
40 
41  namespace learning {
42 
43 
45  template < template < typename > class ALLOC >
46  template < template < typename > class XALLOC >
48  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
49  std::size_t max_dico_entries,
51  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
52  missing_symbols,
53  true,
54  max_dico_entries,
55  alloc),
56  __variable("var", "", 1, 0) {
57  // assign to each integer missing symbol a Boolean indicating that
58  // we did not translate it yet. If we encounter a non integer missing
59  // symbol, we record it because it cannot be compomised by updating the
60  // domain of the range variable
61  bool non_int_symbol_found = false;
62  for (const auto& symbol: this->_missing_symbols) {
63  if (DBCell::isInteger(symbol)) {
64  __status_int_missing_symbols.insert(symbol, false);
65  } else if (!non_int_symbol_found) {
66  non_int_symbol_found = true;
67  __nonint_missing_symbol = symbol;
68  }
69  }
70 
71  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
72  }
73 
74 
76  template < template < typename > class ALLOC >
78  std::size_t max_dico_entries,
80  DBTranslator< ALLOC >(
81  DBTranslatedValueType::DISCRETE, true, max_dico_entries, alloc),
82  __variable("var", "", 1, 0) {
83  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
84  }
85 
86 
88  template < template < typename > class ALLOC >
89  template < template < typename > class XALLOC >
91  const RangeVariable& var,
92  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
93  const bool editable_dictionary,
94  std::size_t max_dico_entries,
97  missing_symbols,
98  editable_dictionary,
99  max_dico_entries,
100  alloc),
101  __variable(var) {
102  // get the bounds of the range variable
103  const long lower_bound = var.minVal();
104  const long upper_bound = var.maxVal();
105 
106  // check that the variable has not too many entries for the dictionary
107  if ((upper_bound >= lower_bound)
108  && (std::size_t(upper_bound - lower_bound + 1)
109  > this->_max_dico_entries)) {
110  GUM_ERROR(SizeError,
111  "the dictionary induced by the variable is too large");
112  }
113 
114  // if the range variable is not empty, i.e., its upper bound is greater
115  // than or equal to its lower bound, remove all the missing symbols
116  // corresponding to a number between lower_bound and upper_bound
117  if (lower_bound <= upper_bound) {
118  for (auto iter = this->_missing_symbols.beginSafe();
119  iter != this->_missing_symbols.endSafe();
120  ++iter) {
121  if (DBCell::isInteger(*iter)) {
122  const long missing_val = std::stol(*iter);
123  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
124  this->_missing_symbols.erase(iter);
125  }
126  }
127  }
128  }
129 
130  // add the content of the variable into the back dictionary
131  std::size_t size = 0;
132  for (const auto& label: var.labels()) {
133  // insert the label into the back_dictionary
134  this->_back_dico.insert(size, label);
135  ++size;
136  }
137 
138  // assign to each integer missing symbol a Boolean indicating that
139  // we did not translate it yet. If we encounter a non integer symbol,
140  // we record it because it cannot be compomised by updating the domain
141  // of the range variable. This will be useful for back translations
142  bool non_int_symbol_found = false;
143  for (const auto& symbol: this->_missing_symbols) {
144  if (DBCell::isInteger(symbol)) {
145  __status_int_missing_symbols.insert(symbol, false);
146  } else if (!non_int_symbol_found) {
147  non_int_symbol_found = true;
148  __nonint_missing_symbol = symbol;
149  }
150  }
151 
152  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
153  }
154 
155 
157  template < template < typename > class ALLOC >
159  const RangeVariable& var,
160  const bool editable_dictionary,
161  std::size_t max_dico_entries,
164  editable_dictionary,
165  max_dico_entries,
166  alloc),
167  __variable(var) {
168  // get the bounds of the range variable
169  const long lower_bound = var.minVal();
170  const long upper_bound = var.maxVal();
171 
172  // check that the variable has not too many entries for the dictionary
173  if ((upper_bound >= lower_bound)
174  && (std::size_t(upper_bound - lower_bound + 1)
175  > this->_max_dico_entries)) {
176  GUM_ERROR(SizeError,
177  "the dictionary induced by the variable is too large");
178  }
179 
180  // add the content of the variable into the back dictionary
181  std::size_t size = 0;
182  for (const auto& label: var.labels()) {
183  // insert the label into the back_dictionary
184  this->_back_dico.insert(size, label);
185  ++size;
186  }
187 
188  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
189  }
190 
191 
193  template < template < typename > class ALLOC >
195  const DBTranslator4RangeVariable< ALLOC >& from,
197  DBTranslator< ALLOC >(from, alloc),
198  __variable(from.__variable),
199  __status_int_missing_symbols(from.__status_int_missing_symbols),
200  __translated_int_missing_symbols(from.__translated_int_missing_symbols),
201  __nonint_missing_symbol(from.__nonint_missing_symbol) {
202  GUM_CONS_CPY(DBTranslator4RangeVariable);
203  }
204 
205 
207  template < template < typename > class ALLOC >
209  const DBTranslator4RangeVariable< ALLOC >& from) :
210  DBTranslator4RangeVariable< ALLOC >(from, from.getAllocator()) {}
211 
212 
214  template < template < typename > class ALLOC >
216  DBTranslator4RangeVariable< ALLOC >&& from,
218  DBTranslator< ALLOC >(std::move(from), alloc),
219  __variable(std::move(from.__variable)),
220  __status_int_missing_symbols(std::move(from.__status_int_missing_symbols)),
221  __translated_int_missing_symbols(
222  std::move(from.__translated_int_missing_symbols)),
223  __nonint_missing_symbol(std::move(from.__nonint_missing_symbol)) {
224  GUM_CONS_MOV(DBTranslator4RangeVariable);
225  }
226 
227 
229  template < template < typename > class ALLOC >
231  DBTranslator4RangeVariable< ALLOC >&& from) :
232  DBTranslator4RangeVariable< ALLOC >(std::move(from), from.getAllocator()) {
233  }
234 
235 
237  template < template < typename > class ALLOC >
238  DBTranslator4RangeVariable< ALLOC >*
241  alloc) const {
242  ALLOC< DBTranslator4RangeVariable< ALLOC > > allocator(alloc);
243  DBTranslator4RangeVariable< ALLOC >* translator = allocator.allocate(1);
244  try {
245  allocator.construct(translator, *this, alloc);
246  } catch (...) {
247  allocator.deallocate(translator, 1);
248  throw;
249  }
250  return translator;
251  }
252 
253 
255  template < template < typename > class ALLOC >
256  INLINE DBTranslator4RangeVariable< ALLOC >*
258  return clone(this->getAllocator());
259  }
260 
261 
263  template < template < typename > class ALLOC >
265  GUM_DESTRUCTOR(DBTranslator4RangeVariable);
266  }
267 
268 
270  template < template < typename > class ALLOC >
271  DBTranslator4RangeVariable< ALLOC >&
273  const DBTranslator4RangeVariable< ALLOC >& from) {
274  if (this != &from) {
276  __variable = from.__variable;
277  __status_int_missing_symbols = from.__status_int_missing_symbols;
278  __translated_int_missing_symbols = from.__translated_int_missing_symbols;
279  __nonint_missing_symbol = from.__nonint_missing_symbol;
280  }
281 
282  return *this;
283  }
284 
285 
287  template < template < typename > class ALLOC >
288  DBTranslator4RangeVariable< ALLOC >&
290  DBTranslator4RangeVariable< ALLOC >&& from) {
291  if (this != &from) {
292  DBTranslator< ALLOC >::operator=(std::move(from));
293  __variable = std::move(from.__variable);
294  __status_int_missing_symbols =
295  std::move(from.__status_int_missing_symbols);
296  __translated_int_missing_symbols =
297  std::move(from.__translated_int_missing_symbols);
298  __nonint_missing_symbol = std::move(from.__nonint_missing_symbol);
299  }
300 
301  return *this;
302  }
303 
304 
306  template < template < typename > class ALLOC >
307  DBTranslatedValue
308  DBTranslator4RangeVariable< ALLOC >::translate(const std::string& str) {
309  // try to get the index of str within the labelized variable. If this
310  // cannot be found, try to find if this corresponds to a missing value.
311  // Finally, if this is still not a missing value and, if enabled, try
312  // to add str as a new label
313  try {
314  return DBTranslatedValue{this->_back_dico.first(str)};
315  } catch (gum::Exception&) {
316  // check that this is not a missing value
317  if (this->isMissingSymbol(str)) {
318  try {
319  const bool is_str_translated = __status_int_missing_symbols[str];
320  if (!is_str_translated) {
321  __status_int_missing_symbols[str] = true;
322  __translated_int_missing_symbols.insert(std::stol(str));
323  }
324  } catch (gum::NotFound&) {}
325  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
326  }
327 
328  // check if we are allowed to update the range variable
329  if (!this->hasEditableDictionary()) {
330  GUM_ERROR(UnknownLabelInDatabase,
331  "The translation of String \"" << str
332  << "\" could not be found");
333  }
334 
335  // check if str could correspond to a bound of the range variable
336  if (!DBCell::isInteger(str)) {
337  GUM_ERROR(TypeError,
338  "String \"" << str << "\" cannot be translated because "
339  << "it cannot be converted into an integer");
340  }
341  const long new_value = std::stol(str);
342 
343  // if str corresponds to a missing symbol that we already
344  // translated, raise an exception
345  if (__translated_int_missing_symbols.exists(new_value)) {
346  GUM_ERROR(
347  OperationNotAllowed,
348  "String \""
349  << str << "\" cannot be translated because "
350  << "it corresponds to an already translated missing symbol");
351  }
352 
353  // now, we can try to add str as a new bound of the range variable
354  // if possible
355 
356  // if the range variable is empty, set the min and max ranges. Here,
357  // there is no need to check whether the new range would contain an
358  // already translated missing symbol because this was already tested
359  // in the above test.
360  if (__variable.minVal() > __variable.maxVal()) {
361  if (this->_max_dico_entries == 0) {
362  GUM_ERROR(SizeError,
363  "String \"" << str << "\" cannot be translated because "
364  << "the dictionary is already full");
365  }
366  __variable.setMinVal(new_value);
367  __variable.setMaxVal(new_value);
368  this->_back_dico.insert(std::size_t(0), str);
369  return DBTranslatedValue{std::size_t(0)};
370  }
371 
372  // here, the domain is not empty. So we should update either the
373  // lower bound or the upper bound of the range variable, unless
374  // a missing symbol lies within the new bounds and we have already
375  // translated it.
376  const long lower_bound = __variable.minVal();
377  const long upper_bound = __variable.maxVal();
378 
379  std::size_t size = upper_bound - lower_bound + 1;
380 
381  if (new_value < __variable.minVal()) {
382  if (std::size_t(upper_bound - new_value + 1) > this->_max_dico_entries)
383  GUM_ERROR(SizeError,
384  "String \"" << str << "\" cannot be translated because "
385  << "the dictionary is already full");
386 
387  // check that there does not already exist a translated missing
388  // value within the new bounds of the range variable
389  for (const auto& missing: __translated_int_missing_symbols) {
390  if ((missing >= new_value) && (missing <= upper_bound)) {
391  GUM_ERROR(OperationNotAllowed,
392  "String \""
393  << str << "\" cannot be translated "
394  << "because it would induce a new range containing "
395  << "an already translated missing symbol");
396  }
397  }
398 
399  // remove all the missing symbols that were not translated yet and
400  // that lie within the new bounds of the range variable
401  for (auto iter = __status_int_missing_symbols.beginSafe();
402  iter != __status_int_missing_symbols.endSafe();
403  ++iter) {
404  if (iter.val() == false) {
405  const long missing = std::stol(iter.key());
406  if ((missing >= new_value) && (missing <= upper_bound)) {
407  this->_missing_symbols.erase(iter.key());
408  __status_int_missing_symbols.erase(iter);
409  }
410  }
411  }
412 
413  // update the range and the back dictionary
414  const std::size_t index = size;
415  for (long i = new_value; i < __variable.minVal(); ++i) {
416  this->_back_dico.insert(size, std::to_string(i));
417  ++size;
418  }
419  __variable.setMinVal(new_value);
420 
421  return DBTranslatedValue{index};
422  } else {
423  if (std::size_t(new_value - lower_bound + 1) > this->_max_dico_entries)
424  GUM_ERROR(SizeError,
425  "String \"" << str << "\" cannot be translated because "
426  << "the dictionary is already full");
427 
428  // check that there does not already exist a translated missing
429  // value within the new bounds of the range variable
430  for (const auto& missing: __translated_int_missing_symbols) {
431  if ((missing <= new_value) && (missing >= lower_bound)) {
432  GUM_ERROR(OperationNotAllowed,
433  "String \""
434  << str << "\" cannot be translated "
435  << "because it would induce a new range containing "
436  << "an already translated missing symbol");
437  }
438  }
439 
440  // remove all the missing symbols that were not translated yet and
441  // that lie within the new bounds of the range variable
442  for (auto iter = __status_int_missing_symbols.beginSafe();
443  iter != __status_int_missing_symbols.endSafe();
444  ++iter) {
445  if (iter.val() == false) {
446  const long missing = std::stol(iter.key());
447  if ((missing <= new_value) && (missing >= lower_bound)) {
448  this->_missing_symbols.erase(iter.key());
449  __status_int_missing_symbols.erase(iter);
450  }
451  }
452  }
453 
454  // update the range and the back dictionary
455  for (long i = __variable.maxVal() + 1; i <= new_value; ++i) {
456  this->_back_dico.insert(size, std::to_string(i));
457  ++size;
458  }
459  __variable.setMaxVal(new_value);
460 
461  return DBTranslatedValue{size - std::size_t(1)};
462  }
463  }
464  }
465 
466 
468  template < template < typename > class ALLOC >
470  const DBTranslatedValue translated_val) const {
471  try {
472  return this->_back_dico.second(translated_val.discr_val);
473  } catch (Exception&) {
474  // check if this is a missing value
475  if (translated_val.discr_val
476  == std::numeric_limits< std::size_t >::max()) {
477  if (!__nonint_missing_symbol.empty()) return __nonint_missing_symbol;
478  if (this->_missing_symbols.empty())
479  return *(this->_missing_symbols.begin());
480  }
481 
482  GUM_ERROR(UnknownLabelInDatabase,
483  "The back translation of \"" << translated_val.discr_val
484  << "\" could not be found");
485  }
486  }
487 
488 
490  template < template < typename > class ALLOC >
492  // if the variable contains only numbers, they should be increasing
493  const auto& labels = __variable.labels();
494  std::size_t last_number = std::numeric_limits< std::size_t >::lowest();
495  std::size_t number;
496  for (const auto& label: labels) {
497  number = this->_back_dico.first(label);
498  if (number < last_number) return true;
499  last_number = number;
500  }
501 
502  return false;
503  }
504 
505 
507  template < template < typename > class ALLOC >
508  INLINE HashTable< std::size_t,
509  std::size_t,
510  ALLOC< std::pair< std::size_t, std::size_t > > >
512  // assign to each label the index it had before reordering
513  const auto& labels = __variable.labels();
514  const std::size_t size = labels.size();
515  std::vector< std::pair< std::size_t, std::string >,
516  ALLOC< std::pair< std::size_t, std::string > > >
517  xlabels;
518  xlabels.reserve(size);
519  bool modifications = false;
520  for (std::size_t i = std::size_t(0); i < size; ++i) {
521  const std::size_t old_val = this->_back_dico.first(labels[i]);
522  xlabels.push_back(std::make_pair(old_val, labels[i]));
523  if (old_val != i) modifications = true;
524  }
525 
526 
527  // if there were no modification, return an empty update hashtable
528  if (!modifications) {
529  return HashTable< std::size_t,
530  std::size_t,
531  ALLOC< std::pair< std::size_t, std::size_t > > >();
532  }
533 
534  // create the hashTable corresponding to the mapping from the old
535  // indices to the new one
536  this->_back_dico.clear();
537  HashTable< std::size_t,
538  std::size_t,
539  ALLOC< std::pair< std::size_t, std::size_t > > >
540  mapping((Size)size);
541  for (std::size_t i = std::size_t(0); i < size; ++i) {
542  mapping.insert(xlabels[i].first, i);
543  this->_back_dico.insert(i, xlabels[i].second);
544  }
545 
546  return mapping;
547  }
548 
549 
551  template < template < typename > class ALLOC >
552  INLINE std::size_t DBTranslator4RangeVariable< ALLOC >::domainSize() const {
553  return __variable.domainSize();
554  }
555 
556 
558  template < template < typename > class ALLOC >
559  INLINE const RangeVariable*
561  return &__variable;
562  }
563 
564 
566  template < template < typename > class ALLOC >
567  INLINE DBTranslatedValue
569  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
570  }
571 
572 
573  } /* namespace learning */
574 
575 } /* namespace gum */
576 
577 
578 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > _back_dico
the bijection relating back translated values and their original strings.
Definition: DBTranslator.h:396
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
virtual bool needsReordering() const final
indicates whether a reordering is needed to make the translations sorted by increasing numbers ...
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
void clear()
Removes all the associations from the gum::Bijection.
virtual const RangeVariable * variable() const final
returns the variable stored into the translator
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
std::size_t _max_dico_entries
the maximum number of entries that the dictionary is allowed to contain
Definition: DBTranslator.h:382
static bool isInteger(const std::string &str)
determines whether a string corresponds precisely to an integer
Copyright 2005-2020 Pierre-Henri WUILLEMIN () et Christophe GONZALES () info_at_agrum_dot_org.
STL namespace.
DBTranslator4RangeVariable< ALLOC > & operator=(const DBTranslator4RangeVariable< ALLOC > &from)
copy operator
Copyright 2005-2020 Pierre-Henri WUILLEMIN () et Christophe GONZALES () info_at_agrum_dot_org.
Definition: agrum.h:25
DBTranslator(DBTranslatedValueType val_type, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool editable_dictionary=true, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor
virtual ~DBTranslator4RangeVariable()
destructor
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value
allocator_type getAllocator() const
returns the allocator used by the translator
Copyright 2005-2020 Pierre-Henri WUILLEMIN () et Christophe GONZALES () info_at_agrum_dot_org.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
std::string to_string(const Formula &f)
Definition: formula_inl.h:499
Set< std::string, ALLOC< std::string > > _missing_symbols
the set of missing symbols
Definition: DBTranslator.h:385
DBTranslator< ALLOC > & operator=(const DBTranslator< ALLOC > &from)
copy operator
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
performs a reordering of the dictionary and returns a mapping from the old translated values to the n...
Base class for all aGrUM's exceptions.
Definition: exceptions.h:106
virtual std::size_t domainSize() const final
returns the domain size of a variable corresponding to the translations
virtual bool hasEditableDictionary() const
indicates whether the translator has an editable dictionary or not
bool isMissingSymbol(const std::string &str) const
indicates whether a string corresponds to a missing symbol
typename DBTranslator< ALLOC >::allocator_type allocator_type
type for the allocators passed in arguments of methods
virtual DBTranslator4RangeVariable< ALLOC > * clone() const
virtual copy constructor
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
DBTranslator4RangeVariable(const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor without any initial variable
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55