aGrUM  0.16.0
DBTranslator4RangeVariable_tpl.h
Go to the documentation of this file.
1 
29 #include <utility>
30 #include <vector>
31 #include <limits>
32 #include <cstdio>
33 
36 
37 #ifndef DOXYGEN_SHOULD_SKIP_THIS
38 
39 namespace gum {
40 
41  namespace learning {
42 
43 
/// Constructor taking only missing symbols (no initial variable).
/// NOTE: the lines holding the qualified constructor name and the allocator
/// parameter are absent from this listing (embedded lines 47 and 50).
/// The base DBTranslator is initialised as DISCRETE with the given missing
/// symbols; the literal `true` sits in the position that the base-class
/// constructor calls editable_dictionary.
/// __variable is built as ("var", "", 1, 0) - presumably minVal = 1 and
/// maxVal = 0, i.e. an empty range (translate() treats minVal() > maxVal()
/// as "empty"); TODO confirm against RangeVariable's constructor.
45  template < template < typename > class ALLOC >
46  template < template < typename > class XALLOC >
48  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
49  std::size_t max_dico_entries,
51  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
52  missing_symbols,
53  true,
54  max_dico_entries,
55  alloc),
56  __variable("var", "", 1, 0) {
57  // assign to each integer missing symbol a Boolean indicating that
58  // we did not translate it yet. If we encounter a non integer missing
59  // symbol, we record it because it cannot be compromised by updating the
60  // domain of the range variable
61  bool non_int_symbol_found = false;
62  for (const auto& symbol : this->_missing_symbols) {
63  if (DBCell::isInteger(symbol)) {
64  __status_int_missing_symbols.insert(symbol, false);
65  } else if (!non_int_symbol_found) {
    // only the first non-integer symbol met during the iteration is kept
66  non_int_symbol_found = true;
67  __nonint_missing_symbol = symbol;
68  }
69  }
70 
71  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
72  }
73 
74 
/// Constructor without initial variable and without missing symbols.
/// NOTE: the qualified constructor name and the allocator parameter lines are
/// absent from this listing (embedded lines 77 and 79).
/// The base DBTranslator is initialised with (DISCRETE, true,
/// max_dico_entries, alloc) and __variable with ("var", "", 1, 0), the same
/// initial variable as in the constructor taking missing symbols.
76  template < template < typename > class ALLOC >
78  std::size_t max_dico_entries,
80  DBTranslator< ALLOC >(
81  DBTranslatedValueType::DISCRETE, true, max_dico_entries, alloc),
82  __variable("var", "", 1, 0) {
83  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
84  }
85 
86 
/// Constructor from an existing RangeVariable plus a set of missing symbols.
/// NOTE: the qualified constructor name, the allocator parameter and the
/// start of the base-class initialiser are absent from this listing
/// (embedded lines 90, 95 and 96).
/// Steps performed by the body:
///  1. raise SizeError if the variable's domain has more values than
///     _max_dico_entries;
///  2. drop every integer missing symbol that falls inside [minVal, maxVal];
///  3. fill _back_dico with the variable's labels, indexed from 0 in the
///     order returned by var.labels();
///  4. register the remaining integer missing symbols as "not yet
///     translated" and remember the first non-integer one.
88  template < template < typename > class ALLOC >
89  template < template < typename > class XALLOC >
91  const RangeVariable& var,
92  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
93  const bool editable_dictionary,
94  std::size_t max_dico_entries,
97  missing_symbols,
98  editable_dictionary,
99  max_dico_entries,
100  alloc),
101  __variable(var) {
102  // get the bounds of the range variable
103  const long lower_bound = var.minVal();
104  const long upper_bound = var.maxVal();
105 
106  // check that the variable has not too many entries for the dictionary
    // (an empty range, i.e. upper_bound < lower_bound, is never rejected)
107  if ((upper_bound >= lower_bound)
108  && (std::size_t(upper_bound - lower_bound + 1)
109  > this->_max_dico_entries)) {
110  GUM_ERROR(SizeError,
111  "the dictionary induced by the variable is too large");
112  }
113 
114  // if the range variable is not empty, i.e., its upper bound is greater
115  // than or equal to its lower bound, remove all the missing symbols
116  // corresponding to a number between lower_bound and upper_bound
117  if (lower_bound <= upper_bound) {
    // beginSafe()/endSafe() iterators are used because elements are erased
    // while iterating
118  for (auto iter = this->_missing_symbols.beginSafe();
119  iter != this->_missing_symbols.endSafe();
120  ++iter) {
121  if (DBCell::isInteger(*iter)) {
122  const long missing_val = std::stol(*iter);
123  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
124  this->_missing_symbols.erase(iter);
125  }
126  }
127  }
128  }
129 
130  // add the content of the variable into the back dictionary
131  std::size_t size = 0;
132  for (const auto& label : var.labels()) {
133  // insert the label into the back_dictionary
134  this->_back_dico.insert(size, label);
135  ++size;
136  }
137 
138  // assign to each integer missing symbol a Boolean indicating that
139  // we did not translate it yet. If we encounter a non integer symbol,
140  // we record it because it cannot be compromised by updating the domain
141  // of the range variable. This will be useful for back translations
142  bool non_int_symbol_found = false;
143  for (const auto& symbol : this->_missing_symbols) {
144  if (DBCell::isInteger(symbol)) {
145  __status_int_missing_symbols.insert(symbol, false);
146  } else if (!non_int_symbol_found) {
147  non_int_symbol_found = true;
148  __nonint_missing_symbol = symbol;
149  }
150  }
151 
152  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
153  }
154 
155 
/// Constructor from an existing RangeVariable, without missing symbols.
/// NOTE: the qualified constructor name, the allocator parameter and the
/// start of the base-class initialiser are absent from this listing
/// (embedded lines 158, 162 and 163).
/// Raises SizeError when the variable's domain has more values than
/// _max_dico_entries; otherwise copies the variable's labels into
/// _back_dico, indexed from 0 in the order returned by var.labels().
157  template < template < typename > class ALLOC >
159  const RangeVariable& var,
160  const bool editable_dictionary,
161  std::size_t max_dico_entries,
164  editable_dictionary,
165  max_dico_entries,
166  alloc),
167  __variable(var) {
168  // get the bounds of the range variable
169  const long lower_bound = var.minVal();
170  const long upper_bound = var.maxVal();
171 
172  // check that the variable has not too many entries for the dictionary
    // (an empty range, i.e. upper_bound < lower_bound, is never rejected)
173  if ((upper_bound >= lower_bound)
174  && (std::size_t(upper_bound - lower_bound + 1)
175  > this->_max_dico_entries)) {
176  GUM_ERROR(SizeError,
177  "the dictionary induced by the variable is too large");
178  }
179 
180  // add the content of the variable into the back dictionary
181  std::size_t size = 0;
182  for (const auto& label : var.labels()) {
183  // insert the label into the back_dictionary
184  this->_back_dico.insert(size, label);
185  ++size;
186  }
187 
188  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
189  }
190 
191 
/// Copy constructor with a given allocator.
/// NOTE: the qualified constructor name and the allocator parameter lines are
/// absent from this listing (embedded lines 194 and 196).
/// Copies the base-class part (with alloc) and then each member of `from`:
/// the variable, the status table of integer missing symbols, the set of
/// already translated integer missing symbols and the non-integer missing
/// symbol.
193  template < template < typename > class ALLOC >
195  const DBTranslator4RangeVariable< ALLOC >& from,
197  DBTranslator< ALLOC >(from, alloc),
198  __variable(from.__variable),
199  __status_int_missing_symbols(from.__status_int_missing_symbols),
200  __translated_int_missing_symbols(from.__translated_int_missing_symbols),
201  __nonint_missing_symbol(from.__nonint_missing_symbol) {
202  GUM_CONS_CPY(DBTranslator4RangeVariable);
203  }
204 
205 
/// Copy constructor: delegates to the copy constructor with allocator, using
/// the allocator returned by from.getAllocator().
/// NOTE: the qualified constructor name line is absent from this listing
/// (embedded line 208).
207  template < template < typename > class ALLOC >
209  const DBTranslator4RangeVariable< ALLOC >& from) :
210  DBTranslator4RangeVariable< ALLOC >(from, from.getAllocator()) {}
211 
212 
/// Move constructor with a given allocator.
/// NOTE: the qualified constructor name and the allocator parameter lines are
/// absent from this listing (embedded lines 215 and 217).
/// Moves the base-class part (with alloc) and then each member of `from`.
214  template < template < typename > class ALLOC >
216  DBTranslator4RangeVariable< ALLOC >&& from,
218  DBTranslator< ALLOC >(std::move(from), alloc),
219  __variable(std::move(from.__variable)),
220  __status_int_missing_symbols(std::move(from.__status_int_missing_symbols)),
221  __translated_int_missing_symbols(
222  std::move(from.__translated_int_missing_symbols)),
223  __nonint_missing_symbol(std::move(from.__nonint_missing_symbol)) {
224  GUM_CONS_MOV(DBTranslator4RangeVariable);
225  }
226 
227 
/// Move constructor: delegates to the move constructor with allocator, using
/// the allocator returned by from.getAllocator().
/// NOTE: the qualified constructor name line is absent from this listing
/// (embedded line 230).
229  template < template < typename > class ALLOC >
231  DBTranslator4RangeVariable< ALLOC >&& from) :
232  DBTranslator4RangeVariable< ALLOC >(std::move(from), from.getAllocator()) {
233  }
234 
235 
/// Virtual copy constructor with a given allocator.
/// NOTE: the qualified method name and the start of its parameter are absent
/// from this listing (embedded lines 239 and 240).
/// Allocates room for one translator with an ALLOC allocator built from
/// alloc and copy-constructs *this into it. If construction throws, the
/// storage is released and the exception is rethrown.
/// @return a pointer to the newly allocated copy.
237  template < template < typename > class ALLOC >
238  DBTranslator4RangeVariable< ALLOC >*
241  alloc) const {
242  ALLOC< DBTranslator4RangeVariable< ALLOC > > allocator(alloc);
243  DBTranslator4RangeVariable< ALLOC >* translator = allocator.allocate(1);
244  try {
245  allocator.construct(translator, *this, alloc);
246  } catch (...) {
    // do not leak the raw storage when the copy constructor fails
247  allocator.deallocate(translator, 1);
248  throw;
249  }
250  return translator;
251  }
252 
253 
/// Virtual copy constructor: forwards to clone(alloc) with the translator's
/// own allocator.
/// NOTE: the qualified method name line is absent from this listing
/// (embedded line 257).
255  template < template < typename > class ALLOC >
256  INLINE DBTranslator4RangeVariable< ALLOC >*
258  return clone(this->getAllocator());
259  }
260 
261 
/// Destructor: the body only invokes the GUM_DESTRUCTOR macro.
/// NOTE: the qualified destructor name line is absent from this listing
/// (embedded line 264).
263  template < template < typename > class ALLOC >
265  GUM_DESTRUCTOR(DBTranslator4RangeVariable);
266  }
267 
268 
/// Copy assignment operator.
/// Copies the base-class part of `from` and then every member specific to
/// this translator (variable, status of the integer missing symbols, set of
/// already translated integer missing symbols, non-integer missing symbol).
/// Self-assignment is a no-op.
/// @param from the translator to copy.
/// @return a reference to *this.
270  template < template < typename > class ALLOC >
271  DBTranslator4RangeVariable< ALLOC >& DBTranslator4RangeVariable< ALLOC >::
272  operator=(const DBTranslator4RangeVariable< ALLOC >& from) {
273  if (this != &from) {
    // the base-class part must be assigned as well, exactly as done by the
    // move assignment operator and by the copy constructor; without it the
    // base-class state (e.g. _back_dico, _missing_symbols) would be left
    // out of sync with the copied variable
274  DBTranslator< ALLOC >::operator=(from);
275  __variable = from.__variable;
276  __status_int_missing_symbols = from.__status_int_missing_symbols;
277  __translated_int_missing_symbols = from.__translated_int_missing_symbols;
278  __nonint_missing_symbol = from.__nonint_missing_symbol;
279  }
280 
281  return *this;
282  }
283 
284 
/// Move assignment operator.
/// Moves the base-class part of `from` and then every member specific to
/// this translator. Self-assignment is a no-op.
/// @param from the translator to move from.
/// @return a reference to *this.
286  template < template < typename > class ALLOC >
287  DBTranslator4RangeVariable< ALLOC >& DBTranslator4RangeVariable< ALLOC >::
288  operator=(DBTranslator4RangeVariable< ALLOC >&& from) {
289  if (this != &from) {
290  DBTranslator< ALLOC >::operator=(std::move(from));
291  __variable = std::move(from.__variable);
292  __status_int_missing_symbols =
293  std::move(from.__status_int_missing_symbols);
294  __translated_int_missing_symbols =
295  std::move(from.__translated_int_missing_symbols);
296  __nonint_missing_symbol = std::move(from.__nonint_missing_symbol);
297  }
298 
299  return *this;
300  }
301 
302 
/// Returns the translation of a string.
/// Attempts, in order:
///  1. look str up in _back_dico and return its index;
///  2. if str is a missing symbol, return
///     std::numeric_limits< std::size_t >::max() (integer missing symbols are
///     additionally recorded as "translated");
///  3. otherwise, if the dictionary is editable and str is an integer, extend
///     the lower or upper bound of __variable so that it contains the value,
///     append the new labels to _back_dico and return the index of str.
/// Raises UnknownLabelInDatabase when the dictionary is not editable,
/// TypeError when str is not an integer, OperationNotAllowed when str (or the
/// extended range) collides with an already translated integer missing
/// symbol, and SizeError when the extended range would exceed
/// _max_dico_entries.
/// NOTE(review): std::stol can throw std::out_of_range for integers that do
/// not fit into a long; nothing here converts that into a gum exception.
304  template < template < typename > class ALLOC >
305  DBTranslatedValue
306  DBTranslator4RangeVariable< ALLOC >::translate(const std::string& str) {
307  // try to get the index of str within the range variable. If this
308  // cannot be found, try to find if this corresponds to a missing value.
309  // Finally, if this is still not a missing value and, if enabled, try
310  // to add str as a new label
311  try {
312  return DBTranslatedValue{this->_back_dico.first(str)};
313  } catch (gum::Exception&) {
314  // check that this is not a missing value
315  if (this->isMissingSymbol(str)) {
316  try {
    // mark the integer missing symbol as translated the first time it is
    // met, so that later range extensions cannot swallow it
317  const bool is_str_translated = __status_int_missing_symbols[str];
318  if (!is_str_translated) {
319  __status_int_missing_symbols[str] = true;
320  __translated_int_missing_symbols.insert(std::stol(str));
321  }
    // NotFound is ignored on purpose: str is then presumably a non-integer
    // missing symbol, for which no bookkeeping is kept
322  } catch (gum::NotFound&) {}
323  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
324  }
325 
326  // check if we are allowed to update the range variable
327  if (!this->hasEditableDictionary()) {
328  GUM_ERROR(UnknownLabelInDatabase,
329  "The translation of String \"" << str
330  << "\" could not be found");
331  }
332 
333  // check if str could correspond to a bound of the range variable
334  if (!DBCell::isInteger(str)) {
335  GUM_ERROR(TypeError,
336  "String \"" << str << "\" cannot be translated because "
337  << "it cannot be converted into an integer");
338  }
339  const long new_value = std::stol(str);
340 
341  // if str corresponds to a missing symbol that we already
342  // translated, raise an exception
343  if (__translated_int_missing_symbols.exists(new_value)) {
344  GUM_ERROR(
345  OperationNotAllowed,
346  "String \""
347  << str << "\" cannot be translated because "
348  << "it corresponds to an already translated missing symbol");
349  }
350 
351  // now, we can try to add str as a new bound of the range variable
352  // if possible
353 
354  // if the range variable is empty, set the min and max ranges. Here,
355  // there is no need to check whether the new range would contain an
356  // already translated missing symbol because this was already tested
357  // in the above test.
358  if (__variable.minVal() > __variable.maxVal()) {
359  if (this->_max_dico_entries == 0) {
360  GUM_ERROR(SizeError,
361  "String \"" << str << "\" cannot be translated because "
362  << "the dictionary is already full");
363  }
364  __variable.setMinVal(new_value);
365  __variable.setMaxVal(new_value);
366  this->_back_dico.insert(std::size_t(0), str);
367  return DBTranslatedValue{std::size_t(0)};
368  }
369 
370  // here, the domain is not empty. So we should update either the
371  // lower bound or the upper bound of the range variable, unless
372  // a missing symbol lies within the new bounds and we have already
373  // translated it.
374  const long lower_bound = __variable.minVal();
375  const long upper_bound = __variable.maxVal();
376 
    // current number of values of the range; also the first free index
    // used below when appending new labels to _back_dico
377  std::size_t size = upper_bound - lower_bound + 1;
378 
379  if (new_value < __variable.minVal()) {
380  if (std::size_t(upper_bound - new_value + 1) > this->_max_dico_entries)
381  GUM_ERROR(SizeError,
382  "String \"" << str << "\" cannot be translated because "
383  << "the dictionary is already full");
384 
385  // check that there does not already exist a translated missing
386  // value within the new bounds of the range variable
387  for (const auto& missing : __translated_int_missing_symbols) {
388  if ((missing >= new_value) && (missing <= upper_bound)) {
389  GUM_ERROR(OperationNotAllowed,
390  "String \""
391  << str << "\" cannot be translated "
392  << "because it would induce a new range containing "
393  << "an already translated missing symbol");
394  }
395  }
396 
397  // remove all the missing symbols that were not translated yet and
398  // that lie within the new bounds of the range variable
399  for (auto iter = __status_int_missing_symbols.beginSafe();
400  iter != __status_int_missing_symbols.endSafe();
401  ++iter) {
402  if (iter.val() == false) {
403  const long missing = std::stol(iter.key());
404  if ((missing >= new_value) && (missing <= upper_bound)) {
405  this->_missing_symbols.erase(iter.key());
406  __status_int_missing_symbols.erase(iter);
407  }
408  }
409  }
410 
411  // update the range and the back dictionary
    // the new values are appended after the existing indices, in increasing
    // order, so new_value itself receives the first new index (the old size)
412  const std::size_t index = size;
413  for (long i = new_value; i < __variable.minVal(); ++i) {
414  this->_back_dico.insert(size, std::to_string(i));
415  ++size;
416  }
417  __variable.setMinVal(new_value);
418 
419  return DBTranslatedValue{index};
420  } else {
    // NOTE(review): this branch is taken whenever new_value >= minVal(),
    // which includes values already inside [lower_bound, upper_bound]. That
    // case seems reachable only if str is a spelling accepted by
    // DBCell::isInteger but not stored in _back_dico (e.g. a leading '+' or
    // zeros - TODO confirm); setMaxVal(new_value) would then shrink the range.
421  if (std::size_t(new_value - lower_bound + 1) > this->_max_dico_entries)
422  GUM_ERROR(SizeError,
423  "String \"" << str << "\" cannot be translated because "
424  << "the dictionary is already full");
425 
426  // check that there does not already exist a translated missing
427  // value within the new bounds of the range variable
428  for (const auto& missing : __translated_int_missing_symbols) {
429  if ((missing <= new_value) && (missing >= lower_bound)) {
430  GUM_ERROR(OperationNotAllowed,
431  "String \""
432  << str << "\" cannot be translated "
433  << "because it would induce a new range containing "
434  << "an already translated missing symbol");
435  }
436  }
437 
438  // remove all the missing symbols that were not translated yet and
439  // that lie within the new bounds of the range variable
440  for (auto iter = __status_int_missing_symbols.beginSafe();
441  iter != __status_int_missing_symbols.endSafe();
442  ++iter) {
443  if (iter.val() == false) {
444  const long missing = std::stol(iter.key());
445  if ((missing <= new_value) && (missing >= lower_bound)) {
446  this->_missing_symbols.erase(iter.key());
447  __status_int_missing_symbols.erase(iter);
448  }
449  }
450  }
451 
452  // update the range and the back dictionary
453  for (long i = __variable.maxVal() + 1; i <= new_value; ++i) {
454  this->_back_dico.insert(size, std::to_string(i));
455  ++size;
456  }
457  __variable.setMaxVal(new_value);
458 
    // new_value was the last label appended, hence index size - 1
459  return DBTranslatedValue{size - std::size_t(1)};
460  }
461  }
462  }
463 
464 
/// Returns the original string for a given translation.
/// The value is first looked up in _back_dico. If that fails and the value is
/// std::numeric_limits< std::size_t >::max() (the translation of a missing
/// value), a missing symbol is returned: the recorded non-integer missing
/// symbol if there is one, otherwise the first element of _missing_symbols.
/// @param translated_val the translated value whose discr_val is looked up.
/// @return the string corresponding to translated_val.
/// @throws UnknownLabelInDatabase if no original string can be found.
/// NOTE: the signature line (embedded line 467) was missing from the listing
/// and is restored from the documented declaration of translateBack.
466  template < template < typename > class ALLOC >
467  std::string DBTranslator4RangeVariable< ALLOC >::translateBack(
468  const DBTranslatedValue translated_val) const {
469  try {
470  return this->_back_dico.second(translated_val.discr_val);
471  } catch (Exception&) {
472  // check if this is a missing value
473  if (translated_val.discr_val
474  == std::numeric_limits< std::size_t >::max()) {
475  if (!__nonint_missing_symbol.empty()) return __nonint_missing_symbol;
    // begin() may only be dereferenced when the set is NOT empty; the
    // previous test was inverted and read from an empty set
476  if (!this->_missing_symbols.empty())
477  return *(this->_missing_symbols.begin());
478  }
479 
480  GUM_ERROR(UnknownLabelInDatabase,
481  "The back translation of \"" << translated_val.discr_val
482  << "\" could not be found");
483  }
484  }
485 
486 
/// Indicates whether a reordering is needed to make the translations sorted
/// by increasing numbers.
/// NOTE: the signature line is absent from this listing (embedded line 489).
/// Walks the labels of __variable in the order given by labels() and returns
/// true as soon as one label has a _back_dico index smaller than the index of
/// the previous label; returns false otherwise.
488  template < template < typename > class ALLOC >
490  // if the variable contains only numbers, they should be increasing
491  const auto& labels = __variable.labels();
492  std::size_t last_number = std::numeric_limits< std::size_t >::lowest();
493  std::size_t number;
494  for (const auto& label : labels) {
495  number = this->_back_dico.first(label);
496  if (number < last_number) return true;
497  last_number = number;
498  }
499 
500  return false;
501  }
502 
503 
/// Performs a reordering of the dictionary and returns a mapping from the old
/// translated values to the new ones.
/// NOTE: the qualified method name line is absent from this listing
/// (embedded line 509).
/// After the call, the i-th label of __variable (in labels() order) has index
/// i in _back_dico. If every label already had that index, _back_dico is left
/// untouched and an empty hashtable is returned.
505  template < template < typename > class ALLOC >
506  INLINE HashTable< std::size_t,
507  std::size_t,
508  ALLOC< std::pair< std::size_t, std::size_t > > >
510  // assign to each label the index it had before reordering
511  const auto& labels = __variable.labels();
512  const std::size_t size = labels.size();
513  std::vector< std::pair< std::size_t, std::string >,
514  ALLOC< std::pair< std::size_t, std::string > > >
515  xlabels;
516  xlabels.reserve(size);
517  bool modifications = false;
518  for (std::size_t i = std::size_t(0); i < size; ++i) {
519  const std::size_t old_val = this->_back_dico.first(labels[i]);
520  xlabels.push_back(std::make_pair(old_val, labels[i]));
521  if (old_val != i) modifications = true;
522  }
523 
524 
525  // if there were no modification, return an empty update hashtable
526  if (!modifications) {
527  return HashTable< std::size_t,
528  std::size_t,
529  ALLOC< std::pair< std::size_t, std::size_t > > >();
530  }
531 
532  // create the hashTable corresponding to the mapping from the old
533  // indices to the new one
    // _back_dico is rebuilt from scratch using the (old index, label) pairs
    // saved in xlabels
534  this->_back_dico.clear();
535  HashTable< std::size_t,
536  std::size_t,
537  ALLOC< std::pair< std::size_t, std::size_t > > >
538  mapping((Size)size);
539  for (std::size_t i = std::size_t(0); i < size; ++i) {
540  mapping.insert(xlabels[i].first, i);
541  this->_back_dico.insert(i, xlabels[i].second);
542  }
543 
544  return mapping;
545  }
546 
547 
/// Returns the domain size of the variable stored in the translator, as
/// reported by __variable.domainSize().
549  template < template < typename > class ALLOC >
550  INLINE std::size_t DBTranslator4RangeVariable< ALLOC >::domainSize() const {
551  return __variable.domainSize();
552  }
553 
554 
/// Returns a pointer to the range variable stored into the translator.
/// The pointer refers to the translator's own __variable member.
/// NOTE: the qualified method name line is absent from this listing
/// (embedded line 558).
556  template < template < typename > class ALLOC >
557  INLINE const RangeVariable*
559  return &__variable;
560  }
561 
562 
/// Returns the translation of a missing value, i.e. a DBTranslatedValue
/// holding std::numeric_limits< std::size_t >::max(), the same value that
/// translate() returns for missing symbols.
/// NOTE: the qualified method name line is absent from this listing
/// (embedded line 566).
564  template < template < typename > class ALLOC >
565  INLINE DBTranslatedValue
567  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
568  }
569 
570 
571  } /* namespace learning */
572 
573 } /* namespace gum */
574 
575 
576 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > _back_dico
the bijection relating back translated values and their original strings.
Definition: DBTranslator.h:396
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
virtual bool needsReordering() const final
indicates whether a reordering is needed to make the translations sorted by increasing numbers ...
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
void clear()
Removes all the associations from the gum::Bijection.
virtual const RangeVariable * variable() const final
returns the variable stored into the translator
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
std::size_t _max_dico_entries
the maximum number of entries that the dictionary is allowed to contain
Definition: DBTranslator.h:382
static bool isInteger(const std::string &str)
determines whether a string corresponds precisely to an integer
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
STL namespace.
DBTranslator4RangeVariable< ALLOC > & operator=(const DBTranslator4RangeVariable< ALLOC > &from)
copy operator
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
DBTranslator(DBTranslatedValueType val_type, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool editable_dictionary=true, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor
virtual ~DBTranslator4RangeVariable()
destructor
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value
allocator_type getAllocator() const
returns the allocator used by the translator
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
std::string to_string(const Formula &f)
Definition: formula_inl.h:499
Set< std::string, ALLOC< std::string > > _missing_symbols
the set of missing symbols
Definition: DBTranslator.h:385
DBTranslator< ALLOC > & operator=(const DBTranslator< ALLOC > &from)
copy operator
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
performs a reordering of the dictionary and returns a mapping from the old translated values to the n...
Base class for all aGrUM's exceptions.
Definition: exceptions.h:106
virtual std::size_t domainSize() const final
returns the domain size of a variable corresponding to the translations
virtual bool hasEditableDictionary() const
indicates whether the translator has an editable dictionary or not
bool isMissingSymbol(const std::string &str) const
indicates whether a string corresponds to a missing symbol
typename DBTranslator< ALLOC >::allocator_type allocator_type
type for the allocators passed in arguments of methods
virtual DBTranslator4RangeVariable< ALLOC > * clone() const
virtual copy constructor
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
DBTranslator4RangeVariable(const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor without any initial variable
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55