aGrUM  0.14.2
DBTranslator4RangeVariable_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 #include <utility>
27 #include <vector>
28 #include <limits>
29 #include <cstdio>
30 
33 
34 #ifndef DOXYGEN_SHOULD_SKIP_THIS
35 
36 namespace gum {
37 
38  namespace learning {
39 
40 
/// Constructor taking a set of missing symbols but no initial variable.
///
/// NOTE(review): the embedded listing numbers skip 44 and 47, so the
/// declarator line and the declaration of `alloc` are not visible in this
/// listing; the tooltip text at the end of the listing describes a
/// "default constructor without any initial variable" with these parameters.
///
/// The DBTranslator< ALLOC > base is initialized as a DISCRETE translator,
/// with the missing symbols passed in argument, `true` as third argument,
/// max_dico_entries and alloc.
42  template < template < typename > class ALLOC >
43  template < template < typename > class XALLOC >
45  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
46  std::size_t max_dico_entries,
48  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
49  missing_symbols,
50  true,
51  max_dico_entries,
52  alloc),
    // presumably (name, description, lower bound 1, upper bound 0), i.e. an
    // empty range, since translate() treats minVal() > maxVal() as "empty"
    // -- TODO confirm against RangeVariable's constructor
53  __variable("var", "", 1, 0) {
54  // assign to each integer missing symbol a Boolean indicating that
55  // we did not translate it yet. If we encounter a non integer missing
56  // symbol, we record it because it cannot be compromised by updating the
57  // domain of the range variable
58  bool non_int_symbol_found = false;
59  for (const auto& symbol : this->_missing_symbols) {
60  if (DBCell::isInteger(symbol)) {
61  __status_int_missing_symbols.insert(symbol, false);
62  } else if (!non_int_symbol_found) {
    // only the first non-integer missing symbol encountered is recorded
63  non_int_symbol_found = true;
64  __nonint_missing_symbol = symbol;
65  }
66  }
67 
68  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
69  }
70 
71 
/// Constructor without any initial variable nor missing symbols.
///
/// NOTE(review): the embedded listing numbers skip 74 and 76, so the
/// declarator line and the declaration of `alloc` are not visible in this
/// listing.
///
/// The DBTranslator< ALLOC > base is initialized as a DISCRETE translator
/// with `true` as second argument, max_dico_entries and alloc. No missing
/// symbol bookkeeping is performed here.
73  template < template < typename > class ALLOC >
75  std::size_t max_dico_entries,
77  DBTranslator< ALLOC >(
78  DBTranslatedValueType::DISCRETE, true, max_dico_entries, alloc),
    // presumably an empty range (lower bound 1 > upper bound 0) -- TODO
    // confirm against RangeVariable's constructor
79  __variable("var", "", 1, 0) {
80  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
81  }
82 
83 
/// Constructor taking an initial range variable and a set of missing symbols.
///
/// NOTE(review): the embedded listing numbers skip 87, 92 and 93, so the
/// declarator line, the declaration of `alloc` and the beginning of the base
/// class initializer are not visible in this listing.
///
/// Steps performed by the body:
///  - raise a SizeError if the (non-empty) range of var has more values than
///    _max_dico_entries;
///  - remove from _missing_symbols every integer symbol lying within the
///    bounds of var;
///  - fill _back_dico with the labels of var, indexed 0, 1, 2, ... in the
///    order returned by var.labels();
///  - register the remaining integer missing symbols as "not translated yet"
///    and record the first non-integer missing symbol found.
85  template < template < typename > class ALLOC >
86  template < template < typename > class XALLOC >
88  const RangeVariable& var,
89  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
90  const bool editable_dictionary,
91  std::size_t max_dico_entries,
94  missing_symbols,
95  editable_dictionary,
96  max_dico_entries,
97  alloc),
98  __variable(var) {
99  // get the bounds of the range variable
100  const long lower_bound = var.minVal();
101  const long upper_bound = var.maxVal();
102 
103  // check that the variable has not too many entries for the dictionary
104  if ((upper_bound >= lower_bound)
105  && (std::size_t(upper_bound - lower_bound + 1)
106  > this->_max_dico_entries)) {
107  GUM_ERROR(SizeError,
108  "the dictionary induced by the variable is too large");
109  }
110 
111  // if the range variable is not empty, i.e., its upper bound is greater
112  // than or equal to its lower bound, remove all the missing symbols
113  // corresponding to a number between lower_bound and upper_bound
114  if (lower_bound <= upper_bound) {
    // elements are erased while iterating: this relies on the iterators
    // returned by beginSafe()/endSafe() -- presumably these remain usable
    // after an erase; TODO confirm against the Set documentation
115  for (auto iter = this->_missing_symbols.beginSafe();
116  iter != this->_missing_symbols.endSafe();
117  ++iter) {
118  if (DBCell::isInteger(*iter)) {
119  const long missing_val = std::stol(*iter);
120  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
121  this->_missing_symbols.erase(iter);
122  }
123  }
124  }
125  }
126 
127  // add the content of the variable into the back dictionary
128  std::size_t size = 0;
129  for (const auto& label : var.labels()) {
130  // insert the label into the back_dictionary
131  this->_back_dico.insert(size, label);
132  ++size;
133  }
134 
135  // assign to each integer missing symbol a Boolean indicating that
136  // we did not translate it yet. If we encounter a non integer symbol,
137  // we record it because it cannot be compromised by updating the domain
138  // of the range variable. This will be useful for back translations
139  bool non_int_symbol_found = false;
140  for (const auto& symbol : this->_missing_symbols) {
141  if (DBCell::isInteger(symbol)) {
142  __status_int_missing_symbols.insert(symbol, false);
143  } else if (!non_int_symbol_found) {
    // only the first non-integer missing symbol encountered is recorded
144  non_int_symbol_found = true;
145  __nonint_missing_symbol = symbol;
146  }
147  }
148 
149  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
150  }
151 
152 
/// Constructor taking an initial range variable but no missing symbols.
///
/// NOTE(review): the embedded listing numbers skip 155, 159 and 160, so the
/// declarator line, the declaration of `alloc` and the beginning of the base
/// class initializer are not visible in this listing.
///
/// The body raises a SizeError if the (non-empty) range of var has more
/// values than _max_dico_entries, then fills _back_dico with the labels of
/// var, indexed 0, 1, 2, ... in the order returned by var.labels().
154  template < template < typename > class ALLOC >
156  const RangeVariable& var,
157  const bool editable_dictionary,
158  std::size_t max_dico_entries,
161  editable_dictionary,
162  max_dico_entries,
163  alloc),
164  __variable(var) {
165  // get the bounds of the range variable
166  const long lower_bound = var.minVal();
167  const long upper_bound = var.maxVal();
168 
169  // check that the variable has not too many entries for the dictionary
170  if ((upper_bound >= lower_bound)
171  && (std::size_t(upper_bound - lower_bound + 1)
172  > this->_max_dico_entries)) {
173  GUM_ERROR(SizeError,
174  "the dictionary induced by the variable is too large");
175  }
176 
177  // add the content of the variable into the back dictionary
178  std::size_t size = 0;
179  for (const auto& label : var.labels()) {
180  // insert the label into the back_dictionary
181  this->_back_dico.insert(size, label);
182  ++size;
183  }
184 
185  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
186  }
187 
188 
/// Copy constructor with a given allocator.
///
/// NOTE(review): the embedded listing numbers skip 191 and 193, so the
/// declarator line and the declaration of `alloc` are not visible in this
/// listing.
///
/// The base class is copy-constructed from `from` with alloc, and each of
/// the four members of the translator is copied from `from`.
190  template < template < typename > class ALLOC >
192  const DBTranslator4RangeVariable< ALLOC >& from,
194  DBTranslator< ALLOC >(from, alloc),
195  __variable(from.__variable),
196  __status_int_missing_symbols(from.__status_int_missing_symbols),
197  __translated_int_missing_symbols(from.__translated_int_missing_symbols),
198  __nonint_missing_symbol(from.__nonint_missing_symbol) {
199  GUM_CONS_CPY(DBTranslator4RangeVariable);
200  }
201 
202 
/// Copy constructor.
///
/// NOTE(review): the embedded listing numbers skip 205, so the declarator
/// line is not visible in this listing.
///
/// Delegates to the copy constructor with allocator, passing the allocator
/// returned by from.getAllocator().
204  template < template < typename > class ALLOC >
206  const DBTranslator4RangeVariable< ALLOC >& from) :
207  DBTranslator4RangeVariable< ALLOC >(from, from.getAllocator()) {}
208 
209 
/// Move constructor with a given allocator.
///
/// NOTE(review): the embedded listing numbers skip 212 and 214, so the
/// declarator line and the declaration of `alloc` are not visible in this
/// listing.
///
/// The base class is move-constructed from `from` with alloc, and each of
/// the four members of the translator is moved from `from`.
211  template < template < typename > class ALLOC >
213  DBTranslator4RangeVariable< ALLOC >&& from,
215  DBTranslator< ALLOC >(std::move(from), alloc),
216  __variable(std::move(from.__variable)),
217  __status_int_missing_symbols(std::move(from.__status_int_missing_symbols)),
218  __translated_int_missing_symbols(
219  std::move(from.__translated_int_missing_symbols)),
220  __nonint_missing_symbol(std::move(from.__nonint_missing_symbol)) {
221  GUM_CONS_MOV(DBTranslator4RangeVariable);
222  }
223 
224 
/// Move constructor.
///
/// NOTE(review): the embedded listing numbers skip 227, so the declarator
/// line is not visible in this listing.
///
/// Delegates to the move constructor with allocator, passing the allocator
/// returned by from.getAllocator(). std::move(from) is only a cast, so
/// `from` is still intact when getAllocator() is evaluated.
226  template < template < typename > class ALLOC >
228  DBTranslator4RangeVariable< ALLOC >&& from) :
229  DBTranslator4RangeVariable< ALLOC >(std::move(from), from.getAllocator()) {
230  }
231 
232 
/// Virtual copy constructor with a given allocator.
///
/// NOTE(review): the embedded listing numbers skip 236 and 237, so the
/// declarator line and the type of `alloc` are not visible in this listing.
///
/// Storage for one translator is obtained from an allocator built from
/// alloc, and a copy of *this is constructed in it (using alloc as its
/// allocator). If the construction throws, the storage is released and the
/// exception is rethrown.
///
/// @return a pointer to the newly constructed copy. Presumably the caller is
/// responsible for destroying and deallocating it with a matching allocator
/// -- TODO confirm against the callers.
234  template < template < typename > class ALLOC >
235  DBTranslator4RangeVariable< ALLOC >*
238  alloc) const {
239  ALLOC< DBTranslator4RangeVariable< ALLOC > > allocator(alloc);
240  DBTranslator4RangeVariable< ALLOC >* translator = allocator.allocate(1);
241  try {
242  allocator.construct(translator, *this, alloc);
243  } catch (...) {
    // do not leak the raw storage when the copy construction fails
244  allocator.deallocate(translator, 1);
245  throw;
246  }
247  return translator;
248  }
249 
250 
/// Virtual copy constructor.
///
/// NOTE(review): the embedded listing numbers skip 254, so the declarator
/// line is not visible in this listing.
///
/// Forwards to clone(alloc) with the allocator returned by getAllocator().
252  template < template < typename > class ALLOC >
253  INLINE DBTranslator4RangeVariable< ALLOC >*
255  return clone(this->getAllocator());
256  }
257 
258 
/// Destructor.
///
/// NOTE(review): the embedded listing numbers skip 261, so the declarator
/// line is not visible in this listing.
///
/// The body only invokes the GUM_DESTRUCTOR macro; no explicit cleanup of
/// the members is performed here.
260  template < template < typename > class ALLOC >
262  GUM_DESTRUCTOR(DBTranslator4RangeVariable);
263  }
264 
265 
267  template < template < typename > class ALLOC >
268  DBTranslator4RangeVariable< ALLOC >& DBTranslator4RangeVariable< ALLOC >::
269  operator=(const DBTranslator4RangeVariable< ALLOC >& from) {
270  if (this != &from) {
272  __variable = from.__variable;
273  __status_int_missing_symbols = from.__status_int_missing_symbols;
274  __translated_int_missing_symbols = from.__translated_int_missing_symbols;
275  __nonint_missing_symbol = from.__nonint_missing_symbol;
276  }
277 
278  return *this;
279  }
280 
281 
283  template < template < typename > class ALLOC >
284  DBTranslator4RangeVariable< ALLOC >& DBTranslator4RangeVariable< ALLOC >::
285  operator=(DBTranslator4RangeVariable< ALLOC >&& from) {
286  if (this != &from) {
287  DBTranslator< ALLOC >::operator=(std::move(from));
288  __variable = std::move(from.__variable);
289  __status_int_missing_symbols =
290  std::move(from.__status_int_missing_symbols);
291  __translated_int_missing_symbols =
292  std::move(from.__translated_int_missing_symbols);
293  __nonint_missing_symbol = std::move(from.__nonint_missing_symbol);
294  }
295 
296  return *this;
297  }
298 
299 
301  template < template < typename > class ALLOC >
302  DBTranslatedValue
303  DBTranslator4RangeVariable< ALLOC >::translate(const std::string& str) {
304  // try to get the index of str within the labelized variable. If this
305  // cannot be found, try to find if this corresponds to a missing value.
306  // Finally, if this is still not a missing value and, if enabled, try
307  // to add str as a new label
308  try {
309  return DBTranslatedValue{this->_back_dico.first(str)};
310  } catch (gum::Exception&) {
311  // check that this is not a missing value
312  if (this->isMissingSymbol(str)) {
313  try {
314  const bool is_str_translated = __status_int_missing_symbols[str];
315  if (!is_str_translated) {
316  __status_int_missing_symbols[str] = true;
317  __translated_int_missing_symbols.insert(std::stol(str));
318  }
319  } catch (gum::NotFound&) {}
320  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
321  }
322 
323  // check if we are allowed to update the range variable
324  if (!this->hasEditableDictionary()) {
325  GUM_ERROR(UnknownLabelInDatabase,
326  "The translation of String \"" << str
327  << "\" could not be found");
328  }
329 
330  // check if str could correspond to a bound of the range variable
331  if (!DBCell::isInteger(str)) {
332  GUM_ERROR(TypeError,
333  "String \"" << str << "\" cannot be translated because "
334  << "it cannot be converted into an integer");
335  }
336  const long new_value = std::stol(str);
337 
338  // if str corresponds to a missing symbol that we already
339  // translated, raise an exception
340  if (__translated_int_missing_symbols.exists(new_value)) {
341  GUM_ERROR(
342  OperationNotAllowed,
343  "String \""
344  << str << "\" cannot be translated because "
345  << "it corresponds to an already translated missing symbol");
346  }
347 
348  // now, we can try to add str as a new bound of the range variable
349  // if possible
350 
351  // if the range variable is empty, set the min and max ranges. Here,
352  // there is no need to check whether the new range would contain an
353  // already translated missing symbol because this was already tested
354  // in the above test.
355  if (__variable.minVal() > __variable.maxVal()) {
356  if (this->_max_dico_entries == 0) {
357  GUM_ERROR(SizeError,
358  "String \"" << str << "\" cannot be translated because "
359  << "the dictionary is already full");
360  }
361  __variable.setMinVal(new_value);
362  __variable.setMaxVal(new_value);
363  this->_back_dico.insert(std::size_t(0), str);
364  return DBTranslatedValue{std::size_t(0)};
365  }
366 
367  // here, the domain is not empty. So we should update either the
368  // lower bound or the upper bound of the range variable, unless
369  // a missing symbol lies within the new bounds and we have already
370  // translated it.
371  const long lower_bound = __variable.minVal();
372  const long upper_bound = __variable.maxVal();
373 
374  std::size_t size = upper_bound - lower_bound + 1;
375 
376  if (new_value < __variable.minVal()) {
377  if (std::size_t(upper_bound - new_value + 1) > this->_max_dico_entries)
378  GUM_ERROR(SizeError,
379  "String \"" << str << "\" cannot be translated because "
380  << "the dictionary is already full");
381 
382  // check that there does not already exist a translated missing
383  // value within the new bounds of the range variable
384  for (const auto& missing : __translated_int_missing_symbols) {
385  if ((missing >= new_value) && (missing <= upper_bound)) {
386  GUM_ERROR(OperationNotAllowed,
387  "String \""
388  << str << "\" cannot be translated "
389  << "because it would induce a new range containing "
390  << "an already translated missing symbol");
391  }
392  }
393 
394  // remove all the missing symbols that were not translated yet and
395  // that lie within the new bounds of the range variable
396  for (auto iter = __status_int_missing_symbols.beginSafe();
397  iter != __status_int_missing_symbols.endSafe();
398  ++iter) {
399  if (iter.val() == false) {
400  const long missing = std::stol(iter.key());
401  if ((missing >= new_value) && (missing <= upper_bound)) {
402  this->_missing_symbols.erase(iter.key());
403  __status_int_missing_symbols.erase(iter);
404  }
405  }
406  }
407 
408  // update the range and the back dictionary
409  const std::size_t index = size;
410  for (long i = new_value; i < __variable.minVal(); ++i) {
411  this->_back_dico.insert(size, std::to_string(i));
412  ++size;
413  }
414  __variable.setMinVal(new_value);
415 
416  return DBTranslatedValue{index};
417  } else {
418  if (std::size_t(new_value - lower_bound + 1) > this->_max_dico_entries)
419  GUM_ERROR(SizeError,
420  "String \"" << str << "\" cannot be translated because "
421  << "the dictionary is already full");
422 
423  // check that there does not already exist a translated missing
424  // value within the new bounds of the range variable
425  for (const auto& missing : __translated_int_missing_symbols) {
426  if ((missing <= new_value) && (missing >= lower_bound)) {
427  GUM_ERROR(OperationNotAllowed,
428  "String \""
429  << str << "\" cannot be translated "
430  << "because it would induce a new range containing "
431  << "an already translated missing symbol");
432  }
433  }
434 
435  // remove all the missing symbols that were not translated yet and
436  // that lie within the new bounds of the range variable
437  for (auto iter = __status_int_missing_symbols.beginSafe();
438  iter != __status_int_missing_symbols.endSafe();
439  ++iter) {
440  if (iter.val() == false) {
441  const long missing = std::stol(iter.key());
442  if ((missing <= new_value) && (missing >= lower_bound)) {
443  this->_missing_symbols.erase(iter.key());
444  __status_int_missing_symbols.erase(iter);
445  }
446  }
447  }
448 
449  // update the range and the back dictionary
450  for (long i = __variable.maxVal() + 1; i <= new_value; ++i) {
451  this->_back_dico.insert(size, std::to_string(i));
452  ++size;
453  }
454  __variable.setMaxVal(new_value);
455 
456  return DBTranslatedValue{size - std::size_t(1)};
457  }
458  }
459  }
460 
461 
463  template < template < typename > class ALLOC >
465  const DBTranslatedValue translated_val) const {
466  try {
467  return this->_back_dico.second(translated_val.discr_val);
468  } catch (Exception&) {
469  // check if this is a missing value
470  if (translated_val.discr_val
471  == std::numeric_limits< std::size_t >::max()) {
472  if (!__nonint_missing_symbol.empty()) return __nonint_missing_symbol;
473  if (this->_missing_symbols.empty())
474  return *(this->_missing_symbols.begin());
475  }
476 
477  GUM_ERROR(UnknownLabelInDatabase,
478  "The back translation of \"" << translated_val.discr_val
479  << "\" could not be found");
480  }
481  }
482 
483 
/// Indicates whether the dictionary needs to be reordered.
///
/// NOTE(review): the embedded listing numbers skip 486, so the declarator
/// line is not visible in this listing.
///
/// The labels of __variable are visited in the order returned by labels()
/// and their indices in _back_dico are compared.
///
/// @return true as soon as a label has a dictionary index smaller than that
/// of the label visited just before it, false if no such label exists
485  template < template < typename > class ALLOC >
487  // if the variable contains only numbers, they should be increasing
488  const auto& labels = __variable.labels();
    // smallest std::size_t value, so that the first label never triggers
    // the "number < last_number" test
489  std::size_t last_number = std::numeric_limits< std::size_t >::lowest();
490  std::size_t number;
491  for (const auto& label : labels) {
492  number = this->_back_dico.first(label);
493  if (number < last_number) return true;
494  last_number = number;
495  }
496 
497  return false;
498  }
499 
500 
/// Reorders the dictionary so that the i-th label of __variable is stored
/// at index i.
///
/// NOTE(review): the embedded listing numbers skip 506, so the declarator
/// line is not visible in this listing.
///
/// @return a hashtable mapping the index each label had before the
/// reordering to its new index. The hashtable is empty when every label was
/// already at its expected index, in which case _back_dico is left untouched.
502  template < template < typename > class ALLOC >
503  INLINE HashTable< std::size_t,
504  std::size_t,
505  ALLOC< std::pair< std::size_t, std::size_t > > >
507  // assign to each label the index it had before reordering
508  const auto& labels = __variable.labels();
509  const std::size_t size = labels.size();
510  std::vector< std::pair< std::size_t, std::string >,
511  ALLOC< std::pair< std::size_t, std::string > > >
512  xlabels;
513  xlabels.reserve(size);
514  bool modifications = false;
515  for (std::size_t i = std::size_t(0); i < size; ++i) {
516  const std::size_t old_val = this->_back_dico.first(labels[i]);
517  xlabels.push_back(std::make_pair(old_val, labels[i]));
518  if (old_val != i) modifications = true;
519  }
520 
521 
522  // if there were no modification, return an empty update hashtable
523  if (!modifications) {
524  return HashTable< std::size_t,
525  std::size_t,
526  ALLOC< std::pair< std::size_t, std::size_t > > >();
527  }
528 
529  // create the hashTable corresponding to the mapping from the old
530  // indices to the new one
    // the dictionary is rebuilt from scratch using the (old index, label)
    // pairs saved in xlabels
531  this->_back_dico.clear();
532  HashTable< std::size_t,
533  std::size_t,
534  ALLOC< std::pair< std::size_t, std::size_t > > >
535  mapping((Size)size);
536  for (std::size_t i = std::size_t(0); i < size; ++i) {
537  mapping.insert(xlabels[i].first, i);
538  this->_back_dico.insert(i, xlabels[i].second);
539  }
540 
541  return mapping;
542  }
543 
544 
546  template < template < typename > class ALLOC >
547  INLINE std::size_t DBTranslator4RangeVariable< ALLOC >::domainSize() const {
548  return __variable.domainSize();
549  }
550 
551 
/// Returns the variable stored into the translator.
///
/// NOTE(review): the embedded listing numbers skip 555, so the declarator
/// line is not visible in this listing.
///
/// @return a pointer to the __variable member: the pointee is owned by the
/// translator and is not a copy
553  template < template < typename > class ALLOC >
554  INLINE const RangeVariable*
556  return &__variable;
557  }
558 
559 
/// Returns the translation of a missing value.
///
/// NOTE(review): the embedded listing numbers skip 563, so the declarator
/// line is not visible in this listing.
///
/// @return a translated value holding std::numeric_limits< std::size_t >::max(),
/// the same code as the one returned by translate() for missing symbols
561  template < template < typename > class ALLOC >
562  INLINE DBTranslatedValue
564  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
565  }
566 
567 
568  } /* namespace learning */
569 
570 } /* namespace gum */
571 
572 
573 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > _back_dico
the bijection relating back translated values and their original strings.
Definition: DBTranslator.h:393
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
virtual bool needsReordering() const final
indicates whether a reordering is needed to make the translations sorted by increasing numbers ...
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
void clear()
Removes all the associations from the gum::Bijection.
virtual const RangeVariable * variable() const final
returns the variable stored into the translator
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
std::size_t _max_dico_entries
the maximum number of entries that the dictionary is allowed to contain
Definition: DBTranslator.h:379
static bool isInteger(const std::string &str)
determines whether a string corresponds precisely to an integer
The databases' cell translators for range variables.
STL namespace.
DBTranslator4RangeVariable< ALLOC > & operator=(const DBTranslator4RangeVariable< ALLOC > &from)
copy operator
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
DBTranslator(DBTranslatedValueType val_type, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool editable_dictionary=true, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor
virtual ~DBTranslator4RangeVariable()
destructor
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value
allocator_type getAllocator() const
returns the allocator used by the translator
The class representing the original values of the cells of databases.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
std::string to_string(const Formula &f)
Definition: formula_inl.h:479
Set< std::string, ALLOC< std::string > > _missing_symbols
the set of missing symbols
Definition: DBTranslator.h:382
DBTranslator< ALLOC > & operator=(const DBTranslator< ALLOC > &from)
copy operator
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
performs a reordering of the dictionary and returns a mapping from the old translated values to the new ones
Base class for all aGrUM's exceptions.
Definition: exceptions.h:103
virtual std::size_t domainSize() const final
returns the domain size of a variable corresponding to the translations
virtual bool hasEditableDictionary() const
indicates whether the translator has an editable dictionary or not
bool isMissingSymbol(const std::string &str) const
indicates whether a string corresponds to a missing symbol
typename DBTranslator< ALLOC >::allocator_type allocator_type
type for the allocators passed in arguments of methods
virtual DBTranslator4RangeVariable< ALLOC > * clone() const
virtual copy constructor
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45
DBTranslator4RangeVariable(const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor without any initial variable
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52