aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
DBTranslator4RangeVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for range variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #include <utility>
29 #include <vector>
30 #include <limits>
31 #include <cstdio>
32 
33 #include <agrum/tools/database/DBTranslator4RangeVariable.h>
34 #include <agrum/tools/database/DBCell.h>
35 
36 #ifndef DOXYGEN_SHOULD_SKIP_THIS
37 
38 namespace gum {
39 
40  namespace learning {
41 
42 
43  /// default constructor: records the missing symbols and starts from the range variable "var"
44  template < template < typename > class ALLOC >
45  template < template < typename > class XALLOC >
46  DBTranslator4RangeVariable< ALLOC >::DBTranslator4RangeVariable(
47  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
48  std::size_t max_dico_entries,
49  const typename DBTranslator4RangeVariable< ALLOC >::allocator_type& alloc) :
50  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
51  true,
52  missing_symbols,
53  true,
54  max_dico_entries,
55  alloc),
56  _variable_("var", "", 1, 0) { // presumably (name, description, min = 1, max = 0), i.e., an empty range -- TODO confirm against RangeVariable
57  // flag each integer missing symbol with a Boolean set to false, meaning
58  // that we did not translate it yet. Among the non integer missing symbols,
59  // only the first one encountered is recorded: it cannot be compromised by
60  // updating the domain of the range variable
61  bool non_int_symbol_found = false;
62  for (const auto& symbol: this->missing_symbols_) {
63  if (DBCell::isInteger(symbol)) {
64  _status_int_missing_symbols_.insert(symbol, false);
65  } else if (!non_int_symbol_found) { // keep only the first non integer symbol
66  non_int_symbol_found = true;
67  _nonint_missing_symbol_ = symbol;
68  }
69  }
70 
71  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
72  }
73 
74 
75  /// default constructor
76  template < template < typename > class ALLOC >
81  _variable_("var", "", 1, 0) {
83  }
84 
85 
86  /// default constructor with a range variable as translator
87  template < template < typename > class ALLOC >
88  template < template < typename > class XALLOC >
90  const RangeVariable& var,
92  const bool editable_dictionary,
96  true,
100  alloc),
101  _variable_(var) {
102  // get the bounds of the range variable
103  const long lower_bound = var.minVal();
104  const long upper_bound = var.maxVal();
105 
106  // check that the variable has not too many entries for the dictionary
107  if ((upper_bound >= lower_bound)
108  && (std::size_t(upper_bound - lower_bound + 1) > this->max_dico_entries_)) {
109  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
110  }
111 
112  // if the range variable is not empty, i.e., its upper bound is greater
113  // than or equal to its lower bound, remove all the missing symbols
114  // corresponding to a number between lower_bound and upper_bound
115  if (lower_bound <= upper_bound) {
116  for (auto iter = this->missing_symbols_.beginSafe();
117  iter != this->missing_symbols_.endSafe();
118  ++iter) {
119  if (DBCell::isInteger(*iter)) {
120  const long missing_val = std::stol(*iter);
121  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
122  this->missing_symbols_.erase(iter);
123  }
124  }
125  }
126  }
127 
128  // add the content of the variable into the back dictionary
129  std::size_t size = 0;
130  for (const auto& label: var.labels()) {
131  // insert the label into the back_dictionary
132  this->back_dico_.insert(size, label);
133  ++size;
134  }
135 
136  // assign to each integer missing symbol a Boolean indicating that
137  // we did not translate it yet. If we encounter a non integer symbol,
138  // we record it because it cannot be compromised by updating the domain
139  // of the range variable. This will be useful for back translations
140  bool non_int_symbol_found = false;
141  for (const auto& symbol: this->missing_symbols_) {
142  if (DBCell::isInteger(symbol)) {
144  } else if (!non_int_symbol_found) {
145  non_int_symbol_found = true;
147  }
148  }
149 
151  }
152 
153 
154  /// default constructor with a range variable as translator
155  template < template < typename > class ALLOC >
157  const RangeVariable& var,
158  const bool editable_dictionary,
162  true,
165  alloc),
166  _variable_(var) {
167  // get the bounds of the range variable
168  const long lower_bound = var.minVal();
169  const long upper_bound = var.maxVal();
170 
171  // check that the variable has not too many entries for the dictionary
172  if ((upper_bound >= lower_bound)
173  && (std::size_t(upper_bound - lower_bound + 1) > this->max_dico_entries_)) {
174  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
175  }
176 
177  // add the content of the variable into the back dictionary
178  std::size_t size = 0;
179  for (const auto& label: var.labels()) {
180  // insert the label into the back_dictionary
181  this->back_dico_.insert(size, label);
182  ++size;
183  }
184 
186  }
187 
188 
189  /// copy constructor with a given allocator
190  template < template < typename > class ALLOC >
200  }
201 
202 
203  /// copy constructor
204  template < template < typename > class ALLOC >
208 
209 
210  /// move constructor with a given allocator
211  template < template < typename > class ALLOC >
221  }
222 
223 
224  /// move constructor
225  template < template < typename > class ALLOC >
229 
230 
231  /// virtual copy constructor with a given allocator
232  template < template < typename > class ALLOC >
234  const typename DBTranslator4RangeVariable< ALLOC >::allocator_type& alloc) const {
237  try {
239  } catch (...) {
241  throw;
242  }
243  return translator;
244  }
245 
246 
247  /// virtual copy constructor
248  template < template < typename > class ALLOC >
250  return clone(this->getAllocator());
251  }
252 
253 
254  /// destructor
255  template < template < typename > class ALLOC >
258  }
259 
260 
261  /// copy operator
262  template < template < typename > class ALLOC >
265  if (this != &from) {
271  }
272 
273  return *this;
274  }
275 
276 
277  /// move operator
278  template < template < typename > class ALLOC >
281  if (this != &from) {
287  }
288 
289  return *this;
290  }
291 
292 
293  /// returns the translation of a string, as found in the current dictionary
294  template < template < typename > class ALLOC >
296  // try to get the index of str within the range variable. If this
297  // cannot be found, check whether str corresponds to a missing value.
298  // Finally, if this is still not a missing value and, if enabled, try
299  // to add str as a new bound of the range variable
300  try {
301  return DBTranslatedValue{this->back_dico_.first(str)};
302  } catch (gum::Exception&) {
303  // check that this is not a missing value
304  if (this->isMissingSymbol(str)) {
305  try {
307  if (!is_str_translated) {
310  }
311  } catch (gum::NotFound&) {}
313  }
314 
315  // check if we are allowed to update the range variable
316  if (!this->hasEditableDictionary()) {
318  "The translation of String \"" << str << "\" could not be found");
319  }
320 
321  // check if str could correspond to a bound of the range variable
322  if (!DBCell::isInteger(str)) {
324  "String \"" << str << "\" cannot be translated because "
325  << "it cannot be converted into an integer");
326  }
327  const long new_value = std::stol(str);
328 
329  // if str corresponds to a missing symbol that we already
330  // translated, raise an exception
333  "String \"" << str << "\" cannot be translated because "
334  << "it corresponds to an already translated missing symbol");
335  }
336 
337  // now, we can try to add str as a new bound of the range variable
338  // if possible
339 
340  // if the range variable is empty, set the min and max ranges. Here,
341  // there is no need to check whether the new range would contain an
342  // already translated missing symbol because this was already tested
343  // in the above test.
344  if (_variable_.minVal() > _variable_.maxVal()) {
345  if (this->max_dico_entries_ == 0) {
347  "String \"" << str << "\" cannot be translated because "
348  << "the dictionary is already full");
349  }
352  this->back_dico_.insert(std::size_t(0), str);
353  return DBTranslatedValue{std::size_t(0)};
354  }
355 
356  // here, the domain is not empty. So we should update either the
357  // lower bound or the upper bound of the range variable, unless
358  // a missing symbol lies within the new bounds and we have already
359  // translated it.
360  const long lower_bound = _variable_.minVal();
361  const long upper_bound = _variable_.maxVal();
362 
364 
365  if (new_value < _variable_.minVal()) {
366  if (std::size_t(upper_bound - new_value + 1) > this->max_dico_entries_)
368  "String \"" << str << "\" cannot be translated because "
369  << "the dictionary is already full");
370 
371  // check that there does not already exist a translated missing
372  // value within the new bounds of the range variable
373  for (const auto& missing: _translated_int_missing_symbols_) {
374  if ((missing >= new_value) && (missing <= upper_bound)) {
376  "String \"" << str << "\" cannot be translated "
377  << "because it would induce a new range containing "
378  << "an already translated missing symbol");
379  }
380  }
381 
382  // remove all the missing symbols that were not translated yet and
383  // that lie within the new bounds of the range variable
386  ++iter) {
387  if (iter.val() == false) {
388  const long missing = std::stol(iter.key());
389  if ((missing >= new_value) && (missing <= upper_bound)) {
390  this->missing_symbols_.erase(iter.key());
392  }
393  }
394  }
395 
396  // update the range and the back dictionary
397  const std::size_t index = size;
398  for (long i = new_value; i < _variable_.minVal(); ++i) {
399  this->back_dico_.insert(size, std::to_string(i));
400  ++size;
401  }
403 
404  return DBTranslatedValue{index};
405  } else {
406  if (std::size_t(new_value - lower_bound + 1) > this->max_dico_entries_)
408  "String \"" << str << "\" cannot be translated because "
409  << "the dictionary is already full");
410 
411  // check that there does not already exist a translated missing
412  // value within the new bounds of the range variable
413  for (const auto& missing: _translated_int_missing_symbols_) {
414  if ((missing <= new_value) && (missing >= lower_bound)) {
416  "String \"" << str << "\" cannot be translated "
417  << "because it would induce a new range containing "
418  << "an already translated missing symbol");
419  }
420  }
421 
422  // remove all the missing symbols that were not translated yet and
423  // that lie within the new bounds of the range variable
426  ++iter) {
427  if (iter.val() == false) {
428  const long missing = std::stol(iter.key());
429  if ((missing <= new_value) && (missing >= lower_bound)) {
430  this->missing_symbols_.erase(iter.key());
432  }
433  }
434  }
435 
436  // update the range and the back dictionary
437  for (long i = _variable_.maxVal() + 1; i <= new_value; ++i) {
438  this->back_dico_.insert(size, std::to_string(i));
439  ++size;
440  }
442 
443  return DBTranslatedValue{size - std::size_t(1)};
444  }
445  }
446  }
447 
448 
449  /// returns the original value for a given translation
450  template < template < typename > class ALLOC >
452  const DBTranslatedValue translated_val) const {
453  try {
455  } catch (Exception&) {
456  // check if this is a missing value
459  if (this->missing_symbols_.empty()) return *(this->missing_symbols_.begin());
460  }
461 
463  "The back translation of \"" << translated_val.discr_val
464  << "\" could not be found");
465  }
466  }
467 
468 
469  /// indicates whether the translations should be reordered
470  template < template < typename > class ALLOC >
472  // if the variable contains only numbers, they should be increasing
473  const auto& labels = _variable_.labels();
475  std::size_t number;
476  for (const auto& label: labels) {
477  number = this->back_dico_.first(label);
478  if (number < last_number) return true;
480  }
481 
482  return false;
483  }
484 
485 
486  /// returns a mapping to reorder the current dictionary and updates it
487  template < template < typename > class ALLOC >
490  // assign to each label the index it had before reordering
491  const auto& labels = _variable_.labels();
492  const std::size_t size = labels.size();
493  std::vector< std::pair< std::size_t, std::string >,
494  ALLOC< std::pair< std::size_t, std::string > > >
495  xlabels;
497  bool modifications = false;
498  for (std::size_t i = std::size_t(0); i < size; ++i) {
499  const std::size_t old_val = this->back_dico_.first(labels[i]);
501  if (old_val != i) modifications = true;
502  }
503 
504 
505  // if there were no modification, return an empty update hashtable
506  if (!modifications) {
507  return HashTable< std::size_t,
508  std::size_t,
509  ALLOC< std::pair< std::size_t, std::size_t > > >();
510  }
511 
512  // create the hashTable corresponding to the mapping from the old
513  // indices to the new one
514  this->back_dico_.clear();
516  (Size)size);
517  for (std::size_t i = std::size_t(0); i < size; ++i) {
519  this->back_dico_.insert(i, xlabels[i].second);
520  }
521 
522  return mapping;
523  }
524 
525 
526  /// returns the domain size of a variable corresponding to the translations
527  template < template < typename > class ALLOC >
529  return _variable_.domainSize();
530  }
531 
532 
533  /// returns the variable stored into the translator
534  template < template < typename > class ALLOC >
536  return &_variable_;
537  }
538 
539 
540  /// returns the translation of a missing value
541  template < template < typename > class ALLOC >
544  }
545 
546 
547  } /* namespace learning */
548 
549 } /* namespace gum */
550 
551 
552 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)