aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBTranslator4RangeVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for range variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #include <utility>
29 #include <vector>
30 #include <limits>
31 #include <cstdio>
32 
33 #include <agrum/tools/database/DBTranslator4RangeVariable.h>
34 #include <agrum/tools/database/DBCell.h>
35 
36 #ifndef DOXYGEN_SHOULD_SKIP_THIS
37 
38 namespace gum {
39 
40  namespace learning {
41 
42 
43  /// default constructor: builds the translator from a list of missing
    /// symbols only (no range variable is supplied by the caller)
    ///
    /// @param missing_symbols the strings to be considered as missing values;
    /// forwarded to the DBTranslator base class
    /// @param max_dico_entries the maximal number of entries allowed in the
    /// dictionary; forwarded to the DBTranslator base class
    /// @param alloc the allocator forwarded to the DBTranslator base class
44  template < template < typename > class ALLOC >
45  template < template < typename > class XALLOC >
46  DBTranslator4RangeVariable< ALLOC >::DBTranslator4RangeVariable(
47  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
48  std::size_t max_dico_entries,
49  const typename DBTranslator4RangeVariable< ALLOC >::allocator_type& alloc) :
50  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
51  missing_symbols,
    // NOTE(review): presumably the "editable dictionary" flag of the base
    // class -- confirm against the DBTranslator constructor
52  true,
53  max_dico_entries,
54  alloc),
    // NOTE(review): presumably (name, description, min, max); with
    // min = 1 > max = 0 the range would start empty -- confirm against
    // RangeVariable
55  _variable_("var", "", 1, 0) {
56  // assign to each integer missing symbol a Boolean indicating that
57  // we did not translate it yet. If we encounter a non integer missing
58  // symbol, we record it because it cannot be compromised by updating the
59  // domain of the range variable. Only the first non integer missing
    // symbol encountered is recorded.
60  bool non_int_symbol_found = false;
61  for (const auto& symbol: this->missing_symbols_) {
62  if (DBCell::isInteger(symbol)) {
    // false = this integer missing symbol has not been translated yet
63  _status_int_missing_symbols_.insert(symbol, false);
64  } else if (!non_int_symbol_found) {
65  non_int_symbol_found = true;
66  _nonint_missing_symbol_ = symbol;
67  }
68  }
69 
70  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
71  }
72 
73 
74  /// default constructor
75  template < template < typename > class ALLOC >
80  _variable_("var", "", 1, 0) {
82  }
83 
84 
85  /// default constructor with a range variable as translator
86  template < template < typename > class ALLOC >
87  template < template < typename > class XALLOC >
89  const RangeVariable& var,
91  const bool editable_dictionary,
98  alloc),
99  _variable_(var) {
100  // get the bounds of the range variable
101  const long lower_bound = var.minVal();
102  const long upper_bound = var.maxVal();
103 
104  // check that the variable has not too many entries for the dictionary
105  if ((upper_bound >= lower_bound)
106  && (std::size_t(upper_bound - lower_bound + 1) > this->max_dico_entries_)) {
107  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
108  }
109 
110  // if the range variable is not empty, i.e., its upper bound is greater
111  // than or equal to its lower bound, remove all the missing symbols
112  // corresponding to a number between lower_bound and upper_bound
113  if (lower_bound <= upper_bound) {
114  for (auto iter = this->missing_symbols_.beginSafe();
115  iter != this->missing_symbols_.endSafe();
116  ++iter) {
117  if (DBCell::isInteger(*iter)) {
118  const long missing_val = std::stol(*iter);
119  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
120  this->missing_symbols_.erase(iter);
121  }
122  }
123  }
124  }
125 
126  // add the content of the variable into the back dictionary
127  std::size_t size = 0;
128  for (const auto& label: var.labels()) {
129  // insert the label into the back_dictionary
130  this->back_dico_.insert(size, label);
131  ++size;
132  }
133 
134  // assign to each integer missing symbol a Boolean indicating that
135  // we did not translate it yet. If we encounter a non integer symbol,
136  // we record it because it cannot be compromised by updating the domain
137  // of the range variable. This will be useful for back translations
138  bool non_int_symbol_found = false;
139  for (const auto& symbol: this->missing_symbols_) {
140  if (DBCell::isInteger(symbol)) {
142  } else if (!non_int_symbol_found) {
143  non_int_symbol_found = true;
145  }
146  }
147 
149  }
150 
151 
152  /// default constructor with a range variable as translator
153  template < template < typename > class ALLOC >
155  const RangeVariable& var,
156  const bool editable_dictionary,
162  alloc),
163  _variable_(var) {
164  // get the bounds of the range variable
165  const long lower_bound = var.minVal();
166  const long upper_bound = var.maxVal();
167 
168  // check that the variable has not too many entries for the dictionary
169  if ((upper_bound >= lower_bound)
170  && (std::size_t(upper_bound - lower_bound + 1) > this->max_dico_entries_)) {
171  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
172  }
173 
174  // add the content of the variable into the back dictionary
175  std::size_t size = 0;
176  for (const auto& label: var.labels()) {
177  // insert the label into the back_dictionary
178  this->back_dico_.insert(size, label);
179  ++size;
180  }
181 
183  }
184 
185 
186  /// copy constructor with a given allocator
187  template < template < typename > class ALLOC >
197  }
198 
199 
200  /// copy constructor
201  template < template < typename > class ALLOC >
205 
206 
207  /// move constructor with a given allocator
208  template < template < typename > class ALLOC >
218  }
219 
220 
221  /// move constructor
222  template < template < typename > class ALLOC >
226 
227 
228  /// virtual copy constructor with a given allocator
229  template < template < typename > class ALLOC >
231  const typename DBTranslator4RangeVariable< ALLOC >::allocator_type& alloc) const {
234  try {
236  } catch (...) {
238  throw;
239  }
240  return translator;
241  }
242 
243 
244  /// virtual copy constructor
245  template < template < typename > class ALLOC >
247  return clone(this->getAllocator());
248  }
249 
250 
251  /// destructor
252  template < template < typename > class ALLOC >
255  }
256 
257 
258  /// copy operator
259  template < template < typename > class ALLOC >
262  if (this != &from) {
268  }
269 
270  return *this;
271  }
272 
273 
274  /// move operator
275  template < template < typename > class ALLOC >
278  if (this != &from) {
284  }
285 
286  return *this;
287  }
288 
289 
290  /// returns the translation of a string, as found in the current dictionary
291  template < template < typename > class ALLOC >
293  // try to get the index of str within the labelized variable. If this
294  // cannot be found, try to find if this corresponds to a missing value.
295  // Finally, if this is still not a missing value and, if enabled, try
296  // to add str as a new label
297  try {
298  return DBTranslatedValue{this->back_dico_.first(str)};
299  } catch (gum::Exception&) {
300  // check that this is not a missing value
301  if (this->isMissingSymbol(str)) {
302  try {
304  if (!is_str_translated) {
307  }
308  } catch (gum::NotFound&) {}
310  }
311 
312  // check if we are allowed to update the range variable
313  if (!this->hasEditableDictionary()) {
315  "The translation of String \"" << str << "\" could not be found");
316  }
317 
318  // check if str could correspond to a bound of the range variable
319  if (!DBCell::isInteger(str)) {
321  "String \"" << str << "\" cannot be translated because "
322  << "it cannot be converted into an integer");
323  }
324  const long new_value = std::stol(str);
325 
326  // if str corresponds to a missing symbol that we already
327  // translated, raise an exception
330  "String \"" << str << "\" cannot be translated because "
331  << "it corresponds to an already translated missing symbol");
332  }
333 
334  // now, we can try to add str as a new bound of the range variable
335  // if possible
336 
337  // if the range variable is empty, set the min and max ranges. Here,
338  // there is no need to check whether the new range would contain an
339  // already translated missing symbol because this was already tested
340  // in the above test.
341  if (_variable_.minVal() > _variable_.maxVal()) {
342  if (this->max_dico_entries_ == 0) {
344  "String \"" << str << "\" cannot be translated because "
345  << "the dictionary is already full");
346  }
349  this->back_dico_.insert(std::size_t(0), str);
350  return DBTranslatedValue{std::size_t(0)};
351  }
352 
353  // here, the domain is not empty. So we should update either the
354  // lower bound or the upper bound of the range variable, unless
355  // a missing symbol lies within the new bounds and we have already
356  // translated it.
357  const long lower_bound = _variable_.minVal();
358  const long upper_bound = _variable_.maxVal();
359 
361 
362  if (new_value < _variable_.minVal()) {
363  if (std::size_t(upper_bound - new_value + 1) > this->max_dico_entries_)
365  "String \"" << str << "\" cannot be translated because "
366  << "the dictionary is already full");
367 
368  // check that there does not already exist a translated missing
369  // value within the new bounds of the range variable
370  for (const auto& missing: _translated_int_missing_symbols_) {
371  if ((missing >= new_value) && (missing <= upper_bound)) {
373  "String \"" << str << "\" cannot be translated "
374  << "because it would induce a new range containing "
375  << "an already translated missing symbol");
376  }
377  }
378 
379  // remove all the missing symbols that were not translated yet and
380  // that lie within the new bounds of the range variable
383  ++iter) {
384  if (iter.val() == false) {
385  const long missing = std::stol(iter.key());
386  if ((missing >= new_value) && (missing <= upper_bound)) {
387  this->missing_symbols_.erase(iter.key());
389  }
390  }
391  }
392 
393  // update the range and the back dictionary
394  const std::size_t index = size;
395  for (long i = new_value; i < _variable_.minVal(); ++i) {
396  this->back_dico_.insert(size, std::to_string(i));
397  ++size;
398  }
400 
401  return DBTranslatedValue{index};
402  } else {
403  if (std::size_t(new_value - lower_bound + 1) > this->max_dico_entries_)
405  "String \"" << str << "\" cannot be translated because "
406  << "the dictionary is already full");
407 
408  // check that there does not already exist a translated missing
409  // value within the new bounds of the range variable
410  for (const auto& missing: _translated_int_missing_symbols_) {
411  if ((missing <= new_value) && (missing >= lower_bound)) {
413  "String \"" << str << "\" cannot be translated "
414  << "because it would induce a new range containing "
415  << "an already translated missing symbol");
416  }
417  }
418 
419  // remove all the missing symbols that were not translated yet and
420  // that lie within the new bounds of the range variable
423  ++iter) {
424  if (iter.val() == false) {
425  const long missing = std::stol(iter.key());
426  if ((missing <= new_value) && (missing >= lower_bound)) {
427  this->missing_symbols_.erase(iter.key());
429  }
430  }
431  }
432 
433  // update the range and the back dictionary
434  for (long i = _variable_.maxVal() + 1; i <= new_value; ++i) {
435  this->back_dico_.insert(size, std::to_string(i));
436  ++size;
437  }
439 
440  return DBTranslatedValue{size - std::size_t(1)};
441  }
442  }
443  }
444 
445 
446  /// returns the original value for a given translation
     ///
     /// @param translated_val the translated value whose original string is
     /// sought (its discr_val field is the one reported in the error message)
     /// @return the original string; when the lookup in the try block raises
     /// an Exception and the value is recognized as a missing value, a missing
     /// symbol is returned instead
     /// @note when no back translation can be produced, an error whose message
     /// is "The back translation of ... could not be found" is reported
447  template < template < typename > class ALLOC >
449  const DBTranslatedValue translated_val) const {
450  try {
452  } catch (Exception&) {
453  // check if this is a missing value
     // only dereference begin() when there actually is a missing symbol:
     // the former test was inverted (it fired on an empty set, which
     // dereferenced the begin iterator of an empty container and never
     // returned a symbol when some were available)
456  if (!this->missing_symbols_.empty()) return *(this->missing_symbols_.begin());
457  }
458 
460  "The back translation of \"" << translated_val.discr_val
461  << "\" could not be found");
462  }
463  }
464 
465 
466  /// indicates whether the translations should be reordered
467  template < template < typename > class ALLOC >
469  // if the variable contains only numbers, they should be increasing
470  const auto& labels = _variable_.labels();
472  std::size_t number;
473  for (const auto& label: labels) {
474  number = this->back_dico_.first(label);
475  if (number < last_number) return true;
477  }
478 
479  return false;
480  }
481 
482 
483  /// returns a mapping to reorder the current dictionary and updates it
484  template < template < typename > class ALLOC >
487  // assign to each label the index it had before reordering
488  const auto& labels = _variable_.labels();
489  const std::size_t size = labels.size();
490  std::vector< std::pair< std::size_t, std::string >,
491  ALLOC< std::pair< std::size_t, std::string > > >
492  xlabels;
494  bool modifications = false;
495  for (std::size_t i = std::size_t(0); i < size; ++i) {
496  const std::size_t old_val = this->back_dico_.first(labels[i]);
498  if (old_val != i) modifications = true;
499  }
500 
501 
502  // if there were no modification, return an empty update hashtable
503  if (!modifications) {
504  return HashTable< std::size_t,
505  std::size_t,
506  ALLOC< std::pair< std::size_t, std::size_t > > >();
507  }
508 
509  // create the hashTable corresponding to the mapping from the old
510  // indices to the new one
511  this->back_dico_.clear();
513  (Size)size);
514  for (std::size_t i = std::size_t(0); i < size; ++i) {
516  this->back_dico_.insert(i, xlabels[i].second);
517  }
518 
519  return mapping;
520  }
521 
522 
523  /// returns the domain size of a variable corresponding to the translations
524  template < template < typename > class ALLOC >
526  return _variable_.domainSize();
527  }
528 
529 
530  /// returns the variable stored into the translator
531  template < template < typename > class ALLOC >
533  return &_variable_;
534  }
535 
536 
537  /// returns the translation of a missing value
538  template < template < typename > class ALLOC >
541  }
542 
543 
544  } /* namespace learning */
545 
546 } /* namespace gum */
547 
548 
549 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)