aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
DBTranslator4RangeVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for range variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #include <utility>
29 #include <vector>
30 #include <limits>
31 #include <cstdio>
32 
33 #include <agrum/tools/database/DBTranslator4RangeVariable.h>
34 #include <agrum/tools/database/DBCell.h>
35 
36 #ifndef DOXYGEN_SHOULD_SKIP_THIS
37 
38 namespace gum {
39 
40  namespace learning {
41 
42 
43  /// default constructor
44  template < template < typename > class ALLOC >
45  template < template < typename > class XALLOC >
46  DBTranslator4RangeVariable< ALLOC >::DBTranslator4RangeVariable(
47  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
48  std::size_t max_dico_entries,
49  const typename DBTranslator4RangeVariable< ALLOC >::allocator_type& alloc) :
50  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
51  missing_symbols,
52  true,
53  max_dico_entries,
54  alloc),
55  variable__("var", "", 1, 0) {
56  // assign to each integer missing symbol a Boolean indicating that
57  // we did not translate it yet. If we encounter a non integer missing
58  // symbol, we record it because it cannot be compomised by updating the
59  // domain of the range variable
60  bool non_int_symbol_found = false;
61  for (const auto& symbol: this->missing_symbols_) {
62  if (DBCell::isInteger(symbol)) {
63  status_int_missing_symbols__.insert(symbol, false);
64  } else if (!non_int_symbol_found) {
65  non_int_symbol_found = true;
66  nonint_missing_symbol__ = symbol;
67  }
68  }
69 
70  GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
71  }
72 
73 
74  /// default constructor
75  template < template < typename > class ALLOC >
80  true,
82  alloc),
83  variable__("var", "", 1, 0) {
85  }
86 
87 
88  /// default constructor with a range variable as translator
89  template < template < typename > class ALLOC >
90  template < template < typename > class XALLOC >
92  const RangeVariable& var,
94  const bool editable_dictionary,
101  alloc),
102  variable__(var) {
103  // get the bounds of the range variable
104  const long lower_bound = var.minVal();
105  const long upper_bound = var.maxVal();
106 
107  // check that the variable has not too many entries for the dictionary
108  if ((upper_bound >= lower_bound)
109  && (std::size_t(upper_bound - lower_bound + 1)
110  > this->max_dico_entries_)) {
112  "the dictionary induced by the variable is too large");
113  }
114 
115  // if the range variable is not empty, i.e., its upper bound is greater
116  // than or equal to its lower bound, remove all the missing symbols
117  // corresponding to a number between lower_bound and upper_bound
118  if (lower_bound <= upper_bound) {
119  for (auto iter = this->missing_symbols_.beginSafe();
120  iter != this->missing_symbols_.endSafe();
121  ++iter) {
122  if (DBCell::isInteger(*iter)) {
123  const long missing_val = std::stol(*iter);
124  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
125  this->missing_symbols_.erase(iter);
126  }
127  }
128  }
129  }
130 
131  // add the content of the variable into the back dictionary
132  std::size_t size = 0;
133  for (const auto& label: var.labels()) {
134  // insert the label into the back_dictionary
135  this->back_dico_.insert(size, label);
136  ++size;
137  }
138 
139  // assign to each integer missing symbol a Boolean indicating that
140  // we did not translate it yet. If we encounter a non integer symbol,
141  // we record it because it cannot be compromised by updating the domain
142  // of the range variable. This will be useful for back translations
143  bool non_int_symbol_found = false;
144  for (const auto& symbol: this->missing_symbols_) {
145  if (DBCell::isInteger(symbol)) {
147  } else if (!non_int_symbol_found) {
148  non_int_symbol_found = true;
150  }
151  }
152 
154  }
155 
156 
157  /// default constructor with a range variable as translator
158  template < template < typename > class ALLOC >
160  const RangeVariable& var,
161  const bool editable_dictionary,
167  alloc),
168  variable__(var) {
169  // get the bounds of the range variable
170  const long lower_bound = var.minVal();
171  const long upper_bound = var.maxVal();
172 
173  // check that the variable has not too many entries for the dictionary
174  if ((upper_bound >= lower_bound)
175  && (std::size_t(upper_bound - lower_bound + 1)
176  > this->max_dico_entries_)) {
178  "the dictionary induced by the variable is too large");
179  }
180 
181  // add the content of the variable into the back dictionary
182  std::size_t size = 0;
183  for (const auto& label: var.labels()) {
184  // insert the label into the back_dictionary
185  this->back_dico_.insert(size, label);
186  ++size;
187  }
188 
190  }
191 
192 
193  /// copy constructor with a given allocator
194  template < template < typename > class ALLOC >
204  }
205 
206 
207  /// copy constructor
208  template < template < typename > class ALLOC >
212 
213 
214  /// move constructor with a given allocator
215  template < template < typename > class ALLOC >
226  }
227 
228 
229  /// move constructor
230  template < template < typename > class ALLOC >
234  }
235 
236 
237  /// virtual copy constructor with a given allocator
238  template < template < typename > class ALLOC >
242  alloc) const {
245  try {
247  } catch (...) {
249  throw;
250  }
251  return translator;
252  }
253 
254 
255  /// virtual copy constructor
256  template < template < typename > class ALLOC >
259  return clone(this->getAllocator());
260  }
261 
262 
263  /// destructor
264  template < template < typename > class ALLOC >
267  }
268 
269 
270  /// copy operator
271  template < template < typename > class ALLOC >
275  if (this != &from) {
281  }
282 
283  return *this;
284  }
285 
286 
287  /// move operator
288  template < template < typename > class ALLOC >
292  if (this != &from) {
300  }
301 
302  return *this;
303  }
304 
305 
306  /// returns the translation of a string, as found in the current dictionary
307  template < template < typename > class ALLOC >
310  // try to get the index of str within the range variable. If this
311  // cannot be found, try to find if this corresponds to a missing value.
312  // Finally, if this is still not a missing value and, if enabled, try
313  // to add str as a new label
314  try {
315  return DBTranslatedValue{this->back_dico_.first(str)};
316  } catch (gum::Exception&) {
317  // check that this is not a missing value
318  if (this->isMissingSymbol(str)) {
319  try {
321  if (!is_str_translated) {
324  }
325  } catch (gum::NotFound&) {}
327  }
328 
329  // check if we are allowed to update the range variable
330  if (!this->hasEditableDictionary()) {
332  "The translation of String \"" << str
333  << "\" could not be found");
334  }
335 
336  // check if str could correspond to a bound of the range variable
337  if (!DBCell::isInteger(str)) {
339  "String \"" << str << "\" cannot be translated because "
340  << "it cannot be converted into an integer");
341  }
342  const long new_value = std::stol(str);
343 
344  // if str corresponds to a missing symbol that we already
345  // translated, raise an exception
347  GUM_ERROR(
349  "String \""
350  << str << "\" cannot be translated because "
351  << "it corresponds to an already translated missing symbol");
352  }
353 
354  // now, we can try to add str as a new bound of the range variable
355  // if possible
356 
357  // if the range variable is empty, set the min and max ranges. Here,
358  // there is no need to check whether the new range would contain an
359  // already translated missing symbol because this was already tested
360  // in the above test.
361  if (variable__.minVal() > variable__.maxVal()) {
362  if (this->max_dico_entries_ == 0) {
364  "String \"" << str << "\" cannot be translated because "
365  << "the dictionary is already full");
366  }
369  this->back_dico_.insert(std::size_t(0), str);
370  return DBTranslatedValue{std::size_t(0)};
371  }
372 
373  // here, the domain is not empty. So we should update either the
374  // lower bound or the upper bound of the range variable, unless
375  // a missing symbol lies within the new bounds and we have already
376  // translated it.
377  const long lower_bound = variable__.minVal();
378  const long upper_bound = variable__.maxVal();
379 
381 
382  if (new_value < variable__.minVal()) {
383  if (std::size_t(upper_bound - new_value + 1) > this->max_dico_entries_)
385  "String \"" << str << "\" cannot be translated because "
386  << "the dictionary is already full");
387 
388  // check that there does not already exist a translated missing
389  // value within the new bounds of the range variable
390  for (const auto& missing: translated_int_missing_symbols__) {
391  if ((missing >= new_value) && (missing <= upper_bound)) {
393  "String \""
394  << str << "\" cannot be translated "
395  << "because it would induce a new range containing "
396  << "an already translated missing symbol");
397  }
398  }
399 
400  // remove all the missing symbols that were not translated yet and
401  // that lie within the new bounds of the range variable
404  ++iter) {
405  if (iter.val() == false) {
406  const long missing = std::stol(iter.key());
407  if ((missing >= new_value) && (missing <= upper_bound)) {
408  this->missing_symbols_.erase(iter.key());
410  }
411  }
412  }
413 
414  // update the range and the back dictionary
415  const std::size_t index = size;
416  for (long i = new_value; i < variable__.minVal(); ++i) {
417  this->back_dico_.insert(size, std::to_string(i));
418  ++size;
419  }
421 
422  return DBTranslatedValue{index};
423  } else {
424  if (std::size_t(new_value - lower_bound + 1) > this->max_dico_entries_)
426  "String \"" << str << "\" cannot be translated because "
427  << "the dictionary is already full");
428 
429  // check that there does not already exist a translated missing
430  // value within the new bounds of the range variable
431  for (const auto& missing: translated_int_missing_symbols__) {
432  if ((missing <= new_value) && (missing >= lower_bound)) {
434  "String \""
435  << str << "\" cannot be translated "
436  << "because it would induce a new range containing "
437  << "an already translated missing symbol");
438  }
439  }
440 
441  // remove all the missing symbols that were not translated yet and
442  // that lie within the new bounds of the range variable
445  ++iter) {
446  if (iter.val() == false) {
447  const long missing = std::stol(iter.key());
448  if ((missing <= new_value) && (missing >= lower_bound)) {
449  this->missing_symbols_.erase(iter.key());
451  }
452  }
453  }
454 
455  // update the range and the back dictionary
456  for (long i = variable__.maxVal() + 1; i <= new_value; ++i) {
457  this->back_dico_.insert(size, std::to_string(i));
458  ++size;
459  }
461 
462  return DBTranslatedValue{size - std::size_t(1)};
463  }
464  }
465  }
466 
467 
468  /// returns the original value for a given translation
469  template < template < typename > class ALLOC >
471  const DBTranslatedValue translated_val) const {
472  try {
474  } catch (Exception&) {
475  // check if this is a missing value
477  == std::numeric_limits< std::size_t >::max()) {
479  if (!this->missing_symbols_.empty())  // fixed inverted test: only dereference begin() when at least one missing symbol exists
480  return *(this->missing_symbols_.begin());
481  }
482 
484  "The back translation of \"" << translated_val.discr_val
485  << "\" could not be found");
486  }
487  }
488 
489 
490  /// indicates whether the translations should be reordered
491  template < template < typename > class ALLOC >
493  // if the variable contains only numbers, they should be increasing
494  const auto& labels = variable__.labels();
496  std::size_t number;
497  for (const auto& label: labels) {
498  number = this->back_dico_.first(label);
499  if (number < last_number) return true;
501  }
502 
503  return false;
504  }
505 
506 
507  /// returns a mapping to reorder the current dictionary and updates it
508  template < template < typename > class ALLOC >
510  std::size_t,
511  ALLOC< std::pair< std::size_t, std::size_t > > >
513  // assign to each label the index it had before reordering
514  const auto& labels = variable__.labels();
515  const std::size_t size = labels.size();
516  std::vector< std::pair< std::size_t, std::string >,
517  ALLOC< std::pair< std::size_t, std::string > > >
518  xlabels;
520  bool modifications = false;
521  for (std::size_t i = std::size_t(0); i < size; ++i) {
522  const std::size_t old_val = this->back_dico_.first(labels[i]);
524  if (old_val != i) modifications = true;
525  }
526 
527 
528  // if there were no modification, return an empty update hashtable
529  if (!modifications) {
530  return HashTable< std::size_t,
531  std::size_t,
532  ALLOC< std::pair< std::size_t, std::size_t > > >();
533  }
534 
535  // create the hashTable corresponding to the mapping from the old
536  // indices to the new one
537  this->back_dico_.clear();
538  HashTable< std::size_t,
539  std::size_t,
540  ALLOC< std::pair< std::size_t, std::size_t > > >
541  mapping((Size)size);
542  for (std::size_t i = std::size_t(0); i < size; ++i) {
544  this->back_dico_.insert(i, xlabels[i].second);
545  }
546 
547  return mapping;
548  }
549 
550 
551  /// returns the domain size of a variable corresponding to the translations
552  template < template < typename > class ALLOC >
554  return variable__.domainSize();
555  }
556 
557 
558  /// returns the variable stored into the translator
559  template < template < typename > class ALLOC >
560  INLINE const RangeVariable*
562  return &variable__;
563  }
564 
565 
566  /// returns the translation of a missing value
567  template < template < typename > class ALLOC >
571  }
572 
573 
574  } /* namespace learning */
575 
576 } /* namespace gum */
577 
578 
579 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)