aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
DBTranslator4LabelizedVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for labelized variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #include <utility>
29 #include <vector>
30 
31 #include <agrum/tools/database/DBTranslator4LabelizedVariable.h>
32 #include <agrum/tools/database/DBCell.h>
33 
34 #ifndef DOXYGEN_SHOULD_SKIP_THIS
35 
36 namespace gum {
37 
38  namespace learning {
39 
40 
41  /// default constructor: forwards the missing symbols, the dictionary size limit and the allocator to DBTranslator (as a DISCRETE translator) and initializes the stored variable with ("var", "", 0)
42  template < template < typename > class ALLOC >
43  template < template < typename > class XALLOC >
44  DBTranslator4LabelizedVariable< ALLOC >::DBTranslator4LabelizedVariable(
45  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
46  std::size_t max_dico_entries,
47  const typename DBTranslator4LabelizedVariable< ALLOC >::allocator_type&
48  alloc) :
49  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
50  missing_symbols,
51  true,  // NOTE(review): presumably the editable-dictionary flag -- confirm against the DBTranslator constructor
52  max_dico_entries,
53  alloc),
54  variable__("var", "", 0) {  // NOTE(review): presumably (name, description, initial number of labels) -- confirm against LabelizedVariable
55  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
56  }
57 
58 
59  /// default constructor without missing symbols
60  template < template < typename > class ALLOC >
64  alloc) :
66  true,
68  alloc),
69  variable__("var", "", 0) {
71  }
72 
73 
74  /// default constructor with a labelized variable as translator
75  template < template < typename > class ALLOC >
76  template < template < typename > class XALLOC >
78  const LabelizedVariable& var,
80  const bool editable_dictionary,
83  alloc) :
88  alloc),
89  variable__(var) {
90  // check that the variable does not have too many entries
93  "the dictionary induced by the variable is too large");
94  }
95 
96  // add the content of the variable into the back dictionary
97  std::size_t size = 0;
98  for (const auto& label: var.labels()) {
99  // if the label corresponds to a missing value, then remove it from
100  // the set of missing symbols.
101  if (this->missing_symbols_.exists(label)) {
103  }
104 
105  // insert the label into the back_dictionary
106  this->back_dico_.insert(size, label);
107  ++size;
108  }
109 
111  }
112 
113 
114  /// default constructor with a labelized variable as translator but without missing symbols
115  template < template < typename > class ALLOC >
117  const LabelizedVariable& var,
118  const bool editable_dictionary,
121  alloc) :
125  alloc),
126  variable__(var) {
127  // check that the variable does not have too many entries
128  if (var.domainSize() > max_dico_entries) {
130  "the dictionary induced by the variable is too large");
131  }
132 
133  // add the content of the variable into the back dictionary
134  std::size_t size = 0;
135  for (const auto& label: var.labels()) {
136  // insert the label into the back_dictionary
137  this->back_dico_.insert(size, label);
138  ++size;
139  }
140 
142  }
143 
144 
145  /// copy constructor with a given allocator
146  template < template < typename > class ALLOC >
150  alloc) :
154  }
155 
156 
157  /// copy constructor
158  template < template < typename > class ALLOC >
162 
163 
164  /// move constructor with a given allocator
165  template < template < typename > class ALLOC >
169  alloc) :
173  }
174 
175 
176  /// move constructor
177  template < template < typename > class ALLOC >
181  from.getAllocator()) {}
182 
183 
184  /// virtual copy constructor with a given allocator
185  template < template < typename > class ALLOC >
189  alloc) const {
192  try {
194  } catch (...) {
196  throw;
197  }
198  return translator;
199  }
200 
201 
202  /// virtual copy constructor
203  template < template < typename > class ALLOC >
206  return clone(this->getAllocator());
207  }
208 
209 
210  /// destructor
211  template < template < typename > class ALLOC >
212  INLINE
215  }
216 
217 
218  /// copy operator
219  template < template < typename > class ALLOC >
223  if (this != &from) {
226  }
227 
228  return *this;
229  }
230 
231 
232  /// move operator
233  template < template < typename > class ALLOC >
237  if (this != &from) {
240  }
241 
242  return *this;
243  }
244 
245 
246  /// returns the translation of a string, as found in the current dictionary
247  template < template < typename > class ALLOC >
250  // try to get the index of str within the labelized variable. If this
251  // cannot be found, try to find if this corresponds to a missing value.
252  // Finally, if this is still not a missing value and, if enabled, try
253  // to add str as a new label
254  try {
256  } catch (gum::Exception&) {
257  // check that this is not a missing value
258  if (this->isMissingSymbol(str)) {
260  }
261 
262  // try to add str as a new value if possible
263  if (this->hasEditableDictionary()) {
264  const std::size_t size = variable__.domainSize();
265  if (size >= this->max_dico_entries_)
267  "String \"" << str << "\" cannot be translated "
268  << "because the dictionary is already full");
270  this->back_dico_.insert(size, str);
271  return DBTranslatedValue{size};
272  } else
274  "The translation of \"" << str << "\" could not be found");
275  }
276  }
277 
278 
279  /// returns the original value for a given translation
280  template < template < typename > class ALLOC >
282  const DBTranslatedValue translated_val) const {
283  try {
285  } catch (Exception&) {
286  // check if this is a missing value
288  && !this->missing_symbols_.empty())
289  return *(this->missing_symbols_.begin());
290  else
292  "The back translation of \"" << translated_val.discr_val
293  << "\" could not be found");
294  }
295  }
296 
297 
298  /// indicates whether the translations should be reordered
299  template < template < typename > class ALLOC >
301  // if the variable contains only numbers, they should be increasing
302  const auto& labels = variable__.labels();
303  float last_number = std::numeric_limits< float >::lowest();
304  float number;
305  bool only_numbers = true;
306  for (const auto& label: labels) {
307  if (!DBCell::isReal(label)) {
308  only_numbers = false;
309  break;
310  }
311  number = std::stof(label);
312  if (number < last_number) return true;
314  }
315 
316  if (!only_numbers) {
317  // here we shall examine whether the strings are sorted by
318  // lexicographical order
319  const std::size_t size = labels.size();
320  for (std::size_t i = 1; i < size; ++i) {
321  if (labels[i] < labels[i - 1]) return true;
322  }
323  }
324 
325  return false;
326  }
327 
328 
329  /// returns a mapping to reorder the current dictionary and updates it
330  template < template < typename > class ALLOC >
331  HashTable< std::size_t,
332  std::size_t,
333  ALLOC< std::pair< std::size_t, std::size_t > > >
335  // check whether the variable contains only numeric values. In this
336  // case, we have to sort the values by increasing number
337  const auto& labels = variable__.labels();
338  const std::size_t size = labels.size();
339 
340  bool only_numbers = true;
341  for (const auto& label: labels) {
342  if (!DBCell::isReal(label)) {
343  only_numbers = false;
344  break;
345  }
346  }
347 
348  // assign to each label its current index
349  std::vector< std::pair< std::size_t, std::string >,
350  ALLOC< std::pair< std::size_t, std::string > > >
351  xlabels;
353  for (std::size_t i = std::size_t(0); i < size; ++i)
355 
356  // reorder by increasing order
357  if (only_numbers)
358  std::sort(xlabels.begin(),
359  xlabels.end(),
360  [](const std::pair< std::size_t, std::string >& a,
361  const std::pair< std::size_t, std::string >& b) -> bool {
362  return std::stof(a.second) < std::stof(b.second);
363  });
364  else
365  std::sort(xlabels.begin(),
366  xlabels.end(),
367  [](const std::pair< std::size_t, std::string >& a,
368  const std::pair< std::size_t, std::string >& b) -> bool {
369  return a.second < b.second;
370  });
371 
372  // check whether there were any modifications
373  bool modifications = false;
374  for (std::size_t i = std::size_t(0); i < size; ++i) {
375  if (xlabels[i].first != i) {
376  modifications = true;
377  break;
378  }
379  }
380 
381  // if there was no modification, return an empty update hashtable
382  if (!modifications) {
383  return HashTable< std::size_t,
384  std::size_t,
385  ALLOC< std::pair< std::size_t, std::size_t > > >();
386  }
387 
388  // recreate the variable so that the labels correspond to the
389  // new ordering
391  for (auto& label: xlabels)
393 
394  // create the hashTable corresponding to the mapping from the old
395  // indices to the new one
396  this->back_dico_.clear();
397  HashTable< std::size_t,
398  std::size_t,
399  ALLOC< std::pair< std::size_t, std::size_t > > >
400  mapping((Size)size);
401  for (std::size_t i = std::size_t(0); i < size; ++i) {
403  this->back_dico_.insert(i, xlabels[i].second);
404  }
405 
406  return mapping;
407  }
408 
409 
410  /// returns the domain size of a variable corresponding to the translations
411  template < template < typename > class ALLOC >
412  INLINE std::size_t
414  return variable__.domainSize();
415  }
416 
417 
418  /// returns the variable stored into the translator
419  template < template < typename > class ALLOC >
422  return &variable__;
423  }
424 
425 
426  /// returns the translation of a missing value
427  template < template < typename > class ALLOC >
431  }
432 
433 
434  } /* namespace learning */
435 
436 } /* namespace gum */
437 
438 
439 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)