aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
DBTranslator4LabelizedVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for labelized variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #include <utility>
29 #include <vector>
30 
31 #include <agrum/tools/database/DBTranslator4LabelizedVariable.h>
32 #include <agrum/tools/database/DBCell.h>
33 
34 #ifndef DOXYGEN_SHOULD_SKIP_THIS
35 
36 namespace gum {
37 
38  namespace learning {
39 
40 
41  /// default constructor
    ///
    /// Builds a translator whose internal variable is initialized as
    /// ("var", "", 0) and forwards all its arguments to the DBTranslator
    /// base class.
    ///
    /// @param missing_symbols the strings passed to DBTranslator as the
    /// symbols denoting missing values
    /// @param max_dico_entries passed as is to DBTranslator (the constructor
    /// taking a LabelizedVariable compares it with the variable's domain size)
    /// @param alloc the allocator passed to DBTranslator
42  template < template < typename > class ALLOC >
43  template < template < typename > class XALLOC >
44  DBTranslator4LabelizedVariable< ALLOC >::DBTranslator4LabelizedVariable(
45  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
46  std::size_t max_dico_entries,
47  const typename DBTranslator4LabelizedVariable< ALLOC >::allocator_type& alloc) :
    // NOTE(review): the meaning of the two `true` flags below is fixed by the
    // DBTranslator constructor, which is not visible here -- confirm there
48  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
49  true,
50  missing_symbols,
51  true,
52  max_dico_entries,
53  alloc),
    // presumably a variable named "var" with no label yet -- TODO confirm
    // against the LabelizedVariable constructor
54  _variable_("var", "", 0) {
55  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
56  }
57 
58 
59  /// default constructor without missing symbols
60  template < template < typename > class ALLOC >
65  _variable_("var", "", 0) {
67  }
68 
69 
70  /// default constructor with a labelized variable as translator
    ///
    /// NOTE(review): this listing skips original lines 73, 75, 77-79, 81-83,
    /// 87, 96 and 103, so parts of the signature, of the base class
    /// initialization and of the body are not shown here.
    ///
    /// @param var the variable copied into the translator; its labels are also
    /// loaded into the back dictionary, in the order given by var.labels()
    /// @param editable_dictionary presumably forwarded to the DBTranslator
    /// base class -- the forwarding line is not visible, TODO confirm
    /// @throws SizeError when the dictionary induced by var is too large (the
    /// tested condition is on a line that is not visible here)
71  template < template < typename > class ALLOC >
72  template < template < typename > class XALLOC >
74  const LabelizedVariable& var,
76  const bool editable_dictionary,
80  true,
84  alloc),
85  _variable_(var) {
86  // check that the variable does not have too many entries
88  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
89  }
90 
91  // add the content of the variable into the back dictionary: the i-th
    // label of var.labels() is stored under key i (i = 0, 1, 2, ...)
92  std::size_t size = 0;
93  for (const auto& label: var.labels()) {
94  // if the label corresponds to a missing value, then remove it from
95  // the set of missing symbols.
    // NOTE(review): the statement performing this removal (original line 96)
    // is not visible in this listing
97 
98  // insert the label into the back_dictionary
99  this->back_dico_.insert(size, label);
100  ++size;
101  }
102 
104  }
105 
106 
107  /// default constructor with a labelized variable as translator
     ///
     /// NOTE(review): this listing skips original lines 109, 112-114, 116-117
     /// and 133, so parts of the signature and of the base class
     /// initialization are not shown here. Unlike the preceding constructor,
     /// no XALLOC template parameter is visible, so this is presumably the
     /// version without missing symbols -- TODO confirm.
     ///
     /// @param var the variable copied into the translator; its labels are also
     /// loaded into the back dictionary, in the order given by var.labels()
     /// @param editable_dictionary presumably forwarded to the DBTranslator
     /// base class -- the forwarding line is not visible, TODO confirm
     /// @throws SizeError when var.domainSize() exceeds max_dico_entries
108  template < template < typename > class ALLOC >
110  const LabelizedVariable& var,
111  const bool editable_dictionary,
115  true,
118  alloc),
119  _variable_(var) {
120  // check that the variable does not have too many entries
121  if (var.domainSize() > max_dico_entries) {
122  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
123  }
124 
125  // add the content of the variable into the back dictionary: the i-th
     // label of var.labels() is stored under key i (i = 0, 1, 2, ...)
126  std::size_t size = 0;
127  for (const auto& label: var.labels()) {
128  // insert the label into the back_dictionary
129  this->back_dico_.insert(size, label);
130  ++size;
131  }
132 
134  }
135 
136 
137  /// copy constructor with a given allocator
138  template < template < typename > class ALLOC >
145  }
146 
147 
148  /// copy constructor
149  template < template < typename > class ALLOC >
153 
154 
155  /// move constructor with a given allocator
156  template < template < typename > class ALLOC >
163  }
164 
165 
166  /// move constructor
167  template < template < typename > class ALLOC >
171 
172 
173  /// virtual copy constructor with a given allocator
     ///
     /// NOTE(review): original lines 175, 177-178, 180 and 182 (start of the
     /// signature, allocation, construction and the statement inside the catch
     /// block) are not visible in this listing.
     ///
     /// @param alloc the allocator to use for the copy
     /// @return the value of the local `translator`, presumably the newly
     /// created copy -- TODO confirm against the hidden lines
174  template < template < typename > class ALLOC >
176  const typename DBTranslator4LabelizedVariable< ALLOC >::allocator_type& alloc) const {
179  try {
181  } catch (...) {
     // whatever is thrown inside the try block is rethrown to the caller;
     // presumably the hidden line 182 releases what was allocated beforehand
183  throw;
184  }
185  return translator;
186  }
187 
188 
189  /// virtual copy constructor
     ///
     /// Delegates to the clone overload taking an allocator, passing it the
     /// allocator returned by this->getAllocator().
     /// NOTE(review): the signature (original lines 191-192) is not visible
     /// in this listing.
190  template < template < typename > class ALLOC >
193  return clone(this->getAllocator());
194  }
195 
196 
197  /// destructor
198  template < template < typename > class ALLOC >
201  }
202 
203 
204  /// copy operator
     ///
     /// NOTE(review): the signature (original lines 206-207) and the copy
     /// statements (original lines 209-210) are not visible in this listing.
     ///
     /// @return a reference to *this
205  template < template < typename > class ALLOC >
     // nothing is done on self-assignment
208  if (this != &from) {
211  }
212 
213  return *this;
214  }
215 
216 
217  /// move operator
     ///
     /// NOTE(review): the signature (original lines 219-220) and the move
     /// statements (original lines 222-223) are not visible in this listing.
     ///
     /// @return a reference to *this
218  template < template < typename > class ALLOC >
     // nothing is done on self-assignment
221  if (this != &from) {
224  }
225 
226  return *this;
227  }
228 
229 
230  /// returns the translation of a string, as found in the current dictionary
     ///
     /// NOTE(review): original lines 232, 238, 242, 249, 252 and 256 (the
     /// signature, the dictionary lookup, the value returned for a missing
     /// symbol, the two error macros and the statement executed just before
     /// the back dictionary insertion) are not visible in this listing.
     ///
     /// When str is unknown, is not a missing symbol and the dictionary is
     /// editable, str is registered in the back dictionary under the index
     /// equal to the variable's current domain size, and that index is returned.
     /// Errors are reported when the dictionary is full (domain size >=
     /// max_dico_entries_) or when str is unknown and the dictionary is not
     /// editable; the exception types are set on lines not visible here.
231  template < template < typename > class ALLOC >
233  // try to get the index of str within the labelized variable. If this
234  // cannot be found, try to find if this corresponds to a missing value.
235  // Finally, if this is still not a missing value and, if enabled, try
236  // to add str as a new label
237  try {
239  } catch (gum::Exception&) {
240  // check whether this is a missing value
241  if (this->isMissingSymbol(str)) {
243  }
244 
245  // try to add str as a new value if possible
246  if (this->hasEditableDictionary()) {
     // the new label takes the next free index, i.e., the current domain size
247  const std::size_t size = _variable_.domainSize();
248  if (size >= this->max_dico_entries_)
250  "String \"" << str << "\" cannot be translated "
251  << "because the dictionary is already full");
253  this->back_dico_.insert(size, str);
254  return DBTranslatedValue{size};
255  } else
257  "The translation of \"" << str << "\" could not be found")
258  }
259  }
260 
261 
262  /// returns the original value for a given translation
     ///
     /// NOTE(review): original lines 264, 267, 270 and 274 (start of the
     /// signature, the lookup, the first half of the missing value test and the
     /// error macro) are not visible in this listing.
     ///
     /// @param translated_val the translated value; its discr_val field is the
     /// one reported in the error message
     /// @return when the lookup fails and the (partly hidden) missing value
     /// test succeeds, the first element of missing_symbols_
263  template < template < typename > class ALLOC >
265  const DBTranslatedValue translated_val) const {
266  try {
268  } catch (Exception&) {
269  // check if this is a missing value; a missing symbol can only be
     // returned if at least one such symbol is known
271  && !this->missing_symbols_.empty())
272  return *(this->missing_symbols_.begin());
273  else
275  "The back translation of \"" << translated_val.discr_val
276  << "\" could not be found");
277  }
278  }
279 
280 
281  /// indicates whether the translations should be reordered
     ///
     /// NOTE(review): the signature (original line 283) and original line 296
     /// are not visible in this listing.
     ///
     /// @return true if a label that DBCell::isReal accepts is smaller, once
     /// parsed with std::stof, than the reference value last_number, or if
     /// some label is not a real number and the labels are not sorted w.r.t.
     /// their operator<; false otherwise
282  template < template < typename > class ALLOC >
284  // if the variable contains only numbers, they should be increasing
285  const auto& labels = _variable_.labels();
286  float last_number = std::numeric_limits< float >::lowest();
287  float number;
288  bool only_numbers = true;
289  for (const auto& label: labels) {
290  if (!DBCell::isReal(label)) {
     // a non-numeric label: stop here and fall back to the lexicographical
     // check below
291  only_numbers = false;
292  break;
293  }
294  number = std::stof(label);
295  if (number < last_number) return true;
     // NOTE(review): presumably last_number is updated with number on the
     // hidden line 296 -- TODO confirm
297  }
298 
299  if (!only_numbers) {
300  // here we shall examine whether the strings are sorted by
301  // lexicographical order
302  const std::size_t size = labels.size();
303  for (std::size_t i = 1; i < size; ++i) {
304  if (labels[i] < labels[i - 1]) return true;
305  }
306  }
307 
308  return false;
309  }
310 
311 
312  /// returns a mapping to reorder the current dictionary and updates it
     ///
     /// NOTE(review): original lines 314-315, 333, 335, 371, 373, 378 and 381
     /// (the signature, the filling of xlabels, the rebuilding of the variable
     /// and the creation and filling of `mapping`) are not visible in this
     /// listing.
     ///
     /// The labels are sorted by increasing numeric value (compared through
     /// std::stof) when all of them are real numbers according to
     /// DBCell::isReal, and with operator< on the strings otherwise.
     ///
     /// @return an empty hashtable if the labels are already in the sorted
     /// order, else `mapping` (per the comments below, the mapping from the
     /// old indices to the new ones)
313  template < template < typename > class ALLOC >
316  // check whether the variable contains only numeric values. In this
317  // case, we have to sort the values by increasing number
318  const auto& labels = _variable_.labels();
319  const std::size_t size = labels.size();
320 
321  bool only_numbers = true;
322  for (const auto& label: labels) {
323  if (!DBCell::isReal(label)) {
324  only_numbers = false;
325  break;
326  }
327  }
328 
329  // assign to each label its current index
330  std::vector< std::pair< std::size_t, std::string >,
331  ALLOC< std::pair< std::size_t, std::string > > >
332  xlabels;
     // NOTE(review): the body of the loop below (original line 335) is not
     // visible; presumably it appends the pair (i, labels[i]) to xlabels
334  for (std::size_t i = std::size_t(0); i < size; ++i)
336 
337  // reorder by increasing order: numeric order if all the labels are
     // numbers, order of the strings' operator< otherwise
338  if (only_numbers)
339  std::sort(xlabels.begin(),
340  xlabels.end(),
341  [](const std::pair< std::size_t, std::string >& a,
342  const std::pair< std::size_t, std::string >& b) -> bool {
343  return std::stof(a.second) < std::stof(b.second);
344  });
345  else
346  std::sort(xlabels.begin(),
347  xlabels.end(),
348  [](const std::pair< std::size_t, std::string >& a,
349  const std::pair< std::size_t, std::string >& b) -> bool {
350  return a.second < b.second;
351  });
352 
353  // check whether there were any modifications: the order is unchanged
     // if and only if every pair still sits at the position of its old index
354  bool modifications = false;
355  for (std::size_t i = std::size_t(0); i < size; ++i) {
356  if (xlabels[i].first != i) {
357  modifications = true;
358  break;
359  }
360  }
361 
362  // if there were no modifications, return an empty update hashtable
363  if (!modifications) {
364  return HashTable< std::size_t,
365  std::size_t,
366  ALLOC< std::pair< std::size_t, std::size_t > > >();
367  }
368 
369  // recreate the variable so that the labels correspond to the
370  // new ordering
372  for (auto& label: xlabels)
374 
375  // create the hashTable corresponding to the mapping from the old
376  // indices to the new one; the back dictionary is rebuilt at the same
     // time so that key i refers to the i-th label of the new ordering
377  this->back_dico_.clear();
379  (Size)size);
380  for (std::size_t i = std::size_t(0); i < size; ++i) {
382  this->back_dico_.insert(i, xlabels[i].second);
383  }
384 
385  return mapping;
386  }
387 
388 
389  /// returns the domain size of a variable corresponding to the translations
     ///
     /// NOTE(review): the signature (original line 391) is not visible in
     /// this listing.
     ///
     /// @return the domain size of the variable stored in the translator
390  template < template < typename > class ALLOC >
392  return _variable_.domainSize();
393  }
394 
395 
396  /// returns the variable stored into the translator
     ///
     /// NOTE(review): the signature (original line 398) is not visible in
     /// this listing.
     ///
     /// @return the address of the translator's own _variable_ member, so the
     /// pointer refers to an object held by the translator itself
397  template < template < typename > class ALLOC >
399  return &_variable_;
400  }
401 
402 
403  /// returns the translation of a missing value
404  template < template < typename > class ALLOC >
407  }
408 
409 
410  } /* namespace learning */
411 
412 } /* namespace gum */
413 
414 
415 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)