aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBTranslator4LabelizedVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for labelized variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #include <utility>
29 #include <vector>
30 
31 #include <agrum/tools/database/DBTranslator4LabelizedVariable.h>
32 #include <agrum/tools/database/DBCell.h>
33 
34 #ifndef DOXYGEN_SHOULD_SKIP_THIS
35 
36 namespace gum {
37 
38  namespace learning {
39 
40 
41  /// default constructor
    ///
    /// Builds a translator whose labelized variable starts as
    /// _variable_("var", "", 0). The missing symbols, the maximal number of
    /// dictionary entries and the allocator are forwarded unchanged to the
    /// DBTranslator< ALLOC > base class, which is also told that the
    /// translated values are of type DBTranslatedValueType::DISCRETE.
    ///
    /// @param missing_symbols forwarded as is to DBTranslator< ALLOC >
    /// @param max_dico_entries forwarded as is to DBTranslator< ALLOC >
    /// @param alloc forwarded as is to DBTranslator< ALLOC >
42  template < template < typename > class ALLOC >
43  template < template < typename > class XALLOC >
44  DBTranslator4LabelizedVariable< ALLOC >::DBTranslator4LabelizedVariable(
45  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
46  std::size_t max_dico_entries,
47  const typename DBTranslator4LabelizedVariable< ALLOC >::allocator_type& alloc) :
48  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
49  missing_symbols,
    // NOTE(review): presumably the "editable dictionary" flag of the base
    // class (the other constructors take an editable_dictionary
    // parameter) -- confirm against DBTranslator's constructor
50  true,
51  max_dico_entries,
52  alloc),
    // NOTE(review): the trailing 0 presumably asks for a variable without
    // any label yet -- confirm against LabelizedVariable's constructor
53  _variable_("var", "", 0) {
54  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
55  }
56 
57 
58  /// default constructor without missing symbols
    ///
    /// Like the constructor taking missing symbols, it initializes the
    /// translator's labelized variable as _variable_("var", "", 0).
59  template < template < typename > class ALLOC >
    // NOTE(review): the signature and the base-class initializer of this
    // constructor are not visible here; only the initialization of
    // _variable_ can be documented -- check the full source for the rest.
64  _variable_("var", "", 0) {
66  }
67 
68 
69  /// default constructor with a labelized variable as translator
    ///
    /// Copies var into the translator's own _variable_ and registers every
    /// label of var in back_dico_ under its position in var.labels()
    /// (0 for the first label, 1 for the second, and so on).
    ///
    /// @param var the labelized variable whose labels define the dictionary
    /// @param editable_dictionary NOTE(review): its use is not visible here;
    /// presumably forwarded to DBTranslator< ALLOC > to tell whether new
    /// labels may be added later -- confirm
    /// @throws SizeError raised through GUM_ERROR when the dictionary induced
    /// by var is too large. NOTE(review): the test itself is not visible
    /// here; presumably var.domainSize() > max_dico_entries, as in the
    /// constructor without missing symbols -- confirm
70  template < template < typename > class ALLOC >
71  template < template < typename > class XALLOC >
73  const LabelizedVariable& var,
75  const bool editable_dictionary,
82  alloc),
83  _variable_(var) {
84  // check that the variable does not have too many entries
86  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
87  }
88 
89  // add the content of the variable into the back dictionary
    // (size is the index under which the next label is stored)
90  std::size_t size = 0;
91  for (const auto& label: var.labels()) {
92  // if the label corresponds to a missing value, then remove it from
93  // the set of missing symbols.
    // NOTE(review): the statement performing this removal is not visible
    // here -- check the full source
95 
96  // insert the label into the back_dictionary
97  this->back_dico_.insert(size, label);
98  ++size;
99  }
100 
102  }
103 
104 
105  /// default constructor with a labelized variable as translator
     ///
     /// Copies var into the translator's own _variable_ and registers every
     /// label of var in back_dico_ under its position in var.labels()
     /// (0 for the first label, 1 for the second, and so on).
     ///
     /// @param var the labelized variable whose labels define the dictionary
     /// @param editable_dictionary NOTE(review): its use is not visible here;
     /// presumably forwarded to DBTranslator< ALLOC > to tell whether new
     /// labels may be added later -- confirm
     /// @throws SizeError raised through GUM_ERROR when var.domainSize()
     /// exceeds max_dico_entries
106  template < template < typename > class ALLOC >
108  const LabelizedVariable& var,
109  const bool editable_dictionary,
115  alloc),
116  _variable_(var) {
117  // check that the variable does not have too many entries
118  if (var.domainSize() > max_dico_entries) {
119  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
120  }
121 
122  // add the content of the variable into the back dictionary
     // (size is the index under which the next label is stored)
123  std::size_t size = 0;
124  for (const auto& label: var.labels()) {
125  // insert the label into the back_dictionary
126  this->back_dico_.insert(size, label);
127  ++size;
128  }
129 
131  }
132 
133 
134  /// copy constructor with a given allocator
135  template < template < typename > class ALLOC >
     // NOTE(review): only the closing brace of this definition is visible
     // here; its signature, initializer list and body must be read in the
     // full source before relying on any copy semantics.
142  }
143 
144 
145  /// copy constructor
146  template < template < typename > class ALLOC >
     // NOTE(review): the signature and body of this definition are not
     // visible here -- presumably it delegates to the copy constructor
     // taking an allocator; confirm in the full source.
150 
151 
152  /// move constructor with a given allocator
153  template < template < typename > class ALLOC >
     // NOTE(review): only the closing brace of this definition is visible
     // here; its signature, initializer list and body must be read in the
     // full source before relying on any move semantics.
160  }
161 
162 
163  /// move constructor
164  template < template < typename > class ALLOC >
     // NOTE(review): the signature and body of this definition are not
     // visible here -- presumably it delegates to the move constructor
     // taking an allocator; confirm in the full source.
168 
169 
170  /// virtual copy constructor with a given allocator
     ///
     /// @param alloc the allocator to use. NOTE(review): the statements that
     /// use it are not visible here -- presumably it allocates the storage
     /// of the returned copy; confirm
     /// @return the local `translator`, whose declaration is not visible here
171  template < template < typename > class ALLOC >
173  const typename DBTranslator4LabelizedVariable< ALLOC >::allocator_type& alloc) const {
     // the guarded statement is not visible here; whatever it throws is
     // caught below and rethrown unchanged to the caller
176  try {
178  } catch (...) {
     // NOTE(review): the cleanup statement preceding the rethrow is not
     // visible here -- presumably it releases the memory obtained for the
     // copy; confirm
180  throw;
181  }
182  return translator;
183  }
184 
185 
186  /// virtual copy constructor
     ///
     /// Delegates to the clone overload taking an allocator, passing it the
     /// allocator returned by this translator's getAllocator().
     ///
     /// @return whatever clone(allocator) returns
187  template < template < typename > class ALLOC >
190  return clone(this->getAllocator());
191  }
192 
193 
194  /// destructor
195  template < template < typename > class ALLOC >
     // NOTE(review): the signature and body are not visible here --
     // presumably only a GUM_DESTRUCTOR bookkeeping call mirroring the
     // GUM_CONSTRUCTOR call of the constructors; confirm
198  }
199 
200 
201  /// copy operator
     ///
     /// @return a reference to this translator (*this)
202  template < template < typename > class ALLOC >
     // self-assignment is detected by comparing addresses and skips the
     // copy altogether
205  if (this != &from) {
     // NOTE(review): the statements performing the copy are not visible
     // here -- check the full source
208  }
209 
210  return *this;
211  }
212 
213 
214  /// move operator
     ///
     /// @return a reference to this translator (*this)
215  template < template < typename > class ALLOC >
     // self-assignment is detected by comparing addresses and skips the
     // move altogether
218  if (this != &from) {
     // NOTE(review): the statements performing the move are not visible
     // here -- check the full source
221  }
222 
223  return *this;
224  }
225 
226 
227  /// returns the translation of a string, as found in the current dictionary
     ///
     /// When str is unknown, is not a missing symbol and the dictionary is
     /// editable, str is appended to the dictionary and its translation is
     /// the domain size of _variable_ measured before the insertion.
     ///
     /// @throws SizeError (NOTE(review): the GUM_ERROR line naming the
     /// exception is not visible here -- confirm) when str would have to be
     /// added but _variable_ already holds max_dico_entries_ labels or more
     /// @throws an exception raised through GUM_ERROR (type not visible here)
     /// when str is unknown and the dictionary is not editable
228  template < template < typename > class ALLOC >
230  // try to get the index of str within the labelized variable. If this
231  // cannot be found, try to find if this corresponds to a missing value.
232  // Finally, if str is not a missing symbol and the dictionary is
233  // editable, try to add str as a new label
234  try {
     // NOTE(review): the lookup statement is not visible here; its failure
     // is reported through a gum::Exception, handled below
236  } catch (gum::Exception&) {
237  // check that this is not a missing value
238  if (this->isMissingSymbol(str)) {
     // NOTE(review): the statement returning the translation of a missing
     // value is not visible here
240  }
241 
242  // try to add str as a new value if possible
243  if (this->hasEditableDictionary()) {
     // the current domain size is also the index the new label will get
244  const std::size_t size = _variable_.domainSize();
245  if (size >= this->max_dico_entries_)
247  "String \"" << str << "\" cannot be translated "
248  << "because the dictionary is already full");
     // NOTE(review): the statement adding str to _variable_ is not visible
     // here; only the update of the back dictionary is
250  this->back_dico_.insert(size, str);
251  return DBTranslatedValue{size};
252  } else
254  "The translation of \"" << str << "\" could not be found")
255  }
256  }
257 
258 
259  /// returns the original value for a given translation
     ///
     /// @param translated_val the translation to convert back; its discr_val
     /// field is the value reported in the error message
     /// @return for a missing value, the first element of missing_symbols_
     /// (only when that set is not empty). NOTE(review): the regular lookup
     /// statement is not visible here -- presumably it returns the label
     /// stored for translated_val.discr_val; confirm
     /// @throws an exception raised through GUM_ERROR (type not visible here)
     /// when no back translation can be found
260  template < template < typename > class ALLOC >
262  const DBTranslatedValue translated_val) const {
263  try {
     // NOTE(review): the lookup statement is not visible here; its failure
     // is reported through an Exception, handled below
265  } catch (Exception&) {
266  // check if this is a missing value
     // NOTE(review): the first half of the condition is not visible here;
     // the visible half requires at least one known missing symbol, since
     // the first one is dereferenced just below
268  && !this->missing_symbols_.empty())
269  return *(this->missing_symbols_.begin());
270  else
272  "The back translation of \"" << translated_val.discr_val
273  << "\" could not be found");
274  }
275  }
276 
277 
278  /// indicates whether the translations should be reordered
     ///
     /// @return true as soon as a label converted with std::stof is smaller
     /// than last_number or, when some label is not a real number according
     /// to DBCell::isReal, as soon as a label compares lower than the label
     /// just before it; false otherwise
279  template < template < typename > class ALLOC >
281  // if the variable contains only numbers, they should be increasing
282  const auto& labels = _variable_.labels();
     // lowest() guarantees that the first number never looks out of order
283  float last_number = std::numeric_limits< float >::lowest();
284  float number;
285  bool only_numbers = true;
286  for (const auto& label: labels) {
287  if (!DBCell::isReal(label)) {
288  only_numbers = false;
289  break;
290  }
     // numbers are compared with float precision
291  number = std::stof(label);
     // note that this returns before any later non-numeric label is seen
292  if (number < last_number) return true;
     // NOTE(review): the statement following this test is not visible here
     // -- presumably it stores number into last_number so that consecutive
     // labels are compared; confirm
294  }
295 
296  if (!only_numbers) {
297  // here we shall examine whether the strings are sorted by
298  // lexicographical order
299  const std::size_t size = labels.size();
     // start at 1: each label is compared with its predecessor
300  for (std::size_t i = 1; i < size; ++i) {
301  if (labels[i] < labels[i - 1]) return true;
302  }
303  }
304 
305  return false;
306  }
307 
308 
309  /// returns a mapping to reorder the current dictionary and updates it
     ///
     /// The labels of _variable_ are sorted by increasing numeric value
     /// (compared through std::stof) when all of them are real numbers
     /// according to DBCell::isReal, and by increasing string order otherwise.
     ///
     /// @return an empty HashTable when the labels were already in that
     /// order; otherwise the local `mapping`, described by the comments below
     /// as going from the old indices to the new ones. In the latter case
     /// back_dico_ is cleared and refilled following the new order.
310  template < template < typename > class ALLOC >
313  // check whether the variable contains only numeric values. In this
314  // case, we have to sort the values by increasing number
315  const auto& labels = _variable_.labels();
316  const std::size_t size = labels.size();
317 
318  bool only_numbers = true;
319  for (const auto& label: labels) {
320  if (!DBCell::isReal(label)) {
321  only_numbers = false;
322  break;
323  }
324  }
325 
326  // assign to each label its current index
     // (pair.first = index before sorting, pair.second = label)
327  std::vector< std::pair< std::size_t, std::string >,
328  ALLOC< std::pair< std::size_t, std::string > > >
329  xlabels;
     // NOTE(review): the body of the loop below is not visible here --
     // presumably it appends the pair (i, labels[i]) to xlabels; confirm
331  for (std::size_t i = std::size_t(0); i < size; ++i)
333 
334  // reorder by increasing order
335  if (only_numbers)
     // numeric comparison, with float precision
336  std::sort(xlabels.begin(),
337  xlabels.end(),
338  [](const std::pair< std::size_t, std::string >& a,
339  const std::pair< std::size_t, std::string >& b) -> bool {
340  return std::stof(a.second) < std::stof(b.second);
341  });
342  else
     // plain string comparison
343  std::sort(xlabels.begin(),
344  xlabels.end(),
345  [](const std::pair< std::size_t, std::string >& a,
346  const std::pair< std::size_t, std::string >& b) -> bool {
347  return a.second < b.second;
348  });
349 
350  // check whether there were any modification: a label has moved as soon
     // as its index before sorting differs from its position after sorting
351  bool modifications = false;
352  for (std::size_t i = std::size_t(0); i < size; ++i) {
353  if (xlabels[i].first != i) {
354  modifications = true;
355  break;
356  }
357  }
358 
359  // if there were no modification, return an empty update hashtable
360  if (!modifications) {
361  return HashTable< std::size_t,
362  std::size_t,
363  ALLOC< std::pair< std::size_t, std::size_t > > >();
364  }
365 
366  // recreate the variable so that the labels correspond to the
367  // new ordering
     // NOTE(review): the statements emptying _variable_ and re-adding each
     // label (the body of the loop below) are not visible here
369  for (auto& label: xlabels)
371 
372  // create the hashTable corresponding to the mapping from the old
373  // indices to the new one
374  this->back_dico_.clear();
     // NOTE(review): the declaration of `mapping` is only partly visible
     // here; the visible argument gives it the number of labels
376  (Size)size);
377  for (std::size_t i = std::size_t(0); i < size; ++i) {
     // NOTE(review): the statement filling `mapping` is not visible here
     // -- presumably it maps xlabels[i].first (old index) to i; confirm
379  this->back_dico_.insert(i, xlabels[i].second);
380  }
381 
382  return mapping;
383  }
384 
385 
386  /// returns the domain size of a variable corresponding to the translations
     ///
     /// @return the value of _variable_.domainSize(), i.e., the domain size
     /// of the labelized variable held by this translator
387  template < template < typename > class ALLOC >
389  return _variable_.domainSize();
390  }
391 
392 
393  /// returns the variable stored into the translator
     ///
     /// @return the address of the translator's own _variable_ member: no
     /// copy is made, so the pointer refers to an object held by this
     /// translator
394  template < template < typename > class ALLOC >
396  return &_variable_;
397  }
398 
399 
400  /// returns the translation of a missing value
401  template < template < typename > class ALLOC >
     // NOTE(review): the signature and the return statement are not visible
     // here; the actual value used to encode missing values must be read in
     // the full source.
404  }
405 
406 
407  } /* namespace learning */
408 
409 } /* namespace gum */
410 
411 
412 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)