aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
DBTranslator4DiscretizedVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for discretized variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #include <utility>
29 #include <vector>
30 #include <sstream>
31 
32 #include <agrum/tools/database/DBTranslator4DiscretizedVariable.h>
33 #include <agrum/tools/database/DBCell.h>
34 
35 #ifndef DOXYGEN_SHOULD_SKIP_THIS
36 
37 namespace gum {
38 
39  namespace learning {
40 
41 
42  /// default constructor with a discretized variable as translator
43  template < template < typename > class ALLOC >
44  template < typename GUM_SCALAR, template < typename > class XALLOC >
45  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
46  const DiscretizedVariable< GUM_SCALAR >& var,
47  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
48  std::size_t max_dico_entries,
49  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
50  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
51  false,
52  missing_symbols,
53  false,
54  max_dico_entries,
55  alloc),
56  _variable_(var.name(), var.description()) {
57  // check that the variable has not too many entries
58  if (var.domainSize() > max_dico_entries) {
59  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
60  }
61 
62  // copy the ticks of var into our internal variable
63  const auto& ticks = var.ticks();
64  for (const auto tick: ticks) {
65  _variable_.addTick((float)tick);
66  }
67 
68  // the the bounds of the discretized variable
69  const float lower_bound = (float)ticks[0];
70  const float upper_bound = (float)ticks.back();
71 
72  // remove all the missing symbols corresponding to a number between
73  // lower_bound and upper_bound
74  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
75  ++iter) {
76  if (DBCell::isReal(*iter)) {
77  const float missing_val = std::stof(*iter);
78  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
79  this->missing_symbols_.erase(iter);
80  }
81  }
82  }
83 
84  // add the content of the variable into the back dictionary
85  std::size_t size = 0;
86  for (const auto& label: var.labels()) {
87  // if the label corresponds to a missing value, then remove it from
88  // the set of missing symbols. If, in addition, it has already
89  // been entered into the back_dictionary, then, this has been done
90  // because the label corresponded to a missing value, so we should
91  // remove the label as well from the back_dictionary.
92  if (this->missing_symbols_.exists(label)) { this->missing_symbols_.erase(label); }
93 
94  this->back_dico_.insert(size, label);
95  ++size;
96  }
97 
98  // store a copy of the variable, that should be used by method variable ()
99  _real_variable_ = var.clone();
100 
101  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
102  }
103 
104 
105  /// default constructor with a IDiscretized variable as translator
106  template < template < typename > class ALLOC >
107  template < template < typename > class XALLOC >
108  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
109  const IDiscretizedVariable& var,
110  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
111  std::size_t max_dico_entries,
112  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
113  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
114  false,
115  missing_symbols,
116  false,
117  max_dico_entries,
118  alloc),
119  _variable_(var.name(), var.description()) {
120  // check that the variable has not too many entries
121  if (var.domainSize() > max_dico_entries) {
122  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
123  }
124 
125  // copy the ticks of var into our internal variable
126  const auto ticks = var.ticksAsDoubles();
127  for (const auto tick: ticks) {
128  _variable_.addTick((float)tick);
129  }
130 
131  // the the bounds of the discretized variable
132  const float lower_bound = float(ticks[0]);
133  const float upper_bound = float(ticks.back());
134 
135  // remove all the missing symbols corresponding to a number between
136  // lower_bound and upper_bound
137  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
138  ++iter) {
139  if (DBCell::isReal(*iter)) {
140  const float missing_val = std::stof(*iter);
141  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
142  this->missing_symbols_.erase(iter);
143  }
144  }
145  }
146 
147  // add the content of the variable into the back dictionary
148  std::size_t size = 0;
149  for (const auto& label: var.labels()) {
150  // if the label corresponds to a missing value, then remove it from
151  // the set of missing symbols. If, in addition, it has already
152  // been entered into the back_dictionary, then, this has been done
153  // because the label corresponded to a missing value, so we should
154  // remove the label as well from the back_dictionary.
155  if (this->missing_symbols_.exists(label)) { this->missing_symbols_.erase(label); }
156 
157  this->back_dico_.insert(size, label);
158  ++size;
159  }
160 
161  // store a copy of the variable, that should be used by method variable ()
162  _real_variable_ = var.clone();
163 
164  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
165  }
166 
167 
168  /// default constructor with a discretized variable as translator
169  template < template < typename > class ALLOC >
170  template < typename GUM_SCALAR >
171  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
172  const DiscretizedVariable< GUM_SCALAR >& var,
173  std::size_t max_dico_entries,
174  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
175  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
176  false,
177  false,
178  max_dico_entries,
179  alloc),
180  _variable_(var.name(), var.description()) {
181  // check that the variable has not too many entries
182  if (var.domainSize() > max_dico_entries) {
183  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
184  }
185 
186  // copy the ticks of var into our internal variable
187  const auto& ticks = var.ticks();
188  for (const auto tick: ticks) {
189  _variable_.addTick((float)tick);
190  }
191 
192  // add the content of the variable into the back dictionary
193  std::size_t size = 0;
194  for (const auto& label: var.labels()) {
195  this->back_dico_.insert(size, label);
196  ++size;
197  }
198 
199  // store a copy of the variable, that should be used by method variable ()
200  _real_variable_ = var.clone();
201 
202  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
203  }
204 
205 
206  /// default constructor with a IDiscretized variable as translator
207  template < template < typename > class ALLOC >
208  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
209  const IDiscretizedVariable& var,
210  std::size_t max_dico_entries,
211  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
212  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
213  false,
214  false,
215  max_dico_entries,
216  alloc),
217  _variable_(var.name(), var.description()) {
218  // check that the variable has not too many entries
219  if (var.domainSize() > max_dico_entries) {
220  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
221  }
222 
223  // copy the ticks of var into our internal variable
224  const auto ticks = var.ticksAsDoubles();
225  for (const auto tick: ticks) {
226  _variable_.addTick((float)tick);
227  }
228 
229  // add the content of the variable into the back dictionary
230  std::size_t size = 0;
231  for (const auto& label: var.labels()) {
232  this->back_dico_.insert(size, label);
233  ++size;
234  }
235 
236  // store a copy of the variable, that should be used by method variable ()
237  _real_variable_ = var.clone();
238 
239  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
240  }
241 
242 
243  /// copy constructor with a given allocator
244  template < template < typename > class ALLOC >
245  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
246  const DBTranslator4DiscretizedVariable< ALLOC >& from,
247  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
248  DBTranslator< ALLOC >(from, alloc),
249  _variable_(from._variable_) {
250  // store a copy of the variable, that should be used by method variable ()
251  _real_variable_ = from._real_variable_->clone();
252 
253  GUM_CONS_CPY(DBTranslator4DiscretizedVariable);
254  }
255 
256 
257  /// copy constructor
258  template < template < typename > class ALLOC >
259  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
260  const DBTranslator4DiscretizedVariable< ALLOC >& from) :
261  DBTranslator4DiscretizedVariable< ALLOC >(from, from.getAllocator()) {}
262 
263 
264  /// move constructor with a given allocator
265  template < template < typename > class ALLOC >
266  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
267  DBTranslator4DiscretizedVariable< ALLOC >&& from,
268  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
269  DBTranslator< ALLOC >(std::move(from), alloc),
270  _variable_(std::move(from._variable_)) {
271  // moves the copy of the variable, that should be used by method variable ()
272  _real_variable_ = from._real_variable_;
273  from._real_variable_ = nullptr;
274 
275  GUM_CONS_MOV(DBTranslator4DiscretizedVariable);
276  }
277 
278 
279  /// move constructor
280  template < template < typename > class ALLOC >
281  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
282  DBTranslator4DiscretizedVariable< ALLOC >&& from) :
283  DBTranslator4DiscretizedVariable< ALLOC >(std::move(from), from.getAllocator()) {}
284 
285 
286  /// virtual copy constructor with a given allocator
287  template < template < typename > class ALLOC >
288  DBTranslator4DiscretizedVariable< ALLOC >* DBTranslator4DiscretizedVariable< ALLOC >::clone(
289  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) const {
290  ALLOC< DBTranslator4DiscretizedVariable< ALLOC > > allocator(alloc);
291  DBTranslator4DiscretizedVariable< ALLOC >* translator = allocator.allocate(1);
292  try {
293  allocator.construct(translator, *this, alloc);
294  } catch (...) {
295  allocator.deallocate(translator, 1);
296  throw;
297  }
298  return translator;
299  }
300 
301 
302  /// virtual copy constructor
303  template < template < typename > class ALLOC >
304  INLINE DBTranslator4DiscretizedVariable< ALLOC >*
305  DBTranslator4DiscretizedVariable< ALLOC >::clone() const {
306  return clone(this->getAllocator());
307  }
308 
309 
310  /// destructor
311  template < template < typename > class ALLOC >
312  INLINE DBTranslator4DiscretizedVariable< ALLOC >::~DBTranslator4DiscretizedVariable() {
313  if (_real_variable_ != nullptr) delete _real_variable_;
314 
315  GUM_DESTRUCTOR(DBTranslator4DiscretizedVariable);
316  }
317 
318 
319  /// copy operator
320  template < template < typename > class ALLOC >
321  DBTranslator4DiscretizedVariable< ALLOC >& DBTranslator4DiscretizedVariable< ALLOC >::operator=(
322  const DBTranslator4DiscretizedVariable< ALLOC >& from) {
323  if (this != &from) {
324  DBTranslator< ALLOC >::operator=(from);
325  _variable_ = from._variable_;
326 
327  if (_real_variable_ != nullptr) delete _real_variable_;
328  _real_variable_ = from._real_variable_->clone();
329  }
330 
331  return *this;
332  }
333 
334 
335  /// move operator
336  template < template < typename > class ALLOC >
337  DBTranslator4DiscretizedVariable< ALLOC >& DBTranslator4DiscretizedVariable< ALLOC >::operator=(
338  DBTranslator4DiscretizedVariable< ALLOC >&& from) {
339  if (this != &from) {
340  DBTranslator< ALLOC >::operator=(std::move(from));
341  _variable_ = std::move(from._variable_);
342 
343  if (_real_variable_ != nullptr) delete _real_variable_;
344  _real_variable_ = from._real_variable_;
345  from._real_variable_ = nullptr;
346  }
347 
348  return *this;
349  }
350 
351 
352  /// returns the translation of a string, as found in the current dictionary
353  template < template < typename > class ALLOC >
354  INLINE DBTranslatedValue
355  DBTranslator4DiscretizedVariable< ALLOC >::translate(const std::string& str) {
356  // try to get the index of str within the discretized variable.
357  try {
358  return DBTranslatedValue{std::size_t(_variable_[str])};
359  } catch (gum::Exception&) {
360  // check for a missing symbol
361  if (this->isMissingSymbol(str))
362  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
363 
364  // check if the back_dictionary does not contain str. This enables
365  // to execute translate ( translateBack ( translate ( str ) ) )
366  // without raising an exception
367  try {
368  return DBTranslatedValue{this->back_dico_.first(str)};
369  } catch (gum::Exception&) {
370  if (!DBCell::isReal(str)) {
371  GUM_ERROR(TypeError,
372  "String \"" << str << "\" cannot be translated because it is not a number");
373  } else {
374  GUM_ERROR(UnknownLabelInDatabase,
375  "The translation of \"" << str << "\" could not be found")
376  }
377  }
378  }
379  }
380 
381 
382  /// returns the original value for a given translation
383  template < template < typename > class ALLOC >
384  INLINE std::string DBTranslator4DiscretizedVariable< ALLOC >::translateBack(
385  const DBTranslatedValue translated_val) const {
386  try {
387  return this->back_dico_.second(translated_val.discr_val);
388  } catch (Exception&) {
389  // check if this is a missing value
390  if ((translated_val.discr_val == std::numeric_limits< std::size_t >::max())
391  && !this->missing_symbols_.empty())
392  return *(this->missing_symbols_.begin());
393  else
394  GUM_ERROR(UnknownLabelInDatabase,
395  "The back translation of \"" << translated_val.discr_val
396  << "\" could not be found");
397  }
398  }
399 
400 
401  /// indicates whether the translator has an editable dictionary or not
402  template < template < typename > class ALLOC >
403  INLINE bool DBTranslator4DiscretizedVariable< ALLOC >::hasEditableDictionary() const {
404  return false;
405  }
406 
407 
408  /// sets/unset the editable dictionary mode
409  template < template < typename > class ALLOC >
410  INLINE void DBTranslator4DiscretizedVariable< ALLOC >::setEditableDictionaryMode(bool) {}
411 
412 
413  /// indicates whether the translations should be reordered
414  template < template < typename > class ALLOC >
415  bool DBTranslator4DiscretizedVariable< ALLOC >::needsReordering() const {
416  return false;
417  }
418 
419 
420  /// returns a mapping to reorder the current dictionary and updates it
421  template < template < typename > class ALLOC >
422  INLINE HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > >
423  DBTranslator4DiscretizedVariable< ALLOC >::reorder() {
424  return HashTable< std::size_t,
425  std::size_t,
426  ALLOC< std::pair< std::size_t, std::size_t > > >();
427  }
428 
429 
430  /// returns the domain size of a variable corresponding to the translations
431  template < template < typename > class ALLOC >
432  INLINE std::size_t DBTranslator4DiscretizedVariable< ALLOC >::domainSize() const {
433  return _variable_.domainSize();
434  }
435 
436 
437  /// returns the variable stored into the translator
438  template < template < typename > class ALLOC >
439  INLINE const IDiscretizedVariable* DBTranslator4DiscretizedVariable< ALLOC >::variable() const {
440  return _real_variable_;
441  }
442 
443 
444  /// returns the translation of a missing value
445  template < template < typename > class ALLOC >
446  INLINE DBTranslatedValue DBTranslator4DiscretizedVariable< ALLOC >::missingValue() const {
447  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
448  }
449 
450 
451  } /* namespace learning */
452 
453 } /* namespace gum */
454 
455 
456 #endif /* DOXYGEN_SHOULD_SKIP_THIS */