aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBTranslator4DiscretizedVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for discretized variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
#include <memory>
#include <sstream>
#include <utility>
#include <vector>

#include <agrum/tools/database/DBTranslator4DiscretizedVariable.h>
#include <agrum/tools/database/DBCell.h>
34 
35 #ifndef DOXYGEN_SHOULD_SKIP_THIS
36 
37 namespace gum {
38 
39  namespace learning {
40 
41 
42  /// default constructor with a discretized variable as translator
43  template < template < typename > class ALLOC >
44  template < typename GUM_SCALAR, template < typename > class XALLOC >
45  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
46  const DiscretizedVariable< GUM_SCALAR >& var,
47  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
48  std::size_t max_dico_entries,
49  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
50  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
51  missing_symbols,
52  false,
53  max_dico_entries,
54  alloc),
55  _variable_(var.name(), var.description()) {
56  // check that the variable has not too many entries
57  if (var.domainSize() > max_dico_entries) {
58  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
59  }
60 
61  // copy the ticks of var into our internal variable
62  const auto& ticks = var.ticks();
63  for (const auto tick: ticks) {
64  _variable_.addTick((float)tick);
65  }
66 
67  // the the bounds of the discretized variable
68  const float lower_bound = (float)ticks[0];
69  const float upper_bound = (float)ticks.back();
70 
71  // remove all the missing symbols corresponding to a number between
72  // lower_bound and upper_bound
73  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
74  ++iter) {
75  if (DBCell::isReal(*iter)) {
76  const float missing_val = std::stof(*iter);
77  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
78  this->missing_symbols_.erase(iter);
79  }
80  }
81  }
82 
83  // add the content of the variable into the back dictionary
84  std::size_t size = 0;
85  for (const auto& label: var.labels()) {
86  // if the label corresponds to a missing value, then remove it from
87  // the set of missing symbols. If, in addition, it has already
88  // been entered into the back_dictionary, then, this has been done
89  // because the label corresponded to a missing value, so we should
90  // remove the label as well from the back_dictionary.
91  if (this->missing_symbols_.exists(label)) { this->missing_symbols_.erase(label); }
92 
93  this->back_dico_.insert(size, label);
94  ++size;
95  }
96 
97  // store a copy of the variable, that should be used by method variable ()
98  _real_variable_ = var.clone();
99 
100  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
101  }
102 
103 
104  /// default constructor with a IDiscretized variable as translator
105  template < template < typename > class ALLOC >
106  template < template < typename > class XALLOC >
107  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
108  const IDiscretizedVariable& var,
109  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
110  std::size_t max_dico_entries,
111  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
112  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
113  missing_symbols,
114  false,
115  max_dico_entries,
116  alloc),
117  _variable_(var.name(), var.description()) {
118  // check that the variable has not too many entries
119  if (var.domainSize() > max_dico_entries) {
120  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
121  }
122 
123  // copy the ticks of var into our internal variable
124  const auto ticks = var.ticksAsDoubles();
125  for (const auto tick: ticks) {
126  _variable_.addTick((float)tick);
127  }
128 
129  // the the bounds of the discretized variable
130  const float lower_bound = float(ticks[0]);
131  const float upper_bound = float(ticks.back());
132 
133  // remove all the missing symbols corresponding to a number between
134  // lower_bound and upper_bound
135  for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
136  ++iter) {
137  if (DBCell::isReal(*iter)) {
138  const float missing_val = std::stof(*iter);
139  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
140  this->missing_symbols_.erase(iter);
141  }
142  }
143  }
144 
145  // add the content of the variable into the back dictionary
146  std::size_t size = 0;
147  for (const auto& label: var.labels()) {
148  // if the label corresponds to a missing value, then remove it from
149  // the set of missing symbols. If, in addition, it has already
150  // been entered into the back_dictionary, then, this has been done
151  // because the label corresponded to a missing value, so we should
152  // remove the label as well from the back_dictionary.
153  if (this->missing_symbols_.exists(label)) { this->missing_symbols_.erase(label); }
154 
155  this->back_dico_.insert(size, label);
156  ++size;
157  }
158 
159  // store a copy of the variable, that should be used by method variable ()
160  _real_variable_ = var.clone();
161 
162  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
163  }
164 
165 
166  /// default constructor with a discretized variable as translator
167  template < template < typename > class ALLOC >
168  template < typename GUM_SCALAR >
169  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
170  const DiscretizedVariable< GUM_SCALAR >& var,
171  std::size_t max_dico_entries,
172  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
173  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE, false, max_dico_entries, alloc),
174  _variable_(var.name(), var.description()) {
175  // check that the variable has not too many entries
176  if (var.domainSize() > max_dico_entries) {
177  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
178  }
179 
180  // copy the ticks of var into our internal variable
181  const auto& ticks = var.ticks();
182  for (const auto tick: ticks) {
183  _variable_.addTick((float)tick);
184  }
185 
186  // add the content of the variable into the back dictionary
187  std::size_t size = 0;
188  for (const auto& label: var.labels()) {
189  this->back_dico_.insert(size, label);
190  ++size;
191  }
192 
193  // store a copy of the variable, that should be used by method variable ()
194  _real_variable_ = var.clone();
195 
196  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
197  }
198 
199 
200  /// default constructor with a IDiscretized variable as translator
201  template < template < typename > class ALLOC >
202  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
203  const IDiscretizedVariable& var,
204  std::size_t max_dico_entries,
205  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
206  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE, false, max_dico_entries, alloc),
207  _variable_(var.name(), var.description()) {
208  // check that the variable has not too many entries
209  if (var.domainSize() > max_dico_entries) {
210  GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
211  }
212 
213  // copy the ticks of var into our internal variable
214  const auto ticks = var.ticksAsDoubles();
215  for (const auto tick: ticks) {
216  _variable_.addTick((float)tick);
217  }
218 
219  // add the content of the variable into the back dictionary
220  std::size_t size = 0;
221  for (const auto& label: var.labels()) {
222  this->back_dico_.insert(size, label);
223  ++size;
224  }
225 
226  // store a copy of the variable, that should be used by method variable ()
227  _real_variable_ = var.clone();
228 
229  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
230  }
231 
232 
233  /// copy constructor with a given allocator
234  template < template < typename > class ALLOC >
235  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
236  const DBTranslator4DiscretizedVariable< ALLOC >& from,
237  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
238  DBTranslator< ALLOC >(from, alloc),
239  _variable_(from._variable_) {
240  // store a copy of the variable, that should be used by method variable ()
241  _real_variable_ = from._real_variable_->clone();
242 
243  GUM_CONS_CPY(DBTranslator4DiscretizedVariable);
244  }
245 
246 
247  /// copy constructor
248  template < template < typename > class ALLOC >
249  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
250  const DBTranslator4DiscretizedVariable< ALLOC >& from) :
251  DBTranslator4DiscretizedVariable< ALLOC >(from, from.getAllocator()) {}
252 
253 
254  /// move constructor with a given allocator
255  template < template < typename > class ALLOC >
256  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
257  DBTranslator4DiscretizedVariable< ALLOC >&& from,
258  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) :
259  DBTranslator< ALLOC >(std::move(from), alloc),
260  _variable_(std::move(from._variable_)) {
261  // moves the copy of the variable, that should be used by method variable ()
262  _real_variable_ = from._real_variable_;
263  from._real_variable_ = nullptr;
264 
265  GUM_CONS_MOV(DBTranslator4DiscretizedVariable);
266  }
267 
268 
269  /// move constructor
270  template < template < typename > class ALLOC >
271  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
272  DBTranslator4DiscretizedVariable< ALLOC >&& from) :
273  DBTranslator4DiscretizedVariable< ALLOC >(std::move(from), from.getAllocator()) {}
274 
275 
276  /// virtual copy constructor with a given allocator
277  template < template < typename > class ALLOC >
278  DBTranslator4DiscretizedVariable< ALLOC >* DBTranslator4DiscretizedVariable< ALLOC >::clone(
279  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type& alloc) const {
280  ALLOC< DBTranslator4DiscretizedVariable< ALLOC > > allocator(alloc);
281  DBTranslator4DiscretizedVariable< ALLOC >* translator = allocator.allocate(1);
282  try {
283  allocator.construct(translator, *this, alloc);
284  } catch (...) {
285  allocator.deallocate(translator, 1);
286  throw;
287  }
288  return translator;
289  }
290 
291 
292  /// virtual copy constructor
293  template < template < typename > class ALLOC >
294  INLINE DBTranslator4DiscretizedVariable< ALLOC >*
295  DBTranslator4DiscretizedVariable< ALLOC >::clone() const {
296  return clone(this->getAllocator());
297  }
298 
299 
300  /// destructor
301  template < template < typename > class ALLOC >
302  INLINE DBTranslator4DiscretizedVariable< ALLOC >::~DBTranslator4DiscretizedVariable() {
303  if (_real_variable_ != nullptr) delete _real_variable_;
304 
305  GUM_DESTRUCTOR(DBTranslator4DiscretizedVariable);
306  }
307 
308 
309  /// copy operator
310  template < template < typename > class ALLOC >
311  DBTranslator4DiscretizedVariable< ALLOC >& DBTranslator4DiscretizedVariable< ALLOC >::operator=(
312  const DBTranslator4DiscretizedVariable< ALLOC >& from) {
313  if (this != &from) {
314  DBTranslator< ALLOC >::operator=(from);
315  _variable_ = from._variable_;
316 
317  if (_real_variable_ != nullptr) delete _real_variable_;
318  _real_variable_ = from._real_variable_->clone();
319  }
320 
321  return *this;
322  }
323 
324 
325  /// move operator
326  template < template < typename > class ALLOC >
327  DBTranslator4DiscretizedVariable< ALLOC >& DBTranslator4DiscretizedVariable< ALLOC >::operator=(
328  DBTranslator4DiscretizedVariable< ALLOC >&& from) {
329  if (this != &from) {
330  DBTranslator< ALLOC >::operator=(std::move(from));
331  _variable_ = std::move(from._variable_);
332 
333  if (_real_variable_ != nullptr) delete _real_variable_;
334  _real_variable_ = from._real_variable_;
335  from._real_variable_ = nullptr;
336  }
337 
338  return *this;
339  }
340 
341 
342  /// returns the translation of a string, as found in the current dictionary
343  template < template < typename > class ALLOC >
344  INLINE DBTranslatedValue
345  DBTranslator4DiscretizedVariable< ALLOC >::translate(const std::string& str) {
346  // try to get the index of str within the discretized variable.
347  try {
348  return DBTranslatedValue{std::size_t(_variable_[str])};
349  } catch (gum::Exception&) {
350  // check for a missing symbol
351  if (this->isMissingSymbol(str))
352  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
353 
354  // check if the back_dictionary does not contain str. This enables
355  // to execute translate ( translateBack ( translate ( str ) ) )
356  // without raising an exception
357  try {
358  return DBTranslatedValue{this->back_dico_.first(str)};
359  } catch (gum::Exception&) {
360  if (!DBCell::isReal(str)) {
361  GUM_ERROR(TypeError,
362  "String \"" << str << "\" cannot be translated because it is not a number");
363  } else {
364  GUM_ERROR(UnknownLabelInDatabase,
365  "The translation of \"" << str << "\" could not be found")
366  }
367  }
368  }
369  }
370 
371 
372  /// returns the original value for a given translation
373  template < template < typename > class ALLOC >
374  INLINE std::string DBTranslator4DiscretizedVariable< ALLOC >::translateBack(
375  const DBTranslatedValue translated_val) const {
376  try {
377  return this->back_dico_.second(translated_val.discr_val);
378  } catch (Exception&) {
379  // check if this is a missing value
380  if ((translated_val.discr_val == std::numeric_limits< std::size_t >::max())
381  && !this->missing_symbols_.empty())
382  return *(this->missing_symbols_.begin());
383  else
384  GUM_ERROR(UnknownLabelInDatabase,
385  "The back translation of \"" << translated_val.discr_val
386  << "\" could not be found");
387  }
388  }
389 
390 
391  /// indicates whether the translator has an editable dictionary or not
392  template < template < typename > class ALLOC >
393  INLINE bool DBTranslator4DiscretizedVariable< ALLOC >::hasEditableDictionary() const {
394  return false;
395  }
396 
397 
398  /// sets/unset the editable dictionary mode
399  template < template < typename > class ALLOC >
400  INLINE void DBTranslator4DiscretizedVariable< ALLOC >::setEditableDictionaryMode(bool) {}
401 
402 
403  /// indicates whether the translations should be reordered
404  template < template < typename > class ALLOC >
405  bool DBTranslator4DiscretizedVariable< ALLOC >::needsReordering() const {
406  return false;
407  }
408 
409 
410  /// returns a mapping to reorder the current dictionary and updates it
411  template < template < typename > class ALLOC >
412  INLINE HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > >
413  DBTranslator4DiscretizedVariable< ALLOC >::reorder() {
414  return HashTable< std::size_t,
415  std::size_t,
416  ALLOC< std::pair< std::size_t, std::size_t > > >();
417  }
418 
419 
420  /// returns the domain size of a variable corresponding to the translations
421  template < template < typename > class ALLOC >
422  INLINE std::size_t DBTranslator4DiscretizedVariable< ALLOC >::domainSize() const {
423  return _variable_.domainSize();
424  }
425 
426 
427  /// returns the variable stored into the translator
428  template < template < typename > class ALLOC >
429  INLINE const IDiscretizedVariable* DBTranslator4DiscretizedVariable< ALLOC >::variable() const {
430  return _real_variable_;
431  }
432 
433 
434  /// returns the translation of a missing value
435  template < template < typename > class ALLOC >
436  INLINE DBTranslatedValue DBTranslator4DiscretizedVariable< ALLOC >::missingValue() const {
437  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
438  }
439 
440 
441  } /* namespace learning */
442 
443 } /* namespace gum */
444 
445 
446 #endif /* DOXYGEN_SHOULD_SKIP_THIS */