aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
DBTranslator4DiscretizedVariable_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for discretized variables
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #include <utility>
29 #include <vector>
30 #include <sstream>
31 
32 #include <agrum/tools/database/DBTranslator4DiscretizedVariable.h>
33 #include <agrum/tools/database/DBCell.h>
34 
35 #ifndef DOXYGEN_SHOULD_SKIP_THIS
36 
37 namespace gum {
38 
39  namespace learning {
40 
41 
42  /// default constructor with a discretized variable as translator
43  template < template < typename > class ALLOC >
44  template < typename GUM_SCALAR, template < typename > class XALLOC >
45  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
46  const DiscretizedVariable< GUM_SCALAR >& var,
47  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
48  std::size_t max_dico_entries,
49  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type&
50  alloc) :
51  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
52  missing_symbols,
53  false,
54  max_dico_entries,
55  alloc),
56  variable__(var.name(), var.description()) {
57  // check that the variable has not too many entries
58  if (var.domainSize() > max_dico_entries) {
59  GUM_ERROR(SizeError,
60  "the dictionary induced by the variable is too large");
61  }
62 
63  // copy the ticks of var into our internal variable
64  const auto& ticks = var.ticks();
65  for (const auto tick: ticks) {
66  variable__.addTick((float)tick);
67  }
68 
69  // the the bounds of the discretized variable
70  const float lower_bound = (float)ticks[0];
71  const float upper_bound = (float)ticks.back();
72 
73  // remove all the missing symbols corresponding to a number between
74  // lower_bound and upper_bound
75  for (auto iter = this->missing_symbols_.beginSafe();
76  iter != this->missing_symbols_.endSafe();
77  ++iter) {
78  if (DBCell::isReal(*iter)) {
79  const float missing_val = std::stof(*iter);
80  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
81  this->missing_symbols_.erase(iter);
82  }
83  }
84  }
85 
86  // add the content of the variable into the back dictionary
87  std::size_t size = 0;
88  for (const auto& label: var.labels()) {
89  // if the label corresponds to a missing value, then remove it from
90  // the set of missing symbols. If, in addition, it has already
91  // been entered into the back_dictionary, then, this has been done
92  // because the label corresponded to a missing value, so we should
93  // remove the label as well from the back_dictionary.
94  if (this->missing_symbols_.exists(label)) {
95  this->missing_symbols_.erase(label);
96  }
97 
98  this->back_dico_.insert(size, label);
99  ++size;
100  }
101 
102  // store a copy of the variable, that should be used by method variable ()
103  real_variable__ = var.clone();
104 
105  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
106  }
107 
108 
109  /// default constructor with a IDiscretized variable as translator
110  template < template < typename > class ALLOC >
111  template < template < typename > class XALLOC >
112  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
113  const IDiscretizedVariable& var,
114  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
115  std::size_t max_dico_entries,
116  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type&
117  alloc) :
118  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
119  missing_symbols,
120  false,
121  max_dico_entries,
122  alloc),
123  variable__(var.name(), var.description()) {
124  // check that the variable has not too many entries
125  if (var.domainSize() > max_dico_entries) {
126  GUM_ERROR(SizeError,
127  "the dictionary induced by the variable is too large");
128  }
129 
130  // copy the ticks of var into our internal variable
131  const auto ticks = var.ticksAsDoubles();
132  for (const auto tick: ticks) {
133  variable__.addTick((float)tick);
134  }
135 
136  // the the bounds of the discretized variable
137  const float lower_bound = float(ticks[0]);
138  const float upper_bound = float(ticks.back());
139 
140  // remove all the missing symbols corresponding to a number between
141  // lower_bound and upper_bound
142  for (auto iter = this->missing_symbols_.beginSafe();
143  iter != this->missing_symbols_.endSafe();
144  ++iter) {
145  if (DBCell::isReal(*iter)) {
146  const float missing_val = std::stof(*iter);
147  if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
148  this->missing_symbols_.erase(iter);
149  }
150  }
151  }
152 
153  // add the content of the variable into the back dictionary
154  std::size_t size = 0;
155  for (const auto& label: var.labels()) {
156  // if the label corresponds to a missing value, then remove it from
157  // the set of missing symbols. If, in addition, it has already
158  // been entered into the back_dictionary, then, this has been done
159  // because the label corresponded to a missing value, so we should
160  // remove the label as well from the back_dictionary.
161  if (this->missing_symbols_.exists(label)) {
162  this->missing_symbols_.erase(label);
163  }
164 
165  this->back_dico_.insert(size, label);
166  ++size;
167  }
168 
169  // store a copy of the variable, that should be used by method variable ()
170  real_variable__ = var.clone();
171 
172  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
173  }
174 
175 
176  /// default constructor with a discretized variable as translator
177  template < template < typename > class ALLOC >
178  template < typename GUM_SCALAR >
179  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
180  const DiscretizedVariable< GUM_SCALAR >& var,
181  std::size_t max_dico_entries,
182  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type&
183  alloc) :
184  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
185  false,
186  max_dico_entries,
187  alloc),
188  variable__(var.name(), var.description()) {
189  // check that the variable has not too many entries
190  if (var.domainSize() > max_dico_entries) {
191  GUM_ERROR(SizeError,
192  "the dictionary induced by the variable is too large");
193  }
194 
195  // copy the ticks of var into our internal variable
196  const auto& ticks = var.ticks();
197  for (const auto tick: ticks) {
198  variable__.addTick((float)tick);
199  }
200 
201  // add the content of the variable into the back dictionary
202  std::size_t size = 0;
203  for (const auto& label: var.labels()) {
204  this->back_dico_.insert(size, label);
205  ++size;
206  }
207 
208  // store a copy of the variable, that should be used by method variable ()
209  real_variable__ = var.clone();
210 
211  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
212  }
213 
214 
215  /// default constructor with a IDiscretized variable as translator
216  template < template < typename > class ALLOC >
217  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
218  const IDiscretizedVariable& var,
219  std::size_t max_dico_entries,
220  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type&
221  alloc) :
222  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
223  false,
224  max_dico_entries,
225  alloc),
226  variable__(var.name(), var.description()) {
227  // check that the variable has not too many entries
228  if (var.domainSize() > max_dico_entries) {
229  GUM_ERROR(SizeError,
230  "the dictionary induced by the variable is too large");
231  }
232 
233  // copy the ticks of var into our internal variable
234  const auto ticks = var.ticksAsDoubles();
235  for (const auto tick: ticks) {
236  variable__.addTick((float)tick);
237  }
238 
239  // add the content of the variable into the back dictionary
240  std::size_t size = 0;
241  for (const auto& label: var.labels()) {
242  this->back_dico_.insert(size, label);
243  ++size;
244  }
245 
246  // store a copy of the variable, that should be used by method variable ()
247  real_variable__ = var.clone();
248 
249  GUM_CONSTRUCTOR(DBTranslator4DiscretizedVariable);
250  }
251 
252 
253  /// copy constructor with a given allocator
254  template < template < typename > class ALLOC >
255  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
256  const DBTranslator4DiscretizedVariable< ALLOC >& from,
257  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type&
258  alloc) :
259  DBTranslator< ALLOC >(from, alloc),
260  variable__(from.variable__) {
261  // store a copy of the variable, that should be used by method variable ()
262  real_variable__ = from.real_variable__->clone();
263 
264  GUM_CONS_CPY(DBTranslator4DiscretizedVariable);
265  }
266 
267 
268  /// copy constructor
269  template < template < typename > class ALLOC >
270  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
271  const DBTranslator4DiscretizedVariable< ALLOC >& from) :
272  DBTranslator4DiscretizedVariable< ALLOC >(from, from.getAllocator()) {}
273 
274 
275  /// move constructor with a given allocator
276  template < template < typename > class ALLOC >
277  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
278  DBTranslator4DiscretizedVariable< ALLOC >&& from,
279  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type&
280  alloc) :
281  DBTranslator< ALLOC >(std::move(from), alloc),
282  variable__(std::move(from.variable__)) {
283  // moves the copy of the variable, that should be used by method variable ()
284  real_variable__ = from.real_variable__;
285  from.real_variable__ = nullptr;
286 
287  GUM_CONS_MOV(DBTranslator4DiscretizedVariable);
288  }
289 
290 
291  /// move constructor
292  template < template < typename > class ALLOC >
293  DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable(
294  DBTranslator4DiscretizedVariable< ALLOC >&& from) :
295  DBTranslator4DiscretizedVariable< ALLOC >(std::move(from),
296  from.getAllocator()) {}
297 
298 
299  /// virtual copy constructor with a given allocator
300  template < template < typename > class ALLOC >
301  DBTranslator4DiscretizedVariable< ALLOC >*
302  DBTranslator4DiscretizedVariable< ALLOC >::clone(
303  const typename DBTranslator4DiscretizedVariable< ALLOC >::allocator_type&
304  alloc) const {
305  ALLOC< DBTranslator4DiscretizedVariable< ALLOC > > allocator(alloc);
306  DBTranslator4DiscretizedVariable< ALLOC >* translator
307  = allocator.allocate(1);
308  try {
309  allocator.construct(translator, *this, alloc);
310  } catch (...) {
311  allocator.deallocate(translator, 1);
312  throw;
313  }
314  return translator;
315  }
316 
317 
318  /// virtual copy constructor
319  template < template < typename > class ALLOC >
320  INLINE DBTranslator4DiscretizedVariable< ALLOC >*
321  DBTranslator4DiscretizedVariable< ALLOC >::clone() const {
322  return clone(this->getAllocator());
323  }
324 
325 
326  /// destructor
327  template < template < typename > class ALLOC >
328  INLINE DBTranslator4DiscretizedVariable<
329  ALLOC >::~DBTranslator4DiscretizedVariable() {
330  if (real_variable__ != nullptr) delete real_variable__;
331 
332  GUM_DESTRUCTOR(DBTranslator4DiscretizedVariable);
333  }
334 
335 
336  /// copy operator
337  template < template < typename > class ALLOC >
338  DBTranslator4DiscretizedVariable< ALLOC >&
339  DBTranslator4DiscretizedVariable< ALLOC >::operator=(
340  const DBTranslator4DiscretizedVariable< ALLOC >& from) {
341  if (this != &from) {
342  DBTranslator< ALLOC >::operator=(from);
343  variable__ = from.variable__;
344 
345  if (real_variable__ != nullptr) delete real_variable__;
346  real_variable__ = from.real_variable__->clone();
347  }
348 
349  return *this;
350  }
351 
352 
353  /// move operator
354  template < template < typename > class ALLOC >
355  DBTranslator4DiscretizedVariable< ALLOC >&
356  DBTranslator4DiscretizedVariable< ALLOC >::operator=(
357  DBTranslator4DiscretizedVariable< ALLOC >&& from) {
358  if (this != &from) {
359  DBTranslator< ALLOC >::operator=(std::move(from));
360  variable__ = std::move(from.variable__);
361 
362  if (real_variable__ != nullptr) delete real_variable__;
363  real_variable__ = from.real_variable__;
364  from.real_variable__ = nullptr;
365  }
366 
367  return *this;
368  }
369 
370 
371  /// returns the translation of a string, as found in the current dictionary
372  template < template < typename > class ALLOC >
373  INLINE DBTranslatedValue DBTranslator4DiscretizedVariable< ALLOC >::translate(
374  const std::string& str) {
375  // try to get the index of str within the discretized variable.
376  try {
377  return DBTranslatedValue{std::size_t(variable__[str])};
378  } catch (gum::Exception&) {
379  // check for a missing symbol
380  if (this->isMissingSymbol(str))
381  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
382 
383  // check if the back_dictionary does not contain str. This enables
384  // to execute translate ( translateBack ( translate ( str ) ) )
385  // without raising an exception
386  try {
387  return DBTranslatedValue{this->back_dico_.first(str)};
388  } catch (gum::Exception&) {
389  if (!DBCell::isReal(str)) {
390  GUM_ERROR(TypeError,
391  "String \""
392  << str
393  << "\" cannot be translated because it is not a number");
394  } else {
395  GUM_ERROR(UnknownLabelInDatabase,
396  "The translation of \"" << str << "\" could not be found");
397  }
398  }
399  }
400  }
401 
402 
403  /// returns the original value for a given translation
404  template < template < typename > class ALLOC >
405  INLINE std::string DBTranslator4DiscretizedVariable< ALLOC >::translateBack(
406  const DBTranslatedValue translated_val) const {
407  try {
408  return this->back_dico_.second(translated_val.discr_val);
409  } catch (Exception&) {
410  // check if this is a missing value
411  if ((translated_val.discr_val == std::numeric_limits< std::size_t >::max())
412  && !this->missing_symbols_.empty())
413  return *(this->missing_symbols_.begin());
414  else
415  GUM_ERROR(UnknownLabelInDatabase,
416  "The back translation of \"" << translated_val.discr_val
417  << "\" could not be found");
418  }
419  }
420 
421 
422  /// indicates whether the translator has an editable dictionary or not
423  template < template < typename > class ALLOC >
424  INLINE bool
425  DBTranslator4DiscretizedVariable< ALLOC >::hasEditableDictionary() const {
426  return false;
427  }
428 
429 
430  /// sets/unset the editable dictionary mode
431  template < template < typename > class ALLOC >
432  INLINE void
433  DBTranslator4DiscretizedVariable< ALLOC >::setEditableDictionaryMode(bool) {
434  }
435 
436 
437  /// indicates whether the translations should be reordered
438  template < template < typename > class ALLOC >
439  bool DBTranslator4DiscretizedVariable< ALLOC >::needsReordering() const {
440  return false;
441  }
442 
443 
444  /// returns a mapping to reorder the current dictionary and updates it
445  template < template < typename > class ALLOC >
446  INLINE HashTable< std::size_t,
447  std::size_t,
448  ALLOC< std::pair< std::size_t, std::size_t > > >
449  DBTranslator4DiscretizedVariable< ALLOC >::reorder() {
450  return HashTable< std::size_t,
451  std::size_t,
452  ALLOC< std::pair< std::size_t, std::size_t > > >();
453  }
454 
455 
456  /// returns the domain size of a variable corresponding to the translations
457  template < template < typename > class ALLOC >
458  INLINE std::size_t
459  DBTranslator4DiscretizedVariable< ALLOC >::domainSize() const {
460  return variable__.domainSize();
461  }
462 
463 
464  /// returns the variable stored into the translator
465  template < template < typename > class ALLOC >
466  INLINE const IDiscretizedVariable*
467  DBTranslator4DiscretizedVariable< ALLOC >::variable() const {
468  return real_variable__;
469  }
470 
471 
472  /// returns the translation of a missing value
473  template < template < typename > class ALLOC >
474  INLINE DBTranslatedValue
475  DBTranslator4DiscretizedVariable< ALLOC >::missingValue() const {
476  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
477  }
478 
479 
480  } /* namespace learning */
481 
482 } /* namespace gum */
483 
484 
485 #endif /* DOXYGEN_SHOULD_SKIP_THIS */