aGrUM  0.16.0
DBTranslator4LabelizedVariable_tpl.h
Go to the documentation of this file.
1 
29 #include <utility>
30 #include <vector>
31 
34 
35 #ifndef DOXYGEN_SHOULD_SKIP_THIS
36 
37 namespace gum {
38 
39  namespace learning {
40 
41 
/// Default constructor without any initial variable. The qualified
/// constructor name and the type of the alloc parameter are not visible in
/// this listing.
/// Forwards DISCRETE, missing_symbols, an editable-dictionary flag set to
/// true, max_dico_entries and alloc to the DBTranslator base class, and
/// initializes the stored LabelizedVariable with ("var", "", 0).
43  template < template < typename > class ALLOC >
44  template < template < typename > class XALLOC >
46  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
47  std::size_t max_dico_entries,
49  alloc) :
50  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
51  missing_symbols,
52  true,
53  max_dico_entries,
54  alloc),
55  __variable("var", "", 0) {
56  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
57  }
58 
59 
/// Constructor taking neither a variable nor missing symbols. The qualified
/// constructor name and the type of the alloc parameter are not visible in
/// this listing.
/// The base class receives DISCRETE, true, max_dico_entries and alloc; the
/// literal true is presumably the editable-dictionary flag (the matching
/// base-class overload is not shown here -- to be confirmed).
61  template < template < typename > class ALLOC >
63  std::size_t max_dico_entries,
65  alloc) :
66  DBTranslator< ALLOC >(
67  DBTranslatedValueType::DISCRETE, true, max_dico_entries, alloc),
68  __variable("var", "", 0) {
69  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
70  }
71 
72 
/// Constructor from an existing LabelizedVariable and a set of missing
/// symbols. The qualified constructor name, the type of the alloc parameter
/// and the first line of the base-class initializer are not visible in this
/// listing.
/// The translator stores a copy of var, fills the back dictionary with
/// (index, label) pairs in the order of var.labels(), and removes from the
/// missing symbols every string that is also a label of var.
/// GUM_ERROR is invoked with SizeError when var.domainSize() exceeds
/// max_dico_entries.
74  template < template < typename > class ALLOC >
75  template < template < typename > class XALLOC >
77  const LabelizedVariable& var,
78  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
79  const bool editable_dictionary,
80  std::size_t max_dico_entries,
82  alloc) :
84  missing_symbols,
85  editable_dictionary,
86  max_dico_entries,
87  alloc),
88  __variable(var) {
89  // check that the variable has not too many entries
90  if (var.domainSize() > max_dico_entries) {
91  GUM_ERROR(SizeError,
92  "the dictionary induced by the variable is too large");
93  }
94 
95  // add the content of the variable into the back dictionary
// size is the running index: the i-th label of var is stored under key i
96  std::size_t size = 0;
97  for (const auto& label : var.labels()) {
98  // if the label corresponds to a missing value, then remove it from
99  // the set of missing symbols.
100  if (this->_missing_symbols.exists(label)) {
101  this->_missing_symbols.erase(label);
102  }
103 
104  // insert the label into the back_dictionary
105  this->_back_dico.insert(size, label);
106  ++size;
107  }
108 
109  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
110  }
111 
112 
/// Constructor from an existing LabelizedVariable, without missing symbols.
/// The qualified constructor name, the type of the alloc parameter and the
/// first line of the base-class initializer are not visible in this listing.
/// The translator stores a copy of var and fills the back dictionary with
/// (index, label) pairs in the order of var.labels().
/// GUM_ERROR is invoked with SizeError when var.domainSize() exceeds
/// max_dico_entries.
114  template < template < typename > class ALLOC >
116  const LabelizedVariable& var,
117  const bool editable_dictionary,
118  std::size_t max_dico_entries,
120  alloc) :
122  editable_dictionary,
123  max_dico_entries,
124  alloc),
125  __variable(var) {
126  // check that the variable has not too many entries
127  if (var.domainSize() > max_dico_entries) {
128  GUM_ERROR(SizeError,
129  "the dictionary induced by the variable is too large");
130  }
131 
132  // add the content of the variable into the back dictionary
// size is the running index: the i-th label of var is stored under key i
133  std::size_t size = 0;
134  for (const auto& label : var.labels()) {
135  // insert the label into the back_dictionary
136  this->_back_dico.insert(size, label);
137  ++size;
138  }
139 
140  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
141  }
142 
143 
/// Copy constructor with a given allocator (qualified name and allocator
/// type not visible in this listing): copies the DBTranslator base part of
/// from using alloc, then copies from's variable.
145  template < template < typename > class ALLOC >
147  const DBTranslator4LabelizedVariable< ALLOC >& from,
149  alloc) :
150  DBTranslator< ALLOC >(from, alloc),
151  __variable(from.__variable) {
152  GUM_CONS_CPY(DBTranslator4LabelizedVariable);
153  }
154 
155 
/// Copy constructor (qualified name not visible in this listing): delegates
/// to the copy constructor taking an allocator, passing from's own allocator.
157  template < template < typename > class ALLOC >
159  const DBTranslator4LabelizedVariable< ALLOC >& from) :
160  DBTranslator4LabelizedVariable< ALLOC >(from, from.getAllocator()) {}
161 
162 
/// Move constructor with a given allocator (qualified name and allocator
/// type not visible in this listing): moves the DBTranslator base part of
/// from using alloc, then moves from's variable.
164  template < template < typename > class ALLOC >
166  DBTranslator4LabelizedVariable< ALLOC >&& from,
168  alloc) :
169  DBTranslator< ALLOC >(std::move(from), alloc),
// from.__variable is read after from was handed to the base constructor;
// presumably the base only takes over its own members -- to be confirmed
// against DBTranslator's move constructor, which is not shown here.
170  __variable(std::move(from.__variable)) {
171  GUM_CONS_MOV(DBTranslator4LabelizedVariable);
172  }
173 
174 
/// Move constructor (qualified name not visible in this listing): delegates
/// to the move constructor taking an allocator, passing from's own allocator.
/// std::move(from) is only a cast, so from.getAllocator() still reads an
/// intact object whatever the evaluation order of the two arguments.
176  template < template < typename > class ALLOC >
178  DBTranslator4LabelizedVariable< ALLOC >&& from) :
179  DBTranslator4LabelizedVariable< ALLOC >(std::move(from),
180  from.getAllocator()) {}
181 
182 
/// Virtual copy constructor with a given allocator (qualified function name
/// and allocator type not visible in this listing).
/// Allocates room for one translator with an allocator built from alloc and
/// copy-constructs *this into it. If the construction throws, the storage
/// is released and the exception is rethrown.
/// Returns a pointer to the newly constructed translator.
184  template < template < typename > class ALLOC >
185  DBTranslator4LabelizedVariable< ALLOC >*
188  alloc) const {
189  ALLOC< DBTranslator4LabelizedVariable< ALLOC > > allocator(alloc);
190  DBTranslator4LabelizedVariable< ALLOC >* translator = allocator.allocate(1);
191  try {
192  allocator.construct(translator, *this, alloc);
193  } catch (...) {
// give the raw storage back before propagating the failure
194  allocator.deallocate(translator, 1);
195  throw;
196  }
197  return translator;
198  }
199 
200 
/// Virtual copy constructor (qualified function name not visible in this
/// listing): clones the translator using its own allocator.
202  template < template < typename > class ALLOC >
203  INLINE DBTranslator4LabelizedVariable< ALLOC >*
205  return clone(this->getAllocator());
206  }
207 
208 
/// Destructor (qualified name not visible in this listing); the body only
/// invokes the GUM_DESTRUCTOR macro.
210  template < template < typename > class ALLOC >
211  INLINE
213  GUM_DESTRUCTOR(DBTranslator4LabelizedVariable);
214  }
215 
216 
/// Copy operator: when from is a different object, copies its variable into
/// this translator; returns *this.
218  template < template < typename > class ALLOC >
219  DBTranslator4LabelizedVariable< ALLOC >&
221  operator=(const DBTranslator4LabelizedVariable< ALLOC >& from) {
222  if (this != &from) {
// NOTE(review): listing line 223 is absent here; presumably it is the
// base-class assignment, mirroring the move operator -- to be confirmed
// against the original file.
224  __variable = from.__variable;
225  }
226 
227  return *this;
228  }
229 
230 
/// Move operator: when from is a different object, move-assigns the
/// DBTranslator base part and then the variable of from; returns *this.
232  template < template < typename > class ALLOC >
233  DBTranslator4LabelizedVariable< ALLOC >&
235  operator=(DBTranslator4LabelizedVariable< ALLOC >&& from) {
236  if (this != &from) {
237  DBTranslator< ALLOC >::operator=(std::move(from));
238  __variable = std::move(from.__variable);
239  }
240 
241  return *this;
242  }
243 
244 
/// Returns the translation of a string (the line holding the qualified
/// function name and the str parameter is not visible in this listing).
/// Lookup order:
///  - the index of str among the labels of the stored variable;
///  - if that lookup throws a gum::Exception and str is a missing symbol,
///    the value std::numeric_limits< std::size_t >::max();
///  - otherwise, if the dictionary is editable, str is appended as a new
///    label and its index (the former domain size) is returned.
/// GUM_ERROR is invoked with SizeError when the variable already holds
/// _max_dico_entries labels, and with UnknownLabelInDatabase when str is
/// unknown and the dictionary is not editable.
246  template < template < typename > class ALLOC >
247  DBTranslatedValue
249  // try to get the index of str within the labelized variable. If this
250  // cannot be found, try to find if this corresponds to a missing value.
251  // Finally, if this is still not a missing value and, if enabled, try
252  // to add str as a new label
253  try {
254  return DBTranslatedValue{std::size_t(__variable[str])};
255  } catch (gum::Exception&) {
256  // check that this is not a missing value
257  if (this->isMissingSymbol(str)) {
258  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
259  }
260 
261  // try to add str as a new value if possible
262  if (this->hasEditableDictionary()) {
// the new label takes the next free index, i.e. the current domain size
263  const std::size_t size = __variable.domainSize();
264  if (size >= this->_max_dico_entries)
265  GUM_ERROR(SizeError,
266  "String \"" << str << "\" cannot be translated "
267  << "because the dictionary is already full");
// keep the variable and the back dictionary in step
268  __variable.addLabel(str);
269  this->_back_dico.insert(size, str);
270  return DBTranslatedValue{size};
271  } else
272  GUM_ERROR(UnknownLabelInDatabase,
273  "The translation of \"" << str << "\" could not be found");
274  }
275  }
276 
277 
/// Returns the original value for a given translation (the line holding the
/// return type and the qualified function name is not visible in this
/// listing).
/// The discrete value is looked up in the back dictionary. If that lookup
/// throws an Exception, the value
/// std::numeric_limits< std::size_t >::max() is mapped to the first element
/// of the set of missing symbols, provided the set is not empty; in every
/// other case GUM_ERROR is invoked with UnknownLabelInDatabase.
279  template < template < typename > class ALLOC >
281  const DBTranslatedValue translated_val) const {
282  try {
283  return this->_back_dico.second(translated_val.discr_val);
284  } catch (Exception&) {
285  // check if this is a missing value
286  if ((translated_val.discr_val == std::numeric_limits< std::size_t >::max())
287  && !this->_missing_symbols.empty())
288  return *(this->_missing_symbols.begin());
289  else
290  GUM_ERROR(UnknownLabelInDatabase,
291  "The back translation of \"" << translated_val.discr_val
292  << "\" could not be found");
293  }
294  }
295 
296 
/// Indicates whether a reordering is needed to make the translations sorted
/// (the line holding the return type and the qualified function name is not
/// visible in this listing).
/// Labels are scanned in order. While every label is accepted by
/// DBCell::isReal, they are compared as floats parsed with std::stof, and
/// true is returned as soon as one is smaller than its predecessor. If a
/// label is not accepted, the labels are instead compared as strings and
/// true is returned as soon as one is smaller than its predecessor.
/// Returns false otherwise.
/// NOTE(review): the numeric pass can return true before a later
/// non-numeric label is met (e.g. "10", "9", "abc", assuming isReal accepts
/// the first two). For such a set reorder() sorts lexicographically, so the
/// two functions may disagree -- to be confirmed whether this is intended.
298  template < template < typename > class ALLOC >
300  // if the variable contains only numbers, they should be increasing
301  const auto& labels = __variable.labels();
// start below every representable float so the first label never triggers
302  float last_number = std::numeric_limits< float >::lowest();
303  float number;
304  bool only_numbers = true;
305  for (const auto& label : labels) {
306  if (!DBCell::isReal(label)) {
307  only_numbers = false;
308  break;
309  }
310  number = std::stof(label);
311  if (number < last_number) return true;
312  last_number = number;
313  }
314 
315  if (!only_numbers) {
316  // here we shall examine whether the strings are sorted by
317  // lexicographical order
318  const std::size_t size = labels.size();
319  for (std::size_t i = 1; i < size; ++i) {
320  if (labels[i] < labels[i - 1]) return true;
321  }
322  }
323 
324  return false;
325  }
326 
327 
/// Performs a reordering of the dictionary and returns a mapping from the
/// old translated values to the new ones (the line holding the qualified
/// function name is not visible in this listing).
/// If every label is accepted by DBCell::isReal, labels are sorted by the
/// float value parsed with std::stof; otherwise they are sorted as strings.
/// When the sort leaves every label at its index, an empty hashtable is
/// returned and nothing is modified. Otherwise the labels of the variable
/// and the back dictionary are rebuilt in the sorted order and the returned
/// hashtable maps each old index to its new index.
329  template < template < typename > class ALLOC >
330  HashTable< std::size_t,
331  std::size_t,
332  ALLOC< std::pair< std::size_t, std::size_t > > >
334  // check whether the variable contains only numeric values. In this
335  // case, we have to sort the values by increasing number
336  const auto& labels = __variable.labels();
337  const std::size_t size = labels.size();
338 
339  bool only_numbers = true;
340  for (const auto& label : labels) {
341  if (!DBCell::isReal(label)) {
342  only_numbers = false;
343  break;
344  }
345  }
346 
347  // assign to each label its current index
348  std::vector< std::pair< std::size_t, std::string >,
349  ALLOC< std::pair< std::size_t, std::string > > >
350  xlabels;
351  xlabels.reserve(size);
352  for (std::size_t i = std::size_t(0); i < size; ++i)
353  xlabels.push_back(std::make_pair(i, labels[i]));
354 
355  // reorder by increasing order
// std::sort does not guarantee the relative order of labels that compare
// equal (e.g. two strings parsing to the same float)
356  if (only_numbers)
357  std::sort(xlabels.begin(),
358  xlabels.end(),
359  [](const std::pair< std::size_t, std::string >& a,
360  const std::pair< std::size_t, std::string >& b) -> bool {
361  return std::stof(a.second) < std::stof(b.second);
362  });
363  else
364  std::sort(xlabels.begin(),
365  xlabels.end(),
366  [](const std::pair< std::size_t, std::string >& a,
367  const std::pair< std::size_t, std::string >& b) -> bool {
368  return a.second < b.second;
369  });
370 
371  // check whether there were any modification
// a pair whose stored (old) index differs from its position has moved
372  bool modifications = false;
373  for (std::size_t i = std::size_t(0); i < size; ++i) {
374  if (xlabels[i].first != i) {
375  modifications = true;
376  break;
377  }
378  }
379 
380  // if there were no modification, return an empty update hashtable
381  if (!modifications) {
382  return HashTable< std::size_t,
383  std::size_t,
384  ALLOC< std::pair< std::size_t, std::size_t > > >();
385  }
386 
387  // recreate the variable so that the labels correspond to the
388  // new ordering
389  __variable.eraseLabels();
390  for (auto& label : xlabels)
391  __variable.addLabel(label.second);
392 
393  // create the hashTable corresponding to the mapping from the old
394  // indices to the new one
395  this->_back_dico.clear();
// the constructor argument is presumably an initial size hint for the
// hashtable -- to be confirmed against HashTable, which is not shown here
396  HashTable< std::size_t,
397  std::size_t,
398  ALLOC< std::pair< std::size_t, std::size_t > > >
399  mapping((Size)size);
400  for (std::size_t i = std::size_t(0); i < size; ++i) {
// xlabels[i].first is the old index of the label now located at index i
401  mapping.insert(xlabels[i].first, i);
402  this->_back_dico.insert(i, xlabels[i].second);
403  }
404 
405  return mapping;
406  }
407 
408 
/// Returns the domain size of the variable stored into the translator (the
/// line holding the qualified function name is not visible in this listing).
410  template < template < typename > class ALLOC >
411  INLINE std::size_t
413  return __variable.domainSize();
414  }
415 
416 
/// Returns a pointer to the variable stored into the translator (the line
/// holding the qualified function name is not visible in this listing).
/// The pointer addresses a member of this object.
418  template < template < typename > class ALLOC >
419  INLINE const LabelizedVariable*
421  return &__variable;
422  }
423 
424 
/// Returns the translation of a missing value, i.e. a DBTranslatedValue
/// holding std::numeric_limits< std::size_t >::max() (the line holding the
/// qualified function name is not visible in this listing).
426  template < template < typename > class ALLOC >
427  INLINE DBTranslatedValue
429  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
430  }
431 
432 
433  } /* namespace learning */
434 
435 } /* namespace gum */
436 
437 
438 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > _back_dico
the bijection relating back translated values and their original strings.
Definition: DBTranslator.h:396
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
virtual const LabelizedVariable * variable() const final
returns the variable stored into the translator
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value
void clear()
Removes all the associations from the gum::Bijection.
virtual ~DBTranslator4LabelizedVariable()
destructor
std::size_t _max_dico_entries
the maximum number of entries that the dictionary is allowed to contain
Definition: DBTranslator.h:382
DBTranslator4LabelizedVariable(const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor without any initial variable
virtual std::size_t domainSize() const final
returns the domain size of a variable corresponding to the translations
STL namespace.
void erase(const Key &k)
Erases an element from the set.
Definition: set_tpl.h:656
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
DBTranslator(DBTranslatedValueType val_type, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool editable_dictionary=true, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition: set_tpl.h:517
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition: set_tpl.h:607
DBTranslator4LabelizedVariable< ALLOC > & operator=(const DBTranslator4LabelizedVariable< ALLOC > &from)
copy operator
allocator_type getAllocator() const
returns the allocator used by the translator
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
static bool isReal(const std::string &str)
determine whether a string corresponds precisely to a real number
Set< std::string, ALLOC< std::string > > _missing_symbols
the set of missing symbols
Definition: DBTranslator.h:385
DBTranslator< ALLOC > & operator=(const DBTranslator< ALLOC > &from)
copy operator
virtual bool needsReordering() const final
indicates whether a reordering is needed to make the translations sorted
Base class for all aGrUM's exceptions.
Definition: exceptions.h:106
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
virtual DBTranslator4LabelizedVariable< ALLOC > * clone() const
virtual copy constructor
virtual bool hasEditableDictionary() const
indicates whether the translator has an editable dictionary or not
bool isMissingSymbol(const std::string &str) const
indicates whether a string corresponds to a missing symbol
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
performs a reordering of the dictionary and returns a mapping from the old translated values to the new ones
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
typename DBTranslator< ALLOC >::allocator_type allocator_type
type for the allocators passed in arguments of methods
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55