aGrUM  0.17.2
a C++ library for (probabilistic) graphical models
DBRowGeneratorEM_tpl.h
Go to the documentation of this file.
1 
29 
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 
32 namespace gum {
33 
34  namespace learning {
35 
37  template < typename GUM_SCALAR, template < typename > class ALLOC >
41  }
42 
43 
/// Default constructor (named so by this file's documentation index).
///
/// Forwards column_types, bn, nodeId2columns and alloc to the
/// DBRowGeneratorWithBN base class, creates the two internal "filled" rows
/// with bn.size() cells each, then calls setBayesNet(bn), which resizes them.
/// NOTE(review): the declarator line and one base-constructor argument are not
/// visible in this listing (its embedded numbering skips 46, 52 and 57);
/// check the original file before editing this definition.
45 template < typename GUM_SCALAR, template < typename > class ALLOC >
47 const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > >
48 column_types,
49 const BayesNet< GUM_SCALAR >& bn,
50 const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
51 nodeId2columns,
53 alloc) :
54 DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(
55 column_types,
56 bn,
58 nodeId2columns,
59 alloc),
    // the 1.0 is presumably the initial weight of the row -- TODO confirm
    // against the DBRow constructor
60 __filled_row1(bn.size(), 1.0, alloc),
61 __filled_row2(bn.size(), 1.0, alloc) {
    // validates the node/column mapping against bn and sizes the filled rows
62 setBayesNet(bn);
63
64 GUM_CONSTRUCTOR(DBRowGeneratorEM);
65 }
66 
67 
/// Copy constructor with a given allocator.
///
/// Copies every data member of from. __input_row is copied as a pointer, so
/// both generators refer to the same input row. The instantiation
/// __joint_inst is not shared: a new one is created on this object's own
/// __joint_proba and set to the same values as from's instantiation.
/// NOTE(review): __joint_inst is not in the initializer list; when
/// from.__joint_inst is nullptr it keeps whatever the class declaration gives
/// it (presumably a nullptr default member initializer -- confirm in the header).
69 template < typename GUM_SCALAR, template < typename > class ALLOC >
71 const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from,
73 alloc) :
74 DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(from, alloc),
75 __input_row(from.__input_row), __missing_cols(from.__missing_cols),
76 __nb_miss(from.__nb_miss), __joint_proba(from.__joint_proba),
77 __filled_row1(from.__filled_row1), __filled_row2(from.__filled_row2),
78 __use_filled_row1(from.__use_filled_row1),
79 __original_weight(from.__original_weight) {
80 if (from.__joint_inst != nullptr) {
    // rebuild the instantiation on our own copy of the joint probability and
    // replay, variable by variable, the values held by from's instantiation
81 __joint_inst = new Instantiation(__joint_proba);
82 const auto& var_seq = __joint_inst->variablesSequence();
83 const std::size_t size = var_seq.size();
84 for (std::size_t i = std::size_t(0); i < size; ++i) {
85 __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
86 }
87 }
88
89 GUM_CONS_CPY(DBRowGeneratorEM);
90 }
91 
92 
/// Copy constructor: delegates to the allocator-aware copy constructor,
/// passing the allocator returned by from.getAllocator().
94 template < typename GUM_SCALAR, template < typename > class ALLOC >
96 const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from) :
97 DBRowGeneratorEM< GUM_SCALAR, ALLOC >(from, from.getAllocator()) {}
98 
99 
/// Move constructor with a given allocator.
///
/// Moves __missing_cols, __joint_proba and the two filled rows out of from and
/// copies the remaining scalar/pointer members. __joint_inst is not stolen:
/// a new Instantiation is created on the moved-to __joint_proba and set to the
/// values of from's instantiation; from keeps its own pointer.
/// NOTE(review): from.__joint_inst is read in the body after
/// from.__joint_proba has been moved from; whether that instantiation is still
/// usable depends on the move semantics of Potential/Instantiation -- confirm.
/// NOTE(review): __joint_inst is not in the initializer list; presumably it
/// has a nullptr default member initializer -- confirm in the header.
101 template < typename GUM_SCALAR, template < typename > class ALLOC >
103 DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from,
105 alloc) :
106 DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(std::move(from), alloc),
107 __input_row(from.__input_row),
108 __missing_cols(std::move(from.__missing_cols)), __nb_miss(from.__nb_miss),
109 __joint_proba(std::move(from.__joint_proba)),
110 __filled_row1(std::move(from.__filled_row1)),
111 __filled_row2(std::move(from.__filled_row2)),
112 __use_filled_row1(from.__use_filled_row1),
113 __original_weight(from.__original_weight) {
114 if (from.__joint_inst != nullptr) {
    // same rebuild-and-replay scheme as in the copy constructor
115 __joint_inst = new Instantiation(__joint_proba);
116 const auto& var_seq = __joint_inst->variablesSequence();
117 const std::size_t size = var_seq.size();
118 for (std::size_t i = std::size_t(0); i < size; ++i) {
119 __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
120 }
121 }
122
123 GUM_CONS_MOV(DBRowGeneratorEM);
124 }
125 
126 
/// Move constructor: delegates to the allocator-aware move constructor,
/// passing the allocator returned by from.getAllocator().
/// std::move(from) here only casts to an rvalue reference, so reading
/// from.getAllocator() in the same argument list still sees an intact object.
128 template < typename GUM_SCALAR, template < typename > class ALLOC >
130 DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from) :
131 DBRowGeneratorEM< GUM_SCALAR, ALLOC >(std::move(from),
132 from.getAllocator()) {}
133 
134 
/// Virtual copy constructor with a given allocator.
///
/// Allocates storage for one DBRowGeneratorEM through an allocator built from
/// alloc and copy-constructs *this into it.
/// @return a pointer to the new generator, obtained from that allocator.
/// If the construction throws, the storage is released and the exception is
/// rethrown unchanged.
/// NOTE(review): if ALLOC is std::allocator, the member function construct()
/// is deprecated in C++17 and removed in C++20;
/// std::allocator_traits<...>::construct is the portable spelling.
136 template < typename GUM_SCALAR, template < typename > class ALLOC >
137 DBRowGeneratorEM< GUM_SCALAR, ALLOC >*
140 alloc) const {
141 ALLOC< DBRowGeneratorEM< GUM_SCALAR, ALLOC > > allocator(alloc);
142 DBRowGeneratorEM< GUM_SCALAR, ALLOC >* generator = allocator.allocate(1);
143 try {
144 allocator.construct(generator, *this, alloc);
145 } catch (...) {
    // do not leak the raw storage when the copy constructor fails
146 allocator.deallocate(generator, 1);
147 throw;
148 }
149 return generator;
150 }
151 
152 
/// Virtual copy constructor: clones *this using its own allocator
/// (forwards to the allocator-taking overload of clone).
154 template < typename GUM_SCALAR, template < typename > class ALLOC >
155 DBRowGeneratorEM< GUM_SCALAR, ALLOC >*
157 return clone(this->getAllocator());
158 }
159 
160 
/// Destructor: releases the instantiation owned through __joint_inst.
/// The nullptr test is only defensive; delete on a null pointer is a no-op.
162 template < typename GUM_SCALAR, template < typename > class ALLOC >
164 if (__joint_inst != nullptr) delete __joint_inst;
165 GUM_DESTRUCTOR(DBRowGeneratorEM);
166 }
167 
168 
/// Copy operator.
///
/// Guarded against self-assignment. Copies all data members of from, then
/// replaces the owned instantiation: the old one is deleted and, if from has
/// one, a new Instantiation is built on this object's __joint_proba and set to
/// the same values as from's.
/// @return *this
/// NOTE(review): the embedded numbering skips line 175 (and the declarator
/// line 172); presumably 175 is the call to the base-class copy operator --
/// confirm in the original file.
170 template < typename GUM_SCALAR, template < typename > class ALLOC >
171 DBRowGeneratorEM< GUM_SCALAR, ALLOC >&
173 const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from) {
174 if (this != &from) {
176 __input_row = from.__input_row;
177 __missing_cols = from.__missing_cols;
178 __nb_miss = from.__nb_miss;
179 __joint_proba = from.__joint_proba;
180 __filled_row1 = from.__filled_row1;
181 __filled_row2 = from.__filled_row2;
182 __use_filled_row1 = from.__use_filled_row1;
183 __original_weight = from.__original_weight;
184
    // drop our previous instantiation before building the new one
185 if (__joint_inst != nullptr) {
186 delete __joint_inst;
187 __joint_inst = nullptr;
188 }
189
190 if (from.__joint_inst != nullptr) {
    // the new instantiation is built on the freshly assigned __joint_proba
    // and then set to the values held by from's instantiation
191 __joint_inst = new Instantiation(__joint_proba);
192 const auto& var_seq = __joint_inst->variablesSequence();
193 const std::size_t size = var_seq.size();
194 for (std::size_t i = std::size_t(0); i < size; ++i) {
195 __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
196 }
197 }
198 }
199
200 return *this;
201 }
202 
203 
/// Move operator.
///
/// Guarded against self-assignment. Moves __missing_cols, __joint_proba and
/// the two filled rows out of from, copies the scalar/pointer members, then
/// replaces the owned instantiation exactly as the copy operator does (from
/// keeps its own __joint_inst pointer).
/// @return *this
/// NOTE(review): from.__joint_inst is read after from.__joint_proba has been
/// moved from; whether it is still usable depends on Potential/Instantiation
/// move semantics -- confirm.
/// NOTE(review): the embedded numbering skips line 210 (and the declarator
/// line 207); presumably 210 is the call to the base-class move operator --
/// confirm in the original file.
205 template < typename GUM_SCALAR, template < typename > class ALLOC >
206 DBRowGeneratorEM< GUM_SCALAR, ALLOC >&
208 DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from) {
209 if (this != &from) {
211 __input_row = from.__input_row;
212 __missing_cols = std::move(from.__missing_cols);
213 __nb_miss = from.__nb_miss;
214 __joint_proba = std::move(from.__joint_proba);
215 __filled_row1 = std::move(from.__filled_row1);
216 __filled_row2 = std::move(from.__filled_row2);
217 __use_filled_row1 = from.__use_filled_row1;
218 __original_weight = from.__original_weight;
219
    // drop our previous instantiation before building the new one
220 if (__joint_inst != nullptr) {
221 delete __joint_inst;
222 __joint_inst = nullptr;
223 }
224
225 if (from.__joint_inst != nullptr) {
226 __joint_inst = new Instantiation(__joint_proba);
227 const auto& var_seq = __joint_inst->variablesSequence();
228 const std::size_t size = var_seq.size();
229 for (std::size_t i = std::size_t(0); i < size; ++i) {
230 __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
231 }
232 }
233 }
234
235 return *this;
236 }
237 
238 
/// Generates the next output row for the current input row.
///
/// When the input row was fully observed (__input_row is set), that row is
/// returned unchanged. Otherwise one completed row is returned per call: its
/// missing cells take the values of the current state of __joint_inst, its
/// weight is the joint probability of that state times the weight of the
/// input row, and __joint_inst is then advanced with inc().
/// Two row buffers are used alternately, so two consecutive calls never
/// return the same object (presumably to keep the previously returned
/// reference valid -- confirm with the callers).
/// Requires that _computeRows() has been run on the input row beforehand: it
/// is the function that sets __input_row, __joint_proba and __joint_inst.
/// @return a reference to the input row or to one of the two internal rows.
240 template < typename GUM_SCALAR, template < typename > class ALLOC >
241 INLINE const DBRow< DBTranslatedValue, ALLOC >&
243 this->decreaseRemainingRows();
244
245 // if everything is observed, return the input row
246 if (__input_row != nullptr) return *__input_row;
247
248 if (__use_filled_row1) {
249 // get the weight of the row from the joint probability
250 __filled_row1.setWeight(__joint_proba.get(*__joint_inst)
251 * __original_weight);
252
253 // fill the values of the row
    // __missing_cols[i] is the column of the i-th variable of __joint_inst
254 for (std::size_t i = std::size_t(0); i < __nb_miss; ++i)
255 __filled_row1[__missing_cols[i]].discr_val = __joint_inst->val(i);
256
    // move on to the next state and switch buffers for the next call
257 __joint_inst->inc();
258 __use_filled_row1 = false;
259
260 return __filled_row1;
261 } else {
262 // get the weight of the row from the joint probability
263 __filled_row2.setWeight(__joint_proba.get(*__joint_inst)
264 * __original_weight);
265
266 // fill the values of the row
267 for (std::size_t i = std::size_t(0); i < __nb_miss; ++i)
268 __filled_row2[__missing_cols[i]].discr_val = __joint_inst->val(i);
269
270 __joint_inst->inc();
271 __use_filled_row1 = true;
272
273 return __filled_row2;
274 }
275 }
276 
277 
/// Computes the rows that will be provided as output for the input row.
///
/// Scans the columns of interest for missing values, i.e. cells whose
/// discr_val equals std::numeric_limits< std::size_t >::max().
///  - No missing value: the generator just remembers the row and 1 is returned.
///  - Otherwise: the observed cells are copied into both filled rows, the
///    joint posterior of the missing variables given the observed ones is
///    computed with VariableElimination on the stored Bayesian network and
///    kept in __joint_proba, together with a fresh instantiation on it.
/// @param row the input database row.
/// @return 1 for a fully observed row, else the domain size of the joint
/// posterior (one output row per joint value of the missing variables).
/// Raises NotImplementedYet through GUM_ERROR for unsupported column types.
/// NOTE(review): the declarator line and the case labels of the three
/// switches are not visible in this listing (its embedded numbering skips
/// 280, 288, 298, 363, 371, 387 and 396).
/// NOTE(review): in the three identical error messages, "BDRowGeneratorEM"
/// looks like a typo for "DBRowGeneratorEM" and there is no space between
/// "column" and the column index.
/// NOTE(review): the break statements following GUM_ERROR are unreachable if
/// GUM_ERROR always throws -- confirm against the macro definition.
279 template < typename GUM_SCALAR, template < typename > class ALLOC >
281 const DBRow< DBTranslatedValue, ALLOC >& row) {
282 // check if there are unobserved values among the columns of interest.
283 // If this is the case, set them as targets
284 bool found_unobserved = false;
285 const auto& xrow = row.row();
286 for (const auto col: this->_columns_of_interest) {
287 switch (this->_column_types[col]) {
289 if (xrow[col].discr_val == std::numeric_limits< std::size_t >::max()) {
    // __missing_cols is reset lazily, on the first missing value only
290 if (!found_unobserved) {
291 __missing_cols.clear();
292 found_unobserved = true;
293 }
294 __missing_cols.push_back(col);
295 }
296 break;
297
299 GUM_ERROR(NotImplementedYet,
300 "The BDRowGeneratorEM does not handle yet continuous "
301 << "variables. But the variable in column" << col
302 << " is continuous.");
303 break;
304
305 default:
306 GUM_ERROR(NotImplementedYet,
307 "DBTranslatedValueType " << int(this->_column_types[col])
308 << " is not supported yet");
309 }
310 }
311
312 // if there is no unobserved value, make the __input_row point to the row
313 if (!found_unobserved) {
314 __input_row = &row;
315 return std::size_t(1);
316 }
317
    // a null __input_row tells generate() to output the filled rows
318 __input_row = nullptr;
319 __nb_miss = __missing_cols.size();
320 __original_weight = row.weight();
321
322 // here, there are missing symbols, so we should compute the distribution
323 // of the missing values. For this purpose, we use Variable Elimination
324 VariableElimination< GUM_SCALAR > ve(this->_bn);
325
326 // add the targets and fill the output row with the observed values
    // __missing_cols was filled in the iteration order of _columns_of_interest,
    // so a single index i is enough to recognise the missing columns below.
    // An empty _nodeId2columns means that column indices are used directly
    // as node ids.
327 NodeSet target_set(__nb_miss);
328 if (this->_nodeId2columns.empty()) {
329 std::size_t i = std::size_t(0);
330 bool end_miss = false;
331 for (const auto col: this->_columns_of_interest) {
332 if (!end_miss && (col == __missing_cols[i])) {
333 target_set.insert(NodeId(col));
334 ++i;
335 if (i == __nb_miss) end_miss = true;
336 } else {
337 __filled_row1[col].discr_val = xrow[col].discr_val;
338 __filled_row2[col].discr_val = xrow[col].discr_val;
339 }
340 }
341 } else {
342 std::size_t i = std::size_t(0);
343 bool end_miss = false;
344 for (const auto col: this->_columns_of_interest) {
345 if (!end_miss && (col == __missing_cols[i])) {
346 target_set.insert(this->_nodeId2columns.first(col));
347 ++i;
348 if (i == __nb_miss) end_miss = true;
349 } else {
350 __filled_row1[col].discr_val = xrow[col].discr_val;
351 __filled_row2[col].discr_val = xrow[col].discr_val;
352 }
353 }
354 }
355
356 ve.addJointTarget(target_set);
357
358 // add the evidence and the target
    // unlike the loops above, these loops walk every column of the row, not
    // only the columns of interest
359 const std::size_t row_size = xrow.size();
360 if (this->_nodeId2columns.empty()) {
361 for (std::size_t col = std::size_t(0); col < row_size; ++col) {
362 switch (this->_column_types[col]) {
364 // only observed values are evidence
365 if (xrow[col].discr_val
366 != std::numeric_limits< std::size_t >::max()) {
367 ve.addEvidence(NodeId(col), xrow[col].discr_val);
368 }
369 break;
370
372 GUM_ERROR(NotImplementedYet,
373 "The BDRowGeneratorEM does not handle yet continuous "
374 << "variables. But the variable in column" << col
375 << " is continuous.");
376 break;
377
378 default:
379 GUM_ERROR(NotImplementedYet,
380 "DBTranslatedValueType " << int(this->_column_types[col])
381 << " is not supported yet");
382 }
383 }
384 } else {
385 for (std::size_t col = std::size_t(0); col < row_size; ++col) {
386 switch (this->_column_types[col]) {
388 // only observed values are evidence
389 if (xrow[col].discr_val
390 != std::numeric_limits< std::size_t >::max()) {
391 ve.addEvidence(this->_nodeId2columns.first(col),
392 xrow[col].discr_val);
393 }
394 break;
395
397 GUM_ERROR(NotImplementedYet,
398 "The BDRowGeneratorEM does not handle yet continuous "
399 << "variables. But the variable in column" << col
400 << " is continuous.");
401 break;
402
403 default:
404 GUM_ERROR(NotImplementedYet,
405 "DBTranslatedValueType " << int(this->_column_types[col])
406 << " is not supported yet");
407 }
408 }
409 }
410
411 // get the potential of the target set
    // NOTE(review): the const_cast lets the result be moved out of the local
    // inference engine instead of copied; this presumably relies on ve not
    // using that potential again before it is destroyed -- confirm
412 Potential< GUM_SCALAR >& pot =
413 const_cast< Potential< GUM_SCALAR >& >(ve.jointPosterior(target_set));
414 __joint_proba = std::move(pot);
415 if (__joint_inst != nullptr) delete __joint_inst;
416 __joint_inst = new Instantiation(__joint_proba);
417
418 // get the mapping between variables of the joint proba and the
419 // columns in the database
    // __missing_cols is rewritten in the variable order of the joint, so that
    // generate() can pair the i-th value of __joint_inst with __missing_cols[i]
420 const auto& var_sequence = __joint_proba.variablesSequence();
421 if (this->_nodeId2columns.empty()) {
422 for (std::size_t i = std::size_t(0); i < __nb_miss; ++i) {
423 __missing_cols[i] = std::size_t(this->_bn->nodeId(*(var_sequence[i])));
424 }
425 } else {
426 for (std::size_t i = std::size_t(0); i < __nb_miss; ++i) {
427 __missing_cols[i] =
428 this->_nodeId2columns.second(this->_bn->nodeId(*(var_sequence[i])));
429 }
430 }
431
432 return std::size_t(__joint_proba.domainSize());
433 }
434 
435 
/// Assigns a new Bayes net to the generator.
///
/// When a node-id/column mapping is in use, every mapped node id must exist
/// in the DAG of new_bn; otherwise an IdError is raised through GUM_ERROR.
/// Both filled rows are then resized to (largest column index + 1), where the
/// largest index is the highest node id of new_bn when there is no mapping,
/// and the highest mapped column otherwise.
/// @param new_bn the Bayesian network to use from now on.
/// NOTE(review): the embedded numbering skips line 458 (and the declarator
/// line 438); presumably 458 hands new_bn over to the base class -- confirm
/// in the original file.
437 template < typename GUM_SCALAR, template < typename > class ALLOC >
439 const BayesNet< GUM_SCALAR >& new_bn) {
440 // check that if nodeId2columns is not empty, then all the columns
441 // correspond to nodes of the BN
442 if (!this->_nodeId2columns.empty()) {
443 const DAG& dag = new_bn.dag();
444 for (auto iter = this->_nodeId2columns.begin();
445 iter != this->_nodeId2columns.end();
446 ++iter) {
447 if (!dag.existsNode(iter.first())) {
448 GUM_ERROR(IdError,
449 "Column "
450 << iter.second()
451 << " of the database is associated to Node ID "
452 << iter.first()
453 << ", which does not belong to the Bayesian network");
454 }
455 }
456 }
457
459
460 // we determine the size of the filled rows
    // "size" holds the largest column index found; hence the + 1 in the resizes
461 std::size_t size = std::size_t(0);
462 if (this->_nodeId2columns.empty()) {
463 for (auto node: new_bn.dag())
464 if (std::size_t(node) > size) size = std::size_t(node);
465 } else {
466 for (auto iter = this->_nodeId2columns.begin();
467 iter != this->_nodeId2columns.end();
468 ++iter) {
469 if (iter.second() > size) size = iter.second();
470 }
471 }
472 __filled_row1.resize(size + 1);
473 __filled_row2.resize(size + 1);
474 }
475 
476  } /* namespace learning */
477 
478 } /* namespace gum */
479 
480 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
std::vector< std::size_t, ALLOC< std::size_t > > _columns_of_interest
the set of columns of interest
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
DBRowGeneratorGoal
the type of things that a DBRowGenerator is designed for
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
void decreaseRemainingRows()
decrease the number of remaining output rows
Set< NodeId > NodeSet
Some typedefs and defines for shortcuts ...
DBRowGeneratorEM(const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types, const BayesNet< GUM_SCALAR > &bn, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
const BayesNet< GUM_SCALAR > * _bn
the Bayesian network used to fill the unobserved values
iterator begin() const
Returns the unsafe iterator at the beginning of the gum::Bijection.
STL namespace.
Copyright 2005-2020 Pierre-Henri WUILLEMIN () et Christophe GONZALES () info_at_agrum_dot_org.
Definition: agrum.h:25
const iterator & end() const noexcept
Returns the unsafe iterator at the end of the gum::Bijection.
virtual DBRowGeneratorEM< GUM_SCALAR, ALLOC > * clone() const override final
virtual copy constructor
bool empty() const noexcept
Returns true if the gum::Bijection doesn't contain any association.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
allocator_type getAllocator() const
returns the allocator used
DBRowGeneratorWithBN< GUM_SCALAR, ALLOC > & operator=(const DBRowGeneratorWithBN< GUM_SCALAR, ALLOC > &from)
copy operator
Bijection< NodeId, std::size_t, ALLOC< std::size_t > > _nodeId2columns
the mapping between the BN's node ids and the database's columns
virtual void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn) override final
assign a new Bayes net to the generator
virtual std::size_t _computeRows(const DBRow< DBTranslatedValue, ALLOC > &row) override final
computes the rows it will provide as output
virtual void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to the generator
DBRowGeneratorWithBN(const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types, const BayesNet< GUM_SCALAR > &bn, const DBRowGeneratorGoal goal, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
virtual const DBRow< DBTranslatedValue, ALLOC > & generate() override final
generates one output DBRow for each DBRow passed to method setInputRow
std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > _column_types
the types of the columns in the DatabaseTable
Size Idx
Type for indexes.
Definition: types.h:53
DBRowGeneratorEM< GUM_SCALAR, ALLOC > & operator=(const DBRowGeneratorEM< GUM_SCALAR, ALLOC > &from)
copy operator
Copyright 2005-2020 Pierre-Henri WUILLEMIN () et Christophe GONZALES () info_at_agrum_dot_org.
Size NodeId
Type for node ids.
Definition: graphElements.h:98
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55
allocator_type getAllocator() const
returns the allocator used
ALLOC< DBTranslatedValue > allocator_type
type for the allocators passed in arguments of methods