aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
DBRowGeneratorEM_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief A DBRowGenerator class that returns incomplete rows as EM would do
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #include <agrum/tools/database/DBRowGeneratorIdentity.h>
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 
31 namespace gum {
32 
33  namespace learning {
34 
35  /// returns the allocator used by this generator
    ///
    /// The request is forwarded, with an explicit qualification, to
    /// DBRowGenerator< ALLOC >::getAllocator() and its result is returned
    /// unchanged.
36  template < typename GUM_SCALAR, template < typename > class ALLOC >
39  return DBRowGenerator< ALLOC >::getAllocator();
40  }
41 
42 
43  /// default constructor
    ///
    /// @param bn the Bayesian network; it appears in the initializer list and
    /// is then passed to setBayesNet()
    /// @param alloc the allocator; it appears in the initializer list and is
    /// also used to build filled_row1__ and filled_row2__
    ///
    /// The signature also takes a Bijection between NodeId and std::size_t.
    /// Both filled rows are constructed with bn.size() as first argument and
    /// 1.0 as second argument (presumably the number of cells and the initial
    /// row weight -- TODO confirm against the DBRow constructor).
44  template < typename GUM_SCALAR, template < typename > class ALLOC >
48  const BayesNet< GUM_SCALAR >& bn,
49  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
51  const typename DBRowGeneratorEM< GUM_SCALAR, ALLOC >::allocator_type&
52  alloc) :
55  bn,
58  alloc),
59  filled_row1__(bn.size(), 1.0, alloc),
60  filled_row2__(bn.size(), 1.0, alloc) {
    // finish the initialization through the regular BN setter
61  setBayesNet(bn);
62 
64  }
65 
66 
67  /// copy constructor with a given allocator
    ///
    /// @param alloc the allocator given to the new generator
    ///
    /// When the source generator has a non-null joint_inst__, the variables
    /// sequence of this object's joint_inst__ is traversed index by index.
68  template < typename GUM_SCALAR, template < typename > class ALLOC >
71  const typename DBRowGeneratorEM< GUM_SCALAR, ALLOC >::allocator_type&
72  alloc) :
    // nothing to replicate when the source has no joint instantiation
79  if (from.joint_inst__ != nullptr) {
81  const auto& var_seq = joint_inst__->variablesSequence();
82  const std::size_t size = var_seq.size();
    // one pass per variable of the joint instantiation
83  for (std::size_t i = std::size_t(0); i < size; ++i) {
85  }
86  }
87 
89  }
90 
91 
92  /// copy constructor
    ///
    /// NOTE(review): presumably delegates to the allocator-aware copy
    /// constructor with from.getAllocator(), in the same way as the move
    /// constructor does -- confirm against the full header.
93  template < typename GUM_SCALAR, template < typename > class ALLOC >
97 
98 
99  /// move constructor with a given allocator
    ///
    /// @param alloc the allocator given to the new generator
    ///
    /// When the source generator has a non-null joint_inst__, the variables
    /// sequence of this object's joint_inst__ is traversed index by index,
    /// exactly as in the allocator-aware copy constructor.
100  template < typename GUM_SCALAR, template < typename > class ALLOC >
103  const typename DBRowGeneratorEM< GUM_SCALAR, ALLOC >::allocator_type&
104  alloc) :
    // nothing to rebuild when the source has no joint instantiation
113  if (from.joint_inst__ != nullptr) {
115  const auto& var_seq = joint_inst__->variablesSequence();
116  const std::size_t size = var_seq.size();
    // one pass per variable of the joint instantiation
117  for (std::size_t i = std::size_t(0); i < size; ++i) {
119  }
120  }
121 
123  }
124 
125 
126  /// move constructor
     ///
     /// Supplies from.getAllocator() as the last initializer argument and has
     /// an empty body.
127  template < typename GUM_SCALAR, template < typename > class ALLOC >
131  from.getAllocator()) {}
132 
133 
134  /// virtual copy constructor with a given allocator
     ///
     /// @param alloc the allocator to be used for the clone
     /// @return the local named generator, which is filled in before and
     /// inside the try block
     ///
     /// Any exception raised inside the try block is caught and rethrown
     /// unchanged to the caller.
135  template < typename GUM_SCALAR, template < typename > class ALLOC >
138  const typename DBRowGeneratorEM< GUM_SCALAR, ALLOC >::allocator_type&
139  alloc) const {
142  try {
144  } catch (...) {
     // propagate the original exception
146  throw;
147  }
148  return generator;
149  }
150 
151 
152  /// virtual copy constructor
     ///
     /// @return the result of the allocator-aware clone() overload called
     /// with this generator's own allocator
153  template < typename GUM_SCALAR, template < typename > class ALLOC >
155  DBRowGeneratorEM< GUM_SCALAR, ALLOC >::clone() const {
156  return clone(this->getAllocator());
157  }
158 
159 
160  /// destructor
     ///
     /// Deletes joint_inst__ when it is not null.
161  template < typename GUM_SCALAR, template < typename > class ALLOC >
     // NOTE(review): the null test is redundant, since deleting a null
     // pointer is a no-op in C++
163  if (joint_inst__ != nullptr) delete joint_inst__;
165  }
166 
167 
168  /// copy operator
     ///
     /// @param from the generator to copy
     /// @return a reference to this generator
     ///
     /// Self-assignment is detected and leaves the object untouched. In the
     /// other cases the current joint_inst__ is deleted and reset to nullptr
     /// and, when from.joint_inst__ is not null, the variables sequence of
     /// this object's joint_inst__ is traversed index by index.
169  template < typename GUM_SCALAR, template < typename > class ALLOC >
172  const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from) {
     // guard against self-assignment
173  if (this != &from) {
183 
     // drop the joint instantiation currently held
184  if (joint_inst__ != nullptr) {
185  delete joint_inst__;
186  joint_inst__ = nullptr;
187  }
188 
     // nothing to replicate when the source has no joint instantiation
189  if (from.joint_inst__ != nullptr) {
191  const auto& var_seq = joint_inst__->variablesSequence();
192  const std::size_t size = var_seq.size();
193  for (std::size_t i = std::size_t(0); i < size; ++i) {
195  }
196  }
197  }
198 
199  return *this;
200  }
201 
202 
203  /// move operator
     ///
     /// @return a reference to this generator
     ///
     /// Self-assignment is detected and leaves the object untouched. In the
     /// other cases the current joint_inst__ is deleted and reset to nullptr
     /// and, when from.joint_inst__ is not null, the variables sequence of
     /// this object's joint_inst__ is traversed index by index.
204  template < typename GUM_SCALAR, template < typename > class ALLOC >
     // guard against self-assignment
208  if (this != &from) {
218 
     // drop the joint instantiation currently held
219  if (joint_inst__ != nullptr) {
220  delete joint_inst__;
221  joint_inst__ = nullptr;
222  }
223 
     // nothing to rebuild when the source has no joint instantiation
224  if (from.joint_inst__ != nullptr) {
226  const auto& var_seq = joint_inst__->variablesSequence();
227  const std::size_t size = var_seq.size();
228  for (std::size_t i = std::size_t(0); i < size; ++i) {
230  }
231  }
232  }
233 
234  return *this;
235  }
236 
237 
238  /// generates new lines from those the generator gets in input
     ///
     /// @return *input_row__ when input_row__ is not null; otherwise
     /// filled_row1__ or filled_row2__, chosen by use_filled_row1__
     ///
     /// Each call first invokes decreaseRemainingRows(). When a filled row is
     /// returned, joint_inst__ is advanced with inc() and use_filled_row1__ is
     /// flipped, so two successive calls return different buffers.
239  template < typename GUM_SCALAR, template < typename > class ALLOC >
242  this->decreaseRemainingRows();
243 
244  // if everything is observed, return the input row
245  if (input_row__ != nullptr) return *input_row__;
246 
     // the two filled rows are used alternately
247  if (use_filled_row1__) {
248  // get the weight of the row from the joint probability
251 
252  // fill the values of the row
253  for (std::size_t i = std::size_t(0); i < nb_miss__; ++i)
255 
     // move to the next configuration of the joint instantiation
256  joint_inst__->inc();
257  use_filled_row1__ = false;
258 
259  return filled_row1__;
260  } else {
261  // get the weight of the row from the joint probability
264 
265  // fill the values of the row
266  for (std::size_t i = std::size_t(0); i < nb_miss__; ++i)
268 
     // move to the next configuration of the joint instantiation
269  joint_inst__->inc();
270  use_filled_row1__ = true;
271 
272  return filled_row2__;
273  }
274  }
275 
276 
277  /// computes the rows it will provide in output
     ///
     /// @param row the input database row
     /// @return 1 when no column of interest is unobserved in row, otherwise
     /// joint_proba__.domainSize()
     ///
     /// A discrete cell is treated as unobserved when its discr_val equals
     /// std::numeric_limits< std::size_t >::max(). When nothing is
     /// unobserved, input_row__ is set to the address of row. Otherwise
     /// input_row__ is reset to nullptr, the joint posterior of the target
     /// set is obtained from ve.jointPosterior(), the previous joint_inst__
     /// is deleted, and missing_cols__ is rewritten so that entry i holds the
     /// column of the i-th variable of var_sequence.
     ///
     /// NOTE(review): the error text for continuous columns spells the class
     /// name BDRowGeneratorEM and has no space between the word column and
     /// the column index -- both look like typos; confirm before changing the
     /// runtime strings.
     /// NOTE(review): input_row__ keeps the address of the caller's row, so
     /// the row presumably has to stay alive while rows are generated from
     /// it -- confirm against the callers.
278  template < typename GUM_SCALAR, template < typename > class ALLOC >
280  const DBRow< DBTranslatedValue, ALLOC >& row) {
281  // check if there are unobserved values among the columns of interest.
282  // If this is the case, set them as targets
283  bool found_unobserved = false;
284  const auto& xrow = row.row();
285  for (const auto col: this->columns_of_interest_) {
286  switch (this->column_types_[col]) {
     // the maximal std::size_t value marks a missing discrete cell
288  if (xrow[col].discr_val == std::numeric_limits< std::size_t >::max()) {
     // this branch is entered only for the first missing cell of the row
289  if (!found_unobserved) {
291  found_unobserved = true;
292  }
294  }
295  break;
296 
299  "The BDRowGeneratorEM does not handle yet continuous "
300  << "variables. But the variable in column" << col
301  << " is continuous.");
302  break;
303 
304  default:
306  "DBTranslatedValueType " << int(this->column_types_[col])
307  << " is not supported yet");
308  }
309  }
310 
311  // if there is no unobserved value, make the input_row__ point to the row
312  if (!found_unobserved) {
313  input_row__ = &row;
314  return std::size_t(1);
315  }
316 
317  input_row__ = nullptr;
320 
321  // here, there are missing symbols, so we should compute the distribution
322  // of the missing values. For this purpose, we use Variable Elimination
324 
325  // add the targets and fill the output row with the observed values
     // the two branches differ only in whether nodeId2columns_ is empty;
     // both walk columns_of_interest_ once while advancing through
     // missing_cols__ (assumes both are in the same order -- TODO confirm)
327  if (this->nodeId2columns_.empty()) {
328  std::size_t i = std::size_t(0);
329  bool end_miss = false;
330  for (const auto col: this->columns_of_interest_) {
331  if (!end_miss && (col == missing_cols__[i])) {
333  ++i;
     // stop indexing missing_cols__ once all nb_miss__ entries are matched
334  if (i == nb_miss__) end_miss = true;
335  } else {
338  }
339  }
340  } else {
341  std::size_t i = std::size_t(0);
342  bool end_miss = false;
343  for (const auto col: this->columns_of_interest_) {
344  if (!end_miss && (col == missing_cols__[i])) {
346  ++i;
     // stop indexing missing_cols__ once all nb_miss__ entries are matched
347  if (i == nb_miss__) end_miss = true;
348  } else {
351  }
352  }
353  }
354 
356 
357  // add the evidence and the target
     // unlike the first scan, these loops visit every column of the row
358  const std::size_t row_size = xrow.size();
359  if (this->nodeId2columns_.empty()) {
360  for (std::size_t col = std::size_t(0); col < row_size; ++col) {
361  switch (this->column_types_[col]) {
363  // only observed values are evidence
364  if (xrow[col].discr_val
365  != std::numeric_limits< std::size_t >::max()) {
367  }
368  break;
369 
372  "The BDRowGeneratorEM does not handle yet continuous "
373  << "variables. But the variable in column" << col
374  << " is continuous.");
375  break;
376 
377  default:
379  "DBTranslatedValueType " << int(this->column_types_[col])
380  << " is not supported yet");
381  }
382  }
383  } else {
384  for (std::size_t col = std::size_t(0); col < row_size; ++col) {
385  switch (this->column_types_[col]) {
387  // only observed values are evidence
388  if (xrow[col].discr_val
389  != std::numeric_limits< std::size_t >::max()) {
391  xrow[col].discr_val);
392  }
393  break;
394 
397  "The BDRowGeneratorEM does not handle yet continuous "
398  << "variables. But the variable in column" << col
399  << " is continuous.");
400  break;
401 
402  default:
404  "DBTranslatedValueType " << int(this->column_types_[col])
405  << " is not supported yet");
406  }
407  }
408  }
409 
410  // get the potential of the target set
     // NOTE(review): the const_cast removes the constness of the reference
     // returned by ve.jointPosterior() -- confirm that the potential is not
     // modified through it afterwards
412  = const_cast< Potential< GUM_SCALAR >& >(ve.jointPosterior(target_set));
     // release the instantiation built for the previous row, if any
414  if (joint_inst__ != nullptr) delete joint_inst__;
416 
417  // get the mapping between variables of the joint proba and the
418  // columns in the database
420  if (this->nodeId2columns_.empty()) {
     // without a node-to-column mapping, the node id is used as the column
421  for (std::size_t i = std::size_t(0); i < nb_miss__; ++i) {
422  missing_cols__[i] = std::size_t(this->bn_->nodeId(*(var_sequence[i])));
423  }
424  } else {
     // otherwise the column is looked up from the node id in nodeId2columns_
425  for (std::size_t i = std::size_t(0); i < nb_miss__; ++i) {
427  = this->nodeId2columns_.second(this->bn_->nodeId(*(var_sequence[i])));
428  }
429  }
430 
431  return std::size_t(joint_proba__.domainSize());
432  }
433 
434 
435  /// assign a new Bayes net to the generator
     ///
     /// @param new_bn the Bayesian network to assign
     ///
     /// When nodeId2columns_ is not empty, every node id it contains must
     /// exist in the DAG of new_bn; otherwise an error is reported whose
     /// message names the offending column and node id. The function then
     /// computes, in the local variable size, the largest node id of new_bn
     /// (empty mapping) or the largest column stored in nodeId2columns_
     /// (non-empty mapping).
436  template < typename GUM_SCALAR, template < typename > class ALLOC >
438  const BayesNet< GUM_SCALAR >& new_bn) {
439  // check that if nodeId2columns is not empty, then all the columns
440  // correspond to nodes of the BN
441  if (!this->nodeId2columns_.empty()) {
442  const DAG& dag = new_bn.dag();
443  for (auto iter = this->nodeId2columns_.begin();
444  iter != this->nodeId2columns_.end();
445  ++iter) {
     // iter.first() is the node id, iter.second() the database column
446  if (!dag.existsNode(iter.first())) {
448  "Column "
449  << iter.second()
450  << " of the database is associated to Node ID "
451  << iter.first()
452  << ", which does not belong to the Bayesian network");
453  }
454  }
455  }
456 
458 
459  // we determine the size of the filled rows
     // size ends up as the largest index found, not as a count
460  std::size_t size = std::size_t(0);
461  if (this->nodeId2columns_.empty()) {
462  for (auto node: new_bn.dag())
463  if (std::size_t(node) > size) size = std::size_t(node);
464  } else {
465  for (auto iter = this->nodeId2columns_.begin();
466  iter != this->nodeId2columns_.end();
467  ++iter) {
468  if (iter.second() > size) size = iter.second();
469  }
470  }
473  }
474 
475  } /* namespace learning */
476 
477 } /* namespace gum */
478 
479 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)