aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
DBRowGeneratorEM.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief A DBRowGenerator class that returns incomplete rows as EM would do
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef GUM_LEARNING_DBROW_GENERATOR_EM_H
28 #define GUM_LEARNING_DBROW_GENERATOR_EM_H
29 
30 #include <vector>
31 
32 #include <agrum/agrum.h>
33 #include <agrum/tools/core/bijection.h>
34 #include <agrum/BN/BayesNet.h>
35 #include <agrum/BN/inference/variableElimination.h>
36 #include <agrum/tools/database/DBRowGeneratorWithBN.h>
37 
38 namespace gum {
39 
40  namespace learning {
41 
42  /** @class DBRowGeneratorEM
43  * @headerfile DBRowGeneratorEM.h <agrum/tools/database/DBRowGeneratorEM.h>
44  * @brief A DBRowGenerator class that returns incomplete rows as EM would do
45  *
46  * @ingroup learning_database
47  *
48  * This class is a DBRowGenerator that fills the unobserved values of the
49  * nodes of interest as the EM algorithm does, i.e., by returning all the
50  * possible completed rows with a weight corresponding to the probability
51  * of the completion.
52  * The standard usage of a DBRowGenerator is the following:
53  * @code
54  * // create a DatabaseTable and fill it
55  * gum::learning::DBTranslatorSet<> set;
56  * for ( int i = 0; i < 10; ++i )
57  * set.insertTranslator(gum::learning::DBTranslator4LabelizedVariable<>(),i);
58  * gum::learning::DatabaseTable<> database ( set );
59  * // fill the database
60  *
61  * // keep in a vector the types of the columns in the database
62  * const std::vector<gum::learning::DBTranslatedValueType>
63  * column_types ( 10, gum::learning::DBTranslatedValueType::DISCRETE );
64  *
65  * // create the generator from the column types and a Bayes net bn
66  * gum::learning::DBRowGeneratorEM<> generator ( column_types, bn );
67  *
68  * // parse the database and produce output rows
69  * for ( auto dbrow : database ) {
70  * generator.setInputRow ( dbrow );
71  * while ( generator.hasRows() ) {
72  * const auto& output_dbrow = generator.generate ();
73  * // do something with the output dbrow
74  * }
75  * }
76  * @endcode
77  */
78  template < typename GUM_SCALAR = double, template < typename > class ALLOC = std::allocator >
80  public:
81  /// type for the allocators passed in arguments of methods
83 
84  // ##########################################################################
85  /// @name Constructors / Destructors
86  // ##########################################################################
87 
88  /// @{
89 
90  /// default constructor
93  const BayesNet< GUM_SCALAR >& bn,
95  = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
97 
98  /// copy constructor
99  DBRowGeneratorEM(const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from);
100 
101  /// copy constructor with a given allocator
102  DBRowGeneratorEM(const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from,
103  const allocator_type& alloc);
104 
105  /// move constructor
106  DBRowGeneratorEM(DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from);
107 
108  /// move constructor with a given allocator
109  DBRowGeneratorEM(DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from, const allocator_type& alloc);
110 
111  /// virtual copy constructor
112  virtual DBRowGeneratorEM< GUM_SCALAR, ALLOC >* clone() const override final;
113 
114  /// virtual copy constructor with a given allocator
115  virtual DBRowGeneratorEM< GUM_SCALAR, ALLOC >*
116  clone(const allocator_type& alloc) const override final;
117 
118  /// destructor
119  ~DBRowGeneratorEM();
120 
121  /// @}
122 
123 
124  // ##########################################################################
125  /// @name Operators
126  // ##########################################################################
127 
128  /// @{
129 
130  /// copy operator
133 
134  /// move operator
137 
138  /// @}
139 
140 
141  // ##########################################################################
142  /// @name Accessors / Modifiers
143  // ##########################################################################
144 
145  /// @{
146 
147  /// generates one output DBRow for each DBRow passed to method setInputRow
148  virtual const DBRow< DBTranslatedValue, ALLOC >& generate() override final;
149 
150  /// assign a new Bayes net to the generator
151  virtual void setBayesNet(const BayesNet< GUM_SCALAR >& new_bn) override final;
152 
153  /// returns the allocator used
155 
156  /// @}
157 
158 
159  protected:
160  /// computes the rows it will provide as output
161  virtual std::size_t computeRows_(const DBRow< DBTranslatedValue, ALLOC >& row) override final;
162 
163 
164 #ifndef DOXYGEN_SHOULD_SKIP_THIS
165 
166  private:
167  /// the row used as input to generate the output DBRows
168  const DBRow< DBTranslatedValue, ALLOC >* _input_row_{nullptr};
169 
170  /// the set of missing columns of the current row
171  std::vector< std::size_t, ALLOC< std::size_t > > _missing_cols_;
172 
173  /// the number of missing values in the current row
174  std::size_t _nb_miss_;
175 
176  /// the joint probability of the missing variables of the current row
177  Potential< GUM_SCALAR > _joint_proba_;
178 
179  /// an instantiation over the joint proba
180  Instantiation* _joint_inst_{nullptr};
181 
182  /// the row that we return if there are missing values
183  /** DBRowGeneratorSets produce new rows in advance. So, when they
184  * return a row, they have already computed the new row. To cope with
185  * this, the DBRowGeneratorEM should have two filled rows: one that will
186  * be used for the first row returned and one for the next one. Hence
187  * _filled_row1_ and _filled_row2_, which are filled alternately by
188  * the content of the probabilities computed. */
189  DBRow< DBTranslatedValue, ALLOC > _filled_row1_;
190 
191  /// the row that we return if there are missing values
192  DBRow< DBTranslatedValue, ALLOC > _filled_row2_;
193 
194  /// indicates whether we should return filled_row1 or filled_row2
195  bool _use_filled_row1_{true};
196 
197  /// the weight of the original input row
198  double _original_weight_;
199 
200 
201 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
202  };
203 
204  } /* namespace learning */
205 
206 } /* namespace gum */
207 
208 
209 // always include the template implementation
210 #include <agrum/tools/database/DBRowGeneratorEM_tpl.h>
211 
212 #endif /* GUM_LEARNING_DBROW_GENERATOR_EM_H */
virtual DBRowGeneratorEM< GUM_SCALAR, ALLOC > * clone(const allocator_type &alloc) const override final
virtual copy constructor with a given allocator
DBRowGeneratorEM(const DBRowGeneratorEM< GUM_SCALAR, ALLOC > &from)
copy constructor
DBRowGeneratorEM(DBRowGeneratorEM< GUM_SCALAR, ALLOC > &&from)
move constructor
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
DBRowGeneratorEM(const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types, const BayesNet< GUM_SCALAR > &bn, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
A DBRowGenerator class that returns incomplete rows as EM would do.
DBRowGeneratorEM< GUM_SCALAR, ALLOC > & operator=(DBRowGeneratorEM< GUM_SCALAR, ALLOC > &&from)
move operator
DBRowGeneratorEM(DBRowGeneratorEM< GUM_SCALAR, ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
virtual DBRowGeneratorEM< GUM_SCALAR, ALLOC > * clone() const override final
virtual copy constructor
DBRowGeneratorEM(const DBRowGeneratorEM< GUM_SCALAR, ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
virtual void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn) override final
assign a new Bayes net to the generator
virtual std::size_t computeRows_(const DBRow< DBTranslatedValue, ALLOC > &row) override final
computes the rows it will provide as output
virtual const DBRow< DBTranslatedValue, ALLOC > & generate() override final
generates one output DBRow for each DBRow passed to method setInputRow
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
DBRowGeneratorEM< GUM_SCALAR, ALLOC > & operator=(const DBRowGeneratorEM< GUM_SCALAR, ALLOC > &from)
copy operator
allocator_type getAllocator() const
returns the allocator used