aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
scoreBD.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing Bayesian Dirichlet (BD) log2 scores
24  *
25  * @warning This class computes the "general" log2 (BD score). If you wish to
26  * reduce the number of hyperparameters, try using ScoreBDeu or ScoreK2.
27  *
28  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
29  */
30 
31 #ifndef GUM_LEARNING_SCORE_BD_H
32 #define GUM_LEARNING_SCORE_BD_H
33 
34 #include <string>
35 
36 #include <agrum/agrum.h>
37 #include <agrum/tools/core/math/math_utils.h>
38 #include <agrum/tools/core/math/gammaLog2.h>
39 #include <agrum/BN/learning/scores_and_tests/score.h>
40 #include <agrum/BN/learning/aprioris/aprioriNoApriori.h>
41 
42 namespace gum {
43 
44  namespace learning {
45 
46  /** @class ScoreBD
47  * @brief the class for computing Bayesian Dirichlet (BD) log2 scores
48  * @headerfile scoreBD.h <agrum/BN/learning/scores_and_tests/scoreBD.h>
49  * @ingroup learning_scores
50  *
51  * @warning This class computes the "general" log2 (BD score). If you wish to
52  * reduce the number of hyperparameters, try using ScoreBDeu or ScoreK2.
53  *
54  * @warning As BD already includes an implicit smoothing apriori on all
55  * the cells of contingency tables, the apriori passed to the score should
56  * be a NoApriori. But aGrUM will let you use another (certainly incompatible)
57  * apriori with the score. In this case, this apriori will be included in
58  * addition to the implicit smoothing apriori in a BD fashion, i.e., we
59  * will resort to the Bayesian Dirichlet (BD) formula to include the sum of
60  * the two aprioris into the score.
61  *
62  */
63  template < template < typename > class ALLOC = std::allocator >
64  class ScoreBD: public Score< ALLOC > {
65  public:
66  /// type for the allocators passed in arguments of methods
68 
69  // ##########################################################################
70  /// @name Constructors / Destructors
71  // ##########################################################################
72  /// @{
73 
74  /// default constructor
75  /** @param parser the parser used to parse the database
76  * @param apriori An apriori that we add to the computation of the score
77  * @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's rows
78  * indices. The countings are then performed only on the union of the
79  * rows [Xi,Yi), i in {1,...,n}. This is useful, e.g., when performing
80  * cross validation tasks, in which part of the database should be ignored.
81  * An empty set of ranges is equivalent to an interval [X,Y) ranging over
82  * the whole database.
83  * @param nodeId2columns a mapping from the ids of the nodes in the
84  * graphical model to the corresponding column in the DatabaseTable
85  * parsed by the parser. This enables estimating from a database in
86  * which variable A corresponds to the 2nd column the parameters of a BN
87  * in which variable A has a NodeId of 5. An empty nodeId2columns
88  * bijection means that the mapping is an identity, i.e., the value of a
89  * NodeId is equal to the index of the column in the DatabaseTable.
90  * @param alloc the allocator used to allocate the structures within the
91  * Score.
92  * @warning If nodeId2columns is not empty, then only the scores over the
93  * ids belonging to this bijection can be computed: applying method
94  * score() over other ids will raise exception NotFound. */
95  ScoreBD(const DBRowGeneratorParser< ALLOC >& parser,
96  const Apriori< ALLOC >& apriori,
97  const std::vector< std::pair< std::size_t, std::size_t >,
98  ALLOC< std::pair< std::size_t, std::size_t > > >&
99  ranges,
100  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
102  = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
103  const allocator_type& alloc = allocator_type());
104 
105 
106  /// default constructor
107  /** @param parser the parser used to parse the database
108  * @param apriori An apriori that we add to the computation of the score
109  * @param nodeId2columns a mapping from the ids of the nodes in the
110  * graphical model to the corresponding column in the DatabaseTable
111  * parsed by the parser. This enables estimating from a database in
112  * which variable A corresponds to the 2nd column the parameters of a BN
113  * in which variable A has a NodeId of 5. An empty nodeId2columns
114  * bijection means that the mapping is an identity, i.e., the value of a
115  * NodeId is equal to the index of the column in the DatabaseTable.
116  * @param alloc the allocator used to allocate the structures within the
117  * Score.
118  * @warning If nodeId2columns is not empty, then only the scores over the
119  * ids belonging to this bijection can be computed: applying method
120  * score() over other ids will raise exception NotFound. */
121  ScoreBD(const DBRowGeneratorParser< ALLOC >& parser,
122  const Apriori< ALLOC >& apriori,
123  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
125  = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
126  const allocator_type& alloc = allocator_type());
127 
128  /// copy constructor
129  ScoreBD(const ScoreBD< ALLOC >& from);
130 
131  /// copy constructor with a given allocator
132  ScoreBD(const ScoreBD< ALLOC >& from, const allocator_type& alloc);
133 
134  /// move constructor
135  ScoreBD(ScoreBD< ALLOC >&& from);
136 
137  /// move constructor with a given allocator
138  ScoreBD(ScoreBD< ALLOC >&& from, const allocator_type& alloc);
139 
140  /// virtual copy constructor
141  virtual ScoreBD< ALLOC >* clone() const;
142 
143  /// virtual copy constructor with a given allocator
144  virtual ScoreBD< ALLOC >* clone(const allocator_type& alloc) const;
145 
146  /// destructor
147  virtual ~ScoreBD();
148 
149  /// @}
150 
151 
152  // ##########################################################################
153  /// @name Operators
154  // ##########################################################################
155 
156  /// @{
157 
158  /// copy operator
159  ScoreBD< ALLOC >& operator=(const ScoreBD< ALLOC >& from);
160 
161  /// move operator
162  ScoreBD< ALLOC >& operator=(ScoreBD< ALLOC >&& from);
163 
164  /// @}
165 
166 
167  // ##########################################################################
168  /// @name Accessors / Modifiers
169  // ##########################################################################
170  /// @{
171 
172  /// indicates whether the apriori is compatible (meaningful) with the score
173  /** The combination of some scores and aprioris can be meaningless. For
174  * instance, adding a Dirichlet apriori to the K2 score is not very
175  * meaningful since K2 corresponds to a BD score with a 1-smoothing
176  * apriori.
177  * aGrUM allows you to perform such combination, but you can check with
178  * method isAprioriCompatible () whether the result the score will give
179  * you is meaningful or not.
180  * @returns a non empty string if the apriori is compatible with the
181  * score.*/
182  virtual std::string isAprioriCompatible() const final;
183 
184  /// returns the internal apriori of the score
185  /** Some scores include an apriori. For instance, the K2 score is a BD
186  * score with a Laplace Apriori ( smoothing(1) ). BDeu is a BD score with
187  * a N'/(r_i * q_i) apriori, where N' is an effective sample size and r_i
188  * is the domain size of the target variable and q_i is the domain size of
189  * the Cartesian product of its parents. The goal of the score's internal
190  * apriori classes is to enable to account for these aprioris outside the
191  * score, e.g., when performing parameter estimation. It is important to
192  * note that, to be meaningful, a structure + parameter learning requires
193  * that the same aprioris are taken into account during structure learning
194  * and parameter learning. */
195  virtual const Apriori< ALLOC >& internalApriori() const final;
196 
197  /// @}
198 
199 
200  /// indicates whether the apriori is compatible (meaningful) with the score
201  /** @returns a non empty string if the apriori is compatible with the score.
202  */
203  static std::string isAprioriCompatible(const std::string& apriori_type,
204  double weight = 1.0f);
205 
206  /// indicates whether the apriori is compatible (meaningful) with the score
207  /** @returns a non empty string if the apriori is compatible with the score. */
208  static std::string isAprioriCompatible(const Apriori< ALLOC >& apriori);
209 
210 
211  protected:
212  /// returns the score for a given IdCondSet
213  /** @throws OperationNotAllowed is raised if the score does not support
214  * calling method score such an idset (due to too many/too few variables
215  * in the left hand side or the right hand side of the idset). */
216  virtual double score_(const IdCondSet< ALLOC >& idset) final;
217 
218 
219 #ifndef DOXYGEN_SHOULD_SKIP_THIS
220 
221  private:
222  /// the internal apriori of the score
223  AprioriNoApriori< ALLOC > internal_apriori__;
224 
225  /// the log2(gamma(n)) function: generalizes log2((n-1)!)
226  GammaLog2 gammalog2__;
227 
228 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
229  };
230 
231  } /* namespace learning */
232 
233 } /* namespace gum */
234 
235 
236 #ifndef GUM_NO_EXTERN_TEMPLATE_CLASS
237 extern template class gum::learning::ScoreBD<>;
238 #endif
239 
240 
241 // always include the template implementation
242 #include <agrum/BN/learning/scores_and_tests/scoreBD_tpl.h>
243 
244 #endif /* GUM_LEARNING_SCORE_BD_H */
virtual const Apriori< ALLOC > & internalApriori() const final
returns the internal apriori of the score
virtual double score_(const IdCondSet< ALLOC > &idset) final
returns the score for a given IdCondSet
static std::string isAprioriCompatible(const std::string &apriori_type, double weight=1.0f)
indicates whether the apriori is compatible (meaningful) with the score
ScoreBD< ALLOC > & operator=(ScoreBD< ALLOC > &&from)
move operator
ScoreBD(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
the class for computing Bayesian Dirichlet (BD) log2 scores
Definition: scoreBD.h:64
ScoreBD(const ScoreBD< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
ScoreBD(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
virtual ~ScoreBD()
destructor
static std::string isAprioriCompatible(const Apriori< ALLOC > &apriori)
indicates whether the apriori is compatible (meaningful) with the score
virtual ScoreBD< ALLOC > * clone(const allocator_type &alloc) const
virtual copy constructor with a given allocator
ScoreBD< ALLOC > & operator=(const ScoreBD< ALLOC > &from)
copy operator
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
ScoreBD(const ScoreBD< ALLOC > &from)
copy constructor
ScoreBD(ScoreBD< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
virtual ScoreBD< ALLOC > * clone() const
virtual copy constructor
ScoreBD(ScoreBD< ALLOC > &&from)
move constructor
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)