aGrUM  0.14.2
scoreBDeu_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 #ifndef DOXYGEN_SHOULD_SKIP_THIS
27 
29 # include <sstream>
30 
31 namespace gum {
32 
33  namespace learning {
34 
36  template < template < typename > class ALLOC >
38  const DBRowGeneratorParser< ALLOC >& parser,
39  const Apriori< ALLOC >& apriori,
40  const std::vector< std::pair< std::size_t, std::size_t >,
41  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
42  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
43  nodeId2columns,
44  const typename ScoreBDeu< ALLOC >::allocator_type& alloc) :
45  Score< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
46  __internal_apriori(parser.database(), nodeId2columns) {
47  GUM_CONSTRUCTOR(ScoreBDeu);
48  }
49 
50 
52  template < template < typename > class ALLOC >
54  const DBRowGeneratorParser< ALLOC >& parser,
55  const Apriori< ALLOC >& apriori,
56  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
57  nodeId2columns,
58  const typename ScoreBDeu< ALLOC >::allocator_type& alloc) :
59  Score< ALLOC >(parser, apriori, nodeId2columns, alloc),
60  __internal_apriori(parser.database(), nodeId2columns) {
61  GUM_CONSTRUCTOR(ScoreBDeu);
62  }
63 
64 
66  template < template < typename > class ALLOC >
68  const ScoreBDeu< ALLOC >& from,
69  const typename ScoreBDeu< ALLOC >::allocator_type& alloc) :
70  Score< ALLOC >(from, alloc),
71  __internal_apriori(from.__internal_apriori, alloc),
72  __gammalog2(from.__gammalog2) {
73  GUM_CONS_CPY(ScoreBDeu);
74  }
75 
76 
78  template < template < typename > class ALLOC >
79  INLINE ScoreBDeu< ALLOC >::ScoreBDeu(const ScoreBDeu< ALLOC >& from) :
80  ScoreBDeu< ALLOC >(from, from.getAllocator()) {}
81 
82 
84  template < template < typename > class ALLOC >
86  ScoreBDeu< ALLOC >&& from,
87  const typename ScoreBDeu< ALLOC >::allocator_type& alloc) :
88  Score< ALLOC >(std::move(from), alloc),
89  __internal_apriori(std::move(from.__internal_apriori), alloc),
90  __gammalog2(std::move(from.__gammalog2)) {
91  GUM_CONS_MOV(ScoreBDeu);
92  }
93 
94 
96  template < template < typename > class ALLOC >
97  INLINE ScoreBDeu< ALLOC >::ScoreBDeu(ScoreBDeu< ALLOC >&& from) :
98  ScoreBDeu< ALLOC >(std::move(from), from.getAllocator()) {}
99 
100 
102  template < template < typename > class ALLOC >
103  ScoreBDeu< ALLOC >* ScoreBDeu< ALLOC >::clone(
104  const typename ScoreBDeu< ALLOC >::allocator_type& alloc) const {
105  ALLOC< ScoreBDeu< ALLOC > > allocator(alloc);
106  ScoreBDeu< ALLOC >* new_score = allocator.allocate(1);
107  try {
108  allocator.construct(new_score, *this, alloc);
109  } catch (...) {
110  allocator.deallocate(new_score, 1);
111  throw;
112  }
113 
114  return new_score;
115  }
116 
117 
119  template < template < typename > class ALLOC >
120  ScoreBDeu< ALLOC >* ScoreBDeu< ALLOC >::clone() const {
121  return clone(this->getAllocator());
122  }
123 
124 
126  template < template < typename > class ALLOC >
128  GUM_DESTRUCTOR(ScoreBDeu);
129  }
130 
131 
133  template < template < typename > class ALLOC >
134  ScoreBDeu< ALLOC >& ScoreBDeu< ALLOC >::
135  operator=(const ScoreBDeu< ALLOC >& from) {
136  if (this != &from) {
138  __internal_apriori = from.__internal_apriori;
139  }
140  return *this;
141  }
142 
143 
145  template < template < typename > class ALLOC >
146  ScoreBDeu< ALLOC >& ScoreBDeu< ALLOC >::operator=(ScoreBDeu< ALLOC >&& from) {
147  if (this != &from) {
148  Score< ALLOC >::operator=(std::move(from));
149  __internal_apriori = std::move(from.__internal_apriori);
150  }
151  return *this;
152  }
153 
154 
156  template < template < typename > class ALLOC >
157  std::string
158  ScoreBDeu< ALLOC >::isAprioriCompatible(const std::string& apriori_type,
159  double weight) {
160  // check that the apriori is compatible with the score
161  if (apriori_type == AprioriNoAprioriType::type) { return ""; }
162 
163  if (weight == 0.0) {
164  return "The apriori is currently compatible with the BDeu score but "
165  "if you change the weight, it will become incompatible.";
166  }
167 
168  // known incompatible aprioris
169  if ((apriori_type == AprioriDirichletType::type)
170  || (apriori_type == AprioriSmoothingType::type)) {
171  return "The BDeu score already contains a different 'implicit' apriori. "
172  "Therefore, the learning will probably be biased.";
173  }
174 
175  // apriori types unsupported by the type checker
176  std::stringstream msg;
177  msg << "The apriori '" << apriori_type
178  << "' is not yet supported by method isAprioriCompatible os Score BDeu";
179  return msg.str();
180  }
181 
182 
184  template < template < typename > class ALLOC >
185  INLINE std::string
186  ScoreBDeu< ALLOC >::isAprioriCompatible(const Apriori< ALLOC >& apriori) {
187  return isAprioriCompatible(apriori.getType(), apriori.weight());
188  }
189 
190 
192  template < template < typename > class ALLOC >
193  INLINE std::string ScoreBDeu< ALLOC >::isAprioriCompatible() const {
194  return isAprioriCompatible(*(this->_apriori));
195  }
196 
197 
199  template < template < typename > class ALLOC >
200  INLINE const Apriori< ALLOC >& ScoreBDeu< ALLOC >::internalApriori() const {
201  return __internal_apriori;
202  }
203 
204 
206  template < template < typename > class ALLOC >
207  INLINE void ScoreBDeu< ALLOC >::setEffectiveSampleSize(double ess) {
208  if (ess < 0) {
209  GUM_ERROR(OutOfBounds,
210  "The effective sample size of the BDeu's "
211  "internal apriori must be positive");
212  } else {
213  __internal_apriori.setEffectiveSampleSize(ess);
214  }
215  }
216 
217 
219  template < template < typename > class ALLOC >
220  double ScoreBDeu< ALLOC >::_score(const IdSet< ALLOC >& idset) {
221  // get the counts for all the nodes in the idset and add the apriori
222  std::vector< double, ALLOC< double > > N_ijk(
223  this->_counter.counts(idset, true));
224  const std::size_t all_size = N_ijk.size();
225 
226  double score = 0.0;
227  const double ess = __internal_apriori.weight();
228  const bool informative_external_apriori = this->_apriori->isInformative();
229 
230 
231  // here, we distinguish idsets with conditioning nodes from those
232  // without conditioning nodes
233  if (idset.hasConditioningSet()) {
234  // get the counts for the conditioning nodes
235  std::vector< double, ALLOC< double > > N_ij(
236  this->_marginalize(idset[0], N_ijk));
237  const std::size_t conditioning_size = N_ij.size();
238  const double ess_qi = ess / conditioning_size;
239  const double ess_riqi = ess / all_size;
240 
241  if (informative_external_apriori) {
242  // the score to compute is that of BD with aprioris
243  // N'_ijk + ESS / (r_i * q_i )
244  // (the + ESS / (r_i * q_i ) is here to take into account the
245  // internal apriori of BDeu)
246  std::vector< double, ALLOC< double > > N_prime_ijk(all_size, 0.0);
247  this->_apriori->addAllApriori(idset, N_prime_ijk);
248  std::vector< double, ALLOC< double > > N_prime_ij(N_ij.size(), 0.0);
249  this->_apriori->addConditioningApriori(idset, N_prime_ij);
250 
251  // the BDeu score can be computed as follows:
252  // sum_j=1^qi [ gammalog2 ( N'_ij + ESS / q_i ) -
253  // gammalog2 ( N_ij + N'_ij + ESS / q_i )
254  // + sum_k=1^ri { gammlog2 ( N_ijk + N'_ijk + ESS / (r_i * q_i ) )
255  // - gammalog2 ( N'_ijk + ESS / (r_i * q_i ) ) } ]
256  for (std::size_t j = std::size_t(0); j < conditioning_size; ++j) {
257  score += __gammalog2(N_prime_ij[j] + ess_qi)
258  - __gammalog2(N_ij[j] + N_prime_ij[j] + ess_qi);
259  }
260  for (std::size_t k = std::size_t(0); k < all_size; ++k) {
261  score += __gammalog2(N_ijk[k] + N_prime_ijk[k] + ess_riqi)
262  - __gammalog2(N_prime_ijk[k] + ess_riqi);
263  }
264  } else {
265  // the BDeu score can be computed as follows:
266  // qi * gammalog2 (ess / qi) - ri * qi * gammalog2 (ess / (ri * qi) )
267  // - sum_j=1^qi [ gammalog2 ( N_ij + ess / qi ) ]
268  // + sum_j=1^qi sum_k=1^ri log [ gammalog2 ( N_ijk + ess / (ri * qi) )
269  // ]
270  score = conditioning_size * __gammalog2(ess_qi)
271  - all_size * __gammalog2(ess_riqi);
272 
273  for (const auto n_ij : N_ij) {
274  score -= __gammalog2(n_ij + ess_qi);
275  }
276  for (const auto n_ijk : N_ijk) {
277  score += __gammalog2(n_ijk + ess_riqi);
278  }
279  }
280  } else {
281  // here, there are no conditioning nodes
282  const double ess_ri = ess / all_size;
283 
284  if (informative_external_apriori) {
285  // the score to compute is that of BD with aprioris
286  // N'_ijk + ESS / ( ri * qi )
287  // (the + ESS / ( ri * qi ) is here to take into account the
288  // internal apriori of K2)
289  std::vector< double, ALLOC< double > > N_prime_ijk(all_size, 0.0);
290  this->_apriori->addAllApriori(idset, N_prime_ijk);
291 
292  // the BDeu score can be computed as follows:
293  // gammalog2 ( N' + ess ) - gammalog2 ( N + N' + ess )
294  // + sum_k=1^ri { gammlog2 ( N_i + N'_i + ESS / ri)
295  // - gammalog2 ( N'_i + ESS / ri ) }
296  double N = 0.0;
297  double N_prime = 0.0;
298  for (std::size_t k = std::size_t(0); k < all_size; ++k) {
299  score += __gammalog2(N_ijk[k] + N_prime_ijk[k] + ess_ri)
300  - __gammalog2(N_prime_ijk[k] + ess_ri);
301  N += N_ijk[k];
302  N_prime += N_prime_ijk[k];
303  }
304  score += __gammalog2(N_prime + ess) - __gammalog2(N + N_prime + ess);
305  } else {
306  // the BDeu score can be computed as follows:
307  // gammalog2 ( ess ) - ri * gammalog2 ( ess / ri )
308  // - gammalog2 ( N + ess )
309  // + sum_k=1^ri log [ gammalog2 ( N_ijk + ess / ri ) ]
310 
311  score = __gammalog2(ess) - all_size * __gammalog2(ess_ri);
312  double N = 0;
313  for (const auto n_ijk : N_ijk) {
314  score += __gammalog2(n_ijk + ess_ri);
315  N += n_ijk;
316  }
317  score -= __gammalog2(N + ess);
318  }
319  }
320 
321  return score;
322  }
323 
324  } /* namespace learning */
325 
326 } /* namespace gum */
327 
328 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
virtual ScoreBDeu< ALLOC > * clone() const
virtual copy constructor
const DatabaseTable< ALLOC > & database() const
return the database used by the score
double score(const NodeId var)
returns the score of a single node
virtual const Apriori< ALLOC > & internalApriori() const final
returns the internal apriori of the score
static const std::string type
Definition: aprioriTypes.h:40
Score(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
static const std::string type
Definition: aprioriTypes.h:45
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
Definition: scoreBDeu.h:59
the class for computing BDeu scores
STL namespace.
virtual ~ScoreBDeu()
destructor
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
std::vector< double, ALLOC< double > > _marginalize(const NodeId X_id, const std::vector< double, ALLOC< double > > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
void setEffectiveSampleSize(double ess)
sets the effective sample size of the internal apriori
virtual double _score(const IdSet< ALLOC > &idset) final
returns the score for a given IdSet
Score< ALLOC > & operator=(const Score< ALLOC > &from)
copy operator
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
allocator_type getAllocator() const
returns the allocator used by the score
ScoreBDeu< ALLOC > & operator=(const ScoreBDeu< ALLOC > &from)
copy operator
Apriori< ALLOC > * _apriori
the expert knowledge a priori we add to the score
Definition: score.h:239
static const std::string type
Definition: aprioriTypes.h:35
ScoreBDeu(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
RecordCounter< ALLOC > _counter
the record counter used for the countings over discrete variables
Definition: score.h:242
Size NodeId
Type for node ids.
Definition: graphElements.h:97
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52