aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
scoreBIC_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing BIC scores
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 
30 # include <agrum/BN/learning/scores_and_tests/scoreBIC.h>
31 # include <sstream>
32 
33 namespace gum {
34 
35  namespace learning {
36 
37  /// default constructor
38  template < template < typename > class ALLOC >
39  INLINE ScoreBIC< ALLOC >::ScoreBIC(
40  const DBRowGeneratorParser< ALLOC >& parser,
41  const Apriori< ALLOC >& apriori,
42  const std::vector< std::pair< std::size_t, std::size_t >,
43  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
44  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >& nodeId2columns,
45  const typename ScoreBIC< ALLOC >::allocator_type& alloc) :
46  Score< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
47  _internal_apriori_(parser.database(), nodeId2columns) {
48  GUM_CONSTRUCTOR(ScoreBIC);
49  }
50 
51 
52  /// default constructor
53  template < template < typename > class ALLOC >
54  INLINE ScoreBIC< ALLOC >::ScoreBIC(
55  const DBRowGeneratorParser< ALLOC >& parser,
56  const Apriori< ALLOC >& apriori,
57  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >& nodeId2columns,
58  const typename ScoreBIC< ALLOC >::allocator_type& alloc) :
59  Score< ALLOC >(parser, apriori, nodeId2columns, alloc),
60  _internal_apriori_(parser.database(), nodeId2columns) {
61  GUM_CONSTRUCTOR(ScoreBIC);
62  }
63 
64 
65  /// copy constructor with a given allocator
66  template < template < typename > class ALLOC >
67  INLINE ScoreBIC< ALLOC >::ScoreBIC(const ScoreBIC< ALLOC >& from,
68  const typename ScoreBIC< ALLOC >::allocator_type& alloc) :
69  Score< ALLOC >(from, alloc),
70  _internal_apriori_(from._internal_apriori_, alloc) {
71  GUM_CONS_CPY(ScoreBIC);
72  }
73 
74 
75  /// copy constructor
76  template < template < typename > class ALLOC >
77  INLINE ScoreBIC< ALLOC >::ScoreBIC(const ScoreBIC< ALLOC >& from) :
78  ScoreBIC< ALLOC >(from, from.getAllocator()) {}
79 
80 
81  /// move constructor with a given allocator
82  template < template < typename > class ALLOC >
83  INLINE ScoreBIC< ALLOC >::ScoreBIC(ScoreBIC< ALLOC >&& from,
84  const typename ScoreBIC< ALLOC >::allocator_type& alloc) :
85  Score< ALLOC >(std::move(from), alloc),
86  _internal_apriori_(std::move(from._internal_apriori_), alloc) {
87  GUM_CONS_MOV(ScoreBIC);
88  }
89 
90 
91  /// move constructor
92  template < template < typename > class ALLOC >
93  INLINE ScoreBIC< ALLOC >::ScoreBIC(ScoreBIC< ALLOC >&& from) :
94  ScoreBIC< ALLOC >(std::move(from), from.getAllocator()) {}
95 
96 
97  /// virtual copy constructor with a given allocator
98  template < template < typename > class ALLOC >
99  ScoreBIC< ALLOC >*
100  ScoreBIC< ALLOC >::clone(const typename ScoreBIC< ALLOC >::allocator_type& alloc) const {
101  ALLOC< ScoreBIC< ALLOC > > allocator(alloc);
102  ScoreBIC< ALLOC >* new_score = allocator.allocate(1);
103  try {
104  allocator.construct(new_score, *this, alloc);
105  } catch (...) {
106  allocator.deallocate(new_score, 1);
107  throw;
108  }
109 
110  return new_score;
111  }
112 
113 
114  /// virtual copy constructor
115  template < template < typename > class ALLOC >
116  ScoreBIC< ALLOC >* ScoreBIC< ALLOC >::clone() const {
117  return clone(this->getAllocator());
118  }
119 
120 
121  /// destructor
122  template < template < typename > class ALLOC >
123  ScoreBIC< ALLOC >::~ScoreBIC() {
124  GUM_DESTRUCTOR(ScoreBIC);
125  }
126 
127 
128  /// copy operator
129  template < template < typename > class ALLOC >
130  ScoreBIC< ALLOC >& ScoreBIC< ALLOC >::operator=(const ScoreBIC< ALLOC >& from) {
131  if (this != &from) {
132  Score< ALLOC >::operator=(from);
133  _internal_apriori_ = from._internal_apriori_;
134  }
135  return *this;
136  }
137 
138 
139  /// move operator
140  template < template < typename > class ALLOC >
141  ScoreBIC< ALLOC >& ScoreBIC< ALLOC >::operator=(ScoreBIC< ALLOC >&& from) {
142  if (this != &from) {
143  Score< ALLOC >::operator=(std::move(from));
144  _internal_apriori_ = std::move(from._internal_apriori_);
145  }
146  return *this;
147  }
148 
149 
150  /// indicates whether the apriori is compatible (meaningful) with the score
151  template < template < typename > class ALLOC >
152  std::string ScoreBIC< ALLOC >::isAprioriCompatible(const std::string& apriori_type,
153  double weight) {
154  // check that the apriori is compatible with the score
155  if ((apriori_type == AprioriDirichletType::type)
156  || (apriori_type == AprioriSmoothingType::type)
157  || (apriori_type == AprioriNoAprioriType::type)) {
158  return "";
159  }
160 
161  // apriori types unsupported by the type checker
162  std::stringstream msg;
163  msg << "The apriori '" << apriori_type << "' is not yet compatible with the score 'BIC'.";
164  return msg.str();
165  }
166 
167 
168  /// indicates whether the apriori is compatible (meaningful) with the score
169  template < template < typename > class ALLOC >
170  INLINE std::string ScoreBIC< ALLOC >::isAprioriCompatible(const Apriori< ALLOC >& apriori) {
171  return isAprioriCompatible(apriori.getType(), apriori.weight());
172  }
173 
174 
175  /// indicates whether the apriori is compatible (meaningful) with the score
176  template < template < typename > class ALLOC >
177  INLINE std::string ScoreBIC< ALLOC >::isAprioriCompatible() const {
178  return isAprioriCompatible(*(this->apriori_));
179  }
180 
181 
182  /// returns the internal apriori of the score
183  template < template < typename > class ALLOC >
184  INLINE const Apriori< ALLOC >& ScoreBIC< ALLOC >::internalApriori() const {
185  return _internal_apriori_;
186  }
187 
188 
189  /// returns the score corresponding to a given nodeset
190  template < template < typename > class ALLOC >
191  double ScoreBIC< ALLOC >::score_(const IdCondSet< ALLOC >& idset) {
192  // get the counts for all the nodes in the idset and add the apriori
193  std::vector< double, ALLOC< double > > N_ijk(this->counter_.counts(idset, true));
194  const bool informative_external_apriori = this->apriori_->isInformative();
195  if (informative_external_apriori) this->apriori_->addAllApriori(idset, N_ijk);
196  const std::size_t all_size = N_ijk.size();
197 
198  // here, we distinguish idsets with conditioning nodes from those
199  // without conditioning nodes
200  if (idset.hasConditioningSet()) {
201  // get the counts for the conditioning nodes
202  std::vector< double, ALLOC< double > > N_ij(this->marginalize_(idset[0], N_ijk));
203  const std::size_t conditioning_size = N_ij.size();
204 
205  // initialize the score: this should be the penalty of the BIC score,
206  // i.e., -(ri-1 ) * qi * .5 * log ( N + N' )
207  const std::size_t target_domsize = all_size / conditioning_size;
208  const double penalty = conditioning_size * double(target_domsize - std::size_t(1));
209 
210  // compute the score: it remains to compute the log likelihood, i.e.,
211  // sum_k=1^r_i sum_j=1^q_i N_ijk log (N_ijk / N_ij), which is also
212  // equivalent to:
213  // sum_j=1^q_i sum_k=1^r_i N_ijk log N_ijk - sum_j=1^q_i N_ij log N_ij
214  double score = 0.0;
215  for (const auto n_ijk: N_ijk) {
216  if (n_ijk) { score += n_ijk * std::log(n_ijk); }
217  }
218  double N = 0;
219  for (const auto n_ij: N_ij) {
220  if (n_ij) {
221  score -= n_ij * std::log(n_ij);
222  N += n_ij;
223  }
224  }
225 
226  // finally, remove the penalty
227  score -= penalty * std::log(N) * 0.5;
228 
229  // divide by log(2), since the log likelihood uses log_2
230  score *= this->one_log2_;
231 
232  return score;
233  } else {
234  // here, there are no conditioning nodes
235 
236  // initialize the score: this should be the penalty of the BIC score,
237  // i.e., -(ri-1 )
238  const double penalty = double(all_size - std::size_t(1));
239 
240  // compute the score: it remains to compute the log likelihood, i.e.,
241  // sum_k=1^r_i N_ijk log (N_ijk / N), which is also
242  // equivalent to:
243  // sum_j=1^q_i sum_k=1^r_i N_ijk log N_ijk - N log N
244  double N = 0.0;
245  double score = 0.0;
246  for (const auto n_ijk: N_ijk) {
247  if (n_ijk) {
248  score += n_ijk * std::log(n_ijk);
249  N += n_ijk;
250  }
251  }
252  score -= N * std::log(N);
253 
254  // finally, remove the penalty
255  score -= penalty * std::log(N) * 0.5;
256 
257  // divide by log(2), since the log likelihood uses log_2
258  score *= this->one_log2_;
259 
260  return score;
261  }
262  }
263 
264 
265  /// returns the size of the database w.r.t. a given idset
266  template < template < typename > class ALLOC >
267  double ScoreBIC< ALLOC >::N(const IdCondSet< ALLOC >& idset) {
268  // get the counts for all the nodes in the idset and add the apriori
269  std::vector< double, ALLOC< double > > N_ijk(this->counter_.counts(idset, true));
270  if (this->apriori_->isInformative()) this->apriori_->addAllApriori(idset, N_ijk);
271 
272  double N = 0;
273  for (const auto n_ijk: N_ijk) {
274  N += n_ijk;
275  }
276 
277  return N;
278  }
279 
280  } /* namespace learning */
281 
282 } /* namespace gum */
283 
284 #endif /* DOXYGEN_SHOULD_SKIP_THIS */