aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
scoreBIC_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing BIC scores
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 
30 # include <agrum/BN/learning/scores_and_tests/scoreBIC.h>
31 # include <sstream>
32 
33 namespace gum {
34 
35  namespace learning {
36 
37  /// default constructor
38  template < template < typename > class ALLOC >
39  INLINE ScoreBIC< ALLOC >::ScoreBIC(
40  const DBRowGeneratorParser< ALLOC >& parser,
41  const Apriori< ALLOC >& apriori,
42  const std::vector< std::pair< std::size_t, std::size_t >,
43  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
44  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
45  nodeId2columns,
46  const typename ScoreBIC< ALLOC >::allocator_type& alloc) :
47  Score< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
48  internal_apriori__(parser.database(), nodeId2columns) {
49  GUM_CONSTRUCTOR(ScoreBIC);
50  }
51 
52 
53  /// default constructor
54  template < template < typename > class ALLOC >
55  INLINE ScoreBIC< ALLOC >::ScoreBIC(
56  const DBRowGeneratorParser< ALLOC >& parser,
57  const Apriori< ALLOC >& apriori,
58  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
59  nodeId2columns,
60  const typename ScoreBIC< ALLOC >::allocator_type& alloc) :
61  Score< ALLOC >(parser, apriori, nodeId2columns, alloc),
62  internal_apriori__(parser.database(), nodeId2columns) {
63  GUM_CONSTRUCTOR(ScoreBIC);
64  }
65 
66 
67  /// copy constructor with a given allocator
68  template < template < typename > class ALLOC >
69  INLINE ScoreBIC< ALLOC >::ScoreBIC(
70  const ScoreBIC< ALLOC >& from,
71  const typename ScoreBIC< ALLOC >::allocator_type& alloc) :
72  Score< ALLOC >(from, alloc),
73  internal_apriori__(from.internal_apriori__, alloc) {
74  GUM_CONS_CPY(ScoreBIC);
75  }
76 
77 
78  /// copy constructor
79  template < template < typename > class ALLOC >
80  INLINE ScoreBIC< ALLOC >::ScoreBIC(const ScoreBIC< ALLOC >& from) :
81  ScoreBIC< ALLOC >(from, from.getAllocator()) {}
82 
83 
84  /// move constructor with a given allocator
85  template < template < typename > class ALLOC >
86  INLINE ScoreBIC< ALLOC >::ScoreBIC(
87  ScoreBIC< ALLOC >&& from,
88  const typename ScoreBIC< ALLOC >::allocator_type& alloc) :
89  Score< ALLOC >(std::move(from), alloc),
90  internal_apriori__(std::move(from.internal_apriori__), alloc) {
91  GUM_CONS_MOV(ScoreBIC);
92  }
93 
94 
95  /// move constructor
96  template < template < typename > class ALLOC >
97  INLINE ScoreBIC< ALLOC >::ScoreBIC(ScoreBIC< ALLOC >&& from) :
98  ScoreBIC< ALLOC >(std::move(from), from.getAllocator()) {}
99 
100 
101  /// virtual copy constructor with a given allocator
102  template < template < typename > class ALLOC >
103  ScoreBIC< ALLOC >* ScoreBIC< ALLOC >::clone(
104  const typename ScoreBIC< ALLOC >::allocator_type& alloc) const {
105  ALLOC< ScoreBIC< ALLOC > > allocator(alloc);
106  ScoreBIC< ALLOC >* new_score = allocator.allocate(1);
107  try {
108  allocator.construct(new_score, *this, alloc);
109  } catch (...) {
110  allocator.deallocate(new_score, 1);
111  throw;
112  }
113 
114  return new_score;
115  }
116 
117 
118  /// virtual copy constructor
119  template < template < typename > class ALLOC >
120  ScoreBIC< ALLOC >* ScoreBIC< ALLOC >::clone() const {
121  return clone(this->getAllocator());
122  }
123 
124 
125  /// destructor
126  template < template < typename > class ALLOC >
127  ScoreBIC< ALLOC >::~ScoreBIC() {
128  GUM_DESTRUCTOR(ScoreBIC);
129  }
130 
131 
132  /// copy operator
133  template < template < typename > class ALLOC >
134  ScoreBIC< ALLOC >&
135  ScoreBIC< ALLOC >::operator=(const ScoreBIC< ALLOC >& from) {
136  if (this != &from) {
137  Score< ALLOC >::operator=(from);
138  internal_apriori__ = from.internal_apriori__;
139  }
140  return *this;
141  }
142 
143 
144  /// move operator
145  template < template < typename > class ALLOC >
146  ScoreBIC< ALLOC >& ScoreBIC< ALLOC >::operator=(ScoreBIC< ALLOC >&& from) {
147  if (this != &from) {
148  Score< ALLOC >::operator=(std::move(from));
149  internal_apriori__ = std::move(from.internal_apriori__);
150  }
151  return *this;
152  }
153 
154 
155  /// indicates whether the apriori is compatible (meaningful) with the score
156  template < template < typename > class ALLOC >
157  std::string
158  ScoreBIC< ALLOC >::isAprioriCompatible(const std::string& apriori_type,
159  double weight) {
160  // check that the apriori is compatible with the score
161  if ((apriori_type == AprioriDirichletType::type)
162  || (apriori_type == AprioriSmoothingType::type)
163  || (apriori_type == AprioriNoAprioriType::type)) {
164  return "";
165  }
166 
167  // apriori types unsupported by the type checker
168  std::stringstream msg;
169  msg << "The apriori '" << apriori_type
170  << "' is not yet supported by method isAprioriCompatible os Score BIC";
171  return msg.str();
172  }
173 
174 
175  /// indicates whether the apriori is compatible (meaningful) with the score
176  template < template < typename > class ALLOC >
177  INLINE std::string
178  ScoreBIC< ALLOC >::isAprioriCompatible(const Apriori< ALLOC >& apriori) {
179  return isAprioriCompatible(apriori.getType(), apriori.weight());
180  }
181 
182 
183  /// indicates whether the apriori is compatible (meaningful) with the score
184  template < template < typename > class ALLOC >
185  INLINE std::string ScoreBIC< ALLOC >::isAprioriCompatible() const {
186  return isAprioriCompatible(*(this->apriori_));
187  }
188 
189 
190  /// returns the internal apriori of the score
191  template < template < typename > class ALLOC >
192  INLINE const Apriori< ALLOC >& ScoreBIC< ALLOC >::internalApriori() const {
193  return internal_apriori__;
194  }
195 
196 
197  /// returns the score corresponding to a given nodeset
198  template < template < typename > class ALLOC >
199  double ScoreBIC< ALLOC >::score_(const IdCondSet< ALLOC >& idset) {
200  // get the counts for all the nodes in the idset and add the apriori
201  std::vector< double, ALLOC< double > > N_ijk(
202  this->counter_.counts(idset, true));
203  const bool informative_external_apriori = this->apriori_->isInformative();
204  if (informative_external_apriori)
205  this->apriori_->addAllApriori(idset, N_ijk);
206  const std::size_t all_size = N_ijk.size();
207 
208  // here, we distinguish idsets with conditioning nodes from those
209  // without conditioning nodes
210  if (idset.hasConditioningSet()) {
211  // get the counts for the conditioning nodes
212  std::vector< double, ALLOC< double > > N_ij(
213  this->marginalize_(idset[0], N_ijk));
214  const std::size_t conditioning_size = N_ij.size();
215 
216  // initialize the score: this should be the penalty of the BIC score,
217  // i.e., -(ri-1 ) * qi * .5 * log ( N + N' )
218  const std::size_t target_domsize = all_size / conditioning_size;
219  const double penalty
220  = conditioning_size * double(target_domsize - std::size_t(1));
221 
222  // compute the score: it remains to compute the log likelihood, i.e.,
223  // sum_k=1^r_i sum_j=1^q_i N_ijk log (N_ijk / N_ij), which is also
224  // equivalent to:
225  // sum_j=1^q_i sum_k=1^r_i N_ijk log N_ijk - sum_j=1^q_i N_ij log N_ij
226  double score = 0.0;
227  for (const auto n_ijk: N_ijk) {
228  if (n_ijk) { score += n_ijk * std::log(n_ijk); }
229  }
230  double N = 0;
231  for (const auto n_ij: N_ij) {
232  if (n_ij) {
233  score -= n_ij * std::log(n_ij);
234  N += n_ij;
235  }
236  }
237 
238  // finally, remove the penalty
239  score -= penalty * std::log(N) * 0.5;
240 
241  // divide by log(2), since the log likelihood uses log_2
242  score *= this->one_log2_;
243 
244  return score;
245  } else {
246  // here, there are no conditioning nodes
247 
248  // initialize the score: this should be the penalty of the BIC score,
249  // i.e., -(ri-1 )
250  const double penalty = double(all_size - std::size_t(1));
251 
252  // compute the score: it remains to compute the log likelihood, i.e.,
253  // sum_k=1^r_i N_ijk log (N_ijk / N), which is also
254  // equivalent to:
255  // sum_j=1^q_i sum_k=1^r_i N_ijk log N_ijk - N log N
256  double N = 0.0;
257  double score = 0.0;
258  for (const auto n_ijk: N_ijk) {
259  if (n_ijk) {
260  score += n_ijk * std::log(n_ijk);
261  N += n_ijk;
262  }
263  }
264  score -= N * std::log(N);
265 
266  // finally, remove the penalty
267  score -= penalty * std::log(N) * 0.5;
268 
269  // divide by log(2), since the log likelihood uses log_2
270  score *= this->one_log2_;
271 
272  return score;
273  }
274  }
275 
276 
277  /// returns the size of the database w.r.t. a given idset
278  template < template < typename > class ALLOC >
279  double ScoreBIC< ALLOC >::N(const IdCondSet< ALLOC >& idset) {
280  // get the counts for all the nodes in the idset and add the apriori
281  std::vector< double, ALLOC< double > > N_ijk(
282  this->counter_.counts(idset, true));
283  if (this->apriori_->isInformative())
284  this->apriori_->addAllApriori(idset, N_ijk);
285 
286  double N = 0;
287  for (const auto n_ijk: N_ijk) {
288  N += n_ijk;
289  }
290 
291  return N;
292  }
293 
294  } /* namespace learning */
295 
296 } /* namespace gum */
297 
298 #endif /* DOXYGEN_SHOULD_SKIP_THIS */