aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
scoreK2_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing K2 scores
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 
30 # include <agrum/BN/learning/scores_and_tests/scoreK2.h>
31 # include <sstream>
32 
33 namespace gum {
34 
35  namespace learning {
36 
37  /// default constructor
38  template < template < typename > class ALLOC >
39  INLINE ScoreK2< ALLOC >::ScoreK2(
40  const DBRowGeneratorParser< ALLOC >& parser,
41  const Apriori< ALLOC >& apriori,
42  const std::vector< std::pair< std::size_t, std::size_t >,
43  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
44  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
45  nodeId2columns,
46  const typename ScoreK2< ALLOC >::allocator_type& alloc) :
47  Score< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
48  internal_apriori__(parser.database(), nodeId2columns) {
49  GUM_CONSTRUCTOR(ScoreK2);
50  }
51 
52 
53  /// default constructor
54  template < template < typename > class ALLOC >
55  INLINE ScoreK2< ALLOC >::ScoreK2(
56  const DBRowGeneratorParser< ALLOC >& parser,
57  const Apriori< ALLOC >& apriori,
58  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
59  nodeId2columns,
60  const typename ScoreK2< ALLOC >::allocator_type& alloc) :
61  Score< ALLOC >(parser, apriori, nodeId2columns, alloc),
62  internal_apriori__(parser.database(), nodeId2columns) {
63  GUM_CONSTRUCTOR(ScoreK2);
64  }
65 
66 
67  /// copy constructor with a given allocator
68  template < template < typename > class ALLOC >
69  INLINE ScoreK2< ALLOC >::ScoreK2(
70  const ScoreK2< ALLOC >& from,
71  const typename ScoreK2< ALLOC >::allocator_type& alloc) :
72  Score< ALLOC >(from, alloc),
73  internal_apriori__(from.internal_apriori__, alloc),
74  gammalog2__(from.gammalog2__) {
75  GUM_CONS_CPY(ScoreK2);
76  }
77 
78 
79  /// copy constructor
80  template < template < typename > class ALLOC >
81  INLINE ScoreK2< ALLOC >::ScoreK2(const ScoreK2< ALLOC >& from) :
82  ScoreK2< ALLOC >(from, from.getAllocator()) {}
83 
84 
85  /// move constructor with a given allocator
86  template < template < typename > class ALLOC >
87  INLINE ScoreK2< ALLOC >::ScoreK2(
88  ScoreK2< ALLOC >&& from,
89  const typename ScoreK2< ALLOC >::allocator_type& alloc) :
90  Score< ALLOC >(std::move(from), alloc),
91  internal_apriori__(std::move(from.internal_apriori__), alloc),
92  gammalog2__(std::move(from.gammalog2__)) {
93  GUM_CONS_MOV(ScoreK2);
94  }
95 
96 
97  /// move constructor
98  template < template < typename > class ALLOC >
99  INLINE ScoreK2< ALLOC >::ScoreK2(ScoreK2< ALLOC >&& from) :
100  ScoreK2< ALLOC >(std::move(from), from.getAllocator()) {}
101 
102 
103  /// virtual copy constructor with a given allocator
104  template < template < typename > class ALLOC >
105  ScoreK2< ALLOC >* ScoreK2< ALLOC >::clone(
106  const typename ScoreK2< ALLOC >::allocator_type& alloc) const {
107  ALLOC< ScoreK2< ALLOC > > allocator(alloc);
108  ScoreK2< ALLOC >* new_score = allocator.allocate(1);
109  try {
110  allocator.construct(new_score, *this, alloc);
111  } catch (...) {
112  allocator.deallocate(new_score, 1);
113  throw;
114  }
115 
116  return new_score;
117  }
118 
119 
120  /// virtual copy constructor
121  template < template < typename > class ALLOC >
122  ScoreK2< ALLOC >* ScoreK2< ALLOC >::clone() const {
123  return clone(this->getAllocator());
124  }
125 
126 
127  /// destructor
128  template < template < typename > class ALLOC >
129  ScoreK2< ALLOC >::~ScoreK2() {
130  GUM_DESTRUCTOR(ScoreK2);
131  }
132 
133 
134  /// copy operator
135  template < template < typename > class ALLOC >
136  ScoreK2< ALLOC >& ScoreK2< ALLOC >::operator=(const ScoreK2< ALLOC >& from) {
137  if (this != &from) {
138  Score< ALLOC >::operator=(from);
139  internal_apriori__ = from.internal_apriori__;
140  }
141  return *this;
142  }
143 
144 
145  /// move operator
146  template < template < typename > class ALLOC >
147  ScoreK2< ALLOC >& ScoreK2< ALLOC >::operator=(ScoreK2< ALLOC >&& from) {
148  if (this != &from) {
149  Score< ALLOC >::operator=(std::move(from));
150  internal_apriori__ = std::move(from.internal_apriori__);
151  }
152  return *this;
153  }
154 
155 
156  /// indicates whether the apriori is compatible (meaningful) with the score
157  template < template < typename > class ALLOC >
158  std::string
159  ScoreK2< ALLOC >::isAprioriCompatible(const std::string& apriori_type,
160  double weight) {
161  // check that the apriori is compatible with the score
162  if (apriori_type == AprioriNoAprioriType::type) { return ""; }
163 
164  if (weight == 0.0) {
165  return "The apriori is currently compatible with the K2 score but "
166  "if you change the weight, it will become incompatible.";
167  }
168 
169  // known incompatible aprioris
170  if ((apriori_type == AprioriDirichletType::type)
171  || (apriori_type == AprioriSmoothingType::type)) {
172  return "The K2 score already contains a different 'implicit' apriori. "
173  "Therefore, the learning will probably be biased.";
174  }
175 
176  // apriori types unsupported by the type checker
177  std::stringstream msg;
178  msg << "The apriori '" << apriori_type
179  << "' is not yet supported by method isAprioriCompatible os Score K2";
180  return msg.str();
181  }
182 
183 
184  /// indicates whether the apriori is compatible (meaningful) with the score
185  template < template < typename > class ALLOC >
186  INLINE std::string
187  ScoreK2< ALLOC >::isAprioriCompatible(const Apriori< ALLOC >& apriori) {
188  return isAprioriCompatible(apriori.getType(), apriori.weight());
189  }
190 
191 
192  /// indicates whether the apriori is compatible (meaningful) with the score
193  template < template < typename > class ALLOC >
194  INLINE std::string ScoreK2< ALLOC >::isAprioriCompatible() const {
195  return isAprioriCompatible(*(this->apriori_));
196  }
197 
198 
199  /// returns the internal apriori of the score
200  template < template < typename > class ALLOC >
201  INLINE const Apriori< ALLOC >& ScoreK2< ALLOC >::internalApriori() const {
202  return internal_apriori__;
203  }
204 
205 
206  /// returns the score corresponding to a given nodeset
207  template < template < typename > class ALLOC >
208  double ScoreK2< ALLOC >::score_(const IdCondSet< ALLOC >& idset) {
209  // get the counts for all the nodes in the idset and add the apriori
210  std::vector< double, ALLOC< double > > N_ijk(
211  this->counter_.counts(idset, true));
212  const std::size_t all_size = N_ijk.size();
213  const bool informative_external_apriori = this->apriori_->isInformative();
214  double score = 0.0;
215 
216  // here, we distinguish idsets with conditioning nodes from those
217  // without conditioning nodes
218  if (idset.hasConditioningSet()) {
219  // get the counts for the conditioning nodes
220  std::vector< double, ALLOC< double > > N_ij(
221  this->marginalize_(idset[0], N_ijk));
222  const std::size_t conditioning_size = N_ij.size();
223  const double ri = double(all_size / conditioning_size);
224 
225  if (informative_external_apriori) {
226  // the score to compute is that of BD with aprioris N'_ijk + 1
227  // (the + 1 is here to take into account the internal apriori of K2)
228  std::vector< double, ALLOC< double > > N_prime_ijk(all_size, 0.0);
229  this->apriori_->addAllApriori(idset, N_prime_ijk);
230  std::vector< double, ALLOC< double > > N_prime_ij(N_ij.size(), 0.0);
231  this->apriori_->addConditioningApriori(idset, N_prime_ij);
232 
233  // the K2 score can be computed as follows:
234  // sum_j=1^qi [ gammalog2 ( N'_ij + r_i ) -
235  // gammalog2 ( N_ij + N'_ij + r_i )
236  // + sum_k=1^ri { gammlog2 ( N_ijk + N'_ijk + 1 ) -
237  // gammalog2 ( N'_ijk + 1 ) } ]
238  for (std::size_t j = std::size_t(0); j < conditioning_size; ++j) {
239  score += gammalog2__(N_prime_ij[j] + ri)
240  - gammalog2__(N_ij[j] + N_prime_ij[j] + ri);
241  }
242  for (std::size_t k = std::size_t(0); k < all_size; ++k) {
243  score += gammalog2__(N_ijk[k] + N_prime_ijk[k] + 1.0)
244  - gammalog2__(N_prime_ijk[k] + 1.0);
245  }
246  } else {
247  // the K2 score can be computed as follows:
248  // qi log {(ri - 1)!} + sum_j=1^qi [ - log {(N_ij+ri-1)!} +
249  // sum_k=1^ri log { N_ijk! } ]
250  score = conditioning_size * gammalog2__(ri);
251 
252  for (const auto n_ij: N_ij) {
253  score -= gammalog2__(n_ij + ri);
254  }
255  for (const auto n_ijk: N_ijk) {
256  score += gammalog2__(n_ijk + 1);
257  }
258  }
259  } else {
260  // here, there are no conditioning nodes
261  const double ri = double(all_size);
262 
263  if (informative_external_apriori) {
264  // the score to compute is that of BD with aprioris N'_ijk + 1
265  // (the + 1 is here to take into account the internal apriori of K2)
266 
267  // the K2 score can be computed as follows:
268  // gammalog2 ( N' + r_i ) - gammalog2 ( N + N' + r_i )
269  // + sum_k=1^ri { gammlog2 ( N_i + N'_i + 1 ) - gammalog2 ( N'_i + 1 )
270  // }
271  std::vector< double, ALLOC< double > > N_prime_ijk(all_size, 0.0);
272  this->apriori_->addAllApriori(idset, N_prime_ijk);
273 
274  // the K2 score can be computed as follows:
275  double N = 0.0;
276  double N_prime = 0.0;
277  for (std::size_t k = std::size_t(0); k < all_size; ++k) {
278  score += gammalog2__(N_ijk[k] + N_prime_ijk[k] + 1)
279  - gammalog2__(N_prime_ijk[k] + 1);
280  N += N_ijk[k];
281  N_prime += N_prime_ijk[k];
282  }
283  score += gammalog2__(N_prime + ri) - gammalog2__(N + N_prime + ri);
284  } else {
285  // the K2 score can be computed as follows:
286  // log {(ri - 1)!} - log {(N + ri-1)!} + sum_k=1^ri log { N_ijk! } ]
287  score = gammalog2__(ri);
288  double N = 0;
289  for (const auto n_ijk: N_ijk) {
290  score += gammalog2__(n_ijk + 1);
291  N += n_ijk;
292  }
293  score -= gammalog2__(N + ri);
294  }
295  }
296 
297  return score;
298  }
299 
300  } /* namespace learning */
301 
302 } /* namespace gum */
303 
304 #endif /* DOXYGEN_SHOULD_SKIP_THIS */