aGrUM  0.14.2
scoreK2_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 #ifndef DOXYGEN_SHOULD_SKIP_THIS
27 
29 # include <sstream>
30 
31 namespace gum {
32 
33  namespace learning {
34 
36  template < template < typename > class ALLOC >
38  const DBRowGeneratorParser< ALLOC >& parser,
39  const Apriori< ALLOC >& apriori,
40  const std::vector< std::pair< std::size_t, std::size_t >,
41  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
42  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
43  nodeId2columns,
44  const typename ScoreK2< ALLOC >::allocator_type& alloc) :
45  Score< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
46  __internal_apriori(parser.database(), nodeId2columns) {
47  GUM_CONSTRUCTOR(ScoreK2);
48  }
49 
50 
52  template < template < typename > class ALLOC >
54  const DBRowGeneratorParser< ALLOC >& parser,
55  const Apriori< ALLOC >& apriori,
56  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
57  nodeId2columns,
58  const typename ScoreK2< ALLOC >::allocator_type& alloc) :
59  Score< ALLOC >(parser, apriori, nodeId2columns, alloc),
60  __internal_apriori(parser.database(), nodeId2columns) {
61  GUM_CONSTRUCTOR(ScoreK2);
62  }
63 
64 
66  template < template < typename > class ALLOC >
68  const ScoreK2< ALLOC >& from,
69  const typename ScoreK2< ALLOC >::allocator_type& alloc) :
70  Score< ALLOC >(from, alloc),
71  __internal_apriori(from.__internal_apriori, alloc),
72  __gammalog2(from.__gammalog2) {
73  GUM_CONS_CPY(ScoreK2);
74  }
75 
76 
78  template < template < typename > class ALLOC >
79  INLINE ScoreK2< ALLOC >::ScoreK2(const ScoreK2< ALLOC >& from) :
80  ScoreK2< ALLOC >(from, from.getAllocator()) {}
81 
82 
84  template < template < typename > class ALLOC >
86  ScoreK2< ALLOC >&& from,
87  const typename ScoreK2< ALLOC >::allocator_type& alloc) :
88  Score< ALLOC >(std::move(from), alloc),
89  __internal_apriori(std::move(from.__internal_apriori), alloc),
90  __gammalog2(std::move(from.__gammalog2)) {
91  GUM_CONS_MOV(ScoreK2);
92  }
93 
94 
96  template < template < typename > class ALLOC >
97  INLINE ScoreK2< ALLOC >::ScoreK2(ScoreK2< ALLOC >&& from) :
98  ScoreK2< ALLOC >(std::move(from), from.getAllocator()) {}
99 
100 
102  template < template < typename > class ALLOC >
103  ScoreK2< ALLOC >* ScoreK2< ALLOC >::clone(
104  const typename ScoreK2< ALLOC >::allocator_type& alloc) const {
105  ALLOC< ScoreK2< ALLOC > > allocator(alloc);
106  ScoreK2< ALLOC >* new_score = allocator.allocate(1);
107  try {
108  allocator.construct(new_score, *this, alloc);
109  } catch (...) {
110  allocator.deallocate(new_score, 1);
111  throw;
112  }
113 
114  return new_score;
115  }
116 
117 
119  template < template < typename > class ALLOC >
120  ScoreK2< ALLOC >* ScoreK2< ALLOC >::clone() const {
121  return clone(this->getAllocator());
122  }
123 
124 
126  template < template < typename > class ALLOC >
128  GUM_DESTRUCTOR(ScoreK2);
129  }
130 
131 
133  template < template < typename > class ALLOC >
134  ScoreK2< ALLOC >& ScoreK2< ALLOC >::operator=(const ScoreK2< ALLOC >& from) {
135  if (this != &from) {
137  __internal_apriori = from.__internal_apriori;
138  }
139  return *this;
140  }
141 
142 
144  template < template < typename > class ALLOC >
145  ScoreK2< ALLOC >& ScoreK2< ALLOC >::operator=(ScoreK2< ALLOC >&& from) {
146  if (this != &from) {
147  Score< ALLOC >::operator=(std::move(from));
148  __internal_apriori = std::move(from.__internal_apriori);
149  }
150  return *this;
151  }
152 
153 
155  template < template < typename > class ALLOC >
156  std::string
157  ScoreK2< ALLOC >::isAprioriCompatible(const std::string& apriori_type,
158  double weight) {
159  // check that the apriori is compatible with the score
160  if (apriori_type == AprioriNoAprioriType::type) { return ""; }
161 
162  if (weight == 0.0) {
163  return "The apriori is currently compatible with the K2 score but "
164  "if you change the weight, it will become incompatible.";
165  }
166 
167  // known incompatible aprioris
168  if ((apriori_type == AprioriDirichletType::type)
169  || (apriori_type == AprioriSmoothingType::type)) {
170  return "The K2 score already contains a different 'implicit' apriori. "
171  "Therefore, the learning will probably be biased.";
172  }
173 
174  // apriori types unsupported by the type checker
175  std::stringstream msg;
176  msg << "The apriori '" << apriori_type
177  << "' is not yet supported by method isAprioriCompatible os Score K2";
178  return msg.str();
179  }
180 
181 
183  template < template < typename > class ALLOC >
184  INLINE std::string
185  ScoreK2< ALLOC >::isAprioriCompatible(const Apriori< ALLOC >& apriori) {
186  return isAprioriCompatible(apriori.getType(), apriori.weight());
187  }
188 
189 
191  template < template < typename > class ALLOC >
192  INLINE std::string ScoreK2< ALLOC >::isAprioriCompatible() const {
193  return isAprioriCompatible(*(this->_apriori));
194  }
195 
196 
198  template < template < typename > class ALLOC >
199  INLINE const Apriori< ALLOC >& ScoreK2< ALLOC >::internalApriori() const {
200  return __internal_apriori;
201  }
202 
203 
205  template < template < typename > class ALLOC >
206  double ScoreK2< ALLOC >::_score(const IdSet< ALLOC >& idset) {
207  // get the counts for all the nodes in the idset and add the apriori
208  std::vector< double, ALLOC< double > > N_ijk(
209  this->_counter.counts(idset, true));
210  const std::size_t all_size = N_ijk.size();
211  const bool informative_external_apriori = this->_apriori->isInformative();
212  double score = 0.0;
213 
214  // here, we distinguish idsets with conditioning nodes from those
215  // without conditioning nodes
216  if (idset.hasConditioningSet()) {
217  // get the counts for the conditioning nodes
218  std::vector< double, ALLOC< double > > N_ij(
219  this->_marginalize(idset[0], N_ijk));
220  const std::size_t conditioning_size = N_ij.size();
221  const double ri = double(all_size / conditioning_size);
222 
223  if (informative_external_apriori) {
224  // the score to compute is that of BD with aprioris N'_ijk + 1
225  // (the + 1 is here to take into account the internal apriori of K2)
226  std::vector< double, ALLOC< double > > N_prime_ijk(all_size, 0.0);
227  this->_apriori->addAllApriori(idset, N_prime_ijk);
228  std::vector< double, ALLOC< double > > N_prime_ij(N_ij.size(), 0.0);
229  this->_apriori->addConditioningApriori(idset, N_prime_ij);
230 
231  // the K2 score can be computed as follows:
232  // sum_j=1^qi [ gammalog2 ( N'_ij + r_i ) -
233  // gammalog2 ( N_ij + N'_ij + r_i )
234  // + sum_k=1^ri { gammlog2 ( N_ijk + N'_ijk + 1 ) -
235  // gammalog2 ( N'_ijk + 1 ) } ]
236  for (std::size_t j = std::size_t(0); j < conditioning_size; ++j) {
237  score += __gammalog2(N_prime_ij[j] + ri)
238  - __gammalog2(N_ij[j] + N_prime_ij[j] + ri);
239  }
240  for (std::size_t k = std::size_t(0); k < all_size; ++k) {
241  score += __gammalog2(N_ijk[k] + N_prime_ijk[k] + 1.0)
242  - __gammalog2(N_prime_ijk[k] + 1.0);
243  }
244  } else {
245  // the K2 score can be computed as follows:
246  // qi log {(ri - 1)!} + sum_j=1^qi [ - log {(N_ij+ri-1)!} +
247  // sum_k=1^ri log { N_ijk! } ]
248  score = conditioning_size * __gammalog2(ri);
249 
250  for (const auto n_ij : N_ij) {
251  score -= __gammalog2(n_ij + ri);
252  }
253  for (const auto n_ijk : N_ijk) {
254  score += __gammalog2(n_ijk + 1);
255  }
256  }
257  } else {
258  // here, there are no conditioning nodes
259  const double ri = double(all_size);
260 
261  if (informative_external_apriori) {
262  // the score to compute is that of BD with aprioris N'_ijk + 1
263  // (the + 1 is here to take into account the internal apriori of K2)
264 
265  // the K2 score can be computed as follows:
266  // gammalog2 ( N' + r_i ) - gammalog2 ( N + N' + r_i )
267  // + sum_k=1^ri { gammlog2 ( N_i + N'_i + 1 ) - gammalog2 ( N'_i + 1 )
268  // }
269  std::vector< double, ALLOC< double > > N_prime_ijk(all_size, 0.0);
270  this->_apriori->addAllApriori(idset, N_prime_ijk);
271 
272  // the K2 score can be computed as follows:
273  double N = 0.0;
274  double N_prime = 0.0;
275  for (std::size_t k = std::size_t(0); k < all_size; ++k) {
276  score += __gammalog2(N_ijk[k] + N_prime_ijk[k] + 1)
277  - __gammalog2(N_prime_ijk[k] + 1);
278  N += N_ijk[k];
279  N_prime += N_prime_ijk[k];
280  }
281  score += __gammalog2(N_prime + ri) - __gammalog2(N + N_prime + ri);
282  } else {
283  // the K2 score can be computed as follows:
284  // log {(ri - 1)!} - log {(N + ri-1)!} + sum_k=1^ri log { N_ijk! } ]
285  score = __gammalog2(ri);
286  double N = 0;
287  for (const auto n_ijk : N_ijk) {
288  score += __gammalog2(n_ijk + 1);
289  N += n_ijk;
290  }
291  score -= __gammalog2(N + ri);
292  }
293  }
294 
295  return score;
296  }
297 
298  } /* namespace learning */
299 
300 } /* namespace gum */
301 
302 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
const DatabaseTable< ALLOC > & database() const
return the database used by the score
double score(const NodeId var)
returns the score of a single node
static const std::string type
Definition: aprioriTypes.h:40
Score(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
static const std::string type
Definition: aprioriTypes.h:45
STL namespace.
the class for computing K2 scores (actually their log2 value)
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
std::vector< double, ALLOC< double > > _marginalize(const NodeId X_id, const std::vector< double, ALLOC< double > > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
ScoreK2< ALLOC > & operator=(const ScoreK2< ALLOC > &from)
copy operator
virtual ScoreK2< ALLOC > * clone() const
virtual copy constructor
ScoreK2(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
Score< ALLOC > & operator=(const Score< ALLOC > &from)
copy operator
virtual const Apriori< ALLOC > & internalApriori() const final
returns the internal apriori of the score
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
virtual double _score(const IdSet< ALLOC > &idset) final
returns the score for a given IdSet
allocator_type getAllocator() const
returns the allocator used by the score
virtual ~ScoreK2()
destructor
Apriori< ALLOC > * _apriori
the expert knowledge a priori we add to the score
Definition: score.h:239
static const std::string type
Definition: aprioriTypes.h:35
RecordCounter< ALLOC > _counter
the record counter used for the countings over discrete variables
Definition: score.h:242
Size NodeId
Type for node ids.
Definition: graphElements.h:97
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
Definition: scoreK2.h:61