aGrUM  0.14.2
indepTestG2_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 #ifndef DOXYGEN_SHOULD_SKIP_THIS
27 
28 namespace gum {
29 
30  namespace learning {
31 
33  template < template < typename > class ALLOC >
35  const DBRowGeneratorParser< ALLOC >& parser,
36  const Apriori< ALLOC >& apriori,
37  const std::vector< std::pair< std::size_t, std::size_t >,
38  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
39  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
40  nodeId2columns,
41  const typename IndepTestG2< ALLOC >::allocator_type& alloc) :
42  IndependenceTest< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
43  __domain_sizes(parser.database().domainSizes()), __chi2(__domain_sizes) {
44  GUM_CONSTRUCTOR(IndepTestG2);
45  }
46 
47 
49  template < template < typename > class ALLOC >
51  const DBRowGeneratorParser< ALLOC >& parser,
52  const Apriori< ALLOC >& apriori,
53  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
54  nodeId2columns,
55  const typename IndepTestG2< ALLOC >::allocator_type& alloc) :
56  IndependenceTest< ALLOC >(parser, apriori, nodeId2columns, alloc),
57  __domain_sizes(parser.database().domainSizes()), __chi2(__domain_sizes) {
58  GUM_CONSTRUCTOR(IndepTestG2);
59  }
60 
61 
63  template < template < typename > class ALLOC >
65  const IndepTestG2< ALLOC >& from,
66  const typename IndepTestG2< ALLOC >::allocator_type& alloc) :
67  IndependenceTest< ALLOC >(from, alloc),
68  __chi2(__domain_sizes) {
69  GUM_CONS_CPY(IndepTestG2);
70  }
71 
72 
74  template < template < typename > class ALLOC >
75  INLINE IndepTestG2< ALLOC >::IndepTestG2(const IndepTestG2< ALLOC >& from) :
76  IndepTestG2< ALLOC >(from, from.getAllocator()) {}
77 
78 
80  template < template < typename > class ALLOC >
82  IndepTestG2< ALLOC >&& from,
83  const typename IndepTestG2< ALLOC >::allocator_type& alloc) :
84  IndependenceTest< ALLOC >(std::move(from), alloc),
85  __domain_sizes(from.__domain_sizes), __chi2(__domain_sizes) {
86  GUM_CONS_MOV(IndepTestG2);
87  }
88 
89 
91  template < template < typename > class ALLOC >
92  INLINE IndepTestG2< ALLOC >::IndepTestG2(IndepTestG2< ALLOC >&& from) :
93  IndepTestG2< ALLOC >(std::move(from), from.getAllocator()) {}
94 
95 
97  template < template < typename > class ALLOC >
98  IndepTestG2< ALLOC >* IndepTestG2< ALLOC >::clone(
99  const typename IndepTestG2< ALLOC >::allocator_type& alloc) const {
100  ALLOC< IndepTestG2< ALLOC > > allocator(alloc);
101  IndepTestG2< ALLOC >* new_score = allocator.allocate(1);
102  try {
103  allocator.construct(new_score, *this, alloc);
104  } catch (...) {
105  allocator.deallocate(new_score, 1);
106  throw;
107  }
108 
109  return new_score;
110  }
111 
112 
114  template < template < typename > class ALLOC >
115  IndepTestG2< ALLOC >* IndepTestG2< ALLOC >::clone() const {
116  return clone(this->getAllocator());
117  }
118 
119 
121  template < template < typename > class ALLOC >
123  GUM_DESTRUCTOR(IndepTestG2);
124  }
125 
126 
128  template < template < typename > class ALLOC >
129  IndepTestG2< ALLOC >& IndepTestG2< ALLOC >::
130  operator=(const IndepTestG2< ALLOC >& from) {
131  if (this != &from) {
133  //__chi2 = from.__chi2;
134  }
135  return *this;
136  }
137 
138 
140  template < template < typename > class ALLOC >
141  IndepTestG2< ALLOC >& IndepTestG2< ALLOC >::
142  operator=(IndepTestG2< ALLOC >&& from) {
143  if (this != &from) {
144  IndependenceTest< ALLOC >::operator=(std::move(from));
145  //__chi2 = std::move(from.__chi2);
146  }
147  return *this;
148  }
149 
150 
152  template < template < typename > class ALLOC >
153  double IndepTestG2< ALLOC >::_score(const IdSet< ALLOC >& idset) {
154  // get the countings
155  std::vector< double, ALLOC< double > > N_xyz(
156  this->_counter.counts(idset, true));
157  const bool informative_external_apriori = this->_apriori->isInformative();
158  if (informative_external_apriori)
159  this->_apriori->addAllApriori(idset, N_xyz);
160  const std::size_t all_size = (N_xyz.size());
161 
162  // compute the domain sizes of X and Y
163  const auto& nodeId2cols = this->_counter.nodeId2Columns();
164  const auto& database = this->_counter.database();
165  Idx var_x, var_y;
166  if (nodeId2cols.empty()) {
167  var_x = idset[0];
168  var_y = idset[1];
169  } else {
170  var_x = nodeId2cols.second(idset[0]);
171  var_y = nodeId2cols.second(idset[1]);
172  }
173 
174  const std::size_t X_size = database.domainSize(var_x);
175  const std::size_t Y_size = database.domainSize(var_y);
176 
177 
178  // here, we distinguish idsets with conditioning nodes from those
179  // without conditioning nodes
180  if (idset.hasConditioningSet()) {
181  const std::size_t Z_size = all_size / (X_size * Y_size);
182 
183  // get the counts for the conditioning nodes
184  std::vector< double, ALLOC< double > > N_xz =
185  this->_marginalize(std::size_t(1), X_size, Y_size, Z_size, N_xyz);
186  std::vector< double, ALLOC< double > > N_yz =
187  this->_marginalize(std::size_t(0), X_size, Y_size, Z_size, N_xyz);
188  std::vector< double, ALLOC< double > > N_z =
189  this->_marginalize(std::size_t(2), X_size, Y_size, Z_size, N_xyz);
190 
191  // indicate to the chi2 distribution the set of conditioning nodes
192  std::vector< Idx > cond_nodes;
193  cond_nodes.reserve(idset.nbRHSIds());
194  {
195  const auto cond_idset = idset.conditionalIdSet().ids();
196  if (nodeId2cols.empty()) {
197  for (const auto node : cond_idset)
198  cond_nodes.push_back(node);
199  } else {
200  for (const auto node : cond_idset)
201  cond_nodes.push_back(nodeId2cols.second(node));
202  }
203  }
204  __chi2.setConditioningNodes(cond_nodes);
205 
206 
207  // now, perform :
208  // sum_X sum_Y sum_Z #XYZ * log ( ( #XYZ * #Z ) / ( #XZ * #YZ ) )
209  double score = 0.0;
210 
211  for (std::size_t z = std::size_t(0),
212  beg_xz = std::size_t(0),
213  beg_yz = std::size_t(0),
214  xyz = std::size_t(0);
215  z < Z_size;
216  ++z, beg_xz += X_size, beg_yz += Y_size) {
217  if (N_z[z]) {
218  for (std::size_t y = std::size_t(0), yz = beg_yz; y < Y_size;
219  ++yz, ++y) {
220  for (std::size_t x = std::size_t(0), xz = beg_xz; x < X_size;
221  ++xz, ++x, ++xyz) {
222  const double tmp1 = N_xyz[xyz] * N_z[z];
223  const double tmp2 = N_yz[yz] * N_xz[xz];
224  if ((tmp1 != 0.0) && (tmp2 != 0.0)) {
225  score += N_xyz[xyz] * std::log(tmp1 / tmp2);
226  }
227  }
228  }
229  }
230  }
231 
232 
233  // ok, here, score contains the value of the chi2 formula.
234  // To get a meaningful score, we shall compute the critical values
235  // for the Chi2 distribution and assign as the score of
236  // (score - alpha ) / alpha, where alpha is the critical value
237  const double alpha = __chi2.criticalValue(var_x, var_y);
238  score = (score - alpha) / alpha;
239  return score;
240  } else {
241  // here, there is no conditioning set
242 
243  // indicate to the chi2 distribution the set of conditioning nodes
244  __chi2.setConditioningNodes(__empty_set);
245 
246  // now, perform sum_X sum_Y #XY * log ( ( #XY * N ) / ( #X * #Y ) )
247 
248  // get the counts for all the targets and for the conditioning nodes
249  std::vector< double, ALLOC< double > > N_x = this->_marginalize(
250  std::size_t(1), X_size, Y_size, std::size_t(1), N_xyz);
251  std::vector< double, ALLOC< double > > N_y = this->_marginalize(
252  std::size_t(0), X_size, Y_size, std::size_t(1), N_xyz);
253 
254  // count N
255  double N = 0.0;
256  for (auto n_x : N_x)
257  N += n_x;
258 
259 
260  double score = 0;
261 
262  for (std::size_t y = std::size_t(0), xy = 0; y < Y_size; ++y) {
263  const double tmp_Ny = N_y[y];
264  for (std::size_t x = 0; x < X_size; ++x, ++xy) {
265  const double tmp = (tmp_Ny * N_x[x]);
266  if (tmp) { score += N_xyz[xy] * std::log((N_xyz[xy] * N) / tmp); }
267  }
268  }
269 
270 
271  // ok, here, score contains the value of the chi2 formula.
272  // To get a meaningful score, we shall compute the critical values
273  // for the Chi2 distribution and assign as the score of
274  // (score - alpha ) / alpha, where alpha is the critical value
275  const double alpha = __chi2.criticalValue(var_x, var_y);
276  score = (score - alpha) / alpha;
277  return score;
278  }
279  }
280 
281  } /* namespace learning */
282 
283 } /* namespace gum */
284 
285 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
Definition: indepTestG2.h:48
std::vector< double, ALLOC< double > > _marginalize(const std::size_t node_2_marginalize, const std::size_t X_size, const std::size_t Y_size, const std::size_t Z_size, const std::vector< double, ALLOC< double > > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
virtual IndepTestG2< ALLOC > * clone() const
virtual copy constructor
RecordCounter< ALLOC > _counter
the record counter used for the countings over discrete variables
IndepTestG2(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
STL namespace.
const DatabaseTable< ALLOC > & database() const
return the database used by the score
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
IndependenceTest(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
allocator_type getAllocator() const
returns the allocator used by the score
IndependenceTest< ALLOC > & operator=(const IndependenceTest< ALLOC > &from)
copy operator
double score(const NodeId var1, const NodeId var2)
returns the score of a pair of nodes
virtual ~IndepTestG2()
destructor
IndepTestG2< ALLOC > & operator=(const IndepTestG2< ALLOC > &from)
copy operator
virtual double _score(const IdSet< ALLOC > &idset) final
returns the score for a given IdSet
Size NodeId
Type for node ids.
Definition: graphElements.h:97
Apriori< ALLOC > * _apriori
the expert knowledge a priori we add to the contongency tables