29 #ifndef DOXYGEN_SHOULD_SKIP_THIS 38 template <
template <
typename >
class ALLOC >
40 const DBRowGeneratorParser< ALLOC >& parser,
41 const Apriori< ALLOC >& apriori,
42 const std::vector< std::pair< std::size_t, std::size_t >,
43 ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
44 const Bijection<
NodeId, std::size_t, ALLOC< std::size_t > >&
47 IndependenceTest< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
48 __domain_sizes(parser.database().domainSizes()), __chi2(__domain_sizes) {
54 template <
template <
typename >
class ALLOC >
56 const DBRowGeneratorParser< ALLOC >& parser,
57 const Apriori< ALLOC >& apriori,
58 const Bijection<
NodeId, std::size_t, ALLOC< std::size_t > >&
62 __domain_sizes(parser.
database().domainSizes()), __chi2(__domain_sizes) {
68 template <
template <
typename >
class ALLOC >
70 const IndepTestChi2< ALLOC >& from,
73 __domain_sizes(from.__domain_sizes), __chi2(__domain_sizes) {
79 template <
template <
typename >
class ALLOC >
86 template <
template <
typename >
class ALLOC >
88 IndepTestChi2< ALLOC >&& from,
91 __domain_sizes(from.__domain_sizes), __chi2(__domain_sizes) {
97 template <
template <
typename >
class ALLOC >
103 template <
template <
typename >
class ALLOC >
106 ALLOC< IndepTestChi2< ALLOC > > allocator(alloc);
107 IndepTestChi2< ALLOC >* new_score = allocator.allocate(1);
109 allocator.construct(new_score, *
this, alloc);
111 allocator.deallocate(new_score, 1);
120 template <
template <
typename >
class ALLOC >
127 template <
template <
typename >
class ALLOC >
134 template <
template <
typename >
class ALLOC >
136 operator=(
const IndepTestChi2< ALLOC >& from) {
146 template <
template <
typename >
class ALLOC >
148 operator=(IndepTestChi2< ALLOC >&& from) {
157 template <
template <
typename >
class ALLOC >
161 const std::vector<
NodeId, ALLOC< NodeId > >& rhs_ids) {
162 return _statistics(IdSet< ALLOC >(var1, var2, rhs_ids,
false));
166 template <
template <
typename >
class ALLOC >
167 std::pair< double, double >
170 std::vector< double, ALLOC< double > > N_xyz(
171 this->
_counter.counts(idset,
true));
172 const bool informative_external_apriori = this->
_apriori->isInformative();
173 if (informative_external_apriori)
174 this->
_apriori->addAllApriori(idset, N_xyz);
175 const std::size_t all_size = (N_xyz.size());
178 const auto& nodeId2cols = this->
_counter.nodeId2Columns();
181 if (nodeId2cols.empty()) {
185 var_x = nodeId2cols.second(idset[0]);
186 var_y = nodeId2cols.second(idset[1]);
189 const std::size_t X_size =
database.domainSize(var_x);
190 const std::size_t Y_size =
database.domainSize(var_y);
192 double cumulStat = 0;
196 if (idset.hasConditioningSet()) {
197 const std::size_t Z_size = all_size / (X_size * Y_size);
200 std::vector< double, ALLOC< double > > N_xz =
201 this->
_marginalize(std::size_t(1), X_size, Y_size, Z_size, N_xyz);
202 std::vector< double, ALLOC< double > > N_yz =
203 this->
_marginalize(std::size_t(0), X_size, Y_size, Z_size, N_xyz);
204 std::vector< double, ALLOC< double > > N_z =
205 this->
_marginalize(std::size_t(2), X_size, Y_size, Z_size, N_xyz);
208 std::vector< Idx > cond_nodes;
209 cond_nodes.reserve(idset.nbRHSIds());
211 const auto cond_idset = idset.conditionalIdSet().ids();
212 if (nodeId2cols.empty()) {
213 for (
const auto node : cond_idset)
214 cond_nodes.push_back(node);
216 for (
const auto node : cond_idset)
217 cond_nodes.push_back(nodeId2cols.second(node));
220 __chi2.setConditioningNodes(cond_nodes);
225 for (std::size_t z = std::size_t(0),
226 beg_xz = std::size_t(0),
227 beg_yz = std::size_t(0),
228 xyz = std::size_t(0);
230 ++z, beg_xz += X_size, beg_yz += Y_size) {
232 for (std::size_t y = std::size_t(0), yz = beg_yz; y < Y_size;
234 for (std::size_t x = std::size_t(0), xz = beg_xz; x < X_size;
236 const double tmp1 = (N_yz[yz] * N_xz[xz]) / N_z[z];
238 const double tmp2 = N_xyz[xyz] - tmp1;
239 cumulStat += (tmp2 * tmp2) / tmp1;
244 xyz += X_size * Y_size;
251 __chi2.setConditioningNodes(__empty_set);
256 std::vector< double, ALLOC< double > > N_x = this->
_marginalize(
257 std::size_t(1), X_size, Y_size, std::size_t(1), N_xyz);
258 std::vector< double, ALLOC< double > > N_y = this->
_marginalize(
259 std::size_t(0), X_size, Y_size, std::size_t(1), N_xyz);
263 for (
const auto n_x : N_x)
266 for (std::size_t y = std::size_t(0), xy = 0; y < Y_size; ++y) {
267 const double tmp_Ny = N_y[y];
268 for (std::size_t x = 0; x < X_size; ++x, ++xy) {
269 const double tmp1 = (tmp_Ny * N_x[x]) / N;
271 const double tmp2 = N_xyz[xy] - tmp1;
272 cumulStat += (tmp2 * tmp2) / tmp1;
278 Size df = __chi2.degreesOfFreedom(var_x, var_y);
279 double pValue = __chi2.probaChi2(cumulStat, df);
280 return std::pair< double, double >(cumulStat, pValue);
285 template <
template <
typename >
class ALLOC >
287 const auto& nodeId2cols = this->
_counter.nodeId2Columns();
289 if (nodeId2cols.empty()) {
293 var_x = nodeId2cols.second(idset[0]);
294 var_y = nodeId2cols.second(idset[1]);
298 double score = stat.first;
304 const double alpha = __chi2.criticalValue(var_x, var_y);
305 score = (score - alpha) / alpha;
std::pair< double, double > _statistics(const IdSet< ALLOC > &idset)
computes the pair <chi2 statistic, p-value>
virtual IndepTestChi2< ALLOC > * clone() const
virtual copy constructor
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
IndepTestChi2(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
std::vector< double, ALLOC< double > > _marginalize(const std::size_t node_2_marginalize, const std::size_t X_size, const std::size_t Y_size, const std::size_t Z_size, const std::vector< double, ALLOC< double > > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
IndepTestChi2< ALLOC > & operator=(const IndepTestChi2< ALLOC > &from)
copy operator
RecordCounter< ALLOC > _counter
the record counter used for the countings over discrete variables
const DatabaseTable< ALLOC > & database() const
return the database used by the score
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <chi2 statistic, p-value> for a test of var1 independent of var2 given rhs_ids
IndependenceTest(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
allocator_type getAllocator() const
returns the allocator used by the score
IndependenceTest< ALLOC > & operator=(const IndependenceTest< ALLOC > &from)
copy operator
double score(const NodeId var1, const NodeId var2)
returns the score of a pair of nodes
virtual double _score(const IdSet< ALLOC > &idset) final
returns the score for a given IdSet
virtual ~IndepTestChi2()
destructor
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size NodeId
Type for node ids.
Apriori< ALLOC > * _apriori
the expert knowledge a priori we add to the contingency tables