29 #ifndef DOXYGEN_SHOULD_SKIP_THIS 36 template <
template <
typename >
class ALLOC >
38 const DBRowGeneratorParser< ALLOC >& parser,
39 const Apriori< ALLOC >& apriori,
40 const std::vector< std::pair< std::size_t, std::size_t >,
41 ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
42 const Bijection<
NodeId, std::size_t, ALLOC< std::size_t > >&
45 IndependenceTest< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
46 __domain_sizes(parser.database().domainSizes()), __chi2(__domain_sizes) {
52 template <
template <
typename >
class ALLOC >
54 const DBRowGeneratorParser< ALLOC >& parser,
55 const Apriori< ALLOC >& apriori,
56 const Bijection<
NodeId, std::size_t, ALLOC< std::size_t > >&
60 __domain_sizes(parser.
database().domainSizes()), __chi2(__domain_sizes) {
66 template <
template <
typename >
class ALLOC >
68 const IndepTestG2< ALLOC >& from,
71 __chi2(__domain_sizes) {
77 template <
template <
typename >
class ALLOC >
83 template <
template <
typename >
class ALLOC >
85 IndepTestG2< ALLOC >&& from,
88 __domain_sizes(from.__domain_sizes), __chi2(__domain_sizes) {
94 template <
template <
typename >
class ALLOC >
100 template <
template <
typename >
class ALLOC >
103 ALLOC< IndepTestG2< ALLOC > > allocator(alloc);
104 IndepTestG2< ALLOC >* new_score = allocator.allocate(1);
106 allocator.construct(new_score, *
this, alloc);
108 allocator.deallocate(new_score, 1);
117 template <
template <
typename >
class ALLOC >
124 template <
template <
typename >
class ALLOC >
131 template <
template <
typename >
class ALLOC >
133 operator=(
const IndepTestG2< ALLOC >& from) {
143 template <
template <
typename >
class ALLOC >
154 template <
template <
typename >
class ALLOC >
158 const std::vector<
NodeId, ALLOC< NodeId > >& rhs_ids) {
159 return _statistics(IdSet< ALLOC >(var1, var2, rhs_ids,
false));
163 template <
template <
typename >
class ALLOC >
164 std::pair< double, double >
167 std::vector< double, ALLOC< double > > N_xyz(
168 this->
_counter.counts(idset,
true));
169 const bool informative_external_apriori = this->
_apriori->isInformative();
170 if (informative_external_apriori)
171 this->
_apriori->addAllApriori(idset, N_xyz);
172 const std::size_t all_size = (N_xyz.size());
175 const auto& nodeId2cols = this->
_counter.nodeId2Columns();
178 if (nodeId2cols.empty()) {
182 var_x = nodeId2cols.second(idset[0]);
183 var_y = nodeId2cols.second(idset[1]);
186 const std::size_t X_size =
database.domainSize(var_x);
187 const std::size_t Y_size =
database.domainSize(var_y);
189 double cumulStat = 0.0;
193 if (idset.hasConditioningSet()) {
194 const std::size_t Z_size = all_size / (X_size * Y_size);
197 std::vector< double, ALLOC< double > > N_xz =
198 this->
_marginalize(std::size_t(1), X_size, Y_size, Z_size, N_xyz);
199 std::vector< double, ALLOC< double > > N_yz =
200 this->
_marginalize(std::size_t(0), X_size, Y_size, Z_size, N_xyz);
201 std::vector< double, ALLOC< double > > N_z =
202 this->
_marginalize(std::size_t(2), X_size, Y_size, Z_size, N_xyz);
205 std::vector< Idx > cond_nodes;
206 cond_nodes.reserve(idset.nbRHSIds());
208 const auto cond_idset = idset.conditionalIdSet().ids();
209 if (nodeId2cols.empty()) {
210 for (
const auto node : cond_idset)
211 cond_nodes.push_back(node);
213 for (
const auto node : cond_idset)
214 cond_nodes.push_back(nodeId2cols.second(node));
217 __chi2.setConditioningNodes(cond_nodes);
222 for (std::size_t z = std::size_t(0),
223 beg_xz = std::size_t(0),
224 beg_yz = std::size_t(0),
225 xyz = std::size_t(0);
227 ++z, beg_xz += X_size, beg_yz += Y_size) {
229 for (std::size_t y = std::size_t(0), yz = beg_yz; y < Y_size;
231 for (std::size_t x = std::size_t(0), xz = beg_xz; x < X_size;
233 const double tmp1 = N_xyz[xyz] * N_z[z];
234 const double tmp2 = N_yz[yz] * N_xz[xz];
235 if ((tmp1 != 0.0) && (tmp2 != 0.0)) {
236 cumulStat += N_xyz[xyz] * std::log(tmp1 / tmp2);
241 xyz += X_size * Y_size;
248 __chi2.setConditioningNodes(__empty_set);
253 std::vector< double, ALLOC< double > > N_x = this->
_marginalize(
254 std::size_t(1), X_size, Y_size, std::size_t(1), N_xyz);
255 std::vector< double, ALLOC< double > > N_y = this->
_marginalize(
256 std::size_t(0), X_size, Y_size, std::size_t(1), N_xyz);
263 for (std::size_t y = std::size_t(0), xy = 0; y < Y_size; ++y) {
264 const double tmp_Ny = N_y[y];
265 for (std::size_t x = 0; x < X_size; ++x, ++xy) {
266 const double tmp = (tmp_Ny * N_x[x]);
267 if ((tmp != 0.0) && (N_xyz[xy] != 0.0)) {
268 cumulStat += N_xyz[xy] * std::log((N_xyz[xy] * N) / tmp);
278 Size df = __chi2.degreesOfFreedom(var_x, var_y);
279 double pValue = __chi2.probaChi2(cumulStat, df);
280 return std::pair< double, double >(cumulStat, pValue);
284 template <
template <
typename >
class ALLOC >
287 const auto& nodeId2cols = this->
_counter.nodeId2Columns();
289 if (nodeId2cols.empty()) {
293 var_x = nodeId2cols.second(idset[0]);
294 var_y = nodeId2cols.second(idset[1]);
298 double score = stat.first;
304 const double alpha = __chi2.criticalValue(var_x, var_y);
305 score = (score - alpha) / alpha;
ALLOC< NodeId > allocator_type
type of the allocators passed as arguments to methods
std::vector< double, ALLOC< double > > _marginalize(const std::size_t node_2_marginalize, const std::size_t X_size, const std::size_t Y_size, const std::size_t Z_size, const std::vector< double, ALLOC< double > > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
virtual IndepTestG2< ALLOC > * clone() const
virtual copy constructor
RecordCounter< ALLOC > _counter
the record counter used for the countings over discrete variables
IndepTestG2(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
const DatabaseTable< ALLOC > & database() const
return the database used by the score
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
IndependenceTest(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
allocator_type getAllocator() const
returns the allocator used by the score
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <G2 statistic, p-value> for the test that var1 is independent of var2 given rhs_ids
IndependenceTest< ALLOC > & operator=(const IndependenceTest< ALLOC > &from)
copy operator
double score(const NodeId var1, const NodeId var2)
returns the score of a pair of nodes
virtual ~IndepTestG2()
destructor
IndepTestG2< ALLOC > & operator=(const IndepTestG2< ALLOC > &from)
copy operator
virtual double _score(const IdSet< ALLOC > &idset) final
returns the score for a given IdSet
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size NodeId
Type for node ids.
Apriori< ALLOC > * _apriori
the a priori expert knowledge we add to the contingency tables
std::pair< double, double > _statistics(const IdSet< ALLOC > &idset)
compute the pair <G2 statistic, p-value>