26 #ifndef DOXYGEN_SHOULD_SKIP_THIS 34 template <
template <
typename >
class ALLOC >
36 const DBRowGeneratorParser< ALLOC >& parser,
37 const Apriori< ALLOC >& apriori,
38 const std::vector< std::pair< std::size_t, std::size_t >,
39 ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
40 const Bijection<
NodeId, std::size_t, ALLOC< std::size_t > >&
43 IndependenceTest< ALLOC >(parser, apriori, ranges, nodeId2columns, alloc),
44 __domain_sizes(parser.database().domainSizes()), __chi2(__domain_sizes) {
50 template <
template <
typename >
class ALLOC >
52 const DBRowGeneratorParser< ALLOC >& parser,
53 const Apriori< ALLOC >& apriori,
54 const Bijection<
NodeId, std::size_t, ALLOC< std::size_t > >&
58 __domain_sizes(parser.
database().domainSizes()), __chi2(__domain_sizes) {
64 template <
template <
typename >
class ALLOC >
66 const IndepTestChi2< ALLOC >& from,
69 __domain_sizes(from.__domain_sizes), __chi2(__domain_sizes) {
75 template <
template <
typename >
class ALLOC >
82 template <
template <
typename >
class ALLOC >
84 IndepTestChi2< ALLOC >&& from,
87 __domain_sizes(from.__domain_sizes), __chi2(__domain_sizes) {
93 template <
template <
typename >
class ALLOC >
99 template <
template <
typename >
class ALLOC >
102 ALLOC< IndepTestChi2< ALLOC > > allocator(alloc);
103 IndepTestChi2< ALLOC >* new_score = allocator.allocate(1);
105 allocator.construct(new_score, *
this, alloc);
107 allocator.deallocate(new_score, 1);
116 template <
template <
typename >
class ALLOC >
123 template <
template <
typename >
class ALLOC >
130 template <
template <
typename >
class ALLOC >
132 operator=(
const IndepTestChi2< ALLOC >& from) {
142 template <
template <
typename >
class ALLOC >
144 operator=(IndepTestChi2< ALLOC >&& from) {
153 template <
template <
typename >
class ALLOC >
157 const std::vector<
NodeId, ALLOC< NodeId > >& rhs_ids) {
158 return _statistics(IdSet< ALLOC >(var1, var2, rhs_ids,
false));
162 template <
template <
typename >
class ALLOC >
163 std::pair< double, double >
166 std::vector< double, ALLOC< double > > N_xyz(
167 this->
_counter.counts(idset,
true));
168 const bool informative_external_apriori = this->
_apriori->isInformative();
169 if (informative_external_apriori)
170 this->
_apriori->addAllApriori(idset, N_xyz);
171 const std::size_t all_size = (N_xyz.size());
174 const auto& nodeId2cols = this->
_counter.nodeId2Columns();
177 if (nodeId2cols.empty()) {
181 var_x = nodeId2cols.second(idset[0]);
182 var_y = nodeId2cols.second(idset[1]);
185 const std::size_t X_size =
database.domainSize(var_x);
186 const std::size_t Y_size =
database.domainSize(var_y);
188 double cumulStat = 0;
192 if (idset.hasConditioningSet()) {
193 const std::size_t Z_size = all_size / (X_size * Y_size);
196 std::vector< double, ALLOC< double > > N_xz =
197 this->
_marginalize(std::size_t(1), X_size, Y_size, Z_size, N_xyz);
198 std::vector< double, ALLOC< double > > N_yz =
199 this->
_marginalize(std::size_t(0), X_size, Y_size, Z_size, N_xyz);
200 std::vector< double, ALLOC< double > > N_z =
201 this->
_marginalize(std::size_t(2), X_size, Y_size, Z_size, N_xyz);
204 std::vector< Idx > cond_nodes;
205 cond_nodes.reserve(idset.nbRHSIds());
207 const auto cond_idset = idset.conditionalIdSet().ids();
208 if (nodeId2cols.empty()) {
209 for (
const auto node : cond_idset)
210 cond_nodes.push_back(node);
212 for (
const auto node : cond_idset)
213 cond_nodes.push_back(nodeId2cols.second(node));
216 __chi2.setConditioningNodes(cond_nodes);
222 for (std::size_t z = std::size_t(0),
223 beg_xz = std::size_t(0),
224 beg_yz = std::size_t(0),
225 xyz = std::size_t(0);
227 ++z, beg_xz += X_size, beg_yz += Y_size) {
229 for (std::size_t y = std::size_t(0), yz = beg_yz; y < Y_size;
231 for (std::size_t x = std::size_t(0), xz = beg_xz; x < X_size;
233 const double tmp1 = (N_yz[yz] * N_xz[xz]) / N_z[z];
235 const double tmp2 = N_xyz[xyz] - tmp1;
236 cumulStat += (tmp2 * tmp2) / tmp1;
246 __chi2.setConditioningNodes(__empty_set);
251 std::vector< double, ALLOC< double > > N_x = this->
_marginalize(
252 std::size_t(1), X_size, Y_size, std::size_t(1), N_xyz);
253 std::vector< double, ALLOC< double > > N_y = this->
_marginalize(
254 std::size_t(0), X_size, Y_size, std::size_t(1), N_xyz);
258 for (
const auto n_x : N_x)
261 for (std::size_t y = std::size_t(0), xy = 0; y < Y_size; ++y) {
262 const double tmp_Ny = N_y[y];
263 for (std::size_t x = 0; x < X_size; ++x, ++xy) {
264 const double tmp1 = (tmp_Ny * N_x[x]) / N;
266 const double tmp2 = N_xyz[xy] - tmp1;
267 cumulStat += (tmp2 * tmp2) / tmp1;
273 Size df = __chi2.degreesOfFreedom(var_x, var_y);
274 double pValue = __chi2.probaChi2(cumulStat, df);
275 return std::pair< double, double >(cumulStat, pValue);
280 template <
template <
typename >
class ALLOC >
282 const auto& nodeId2cols = this->
_counter.nodeId2Columns();
284 if (nodeId2cols.empty()) {
288 var_x = nodeId2cols.second(idset[0]);
289 var_y = nodeId2cols.second(idset[1]);
293 double score = stat.first;
299 const double alpha = __chi2.criticalValue(var_x, var_y);
300 score = (score - alpha) / alpha;
std::pair< double, double > _statistics(const IdSet< ALLOC > &idset)
compute the pair <chi2 statistic,pvalue>
virtual IndepTestChi2< ALLOC > * clone() const
virtual copy constructor
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
IndepTestChi2(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
std::vector< double, ALLOC< double > > _marginalize(const std::size_t node_2_marginalize, const std::size_t X_size, const std::size_t Y_size, const std::size_t Z_size, const std::vector< double, ALLOC< double > > &N_xyz) const
returns a counting vector where variables are marginalized from N_xyz
IndepTestChi2< ALLOC > & operator=(const IndepTestChi2< ALLOC > &from)
copy operator
RecordCounter< ALLOC > _counter
the record counter used for the countings over discrete variables
const DatabaseTable< ALLOC > & database() const
return the database used by the score
A class used by learning caches to uniquely represent sets of variables.
gum is the global namespace for all aGrUM entities
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <chi2 statistic,pvalue> for the test of var1 being independent of var2 given rhs_ids
IndependenceTest(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
allocator_type getAllocator() const
returns the allocator used by the score
IndependenceTest< ALLOC > & operator=(const IndependenceTest< ALLOC > &from)
copy operator
double score(const NodeId var1, const NodeId var2)
returns the score of a pair of nodes
virtual double _score(const IdSet< ALLOC > &idset) final
returns the score for a given IdSet
virtual ~IndepTestChi2()
destructor
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size NodeId
Type for node ids.
Apriori< ALLOC > * _apriori
the expert knowledge a priori we add to the contingency tables