aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
indepTestChi2.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing Chi2 independence test scores
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 
28 #ifndef GUM_LEARNING_INDEP_TEST_CHI2_H
29 #define GUM_LEARNING_INDEP_TEST_CHI2_H
30 
31 #include <vector>
32 
33 #include <agrum/tools/core/math/chi2.h>
34 #include <agrum/tools/stattests/independenceTest.h>
35 #include <agrum/BN/learning/aprioris/aprioriNoApriori.h>
36 
37 namespace gum {
38 
39  namespace learning {
40 
41  /** @class IndepTestChi2
42  * @brief the class for computing Chi2 independence test scores
43  * @headerfile indepTestChi2.h <agrum/BN/learning/scores_and_tests/indepTestChi2.h>
44  * @ingroup learning_scores
45  */
46  template < template < typename > class ALLOC = std::allocator >
48  public:
49  /// type for the allocators passed in arguments of methods
51 
52  // ##########################################################################
53  /// @name Constructors / Destructors
54  // ##########################################################################
55  /// @{
56 
57  /// default constructor
58  /** @param parser the parser used to parse the database
59  * @param external_apriori An apriori that we add to the computation
60  * of the score (this should come from expert knowledge): this consists in
61  * adding numbers to countings in the contingency tables
62  * @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database row
63  * indices. The countings are then performed only on the union of the
64  * rows [Xi,Yi), i in {1,...,n}. This is useful, e.g., when performing
65  * cross validation tasks, in which part of the database should be ignored.
66  * An empty set of ranges is equivalent to an interval [X,Y) ranging over
67  * the whole database.
68  * @param nodeId2columns a mapping from the ids of the nodes in the
69  * graphical model to the corresponding column in the DatabaseTable
70  * parsed by the parser. This enables estimating from a database in
71  * which variable A corresponds to the 2nd column the parameters of a BN
72  * in which variable A has a NodeId of 5. An empty nodeId2columns
73  * bijection means that the mapping is an identity, i.e., the value of a
74  * NodeId is equal to the index of the column in the DatabaseTable.
75  * @param alloc the allocator used to allocate the structures within the
76  * Score.
77  * @warning If nodeId2columns is not empty, then only the scores over the
78  * ids belonging to this bijection can be computed: applying method
79  * score() over other ids will raise exception NotFound. */
81  const DBRowGeneratorParser< ALLOC >& parser,
82  const Apriori< ALLOC >& external_apriori,
83  const std::vector< std::pair< std::size_t, std::size_t >,
84  ALLOC< std::pair< std::size_t, std::size_t > > >&
85  ranges,
86  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
88  = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
90 
91 
92  /// default constructor
93  /** @param parser the parser used to parse the database
94  * @param apriori An apriori that we add to the computation of the score
95  * @param nodeId2columns a mapping from the ids of the nodes in the
96  * graphical model to the corresponding column in the DatabaseTable
97  * parsed by the parser. This enables estimating from a database in
98  * which variable A corresponds to the 2nd column the parameters of a BN
99  * in which variable A has a NodeId of 5. An empty nodeId2columns
100  * bijection means that the mapping is an identity, i.e., the value of a
101  * NodeId is equal to the index of the column in the DatabaseTable.
102  * @param alloc the allocator used to allocate the structures within the
103  * Score.
104  * @warning If nodeId2columns is not empty, then only the scores over the
105  * ids belonging to this bijection can be computed: applying method
106  * score() over other ids will raise exception NotFound. */
107  IndepTestChi2(const DBRowGeneratorParser< ALLOC >& parser,
108  const Apriori< ALLOC >& apriori,
109  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
111  = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
112  const allocator_type& alloc = allocator_type());
113 
114  /// copy constructor
115  IndepTestChi2(const IndepTestChi2< ALLOC >& from);
116 
117  /// copy constructor with a given allocator
118  IndepTestChi2(const IndepTestChi2< ALLOC >& from,
119  const allocator_type& alloc);
120 
121  /// move constructor
122  IndepTestChi2(IndepTestChi2< ALLOC >&& from);
123 
124  /// move constructor with a given allocator
125  IndepTestChi2(IndepTestChi2< ALLOC >&& from, const allocator_type& alloc);
126 
127  /// virtual copy constructor
128  virtual IndepTestChi2< ALLOC >* clone() const;
129 
130  /// virtual copy constructor with a given allocator
131  virtual IndepTestChi2< ALLOC >* clone(const allocator_type& alloc) const;
132 
133  /// destructor
134  virtual ~IndepTestChi2();
135 
136  /// @}
137 
138 
139  // ##########################################################################
140  /// @name Operators
141  // ##########################################################################
142 
143  /// @{
144 
145  /// copy operator
147 
148  /// move operator
150 
151  /// @}
152 
153  // ##########################################################################
154  /// @name Statistics
155  // ##########################################################################
156 
157  /// @{
158 
159  /// returns the pair <chi2 statistic, p-value> for the test of independence
160  /// between var1 and var2 given rhs_ids
161  std::pair< double, double >
162  statistics(NodeId var1,
163  NodeId var2,
164  const std::vector< NodeId, ALLOC< NodeId > >& rhs_ids = {});
165 
166  /// @}
167 
168 
169  protected:
170  /// returns the score for a given IdCondSet
171  /** @throws OperationNotAllowed is raised if the score does not support
172  * calling method score on such an idset (due to too many/too few variables
173  * in the left hand side or the right hand side of the idset). */
174  virtual double score_(const IdCondSet< ALLOC >& idset) final;
175 
176  /// compute the pair <chi2 statistic,pvalue>
177  std::pair< double, double > statistics_(const IdCondSet< ALLOC >& idset);
178 
179 #ifndef DOXYGEN_SHOULD_SKIP_THIS
180 
181  private:
182  /// the domain sizes of the variables
183  std::vector< std::size_t, ALLOC< std::size_t > > domain_sizes__;
184 
185  /// a chi2 distribution for computing critical values
186  Chi2 chi2__;
187 
188  /// an empty conditioning set
189  const std::vector< Idx > empty_set__;
190 
191 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
192  };
193 
194  } /* namespace learning */
195 
196 } /* namespace gum */
197 
198 
199 #ifndef GUM_NO_EXTERN_TEMPLATE_CLASS
200 extern template class gum::learning::IndepTestChi2<>;
201 #endif
202 
203 
204 // always include the template implementation
205 #include <agrum/tools/stattests/indepTestChi2_tpl.h>
206 
207 #endif /* GUM_LEARNING_INDEP_TEST_CHI2_H */
virtual IndepTestChi2< ALLOC > * clone() const
virtual copy constructor
IndepTestChi2(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
IndepTestChi2(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
std::pair< double, double > statistics_(const IdCondSet< ALLOC > &idset)
compute the pair <chi2 statistic,pvalue>
IndepTestChi2< ALLOC > & operator=(const IndepTestChi2< ALLOC > &from)
copy operator
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
the class for computing Chi2 independence test scores
Definition: indepTestChi2.h:47
IndepTestChi2(const IndepTestChi2< ALLOC > &from)
copy constructor
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <chi2 statistic,pvalue> for a test var1 indep var2 given rhs_ids
IndepTestChi2< ALLOC > & operator=(IndepTestChi2< ALLOC > &&from)
move operator
virtual IndepTestChi2< ALLOC > * clone(const allocator_type &alloc) const
virtual copy constructor with a given allocator
virtual double score_(const IdCondSet< ALLOC > &idset) final
returns the score for a given IdCondSet
IndepTestChi2(IndepTestChi2< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
virtual ~IndepTestChi2()
destructor
IndepTestChi2(const IndepTestChi2< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
IndepTestChi2(IndepTestChi2< ALLOC > &&from)
move constructor
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)