aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
Miic.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief The 3off2 algorithm
25  *
26  * The ThreeOffTwo class implements the 3off2 algorithm as proposed by Affeldt and
27  * al. in https://doi.org/10.1186/s12859-015-0856-x.
28  * It starts by eliminating edges that correspond to independent variables to
29  * build the skeleton of the graph, and then directs the remaining edges to get an
30  * essential graph. Latent variables can be detected using bi-directed arcs.
31  *
32  * The variant MIIC is also implemented based on
33  * https://doi.org/10.1371/journal.pcbi.1005662. Only the orientation phase differs
34  * from 3off2, with a diffferent ranking method and different propagation rules.
35  *
36  * @author Quentin FALCAND and Pierre-Henri WUILLEMIN(@LIP6) and Maria Virginia
37  * RUIZ CUEVAS
38  */
39 #ifndef GUM_LEARNING_3_OFF_2_H
40 #define GUM_LEARNING_3_OFF_2_H
41 
42 #include <string>
43 #include <vector>
44 
45 #include <agrum/BN/BayesNet.h>
46 #include <agrum/config.h>
47 #include <agrum/tools/core/approximations/IApproximationSchemeConfiguration.h>
48 #include <agrum/tools/core/approximations/approximationScheme.h>
49 #include <agrum/tools/core/heap.h>
50 #include <agrum/tools/graphs/DAG.h>
51 #include <agrum/tools/graphs/mixedGraph.h>
52 #include <agrum/tools/stattests/correctedMutualInformation.h>
53 
54 namespace gum {
55 
56  namespace learning {
57 
59  public:
60  bool operator()(
61  const std::pair<
63  double >& e1,
64  const std::pair<
66  double >& e2) const;
67  };
68 
70  public:
71  bool operator()(
72  const std::pair< std::tuple< NodeId, NodeId, NodeId >*, double >& e1,
73  const std::pair< std::tuple< NodeId, NodeId, NodeId >*, double >& e2)
74  const;
75  };
76 
78  public:
79  bool operator()(
80  const std::
81  tuple< std::tuple< NodeId, NodeId, NodeId >*, double, double, double >&
82  e1,
83  const std::
84  tuple< std::tuple< NodeId, NodeId, NodeId >*, double, double, double >&
85  e2) const;
86  };
87  /**
88  * @class Miic
89  * @brief The miic learning algorithm
90  *
91  * The miic class implements the miic algorithm based on
92  * https://doi.org/10.1371/journal.pcbi.1005662.
93  * It starts by eliminating edges that correspond to independent variables to
94  * build the skeleton of the graph, and then directs the remaining edges to get
95  * an
96  * essential graph. Latent variables can be detected using bi-directed arcs.
97  *
98  * The variant 3off2 is also implemented as proposed by Affeldt and
99  * al. in https://doi.org/10.1186/s12859-015-0856-x. Only the orientation
100  * phase differs
101  * from miic, with a different ranking method and different propagation rules.
102  *
103  * @ingroup learning_group
104  */
105  class Miic: public ApproximationScheme {
106  public:
107  // ##########################################################################
108  /// @name Constructors / Destructors
109  // ##########################################################################
110  /// @{
111 
112  /// default constructor
113  Miic();
114 
115  /// default constructor with maxLog
116  Miic(int maxLog);
117 
118  /// copy constructor
119  Miic(const Miic& from);
120 
121  /// move constructor
122  Miic(Miic&& from);
123 
124  /// destructor
125  ~Miic();
126 
127  /// @}
128 
129  /// copy operator
130  Miic& operator=(const Miic& from);
131 
132  /// move operator
133  Miic& operator=(Miic&& from);
134 
135  // ##########################################################################
136  /// @name Accessors / Modifiers
137  // ##########################################################################
138  /// @{
139 
140 
141  /// learns the structure of an Essential Graph
142  /** @param I A mutual information instance that will do the computations
143  * and has loaded the database.
144  * @param graph the MixedGraph we start from for the learning
145  * */
147  MixedGraph graph);
148 
149  /// learns the structure of an Bayesian network, ie a DAG, by first learning
150  /// an Essential graph and then directing the remaining edges.
151  /** @param I A mutual information instance that will do the computations
152  * and has loaded the database
153  * @param graph the MixedGraph we start from for the learning
154  */
156 
157  /// learns the structure and the parameters of a BN
158  /** @param selector A selector class that computes the best changes that
159  * can be applied and that enables the user to get them very easily.
160  * Typically, the selector is a GraphChangesSelector4DiGraph<SCORE,
161  * STRUCT_CONSTRAINT, GRAPH_CHANGES_GENERATOR>.
162  * @param estimator A estimator.
163  * @param names The variables names.
164  * @param modal the domain sizes of the random variables observed in the
165  * database
166  * @param translator The cell translator to use.
167  * @param initial_dag the DAG we start from for our learning */
168  template < typename GUM_SCALAR = double,
169  typename GRAPH_CHANGES_SELECTOR,
170  typename PARAM_ESTIMATOR >
173  DAG initial_dag = DAG());
174 
175  /// get the list of arcs hiding latent variables
176  const std::vector< Arc > latentVariables() const;
177 
178  /// Sets the orientation phase to follow the one of the MIIC algorithm
179  void setMiicBehaviour();
180  /// Sets the orientation phase to follow the one of the 3off2 algorithm
181  void set3off2Behaviour();
182 
183  /// Set a ensemble of constraints for the orientation phase
184  void addConstraints(
185  HashTable< std::pair< NodeId, NodeId >, char > constraints);
186  /// @}
187 
188  protected:
189  // ##########################################################################
190  /// @name Main phases
191  // ##########################################################################
192  /// @{
193 
194  /// Initiation phase
195  /**
196  * We go over all edges and test if the variables are marginally independent.
197  * If they are, the edge is deleted. If not, the best contributor is found.
198  *
199  * @param I A mutual information instance that will do the computations
200  * and has loaded the database.
201  * @param graph the MixedGraph we start from for the learning
202  * @param sep_set the separation set for independent couples, here set to {}
203  * @param rank_ the heap of ranks of the algorithm
204  */
205  void initiation_(
206  CorrectedMutualInformation<>& I,
207  MixedGraph& graph,
209  Heap< std::pair<
211  double >,
213 
214  /// Iteration phase
215  /**
216  * As long as we find important nodes for edges, we go over them to see if
217  * we can assess the conditional independence of the variables.
218  *
219  * @param I A mutual information instance that will do the computations
220  * and has loaded the database.
221  * @param graph the MixedGraph returned from the previous phase
222  * @param sep_set the separation set for independent couples, built during
223  * the iterations of the phase
224  * @param rank_ the heap of ranks of the algorithm
225  */
226  void iteration_(
227  CorrectedMutualInformation<>& I,
228  MixedGraph& graph,
230  Heap< std::pair<
232  double >,
234 
235  /// Orientation phase from the 3off2 algorithm, returns a CPDAG
236  /** @param I A mutual information instance that will do the computations
237  * and has loaded the database.
238  * @param graph the MixedGraph returned from the previous phase
239  * @param sep_set the separation set for independent couples, built during
240  * the previous phase
241  */
242  void orientation_3off2_(CorrectedMutualInformation<>& I,
243  MixedGraph& graph,
244  const HashTable< std::pair< NodeId, NodeId >,
245  std::vector< NodeId > >& sep_set);
246 
247  /// Modified version of the orientation phase that tries to propagate
248  /// orientations from both orientations in case of a bidirected arc, not used
249  /** @param I A mutual information instance that will do the computations
250  * and has loaded the database.
251  * @param graph the MixedGraph returned from the previous phase
252  * @param sep_set the separation set for independent couples, built during
253  * the previous phase
254  */
255  void orientation_latents_(CorrectedMutualInformation<>& I,
256  MixedGraph& graph,
257  const HashTable< std::pair< NodeId, NodeId >,
258  std::vector< NodeId > >& sep_set);
259 
260  /// Orientation phase from the MIIC algorithm, returns a mixed graph that
261  /// may contain circles
262  /** @param I A mutual information instance that will do the computations
263  * and has loaded the database.
264  * @param graph the MixedGraph returned from the previous phase
265  * @param sep_set the separation set for independent couples, built during
266  * the previous phase
267  */
268  void orientation_miic_(CorrectedMutualInformation<>& I,
269  MixedGraph& graph,
270  const HashTable< std::pair< NodeId, NodeId >,
271  std::vector< NodeId > >& sep_set);
272  /// @}
273 
274  /// finds the best contributor node for a pair given a conditioning set
275  /**@param x first node
276  * @param y second node
277  * @param ui conditioning set
278  * @param I A mutual information instance that will do the computations
279  * and has loaded the database.
280  * @param graph containing the assessed nodes
281  * @param rank_ the heap of ranks of the algorithm
282  */
284  NodeId x,
285  NodeId y,
286  const std::vector< NodeId >& ui,
287  const MixedGraph& graph,
288  CorrectedMutualInformation<>& I,
289  Heap< std::pair<
291  double >,
293 
294  /// gets the list of unshielded triples in the graph in decreasing value of
295  ///|I'(x, y, z|{ui})|
296  /*@param graph graph in which to find the triples
297  *@param I mutual information object to compute the scores
298  *@param sep_set hashtable storing the separation sets for pairs of variables
299  */
300  std::vector< std::pair< std::tuple< NodeId, NodeId, NodeId >*, double > >
303  const HashTable< std::pair< NodeId, NodeId >,
304  std::vector< NodeId > >& sep_set);
305 
306  /// gets the list of unshielded triples in the graph in decreasing value of
307  ///|I'(x, y, z|{ui})|, prepares the orientation matrix for MIIC
308  /*@param graph graph in which to find the triples
309  *@param I mutual information object to compute the scores
310  *@param sep_set hashtable storing the separation sets for pairs of variables
311  * @param marks hashtable containing the orientation marks for edges
312  */
314  double,
315  double,
316  double > >
318  const MixedGraph& graph,
320  const HashTable< std::pair< NodeId, NodeId >, std::vector< NodeId > >&
321  sep_set,
322  HashTable< std::pair< NodeId, NodeId >, char >& marks);
323 
324  /// Gets the orientation probabilities like MIIC for the orientation phase
325  /*@param graph graph in which to find the triples
326  *@param proba_triples probabilities for the different triples to update
327  */
329  double,
330  double,
331  double > >
333  const MixedGraph& graph,
335  double,
336  double,
337  double > > proba_triples);
338 
339  /// Propagates the orientation from a node to its neighbours
340  /*@param dag graph in which to which to propagate arcs
341  *@param node node on which neighbours to propagate th orientation
342  *@todo : avoid exception driven programmation in circle detection
343  */
344  void propagatesHead_(MixedGraph& graph, NodeId node);
345 
346 
347  private:
348  /// Fixes the maximum log that we accept in exponential computations
349  int maxLog__ = 100;
350  /// an empty conditioning set
352  /// an empty vector of arcs
354 
355  /// size of the database
357  /// wether to use the miic algorithm or not
358  bool usemiic__{false};
359 
360  /// Storing the propabilities for each arc set in the graph
362 
363  /// Initial marks for the orientation phase, used to convey constraints
365 
366  /// checks for directed paths in a graph, consider double arcs like edges
367  /*@param graph MixedGraph in which to search the path
368  *@param n1 tail of the path
369  *@param n2 head of the path
370  *@param countArc bool to know if we consider arc as a directed path
371  */
372  const bool existsDirectedPath__(const MixedGraph& graph,
373  const NodeId n1,
374  const NodeId n2,
375  const bool countArc = true) const;
376  };
377 
378  } /* namespace learning */
379 
380 } /* namespace gum */
381 
382 /// always include templated methods
383 //#include <agrum/BN/learning/threeOffTwo_tpl.h>
384 
385 #endif /* GUM_LEARNING_3_OFF_2_H */
void propagatesHead_(MixedGraph &graph, NodeId node)
Propagates the orientation from a node to its neighbours.
Definition: Miic.cpp:1170
void set3off2Behaviour()
Sets the orientation phase to follow the one of the 3off2 algorithm.
Definition: Miic.cpp:1254
void findBestContributor_(NodeId x, NodeId y, const std::vector< NodeId > &ui, const MixedGraph &graph, CorrectedMutualInformation<> &I, Heap< std::pair< std::tuple< NodeId, NodeId, NodeId, std::vector< NodeId > > *, double >, GreaterPairOn2nd > &rank_)
finds the best contributor node for a pair given a conditioning set
Definition: Miic.cpp:901
MixedGraph learnMixedStructure(CorrectedMutualInformation<> &I, MixedGraph graph)
learns the structure of an Essential Graph
Definition: Miic.cpp:119
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
void orientation_miic_(CorrectedMutualInformation<> &I, MixedGraph &graph, const HashTable< std::pair< NodeId, NodeId >, std::vector< NodeId > > &sep_set)
Orientation phase from the MIIC algorithm, returns a mixed graph that may contain circles...
Definition: Miic.cpp:602
Miic & operator=(const Miic &from)
copy operator
Definition: Miic.cpp:62
void addConstraints(HashTable< std::pair< NodeId, NodeId >, char > constraints)
Set a ensemble of constraints for the orientation phase.
Definition: Miic.cpp:1256
std::vector< std::tuple< std::tuple< NodeId, NodeId, NodeId > *, double, double, double > > getUnshieldedTriplesMIIC_(const MixedGraph &graph, CorrectedMutualInformation<> &I, const HashTable< std::pair< NodeId, NodeId >, std::vector< NodeId > > &sep_set, HashTable< std::pair< NodeId, NodeId >, char > &marks)
gets the list of unshielded triples in the graph in decreasing value of |I&#39;(x, y, z|{ui})|...
Definition: Miic.cpp:1030
ArcProperty< double > arc_probas__
Storing the propabilities for each arc set in the graph.
Definition: Miic.h:361
std::vector< std::tuple< std::tuple< NodeId, NodeId, NodeId > *, double, double, double > > updateProbaTriples_(const MixedGraph &graph, std::vector< std::tuple< std::tuple< NodeId, NodeId, NodeId > *, double, double, double > > proba_triples)
Gets the orientation probabilities like MIIC for the orientation phase.
Definition: Miic.cpp:1082
Miic(int maxLog)
default constructor with maxLog
Definition: Miic.cpp:46
int maxLog__
Fixes the maximum log that we accept in exponential computations.
Definition: Miic.h:349
const std::vector< Arc > latentVariables() const
get the list of arcs hiding latent variables
Definition: Miic.cpp:1237
Miic & operator=(Miic &&from)
move operator
Definition: Miic.cpp:68
void orientation_3off2_(CorrectedMutualInformation<> &I, MixedGraph &graph, const HashTable< std::pair< NodeId, NodeId >, std::vector< NodeId > > &sep_set)
Orientation phase from the 3off2 algorithm, returns a CPDAG.
Definition: Miic.cpp:258
void setMiicBehaviour()
Sets the orientation phase to follow the one of the MIIC algorithm.
Definition: Miic.cpp:1253
std::vector< std::pair< std::tuple< NodeId, NodeId, NodeId > *, double > > getUnshieldedTriples_(const MixedGraph &graph, CorrectedMutualInformation<> &I, const HashTable< std::pair< NodeId, NodeId >, std::vector< NodeId > > &sep_set)
gets the list of unshielded triples in the graph in decreasing value of |I&#39;(x, y, z|{ui})| ...
Definition: Miic.cpp:988
const bool existsDirectedPath__(const MixedGraph &graph, const NodeId n1, const NodeId n2, const bool countArc=true) const
checks for directed paths in a graph, consider double arcs like edges
Definition: Miic.cpp:1262
bool operator()(const std::pair< std::tuple< NodeId, NodeId, NodeId > *, double > &e1, const std::pair< std::tuple< NodeId, NodeId, NodeId > *, double > &e2) const
Definition: Miic.cpp:84
HashTable< std::pair< NodeId, NodeId >, char > initial_marks__
Initial marks for the orientation phase, used to convey constraints.
Definition: Miic.h:364
void initiation_(CorrectedMutualInformation<> &I, MixedGraph &graph, HashTable< std::pair< NodeId, NodeId >, std::vector< NodeId > > &sep_set, Heap< std::pair< std::tuple< NodeId, NodeId, NodeId, std::vector< NodeId > > *, double >, GreaterPairOn2nd > &rank_)
Initiation phase.
Definition: Miic.cpp:161
bool usemiic__
wether to use the miic algorithm or not
Definition: Miic.h:358
Miic()
default constructor
Definition: Miic.cpp:43
std::vector< Arc > latent_couples__
an empty vector of arcs
Definition: Miic.h:353
Miic(const Miic &from)
copy constructor
Definition: Miic.cpp:49
bool operator()(const std::tuple< std::tuple< NodeId, NodeId, NodeId > *, double, double, double > &e1, const std::tuple< std::tuple< NodeId, NodeId, NodeId > *, double, double, double > &e2) const
Definition: Miic.cpp:91
const std::vector< NodeId > empty_set__
an empty conditioning set
Definition: Miic.h:351
Size N__
size of the database
Definition: Miic.h:356
The miic learning algorithm.
Definition: Miic.h:105
void orientation_latents_(CorrectedMutualInformation<> &I, MixedGraph &graph, const HashTable< std::pair< NodeId, NodeId >, std::vector< NodeId > > &sep_set)
Modified version of the orientation phase that tries to propagate orientations from both orientations...
Definition: Miic.cpp:450
bool operator()(const std::pair< std::tuple< NodeId, NodeId, NodeId, std::vector< NodeId > > *, double > &e1, const std::pair< std::tuple< NodeId, NodeId, NodeId, std::vector< NodeId > > *, double > &e2) const
Definition: Miic.cpp:74
Miic(Miic &&from)
move constructor
Definition: Miic.cpp:54
~Miic()
destructor
Definition: Miic.cpp:59
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
void iteration_(CorrectedMutualInformation<> &I, MixedGraph &graph, HashTable< std::pair< NodeId, NodeId >, std::vector< NodeId > > &sep_set, Heap< std::pair< std::tuple< NodeId, NodeId, NodeId, std::vector< NodeId > > *, double >, GreaterPairOn2nd > &rank_)
Iteration phase.
Definition: Miic.cpp:201
BayesNet< GUM_SCALAR > learnBN(GRAPH_CHANGES_SELECTOR &selector, PARAM_ESTIMATOR &estimator, DAG initial_dag=DAG())
learns the structure and the parameters of a BN
Definition: Miic.cpp:1245
DAG learnStructure(CorrectedMutualInformation<> &I, MixedGraph graph)
learns the structure of an Bayesian network, ie a DAG, by first learning an Essential graph and then ...
Definition: Miic.cpp:1126