aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
gspan.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Headers of gspan.
25  *
26  * @author Lionel TORTI and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 
29 #ifndef GUM_GSPAN_H
30 #define GUM_GSPAN_H
31 
32 #include <algorithm>
33 #include <list>
34 #include <ostream>
35 #include <string>
36 #include <vector>
37 
38 #include <agrum/tools/core/timer.h>
39 
40 #include <agrum/BN/inference/variableElimination.h>
41 
42 #include <agrum/PRM/PRM.h>
43 #include <agrum/PRM/gspan/DFSTree.h>
44 #include <agrum/PRM/gspan/interfaceGraph.h>
45 
46 namespace gum {
47  namespace prm {
48 
49  /**
50  * @class GSpan
51  * @headerfile gspan.h <agrum/PRM/gspan.h>
52  *
53  * @brief This class discovers pattern in a PRM<GUM_SCALAR>'s
54  *PRMSystem<GUM_SCALAR>
55  *to speed up structured
56  * inference.
57  *
58  * This class is not an inference algorithm for PRM<GUM_SCALAR>, however it
59  *can
60  *be used to
61  * speed up structured inference as it will discover repeated patterns
62  *including
63  * more than one PRMInstance<GUM_SCALAR>.
64  *
65  * This algorithm proceeds in three main steps represented by the private
66  * methods GSpan::sortNodesAndEdges__(), GSpan::subgraph_mining__() and
67  * GSpan::sortPatterns__().
68  */
69  template < typename GUM_SCALAR >
70  class GSpan {
71  public:
72  // ========================================================================
73  /// @name Constructors & destructor.
74  // ========================================================================
75  /// @{
76 
77  /**
78  * Default constructor.
79  * @param prm The PRM<GUM_SCALAR> used by this class.
80  * @param sys The PRMSystem<GUM_SCALAR> on which this class searches for
81  * patterns.
82  * @param strategy The search strategy used for pattern mining, the
83  * default strategy is gspan::FrequenceSearch.
84  */
85  GSpan(const PRM< GUM_SCALAR >& prm,
86  const PRMSystem< GUM_SCALAR >& sys,
87  gspan::SearchStrategy< GUM_SCALAR >* strategy = 0);
88 
89  /// Destructor.
90  ~GSpan();
91 
92  /// @}
93  // ========================================================================
94  /// @name Getters and setters.
95  // ========================================================================
96  /// @{
97 
98  /**
99  * Returns the maximal depth of the DFSTree used to discover new patterns.
100  *
101  * @return the maximal depth of the DFSTree used to discover new patterns.
102  */
103  Size getMaxDFSDepth() const;
104 
105  /**
106  * Defines the maximal depth of the DFSTree used by this class to discover
107  * new patterns.
108  *
109  * @param depth The new maximal DFSTree depth.
110  */
111  void setMaxDFSDepth(Size depth);
112 
113  /**
114  * Returns the DFSTree used to discover new patters.
115  * @returns the DFSTree used to discover new patters.
116  */
117  gspan::DFSTree< GUM_SCALAR >& tree();
118 
119  /**
120  * Returns the DFSTree used to discover new patters.
121  * @returns the DFSTree used to discover new patters.
122  */
123  const gspan::DFSTree< GUM_SCALAR >& tree() const;
124 
125  /**
126  * Returns the InterfaceGraph used by this.
127  * @returns the InterfaceGraph used by this.
128  */
129  gspan::InterfaceGraph< GUM_SCALAR >& interfaceGraph();
130 
131  /**
132  * Returns the InterfaceGraph used by this.
133  * @returns the InterfaceGraph used by this.
134  */
135  const gspan::InterfaceGraph< GUM_SCALAR >& interfaceGraph() const;
136 
137  /// @}
138  // ========================================================================
139  /// @name Pattern discovery methods.
140  // ========================================================================
141  /// @{
142 
143  /**
144  * @brief This will methods will discover repeated patterns in the
145  * PRMSystem<GUM_SCALAR> assigned to this class.
146  *
147  * The results are saved in a vector of Patterns which can be obtained
148  * by calling GSpan::patterns().
149  */
150  void discoverPatterns();
151 
152  /**
153  * Returns the Pattern mined by this class in a decreasing order of
154  * interest.
155  * @returns the Pattern mined by this class in a decreasing order of
156  * interest.
157  */
158  std::vector< gspan::Pattern* >& patterns();
159 
160  /**
161  * Returns the Pattern mined by this class in a decreasing order of
162  * interest.
163  * @returns the Pattern mined by this class in a decreasing order of
164  * interest.
165  */
166  const std::vector< gspan::Pattern* >& patterns() const;
167 
168  /// Code alias.
169  typedef Set< Sequence< PRMInstance< GUM_SCALAR >* >* > MatchedInstances;
170 
171  /**
172  * Returns a mapping between patterns and the sequence of instance in the
173  * interface graph matching them.
174  * @returns a mapping between patterns and the sequence of instance in the
175  * interface graph matching them.
176  */
177  MatchedInstances& matches(const gspan::Pattern& p);
178 
179  /**
180  * Returns a mapping between patterns and the sequence of instance in the
181  * interface graph matching them.
182  * @returns a mapping between patterns and the sequence of instance in the
183  * interface graph matching them.
184  */
185  const MatchedInstances& matches(const gspan::Pattern& p) const;
186 
187  // /**
188  // * Returns the cumulative sum of all the cliques size created after
189  // lifting
190  // * the patterns.
191  // * @returns the cumulative sum of all the cliques size created after
192  // lifting
193  // * the patterns.
194  // */
195  // double patterns_cost() const;
196 
197  // /**
198  // * Returns the cumulative sum of all the cliques size created after
199  // lifting
200  // * the inner attributes.
201  // * @returns the cumulative sum of all the cliques size created after
202  // lifting
203  // * the inner attributes.
204  // */
205  // double sve_cost() const;
206 
207  ///@}
208  private:
209  // ========================================================================
210  /// @name Private Members.
211  // ========================================================================
212  /// @{
213 
214  /// The interface graph used by this class.
215  gspan::InterfaceGraph< GUM_SCALAR >* graph__;
216 
217  /// The DFSTree used to discover new patters.
218  gspan::DFSTree< GUM_SCALAR > tree__;
219 
220  /// The max depth allowed for the DSF tree
221  Size depth_stop__;
222 
223  /// The vector of discovered patters, in decreasing order of interest.
224  std::vector< gspan::Pattern* > patterns__;
225 
226  /// The vector of nodes in graph__, in decreasing order of interest.
227  std::vector< gspan::LabelData* > nodes__;
228 
229  /// The vector of edges in graph__, in decreasing order of interest.
230  std::vector< gspan::LabelData* > edges__;
231 
232  /// Mapping between labels and their cost.
233  HashTable< gspan::LabelData*, Idx > cost__;
234 
235  /// Mapping between a pattern and the multiset of instances matched
236  /// to it.
237  HashTable< gspan::Pattern*, MatchedInstances* > matched_instances__;
238 
239  /// Contains all instance which belongs to a discovered and used pattern.
240  Set< PRMInstance< GUM_SCALAR >* > chosen__;
241 
242  /// @}
243  // ========================================================================
244  /// @name Private Methods
245  // ========================================================================
246  /// @{
247 
248  /// Sort the nodes and edges of graph__.
249  void sortNodesAndEdges__();
250 
251  /// Discovers new patterns by developing p.
252  /// @param graph The interface graph used in this discovery process.
253  /// @param p The pattern used as a base for discovery.
254  void subgraph_mining__(gspan::InterfaceGraph< GUM_SCALAR >& graph,
255  gspan::Pattern& p);
256 
257  /// Returns the cost with respect to an interface size and its frequency.
258  /// @param interface_size The size of all output nodes of a pattern.
259  /// @param frequency The frequency of the pattern in the current interface
260  /// graph.
261  /// @return the cost with respect to an interface size and its frequency.
262  Size cost_func__(Size interface_size, Size frequency);
263 
264  /// Sort the patterns and compute their respective costs.
265  void sortPatterns__();
266 
267  /// Returns true if e is an eligible root edge.
268  /// @param e An EdgeData<GUM_SCALAR>.
269  /// @return true if e is an eligible root edge.
270  bool isEdgeEligible__(typename gspan::EdgeData< GUM_SCALAR >* e);
271 
272  /// @}
273 
274  // ========================================================================
275  /// @name Private classes
276  // ========================================================================
277  /// @{
278 
279  /**
280  * @class LabelSort
281  * @headerfile gspan.h <agrum/PRM/gspan.h>
282  * Private class used to sort LabelData using STL sort algorithms.
283  */
284  struct LabelSort {
285  /**
286  * Default constructor.
287  * @param my_gspan A pointer over the GSpan using this class.
288  */
289  LabelSort(GSpan* my_gspan);
290 
291  /**
292  * Copy constructor.
293  * @param source The copied LabelSort.
294  */
295  LabelSort(const LabelSort& source);
296 
297  /**
298  * Destructor.
299  */
300  ~LabelSort();
301 
302  /**
303  * Returns true if i's cost is lesser than j's.
304  * @param i A LabelData.
305  * @param j A LabelData.
306  * @return true if i's cost is above than j's cost.
307  */
308  bool operator()(gspan::LabelData* i, gspan::LabelData* j);
309 
310  /// A pointer over an instance of the GSpan class using this class.
311  GSpan* gspan;
312  };
313 
314  /**
315  * @class PatternSort
316  * @headerfile gspan.h <agrum/PRM/gspan.h>
317  * Private class used to sort Pattern using STL sort algorithms.
318  */
319  struct PatternSort {
320  /**
321  * Default constructor.
322  * @param my_gspan A pointer over the GSpan using this class.
323  */
324  PatternSort(GSpan* my_gspan);
325 
326  /**
327  * Copy constructor.
328  * @param source The copied PatternSort.
329  */
330  PatternSort(const PatternSort& source);
331 
332  /**
333  * Destructor.
334  */
335  ~PatternSort();
336 
337  /**
338  * Returns true if i's cost is lesser than j's.
339  * @param i A Pattern.
340  * @param j A Pattern.
341  * @return true if i's cost is above j's cost.
342  */
343  bool operator()(gspan::Pattern* i, gspan::Pattern* j);
344 
345  /// A pointer over an instance of the GSpan class using this class.
346  GSpan* gspan;
347  };
348 
349  /// @}
350  };
351 
352 
353 #ifndef GUM_NO_EXTERN_TEMPLATE_CLASS
354  extern template class GSpan< double >;
355 #endif
356 
357 
358  } /* namespace prm */
359 } /* namespace gum */
360 
361 #include <agrum/PRM/inference/gspan_tpl.h>
362 
363 #endif /* GUM_GSPAN_H */