aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
gspan.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Headers of gspan.
25  *
26  * @author Lionel TORTI and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 
29 #ifndef GUM_GSPAN_H
30 #define GUM_GSPAN_H
31 
32 #include <algorithm>
33 #include <list>
34 #include <ostream>
35 #include <string>
36 #include <vector>
37 
38 #include <agrum/tools/core/timer.h>
39 
40 #include <agrum/BN/inference/variableElimination.h>
41 
42 #include <agrum/PRM/PRM.h>
43 #include <agrum/PRM/gspan/DFSTree.h>
44 #include <agrum/PRM/gspan/interfaceGraph.h>
45 
46 namespace gum {
47  namespace prm {
48 
49  /**
50  * @class GSpan
51  * @headerfile gspan.h <agrum/PRM/gspan.h>
52  *
53  * @brief This class discovers pattern in a PRM<GUM_SCALAR>'s
54  *PRMSystem<GUM_SCALAR>
55  *to speed up structured
56  * inference.
57  *
58  * This class is not an inference algorithm for PRM<GUM_SCALAR>, however it
59  *can
60  *be used to
61  * speed up structured inference as it will discover repeated patterns
62  *including
63  * more than one PRMInstance<GUM_SCALAR>.
64  *
65  * This algorithm proceeds in three main steps represented by the private
66  * methods GSpan:: _sortNodesAndEdges_(), GSpan:: _subgraph_mining_() and
67  * GSpan:: _sortPatterns_().
68  */
69  template < typename GUM_SCALAR >
70  class GSpan {
71  public:
72  // ========================================================================
73  /// @name Constructors & destructor.
74  // ========================================================================
75  /// @{
76 
77  /**
78  * Default constructor.
79  * @param prm The PRM<GUM_SCALAR> used by this class.
80  * @param sys The PRMSystem<GUM_SCALAR> on which this class searches for
81  * patterns.
82  * @param strategy The search strategy used for pattern mining, the
83  * default strategy is gspan::FrequenceSearch.
84  */
85  GSpan(const PRM< GUM_SCALAR >& prm,
86  const PRMSystem< GUM_SCALAR >& sys,
87  gspan::SearchStrategy< GUM_SCALAR >* strategy = 0);
88 
89  /// Destructor.
90  ~GSpan();
91 
92  /// @}
93  // ========================================================================
94  /// @name Getters and setters.
95  // ========================================================================
96  /// @{
97 
98  /**
99  * Returns the maximal depth of the DFSTree used to discover new patterns.
100  *
101  * @return the maximal depth of the DFSTree used to discover new patterns.
102  */
103  Size getMaxDFSDepth() const;
104 
105  /**
106  * Defines the maximal depth of the DFSTree used by this class to discover
107  * new patterns.
108  *
109  * @param depth The new maximal DFSTree depth.
110  */
111  void setMaxDFSDepth(Size depth);
112 
113  /**
114  * Returns the DFSTree used to discover new patters.
115  * @returns the DFSTree used to discover new patters.
116  */
117  gspan::DFSTree< GUM_SCALAR >& tree();
118 
119  /**
120  * Returns the DFSTree used to discover new patters.
121  * @returns the DFSTree used to discover new patters.
122  */
123  const gspan::DFSTree< GUM_SCALAR >& tree() const;
124 
125  /**
126  * Returns the InterfaceGraph used by this.
127  * @returns the InterfaceGraph used by this.
128  */
129  gspan::InterfaceGraph< GUM_SCALAR >& interfaceGraph();
130 
131  /**
132  * Returns the InterfaceGraph used by this.
133  * @returns the InterfaceGraph used by this.
134  */
135  const gspan::InterfaceGraph< GUM_SCALAR >& interfaceGraph() const;
136 
137  /// @}
138  // ========================================================================
139  /// @name Pattern discovery methods.
140  // ========================================================================
141  /// @{
142 
143  /**
144  * @brief This will methods will discover repeated patterns in the
145  * PRMSystem<GUM_SCALAR> assigned to this class.
146  *
147  * The results are saved in a vector of Patterns which can be obtained
148  * by calling GSpan::patterns().
149  */
150  void discoverPatterns();
151 
152  /**
153  * Returns the Pattern mined by this class in a decreasing order of
154  * interest.
155  * @returns the Pattern mined by this class in a decreasing order of
156  * interest.
157  */
158  std::vector< gspan::Pattern* >& patterns();
159 
160  /**
161  * Returns the Pattern mined by this class in a decreasing order of
162  * interest.
163  * @returns the Pattern mined by this class in a decreasing order of
164  * interest.
165  */
166  const std::vector< gspan::Pattern* >& patterns() const;
167 
168  /// Code alias.
169  typedef Set< Sequence< PRMInstance< GUM_SCALAR >* >* > MatchedInstances;
170 
171  /**
172  * Returns a mapping between patterns and the sequence of instance in the
173  * interface graph matching them.
174  * @returns a mapping between patterns and the sequence of instance in the
175  * interface graph matching them.
176  */
177  MatchedInstances& matches(const gspan::Pattern& p);
178 
179  /**
180  * Returns a mapping between patterns and the sequence of instance in the
181  * interface graph matching them.
182  * @returns a mapping between patterns and the sequence of instance in the
183  * interface graph matching them.
184  */
185  const MatchedInstances& matches(const gspan::Pattern& p) const;
186 
187  // /**
188  // * Returns the cumulative sum of all the cliques size created after
189  // lifting
190  // * the patterns.
191  // * @returns the cumulative sum of all the cliques size created after
192  // lifting
193  // * the patterns.
194  // */
195  // double patterns_cost() const;
196 
197  // /**
198  // * Returns the cumulative sum of all the cliques size created after
199  // lifting
200  // * the inner attributes.
201  // * @returns the cumulative sum of all the cliques size created after
202  // lifting
203  // * the inner attributes.
204  // */
205  // double sve_cost() const;
206 
207  ///@}
208  private:
209  // ========================================================================
210  /// @name Private Members.
211  // ========================================================================
212  /// @{
213 
214  /// The interface graph used by this class.
215  gspan::InterfaceGraph< GUM_SCALAR >* _graph_;
216 
217  /// The DFSTree used to discover new patters.
218  gspan::DFSTree< GUM_SCALAR > _tree_;
219 
220  /// The max depth allowed for the DSF tree
221  Size _depth_stop_;
222 
223  /// The vector of discovered patters, in decreasing order of interest.
224  std::vector< gspan::Pattern* > _patterns_;
225 
226  /// The vector of nodes in _graph_, in decreasing order of interest.
227  std::vector< gspan::LabelData* > _nodes_;
228 
229  /// The vector of edges in _graph_, in decreasing order of interest.
230  std::vector< gspan::LabelData* > _edges_;
231 
232  /// Mapping between labels and their cost.
233  HashTable< gspan::LabelData*, Idx > _cost_;
234 
235  /// Mapping between a pattern and the multiset of instances matched
236  /// to it.
237  HashTable< gspan::Pattern*, MatchedInstances* > _matched_instances_;
238 
239  /// Contains all instance which belongs to a discovered and used pattern.
240  Set< PRMInstance< GUM_SCALAR >* > _chosen_;
241 
242  /// @}
243  // ========================================================================
244  /// @name Private Methods
245  // ========================================================================
246  /// @{
247 
248  /// Sort the nodes and edges of _graph_.
249  void _sortNodesAndEdges_();
250 
251  /// Discovers new patterns by developing p.
252  /// @param graph The interface graph used in this discovery process.
253  /// @param p The pattern used as a base for discovery.
254  void _subgraph_mining_(gspan::InterfaceGraph< GUM_SCALAR >& graph, gspan::Pattern& p);
255 
256  /// Returns the cost with respect to an interface size and its frequency.
257  /// @param interface_size The size of all output nodes of a pattern.
258  /// @param frequency The frequency of the pattern in the current interface
259  /// graph.
260  /// @return the cost with respect to an interface size and its frequency.
261  Size _cost_func_(Size interface_size, Size frequency);
262 
263  /// Sort the patterns and compute their respective costs.
264  void _sortPatterns_();
265 
266  /// Returns true if e is an eligible root edge.
267  /// @param e An EdgeData<GUM_SCALAR>.
268  /// @return true if e is an eligible root edge.
269  bool _isEdgeEligible_(typename gspan::EdgeData< GUM_SCALAR >* e);
270 
271  /// @}
272 
273  // ========================================================================
274  /// @name Private classes
275  // ========================================================================
276  /// @{
277 
278  /**
279  * @class LabelSort
280  * @headerfile gspan.h <agrum/PRM/gspan.h>
281  * Private class used to sort LabelData using STL sort algorithms.
282  */
283  struct LabelSort {
284  /**
285  * Default constructor.
286  * @param my_gspan A pointer over the GSpan using this class.
287  */
288  LabelSort(GSpan* my_gspan);
289 
290  /**
291  * Copy constructor.
292  * @param source The copied LabelSort.
293  */
294  LabelSort(const LabelSort& source);
295 
296  /**
297  * Destructor.
298  */
299  ~LabelSort();
300 
301  /**
302  * Returns true if i's cost is lesser than j's.
303  * @param i A LabelData.
304  * @param j A LabelData.
305  * @return true if i's cost is above than j's cost.
306  */
307  bool operator()(gspan::LabelData* i, gspan::LabelData* j);
308 
309  /// A pointer over an instance of the GSpan class using this class.
310  GSpan* gspan;
311  };
312 
313  /**
314  * @class PatternSort
315  * @headerfile gspan.h <agrum/PRM/gspan.h>
316  * Private class used to sort Pattern using STL sort algorithms.
317  */
318  struct PatternSort {
319  /**
320  * Default constructor.
321  * @param my_gspan A pointer over the GSpan using this class.
322  */
323  PatternSort(GSpan* my_gspan);
324 
325  /**
326  * Copy constructor.
327  * @param source The copied PatternSort.
328  */
329  PatternSort(const PatternSort& source);
330 
331  /**
332  * Destructor.
333  */
334  ~PatternSort();
335 
336  /**
337  * Returns true if i's cost is lesser than j's.
338  * @param i A Pattern.
339  * @param j A Pattern.
340  * @return true if i's cost is above j's cost.
341  */
342  bool operator()(gspan::Pattern* i, gspan::Pattern* j);
343 
344  /// A pointer over an instance of the GSpan class using this class.
345  GSpan* gspan;
346  };
347 
348  /// @}
349  };
350 
351 
352 #ifndef GUM_NO_EXTERN_TEMPLATE_CLASS
353  extern template class GSpan< double >;
354 #endif
355 
356 
357  } /* namespace prm */
358 } /* namespace gum */
359 
360 #include <agrum/PRM/inference/gspan_tpl.h>
361 
362 #endif /* GUM_GSPAN_H */