aGrUM  0.14.2
gspan_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
28 
29 namespace gum {
30  namespace prm {
31 
32  template < typename GUM_SCALAR >
34  Timer t;
35  __sortNodesAndEdges();
37 
38  for (auto root = __tree.roots().begin(); root != __tree.roots().end();
39  ++root) {
40  if (__tree.strategy().accept_root(&(__tree.pattern(*root)))) {
41  gspan::Pattern& p = __tree.pattern(*root);
42  __subgraph_mining(graph, p);
43 
44  for (const auto node : __tree.iso_graph(p).nodes()) {
45  PRMInstance< GUM_SCALAR >* u = __tree.iso_map(p, node).atPos(0);
46  PRMInstance< GUM_SCALAR >* v = __tree.iso_map(p, node).atPos(1);
47  graph.graph().eraseEdge(Edge(graph.id(u), graph.id(v)));
48  }
49  }
50  }
51 
52  __sortPatterns();
53  }
54 
55  template < typename GUM_SCALAR >
57  for (auto iter = __graph->labels().begin(); iter != __graph->labels().end();
58  ++iter) {
59  try {
60  if (__graph->nodes(iter.second()).size() >= 2) {
61  __cost.insert(iter.second(),
62  __cost_func(iter.second()->tree_width,
63  __graph->nodes(iter.second()).size()));
64  __nodes.push_back(const_cast< gspan::LabelData* >(iter.second()));
65  }
66  } catch (NotFound&) {
67  // It's a label over edges
68  if (__isEdgeEligible(*(__graph->edges(iter.second()).begin()))) {
69  __cost.insert(iter.second(),
70  __cost_func(iter.second()->tree_width,
71  __graph->edges(iter.second()).size()));
72  __edges.push_back(iter.second());
73  }
74  }
75  }
76 
79  GSpan< GUM_SCALAR >::LabelSort my_sort(this);
80  std::sort(__nodes.begin(), __nodes.end(), my_sort);
81  std::sort(__edges.begin(), __edges.end(), my_sort);
82  Size idx = 0;
83 
84  for (auto iter = __nodes.begin(); iter != __nodes.end(); ++iter) {
85  (*iter)->id = ++idx;
86  new_labels->insert(idx, *iter);
87  }
88 
89  for (auto iter = __edges.begin(); iter != __edges.end(); ++iter) {
90  (*iter)->id = ++idx;
91  new_labels->insert(idx, *iter);
92  __tree.addRoot(**iter);
93  }
94 
95  delete __graph->__labels;
96  __graph->__labels = new_labels;
97  }
98 
99  template < typename GUM_SCALAR >
102  std::vector< gspan::Pattern* > stack;
103  stack.push_back(&pat);
104  // Pointers used in the following while
105  gspan::Pattern* p = nullptr;
107  nullptr;
108  gspan::EdgeGrowth< GUM_SCALAR >* edge_growth = nullptr;
109  Sequence< PRMInstance< GUM_SCALAR >* >* seq = nullptr;
110  PRMInstance< GUM_SCALAR >* current = nullptr;
111  PRMInstance< GUM_SCALAR >* neighbor = nullptr;
112 
113  // Neighbor_id is the neighbor's id in the interface graph and
114  // neighbor_node
115  // is its id in the rightmost path in the case of a backward edge growth
116  NodeId current_id = 0;
117  NodeId neighbor_node = 0;
118  gspan::LabelData* neighbor_label = 0;
119 
120  typename gspan::EdgeData< GUM_SCALAR >* edge_data = nullptr;
121 
122  size_t idx;
123  const std::list< NodeId >* children = 0;
124 
125  while (!stack.empty()) {
126  // Getting next pattern
127  p = stack.back();
128  stack.pop_back();
129 
130  if (p->code().codes.size() < __depth_stop) {
131  // We need the rightmost path of p
132  std::list< NodeId > r_path;
133  p->rightmostPath(r_path);
134  // Mapping used to count each possible child of p, the position in the
135  // vector
136  // matches the one in the rightmost path
137  std::vector<
139  count_vector;
140 
141  for (size_t i = 0; i < r_path.size(); ++i)
142  count_vector.push_back(
143  new HashTable< std::string, gspan::EdgeGrowth< GUM_SCALAR >* >());
144 
145  // For each subgraph represented by p, we look for a valid edge growth
146  // for
147  // each instance match of p in its isomorphism graph.
148  for (const auto iso_node : __tree.iso_graph(*p).nodes()) {
149  seq = &(__tree.iso_map(*p, iso_node));
150  idx = 0;
151 
152  for (const auto node : r_path) {
153  edge_count = count_vector[idx];
154  // Retrieving the equivalent instance in the current match
155  current = seq->atPos((Idx)(node - 1));
156  current_id = ig.id(current);
157  // Checking for edges not in p
158 
159  for (const auto neighbor_id : ig.graph().neighbours(current_id)) {
160  neighbor = ig.node(neighbor_id).n;
161 
162  // We want a forward edge in any case or a backward edge if
163  // current
164  // is the rightmost vertex
165  if ((!seq->exists(neighbor)) || (node == r_path.back())) {
166  // Things we need to know: the LabelData data of the neighbour
167  // and,
168  // if it's a backward edge, its node id in the rightmost path
169  edge_data = &(ig.edge(current_id, neighbor_id));
170  neighbor_label =
171  (neighbor == edge_data->u) ? edge_data->l_u : edge_data->l_v;
172  neighbor_node =
173  (seq->exists(neighbor)) ? seq->pos(neighbor) + 1 : 0;
174  // Adding the edge growth to the edge_growth hashtable
176  node, edge_data->l, neighbor_label, neighbor_node);
177 
178  try {
179  edge_growth = (*edge_count)[temp_growth.toString()];
180  edge_growth->insert(current, neighbor);
181  } catch (NotFound&) {
182  edge_growth = new gspan::EdgeGrowth< GUM_SCALAR >(
183  node, edge_data->l, neighbor_label, neighbor_node);
184  edge_growth->insert(current, neighbor);
185  edge_count->insert(edge_growth->toString(), edge_growth);
186  }
187  }
188  }
189  }
190  }
191 
192  // Removing any infrequent child
193  for (size_t node = 0; node < count_vector.size(); ++node) {
194  edge_count = count_vector[node];
195 
196  for (const auto& elt : *edge_count) {
197  try {
198  __tree.growPattern(*p, *elt.second, 2);
199  } catch (OperationNotAllowed&) {
200  // The child was not minimal or was not worth considering
201  }
202 
203  delete elt.second;
204  }
205 
206  delete edge_count;
207  }
208 
209  // Calling __subgraph_mining over children of p
210  children = &(__tree.children(*p));
211 
212  for (std::list< NodeId >::const_reverse_iterator child =
213  children->rbegin();
214  child != children->rend();
215  ++child)
216  stack.push_back(&(__tree.pattern(*child)));
217  }
218  }
219  }
220 
221  template < typename GUM_SCALAR >
223  // First we put all the patterns in __patterns.
224  std::vector< NodeId > stack;
225 
226  for (std::list< NodeId >::reverse_iterator root = tree().roots().rbegin();
227  root != tree().roots().rend();
228  ++root)
229  stack.push_back(*root);
230 
231  NodeId id = 0;
232  std::list< NodeId >* children = nullptr;
233 
234  while (!stack.empty()) {
235  id = stack.back();
236  stack.pop_back();
237  __patterns.push_back(&(tree().pattern(id)));
238  children = &(tree().children(tree().pattern(id)));
239 
240  for (std::list< NodeId >::reverse_iterator child = children->rbegin();
241  child != children->rend();
242  ++child)
243  stack.push_back(*child);
244  }
245 
246  if (!__patterns.empty()) {
247  // We sort __patterns.
248  GSpan< GUM_SCALAR >::PatternSort my_sort(this);
249  std::sort(__patterns.begin(), __patterns.end(), my_sort);
250  // Now we need to find all the matches we can, using __patterns.
251  // We start by the best Pattern and add it's maximal independent set to
252  // __chosen
255  Sequence< PRMInstance< GUM_SCALAR >* >* match = nullptr;
256 
257  for (const auto node : tree().max_indep_set(*(__patterns.front()))) {
258  match = &(tree().iso_map(*(__patterns.front()), node));
259 
260  for (const auto i : *match)
261  __chosen.insert(i);
262 
263  matches->insert(match);
264  }
265 
266  __matched_instances.insert(__patterns.front(), matches);
267  // Now we see what kind of pattern we can still use
268  bool found;
269  UndiGraph* iso_graph = nullptr;
270 
271  for (auto patt = __patterns.begin() + 1; patt != __patterns.end();
272  ++patt) {
273  UndiGraph reduced_iso_graph;
274  std::vector< NodeId > degree_list;
275  iso_graph = &(tree().iso_graph(**patt));
276 
277  for (const auto node : iso_graph->nodes()) {
278  found = false;
279  match = &(tree().iso_map(**patt, node));
280 
281  for (const auto i : *match)
282  if (__chosen.exists(i)) {
283  found = true;
284  break;
285  }
286 
287  if (!found) {
288  // We add the pattern to the reduced isomorphism graph to compute
289  // the
290  // max independent set
291  // over the remaining matches
292  reduced_iso_graph.addNodeWithId(node);
293 
294  for (const auto iso : reduced_iso_graph.nodes())
295  if (iso_graph->existsEdge(node, iso))
296  reduced_iso_graph.addEdge(node, iso);
297 
298  degree_list.push_back(node);
299  }
300  }
301 
302  // We create a new set to hold all the chosen matches of patt
304  // We can compute the max independent set and the matches belonging to
305  // it
307  reduced_iso_graph);
308  std::sort(degree_list.begin(), degree_list.end(), my_sort);
309  Set< NodeId > removed;
310 
311  for (const auto node : degree_list)
312  if (!removed.exists(node)) {
313  // First we update removed to follow the max independent set
314  // algorithm
315  removed.insert(node);
316 
317  for (const auto neighbor : reduced_iso_graph.neighbours(node))
318  removed.insert(neighbor);
319 
320  // Second we update match and matches to keep track of the current
321  // match
322  match = &(tree().iso_map(**patt, node));
323  matches->insert(match);
324 
325  for (const auto elt : *match)
326  __chosen.insert(elt);
327  }
328 
329  __matched_instances.insert(*patt, matches);
330  }
331 
332  // // We remove patterns with 0 matches
333  std::vector< size_t > trash;
334 
335  for (size_t idx = 0; idx < __patterns.size(); ++idx)
336  if (__matched_instances[__patterns[idx]]->size() < 2)
337  trash.push_back(idx);
338 
339  while (trash.size()) {
340  delete __matched_instances[__patterns[trash.back()]];
341  __matched_instances.erase(__patterns[trash.back()]);
342  // delete __patterns[trash.back()];
343  __patterns[trash.back()] = __patterns.back();
344  __patterns.pop_back();
345  trash.pop_back();
346  }
347  }
348  }
349 
350  template < typename GUM_SCALAR >
351  INLINE
353  const PRMSystem< GUM_SCALAR >& sys,
355  __graph(new gspan::InterfaceGraph< GUM_SCALAR >(sys)),
356  __tree(*__graph, strategy), __depth_stop(INT_MAX) {
357  GUM_CONSTRUCTOR(GSpan);
358  }
359 
360  template < typename GUM_SCALAR >
362  GUM_DESTRUCTOR(GSpan);
363 
364  for (const auto& elt : __matched_instances)
365  delete elt.second;
366 
367  delete __graph;
368  }
369 
370  template < typename GUM_SCALAR >
372  return __depth_stop;
373  }
374 
375  template < typename GUM_SCALAR >
377  __depth_stop = depth;
378  }
379 
380  template < typename GUM_SCALAR >
382  return __tree;
383  }
384 
385  template < typename GUM_SCALAR >
387  return __tree;
388  }
389 
390  template < typename GUM_SCALAR >
392  Size frequency) {
393  return Idx(interface_size * frequency);
394  }
395 
396  template < typename GUM_SCALAR >
397  INLINE std::vector< gspan::Pattern* >& GSpan< GUM_SCALAR >::patterns() {
398  return __patterns;
399  }
400 
401  template < typename GUM_SCALAR >
402  INLINE const std::vector< gspan::Pattern* >&
404  return __patterns;
405  }
406 
407  template < typename GUM_SCALAR >
410  return *(__matched_instances[const_cast< gspan::Pattern* >(&p)]);
411  }
412 
413  template < typename GUM_SCALAR >
414  INLINE const typename GSpan< GUM_SCALAR >::MatchedInstances&
416  return *(__matched_instances[const_cast< gspan::Pattern* >(&p)]);
417  }
418 
419  template < typename GUM_SCALAR >
422  return *__graph;
423  }
424 
425  template < typename GUM_SCALAR >
428  return *__graph;
429  }
430 
431  template < typename GUM_SCALAR >
432  INLINE bool
434  return (__graph->edges(e->l).size() >= 2)
435  && (__graph->nodes(e->l_u).size() >= 2)
436  && (__graph->nodes(e->l_v).size() >= 2);
437  }
438 
439  // LalbeSort
440 
441  template < typename GUM_SCALAR >
443  gspan(my_gspan) {
444  GUM_CONSTRUCTOR(GSpan< GUM_SCALAR >::LabelSort);
445  }
446 
447  template < typename GUM_SCALAR >
449  gspan(source.gspan) {
450  GUM_CONS_CPY(GSpan< GUM_SCALAR >::LabelSort);
451  }
452 
453  template < typename GUM_SCALAR >
455  GUM_DESTRUCTOR(GSpan< GUM_SCALAR >::LabelSort);
456  }
457 
458  template < typename GUM_SCALAR >
460  gspan::LabelData* j) {
461  // We want a descending order
462  // return gspan->__cost[i] > gspan->__cost[j];
463  return gspan->__tree.strategy()(i, j);
464  }
465 
466  // PatternSort
467 
468  template < typename GUM_SCALAR >
470  gspan(my_gspan) {
471  GUM_CONSTRUCTOR(GSpan< GUM_SCALAR >::PatternSort);
472  }
473 
474  template < typename GUM_SCALAR >
475  INLINE
477  gspan(source.gspan) {
478  GUM_CONS_CPY(GSpan< GUM_SCALAR >::PatternSort);
479  }
480 
481  template < typename GUM_SCALAR >
483  GUM_DESTRUCTOR(GSpan< GUM_SCALAR >::PatternSort);
484  }
485 
486  template < typename GUM_SCALAR >
488  gspan::Pattern* j) {
489  // We want a descending order
490  return gspan->tree().strategy().operator()(i, j);
491  }
492 
493  } /* namespace prm */
494 } /* namespace gum */
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
GSpan * gspan
A pointer over an instance of the GSpan class using this class.
Definition: gspan.h:310
bool __isEdgeEligible(typename gspan::EdgeData< GUM_SCALAR > *e)
Returns true if e is an eligible root edge.
Definition: gspan_tpl.h:433
This class is used to define an edge growth of a pattern in this DFSTree.
Definition: edgeGrowth.h:60
virtual void addNodeWithId(const NodeId id)
try to insert a node with the given id
Idx pos(const Key &key) const
Returns the position of the object passed in argument (if it exists).
Definition: sequence_tpl.h:515
PatternSort(GSpan *my_gspan)
Default constructor.
Definition: gspan_tpl.h:469
Set< Sequence< PRMInstance< GUM_SCALAR > *> *> MatchedInstances
Code alias.
Definition: gspan.h:167
Inner class to handle data about labels in this interface graph.
PRMInstance< GUM_SCALAR > * u
One of the two instance represented by this edge.
Headers of gspan.
An PRMInstance is a Bayesian Network fragment defined by a Class and used in a PRMSystem.
Definition: PRMInstance.h:60
DFSCode & code()
Returns the DFSCode of this Pattern.
Definition: pattern_inl.h:170
The generic class for storing (ordered) sequences of objects.
Definition: sequence.h:1019
virtual void addEdge(const NodeId first, const NodeId second)
insert a new edge into the undirected graph
Definition: undiGraph_inl.h:32
LabelSort(GSpan *my_gspan)
Default constructor.
Definition: gspan_tpl.h:442
Size __depth_stop
The max depth allowed for the DSF tree.
Definition: gspan.h:219
LabelData * l_u
The label data of u.
NodeData< GUM_SCALAR > & node(const PRMInstance< GUM_SCALAR > *i)
Returns data about a node.
Inner class to handle data about edges in __graph.
void __subgraph_mining(gspan::InterfaceGraph< GUM_SCALAR > &graph, gspan::Pattern &p)
Discovers new patterns by developing p.
Definition: gspan_tpl.h:100
This is used to generate the max_indep_set of a Pattern.
Definition: DFSTree.h:244
This class represent the interface graph of a given gum::prm::PRMSystem<GUM_SCALAR>.
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
std::vector< EdgeCode *> codes
The vector containing the EdgeCode composing this DFSCode.
Definition: DFSCode.h:87
void __sortPatterns()
Sort the patterns and compute their respective costs.
Definition: gspan_tpl.h:222
The class for generic Hash Tables.
Definition: hashTable.h:676
bool operator()(gspan::Pattern *i, gspan::Pattern *j)
Returns true if i&#39;s cost is lesser than j&#39;s.
Definition: gspan_tpl.h:487
const NodeSet & neighbours(const NodeId id) const
returns the set of edges adjacent to a given node
LabelData * l
The labal data of this edge.
Representation of a setA Set is a structure that contains arbitrary elements.
Definition: set.h:162
gspan::DFSTree< GUM_SCALAR > __tree
The DFSTree used to discover new patters.
Definition: gspan.h:216
A DFSTree is used by gspan to sort lexicographically patterns discovered in an interface graph...
Definition: DFSTree.h:68
~PatternSort()
Destructor.
Definition: gspan_tpl.h:482
~GSpan()
Destructor.
Definition: gspan_tpl.h:361
UndiGraph & graph()
Returns the graph of this interface graph.
bool existsEdge(const Edge &edge) const
indicates whether a given edge exists
~LabelSort()
Destructor.
Definition: gspan_tpl.h:454
std::vector< gspan::Pattern *> __patterns
The vector of discovered patters, in decreasing order of interest.
Definition: gspan.h:222
gspan::InterfaceGraph< GUM_SCALAR > * __graph
The interface graph used by this class.
Definition: gspan.h:213
const NodeGraphPart & nodes() const
return *this as a NodeGraphPart
A PRMSystem is a container of PRMInstance and describe a relational skeleton.
Definition: PRMObject.h:226
Headers of the DFSTree class.
bool exists(const Key &k) const
Check the existence of k in the sequence.
Definition: sequence_tpl.h:399
Private class used to sort LabelData using STL sort algorithms.
Definition: gspan.h:283
Set of pairs of elements with fast search for both elements.
Definition: bijection.h:1803
GSpan * gspan
A pointer over an instance of the GSpan class using this class.
Definition: gspan.h:345
gspan::DFSTree< GUM_SCALAR > & tree()
Returns the DFSTree used to discover new patters.
Definition: gspan_tpl.h:381
void rightmostPath(std::list< NodeId > &r_path) const
Fill r_path with the rightmost path of this Pattern. The list is supposed empty.
Definition: pattern_inl.h:150
gspan::InterfaceGraph< GUM_SCALAR > & interfaceGraph()
Returns the InterfaceGraph used by this.
Definition: gspan_tpl.h:421
LabelData * l_v
The label data of v.
NodeId id(const PRMInstance< GUM_SCALAR > &i) const
Returns the id of i in this interface graph.
void discoverPatterns()
This will methods will discover repeated patterns in the PRMSystem<GUM_SCALAR> assigned to this class...
Definition: gspan_tpl.h:33
This class represents a Probabilistic Relational PRMSystem<GUM_SCALAR>.
Definition: PRM.h:63
The base class for all undirected edges.
This class discovers pattern in a PRM<GUM_SCALAR>&#39;s PRMSystem<GUM_SCALAR> to speed up structured infe...
Definition: DFSTree.h:54
std::vector< gspan::Pattern *> & patterns()
Returns the Pattern mined by this class in a decreasing order of interest.
Definition: gspan_tpl.h:397
std::string toString()
Return a string representation of this.
void setMaxDFSDepth(Size depth)
Defines the maximal depth of the DFSTree used by this class to discover new patterns.
Definition: gspan_tpl.h:376
Base class for undirected graphs.
Definition: undiGraph.h:106
Class used to compute response times for benchmark purposesThis class represents a classic timer...
Definition: timer.h:48
Size Idx
Type for indexes.
Definition: types.h:50
void __sortNodesAndEdges()
Sort the nodes and edges of __graph.
Definition: gspan_tpl.h:56
GSpan(const PRM< GUM_SCALAR > &prm, const PRMSystem< GUM_SCALAR > &sys, gspan::SearchStrategy< GUM_SCALAR > *strategy=0)
Default constructor.
Definition: gspan_tpl.h:352
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45
Private class used to sort Pattern using STL sort algorithms.
Definition: gspan.h:318
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
EdgeData< GUM_SCALAR > & edge(NodeId u, NodeId v)
Returns data about an edge.
Size getMaxDFSDepth() const
Returns the maximal depth of the DFSTree used to discover new patterns.
Definition: gspan_tpl.h:371
MatchedInstances & matches(const gspan::Pattern &p)
Returns a mapping between patterns and the sequence of instance in the interface graph matching them...
Definition: gspan_tpl.h:409
This contains all the information we want for a node in a DFSTree.
Definition: pattern.h:70
This is an abstract class used to tune search strategies in the gspan algorithm.
Definition: DFSTree.h:58
Size __cost_func(Size interface_size, Size frequency)
Returns the cost with respect to an interface size and its frequency. TODO replace this by a class to...
Definition: gspan_tpl.h:391
Size NodeId
Type for node ids.
Definition: graphElements.h:97
void insert(const Key &k)
Inserts a new element into the set.
Definition: set_tpl.h:610
bool operator()(gspan::LabelData *i, gspan::LabelData *j)
Returns true if i&#39;s cost is lesser than j&#39;s.
Definition: gspan_tpl.h:459
HashTable< gspan::Pattern *, MatchedInstances *> __matched_instances
Mapping between a pattern and the multiset of instances matched to it.
Definition: gspan.h:235
const Key & atPos(Idx i) const
Returns the object at the pos i.
Definition: sequence_tpl.h:497
void insert(PRMInstance< GUM_SCALAR > *u, PRMInstance< GUM_SCALAR > *v)
Add the pair (u,v) as a match for the current growth.
void insert(const Key &k)
Insert an element at the end of the sequence.
Definition: sequence_tpl.h:405