aGrUM  0.16.0
gspan_tpl.h
Go to the documentation of this file.
1 
31 
32 namespace gum {
33  namespace prm {
34 
35  template < typename GUM_SCALAR >
37  Timer t;
38  __sortNodesAndEdges();
40 
41  for (auto root = __tree.roots().begin(); root != __tree.roots().end();
42  ++root) {
43  if (__tree.strategy().accept_root(&(__tree.pattern(*root)))) {
44  gspan::Pattern& p = __tree.pattern(*root);
45  __subgraph_mining(graph, p);
46 
47  for (const auto node : __tree.iso_graph(p).nodes()) {
48  PRMInstance< GUM_SCALAR >* u = __tree.iso_map(p, node).atPos(0);
49  PRMInstance< GUM_SCALAR >* v = __tree.iso_map(p, node).atPos(1);
50  graph.graph().eraseEdge(Edge(graph.id(u), graph.id(v)));
51  }
52  }
53  }
54 
55  __sortPatterns();
56  }
57 
58  template < typename GUM_SCALAR >
60  for (auto iter = __graph->labels().begin(); iter != __graph->labels().end();
61  ++iter) {
62  try {
63  if (__graph->nodes(iter.second()).size() >= 2) {
64  __cost.insert(iter.second(),
65  __cost_func(iter.second()->tree_width,
66  __graph->nodes(iter.second()).size()));
67  __nodes.push_back(const_cast< gspan::LabelData* >(iter.second()));
68  }
69  } catch (NotFound&) {
70  // It's a label over edges
71  if (__isEdgeEligible(*(__graph->edges(iter.second()).begin()))) {
72  __cost.insert(iter.second(),
73  __cost_func(iter.second()->tree_width,
74  __graph->edges(iter.second()).size()));
75  __edges.push_back(iter.second());
76  }
77  }
78  }
79 
82  GSpan< GUM_SCALAR >::LabelSort my_sort(this);
83  std::sort(__nodes.begin(), __nodes.end(), my_sort);
84  std::sort(__edges.begin(), __edges.end(), my_sort);
85  Size idx = 0;
86 
87  for (auto iter = __nodes.begin(); iter != __nodes.end(); ++iter) {
88  (*iter)->id = ++idx;
89  new_labels->insert(idx, *iter);
90  }
91 
92  for (auto iter = __edges.begin(); iter != __edges.end(); ++iter) {
93  (*iter)->id = ++idx;
94  new_labels->insert(idx, *iter);
95  __tree.addRoot(**iter);
96  }
97 
98  delete __graph->__labels;
99  __graph->__labels = new_labels;
100  }
101 
102  template < typename GUM_SCALAR >
105  std::vector< gspan::Pattern* > stack;
106  stack.push_back(&pat);
107  // Pointers used in the following while
108  gspan::Pattern* p = nullptr;
110  nullptr;
111  gspan::EdgeGrowth< GUM_SCALAR >* edge_growth = nullptr;
112  Sequence< PRMInstance< GUM_SCALAR >* >* seq = nullptr;
113  PRMInstance< GUM_SCALAR >* current = nullptr;
114  PRMInstance< GUM_SCALAR >* neighbor = nullptr;
115 
116  // Neighbor_id is the neighbor's id in the interface graph and
117  // neighbor_node
118  // is its id in the rightmost path in the case of a backward edge growth
119  NodeId current_id = 0;
120  NodeId neighbor_node = 0;
121  gspan::LabelData* neighbor_label = 0;
122 
123  typename gspan::EdgeData< GUM_SCALAR >* edge_data = nullptr;
124 
125  size_t idx;
126  const std::list< NodeId >* children = 0;
127 
128  while (!stack.empty()) {
129  // Getting next pattern
130  p = stack.back();
131  stack.pop_back();
132 
133  if (p->code().codes.size() < __depth_stop) {
134  // We need the rightmost path of p
135  std::list< NodeId > r_path;
136  p->rightmostPath(r_path);
137  // Mapping used to count each possible child of p, the position in the
138  // vector
139  // matches the one in the rightmost path
140  std::vector<
142  count_vector;
143 
144  for (size_t i = 0; i < r_path.size(); ++i)
145  count_vector.push_back(
146  new HashTable< std::string, gspan::EdgeGrowth< GUM_SCALAR >* >());
147 
148  // For each subgraph represented by p, we look for a valid edge growth
149  // for
150  // each instance match of p in its isomorphism graph.
151  for (const auto iso_node : __tree.iso_graph(*p).nodes()) {
152  seq = &(__tree.iso_map(*p, iso_node));
153  idx = 0;
154 
155  for (const auto node : r_path) {
156  edge_count = count_vector[idx];
157  // Retrieving the equivalent instance in the current match
158  current = seq->atPos((Idx)(node - 1));
159  current_id = ig.id(current);
160  // Checking for edges not in p
161 
162  for (const auto neighbor_id : ig.graph().neighbours(current_id)) {
163  neighbor = ig.node(neighbor_id).n;
164 
165  // We want a forward edge in any case or a backward edge if
166  // current
167  // is the rightmost vertex
168  if ((!seq->exists(neighbor)) || (node == r_path.back())) {
169  // Things we need to know: the LabelData data of the neighbour
170  // and,
171  // if it's a backward edge, its node id in the rightmost path
172  edge_data = &(ig.edge(current_id, neighbor_id));
173  neighbor_label =
174  (neighbor == edge_data->u) ? edge_data->l_u : edge_data->l_v;
175  neighbor_node =
176  (seq->exists(neighbor)) ? seq->pos(neighbor) + 1 : 0;
177  // Adding the edge growth to the edge_growth hashtable
179  node, edge_data->l, neighbor_label, neighbor_node);
180 
181  try {
182  edge_growth = (*edge_count)[temp_growth.toString()];
183  edge_growth->insert(current, neighbor);
184  } catch (NotFound&) {
185  edge_growth = new gspan::EdgeGrowth< GUM_SCALAR >(
186  node, edge_data->l, neighbor_label, neighbor_node);
187  edge_growth->insert(current, neighbor);
188  edge_count->insert(edge_growth->toString(), edge_growth);
189  }
190  }
191  }
192  }
193  }
194 
195  // Removing any infrequent child
196  for (size_t node = 0; node < count_vector.size(); ++node) {
197  edge_count = count_vector[node];
198 
199  for (const auto& elt : *edge_count) {
200  try {
201  __tree.growPattern(*p, *elt.second, 2);
202  } catch (OperationNotAllowed&) {
203  // The child was not minimal or was not worth considering
204  }
205 
206  delete elt.second;
207  }
208 
209  delete edge_count;
210  }
211 
212  // Calling __subgraph_mining over children of p
213  children = &(__tree.children(*p));
214 
215  for (std::list< NodeId >::const_reverse_iterator child =
216  children->rbegin();
217  child != children->rend();
218  ++child)
219  stack.push_back(&(__tree.pattern(*child)));
220  }
221  }
222  }
223 
224  template < typename GUM_SCALAR >
226  // First we put all the patterns in __patterns.
227  std::vector< NodeId > stack;
228 
229  for (std::list< NodeId >::reverse_iterator root = tree().roots().rbegin();
230  root != tree().roots().rend();
231  ++root)
232  stack.push_back(*root);
233 
234  NodeId id = 0;
235  std::list< NodeId >* children = nullptr;
236 
237  while (!stack.empty()) {
238  id = stack.back();
239  stack.pop_back();
240  __patterns.push_back(&(tree().pattern(id)));
241  children = &(tree().children(tree().pattern(id)));
242 
243  for (std::list< NodeId >::reverse_iterator child = children->rbegin();
244  child != children->rend();
245  ++child)
246  stack.push_back(*child);
247  }
248 
249  if (!__patterns.empty()) {
250  // We sort __patterns.
251  GSpan< GUM_SCALAR >::PatternSort my_sort(this);
252  std::sort(__patterns.begin(), __patterns.end(), my_sort);
253  // Now we need to find all the matches we can, using __patterns.
254  // We start by the best Pattern and add it's maximal independent set to
255  // __chosen
258  Sequence< PRMInstance< GUM_SCALAR >* >* match = nullptr;
259 
260  for (const auto node : tree().max_indep_set(*(__patterns.front()))) {
261  match = &(tree().iso_map(*(__patterns.front()), node));
262 
263  for (const auto i : *match)
264  __chosen.insert(i);
265 
266  matches->insert(match);
267  }
268 
269  __matched_instances.insert(__patterns.front(), matches);
270  // Now we see what kind of pattern we can still use
271  bool found;
272  UndiGraph* iso_graph = nullptr;
273 
274  for (auto patt = __patterns.begin() + 1; patt != __patterns.end();
275  ++patt) {
276  UndiGraph reduced_iso_graph;
277  std::vector< NodeId > degree_list;
278  iso_graph = &(tree().iso_graph(**patt));
279 
280  for (const auto node : iso_graph->nodes()) {
281  found = false;
282  match = &(tree().iso_map(**patt, node));
283 
284  for (const auto i : *match)
285  if (__chosen.exists(i)) {
286  found = true;
287  break;
288  }
289 
290  if (!found) {
291  // We add the pattern to the reduced isomorphism graph to compute
292  // the
293  // max independent set
294  // over the remaining matches
295  reduced_iso_graph.addNodeWithId(node);
296 
297  for (const auto iso : reduced_iso_graph.nodes())
298  if (iso_graph->existsEdge(node, iso))
299  reduced_iso_graph.addEdge(node, iso);
300 
301  degree_list.push_back(node);
302  }
303  }
304 
305  // We create a new set to hold all the chosen matches of patt
307  // We can compute the max independent set and the matches belonging to
308  // it
310  reduced_iso_graph);
311  std::sort(degree_list.begin(), degree_list.end(), my_sort);
312  Set< NodeId > removed;
313 
314  for (const auto node : degree_list)
315  if (!removed.exists(node)) {
316  // First we update removed to follow the max independent set
317  // algorithm
318  removed.insert(node);
319 
320  for (const auto neighbor : reduced_iso_graph.neighbours(node))
321  removed.insert(neighbor);
322 
323  // Second we update match and matches to keep track of the current
324  // match
325  match = &(tree().iso_map(**patt, node));
326  matches->insert(match);
327 
328  for (const auto elt : *match)
329  __chosen.insert(elt);
330  }
331 
332  __matched_instances.insert(*patt, matches);
333  }
334 
335  // // We remove patterns with 0 matches
336  std::vector< size_t > trash;
337 
338  for (size_t idx = 0; idx < __patterns.size(); ++idx)
339  if (__matched_instances[__patterns[idx]]->size() < 2)
340  trash.push_back(idx);
341 
342  while (trash.size()) {
343  delete __matched_instances[__patterns[trash.back()]];
344  __matched_instances.erase(__patterns[trash.back()]);
345  // delete __patterns[trash.back()];
346  __patterns[trash.back()] = __patterns.back();
347  __patterns.pop_back();
348  trash.pop_back();
349  }
350  }
351  }
352 
353  template < typename GUM_SCALAR >
354  INLINE
356  const PRMSystem< GUM_SCALAR >& sys,
358  __graph(new gspan::InterfaceGraph< GUM_SCALAR >(sys)),
359  __tree(*__graph, strategy), __depth_stop(INT_MAX) {
360  GUM_CONSTRUCTOR(GSpan);
361  }
362 
363  template < typename GUM_SCALAR >
365  GUM_DESTRUCTOR(GSpan);
366 
367  for (const auto& elt : __matched_instances)
368  delete elt.second;
369 
370  delete __graph;
371  }
372 
373  template < typename GUM_SCALAR >
375  return __depth_stop;
376  }
377 
378  template < typename GUM_SCALAR >
380  __depth_stop = depth;
381  }
382 
383  template < typename GUM_SCALAR >
385  return __tree;
386  }
387 
388  template < typename GUM_SCALAR >
390  return __tree;
391  }
392 
393  template < typename GUM_SCALAR >
395  Size frequency) {
396  return Idx(interface_size * frequency);
397  }
398 
399  template < typename GUM_SCALAR >
400  INLINE std::vector< gspan::Pattern* >& GSpan< GUM_SCALAR >::patterns() {
401  return __patterns;
402  }
403 
404  template < typename GUM_SCALAR >
405  INLINE const std::vector< gspan::Pattern* >&
407  return __patterns;
408  }
409 
410  template < typename GUM_SCALAR >
413  return *(__matched_instances[const_cast< gspan::Pattern* >(&p)]);
414  }
415 
416  template < typename GUM_SCALAR >
417  INLINE const typename GSpan< GUM_SCALAR >::MatchedInstances&
419  return *(__matched_instances[const_cast< gspan::Pattern* >(&p)]);
420  }
421 
422  template < typename GUM_SCALAR >
425  return *__graph;
426  }
427 
428  template < typename GUM_SCALAR >
431  return *__graph;
432  }
433 
434  template < typename GUM_SCALAR >
435  INLINE bool
437  return (__graph->edges(e->l).size() >= 2)
438  && (__graph->nodes(e->l_u).size() >= 2)
439  && (__graph->nodes(e->l_v).size() >= 2);
440  }
441 
442  // LalbeSort
443 
444  template < typename GUM_SCALAR >
446  gspan(my_gspan) {
447  GUM_CONSTRUCTOR(GSpan< GUM_SCALAR >::LabelSort);
448  }
449 
450  template < typename GUM_SCALAR >
452  gspan(source.gspan) {
453  GUM_CONS_CPY(GSpan< GUM_SCALAR >::LabelSort);
454  }
455 
456  template < typename GUM_SCALAR >
458  GUM_DESTRUCTOR(GSpan< GUM_SCALAR >::LabelSort);
459  }
460 
461  template < typename GUM_SCALAR >
463  gspan::LabelData* j) {
464  // We want a descending order
465  // return gspan->__cost[i] > gspan->__cost[j];
466  return gspan->__tree.strategy()(i, j);
467  }
468 
469  // PatternSort
470 
471  template < typename GUM_SCALAR >
473  gspan(my_gspan) {
474  GUM_CONSTRUCTOR(GSpan< GUM_SCALAR >::PatternSort);
475  }
476 
477  template < typename GUM_SCALAR >
478  INLINE
480  gspan(source.gspan) {
481  GUM_CONS_CPY(GSpan< GUM_SCALAR >::PatternSort);
482  }
483 
484  template < typename GUM_SCALAR >
486  GUM_DESTRUCTOR(GSpan< GUM_SCALAR >::PatternSort);
487  }
488 
489  template < typename GUM_SCALAR >
491  gspan::Pattern* j) {
492  // We want a descending order
493  return gspan->tree().strategy().operator()(i, j);
494  }
495 
496  } /* namespace prm */
497 } /* namespace gum */
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
GSpan * gspan
A pointer over an instance of the GSpan class using this class.
Definition: gspan.h:313
bool __isEdgeEligible(typename gspan::EdgeData< GUM_SCALAR > *e)
Returns true if e is an eligible root edge.
Definition: gspan_tpl.h:436
This class is used to define an edge growth of a pattern in this DFSTree.
Definition: edgeGrowth.h:63
virtual void addNodeWithId(const NodeId id)
try to insert a node with the given id
Idx pos(const Key &key) const
Returns the position of the object passed in argument (if it exists).
Definition: sequence_tpl.h:518
PatternSort(GSpan *my_gspan)
Default constructor.
Definition: gspan_tpl.h:472
Set< Sequence< PRMInstance< GUM_SCALAR > *> *> MatchedInstances
Code alias.
Definition: gspan.h:170
Inner class to handle data about labels in this interface graph.
PRMInstance< GUM_SCALAR > * u
One of the two instance represented by this edge.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
An PRMInstance is a Bayesian Network fragment defined by a Class and used in a PRMSystem.
Definition: PRMInstance.h:63
DFSCode & code()
Returns the DFSCode of this Pattern.
Definition: pattern_inl.h:173
The generic class for storing (ordered) sequences of objects.
Definition: sequence.h:1022
virtual void addEdge(const NodeId first, const NodeId second)
insert a new edge into the undirected graph
Definition: undiGraph_inl.h:35
LabelSort(GSpan *my_gspan)
Default constructor.
Definition: gspan_tpl.h:445
Size __depth_stop
The max depth allowed for the DSF tree.
Definition: gspan.h:222
LabelData * l_u
The label data of u.
NodeData< GUM_SCALAR > & node(const PRMInstance< GUM_SCALAR > *i)
Returns data about a node.
Inner class to handle data about edges in __graph.
void __subgraph_mining(gspan::InterfaceGraph< GUM_SCALAR > &graph, gspan::Pattern &p)
Discovers new patterns by developing p.
Definition: gspan_tpl.h:103
This is used to generate the max_indep_set of a Pattern.
Definition: DFSTree.h:247
This class represent the interface graph of a given gum::prm::PRMSystem<GUM_SCALAR>.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
std::vector< EdgeCode *> codes
The vector containing the EdgeCode composing this DFSCode.
Definition: DFSCode.h:90
void __sortPatterns()
Sort the patterns and compute their respective costs.
Definition: gspan_tpl.h:225
The class for generic Hash Tables.
Definition: hashTable.h:679
bool operator()(gspan::Pattern *i, gspan::Pattern *j)
Returns true if i&#39;s cost is lesser than j&#39;s.
Definition: gspan_tpl.h:490
const NodeSet & neighbours(const NodeId id) const
returns the set of edges adjacent to a given node
LabelData * l
The labal data of this edge.
Representation of a setA Set is a structure that contains arbitrary elements.
Definition: set.h:165
gspan::DFSTree< GUM_SCALAR > __tree
The DFSTree used to discover new patters.
Definition: gspan.h:219
A DFSTree is used by gspan to sort lexicographically patterns discovered in an interface graph...
Definition: DFSTree.h:71
~PatternSort()
Destructor.
Definition: gspan_tpl.h:485
~GSpan()
Destructor.
Definition: gspan_tpl.h:364
UndiGraph & graph()
Returns the graph of this interface graph.
bool existsEdge(const Edge &edge) const
indicates whether a given edge exists
~LabelSort()
Destructor.
Definition: gspan_tpl.h:457
std::vector< gspan::Pattern *> __patterns
The vector of discovered patters, in decreasing order of interest.
Definition: gspan.h:225
gspan::InterfaceGraph< GUM_SCALAR > * __graph
The interface graph used by this class.
Definition: gspan.h:216
const NodeGraphPart & nodes() const
return *this as a NodeGraphPart
A PRMSystem is a container of PRMInstance and describe a relational skeleton.
Definition: PRMObject.h:229
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
bool exists(const Key &k) const
Check the existence of k in the sequence.
Definition: sequence_tpl.h:402
Private class used to sort LabelData using STL sort algorithms.
Definition: gspan.h:286
Set of pairs of elements with fast search for both elements.
Definition: bijection.h:1805
GSpan * gspan
A pointer over an instance of the GSpan class using this class.
Definition: gspan.h:348
gspan::DFSTree< GUM_SCALAR > & tree()
Returns the DFSTree used to discover new patters.
Definition: gspan_tpl.h:384
void rightmostPath(std::list< NodeId > &r_path) const
Fill r_path with the rightmost path of this Pattern. The list is supposed empty.
Definition: pattern_inl.h:153
gspan::InterfaceGraph< GUM_SCALAR > & interfaceGraph()
Returns the InterfaceGraph used by this.
Definition: gspan_tpl.h:424
LabelData * l_v
The label data of v.
NodeId id(const PRMInstance< GUM_SCALAR > &i) const
Returns the id of i in this interface graph.
void discoverPatterns()
This will methods will discover repeated patterns in the PRMSystem<GUM_SCALAR> assigned to this class...
Definition: gspan_tpl.h:36
This class represents a Probabilistic Relational PRMSystem<GUM_SCALAR>.
Definition: PRM.h:66
The base class for all undirected edges.
This class discovers pattern in a PRM<GUM_SCALAR>&#39;s PRMSystem<GUM_SCALAR> to speed up structured infe...
Definition: DFSTree.h:57
std::vector< gspan::Pattern *> & patterns()
Returns the Pattern mined by this class in a decreasing order of interest.
Definition: gspan_tpl.h:400
std::string toString()
Return a string representation of this.
void setMaxDFSDepth(Size depth)
Defines the maximal depth of the DFSTree used by this class to discover new patterns.
Definition: gspan_tpl.h:379
Base class for undirected graphs.
Definition: undiGraph.h:109
Class used to compute response times for benchmark purposesThis class represents a classic timer...
Definition: timer.h:51
Size Idx
Type for indexes.
Definition: types.h:53
void __sortNodesAndEdges()
Sort the nodes and edges of __graph.
Definition: gspan_tpl.h:59
GSpan(const PRM< GUM_SCALAR > &prm, const PRMSystem< GUM_SCALAR > &sys, gspan::SearchStrategy< GUM_SCALAR > *strategy=0)
Default constructor.
Definition: gspan_tpl.h:355
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
Private class used to sort Pattern using STL sort algorithms.
Definition: gspan.h:321
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
EdgeData< GUM_SCALAR > & edge(NodeId u, NodeId v)
Returns data about an edge.
Size getMaxDFSDepth() const
Returns the maximal depth of the DFSTree used to discover new patterns.
Definition: gspan_tpl.h:374
MatchedInstances & matches(const gspan::Pattern &p)
Returns a mapping between patterns and the sequence of instance in the interface graph matching them...
Definition: gspan_tpl.h:412
This contains all the information we want for a node in a DFSTree.
Definition: pattern.h:73
This is an abstract class used to tune search strategies in the gspan algorithm.
Definition: DFSTree.h:61
Size __cost_func(Size interface_size, Size frequency)
Returns the cost with respect to an interface size and its frequency. TODO replace this by a class to...
Definition: gspan_tpl.h:394
Size NodeId
Type for node ids.
Definition: graphElements.h:98
void insert(const Key &k)
Inserts a new element into the set.
Definition: set_tpl.h:613
bool operator()(gspan::LabelData *i, gspan::LabelData *j)
Returns true if i&#39;s cost is lesser than j&#39;s.
Definition: gspan_tpl.h:462
HashTable< gspan::Pattern *, MatchedInstances *> __matched_instances
Mapping between a pattern and the multiset of instances matched to it.
Definition: gspan.h:238
const Key & atPos(Idx i) const
Returns the object at the pos i.
Definition: sequence_tpl.h:500
void insert(PRMInstance< GUM_SCALAR > *u, PRMInstance< GUM_SCALAR > *v)
Add the pair (u,v) as a match for the current growth.
void insert(const Key &k)
Insert an element at the end of the sequence.
Definition: sequence_tpl.h:408