aGrUM
0.20.2
a C++ library for (probabilistic) graphical models
gspan_tpl.h
Go to the documentation of this file.
1
/**
2
*
3
* Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/**
23
* @file
24
* @brief Inline implementation of gspan.
25
*
26
* @author Lionel TORTI and Pierre-Henri WUILLEMIN(@LIP6)
27
*/
28
#
include
<
agrum
/
PRM
/
gspan
/
edgeGrowth
.
h
>
29
#
include
<
agrum
/
PRM
/
inference
/
gspan
.
h
>
30
31
namespace
gum
{
32
namespace
prm
{
33
34
template
<
typename
GUM_SCALAR >
35
void
GSpan<
GUM_SCALAR
>::
discoverPatterns
() {
36
Timer
t
;
37
sortNodesAndEdges__
();
38
gspan
::
InterfaceGraph
<
GUM_SCALAR
>
graph
(*
graph__
);
39
40
for
(
auto
root
=
tree__
.
roots
().
begin
();
root
!=
tree__
.
roots
().
end
();
41
++
root
) {
42
if
(
tree__
.
strategy
().
accept_root
(&(
tree__
.
pattern
(*
root
)))) {
43
gspan
::
Pattern
&
p
=
tree__
.
pattern
(*
root
);
44
subgraph_mining__
(
graph
,
p
);
45
46
for
(
const
auto
node
:
tree__
.
iso_graph
(
p
).
nodes
()) {
47
PRMInstance
<
GUM_SCALAR
>*
u
=
tree__
.
iso_map
(
p
,
node
).
atPos
(0);
48
PRMInstance
<
GUM_SCALAR
>*
v
=
tree__
.
iso_map
(
p
,
node
).
atPos
(1);
49
graph
.
graph
().
eraseEdge
(
Edge
(
graph
.
id
(
u
),
graph
.
id
(
v
)));
50
}
51
}
52
}
53
54
sortPatterns__
();
55
}
56
57
template
<
typename
GUM_SCALAR
>
58
void
GSpan
<
GUM_SCALAR
>::
sortNodesAndEdges__
() {
59
for
(
auto
iter
=
graph__
->
labels
().
begin
();
iter
!=
graph__
->
labels
().
end
();
60
++
iter
) {
61
try
{
62
if
(
graph__
->
nodes
(
iter
.
second
()).
size
() >= 2) {
63
cost__
.
insert
(
iter
.
second
(),
64
cost_func__
(
iter
.
second
()->
tree_width
,
65
graph__
->
nodes
(
iter
.
second
()).
size
()));
66
nodes__
.
push_back
(
const_cast
<
gspan
::
LabelData
* >(
iter
.
second
()));
67
}
68
}
catch
(
NotFound
&) {
69
// It's a label over edges
70
if
(
isEdgeEligible__
(*(
graph__
->
edges
(
iter
.
second
()).
begin
()))) {
71
cost__
.
insert
(
iter
.
second
(),
72
cost_func__
(
iter
.
second
()->
tree_width
,
73
graph__
->
edges
(
iter
.
second
()).
size
()));
74
edges__
.
push_back
(
iter
.
second
());
75
}
76
}
77
}
78
79
Bijection
<
Idx
,
gspan
::
LabelData
* >*
new_labels
80
=
new
Bijection
<
Idx
,
gspan
::
LabelData
* >();
81
GSpan
<
GUM_SCALAR
>::
LabelSort
my_sort
(
this
);
82
std
::
sort
(
nodes__
.
begin
(),
nodes__
.
end
(),
my_sort
);
83
std
::
sort
(
edges__
.
begin
(),
edges__
.
end
(),
my_sort
);
84
Size
idx
= 0;
85
86
for
(
auto
iter
=
nodes__
.
begin
();
iter
!=
nodes__
.
end
(); ++
iter
) {
87
(*
iter
)->
id
= ++
idx
;
88
new_labels
->
insert
(
idx
, *
iter
);
89
}
90
91
for
(
auto
iter
=
edges__
.
begin
();
iter
!=
edges__
.
end
(); ++
iter
) {
92
(*
iter
)->
id
= ++
idx
;
93
new_labels
->
insert
(
idx
, *
iter
);
94
tree__
.
addRoot
(**
iter
);
95
}
96
97
delete
graph__
->
labels__
;
98
graph__
->
labels__
=
new_labels
;
99
}
100
101
template
<
typename
GUM_SCALAR
>
102
void
GSpan
<
GUM_SCALAR
>::
subgraph_mining__
(
103
gspan
::
InterfaceGraph
<
GUM_SCALAR
>&
ig
,
104
gspan
::
Pattern
&
pat
) {
105
std
::
vector
<
gspan
::
Pattern
* >
stack
;
106
stack
.
push_back
(&
pat
);
107
// Pointers used in the following while
108
gspan
::
Pattern
*
p
=
nullptr
;
109
HashTable
<
std
::
string
,
gspan
::
EdgeGrowth
<
GUM_SCALAR
>* >*
edge_count
110
=
nullptr
;
111
gspan
::
EdgeGrowth
<
GUM_SCALAR
>*
edge_growth
=
nullptr
;
112
Sequence
<
PRMInstance
<
GUM_SCALAR
>* >*
seq
=
nullptr
;
113
PRMInstance
<
GUM_SCALAR
>*
current
=
nullptr
;
114
PRMInstance
<
GUM_SCALAR
>*
neighbor
=
nullptr
;
115
116
// Neighbor_id is the neighbor's id in the interface graph and
117
// neighbor_node
118
// is its id in the rightmost path in the case of a backward edge growth
119
NodeId
current_id
= 0;
120
NodeId
neighbor_node
= 0;
121
gspan
::
LabelData
*
neighbor_label
= 0;
122
123
typename
gspan
::
EdgeData
<
GUM_SCALAR
>*
edge_data
=
nullptr
;
124
125
size_t
idx
;
126
const
std
::
list
<
NodeId
>*
children
= 0;
127
128
while
(!
stack
.
empty
()) {
129
// Getting next pattern
130
p
=
stack
.
back
();
131
stack
.
pop_back
();
132
133
if
(
p
->
code
().
codes
.
size
() <
depth_stop__
) {
134
// We need the rightmost path of p
135
std
::
list
<
NodeId
>
r_path
;
136
p
->
rightmostPath
(
r_path
);
137
// Mapping used to count each possible child of p, the position in the
138
// vector
139
// matches the one in the rightmost path
140
std
::
vector
<
141
HashTable
<
std
::
string
,
gspan
::
EdgeGrowth
<
GUM_SCALAR
>* >* >
142
count_vector
;
143
144
for
(
size_t
i
= 0;
i
<
r_path
.
size
(); ++
i
)
145
count_vector
.
push_back
(
146
new
HashTable
<
std
::
string
,
gspan
::
EdgeGrowth
<
GUM_SCALAR
>* >());
147
148
// For each subgraph represented by p, we look for a valid edge growth
149
// for
150
// each instance match of p in its isomorphism graph.
151
for
(
const
auto
iso_node
:
tree__
.
iso_graph
(*
p
).
nodes
()) {
152
seq
= &(
tree__
.
iso_map
(*
p
,
iso_node
));
153
idx
= 0;
154
155
for
(
const
auto
node
:
r_path
) {
156
edge_count
=
count_vector
[
idx
];
157
// Retrieving the equivalent instance in the current match
158
current
=
seq
->
atPos
((
Idx
)(
node
- 1));
159
current_id
=
ig
.
id
(
current
);
160
// Checking for edges not in p
161
162
for
(
const
auto
neighbor_id
:
ig
.
graph
().
neighbours
(
current_id
)) {
163
neighbor
=
ig
.
node
(
neighbor_id
).
n
;
164
165
// We want a forward edge in any case or a backward edge if
166
// current
167
// is the rightmost vertex
168
if
((!
seq
->
exists
(
neighbor
)) || (
node
==
r_path
.
back
())) {
169
// Things we need to know: the LabelData data of the neighbour
170
// and,
171
// if it's a backward edge, its node id in the rightmost path
172
edge_data
= &(
ig
.
edge
(
current_id
,
neighbor_id
));
173
neighbor_label
= (
neighbor
==
edge_data
->
u
) ?
edge_data
->
l_u
174
:
edge_data
->
l_v
;
175
neighbor_node
176
= (
seq
->
exists
(
neighbor
)) ?
seq
->
pos
(
neighbor
) + 1 : 0;
177
// Adding the edge growth to the edge_growth hashtable
178
gspan
::
EdgeGrowth
<
GUM_SCALAR
>
temp_growth
(
node
,
179
edge_data
->
l
,
180
neighbor_label
,
181
neighbor_node
);
182
183
try
{
184
edge_growth
= (*
edge_count
)[
temp_growth
.
toString
()];
185
edge_growth
->
insert
(
current
,
neighbor
);
186
}
catch
(
NotFound
&) {
187
edge_growth
188
=
new
gspan
::
EdgeGrowth
<
GUM_SCALAR
>(
node
,
189
edge_data
->
l
,
190
neighbor_label
,
191
neighbor_node
);
192
edge_growth
->
insert
(
current
,
neighbor
);
193
edge_count
->
insert
(
edge_growth
->
toString
(),
edge_growth
);
194
}
195
}
196
}
197
}
198
}
199
200
// Removing any infrequent child
201
for
(
size_t
node
= 0;
node
<
count_vector
.
size
(); ++
node
) {
202
edge_count
=
count_vector
[
node
];
203
204
for
(
const
auto
&
elt
: *
edge_count
) {
205
try
{
206
tree__
.
growPattern
(*
p
, *
elt
.
second
, 2);
207
}
catch
(
OperationNotAllowed
&) {
208
// The child was not minimal or was not worth considering
209
}
210
211
delete
elt
.
second
;
212
}
213
214
delete
edge_count
;
215
}
216
217
// Calling subgraph_mining__ over children of p
218
children
= &(
tree__
.
children
(*
p
));
219
220
for
(
std
::
list
<
NodeId
>::
const_reverse_iterator
child
221
=
children
->
rbegin
();
222
child
!=
children
->
rend
();
223
++
child
)
224
stack
.
push_back
(&(
tree__
.
pattern
(*
child
)));
225
}
226
}
227
}
228
229
template
<
typename
GUM_SCALAR
>
230
void
GSpan
<
GUM_SCALAR
>::
sortPatterns__
() {
231
// First we put all the patterns in patterns__.
232
std
::
vector
<
NodeId
>
stack
;
233
234
for
(
std
::
list
<
NodeId
>::
reverse_iterator
root
=
tree
().
roots
().
rbegin
();
235
root
!=
tree
().
roots
().
rend
();
236
++
root
)
237
stack
.
push_back
(*
root
);
238
239
NodeId
id
= 0;
240
std
::
list
<
NodeId
>*
children
=
nullptr
;
241
242
while
(!
stack
.
empty
()) {
243
id
=
stack
.
back
();
244
stack
.
pop_back
();
245
patterns__
.
push_back
(&(
tree
().
pattern
(
id
)));
246
children
= &(
tree
().
children
(
tree
().
pattern
(
id
)));
247
248
for
(
std
::
list
<
NodeId
>::
reverse_iterator
child
=
children
->
rbegin
();
249
child
!=
children
->
rend
();
250
++
child
)
251
stack
.
push_back
(*
child
);
252
}
253
254
if
(!
patterns__
.
empty
()) {
255
// We sort patterns__.
256
GSpan
<
GUM_SCALAR
>::
PatternSort
my_sort
(
this
);
257
std
::
sort
(
patterns__
.
begin
(),
patterns__
.
end
(),
my_sort
);
258
// Now we need to find all the matches we can, using patterns__.
259
// We start by the best Pattern and add it's maximal independent set to
260
// chosen__
261
GSpan
<
GUM_SCALAR
>::
MatchedInstances
*
matches
262
=
new
GSpan
<
GUM_SCALAR
>::
MatchedInstances
();
263
Sequence
<
PRMInstance
<
GUM_SCALAR
>* >*
match
=
nullptr
;
264
265
for
(
const
auto
node
:
tree
().
max_indep_set
(*(
patterns__
.
front
()))) {
266
match
= &(
tree
().
iso_map
(*(
patterns__
.
front
()),
node
));
267
268
for
(
const
auto
i
: *
match
)
269
chosen__
.
insert
(
i
);
270
271
matches
->
insert
(
match
);
272
}
273
274
matched_instances__
.
insert
(
patterns__
.
front
(),
matches
);
275
// Now we see what kind of pattern we can still use
276
bool
found
;
277
UndiGraph
*
iso_graph
=
nullptr
;
278
279
for
(
auto
patt
=
patterns__
.
begin
() + 1;
patt
!=
patterns__
.
end
();
280
++
patt
) {
281
UndiGraph
reduced_iso_graph
;
282
std
::
vector
<
NodeId
>
degree_list
;
283
iso_graph
= &(
tree
().
iso_graph
(**
patt
));
284
285
for
(
const
auto
node
:
iso_graph
->
nodes
()) {
286
found
=
false
;
287
match
= &(
tree
().
iso_map
(**
patt
,
node
));
288
289
for
(
const
auto
i
: *
match
)
290
if
(
chosen__
.
exists
(
i
)) {
291
found
=
true
;
292
break
;
293
}
294
295
if
(!
found
) {
296
// We add the pattern to the reduced isomorphism graph to compute
297
// the
298
// max independent set
299
// over the remaining matches
300
reduced_iso_graph
.
addNodeWithId
(
node
);
301
302
for
(
const
auto
iso
:
reduced_iso_graph
.
nodes
())
303
if
(
iso_graph
->
existsEdge
(
node
,
iso
))
304
reduced_iso_graph
.
addEdge
(
node
,
iso
);
305
306
degree_list
.
push_back
(
node
);
307
}
308
}
309
310
// We create a new set to hold all the chosen matches of patt
311
matches
=
new
GSpan
<
GUM_SCALAR
>::
MatchedInstances
();
312
// We can compute the max independent set and the matches belonging to
313
// it
314
typename
gspan
::
DFSTree
<
GUM_SCALAR
>::
NeighborDegreeSort
my_sort
(
315
reduced_iso_graph
);
316
std
::
sort
(
degree_list
.
begin
(),
degree_list
.
end
(),
my_sort
);
317
Set
<
NodeId
>
removed
;
318
319
for
(
const
auto
node
:
degree_list
)
320
if
(!
removed
.
exists
(
node
)) {
321
// First we update removed to follow the max independent set
322
// algorithm
323
removed
.
insert
(
node
);
324
325
for
(
const
auto
neighbor
:
reduced_iso_graph
.
neighbours
(
node
))
326
removed
.
insert
(
neighbor
);
327
328
// Second we update match and matches to keep track of the current
329
// match
330
match
= &(
tree
().
iso_map
(**
patt
,
node
));
331
matches
->
insert
(
match
);
332
333
for
(
const
auto
elt
: *
match
)
334
chosen__
.
insert
(
elt
);
335
}
336
337
matched_instances__
.
insert
(*
patt
,
matches
);
338
}
339
340
// // We remove patterns with 0 matches
341
std
::
vector
<
size_t
>
trash
;
342
343
for
(
size_t
idx
= 0;
idx
<
patterns__
.
size
(); ++
idx
)
344
if
(
matched_instances__
[
patterns__
[
idx
]]->
size
() < 2)
345
trash
.
push_back
(
idx
);
346
347
while
(
trash
.
size
()) {
348
delete
matched_instances__
[
patterns__
[
trash
.
back
()]];
349
matched_instances__
.
erase
(
patterns__
[
trash
.
back
()]);
350
// delete patterns__[trash.back()];
351
patterns__
[
trash
.
back
()] =
patterns__
.
back
();
352
patterns__
.
pop_back
();
353
trash
.
pop_back
();
354
}
355
}
356
}
357
358
template
<
typename
GUM_SCALAR
>
359
INLINE
360
GSpan
<
GUM_SCALAR
>::
GSpan
(
const
PRM
<
GUM_SCALAR
>&
prm
,
361
const
PRMSystem
<
GUM_SCALAR
>&
sys
,
362
gspan
::
SearchStrategy
<
GUM_SCALAR
>*
strategy
) :
363
graph__
(
new
gspan
::
InterfaceGraph
<
GUM_SCALAR
>(
sys
)),
364
tree__
(*
graph__
,
strategy
),
depth_stop__
(
INT_MAX
) {
365
GUM_CONSTRUCTOR
(
GSpan
);
366
}
367
368
template
<
typename
GUM_SCALAR
>
369
INLINE
GSpan
<
GUM_SCALAR
>::~
GSpan
() {
370
GUM_DESTRUCTOR
(
GSpan
);
371
372
for
(
const
auto
&
elt
:
matched_instances__
)
373
delete
elt
.
second
;
374
375
delete
graph__
;
376
}
377
378
template
<
typename
GUM_SCALAR
>
379
INLINE
Size
GSpan
<
GUM_SCALAR
>::
getMaxDFSDepth
()
const
{
380
return
depth_stop__
;
381
}
382
383
template
<
typename
GUM_SCALAR
>
384
INLINE
void
GSpan
<
GUM_SCALAR
>::
setMaxDFSDepth
(
Size
depth
) {
385
depth_stop__
=
depth
;
386
}
387
388
template
<
typename
GUM_SCALAR
>
389
INLINE
gspan
::
DFSTree
<
GUM_SCALAR
>&
GSpan
<
GUM_SCALAR
>::
tree
() {
390
return
tree__
;
391
}
392
393
template
<
typename
GUM_SCALAR
>
394
INLINE
const
gspan
::
DFSTree
<
GUM_SCALAR
>&
GSpan
<
GUM_SCALAR
>::
tree
()
const
{
395
return
tree__
;
396
}
397
398
template
<
typename
GUM_SCALAR
>
399
INLINE
Idx
GSpan
<
GUM_SCALAR
>::
cost_func__
(
Size
interface_size
,
400
Size
frequency
) {
401
return
Idx
(
interface_size
*
frequency
);
402
}
403
404
template
<
typename
GUM_SCALAR
>
405
INLINE
std
::
vector
<
gspan
::
Pattern
* >&
GSpan
<
GUM_SCALAR
>::
patterns
() {
406
return
patterns__
;
407
}
408
409
template
<
typename
GUM_SCALAR
>
410
INLINE
const
std
::
vector
<
gspan
::
Pattern
* >&
411
GSpan
<
GUM_SCALAR
>::
patterns
()
const
{
412
return
patterns__
;
413
}
414
415
template
<
typename
GUM_SCALAR
>
416
INLINE
typename
GSpan
<
GUM_SCALAR
>::
MatchedInstances
&
417
GSpan
<
GUM_SCALAR
>::
matches
(
const
gspan
::
Pattern
&
p
) {
418
return
*(
matched_instances__
[
const_cast
<
gspan
::
Pattern
* >(&
p
)]);
419
}
420
421
template
<
typename
GUM_SCALAR
>
422
INLINE
const
typename
GSpan
<
GUM_SCALAR
>::
MatchedInstances
&
423
GSpan
<
GUM_SCALAR
>::
matches
(
const
gspan
::
Pattern
&
p
)
const
{
424
return
*(
matched_instances__
[
const_cast
<
gspan
::
Pattern
* >(&
p
)]);
425
}
426
427
template
<
typename
GUM_SCALAR
>
428
INLINE
gspan
::
InterfaceGraph
<
GUM_SCALAR
>&
429
GSpan
<
GUM_SCALAR
>::
interfaceGraph
() {
430
return
*
graph__
;
431
}
432
433
template
<
typename
GUM_SCALAR
>
434
INLINE
const
gspan
::
InterfaceGraph
<
GUM_SCALAR
>&
435
GSpan
<
GUM_SCALAR
>::
interfaceGraph
()
const
{
436
return
*
graph__
;
437
}
438
439
template
<
typename
GUM_SCALAR
>
440
INLINE
bool
441
GSpan
<
GUM_SCALAR
>::
isEdgeEligible__
(
gspan
::
EdgeData
<
GUM_SCALAR
>*
e
) {
442
return
(
graph__
->
edges
(
e
->
l
).
size
() >= 2)
443
&& (
graph__
->
nodes
(
e
->
l_u
).
size
() >= 2)
444
&& (
graph__
->
nodes
(
e
->
l_v
).
size
() >= 2);
445
}
446
447
// LalbeSort
448
449
template
<
typename
GUM_SCALAR
>
450
INLINE
GSpan
<
GUM_SCALAR
>::
LabelSort
::
LabelSort
(
GSpan
*
my_gspan
) :
451
gspan
(
my_gspan
) {
452
GUM_CONSTRUCTOR
(
GSpan
<
GUM_SCALAR
>::
LabelSort
);
453
}
454
455
template
<
typename
GUM_SCALAR
>
456
INLINE
GSpan
<
GUM_SCALAR
>::
LabelSort
::
LabelSort
(
const
LabelSort
&
source
) :
457
gspan
(
source
.
gspan
) {
458
GUM_CONS_CPY
(
GSpan
<
GUM_SCALAR
>::
LabelSort
);
459
}
460
461
template
<
typename
GUM_SCALAR
>
462
INLINE
GSpan
<
GUM_SCALAR
>::
LabelSort
::~
LabelSort
() {
463
GUM_DESTRUCTOR
(
GSpan
<
GUM_SCALAR
>::
LabelSort
);
464
}
465
466
template
<
typename
GUM_SCALAR
>
467
INLINE
bool
GSpan
<
GUM_SCALAR
>::
LabelSort
::
operator
()(
gspan
::
LabelData
*
i
,
468
gspan
::
LabelData
*
j
) {
469
// We want a descending order
470
// return gspan->cost__[i] > gspan->cost__[j];
471
return
gspan
->
tree__
.
strategy
()(
i
,
j
);
472
}
473
474
// PatternSort
475
476
template
<
typename
GUM_SCALAR
>
477
INLINE
GSpan
<
GUM_SCALAR
>::
PatternSort
::
PatternSort
(
GSpan
*
my_gspan
) :
478
gspan
(
my_gspan
) {
479
GUM_CONSTRUCTOR
(
GSpan
<
GUM_SCALAR
>::
PatternSort
);
480
}
481
482
template
<
typename
GUM_SCALAR
>
483
INLINE
484
GSpan
<
GUM_SCALAR
>::
PatternSort
::
PatternSort
(
const
PatternSort
&
source
) :
485
gspan
(
source
.
gspan
) {
486
GUM_CONS_CPY
(
GSpan
<
GUM_SCALAR
>::
PatternSort
);
487
}
488
489
template
<
typename
GUM_SCALAR
>
490
INLINE
GSpan
<
GUM_SCALAR
>::
PatternSort
::~
PatternSort
() {
491
GUM_DESTRUCTOR
(
GSpan
<
GUM_SCALAR
>::
PatternSort
);
492
}
493
494
template
<
typename
GUM_SCALAR
>
495
INLINE
bool
GSpan
<
GUM_SCALAR
>::
PatternSort
::
operator
()(
gspan
::
Pattern
*
i
,
496
gspan
::
Pattern
*
j
) {
497
// We want a descending order
498
return
gspan
->
tree
().
strategy
().
operator
()(
i
,
j
);
499
}
500
501
}
/* namespace prm */
502
}
/* namespace gum */
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:669
gum::prm::ParamScopeData::ParamScopeData
ParamScopeData(const std::string &s, const PRMReferenceSlot< GUM_SCALAR > &ref, Idx d)
Definition:
PRMClass_tpl.h:1101