aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
gspan_tpl.h
Go to the documentation of this file.
1
/**
2
*
3
* Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/**
23
* @file
24
* @brief Inline implementation of gspan.
25
*
26
* @author Lionel TORTI and Pierre-Henri WUILLEMIN(@LIP6)
27
*/
28
#
include
<
agrum
/
PRM
/
gspan
/
edgeGrowth
.
h
>
29
#
include
<
agrum
/
PRM
/
inference
/
gspan
.
h
>
30
31
namespace
gum
{
32
namespace
prm
{
33
34
template
<
typename
GUM_SCALAR >
35
void
GSpan<
GUM_SCALAR
>::
discoverPatterns
() {
36
Timer
t
;
37
_sortNodesAndEdges_
();
38
gspan
::
InterfaceGraph
<
GUM_SCALAR
>
graph
(*
_graph_
);
39
40
for
(
auto
root
=
_tree_
.
roots
().
begin
();
root
!=
_tree_
.
roots
().
end
(); ++
root
) {
41
if
(
_tree_
.
strategy
().
accept_root
(&(
_tree_
.
pattern
(*
root
)))) {
42
gspan
::
Pattern
&
p
=
_tree_
.
pattern
(*
root
);
43
_subgraph_mining_
(
graph
,
p
);
44
45
for
(
const
auto
node
:
_tree_
.
iso_graph
(
p
).
nodes
()) {
46
PRMInstance
<
GUM_SCALAR
>*
u
=
_tree_
.
iso_map
(
p
,
node
).
atPos
(0);
47
PRMInstance
<
GUM_SCALAR
>*
v
=
_tree_
.
iso_map
(
p
,
node
).
atPos
(1);
48
graph
.
graph
().
eraseEdge
(
Edge
(
graph
.
id
(
u
),
graph
.
id
(
v
)));
49
}
50
}
51
}
52
53
_sortPatterns_
();
54
}
55
56
template
<
typename
GUM_SCALAR
>
57
void
GSpan
<
GUM_SCALAR
>::
_sortNodesAndEdges_
() {
58
for
(
auto
iter
=
_graph_
->
labels
().
begin
();
iter
!=
_graph_
->
labels
().
end
(); ++
iter
) {
59
try
{
60
if
(
_graph_
->
nodes
(
iter
.
second
()).
size
() >= 2) {
61
_cost_
.
insert
(
62
iter
.
second
(),
63
_cost_func_
(
iter
.
second
()->
tree_width
,
_graph_
->
nodes
(
iter
.
second
()).
size
()));
64
_nodes_
.
push_back
(
const_cast
<
gspan
::
LabelData
* >(
iter
.
second
()));
65
}
66
}
catch
(
NotFound
&) {
67
// It's a label over edges
68
if
(
_isEdgeEligible_
(*(
_graph_
->
edges
(
iter
.
second
()).
begin
()))) {
69
_cost_
.
insert
(
70
iter
.
second
(),
71
_cost_func_
(
iter
.
second
()->
tree_width
,
_graph_
->
edges
(
iter
.
second
()).
size
()));
72
_edges_
.
push_back
(
iter
.
second
());
73
}
74
}
75
}
76
77
Bijection
<
Idx
,
gspan
::
LabelData
* >*
new_labels
=
new
Bijection
<
Idx
,
gspan
::
LabelData
* >();
78
GSpan
<
GUM_SCALAR
>::
LabelSort
my_sort
(
this
);
79
std
::
sort
(
_nodes_
.
begin
(),
_nodes_
.
end
(),
my_sort
);
80
std
::
sort
(
_edges_
.
begin
(),
_edges_
.
end
(),
my_sort
);
81
Size
idx
= 0;
82
83
for
(
auto
iter
=
_nodes_
.
begin
();
iter
!=
_nodes_
.
end
(); ++
iter
) {
84
(*
iter
)->
id
= ++
idx
;
85
new_labels
->
insert
(
idx
, *
iter
);
86
}
87
88
for
(
auto
iter
=
_edges_
.
begin
();
iter
!=
_edges_
.
end
(); ++
iter
) {
89
(*
iter
)->
id
= ++
idx
;
90
new_labels
->
insert
(
idx
, *
iter
);
91
_tree_
.
addRoot
(**
iter
);
92
}
93
94
delete
_graph_
->
_labels_
;
95
_graph_
->
_labels_
=
new_labels
;
96
}
97
98
template
<
typename
GUM_SCALAR
>
99
void
GSpan
<
GUM_SCALAR
>::
_subgraph_mining_
(
gspan
::
InterfaceGraph
<
GUM_SCALAR
>&
ig
,
100
gspan
::
Pattern
&
pat
) {
101
std
::
vector
<
gspan
::
Pattern
* >
stack
;
102
stack
.
push_back
(&
pat
);
103
// Pointers used in the following while
104
gspan
::
Pattern
*
p
=
nullptr
;
105
HashTable
<
std
::
string
,
gspan
::
EdgeGrowth
<
GUM_SCALAR
>* >*
edge_count
=
nullptr
;
106
gspan
::
EdgeGrowth
<
GUM_SCALAR
>*
edge_growth
=
nullptr
;
107
Sequence
<
PRMInstance
<
GUM_SCALAR
>* >*
seq
=
nullptr
;
108
PRMInstance
<
GUM_SCALAR
>*
current
=
nullptr
;
109
PRMInstance
<
GUM_SCALAR
>*
neighbor
=
nullptr
;
110
111
// Neighbor_id is the neighbor's id in the interface graph and
112
// neighbor_node
113
// is its id in the rightmost path in the case of a backward edge growth
114
NodeId
current_id
= 0;
115
NodeId
neighbor_node
= 0;
116
gspan
::
LabelData
*
neighbor_label
= 0;
117
118
typename
gspan
::
EdgeData
<
GUM_SCALAR
>*
edge_data
=
nullptr
;
119
120
size_t
idx
;
121
const
std
::
list
<
NodeId
>*
children
= 0;
122
123
while
(!
stack
.
empty
()) {
124
// Getting next pattern
125
p
=
stack
.
back
();
126
stack
.
pop_back
();
127
128
if
(
p
->
code
().
codes
.
size
() <
_depth_stop_
) {
129
// We need the rightmost path of p
130
std
::
list
<
NodeId
>
r_path
;
131
p
->
rightmostPath
(
r_path
);
132
// Mapping used to count each possible child of p, the position in the
133
// vector
134
// matches the one in the rightmost path
135
std
::
vector
<
HashTable
<
std
::
string
,
gspan
::
EdgeGrowth
<
GUM_SCALAR
>* >* >
count_vector
;
136
137
for
(
size_t
i
= 0;
i
<
r_path
.
size
(); ++
i
)
138
count_vector
.
push_back
(
139
new
HashTable
<
std
::
string
,
gspan
::
EdgeGrowth
<
GUM_SCALAR
>* >());
140
141
// For each subgraph represented by p, we look for a valid edge growth
142
// for
143
// each instance match of p in its isomorphism graph.
144
for
(
const
auto
iso_node
:
_tree_
.
iso_graph
(*
p
).
nodes
()) {
145
seq
= &(
_tree_
.
iso_map
(*
p
,
iso_node
));
146
idx
= 0;
147
148
for
(
const
auto
node
:
r_path
) {
149
edge_count
=
count_vector
[
idx
];
150
// Retrieving the equivalent instance in the current match
151
current
=
seq
->
atPos
((
Idx
)(
node
- 1));
152
current_id
=
ig
.
id
(
current
);
153
// Checking for edges not in p
154
155
for
(
const
auto
neighbor_id
:
ig
.
graph
().
neighbours
(
current_id
)) {
156
neighbor
=
ig
.
node
(
neighbor_id
).
n
;
157
158
// We want a forward edge in any case or a backward edge if
159
// current
160
// is the rightmost vertex
161
if
((!
seq
->
exists
(
neighbor
)) || (
node
==
r_path
.
back
())) {
162
// Things we need to know: the LabelData data of the neighbour
163
// and,
164
// if it's a backward edge, its node id in the rightmost path
165
edge_data
= &(
ig
.
edge
(
current_id
,
neighbor_id
));
166
neighbor_label
= (
neighbor
==
edge_data
->
u
) ?
edge_data
->
l_u
:
edge_data
->
l_v
;
167
neighbor_node
= (
seq
->
exists
(
neighbor
)) ?
seq
->
pos
(
neighbor
) + 1 : 0;
168
// Adding the edge growth to the edge_growth hashtable
169
gspan
::
EdgeGrowth
<
GUM_SCALAR
>
temp_growth
(
node
,
170
edge_data
->
l
,
171
neighbor_label
,
172
neighbor_node
);
173
174
try
{
175
edge_growth
= (*
edge_count
)[
temp_growth
.
toString
()];
176
edge_growth
->
insert
(
current
,
neighbor
);
177
}
catch
(
NotFound
&) {
178
edge_growth
=
new
gspan
::
EdgeGrowth
<
GUM_SCALAR
>(
node
,
179
edge_data
->
l
,
180
neighbor_label
,
181
neighbor_node
);
182
edge_growth
->
insert
(
current
,
neighbor
);
183
edge_count
->
insert
(
edge_growth
->
toString
(),
edge_growth
);
184
}
185
}
186
}
187
}
188
}
189
190
// Removing any infrequent child
191
for
(
size_t
node
= 0;
node
<
count_vector
.
size
(); ++
node
) {
192
edge_count
=
count_vector
[
node
];
193
194
for
(
const
auto
&
elt
: *
edge_count
) {
195
try
{
196
_tree_
.
growPattern
(*
p
, *
elt
.
second
, 2);
197
}
catch
(
OperationNotAllowed
&) {
198
// The child was not minimal or was not worth considering
199
}
200
201
delete
elt
.
second
;
202
}
203
204
delete
edge_count
;
205
}
206
207
// Calling _subgraph_mining_ over children of p
208
children
= &(
_tree_
.
children
(*
p
));
209
210
for
(
std
::
list
<
NodeId
>::
const_reverse_iterator
child
=
children
->
rbegin
();
211
child
!=
children
->
rend
();
212
++
child
)
213
stack
.
push_back
(&(
_tree_
.
pattern
(*
child
)));
214
}
215
}
216
}
217
218
template
<
typename
GUM_SCALAR
>
219
void
GSpan
<
GUM_SCALAR
>::
_sortPatterns_
() {
220
// First we put all the patterns in _patterns_.
221
std
::
vector
<
NodeId
>
stack
;
222
223
for
(
std
::
list
<
NodeId
>::
reverse_iterator
root
=
tree
().
roots
().
rbegin
();
224
root
!=
tree
().
roots
().
rend
();
225
++
root
)
226
stack
.
push_back
(*
root
);
227
228
NodeId
id
= 0;
229
std
::
list
<
NodeId
>*
children
=
nullptr
;
230
231
while
(!
stack
.
empty
()) {
232
id
=
stack
.
back
();
233
stack
.
pop_back
();
234
_patterns_
.
push_back
(&(
tree
().
pattern
(
id
)));
235
children
= &(
tree
().
children
(
tree
().
pattern
(
id
)));
236
237
for
(
std
::
list
<
NodeId
>::
reverse_iterator
child
=
children
->
rbegin
();
238
child
!=
children
->
rend
();
239
++
child
)
240
stack
.
push_back
(*
child
);
241
}
242
243
if
(!
_patterns_
.
empty
()) {
244
// We sort _patterns_.
245
GSpan
<
GUM_SCALAR
>::
PatternSort
my_sort
(
this
);
246
std
::
sort
(
_patterns_
.
begin
(),
_patterns_
.
end
(),
my_sort
);
247
// Now we need to find all the matches we can, using _patterns_.
248
// We start by the best Pattern and add it's maximal independent set to
249
// _chosen_
250
GSpan
<
GUM_SCALAR
>::
MatchedInstances
*
matches
251
=
new
GSpan
<
GUM_SCALAR
>::
MatchedInstances
();
252
Sequence
<
PRMInstance
<
GUM_SCALAR
>* >*
match
=
nullptr
;
253
254
for
(
const
auto
node
:
tree
().
max_indep_set
(*(
_patterns_
.
front
()))) {
255
match
= &(
tree
().
iso_map
(*(
_patterns_
.
front
()),
node
));
256
257
for
(
const
auto
i
: *
match
)
258
_chosen_
.
insert
(
i
);
259
260
matches
->
insert
(
match
);
261
}
262
263
_matched_instances_
.
insert
(
_patterns_
.
front
(),
matches
);
264
// Now we see what kind of pattern we can still use
265
bool
found
;
266
UndiGraph
*
iso_graph
=
nullptr
;
267
268
for
(
auto
patt
=
_patterns_
.
begin
() + 1;
patt
!=
_patterns_
.
end
(); ++
patt
) {
269
UndiGraph
reduced_iso_graph
;
270
std
::
vector
<
NodeId
>
degree_list
;
271
iso_graph
= &(
tree
().
iso_graph
(**
patt
));
272
273
for
(
const
auto
node
:
iso_graph
->
nodes
()) {
274
found
=
false
;
275
match
= &(
tree
().
iso_map
(**
patt
,
node
));
276
277
for
(
const
auto
i
: *
match
)
278
if
(
_chosen_
.
exists
(
i
)) {
279
found
=
true
;
280
break
;
281
}
282
283
if
(!
found
) {
284
// We add the pattern to the reduced isomorphism graph to compute
285
// the
286
// max independent set
287
// over the remaining matches
288
reduced_iso_graph
.
addNodeWithId
(
node
);
289
290
for
(
const
auto
iso
:
reduced_iso_graph
.
nodes
())
291
if
(
iso_graph
->
existsEdge
(
node
,
iso
))
reduced_iso_graph
.
addEdge
(
node
,
iso
);
292
293
degree_list
.
push_back
(
node
);
294
}
295
}
296
297
// We create a new set to hold all the chosen matches of patt
298
matches
=
new
GSpan
<
GUM_SCALAR
>::
MatchedInstances
();
299
// We can compute the max independent set and the matches belonging to
300
// it
301
typename
gspan
::
DFSTree
<
GUM_SCALAR
>::
NeighborDegreeSort
my_sort
(
reduced_iso_graph
);
302
std
::
sort
(
degree_list
.
begin
(),
degree_list
.
end
(),
my_sort
);
303
Set
<
NodeId
>
removed
;
304
305
for
(
const
auto
node
:
degree_list
)
306
if
(!
removed
.
exists
(
node
)) {
307
// First we update removed to follow the max independent set
308
// algorithm
309
removed
.
insert
(
node
);
310
311
for
(
const
auto
neighbor
:
reduced_iso_graph
.
neighbours
(
node
))
312
removed
.
insert
(
neighbor
);
313
314
// Second we update match and matches to keep track of the current
315
// match
316
match
= &(
tree
().
iso_map
(**
patt
,
node
));
317
matches
->
insert
(
match
);
318
319
for
(
const
auto
elt
: *
match
)
320
_chosen_
.
insert
(
elt
);
321
}
322
323
_matched_instances_
.
insert
(*
patt
,
matches
);
324
}
325
326
// // We remove patterns with 0 matches
327
std
::
vector
<
size_t
>
trash
;
328
329
for
(
size_t
idx
= 0;
idx
<
_patterns_
.
size
(); ++
idx
)
330
if
(
_matched_instances_
[
_patterns_
[
idx
]]->
size
() < 2)
trash
.
push_back
(
idx
);
331
332
while
(
trash
.
size
()) {
333
delete
_matched_instances_
[
_patterns_
[
trash
.
back
()]];
334
_matched_instances_
.
erase
(
_patterns_
[
trash
.
back
()]);
335
// delete _patterns_[trash.back()];
336
_patterns_
[
trash
.
back
()] =
_patterns_
.
back
();
337
_patterns_
.
pop_back
();
338
trash
.
pop_back
();
339
}
340
}
341
}
342
343
template
<
typename
GUM_SCALAR
>
344
INLINE
GSpan
<
GUM_SCALAR
>::
GSpan
(
const
PRM
<
GUM_SCALAR
>&
prm
,
345
const
PRMSystem
<
GUM_SCALAR
>&
sys
,
346
gspan
::
SearchStrategy
<
GUM_SCALAR
>*
strategy
) :
347
_graph_
(
new
gspan
::
InterfaceGraph
<
GUM_SCALAR
>(
sys
)),
348
_tree_
(*
_graph_
,
strategy
),
_depth_stop_
(
INT_MAX
) {
349
GUM_CONSTRUCTOR
(
GSpan
);
350
}
351
352
template
<
typename
GUM_SCALAR
>
353
INLINE
GSpan
<
GUM_SCALAR
>::~
GSpan
() {
354
GUM_DESTRUCTOR
(
GSpan
);
355
356
for
(
const
auto
&
elt
:
_matched_instances_
)
357
delete
elt
.
second
;
358
359
delete
_graph_
;
360
}
361
362
template
<
typename
GUM_SCALAR
>
363
INLINE
Size
GSpan
<
GUM_SCALAR
>::
getMaxDFSDepth
()
const
{
364
return
_depth_stop_
;
365
}
366
367
template
<
typename
GUM_SCALAR
>
368
INLINE
void
GSpan
<
GUM_SCALAR
>::
setMaxDFSDepth
(
Size
depth
) {
369
_depth_stop_
=
depth
;
370
}
371
372
template
<
typename
GUM_SCALAR
>
373
INLINE
gspan
::
DFSTree
<
GUM_SCALAR
>&
GSpan
<
GUM_SCALAR
>::
tree
() {
374
return
_tree_
;
375
}
376
377
template
<
typename
GUM_SCALAR
>
378
INLINE
const
gspan
::
DFSTree
<
GUM_SCALAR
>&
GSpan
<
GUM_SCALAR
>::
tree
()
const
{
379
return
_tree_
;
380
}
381
382
template
<
typename
GUM_SCALAR
>
383
INLINE
Idx
GSpan
<
GUM_SCALAR
>::
_cost_func_
(
Size
interface_size
,
Size
frequency
) {
384
return
Idx
(
interface_size
*
frequency
);
385
}
386
387
template
<
typename
GUM_SCALAR
>
388
INLINE
std
::
vector
<
gspan
::
Pattern
* >&
GSpan
<
GUM_SCALAR
>::
patterns
() {
389
return
_patterns_
;
390
}
391
392
template
<
typename
GUM_SCALAR
>
393
INLINE
const
std
::
vector
<
gspan
::
Pattern
* >&
GSpan
<
GUM_SCALAR
>::
patterns
()
const
{
394
return
_patterns_
;
395
}
396
397
template
<
typename
GUM_SCALAR
>
398
INLINE
typename
GSpan
<
GUM_SCALAR
>::
MatchedInstances
&
399
GSpan
<
GUM_SCALAR
>::
matches
(
const
gspan
::
Pattern
&
p
) {
400
return
*(
_matched_instances_
[
const_cast
<
gspan
::
Pattern
* >(&
p
)]);
401
}
402
403
template
<
typename
GUM_SCALAR
>
404
INLINE
const
typename
GSpan
<
GUM_SCALAR
>::
MatchedInstances
&
405
GSpan
<
GUM_SCALAR
>::
matches
(
const
gspan
::
Pattern
&
p
)
const
{
406
return
*(
_matched_instances_
[
const_cast
<
gspan
::
Pattern
* >(&
p
)]);
407
}
408
409
template
<
typename
GUM_SCALAR
>
410
INLINE
gspan
::
InterfaceGraph
<
GUM_SCALAR
>&
GSpan
<
GUM_SCALAR
>::
interfaceGraph
() {
411
return
*
_graph_
;
412
}
413
414
template
<
typename
GUM_SCALAR
>
415
INLINE
const
gspan
::
InterfaceGraph
<
GUM_SCALAR
>&
GSpan
<
GUM_SCALAR
>::
interfaceGraph
()
const
{
416
return
*
_graph_
;
417
}
418
419
template
<
typename
GUM_SCALAR
>
420
INLINE
bool
GSpan
<
GUM_SCALAR
>::
_isEdgeEligible_
(
gspan
::
EdgeData
<
GUM_SCALAR
>*
e
) {
421
return
(
_graph_
->
edges
(
e
->
l
).
size
() >= 2) && (
_graph_
->
nodes
(
e
->
l_u
).
size
() >= 2)
422
&& (
_graph_
->
nodes
(
e
->
l_v
).
size
() >= 2);
423
}
424
425
// LalbeSort
426
427
template
<
typename
GUM_SCALAR
>
428
INLINE
GSpan
<
GUM_SCALAR
>::
LabelSort
::
LabelSort
(
GSpan
*
my_gspan
) :
gspan
(
my_gspan
) {
429
GUM_CONSTRUCTOR
(
GSpan
<
GUM_SCALAR
>::
LabelSort
);
430
}
431
432
template
<
typename
GUM_SCALAR
>
433
INLINE
GSpan
<
GUM_SCALAR
>::
LabelSort
::
LabelSort
(
const
LabelSort
&
source
) :
434
gspan
(
source
.
gspan
) {
435
GUM_CONS_CPY
(
GSpan
<
GUM_SCALAR
>::
LabelSort
);
436
}
437
438
template
<
typename
GUM_SCALAR
>
439
INLINE
GSpan
<
GUM_SCALAR
>::
LabelSort
::~
LabelSort
() {
440
GUM_DESTRUCTOR
(
GSpan
<
GUM_SCALAR
>::
LabelSort
);
441
}
442
443
template
<
typename
GUM_SCALAR
>
444
INLINE
bool
GSpan
<
GUM_SCALAR
>::
LabelSort
::
operator
()(
gspan
::
LabelData
*
i
,
445
gspan
::
LabelData
*
j
) {
446
// We want a descending order
447
// return gspan-> _cost_[i] > gspan-> _cost_[j];
448
return
gspan
->
_tree_
.
strategy
()(
i
,
j
);
449
}
450
451
// PatternSort
452
453
template
<
typename
GUM_SCALAR
>
454
INLINE
GSpan
<
GUM_SCALAR
>::
PatternSort
::
PatternSort
(
GSpan
*
my_gspan
) :
gspan
(
my_gspan
) {
455
GUM_CONSTRUCTOR
(
GSpan
<
GUM_SCALAR
>::
PatternSort
);
456
}
457
458
template
<
typename
GUM_SCALAR
>
459
INLINE
GSpan
<
GUM_SCALAR
>::
PatternSort
::
PatternSort
(
const
PatternSort
&
source
) :
460
gspan
(
source
.
gspan
) {
461
GUM_CONS_CPY
(
GSpan
<
GUM_SCALAR
>::
PatternSort
);
462
}
463
464
template
<
typename
GUM_SCALAR
>
465
INLINE
GSpan
<
GUM_SCALAR
>::
PatternSort
::~
PatternSort
() {
466
GUM_DESTRUCTOR
(
GSpan
<
GUM_SCALAR
>::
PatternSort
);
467
}
468
469
template
<
typename
GUM_SCALAR
>
470
INLINE
bool
GSpan
<
GUM_SCALAR
>::
PatternSort
::
operator
()(
gspan
::
Pattern
*
i
,
gspan
::
Pattern
*
j
) {
471
// We want a descending order
472
return
gspan
->
tree
().
strategy
().
operator
()(
i
,
j
);
473
}
474
475
}
/* namespace prm */
476
}
/* namespace gum */
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:643
gum::prm::ParamScopeData::ParamScopeData
ParamScopeData(const std::string &s, const PRMReferenceSlot< GUM_SCALAR > &ref, Idx d)
Definition:
PRMClass_tpl.h:1032