aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
barrenNodesFinder.cpp
Go to the documentation of this file.
1
/**
2
*
3
* Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/**
23
* @file
24
* @brief Detect barren nodes for inference in Bayesian networks
25
*/
26
#
include
<
limits
>
27
28
#
include
<
agrum
/
BN
/
algorithms
/
barrenNodesFinder
.
h
>
29
30
#
ifdef
GUM_NO_INLINE
31
#
include
<
agrum
/
BN
/
algorithms
/
barrenNodesFinder_inl
.
h
>
32
#
endif
// GUM_NO_INLINE
33
34
namespace
gum
{
35
36
/// returns the set of barren nodes in the messages sent in a junction tree
37
ArcProperty
<
NodeSet
>
BarrenNodesFinder
::
barrenNodes
(
const
CliqueGraph
&
junction_tree
) {
38
// assign a mark to all the nodes
39
// and mark all the observed nodes and their ancestors as non-barren
40
NodeProperty
<
Size
>
mark
(
_dag_
->
size
());
41
{
42
for
(
const
auto
node
: *
_dag_
)
43
mark
.
insert
(
node
, 0);
// for the moment, 0 = possibly barren
44
45
// mark all the observed nodes and their ancestors as non barren
46
// std::numeric_limits<unsigned int>::max () will be necessarily non
47
// barren
48
// later on
49
Sequence
<
NodeId
>
observed_anc
(
_dag_
->
size
());
50
const
Size
non_barren
=
std
::
numeric_limits
<
Size
>::
max
();
51
for
(
const
auto
node
: *
_observed_nodes_
)
52
observed_anc
.
insert
(
node
);
53
for
(
Idx
i
= 0;
i
<
observed_anc
.
size
(); ++
i
) {
54
const
NodeId
node
=
observed_anc
[
i
];
55
if
(!
mark
[
node
]) {
56
mark
[
node
] =
non_barren
;
57
for
(
const
auto
par
:
_dag_
->
parents
(
node
)) {
58
if
(!
mark
[
par
] && !
observed_anc
.
exists
(
par
)) {
observed_anc
.
insert
(
par
); }
59
}
60
}
61
}
62
}
63
64
// create the data structure that will contain the result of the
65
// method. By default, we assume that, for each pair of adjacent cliques,
66
// all
67
// the nodes that do not belong to their separator are possibly barren and,
68
// by sweeping the dag, we will remove the nodes that were determined
69
// above as non-barren. Structure result will assign to each (ordered) pair
70
// of adjacent cliques its set of barren nodes.
71
ArcProperty
<
NodeSet
>
result
;
72
for
(
const
auto
&
edge
:
junction_tree
.
edges
()) {
73
const
NodeSet
&
separator
=
junction_tree
.
separator
(
edge
);
74
75
NodeSet
non_barren1
=
junction_tree
.
clique
(
edge
.
first
());
76
for
(
auto
iter
=
non_barren1
.
beginSafe
();
iter
!=
non_barren1
.
endSafe
(); ++
iter
) {
77
if
(
mark
[*
iter
] ||
separator
.
exists
(*
iter
)) {
non_barren1
.
erase
(
iter
); }
78
}
79
result
.
insert
(
Arc
(
edge
.
first
(),
edge
.
second
()),
std
::
move
(
non_barren1
));
80
81
NodeSet
non_barren2
=
junction_tree
.
clique
(
edge
.
second
());
82
for
(
auto
iter
=
non_barren2
.
beginSafe
();
iter
!=
non_barren2
.
endSafe
(); ++
iter
) {
83
if
(
mark
[*
iter
] ||
separator
.
exists
(*
iter
)) {
non_barren2
.
erase
(
iter
); }
84
}
85
result
.
insert
(
Arc
(
edge
.
second
(),
edge
.
first
()),
std
::
move
(
non_barren2
));
86
}
87
88
// for each node in the DAG, indicate which are the arcs in the result
89
// structure whose separator contain it: the separators are actually the
90
// targets of the queries.
91
NodeProperty
<
ArcSet
>
node2arc
;
92
for
(
const
auto
node
: *
_dag_
)
93
node2arc
.
insert
(
node
,
ArcSet
());
94
for
(
const
auto
&
elt
:
result
) {
95
const
Arc
&
arc
=
elt
.
first
;
96
if
(!
result
[
arc
].
empty
()) {
// no need to further process cliques
97
const
NodeSet
&
separator
=
// with no barren nodes
98
junction_tree
.
separator
(
Edge
(
arc
.
tail
(),
arc
.
head
()));
99
100
for
(
const
auto
node
:
separator
) {
101
node2arc
[
node
].
insert
(
arc
);
102
}
103
}
104
}
105
106
// To determine the set of non-barren nodes w.r.t. a given single node
107
// query, we rely on the fact that those are precisely all the ancestors of
108
// this single node. To mutualize the computations, we will thus sweep the
109
// DAG from top to bottom and exploit the fact that the set of ancestors of
110
// the child of a given node A contain the ancestors of A. Therefore, we
111
// will
112
// determine sets of paths in the DAG and, for each path, compute the set of
113
// its barren nodes from the source to the destination of the path. The
114
// optimal set of paths, i.e., that which will minimize computations, is
115
// obtained by solving a "minimum path cover in directed acyclic graphs".
116
// But
117
// such an algorithm is too costly for the gain we can get from it, so we
118
// will
119
// rely on a simple heuristics.
120
121
// To compute the heuristics, we proceed as follows:
122
// 1/ we mark to 1 all the nodes that are ancestors of at least one (key)
123
// node
124
// with a non-empty arcset in node2arc and we extract from those the
125
// roots, i.e., those nodes whose set of parents, if any, have all been
126
// identified as non-barren by being marked as
127
// std::numeric_limits<unsigned int>::max (). Such nodes are
128
// thus the top of the graph to sweep.
129
// 2/ create a copy of the subgraph of the DAG w.r.t. the 1-marked nodes
130
// and, for each node, if the node has several parents and children,
131
// keep only one arc from one of the parents to the child with the
132
// smallest
133
// number of parents, and try to create a matching between parents and
134
// children and add one arc for each edge of this matching. This will
135
// allow
136
// us to create distinct paths in the DAG. Whenever a child has no more
137
// parents, it becomes the root of a new path.
138
// 3/ the sweeping will be performed from the roots of all these paths.
139
140
// perform step 1/
141
NodeSet
path_roots
;
142
{
143
List
<
NodeId
>
nodes_to_mark
;
144
for
(
const
auto
&
elt
:
node2arc
) {
145
if
(!
elt
.
second
.
empty
()) {
// only process nodes with assigned arcs
146
nodes_to_mark
.
insert
(
elt
.
first
);
147
}
148
}
149
while
(!
nodes_to_mark
.
empty
()) {
150
NodeId
node
=
nodes_to_mark
.
front
();
151
nodes_to_mark
.
popFront
();
152
153
if
(!
mark
[
node
]) {
// mark the node and all its ancestors
154
mark
[
node
] = 1;
155
Size
nb_par
= 0;
156
for
(
auto
par
:
_dag_
->
parents
(
node
)) {
157
Size
parent_mark
=
mark
[
par
];
158
if
(
parent_mark
!=
std
::
numeric_limits
<
Size
>::
max
()) {
159
++
nb_par
;
160
if
(
parent_mark
== 0) {
nodes_to_mark
.
insert
(
par
); }
161
}
162
}
163
164
if
(
nb_par
== 0) {
path_roots
.
insert
(
node
); }
165
}
166
}
167
}
168
169
// perform step 2/
170
DAG
sweep_dag
= *
_dag_
;
171
for
(
const
auto
node
: *
_dag_
) {
// keep only nodes marked with 1
172
if
(
mark
[
node
] != 1) {
sweep_dag
.
eraseNode
(
node
); }
173
}
174
for
(
const
auto
node
:
sweep_dag
) {
175
const
Size
nb_parents
=
sweep_dag
.
parents
(
node
).
size
();
176
const
Size
nb_children
=
sweep_dag
.
children
(
node
).
size
();
177
if
((
nb_parents
> 1) || (
nb_children
> 1)) {
178
// perform the matching
179
const
auto
&
parents
=
sweep_dag
.
parents
(
node
);
180
181
// if there is no child, remove all the parents except the first one
182
if
(
nb_children
== 0) {
183
auto
iter_par
=
parents
.
beginSafe
();
184
for
(++
iter_par
;
iter_par
!=
parents
.
endSafe
(); ++
iter_par
) {
185
sweep_dag
.
eraseArc
(
Arc
(*
iter_par
,
node
));
186
}
187
}
else
{
188
// find the child with the smallest number of parents
189
const
auto
&
children
=
sweep_dag
.
children
(
node
);
190
NodeId
smallest_child
= 0;
191
Size
smallest_nb_par
=
std
::
numeric_limits
<
Size
>::
max
();
192
for
(
const
auto
child
:
children
) {
193
const
auto
new_nb
=
sweep_dag
.
parents
(
child
).
size
();
194
if
(
new_nb
<
smallest_nb_par
) {
195
smallest_nb_par
=
new_nb
;
196
smallest_child
=
child
;
197
}
198
}
199
200
// if there is no parent, just keep the link with the smallest child
201
// and remove all the other arcs
202
if
(
nb_parents
== 0) {
203
for
(
auto
iter
=
children
.
beginSafe
();
iter
!=
children
.
endSafe
(); ++
iter
) {
204
if
(*
iter
!=
smallest_child
) {
205
if
(
sweep_dag
.
parents
(*
iter
).
size
() == 1) {
path_roots
.
insert
(*
iter
); }
206
sweep_dag
.
eraseArc
(
Arc
(
node
, *
iter
));
207
}
208
}
209
}
else
{
210
auto
nb_match
=
Size
(
std
::
min
(
nb_parents
,
nb_children
) - 1);
211
auto
iter_par
=
parents
.
beginSafe
();
212
++
iter_par
;
// skip the first parent, whose arc with node will
213
// remain
214
auto
iter_child
=
children
.
beginSafe
();
215
for
(
Idx
i
= 0;
i
<
nb_match
; ++
i
, ++
iter_par
, ++
iter_child
) {
216
if
(*
iter_child
==
smallest_child
) { ++
iter_child
; }
217
sweep_dag
.
addArc
(*
iter_par
, *
iter_child
);
218
sweep_dag
.
eraseArc
(
Arc
(*
iter_par
,
node
));
219
sweep_dag
.
eraseArc
(
Arc
(
node
, *
iter_child
));
220
}
221
for
(;
iter_par
!=
parents
.
endSafe
(); ++
iter_par
) {
222
sweep_dag
.
eraseArc
(
Arc
(*
iter_par
,
node
));
223
}
224
for
(;
iter_child
!=
children
.
endSafe
(); ++
iter_child
) {
225
if
(*
iter_child
!=
smallest_child
) {
226
if
(
sweep_dag
.
parents
(*
iter_child
).
size
() == 1) {
path_roots
.
insert
(*
iter_child
); }
227
sweep_dag
.
eraseArc
(
Arc
(
node
, *
iter_child
));
228
}
229
}
230
}
231
}
232
}
233
}
234
235
// step 3: sweep the paths from the roots of sweep_dag
236
// here, the idea is that, for each path of sweep_dag, the mark we put
237
// to the ancestors is a given number, say N, that increases from path
238
// to path. Hence, for a given path, all the nodes that are marked with a
239
// number at least as high as N are non-barren, the others being barren.
240
Idx
mark_id
= 2;
241
for
(
NodeId
path
:
path_roots
) {
242
// perform the sweeping from the path
243
while
(
true
) {
244
// mark all the ancestors of the node
245
List
<
NodeId
>
to_mark
{
path
};
246
while
(!
to_mark
.
empty
()) {
247
NodeId
node
=
to_mark
.
front
();
248
to_mark
.
popFront
();
249
if
(
mark
[
node
] <
mark_id
) {
250
mark
[
node
] =
mark_id
;
251
for
(
const
auto
par
:
_dag_
->
parents
(
node
)) {
252
if
(
mark
[
par
] <
mark_id
) {
to_mark
.
insert
(
par
); }
253
}
254
}
255
}
256
257
// now, get all the arcs that contained node "path" in their separator.
258
// this node acts as a query target and, therefore, its ancestors
259
// shall be non-barren.
260
const
ArcSet
&
arcs
=
node2arc
[
path
];
261
for
(
const
auto
&
arc
:
arcs
) {
262
NodeSet
&
barren
=
result
[
arc
];
263
for
(
auto
iter
=
barren
.
beginSafe
();
iter
!=
barren
.
endSafe
(); ++
iter
) {
264
if
(
mark
[*
iter
] >=
mark_id
) {
265
// this indicates a non-barren node
266
barren
.
erase
(
iter
);
267
}
268
}
269
}
270
271
// go to the next sweeping node
272
const
NodeSet
&
sweep_children
=
sweep_dag
.
children
(
path
);
273
if
(
sweep_children
.
size
()) {
274
path
= *(
sweep_children
.
begin
());
275
}
else
{
276
// here, the path has ended, so we shall go to the next path
277
++
mark_id
;
278
break
;
279
}
280
}
281
}
282
283
return
result
;
284
}
285
286
/// returns the set of barren nodes
287
NodeSet
BarrenNodesFinder
::
barrenNodes
() {
288
// mark all the nodes in the dag as barren (true)
289
NodeProperty
<
bool
>
barren_mark
=
_dag_
->
nodesProperty
(
true
);
290
291
// mark all the ancestors of the evidence and targets as non-barren
292
List
<
NodeId
>
nodes_to_examine
;
293
int
nb_non_barren
= 0;
294
for
(
const
auto
node
: *
_observed_nodes_
)
295
nodes_to_examine
.
insert
(
node
);
296
for
(
const
auto
node
: *
_target_nodes_
)
297
nodes_to_examine
.
insert
(
node
);
298
299
while
(!
nodes_to_examine
.
empty
()) {
300
const
NodeId
node
=
nodes_to_examine
.
front
();
301
nodes_to_examine
.
popFront
();
302
if
(
barren_mark
[
node
]) {
303
barren_mark
[
node
] =
false
;
304
++
nb_non_barren
;
305
for
(
const
auto
par
:
_dag_
->
parents
(
node
))
306
nodes_to_examine
.
insert
(
par
);
307
}
308
}
309
310
// here, all the nodes marked true are barren
311
NodeSet
barren_nodes
(
_dag_
->
sizeNodes
() -
nb_non_barren
);
312
for
(
const
auto
&
marked_pair
:
barren_mark
)
313
if
(
marked_pair
.
second
)
barren_nodes
.
insert
(
marked_pair
.
first
);
314
315
return
barren_nodes
;
316
}
317
318
}
/* namespace gum */
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:643