aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
barrenNodesFinder.cpp
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Detect barren nodes for inference in Bayesian networks
25  */
26 #include <limits>
27 
28 #include <agrum/BN/algorithms/barrenNodesFinder.h>
29 
30 #ifdef GUM_NO_INLINE
31 # include <agrum/BN/algorithms/barrenNodesFinder_inl.h>
32 #endif // GUM_NO_INLINE
33 
34 namespace gum {
35 
36  /// returns the set of barren nodes in the messages sent in a junction tree
38  // assign a mark to all the nodes
39  // and mark all the observed nodes and their ancestors as non-barren
41  {
42  for (const auto node: *_dag_)
43  mark.insert(node, 0); // for the moment, 0 = possibly barren
44 
45  // mark all the observed nodes and their ancestors as non barren
46  // std::numeric_limits<unsigned int>::max () will be necessarily non
47  // barren
48  // later on
50  const Size non_barren = std::numeric_limits< Size >::max();
51  for (const auto node: *_observed_nodes_)
53  for (Idx i = 0; i < observed_anc.size(); ++i) {
54  const NodeId node = observed_anc[i];
55  if (!mark[node]) {
56  mark[node] = non_barren;
57  for (const auto par: _dag_->parents(node)) {
59  }
60  }
61  }
62  }
63 
64  // create the data structure that will contain the result of the
65  // method. By default, we assume that, for each pair of adjacent cliques,
66  // all
67  // the nodes that do not belong to their separator are possibly barren and,
68  // by sweeping the dag, we will remove the nodes that were determined
69  // above as non-barren. Structure result will assign to each (ordered) pair
70  // of adjacent cliques its set of barren nodes.
72  for (const auto& edge: junction_tree.edges()) {
74 
76  for (auto iter = non_barren1.beginSafe(); iter != non_barren1.endSafe(); ++iter) {
78  }
80 
82  for (auto iter = non_barren2.beginSafe(); iter != non_barren2.endSafe(); ++iter) {
84  }
86  }
87 
88  // for each node in the DAG, indicate which are the arcs in the result
89  // structure whose separator contain it: the separators are actually the
90  // targets of the queries.
92  for (const auto node: *_dag_)
94  for (const auto& elt: result) {
95  const Arc& arc = elt.first;
96  if (!result[arc].empty()) { // no need to further process cliques
97  const NodeSet& separator = // with no barren nodes
99 
100  for (const auto node: separator) {
102  }
103  }
104  }
105 
106  // To determine the set of non-barren nodes w.r.t. a given single node
107  // query, we rely on the fact that those are precisely all the ancestors of
108  // this single node. To mutualize the computations, we will thus sweep the
109  // DAG from top to bottom and exploit the fact that the set of ancestors of
110  // the child of a given node A contain the ancestors of A. Therefore, we
111  // will
112  // determine sets of paths in the DAG and, for each path, compute the set of
113  // its barren nodes from the source to the destination of the path. The
114  // optimal set of paths, i.e., that which will minimize computations, is
115  // obtained by solving a "minimum path cover in directed acyclic graphs".
116  // But
117  // such an algorithm is too costly for the gain we can get from it, so we
118  // will
119  // rely on a simple heuristics.
120 
121  // To compute the heuristics, we proceed as follows:
122  // 1/ we mark to 1 all the nodes that are ancestors of at least one (key)
123  // node
124  // with a non-empty arcset in node2arc and we extract from those the
125  // roots, i.e., those nodes whose set of parents, if any, have all been
126  // identified as non-barren by being marked as
127  // std::numeric_limits<unsigned int>::max (). Such nodes are
128  // thus the top of the graph to sweep.
129  // 2/ create a copy of the subgraph of the DAG w.r.t. the 1-marked nodes
130  // and, for each node, if the node has several parents and children,
131  // keep only one arc from one of the parents to the child with the
132  // smallest
133  // number of parents, and try to create a matching between parents and
134  // children and add one arc for each edge of this matching. This will
135  // allow
136  // us to create distinct paths in the DAG. Whenever a child has no more
137  // parents, it becomes the root of a new path.
138  // 3/ the sweeping will be performed from the roots of all these paths.
139 
140  // perform step 1/
142  {
144  for (const auto& elt: node2arc) {
145  if (!elt.second.empty()) { // only process nodes with assigned arcs
147  }
148  }
149  while (!nodes_to_mark.empty()) {
152 
153  if (!mark[node]) { // mark the node and all its ancestors
154  mark[node] = 1;
155  Size nb_par = 0;
156  for (auto par: _dag_->parents(node)) {
158  if (parent_mark != std::numeric_limits< Size >::max()) {
159  ++nb_par;
160  if (parent_mark == 0) { nodes_to_mark.insert(par); }
161  }
162  }
163 
164  if (nb_par == 0) { path_roots.insert(node); }
165  }
166  }
167  }
168 
169  // perform step 2/
170  DAG sweep_dag = *_dag_;
171  for (const auto node: *_dag_) { // keep only nodes marked with 1
172  if (mark[node] != 1) { sweep_dag.eraseNode(node); }
173  }
174  for (const auto node: sweep_dag) {
177  if ((nb_parents > 1) || (nb_children > 1)) {
178  // perform the matching
179  const auto& parents = sweep_dag.parents(node);
180 
181  // if there is no child, remove all the parents except the first one
182  if (nb_children == 0) {
183  auto iter_par = parents.beginSafe();
184  for (++iter_par; iter_par != parents.endSafe(); ++iter_par) {
186  }
187  } else {
188  // find the child with the smallest number of parents
189  const auto& children = sweep_dag.children(node);
192  for (const auto child: children) {
193  const auto new_nb = sweep_dag.parents(child).size();
194  if (new_nb < smallest_nb_par) {
197  }
198  }
199 
200  // if there is no parent, just keep the link with the smallest child
201  // and remove all the other arcs
202  if (nb_parents == 0) {
203  for (auto iter = children.beginSafe(); iter != children.endSafe(); ++iter) {
204  if (*iter != smallest_child) {
205  if (sweep_dag.parents(*iter).size() == 1) { path_roots.insert(*iter); }
207  }
208  }
209  } else {
210  auto nb_match = Size(std::min(nb_parents, nb_children) - 1);
211  auto iter_par = parents.beginSafe();
212  ++iter_par; // skip the first parent, whose arc with node will
213  // remain
214  auto iter_child = children.beginSafe();
215  for (Idx i = 0; i < nb_match; ++i, ++iter_par, ++iter_child) {
216  if (*iter_child == smallest_child) { ++iter_child; }
220  }
221  for (; iter_par != parents.endSafe(); ++iter_par) {
223  }
224  for (; iter_child != children.endSafe(); ++iter_child) {
225  if (*iter_child != smallest_child) {
228  }
229  }
230  }
231  }
232  }
233  }
234 
235  // step 3: sweep the paths from the roots of sweep_dag
236  // here, the idea is that, for each path of sweep_dag, the mark we put
237  // to the ancestors is a given number, say N, that increases from path
238  // to path. Hence, for a given path, all the nodes that are marked with a
239  // number at least as high as N are non-barren, the others being barren.
240  Idx mark_id = 2;
241  for (NodeId path: path_roots) {
242  // perform the sweeping from the path
243  while (true) {
244  // mark all the ancestors of the node
245  List< NodeId > to_mark{path};
246  while (!to_mark.empty()) {
247  NodeId node = to_mark.front();
248  to_mark.popFront();
249  if (mark[node] < mark_id) {
250  mark[node] = mark_id;
251  for (const auto par: _dag_->parents(node)) {
252  if (mark[par] < mark_id) { to_mark.insert(par); }
253  }
254  }
255  }
256 
257  // now, get all the arcs that contained node "path" in their separator.
258  // this node acts as a query target and, therefore, its ancestors
259  // shall be non-barren.
260  const ArcSet& arcs = node2arc[path];
261  for (const auto& arc: arcs) {
262  NodeSet& barren = result[arc];
263  for (auto iter = barren.beginSafe(); iter != barren.endSafe(); ++iter) {
264  if (mark[*iter] >= mark_id) {
265  // this indicates a non-barren node
266  barren.erase(iter);
267  }
268  }
269  }
270 
271  // go to the next sweeping node
273  if (sweep_children.size()) {
274  path = *(sweep_children.begin());
275  } else {
276  // here, the path has ended, so we shall go to the next path
277  ++mark_id;
278  break;
279  }
280  }
281  }
282 
283  return result;
284  }
285 
286  /// returns the set of barren nodes
288  // mark all the nodes in the dag as barren (true)
289  NodeProperty< bool > barren_mark = _dag_->nodesProperty(true);
290 
291  // mark all the ancestors of the evidence and targets as non-barren
293  int nb_non_barren = 0;
294  for (const auto node: *_observed_nodes_)
296  for (const auto node: *_target_nodes_)
298 
299  while (!nodes_to_examine.empty()) {
300  const NodeId node = nodes_to_examine.front();
302  if (barren_mark[node]) {
303  barren_mark[node] = false;
304  ++nb_non_barren;
305  for (const auto par: _dag_->parents(node))
307  }
308  }
309 
310  // here, all the nodes marked true are barren
312  for (const auto& marked_pair: barren_mark)
314 
315  return barren_nodes;
316  }
317 
318 } /* namespace gum */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643