aGrUM  0.13.2
barrenNodesFinder.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES et Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
24 #include <limits>
25 
27 
28 #ifdef GUM_NO_INLINE
30 #endif // GUM_NO_INLINE
31 
32 namespace gum {
33 
35  ArcProperty< NodeSet >
37  // assign a mark to all the nodes
38  // and mark all the observed nodes and their ancestors as non-barren
40  {
41  for (const auto node : *__dag)
42  mark.insert(node, 0); // for the moment, 0 = possibly barren
43 
44  // mark all the observed nodes and their ancestors as non barren
45  // std::numeric_limits<unsigned int>::max () will be necessarily non
46  // barren
47  // later on
48  Sequence< NodeId > observed_anc(__dag->size());
49  const Size non_barren = std::numeric_limits< Size >::max();
50  for (const auto node : *__observed_nodes)
51  observed_anc.insert(node);
52  for (Idx i = 0; i < observed_anc.size(); ++i) {
53  const NodeId node = observed_anc[i];
54  if (!mark[node]) {
55  mark[node] = non_barren;
56  for (const auto par : __dag->parents(node)) {
57  if (!mark[par] && !observed_anc.exists(par)) {
58  observed_anc.insert(par);
59  }
60  }
61  }
62  }
63  }
64 
65  // create the data structure that will contain the result of the
66  // method. By default, we assume that, for each pair of adjacent cliques,
67  // all
68  // the nodes that do not belong to their separator are possibly barren and,
69  // by sweeping the dag, we will remove the nodes that were determined
70  // above as non-barren. Structure result will assign to each (ordered) pair
71  // of adjacent cliques its set of barren nodes.
73  for (const auto& edge : junction_tree.edges()) {
74  const NodeSet& separator = junction_tree.separator(edge);
75 
76  NodeSet non_barren1 = junction_tree.clique(edge.first());
77  for (auto iter = non_barren1.beginSafe(); iter != non_barren1.endSafe();
78  ++iter) {
79  if (mark[*iter] || separator.exists(*iter)) { non_barren1.erase(iter); }
80  }
81  result.insert(Arc(edge.first(), edge.second()), std::move(non_barren1));
82 
83  NodeSet non_barren2 = junction_tree.clique(edge.second());
84  for (auto iter = non_barren2.beginSafe(); iter != non_barren2.endSafe();
85  ++iter) {
86  if (mark[*iter] || separator.exists(*iter)) { non_barren2.erase(iter); }
87  }
88  result.insert(Arc(edge.second(), edge.first()), std::move(non_barren2));
89  }
90 
91  // for each node in the DAG, indicate which are the arcs in the result
92  // structure whose separator contain it: the separators are actually the
93  // targets of the queries.
94  NodeProperty< ArcSet > node2arc;
95  for (const auto node : *__dag)
96  node2arc.insert(node, ArcSet());
97  for (const auto& elt : result) {
98  const Arc& arc = elt.first;
99  if (!result[arc].empty()) { // no need to further process cliques
100  const NodeSet& separator = // with no barren nodes
101  junction_tree.separator(Edge(arc.tail(), arc.head()));
102 
103  for (const auto node : separator) {
104  node2arc[node].insert(arc);
105  }
106  }
107  }
108 
109  // To determine the set of non-barren nodes w.r.t. a given single node
110  // query, we rely on the fact that those are precisely all the ancestors of
111  // this single node. To mutualize the computations, we will thus sweep the
112  // DAG from top to bottom and exploit the fact that the set of ancestors of
113  // the child of a given node A contain the ancestors of A. Therefore, we
114  // will
115  // determine sets of paths in the DAG and, for each path, compute the set of
116  // its barren nodes from the source to the destination of the path. The
117  // optimal set of paths, i.e., that which will minimize computations, is
118  // obtained by solving a "minimum path cover in directed acyclic graphs".
119  // But
120  // such an algorithm is too costly for the gain we can get from it, so we
121  // will
122  // rely on a simple heuristics.
123 
124  // To compute the heuristics, we proceed as follows:
125  // 1/ we mark to 1 all the nodes that are ancestors of at least one (key)
126  // node
127  // with a non-empty arcset in node2arc and we extract from those the
128  // roots, i.e., those nodes whose set of parents, if any, have all been
129  // identified as non-barren by being marked as
130  // std::numeric_limits<unsigned int>::max (). Such nodes are
131  // thus the top of the graph to sweep.
132  // 2/ create a copy of the subgraph of the DAG w.r.t. the 1-marked nodes
133  // and, for each node, if the node has several parents and children,
134  // keep only one arc from one of the parents to the child with the
135  // smallest
136  // number of parents, and try to create a matching between parents and
137  // children and add one arc for each edge of this matching. This will
138  // allow
139  // us to create distinct paths in the DAG. Whenever a child has no more
140  // parents, it becomes the root of a new path.
141  // 3/ the sweeping will be performed from the roots of all these paths.
142 
143  // perform step 1/
144  NodeSet path_roots;
145  {
146  List< NodeId > nodes_to_mark;
147  for (const auto& elt : node2arc) {
148  if (!elt.second.empty()) { // only process nodes with assigned arcs
149  nodes_to_mark.insert(elt.first);
150  }
151  }
152  while (!nodes_to_mark.empty()) {
153  NodeId node = nodes_to_mark.front();
154  nodes_to_mark.popFront();
155 
156  if (!mark[node]) { // mark the node and all its ancestors
157  mark[node] = 1;
158  Size nb_par = 0;
159  for (auto par : __dag->parents(node)) {
160  Size parent_mark = mark[par];
161  if (parent_mark != std::numeric_limits< Size >::max()) {
162  ++nb_par;
163  if (parent_mark == 0) { nodes_to_mark.insert(par); }
164  }
165  }
166 
167  if (nb_par == 0) { path_roots.insert(node); }
168  }
169  }
170  }
171 
172  // perform step 2/
173  DAG sweep_dag = *__dag;
174  for (const auto node : *__dag) { // keep only nodes marked with 1
175  if (mark[node] != 1) { sweep_dag.eraseNode(node); }
176  }
177  for (const auto node : sweep_dag) {
178  const Size nb_parents = sweep_dag.parents(node).size();
179  const Size nb_children = sweep_dag.children(node).size();
180  if ((nb_parents > 1) || (nb_children > 1)) {
181  // perform the matching
182  const auto& parents = sweep_dag.parents(node);
183 
184  // if there is no child, remove all the parents except the first one
185  if (nb_children == 0) {
186  auto iter_par = parents.beginSafe();
187  for (++iter_par; iter_par != parents.endSafe(); ++iter_par) {
188  sweep_dag.eraseArc(Arc(*iter_par, node));
189  }
190  } else {
191  // find the child with the smallest number of parents
192  const auto& children = sweep_dag.children(node);
193  NodeId smallest_child = 0;
194  Size smallest_nb_par = std::numeric_limits< Size >::max();
195  for (const auto child : children) {
196  const auto new_nb = sweep_dag.parents(child).size();
197  if (new_nb < smallest_nb_par) {
198  smallest_nb_par = new_nb;
199  smallest_child = child;
200  }
201  }
202 
203  // if there is no parent, just keep the link with the smallest child
204  // and remove all the other arcs
205  if (nb_parents == 0) {
206  for (auto iter = children.beginSafe(); iter != children.endSafe();
207  ++iter) {
208  if (*iter != smallest_child) {
209  if (sweep_dag.parents(*iter).size() == 1) {
210  path_roots.insert(*iter);
211  }
212  sweep_dag.eraseArc(Arc(node, *iter));
213  }
214  }
215  } else {
216  auto nb_match = Size(std::min(nb_parents, nb_children) - 1);
217  auto iter_par = parents.beginSafe();
218  ++iter_par; // skip the first parent, whose arc with node will
219  // remain
220  auto iter_child = children.beginSafe();
221  for (Idx i = 0; i < nb_match; ++i, ++iter_par, ++iter_child) {
222  if (*iter_child == smallest_child) { ++iter_child; }
223  sweep_dag.addArc(*iter_par, *iter_child);
224  sweep_dag.eraseArc(Arc(*iter_par, node));
225  sweep_dag.eraseArc(Arc(node, *iter_child));
226  }
227  for (; iter_par != parents.endSafe(); ++iter_par) {
228  sweep_dag.eraseArc(Arc(*iter_par, node));
229  }
230  for (; iter_child != children.endSafe(); ++iter_child) {
231  if (*iter_child != smallest_child) {
232  if (sweep_dag.parents(*iter_child).size() == 1) {
233  path_roots.insert(*iter_child);
234  }
235  sweep_dag.eraseArc(Arc(node, *iter_child));
236  }
237  }
238  }
239  }
240  }
241  }
242 
243  // step 3: sweep the paths from the roots of sweep_dag
244  // here, the idea is that, for each path of sweep_dag, the mark we put
245  // to the ancestors is a given number, say N, that increases from path
246  // to path. Hence, for a given path, all the nodes that are marked with a
247  // number at least as high as N are non-barren, the others being barren.
248  Idx mark_id = 2;
249  for (NodeId path : path_roots) {
250  // perform the sweeping from the path
251  while (true) {
252  // mark all the ancestors of the node
253  List< NodeId > to_mark{path};
254  while (!to_mark.empty()) {
255  NodeId node = to_mark.front();
256  to_mark.popFront();
257  if (mark[node] < mark_id) {
258  mark[node] = mark_id;
259  for (const auto par : __dag->parents(node)) {
260  if (mark[par] < mark_id) { to_mark.insert(par); }
261  }
262  }
263  }
264 
265  // now, get all the arcs that contained node "path" in their separator.
266  // this node acts as a query target and, therefore, its ancestors
267  // shall be non-barren.
268  const ArcSet& arcs = node2arc[path];
269  for (const auto& arc : arcs) {
270  NodeSet& barren = result[arc];
271  for (auto iter = barren.beginSafe(); iter != barren.endSafe(); ++iter) {
272  if (mark[*iter] >= mark_id) {
273  // this indicates a non-barren node
274  barren.erase(iter);
275  }
276  }
277  }
278 
279  // go to the next sweeping node
280  const NodeSet& sweep_children = sweep_dag.children(path);
281  if (sweep_children.size()) {
282  path = *(sweep_children.begin());
283  } else {
284  // here, the path has ended, so we shall go to the next path
285  ++mark_id;
286  break;
287  }
288  }
289  }
290 
291  return result;
292  }
293 
296  // mark all the nodes in the dag as barren (true)
297  NodeProperty< bool > barren_mark = __dag->nodesProperty(true);
298 
299  // mark all the ancestors of the evidence and targets as non-barren
300  List< NodeId > nodes_to_examine;
301  int nb_non_barren = 0;
302  for (const auto node : *__observed_nodes)
303  nodes_to_examine.insert(node);
304  for (const auto node : *__target_nodes)
305  nodes_to_examine.insert(node);
306 
307  while (!nodes_to_examine.empty()) {
308  const NodeId node = nodes_to_examine.front();
309  nodes_to_examine.popFront();
310  if (barren_mark[node]) {
311  barren_mark[node] = false;
312  ++nb_non_barren;
313  for (const auto par : __dag->parents(node))
314  nodes_to_examine.insert(par);
315  }
316  }
317 
318  // here, all the nodes marked true are barren
319  NodeSet barren_nodes(__dag->sizeNodes() - nb_non_barren);
320  for (const auto& marked_pair : barren_mark)
321  if (marked_pair.second) barren_nodes.insert(marked_pair.first);
322 
323  return barren_nodes;
324  }
325 
326 } /* namespace gum */
Val & front() const
Returns a reference to first element of a list, if any.
Definition: list_tpl.h:1828
Size sizeNodes() const
returns the number of nodes in the NodeGraphPart
NodeProperty< VAL > nodesProperty(VAL(*f)(const NodeId &), Size size=0) const
a method to create a HashTable with key:NodeId and value:VAL
bool empty() const noexcept
Returns a boolean indicating whether the chained list is empty.
Definition: list_tpl.h:1967
unsigned long Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:50
const EdgeSet & edges() const
returns the set of edges stored within the EdgeGraphPart
unsigned int NodeId
Type for node ids.
Definition: graphElements.h:97
const NodeSet & separator(const Edge &edge) const
returns the separator included in a given edge
NodeId tail() const
returns the tail of the arc
const NodeSet & clique(const NodeId idClique) const
returns the set of nodes included into a given clique
NodeId first() const
returns one extremal node ID (whichever one it is is unspecified)
Set< Arc > ArcSet
Some typdefs and define for shortcuts ...
const NodeSet * __observed_nodes
the set of observed nodes
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition: set_tpl.h:607
void erase(const Key &k)
Erases an element from the set.
Definition: set_tpl.h:656
Generic doubly linked lists.
Definition: list.h:369
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
void popFront()
Removes the first element of a List, if any.
Definition: list_tpl.h:1961
The class for generic Hash Tables.
Definition: hashTable.h:676
const iterator_safe & endSafe() const noexcept
The usual safe end iterator to parse the set.
Definition: set_tpl.h:502
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition: set_tpl.h:517
const NodeSet & parents(const NodeId id) const
returns the set of nodes with arc ingoing to a given node
iterator_safe beginSafe() const
The usual safe begin iterator to parse the set.
Definition: set_tpl.h:488
The base class for all directed edgesThis class is used as a basis for manipulating all directed edge...
Size size() const
alias for sizeNodes
NodeSet barrenNodes()
returns the set of barren nodes
NodeId head() const
returns the head of the arc
Basic graph of cliques.
Definition: cliqueGraph.h:55
Val & insert(const Val &val)
Inserts a new element at the end of the chained list (alias of pushBack).
Definition: list_tpl.h:1616
Detect barren nodes for inference in Bayesian networks.
The base class for all undirected edges.
const DAG * __dag
the DAG on which we compute the barren nodes
const NodeSet * __target_nodes
the set of targeted nodes
Size size() const noexcept
Returns the number of elements in the set.
Definition: set_tpl.h:701
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
unsigned long Idx
Type for indexes.
Definition: types.h:43
Base class for dag.
Definition: DAG.h:98
virtual void eraseNode(const NodeId id)
remove a node and its adjacent arcs from the graph
Definition: diGraph_inl.h:66
void insert(const Key &k)
Inserts a new element into the set.
Definition: set_tpl.h:613
void insert(const Key &k)
Insert an element at the end of the sequence.