aGrUM  0.14.2
barrenNodesFinder.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES et Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
24 #include <limits>
25 
27 
28 #ifdef GUM_NO_INLINE
30 #endif // GUM_NO_INLINE
31 
32 namespace gum {
33 
35  ArcProperty< NodeSet >
37  // assign a mark to all the nodes
38  // and mark all the observed nodes and their ancestors as non-barren
40  {
41  for (const auto node : *__dag)
42  mark.insert(node, 0); // for the moment, 0 = possibly barren
43 
44  // mark all the observed nodes and their ancestors as non barren
45  // std::numeric_limits<unsigned int>::max () will be necessarily non
46  // barren
47  // later on
48  Sequence< NodeId > observed_anc(__dag->size());
49  const Size non_barren = std::numeric_limits< Size >::max();
50  for (const auto node : *__observed_nodes)
51  observed_anc.insert(node);
52  for (Idx i = 0; i < observed_anc.size(); ++i) {
53  const NodeId node = observed_anc[i];
54  if (!mark[node]) {
55  mark[node] = non_barren;
56  for (const auto par : __dag->parents(node)) {
57  if (!mark[par] && !observed_anc.exists(par)) {
58  observed_anc.insert(par);
59  }
60  }
61  }
62  }
63  }
64 
65  // create the data structure that will contain the result of the
66  // method. By default, we assume that, for each pair of adjacent cliques,
67  // all
68  // the nodes that do not belong to their separator are possibly barren and,
69  // by sweeping the dag, we will remove the nodes that were determined
70  // above as non-barren. Structure result will assign to each (ordered) pair
71  // of adjacent cliques its set of barren nodes.
73  for (const auto& edge : junction_tree.edges()) {
74  const NodeSet& separator = junction_tree.separator(edge);
75 
76  NodeSet non_barren1 = junction_tree.clique(edge.first());
77  for (auto iter = non_barren1.beginSafe(); iter != non_barren1.endSafe();
78  ++iter) {
79  if (mark[*iter] || separator.exists(*iter)) { non_barren1.erase(iter); }
80  }
81  result.insert(Arc(edge.first(), edge.second()), std::move(non_barren1));
82 
83  NodeSet non_barren2 = junction_tree.clique(edge.second());
84  for (auto iter = non_barren2.beginSafe(); iter != non_barren2.endSafe();
85  ++iter) {
86  if (mark[*iter] || separator.exists(*iter)) { non_barren2.erase(iter); }
87  }
88  result.insert(Arc(edge.second(), edge.first()), std::move(non_barren2));
89  }
90 
91  // for each node in the DAG, indicate which are the arcs in the result
92  // structure whose separator contain it: the separators are actually the
93  // targets of the queries.
94  NodeProperty< ArcSet > node2arc;
95  for (const auto node : *__dag)
96  node2arc.insert(node, ArcSet());
97  for (const auto& elt : result) {
98  const Arc& arc = elt.first;
99  if (!result[arc].empty()) { // no need to further process cliques
100  const NodeSet& separator = // with no barren nodes
101  junction_tree.separator(Edge(arc.tail(), arc.head()));
102 
103  for (const auto node : separator) {
104  node2arc[node].insert(arc);
105  }
106  }
107  }
108 
109  // To determine the set of non-barren nodes w.r.t. a given single node
110  // query, we rely on the fact that those are precisely all the ancestors of
111  // this single node. To mutualize the computations, we will thus sweep the
112  // DAG from top to bottom and exploit the fact that the set of ancestors of
113  // the child of a given node A contain the ancestors of A. Therefore, we
114  // will
115  // determine sets of paths in the DAG and, for each path, compute the set of
116  // its barren nodes from the source to the destination of the path. The
117  // optimal set of paths, i.e., that which will minimize computations, is
118  // obtained by solving a "minimum path cover in directed acyclic graphs".
119  // But
120  // such an algorithm is too costly for the gain we can get from it, so we
121  // will
122  // rely on a simple heuristics.
123 
124  // To compute the heuristics, we proceed as follows:
125  // 1/ we mark to 1 all the nodes that are ancestors of at least one (key)
126  // node
127  // with a non-empty arcset in node2arc and we extract from those the
128  // roots, i.e., those nodes whose set of parents, if any, have all been
129  // identified as non-barren by being marked as
130  // std::numeric_limits<unsigned int>::max (). Such nodes are
131  // thus the top of the graph to sweep.
132  // 2/ create a copy of the subgraph of the DAG w.r.t. the 1-marked nodes
133  // and, for each node, if the node has several parents and children,
134  // keep only one arc from one of the parents to the child with the
135  // smallest
136  // number of parents, and try to create a matching between parents and
137  // children and add one arc for each edge of this matching. This will
138  // allow
139  // us to create distinct paths in the DAG. Whenever a child has no more
140  // parents, it becomes the root of a new path.
141  // 3/ the sweeping will be performed from the roots of all these paths.
142 
143  // perform step 1/
144  NodeSet path_roots;
145  {
146  List< NodeId > nodes_to_mark;
147  for (const auto& elt : node2arc) {
148  if (!elt.second.empty()) { // only process nodes with assigned arcs
149  nodes_to_mark.insert(elt.first);
150  }
151  }
152  while (!nodes_to_mark.empty()) {
153  NodeId node = nodes_to_mark.front();
154  nodes_to_mark.popFront();
155 
156  if (!mark[node]) { // mark the node and all its ancestors
157  mark[node] = 1;
158  Size nb_par = 0;
159  for (auto par : __dag->parents(node)) {
160  Size parent_mark = mark[par];
161  if (parent_mark != std::numeric_limits< Size >::max()) {
162  ++nb_par;
163  if (parent_mark == 0) { nodes_to_mark.insert(par); }
164  }
165  }
166 
167  if (nb_par == 0) { path_roots.insert(node); }
168  }
169  }
170  }
171 
172  // perform step 2/
173  DAG sweep_dag = *__dag;
174  for (const auto node : *__dag) { // keep only nodes marked with 1
175  if (mark[node] != 1) { sweep_dag.eraseNode(node); }
176  }
177  for (const auto node : sweep_dag) {
178  const Size nb_parents = sweep_dag.parents(node).size();
179  const Size nb_children = sweep_dag.children(node).size();
180  if ((nb_parents > 1) || (nb_children > 1)) {
181  // perform the matching
182  const auto& parents = sweep_dag.parents(node);
183 
184  // if there is no child, remove all the parents except the first one
185  if (nb_children == 0) {
186  auto iter_par = parents.beginSafe();
187  for (++iter_par; iter_par != parents.endSafe(); ++iter_par) {
188  sweep_dag.eraseArc(Arc(*iter_par, node));
189  }
190  } else {
191  // find the child with the smallest number of parents
192  const auto& children = sweep_dag.children(node);
193  NodeId smallest_child = 0;
194  Size smallest_nb_par = std::numeric_limits< Size >::max();
195  for (const auto child : children) {
196  const auto new_nb = sweep_dag.parents(child).size();
197  if (new_nb < smallest_nb_par) {
198  smallest_nb_par = new_nb;
199  smallest_child = child;
200  }
201  }
202 
203  // if there is no parent, just keep the link with the smallest child
204  // and remove all the other arcs
205  if (nb_parents == 0) {
206  for (auto iter = children.beginSafe(); iter != children.endSafe();
207  ++iter) {
208  if (*iter != smallest_child) {
209  if (sweep_dag.parents(*iter).size() == 1) {
210  path_roots.insert(*iter);
211  }
212  sweep_dag.eraseArc(Arc(node, *iter));
213  }
214  }
215  } else {
216  auto nb_match = Size(std::min(nb_parents, nb_children) - 1);
217  auto iter_par = parents.beginSafe();
218  ++iter_par; // skip the first parent, whose arc with node will
219  // remain
220  auto iter_child = children.beginSafe();
221  for (Idx i = 0; i < nb_match; ++i, ++iter_par, ++iter_child) {
222  if (*iter_child == smallest_child) { ++iter_child; }
223  sweep_dag.addArc(*iter_par, *iter_child);
224  sweep_dag.eraseArc(Arc(*iter_par, node));
225  sweep_dag.eraseArc(Arc(node, *iter_child));
226  }
227  for (; iter_par != parents.endSafe(); ++iter_par) {
228  sweep_dag.eraseArc(Arc(*iter_par, node));
229  }
230  for (; iter_child != children.endSafe(); ++iter_child) {
231  if (*iter_child != smallest_child) {
232  if (sweep_dag.parents(*iter_child).size() == 1) {
233  path_roots.insert(*iter_child);
234  }
235  sweep_dag.eraseArc(Arc(node, *iter_child));
236  }
237  }
238  }
239  }
240  }
241  }
242 
243  // step 3: sweep the paths from the roots of sweep_dag
244  // here, the idea is that, for each path of sweep_dag, the mark we put
245  // to the ancestors is a given number, say N, that increases from path
246  // to path. Hence, for a given path, all the nodes that are marked with a
247  // number at least as high as N are non-barren, the others being barren.
248  Idx mark_id = 2;
249  for (NodeId path : path_roots) {
250  // perform the sweeping from the path
251  while (true) {
252  // mark all the ancestors of the node
253  List< NodeId > to_mark{path};
254  while (!to_mark.empty()) {
255  NodeId node = to_mark.front();
256  to_mark.popFront();
257  if (mark[node] < mark_id) {
258  mark[node] = mark_id;
259  for (const auto par : __dag->parents(node)) {
260  if (mark[par] < mark_id) { to_mark.insert(par); }
261  }
262  }
263  }
264 
265  // now, get all the arcs that contained node "path" in their separator.
266  // this node acts as a query target and, therefore, its ancestors
267  // shall be non-barren.
268  const ArcSet& arcs = node2arc[path];
269  for (const auto& arc : arcs) {
270  NodeSet& barren = result[arc];
271  for (auto iter = barren.beginSafe(); iter != barren.endSafe(); ++iter) {
272  if (mark[*iter] >= mark_id) {
273  // this indicates a non-barren node
274  barren.erase(iter);
275  }
276  }
277  }
278 
279  // go to the next sweeping node
280  const NodeSet& sweep_children = sweep_dag.children(path);
281  if (sweep_children.size()) {
282  path = *(sweep_children.begin());
283  } else {
284  // here, the path has ended, so we shall go to the next path
285  ++mark_id;
286  break;
287  }
288  }
289  }
290 
291  return result;
292  }
293 
296  // mark all the nodes in the dag as barren (true)
297  NodeProperty< bool > barren_mark = __dag->nodesProperty(true);
298 
299  // mark all the ancestors of the evidence and targets as non-barren
300  List< NodeId > nodes_to_examine;
301  int nb_non_barren = 0;
302  for (const auto node : *__observed_nodes)
303  nodes_to_examine.insert(node);
304  for (const auto node : *__target_nodes)
305  nodes_to_examine.insert(node);
306 
307  while (!nodes_to_examine.empty()) {
308  const NodeId node = nodes_to_examine.front();
309  nodes_to_examine.popFront();
310  if (barren_mark[node]) {
311  barren_mark[node] = false;
312  ++nb_non_barren;
313  for (const auto par : __dag->parents(node))
314  nodes_to_examine.insert(par);
315  }
316  }
317 
318  // here, all the nodes marked true are barren
319  NodeSet barren_nodes(__dag->sizeNodes() - nb_non_barren);
320  for (const auto& marked_pair : barren_mark)
321  if (marked_pair.second) barren_nodes.insert(marked_pair.first);
322 
323  return barren_nodes;
324  }
325 
326 } /* namespace gum */
bool empty() const noexcept
Returns a boolean indicating whether the chained list is empty.
Definition: list_tpl.h:1967
const NodeSet & separator(const Edge &edge) const
returns the separator included in a given edge
Size size() const
alias for sizeNodes
Set< Arc > ArcSet
Some typdefs and define for shortcuts ...
const NodeSet * __observed_nodes
the set of observed nodes
NodeProperty< VAL > nodesProperty(VAL(*f)(const NodeId &), Size size=0) const
a method to create a HashTable with key:NodeId and value:VAL
void erase(const Key &k)
Erases an element from the set.
Definition: set_tpl.h:653
Generic doubly linked lists.
Definition: list.h:369
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition: set_tpl.h:514
void popFront()
Removes the first element of a List, if any.
Definition: list_tpl.h:1961
NodeId head() const
returns the head of the arc
The class for generic Hash Tables.
Definition: hashTable.h:676
const iterator_safe & endSafe() const noexcept
The usual safe end iterator to parse the set.
Definition: set_tpl.h:499
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition: set_tpl.h:604
const EdgeSet & edges() const
returns the set of edges stored within the EdgeGraphPart
The base class for all directed edgesThis class is used as a basis for manipulating all directed edge...
const NodeSet & parents(const NodeId id) const
returns the set of nodes with arc ingoing to a given node
NodeSet barrenNodes()
returns the set of barren nodes
const NodeSet & clique(const NodeId idClique) const
returns the set of nodes included into a given clique
Basic graph of cliques.
Definition: cliqueGraph.h:55
Size sizeNodes() const
returns the number of nodes in the NodeGraphPart
Val & insert(const Val &val)
Inserts a new element at the end of the chained list (alias of pushBack).
Definition: list_tpl.h:1616
Val & front() const
Returns a reference to first element of a list, if any.
Definition: list_tpl.h:1828
Detect barren nodes for inference in Bayesian networks.
The base class for all undirected edges.
const DAG * __dag
the DAG on which we compute the barren nodes
iterator_safe beginSafe() const
The usual safe begin iterator to parse the set.
Definition: set_tpl.h:485
const NodeSet * __target_nodes
the set of targeted nodes
Size Idx
Type for indexes.
Definition: types.h:50
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45
Size size() const noexcept
Returns the number of elements in the set.
Definition: set_tpl.h:698
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
NodeId first() const
returns one extremal node ID (whichever one it is is unspecified)
Base class for dag.
Definition: DAG.h:99
virtual void eraseNode(const NodeId id)
remove a node and its adjacent arcs from the graph
Definition: diGraph_inl.h:66
Size NodeId
Type for node ids.
Definition: graphElements.h:97
void insert(const Key &k)
Inserts a new element into the set.
Definition: set_tpl.h:610
NodeId tail() const
returns the tail of the arc
void insert(const Key &k)
Insert an element at the end of the sequence.
Definition: sequence_tpl.h:405