aGrUM  0.16.0
barrenNodesFinder.cpp
Go to the documentation of this file.
1 
27 #include <limits>
28 
30 
31 #ifdef GUM_NO_INLINE
33 #endif // GUM_NO_INLINE
34 
35 namespace gum {
36 
38  ArcProperty< NodeSet >
40  // assign a mark to all the nodes
41  // and mark all the observed nodes and their ancestors as non-barren
43  {
44  for (const auto node : *__dag)
45  mark.insert(node, 0); // for the moment, 0 = possibly barren
46 
47  // mark all the observed nodes and their ancestors as non barren
48  // std::numeric_limits<unsigned int>::max () will be necessarily non
49  // barren
50  // later on
51  Sequence< NodeId > observed_anc(__dag->size());
52  const Size non_barren = std::numeric_limits< Size >::max();
53  for (const auto node : *__observed_nodes)
54  observed_anc.insert(node);
55  for (Idx i = 0; i < observed_anc.size(); ++i) {
56  const NodeId node = observed_anc[i];
57  if (!mark[node]) {
58  mark[node] = non_barren;
59  for (const auto par : __dag->parents(node)) {
60  if (!mark[par] && !observed_anc.exists(par)) {
61  observed_anc.insert(par);
62  }
63  }
64  }
65  }
66  }
67 
68  // create the data structure that will contain the result of the
69  // method. By default, we assume that, for each pair of adjacent cliques,
70  // all
71  // the nodes that do not belong to their separator are possibly barren and,
72  // by sweeping the dag, we will remove the nodes that were determined
73  // above as non-barren. Structure result will assign to each (ordered) pair
74  // of adjacent cliques its set of barren nodes.
76  for (const auto& edge : junction_tree.edges()) {
77  const NodeSet& separator = junction_tree.separator(edge);
78 
79  NodeSet non_barren1 = junction_tree.clique(edge.first());
80  for (auto iter = non_barren1.beginSafe(); iter != non_barren1.endSafe();
81  ++iter) {
82  if (mark[*iter] || separator.exists(*iter)) { non_barren1.erase(iter); }
83  }
84  result.insert(Arc(edge.first(), edge.second()), std::move(non_barren1));
85 
86  NodeSet non_barren2 = junction_tree.clique(edge.second());
87  for (auto iter = non_barren2.beginSafe(); iter != non_barren2.endSafe();
88  ++iter) {
89  if (mark[*iter] || separator.exists(*iter)) { non_barren2.erase(iter); }
90  }
91  result.insert(Arc(edge.second(), edge.first()), std::move(non_barren2));
92  }
93 
94  // for each node in the DAG, indicate which are the arcs in the result
95  // structure whose separator contain it: the separators are actually the
96  // targets of the queries.
97  NodeProperty< ArcSet > node2arc;
98  for (const auto node : *__dag)
99  node2arc.insert(node, ArcSet());
100  for (const auto& elt : result) {
101  const Arc& arc = elt.first;
102  if (!result[arc].empty()) { // no need to further process cliques
103  const NodeSet& separator = // with no barren nodes
104  junction_tree.separator(Edge(arc.tail(), arc.head()));
105 
106  for (const auto node : separator) {
107  node2arc[node].insert(arc);
108  }
109  }
110  }
111 
112  // To determine the set of non-barren nodes w.r.t. a given single node
113  // query, we rely on the fact that those are precisely all the ancestors of
114  // this single node. To mutualize the computations, we will thus sweep the
115  // DAG from top to bottom and exploit the fact that the set of ancestors of
116  // the child of a given node A contain the ancestors of A. Therefore, we
117  // will
118  // determine sets of paths in the DAG and, for each path, compute the set of
119  // its barren nodes from the source to the destination of the path. The
120  // optimal set of paths, i.e., that which will minimize computations, is
121  // obtained by solving a "minimum path cover in directed acyclic graphs".
122  // But
123  // such an algorithm is too costly for the gain we can get from it, so we
124  // will
125  // rely on a simple heuristics.
126 
127  // To compute the heuristics, we proceed as follows:
128  // 1/ we mark to 1 all the nodes that are ancestors of at least one (key)
129  // node
130  // with a non-empty arcset in node2arc and we extract from those the
131  // roots, i.e., those nodes whose set of parents, if any, have all been
132  // identified as non-barren by being marked as
133  // std::numeric_limits<unsigned int>::max (). Such nodes are
134  // thus the top of the graph to sweep.
135  // 2/ create a copy of the subgraph of the DAG w.r.t. the 1-marked nodes
136  // and, for each node, if the node has several parents and children,
137  // keep only one arc from one of the parents to the child with the
138  // smallest
139  // number of parents, and try to create a matching between parents and
140  // children and add one arc for each edge of this matching. This will
141  // allow
142  // us to create distinct paths in the DAG. Whenever a child has no more
143  // parents, it becomes the root of a new path.
144  // 3/ the sweeping will be performed from the roots of all these paths.
145 
146  // perform step 1/
147  NodeSet path_roots;
148  {
149  List< NodeId > nodes_to_mark;
150  for (const auto& elt : node2arc) {
151  if (!elt.second.empty()) { // only process nodes with assigned arcs
152  nodes_to_mark.insert(elt.first);
153  }
154  }
155  while (!nodes_to_mark.empty()) {
156  NodeId node = nodes_to_mark.front();
157  nodes_to_mark.popFront();
158 
159  if (!mark[node]) { // mark the node and all its ancestors
160  mark[node] = 1;
161  Size nb_par = 0;
162  for (auto par : __dag->parents(node)) {
163  Size parent_mark = mark[par];
164  if (parent_mark != std::numeric_limits< Size >::max()) {
165  ++nb_par;
166  if (parent_mark == 0) { nodes_to_mark.insert(par); }
167  }
168  }
169 
170  if (nb_par == 0) { path_roots.insert(node); }
171  }
172  }
173  }
174 
175  // perform step 2/
176  DAG sweep_dag = *__dag;
177  for (const auto node : *__dag) { // keep only nodes marked with 1
178  if (mark[node] != 1) { sweep_dag.eraseNode(node); }
179  }
180  for (const auto node : sweep_dag) {
181  const Size nb_parents = sweep_dag.parents(node).size();
182  const Size nb_children = sweep_dag.children(node).size();
183  if ((nb_parents > 1) || (nb_children > 1)) {
184  // perform the matching
185  const auto& parents = sweep_dag.parents(node);
186 
187  // if there is no child, remove all the parents except the first one
188  if (nb_children == 0) {
189  auto iter_par = parents.beginSafe();
190  for (++iter_par; iter_par != parents.endSafe(); ++iter_par) {
191  sweep_dag.eraseArc(Arc(*iter_par, node));
192  }
193  } else {
194  // find the child with the smallest number of parents
195  const auto& children = sweep_dag.children(node);
196  NodeId smallest_child = 0;
197  Size smallest_nb_par = std::numeric_limits< Size >::max();
198  for (const auto child : children) {
199  const auto new_nb = sweep_dag.parents(child).size();
200  if (new_nb < smallest_nb_par) {
201  smallest_nb_par = new_nb;
202  smallest_child = child;
203  }
204  }
205 
206  // if there is no parent, just keep the link with the smallest child
207  // and remove all the other arcs
208  if (nb_parents == 0) {
209  for (auto iter = children.beginSafe(); iter != children.endSafe();
210  ++iter) {
211  if (*iter != smallest_child) {
212  if (sweep_dag.parents(*iter).size() == 1) {
213  path_roots.insert(*iter);
214  }
215  sweep_dag.eraseArc(Arc(node, *iter));
216  }
217  }
218  } else {
219  auto nb_match = Size(std::min(nb_parents, nb_children) - 1);
220  auto iter_par = parents.beginSafe();
221  ++iter_par; // skip the first parent, whose arc with node will
222  // remain
223  auto iter_child = children.beginSafe();
224  for (Idx i = 0; i < nb_match; ++i, ++iter_par, ++iter_child) {
225  if (*iter_child == smallest_child) { ++iter_child; }
226  sweep_dag.addArc(*iter_par, *iter_child);
227  sweep_dag.eraseArc(Arc(*iter_par, node));
228  sweep_dag.eraseArc(Arc(node, *iter_child));
229  }
230  for (; iter_par != parents.endSafe(); ++iter_par) {
231  sweep_dag.eraseArc(Arc(*iter_par, node));
232  }
233  for (; iter_child != children.endSafe(); ++iter_child) {
234  if (*iter_child != smallest_child) {
235  if (sweep_dag.parents(*iter_child).size() == 1) {
236  path_roots.insert(*iter_child);
237  }
238  sweep_dag.eraseArc(Arc(node, *iter_child));
239  }
240  }
241  }
242  }
243  }
244  }
245 
246  // step 3: sweep the paths from the roots of sweep_dag
247  // here, the idea is that, for each path of sweep_dag, the mark we put
248  // to the ancestors is a given number, say N, that increases from path
249  // to path. Hence, for a given path, all the nodes that are marked with a
250  // number at least as high as N are non-barren, the others being barren.
251  Idx mark_id = 2;
252  for (NodeId path : path_roots) {
253  // perform the sweeping from the path
254  while (true) {
255  // mark all the ancestors of the node
256  List< NodeId > to_mark{path};
257  while (!to_mark.empty()) {
258  NodeId node = to_mark.front();
259  to_mark.popFront();
260  if (mark[node] < mark_id) {
261  mark[node] = mark_id;
262  for (const auto par : __dag->parents(node)) {
263  if (mark[par] < mark_id) { to_mark.insert(par); }
264  }
265  }
266  }
267 
268  // now, get all the arcs that contained node "path" in their separator.
269  // this node acts as a query target and, therefore, its ancestors
270  // shall be non-barren.
271  const ArcSet& arcs = node2arc[path];
272  for (const auto& arc : arcs) {
273  NodeSet& barren = result[arc];
274  for (auto iter = barren.beginSafe(); iter != barren.endSafe(); ++iter) {
275  if (mark[*iter] >= mark_id) {
276  // this indicates a non-barren node
277  barren.erase(iter);
278  }
279  }
280  }
281 
282  // go to the next sweeping node
283  const NodeSet& sweep_children = sweep_dag.children(path);
284  if (sweep_children.size()) {
285  path = *(sweep_children.begin());
286  } else {
287  // here, the path has ended, so we shall go to the next path
288  ++mark_id;
289  break;
290  }
291  }
292  }
293 
294  return result;
295  }
296 
299  // mark all the nodes in the dag as barren (true)
300  NodeProperty< bool > barren_mark = __dag->nodesProperty(true);
301 
302  // mark all the ancestors of the evidence and targets as non-barren
303  List< NodeId > nodes_to_examine;
304  int nb_non_barren = 0;
305  for (const auto node : *__observed_nodes)
306  nodes_to_examine.insert(node);
307  for (const auto node : *__target_nodes)
308  nodes_to_examine.insert(node);
309 
310  while (!nodes_to_examine.empty()) {
311  const NodeId node = nodes_to_examine.front();
312  nodes_to_examine.popFront();
313  if (barren_mark[node]) {
314  barren_mark[node] = false;
315  ++nb_non_barren;
316  for (const auto par : __dag->parents(node))
317  nodes_to_examine.insert(par);
318  }
319  }
320 
321  // here, all the nodes marked true are barren
322  NodeSet barren_nodes(__dag->sizeNodes() - nb_non_barren);
323  for (const auto& marked_pair : barren_mark)
324  if (marked_pair.second) barren_nodes.insert(marked_pair.first);
325 
326  return barren_nodes;
327  }
328 
329 } /* namespace gum */
bool empty() const noexcept
Returns a boolean indicating whether the chained list is empty.
Definition: list_tpl.h:1970
const NodeSet & separator(const Edge &edge) const
returns the separator included in a given edge
Size size() const
alias for sizeNodes
Set< Arc > ArcSet
Some typdefs and define for shortcuts ...
const NodeSet * __observed_nodes
the set of observed nodes
NodeProperty< VAL > nodesProperty(VAL(*f)(const NodeId &), Size size=0) const
a method to create a HashTable with key:NodeId and value:VAL
void erase(const Key &k)
Erases an element from the set.
Definition: set_tpl.h:656
Generic doubly linked lists.
Definition: list.h:372
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition: set_tpl.h:517
void popFront()
Removes the first element of a List, if any.
Definition: list_tpl.h:1964
NodeId head() const
returns the head of the arc
The class for generic Hash Tables.
Definition: hashTable.h:679
const iterator_safe & endSafe() const noexcept
The usual safe end iterator to parse the set.
Definition: set_tpl.h:502
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition: set_tpl.h:607
const EdgeSet & edges() const
returns the set of edges stored within the EdgeGraphPart
The base class for all directed edgesThis class is used as a basis for manipulating all directed edge...
const NodeSet & parents(const NodeId id) const
returns the set of nodes with arc ingoing to a given node
NodeSet barrenNodes()
returns the set of barren nodes
const NodeSet & clique(const NodeId idClique) const
returns the set of nodes included into a given clique
Basic graph of cliques.
Definition: cliqueGraph.h:58
Size sizeNodes() const
returns the number of nodes in the NodeGraphPart
Val & insert(const Val &val)
Inserts a new element at the end of the chained list (alias of pushBack).
Definition: list_tpl.h:1619
Val & front() const
Returns a reference to first element of a list, if any.
Definition: list_tpl.h:1831
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
The base class for all undirected edges.
const DAG * __dag
the DAG on which we compute the barren nodes
iterator_safe beginSafe() const
The usual safe begin iterator to parse the set.
Definition: set_tpl.h:488
const NodeSet * __target_nodes
the set of targeted nodes
Size Idx
Type for indexes.
Definition: types.h:53
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
Size size() const noexcept
Returns the number of elements in the set.
Definition: set_tpl.h:701
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
NodeId first() const
returns one extremal node ID (whichever one it is is unspecified)
Base class for dag.
Definition: DAG.h:102
virtual void eraseNode(const NodeId id)
remove a node and its adjacent arcs from the graph
Definition: diGraph_inl.h:69
Size NodeId
Type for node ids.
Definition: graphElements.h:98
void insert(const Key &k)
Inserts a new element into the set.
Definition: set_tpl.h:613
NodeId tail() const
returns the tail of the arc
void insert(const Key &k)
Insert an element at the end of the sequence.
Definition: sequence_tpl.h:408