aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
projectionPattern4BaseName.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  *
25  * @brief the pattern used by all the "basename" projections of multidim tables
26  *
27  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
28  */
29 
30 // check if we allowed these patterns to be used
31 #ifndef GUM_PROJECTION_PATTERN_ALLOWED
32 
33 // #warning To use projectionPattern, you must define
34 // GUM_PROJECTION_PATTERN_ALLOWED
35 
36 #else
37 namespace gum {
38 
39  // a specialized function for projecting a multiDimImplementation over a subset
40  // of its variables
41 
42 # ifdef GUM_MULTI_DIM_PROJECTION_NAME
43 # define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
44  template < typename GUM_SCALAR >
45  MultiDimImplementation< GUM_SCALAR >*
46  GUM_MULTI_DIM_PROJECTION_NAME(const MultiDimImplementation< GUM_SCALAR >* table,
47  const Set< const DiscreteVariable* >& del_vars)
48 # endif
49 
50  // clang-format off
51 
52 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME
53 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
54 #define GUM_MULTI_DIM_PROJECTION_POINTER
55  template <typename GUM_SCALAR>
56  MultiDimImplementation<GUM_SCALAR*>* GUM_MULTI_DIM_PROJECTION_POINTER_NAME(
57  const MultiDimImplementation<GUM_SCALAR*>* table,
58  const Set<const DiscreteVariable*>& del_vars )
59 #endif
60 
61 #ifdef GUM_MULTI_DIM_PROJECTION_NAME_F
62 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
63  template <typename GUM_SCALAR>
64  MultiDimImplementation<GUM_SCALAR>* GUM_MULTI_DIM_PROJECTION_NAME_F(
65  const MultiDimImplementation<GUM_SCALAR>* table,
66  const Set<const DiscreteVariable*>& del_vars,
67  GUM_SCALAR ( *f )( const GUM_SCALAR&, const GUM_SCALAR& ) )
68 #endif
69 
70 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F
71 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
72 #define GUM_MULTI_DIM_PROJECTION_POINTER
73  template <typename GUM_SCALAR>
74  MultiDimImplementation<GUM_SCALAR*>*
75  GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F(
76  const MultiDimImplementation<GUM_SCALAR*>* table,
77  const Set<const DiscreteVariable*>& del_vars,
78  GUM_SCALAR* ( *f )( const GUM_SCALAR const*,
79  const GUM_SCALAR const* ) )
80 #endif
81 
82  // clang-format on
83 
84  {
85 
86  // create the neutral element used to fill the result upon its
87  // creation
88  const GUM_SCALAR neutral_element = GUM_MULTI_DIM_PROJECTION_NEUTRAL;
89 
90  // first, compute whether we should loop over table or over the projected
91  // table first to get a faster algorithm.
92  const Sequence< const DiscreteVariable* >& table_vars = table->variablesSequence();
93  bool need_swapping = table_vars.size() >= 2 * del_vars.size();
94 
95  if (!need_swapping) {
96  // Compute the variables that belong to both the projection set and
97  // table. Store the domain size of the Cartesian product of these
98  // variables (result_domain_size) as well as the domain size of the
99  // Cartesian product of the variables of table that do not belong to
100  // projection set, i.e., those variables that belong to table but not to
101  // del_vars (table_alone_domain_size). In addition, store the number of
102  // increments in the computation loops at the end of the function before
103  // which the variables of the projection set need be incremented (vector
104  // before incr).
105  std::vector< Idx > table_and_result_offset;
106  std::vector< Idx > table_and_result_domain;
107  std::vector< Idx > before_incr;
108  unsigned int nb_positive_before_incr = 0;
109  Idx table_alone_domain_size = 1;
110  Idx result_domain_size = 1;
111  Idx table_domain_size = 1;
112  Sequence< const DiscreteVariable* > result_varSeq;
113  {
114  Idx tmp_before_incr = 1;
115  bool has_before_incr = false;
116 
117  for (const auto var: table_vars) {
118  table_domain_size *= var->domainSize();
119 
120  if (!del_vars.exists(var)) {
121  if (has_before_incr) {
122  before_incr.push_back(tmp_before_incr - 1);
123  has_before_incr = false;
124  ++nb_positive_before_incr;
125  } else {
126  before_incr.push_back(0);
127  }
128 
129  table_and_result_domain.push_back(var->domainSize());
130  table_and_result_offset.push_back(result_domain_size);
131  result_domain_size *= var->domainSize();
132  tmp_before_incr = 1;
133  result_varSeq << var;
134  } else {
135  tmp_before_incr *= var->domainSize();
136  has_before_incr = true;
137  table_alone_domain_size *= var->domainSize();
138  }
139  }
140  }
141  std::vector< Idx > table_and_result_value = table_and_result_domain;
142  std::vector< Idx > current_incr = before_incr;
143  std::vector< Idx > table_and_result_down = table_and_result_offset;
144 
145  for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
146  table_and_result_down[i] *= (table_and_result_domain[i] - 1);
147  }
148 
149  // create a table "result" containing only the variables of the
150  // projection: the variables are stored in the order in which they appear
151  // in "table" Hence, ++ operations on an instantiation on table will more
152  // or less correspond to a ++ operation on an instantiation on result
153  MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
154  = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
155 
156  if (!result_varSeq.size()) { return result; }
157 
158  result->beginMultipleChanges();
159 
160  for (const auto var: result_varSeq)
161  *result << *var;
162 
163  result->endMultipleChanges();
164 
165 // fill the matrix with the neutral element
166 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
167 
168  for (Idx i = 0; i < result_domain_size; ++i) {
169  result->unsafeSet(i, new GUM_SCALAR(neutral_element));
170  }
171 
172 # else
173  result->fill(neutral_element);
174 # endif
175 
176  // compute the projection: first loop over the variables X's in table
177  // that do not belong to result and, for each value of these X's, loop
178  // over the variables in both table and result. As such, in the internal
179  // loop, the offsets of "result" need only be incremented as usually to
180  // parse appropriately this table. For result, the problem is slightly
181  // more complicated: in the outer for loop, we shall always reset
182  // resul_offset to 0. For the inner loop, result_offset should be
183  // incremented (++) only when t1 before_incr[xxx] steps in the loop have
184  // already been made.
185 
186  // but before doing so, check whether there exist positive_before_incr.
187  // If this is not the case, optimize by not using before_incr at all
188  if (!nb_positive_before_incr) {
189  Idx result_offset = 0;
190  Instantiation table_inst(table);
191 
192  for (Idx i = 0; i < table_alone_domain_size; ++i) {
193  for (Idx j = 0; j < result_domain_size; ++j) {
194 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
195  GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
196  *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
197 # else
198  GUM_MULTI_DIM_PROJECTION_TYPE& res
199  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(result->unsafeGet(result_offset));
200  res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
201 # endif
202 
203  // update the offset of table and result
204  ++table_inst;
205  ++result_offset;
206  }
207 
208  // update the offset of result
209  result_offset = 0;
210  }
211  } else {
212  // here there are positive before_incr and we should use them to know
213  // when result_offset needs be changed
214  Idx result_offset = 0;
215  Instantiation table_inst(table);
216 
217  for (Idx i = 0; i < table_domain_size; ++i) {
218 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
219  GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
220  *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
221 # else
222  GUM_MULTI_DIM_PROJECTION_TYPE& res
223  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(result->unsafeGet(result_offset));
224  res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
225 # endif
226 
227  // update the offset of table
228  ++table_inst;
229 
230  // update the offset of result
231  for (unsigned int k = 0; k < current_incr.size(); ++k) {
232  // check if we need modify result_offset
233  if (current_incr[k]) {
234  --current_incr[k];
235  break;
236  }
237 
238  current_incr[k] = before_incr[k];
239 
240  // here we shall modify result_offset
241  --table_and_result_value[k];
242 
243  if (table_and_result_value[k]) {
244  result_offset += table_and_result_offset[k];
245  break;
246  }
247 
248  table_and_result_value[k] = table_and_result_domain[k];
249  result_offset -= table_and_result_down[k];
250  }
251  }
252  }
253 
254  return result;
255  } else { // need_swapping = true
256 
257  // Compute the variables that are in t1 but not in t2. For these
258  // variables, get the increment in the offset of t1 that would result
259  // from an increment in one of these variables (vector t1_alone_offset).
260  // Also store the domain size of these variables (vector t1_alone_domain)
261  // and, for the computation loops at the end of this function, |variable|
262  // - the current values of these variables (vector t1_alone_value). All
263  // these vectors reference the variables of t1 \ t2 in the order in which
264  // they appear in seq1. Keep as well the size of the Cartesian product of
265  // these variables.
266  std::vector< Idx > table_alone_offset;
267  std::vector< Idx > table_alone_domain;
268  Idx offset = 1;
269  Idx table_alone_domain_size = 1;
270  HashTable< const DiscreteVariable*, Idx > var1offset(table_vars.size());
271 
272  for (const auto var: table_vars) {
273  if (del_vars.exists(var)) {
274  table_alone_domain.push_back(var->domainSize());
275  table_alone_offset.push_back(offset);
276  table_alone_domain_size *= var->domainSize();
277  }
278 
279  var1offset.insert(var, offset);
280  offset *= var->domainSize();
281  }
282 
283  std::vector< Idx > table_alone_value = table_alone_domain;
284  std::vector< Idx > table_alone_down = table_alone_offset;
285 
286  for (unsigned int i = 0; i < table_alone_down.size(); ++i)
287  table_alone_down[i] *= (table_alone_domain[i] - 1);
288 
289  // Compute the same vectors for the variables that belong to both t1 and
290  // t2. In this case, All these vectors reference the variables in the
291  // order in which they appear in seq2. In addition, store the number of
292  // increments in the computation loops at the end of the function before
293  // which the variables of t1 cap t2 need be incremented (vector
294  // t1_and_t2_before incr). Similarly, store the number of such
295  // increments currently still needed before the next incrementation of
296  // the variables of t1 cap t2. Keep as well the size of the Cartesian
297  // product of these variables.
298  Sequence< const DiscreteVariable* > result_varSeq;
299  std::vector< Idx > table_and_result_offset;
300  std::vector< Idx > table_and_result_domain;
301  Idx result_domain_size = 1;
302  bool has_before_incr = false;
303  bool found_proj_var = false;
304 
305  for (const auto var: table_vars) {
306  if (!del_vars.exists(var)) {
307  table_and_result_domain.push_back(var->domainSize());
308  table_and_result_offset.push_back(var1offset[var]);
309  found_proj_var = true;
310  result_domain_size *= var->domainSize();
311  result_varSeq << var;
312  } else {
313  if (found_proj_var) has_before_incr = true;
314  }
315  }
316 
317  std::vector< Idx > table_and_result_value = table_and_result_domain;
318  std::vector< Idx > table_and_result_down = table_and_result_offset;
319 
320  for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
321  table_and_result_down[i] *= (table_and_result_domain[i] - 1);
322  }
323 
324  // create a table "result" containing only the variables of the
325  // projection: the variables are stored in the order in which they appear
326  // in "table" Hence, ++ operations on an instantiation on table will more
327  // or less correspond to a ++ operation on an instantiation on result
328  MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
329  = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
330  result->beginMultipleChanges();
331 
332  for (const auto var: result_varSeq)
333  *result << *var;
334 
335  result->endMultipleChanges();
336 
337 // fill the matrix with the neutral element
338 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
339 
340  for (Idx i = 0; i < result_domain_size; ++i) {
341  result->unsafeSet(i, new GUM_SCALAR(neutral_element));
342  }
343 
344 # else
345  result->fill(neutral_element);
346 # endif
347 
348  // compute the sum: first loop over the variables X's both in table and
349  // in result and, for each value of these X's, loop over the variables
350  // that are in table but not result. As such, in the internal loop, there
351  // is no increment in the offset of "result", and in the outer loop, this
352  // offset is incremented using a simple ++ operator. For table, the
353  // problem is slightly more complicated: in the outer for loop, we shall
354  // increment the variables of table cap result according to vectors
355  // table_and_result_xxx. Each time a variable of these vectors has been
356  // incremented up to its max, we shall put it down to 0 and increment the
357  // next one, and so on. For the inner loop, this is similar except that
358  // we shall do these operations only when before_incr[xxx] steps in the
359  // loop have already been made.
360 
361  // but before doing so, check whether there exist positive_before_incr.
362  // If this is not the case, optimize by not using before_incr at all
363  if (!has_before_incr) {
364  Idx result_offset = 0;
365  Instantiation table_inst(table);
366 
367  for (Idx i = 0; i < result_domain_size; ++i) {
368  for (Idx j = 0; j < table_alone_domain_size; ++j) {
369 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
370  GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
371  *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
372 # else
373  GUM_MULTI_DIM_PROJECTION_TYPE& res
374  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(result->unsafeGet(result_offset));
375  res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
376 # endif
377 
378  // update the offset of table
379  ++table_inst;
380  }
381 
382  // update the offset of result
383  ++result_offset;
384  }
385  } else {
386  // here there are positive before_incr and we should use them to know
387  // when result_offset needs be changed
388  Idx result_offset = 0;
389  Instantiation table_inst(table);
390 
391  for (Idx j = 0; j < result_domain_size; ++j) {
392  for (Idx i = 0; i < table_alone_domain_size; ++i) {
393 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
394  GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
395  *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
396 # else
397  GUM_MULTI_DIM_PROJECTION_TYPE& res
398  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(result->unsafeGet(result_offset));
399  res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
400 # endif
401 
402  // update the increment of table for the inner loop
403  for (unsigned int k = 0; k < table_alone_value.size(); ++k) {
404  --table_alone_value[k];
405 
406  if (table_alone_value[k]) {
407  table_inst += table_alone_offset[k];
408  break;
409  }
410 
411  table_alone_value[k] = table_alone_domain[k];
412  table_inst -= table_alone_down[k];
413  }
414  }
415 
416  // update the offset of table for the outer loop
417  for (unsigned int k = 0; k < table_and_result_value.size(); ++k) {
418  --table_and_result_value[k];
419 
420  if (table_and_result_value[k]) {
421  table_inst += table_and_result_offset[k];
422  break;
423  }
424 
425  table_and_result_value[k] = table_and_result_domain[k];
426  table_inst -= table_and_result_down[k];
427  }
428 
429  // update the offset of result for the outer loop
430  ++result_offset;
431  }
432  }
433 
434  return result;
435  }
436  }
437 
438 # undef GUM_MULTI_DIM_PROJECTION_TYPE
439 
440 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
441 # undef GUM_MULTI_DIM_PROJECTION_POINTER
442 # endif
443 } // namespace gum
444 #endif /* GUM_PROJECTION_PATTERN_ALLOWED */