aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
projectionPattern4MultiDimArray.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  *
25  * @brief the pattern used by all the projections of multidimensional tables
26  *
27  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
28  */
29 
30 // check if we allowed these patterns to be used
31 #ifndef GUM_PROJECTION_PATTERN_ALLOWED
32 
33 // #warning To use projectionPattern, you must define
34 // GUM_PROJECTION_PATTERN_ALLOWED
35 
36 #else
37 namespace gum {
38 
39  // a specialized function for projecting a multiDimArray over a subset of its
40  // vars
41 
42 # ifdef GUM_MULTI_DIM_PROJECTION_NAME
43 # define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
44  template < typename GUM_SCALAR >
45  MultiDimArray< GUM_SCALAR >* GUM_MULTI_DIM_PROJECTION_NAME(
46  const MultiDimArray< GUM_SCALAR >* table,
47  const Set< const DiscreteVariable* >& del_vars)
48 # endif
49 
50  // clang-format off
51 
52 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME
53 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
54 #define GUM_MULTI_DIM_PROJECTION_POINTER
55  template <typename GUM_SCALAR>
56  MultiDimArray<GUM_SCALAR*>* GUM_MULTI_DIM_PROJECTION_POINTER_NAME(
57  const MultiDimArray<GUM_SCALAR*>* table,
58  const Set<const DiscreteVariable*>& del_vars )
59 #endif
60 
61 #ifdef GUM_MULTI_DIM_PROJECTION_NAME_F
62 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
63  template <typename GUM_SCALAR>
64  MultiDimArray<GUM_SCALAR>* GUM_MULTI_DIM_PROJECTION_NAME_F(
65  const MultiDimArray<GUM_SCALAR>* table,
66  const Set<const DiscreteVariable*>& del_vars,
67  GUM_SCALAR ( *f )( const GUM_SCALAR&, const GUM_SCALAR& ) )
68 #endif
69 
70 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F
71 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
72 #define GUM_MULTI_DIM_PROJECTION_POINTER
73  template <typename GUM_SCALAR>
74  MultiDimArray<GUM_SCALAR*>* GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F(
75  const MultiDimArray<GUM_SCALAR*>* table,
76  const Set<const DiscreteVariable*>& del_vars,
77  GUM_SCALAR* ( *f )( const GUM_SCALAR const*,
78  const GUM_SCALAR const* ) )
79 #endif
80 
81 #ifdef GUM_MULTI_DIM_PROJECTION_IMPL2ARRAY_NAME
82 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
83  template <typename GUM_SCALAR>
84  MultiDimImplementation<GUM_SCALAR>*
85  GUM_MULTI_DIM_PROJECTION_IMPL2ARRAY_NAME(
86  const MultiDimImplementation<GUM_SCALAR>* ttable,
87  const Set<const DiscreteVariable*>& del_vars )
88 #endif
89 
90 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_IMPL2ARRAY_NAME
91 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
92 #define GUM_MULTI_DIM_PROJECTION_POINTER
93  template <typename GUM_SCALAR>
94  MultiDimImplementation<GUM_SCALAR*>*
95  GUM_MULTI_DIM_PROJECTION_POINTER_IMPL2ARRAY_NAME(
96  const MultiDimImplementation<GUM_SCALAR*>* ttable,
97  const Set<const DiscreteVariable*>& del_vars )
98 #endif
99 
100  // clang-format on
101 
102  {
103 
104 # ifdef GUM_MULTI_DIM_PROJECTION_IMPL2ARRAY_NAME
105  const MultiDimArray< GUM_SCALAR >* table
106  = reinterpret_cast< const MultiDimArray< GUM_SCALAR >* >(ttable);
107 # endif
108 
109 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER_IMPL2ARRAY_NAME
110  const MultiDimArray< GUM_SCALAR* >* table
111  = reinterpret_cast< const MultiDimArray< GUM_SCALAR* >* >(ttable);
112 # endif
113 
114  // create the neutral element used to fill the result upon its
115  // creation
116  const GUM_SCALAR neutral_element = GUM_MULTI_DIM_PROJECTION_NEUTRAL;
117 
118  // first, compute whether we should loop over table or over the projected
119  // table first to get a faster algorithm.
120  const Sequence< const DiscreteVariable* >& table_vars = table->variablesSequence();
121  bool need_swapping = table_vars.size() >= 2 * del_vars.size();
122 
123  if (!need_swapping) {
124  // Compute the variables that belong to both the projection set and
125  // table. Store the domain size of the Cartesian product of these
126  // variables (result_domain_size) as well as the domain size of the
127  // Cartesian product of the variables of table that do not belong to
128  // projection set, i.e., those variables that belong to table but not to
129  // del_vars (table_alone_domain_size). In addition, store the number of
130  // increments in the computation loops at the end of the function before
131  // which the variables of the projection set need be incremented (vector
132  // before incr).
133  std::vector< Idx > table_and_result_offset;
134  std::vector< Idx > table_and_result_domain;
135  std::vector< Idx > before_incr;
136  unsigned int nb_positive_before_incr = 0;
137  Idx table_alone_domain_size = 1;
138  Idx result_domain_size = 1;
139  Idx table_domain_size = 1;
140  Sequence< const DiscreteVariable* > result_varSeq;
141  {
142  Idx tmp_before_incr = 1;
143  bool has_before_incr = false;
144 
145  for (const auto var: table_vars) {
146  table_domain_size *= var->domainSize();
147 
148  if (!del_vars.exists(var)) {
149  if (has_before_incr) {
150  before_incr.push_back(tmp_before_incr - 1);
151  has_before_incr = false;
152  ++nb_positive_before_incr;
153  } else {
154  before_incr.push_back(0);
155  }
156 
157  table_and_result_domain.push_back(var->domainSize());
158  table_and_result_offset.push_back(result_domain_size);
159  result_domain_size *= var->domainSize();
160  tmp_before_incr = 1;
161  result_varSeq << var;
162  } else {
163  tmp_before_incr *= var->domainSize();
164  has_before_incr = true;
165  table_alone_domain_size *= var->domainSize();
166  }
167  }
168  }
169  std::vector< Idx > table_and_result_value = table_and_result_domain;
170  std::vector< Idx > current_incr = before_incr;
171  std::vector< Idx > table_and_result_down = table_and_result_offset;
172 
173  for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
174  table_and_result_down[i] *= (table_and_result_domain[i] - 1);
175  }
176 
177  // create a table "result" containing only the variables of the
178  // projection: the variables are stored in the order in which they appear
179  // in "table" Hence, ++ operations on an instantiation on table will more
180  // or less correspond to a ++ operation on an instantiation on result
181  MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
182  = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
183 
184  if (!result_varSeq.size()) { return result; }
185 
186  result->beginMultipleChanges();
187 
188  for (const auto var: result_varSeq)
189  *result << *var;
190 
191 // fill the matrix with the neutral element
192 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
193  result->endMultipleChanges();
194 
195  for (Idx i = 0; i < result_domain_size; ++i) {
196  result->unsafeSet(i, new GUM_SCALAR(neutral_element));
197  }
198 
199 # else
200  result->endMultipleChanges(neutral_element);
201 # endif
202 
203  // compute the projection: first loop over the variables X's in table
204  // that do not belong to result and, for each value of these X's, loop
205  // over the variables in both table and result. As such, in the internal
206  // loop, the offsets of "result" need only be incremented as usually to
207  // parse appropriately this table. For result, the problem is slightly
208  // more complicated: in the outer for loop, we shall always reset
209  // resul_offset to 0. For the inner loop, result_offset should be
210  // incremented (++) only when t1 before_incr[xxx] steps in the loop have
211  // already been made.
212 
213  // but before doing so, check whether there exist positive_before_incr.
214  // If this is not the case, optimize by not using before_incr at all
215  GUM_MULTI_DIM_PROJECTION_TYPE* pt
216  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE* >(&(table->unsafeGet(0)));
217  GUM_MULTI_DIM_PROJECTION_TYPE* pres
218  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE* >(&(result->unsafeGet(0)));
219  GUM_MULTI_DIM_PROJECTION_TYPE* pres_deb = pres;
220 
221  if (!nb_positive_before_incr) {
222  for (Idx i = 0; i < table_alone_domain_size; ++i) {
223  for (Idx j = 0; j < result_domain_size; ++j) {
224 # ifdef GUM_MULTI_DIM_PROJECTION_EFFECTIVE_TYPE
225  GUM_MULTI_DIM_PROJECTION(*pres, *pt);
226 # else
227 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
228  **pres = GUM_MULTI_DIM_PROJECTION(*pres, *pt);
229 # else
230  *pres = GUM_MULTI_DIM_PROJECTION(*pres, *pt);
231 # endif
232 # endif
233 
234  // update the offset of table and result
235  ++pt;
236  ++pres;
237  }
238 
239  // update the offset of result
240  pres = pres_deb;
241  }
242  } else {
243  // here there are positive before_incr and we should use them to know
244  // when result_offset needs be changed
245  Idx result_offset = 0;
246 
247  for (Idx i = 0; i < table_domain_size; ++i) {
248 # ifdef GUM_MULTI_DIM_PROJECTION_EFFECTIVE_TYPE
249  GUM_MULTI_DIM_PROJECTION(pres[result_offset], *pt);
250 # else
251 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
252  *(pres[result_offset]) = GUM_MULTI_DIM_PROJECTION(pres[result_offset], *pt);
253 # else
254  pres[result_offset] = GUM_MULTI_DIM_PROJECTION(pres[result_offset], *pt);
255 # endif
256 # endif
257 
258  // update the offset of table
259  ++pt;
260 
261  // update the offset of result
262  for (unsigned int k = 0; k < current_incr.size(); ++k) {
263  // check if we need modify result_offset
264  if (current_incr[k]) {
265  --current_incr[k];
266  break;
267  }
268 
269  current_incr[k] = before_incr[k];
270 
271  // here we shall modify result_offset
272  --table_and_result_value[k];
273 
274  if (table_and_result_value[k]) {
275  result_offset += table_and_result_offset[k];
276  break;
277  }
278 
279  table_and_result_value[k] = table_and_result_domain[k];
280  result_offset -= table_and_result_down[k];
281  }
282  }
283  }
284 
285  return result;
286  } else { // need_swapping = true
287 
288  // Compute the variables that are in t1 but not in t2. For these
289  // variables, get the increment in the offset of t1 that would result
290  // from an increment in one of these variables (vector t1_alone_offset).
291  // Also store the domain size of these variables (vector t1_alone_domain)
292  // and, for the computation loops at the end of this function, |variable|
293  // - the current values of these variables (vector t1_alone_value). All
294  // these vectors reference the variables of t1 \ t2 in the order in which
295  // they appear in seq1. Keep as well the size of the Cartesian product of
296  // these variables.
297  std::vector< Idx > table_alone_offset;
298  std::vector< Idx > table_alone_domain;
299  Idx offset = 1;
300  Idx table_alone_domain_size = 1;
301  HashTable< const DiscreteVariable*, Idx > var1offset(table_vars.size());
302 
303  for (const auto var: table_vars) {
304  if (del_vars.exists(var)) {
305  table_alone_domain.push_back(var->domainSize());
306  table_alone_offset.push_back(offset);
307  table_alone_domain_size *= var->domainSize();
308  }
309 
310  var1offset.insert(var, offset);
311  offset *= var->domainSize();
312  }
313 
314  std::vector< Idx > table_alone_value = table_alone_domain;
315  std::vector< Idx > table_alone_down = table_alone_offset;
316 
317  for (unsigned int i = 0; i < table_alone_down.size(); ++i)
318  table_alone_down[i] *= (table_alone_domain[i] - 1);
319 
320  // Compute the same vectors for the variables that belong to both t1 and
321  // t2. In this case, All these vectors reference the variables in the
322  // order in which they appear in seq2. In addition, store the number of
323  // increments in the computation loops at the end of the function before
324  // which the variables of t1 cap t2 need be incremented (vector
325  // t1_and_t2_before incr). Similarly, store the number of such
326  // increments currently still needed before the next incrementation of
327  // the variables of t1 cap t2. Keep as well the size of the Cartesian
328  // product of these variables.
329  Sequence< const DiscreteVariable* > result_varSeq;
330  std::vector< Idx > table_and_result_offset;
331  std::vector< Idx > table_and_result_domain;
332  Idx result_domain_size = 1;
333  bool has_before_incr = false;
334  bool found_proj_var = false;
335 
336  for (const auto var: table_vars) {
337  if (!del_vars.exists(var)) {
338  table_and_result_domain.push_back(var->domainSize());
339  table_and_result_offset.push_back(var1offset[var]);
340  found_proj_var = true;
341  result_domain_size *= var->domainSize();
342  result_varSeq << var;
343  } else {
344  if (found_proj_var) has_before_incr = true;
345  }
346  }
347 
348  std::vector< Idx > table_and_result_value = table_and_result_domain;
349  std::vector< Idx > table_and_result_down = table_and_result_offset;
350 
351  for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
352  table_and_result_down[i] *= (table_and_result_domain[i] - 1);
353  }
354 
355  // create a table "result" containing only the variables of the
356  // projection: the variables are stored in the order in which they appear
357  // in "table" Hence, ++ operations on an instantiation on table will more
358  // or less correspond to a ++ operation on an instantiation on result
359  MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
360  = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
361  result->beginMultipleChanges();
362 
363  for (const auto var: result_varSeq)
364  *result << *var;
365 
366 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
367  result->endMultipleChanges();
368 
369  // fill the matrix with the neutral element
370  for (Idx i = 0; i < result_domain_size; ++i) {
371  result->unsafeSet(i, new GUM_SCALAR(neutral_element));
372  }
373 
374 # else
375  result->endMultipleChanges(neutral_element);
376 # endif
377 
378  // compute the sum: first loop over the variables X's both in table and
379  // in result and, for each value of these X's, loop over the variables
380  // that are in table but not result. As such, in the internal loop, there
381  // is no increment in the offset of "result", and in the outer loop, this
382  // offset is incremented using a simple ++ operator. For table, the
383  // problem is slightly more complicated: in the outer for loop, we shall
384  // increment the variables of table cap result according to vectors
385  // table_and_result_xxx. Each time a variable of these vectors has been
386  // incremented up to its max, we shall put it down to 0 and increment the
387  // next one, and so on. For the inner loop, this is similar except that
388  // we shall do these operations only when before_incr[xxx] steps in the
389  // loop have already been made.
390  GUM_MULTI_DIM_PROJECTION_TYPE* pt
391  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE* >(&(table->unsafeGet(0)));
392  GUM_MULTI_DIM_PROJECTION_TYPE* pres
393  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE* >(&(result->unsafeGet(0)));
394 
395  // but before doing so, check whether there exist positive_before_incr.
396  // If this is not the case, optimize by not using before_incr at all
397  if (!has_before_incr) {
398  for (Idx i = 0; i < result_domain_size; ++i) {
399  for (Idx j = 0; j < table_alone_domain_size; ++j) {
400 # ifdef GUM_MULTI_DIM_PROJECTION_EFFECTIVE_TYPE
401  GUM_MULTI_DIM_PROJECTION(*pres, *pt);
402 # else
403 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
404  **pres = GUM_MULTI_DIM_PROJECTION(*pres, *pt);
405 # else
406  *pres = GUM_MULTI_DIM_PROJECTION(*pres, *pt);
407 # endif
408 # endif
409 
410  // update the offset of table
411  ++pt;
412  }
413 
414  // update the offset of result
415  ++pres;
416  }
417  } else {
418  // here there are positive before_incr and we should use them to know
419  // when result_offset needs be changed
420  Idx table_offset = 0;
421 
422  for (Idx j = 0; j < result_domain_size; ++j) {
423  for (Idx i = 0; i < table_alone_domain_size; ++i) {
424 # ifdef GUM_MULTI_DIM_PROJECTION_EFFECTIVE_TYPE
425  GUM_MULTI_DIM_PROJECTION(*pres, pt[table_offset]);
426 # else
427 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
428  **pres = GUM_MULTI_DIM_PROJECTION(*pres, pt[table_offset]);
429 # else
430  *pres = GUM_MULTI_DIM_PROJECTION(*pres, pt[table_offset]);
431 # endif
432 # endif
433 
434  // update the increment of table for the inner loop
435  for (unsigned int k = 0; k < table_alone_value.size(); ++k) {
436  --table_alone_value[k];
437 
438  if (table_alone_value[k]) {
439  table_offset += table_alone_offset[k];
440  break;
441  }
442 
443  table_alone_value[k] = table_alone_domain[k];
444  table_offset -= table_alone_down[k];
445  }
446  }
447 
448  // update the offset of table for the outer loop
449  for (unsigned int k = 0; k < table_and_result_value.size(); ++k) {
450  --table_and_result_value[k];
451 
452  if (table_and_result_value[k]) {
453  table_offset += table_and_result_offset[k];
454  break;
455  }
456 
457  table_and_result_value[k] = table_and_result_domain[k];
458  table_offset -= table_and_result_down[k];
459  }
460 
461  // update the offset of result for the outer loop
462  //++result_offset;
463  ++pres;
464  }
465  }
466 
467  return result;
468  }
469  }
470 
471 # undef GUM_MULTI_DIM_PROJECTION_TYPE
472 
473 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
474 # undef GUM_MULTI_DIM_PROJECTION_POINTER
475 # endif
476 } // namespace gum
477 #endif /* GUM_PROJECTION_PATTERN_ALLOWED */