aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
projectionPattern4MultiDimArray.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  *
25  * @brief the pattern used by all the projections of multidimensional tables
26  *
27  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
28  */
29 
30 // check if we allowed these patterns to be used
31 #ifndef GUM_PROJECTION_PATTERN_ALLOWED
32 
33 // #warning To use projectionPattern, you must define
34 // GUM_PROJECTION_PATTERN_ALLOWED
35 
36 #else
37 namespace gum {
38 
39  // a specialized function for projecting a multiDimArray over a subset of its
40  // vars
41 
42 # ifdef GUM_MULTI_DIM_PROJECTION_NAME
43 # define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
44  template < typename GUM_SCALAR >
45  MultiDimArray< GUM_SCALAR >* GUM_MULTI_DIM_PROJECTION_NAME(
46  const MultiDimArray< GUM_SCALAR >* table,
47  const Set< const DiscreteVariable* >& del_vars)
48 # endif
49 
50  // clang-format off
51 
52 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME
53 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
54 #define GUM_MULTI_DIM_PROJECTION_POINTER
55  template <typename GUM_SCALAR>
56  MultiDimArray<GUM_SCALAR*>* GUM_MULTI_DIM_PROJECTION_POINTER_NAME(
57  const MultiDimArray<GUM_SCALAR*>* table,
58  const Set<const DiscreteVariable*>& del_vars )
59 #endif
60 
61 #ifdef GUM_MULTI_DIM_PROJECTION_NAME_F
62 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
63  template <typename GUM_SCALAR>
64  MultiDimArray<GUM_SCALAR>* GUM_MULTI_DIM_PROJECTION_NAME_F(
65  const MultiDimArray<GUM_SCALAR>* table,
66  const Set<const DiscreteVariable*>& del_vars,
67  GUM_SCALAR ( *f )( const GUM_SCALAR&, const GUM_SCALAR& ) )
68 #endif
69 
70 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F
71 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
72 #define GUM_MULTI_DIM_PROJECTION_POINTER
73  template <typename GUM_SCALAR>
74  MultiDimArray<GUM_SCALAR*>* GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F(
75  const MultiDimArray<GUM_SCALAR*>* table,
76  const Set<const DiscreteVariable*>& del_vars,
77  GUM_SCALAR* ( *f )( const GUM_SCALAR const*,
78  const GUM_SCALAR const* ) )
79 #endif
80 
81 #ifdef GUM_MULTI_DIM_PROJECTION_IMPL2ARRAY_NAME
82 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
83  template <typename GUM_SCALAR>
84  MultiDimImplementation<GUM_SCALAR>*
85  GUM_MULTI_DIM_PROJECTION_IMPL2ARRAY_NAME(
86  const MultiDimImplementation<GUM_SCALAR>* ttable,
87  const Set<const DiscreteVariable*>& del_vars )
88 #endif
89 
90 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_IMPL2ARRAY_NAME
91 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
92 #define GUM_MULTI_DIM_PROJECTION_POINTER
93  template <typename GUM_SCALAR>
94  MultiDimImplementation<GUM_SCALAR*>*
95  GUM_MULTI_DIM_PROJECTION_POINTER_IMPL2ARRAY_NAME(
96  const MultiDimImplementation<GUM_SCALAR*>* ttable,
97  const Set<const DiscreteVariable*>& del_vars )
98 #endif
99 
100  // clang-format on
101 
102  {
103 
104 # ifdef GUM_MULTI_DIM_PROJECTION_IMPL2ARRAY_NAME
105  const MultiDimArray< GUM_SCALAR >* table
106  = reinterpret_cast< const MultiDimArray< GUM_SCALAR >* >(ttable);
107 # endif
108 
109 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER_IMPL2ARRAY_NAME
110  const MultiDimArray< GUM_SCALAR* >* table
111  = reinterpret_cast< const MultiDimArray< GUM_SCALAR* >* >(ttable);
112 # endif
113 
114  // create the neutral element used to fill the result upon its
115  // creation
116  const GUM_SCALAR neutral_element = GUM_MULTI_DIM_PROJECTION_NEUTRAL;
117 
118  // first, compute whether we should loop over table or over the projected
119  // table first to get a faster algorithm.
120  const Sequence< const DiscreteVariable* >& table_vars
121  = table->variablesSequence();
122  bool need_swapping = table_vars.size() >= 2 * del_vars.size();
123 
124  if (!need_swapping) {
125  // Compute the variables that belong to both the projection set and
126  // table. Store the domain size of the Cartesian product of these
127  // variables (result_domain_size) as well as the domain size of the
128  // Cartesian product of the variables of table that do not belong to
129  // projection set, i.e., those variables that belong to table but not to
130  // del_vars (table_alone_domain_size). In addition, store the number of
131  // increments in the computation loops at the end of the function before
132  // which the variables of the projection set need be incremented (vector
133  // before incr).
134  std::vector< Idx > table_and_result_offset;
135  std::vector< Idx > table_and_result_domain;
136  std::vector< Idx > before_incr;
137  unsigned int nb_positive_before_incr = 0;
138  Idx table_alone_domain_size = 1;
139  Idx result_domain_size = 1;
140  Idx table_domain_size = 1;
141  Sequence< const DiscreteVariable* > result_varSeq;
142  {
143  Idx tmp_before_incr = 1;
144  bool has_before_incr = false;
145 
146  for (const auto var: table_vars) {
147  table_domain_size *= var->domainSize();
148 
149  if (!del_vars.exists(var)) {
150  if (has_before_incr) {
151  before_incr.push_back(tmp_before_incr - 1);
152  has_before_incr = false;
153  ++nb_positive_before_incr;
154  } else {
155  before_incr.push_back(0);
156  }
157 
158  table_and_result_domain.push_back(var->domainSize());
159  table_and_result_offset.push_back(result_domain_size);
160  result_domain_size *= var->domainSize();
161  tmp_before_incr = 1;
162  result_varSeq << var;
163  } else {
164  tmp_before_incr *= var->domainSize();
165  has_before_incr = true;
166  table_alone_domain_size *= var->domainSize();
167  }
168  }
169  }
170  std::vector< Idx > table_and_result_value = table_and_result_domain;
171  std::vector< Idx > current_incr = before_incr;
172  std::vector< Idx > table_and_result_down = table_and_result_offset;
173 
174  for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
175  table_and_result_down[i] *= (table_and_result_domain[i] - 1);
176  }
177 
178  // create a table "result" containing only the variables of the
179  // projection: the variables are stored in the order in which they appear
180  // in "table" Hence, ++ operations on an instantiation on table will more
181  // or less correspond to a ++ operation on an instantiation on result
182  MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
183  = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
184 
185  if (!result_varSeq.size()) { return result; }
186 
187  result->beginMultipleChanges();
188 
189  for (const auto var: result_varSeq)
190  *result << *var;
191 
192 // fill the matrix with the neutral element
193 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
194  result->endMultipleChanges();
195 
196  for (Idx i = 0; i < result_domain_size; ++i) {
197  result->unsafeSet(i, new GUM_SCALAR(neutral_element));
198  }
199 
200 # else
201  result->endMultipleChanges(neutral_element);
202 # endif
203 
204  // compute the projection: first loop over the variables X's in table
205  // that do not belong to result and, for each value of these X's, loop
206  // over the variables in both table and result. As such, in the internal
207  // loop, the offsets of "result" need only be incremented as usually to
208  // parse appropriately this table. For result, the problem is slightly
209  // more complicated: in the outer for loop, we shall always reset
210  // resul_offset to 0. For the inner loop, result_offset should be
211  // incremented (++) only when t1 before_incr[xxx] steps in the loop have
212  // already been made.
213 
214  // but before doing so, check whether there exist positive_before_incr.
215  // If this is not the case, optimize by not using before_incr at all
216  GUM_MULTI_DIM_PROJECTION_TYPE* pt
217  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE* >(&(table->unsafeGet(0)));
218  GUM_MULTI_DIM_PROJECTION_TYPE* pres
219  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE* >(&(result->unsafeGet(0)));
220  GUM_MULTI_DIM_PROJECTION_TYPE* pres_deb = pres;
221 
222  if (!nb_positive_before_incr) {
223  for (Idx i = 0; i < table_alone_domain_size; ++i) {
224  for (Idx j = 0; j < result_domain_size; ++j) {
225 # ifdef GUM_MULTI_DIM_PROJECTION_EFFECTIVE_TYPE
226  GUM_MULTI_DIM_PROJECTION(*pres, *pt);
227 # else
228 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
229  **pres = GUM_MULTI_DIM_PROJECTION(*pres, *pt);
230 # else
231  *pres = GUM_MULTI_DIM_PROJECTION(*pres, *pt);
232 # endif
233 # endif
234 
235  // update the offset of table and result
236  ++pt;
237  ++pres;
238  }
239 
240  // update the offset of result
241  pres = pres_deb;
242  }
243  } else {
244  // here there are positive before_incr and we should use them to know
245  // when result_offset needs be changed
246  Idx result_offset = 0;
247 
248  for (Idx i = 0; i < table_domain_size; ++i) {
249 # ifdef GUM_MULTI_DIM_PROJECTION_EFFECTIVE_TYPE
250  GUM_MULTI_DIM_PROJECTION(pres[result_offset], *pt);
251 # else
252 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
253  *(pres[result_offset])
254  = GUM_MULTI_DIM_PROJECTION(pres[result_offset], *pt);
255 # else
256  pres[result_offset] = GUM_MULTI_DIM_PROJECTION(pres[result_offset], *pt);
257 # endif
258 # endif
259 
260  // update the offset of table
261  ++pt;
262 
263  // update the offset of result
264  for (unsigned int k = 0; k < current_incr.size(); ++k) {
265  // check if we need modify result_offset
266  if (current_incr[k]) {
267  --current_incr[k];
268  break;
269  }
270 
271  current_incr[k] = before_incr[k];
272 
273  // here we shall modify result_offset
274  --table_and_result_value[k];
275 
276  if (table_and_result_value[k]) {
277  result_offset += table_and_result_offset[k];
278  break;
279  }
280 
281  table_and_result_value[k] = table_and_result_domain[k];
282  result_offset -= table_and_result_down[k];
283  }
284  }
285  }
286 
287  return result;
288  } else { // need_swapping = true
289 
290  // Compute the variables that are in t1 but not in t2. For these
291  // variables, get the increment in the offset of t1 that would result
292  // from an increment in one of these variables (vector t1_alone_offset).
293  // Also store the domain size of these variables (vector t1_alone_domain)
294  // and, for the computation loops at the end of this function, |variable|
295  // - the current values of these variables (vector t1_alone_value). All
296  // these vectors reference the variables of t1 \ t2 in the order in which
297  // they appear in seq1. Keep as well the size of the Cartesian product of
298  // these variables.
299  std::vector< Idx > table_alone_offset;
300  std::vector< Idx > table_alone_domain;
301  Idx offset = 1;
302  Idx table_alone_domain_size = 1;
303  HashTable< const DiscreteVariable*, Idx > var1offset(table_vars.size());
304 
305  for (const auto var: table_vars) {
306  if (del_vars.exists(var)) {
307  table_alone_domain.push_back(var->domainSize());
308  table_alone_offset.push_back(offset);
309  table_alone_domain_size *= var->domainSize();
310  }
311 
312  var1offset.insert(var, offset);
313  offset *= var->domainSize();
314  }
315 
316  std::vector< Idx > table_alone_value = table_alone_domain;
317  std::vector< Idx > table_alone_down = table_alone_offset;
318 
319  for (unsigned int i = 0; i < table_alone_down.size(); ++i)
320  table_alone_down[i] *= (table_alone_domain[i] - 1);
321 
322  // Compute the same vectors for the variables that belong to both t1 and
323  // t2. In this case, All these vectors reference the variables in the
324  // order in which they appear in seq2. In addition, store the number of
325  // increments in the computation loops at the end of the function before
326  // which the variables of t1 cap t2 need be incremented (vector
327  // t1_and_t2_before incr). Similarly, store the number of such
328  // increments currently still needed before the next incrementation of
329  // the variables of t1 cap t2. Keep as well the size of the Cartesian
330  // product of these variables.
331  Sequence< const DiscreteVariable* > result_varSeq;
332  std::vector< Idx > table_and_result_offset;
333  std::vector< Idx > table_and_result_domain;
334  Idx result_domain_size = 1;
335  bool has_before_incr = false;
336  bool found_proj_var = false;
337 
338  for (const auto var: table_vars) {
339  if (!del_vars.exists(var)) {
340  table_and_result_domain.push_back(var->domainSize());
341  table_and_result_offset.push_back(var1offset[var]);
342  found_proj_var = true;
343  result_domain_size *= var->domainSize();
344  result_varSeq << var;
345  } else {
346  if (found_proj_var) has_before_incr = true;
347  }
348  }
349 
350  std::vector< Idx > table_and_result_value = table_and_result_domain;
351  std::vector< Idx > table_and_result_down = table_and_result_offset;
352 
353  for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
354  table_and_result_down[i] *= (table_and_result_domain[i] - 1);
355  }
356 
357  // create a table "result" containing only the variables of the
358  // projection: the variables are stored in the order in which they appear
359  // in "table" Hence, ++ operations on an instantiation on table will more
360  // or less correspond to a ++ operation on an instantiation on result
361  MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
362  = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
363  result->beginMultipleChanges();
364 
365  for (const auto var: result_varSeq)
366  *result << *var;
367 
368 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
369  result->endMultipleChanges();
370 
371  // fill the matrix with the neutral element
372  for (Idx i = 0; i < result_domain_size; ++i) {
373  result->unsafeSet(i, new GUM_SCALAR(neutral_element));
374  }
375 
376 # else
377  result->endMultipleChanges(neutral_element);
378 # endif
379 
380  // compute the sum: first loop over the variables X's both in table and
381  // in result and, for each value of these X's, loop over the variables
382  // that are in table but not result. As such, in the internal loop, there
383  // is no increment in the offset of "result", and in the outer loop, this
384  // offset is incremented using a simple ++ operator. For table, the
385  // problem is slightly more complicated: in the outer for loop, we shall
386  // increment the variables of table cap result according to vectors
387  // table_and_result_xxx. Each time a variable of these vectors has been
388  // incremented up to its max, we shall put it down to 0 and increment the
389  // next one, and so on. For the inner loop, this is similar except that
390  // we shall do these operations only when before_incr[xxx] steps in the
391  // loop have already been made.
392  GUM_MULTI_DIM_PROJECTION_TYPE* pt
393  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE* >(&(table->unsafeGet(0)));
394  GUM_MULTI_DIM_PROJECTION_TYPE* pres
395  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE* >(&(result->unsafeGet(0)));
396 
397  // but before doing so, check whether there exist positive_before_incr.
398  // If this is not the case, optimize by not using before_incr at all
399  if (!has_before_incr) {
400  for (Idx i = 0; i < result_domain_size; ++i) {
401  for (Idx j = 0; j < table_alone_domain_size; ++j) {
402 # ifdef GUM_MULTI_DIM_PROJECTION_EFFECTIVE_TYPE
403  GUM_MULTI_DIM_PROJECTION(*pres, *pt);
404 # else
405 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
406  **pres = GUM_MULTI_DIM_PROJECTION(*pres, *pt);
407 # else
408  *pres = GUM_MULTI_DIM_PROJECTION(*pres, *pt);
409 # endif
410 # endif
411 
412  // update the offset of table
413  ++pt;
414  }
415 
416  // update the offset of result
417  ++pres;
418  }
419  } else {
420  // here there are positive before_incr and we should use them to know
421  // when result_offset needs be changed
422  Idx table_offset = 0;
423 
424  for (Idx j = 0; j < result_domain_size; ++j) {
425  for (Idx i = 0; i < table_alone_domain_size; ++i) {
426 # ifdef GUM_MULTI_DIM_PROJECTION_EFFECTIVE_TYPE
427  GUM_MULTI_DIM_PROJECTION(*pres, pt[table_offset]);
428 # else
429 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
430  **pres = GUM_MULTI_DIM_PROJECTION(*pres, pt[table_offset]);
431 # else
432  *pres = GUM_MULTI_DIM_PROJECTION(*pres, pt[table_offset]);
433 # endif
434 # endif
435 
436  // update the increment of table for the inner loop
437  for (unsigned int k = 0; k < table_alone_value.size(); ++k) {
438  --table_alone_value[k];
439 
440  if (table_alone_value[k]) {
441  table_offset += table_alone_offset[k];
442  break;
443  }
444 
445  table_alone_value[k] = table_alone_domain[k];
446  table_offset -= table_alone_down[k];
447  }
448  }
449 
450  // update the offset of table for the outer loop
451  for (unsigned int k = 0; k < table_and_result_value.size(); ++k) {
452  --table_and_result_value[k];
453 
454  if (table_and_result_value[k]) {
455  table_offset += table_and_result_offset[k];
456  break;
457  }
458 
459  table_and_result_value[k] = table_and_result_domain[k];
460  table_offset -= table_and_result_down[k];
461  }
462 
463  // update the offset of result for the outer loop
464  //++result_offset;
465  ++pres;
466  }
467  }
468 
469  return result;
470  }
471  }
472 
473 # undef GUM_MULTI_DIM_PROJECTION_TYPE
474 
475 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
476 # undef GUM_MULTI_DIM_PROJECTION_POINTER
477 # endif
478 } // namespace gum
479 #endif /* GUM_PROJECTION_PATTERN_ALLOWED */