aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
projectionPattern4BaseName.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  *
25  * @brief the pattern used by all the "basename" projections of multidim tables
26  *
27  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
28  */
29 
30 // check if we allowed these patterns to be used
31 #ifndef GUM_PROJECTION_PATTERN_ALLOWED
32 
33 // #warning To use projectionPattern, you must define
34 // GUM_PROJECTION_PATTERN_ALLOWED
35 
36 #else
37 namespace gum {
38 
39  // a specialized function for projecting a multiDimImplementation over a subset
40  // of its variables
41 
42 # ifdef GUM_MULTI_DIM_PROJECTION_NAME
43 # define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
44  template < typename GUM_SCALAR >
45  MultiDimImplementation< GUM_SCALAR >* GUM_MULTI_DIM_PROJECTION_NAME(
46  const MultiDimImplementation< GUM_SCALAR >* table,
47  const Set< const DiscreteVariable* >& del_vars)
48 # endif
49 
50  // clang-format off
51 
52 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME
53 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
54 #define GUM_MULTI_DIM_PROJECTION_POINTER
55  template <typename GUM_SCALAR>
56  MultiDimImplementation<GUM_SCALAR*>* GUM_MULTI_DIM_PROJECTION_POINTER_NAME(
57  const MultiDimImplementation<GUM_SCALAR*>* table,
58  const Set<const DiscreteVariable*>& del_vars )
59 #endif
60 
61 #ifdef GUM_MULTI_DIM_PROJECTION_NAME_F
62 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
63  template <typename GUM_SCALAR>
64  MultiDimImplementation<GUM_SCALAR>* GUM_MULTI_DIM_PROJECTION_NAME_F(
65  const MultiDimImplementation<GUM_SCALAR>* table,
66  const Set<const DiscreteVariable*>& del_vars,
67  GUM_SCALAR ( *f )( const GUM_SCALAR&, const GUM_SCALAR& ) )
68 #endif
69 
70 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F
71 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
72 #define GUM_MULTI_DIM_PROJECTION_POINTER
73  template <typename GUM_SCALAR>
74  MultiDimImplementation<GUM_SCALAR*>*
75  GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F(
76  const MultiDimImplementation<GUM_SCALAR*>* table,
77  const Set<const DiscreteVariable*>& del_vars,
78  GUM_SCALAR* ( *f )( const GUM_SCALAR const*,
79  const GUM_SCALAR const* ) )
80 #endif
81 
82  // clang-format on
83 
84  {
85 
86  // create the neutral element used to fill the result upon its
87  // creation
88  const GUM_SCALAR neutral_element = GUM_MULTI_DIM_PROJECTION_NEUTRAL;
89 
90  // first, compute whether we should loop over table or over the projected
91  // table first to get a faster algorithm.
92  const Sequence< const DiscreteVariable* >& table_vars
93  = table->variablesSequence();
94  bool need_swapping = table_vars.size() >= 2 * del_vars.size();
95 
96  if (!need_swapping) {
97  // Compute the variables that belong to both the projection set and
98  // table. Store the domain size of the Cartesian product of these
99  // variables (result_domain_size) as well as the domain size of the
100  // Cartesian product of the variables of table that do not belong to
101  // projection set, i.e., those variables that belong to table but not to
102  // del_vars (table_alone_domain_size). In addition, store the number of
103  // increments in the computation loops at the end of the function before
104  // which the variables of the projection set need be incremented (vector
105  // before incr).
106  std::vector< Idx > table_and_result_offset;
107  std::vector< Idx > table_and_result_domain;
108  std::vector< Idx > before_incr;
109  unsigned int nb_positive_before_incr = 0;
110  Idx table_alone_domain_size = 1;
111  Idx result_domain_size = 1;
112  Idx table_domain_size = 1;
113  Sequence< const DiscreteVariable* > result_varSeq;
114  {
115  Idx tmp_before_incr = 1;
116  bool has_before_incr = false;
117 
118  for (const auto var: table_vars) {
119  table_domain_size *= var->domainSize();
120 
121  if (!del_vars.exists(var)) {
122  if (has_before_incr) {
123  before_incr.push_back(tmp_before_incr - 1);
124  has_before_incr = false;
125  ++nb_positive_before_incr;
126  } else {
127  before_incr.push_back(0);
128  }
129 
130  table_and_result_domain.push_back(var->domainSize());
131  table_and_result_offset.push_back(result_domain_size);
132  result_domain_size *= var->domainSize();
133  tmp_before_incr = 1;
134  result_varSeq << var;
135  } else {
136  tmp_before_incr *= var->domainSize();
137  has_before_incr = true;
138  table_alone_domain_size *= var->domainSize();
139  }
140  }
141  }
142  std::vector< Idx > table_and_result_value = table_and_result_domain;
143  std::vector< Idx > current_incr = before_incr;
144  std::vector< Idx > table_and_result_down = table_and_result_offset;
145 
146  for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
147  table_and_result_down[i] *= (table_and_result_domain[i] - 1);
148  }
149 
150  // create a table "result" containing only the variables of the
151  // projection: the variables are stored in the order in which they appear
152  // in "table" Hence, ++ operations on an instantiation on table will more
153  // or less correspond to a ++ operation on an instantiation on result
154  MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
155  = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
156 
157  if (!result_varSeq.size()) { return result; }
158 
159  result->beginMultipleChanges();
160 
161  for (const auto var: result_varSeq)
162  *result << *var;
163 
164  result->endMultipleChanges();
165 
166 // fill the matrix with the neutral element
167 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
168 
169  for (Idx i = 0; i < result_domain_size; ++i) {
170  result->unsafeSet(i, new GUM_SCALAR(neutral_element));
171  }
172 
173 # else
174  result->fill(neutral_element);
175 # endif
176 
177  // compute the projection: first loop over the variables X's in table
178  // that do not belong to result and, for each value of these X's, loop
179  // over the variables in both table and result. As such, in the internal
180  // loop, the offsets of "result" need only be incremented as usually to
181  // parse appropriately this table. For result, the problem is slightly
182  // more complicated: in the outer for loop, we shall always reset
183  // resul_offset to 0. For the inner loop, result_offset should be
184  // incremented (++) only when t1 before_incr[xxx] steps in the loop have
185  // already been made.
186 
187  // but before doing so, check whether there exist positive_before_incr.
188  // If this is not the case, optimize by not using before_incr at all
189  if (!nb_positive_before_incr) {
190  Idx result_offset = 0;
191  Instantiation table_inst(table);
192 
193  for (Idx i = 0; i < table_alone_domain_size; ++i) {
194  for (Idx j = 0; j < result_domain_size; ++j) {
195 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
196  GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
197  *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
198 # else
199  GUM_MULTI_DIM_PROJECTION_TYPE& res
200  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(
201  result->unsafeGet(result_offset));
202  res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
203 # endif
204 
205  // update the offset of table and result
206  ++table_inst;
207  ++result_offset;
208  }
209 
210  // update the offset of result
211  result_offset = 0;
212  }
213  } else {
214  // here there are positive before_incr and we should use them to know
215  // when result_offset needs be changed
216  Idx result_offset = 0;
217  Instantiation table_inst(table);
218 
219  for (Idx i = 0; i < table_domain_size; ++i) {
220 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
221  GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
222  *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
223 # else
224  GUM_MULTI_DIM_PROJECTION_TYPE& res
225  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(
226  result->unsafeGet(result_offset));
227  res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
228 # endif
229 
230  // update the offset of table
231  ++table_inst;
232 
233  // update the offset of result
234  for (unsigned int k = 0; k < current_incr.size(); ++k) {
235  // check if we need modify result_offset
236  if (current_incr[k]) {
237  --current_incr[k];
238  break;
239  }
240 
241  current_incr[k] = before_incr[k];
242 
243  // here we shall modify result_offset
244  --table_and_result_value[k];
245 
246  if (table_and_result_value[k]) {
247  result_offset += table_and_result_offset[k];
248  break;
249  }
250 
251  table_and_result_value[k] = table_and_result_domain[k];
252  result_offset -= table_and_result_down[k];
253  }
254  }
255  }
256 
257  return result;
258  } else { // need_swapping = true
259 
260  // Compute the variables that are in t1 but not in t2. For these
261  // variables, get the increment in the offset of t1 that would result
262  // from an increment in one of these variables (vector t1_alone_offset).
263  // Also store the domain size of these variables (vector t1_alone_domain)
264  // and, for the computation loops at the end of this function, |variable|
265  // - the current values of these variables (vector t1_alone_value). All
266  // these vectors reference the variables of t1 \ t2 in the order in which
267  // they appear in seq1. Keep as well the size of the Cartesian product of
268  // these variables.
269  std::vector< Idx > table_alone_offset;
270  std::vector< Idx > table_alone_domain;
271  Idx offset = 1;
272  Idx table_alone_domain_size = 1;
273  HashTable< const DiscreteVariable*, Idx > var1offset(table_vars.size());
274 
275  for (const auto var: table_vars) {
276  if (del_vars.exists(var)) {
277  table_alone_domain.push_back(var->domainSize());
278  table_alone_offset.push_back(offset);
279  table_alone_domain_size *= var->domainSize();
280  }
281 
282  var1offset.insert(var, offset);
283  offset *= var->domainSize();
284  }
285 
286  std::vector< Idx > table_alone_value = table_alone_domain;
287  std::vector< Idx > table_alone_down = table_alone_offset;
288 
289  for (unsigned int i = 0; i < table_alone_down.size(); ++i)
290  table_alone_down[i] *= (table_alone_domain[i] - 1);
291 
292  // Compute the same vectors for the variables that belong to both t1 and
293  // t2. In this case, All these vectors reference the variables in the
294  // order in which they appear in seq2. In addition, store the number of
295  // increments in the computation loops at the end of the function before
296  // which the variables of t1 cap t2 need be incremented (vector
297  // t1_and_t2_before incr). Similarly, store the number of such
298  // increments currently still needed before the next incrementation of
299  // the variables of t1 cap t2. Keep as well the size of the Cartesian
300  // product of these variables.
301  Sequence< const DiscreteVariable* > result_varSeq;
302  std::vector< Idx > table_and_result_offset;
303  std::vector< Idx > table_and_result_domain;
304  Idx result_domain_size = 1;
305  bool has_before_incr = false;
306  bool found_proj_var = false;
307 
308  for (const auto var: table_vars) {
309  if (!del_vars.exists(var)) {
310  table_and_result_domain.push_back(var->domainSize());
311  table_and_result_offset.push_back(var1offset[var]);
312  found_proj_var = true;
313  result_domain_size *= var->domainSize();
314  result_varSeq << var;
315  } else {
316  if (found_proj_var) has_before_incr = true;
317  }
318  }
319 
320  std::vector< Idx > table_and_result_value = table_and_result_domain;
321  std::vector< Idx > table_and_result_down = table_and_result_offset;
322 
323  for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
324  table_and_result_down[i] *= (table_and_result_domain[i] - 1);
325  }
326 
327  // create a table "result" containing only the variables of the
328  // projection: the variables are stored in the order in which they appear
329  // in "table" Hence, ++ operations on an instantiation on table will more
330  // or less correspond to a ++ operation on an instantiation on result
331  MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
332  = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
333  result->beginMultipleChanges();
334 
335  for (const auto var: result_varSeq)
336  *result << *var;
337 
338  result->endMultipleChanges();
339 
340 // fill the matrix with the neutral element
341 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
342 
343  for (Idx i = 0; i < result_domain_size; ++i) {
344  result->unsafeSet(i, new GUM_SCALAR(neutral_element));
345  }
346 
347 # else
348  result->fill(neutral_element);
349 # endif
350 
351  // compute the sum: first loop over the variables X's both in table and
352  // in result and, for each value of these X's, loop over the variables
353  // that are in table but not result. As such, in the internal loop, there
354  // is no increment in the offset of "result", and in the outer loop, this
355  // offset is incremented using a simple ++ operator. For table, the
356  // problem is slightly more complicated: in the outer for loop, we shall
357  // increment the variables of table cap result according to vectors
358  // table_and_result_xxx. Each time a variable of these vectors has been
359  // incremented up to its max, we shall put it down to 0 and increment the
360  // next one, and so on. For the inner loop, this is similar except that
361  // we shall do these operations only when before_incr[xxx] steps in the
362  // loop have already been made.
363 
364  // but before doing so, check whether there exist positive_before_incr.
365  // If this is not the case, optimize by not using before_incr at all
366  if (!has_before_incr) {
367  Idx result_offset = 0;
368  Instantiation table_inst(table);
369 
370  for (Idx i = 0; i < result_domain_size; ++i) {
371  for (Idx j = 0; j < table_alone_domain_size; ++j) {
372 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
373  GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
374  *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
375 # else
376  GUM_MULTI_DIM_PROJECTION_TYPE& res
377  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(
378  result->unsafeGet(result_offset));
379  res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
380 # endif
381 
382  // update the offset of table
383  ++table_inst;
384  }
385 
386  // update the offset of result
387  ++result_offset;
388  }
389  } else {
390  // here there are positive before_incr and we should use them to know
391  // when result_offset needs be changed
392  Idx result_offset = 0;
393  Instantiation table_inst(table);
394 
395  for (Idx j = 0; j < result_domain_size; ++j) {
396  for (Idx i = 0; i < table_alone_domain_size; ++i) {
397 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
398  GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
399  *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
400 # else
401  GUM_MULTI_DIM_PROJECTION_TYPE& res
402  = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(
403  result->unsafeGet(result_offset));
404  res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
405 # endif
406 
407  // update the increment of table for the inner loop
408  for (unsigned int k = 0; k < table_alone_value.size(); ++k) {
409  --table_alone_value[k];
410 
411  if (table_alone_value[k]) {
412  table_inst += table_alone_offset[k];
413  break;
414  }
415 
416  table_alone_value[k] = table_alone_domain[k];
417  table_inst -= table_alone_down[k];
418  }
419  }
420 
421  // update the offset of table for the outer loop
422  for (unsigned int k = 0; k < table_and_result_value.size(); ++k) {
423  --table_and_result_value[k];
424 
425  if (table_and_result_value[k]) {
426  table_inst += table_and_result_offset[k];
427  break;
428  }
429 
430  table_and_result_value[k] = table_and_result_domain[k];
431  table_inst -= table_and_result_down[k];
432  }
433 
434  // update the offset of result for the outer loop
435  ++result_offset;
436  }
437  }
438 
439  return result;
440  }
441  }
442 
443 # undef GUM_MULTI_DIM_PROJECTION_TYPE
444 
445 # ifdef GUM_MULTI_DIM_PROJECTION_POINTER
446 # undef GUM_MULTI_DIM_PROJECTION_POINTER
447 # endif
448 } // namespace gum
449 #endif /* GUM_PROJECTION_PATTERN_ALLOWED */