aGrUM
0.20.2
a C++ library for (probabilistic) graphical models
independenceTest_tpl.h
Go to the documentation of this file.
1
/**
2
*
3
* Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/** @file
23
* @brief the base class for all the independence tests used for learning
24
*
25
* @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26
*/
27
#
ifndef
DOXYGEN_SHOULD_SKIP_THIS
28
29
namespace
gum
{
30
31
namespace
learning
{
32
33
/// returns the allocator used by the independence test
34
template
<
template
<
typename
>
class
ALLOC
>
35
INLINE
typename
IndependenceTest
<
ALLOC
>::
allocator_type
36
IndependenceTest
<
ALLOC
>::
getAllocator
()
const
{
37
return
counter_
.
getAllocator
();
38
}
39
40
41
/// default constructor
42
template
<
template
<
typename
>
class
ALLOC
>
43
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
44
const
DBRowGeneratorParser
<
ALLOC
>&
parser
,
45
const
Apriori
<
ALLOC
>&
apriori
,
46
const
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
47
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >&
ranges
,
48
const
Bijection
<
NodeId
,
std
::
size_t
,
ALLOC
<
std
::
size_t
> >&
49
nodeId2columns
,
50
const
typename
IndependenceTest
<
ALLOC
>::
allocator_type
&
alloc
) :
51
apriori_
(
apriori
.
clone
(
alloc
)),
52
counter_
(
parser
,
ranges
,
nodeId2columns
,
alloc
),
cache_
(
alloc
) {
53
GUM_CONSTRUCTOR
(
IndependenceTest
);
54
}
55
56
57
/// default constructor
58
template
<
template
<
typename
>
class
ALLOC
>
59
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
60
const
DBRowGeneratorParser
<
ALLOC
>&
parser
,
61
const
Apriori
<
ALLOC
>&
apriori
,
62
const
Bijection
<
NodeId
,
std
::
size_t
,
ALLOC
<
std
::
size_t
> >&
63
nodeId2columns
,
64
const
typename
IndependenceTest
<
ALLOC
>::
allocator_type
&
alloc
) :
65
apriori_
(
apriori
.
clone
(
alloc
)),
66
counter_
(
parser
,
nodeId2columns
,
alloc
),
cache_
(
alloc
) {
67
GUM_CONSTRUCTOR
(
IndependenceTest
);
68
}
69
70
71
/// copy constructor with a given allocator
72
template
<
template
<
typename
>
class
ALLOC
>
73
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
74
const
IndependenceTest
<
ALLOC
>&
from
,
75
const
typename
IndependenceTest
<
ALLOC
>::
allocator_type
&
alloc
) :
76
apriori_
(
from
.
apriori_
->
clone
(
alloc
)),
77
counter_
(
from
.
counter_
,
alloc
),
cache_
(
from
.
cache_
,
alloc
),
78
use_cache_
(
from
.
use_cache_
) {
79
GUM_CONS_CPY
(
IndependenceTest
);
80
}
81
82
83
/// copy constructor
84
template
<
template
<
typename
>
class
ALLOC
>
85
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
86
const
IndependenceTest
<
ALLOC
>&
from
) :
87
IndependenceTest
(
from
,
from
.
getAllocator
()) {}
88
89
90
/// move constructor
91
template
<
template
<
typename
>
class
ALLOC
>
92
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
93
IndependenceTest
<
ALLOC
>&&
from
,
94
const
typename
IndependenceTest
<
ALLOC
>::
allocator_type
&
alloc
) :
95
apriori_
(
from
.
apriori_
),
96
counter_
(
std
::
move
(
from
.
counter_
),
alloc
),
97
cache_
(
std
::
move
(
from
.
cache_
),
alloc
),
use_cache_
(
from
.
use_cache_
) {
98
from
.
apriori_
=
nullptr
;
99
GUM_CONS_MOV
(
IndependenceTest
);
100
}
101
102
103
/// move constructor
104
template
<
template
<
typename
>
class
ALLOC
>
105
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
106
IndependenceTest
<
ALLOC
>&&
from
) :
107
IndependenceTest
(
std
::
move
(
from
),
from
.
getAllocator
()) {}
108
109
110
/// destructor
111
template
<
template
<
typename
>
class
ALLOC
>
112
INLINE
IndependenceTest
<
ALLOC
>::~
IndependenceTest
() {
113
if
(
apriori_
!=
nullptr
) {
114
ALLOC
<
Apriori
<
ALLOC
> >
allocator
(
this
->
getAllocator
());
115
allocator
.
destroy
(
apriori_
);
116
allocator
.
deallocate
(
apriori_
, 1);
117
}
118
GUM_DESTRUCTOR
(
IndependenceTest
);
119
}
120
121
122
/// copy operator
123
template
<
template
<
typename
>
class
ALLOC
>
124
IndependenceTest
<
ALLOC
>&
IndependenceTest
<
ALLOC
>::
operator
=(
125
const
IndependenceTest
<
ALLOC
>&
from
) {
126
if
(
this
!= &
from
) {
127
Apriori
<
ALLOC
>*
new_apriori
=
from
.
apriori_
->
clone
();
128
RecordCounter
<
ALLOC
>
new_counter
=
from
.
counter_
;
129
ScoringCache
<
ALLOC
>
new_cache
=
from
.
cache_
;
130
131
if
(
apriori_
!=
nullptr
) {
132
ALLOC
<
Apriori
<
ALLOC
> >
allocator
(
this
->
getAllocator
());
133
allocator
.
destroy
(
apriori_
);
134
allocator
.
deallocate
(
apriori_
, 1);
135
}
136
137
apriori_
=
new_apriori
;
138
counter_
=
std
::
move
(
new_counter
);
139
cache_
=
std
::
move
(
new_cache
);
140
141
use_cache_
=
from
.
use_cache_
;
142
}
143
return
*
this
;
144
}
145
146
147
/// move operator
148
template
<
template
<
typename
>
class
ALLOC
>
149
IndependenceTest
<
ALLOC
>&
150
IndependenceTest
<
ALLOC
>::
operator
=(
IndependenceTest
<
ALLOC
>&&
from
) {
151
if
(
this
!= &
from
) {
152
std
::
swap
(
apriori_
,
from
.
apriori_
);
153
154
counter_
=
std
::
move
(
from
.
counter_
);
155
cache_
=
std
::
move
(
from
.
cache_
);
156
use_cache_
=
from
.
use_cache_
;
157
}
158
return
*
this
;
159
}
160
161
162
/// changes the max number of threads used to parse the database
163
template
<
template
<
typename
>
class
ALLOC
>
164
INLINE
void
IndependenceTest
<
ALLOC
>::
setMaxNbThreads
(
std
::
size_t
nb
)
const
{
165
counter_
.
setMaxNbThreads
(
nb
);
166
}
167
168
169
/// returns the number of threads used to parse the database
170
template
<
template
<
typename
>
class
ALLOC
>
171
INLINE
std
::
size_t
IndependenceTest
<
ALLOC
>::
nbThreads
()
const
{
172
return
counter_
.
nbThreads
();
173
}
174
175
176
/** @brief changes the number min of rows a thread should process in a
177
* multithreading context */
178
template
<
template
<
typename
>
class
ALLOC
>
179
INLINE
void
IndependenceTest
<
ALLOC
>::
setMinNbRowsPerThread
(
180
const
std
::
size_t
nb
)
const
{
181
counter_
.
setMinNbRowsPerThread
(
nb
);
182
}
183
184
185
/// returns the minimum of rows that each thread should process
186
template
<
template
<
typename
>
class
ALLOC
>
187
INLINE
std
::
size_t
IndependenceTest
<
ALLOC
>::
minNbRowsPerThread
()
const
{
188
return
counter_
.
minNbRowsPerThread
();
189
}
190
191
192
/// sets new ranges to perform the countings used by the score
193
/** @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's rows
194
* indices. The countings are then performed only on the union of the
195
* rows [Xi,Yi), i in {1,...,n}. This is useful, e.g, when performing
196
* cross validation tasks, in which part of the database should be ignored.
197
* An empty set of ranges is equivalent to an interval [X,Y) ranging over
198
* the whole database. */
199
template
<
template
<
typename
>
class
ALLOC
>
200
template
<
template
<
typename
>
class
XALLOC
>
201
void
IndependenceTest
<
ALLOC
>::
setRanges
(
202
const
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
203
XALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >&
204
new_ranges
) {
205
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
206
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >
207
old_ranges
=
ranges
();
208
counter_
.
setRanges
(
new_ranges
);
209
if
(
old_ranges
!=
ranges
())
clear
();
210
}
211
212
213
/// reset the ranges to the one range corresponding to the whole database
214
template
<
template
<
typename
>
class
ALLOC
>
215
void
IndependenceTest
<
ALLOC
>::
clearRanges
() {
216
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
217
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >
218
old_ranges
=
ranges
();
219
counter_
.
clearRanges
();
220
if
(
old_ranges
!=
ranges
())
clear
();
221
}
222
223
224
/// returns the current ranges
225
template
<
template
<
typename
>
class
ALLOC
>
226
INLINE
const
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
227
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >&
228
IndependenceTest
<
ALLOC
>::
ranges
()
const
{
229
return
counter_
.
ranges
();
230
}
231
232
233
/// returns the score of a pair of nodes
234
template
<
template
<
typename
>
class
ALLOC
>
235
INLINE
double
IndependenceTest
<
ALLOC
>::
score
(
const
NodeId
var1
,
236
const
NodeId
var2
) {
237
IdCondSet
<
ALLOC
>
idset
(
var1
,
238
var2
,
239
empty_ids_
,
240
false
,
241
true
,
242
this
->
getAllocator
());
243
if
(
use_cache_
) {
244
try
{
245
return
cache_
.
score
(
idset
);
246
}
catch
(
NotFound
&) {}
247
double
the_score
=
score_
(
idset
);
248
cache_
.
insert
(
std
::
move
(
idset
),
the_score
);
249
return
the_score
;
250
}
else
{
251
return
score_
(
std
::
move
(
idset
));
252
}
253
}
254
255
256
/// returns the score of a pair of nodes given some other nodes
257
template
<
template
<
typename
>
class
ALLOC
>
258
INLINE
double
IndependenceTest
<
ALLOC
>::
score
(
259
const
NodeId
var1
,
260
const
NodeId
var2
,
261
const
std
::
vector
<
NodeId
,
ALLOC
<
NodeId
> >&
rhs_ids
) {
262
IdCondSet
<
ALLOC
>
idset
(
var1
,
263
var2
,
264
rhs_ids
,
265
false
,
266
false
,
267
this
->
getAllocator
());
268
if
(
use_cache_
) {
269
try
{
270
return
cache_
.
score
(
idset
);
271
}
catch
(
NotFound
&) {}
272
double
the_score
=
score_
(
idset
);
273
cache_
.
insert
(
std
::
move
(
idset
),
the_score
);
274
return
the_score
;
275
}
else
{
276
return
score_
(
idset
);
277
}
278
}
279
280
281
/// clears all the data structures from memory
282
template
<
template
<
typename
>
class
ALLOC
>
283
INLINE
void
IndependenceTest
<
ALLOC
>::
clear
() {
284
counter_
.
clear
();
285
cache_
.
clear
();
286
}
287
288
289
/// clears the current cache (clear nodesets as well)
290
template
<
template
<
typename
>
class
ALLOC
>
291
INLINE
void
IndependenceTest
<
ALLOC
>::
clearCache
() {
292
cache_
.
clear
();
293
}
294
295
296
/// turn on/off the use of a cache of the previously computed score
297
template
<
template
<
typename
>
class
ALLOC
>
298
INLINE
void
IndependenceTest
<
ALLOC
>::
useCache
(
const
bool
on_off
) {
299
use_cache_
=
on_off
;
300
}
301
302
303
/// return the mapping between the columns of the database and the node ids
304
template
<
template
<
typename
>
class
ALLOC
>
305
INLINE
const
Bijection
<
NodeId
,
std
::
size_t
,
ALLOC
<
std
::
size_t
> >&
306
IndependenceTest
<
ALLOC
>::
nodeId2Columns
()
const
{
307
return
counter_
.
nodeId2Columns
();
308
}
309
310
311
/// return the database used by the score
312
template
<
template
<
typename
>
class
ALLOC
>
313
INLINE
const
DatabaseTable
<
ALLOC
>&
314
IndependenceTest
<
ALLOC
>::
database
()
const
{
315
return
counter_
.
database
();
316
}
317
318
319
/// returns a counting vector where variables are marginalized from N_xyz
320
/** @param node_2_marginalize indicates which node(s) shall be marginalized:
321
* - 0 means that X should be marginalized
322
* - 1 means that Y should be marginalized
323
* - 2 means that Z should be marginalized
324
*/
325
template
<
template
<
typename
>
class
ALLOC
>
326
std
::
vector
<
double
,
ALLOC
<
double
> >
IndependenceTest
<
ALLOC
>::
marginalize_
(
327
const
std
::
size_t
node_2_marginalize
,
328
const
std
::
size_t
X_size
,
329
const
std
::
size_t
Y_size
,
330
const
std
::
size_t
Z_size
,
331
const
std
::
vector
<
double
,
ALLOC
<
double
> >&
N_xyz
)
const
{
332
// determine the size of the output vector
333
std
::
size_t
out_size
=
Z_size
;
334
if
(
node_2_marginalize
==
std
::
size_t
(0))
335
out_size
*=
Y_size
;
336
else
if
(
node_2_marginalize
==
std
::
size_t
(1))
337
out_size
*=
X_size
;
338
339
// allocate the output vector
340
std
::
vector
<
double
,
ALLOC
<
double
> >
res
(
out_size
, 0.0);
341
342
// fill the vector:
343
if
(
node_2_marginalize
==
std
::
size_t
(0)) {
// marginalize X
344
for
(
std
::
size_t
yz
=
std
::
size_t
(0),
xyz
=
std
::
size_t
(0);
yz
<
out_size
;
345
++
yz
) {
346
for
(
std
::
size_t
x
=
std
::
size_t
(0);
x
<
X_size
; ++
x
, ++
xyz
) {
347
res
[
yz
] +=
N_xyz
[
xyz
];
348
}
349
}
350
}
else
if
(
node_2_marginalize
==
std
::
size_t
(1)) {
// marginalize Y
351
for
(
std
::
size_t
z
=
std
::
size_t
(0),
352
xyz
=
std
::
size_t
(0),
353
beg_xz
=
std
::
size_t
(0);
354
z
<
Z_size
;
355
++
z
,
beg_xz
+=
X_size
) {
356
for
(
std
::
size_t
y
=
std
::
size_t
(0);
y
<
Y_size
; ++
y
) {
357
for
(
std
::
size_t
x
=
std
::
size_t
(0),
xz
=
beg_xz
;
x
<
X_size
;
358
++
x
, ++
xz
, ++
xyz
) {
359
res
[
xz
] +=
N_xyz
[
xyz
];
360
}
361
}
362
}
363
}
else
if
(
node_2_marginalize
==
std
::
size_t
(2)) {
// marginalize X and Y
364
const
std
::
size_t
XY_size
=
X_size
*
Y_size
;
365
for
(
std
::
size_t
z
=
std
::
size_t
(0),
xyz
=
std
::
size_t
(0);
z
<
out_size
;
366
++
z
) {
367
for
(
std
::
size_t
xy
=
std
::
size_t
(0);
xy
<
XY_size
; ++
xy
, ++
xyz
) {
368
res
[
z
] +=
N_xyz
[
xyz
];
369
}
370
}
371
}
else
{
372
GUM_ERROR
(
NotImplementedYet
,
373
"_marginalize not implemented for nodeset "
374
<<
node_2_marginalize
);
375
}
376
377
return
res
;
378
}
379
380
}
/* namespace learning */
381
382
}
/* namespace gum */
383
384
#
endif
/* DOXYGEN_SHOULD_SKIP_THIS */
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:669
gum::learning::genericBNLearner::Database::Database
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
Definition:
genericBNLearner_tpl.h:31