aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
independenceTest_tpl.h
Go to the documentation of this file.
1
/**
2
*
3
* Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/** @file
23
* @brief the base class for all the independence tests used for learning
24
*
25
* @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26
*/
27
#
ifndef
DOXYGEN_SHOULD_SKIP_THIS
28
29
namespace
gum
{
30
31
namespace
learning
{
32
33
/// returns the allocator used by the independence test
34
template
<
template
<
typename
>
class
ALLOC
>
35
INLINE
typename
IndependenceTest
<
ALLOC
>::
allocator_type
36
IndependenceTest
<
ALLOC
>::
getAllocator
()
const
{
37
return
counter_
.
getAllocator
();
38
}
39
40
41
/// default constructor
42
template
<
template
<
typename
>
class
ALLOC
>
43
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
44
const
DBRowGeneratorParser
<
ALLOC
>&
parser
,
45
const
Apriori
<
ALLOC
>&
apriori
,
46
const
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
47
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >&
ranges
,
48
const
Bijection
<
NodeId
,
std
::
size_t
,
ALLOC
<
std
::
size_t
> >&
nodeId2columns
,
49
const
typename
IndependenceTest
<
ALLOC
>::
allocator_type
&
alloc
) :
50
apriori_
(
apriori
.
clone
(
alloc
)),
51
counter_
(
parser
,
ranges
,
nodeId2columns
,
alloc
),
cache_
(
alloc
) {
52
GUM_CONSTRUCTOR
(
IndependenceTest
);
53
}
54
55
56
/// default constructor
57
template
<
template
<
typename
>
class
ALLOC
>
58
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
59
const
DBRowGeneratorParser
<
ALLOC
>&
parser
,
60
const
Apriori
<
ALLOC
>&
apriori
,
61
const
Bijection
<
NodeId
,
std
::
size_t
,
ALLOC
<
std
::
size_t
> >&
nodeId2columns
,
62
const
typename
IndependenceTest
<
ALLOC
>::
allocator_type
&
alloc
) :
63
apriori_
(
apriori
.
clone
(
alloc
)),
64
counter_
(
parser
,
nodeId2columns
,
alloc
),
cache_
(
alloc
) {
65
GUM_CONSTRUCTOR
(
IndependenceTest
);
66
}
67
68
69
/// copy constructor with a given allocator
70
template
<
template
<
typename
>
class
ALLOC
>
71
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
72
const
IndependenceTest
<
ALLOC
>&
from
,
73
const
typename
IndependenceTest
<
ALLOC
>::
allocator_type
&
alloc
) :
74
apriori_
(
from
.
apriori_
->
clone
(
alloc
)),
75
counter_
(
from
.
counter_
,
alloc
),
cache_
(
from
.
cache_
,
alloc
),
use_cache_
(
from
.
use_cache_
) {
76
GUM_CONS_CPY
(
IndependenceTest
);
77
}
78
79
80
/// copy constructor
81
template
<
template
<
typename
>
class
ALLOC
>
82
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
const
IndependenceTest
<
ALLOC
>&
from
) :
83
IndependenceTest
(
from
,
from
.
getAllocator
()) {}
84
85
86
/// move constructor
87
template
<
template
<
typename
>
class
ALLOC
>
88
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
89
IndependenceTest
<
ALLOC
>&&
from
,
90
const
typename
IndependenceTest
<
ALLOC
>::
allocator_type
&
alloc
) :
91
apriori_
(
from
.
apriori_
),
92
counter_
(
std
::
move
(
from
.
counter_
),
alloc
),
cache_
(
std
::
move
(
from
.
cache_
),
alloc
),
93
use_cache_
(
from
.
use_cache_
) {
94
from
.
apriori_
=
nullptr
;
95
GUM_CONS_MOV
(
IndependenceTest
);
96
}
97
98
99
/// move constructor
100
template
<
template
<
typename
>
class
ALLOC
>
101
INLINE
IndependenceTest
<
ALLOC
>::
IndependenceTest
(
IndependenceTest
<
ALLOC
>&&
from
) :
102
IndependenceTest
(
std
::
move
(
from
),
from
.
getAllocator
()) {}
103
104
105
/// destructor
106
template
<
template
<
typename
>
class
ALLOC
>
107
INLINE
IndependenceTest
<
ALLOC
>::~
IndependenceTest
() {
108
if
(
apriori_
!=
nullptr
) {
109
ALLOC
<
Apriori
<
ALLOC
> >
allocator
(
this
->
getAllocator
());
110
allocator
.
destroy
(
apriori_
);
111
allocator
.
deallocate
(
apriori_
, 1);
112
}
113
GUM_DESTRUCTOR
(
IndependenceTest
);
114
}
115
116
117
/// copy operator
118
template
<
template
<
typename
>
class
ALLOC
>
119
IndependenceTest
<
ALLOC
>&
120
IndependenceTest
<
ALLOC
>::
operator
=(
const
IndependenceTest
<
ALLOC
>&
from
) {
121
if
(
this
!= &
from
) {
122
Apriori
<
ALLOC
>*
new_apriori
=
from
.
apriori_
->
clone
();
123
RecordCounter
<
ALLOC
>
new_counter
=
from
.
counter_
;
124
ScoringCache
<
ALLOC
>
new_cache
=
from
.
cache_
;
125
126
if
(
apriori_
!=
nullptr
) {
127
ALLOC
<
Apriori
<
ALLOC
> >
allocator
(
this
->
getAllocator
());
128
allocator
.
destroy
(
apriori_
);
129
allocator
.
deallocate
(
apriori_
, 1);
130
}
131
132
apriori_
=
new_apriori
;
133
counter_
=
std
::
move
(
new_counter
);
134
cache_
=
std
::
move
(
new_cache
);
135
136
use_cache_
=
from
.
use_cache_
;
137
}
138
return
*
this
;
139
}
140
141
142
/// move operator
143
template
<
template
<
typename
>
class
ALLOC
>
144
IndependenceTest
<
ALLOC
>&
145
IndependenceTest
<
ALLOC
>::
operator
=(
IndependenceTest
<
ALLOC
>&&
from
) {
146
if
(
this
!= &
from
) {
147
std
::
swap
(
apriori_
,
from
.
apriori_
);
148
149
counter_
=
std
::
move
(
from
.
counter_
);
150
cache_
=
std
::
move
(
from
.
cache_
);
151
use_cache_
=
from
.
use_cache_
;
152
}
153
return
*
this
;
154
}
155
156
157
/// changes the max number of threads used to parse the database
158
template
<
template
<
typename
>
class
ALLOC
>
159
INLINE
void
IndependenceTest
<
ALLOC
>::
setMaxNbThreads
(
std
::
size_t
nb
)
const
{
160
counter_
.
setMaxNbThreads
(
nb
);
161
}
162
163
164
/// returns the number of threads used to parse the database
165
template
<
template
<
typename
>
class
ALLOC
>
166
INLINE
std
::
size_t
IndependenceTest
<
ALLOC
>::
nbThreads
()
const
{
167
return
counter_
.
nbThreads
();
168
}
169
170
171
/** @brief changes the number min of rows a thread should process in a
172
* multithreading context */
173
template
<
template
<
typename
>
class
ALLOC
>
174
INLINE
void
IndependenceTest
<
ALLOC
>::
setMinNbRowsPerThread
(
const
std
::
size_t
nb
)
const
{
175
counter_
.
setMinNbRowsPerThread
(
nb
);
176
}
177
178
179
/// returns the minimum of rows that each thread should process
180
template
<
template
<
typename
>
class
ALLOC
>
181
INLINE
std
::
size_t
IndependenceTest
<
ALLOC
>::
minNbRowsPerThread
()
const
{
182
return
counter_
.
minNbRowsPerThread
();
183
}
184
185
186
/// sets new ranges to perform the countings used by the score
187
/** @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's rows
188
* indices. The countings are then performed only on the union of the
189
* rows [Xi,Yi), i in {1,...,n}. This is useful, e.g, when performing
190
* cross validation tasks, in which part of the database should be ignored.
191
* An empty set of ranges is equivalent to an interval [X,Y) ranging over
192
* the whole database. */
193
template
<
template
<
typename
>
class
ALLOC
>
194
template
<
template
<
typename
>
class
XALLOC
>
195
void
IndependenceTest
<
ALLOC
>::
setRanges
(
196
const
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
197
XALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >&
new_ranges
) {
198
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
199
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >
200
old_ranges
=
ranges
();
201
counter_
.
setRanges
(
new_ranges
);
202
if
(
old_ranges
!=
ranges
())
clear
();
203
}
204
205
206
/// reset the ranges to the one range corresponding to the whole database
207
template
<
template
<
typename
>
class
ALLOC
>
208
void
IndependenceTest
<
ALLOC
>::
clearRanges
() {
209
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
210
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >
211
old_ranges
=
ranges
();
212
counter_
.
clearRanges
();
213
if
(
old_ranges
!=
ranges
())
clear
();
214
}
215
216
217
/// returns the current ranges
218
template
<
template
<
typename
>
class
ALLOC
>
219
INLINE
const
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
220
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >&
221
IndependenceTest
<
ALLOC
>::
ranges
()
const
{
222
return
counter_
.
ranges
();
223
}
224
225
226
/// returns the score of a pair of nodes
227
template
<
template
<
typename
>
class
ALLOC
>
228
INLINE
double
IndependenceTest
<
ALLOC
>::
score
(
const
NodeId
var1
,
const
NodeId
var2
) {
229
IdCondSet
<
ALLOC
>
idset
(
var1
,
var2
,
empty_ids_
,
false
,
true
,
this
->
getAllocator
());
230
if
(
use_cache_
) {
231
try
{
232
return
cache_
.
score
(
idset
);
233
}
catch
(
NotFound
&) {}
234
double
the_score
=
score_
(
idset
);
235
cache_
.
insert
(
std
::
move
(
idset
),
the_score
);
236
return
the_score
;
237
}
else
{
238
return
score_
(
std
::
move
(
idset
));
239
}
240
}
241
242
243
/// returns the score of a pair of nodes given some other nodes
244
template
<
template
<
typename
>
class
ALLOC
>
245
INLINE
double
246
IndependenceTest
<
ALLOC
>::
score
(
const
NodeId
var1
,
247
const
NodeId
var2
,
248
const
std
::
vector
<
NodeId
,
ALLOC
<
NodeId
> >&
rhs_ids
) {
249
IdCondSet
<
ALLOC
>
idset
(
var1
,
var2
,
rhs_ids
,
false
,
false
,
this
->
getAllocator
());
250
if
(
use_cache_
) {
251
try
{
252
return
cache_
.
score
(
idset
);
253
}
catch
(
NotFound
&) {}
254
double
the_score
=
score_
(
idset
);
255
cache_
.
insert
(
std
::
move
(
idset
),
the_score
);
256
return
the_score
;
257
}
else
{
258
return
score_
(
idset
);
259
}
260
}
261
262
263
/// clears all the data structures from memory
264
template
<
template
<
typename
>
class
ALLOC
>
265
INLINE
void
IndependenceTest
<
ALLOC
>::
clear
() {
266
counter_
.
clear
();
267
cache_
.
clear
();
268
}
269
270
271
/// clears the current cache (clear nodesets as well)
272
template
<
template
<
typename
>
class
ALLOC
>
273
INLINE
void
IndependenceTest
<
ALLOC
>::
clearCache
() {
274
cache_
.
clear
();
275
}
276
277
278
/// turn on/off the use of a cache of the previously computed score
279
template
<
template
<
typename
>
class
ALLOC
>
280
INLINE
void
IndependenceTest
<
ALLOC
>::
useCache
(
const
bool
on_off
) {
281
use_cache_
=
on_off
;
282
}
283
284
285
/// return the mapping between the columns of the database and the node ids
286
template
<
template
<
typename
>
class
ALLOC
>
287
INLINE
const
Bijection
<
NodeId
,
std
::
size_t
,
ALLOC
<
std
::
size_t
> >&
288
IndependenceTest
<
ALLOC
>::
nodeId2Columns
()
const
{
289
return
counter_
.
nodeId2Columns
();
290
}
291
292
293
/// return the database used by the score
294
template
<
template
<
typename
>
class
ALLOC
>
295
INLINE
const
DatabaseTable
<
ALLOC
>&
IndependenceTest
<
ALLOC
>::
database
()
const
{
296
return
counter_
.
database
();
297
}
298
299
300
/// returns a counting vector where variables are marginalized from N_xyz
301
/** @param node_2_marginalize indicates which node(s) shall be marginalized:
302
* - 0 means that X should be marginalized
303
* - 1 means that Y should be marginalized
304
* - 2 means that Z should be marginalized
305
*/
306
template
<
template
<
typename
>
class
ALLOC
>
307
std
::
vector
<
double
,
ALLOC
<
double
> >
IndependenceTest
<
ALLOC
>::
marginalize_
(
308
const
std
::
size_t
node_2_marginalize
,
309
const
std
::
size_t
X_size
,
310
const
std
::
size_t
Y_size
,
311
const
std
::
size_t
Z_size
,
312
const
std
::
vector
<
double
,
ALLOC
<
double
> >&
N_xyz
)
const
{
313
// determine the size of the output vector
314
std
::
size_t
out_size
=
Z_size
;
315
if
(
node_2_marginalize
==
std
::
size_t
(0))
316
out_size
*=
Y_size
;
317
else
if
(
node_2_marginalize
==
std
::
size_t
(1))
318
out_size
*=
X_size
;
319
320
// allocate the output vector
321
std
::
vector
<
double
,
ALLOC
<
double
> >
res
(
out_size
, 0.0);
322
323
// fill the vector:
324
if
(
node_2_marginalize
==
std
::
size_t
(0)) {
// marginalize X
325
for
(
std
::
size_t
yz
=
std
::
size_t
(0),
xyz
=
std
::
size_t
(0);
yz
<
out_size
; ++
yz
) {
326
for
(
std
::
size_t
x
=
std
::
size_t
(0);
x
<
X_size
; ++
x
, ++
xyz
) {
327
res
[
yz
] +=
N_xyz
[
xyz
];
328
}
329
}
330
}
else
if
(
node_2_marginalize
==
std
::
size_t
(1)) {
// marginalize Y
331
for
(
std
::
size_t
z
=
std
::
size_t
(0),
xyz
=
std
::
size_t
(0),
beg_xz
=
std
::
size_t
(0);
332
z
<
Z_size
;
333
++
z
,
beg_xz
+=
X_size
) {
334
for
(
std
::
size_t
y
=
std
::
size_t
(0);
y
<
Y_size
; ++
y
) {
335
for
(
std
::
size_t
x
=
std
::
size_t
(0),
xz
=
beg_xz
;
x
<
X_size
; ++
x
, ++
xz
, ++
xyz
) {
336
res
[
xz
] +=
N_xyz
[
xyz
];
337
}
338
}
339
}
340
}
else
if
(
node_2_marginalize
==
std
::
size_t
(2)) {
// marginalize X and Y
341
const
std
::
size_t
XY_size
=
X_size
*
Y_size
;
342
for
(
std
::
size_t
z
=
std
::
size_t
(0),
xyz
=
std
::
size_t
(0);
z
<
out_size
; ++
z
) {
343
for
(
std
::
size_t
xy
=
std
::
size_t
(0);
xy
<
XY_size
; ++
xy
, ++
xyz
) {
344
res
[
z
] +=
N_xyz
[
xyz
];
345
}
346
}
347
}
else
{
348
GUM_ERROR
(
NotImplementedYet
,
349
"_marginalize not implemented for nodeset "
<<
node_2_marginalize
);
350
}
351
352
return
res
;
353
}
354
355
}
/* namespace learning */
356
357
}
/* namespace gum */
358
359
#
endif
/* DOXYGEN_SHOULD_SKIP_THIS */
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:643
gum::learning::genericBNLearner::Database::Database
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
Definition:
genericBNLearner_tpl.h:31