aGrUM
0.20.2
a C++ library for (probabilistic) graphical models
rawDatabaseTable_tpl.h
Go to the documentation of this file.
1
/**
2
*
3
* Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/** @file
23
* @brief The implementation of raw tabular databases stored in memory (RAM)
24
*
25
* @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26
*/
27
#
include
<
agrum
/
tools
/
database
/
rawDatabaseTable
.
h
>
28
29
#
ifndef
DOXYGEN_SHOULD_SKIP_THIS
30
31
namespace
gum
{
32
33
namespace
learning
{
34
35
36
// default constructor
37
template
<
template
<
typename
>
class
ALLOC
>
38
template
<
template
<
typename
>
class
VARALLOC,
39
template
<
typename
>
40
class
MISSALLOC >
41
INLINE RawDatabaseTable<
ALLOC
>::
RawDatabaseTable
(
42
const
typename
RawDatabaseTable
<
ALLOC
>::
template
MissingValType
<
43
MISSALLOC
>&
missing_symbols
,
44
const
std
::
vector
<
std
::
string
,
VARALLOC
<
std
::
string
> >&
var_names
,
45
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
46
IDatabaseTable
<
DBCell
,
ALLOC
>(
missing_symbols
,
var_names
,
alloc
),
47
ignored_cols__
(
alloc
) {
48
GUM_CONSTRUCTOR
(
RawDatabaseTable
);
49
}
50
51
52
// default constructor
53
template
<
template
<
typename
>
class
ALLOC
>
54
template
<
template
<
typename
>
class
MISSALLOC
>
55
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
56
const
typename
RawDatabaseTable
<
ALLOC
>::
template
MissingValType
<
57
MISSALLOC
>&
missing_symbols
,
58
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
59
IDatabaseTable
<
DBCell
,
ALLOC
>(
60
missing_symbols
,
61
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >(),
62
alloc
),
63
ignored_cols__
(
alloc
) {
64
GUM_CONSTRUCTOR
(
RawDatabaseTable
);
65
}
66
67
68
// default constructor
69
template
<
template
<
typename
>
class
ALLOC
>
70
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
71
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
72
IDatabaseTable
<
DBCell
,
ALLOC
>(
73
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >(),
74
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >(),
75
alloc
),
76
ignored_cols__
(
alloc
) {
77
GUM_CONSTRUCTOR
(
RawDatabaseTable
);
78
}
79
80
81
// copy constructor with a given allocator
82
template
<
template
<
typename
>
class
ALLOC
>
83
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
84
const
RawDatabaseTable
<
ALLOC
>&
from
,
85
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
86
IDatabaseTable
<
DBCell
,
ALLOC
>(
from
,
alloc
),
87
ignored_cols__
(
from
.
ignored_cols__
,
alloc
) {
88
GUM_CONS_CPY
(
RawDatabaseTable
);
89
}
90
91
// copy constructor
92
template
<
template
<
typename
>
class
ALLOC
>
93
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
94
const
RawDatabaseTable
<
ALLOC
>&
from
) :
95
RawDatabaseTable
<
ALLOC
>(
from
,
from
.
getAllocator
()) {}
96
97
98
// move constructor with a given allocator
99
template
<
template
<
typename
>
class
ALLOC
>
100
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
101
RawDatabaseTable
<
ALLOC
>&&
from
,
102
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
103
IDatabaseTable
<
DBCell
,
ALLOC
>(
std
::
move
(
from
),
alloc
),
104
ignored_cols__
(
std
::
move
(
from
.
ignored_cols__
)) {
105
GUM_CONS_MOV
(
RawDatabaseTable
);
106
}
107
108
109
// move constructor
110
template
<
template
<
typename
>
class
ALLOC
>
111
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
112
RawDatabaseTable
<
ALLOC
>&&
from
) :
113
RawDatabaseTable
<
ALLOC
>(
std
::
move
(
from
),
from
.
getAllocator
()) {}
114
115
116
// virtual copy constructor
117
template
<
template
<
typename
>
class
ALLOC
>
118
RawDatabaseTable
<
ALLOC
>*
RawDatabaseTable
<
ALLOC
>::
clone
(
119
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
)
const
{
120
ALLOC
<
RawDatabaseTable
<
ALLOC
> >
allocator
(
alloc
);
121
RawDatabaseTable
<
ALLOC
>*
new_db
=
allocator
.
allocate
(1);
122
try
{
123
allocator
.
construct
(
new_db
, *
this
,
alloc
);
124
}
catch
(...) {
125
allocator
.
deallocate
(
new_db
, 1);
126
throw
;
127
}
128
129
return
new_db
;
130
}
131
132
133
// virtual copy constructor
134
template
<
template
<
typename
>
class
ALLOC
>
135
RawDatabaseTable
<
ALLOC
>*
RawDatabaseTable
<
ALLOC
>::
clone
()
const
{
136
return
clone
(
this
->
getAllocator
());
137
}
138
139
140
// destructor
141
template
<
template
<
typename
>
class
ALLOC
>
142
INLINE
RawDatabaseTable
<
ALLOC
>::~
RawDatabaseTable
() {
143
GUM_DESTRUCTOR
(
RawDatabaseTable
);
144
}
145
146
147
// copy operator
148
template
<
template
<
typename
>
class
ALLOC
>
149
RawDatabaseTable
<
ALLOC
>&
RawDatabaseTable
<
ALLOC
>::
operator
=(
150
const
RawDatabaseTable
<
ALLOC
>&
from
) {
151
if
(
this
!= &
from
) {
152
IDatabaseTable
<
DBCell
,
ALLOC
>::
operator
=(
from
);
153
ignored_cols__
=
from
.
ignored_cols__
;
154
}
155
return
*
this
;
156
}
157
158
159
// move constructor
160
template
<
template
<
typename
>
class
ALLOC
>
161
RawDatabaseTable
<
ALLOC
>&
162
RawDatabaseTable
<
ALLOC
>::
operator
=(
RawDatabaseTable
<
ALLOC
>&&
from
) {
163
if
(
this
!= &
from
) {
164
IDatabaseTable
<
DBCell
,
ALLOC
>::
operator
=(
std
::
move
(
from
));
165
ignored_cols__
=
std
::
move
(
from
.
ignored_cols__
);
166
}
167
return
*
this
;
168
}
169
170
171
// sets the names of the variables
172
template
<
template
<
typename
>
class
ALLOC
>
173
void
RawDatabaseTable
<
ALLOC
>::
setVariableNames
(
174
const
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >&
names
,
175
const
bool
from_external_object
) {
176
const
std
::
size_t
size
=
names
.
size
();
177
const
std
::
size_t
ignored_cols_size
=
ignored_cols__
.
size
();
178
179
if
(!
from_external_object
|| !
ignored_cols_size
) {
180
if
(
this
->
rows_
.
empty
() || (
size
==
this
->
rows_
[0].
size
())) {
181
this
->
variable_names_
=
names
;
182
}
else
{
183
GUM_ERROR
(
184
SizeError
,
185
"the number of variable's names (i.e., "
186
<<
size
<<
") does not correspond to the number of columns of the "
187
<<
"raw database table (i.e.,"
<<
this
->
rows_
[0].
size
() <<
")"
);
188
}
189
}
else
{
190
// check that the size of the names vector (after removing the ignored
191
// columns) is the same as the rest of the database
192
std
::
size_t
ignored_size
=
std
::
size_t
(0);
193
194
// find the number of ignored cols
195
for
(
auto
iter
=
ignored_cols__
.
rbegin
(),
rend
=
ignored_cols__
.
rend
();
196
iter
!=
rend
;
197
++
iter
, ++
ignored_size
) {
198
if
(*
iter
<
size
) {
break
; }
199
}
200
ignored_size
=
ignored_cols_size
-
ignored_size
;
201
202
if
(
this
->
rows_
.
empty
()
203
|| (
size
==
this
->
rows_
[0].
size
() +
ignored_size
)) {
204
DBVector
<
std
::
string
>
new_names
;
205
for
(
std
::
size_t
i
=
std
::
size_t
(0),
j
=
std
::
size_t
(0);
i
<
size
; ++
i
) {
206
if
(
i
!=
ignored_cols__
[
j
]) {
207
new_names
.
push_back
(
names
[
i
]);
208
}
else
{
209
if
(++
j
==
ignored_cols_size
) {
210
for
(++
i
;
i
<
size
; ++
i
) {
211
new_names
.
push_back
(
names
[
i
]);
212
}
213
}
214
}
215
}
216
this
->
variable_names_
=
std
::
move
(
new_names
);
217
return
;
218
}
else
{
219
GUM_ERROR
(
SizeError
,
220
"the number of variable's names excluding the ignored "
221
<<
"columns (i.e., "
<< (
size
-
ignored_size
)
222
<<
") does not correspond to the number of columns of the "
223
<<
"raw database table (i.e.,"
<<
this
->
rows_
[0].
size
()
224
<<
")"
);
225
}
226
}
227
}
228
229
230
/// makes the database table ignore from now on the kth column
231
template
<
template
<
typename
>
class
ALLOC
>
232
void
RawDatabaseTable
<
ALLOC
>::
ignoreColumn
(
const
std
::
size_t
k
,
233
const
bool
from_external_object
) {
234
// first, compute the value that k would have in an external database
235
// and compute where the new value should be inserted
236
std
::
size_t
i
;
// where to insert the new k into the ignored colums
237
std
::
size_t
kk
=
k
;
// kk = k value for an external database
238
const
std
::
size_t
size
=
ignored_cols__
.
size
();
239
240
if
(
from_external_object
) {
241
for
(
i
=
std
::
size_t
(0);
i
<
size
; ++
i
) {
242
if
(
k
<=
ignored_cols__
[
i
]) {
243
if
(
k
==
ignored_cols__
[
i
])
return
;
244
break
;
245
}
246
}
247
}
else
{
248
for
(
i
=
std
::
size_t
(0);
i
<
size
; ++
i
, ++
kk
) {
249
if
(
kk
<=
ignored_cols__
[
i
]) {
250
if
(
kk
==
ignored_cols__
[
i
])
return
;
251
break
;
252
}
253
}
254
}
255
256
// the column of rows__ and variable_names_ impacted by the ignoreColumn
257
// operation is therefore equal to kk-i. So, we should check that such
258
// a column exists and, if so, we should remove the column from rows__
259
// and from variable_names_. Note that if there is no more variable,
260
// rows__ should become empty
261
const
std
::
size_t
col
=
kk
-
i
;
262
if
(
col
<
this
->
variable_names_
.
size
()) {
263
this
->
variable_names_
.
erase
(
this
->
variable_names_
.
begin
() +
col
);
264
if
(
this
->
variable_names_
.
empty
()) {
265
IDatabaseTable
<
DBCell
,
ALLOC
>::
eraseAllRows
();
266
}
else
{
267
const
std
::
size_t
nb_rows
=
this
->
rows_
.
size
();
268
if
(
nb_rows
!=
std
::
size_t
(0)) {
269
const
std
::
size_t
nb_cols
=
this
->
rows_
[0].
size
();
270
for
(
std
::
size_t
i
=
std
::
size_t
(0);
i
<
nb_rows
; ++
i
) {
271
auto
&
row
=
this
->
rows_
[
i
].
row
();
272
if
(
this
->
has_row_missing_val_
[
i
] ==
IsMissing
::
True
) {
273
bool
has_missing_val
=
false
;
274
for
(
std
::
size_t
j
=
std
::
size_t
(0);
j
<
nb_cols
; ++
j
) {
275
if
((
j
!=
col
) &&
row
[
j
].
isMissing
()) {
276
has_missing_val
=
true
;
277
break
;
278
}
279
}
280
if
(!
has_missing_val
)
281
this
->
has_row_missing_val_
[
i
] =
IsMissing
::
False
;
282
}
283
row
.
erase
(
row
.
begin
() +
col
);
284
}
285
}
286
}
287
}
288
289
// here, we know that we should insert kk at the ith index of ignored_cols__
290
ignored_cols__
.
push_back
(
std
::
size_t
(0));
291
for
(
std
::
size_t
j
=
size
;
j
>
i
; --
j
)
292
ignored_cols__
[
j
] =
ignored_cols__
[
j
- 1];
293
ignored_cols__
[
i
] =
kk
;
294
}
295
296
297
/// returns the set of ignored columns
298
template
<
template
<
typename
>
class
ALLOC
>
299
INLINE
const
typename
RawDatabaseTable
<
ALLOC
>::
template
DBVector
<
300
std
::
size_t
>
301
RawDatabaseTable
<
ALLOC
>::
ignoredColumns
()
const
{
302
return
ignored_cols__
;
303
}
304
305
306
/// returns the set of columns parsed
307
template
<
template
<
typename
>
class
ALLOC
>
308
const
typename
RawDatabaseTable
<
ALLOC
>::
template
DBVector
<
std
::
size_t
>
309
RawDatabaseTable
<
ALLOC
>::
inputColumns
()
const
{
310
const
auto
&
data
=
IDatabaseTable
<
DBCell
,
ALLOC
>::
content
();
311
if
(
data
.
empty
()) {
return
DBVector
<
std
::
size_t
>(); }
312
313
const
std
::
size_t
size
=
data
[0].
size
();
314
const
std
::
size_t
ignored_cols_size
=
ignored_cols__
.
size
();
315
DBVector
<
std
::
size_t
>
cols
(
size
);
316
317
if
(!
ignored_cols_size
) {
318
for
(
std
::
size_t
i
=
std
::
size_t
(0);
i
<
size
; ++
i
) {
319
cols
[
i
] =
i
;
320
}
321
}
else
{
322
// fill the cols vector with consecutive values, excluding the
323
// ignored columns
324
std
::
size_t
i
=
std
::
size_t
(0);
// the consecutive values
325
std
::
size_t
k
=
std
::
size_t
(0);
// the index in col where we save values
326
std
::
size_t
j
=
std
::
size_t
(0);
// the index to parse the ignored columns
327
while
(
true
) {
328
if
(
i
!=
ignored_cols__
[
j
]) {
329
cols
[
k
] =
i
;
330
if
(++
k
==
size
)
break
;
331
}
else
{
332
if
(++
j
==
ignored_cols_size
) {
333
for
(++
i
;
k
<
size
; ++
i
, ++
k
) {
334
cols
[
k
] =
i
;
335
}
336
break
;
337
}
338
}
339
++
i
;
340
}
341
}
342
343
return
cols
;
344
}
345
346
347
// translates a string into a DBCell and returns it
348
template
<
template
<
typename
>
class
ALLOC
>
349
INLINE
DBCell
350
RawDatabaseTable
<
ALLOC
>::
convert__
(
const
std
::
string
&
elt
)
const
{
351
return
DBCell
::
bestDBCell
(
elt
,
this
->
missing_symbols_
);
352
}
353
354
355
// insert a new row at the end of the database
356
template
<
template
<
typename
>
class
ALLOC
>
357
void
RawDatabaseTable
<
ALLOC
>::
insertRow
(
358
const
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >&
new_row
) {
359
// check that the size of the row (after removing the ignored columns) is
360
// the same as the rest of the database
361
const
std
::
size_t
row_size
=
new_row
.
size
();
362
const
std
::
size_t
ignored_cols_size
=
ignored_cols__
.
size
();
363
std
::
size_t
ignored_size
=
std
::
size_t
(0);
364
if
(
ignored_cols_size
) {
365
// find the number of ignored cols
366
for
(
auto
iter
=
ignored_cols__
.
rbegin
(),
rend
=
ignored_cols__
.
rend
();
367
iter
!=
rend
;
368
++
iter
, ++
ignored_size
) {
369
if
(*
iter
<
row_size
) {
break
; }
370
}
371
ignored_size
=
ignored_cols_size
-
ignored_size
;
372
}
373
374
if
(!
this
->
isRowSizeOK_
(
row_size
-
ignored_size
)) {
375
GUM_ERROR
(
SizeError
,
376
"the new row has "
377
<< (
row_size
-
ignored_size
)
378
<<
" elements whereas the raw database table has "
379
<<
this
->
variable_names_
.
size
() <<
" columns"
);
380
}
381
382
// create the dbrow that will contain the new data
383
Row
<
DBCell
>
dbrow
;
384
dbrow
.
reserve
(
row_size
-
ignored_size
);
385
bool
has_missing_val
=
false
;
386
387
// translate the row into T_data and put them into the newly created dbrow
388
if
(
ignored_size
== 0) {
389
for
(
const
auto
&
elt
:
new_row
) {
390
const
DBCell
new_cell
(
this
->
convert__
(
elt
));
391
if
(
new_cell
.
isMissing
())
has_missing_val
=
true
;
392
dbrow
.
pushBack
(
new_cell
);
393
}
394
}
else
{
395
for
(
std
::
size_t
i
=
std
::
size_t
(0),
j
=
std
::
size_t
(0);
i
<
row_size
;
396
++
i
) {
397
if
(
i
!=
ignored_cols__
[
j
]) {
398
const
DBCell
new_cell
(
this
->
convert__
(
new_row
[
i
]));
399
if
(
new_cell
.
isMissing
())
has_missing_val
=
true
;
400
dbrow
.
pushBack
(
new_cell
);
401
}
else
{
402
if
(++
j
==
ignored_size
) {
403
for
(++
i
;
i
<
row_size
; ++
i
) {
404
const
DBCell
new_cell
(
this
->
convert__
(
new_row
[
i
]));
405
if
(
new_cell
.
isMissing
())
has_missing_val
=
true
;
406
dbrow
.
pushBack
(
new_cell
);
407
}
408
}
409
}
410
}
411
}
412
413
IDatabaseTable
<
DBCell
,
ALLOC
>::
insertRow
(
414
std
::
move
(
dbrow
),
415
has_missing_val
?
IsMissing
::
True
:
IsMissing
::
False
);
416
}
417
418
419
// erase the content of the database, including the names of the variables
420
template
<
template
<
typename
>
class
ALLOC
>
421
void
RawDatabaseTable
<
ALLOC
>::
clear
() {
422
ignored_cols__
.
clear
();
423
IDatabaseTable
<
DBCell
,
ALLOC
>::
clear
();
424
}
425
426
427
}
/* namespace learning */
428
429
}
/* namespace gum */
430
431
#
endif
/* DOXYGEN_SHOULD_SKIP_THIS */
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:669
gum::learning::genericBNLearner::Database::Database
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
Definition:
genericBNLearner_tpl.h:31