warpcore
0.0.1
Hashing at the Speed of Light on modern CUDA-accelerators
bloom_filter.cuh
Go to the documentation of this file.
1
#
ifndef
WARPCORE_BLOOM_FILTER_CUH
2
#
define
WARPCORE_BLOOM_FILTER_CUH
3
4
// modified by Markus Vieth
5
6
/**
7
* @example "advanced_usage_from_device.cu"
8
* This example implements a filtered histogram over a multi-set of keys
9
* using warpcore. The task is to output the counts of all distinct keys
10
* which occure more than once in the input data.
11
* We make use of the device-sided operations provided by warpcore to
12
* implement a custom CUDA kernel that does the job.
13
*/
14
15
#
include
"base.cuh"
16
#
include
"defaults.cuh"
17
18
namespace
warpcore
19
{
20
21
/*! \brief bloom filter
22
* \tparam Key key type (\c std::uint32_t or \c std::uint64_t)
23
* \tparam Hasher hasher from \c warpcore::hashers
24
* \tparam Slot slot type (\c std::uint32_t or \c std::uint64_t)
25
* \tparam CGSize size of cooperative group
26
*/
27
template
<
28
class
Key,
29
class
Hasher =
defaults
::
hasher_t
<
Key
>,
30
class
Slot = std::uint64_t,
31
index_t CGSize = 1>
32
class
BloomFilter
33
{
34
static_assert
(
35
checks
::
is_valid_key_type
<
Key
>(),
36
"invalid key type"
);
37
38
static_assert
(
39
checks
::
is_hasher
<
Hasher
>(),
40
"not a valid hasher type"
);
41
42
static_assert
(
43
checks
::
is_valid_slot_type
<
Slot
>(),
44
"invalid slot type"
);
45
46
static_assert
(
47
checks
::is_valid_cg_size(CGSize),
48
"invalid cooperative group size"
);
49
50
public
:
51
using
key_type = Key;
52
using
value_type =
bool
;
53
using
index_type = index_t;
54
using
slot_type = Slot;
55
using
status_type =
Status
;
56
57
/*! \brief get cooperative group size
58
* \return cooperative group size
59
*/
60
HOSTDEVICEQUALIFIER INLINEQUALIFIER
61
static
constexpr
index_type
cg_size
()
noexcept
62
{
63
return
CGSize
;
64
}
65
66
/*! \brief get bits per slot
67
* \return number of bits
68
*/
69
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
70
static
constexpr
index_type
slot_bits
()
noexcept
71
{
72
return
sizeof
(
slot_type
) *
CHAR_BIT
;
73
}
74
75
/*! \brief get bits per cooperative group block of slots
76
* \return number of bits
77
*/
78
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
79
static
constexpr
index_type
block_bits
()
noexcept
80
{
81
return
slot_bits
() *
cg_size
();
82
}
83
84
/*! \brief constructor
85
* \param[in] num_bits total number of bits (m) of the bloom filter
86
* \param[in] k number of hash functions to apply
87
* \param[in] seed random seed
88
*/
89
HOSTQUALIFIER
90
explicit
BloomFilter
(
91
const
index_type
num_bits
,
92
const
index_type
k
,
93
const
key_type
seed
=
defaults
::
seed
<
key_type
>())
noexcept
:
94
bloom_filter_
(
nullptr
),
95
num_bits_
(
num_bits
),
96
num_slots_
(
SDIV
(
num_bits
,
slot_bits
())),
97
num_blocks_
(
SDIV
(
num_slots_
,
cg_size
())),
98
k_
(
k
),
99
seed_
(
seed
),
100
is_copy_
(
false
)
101
{
102
cudaMalloc
(&
bloom_filter_
,
sizeof
(
slot_type
) *
num_slots_
);
103
104
init
();
105
}
106
107
/*! \brief copy-constructor (shallow)
108
* \param[in] object to be copied
109
*/
110
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
111
BloomFilter
(
const
BloomFilter
&
o
)
noexcept
:
112
bloom_filter_
(
o
.
bloom_filter_
),
113
num_bits_
(
o
.
num_bits_
),
114
num_slots_
(
o
.
num_slots_
),
115
num_blocks_
(
o
.
num_blocks_
),
116
k_
(
o
.
k_
),
117
seed_
(
o
.
seed_
),
118
is_copy_
(
true
)
119
{}
120
121
/*! \brief move-constructor
122
* \param[in] object to be moved
123
*/
124
HOSTQUALIFIER
INLINEQUALIFIER
125
BloomFilter
(
BloomFilter
&&
o
)
noexcept
:
126
bloom_filter_
(
std
::
move
(
o
.
bloom_filter_
)),
127
num_bits_
(
std
::
move
(
o
.
num_bits_
)),
128
num_slots_
(
std
::
move
(
o
.
num_slots_
)),
129
num_blocks_
(
std
::
move
(
o
.
num_blocks_
)),
130
k_
(
std
::
move
(
o
.
k_
)),
131
seed_
(
std
::
move
(
o
.
seed_
)),
132
is_copy_
(
std
::
move
(
o
.
is_copy_
))
133
{
134
o
.
is_copy_
=
true
;
135
}
136
137
#
ifndef
__CUDA_ARCH__
138
/*! \brief destructor
139
*/
140
HOSTQUALIFIER
141
~
BloomFilter
()
noexcept
142
{
143
if
(!
is_copy_
)
144
{
145
if
(
bloom_filter_
!=
nullptr
)
cudaFree
(
bloom_filter_
);
146
}
147
}
148
#
endif
149
150
/*! \brief (re)initialize the hash table
151
* \param[in] seed random seed
152
* \param[in] stream CUDA stream in which this operation is executed in
153
*/
154
HOSTQUALIFIER
INLINEQUALIFIER
155
void
init
(
156
const
key_type
seed
,
157
const
cudaStream_t
stream
= 0)
noexcept
158
{
159
seed_
=
seed
;
160
161
kernels
::
memset
162
<<<
SDIV
(
num_slots_
,
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
163
(
bloom_filter_
,
num_slots_
);
164
}
165
166
/*! \brief (re)initialize the hash table
167
* \param[in] stream CUDA stream in which this operation is executed in
168
*/
169
HOSTQUALIFIER
INLINEQUALIFIER
170
void
init
(
const
cudaStream_t
stream
= 0)
noexcept
171
{
172
init
(
seed_
,
stream
);
173
}
174
175
/*! \brief inserts a key into the bloom filter
176
* \param[in] key key to insert into the bloom filter
177
* \param[in] group cooperative group
178
*/
179
DEVICEQUALIFIER
INLINEQUALIFIER
180
void
insert
(
181
const
key_type
key
,
182
const
cg
::
thread_block_tile
<
cg_size
()>&
group
)
noexcept
183
{
184
const
index_type
slot_index
=
185
((
Hasher
::
hash
(
key
+
seed_
) %
num_blocks_
) *
186
cg_size
() +
group
.
thread_rank
()) %
num_slots_
;
187
188
slot_type
slot
= 0;
189
for
(
index_type
k
= 0;
k
<
k_
;
k
++)
190
{
191
const
key_type
seeded_key
=
key
+
seed_
+
k
;
192
const
slot_type
hash
=
Hasher
::
hash
(
seeded_key
) %
block_bits
();
193
194
if
((
hash
/
slot_bits
()) ==
group
.
thread_rank
())
195
{
196
slot
|=
slot_type
(1) << (
hash
%
slot_bits
());
197
}
198
}
199
200
if
((
slot
&
bloom_filter_
[
slot_index
]) !=
slot
)
201
{
202
atomicOr
(
bloom_filter_
+
slot_index
,
slot
);
203
}
204
}
205
206
/*! \brief inserts a set of keys into the bloom filter
207
* \param[in] keys_in pointer to keys to insert into the bloom filter
208
* \param[in] num_in number of keys to insert
209
* \param[in] stream CUDA stream in which this operation is executed in
210
*/
211
HOSTQUALIFIER
INLINEQUALIFIER
212
void
insert
(
213
const
Key
*
const
keys_in
,
214
const
index_t
num_in
,
215
const
cudaStream_t
stream
= 0)
noexcept
216
{
217
kernels
::
bloom_filter
::
insert
218
<<<
SDIV
(
num_in
*
cg_size
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
219
(
keys_in
,
num_in
, *
this
);
220
}
221
222
/*! \brief retrieve a key
223
* \param[in] key key to query
224
* \param[in] group cooperative group
225
* \return whether the key is already inside the filter or not
226
*/
227
DEVICEQUALIFIER
INLINEQUALIFIER
228
bool
retrieve
(
229
const
key_type
key
,
230
const
cg
::
thread_block_tile
<
cg_size
()>&
group
)
const
noexcept
231
{
232
const
index_type
slot_index
=
233
((
Hasher
::
hash
(
key
+
seed_
) %
num_blocks_
) *
234
cg_size
() +
group
.
thread_rank
()) %
num_slots_
;
235
236
slot_type
slot
= 0;
237
for
(
index_type
k
= 0;
k
<
k_
;
k
++)
238
{
239
const
key_type
seeded_key
=
key
+
seed_
+
k
;
240
const
slot_type
hash
=
Hasher
::
hash
(
seeded_key
) %
block_bits
();
241
242
if
((
hash
/
slot_bits
()) ==
group
.
thread_rank
())
243
{
244
slot
|=
slot_type
(1) << (
hash
%
slot_bits
());
245
}
246
}
247
248
return
(
group
.
all
((
slot
&
bloom_filter_
[
slot_index
]) ==
slot
)) ?
true
:
false
;
249
}
250
251
/*! \brief retrieve a set of keys
252
* \param[in] keys_in pointer to keys
253
* \param[in] num_in number of keys
254
* \param[out] flags_out result per key
255
' \param[in] stream CUDA stream in which this operation is executed in
256
*/
257
HOSTQUALIFIER
INLINEQUALIFIER
258
void
retrieve
(
259
const
key_type
*
const
keys_in
,
260
const
index_type
num_in
,
261
bool
*
const
flags_out
,
262
const
cudaStream_t
stream
= 0)
const
noexcept
263
{
264
kernels
::
bloom_filter
::
retrieve
265
<<<
SDIV
(
num_in
*
cg_size
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
266
(
keys_in
,
num_in
,
flags_out
, *
this
);
267
}
268
269
/*! \brief queries and subsequently inserts a key into the bloom filter
270
* \note can only be used when \c CGSize==1 to prevent from race conditions
271
* \param[in] key key to query
272
* \param[in] group cooperative group this operation is executed in
273
* \param[out] flag whether the key was already inside the filter before insertion
274
*/
275
template
<
276
index_type
CGSize_
=
cg_size
(),
277
class
=
std
::
enable_if_t
<
CGSize_
== 1>>
278
DEVICEQUALIFIER
INLINEQUALIFIER
279
bool
insert_and_query
(
280
const
key_type
key
,
281
const
cg
::
thread_block_tile
<
cg_size
()>&
group
)
noexcept
282
{
283
const
index_type
slot_index
=
284
((
Hasher
::
hash
(
key
+
seed_
) %
num_blocks_
) *
285
cg_size
() +
group
.
thread_rank
()) %
num_slots_
;
286
287
slot_type
slot
=
slot_type
{0};
288
for
(
index_type
k
= 0;
k
<
k_
;
k
++)
289
{
290
const
key_type
seeded_key
=
key
+
seed_
+
k
;
291
const
slot_type
hash
=
Hasher
::
hash
(
seeded_key
) %
block_bits
();
292
293
if
((
hash
/
slot_bits
()) ==
group
.
thread_rank
())
294
{
295
slot
|=
slot_type
{1} << (
hash
%
slot_bits
());
296
}
297
}
298
299
if
((
slot
&
bloom_filter_
[
slot_index
]) !=
slot
)
300
{
301
const
auto
old
=
atomicOr
(
bloom_filter_
+
slot_index
,
slot
);
302
303
return
((
slot
&
old
) !=
slot
) ?
false
:
true
;
304
}
305
else
306
{
307
return
true
;
308
}
309
}
310
311
/*! \brief get number of bits (m)
312
* \return number of bits (m)
313
*/
314
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
315
index_type
num_bits
()
const
noexcept
316
{
317
return
num_bits_
;
318
}
319
320
/*! \brief get number of slots
321
* \return number of slots
322
*/
323
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
324
index_type
num_slots
()
const
noexcept
325
{
326
return
num_slots_
;
327
}
328
329
/*! \brief get number of blocks
330
* \return number of blocks
331
*/
332
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
333
index_type
num_blocks
()
const
noexcept
334
{
335
return
num_blocks_
;
336
}
337
338
/*! \brief get number of hash functions (k)
339
* \return number of hash functions (k)
340
*/
341
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
342
index_type
k
()
const
noexcept
343
{
344
return
k_
;
345
}
346
347
// TODO incorporate CG size
348
/*! \brief estimated false positive rate of pattern-blocked bloom filter
349
* \param[in] n number of inserted elements
350
* \return false positive rate
351
* \warning computationally expensive for large filters
352
*/
353
HOSTQUALIFIER
INLINEQUALIFIER
354
double
fpr
(
const
index_type
n
)
const
noexcept
355
{
356
double
res
= 0.0;
357
const
double
b
=
num_bits_
/
block_bits
();
358
359
#
pragma
omp
parallel
for
reduction
(
+
:
res
)
360
for
(
index_type
i
= 0;
i
< 5*
n
/(
num_bits_
/
block_bits
()); ++
i
)
361
{
362
res
+=
binom
(
n
,
i
, 1.0/
b
) *
fpr_base
(
num_bits_
/
b
,
i
,
k_
);
363
}
364
365
return
res
;
366
}
367
368
/*! \brief indicates if this object is a shallow copy
369
* \return \c bool
370
*/
371
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
372
bool
is_copy
()
const
noexcept
373
{
374
return
is_copy_
;
375
}
376
377
private
:
378
/*! \brief binomial coefficient
379
* \param[in] n
380
* \param[in] k
381
* \param[in] p
382
* \return binomial coefficient
383
*/
384
HOSTQUALIFIER
INLINEQUALIFIER
385
double
binom
(
386
const
index_type
n
,
387
const
index_type
k
,
388
const
double
p
)
const
noexcept
389
{
390
double
res
= 1.0;
391
392
for
(
index_type
i
=
n
-
k
+ 1;
i
<=
n
; ++
i
)
393
{
394
res
=
res
*
i
;
395
}
396
397
for
(
index_type
i
= 1;
i
<=
k
; ++
i
)
398
{
399
res
=
res
/
i
;
400
}
401
402
res
=
res
*
pow
(
p
,
k
) *
pow
(1.0 -
p
,
n
-
k
);
403
404
return
res
;
405
}
406
407
/*! \brief FPR of traditional bloom filters
408
* \param[in] m
409
* \param[in] n
410
* \param[in] k
411
* \return FPR
412
*/
413
HOSTQUALIFIER
INLINEQUALIFIER
414
double
fpr_base
(
415
const
index_type
m
,
416
const
index_type
n
,
417
const
index_type
k
)
const
noexcept
418
{
419
return
std
::
pow
(1.0 -
std
::
pow
(1.0 - 1.0 /
m
,
n
*
k
),
k
);
420
}
421
422
slot_type
*
bloom_filter_
;
//< pointer to the bit vector
423
const
index_type num_bits_;
//< number of bits (m)
424
const
index_type num_slots_;
//< number of slots
425
const
index_type num_blocks_;
//< number of CG blocks
426
const
index_type k_;
//< number of hash functions
427
key_type seed_;
//< random seed
428
bool
is_copy_;
//< indicates if this object is a shallow copy
429
430
};
// class BloomFilter
431
432
}
// namespace warpcore
433
434
#
endif
/* WARPCORE_BLOOM_FILTER_CUH */
include
warpcore
bloom_filter.cuh
Generated by
1.9.1