warpcore
0.0.1
Hashing at the Speed of Light on modern CUDA-accelerators
bucket_list_hash_table.cuh
Go to the documentation of this file.
1
#
ifndef
WARPCORE_BUCKET_LIST_HASH_TABLE_CUH
2
#
define
WARPCORE_BUCKET_LIST_HASH_TABLE_CUH
3
4
#
include
"single_value_hash_table.cuh"
5
6
namespace
warpcore
7
{
8
9
/*! \brief bucket list hash table
10
* \tparam Key key type (\c std::uint32_t or \c std::uint64_t)
11
* \tparam Value value type
12
* \tparam EmptyKey key which represents an empty slot
13
* \tparam TombstoneKey key which represents an erased slot
14
* \tparam ValueStore storage class from \c warpcore::storage::multi_value
15
* \tparam ProbingScheme probing scheme from \c warpcore::probing_schemes
16
*/
17
template
<
18
class
Key
,
19
class
Value
,
20
Key
EmptyKey
=
defaults
::
empty_key
<
Key
>(),
21
Key
TombstoneKey
=
defaults
::
tombstone_key
<
Key
>(),
22
class
ValueStore
=
defaults
::
value_storage_t
<
Value
>,
23
class
ProbingScheme
=
defaults
::
probing_scheme_t
<
Key
, 8>>
24
class
BucketListHashTable
25
{
26
static_assert
(
27
checks
::
is_value_storage
<
ValueStore
>(),
28
"not a valid storage type"
);
29
30
public
:
31
// TODO why public?
32
using
handle_type
=
typename
ValueStore
::
handle_type
;
33
34
private
:
35
using
hash_table_type
=
SingleValueHashTable
<
36
Key
,
37
handle_type
,
38
EmptyKey
,
39
TombstoneKey
,
40
ProbingScheme
>;
41
42
using
value_store_type
=
ValueStore
;
43
44
public
:
45
using
key_type
=
Key
;
46
using
value_type
=
Value
;
47
using
index_type
=
index_t
;
48
using
status_type
=
Status
;
49
50
/*! \brief get empty key
51
* \return empty key
52
*/
53
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
54
static
constexpr
key_type
empty_key
()
noexcept
55
{
56
return
EmptyKey
;
57
}
58
59
/*! \brief get tombstone key
60
* \return tombstone key
61
*/
62
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
63
static
constexpr
key_type
tombstone_key
()
noexcept
64
{
65
return
TombstoneKey
;
66
}
67
68
/*! \brief checks if \c key is equal to \c (EmptyKey||TombstoneKey)
69
* \return \c bool
70
*/
71
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
72
static
constexpr
bool
is_valid_key
(
const
key_type
key
)
noexcept
73
{
74
return
(
key
!=
empty_key
() &&
key
!=
tombstone_key
());
75
}
76
77
/*! \brief get cooperative group size
78
* \return cooperative group size
79
*/
80
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
81
static
constexpr
index_type
cg_size
()
noexcept
82
{
83
return
hash_table_type
::
cg_size
();
84
}
85
86
/*! \brief maximum bucket size
87
* \return size
88
*/
89
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
90
static
constexpr
index_type
max_bucket_size
()
noexcept
91
{
92
return
handle_type
::
max_bucket_size
();
93
}
94
95
/*! \brief constructor
96
* \param[in] key_capacity guaranteed number of key slots in the hash table
97
* \param[in] value_capacity total number of value slots
98
* \param[in] seed random seed
99
* \param[in] grow_factor bucket grow factor for \c warpcore::storage::multi_value::BucketListStore
100
* \param[in] min_bucket_size initial size of value buckets for \c warpcore::storage::multi_value::BucketListStore
101
* \param[in] max_bucket_size bucket size of \c warpcore::storage::multi_value::BucketListStore after which no more growth occurs
102
* \param[in] max_values_per_key maximum number of values to store per key
103
*/
104
HOSTQUALIFIER
105
explicit
BucketListHashTable
(
106
const
index_type
key_capacity
,
107
const
index_type
value_capacity
,
108
const
key_type
seed
=
defaults
::
seed
<
key_type
>(),
109
const
float
grow_factor
= 1.1,
110
const
index_type
min_bucket_size
= 1,
111
const
index_type
max_bucket_size
=
max_bucket_size
(),
112
const
index_type
max_values_per_key
=
handle_type
::
max_value_count
(),
113
const
bool
no_init
=
false
)
noexcept
:
114
hash_table_
(
key_capacity
,
seed
,
true
),
115
value_store_
(
value_capacity
,
grow_factor
,
min_bucket_size
,
max_bucket_size
),
116
max_values_per_key_
(
std
::
min
(
max_values_per_key
,
handle_type
::
max_value_count
())),
117
is_copy_
(
false
)
118
{
119
join_status
(
value_store_
.
status
());
120
121
if
(!
no_init
)
init
(
seed
);
122
}
123
124
/*! \brief copy-constructor (shallow)
125
* \param[in] object to be copied
126
*/
127
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
128
BucketListHashTable
(
const
BucketListHashTable
&
o
)
noexcept
:
129
hash_table_
(
o
.
hash_table_
),
130
value_store_
(
o
.
value_store_
),
131
max_values_per_key_
(
o
.
max_values_per_key_
),
132
is_copy_
(
true
)
133
{}
134
135
/*! \brief move-constructor
136
* \param[in] object to be moved
137
*/
138
HOSTQUALIFIER
INLINEQUALIFIER
139
BucketListHashTable
(
BucketListHashTable
&&
o
)
noexcept
:
140
hash_table_
(
std
::
move
(
o
.
hash_table_
)),
141
value_store_
(
std
::
move
(
o
.
value_store_
)),
142
max_values_per_key_
(
std
::
move
(
o
.
max_values_per_key_
)),
143
is_copy_
(
std
::
move
(
o
.
is_copy_
))
144
{
145
o
.
is_copy_
=
true
;
146
}
147
148
/*! \brief (re)initialize the hash table
149
* \param seed random seed
150
* \param stream CUDA stream in which this operation is executed
151
*/
152
HOSTQUALIFIER
INLINEQUALIFIER
153
void
init
(
154
const
key_type
seed
,
155
const
cudaStream_t
stream
= 0)
noexcept
156
{
157
const
auto
status
=
hash_table_
.
peek_status
(
stream
);
158
159
if
(!
status
.
has_not_initialized
())
160
{
161
hash_table_
.
init
(
seed
,
stream
);
162
value_store_
.
init
(
stream
);
163
hash_table_
.
table_
.
init_values
(
164
ValueStore
::
uninitialized_handle
(),
stream
);
165
}
166
}
167
168
/*! \brief (re)initialize the hash table
169
* \param stream CUDA stream in which this operation is executed
170
*/
171
HOSTQUALIFIER
INLINEQUALIFIER
172
void
init
(
const
cudaStream_t
stream
= 0)
noexcept
173
{
174
init
(
hash_table_
.
seed
(),
stream
);
175
}
176
177
/*! \brief inserts a key/value pair into the hash table
178
* \param[in] key_in key to insert into the hash table
179
* \param[in] value_in value that corresponds to \c key_in
180
* \param[in] group cooperative group
181
* \param[in] probing_length maximum number of probing attempts
182
* \return status (per thread)
183
*/
184
DEVICEQUALIFIER
INLINEQUALIFIER
185
status_type
insert
(
186
const
key_type
key_in
,
187
const
value_type
&
value_in
,
188
const
cg
::
thread_block_tile
<
cg_size
()>&
group
,
189
const
index_type
probing_length
=
defaults
::
probing_length
())
noexcept
190
{
191
status_type
status
=
status_type
::
unknown_error
();
192
193
handle_type
*
handle_ptr
=
194
hash_table_
.
insert_impl
(
key_in
,
status
,
group
,
probing_length
);
195
196
if
(
handle_ptr
!=
nullptr
)
197
{
198
if
(
handle_ptr
->
value_count
() >=
max_values_per_key_
)
199
{
200
device_join_status
(
status_type
::
max_values_for_key_reached
());
201
202
return
status
+
status_type
::
max_values_for_key_reached
();
203
}
204
else
205
{
206
status_type
append_status
=
Status
::
unknown_error
();
207
208
if
(
group
.
thread_rank
() == 0)
209
{
210
append_status
=
value_store_
.
append
(*
handle_ptr
,
value_in
,
max_values_per_key_
);
211
212
if
(
append_status
.
has_any
())
213
{
214
device_join_status
(
append_status
);
215
}
216
}
217
218
status
+=
append_status
.
group_shuffle
(
group
, 0);
219
}
220
}
221
222
return
status
;
223
}
224
225
/*! \brief insert a set of keys into the hash table
226
* \tparam StatusHandler handles returned status per key (see \c status_handlers)
227
* \param[in] keys_in pointer to keys to insert into the hash table
228
* \param[in] values_in corresponds values to \c keys_in
229
* \param[in] num_in number of keys to insert
230
* \param[in] stream CUDA stream in which this operation is executed in
231
* \param[in] probing_length maximum number of probing attempts
232
* \param[out] status_out status information per key
233
*/
234
template
<
class
StatusHandler
=
defaults
::
status_handler_t
>
235
HOSTQUALIFIER
INLINEQUALIFIER
236
void
insert
(
237
const
key_type
*
const
keys_in
,
238
const
value_type
*
const
values_in
,
239
const
index_type
num_in
,
240
const
cudaStream_t
stream
= 0,
241
const
index_type
probing_length
=
defaults
::
probing_length
(),
242
typename
StatusHandler
::
base_type
*
const
status_out
=
nullptr
)
noexcept
243
{
244
static_assert
(
245
checks
::
is_status_handler
<
StatusHandler
>(),
246
"not a valid status handler type"
);
247
248
if
(!
hash_table_
.
is_initialized_
)
return
;
249
250
static
constexpr
index_type
block_size
= 1024;
251
static
constexpr
index_type
groups_per_block
=
block_size
/
cg_size
();
252
static
constexpr
index_type
smem_status_size
=
253
std
::
is_same
<
StatusHandler
,
status_handlers
::
ReturnNothing
>::
value
?
254
1 :
groups_per_block
;
255
256
helpers
::
lambda_kernel
257
<<<
SDIV
(
num_in
*
cg_size
(),
block_size
),
block_size
, 0,
stream
>>>
258
([=, *
this
]
DEVICEQUALIFIER
()
mutable
259
{
260
const
index_type
tid
=
helpers
::
global_thread_id
();
261
const
index_type
btid
=
threadIdx
.
x
;
262
const
index_type
gid
=
tid
/
cg_size
();
263
const
index_type
bgid
=
gid
%
groups_per_block
;
264
const
auto
block
=
cg
::
this_thread_block
();
265
const
auto
group
=
cg
::
tiled_partition
<
cg_size
()>(
block
);
266
267
__shared__
handle_type
*
handles
[
groups_per_block
];
268
__shared__
status_type
status
[
smem_status_size
];
269
270
if
(
gid
<
num_in
)
271
{
272
status_type
probing_status
=
status_type
::
unknown_error
();
273
274
handles
[
bgid
] =
hash_table_
.
insert_impl
(
275
keys_in
[
gid
],
276
probing_status
,
277
group
,
278
probing_length
);
279
280
if
(!
std
::
is_same
<
281
StatusHandler
,
282
status_handlers
::
ReturnNothing
>::
value
&&
283
group
.
thread_rank
() == 0)
284
{
285
status
[
bgid
] =
probing_status
;
286
}
287
288
block
.
sync
();
289
290
if
(
btid
<
groups_per_block
&&
handles
[
btid
] !=
nullptr
)
291
{
292
status_type
append_status
;
293
294
const
index_type
block_offset
=
295
blockIdx
.
x
*
groups_per_block
;
296
297
if
(
value_store_
.
size
(*(
handles
[
btid
])) >=
max_values_per_key_
)
298
{
299
append_status
=
status_type
::
max_values_for_key_reached
();
300
}
301
else
302
{
303
if
(
block_offset
+
btid
<
num_in
){
304
append_status
=
value_store_
.
append
(
305
*(
handles
[
btid
]),
306
values_in
[
block_offset
+
btid
],
307
max_values_per_key_
);
308
}
309
}
310
311
if
(
append_status
.
has_any
())
312
{
313
device_join_status
(
append_status
);
314
}
315
316
if
(
block_offset
+
btid
<
num_in
){
317
318
// TODO not zero-cost
319
if
(!
std
::
is_same
<
320
StatusHandler
,
321
status_handlers
::
ReturnNothing
>::
value
)
322
{
323
StatusHandler
::
handle
(
324
status
[
btid
]+
append_status
,
325
status_out
,
326
block_offset
+
btid
);
327
}
328
329
}
330
}
331
}
332
333
});
334
335
if
(
stream
== 0)
336
{
337
cudaStreamSynchronize
(
stream
);
338
}
339
}
340
341
/*! \brief retrieves a key from the hash table
342
* \param[in] key_in key to retrieve from the hash table
343
* \param[out] values_out pointer to storage fo the retrieved values
344
* \param[out] num_out number of values retrieved
345
* \param[in] group cooperative group
346
* \param[in] probing_length maximum number of probing attempts
347
* \return status (per thread)
348
*/
349
DEVICEQUALIFIER
INLINEQUALIFIER
350
status_type
retrieve
(
351
const
key_type
key_in
,
352
value_type
*
const
values_out
,
353
index_type
&
num_out
,
354
const
cg
::
thread_block_tile
<
cg_size
()>&
group
,
355
const
index_type
probing_length
=
defaults
::
probing_length
())
const
noexcept
356
{
357
handle_type
handle
;
358
359
status_type
status
=
360
hash_table_
.
retrieve
(
key_in
,
handle
,
group
,
probing_length
);
361
362
if
(!
status
.
has_any
())
363
{
364
value_store_
.
for_each
(
365
[=]
DEVICEQUALIFIER
(
366
const
value_type
&
value
,
367
index_type
offset
)
368
{
369
values_out
[
offset
] =
value
;
370
},
371
handle
,
372
group
);
373
374
num_out
=
value_store_
.
size
(
handle
);
375
}
376
else
377
{
378
num_out
= 0;
379
}
380
381
return
status
;
382
}
383
384
/*! \brief retrieve a set of keys from the hash table
385
* \note this method has a dry-run mode where it only calculates the needed array sizes in case no memory (aka \c nullptr ) is provided
386
* \note \c end_offsets_out can be \c begin_offsets_out+1
387
* \tparam StatusHandler handles returned status per key (see \c status_handlers)
388
* \param[in] keys_in pointer to keys to retrieve from the hash table
389
* \param[in] num_in number of keys to retrieve
390
* \param[out] begin_offsets_out
391
* \param[out] end_offsets_out
392
* \param[out] values_out retrieved values of keys in \c key_in
393
* \param[out] num_out total number of values retrieved by this operation
394
* \param[in] stream CUDA stream in which this operation is executed in
395
* \param[in] probing_length maximum number of probing attempts
396
* \param[out] status_out status information (per key)
397
*/
398
template
<
class
StatusHandler
=
defaults
::
status_handler_t
>
399
HOSTQUALIFIER
INLINEQUALIFIER
400
void
retrieve
(
401
const
key_type
*
const
keys_in
,
402
const
index_type
num_in
,
403
index_type
*
const
begin_offsets_out
,
404
index_type
*
const
end_offsets_out
,
405
value_type
*
const
values_out
,
406
index_type
&
num_out
,
407
const
cudaStream_t
stream
= 0,
408
const
index_type
probing_length
=
defaults
::
probing_length
(),
409
typename
StatusHandler
::
base_type
*
const
status_out
=
nullptr
)
const
noexcept
410
{
411
static_assert
(
412
checks
::
is_status_handler
<
StatusHandler
>(),
413
"not a valid status handler type"
);
414
415
if
(!
hash_table_
.
is_initialized_
)
return
;
416
417
// cub::DeviceScan::InclusiveSum takes input sizes of type int
418
if
(
num_in
>
std
::
numeric_limits
<
int
>::
max
())
419
{
420
join_status
(
status_type
::
index_overflow
(),
stream
);
421
422
return
;
423
}
424
425
num_values
(
426
keys_in
,
427
num_in
,
428
num_out
,
429
end_offsets_out
,
430
stream
,
431
probing_length
);
432
433
if
(
values_out
!=
nullptr
)
434
{
435
index_type
temp_bytes
=
num_out
*
sizeof
(
value_type
);
436
437
cub
::
DeviceScan
::
InclusiveSum
(
438
values_out
,
439
temp_bytes
,
440
end_offsets_out
,
441
end_offsets_out
,
442
num_in
,
443
stream
);
444
445
cudaMemsetAsync
(
begin_offsets_out
, 0,
sizeof
(
index_type
),
stream
);
446
447
if
(
end_offsets_out
!=
begin_offsets_out
+ 1)
448
{
449
cudaMemcpyAsync
(
450
begin_offsets_out
+ 1,
451
end_offsets_out
,
452
sizeof
(
index_type
) * (
num_in
- 1),
453
D2D
,
454
stream
);
455
}
456
457
kernels
::
retrieve
<
BucketListHashTable
,
StatusHandler
>
458
<<<
SDIV
(
num_in
*
cg_size
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
459
(
460
keys_in
,
461
num_in
,
462
begin_offsets_out
,
463
end_offsets_out
,
464
values_out
,
465
*
this
,
466
probing_length
,
467
status_out
);
468
}
469
else
470
{
471
if
(
status_out
!=
nullptr
)
472
{
473
helpers
::
lambda_kernel
474
<<<
SDIV
(
num_in
,
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
475
([=, *
this
]
DEVICEQUALIFIER
476
{
477
const
index_type
tid
=
helpers
::
global_thread_id
();
478
479
if
(
tid
<
num_in
)
480
{
481
StatusHandler
::
handle
(
Status
::
dry_run
(),
status_out
,
tid
);
482
}
483
});
484
}
485
486
join_status
(
status_type
::
dry_run
(),
stream
);
487
}
488
489
if
(
stream
== 0)
490
{
491
cudaStreamSynchronize
(
stream
);
492
}
493
}
494
495
// TODO host retrieve which also returns the set of unique keys
496
497
/*! \brief applies a funtion over all values of a corresponding key
498
* \tparam Func type of map i.e. CUDA device lambda
499
* \param[in] f map to apply
500
* \param[in] key_in key to retrieve
501
* \param[in] stream CUDA stream in which this operation is executed in
502
*/
503
template
<
class
Func
>
504
DEVICEQUALIFIER
INLINEQUALIFIER
505
status_type
for_each
(
506
Func
f
,
// TODO const?
507
const
key_type
key_in
,
508
const
cg
::
thread_block_tile
<
cg_size
()>&
group
,
509
const
index_type
probing_length
=
defaults
::
probing_length
())
const
noexcept
510
{
511
handle_type
handle
;
512
513
status_type
status
=
514
hash_table_
.
retrieve
(
key_in
,
handle
,
group
,
probing_length
);
515
516
if
(!
status
.
has_any
())
517
{
518
value_store_
.
for_each
(
f
,
handle
,
group
);
519
}
520
521
return
status
;
522
}
523
524
// TODO host functions for_each
525
// TODO get_key_set
526
527
/*! \brief retrieves all elements from the hash table
528
* \info this method has a dry-run mode where it only calculates the needed array sizes in case no memory (aka \c nullptr ) is provided
529
* \info this method implements a multi-stage dry-run mode
530
* \param[out] keys_out pointer to the set of unique keys
531
* \param[out] num_keys_out number of unique keys
532
* \param[out] begin_offsets_out begin of value range for a corresponding key in \c values_out
533
* \param[out] end_offsets_out end of value range for a corresponding key in \c values_out
534
* \param[out] values_out array which holds all retrieved values
535
* \param[out] num_values_out total number of values retrieved by this operation
536
* \param[in] stream CUDA stream in which this operation is executed in
537
*/
538
HOSTQUALIFIER
INLINEQUALIFIER
539
void
retrieve_all
(
540
key_type
*
const
keys_out
,
541
index_type
&
num_keys_out
,
542
index_type
*
const
begin_offsets_out
,
543
index_type
*
const
end_offsets_out
,
544
value_type
*
const
values_out
,
545
value_type
&
num_values_out
,
546
const
cudaStream_t
stream
= 0)
const
noexcept
547
{
548
retrieve_all_keys
(
keys_out
,
num_keys_out
,
stream
);
549
550
if
(
keys_out
!=
nullptr
)
551
{
552
retrieve
(
553
keys_out
,
554
num_keys_out
,
555
begin_offsets_out
,
556
end_offsets_out
,
557
values_out
,
558
num_values_out
,
559
stream
);
560
}
561
562
if
(
stream
== 0)
563
{
564
cudaStreamSynchronize
(
stream
);
565
}
566
}
567
568
/*! \brief retrieves the set of all keys stored inside the hash table
569
* \param[out] keys_out pointer to the retrieved keys
570
* \param[out] num_out number of retrieved keys
571
* \param[in] stream CUDA stream in which this operation is executed in
572
* \note if \c keys_out==nullptr then only \c num_out will be computed
573
*/
574
HOSTQUALIFIER
INLINEQUALIFIER
575
void
retrieve_all_keys
(
576
key_type
*
const
keys_out
,
577
index_type
&
num_out
,
578
const
cudaStream_t
stream
= 0)
const
noexcept
579
{
580
if
(!
hash_table_
.
is_initialized_
)
return
;
581
582
if
(
keys_out
==
nullptr
)
583
{
584
num_out
=
hash_table_
.
size
(
stream
);
585
}
586
else
587
{
588
index_type
*
key_count
=
hash_table_
.
temp_
.
get
();
589
cudaMemsetAsync
(
key_count
, 0,
sizeof
(
index_type
),
stream
);
590
591
hash_table_
.
for_each
(
592
[=]
DEVICEQUALIFIER
(
key_type
key
,
const
auto
&)
593
{
594
keys_out
[
helpers
::
atomicAggInc
(
key_count
)] =
key
;
595
},
stream
);
596
597
cudaMemcpyAsync
(
598
&
num_out
,
key_count
,
sizeof
(
index_type
),
D2H
,
stream
);
599
}
600
601
if
(
stream
== 0 ||
keys_out
==
nullptr
)
602
{
603
cudaStreamSynchronize
(
stream
);
604
}
605
}
606
607
/*! \brief get load factor of the key store
608
* \param stream CUDA stream in which this operation is executed in
609
* \return load factor
610
*/
611
HOSTQUALIFIER
INLINEQUALIFIER
612
float
key_load_factor
(
const
cudaStream_t
stream
= 0)
const
noexcept
613
{
614
return
hash_table_
.
load_factor
(
stream
);
615
}
616
617
/*! \brief get load factor of the value store
618
* \param stream CUDA stream in which this operation is executed in
619
* \return load factor
620
*/
621
HOSTQUALIFIER
INLINEQUALIFIER
622
float
value_load_factor
(
const
cudaStream_t
stream
= 0)
const
noexcept
623
{
624
return
value_store_
.
load_factor
(
stream
);
625
}
626
627
/*! \brief get the the total number of bytes occupied by this data structure
628
* \return bytes
629
*/
630
HOSTQUALIFIER
INLINEQUALIFIER
631
index_type
bytes_total
()
const
noexcept
632
{
633
return
hash_table_
.
bytes_total
() +
value_store_
.
bytes_total
();
634
}
635
636
/*! \brief get the the number of bytes in this data structure occupied by keys
637
* \param stream CUDA stream in which this operation is executed in
638
* \return bytes
639
*/
640
HOSTQUALIFIER
INLINEQUALIFIER
641
index_type
bytes_keys
(
const
cudaStream_t
stream
= 0)
const
noexcept
642
{
643
return
num_keys
(
stream
) *
sizeof
(
key_type
);
644
}
645
646
/*! \brief get the the number of bytes in this data structure occupied by values
647
* \param stream CUDA stream in which this operation is executed in
648
* \return bytes
649
*/
650
HOSTQUALIFIER
INLINEQUALIFIER
651
index_type
bytes_values
(
const
cudaStream_t
stream
= 0)
const
noexcept
652
{
653
return
num_values
(
stream
) *
sizeof
(
value_type
);
654
}
655
656
/*! \brief get the the number of bytes in this data structure occupied by actual information
657
* \param stream CUDA stream in which this operation is executed in
658
* \return bytes
659
*/
660
HOSTQUALIFIER
INLINEQUALIFIER
661
index_type
bytes_payload
(
const
cudaStream_t
stream
= 0)
const
noexcept
662
{
663
return
bytes_keys
(
stream
) +
bytes_values
(
stream
);
664
}
665
666
/*! \brief current storage density of the hash table
667
* \param stream CUDA stream in which this operation is executed in
668
* \return storage density
669
*/
670
HOSTQUALIFIER
INLINEQUALIFIER
671
float
storage_density
(
const
cudaStream_t
stream
= 0)
const
noexcept
672
{
673
return
float
(
bytes_payload
(
stream
)) /
float
(
bytes_total
());
674
}
675
676
/*! \brief current relative storage density of the hash table
677
* \param stream CUDA stream in which this operation is executed in
678
* \return storage density
679
*/
680
HOSTQUALIFIER
INLINEQUALIFIER
681
float
relative_storage_density
(
const
cudaStream_t
stream
= 0)
const
noexcept
682
{
683
const
float
bytes_hash_table
=
684
hash_table_
.
capacity
() * (
sizeof
(
key_type
) +
sizeof
(
handle_type
));
685
const
float
bytes_value_store
=
686
value_store_
.
bytes_occupied
(
stream
);
687
688
return
float
(
bytes_payload
(
stream
)) / (
bytes_value_store
+
bytes_hash_table
);
689
}
690
691
/*! \brief indicates if the hash table is properly initialized
692
* \return \c true iff the hash table is properly initialized
693
*/
694
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
695
bool
is_initialized
()
const
noexcept
696
{
697
return
hash_table_
.
is_initialized
();
698
}
699
700
/*! \brief get the status of the hash table
701
* \param stream CUDA stream in which this operation is executed in
702
* \return the status
703
*/
704
HOSTQUALIFIER
INLINEQUALIFIER
705
status_type
peek_status
(
const
cudaStream_t
stream
= 0)
const
noexcept
706
{
707
return
hash_table_
.
peek_status
(
stream
);
708
}
709
710
/*! \brief get and reset the status of the hash table
711
* \param[in] stream CUDA stream in which this operation is executed in
712
* \return the status
713
*/
714
HOSTQUALIFIER
INLINEQUALIFIER
715
status_type
pop_status
(
const
cudaStream_t
stream
= 0)
noexcept
716
{
717
return
hash_table_
.
pop_status
(
stream
);
718
}
719
720
/*! \brief get the key capacity of the hash table
721
* \return number of key slots in the hash table
722
*/
723
HOSTQUALIFIER
INLINEQUALIFIER
724
index_type
key_capacity
()
const
noexcept
725
{
726
return
hash_table_
.
capacity
();
727
}
728
729
/*! \brief get the maximum value capacity of the hash table
730
* \return maximum value capacity
731
*/
732
HOSTQUALIFIER
INLINEQUALIFIER
733
index_type
value_capacity
()
const
noexcept
734
{
735
return
value_store_
.
capacity
();
736
}
737
738
/*! \brief number of keys stored inside the hash table
739
* \param[in] stream CUDA stream in which this operation is executed in
740
* \return number of keys inside the hash table
741
*/
742
HOSTQUALIFIER
INLINEQUALIFIER
743
index_type
num_keys
(
const
cudaStream_t
stream
= 0)
const
noexcept
744
{
745
return
hash_table_
.
size
(
stream
);
746
}
747
748
/*! \brief get number of values to a corresponding key inside the hash table
749
* \param[in] key_in key to probe
750
* \param[out] num_out number of values
751
* \param[in] group cooperative group this operation is executed in
752
* \param[in] probing_length maximum number of probing attempts
753
* \return status (per thread)
754
*/
755
DEVICEQUALIFIER
INLINEQUALIFIER
756
status_type
num_values
(
757
const
key_type
key_in
,
758
index_type
&
num_out
,
759
const
cg
::
thread_block_tile
<
cg_size
()>&
group
,
760
const
index_type
probing_length
=
defaults
::
probing_length
())
const
noexcept
761
{
762
handle_type
handle
;
763
764
status_type
status
=
765
hash_table_
.
retrieve
(
key_in
,
handle
,
group
,
probing_length
);
766
767
num_out
= (!
status
.
has_any
()) ?
value_store_
.
size
(
handle
) : 0;
768
769
return
status
;
770
}
771
772
/*! \brief get number of values to a corresponding set of keys inside the hash table
773
* \param[in] keys_in keys to probe
774
* \param[in] num_in input size
775
* \param[out] num_out total number of values in this query
776
* \param[out] num_per_key_out number of values per key
777
* \param[in] probing_length maximum number of probing attempts
778
* \param[in] stream CUDA stream in which this operation is executed in
779
* \param[out] status_out status information (per key)
780
*/
781
template
<
class
StatusHandler
=
defaults
::
status_handler_t
>
782
HOSTQUALIFIER
INLINEQUALIFIER
783
void
num_values
(
784
const
key_type
*
const
keys_in
,
785
const
index_type
num_in
,
786
index_type
&
num_out
,
787
index_type
*
const
num_per_key_out
=
nullptr
,
788
const
cudaStream_t
stream
= 0,
789
const
index_type
probing_length
=
defaults
::
probing_length
(),
790
typename
StatusHandler
::
base_type
*
const
status_out
=
nullptr
)
const
noexcept
791
{
792
static_assert
(
793
checks
::
is_status_handler
<
StatusHandler
>(),
794
"not a valid status handler type"
);
795
796
if
(!
hash_table_
.
is_initialized_
)
return
;
797
798
index_type
*
const
tmp
=
hash_table_
.
temp_
.
get
();
799
cudaMemsetAsync
(
tmp
, 0,
sizeof
(
index_type
),
stream
);
800
801
kernels
::
num_values
<
BucketListHashTable
,
StatusHandler
>
802
<<<
SDIV
(
num_in
*
cg_size
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
803
(
keys_in
,
num_in
,
tmp
,
num_per_key_out
, *
this
,
probing_length
,
status_out
);
804
805
cudaMemcpyAsync
(&
num_out
,
tmp
,
sizeof
(
index_type
),
D2H
,
stream
);
806
807
if
(
stream
== 0)
808
{
809
cudaStreamSynchronize
(
stream
);
810
}
811
}
812
813
/*! \brief get number of values inside the hash table
814
* \param[in] stream CUDA stream in which this operation is executed in
815
* \return total number of values
816
*/
817
HOSTQUALIFIER
INLINEQUALIFIER
818
index_type
num_values
(
const
cudaStream_t
stream
= 0)
const
noexcept
819
{
820
index_type
*
tmp
=
hash_table_
.
temp_
.
get
();
821
822
cudaMemsetAsync
(
tmp
, 0,
sizeof
(
index_type
),
stream
);
823
824
hash_table_
.
for_each
(
825
[=, *
this
]
DEVICEQUALIFIER
(
key_type
,
const
handle_type
&
handle
)
826
{
827
atomicAdd
(
tmp
,
value_store_
.
size
(
handle
));
828
},
829
stream
);
830
831
index_type
out
= 0;
832
833
cudaMemcpyAsync
(&
out
,
tmp
,
sizeof
(
index_type
),
D2H
,
stream
);
834
835
cudaStreamSynchronize
(
stream
);
836
837
return
out
;
838
}
839
840
/*! \brief indicates if this object is a shallow copy
841
* \return \c bool
842
*/
843
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
844
bool
is_copy
()
const
noexcept
845
{
846
return
is_copy_
;
847
}
848
849
private
:
850
/*! \brief joins additional flags to the hash table's status
851
* \info \c const on purpose
852
* \param[in] status new status
853
* \param[in] stream CUDA stream in which this operation is executed in
854
*/
855
HOSTQUALIFIER
INLINEQUALIFIER
856
void
join_status
(
857
const
status_type
status
,
858
const
cudaStream_t
stream
= 0)
const
noexcept
859
{
860
hash_table_
.
join_status
(
status
,
stream
);
861
}
862
863
/*! \brief joins additional flags to the hash table's status
864
* \info \c const on purpose
865
* \param[in] status new status
866
*/
867
DEVICEQUALIFIER
INLINEQUALIFIER
868
void
device_join_status
(
const
status_type
status
)
const
noexcept
869
{
870
hash_table_
.
device_join_status
(
status
);
871
}
872
873
hash_table_type
hash_table_
;
//< storage class for keys
874
value_store_type
value_store_
;
//< multi-value storage class
875
const
index_type
max_values_per_key_
;
//< maximum number of values to store per key
876
bool
is_copy_
;
//< indicates if this object is a shallow copy
877
878
template
<
class
Core
,
class
StatusHandler
>
879
GLOBALQUALIFIER
880
friend
void
kernels
::
retrieve
(
881
const
typename
Core
::
key_type
*
const
,
882
const
index_type
,
883
const
index_type
*
const
,
884
const
index_type
*
const
,
885
typename
Core
::
value_type
*
const
,
886
const
Core
,
887
const
index_type
,
888
typename
StatusHandler
::
base_type
*
const
);
889
890
};
// class BucketListHashTable
891
892
}
// namespace warpcore
893
894
#
endif
/* WARPCORE_BUCKET_LIST_HASH_TABLE_CUH */
include
warpcore
bucket_list_hash_table.cuh
Generated by
1.9.1