warpcore
0.0.1
Hashing at the Speed of Light on modern CUDA-accelerators
single_value_hash_table.cuh
Go to the documentation of this file.
1
#
ifndef
WARPCORE_SINGLE_VALUE_HASH_TABLE_CUH
2
#
define
WARPCORE_SINGLE_VALUE_HASH_TABLE_CUH
3
4
#
include
"base.cuh"
5
6
namespace
warpcore
7
{
8
9
// forward declaration of friends
10
template
<
11
class
Key,
12
class
Value,
13
Key EmptyKey,
14
Key TombstoneKey,
15
class
ProbingScheme,
16
class
TableStorage,
17
index_t TempMemoryBytes>
18
class
CountingHashTable;
19
20
/*! \brief single-value hash table
21
* \tparam Key key type (\c std::uint32_t or \c std::uint64_t)
22
* \tparam Value value type
23
* \tparam EmptyKey key which represents an empty slot
24
* \tparam TombstoneKey key which represents an erased slot
25
* \tparam ProbingScheme probing scheme from \c warpcore::probing_schemes
26
* \tparam TableStorage memory layout from \c warpcore::storage::key_value
27
* \tparam TempMemoryBytes size of temporary storage (typically a few kB)
28
*/
29
template
<
30
class
Key
,
31
class
Value
,
32
Key
EmptyKey
=
defaults
::
empty_key
<
Key
>(),
33
Key
TombstoneKey
=
defaults
::
tombstone_key
<
Key
>(),
34
class
ProbingScheme
=
defaults
::
probing_scheme_t
<
Key
, 8>,
35
class
TableStorage
=
defaults
::
table_storage_t
<
Key
,
Value
>,
36
index_t
TempMemoryBytes
=
defaults
::
temp_memory_bytes
()>
37
class
SingleValueHashTable
38
{
39
static_assert
(
40
checks
::
is_valid_key_type
<
Key
>(),
41
"invalid key type"
);
42
43
static_assert
(
44
EmptyKey
!=
TombstoneKey
,
45
"empty key and tombstone key must not be identical"
);
46
47
static_assert
(
48
checks
::
is_probing_scheme
<
ProbingScheme
>(),
49
"not a valid probing scheme type"
);
50
51
static_assert
(
52
std
::
is_same
<
typename
ProbingScheme
::
key_type
,
Key
>::
value
,
53
"probing key type differs from table's key type"
);
54
55
static_assert
(
56
checks
::
is_key_value_storage
<
TableStorage
>(),
57
"not a valid storage type"
);
58
59
static_assert
(
60
std
::
is_same
<
typename
TableStorage
::
key_type
,
Key
>::
value
,
61
"storage's key type differs from table's key type"
);
62
63
static_assert
(
64
std
::
is_same
<
typename
TableStorage
::
value_type
,
Value
>::
value
,
65
"storage's value type differs from table's value type"
);
66
67
static_assert
(
68
TempMemoryBytes
>=
sizeof
(
index_t
),
69
"temporary storage must at least be of size index_type"
);
70
71
using
temp_type
=
storage
::
CyclicStore
<
index_t
>;
72
73
public
:
74
using
key_type
=
Key
;
75
using
value_type
=
Value
;
76
using
index_type
=
index_t
;
77
using
status_type
=
Status
;
78
79
/*! \brief get empty key
80
* \return empty key
81
*/
82
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
83
static
constexpr
key_type
empty_key
()
noexcept
84
{
85
return
EmptyKey
;
86
}
87
88
/*! \brief get tombstone key
89
* \return tombstone key
90
*/
91
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
92
static
constexpr
key_type
tombstone_key
()
noexcept
93
{
94
return
TombstoneKey
;
95
}
96
97
/*! \brief get cooperative group size
98
* \return cooperative group size
99
*/
100
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
101
static
constexpr
index_type
cg_size
()
noexcept
102
{
103
return
ProbingScheme
::
cg_size
();
104
}
105
106
/*! \brief constructor
107
* \param[in] min_capacity minimum number of slots in the hash table
108
* \param[in] seed random seed
109
* \param[in] no_init whether to initialize the table at construction or not
110
*/
111
HOSTQUALIFIER
INLINEQUALIFIER
112
explicit
SingleValueHashTable
(
113
const
index_type
min_capacity
,
114
const
key_type
seed
=
defaults
::
seed
<
key_type
>(),
115
const
bool
no_init
=
false
)
noexcept
:
116
status_
(
nullptr
),
117
table_
(
detail
::
get_valid_capacity
(
min_capacity
,
cg_size
())),
118
temp_
(
TempMemoryBytes
/
sizeof
(
index_type
)),
119
seed_
(
seed
),
120
is_initialized_
(
false
),
121
is_copy_
(
false
)
122
{
123
cudaMalloc
(&
status_
,
sizeof
(
status_type
));
124
125
assign_status
(
table_
.
status
() +
temp_
.
status
());
126
127
if
(!
no_init
)
init
();
128
}
129
130
/*! \brief copy-constructor (shallow)
131
* \param[in] object to be copied
132
*/
133
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
134
SingleValueHashTable
(
const
SingleValueHashTable
&
o
)
noexcept
:
135
status_
(
o
.
status_
),
136
table_
(
o
.
table_
),
137
temp_
(
o
.
temp_
),
138
seed_
(
o
.
seed_
),
139
is_initialized_
(
o
.
is_initialized_
),
140
is_copy_
(
true
)
141
{}
142
143
/*! \brief move-constructor
144
* \param[in] object to be moved
145
*/
146
HOSTQUALIFIER
INLINEQUALIFIER
147
SingleValueHashTable
(
SingleValueHashTable
&&
o
)
noexcept
:
148
status_
(
std
::
move
(
o
.
status_
)),
149
table_
(
std
::
move
(
o
.
table_
)),
150
temp_
(
std
::
move
(
o
.
temp_
)),
151
seed_
(
std
::
move
(
o
.
seed_
)),
152
is_initialized_
(
std
::
move
(
o
.
is_initialized_
)),
153
is_copy_
(
std
::
move
(
o
.
is_copy_
))
154
{
155
o
.
is_copy_
=
true
;
156
}
157
158
#
ifndef
__CUDA_ARCH__
159
/*! \brief destructor
160
*/
161
HOSTQUALIFIER
INLINEQUALIFIER
162
~
SingleValueHashTable
()
noexcept
163
{
164
if
(!
is_copy_
)
165
{
166
if
(
status_
!=
nullptr
)
cudaFree
(
status_
);
167
}
168
}
169
#
endif
170
171
/*! \brief (re)initialize the hash table
172
* \param[in] seed random seed
173
* \param[in] stream CUDA stream in which this operation is executed in
174
*/
175
HOSTQUALIFIER
INLINEQUALIFIER
176
void
init
(
177
const
key_type
seed
,
178
const
cudaStream_t
stream
= 0)
noexcept
179
{
180
is_initialized_
=
false
;
181
182
seed_
=
seed
;
183
if
(!
table_
.
status
().
has_not_initialized
() &&
184
!
temp_
.
status
().
has_not_initialized
())
185
{
186
table_
.
init_keys
(
empty_key
(),
stream
);
187
188
assign_status
(
table_
.
status
() +
temp_
.
status
(),
stream
);
189
190
is_initialized_
=
true
;
191
}
192
}
193
194
/*! \brief (re)initialize the hash table
195
* \param[in] stream CUDA stream in which this operation is executed in
196
*/
197
HOSTQUALIFIER
INLINEQUALIFIER
198
void
init
(
const
cudaStream_t
stream
= 0)
noexcept
199
{
200
init
(
seed_
,
stream
);
201
}
202
203
/*! \brief inserts a key into the hash table
204
* \param[in] key_in key to insert into the hash table
205
* \param[in] value_in value that corresponds to \c key_in
206
* \param[in] group cooperative group
207
* \param[in] probing_length maximum number of probing attempts
208
* \return status (per thread)
209
*/
210
DEVICEQUALIFIER
INLINEQUALIFIER
211
status_type
insert
(
212
const
key_type
key_in
,
213
const
value_type
&
value_in
,
214
const
cg
::
thread_block_tile
<
cg_size
()>&
group
,
215
const
index_type
probing_length
=
defaults
::
probing_length
())
noexcept
216
{
217
status_type
status
=
status_type
::
unknown_error
();
218
219
value_type
*
value_ptr
=
220
insert_impl
(
key_in
,
status
,
group
,
probing_length
);
221
222
if
(
group
.
thread_rank
() == 0 &&
value_ptr
!=
nullptr
)
223
{
224
*
value_ptr
=
value_in
;
225
}
226
227
return
status
;
228
}
229
230
/*! \brief insert a set of keys into the hash table
231
* \tparam StatusHandler handles returned status per key (see \c status_handlers)
232
* \param[in] keys_in pointer to keys to insert into the hash table
233
* \param[in] values_in corresponds values to \c keys_in
234
* \param[in] num_in number of keys to insert
235
* \param[in] stream CUDA stream in which this operation is executed in
236
* \param[in] probing_length maximum number of probing attempts
237
* \param[out] status_out status information per key
238
*/
239
template
<
class
StatusHandler
=
defaults
::
status_handler_t
>
240
HOSTQUALIFIER
INLINEQUALIFIER
241
void
insert
(
242
const
key_type
*
const
keys_in
,
243
const
value_type
*
const
values_in
,
244
const
index_type
num_in
,
245
const
cudaStream_t
stream
= 0,
246
const
index_type
probing_length
=
defaults
::
probing_length
(),
247
typename
StatusHandler
::
base_type
*
const
status_out
=
nullptr
)
noexcept
248
{
249
static_assert
(
250
checks
::
is_status_handler
<
StatusHandler
>(),
251
"not a valid status handler type"
);
252
253
if
(!
is_initialized_
)
return
;
254
255
kernels
::
insert
<
SingleValueHashTable
,
StatusHandler
>
256
<<<
SDIV
(
num_in
*
cg_size
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
257
(
keys_in
,
values_in
,
num_in
, *
this
,
probing_length
,
status_out
);
258
}
259
260
/*! \brief retrieves a key from the hash table
261
* \param[in] key_in key to retrieve from the hash table
262
* \param[out] value_out value for \c key_in
263
* \param[in] group cooperative group
264
* \param[in] probing_length maximum number of probing attempts
265
* \return status (per thread)
266
*/
267
DEVICEQUALIFIER
INLINEQUALIFIER
268
status_type
retrieve
(
269
const
key_type
key_in
,
270
value_type
&
value_out
,
271
const
cg
::
thread_block_tile
<
cg_size
()>&
group
,
272
const
index_type
probing_length
=
defaults
::
probing_length
())
const
noexcept
273
{
274
if
(!
is_initialized_
)
return
status_type
::
not_initialized
();
275
276
if
(!
is_valid_key
(
key_in
))
277
{
278
status_
->
atomic_join
(
status_type
::
invalid_key
());
279
return
status_type
::
invalid_key
();
280
}
281
282
ProbingScheme
iter
(
capacity
(),
probing_length
,
group
);
283
284
for
(
index_type
i
=
iter
.
begin
(
key_in
,
seed_
);
i
!=
iter
.
end
();
i
=
iter
.
next
())
285
{
286
key_type
table_key
=
table_
[
i
].
key
;
287
const
bool
hit
= (
table_key
==
key_in
);
288
const
auto
hit_mask
=
group
.
ballot
(
hit
);
289
290
if
(
hit_mask
)
291
{
292
const
auto
leader
=
ffs
(
hit_mask
) - 1;
293
value_out
=
table_
[
group
.
shfl
(
i
,
leader
)].
value
;
294
295
return
status_type
::
none
();
296
}
297
298
if
(
group
.
any
(
is_empty_key
(
table_key
)))
299
{
300
status_
->
atomic_join
(
status_type
::
key_not_found
());
301
return
status_type
::
key_not_found
();
302
}
303
}
304
305
status_
->
atomic_join
(
status_type
::
probing_length_exceeded
());
306
return
status_type
::
probing_length_exceeded
();
307
}
308
309
/*! \brief retrieve a set of keys from the hash table
310
* \tparam StatusHandler handles returned status per key (see \c status_handlers)
311
* \param[in] keys_in pointer to keys to retrieve from the hash table
312
* \param[in] num_in number of keys to retrieve
313
* \param[out] values_out retrieved values of keys in \c key_in
314
* \param[in] stream CUDA stream in which this operation is executed in
315
* \param[in] probing_length maximum number of probing attempts
316
* \param[out] status_out status information (per key)
317
*/
318
template
<
class
StatusHandler
=
defaults
::
status_handler_t
>
319
HOSTQUALIFIER
INLINEQUALIFIER
320
void
retrieve
(
321
const
key_type
*
const
keys_in
,
322
const
index_type
num_in
,
323
value_type
*
const
values_out
,
324
const
cudaStream_t
stream
= 0,
325
const
index_type
probing_length
=
defaults
::
probing_length
(),
326
typename
StatusHandler
::
base_type
*
const
status_out
=
nullptr
)
const
noexcept
327
{
328
static_assert
(
329
checks
::
is_status_handler
<
StatusHandler
>(),
330
"not a valid status handler type"
);
331
332
if
(!
is_initialized_
)
return
;
333
334
kernels
::
retrieve
<
SingleValueHashTable
,
StatusHandler
>
335
<<<
SDIV
(
num_in
*
cg_size
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
336
(
keys_in
,
num_in
,
values_out
, *
this
,
probing_length
,
status_out
);
337
}
338
339
/*! \brief retrieve a set of keys from the hash table
340
* \tparam StatusHandler handles returned status per key (see \c status_handlers)
341
* \param[in] keys_in pointer to keys to retrieve from the hash table
342
* \param[in] num_in number of keys to retrieve
343
* \param[out] keys_out keys retrieved from the hash table
344
* \param[out] values_out retrieved values
345
* \param[out] num_out number of pairs retrieved
346
* \param[in] stream CUDA stream in which this operation is executed in
347
* \param[in] probing_length maximum number of probing attempts
348
* \param[out] status_out status information (per key)
349
*/
350
template
<
class
StatusHandler
=
defaults
::
status_handler_t
>
351
HOSTQUALIFIER
INLINEQUALIFIER
352
void
retrieve
(
353
const
key_type
*
const
keys_in
,
354
const
index_type
num_in
,
355
key_type
*
const
keys_out
,
356
value_type
*
const
values_out
,
357
index_type
&
num_out
,
358
const
cudaStream_t
stream
= 0,
359
const
index_type
probing_length
=
defaults
::
probing_length
(),
360
typename
StatusHandler
::
base_type
*
const
status_out
=
nullptr
)
const
noexcept
361
{
362
static_assert
(
363
checks
::
is_status_handler
<
StatusHandler
>(),
364
"not a valid status handler type"
);
365
366
if
(!
is_initialized_
)
return
;
367
368
index_type
*
tmp
=
temp_
.
get
();
369
370
cudaMemsetAsync
(
tmp
, 0,
sizeof
(
index_type
),
stream
);
371
372
kernels
::
retrieve
<
SingleValueHashTable
,
StatusHandler
>
373
<<<
SDIV
(
num_in
*
cg_size
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
374
(
keys_in
,
num_in
,
keys_out
,
values_out
,
tmp
, *
this
,
probing_length
,
status_out
);
375
376
cudaMemcpyAsync
(&
num_out
,
tmp
,
sizeof
(
index_type
),
D2H
);
377
378
if
(
stream
== 0)
379
{
380
cudaStreamSynchronize
(
stream
);
381
}
382
}
383
384
/*! \brief retrieves all elements from the hash table
385
* \param[out] keys_out location to store retrieved keys
386
* \param[out] values_out location to store corresponding retrieved values
387
* \param[out] num_out number of of key/value pairs retrieved
388
* \param[in] stream CUDA stream in which this operation is executed in
389
*/
390
HOSTQUALIFIER
INLINEQUALIFIER
391
void
retrieve_all
(
392
key_type
*
const
keys_out
,
393
value_type
*
const
values_out
,
394
index_type
&
num_out
,
395
const
cudaStream_t
stream
= 0)
const
noexcept
396
{
397
if
(!
is_initialized_
)
return
;
398
399
index_type
*
tmp
=
temp_
.
get
();
400
401
cudaMemsetAsync
(
tmp
, 0,
sizeof
(
index_t
),
stream
);
402
403
for_each
([=, *
this
]
DEVICEQUALIFIER
404
(
key_type
key
,
const
value_type
&
value
)
405
{
406
const
auto
i
=
helpers
::
atomicAggInc
(
tmp
);
407
keys_out
[
i
] =
key
;
408
values_out
[
i
] =
value
;
409
},
stream
);
410
411
cudaMemcpyAsync
(&
num_out
,
tmp
,
sizeof
(
index_type
),
D2H
);
412
413
if
(
stream
== 0)
414
{
415
cudaStreamSynchronize
(
stream
);
416
}
417
}
418
419
/*! \brief erases a key from the hash table
420
* \param[in] key_in key to erase from the hash table
421
* \param[in] group cooperative group
422
* \param[in] probing_length maximum number of probing attempts
423
* \return status (per thread)
424
*/
425
DEVICEQUALIFIER
INLINEQUALIFIER
426
status_type
erase
(
427
const
key_type
key_in
,
428
const
cg
::
thread_block_tile
<
cg_size
()>&
group
,
429
const
index_type
probing_length
=
defaults
::
probing_length
())
noexcept
430
{
431
if
(!
is_initialized_
)
return
status_type
::
not_initialized
();
432
433
if
(!
is_valid_key
(
key_in
))
434
{
435
status_
->
atomic_join
(
status_type
::
invalid_key
());
436
return
status_type
::
invalid_key
();
437
}
438
439
ProbingScheme
iter
(
capacity
(),
probing_length
,
group
);
440
441
for
(
index_type
i
=
iter
.
begin
(
key_in
,
seed_
);
i
!=
iter
.
end
();
i
=
iter
.
next
())
442
{
443
Key
table_key
=
table_
[
i
].
key
;
444
const
bool
hit
= (
table_key
==
key_in
);
445
const
auto
hit_mask
=
group
.
ballot
(
hit
);
446
447
if
(
hit_mask
)
448
{
449
const
auto
leader
=
ffs
(
hit_mask
) - 1;
450
451
if
(
group
.
thread_rank
() ==
leader
)
452
{
453
table_
[
i
].
key
=
tombstone_key
();
454
}
455
456
return
status_type
::
none
();
457
}
458
459
if
(
group
.
any
(
is_empty_key
(
table_key
)))
460
{
461
//return status_type::none();
462
return
status_type
::
key_not_found
();
463
}
464
}
465
466
//return status_type::none();
467
return
status_type
::
probing_length_exceeded
();
468
}
469
470
/*! \brief erases a set of keys from the hash table
471
* \tparam StatusHandler handles returned per key (see \c status_handlers )
472
* \param[in] keys_in pointer to keys to erase from the hash table
473
* \param[in] num_in number of keys to erase
474
* \param[in] probing_length maximum number of probing attempts
475
* \param[in] stream CUDA stream in which this operation is executed in
476
* \param[out] status_out status information (per key)
477
*/
478
template
<
class
StatusHandler
=
defaults
::
status_handler_t
>
479
HOSTQUALIFIER
INLINEQUALIFIER
480
void
erase
(
481
const
key_type
*
const
keys_in
,
482
const
index_type
num_in
,
483
const
cudaStream_t
stream
= 0,
484
const
index_type
probing_length
=
defaults
::
probing_length
(),
485
typename
StatusHandler
::
base_type
*
const
status_out
=
nullptr
)
noexcept
486
{
487
static_assert
(
488
checks
::
is_status_handler
<
StatusHandler
>(),
489
"not a valid status handler type"
);
490
491
if
(!
is_initialized_
)
return
;
492
493
kernels
::
erase
<
SingleValueHashTable
,
StatusHandler
>
494
<<<
SDIV
(
num_in
*
cg_size
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
, 0,
stream
>>>
495
(
keys_in
,
num_in
, *
this
,
probing_length
,
status_out
);
496
}
497
498
/*! \brief applies a funtion over all key value pairs inside the table
499
* \tparam Func type of map i.e. CUDA device lambda
500
* \param[in] f map to apply
501
* \param[in] stream CUDA stream in which this operation is executed in
502
* \param[in] size of shared memory to reserve for this execution
503
*/
504
template
<
class
Func
>
505
HOSTQUALIFIER
INLINEQUALIFIER
506
void
for_each
(
507
Func
f
,
// TODO const?
508
const
cudaStream_t
stream
= 0,
509
const
index_type
smem_bytes
= 0)
const
noexcept
510
{
511
if
(!
is_initialized_
)
return
;
512
513
helpers
::
lambda_kernel
514
<<<
SDIV
(
capacity
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
,
smem_bytes
,
stream
>>>
515
([=, *
this
]
DEVICEQUALIFIER
516
{
517
const
index_type
tid
=
helpers
::
global_thread_id
();
518
519
if
(
tid
<
capacity
())
520
{
521
auto
&&
pair
=
table_
[
tid
];
522
if
(
is_valid_key
(
pair
.
key
))
523
{
524
f
(
pair
.
key
,
pair
.
value
);
525
}
526
}
527
});
528
}
529
530
/*! \brief number of key/value pairs stored inside the hash table
531
* \param[in] stream CUDA stream in which this operation is executed in
532
* \return the number of key/value pairs inside the hash table
533
*/
534
HOSTQUALIFIER
INLINEQUALIFIER
535
index_type
size
(
const
cudaStream_t
stream
= 0)
const
noexcept
536
{
537
if
(!
is_initialized_
)
return
0;
538
539
index_type
out
;
540
index_type
*
tmp
=
temp_
.
get
();
541
542
cudaMemsetAsync
(
tmp
, 0,
sizeof
(
index_t
),
stream
);
543
544
helpers
::
lambda_kernel
545
<<<
SDIV
(
capacity
(),
WARPCORE_BLOCKSIZE
),
WARPCORE_BLOCKSIZE
,
sizeof
(
index_type
),
stream
>>>
546
([=, *
this
]
DEVICEQUALIFIER
547
{
548
__shared__
index_type
smem
;
549
550
const
index_type
tid
=
helpers
::
global_thread_id
();
551
const
auto
block
=
cg
::
this_thread_block
();
552
553
if
(
tid
>=
capacity
())
return
;
554
555
const
bool
empty
= !
is_valid_key
(
table_
[
tid
].
key
);
556
557
if
(
block
.
thread_rank
() == 0)
558
{
559
smem
= 0;
560
}
561
562
block
.
sync
();
563
564
if
(!
empty
)
565
{
566
const
auto
active_threads
=
cg
::
coalesced_threads
();
567
568
if
(
active_threads
.
thread_rank
() == 0)
569
{
570
atomicAdd
(&
smem
,
active_threads
.
size
());
571
}
572
}
573
574
block
.
sync
();
575
576
if
(
block
.
thread_rank
() == 0 &&
smem
!= 0)
577
{
578
atomicAdd
(
tmp
,
smem
);
579
}
580
});
581
582
cudaMemcpyAsync
(
583
&
out
,
584
tmp
,
585
sizeof
(
index_type
),
586
D2H
,
587
stream
);
588
589
cudaStreamSynchronize
(
stream
);
590
591
return
out
;
592
}
593
594
/*! \brief current load factor of the hash table
595
* \param stream CUDA stream in which this operation is executed in
596
* \return load factor
597
*/
598
HOSTQUALIFIER
INLINEQUALIFIER
599
float
load_factor
(
const
cudaStream_t
stream
= 0)
const
noexcept
600
{
601
return
float
(
size
(
stream
)) /
float
(
capacity
());
602
}
603
604
/*! \brief current storage density of the hash table
605
* \param stream CUDA stream in which this operation is executed in
606
* \return storage density
607
*/
608
HOSTQUALIFIER
INLINEQUALIFIER
609
float
storage_density
(
const
cudaStream_t
stream
= 0)
const
noexcept
610
{
611
return
load_factor
(
stream
);
612
}
613
614
/*! \brief get the capacity of the hash table
615
* \return number of slots in the hash table
616
*/
617
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
618
index_type
capacity
()
const
noexcept
619
{
620
return
table_
.
capacity
();
621
}
622
623
/*! \brief get the total number of bytes occupied by this data structure
624
* \return bytes
625
*/
626
HOSTQUALIFIER
INLINEQUALIFIER
627
index_type
bytes_total
()
const
noexcept
628
{
629
return
table_
.
bytes_total
() +
temp_
.
bytes_total
() +
sizeof
(
status_type
);
630
}
631
632
/*! \brief indicates if the hash table is properly initialized
633
* \return \c true iff the hash table is properly initialized
634
*/
635
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
636
bool
is_initialized
()
const
noexcept
637
{
638
return
is_initialized_
;
639
}
640
641
/*! \brief get the status of the hash table
642
* \param stream CUDA stream in which this operation is executed in
643
* \return the status
644
*/
645
HOSTQUALIFIER
INLINEQUALIFIER
646
status_type
peek_status
(
const
cudaStream_t
stream
= 0)
const
noexcept
647
{
648
status_type
status
=
status_type
::
not_initialized
();
649
650
if
(
status_
!=
nullptr
)
651
{
652
cudaMemcpyAsync
(
653
&
status
,
654
status_
,
655
sizeof
(
status_type
),
656
D2H
,
657
stream
);
658
659
cudaStreamSynchronize
(
stream
);
660
}
661
662
return
status
;
663
}
664
665
/*! \brief get and reset the status of the hash table
666
* \param[in] stream CUDA stream in which this operation is executed in
667
* \return the status
668
*/
669
HOSTQUALIFIER
INLINEQUALIFIER
670
status_type
pop_status
(
const
cudaStream_t
stream
= 0)
noexcept
671
{
672
status_type
status
=
status_type
::
not_initialized
();
673
674
if
(
status_
!=
nullptr
)
675
{
676
cudaMemcpyAsync
(
677
&
status
,
678
status_
,
679
sizeof
(
status_type
),
680
D2H
,
681
stream
);
682
683
assign_status
(
table_
.
status
(),
stream
);
684
}
685
686
return
status
;
687
}
688
689
/*! \brief checks if \c key is equal to \c EmptyKey
690
* \return \c bool
691
*/
692
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
693
static
constexpr
bool
is_empty_key
(
const
key_type
key
)
noexcept
694
{
695
return
(
key
==
empty_key
());
696
}
697
698
/*! \brief checks if \c key is equal to \c TombstoneKey
699
* \return \c bool
700
*/
701
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
702
static
constexpr
bool
is_tombstone_key
(
const
key_type
key
)
noexcept
703
{
704
return
(
key
==
tombstone_key
());
705
}
706
707
/*! \brief checks if \c key is equal to \c (EmptyKey||TombstoneKey)
708
* \return \c bool
709
*/
710
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
711
static
constexpr
bool
is_valid_key
(
const
key_type
key
)
noexcept
712
{
713
return
(
key
!=
empty_key
() &&
key
!=
tombstone_key
());
714
}
715
716
/*! \brief get random seed
717
* \return seed
718
*/
719
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
720
key_type
seed
()
const
noexcept
721
{
722
return
seed_
;
723
}
724
725
/*! \brief indicates if this object is a shallow copy
726
* \return \c bool
727
*/
728
HOSTDEVICEQUALIFIER
INLINEQUALIFIER
729
bool
is_copy
()
const
noexcept
730
{
731
return
is_copy_
;
732
}
733
734
private
:
735
/*! \brief internal insert implementation
736
* \param[in] key_in key to insert into the hash table
737
* \param[out] status_out status returned by this operation
738
* \param[in] group cooperative group
739
* \param[in] probing_length maximum number of probing attempts
740
* \return pointer to the corresponding value slot
741
*/
742
DEVICEQUALIFIER
INLINEQUALIFIER
743
value_type
*
insert_impl
(
744
const
key_type
key_in
,
745
status_type
&
status_out
,
746
const
cg
::
thread_block_tile
<
cg_size
()>&
group
,
747
const
index_type
probing_length
)
noexcept
748
{
749
if
(!
is_initialized_
)
750
{
751
status_out
=
status_type
::
not_initialized
();
752
return
nullptr
;
753
}
754
755
if
(!
is_valid_key
(
key_in
))
756
{
757
status_
->
atomic_join
(
status_type
::
invalid_key
());
758
status_out
=
status_type
::
invalid_key
();
759
return
nullptr
;
760
}
761
762
ProbingScheme
iter
(
capacity
(),
probing_length
,
group
);
763
764
for
(
index_type
i
=
iter
.
begin
(
key_in
,
seed_
);
i
!=
iter
.
end
();
i
=
iter
.
next
())
765
{
766
key_type
table_key
=
table_
[
i
].
key
;
767
const
bool
hit
= (
table_key
==
key_in
);
768
const
auto
hit_mask
=
group
.
ballot
(
hit
);
769
770
if
(
hit_mask
)
771
{
772
status_
->
atomic_join
(
status_type
::
duplicate_key
());
773
status_out
=
status_type
::
duplicate_key
();
774
775
const
auto
leader
=
ffs
(
hit_mask
) - 1;
776
const
auto
leader_index
=
group
.
shfl
(
i
,
leader
);
777
return
&(
table_
[
leader_index
].
value
);
778
}
779
780
// !not_is_valid_key?
781
auto
empty_mask
=
group
.
ballot
(
is_empty_key
(
table_key
));
782
783
bool
success
=
false
;
784
bool
duplicate
=
false
;
785
786
while
(
empty_mask
)
787
{
788
const
auto
leader
=
ffs
(
empty_mask
) - 1;
789
790
if
(
group
.
thread_rank
() ==
leader
)
791
{
792
const
auto
old
=
793
atomicCAS
(&(
table_
[
i
].
key
),
table_key
,
key_in
);
794
795
success
= (
old
==
table_key
);
796
duplicate
= (
old
==
key_in
);
797
}
798
799
if
(
group
.
any
(
duplicate
))
800
{
801
status_
->
atomic_join
(
status_type
::
duplicate_key
());
802
status_out
=
status_type
::
duplicate_key
();
803
804
const
auto
leader_index
=
group
.
shfl
(
i
,
leader
);
805
return
&(
table_
[
leader_index
].
value
);
806
}
807
808
if
(
group
.
any
(
success
))
809
{
810
status_out
=
status_type
::
none
();
811
const
auto
leader_index
=
group
.
shfl
(
i
,
leader
);
812
813
return
&(
table_
[
leader_index
].
value
);
814
}
815
816
empty_mask
^= 1UL <<
leader
;
817
}
818
}
819
820
status_
->
atomic_join
(
status_type
::
probing_length_exceeded
());
821
status_out
=
status_type
::
probing_length_exceeded
();
822
return
nullptr
;
823
}
824
825
/*! \brief assigns the hash table's status
826
* \param[in] status new status
827
* \param[in] stream CUDA stream in which this operation is executed in
828
*/
829
HOSTQUALIFIER
INLINEQUALIFIER
830
void
assign_status
(
831
const
status_type
status
,
832
const
cudaStream_t
stream
= 0)
const
noexcept
833
{
834
if
(
status_
!=
nullptr
)
835
{
836
cudaMemcpyAsync
(
837
status_
,
838
&
status
,
839
sizeof
(
status_type
),
840
H2D
,
841
stream
);
842
843
cudaStreamSynchronize
(
stream
);
844
}
845
}
846
847
/*! \brief joins additional flags to the hash table's status
848
* \param[in] status new status
849
* \param[in] stream CUDA stream in which this operation is executed in
850
*/
851
HOSTQUALIFIER
INLINEQUALIFIER
852
void
join_status
(
853
const
status_type
status
,
854
const
cudaStream_t
stream
= 0)
const
noexcept
855
{
856
if
(
status_
!=
nullptr
)
857
{
858
const
status_type
joined
=
peek_status
(
stream
) +
status
;
859
860
cudaMemcpyAsync
(
861
status_
,
862
&
joined
,
863
sizeof
(
status_type
),
864
H2D
,
865
stream
);
866
867
cudaStreamSynchronize
(
stream
);
868
}
869
}
870
871
/*! \brief joins additional flags to the hash table's status
872
* \info \c const on purpose
873
* \param[in] status new status
874
*/
875
DEVICEQUALIFIER
INLINEQUALIFIER
876
void
device_join_status
(
const
status_type
status
)
const
noexcept
877
{
878
if
(
status_
!=
nullptr
)
879
{
880
status_
->
atomic_join
(
status
);
881
}
882
}
883
884
status_type
*
status_
;
//< pointer to status
885
TableStorage
table_
;
//< actual key/value storage
886
temp_type
temp_
;
//< temporary memory
887
key_type
seed_
;
//< random seed
888
bool
is_initialized_
;
//< indicates if table is properly initialized
889
bool
is_copy_
;
//< indicates if table is a shallow copy
890
891
// friend declarations
892
template
<
893
class
Key_
,
894
class
Value_
,
895
Key_
EmptyKey_
,
896
Key_
TombstoneKey_
,
897
class
ProbingScheme_
,
898
class
TableStorage_
,
899
index_type
TempMemoryBytes_
>
900
friend
class
CountingHashTable
;
901
902
template
<
903
class
Key_
,
904
class
Value_
,
905
Key_
EmptyKey_
,
906
Key_
TombstoneKey_
,
907
class
ValueStore_
,
908
class
ProbingScheme_
>
909
friend
class
BucketListHashTable
;
910
911
912
};
// class SingleValueHashTable
913
914
}
// namespace warpcore
915
916
#
endif
/* WARPCORE_SINGLE_VALUE_HASH_TABLE_CUH */
include
warpcore
single_value_hash_table.cuh
Generated by
1.9.1