1 #ifndef WARPCORE_STORAGE_CUH
2 #define WARPCORE_STORAGE_CUH
26 HOSTQUALIFIER INLINEQUALIFIER
143 const index_type capacity_;
144 index_type * current_;
156 template<
class Key,
class Value>
159 template<
class Key,
class Value>
165 template<
class Key,
class Value>
185 friend AoSStore<Key, Value>;
186 friend SoAStore<Key, Value>;
189 template<
class Key,
class Value>
209 template<
class Key,
class Value>
218 template<
class Key,
class Value>
222 using key_type = Key;
223 using value_type = Value;
224 using status_type = Status;
225 using index_type = index_t;
226 using tag = tags::key_value_storage;
231 HOSTQUALIFIER INLINEQUALIFIER
232 explicit SoAStore(
const index_type capacity)
noexcept :
233 status_(Status::not_initialized()),
241 const auto total_bytes = (((
sizeof(key_type) +
sizeof(value_type)) *
242 capacity) +
sizeof(status_type));
244 if(helpers::available_gpu_memory() >= total_bytes)
246 cudaMalloc(&keys_,
sizeof(key_type)*capacity);
247 cudaMalloc(&values_,
sizeof(value_type)*capacity);
249 status_ = status_type::none();
253 status_ += status_type::out_of_memory();
258 status_ += status_type::invalid_configuration();
265 HOSTDEVICEQUALIFIER INLINEQUALIFIER
266 SoAStore(
const SoAStore& o)
noexcept :
268 capacity_(o.capacity_),
277 HOSTQUALIFIER INLINEQUALIFIER
278 SoAStore(SoAStore&& o)
noexcept :
279 status_(std::move(o.status_)),
280 capacity_(std::move(o.capacity_)),
281 keys_(std::move(o.keys_)),
282 values_(std::move(o.values_)),
283 is_copy_(std::move(o.is_copy_))
288 #ifndef __CUDA_ARCH__
291 HOSTQUALIFIER INLINEQUALIFIER
296 if(keys_ !=
nullptr) cudaFree(keys_);
297 if(values_ !=
nullptr) cudaFree(values_);
306 HOSTQUALIFIER INLINEQUALIFIER
307 void init_keys(
const key_type key,
const cudaStream_t stream = 0)
noexcept
309 if(!status_.has_any())
311 helpers::lambda_kernel
312 <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
313 ([=, *
this] DEVICEQUALIFIER
315 const index_type tid = helpers::global_thread_id();
329 HOSTQUALIFIER INLINEQUALIFIER
330 void init_values(
const value_type value,
const cudaStream_t stream = 0)
noexcept
332 if(!status_.has_any())
334 helpers::lambda_kernel
335 <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
336 ([=, *
this] DEVICEQUALIFIER
338 const index_type tid = helpers::global_thread_id();
342 values_[tid] = value;
353 HOSTQUALIFIER INLINEQUALIFIER
356 const value_type value,
357 const cudaStream_t stream = 0)
noexcept
359 if(!status_.has_any())
361 helpers::lambda_kernel
362 <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
363 ([=, *
this] DEVICEQUALIFIER
365 const index_type tid = helpers::global_thread_id();
370 values_[tid] = value;
380 DEVICEQUALIFIER INLINEQUALIFIER
381 detail::pair_ref_t<key_type, value_type> operator[](index_type i)
noexcept
383 assert(i < capacity_);
384 return detail::pair_ref_t<key_type, value_type>{keys_[i], values_[i]};
391 DEVICEQUALIFIER INLINEQUALIFIER
392 detail::pair_const_ref_t<key_type, value_type> operator[](
393 const index_type i)
const noexcept
395 return detail::pair_const_ref_t<key_type, value_type>{keys_[i], values_[i]};
401 HOSTDEVICEQUALIFIER INLINEQUALIFIER
402 status_type status()
const noexcept
410 HOSTDEVICEQUALIFIER INLINEQUALIFIER
411 index_type capacity()
const noexcept
419 HOSTQUALIFIER INLINEQUALIFIER
420 index_type bytes_total()
const noexcept
422 return capacity_ * (
sizeof(key_type) +
sizeof(value_type));
427 const index_type capacity_;
429 value_type * values_;
438 template<
class Key,
class Value>
444 using key_type = Key;
445 using value_type = Value;
446 using status_type = Status;
447 using index_type = index_t;
448 using tag = tags::key_value_storage;
453 HOSTQUALIFIER INLINEQUALIFIER
454 explicit AoSStore(
const index_type capacity)
noexcept :
455 status_(status_type::not_initialized()),
462 const auto total_bytes =
sizeof(pair_t) * capacity;
464 if(helpers::available_gpu_memory() >= total_bytes)
466 cudaMalloc(&pairs_,
sizeof(pair_t) * capacity);
468 status_ = status_type::none();
472 status_ += status_type::out_of_memory();
477 status_ += status_type::invalid_configuration();
484 HOSTDEVICEQUALIFIER INLINEQUALIFIER
485 AoSStore(
const AoSStore& o)
noexcept :
487 capacity_(o.capacity_),
495 HOSTQUALIFIER INLINEQUALIFIER
496 AoSStore(AoSStore&& o)
noexcept :
497 status_(std::move(o.status_)),
498 capacity_(std::move(o.capacity_)),
499 pairs_(std::move(o.pairs_)),
500 is_copy_(std::move(o.is_copy_))
505 #ifndef __CUDA_ARCH__
508 HOSTQUALIFIER INLINEQUALIFIER
513 if(pairs_ !=
nullptr) cudaFree(pairs_);
522 HOSTQUALIFIER INLINEQUALIFIER
523 void init_keys(
const key_type key,
const cudaStream_t stream = 0)
noexcept
525 if(!status_.has_any())
527 helpers::lambda_kernel
528 <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
529 ([=, *
this] DEVICEQUALIFIER
531 const index_type tid = helpers::global_thread_id();
535 pairs_[tid].key = key;
545 HOSTQUALIFIER INLINEQUALIFIER
546 void init_values(
const value_type value,
const cudaStream_t stream = 0)
noexcept
548 if(!status_.has_any())
550 helpers::lambda_kernel
551 <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
552 ([=, *
this] DEVICEQUALIFIER
554 const index_type tid = helpers::global_thread_id();
558 pairs_[tid].value = value;
569 HOSTQUALIFIER INLINEQUALIFIER
572 const value_type value,
573 const cudaStream_t stream = 0)
noexcept
575 if(!status_.has_any())
577 helpers::lambda_kernel
578 <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
579 ([=, *
this] DEVICEQUALIFIER
581 const index_type tid = helpers::global_thread_id();
585 pairs_[tid].key = key;
586 pairs_[tid].value = value;
596 DEVICEQUALIFIER INLINEQUALIFIER
597 pair_t& operator[](
const index_type i)
noexcept
606 DEVICEQUALIFIER INLINEQUALIFIER
607 const pair_t& operator[](
const index_type i)
const noexcept
615 HOSTDEVICEQUALIFIER INLINEQUALIFIER
616 status_type status()
const noexcept
624 HOSTDEVICEQUALIFIER INLINEQUALIFIER
625 index_type capacity()
const noexcept
633 HOSTQUALIFIER INLINEQUALIFIER
634 index_type bytes_total()
const noexcept
636 return capacity_ *
sizeof(pair_t);
641 const index_type capacity_;
665 template<
class Store>
669 using value_type =
typename Store::value_type;
676 DEVICEQUALIFIER INLINEQUALIFIER
677 constexpr explicit Bucket(
682 constexpr explicit Bucket(
742 template<
class Store>
751 HOSTDEVICEQUALIFIER INLINEQUALIFIER
841 constexpr bool is_full()
const noexcept
857 return !(*
this ==
other);
893 index_t BucketIndexBits = 32,
894 index_t ValueCounterBits = 20,
895 index_t BucketSizeBits = 10>
901 "Value type must be trivially copyable.");
904 (BucketIndexBits + ValueCounterBits + BucketSizeBits + 2 <= 64),
905 "Too many bits for bucket index and value counter and bucket size.");
916 using value_type = Value;
926 HOSTDEVICEQUALIFIER INLINEQUALIFIER
1063 #ifndef __CUDA_ARCH__
1292 template<
class Func>
1446 const index_type capacity_;
1447 const float bucket_grow_factor_;
1448 const index_type min_bucket_size_;
1449 const index_type max_bucket_size_;
1450 bucket_type * buckets_;
1451 index_type * next_free_bucket_;