warpcore 0.0.1
Hashing at the Speed of Light on modern CUDA-accelerators
multi_bucket_hash_table.cuh
Go to the documentation of this file.
1 #ifndef WARPCORE_MULTI_BUCKET_HASH_TABLE_CUH
2 #define WARPCORE_MULTI_BUCKET_HASH_TABLE_CUH
3 
4 #include "hash_set.cuh"
5 
6 namespace warpcore
7 {
8 
9 template<
10  class Value,
11  std::uint32_t BucketSize = 1>
12 struct ArrayBucket {
13  using value_type = Value;
14  using index_type = std::uint32_t;
15 
16  static_assert(
17  BucketSize > 0,
18  "invalid bucket size of 0");
19 
20  HOSTDEVICEQUALIFIER INLINEQUALIFIER
21  explicit ArrayBucket(value_type value) noexcept
22  {
23  #ifdef __CUDA_ARCH__
24  #pragma unroll
25  #endif
26  for(index_type i = 0; i < bucket_size(); ++i)
27  values_[i] = value;
28  }
29 
31  ArrayBucket(const ArrayBucket& other) noexcept
32  {
33  #ifdef __CUDA_ARCH__
34  #pragma unroll
35  #endif
36  for(index_type i = 0; i < bucket_size(); ++i)
37  values_[i] = other.values_[i];
38  }
39 
41  ArrayBucket& operator =(const ArrayBucket& other) noexcept
42  {
43  #ifdef __CUDA_ARCH__
44  #pragma unroll
45  #endif
46  for(index_type i = 0; i < bucket_size(); ++i)
47  values_[i] = other.values_[i];
48  return *this;
49  }
50 
51  /*! \brief get bucket size
52  * \return bucket size
53  */
55  static constexpr index_type bucket_size() noexcept
56  {
57  return BucketSize;
58  }
59 
60  /*! \brief accessor
61  * \param[in] i index to access
62  * \return value at position \c i
63  */
65  constexpr value_type& operator[](const index_type i) noexcept
66  {
67  return values_[i];
68  }
69 
70  /*! \brief const accessor
71  * \param[in] i index to access
72  * \return value at position \c i
73  */
75  constexpr const value_type& operator[](const index_type i) const noexcept
76  {
77  return values_[i];
78  }
79 
81 };
82 
83 
84 /*! \brief multi-value hash table
85  * \tparam Key key type ( \c std::uint32_t or \c std::uint64_t )
86  * \tparam Value value type
87  * \tparam EmptyKey key which represents an empty slot
88  * \tparam TombstoneKey key which represents an erased slot
89  * \tparam ProbingScheme probing scheme from \c warpcore::probing_schemes
90  * \tparam TableStorage memory layout from \c warpcore::storage::key_value
91  * \tparam TempMemoryBytes size of temporary storage (typically a few kB)
92  */
93 template<
94  class Key,
95  class Value,
103 {
104  static_assert(
106  "invalid key type");
107 
108  static_assert(
110  "invalid value type");
111 
112  static_assert(
114  "empty key and tombstone key must not be identical");
115 
116  static_assert(
118  "not a valid probing scheme type");
119 
120  static_assert(
121  std::is_same<typename ProbingScheme::key_type, Key>::value,
122  "probing key type differs from table's key type");
123 
124  static_assert(
126  "not a valid storage type");
127 
128  static_assert(
129  std::is_same<typename TableStorage::key_type, Key>::value,
130  "storage's key type differs from table's key type");
131 
132  static_assert(
134  "storage's value type differs from table's value type");
135 
136  static_assert(
137  TempMemoryBytes >= sizeof(index_t),
138  "temporary storage must at least be of size index_type");
139 
141 
142 public:
143  using key_type = Key;
144  using value_type = Value;
149 
150  /*! \brief get empty key
151  * \return empty key
152  */
154  static constexpr key_type empty_key() noexcept
155  {
156  return EmptyKey;
157  }
158 
159  /*! \brief get tombstone key
160  * \return tombstone key
161  */
163  static constexpr key_type tombstone_key() noexcept
164  {
165  return TombstoneKey;
166  }
167 
168  /*! \brief get empty value
169  * \return empty value
170  */
172  static constexpr value_type empty_value() noexcept
173  {
174  return EmptyValue;
175  }
176 
177 
178  /*! \brief get cooperative group size
179  * \return cooperative group size
180  */
182  static constexpr index_type cg_size() noexcept
183  {
184  return ProbingScheme::cg_size();
185  }
186 
187  /*! \brief get bucket size
188  * \return bucket size
189  */
191  static constexpr index_type bucket_size() noexcept
192  {
194  }
195 
196  /*! \brief constructor
197  * \param[in] min_capacity minimum number of slots in the hash table
198  * \param[in] seed random seed
199  * \param[in] max_values_per_key maximum number of values to store per key
200  * \param[in] no_init whether to initialize the table at construction or not
201  */
204  const index_type min_capacity,
205  const key_type seed = defaults::seed<key_type>(),
208  const bool no_init = false) noexcept :
209  status_(nullptr),
211  temp_(TempMemoryBytes / sizeof(index_type)),
212  seed_(seed),
214  num_keys_(nullptr),
215  num_occupied_(nullptr),
216  is_copy_(false),
217  is_initialized_(false)
218  {
219  cudaMalloc(&status_, sizeof(status_type));
220  cudaMalloc(&num_keys_, sizeof(index_type));
222 
224 
225  if(!no_init) init();
226  }
227 
228  /*! \brief copy-constructor (shallow)
229  * \param[in] object to be copied
230  */
233  status_(o.status_),
234  table_(o.table_),
235  temp_(o.temp_),
236  seed_(o.seed_),
240  is_copy_(true),
242  {}
243 
244  /*! \brief move-constructor
245  * \param[in] object to be moved
246  */
249  status_(std::move(o.status_)),
250  table_(std::move(o.table_)),
251  temp_(std::move(o.temp_)),
252  seed_(std::move(o.seed_)),
258  {
259  o.is_copy_ = true;
260  }
261 
262  #ifndef __CUDA_ARCH__
263  /*! \brief destructor
264  */
267  {
268  if(!is_copy_)
269  {
270  if(status_ != nullptr) cudaFree(status_);
271  if(num_keys_ != nullptr) cudaFree(num_keys_);
272  if(num_occupied_ != nullptr) cudaFree(num_occupied_);
273  }
274  }
275  #endif
276 
277  /*! \brief (re)initialize the hash table
278  * \param[in] stream CUDA stream in which this operation is executed in
279  */
281  void init(const cudaStream_t stream = 0) noexcept
282  {
283  is_initialized_ = false;
284 
287  {
290 
292 
295 
296  is_initialized_ = true;
297  }
298  }
299 
300 private:
302  bool insert_into_bucket(
303  const index_type last_key_pos,
304  const value_type value_in,
305  const cg::thread_block_tile<cg_size()>& group,
307  status_type& status) noexcept
308  {
309  #pragma unroll
310  for(index_type i = 0;
311  i < SDIV(bucket_size(),cg_size())*cg_size();
312  i += cg_size())
313  {
314  // first bucket value always written after key insert
315  const value_type table_value =
316  ((0 < group.thread_rank()) && (i + group.thread_rank() < bucket_size())) ?
318  ~empty_value();
319 
321 
323 
325  {
329  return true;
330  }
331 
332  bool success = false;
333 
334  while(empty_value_mask)
335  {
336  const auto leader = ffs(empty_value_mask) - 1;
337 
338  if(group.thread_rank() == leader)
339  {
340  const auto old =
342 
343  success = (old == table_value);
344  }
345 
346  if(group.any(success))
347  {
348  status = (num_values > 0) ?
350  return true;
351  }
352 
353  ++num_values;
355  {
359  return true;
360  }
361 
362  empty_value_mask ^= 1UL << leader;
363  }
364  }
365 
366  return false;
367  }
368 
369 public:
370  /*! \brief inserts a key into the hash table
371  * \param[in] key_in key to insert into the hash table
372  * \param[in] value_in value that corresponds to \c key_in
373  * \param[in] group cooperative group
374  * \param[in] probing_length maximum number of probing attempts
375  * \return status (per thread)
376  */
379  const key_type key_in,
380  const value_type value_in,
381  const cg::thread_block_tile<cg_size()>& group,
382  const index_type probing_length = defaults::probing_length()) noexcept
383  {
384  if(!is_initialized_)
385  {
386  return status_type::not_initialized();
387  }
388 
389  if(!is_valid_key(key_in))
390  {
392  return status_type::invalid_key();
393  }
394 
396  {
398  return status_type::invalid_value();
399  }
400 
402  index_type num_values_plus_bucket_size = 0; // count one bucket less
403 
405  for(index_type i = iter.begin(key_in, seed_); i != iter.end(); i = iter.next())
406  {
408 
410 
411  const auto key_found_mask = group.ballot(table_key == key_in);
412 
413  const auto new_last_key_pos = group.shfl(i, 31 - __clz(key_found_mask));
414 
416 
418 
419  // early exit
421  {
422  if(bucket_size() == 1)
423  {
424  // num values = num buckets, so no space left
428  return status;
429  }
430  else
431  {
433  // check if space left in last bucket
436  return status;
437  }
438  }
439 
440  while(empty_key_mask)
441  {
443  if((bucket_size() > 1) &&
447  return status;
448 
449  // insert key
450  bool success = false;
451  bool key_collision = false;
452 
453  const auto leader = ffs(empty_key_mask) - 1;
454 
455  if(group.thread_rank() == leader)
456  {
457  const auto old =
459 
460  success = (old == table_key);
461  key_collision = (old == key_in);
462 
463  if(success)
464  {
465  // relaxed write to first slot in value array
466  table_[i].value[0] = value_in;
467 
469 
471  {
473  }
474  }
475  }
476 
477  if(group.any(success))
478  {
479  return (num_values_plus_bucket_size > 0) ?
481  }
482 
485 
486  if(bucket_size() == 1)
487  {
489  {
493  return status;
494  }
495  }
496  else
497  {
498  // check position in next iteration
499  const auto new_last_key_pos = group.shfl(i, leader);
501  }
502 
503  empty_key_mask ^= 1UL << leader;
504  }
505  }
506 
508  if((bucket_size() > 1) &&
512  return status;
513 
518  return status;
519  }
520 
521  /*! \brief insert a set of keys into the hash table
522  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
523  * \param[in] keys_in pointer to keys to insert into the hash table
524  * \param[in] values_in corresponds values to \c keys_in
525  * \param[in] num_in number of keys to insert
526  * \param[in] stream CUDA stream in which this operation is executed in
527  * \param[in] probing_length maximum number of probing attempts
528  * \param[out] status_out status information per key
529  */
530  template<class StatusHandler = defaults::status_handler_t>
532  void insert(
533  const key_type * const keys_in,
534  const value_type * const values_in,
535  const index_type num_in,
536  const cudaStream_t stream = 0,
538  typename StatusHandler::base_type * const status_out = nullptr) noexcept
539  {
540  static_assert(
542  "not a valid status handler type");
543 
544  if(!is_initialized_) return;
545 
549  }
550 
551  /*! \brief retrieves all values to a corresponding key
552  * \param[in] key_in key to retrieve from the hash table
553  * \param[out] values_out values for \c key_in
554  * \param[out] num_out number of retrieved values
555  * \param[in] group cooperative group
556  * \param[in] probing_length maximum number of probing attempts
557  * \return status (per thread)
558  */
561  const key_type key_in,
562  value_type * const values_out,
564  const cg::thread_block_tile<cg_size()>& group,
565  const index_type probing_length = defaults::probing_length()) const noexcept
566  {
567  if(values_out == nullptr)
568  {
571  return status_type::dry_run() + status;
572  }
573  else
574  {
575  return for_each([=, *this] DEVICEQUALIFIER
576  (const key_type /* key */, const value_type& value, const index_type index)
577  {
579  },
580  key_in,
581  num_out,
582  group,
584  }
585  }
586 
587  /*! \brief retrieve a set of keys from the hash table
588  * \note this method has a dry-run mode where it only calculates the needed array sizes in case no memory (aka \c nullptr ) is provided
589  * \note \c end_offsets_out can be \c begin_offsets_out+1
590  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
591  * \param[in] keys_in pointer to keys to retrieve from the hash table
592  * \param[in] num_in number of keys to retrieve
593  * \param[out] begin_offsets_out begin of value range for a corresponding key in \c values_out
594  * \param[out] end_offsets_out end of value range for a corresponding key in \c values_out
595  * \param[out] num_out total number of values retrieved by this operation
596  * \param[in] stream CUDA stream in which this operation is executed in
597  * \param[in] probing_length maximum number of probing attempts
598  * \param[out] status_out status information (per key)
599  */
600  template<class StatusHandler = defaults::status_handler_t>
602  void retrieve(
603  const key_type * const keys_in,
604  const index_type num_in,
606  index_type * const end_offsets_out,
607  value_type * const values_out,
609  const cudaStream_t stream = 0,
611  typename StatusHandler::base_type * const status_out = nullptr) const noexcept
612  {
613  static_assert(
615  "not a valid status handler type");
616 
617  if(!is_initialized_) return;
618 
619  // cub::DeviceScan::InclusiveSum takes input sizes of type int
620  if(num_in > std::numeric_limits<int>::max())
621  {
623 
624  return;
625  }
626 
627  num_values(
628  keys_in,
629  num_in,
630  num_out,
632  stream,
634 
635  if(values_out != nullptr)
636  {
638 
640  values_out,
641  temp_bytes,
644  num_in,
645  stream);
646 
648 
650  {
652  begin_offsets_out + 1,
654  sizeof(index_type) * (num_in - 1),
655  D2D,
656  stream);
657  }
658 
661  (
662  keys_in,
663  num_in,
666  values_out,
667  *this,
669  status_out);
670  }
671  else
672  {
673  if(status_out != nullptr)
674  {
677  ([=, *this] DEVICEQUALIFIER
678  {
680 
681  if(tid < num_in)
682  {
684  }
685  });
686  }
687 
689  }
690 
691  if(stream == 0)
692  {
694  }
695  }
696 
697  /*! \brief retrieves all elements from the hash table
698  * \note this method has a dry-run mode where it only calculates the needed array sizes in case no memory (aka \c nullptr ) is provided
699  * \note this method implements a multi-stage dry-run mode
700  * \param[out] keys_out pointer to the set of unique keys
701  * \param[out] num_keys_out number of unique keys
702  * \param[out] begin_offsets_out begin of value range for a corresponding key in \c values_out
703  * \param[out] end_offsets_out end of value range for a corresponding key in \c values_out
704  * \param[out] values_out array which holds all retrieved values
705  * \param[out] num_values_out total number of values retrieved by this operation
706  * \param[in] stream CUDA stream in which this operation is executed in
707  */
710  key_type * const keys_out,
713  index_type * const end_offsets_out,
714  value_type * const values_out,
716  const cudaStream_t stream = 0) const noexcept
717  {
718  if(!is_initialized_) return;
719 
721 
722  if(keys_out != nullptr)
723  {
724  retrieve(
725  keys_out,
726  num_keys_out,
729  values_out,
731  stream);
732  }
733 
734  if(stream == 0)
735  {
737  }
738  }
739 
740  /*! \brief retrieve all unqiue keys
741  * \info this method has a dry-run mode where it only calculates the needed array sizes in case no memory (aka \c nullptr ) is provided
742  * \param[out] keys_out retrieved unqiue keys
743  * \param[out] num_out numof unique keys
744  * \param[in] stream CUDA stream in which this operation is executed in
745  */
748  key_type * const keys_out,
750  const cudaStream_t stream = 0) const noexcept
751  {
752  if(!is_initialized_) return;
753 
754  if(keys_out != nullptr)
755  {
756  index_type * const tmp = temp_.get();
757  cudaMemsetAsync(tmp, 0, sizeof(index_type), stream);
758 
761  ([=] DEVICEQUALIFIER (const key_type& key)
762  {
764  keys_out[out] = key;
765  }, *this);
766 
768 
769  if(stream == 0)
770  {
772  }
773  }
774  else
775  {
778  }
779  }
780 
781  /*! \brief applies a funtion over all values of a specified key
782  * \tparam Func type of map i.e. CUDA device lambda
783  * \param[in] f map to apply
784  * \param[in] key_in key to consider
785  * \param[out] num_values_out number of values associated to \c key_in
786  * \param[in] group cooperative group
787  * \param[in] probing_length maximum number of probing attempts
788  * \return status (per thread)
789  */
790  template<class Func>
793  Func f,
794  const key_type key_in,
796  const cg::thread_block_tile<cg_size()>& group,
797  const index_type probing_length = defaults::probing_length()) const noexcept
798  {
800 
801  if(!is_valid_key(key_in))
802  {
803  num_values_out = 0;
805  return status_type::invalid_key();
806  }
807 
809 
810  index_type num = 0;
811  for(index_type i = iter.begin(key_in, seed_); i != iter.end(); i = iter.next())
812  {
813  const auto table_key = table_[i].key;
814  const auto hit = (table_key == key_in);
815  const auto hit_mask = group.ballot(hit);
816 
817  index_type num_empty = 0;
818  if(hit)
819  {
820  const auto j =
821  num + bucket_size() * __popc(hit_mask & ((1U << group.thread_rank()) - 1));
822 
823  const auto bucket = table_[i].value;
824  #pragma unroll
825  for(index_type b = 0; b < bucket_size(); ++b) {
826  const auto& value = bucket[b];
827  // if(value != empty_value() && j+b < max_values_per_key_)
828  if(value != empty_value())
829  f(key_in, value, j+b);
830  else
831  ++num_empty;
832  }
833  }
834 
835  // get num_empty from last bucket in group
836  // if not hit this return 0 from last thread
838 
840 
842  {
844 
845  if(num == 0)
846  {
848  return status_type::key_not_found();
849  }
850  else
851  {
852  return status_type::none();
853  }
854  }
855  }
856 
860  }
861 
862  /*! \brief applies a funtion over all key bucket pairs inside the table
863  * \tparam Func type of map i.e. CUDA device lambda
864  * \param[in] f map to apply
865  * \param[in] stream CUDA stream in which this operation is executed in
866  * \param[in] size of dynamic shared memory to reserve for this execution
867  */
868  template<class Func>
871  Func f, // TODO const?
872  const cudaStream_t stream = 0,
873  const index_type smem_bytes = 0) const noexcept
874  {
875  if(!is_initialized_) return;
876 
879  (f, *this);
880  }
881 
882  /*! \brief applies a funtion over all key value pairs inside the table
883  * \tparam Func type of map i.e. CUDA device lambda
884  * \param[in] f map to apply
885  * \param[in] stream CUDA stream in which this operation is executed in
886  * \param[in] size of dynamic shared memory to reserve for this execution
887  */
888  template<class Func>
891  Func f, // TODO const?
892  const cudaStream_t stream = 0,
893  const index_type smem_bytes = 0) const noexcept
894  {
895  if(!is_initialized_) return;
896 
897  auto bucket_f = [=, f=std::move(f)] DEVICEQUALIFIER
898  (const key_type key, const bucket_type bucket) mutable
899  {
900  #pragma unroll
901  for(index_type b = 0; b < bucket_size(); ++b) {
902  const auto& value = bucket[b];
903  if(value != empty_value())
904  f(key, value);
905  }
906  };
907 
910  (bucket_f, *this);
911  }
912 
913  /*! \brief applies a funtion over all key value pairs
914  * \tparam Func type of map i.e. CUDA device lambda
915  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
916  * \param[in] f map to apply
917  * \param[in] keys_in keys to consider
918  * \param[in] num_in number of keys
919  * \param[in] stream CUDA stream in which this operation is executed in
920  * \param[in] probing_length maximum number of probing attempts
921  * \param[out] status_out status information (per key)
922  * \param[in] size of dynamic shared memory to reserve for this execution
923  */
924  template<class Func, class StatusHandler = defaults::status_handler_t>
926  void for_each(
927  Func f, // TODO const?
928  const key_type * const keys_in,
929  const index_type num_in,
930  const cudaStream_t stream = 0,
932  typename StatusHandler::base_type * const status_out = nullptr,
933  const index_type smem_bytes = 0) const noexcept
934  {
935  static_assert(
937  "not a valid status handler type");
938 
939  if(!is_initialized_) return;
940 
943  (f, keys_in, num_in, *this, status_out);
944  }
945 
946  /*! \brief number of unique keys inside the table
947  * \param[in] stream CUDA stream in which this operation is executed in
948  * \return number of unique keys
949  */
951  index_type num_keys(const cudaStream_t stream = 0) const noexcept
952  {
953  index_type num = 0;
954 
956 
958 
959  return num;
960  }
961 
962  /*! \brief number of occupied slots in the hash table
963  * \param[in] stream CUDA stream in which this operation is executed in
964  * \return the number of occupied slots
965  */
967  index_type num_occupied(const cudaStream_t stream = 0) const noexcept
968  {
969  index_type num = 0;
970 
972 
974 
975  return num;
976  }
977 
978  /*! \brief total number of values inside the table
979  * \param[in] key_in key to be probed
980  * \param[out] num_out number of values associated to \c key_in*
981  * \param[in] group cooperative group
982  * \param[in] probing_length maximum number of probing attempts
983  * \return status (per thread)
984  */
987  const key_type key_in,
989  const cg::thread_block_tile<cg_size()>& group,
990  const index_type probing_length = defaults::probing_length()) const noexcept
991  {
992  return for_each([=] DEVICEQUALIFIER (
993  const key_type /* key */,
994  const value_type& /* value */,
995  const index_type /* index */) {},
996  key_in,
997  num_out,
998  group,
1000  }
1001 
1002  /*! \brief number of values associated to a set of keys
1003  * \info this function returns only \c num_out if \c num_per_key_out==nullptr
1004  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
1005  * \param[in] keys_in keys to consider
1006  * \param[in] num_in number of keys
1007  * \param[out] num_out total number of values
1008  * \param[out] num_per_key_out number of values per key
1009  * \param[in] stream CUDA stream in which this operation is executed in
1010  * \param[in] probing_length maximum number of probing attempts
1011  * \param[out] status_out status information (per key)
1012  */
1013  template<class StatusHandler = defaults::status_handler_t>
1016  const key_type * const keys_in,
1017  const index_type num_in,
1019  index_type * const num_per_key_out = nullptr,
1020  const cudaStream_t stream = 0,
1022  typename StatusHandler::base_type * const status_out = nullptr) const noexcept
1023  {
1024  if(!is_initialized_) return;
1025 
1026  // TODO check if shared memory is benefitial
1027 
1028  index_type * const tmp = temp_.get();
1029  cudaMemsetAsync(tmp, 0, sizeof(index_type), stream);
1030 
1034 
1036 
1037  if(stream == 0)
1038  {
1040  }
1041  }
1042 
1043  /*! \brief number of values stored inside the hash table
1044  * \info alias for \c size()
1045  * \param[in] stream CUDA stream in which this operation is executed in
1046  * \return the number of values
1047  */
1049  index_type num_values(const cudaStream_t stream = 0) const noexcept
1050  {
1051  return size(stream);
1052  }
1053 
1054  /*! \brief number of values stored inside the hash table
1055  * \param[in] stream CUDA stream in which this operation is executed in
1056  * \return the number of values
1057  */
1059  index_type size(const cudaStream_t stream = 0) const noexcept
1060  {
1061  if(!is_initialized_) return 0;
1062 
1063  index_type out;
1064  index_type * tmp = temp_.get();
1065 
1066  cudaMemsetAsync(tmp, 0, sizeof(index_t), stream);
1067 
1070  (tmp, *this);
1071 
1073  &out,
1074  tmp,
1075  sizeof(index_type),
1076  D2H,
1077  stream);
1078 
1080 
1081  return out;
1082  }
1083 
1084  /*! \brief current load factor of the hash table
1085  * \param[in] stream CUDA stream in which this operation is executed in
1086  * \return load factor
1087  */
1089  float key_load_factor(const cudaStream_t stream = 0) const noexcept
1090  {
1091  return float(num_occupied(stream)) / float(capacity());
1092  }
1093 
1094  /*! \brief current load factor of the hash table
1095  * \param[in] stream CUDA stream in which this operation is executed in
1096  * \return load factor
1097  */
1099  float value_load_factor(const cudaStream_t stream = 0) const noexcept
1100  {
1101  return float(num_values(stream)) / float(capacity()*bucket_size());
1102  }
1103 
1104  /*! \brief current storage density of the hash table
1105  * \param[in] stream CUDA stream in which this operation is executed in
1106  * \return storage density
1107  */
1109  float storage_density(const cudaStream_t stream = 0) const noexcept
1110  {
1111  const index_type key_bytes = num_keys(stream) * sizeof(key_type);
1112  const index_type value_bytes = num_values(stream) * sizeof(value_type);
1114 
1115  return float(key_bytes + value_bytes) / float(table_bytes);
1116  }
1117 
1118  /*! \brief current relative storage density of the hash table
1119  * \param stream CUDA stream in which this operation is executed in
1120  * \return storage density
1121  */
1123  float relative_storage_density(const cudaStream_t stream = 0) const noexcept
1124  {
1125  const index_type key_bytes = num_keys(stream) * sizeof(key_type);
1126  const index_type value_bytes = num_values(stream) * sizeof(value_type);
1127  const index_type occupied_bytes =
1128  num_occupied(stream) * sizeof(key_type) + value_bytes;
1129 
1130  return float(key_bytes + value_bytes) / (occupied_bytes);
1131  }
1132 
1133  /*! \brief get the key capacity of the hash table
1134  * \return number of key slots in the hash table
1135  */
1137  index_type capacity() const noexcept
1138  {
1139  return table_.capacity();
1140  }
1141 
1142  /*! \brief get the maximum value capacity of the hash table
1143  * \return maximum value capacity
1144  */
1146  index_type value_capacity() const noexcept
1147  {
1148  return table_.capacity() * bucket_size();
1149  }
1150 
1151  /*! \brief get the total number of bytes occupied by this data structure
1152  * \return bytes
1153  */
1155  index_type bytes_total() const noexcept
1156  {
1157  return table_.bytes_total() + sizeof(index_type);
1158  }
1159 
1160  /*! \brief indicates if the hash table is properly initialized
1161  * \return \c true iff the hash table is properly initialized
1162  */
1164  bool is_initialized() const noexcept
1165  {
1166  return is_initialized_;
1167  }
1168 
1169  /*! \brief get the status of the hash table
1170  * \param[in] stream CUDA stream in which this operation is executed in
1171  * \return the status
1172  */
1174  status_type peek_status(const cudaStream_t stream = 0) const noexcept
1175  {
1177 
1178  if(status_ != nullptr)
1179  {
1181  &status,
1182  status_,
1183  sizeof(status_type),
1184  D2H,
1185  stream);
1186 
1188  }
1189 
1190  return status;
1191  }
1192 
1193  /*! \brief get and reset the status of the hash table
1194  * \param[in] stream CUDA stream in which this operation is executed in
1195  * \return the status
1196  */
1199  {
1201 
1202  if(status_ != nullptr)
1203  {
1205  &status,
1206  status_,
1207  sizeof(status_type),
1208  D2H,
1209  stream);
1210 
1212  }
1213 
1214  return status;
1215  }
1216 
1217  /*! \brief checks if \c key is equal to \c EmptyKey
1218  * \return \c bool
1219  */
1221  static constexpr bool is_empty_key(const key_type key) noexcept
1222  {
1223  return (key == empty_key());
1224  }
1225 
1226  /*! \brief checks if \c key is equal to \c TombstoneKey
1227  * \return \c bool
1228  */
1230  static constexpr bool is_tombstone_key(const key_type key) noexcept
1231  {
1232  return (key == tombstone_key());
1233  }
1234 
1235  /*! \brief checks if \c key is not equal to \c (EmptyKey||TombstoneKey)
1236  * \return \c bool
1237  */
1239  static constexpr bool is_valid_key(const key_type key) noexcept
1240  {
1241  return (key != empty_key() && key != tombstone_key());
1242  }
1243 
1244  /*! \brief checks if \c value is equal to \c EmptyValue
1245  * \return \c bool
1246  */
1248  static constexpr bool is_empty_value(const value_type value) noexcept
1249  {
1250  return (value == empty_value());
1251  }
1252 
1253  /*! \brief checks if \c value is equal not to \c EmptyValue
1254  * \return \c bool
1255  */
1257  static constexpr bool is_valid_value(const value_type value) noexcept
1258  {
1259  return (value != empty_value());
1260  }
1261 
1262  /*! \brief indicates if this object is a shallow copy
1263  * \return \c bool
1264  */
1266  bool is_copy() const noexcept
1267  {
1268  return is_copy_;
1269  }
1270 
1271 private:
1272  /*! \brief assigns the hash table's status
1273  * \info \c const on purpose
1274  * \param[in] status new status
1275  * \param[in] stream CUDA stream in which this operation is executed in
1276  */
1278  void assign_status(
1279  const status_type status,
1280  const cudaStream_t stream = 0) const noexcept
1281  {
1282  if(status_ != nullptr)
1283  {
1285  status_,
1286  &status,
1287  sizeof(status_type),
1288  H2D,
1289  stream);
1290 
1292  }
1293  }
1294 
1295  /*! \brief joins additional flags to the hash table's status
1296  * \info \c const on purpose
1297  * \param[in] status new status
1298  * \param[in] stream CUDA stream in which this operation is executed in
1299  */
1301  void join_status(
1302  const status_type status,
1303  const cudaStream_t stream = 0) const noexcept
1304  {
1305  if(status_ != nullptr)
1306  {
1308  const status_type joined = peeked + status;
1309 
1310  if(joined != peeked)
1311  {
1313  }
1314  }
1315  }
1316 
1317  /*! \brief joins additional flags to the hash table's status
1318  * \info \c const on purpose
1319  * \param[in] status new status
1320  */
1322  void device_join_status(const status_type status) const noexcept
1323  {
1324  if(status_ != nullptr)
1325  {
1327  }
1328  }
1329 
1330  status_type * status_; //< pointer to status
1331  TableStorage table_; //< actual key/value storage
1332  temp_type temp_; //< temporary memory
1333  key_type seed_; //< random seed
1334  index_type max_values_per_key_; //< maximum number of values to store per key
1335  index_type * num_keys_; //< pointer to the count of unique keys
1336  index_type * num_occupied_; //< pointer to the count of occupied key slots
1337  bool is_copy_; //< indicates if table is a shallow copy
1338  bool is_initialized_; //< indicates if table is properly initialized
1339 
1340  template<class Core>
1342  friend void kernels::size(index_type * const, const Core);
1343 
1344  template<class Core>
1346  friend void kernels::num_values(index_type * const, const Core);
1347 
1348  template<class Func, class Core>
1350  friend void kernels::for_each(Func, const Core);
1351 
1352  template<class Func, class Core>
1354  friend void kernels::for_each_unique_key(Func, const Core);
1355 
1356  template<class Core, class StatusHandler>
1358  friend void kernels::retrieve(
1359  const typename Core::key_type * const,
1360  const index_type,
1361  const index_type * const,
1362  const index_type * const,
1363  typename Core::value_type * const,
1364  const Core,
1365  const index_type,
1366  typename StatusHandler::base_type * const);
1367 
1368 }; // class MultiBucketHashTable
1369 
1370 } // namespace warpcore
1371 
1372 #endif /* WARPCORE_MULTI_BUCKET_HASH_TABLE_CUH */