warpcore 0.0.1
Hashing at the Speed of Light on modern CUDA-accelerators
bucket_list_hash_table.cuh
Go to the documentation of this file.
1 #ifndef WARPCORE_BUCKET_LIST_HASH_TABLE_CUH
2 #define WARPCORE_BUCKET_LIST_HASH_TABLE_CUH
3 
5 
6 namespace warpcore
7 {
8 
9 /*! \brief bucket list hash table
10  * \tparam Key key type (\c std::uint32_t or \c std::uint64_t)
11  * \tparam Value value type
12  * \tparam EmptyKey key which represents an empty slot
13  * \tparam TombstoneKey key which represents an erased slot
14  * \tparam ValueStore storage class from \c warpcore::storage::multi_value
15  * \tparam ProbingScheme probing scheme from \c warpcore::probing_schemes
16  */
17 template<
18  class Key,
19  class Value,
25 {
26  static_assert(
28  "not a valid storage type");
29 
30 public:
31  // TODO why public?
32  using handle_type = typename ValueStore::handle_type;
33 
34 private:
36  Key,
38  EmptyKey,
41 
43 
44 public:
45  using key_type = Key;
46  using value_type = Value;
49 
50  /*! \brief get empty key
51  * \return empty key
52  */
54  static constexpr key_type empty_key() noexcept
55  {
56  return EmptyKey;
57  }
58 
59  /*! \brief get tombstone key
60  * \return tombstone key
61  */
63  static constexpr key_type tombstone_key() noexcept
64  {
65  return TombstoneKey;
66  }
67 
68  /*! \brief checks if \c key is equal to \c (EmptyKey||TombstoneKey)
69  * \return \c bool
70  */
72  static constexpr bool is_valid_key(const key_type key) noexcept
73  {
74  return (key != empty_key() && key != tombstone_key());
75  }
76 
77  /*! \brief get cooperative group size
78  * \return cooperative group size
79  */
81  static constexpr index_type cg_size() noexcept
82  {
83  return hash_table_type::cg_size();
84  }
85 
86  /*! \brief maximum bucket size
87  * \return size
88  */
90  static constexpr index_type max_bucket_size() noexcept
91  {
92  return handle_type::max_bucket_size();
93  }
94 
95  /*! \brief constructor
96  * \param[in] key_capacity guaranteed number of key slots in the hash table
97  * \param[in] value_capacity total number of value slots
98  * \param[in] seed random seed
99  * \param[in] grow_factor bucket grow factor for \c warpcore::storage::multi_value::BucketListStore
100  * \param[in] min_bucket_size initial size of value buckets for \c warpcore::storage::multi_value::BucketListStore
101  * \param[in] max_bucket_size bucket size of \c warpcore::storage::multi_value::BucketListStore after which no more growth occurs
102  * \param[in] max_values_per_key maximum number of values to store per key
103  */
106  const index_type key_capacity,
108  const key_type seed = defaults::seed<key_type>(),
109  const float grow_factor = 1.1,
110  const index_type min_bucket_size = 1,
113  const bool no_init = false) noexcept :
114  hash_table_(key_capacity, seed, true),
117  is_copy_(false)
118  {
120 
121  if(!no_init) init(seed);
122  }
123 
124  /*! \brief copy-constructor (shallow)
125  * \param[in] object to be copied
126  */
132  is_copy_(true)
133  {}
134 
135  /*! \brief move-constructor
136  * \param[in] object to be moved
137  */
144  {
145  o.is_copy_ = true;
146  }
147 
148  /*! \brief (re)initialize the hash table
149  * \param seed random seed
150  * \param stream CUDA stream in which this operation is executed
151  */
153  void init(
154  const key_type seed,
155  const cudaStream_t stream = 0) noexcept
156  {
157  const auto status = hash_table_.peek_status(stream);
158 
160  {
165  }
166  }
167 
168  /*! \brief (re)initialize the hash table
169  * \param stream CUDA stream in which this operation is executed
170  */
172  void init(const cudaStream_t stream = 0) noexcept
173  {
175  }
176 
177  /*! \brief inserts a key/value pair into the hash table
178  * \param[in] key_in key to insert into the hash table
179  * \param[in] value_in value that corresponds to \c key_in
180  * \param[in] group cooperative group
181  * \param[in] probing_length maximum number of probing attempts
182  * \return status (per thread)
183  */
186  const key_type key_in,
187  const value_type& value_in,
188  const cg::thread_block_tile<cg_size()>& group,
189  const index_type probing_length = defaults::probing_length()) noexcept
190  {
192 
195 
196  if(handle_ptr != nullptr)
197  {
199  {
201 
203  }
204  else
205  {
207 
208  if(group.thread_rank() == 0)
209  {
211 
212  if(append_status.has_any())
213  {
215  }
216  }
217 
219  }
220  }
221 
222  return status;
223  }
224 
225  /*! \brief insert a set of keys into the hash table
226  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
227  * \param[in] keys_in pointer to keys to insert into the hash table
228  * \param[in] values_in corresponds values to \c keys_in
229  * \param[in] num_in number of keys to insert
230  * \param[in] stream CUDA stream in which this operation is executed in
231  * \param[in] probing_length maximum number of probing attempts
232  * \param[out] status_out status information per key
233  */
234  template<class StatusHandler = defaults::status_handler_t>
236  void insert(
237  const key_type * const keys_in,
238  const value_type * const values_in,
239  const index_type num_in,
240  const cudaStream_t stream = 0,
242  typename StatusHandler::base_type * const status_out = nullptr) noexcept
243  {
244  static_assert(
246  "not a valid status handler type");
247 
248  if(!hash_table_.is_initialized_) return;
249 
250  static constexpr index_type block_size = 1024;
251  static constexpr index_type groups_per_block = block_size / cg_size();
252  static constexpr index_type smem_status_size =
254  1 : groups_per_block;
255 
257  <<<SDIV(num_in * cg_size(), block_size), block_size, 0, stream>>>
258  ([=, *this] DEVICEQUALIFIER () mutable
259  {
261  const index_type btid = threadIdx.x;
262  const index_type gid = tid / cg_size();
264  const auto block = cg::this_thread_block();
265  const auto group = cg::tiled_partition<cg_size()>(block);
266 
269 
270  if(gid < num_in)
271  {
273 
275  keys_in[gid],
277  group,
279 
280  if(!std::is_same<
283  group.thread_rank() == 0)
284  {
286  }
287 
288  block.sync();
289 
290  if(btid < groups_per_block && handles[btid] != nullptr)
291  {
293 
294  const index_type block_offset =
296 
298  {
300  }
301  else
302  {
303  if(block_offset + btid < num_in){
305  *(handles[btid]),
308  }
309  }
310 
311  if(append_status.has_any())
312  {
314  }
315 
316  if(block_offset + btid < num_in){
317 
318  // TODO not zero-cost
319  if(!std::is_same<
322  {
325  status_out,
326  block_offset + btid);
327  }
328 
329  }
330  }
331  }
332 
333  });
334 
335  if(stream == 0)
336  {
338  }
339  }
340 
341  /*! \brief retrieves a key from the hash table
342  * \param[in] key_in key to retrieve from the hash table
343  * \param[out] values_out pointer to storage fo the retrieved values
344  * \param[out] num_out number of values retrieved
345  * \param[in] group cooperative group
346  * \param[in] probing_length maximum number of probing attempts
347  * \return status (per thread)
348  */
351  const key_type key_in,
352  value_type * const values_out,
354  const cg::thread_block_tile<cg_size()>& group,
355  const index_type probing_length = defaults::probing_length()) const noexcept
356  {
358 
361 
362  if(!status.has_any())
363  {
365  [=] DEVICEQUALIFIER (
366  const value_type& value,
368  {
370  },
371  handle,
372  group);
373 
375  }
376  else
377  {
378  num_out = 0;
379  }
380 
381  return status;
382  }
383 
384  /*! \brief retrieve a set of keys from the hash table
385  * \note this method has a dry-run mode where it only calculates the needed array sizes in case no memory (aka \c nullptr ) is provided
386  * \note \c end_offsets_out can be \c begin_offsets_out+1
387  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
388  * \param[in] keys_in pointer to keys to retrieve from the hash table
389  * \param[in] num_in number of keys to retrieve
390  * \param[out] begin_offsets_out
391  * \param[out] end_offsets_out
392  * \param[out] values_out retrieved values of keys in \c key_in
393  * \param[out] num_out total number of values retrieved by this operation
394  * \param[in] stream CUDA stream in which this operation is executed in
395  * \param[in] probing_length maximum number of probing attempts
396  * \param[out] status_out status information (per key)
397  */
398  template<class StatusHandler = defaults::status_handler_t>
400  void retrieve(
401  const key_type * const keys_in,
402  const index_type num_in,
404  index_type * const end_offsets_out,
405  value_type * const values_out,
407  const cudaStream_t stream = 0,
409  typename StatusHandler::base_type * const status_out = nullptr) const noexcept
410  {
411  static_assert(
413  "not a valid status handler type");
414 
415  if(!hash_table_.is_initialized_) return;
416 
417  // cub::DeviceScan::InclusiveSum takes input sizes of type int
418  if(num_in > std::numeric_limits<int>::max())
419  {
421 
422  return;
423  }
424 
425  num_values(
426  keys_in,
427  num_in,
428  num_out,
430  stream,
432 
433  if(values_out != nullptr)
434  {
436 
438  values_out,
439  temp_bytes,
442  num_in,
443  stream);
444 
446 
448  {
450  begin_offsets_out + 1,
452  sizeof(index_type) * (num_in - 1),
453  D2D,
454  stream);
455  }
456 
459  (
460  keys_in,
461  num_in,
464  values_out,
465  *this,
467  status_out);
468  }
469  else
470  {
471  if(status_out != nullptr)
472  {
475  ([=, *this] DEVICEQUALIFIER
476  {
478 
479  if(tid < num_in)
480  {
482  }
483  });
484  }
485 
487  }
488 
489  if(stream == 0)
490  {
492  }
493  }
494 
495  // TODO host retrieve which also returns the set of unique keys
496 
497  /*! \brief applies a funtion over all values of a corresponding key
498  * \tparam Func type of map i.e. CUDA device lambda
499  * \param[in] f map to apply
500  * \param[in] key_in key to retrieve
501  * \param[in] stream CUDA stream in which this operation is executed in
502  */
503  template<class Func>
506  Func f, // TODO const?
507  const key_type key_in,
508  const cg::thread_block_tile<cg_size()>& group,
509  const index_type probing_length = defaults::probing_length()) const noexcept
510  {
512 
515 
516  if(!status.has_any())
517  {
519  }
520 
521  return status;
522  }
523 
524  // TODO host functions for_each
525  // TODO get_key_set
526 
527  /*! \brief retrieves all elements from the hash table
528  * \info this method has a dry-run mode where it only calculates the needed array sizes in case no memory (aka \c nullptr ) is provided
529  * \info this method implements a multi-stage dry-run mode
530  * \param[out] keys_out pointer to the set of unique keys
531  * \param[out] num_keys_out number of unique keys
532  * \param[out] begin_offsets_out begin of value range for a corresponding key in \c values_out
533  * \param[out] end_offsets_out end of value range for a corresponding key in \c values_out
534  * \param[out] values_out array which holds all retrieved values
535  * \param[out] num_values_out total number of values retrieved by this operation
536  * \param[in] stream CUDA stream in which this operation is executed in
537  */
540  key_type * const keys_out,
543  index_type * const end_offsets_out,
544  value_type * const values_out,
546  const cudaStream_t stream = 0) const noexcept
547  {
549 
550  if(keys_out != nullptr)
551  {
552  retrieve(
553  keys_out,
554  num_keys_out,
557  values_out,
559  stream);
560  }
561 
562  if(stream == 0)
563  {
565  }
566  }
567 
568  /*! \brief retrieves the set of all keys stored inside the hash table
569  * \param[out] keys_out pointer to the retrieved keys
570  * \param[out] num_out number of retrieved keys
571  * \param[in] stream CUDA stream in which this operation is executed in
572  * \note if \c keys_out==nullptr then only \c num_out will be computed
573  */
576  key_type * const keys_out,
578  const cudaStream_t stream = 0) const noexcept
579  {
580  if(!hash_table_.is_initialized_) return;
581 
582  if(keys_out == nullptr)
583  {
585  }
586  else
587  {
590 
592  [=] DEVICEQUALIFIER (key_type key, const auto&)
593  {
595  }, stream);
596 
598  &num_out, key_count, sizeof(index_type), D2H, stream);
599  }
600 
601  if(stream == 0 || keys_out == nullptr)
602  {
604  }
605  }
606 
607  /*! \brief get load factor of the key store
608  * \param stream CUDA stream in which this operation is executed in
609  * \return load factor
610  */
612  float key_load_factor(const cudaStream_t stream = 0) const noexcept
613  {
615  }
616 
617  /*! \brief get load factor of the value store
618  * \param stream CUDA stream in which this operation is executed in
619  * \return load factor
620  */
622  float value_load_factor(const cudaStream_t stream = 0) const noexcept
623  {
625  }
626 
627  /*! \brief get the the total number of bytes occupied by this data structure
628  * \return bytes
629  */
631  index_type bytes_total() const noexcept
632  {
634  }
635 
636  /*! \brief get the the number of bytes in this data structure occupied by keys
637  * \param stream CUDA stream in which this operation is executed in
638  * \return bytes
639  */
641  index_type bytes_keys(const cudaStream_t stream = 0) const noexcept
642  {
643  return num_keys(stream) * sizeof(key_type);
644  }
645 
646  /*! \brief get the the number of bytes in this data structure occupied by values
647  * \param stream CUDA stream in which this operation is executed in
648  * \return bytes
649  */
651  index_type bytes_values(const cudaStream_t stream = 0) const noexcept
652  {
653  return num_values(stream) * sizeof(value_type);
654  }
655 
656  /*! \brief get the the number of bytes in this data structure occupied by actual information
657  * \param stream CUDA stream in which this operation is executed in
658  * \return bytes
659  */
661  index_type bytes_payload(const cudaStream_t stream = 0) const noexcept
662  {
664  }
665 
666  /*! \brief current storage density of the hash table
667  * \param stream CUDA stream in which this operation is executed in
668  * \return storage density
669  */
671  float storage_density(const cudaStream_t stream = 0) const noexcept
672  {
673  return float(bytes_payload(stream)) / float(bytes_total());
674  }
675 
676  /*! \brief current relative storage density of the hash table
677  * \param stream CUDA stream in which this operation is executed in
678  * \return storage density
679  */
681  float relative_storage_density(const cudaStream_t stream = 0) const noexcept
682  {
683  const float bytes_hash_table =
684  hash_table_.capacity() * (sizeof(key_type) + sizeof(handle_type));
685  const float bytes_value_store =
687 
689  }
690 
691  /*! \brief indicates if the hash table is properly initialized
692  * \return \c true iff the hash table is properly initialized
693  */
695  bool is_initialized() const noexcept
696  {
697  return hash_table_.is_initialized();
698  }
699 
700  /*! \brief get the status of the hash table
701  * \param stream CUDA stream in which this operation is executed in
702  * \return the status
703  */
705  status_type peek_status(const cudaStream_t stream = 0) const noexcept
706  {
708  }
709 
710  /*! \brief get and reset the status of the hash table
711  * \param[in] stream CUDA stream in which this operation is executed in
712  * \return the status
713  */
716  {
717  return hash_table_.pop_status(stream);
718  }
719 
720  /*! \brief get the key capacity of the hash table
721  * \return number of key slots in the hash table
722  */
724  index_type key_capacity() const noexcept
725  {
726  return hash_table_.capacity();
727  }
728 
729  /*! \brief get the maximum value capacity of the hash table
730  * \return maximum value capacity
731  */
733  index_type value_capacity() const noexcept
734  {
735  return value_store_.capacity();
736  }
737 
738  /*! \brief number of keys stored inside the hash table
739  * \param[in] stream CUDA stream in which this operation is executed in
740  * \return number of keys inside the hash table
741  */
743  index_type num_keys(const cudaStream_t stream = 0) const noexcept
744  {
745  return hash_table_.size(stream);
746  }
747 
748  /*! \brief get number of values to a corresponding key inside the hash table
749  * \param[in] key_in key to probe
750  * \param[out] num_out number of values
751  * \param[in] group cooperative group this operation is executed in
752  * \param[in] probing_length maximum number of probing attempts
753  * \return status (per thread)
754  */
757  const key_type key_in,
759  const cg::thread_block_tile<cg_size()>& group,
760  const index_type probing_length = defaults::probing_length()) const noexcept
761  {
763 
766 
768 
769  return status;
770  }
771 
772  /*! \brief get number of values to a corresponding set of keys inside the hash table
773  * \param[in] keys_in keys to probe
774  * \param[in] num_in input size
775  * \param[out] num_out total number of values in this query
776  * \param[out] num_per_key_out number of values per key
777  * \param[in] probing_length maximum number of probing attempts
778  * \param[in] stream CUDA stream in which this operation is executed in
779  * \param[out] status_out status information (per key)
780  */
781  template<class StatusHandler = defaults::status_handler_t>
784  const key_type * const keys_in,
785  const index_type num_in,
787  index_type * const num_per_key_out = nullptr,
788  const cudaStream_t stream = 0,
790  typename StatusHandler::base_type * const status_out = nullptr) const noexcept
791  {
792  static_assert(
794  "not a valid status handler type");
795 
796  if(!hash_table_.is_initialized_) return;
797 
798  index_type * const tmp = hash_table_.temp_.get();
799  cudaMemsetAsync(tmp, 0, sizeof(index_type), stream);
800 
804 
806 
807  if(stream == 0)
808  {
810  }
811  }
812 
813  /*! \brief get number of values inside the hash table
814  * \param[in] stream CUDA stream in which this operation is executed in
815  * \return total number of values
816  */
818  index_type num_values(const cudaStream_t stream = 0) const noexcept
819  {
821 
822  cudaMemsetAsync(tmp, 0, sizeof(index_type), stream);
823 
825  [=, *this] DEVICEQUALIFIER (key_type, const handle_type& handle)
826  {
828  },
829  stream);
830 
831  index_type out = 0;
832 
833  cudaMemcpyAsync(&out, tmp, sizeof(index_type), D2H, stream);
834 
836 
837  return out;
838  }
839 
840  /*! \brief indicates if this object is a shallow copy
841  * \return \c bool
842  */
844  bool is_copy() const noexcept
845  {
846  return is_copy_;
847  }
848 
849 private:
850  /*! \brief joins additional flags to the hash table's status
851  * \info \c const on purpose
852  * \param[in] status new status
853  * \param[in] stream CUDA stream in which this operation is executed in
854  */
856  void join_status(
857  const status_type status,
858  const cudaStream_t stream = 0) const noexcept
859  {
861  }
862 
863  /*! \brief joins additional flags to the hash table's status
864  * \info \c const on purpose
865  * \param[in] status new status
866  */
868  void device_join_status(const status_type status) const noexcept
869  {
871  }
872 
873  hash_table_type hash_table_; //< storage class for keys
874  value_store_type value_store_; //< multi-value storage class
875  const index_type max_values_per_key_; //< maximum number of values to store per key
876  bool is_copy_; //< indicates if this object is a shallow copy
877 
878  template<class Core, class StatusHandler>
880  friend void kernels::retrieve(
881  const typename Core::key_type * const,
882  const index_type,
883  const index_type * const,
884  const index_type * const,
885  typename Core::value_type * const,
886  const Core,
887  const index_type,
888  typename StatusHandler::base_type * const);
889 
890 }; // class BucketListHashTable
891 
892 } // namespace warpcore
893 
894 #endif /* WARPCORE_BUCKET_LIST_HASH_TABLE_CUH */