warpcore 0.0.1
Hashing at the Speed of Light on modern CUDA-accelerators
single_value_hash_table.cuh
Go to the documentation of this file.
1 #ifndef WARPCORE_SINGLE_VALUE_HASH_TABLE_CUH
2 #define WARPCORE_SINGLE_VALUE_HASH_TABLE_CUH
3 
4 #include "base.cuh"
5 
6 namespace warpcore
7 {
8 
9 // forward declaration of friends
10 template<
11  class Key,
12  class Value,
13  Key EmptyKey,
14  Key TombstoneKey,
15  class ProbingScheme,
16  class TableStorage,
17  index_t TempMemoryBytes>
18 class CountingHashTable;
19 
20 /*! \brief single-value hash table
21  * \tparam Key key type (\c std::uint32_t or \c std::uint64_t)
22  * \tparam Value value type
23  * \tparam EmptyKey key which represents an empty slot
24  * \tparam TombstoneKey key which represents an erased slot
25  * \tparam ProbingScheme probing scheme from \c warpcore::probing_schemes
26  * \tparam TableStorage memory layout from \c warpcore::storage::key_value
27  * \tparam TempMemoryBytes size of temporary storage (typically a few kB)
28  */
29 template<
30  class Key,
31  class Value,
38 {
39  static_assert(
41  "invalid key type");
42 
43  static_assert(
45  "empty key and tombstone key must not be identical");
46 
47  static_assert(
49  "not a valid probing scheme type");
50 
51  static_assert(
52  std::is_same<typename ProbingScheme::key_type, Key>::value,
53  "probing key type differs from table's key type");
54 
55  static_assert(
57  "not a valid storage type");
58 
59  static_assert(
60  std::is_same<typename TableStorage::key_type, Key>::value,
61  "storage's key type differs from table's key type");
62 
63  static_assert(
65  "storage's value type differs from table's value type");
66 
67  static_assert(
68  TempMemoryBytes >= sizeof(index_t),
69  "temporary storage must at least be of size index_type");
70 
72 
73 public:
74  using key_type = Key;
75  using value_type = Value;
78 
79  /*! \brief get empty key
80  * \return empty key
81  */
83  static constexpr key_type empty_key() noexcept
84  {
85  return EmptyKey;
86  }
87 
88  /*! \brief get tombstone key
89  * \return tombstone key
90  */
92  static constexpr key_type tombstone_key() noexcept
93  {
94  return TombstoneKey;
95  }
96 
97  /*! \brief get cooperative group size
98  * \return cooperative group size
99  */
101  static constexpr index_type cg_size() noexcept
102  {
103  return ProbingScheme::cg_size();
104  }
105 
106  /*! \brief constructor
107  * \param[in] min_capacity minimum number of slots in the hash table
108  * \param[in] seed random seed
109  * \param[in] no_init whether to initialize the table at construction or not
110  */
113  const index_type min_capacity,
114  const key_type seed = defaults::seed<key_type>(),
115  const bool no_init = false) noexcept :
116  status_(nullptr),
118  temp_(TempMemoryBytes / sizeof(index_type)),
119  seed_(seed),
120  is_initialized_(false),
121  is_copy_(false)
122  {
123  cudaMalloc(&status_, sizeof(status_type));
124 
126 
127  if(!no_init) init();
128  }
129 
130  /*! \brief copy-constructor (shallow)
131  * \param[in] object to be copied
132  */
135  status_(o.status_),
136  table_(o.table_),
137  temp_(o.temp_),
138  seed_(o.seed_),
140  is_copy_(true)
141  {}
142 
143  /*! \brief move-constructor
144  * \param[in] object to be moved
145  */
148  status_(std::move(o.status_)),
149  table_(std::move(o.table_)),
150  temp_(std::move(o.temp_)),
151  seed_(std::move(o.seed_)),
154  {
155  o.is_copy_ = true;
156  }
157 
158  #ifndef __CUDA_ARCH__
159  /*! \brief destructor
160  */
163  {
164  if(!is_copy_)
165  {
166  if(status_ != nullptr) cudaFree(status_);
167  }
168  }
169  #endif
170 
171  /*! \brief (re)initialize the hash table
172  * \param[in] seed random seed
173  * \param[in] stream CUDA stream in which this operation is executed in
174  */
176  void init(
177  const key_type seed,
178  const cudaStream_t stream = 0) noexcept
179  {
180  is_initialized_ = false;
181 
182  seed_ = seed;
185  {
187 
189 
190  is_initialized_ = true;
191  }
192  }
193 
194  /*! \brief (re)initialize the hash table
195  * \param[in] stream CUDA stream in which this operation is executed in
196  */
198  void init(const cudaStream_t stream = 0) noexcept
199  {
200  init(seed_, stream);
201  }
202 
203  /*! \brief inserts a key into the hash table
204  * \param[in] key_in key to insert into the hash table
205  * \param[in] value_in value that corresponds to \c key_in
206  * \param[in] group cooperative group
207  * \param[in] probing_length maximum number of probing attempts
208  * \return status (per thread)
209  */
212  const key_type key_in,
213  const value_type& value_in,
214  const cg::thread_block_tile<cg_size()>& group,
215  const index_type probing_length = defaults::probing_length()) noexcept
216  {
218 
221 
222  if(group.thread_rank() == 0 && value_ptr != nullptr)
223  {
224  *value_ptr = value_in;
225  }
226 
227  return status;
228  }
229 
230  /*! \brief insert a set of keys into the hash table
231  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
232  * \param[in] keys_in pointer to keys to insert into the hash table
233  * \param[in] values_in corresponds values to \c keys_in
234  * \param[in] num_in number of keys to insert
235  * \param[in] stream CUDA stream in which this operation is executed in
236  * \param[in] probing_length maximum number of probing attempts
237  * \param[out] status_out status information per key
238  */
239  template<class StatusHandler = defaults::status_handler_t>
241  void insert(
242  const key_type * const keys_in,
243  const value_type * const values_in,
244  const index_type num_in,
245  const cudaStream_t stream = 0,
247  typename StatusHandler::base_type * const status_out = nullptr) noexcept
248  {
249  static_assert(
251  "not a valid status handler type");
252 
253  if(!is_initialized_) return;
254 
258  }
259 
260  /*! \brief retrieves a key from the hash table
261  * \param[in] key_in key to retrieve from the hash table
262  * \param[out] value_out value for \c key_in
263  * \param[in] group cooperative group
264  * \param[in] probing_length maximum number of probing attempts
265  * \return status (per thread)
266  */
269  const key_type key_in,
271  const cg::thread_block_tile<cg_size()>& group,
272  const index_type probing_length = defaults::probing_length()) const noexcept
273  {
275 
276  if(!is_valid_key(key_in))
277  {
279  return status_type::invalid_key();
280  }
281 
283 
284  for(index_type i = iter.begin(key_in, seed_); i != iter.end(); i = iter.next())
285  {
287  const bool hit = (table_key == key_in);
288  const auto hit_mask = group.ballot(hit);
289 
290  if(hit_mask)
291  {
292  const auto leader = ffs(hit_mask) - 1;
294 
295  return status_type::none();
296  }
297 
299  {
301  return status_type::key_not_found();
302  }
303  }
304 
307  }
308 
309  /*! \brief retrieve a set of keys from the hash table
310  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
311  * \param[in] keys_in pointer to keys to retrieve from the hash table
312  * \param[in] num_in number of keys to retrieve
313  * \param[out] values_out retrieved values of keys in \c key_in
314  * \param[in] stream CUDA stream in which this operation is executed in
315  * \param[in] probing_length maximum number of probing attempts
316  * \param[out] status_out status information (per key)
317  */
318  template<class StatusHandler = defaults::status_handler_t>
320  void retrieve(
321  const key_type * const keys_in,
322  const index_type num_in,
323  value_type * const values_out,
324  const cudaStream_t stream = 0,
326  typename StatusHandler::base_type * const status_out = nullptr) const noexcept
327  {
328  static_assert(
330  "not a valid status handler type");
331 
332  if(!is_initialized_) return;
333 
337  }
338 
339  /*! \brief retrieve a set of keys from the hash table
340  * \tparam StatusHandler handles returned status per key (see \c status_handlers)
341  * \param[in] keys_in pointer to keys to retrieve from the hash table
342  * \param[in] num_in number of keys to retrieve
343  * \param[out] keys_out keys retrieved from the hash table
344  * \param[out] values_out retrieved values
345  * \param[out] num_out number of pairs retrieved
346  * \param[in] stream CUDA stream in which this operation is executed in
347  * \param[in] probing_length maximum number of probing attempts
348  * \param[out] status_out status information (per key)
349  */
350  template<class StatusHandler = defaults::status_handler_t>
352  void retrieve(
353  const key_type * const keys_in,
354  const index_type num_in,
355  key_type * const keys_out,
356  value_type * const values_out,
358  const cudaStream_t stream = 0,
360  typename StatusHandler::base_type * const status_out = nullptr) const noexcept
361  {
362  static_assert(
364  "not a valid status handler type");
365 
366  if(!is_initialized_) return;
367 
368  index_type * tmp = temp_.get();
369 
370  cudaMemsetAsync(tmp, 0, sizeof(index_type), stream);
371 
375 
377 
378  if(stream == 0)
379  {
381  }
382  }
383 
384  /*! \brief retrieves all elements from the hash table
385  * \param[out] keys_out location to store retrieved keys
386  * \param[out] values_out location to store corresponding retrieved values
387  * \param[out] num_out number of of key/value pairs retrieved
388  * \param[in] stream CUDA stream in which this operation is executed in
389  */
392  key_type * const keys_out,
393  value_type * const values_out,
395  const cudaStream_t stream = 0) const noexcept
396  {
397  if(!is_initialized_) return;
398 
399  index_type * tmp = temp_.get();
400 
401  cudaMemsetAsync(tmp, 0, sizeof(index_t), stream);
402 
403  for_each([=, *this] DEVICEQUALIFIER
404  (key_type key, const value_type& value)
405  {
406  const auto i = helpers::atomicAggInc(tmp);
407  keys_out[i] = key;
408  values_out[i] = value;
409  }, stream);
410 
412 
413  if(stream == 0)
414  {
416  }
417  }
418 
419  /*! \brief erases a key from the hash table
420  * \param[in] key_in key to erase from the hash table
421  * \param[in] group cooperative group
422  * \param[in] probing_length maximum number of probing attempts
423  * \return status (per thread)
424  */
427  const key_type key_in,
428  const cg::thread_block_tile<cg_size()>& group,
429  const index_type probing_length = defaults::probing_length()) noexcept
430  {
432 
433  if(!is_valid_key(key_in))
434  {
436  return status_type::invalid_key();
437  }
438 
440 
441  for(index_type i = iter.begin(key_in, seed_); i != iter.end(); i = iter.next())
442  {
443  Key table_key = table_[i].key;
444  const bool hit = (table_key == key_in);
445  const auto hit_mask = group.ballot(hit);
446 
447  if(hit_mask)
448  {
449  const auto leader = ffs(hit_mask) - 1;
450 
451  if(group.thread_rank() == leader)
452  {
453  table_[i].key = tombstone_key();
454  }
455 
456  return status_type::none();
457  }
458 
460  {
461  //return status_type::none();
462  return status_type::key_not_found();
463  }
464  }
465 
466  //return status_type::none();
468  }
469 
470  /*! \brief erases a set of keys from the hash table
471  * \tparam StatusHandler handles returned per key (see \c status_handlers )
472  * \param[in] keys_in pointer to keys to erase from the hash table
473  * \param[in] num_in number of keys to erase
474  * \param[in] probing_length maximum number of probing attempts
475  * \param[in] stream CUDA stream in which this operation is executed in
476  * \param[out] status_out status information (per key)
477  */
478  template<class StatusHandler = defaults::status_handler_t>
480  void erase(
481  const key_type * const keys_in,
482  const index_type num_in,
483  const cudaStream_t stream = 0,
485  typename StatusHandler::base_type * const status_out = nullptr) noexcept
486  {
487  static_assert(
489  "not a valid status handler type");
490 
491  if(!is_initialized_) return;
492 
496  }
497 
498  /*! \brief applies a funtion over all key value pairs inside the table
499  * \tparam Func type of map i.e. CUDA device lambda
500  * \param[in] f map to apply
501  * \param[in] stream CUDA stream in which this operation is executed in
502  * \param[in] size of shared memory to reserve for this execution
503  */
504  template<class Func>
506  void for_each(
507  Func f, // TODO const?
508  const cudaStream_t stream = 0,
509  const index_type smem_bytes = 0) const noexcept
510  {
511  if(!is_initialized_) return;
512 
515  ([=, *this] DEVICEQUALIFIER
516  {
518 
519  if(tid < capacity())
520  {
521  auto&& pair = table_[tid];
522  if(is_valid_key(pair.key))
523  {
524  f(pair.key, pair.value);
525  }
526  }
527  });
528  }
529 
530  /*! \brief number of key/value pairs stored inside the hash table
531  * \param[in] stream CUDA stream in which this operation is executed in
532  * \return the number of key/value pairs inside the hash table
533  */
535  index_type size(const cudaStream_t stream = 0) const noexcept
536  {
537  if(!is_initialized_) return 0;
538 
539  index_type out;
540  index_type * tmp = temp_.get();
541 
542  cudaMemsetAsync(tmp, 0, sizeof(index_t), stream);
543 
546  ([=, *this] DEVICEQUALIFIER
547  {
549 
551  const auto block = cg::this_thread_block();
552 
553  if(tid >= capacity()) return;
554 
555  const bool empty = !is_valid_key(table_[tid].key);
556 
557  if(block.thread_rank() == 0)
558  {
559  smem = 0;
560  }
561 
562  block.sync();
563 
564  if(!empty)
565  {
566  const auto active_threads = cg::coalesced_threads();
567 
568  if(active_threads.thread_rank() == 0)
569  {
571  }
572  }
573 
574  block.sync();
575 
576  if(block.thread_rank() == 0 && smem != 0)
577  {
578  atomicAdd(tmp, smem);
579  }
580  });
581 
583  &out,
584  tmp,
585  sizeof(index_type),
586  D2H,
587  stream);
588 
590 
591  return out;
592  }
593 
594  /*! \brief current load factor of the hash table
595  * \param stream CUDA stream in which this operation is executed in
596  * \return load factor
597  */
599  float load_factor(const cudaStream_t stream = 0) const noexcept
600  {
601  return float(size(stream)) / float(capacity());
602  }
603 
604  /*! \brief current storage density of the hash table
605  * \param stream CUDA stream in which this operation is executed in
606  * \return storage density
607  */
609  float storage_density(const cudaStream_t stream = 0) const noexcept
610  {
611  return load_factor(stream);
612  }
613 
614  /*! \brief get the capacity of the hash table
615  * \return number of slots in the hash table
616  */
618  index_type capacity() const noexcept
619  {
620  return table_.capacity();
621  }
622 
623  /*! \brief get the total number of bytes occupied by this data structure
624  * \return bytes
625  */
627  index_type bytes_total() const noexcept
628  {
629  return table_.bytes_total() + temp_.bytes_total() + sizeof(status_type);
630  }
631 
632  /*! \brief indicates if the hash table is properly initialized
633  * \return \c true iff the hash table is properly initialized
634  */
636  bool is_initialized() const noexcept
637  {
638  return is_initialized_;
639  }
640 
641  /*! \brief get the status of the hash table
642  * \param stream CUDA stream in which this operation is executed in
643  * \return the status
644  */
646  status_type peek_status(const cudaStream_t stream = 0) const noexcept
647  {
649 
650  if(status_ != nullptr)
651  {
653  &status,
654  status_,
655  sizeof(status_type),
656  D2H,
657  stream);
658 
660  }
661 
662  return status;
663  }
664 
665  /*! \brief get and reset the status of the hash table
666  * \param[in] stream CUDA stream in which this operation is executed in
667  * \return the status
668  */
671  {
673 
674  if(status_ != nullptr)
675  {
677  &status,
678  status_,
679  sizeof(status_type),
680  D2H,
681  stream);
682 
684  }
685 
686  return status;
687  }
688 
689  /*! \brief checks if \c key is equal to \c EmptyKey
690  * \return \c bool
691  */
693  static constexpr bool is_empty_key(const key_type key) noexcept
694  {
695  return (key == empty_key());
696  }
697 
698  /*! \brief checks if \c key is equal to \c TombstoneKey
699  * \return \c bool
700  */
702  static constexpr bool is_tombstone_key(const key_type key) noexcept
703  {
704  return (key == tombstone_key());
705  }
706 
707  /*! \brief checks if \c key is equal to \c (EmptyKey||TombstoneKey)
708  * \return \c bool
709  */
711  static constexpr bool is_valid_key(const key_type key) noexcept
712  {
713  return (key != empty_key() && key != tombstone_key());
714  }
715 
716  /*! \brief get random seed
717  * \return seed
718  */
720  key_type seed() const noexcept
721  {
722  return seed_;
723  }
724 
725  /*! \brief indicates if this object is a shallow copy
726  * \return \c bool
727  */
729  bool is_copy() const noexcept
730  {
731  return is_copy_;
732  }
733 
734 private:
735  /*! \brief internal insert implementation
736  * \param[in] key_in key to insert into the hash table
737  * \param[out] status_out status returned by this operation
738  * \param[in] group cooperative group
739  * \param[in] probing_length maximum number of probing attempts
740  * \return pointer to the corresponding value slot
741  */
744  const key_type key_in,
746  const cg::thread_block_tile<cg_size()>& group,
747  const index_type probing_length) noexcept
748  {
749  if(!is_initialized_)
750  {
752  return nullptr;
753  }
754 
755  if(!is_valid_key(key_in))
756  {
759  return nullptr;
760  }
761 
763 
764  for(index_type i = iter.begin(key_in, seed_); i != iter.end(); i = iter.next())
765  {
767  const bool hit = (table_key == key_in);
768  const auto hit_mask = group.ballot(hit);
769 
770  if(hit_mask)
771  {
774 
775  const auto leader = ffs(hit_mask) - 1;
776  const auto leader_index = group.shfl(i, leader);
777  return &(table_[leader_index].value);
778  }
779 
780  // !not_is_valid_key?
782 
783  bool success = false;
784  bool duplicate = false;
785 
786  while(empty_mask)
787  {
788  const auto leader = ffs(empty_mask) - 1;
789 
790  if(group.thread_rank() == leader)
791  {
792  const auto old =
794 
795  success = (old == table_key);
796  duplicate = (old == key_in);
797  }
798 
799  if(group.any(duplicate))
800  {
803 
804  const auto leader_index = group.shfl(i, leader);
805  return &(table_[leader_index].value);
806  }
807 
808  if(group.any(success))
809  {
811  const auto leader_index = group.shfl(i, leader);
812 
813  return &(table_[leader_index].value);
814  }
815 
816  empty_mask ^= 1UL << leader;
817  }
818  }
819 
822  return nullptr;
823  }
824 
825  /*! \brief assigns the hash table's status
826  * \param[in] status new status
827  * \param[in] stream CUDA stream in which this operation is executed in
828  */
830  void assign_status(
831  const status_type status,
832  const cudaStream_t stream = 0) const noexcept
833  {
834  if(status_ != nullptr)
835  {
837  status_,
838  &status,
839  sizeof(status_type),
840  H2D,
841  stream);
842 
844  }
845  }
846 
847  /*! \brief joins additional flags to the hash table's status
848  * \param[in] status new status
849  * \param[in] stream CUDA stream in which this operation is executed in
850  */
852  void join_status(
853  const status_type status,
854  const cudaStream_t stream = 0) const noexcept
855  {
856  if(status_ != nullptr)
857  {
859 
861  status_,
862  &joined,
863  sizeof(status_type),
864  H2D,
865  stream);
866 
868  }
869  }
870 
871  /*! \brief joins additional flags to the hash table's status
872  * \info \c const on purpose
873  * \param[in] status new status
874  */
876  void device_join_status(const status_type status) const noexcept
877  {
878  if(status_ != nullptr)
879  {
881  }
882  }
883 
884  status_type * status_; //< pointer to status
885  TableStorage table_; //< actual key/value storage
886  temp_type temp_; //< temporary memory
887  key_type seed_; //< random seed
888  bool is_initialized_; //< indicates if table is properly initialized
889  bool is_copy_; //< indicates if table is a shallow copy
890 
891  // friend declarations
892  template<
893  class Key_,
894  class Value_,
895  Key_ EmptyKey_,
897  class ProbingScheme_,
898  class TableStorage_,
900  friend class CountingHashTable;
901 
902  template<
903  class Key_,
904  class Value_,
905  Key_ EmptyKey_,
907  class ValueStore_,
908  class ProbingScheme_>
909  friend class BucketListHashTable;
910 
911 
912 }; // class SingleValueHashTable
913 
914 } // namespace warpcore
915 
916 #endif /* WARPCORE_SINGLE_VALUE_HASH_TABLE_CUH */