warpcore 0.0.1
Hashing at the Speed of Light on modern CUDA-accelerators
storage.cuh
Go to the documentation of this file.
1 #ifndef WARPCORE_STORAGE_CUH
2 #define WARPCORE_STORAGE_CUH
3 
4 namespace warpcore
5 {
6 
7 /*! \brief storage classes
8  */
9 namespace storage
10 {
11 
12 /*! \brief thread-safe device-sided ring buffer without any overflow checks
13  * \tparam T base type
14  */
15 template<class T>
17 {
18 public:
19  using base_type = T;
22 
23  /*! \brief constructor
24  * \param[in] capacity buffer capacity
25  */
26  HOSTQUALIFIER INLINEQUALIFIER
27  explicit CyclicStore(const index_type capacity) noexcept :
28  store_(nullptr),
30  current_(nullptr),
32  is_copy_(false)
33  {
34  if(capacity != 0)
35  {
36  const auto total_bytes = sizeof(T) * capacity_;
37 
39  {
40  cudaMalloc(&store_, sizeof(T) * capacity_);
41  current_ = new index_type(0);
43  }
44  else
45  {
47  }
48  }
49  else
50  {
52  }
53  }
54 
55  /*! \brief copy-constructor (shallow)
56  * \param[in] object to be copied
57  */
59  CyclicStore(const CyclicStore& o) noexcept :
60  store_(o.store_),
63  status_(o.status_),
64  is_copy_(true)
65  {}
66 
67  /*! \brief move-constructor
68  * \param[in] object to be moved
69  */
71  CyclicStore(CyclicStore&& o) noexcept :
72  store_(std::move(o.store_)),
77  {
78  o.is_copy_ = true;
79  }
80 
81  #ifndef __CUDA_ARCH__
82  /*! \brief destructor
83  */
85  ~CyclicStore() noexcept
86  {
87  if(!is_copy_)
88  {
89  if(store_ != nullptr) cudaFree(store_);
90  delete current_;
91  }
92  }
93  #endif
94 
95  /*! \brief atomically fetches the next slot in the buffer
96  * \return pointer to the next slot in the buffer
97  * \info \c const on purpose
98  */
100  T * get() const noexcept
101  {
102  index_type old;
103  index_type val;
104 
105  do
106  {
107  old = *current_;
108  val = (old == capacity_ - 1) ? 0 : old + 1;
110 
111  return store_ + old;
112  }
113 
114  /*! \brief get buffer status
115  * \return status
116  */
118  status_type status() const noexcept
119  {
120  return status_;
121  }
122 
123  /*! \brief get buffer capacity
124  * \return capacity
125  */
127  index_type capacity() const noexcept
128  {
129  return capacity_;
130  }
131 
132  /*! \brief get the total number of bytes occupied by this data structure
133  * \return bytes
134  */
136  index_type bytes_total() const noexcept
137  {
138  return capacity_ * sizeof(base_type) + sizeof(index_type);
139  }
140 
141 private:
142  base_type * store_; //< actual buffer
143  const index_type capacity_; //< buffer capacity
144  index_type * current_; //< current active buffer slot
145  status_type status_; //< buffer status
146  bool is_copy_;
147 
148 }; // class CyclicStore
149 
150 /*! \brief key/value storage classes
151  */
152 namespace key_value
153 {
154 
155 // forward-declaration of friends
156 template<class Key, class Value>
157 class SoAStore;
158 
159 template<class Key, class Value>
160 class AoSStore;
161 
162 namespace detail
163 {
164 
165 template<class Key, class Value>
166 class pair_t
167 {
168 public:
169  Key key;
170  Value value;
171 
173  constexpr pair_t(const pair_t& pair) noexcept = delete;
174 
175 private:
177  constexpr pair_t(const Key& key_, const Value& value_) noexcept :
178  key(key_), value(value_)
179  {}
180 
182  constexpr pair_t() noexcept : key(), value()
183  {}
184 
185  friend AoSStore<Key, Value>;
186  friend SoAStore<Key, Value>;
187 };
188 
189 template<class Key, class Value>
191 {
192 public:
193  Key& key;
194  Value& value;
195 
196 private:
198  constexpr pair_ref_t(Key& key_, Value& value_) noexcept :
199  key(key_), value(value_)
200  {}
201 
202  using NKey = std::remove_const_t<Key>;
203  using NValue = std::remove_const_t<Value>;
204 
205  friend AoSStore<NKey, NValue>;
206  friend SoAStore<NKey, NValue>;
207 };
208 
209 template<class Key, class Value>
210 using pair_const_ref_t = pair_ref_t<const Key, const Value>;
211 
212 } // namespace detail
213 
214 /*! \brief key/value store with struct-of-arrays memory layout
215  * \tparam Key key type
216  * \tparam Value value type
217  */
218 template<class Key, class Value>
219 class SoAStore
220 {
221 public:
222  using key_type = Key;
223  using value_type = Value;
224  using status_type = Status;
225  using index_type = index_t;
226  using tag = tags::key_value_storage;
227 
228  /*! \brief constructor
229  * \param[in] capacity number of key/value slots
230  */
231  HOSTQUALIFIER INLINEQUALIFIER
232  explicit SoAStore(const index_type capacity) noexcept :
233  status_(Status::not_initialized()),
234  capacity_(capacity),
235  keys_(nullptr),
236  values_(nullptr),
237  is_copy_(false)
238  {
239  if(capacity != 0)
240  {
241  const auto total_bytes = (((sizeof(key_type) + sizeof(value_type)) *
242  capacity) + sizeof(status_type));
243 
244  if(helpers::available_gpu_memory() >= total_bytes)
245  {
246  cudaMalloc(&keys_, sizeof(key_type)*capacity);
247  cudaMalloc(&values_, sizeof(value_type)*capacity);
248 
249  status_ = status_type::none();
250  }
251  else
252  {
253  status_ += status_type::out_of_memory();
254  }
255  }
256  else
257  {
258  status_ += status_type::invalid_configuration();
259  }
260  }
261 
262  /*! \brief copy-constructor (shallow)
263  * \param[in] object to be copied
264  */
265  HOSTDEVICEQUALIFIER INLINEQUALIFIER
266  SoAStore(const SoAStore& o) noexcept :
267  status_(o.status_),
268  capacity_(o.capacity_),
269  keys_(o.keys_),
270  values_(o.values_),
271  is_copy_(true)
272  {}
273 
274  /*! \brief move-constructor
275  * \param[in] object to be moved
276  */
277  HOSTQUALIFIER INLINEQUALIFIER
278  SoAStore(SoAStore&& o) noexcept :
279  status_(std::move(o.status_)),
280  capacity_(std::move(o.capacity_)),
281  keys_(std::move(o.keys_)),
282  values_(std::move(o.values_)),
283  is_copy_(std::move(o.is_copy_))
284  {
285  o.is_copy_ = true;
286  }
287 
288  #ifndef __CUDA_ARCH__
289  /*! \brief destructor
290  */
291  HOSTQUALIFIER INLINEQUALIFIER
292  ~SoAStore() noexcept
293  {
294  if(!is_copy_)
295  {
296  if(keys_ != nullptr) cudaFree(keys_);
297  if(values_ != nullptr) cudaFree(values_);
298  }
299  }
300  #endif
301 
302  /*! \brief initialize keys
303  * \param[in] key initializer key
304  * \param[in] stream CUDA stream in which this operation is executed in
305  */
306  HOSTQUALIFIER INLINEQUALIFIER
307  void init_keys(const key_type key, const cudaStream_t stream = 0) noexcept
308  {
309  if(!status_.has_any())
310  {
311  helpers::lambda_kernel
312  <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
313  ([=, *this] DEVICEQUALIFIER
314  {
315  const index_type tid = helpers::global_thread_id();
316 
317  if(tid < capacity_)
318  {
319  keys_[tid] = key;
320  }
321  });
322  }
323  }
324 
325  /*! \brief initialize values
326  * \param[in] value initializer value
327  * \param[in] stream CUDA stream in which this operation is executed in
328  */
329  HOSTQUALIFIER INLINEQUALIFIER
330  void init_values(const value_type value, const cudaStream_t stream = 0) noexcept
331  {
332  if(!status_.has_any())
333  {
334  helpers::lambda_kernel
335  <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
336  ([=, *this] DEVICEQUALIFIER
337  {
338  const index_type tid = helpers::global_thread_id();
339 
340  if(tid < capacity_)
341  {
342  values_[tid] = value;
343  }
344  });
345  }
346  }
347 
348  /*! \brief initialize key/value pairs
349  * \param[in] key initializer key
350  * \param[in] value initializer value
351  * \param[in] stream CUDA stream in which this operation is executed in
352  */
353  HOSTQUALIFIER INLINEQUALIFIER
354  void init_pairs(
355  const key_type key,
356  const value_type value,
357  const cudaStream_t stream = 0) noexcept
358  {
359  if(!status_.has_any())
360  {
361  helpers::lambda_kernel
362  <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
363  ([=, *this] DEVICEQUALIFIER
364  {
365  const index_type tid = helpers::global_thread_id();
366 
367  if(tid < capacity_)
368  {
369  keys_[tid] = key;
370  values_[tid] = value;
371  }
372  });
373  }
374  }
375 
376  /*! \brief accessor
377  * \param[in] i index to access
378  * \return pair at position \c i
379  */
380  DEVICEQUALIFIER INLINEQUALIFIER
381  detail::pair_ref_t<key_type, value_type> operator[](index_type i) noexcept
382  {
383  assert(i < capacity_);
384  return detail::pair_ref_t<key_type, value_type>{keys_[i], values_[i]};
385  }
386 
387  /*! \brief const accessor
388  * \param[in] i index to access
389  * \return pair at position \c i
390  */
391  DEVICEQUALIFIER INLINEQUALIFIER
392  detail::pair_const_ref_t<key_type, value_type> operator[](
393  const index_type i) const noexcept
394  {
395  return detail::pair_const_ref_t<key_type, value_type>{keys_[i], values_[i]};
396  }
397 
398  /*! \brief get storage status
399  * \return status
400  */
401  HOSTDEVICEQUALIFIER INLINEQUALIFIER
402  status_type status() const noexcept
403  {
404  return status_;
405  }
406 
407  /*! \brief get storage capacity
408  * \return capacity
409  */
410  HOSTDEVICEQUALIFIER INLINEQUALIFIER
411  index_type capacity() const noexcept
412  {
413  return capacity_;
414  }
415 
416  /*! \brief get the total number of bytes occupied by this data structure
417  * \return bytes
418  */
419  HOSTQUALIFIER INLINEQUALIFIER
420  index_type bytes_total() const noexcept
421  {
422  return capacity_ * (sizeof(key_type) + sizeof(value_type));
423  }
424 
425 private:
426  status_type status_; //< storage status
427  const index_type capacity_; //< storage capacity
428  key_type * keys_; //< actual key storage in SoA format
429  value_type * values_; //< actual value storage in SoA format
430  bool is_copy_; //< indicates if this object is a shallow copy
431 
432 }; // class SoAStore
433 
434 /*! \brief key/value store with array-of-structs memory layout
435  * \tparam Key key type
436  * \tparam Value value type
437  */
438 template<class Key, class Value>
439 class AoSStore
440 {
441  using pair_t = detail::pair_t<Key, Value>;
442 
443 public:
444  using key_type = Key;
445  using value_type = Value;
446  using status_type = Status;
447  using index_type = index_t;
448  using tag = tags::key_value_storage;
449 
450  /*! \brief constructor
451  * \param[in] capacity number of key/value slots
452  */
453  HOSTQUALIFIER INLINEQUALIFIER
454  explicit AoSStore(const index_type capacity) noexcept :
455  status_(status_type::not_initialized()),
456  capacity_(capacity),
457  pairs_(nullptr),
458  is_copy_(false)
459  {
460  if(capacity != 0)
461  {
462  const auto total_bytes = sizeof(pair_t) * capacity;
463 
464  if(helpers::available_gpu_memory() >= total_bytes)
465  {
466  cudaMalloc(&pairs_, sizeof(pair_t) * capacity);
467 
468  status_ = status_type::none();
469  }
470  else
471  {
472  status_ += status_type::out_of_memory();
473  }
474  }
475  else
476  {
477  status_ += status_type::invalid_configuration();
478  }
479  }
480 
481  /*! \brief copy-constructor (shallow)
482  * \param[in] object to be copied
483  */
484  HOSTDEVICEQUALIFIER INLINEQUALIFIER
485  AoSStore(const AoSStore& o) noexcept :
486  status_(o.status_),
487  capacity_(o.capacity_),
488  pairs_(o.pairs_),
489  is_copy_(true)
490  {}
491 
492  /*! \brief move-constructor
493  * \param[in] object to be moved
494  */
495  HOSTQUALIFIER INLINEQUALIFIER
496  AoSStore(AoSStore&& o) noexcept :
497  status_(std::move(o.status_)),
498  capacity_(std::move(o.capacity_)),
499  pairs_(std::move(o.pairs_)),
500  is_copy_(std::move(o.is_copy_))
501  {
502  o.is_copy_ = true;
503  }
504 
505  #ifndef __CUDA_ARCH__
506  /*! \brief destructor
507  */
508  HOSTQUALIFIER INLINEQUALIFIER
509  ~AoSStore() noexcept
510  {
511  if(!is_copy_)
512  {
513  if(pairs_ != nullptr) cudaFree(pairs_);
514  }
515  }
516  #endif
517 
518  /*! \brief initialize keys
519  * \param[in] key initializer key
520  * \param[in] stream CUDA stream in which this operation is executed in
521  */
522  HOSTQUALIFIER INLINEQUALIFIER
523  void init_keys(const key_type key, const cudaStream_t stream = 0) noexcept
524  {
525  if(!status_.has_any())
526  {
527  helpers::lambda_kernel
528  <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
529  ([=, *this] DEVICEQUALIFIER
530  {
531  const index_type tid = helpers::global_thread_id();
532 
533  if(tid < capacity_)
534  {
535  pairs_[tid].key = key;
536  }
537  });
538  }
539  }
540 
541  /*! \brief initialize values
542  * \param[in] value initializer value
543  * \param[in] stream CUDA stream in which this operation is executed in
544  */
545  HOSTQUALIFIER INLINEQUALIFIER
546  void init_values(const value_type value, const cudaStream_t stream = 0) noexcept
547  {
548  if(!status_.has_any())
549  {
550  helpers::lambda_kernel
551  <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
552  ([=, *this] DEVICEQUALIFIER
553  {
554  const index_type tid = helpers::global_thread_id();
555 
556  if(tid < capacity_)
557  {
558  pairs_[tid].value = value;
559  }
560  });
561  }
562  }
563 
564  /*! \brief initialize key/value pairs
565  * \param[in] key initializer key
566  * \param[in] value initializer value
567  * \param[in] stream CUDA stream in which this operation is executed in
568  */
569  HOSTQUALIFIER INLINEQUALIFIER
570  void init_pairs(
571  const key_type key,
572  const value_type value,
573  const cudaStream_t stream = 0) noexcept
574  {
575  if(!status_.has_any())
576  {
577  helpers::lambda_kernel
578  <<<SDIV(capacity_, WARPCORE_BLOCKSIZE), WARPCORE_BLOCKSIZE, 0, stream>>>
579  ([=, *this] DEVICEQUALIFIER
580  {
581  const index_type tid = helpers::global_thread_id();
582 
583  if(tid < capacity_)
584  {
585  pairs_[tid].key = key;
586  pairs_[tid].value = value;
587  }
588  });
589  }
590  }
591 
592  /*! \brief accessor
593  * \param[in] i index to access
594  * \return pair at position \c i
595  */
596  DEVICEQUALIFIER INLINEQUALIFIER
597  pair_t& operator[](const index_type i) noexcept
598  {
599  return pairs_[i];
600  }
601 
602  /*! \brief const accessor
603  * \param[in] i index to access
604  * \return pair at position \c i
605  */
606  DEVICEQUALIFIER INLINEQUALIFIER
607  const pair_t& operator[](const index_type i) const noexcept
608  {
609  return pairs_[i];
610  }
611 
612  /*! \brief get storage status
613  * \return status
614  */
615  HOSTDEVICEQUALIFIER INLINEQUALIFIER
616  status_type status() const noexcept
617  {
618  return status_;
619  }
620 
621  /*! \brief get storage capacity
622  * \return status
623  */
624  HOSTDEVICEQUALIFIER INLINEQUALIFIER
625  index_type capacity() const noexcept
626  {
627  return capacity_;
628  }
629 
630  /*! \brief get the total number of bytes occupied by this data structure
631  * \return bytes
632  */
633  HOSTQUALIFIER INLINEQUALIFIER
634  index_type bytes_total() const noexcept
635  {
636  return capacity_ * sizeof(pair_t);
637  }
638 
639 private:
640  status_type status_; //< storage status
641  const index_type capacity_; //< storage capacity
642  pair_t * pairs_; //< actual pair storage in AoS format
643  bool is_copy_; //< indicates if this object is a shallow copy
644 
645 }; // class AoSStore
646 
647 } // namespace key_value
648 
649 /*! \brief multi-value storage classes
650  */
651 namespace multi_value
652 {
653 
654 namespace detail
655 {
656  enum class LinkedListState
657  {
658  uninitialized = 0,
659  initialized = 1,
660  blocking = 2,
661  full = 3
662  };
663 
664 
665  template<class Store>
666  union Bucket
667  {
668  private:
669  using value_type = typename Store::value_type;
670  using info_type =
672 
673  value_type value_;
674  info_type info_;
675 
676  DEVICEQUALIFIER INLINEQUALIFIER
677  constexpr explicit Bucket(
678  const info_type info) noexcept : info_{info}
679  {}
680 
682  constexpr explicit Bucket(
683  const value_type value) noexcept : value_{value}
684  {}
685 
686  public:
687  // FIXME friend
689  constexpr explicit Bucket(
690  const index_t previous,
691  const index_t bucket_size) noexcept : info_{previous, bucket_size}
692  {}
693 
695  constexpr explicit Bucket() noexcept :
696  info_()
697  {};
698 
701  {
702  return Bucket<Store>(atomicExch(&info_, bucket.info_));
703  }
704 
706  constexpr value_type value() const noexcept
707  {
708  return value_;
709  }
710 
712  constexpr index_t previous() const noexcept
713  {
714  return info_.first();
715  }
716 
718  constexpr index_t bucket_size() const noexcept
719  {
720  return info_.second();
721  }
722 
724  constexpr void value(const value_type& val) noexcept
725  {
726  value_ = val;
727  }
728 
730  constexpr void previous(const index_t prev) noexcept
731  {
732  info_.first(prev);
733  }
734 
736  constexpr void bucket_size(const index_t size) noexcept
737  {
738  info_.second(size);
739  }
740  };
741 
742  template<class Store>
744  {
746  2,
750 
751  HOSTDEVICEQUALIFIER INLINEQUALIFIER
752  constexpr explicit BucketListHandle(
754  index_t index,
756  index_t offset) noexcept : pack_()
757  {
758  pack_.first(state);
759  pack_.second(index);
762  };
763 
765  constexpr explicit BucketListHandle(const packed_type pack) noexcept :
766  pack_(pack)
767  {};
768 
769  public:
771  constexpr explicit BucketListHandle() noexcept :
772  pack_()
773  {};
774 
775  private:
777  constexpr LinkedListState linked_list_state() const noexcept
778  {
779  return pack_.template first_as<LinkedListState>();
780  }
781 
783  constexpr index_t bucket_index() const noexcept
784  {
785  return pack_.second();
786  }
787 
788  public:
790  constexpr index_t value_count() const noexcept
791  {
792  return pack_.third();
793  }
794 
795  private:
797  constexpr index_t num_values_tail() const noexcept
798  {
799  return pack_.fourth();
800  }
801 
802  public:
804  static constexpr index_t max_bucket_index() noexcept
805  {
806  return (index_t{1} << Store::bucket_index_bits()) - 1;
807  }
808 
810  static constexpr index_t max_value_count() noexcept
811  {
812  return (index_t{1} << Store::value_counter_bits()) - 1;
813  }
814 
816  static constexpr index_t max_bucket_size() noexcept
817  {
818  return (index_t{1} << Store::bucket_size_bits()) - 1;
819  }
820 
821  private:
823  constexpr bool is_uninitialized() const noexcept
824  {
826  }
827 
829  constexpr bool is_initialized() const noexcept
830  {
832  }
833 
835  constexpr bool is_blocking() const noexcept
836  {
838  }
839 
841  constexpr bool is_full() const noexcept
842  {
843  return (linked_list_state() == LinkedListState::full);
844  }
845 
847  constexpr bool operator==(
848  const BucketListHandle<Store> other) const noexcept
849  {
850  return pack_ == other.pack_;
851  }
852 
854  constexpr bool operator!=(
855  const BucketListHandle<Store> other) const noexcept
856  {
857  return !(*this == other);
858  }
859 
861 
866  const BucketListHandle<Store> val_) noexcept
867  {
868  return BucketListHandle(
870  }
871 
875  const BucketListHandle<Store> val_) noexcept
876  {
877  return BucketListHandle(
879  }
880 
881  friend Store;
882  };
883 } // namespace detail
884 
885 /*! \brief value store consisting of growing linked buckets of values
886  * \tparam Value type to store
887  * \tparam BucketIndexBits number of bits used to enumerate bucket IDs
888  * \tparam ValueCounterBits number of bits used to count values in a bucket list
889  * \tparam bucketSizeBits number of bits used to hold the value capacity of a bucket
890  */
891 template<
892  class Value,
893  index_t BucketIndexBits = 32,
894  index_t ValueCounterBits = 20,
895  index_t BucketSizeBits = 10>
897 {
898 private:
899  static_assert(
901  "Value type must be trivially copyable.");
902 
903  static_assert(
904  (BucketIndexBits + ValueCounterBits + BucketSizeBits + 2 <= 64),
905  "Too many bits for bucket index and value counter and bucket size.");
906 
907  using type = BucketListStore<
908  Value,
909  BucketIndexBits,
910  ValueCounterBits,
911  BucketSizeBits>;
912 
913  friend detail::BucketListHandle<type>;
914 
915 public:
916  using value_type = Value;
917  using handle_type = detail::BucketListHandle<type>;
920  using bucket_type = detail::Bucket<type>;
922 
923  /*! \brief get number of bits used to enumerate buckets
924  * \return number of bits
925  */
926  HOSTDEVICEQUALIFIER INLINEQUALIFIER
927  static constexpr index_type bucket_index_bits() noexcept
928  {
929  return BucketIndexBits;
930  };
931 
932  /*! \brief get number of bits used to count values in a bucket list
933  * \return number of bits
934  */
936  static constexpr index_type value_counter_bits() noexcept
937  {
938  return ValueCounterBits;
939  };
940 
941  /*! \brief get number of bits used to hold the value capacity of a bucket
942  * \return number of bits
943  */
945  static constexpr index_type bucket_size_bits() noexcept
946  {
947  return BucketSizeBits;
948  };
949 
950 private:
951  friend bucket_type;
952 
953  /*! \brief head bucket identifier
954  * \return identifier
955  */
957  static constexpr index_type head() noexcept
958  {
959  return handle_type::max_bucket_index();
960  }
961 
962 public:
963  /*! \brief get uninitialized handle
964  * \return handle
965  */
967  static constexpr handle_type uninitialized_handle() noexcept
968  {
970  }
971 
972  /*! \brief get number of values in bucket list
973  * \return value count
974  */
976  static constexpr index_type size(const handle_type& handle) noexcept
977  {
978  return handle.value_count();
979  }
980 
981 
982  /*! \brief constructor
983  * \param[in] max_capacity maximum number of value slots
984  * \param[in] bucket_grow_factor factor which determines the growth of each newly allocated bucket
985  * \param[in] min_bucket_size value capacity of the first bucket of a bucket list
986  * \param[in] max_bucket_size value capacity after which no more growth is allowed for newly allocated buckets
987  */
989  explicit BucketListStore(
990  const index_type max_capacity,
991  const float bucket_grow_factor = 1.1,
992  const index_type min_bucket_size = 1,
999  next_free_bucket_(nullptr),
1000  buckets_(nullptr),
1001  is_copy_(false)
1002  {
1004  bucket_grow_factor_ >= 1.0 &&
1005  min_bucket_size_ >= 1 &&
1008  {
1009  const auto total_bytes =
1010  sizeof(bucket_type) * capacity_ + sizeof(index_type);
1011 
1013  {
1016 
1017  status_ = status_type::none();
1018  init();
1019  }
1020  else
1021  {
1023  }
1024  }
1025  else
1026  {
1028  }
1029  }
1030 
1031  /*! \brief copy-constructor (shallow)
1032  * \param[in] object to be copied
1033  */
1035  BucketListStore(const BucketListStore& o) noexcept :
1036  status_(o.status_),
1041  buckets_(o.buckets_),
1043  is_copy_(true)
1044  {}
1045 
1046  /*! \brief move-constructor
1047  * \param[in] object to be moved
1048  */
1051  status_(std::move(o.status_)),
1056  buckets_(std::move(o.buckets_)),
1059  {
1060  o.is_copy_ = true;
1061  }
1062 
1063  #ifndef __CUDA_ARCH__
1064  /*! \brief destructor
1065  */
1067  ~BucketListStore() noexcept
1068  {
1069  if(!is_copy_)
1070  {
1071  if(buckets_ != nullptr) cudaFree(buckets_);
1073  }
1074  }
1075  #endif
1076 
1077  /*! \brief (re)initialize storage
1078  * \param[in] stream CUDA stream in which this operation is executed in
1079  */
1081  void init(const cudaStream_t stream = 0) noexcept
1082  {
1084  {
1087  ([=, *this] DEVICEQUALIFIER () mutable
1088  {
1090 
1091  if(tid < capacity_)
1092  {
1093  if(tid == 0)
1094  {
1095  *next_free_bucket_ = 0;
1096  }
1097 
1098  buckets_[tid].previous(head());
1100  }
1101  });
1102 
1103  status_ = status_type::none();
1104  }
1105  }
1106 
1107  /*! \brief append a value to a bucket list
1108  * \param[in] handle handle to the bucket list
1109  * \param[in] value value to be inserted
1110  * \return status
1111  */
1115  const value_type& value,
1116  index_type max_values_per_key) noexcept
1117  {
1119 
1121  {
1122  // block handle
1123  const auto old_handle = atomicCAS(
1124  &handle,
1126  handle_type{
1128  head(),
1129  0,
1130  0});
1131 
1132  // winner allocates first bucket
1133  if(old_handle == current_handle)
1134  {
1135  const index_type alloc =
1137 
1139  {
1141 
1142  // successfully allocated initial bucket
1143  atomicExch(
1144  &handle,
1145  handle_type{
1147  alloc,
1148  1,
1149  1});
1150 
1151  return Status::none();
1152  }
1153 
1154  // mark as full
1155  atomicExch(
1156  &handle,
1157  handle_type{
1159  head(),
1160  0,
1161  0});
1162 
1163  return status_type::out_of_memory();
1164  }
1165  }
1166 
1167  // try to find a slot until there is no more space
1168  while(true)
1169  {
1171 
1173  {
1174  //__nanosleep(1000); // why not?
1175  continue;
1176  }
1177 
1178  if(current_handle.is_full())
1179  {
1180  return status_type::out_of_memory();
1181  }
1182 
1184  {
1186  }
1187 
1188  const auto current_bucket = cub::ThreadLoad<cub::LOAD_VOLATILE>(
1190 
1191  const auto current_bucket_size =
1194 
1195  // if the bucket is already full allocate a new bucket
1197  {
1198  const auto old_handle = atomicCAS(
1199  &handle,
1201  handle_type{
1206 
1207  // blocking failed -> reload handle
1208  if(old_handle != current_handle)
1209  {
1210  continue;
1211  }
1212 
1213  // compute new bucket size
1214  const index_type new_bucket_size = min(
1215  float(max_bucket_size_),
1217 
1218  // get index of next free bucket in pool
1219  const index_type alloc =
1221 
1222  if(alloc + new_bucket_size + 1 <= capacity_)
1223  {
1224  buckets_[alloc + 1].value(value);
1225 
1226  const auto old = buckets_[alloc].atomic_exchange_info(
1228  new_bucket_size});
1229 
1230  if(old.bucket_size() != 0)
1231  {
1232  // bucket allocation successful
1233  atomicExch(
1234  &handle,
1235  handle_type{
1237  alloc,
1239  1});
1240  }
1241 
1242  return Status::none();
1243  }
1244  else
1245  {
1246  // mark as full
1247  atomicExch(
1248  &handle,
1249  handle_type{
1254 
1255  return status_type::out_of_memory();
1256  }
1257  }
1258 
1259  const auto old_handle =
1260  atomicCAS(
1261  &handle,
1263  handle_type{
1268 
1269  if(old_handle == current_handle)
1270  {
1271  const auto i = current_handle.bucket_index();
1272  const auto j =
1276 
1277  buckets_[i + j].value(value);
1278 
1279  return status_type::none();
1280  }
1281  }
1282 
1283  return status_type::unknown_error();
1284  }
1285 
1286  /*! \brief apply a (lambda-)function on each value inside a bucket list
1287  * \tparam Func function to be executed for each value
1288  * \param[in] handle handle to the bucket list
1289  * \param[in] f function which takes the value together whith the index of the value inside the list as parameters
1290  * \param[in] group cooperative group used for hash table probing
1291  */
1292  template<class Func>
1294  void for_each(
1295  Func f, // TODO const
1296  const handle_type& handle,
1297  const cg::thread_group& group = cg::this_thread()) const noexcept
1298  {
1299  const index_type rank = group.thread_rank();
1300  const index_type group_size = group.size();
1302 
1303  // return if nothing is to be done
1304  if(!handle.is_initialized() || handle.bucket_index() == head()) return;
1305 
1307 
1308  const index_type bucket_offset =
1309  (handle.value_count() <= min_bucket_size_) ? 0 : 1;
1310 
1311  // process first bucket
1313  {
1316  }
1317 
1320 
1321  // while there are more values left, process them, too
1322  while(global_index < handle.value_count())
1323  {
1325 
1326  // check if we are at the final bucket
1327  const bool last =
1329  const auto current_bucket_size =
1331  const index_type bucket_offset =
1332  last ? 0 : 1;
1333 
1334  // while there are more values to be processed in the current bucket
1336  {
1338 
1341  }
1342 
1344  }
1345  }
1346 
1347  /*! \brief get status
1348  * \return status
1349  */
1351  status_type status() const noexcept
1352  {
1353  return status_;
1354  }
1355 
1356  /*! \brief get maximum value capacity
1357  * \return capacity
1358  */
1360  index_type capacity() const noexcept
1361  {
1362  return capacity_;
1363  }
1364 
1365  /*! \brief get the total number of bytes occupied by this data structure
1366  * \return bytes
1367  */
1369  index_type bytes_total() const noexcept
1370  {
1371  return capacity_ * sizeof(bucket_type) + sizeof(index_type);
1372  }
1373 
1374  /*! \brief get load factor
1375  * \param[in] stream CUDA stream in which this operation is executed in
1376  * \return load factor
1377  */
1379  float load_factor(const cudaStream_t stream = 0) const noexcept
1380  {
1381  index_type load = 0;
1382 
1384  &load, next_free_bucket_, sizeof(index_type), D2H, stream);
1385 
1387 
1388  return float(load) / float(capacity());
1389  }
1390 
1391  /*! \brief get the number of occupied bytes
1392  * \param[in] stream CUDA stream in which this operation is executed in
1393  * \return bytes
1394  */
1396  index_type bytes_occupied(const cudaStream_t stream = 0) const noexcept
1397  {
1398  index_type occupied = 0;
1399 
1402 
1404 
1405  return occupied * sizeof(bucket_type);
1406  }
1407 
1408  /*! \brief get bucket growth factor
1409  * \return factor
1410  */
1412  float bucket_grow_factor() const noexcept
1413  {
1414  return bucket_grow_factor_;
1415  }
1416 
1417  /*! \brief get minimum bucket capacity
1418  * \return capacity
1419  */
1421  index_type min_bucket_size() const noexcept
1422  {
1423  return min_bucket_size_;
1424  }
1425 
1426  /*! \brief get maximum bucket capacity
1427  * \return capacity
1428  */
1430  index_type max_bucket_size() const noexcept
1431  {
1432  return max_bucket_size_;
1433  }
1434 
1435  /*! \brief indicates if this object is a shallow copy
1436  * \return \c bool
1437  */
1439  bool is_copy() const noexcept
1440  {
1441  return is_copy_;
1442  }
1443 
1444 private:
1445  status_type status_; //< status of the store
1446  const index_type capacity_; //< value capacity
1447  const float bucket_grow_factor_; //< grow factor for allocated buckets
1448  const index_type min_bucket_size_; //< initial bucket size
1449  const index_type max_bucket_size_; //< bucket size after which no more growth occurs
1450  bucket_type * buckets_; //< pointer to bucket store
1451  index_type * next_free_bucket_; //< index of next non-occupied bucket
1452  bool is_copy_; //< indicates if this object is a shallow copy
1453 
1454 }; // class BucketListStore
1455 
1456 } // namespace multi_value
1457 
1458 } // namespace storage
1459 
1460 } // namespace warpcore
1461 
1462 #endif /* WARPCORE_STORAGE_CUH */