warpcore 0.0.1
Hashing at the Speed of Light on modern CUDA-accelerators
bloom_filter.cuh
Go to the documentation of this file.
1 #ifndef WARPCORE_BLOOM_FILTER_CUH
2 #define WARPCORE_BLOOM_FILTER_CUH
3 
4 // modified by Markus Vieth
5 
6 /**
7  * @example "advanced_usage_from_device.cu"
8  * This example implements a filtered histogram over a multi-set of keys
9  * using warpcore. The task is to output the counts of all distinct keys
10  * which occure more than once in the input data.
11  * We make use of the device-sided operations provided by warpcore to
12  * implement a custom CUDA kernel that does the job.
13  */
14 
15 #include "base.cuh"
16 #include "defaults.cuh"
17 
18 namespace warpcore
19 {
20 
21 /*! \brief bloom filter
22  * \tparam Key key type (\c std::uint32_t or \c std::uint64_t)
23  * \tparam Hasher hasher from \c warpcore::hashers
24  * \tparam Slot slot type (\c std::uint32_t or \c std::uint64_t)
25  * \tparam CGSize size of cooperative group
26  */
27 template<
28  class Key,
29  class Hasher = defaults::hasher_t<Key>,
30  class Slot = std::uint64_t,
31  index_t CGSize = 1>
33 {
34  static_assert(
36  "invalid key type");
37 
38  static_assert(
40  "not a valid hasher type");
41 
42  static_assert(
44  "invalid slot type");
45 
46  static_assert(
47  checks::is_valid_cg_size(CGSize),
48  "invalid cooperative group size");
49 
50 public:
51  using key_type = Key;
52  using value_type = bool;
53  using index_type = index_t;
54  using slot_type = Slot;
55  using status_type = Status;
56 
57  /*! \brief get cooperative group size
58  * \return cooperative group size
59  */
60  HOSTDEVICEQUALIFIER INLINEQUALIFIER
61  static constexpr index_type cg_size() noexcept
62  {
63  return CGSize;
64  }
65 
66  /*! \brief get bits per slot
67  * \return number of bits
68  */
70  static constexpr index_type slot_bits() noexcept
71  {
72  return sizeof(slot_type) * CHAR_BIT;
73  }
74 
75  /*! \brief get bits per cooperative group block of slots
76  * \return number of bits
77  */
79  static constexpr index_type block_bits() noexcept
80  {
81  return slot_bits() * cg_size();
82  }
83 
84  /*! \brief constructor
85  * \param[in] num_bits total number of bits (m) of the bloom filter
86  * \param[in] k number of hash functions to apply
87  * \param[in] seed random seed
88  */
90  explicit BloomFilter(
91  const index_type num_bits,
92  const index_type k,
93  const key_type seed = defaults::seed<key_type>()) noexcept :
94  bloom_filter_(nullptr),
98  k_(k),
99  seed_(seed),
100  is_copy_(false)
101  {
103 
104  init();
105  }
106 
107  /*! \brief copy-constructor (shallow)
108  * \param[in] object to be copied
109  */
111  BloomFilter(const BloomFilter& o) noexcept :
116  k_(o.k_),
117  seed_(o.seed_),
118  is_copy_(true)
119  {}
120 
121  /*! \brief move-constructor
122  * \param[in] object to be moved
123  */
125  BloomFilter(BloomFilter&& o) noexcept :
130  k_(std::move(o.k_)),
131  seed_(std::move(o.seed_)),
133  {
134  o.is_copy_ = true;
135  }
136 
137  #ifndef __CUDA_ARCH__
138  /*! \brief destructor
139  */
141  ~BloomFilter() noexcept
142  {
143  if(!is_copy_)
144  {
145  if(bloom_filter_ != nullptr) cudaFree(bloom_filter_);
146  }
147  }
148  #endif
149 
150  /*! \brief (re)initialize the hash table
151  * \param[in] seed random seed
152  * \param[in] stream CUDA stream in which this operation is executed in
153  */
155  void init(
156  const key_type seed,
157  const cudaStream_t stream = 0) noexcept
158  {
159  seed_ = seed;
160 
161  kernels::memset
164  }
165 
166  /*! \brief (re)initialize the hash table
167  * \param[in] stream CUDA stream in which this operation is executed in
168  */
170  void init(const cudaStream_t stream = 0) noexcept
171  {
172  init(seed_, stream);
173  }
174 
175  /*! \brief inserts a key into the bloom filter
176  * \param[in] key key to insert into the bloom filter
177  * \param[in] group cooperative group
178  */
180  void insert(
181  const key_type key,
182  const cg::thread_block_tile<cg_size()>& group) noexcept
183  {
184  const index_type slot_index =
185  ((Hasher::hash(key + seed_) % num_blocks_) *
187 
188  slot_type slot = 0;
189  for(index_type k = 0; k < k_; k++)
190  {
191  const key_type seeded_key = key + seed_ + k;
193 
194  if((hash / slot_bits()) == group.thread_rank())
195  {
196  slot |= slot_type(1) << (hash % slot_bits());
197  }
198  }
199 
200  if((slot & bloom_filter_[slot_index]) != slot)
201  {
203  }
204  }
205 
206  /*! \brief inserts a set of keys into the bloom filter
207  * \param[in] keys_in pointer to keys to insert into the bloom filter
208  * \param[in] num_in number of keys to insert
209  * \param[in] stream CUDA stream in which this operation is executed in
210  */
212  void insert(
213  const Key * const keys_in,
214  const index_t num_in,
215  const cudaStream_t stream = 0) noexcept
216  {
219  (keys_in, num_in, *this);
220  }
221 
222  /*! \brief retrieve a key
223  * \param[in] key key to query
224  * \param[in] group cooperative group
225  * \return whether the key is already inside the filter or not
226  */
228  bool retrieve(
229  const key_type key,
230  const cg::thread_block_tile<cg_size()>& group) const noexcept
231  {
232  const index_type slot_index =
233  ((Hasher::hash(key+seed_) % num_blocks_) *
235 
236  slot_type slot = 0;
237  for(index_type k = 0; k < k_; k++)
238  {
239  const key_type seeded_key = key + seed_ + k;
241 
242  if((hash / slot_bits()) == group.thread_rank())
243  {
244  slot |= slot_type(1) << (hash % slot_bits());
245  }
246  }
247 
248  return (group.all((slot & bloom_filter_[slot_index]) == slot)) ? true : false;
249  }
250 
251  /*! \brief retrieve a set of keys
252  * \param[in] keys_in pointer to keys
253  * \param[in] num_in number of keys
254  * \param[out] flags_out result per key
255  ' \param[in] stream CUDA stream in which this operation is executed in
256  */
258  void retrieve(
259  const key_type * const keys_in,
260  const index_type num_in,
261  bool * const flags_out,
262  const cudaStream_t stream = 0) const noexcept
263  {
266  (keys_in, num_in, flags_out, *this);
267  }
268 
269  /*! \brief queries and subsequently inserts a key into the bloom filter
270  * \note can only be used when \c CGSize==1 to prevent from race conditions
271  * \param[in] key key to query
272  * \param[in] group cooperative group this operation is executed in
273  * \param[out] flag whether the key was already inside the filter before insertion
274  */
275  template<
277  class = std::enable_if_t<CGSize_ == 1>>
280  const key_type key,
281  const cg::thread_block_tile<cg_size()>& group) noexcept
282  {
283  const index_type slot_index =
284  ((Hasher::hash(key+seed_) % num_blocks_) *
286 
287  slot_type slot = slot_type{0};
288  for(index_type k = 0; k < k_; k++)
289  {
290  const key_type seeded_key = key + seed_ + k;
292 
293  if((hash / slot_bits()) == group.thread_rank())
294  {
295  slot |= slot_type{1} << (hash % slot_bits());
296  }
297  }
298 
299  if((slot & bloom_filter_[slot_index]) != slot)
300  {
301  const auto old = atomicOr(bloom_filter_ + slot_index, slot);
302 
303  return ((slot & old) != slot) ? false : true;
304  }
305  else
306  {
307  return true;
308  }
309  }
310 
311  /*! \brief get number of bits (m)
312  * \return number of bits (m)
313  */
315  index_type num_bits() const noexcept
316  {
317  return num_bits_;
318  }
319 
320  /*! \brief get number of slots
321  * \return number of slots
322  */
324  index_type num_slots() const noexcept
325  {
326  return num_slots_;
327  }
328 
329  /*! \brief get number of blocks
330  * \return number of blocks
331  */
333  index_type num_blocks() const noexcept
334  {
335  return num_blocks_;
336  }
337 
338  /*! \brief get number of hash functions (k)
339  * \return number of hash functions (k)
340  */
342  index_type k() const noexcept
343  {
344  return k_;
345  }
346 
347  // TODO incorporate CG size
348  /*! \brief estimated false positive rate of pattern-blocked bloom filter
349  * \param[in] n number of inserted elements
350  * \return false positive rate
351  * \warning computationally expensive for large filters
352  */
354  double fpr(const index_type n) const noexcept
355  {
356  double res = 0.0;
357  const double b = num_bits_ / block_bits();
358 
359  #pragma omp parallel for reduction(+:res)
360  for(index_type i = 0; i < 5*n/(num_bits_ / block_bits()); ++i)
361  {
362  res += binom(n, i, 1.0/b) * fpr_base(num_bits_/b, i, k_);
363  }
364 
365  return res;
366  }
367 
368  /*! \brief indicates if this object is a shallow copy
369  * \return \c bool
370  */
372  bool is_copy() const noexcept
373  {
374  return is_copy_;
375  }
376 
377 private:
378  /*! \brief binomial coefficient
379  * \param[in] n
380  * \param[in] k
381  * \param[in] p
382  * \return binomial coefficient
383  */
385  double binom(
386  const index_type n,
387  const index_type k,
388  const double p) const noexcept
389  {
390  double res = 1.0;
391 
392  for(index_type i = n - k + 1; i <= n; ++i)
393  {
394  res = res * i;
395  }
396 
397  for(index_type i = 1; i <= k; ++i)
398  {
399  res = res / i;
400  }
401 
402  res = res * pow(p, k) * pow(1.0 - p, n - k);
403 
404  return res;
405  }
406 
407  /*! \brief FPR of traditional bloom filters
408  * \param[in] m
409  * \param[in] n
410  * \param[in] k
411  * \return FPR
412  */
414  double fpr_base(
415  const index_type m,
416  const index_type n,
417  const index_type k) const noexcept
418  {
419  return std::pow(1.0 - std::pow(1.0 - 1.0 / m, n * k), k);
420  }
421 
422  slot_type * bloom_filter_; //< pointer to the bit vector
423  const index_type num_bits_; //< number of bits (m)
424  const index_type num_slots_; //< number of slots
425  const index_type num_blocks_; //< number of CG blocks
426  const index_type k_; //< number of hash functions
427  key_type seed_; //< random seed
428  bool is_copy_; //< indicates if this object is a shallow copy
429 
430 }; // class BloomFilter
431 
432 } // namespace warpcore
433 
434 #endif /* WARPCORE_BLOOM_FILTER_CUH */