1 #ifndef WARPCORE_RANDOM_DISTRIBUTIONS_CUH
2 #define WARPCORE_RANDOM_DISTRIBUTIONS_CUH
7 #include <kiss/kiss.cuh>
8 #include <warpcore/bloom_filter.cuh>
23 HOSTDEVICEQUALIFIER INLINEQUALIFIER
35 assert(x >= a && x <= b);
40 return c + x % (d - c);
// Host-side function template (only part of the definition is visible in this
// excerpt). Presumably the generic uniform_distribution<T, Rng> that the
// convenience wrappers in this file forward to -- TODO confirm against the
// full file. Template parameters: T is the type requested from the generator
// via next<T>(), Rng is the generator type.
50 template<
class T,
class Rng>
51 HOSTQUALIFIER INLINEQUALIFIER
55 std::uint32_t seed)
noexcept
// Fixed launch configuration: 4096 blocks of 32 threads each.
60 helpers::lambda_kernel<<<4096, 32>>>
// Flat global thread index for the 1D launch.
63 const std::uint32_t tid = blockDim.x * blockIdx.x + threadIdx.x;
// Thread-dependent seed: MurmurHash of (seed + tid).
66 const std::uint32_t local_seed =
67 wc::hashers::MurmurHash<std::uint32_t>::hash(seed+tid);
// Grid-stride loop: each thread starts at its own tid and advances by the
// total number of launched threads until n is reached.
72 const auto grid_stride = blockDim.x * gridDim.x;
73 for(std::uint64_t i = tid; i < n; i += grid_stride)
// Draw the next value of type T from rng.
// NOTE(review): the construction of rng (presumably from local_seed) and the
// destination x is written to are not visible in this excerpt -- confirm.
76 x = rng.
template next<T>();
// Host-side convenience wrapper (partial view). Rng defaults to
// kiss::Kiss<std::uint32_t>; the body forwards to
// uniform_distribution<std::uint32_t, Rng>.
// NOTE(review): the wrapper's own name and its parameters other than `seed`
// are outside this excerpt -- presumably a std::uint32_t output buffer and an
// element count; confirm.
85 template<
class Rng = kiss::Kiss<std::uint32_t>>
86 HOSTQUALIFIER INLINEQUALIFIER
90 std::uint32_t seed)
noexcept
92 uniform_distribution<std::uint32_t, Rng>(
// Host-side convenience wrapper (partial view). Rng defaults to
// kiss::Kiss<std::uint64_t>; the body forwards to
// uniform_distribution<std::uint64_t, Rng>.
// NOTE(review): the wrapper's own name and its parameters other than `seed`
// are outside this excerpt -- presumably a std::uint64_t output buffer and an
// element count; confirm.
98 template<
class Rng = kiss::Kiss<std::uint64_t>>
99 HOSTQUALIFIER INLINEQUALIFIER
103 std::uint32_t seed)
noexcept
105 uniform_distribution<std::uint64_t, Rng>(
// Host-side convenience wrapper (partial view). Rng defaults to
// kiss::Kiss<std::uint32_t>; the body forwards to
// uniform_distribution<std::uint32_t, Rng>.
111 template<
class Rng = kiss::Kiss<std::uint32_t>>
112 HOSTQUALIFIER INLINEQUALIFIER
116 std::uint32_t seed)
noexcept
118 uniform_distribution<std::uint32_t, Rng>(
// `out` is reinterpreted as a std::uint32_t buffer, so the callee writes raw
// 32-bit words into it.
// NOTE(review): the declared type of `out` is not visible here; if it is a
// floating-point type, the stored values are random bit patterns rather than
// uniformly distributed real numbers -- confirm this is intended.
119 reinterpret_cast<std::uint32_t*>(out),
// Host-side convenience wrapper (partial view). Rng defaults to
// kiss::Kiss<std::uint32_t>; the body forwards to
// uniform_distribution<std::uint32_t, Rng>.
124 template<
class Rng = kiss::Kiss<std::uint32_t>>
125 HOSTQUALIFIER INLINEQUALIFIER
129 std::uint32_t seed)
noexcept
131 uniform_distribution<std::uint32_t, Rng>(
// `out` is reinterpreted as a std::uint32_t buffer, so the callee writes raw
// 32-bit words into it.
// NOTE(review): the declared type of `out` and the element count passed to
// the callee are not visible here; if `out` points to elements wider than
// 32 bits the count must be scaled accordingly -- confirm in the full file.
132 reinterpret_cast<std::uint32_t*>(out),
// Host-side function template (only part of the definition is visible in this
// excerpt). Presumably the generic unique_distribution<T, Rng> that the
// convenience wrappers in this file forward to -- TODO confirm against the
// full file. It draws values of type T from an Rng and passes each one through
// a wc::BloomFilter<T> via insert_and_query.
144 template<
class T,
class Rng>
145 HOSTQUALIFIER INLINEQUALIFIER
149 std::uint32_t seed)
noexcept
152 namespace cg = cooperative_groups;
154 using filter_t = wc::BloomFilter<T>;
// Bloom filter constructed from (n/4096, 8, T(seed)); the meaning of the three
// arguments is defined by wc::BloomFilter (see warpcore/bloom_filter.cuh).
// NOTE(review): if n is an integer type, n/4096 truncates and is 0 for
// n < 4096 -- confirm that the filter accepts that value.
156 filter_t bf{n/4096, 8, T(seed)};
// The lambda captures by copy ([=]) and is declared mutable, so the kernel
// works on its own copy of bf and may call non-const members on it.
159 helpers::lambda_kernel
161 ([=] DEVICEQUALIFIER ()
mutable
// Flat global thread index for a 1D launch.
163 const std::uint32_t tid = blockDim.x * blockIdx.x + threadIdx.x;
// Thread-dependent seed: hash of (hash(tid) + seed). hasher_t is declared
// outside the visible lines.
165 const std::uint32_t local_seed =
166 hasher_t::hash(hasher_t::hash(tid) + seed);
// Partition the thread block into tiles of filter_t::cg_size() threads;
// presumably the result is the `group` passed to insert_and_query below.
170 cg::tiled_partition<filter_t::cg_size()>(cg::this_thread_block());
// Grid-stride loop: each thread starts at its own tid and advances by the
// total number of launched threads until n is reached.
175 const auto grid_stride = blockDim.x * gridDim.x;
176 for(std::uint64_t i = tid; i < n; i += grid_stride)
// Draw a candidate value of type T from rng.
182 x = rng.
template next<T>();
// Insert the candidate into the Bloom filter and record the query result.
// NOTE(review): presumably the draw is repeated until is_unique is true; the
// surrounding retry logic is not visible in this excerpt -- confirm.
185 is_unique = bf.insert_and_query(x, group);
// Host-side convenience wrapper (partial view). Rng defaults to
// kiss::Kiss<std::uint32_t>; the body forwards to
// unique_distribution<std::uint32_t, Rng>.
// NOTE(review): the wrapper's own name and its parameters other than `seed`
// are outside this excerpt -- presumably a std::uint32_t output buffer and an
// element count; confirm.
196 template<
class Rng = kiss::Kiss<std::uint32_t>>
197 HOSTQUALIFIER INLINEQUALIFIER
201 std::uint32_t seed)
noexcept
203 unique_distribution<std::uint32_t, Rng>(
// Host-side convenience wrapper (partial view). Rng defaults to
// kiss::Kiss<std::uint64_t>; the body forwards to
// unique_distribution<std::uint64_t, Rng>.
// NOTE(review): the wrapper's own name and its parameters other than `seed`
// are outside this excerpt -- presumably a std::uint64_t output buffer and an
// element count; confirm.
209 template<
class Rng = kiss::Kiss<std::uint64_t>>
210 HOSTQUALIFIER INLINEQUALIFIER
214 std::uint32_t seed)
noexcept
216 unique_distribution<std::uint64_t, Rng>(
// Host-side convenience wrapper (partial view). Rng defaults to
// kiss::Kiss<std::uint32_t>; the body forwards to
// unique_distribution<std::uint32_t, Rng>.
222 template<
class Rng = kiss::Kiss<std::uint32_t>>
223 HOSTQUALIFIER INLINEQUALIFIER
227 std::uint32_t seed)
noexcept
229 unique_distribution<std::uint32_t, Rng>(
// `out` is reinterpreted as a std::uint32_t buffer, so uniqueness is decided
// on the raw 32-bit words written by the callee.
// NOTE(review): the declared type of `out` is not visible here; if it is a
// floating-point type, the stored values are random bit patterns rather than
// meaningful real numbers -- confirm this is intended.
230 reinterpret_cast<std::uint32_t*>(out),
// Host-side convenience wrapper (partial view). Rng defaults to
// kiss::Kiss<std::uint32_t>; the body forwards to
// unique_distribution<std::uint32_t, Rng>.
235 template<
class Rng = kiss::Kiss<std::uint32_t>>
236 HOSTQUALIFIER INLINEQUALIFIER
240 std::uint32_t seed)
noexcept
242 unique_distribution<std::uint32_t, Rng>(
// `out` is reinterpreted as a std::uint32_t buffer, so uniqueness is decided
// on the raw 32-bit words written by the callee.
// NOTE(review): the declared type of `out` and the element count passed to
// the callee are not visible here; if `out` points to elements wider than
// 32 bits, per-word uniqueness does not by itself describe the wider
// elements -- confirm against the full file.
243 reinterpret_cast<std::uint32_t*>(out),
259 template<
class T,
class Rng = kiss::Kiss<std::uint32_t>,
class P =
double>
260 HOSTQUALIFIER INLINEQUALIFIER
267 std::uint32_t seed)
noexcept
274 helpers::lambda_kernel
278 const std::uint32_t tid = blockDim.x * blockIdx.x + threadIdx.x;
280 const std::uint32_t local_seed =
281 wc::hashers::MurmurHash<std::uint32_t>::hash(seed+tid);
285 const P t = (pow(
double(n_in), 1.0 - s) - s) / (1.0 - s);
291 const auto grid_stride = blockDim.x * gridDim.x;
292 for(std::uint64_t i = tid; i < n_out; i += grid_stride)
297 P p = rng.
template next<P>();
300 ((p * t) <= 1.0) ? p * t : pow((p * t) * (1.0 - s) + s, 1.0 / (1.0 - s));
302 k = (T)(inv_b + 1.0);
304 y = rng.
template next<P>();
306 P b = (k <= 1.0) ? 1.0 / t : pow(inv_b, -s) / t;
308 rat = pow(
double(k), -s) / (b * t);
313 if((std::is_same<T, std::uint32_t>::value ||
314 std::is_same<T, std::uint64_t>::value) &&