#ifndef WARPCORE_GPU_ENGINE_CUH
#define WARPCORE_GPU_ENGINE_CUH

// cooperative-groups API is used by every kernel below
#include <cooperative_groups.h>

namespace cg = cooperative_groups;
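// for_each, key-only variant (fragment): one thread per table slot; apart
// from the global-thread-id computation shown here, its body presumably
// mirrors the (key, value) variant that follows.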
const index_t tid = helpers::global_thread_id();
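// for_each over (key, value) pairs: one thread per slot; invokes the user
// functor f on every slot holding a valid key.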
const index_t tid = helpers::global_thread_id();

if(tid < core.capacity())
{
    auto&& pair = core.table_[tid];
    if(core.is_valid_key(pair.key))
    {
        f(pair.key, pair.value);
    }
}
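// for_each_unique_key: calls f once per distinct key. Each cooperative
// group starts from its own slot (gid) and re-probes the table for that
// slot's key; group.ballot() gathers the per-lane hit bits, the lowest set
// bit elects a leader, and the functor fires only when the first hit in
// probing order is the group's own slot (i == gid).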
using index_type = typename Core::index_type;
using probing_scheme_type = typename Core::probing_scheme_type;

const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < core.capacity())
{
    auto search_key = core.table_[gid].key;
    if(core.is_valid_key(search_key))
    {
        // probing length == capacity: follow the key's full probe sequence
        probing_scheme_type iter(core.capacity(), core.capacity(), group);

        for(index_type i = iter.begin(search_key, core.seed_); i != iter.end(); i = iter.next())
        {
            const auto table_key = core.table_[i].key;
            const auto hit = (table_key == search_key);
            const auto hit_mask = group.ballot(hit);

            // lane index of the lowest set hit bit elects the leader
            const auto leader = __ffs(hit_mask) - 1;

            if(group.thread_rank() == leader && i == gid)
            {
                // ... (rest of the kernel not shown)
            }
        }
    }
}
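// for_each over the values stored under a set of query keys: one
// cooperative group per input key; core.for_each applies f to each value
// of keys_in[gid], and lane 0 forwards the probing status to the
// StatusHandler.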
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < num_in) // num_in: number of query keys (kernel parameter)
{
    index_t num_values; // out-parameter: number of values visited
    const auto status = core.for_each(f, keys_in[gid], num_values, group, probing_length);

    if(group.thread_rank() == 0)
    {
        StatusHandler::handle(status, status_out, gid);
    }
}
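// insert, status-free variant: one cooperative group per input key.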
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < num_in)
{
    core.insert(keys_in[gid], group);
}
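// retrieve, status-free variant: the group performs the lookup; lane 0
// writes the result.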
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < num_in)
{
    typename Core::value_type value = core.retrieve(keys_in[gid], group);

    if(group.thread_rank() == 0)
    {
        values_out[gid] = value;
    }
}
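// insert with status reporting: bounded by an explicit probing_length;
// lane 0 hands the per-key status to the StatusHandler.
//
// Host-side launch sketch (illustrative only; names, grid sizing, and the
// template-parameter order are assumptions, not part of this header):
//
//   const index_t threads = num_keys * Core::cg_size();
//   const index_t block   = 1024;
//   const index_t grid    = (threads + block - 1) / block;
//   insert<Core, StatusHandler><<<grid, block>>>(
//       keys_d, num_keys, core, probing_length, status_d);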
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < num_in)
{
    const auto status = core.insert(keys_in[gid], group, probing_length);

    if(group.thread_rank() == 0)
    {
        StatusHandler::handle(status, status_out, gid);
    }
}
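// insert for (key, value) pairs: identical pattern, additionally storing
// values_in[gid] alongside the key.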
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < num_in)
{
    const auto status = core.insert(keys_in[gid], values_in[gid], group, probing_length);

    if(group.thread_rank() == 0)
    {
        StatusHandler::handle(status, status_out, gid);
    }
}
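// retrieve with status reporting: the looked-up value is written back only
// if the probe finished cleanly (!status.has_any()); the status itself is
// reported either way.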
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < num_in)
{
    typename Core::value_type value_out;
    const auto status = core.retrieve(keys_in[gid], value_out, group, probing_length);

    if(group.thread_rank() == 0)
    {
        // write the result only if the lookup succeeded
        if(!status.has_any())
        {
            values_out[gid] = value_out;
        }

        StatusHandler::handle(status, status_out, gid);
    }
}
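// second phase of a two-pass multi-value retrieve: an earlier pass counted
// the values per key and produced begin/end offsets; this kernel writes
// each key's values at values_out + begin_offsets_in[gid]. If the count
// written now differs from the offset gap, the table changed between the
// passes, so invalid_phase_overlap is joined into both the device-side and
// the per-key status.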
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

using status_type = typename Core::status_type;

if(gid < num_in)
{
    index_t num_out; // out-parameter: number of values actually written
    auto status = core.retrieve(
        keys_in[gid],
        values_out + begin_offsets_in[gid],
        num_out,
        group,
        probing_length);

    if(group.thread_rank() == 0)
    {
        const auto num_prev = end_offsets_in[gid] - begin_offsets_in[gid];

        if(num_prev != num_out)
        {
            core.device_join_status(status_type::invalid_phase_overlap());
            status += status_type::invalid_phase_overlap();
        }

        StatusHandler::handle(status, status_out, gid);
    }
}
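// erase: one cooperative group per input key; removes the key and reports
// the resulting status through lane 0.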
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < num_in)
{
    const auto status = core.erase(keys_in[gid], group, probing_length);

    if(group.thread_rank() == 0)
    {
        StatusHandler::handle(status, status_out, gid);
    }
}
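// size: counts occupied slots. Each block first accumulates into shared
// memory: threads whose slot is non-empty form a coalesced group so that a
// single atomicAdd per group updates the block counter, and a single
// atomicAdd per block updates the global counter.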
__shared__ index_t smem;

const index_t tid = helpers::global_thread_id();
const auto block = cg::this_thread_block();

if(tid < core.capacity())
{
    const bool empty = !core.is_valid_key(core.table_[tid].key);

    if(block.thread_rank() == 0)
    {
        smem = 0;
    }
    block.sync();

    if(!empty)
    {
        // threads holding occupied slots aggregate their count
        const auto active_threads = cg::coalesced_threads();
        if(active_threads.thread_rank() == 0)
        {
            atomicAdd(&smem, active_threads.size());
        }
    }
    block.sync();

    if(block.thread_rank() == 0 && smem != 0)
    {
        atomicAdd(num_out, smem);
    }
}
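// total value count for multi-value tables: same block-level aggregation
// as size above, but every occupied slot contributes the number of
// non-empty entries in its value bucket instead of 1.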
__shared__ index_t smem;

const index_t tid = helpers::global_thread_id();
const auto block = cg::this_thread_block();

if(tid < core.capacity())
{
    const bool empty = !core.is_valid_key(core.table_[tid].key);

    if(block.thread_rank() == 0)
    {
        smem = 0;
    }
    block.sync();

    if(!empty)
    {
        index_t value_count = 0;
        const auto bucket = core.table_[tid].value;
        for(int b = 0; b < core.bucket_size(); ++b)
        {
            const auto& value = bucket[b];
            if(value != core.empty_value())
            {
                ++value_count;
            }
        }
        atomicAdd(&smem, value_count);
    }
    block.sync();

    if(block.thread_rank() == 0 && smem != 0)
    {
        atomicAdd(num_out, smem);
    }
}
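// per-key value count: one cooperative group per input key; the count is
// optionally written to num_per_key_out and always added to the global
// total num_out.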
const index_t tid = helpers::global_thread_id();
const index_t gid = tid / Core::cg_size();
const auto group = cg::tiled_partition<Core::cg_size()>(cg::this_thread_block());

if(gid < num_in)
{
    index_t num; // out-parameter: number of values stored under this key
    const auto status = core.num_values(keys_in[gid], num, group, probing_length);

    if(group.thread_rank() == 0)
    {
        if(num_per_key_out != nullptr)
        {
            num_per_key_out[gid] = num;
        }

        atomicAdd(num_out, num);

        StatusHandler::handle(status, status_out, gid);
    }
}

#endif // WARPCORE_GPU_ENGINE_CUH