25namespace shamtree::details {
27 inline void reorder_scan_dtt_result(
32 size_t interact_count = in_out.
get_size();
33 size_t offsets_count = N + 1;
35 offsets.
resize(offsets_count);
51 [N](
u32 i,
const u32_2 *__restrict__ in_out,
u32 *__restrict__ offsets) {
56 sycl::memory_order_relaxed,
57 sycl::memory_scope_device,
58 sycl::access::address_space::global_space>
59 atom(offsets[in_out[i].x()]);
69 in_out.get_size(), in_out.get_dev_scheduler_ptr());
81 const u32_2 *__restrict__ in_out,
82 u32_2 *__restrict__ in_out_sorted,
83 u32 *__restrict__ local_head) {
88 sycl::memory_order_relaxed,
89 sycl::memory_scope_device,
90 sycl::access::address_space::global_space>
91 atom(local_head[in_out[i].x()]);
93 u32 ret = atom.fetch_add(1_u32);
95 in_out_sorted[ret] = in_out[i];
99 shamalgs::primitives::segmented_sort_in_place(in_out_sorted, offsets);
101 in_out = std::move(in_out_sorted);
104 std::vector<u32_2> in_out_stdvec = in_out.copy_to_stdvec();
105 std::sort(in_out_stdvec.begin(), in_out_stdvec.end(), [](u32_2 a, u32_2 b) {
106 return (a.x() == b.x()) ? (a.y() < b.y()) : (a.x() < b.x());
108 in_out.copy_from_stdvec(in_out_stdvec);
std::uint32_t u32
32-bit unsigned integer
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
A buffer allocated in USM (Unified Shared Memory)
void resize(size_t new_size, bool keep_data=true)
Resizes the buffer to a given size.
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
size_t get_size() const
Gets the number of elements in the buffer.
DeviceScheduler & get_dev_scheduler() const
Gets the Device scheduler corresponding to the held allocation.
DeviceBuffer< T, target > copy() const
Copy the current buffer.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
void scan_exclusive_sum_in_place(sham::DeviceBuffer< T > &buf1, u32 len)
Compute exclusive prefix sum in-place on a device buffer.
In-place exclusive scan (prefix sum) algorithm for device buffers.
This file contains the definitions for the stacktrace-related functionality.
#define __shamrock_stack_entry()
Macro to create a stack entry.
A class that references multiple buffers or similar objects.