26class StreamCompactionAlg;
28namespace shamalgs::numeric::details {
31 sycl::queue &q, sycl::buffer<u32> &buf_flags,
u32 len) {
49 shamlog_debug_sycl_ln(
"StreamCompact",
"number of element : ", new_len);
55 constexpr u32 group_size = 256;
58 group_cnt = group_cnt + (group_cnt % 4);
59 u32 corrected_len = group_cnt * group_size;
62 sycl::buffer<u32> index_map{new_len};
64 q.submit([&, max_len](sycl::handler &cgh) {
65 sycl::accessor sum_vals{excl_sum, cgh, sycl::read_only};
66 sycl::accessor new_idx{index_map, cgh, sycl::write_only, sycl::no_init};
68 u32 last_idx = len - 1;
69 u32 last_flag = end_flag;
71 cgh.parallel_for<StreamCompactionAlg>(
73 sycl::nd_range<1>{corrected_len, group_size}, [=](sycl::nd_item<1> id) {
74 u32 local_id =
id.get_local_id(0);
75 u32 group_tile_id =
id.get_group_linear_id();
76 u32 idx = group_tile_id * group_size + local_id;
81 u32 current_val = sum_vals[idx];
83 bool _if1 = (idx < last_idx);
85 = (_if1) ? (current_val < sum_vals[idx + 1]) : (bool(last_flag));
88 new_idx[current_val] = idx;
93 return {std::move(index_map), new_len};
128 last_flag = end_flag](
u32 idx,
const u32 *sum_vals,
u32 *new_idx) {
129 u32 current_val = sum_vals[idx];
131 bool _if1 = (idx < last_idx);
133 = (_if1) ? (current_val < sum_vals[idx + 1]) : (bool(last_flag));
143 new_idx[current_val] = idx;
std::uint32_t u32
32-bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
T get_val_at_idx(size_t idx) const
Get the value at a given index in the buffer.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
T extract_element(sycl::queue &q, sycl::buffer< T > &buf, u32 idx)
Extract a value from a buffer.
sycl::buffer< T > scan_exclusive(sycl::queue &q, sycl::buffer< T > &buf1, u32 len)
Computes the exclusive sum of elements in a SYCL buffer.
constexpr u32 group_count(u32 len, u32 group_size)
Calculates the number of groups based on the length and group size.
std::tuple< std::optional< sycl::buffer< u32 > >, u32 > stream_compact_fallback(sycl::queue &q, sycl::buffer< u32 > &buf_flags, u32 len)
Fallback implementation of the stream compaction algorithm.
Main include file for memory algorithms.
std::tuple< std::optional< sycl::buffer< u32 > >, u32 > stream_compact_excl_scan(sycl::queue &q, sycl::buffer< u32 > &buf_flags, u32 len)
Stream compaction algorithm using exclusive summation.
A class that references multiple buffers or similar objects.