23namespace shamalgs::numeric::details {
28 sycl::buffer<T> ret_buf(len);
33 sycl::host_accessor acc_src{buf1, sycl::read_only};
34 sycl::host_accessor acc_res{ret_buf, sycl::write_only, sycl::no_init};
36 for (
u32 idx = 0; idx < len; idx++) {
39 accum += acc_src[idx];
43 return std::move(ret_buf);
54 std::exclusive_scan(acc_src.begin(), acc_src.end(), acc_src.begin(), 0);
62 sycl::buffer<T> inclusive_sum_fallback(sycl::queue &q, sycl::buffer<T> &buf1,
u32 len) {
64 sycl::buffer<T> ret_buf(len);
69 sycl::host_accessor acc_src{buf1, sycl::read_only};
70 sycl::host_accessor acc_res{ret_buf, sycl::write_only, sycl::no_init};
72 for (
u32 idx = 0; idx < len; idx++) {
74 accum += acc_src[idx];
79 return std::move(ret_buf);
83 void exclusive_sum_in_place_fallback(sycl::queue &q, sycl::buffer<T> &buf1,
u32 len) {
88 sycl::host_accessor acc_src{buf1, sycl::read_write};
90 for (
u32 idx = 0; idx < len; idx++) {
94 accum += acc_src[idx];
102 void inclusive_sum_in_place_fallback(sycl::queue &q, sycl::buffer<T> &buf1,
u32 len) {
107 sycl::host_accessor acc_src{buf1, sycl::read_write};
109 for (
u32 idx = 0; idx < len; idx++) {
111 accum += acc_src[idx];
112 acc_src[idx] = accum;
117 template sycl::buffer<u32> inclusive_sum_fallback(
118 sycl::queue &q, sycl::buffer<u32> &buf1,
u32 len);
124 sycl::queue &q, sycl::buffer<u32> &buf1,
u32 len);
126 template void exclusive_sum_in_place_fallback(sycl::queue &q, sycl::buffer<u32> &buf1,
u32 len);
128 template void inclusive_sum_in_place_fallback(sycl::queue &q, sycl::buffer<u32> &buf1,
u32 len);
131 sycl::queue &q, sycl::buffer<u32> &buf_flags,
u32 len) {
133 std::vector<u32> idxs;
136 sycl::host_accessor acc_src{buf_flags, sycl::read_only};
138 for (
u32 idx = 0; idx < len; idx++) {
156 std::vector<u32> idxs;
161 for (
u32 idx = 0; idx < len; idx++) {
std::uint32_t u32
32-bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void copy_from_stdvec(const std::vector< T > &vec)
Copy the content of a std::vector into the buffer.
std::vector< T > copy_to_stdvec() const
Copy the content of the buffer to a std::vector.
sycl::buffer< T > vec_to_buf(const std::vector< T > &buf)
Convert a std::vector to a sycl::buffer
sham::DeviceBuffer< T > exclusive_sum_fallback_usm(const sham::DeviceScheduler_ptr &sched, sham::DeviceBuffer< T > &buf1, u32 len)
Exclusive sum fallback on USM.
std::tuple< std::optional< sycl::buffer< u32 > >, u32 > stream_compact_fallback(sycl::queue &q, sycl::buffer< u32 > &buf_flags, u32 len)
Stream compaction algorithm on fallback.
sycl::buffer< T > exclusive_sum_fallback(sycl::queue &q, sycl::buffer< T > &buf1, u32 len)
Exclusive sum fallback on SYCL buffer.
Main include file for memory algorithms