22namespace shamalgs::reduction::details {
26 static T sum(sycl::queue &q, sycl::buffer<T> &buf1,
u32 start_id,
u32 end_id);
33 template<
class T,
class Op>
34 inline T reduce_sycl_2020(
35 sycl::queue &q, sycl::buffer<T> &buf1,
u32 start_id,
u32 end_id, Op op) {
37 u32 len = end_id - start_id;
39 sycl::buffer<T> buf_int(len);
40 shamalgs::memory::write_with_offset_into(q, buf_int, buf1, start_id, len);
42 sycl::buffer<T> recov{1};
44 q.submit([&](sycl::handler &cgh) {
45 sycl::accessor global_mem{buf_int, cgh, sycl::read_only};
47#ifdef SYCL_COMP_INTEL_LLVM
48 auto reduc = sycl::reduction(recov, cgh, op);
50 sycl::accessor acc_rec{recov, cgh, sycl::write_only, sycl::no_init};
51 auto reduc = sycl::reduction(acc_rec, op);
54 cgh.parallel_for(sycl::range<1>{len}, reduc, [=](sycl::id<1> idx,
auto &sum) {
55 sum.combine(global_mem[idx]);
61 sycl::host_accessor acc{recov, sycl::read_only};
69 inline T SYCL2020<T>::sum(sycl::queue &q, sycl::buffer<T> &buf1,
u32 start_id,
u32 end_id) {
70#ifdef SYCL_COMP_INTEL_LLVM
71 return reduce_sycl_2020(q, buf1, start_id, end_id, sycl::plus<>{});
75 return reduce_sycl_2020(q, buf1, start_id, end_id, sycl::plus<T>{});
// Glossary for the listing above:
//   u32: std::uint32_t, a 32-bit unsigned integer.
//   memory header: the main include file for memory algorithms.