28namespace shamalgs::reduction {
// Entry point for the sum reduction: forwards (q, buf1, start_id, end_id)
// unchanged to a details:: backend's sum() and returns that result as T.
//   q        - SYCL queue handed to the backend.
//   buf1     - SYCL buffer of T handed to the backend.
//   start_id - first bound of the index range handed to the backend.
//   end_id   - second bound of the index range handed to the backend.
// NOTE(review): whether end_id is inclusive or exclusive is decided by the
// details:: implementations - confirm there.
31 T sum(sycl::queue &q, sycl::buffer<T> &buf1,
u32 start_id,
u32 end_id) {
// Backend selection happens at preprocessing time, keyed on the
// SYCL2020_FEATURE_GROUP_REDUCTION macro.
32#ifdef SYCL2020_FEATURE_GROUP_REDUCTION
// Group-reduction backend, instantiated with 32 as its second template
// argument (presumably the work-group size - TODO confirm in details::).
33 return details::GroupReduction<T, 32>::sum(q, buf1, start_id, end_id);
// Fallback backend. NOTE(review): presumably the alternative preprocessor
// branch; the directive separating it from the return above is not shown here.
35 return details::FallbackReduction<T>::sum(q, buf1, start_id, end_id);
// Entry point for the max reduction: passes q, buf1, start_id and end_id
// straight through to a details:: backend's max() and returns its T result.
//   q        - SYCL queue given to the backend.
//   buf1     - SYCL buffer of T given to the backend.
//   start_id - first bound of the index range given to the backend.
//   end_id   - second bound of the index range given to the backend.
// NOTE(review): the inclusive/exclusive convention of end_id is set by the
// details:: implementations - confirm there.
40 T max(sycl::queue &q, sycl::buffer<T> &buf1,
u32 start_id,
u32 end_id) {
// The backend is chosen by the preprocessor from the
// SYCL2020_FEATURE_GROUP_REDUCTION macro.
41#ifdef SYCL2020_FEATURE_GROUP_REDUCTION
// Group-reduction backend; the template argument 32 is presumably the
// work-group size - TODO confirm in details::GroupReduction.
42 return details::GroupReduction<T, 32>::max(q, buf1, start_id, end_id);
// Fallback backend. NOTE(review): presumably the other preprocessor branch;
// the directive that selects it is not shown here.
44 return details::FallbackReduction<T>::max(q, buf1, start_id, end_id);
// Entry point for the min reduction: hands q, buf1, start_id and end_id to a
// details:: backend's min() without modification and returns its T result.
//   q        - SYCL queue passed on to the backend.
//   buf1     - SYCL buffer of T passed on to the backend.
//   start_id - first bound of the index range passed on to the backend.
//   end_id   - second bound of the index range passed on to the backend.
// NOTE(review): end_id's inclusive/exclusive meaning comes from the details::
// implementations - confirm there.
49 T min(sycl::queue &q, sycl::buffer<T> &buf1,
u32 start_id,
u32 end_id) {
// Which backend is compiled in depends on the
// SYCL2020_FEATURE_GROUP_REDUCTION macro.
50#ifdef SYCL2020_FEATURE_GROUP_REDUCTION
// Group-reduction backend; second template argument 32 is presumably the
// work-group size - TODO confirm in details::GroupReduction.
51 return details::GroupReduction<T, 32>::min(q, buf1, start_id, end_id);
// Fallback backend. NOTE(review): presumably the alternative preprocessor
// branch; the selecting directive is not shown here.
53 return details::FallbackReduction<T>::min(q, buf1, start_id, end_id);
63 sycl::host_accessor acc{buf, sycl::read_only};
65 for (
u32 i = 0; i < cnt; i++) {
66 res = res && (acc[i] != 0);
// Launches a kernel on q that evaluates sham::has_nan on each of the first
// cnt elements of buf and stores the NEGATED result as a u8 flag.
//   q   - SYCL queue the kernel is submitted to.
//   buf - input buffer, accessed read-only.
//   cnt - number of elements processed (size of the flag buffer and of the
//         kernel range).
// NOTE(review): the statements that turn the flags into the bool return value
// are not shown here. If they use is_all_true(buf, u32 cnt), cnt is narrowed
// from u64 to u32 - confirm.
74 bool has_nan(sycl::queue &q, sycl::buffer<T> &buf,
u64 cnt) {
// One u8 flag per input element.
77 sycl::buffer<u8> res(cnt);
78 q.submit([&](sycl::handler &cgh) {
79 sycl::accessor acc1{buf, cgh, sycl::read_only};
// write_only + no_init: the kernel overwrites the flags and does not need
// their previous contents.
81 sycl::accessor out{res, cgh, sycl::write_only, sycl::no_init};
// One work-item per element, over a 1-D range of cnt items.
83 cgh.parallel_for(sycl::range{cnt}, [=](sycl::item<1> item) {
// Inverted flag: non-zero where sham::has_nan reports false for the element
// (presumably meaning "no NaN here" - confirm sham::has_nan semantics).
84 out[item] = !sham::has_nan(acc1[item]);
// Submits a kernel to q that applies sham::has_inf to each of the first cnt
// elements of buf and records the NEGATED result in a u8 flag buffer.
//   q   - SYCL queue receiving the kernel.
//   buf - input buffer, opened read-only.
//   cnt - element count; sizes both the flag buffer and the kernel range.
// NOTE(review): the code reducing the flags to the returned bool is not shown
// here. If it calls is_all_true(buf, u32 cnt), the u64 cnt is narrowed to
// u32 - confirm.
95 bool has_inf(sycl::queue &q, sycl::buffer<T> &buf,
u64 cnt) {
// Flag storage: one u8 per element.
98 sycl::buffer<u8> res(cnt);
99 q.submit([&](sycl::handler &cgh) {
100 sycl::accessor acc1{buf, cgh, sycl::read_only};
// Flags are only written; no_init says their old contents need not be kept.
102 sycl::accessor out{res, cgh, sycl::write_only, sycl::no_init};
// 1-D launch with cnt work-items, one per element.
104 cgh.parallel_for(sycl::range{cnt}, [=](sycl::item<1> item) {
// Inverted flag: non-zero where sham::has_inf reports false for the element
// (presumably "not infinite" - confirm sham::has_inf semantics).
105 out[item] = !sham::has_inf(acc1[item]);
// Runs a kernel on q that calls sham::has_nan_or_inf on each of the first cnt
// elements of buf and writes the NEGATED result into a u8 flag buffer.
//   q   - SYCL queue used for the submission.
//   buf - input buffer, read-only in the kernel.
//   cnt - number of elements; used for the flag buffer size and kernel range.
// NOTE(review): the part that converts the flags into the bool result is not
// shown here. If it goes through is_all_true(buf, u32 cnt), cnt is narrowed
// from u64 to u32 - confirm.
116 bool has_nan_or_inf(sycl::queue &q, sycl::buffer<T> &buf,
u64 cnt) {
// Per-element u8 flags.
119 sycl::buffer<u8> res(cnt);
120 q.submit([&](sycl::handler &cgh) {
121 sycl::accessor acc1{buf, cgh, sycl::read_only};
// Write-only access with no_init: previous flag contents are discarded.
123 sycl::accessor out{res, cgh, sycl::write_only, sycl::no_init};
// One work-item for each of the cnt elements.
125 cgh.parallel_for(sycl::range{cnt}, [=](sycl::item<1> item) {
// Inverted flag: non-zero where sham::has_nan_or_inf reports false
// (presumably "finite value" - confirm sham::has_nan_or_inf semantics).
126 out[item] = !sham::has_nan_or_inf(acc1[item]);
160 template _arg_ sum(sycl::queue &q, sycl::buffer<_arg_> &buf1, u32 start_id, u32 end_id); \
161 template _arg_ max(sycl::queue &q, sycl::buffer<_arg_> &buf1, u32 start_id, u32 end_id); \
162 template _arg_ min(sycl::queue &q, sycl::buffer<_arg_> &buf1, u32 start_id, u32 end_id); \
163 template bool has_nan(sycl::queue &q, sycl::buffer<_arg_> &buf1, u64 cnt); \
164 template bool has_inf(sycl::queue &q, sycl::buffer<_arg_> &buf1, u64 cnt); \
165 template bool has_nan_or_inf(sycl::queue &q, sycl::buffer<_arg_> &buf1, u64 cnt);
std::uint32_t u32
32-bit unsigned integer
std::uint64_t u64
64-bit unsigned integer
bool is_all_true(sycl::buffer< T > &buf, u32 cnt)
Check if all elements in a sycl::buffer are non-zero.
Main include file for memory algorithms.