27 using Tscal = shambase::VecComponent<T>;
29 if (start_id == end_id) {
33 if (start_id > end_id) {
35 shambase::format(
"start_id > end_id : {} > {}", start_id, end_id));
45 [start_id](
u32 i,
const T *__restrict buf1, Tscal *__restrict out) {
46 T in = buf1[i + start_id];
47 out[i] = sham::dot(in, in);
78 template shambase::VecComponent<_arg_> dot_sum( \
79 sham::DeviceBuffer<_arg_> &buf1, u32 start_id, u32 end_id);
std::uint32_t u32
32-bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
DeviceQueue & get_queue() const
Gets the DeviceQueue associated with the held allocation.
std::shared_ptr< DeviceScheduler > & get_dev_scheduler_ptr()
Gets the device scheduler pointer corresponding to the held allocation.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
Namespace for primitive algorithms (e.g. sort, scan, reductions, ...)
T sum(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &buf1, u32 start_id, u32 end_id)
Compute the sum of elements in a device buffer within a specified range.
shambase::VecComponent< T > dot_sum(sham::DeviceBuffer< T > &buf1, u32 start_id, u32 end_id)
Compute the sum of dot products of elements in a device buffer with themselves.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
A class that references multiple buffers or similar objects.