77 bool equals(sycl::queue &q, sycl::buffer<T> &buf1, sycl::buffer<T> &buf2,
u32 cnt) {
79 if (buf1.size() < cnt) {
83 if (buf2.size() < cnt) {
93 auto e = q.submit([&](sycl::handler &cgh) {
95 sycl::accessor acc1{buf1, cgh, sycl::read_only};
96 sycl::accessor acc2{buf2, cgh, sycl::read_only};
98 cgh.parallel_for(sycl::range{cnt}, [=](sycl::item<1> item) {
99 out[item] = sham::equals(acc1[item], acc2[item]);
155 const sham::DeviceScheduler_ptr &dev_sched,
166 if (&buf1 == &buf2) {
187 [](
u32 i,
const T *__restrict acc1,
const T *__restrict acc2,
u8 *__restrict out) {
188 out[i] = sham::equals(acc1[i], acc2[i]);
236 const sham::DeviceScheduler_ptr &q,
280 bool equals(sycl::queue &q, sycl::buffer<T> &buf1, sycl::buffer<T> &buf2) {
281 bool same_size = buf1.size() == buf2.size();
286 return equals(q, buf1, buf2, buf1.size());
331 const std::unique_ptr<sycl::buffer<T>> &buf1,
332 const std::unique_ptr<sycl::buffer<T>> &buf2,
334 bool same_alloc = bool(buf1) == bool(buf2);
344 return equals(q, *buf1, *buf2, cnt);
388 const std::unique_ptr<sycl::buffer<T>> &buf1,
389 const std::unique_ptr<sycl::buffer<T>> &buf2) {
390 bool same_alloc = bool(buf1) == bool(buf2);
400 return equals(q, *buf1, *buf2);
Header file describing a Node Instance.
std::uint8_t u8
8 bit unsigned integer
std::uint32_t u32
32 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
size_t get_size() const
Gets the number of elements in the buffer.
Class to manage a list of SYCL events.
void set_consumed(bool consumed)
Set the consumed state of the EventList (to be used with interop)
std::vector< sycl::event > & get_events()
Get the list of events.
This header file contains utility functions related to exception handling in the code.
Boolean reduction algorithm for checking if all elements are non-zero.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
Namespace for primitive algorithms (e.g. sort, scan, reductions, ...)
bool equals(sycl::queue &q, sycl::buffer< T > &buf1, sycl::buffer< T > &buf2, u32 cnt)
Compare elements between two sycl::buffers for equality.
bool is_all_true(sycl::buffer< T > &buf, u32 cnt)
Check if all elements in a sycl::buffer are non-zero.
bool equals_ptr(sycl::queue &q, const std::unique_ptr< sycl::buffer< T > > &buf1, const std::unique_ptr< sycl::buffer< T > > &buf2)
Compare all elements between two unique_ptr-wrapped sycl::buffers.
bool equals_ptr_s(sycl::queue &q, const std::unique_ptr< sycl::buffer< T > > &buf1, const std::unique_ptr< sycl::buffer< T > > &buf2, u32 cnt)
Compare elements between two unique_ptr-wrapped sycl::buffers with count.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_error...
A class that references multiple buffers or similar objects.