38 template<
class Tvec, sham::USMKindTarget target>
42 using Tscal =
typename shambase::VectorProperties<Tvec>::component_type;
47 if constexpr (std::is_same_v<Tvec, Tscal>) {
49 }
else if constexpr (std::is_same_v<Tvec, sycl::vec<Tscal, 2>>) {
58 depends_list, [&](sycl::handler &cgh) {
59 cgh.parallel_for(buffer.
get_size(), [=](sycl::id<1> gid) {
60 Tvec tmp = ptr_src[gid];
61 ptr_dest[gid * 2 + 0] = tmp[0];
62 ptr_dest[gid * 2 + 1] = tmp[1];
71 }
else if constexpr (std::is_same_v<Tvec, sycl::vec<Tscal, 3>>) {
80 depends_list, [&](sycl::handler &cgh) {
81 cgh.parallel_for(buffer.
get_size(), [=](sycl::id<1> gid) {
82 Tvec tmp = ptr_src[gid];
83 ptr_dest[gid * 3 + 0] = tmp[0];
84 ptr_dest[gid * 3 + 1] = tmp[1];
85 ptr_dest[gid * 3 + 2] = tmp[2];
115 template<
class Tvec, sham::USMKindTarget target>
117 const sham::DeviceBuffer<
typename shambase::VectorProperties<Tvec>::component_type, target>
120 using Tscal =
typename shambase::VectorProperties<Tvec>::component_type;
125 if constexpr (std::is_same_v<Tscal, Tvec>) {
126 return buffer.copy();
127 }
else if constexpr (std::is_same_v<Tvec, sycl::vec<Tscal, 2>>) {
129 if (buffer.get_size() % 2 != 0) {
131 "The buffer must have an even number of elements");
137 const Tscal *ptr_src = buffer.get_read_access(depends_list);
140 sycl::event e = buffer.get_dev_scheduler().get_queue().submit(
141 depends_list, [&](sycl::handler &cgh) {
142 cgh.parallel_for(buffer.get_size() / 2, [=](sycl::id<1> gid) {
143 ptr_dest[gid] = Tvec{ptr_src[gid * 2 + 0], ptr_src[gid * 2 + 1]};
147 ret.complete_event_state(e);
148 buffer.complete_event_state(e);
152 }
else if constexpr (std::is_same_v<Tvec, sycl::vec<Tscal, 3>>) {
154 if (buffer.get_size() % 3 != 0) {
156 "The buffer must have a multiple of 3 elements");
162 const Tscal *ptr_src = buffer.get_read_access(depends_list);
165 sycl::event e = buffer.get_dev_scheduler().get_queue().submit(
166 depends_list, [&](sycl::handler &cgh) {
167 cgh.parallel_for(buffer.get_size() / 3, [=](sycl::id<1> gid) {
168 ptr_dest[gid] = Tvec{
169 ptr_src[gid * 3 + 0], ptr_src[gid * 3 + 1], ptr_src[gid * 3 + 2]};
173 ret.complete_event_state(e);
174 buffer.complete_event_state(e);
A buffer allocated in USM (Unified Shared Memory).
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
std::shared_ptr< DeviceScheduler > & get_dev_scheduler_ptr()
Gets the Device scheduler pointer corresponding to the held allocation.
size_t get_size() const
Gets the number of elements in the buffer.
DeviceScheduler & get_dev_scheduler() const
Gets the Device scheduler corresponding to the held allocation.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
DeviceBuffer< T, target > copy() const
Copy the current buffer.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
This header file contains utility functions related to exception handling in the code.
Namespace for primitive algorithms (e.g. sort, scan, reductions, ...).
sham::DeviceBuffer< Tvec, target > unflatten_buffer(const sham::DeviceBuffer< typename shambase::VectorProperties< Tvec >::component_type, target > &buffer)
Unflatten a buffer that contains a flattened vector.
sham::DeviceBuffer< typename shambase::VectorProperties< Tvec >::component_type, target > flatten_buffer(const sham::DeviceBuffer< Tvec, target > &buffer)
Flatten a buffer of vector type into a buffer of scalar type.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.