33 sycl::queue &q, sycl::buffer<T> &buf, sycl::buffer<u32> &index_map,
u32 len) {
35 sycl::buffer<T> ret(len);
37 q.submit([&](sycl::handler &cgh) {
38 sycl::accessor in{buf, cgh, sycl::read_only};
39 sycl::accessor out{ret, cgh, sycl::write_only, sycl::no_init};
40 sycl::accessor permut{index_map, cgh, sycl::read_only};
42 cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
43 out[item] = in[permut[item]];
47 return std::move(ret);
52 sycl::queue &q, sycl::buffer<T> &buf, sycl::buffer<u32> &index_map,
u32 len,
u32 nvar) {
54 sycl::buffer<T> ret(len * nvar);
56 q.submit([&](sycl::handler &cgh) {
57 sycl::accessor in{buf, cgh, sycl::read_only};
58 sycl::accessor out{ret, cgh, sycl::write_only, sycl::no_init};
59 sycl::accessor permut{index_map, cgh, sycl::read_only};
63 cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
64 u32 in_id = permut[item] * nvar_loc;
65 u32 out_id = item.get_linear_id() * nvar_loc;
67 for (
u32 a = 0; a < nvar_loc; a++) {
68 out[out_id + a] = in[in_id + a];
73 return std::move(ret);
78 const sham::DeviceScheduler_ptr &sched_ptr,
92 auto e = q.
submit(el, [&](sycl::handler &cgh) {
93 cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
94 out[item] = in[permut[item]];
105 const sham::DeviceScheduler_ptr &sched_ptr,
120 auto e = q.
submit(el, [&](sycl::handler &cgh) {
123 cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
124 u32 in_id = permut[item] * nvar_loc;
125 u32 out_id = item.get_linear_id() * nvar_loc;
127 for (
u32 a = 0; a < nvar_loc; a++) {
128 out[out_id + a] = in[in_id + a];
159 template sycl::buffer<_arg_> index_remap( \
160 sycl::queue &q, sycl::buffer<_arg_> &buf, sycl::buffer<u32> &index_map, u32 len); \
162 template sycl::buffer<_arg_> index_remap_nvar( \
164 sycl::buffer<_arg_> &buf, \
165 sycl::buffer<u32> &index_map, \
169 template void index_remap( \
170 const sham::DeviceScheduler_ptr &sched, \
171 sham::DeviceBuffer<_arg_> &source, \
172 sham::DeviceBuffer<_arg_> &dest, \
173 sham::DeviceBuffer<u32> &index_map, \
176 template void index_remap_nvar( \
177 const sham::DeviceScheduler_ptr &sched, \
178 sham::DeviceBuffer<_arg_> &source, \
179 sham::DeviceBuffer<_arg_> &dest, \
180 sham::DeviceBuffer<u32> &index_map, \
std::uint32_t u32
32-bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage a list of SYCL events.
main include file for the shamalgs algorithms
namespace to store algorithms implemented by shamalgs
sycl::buffer< T > index_remap(sycl::queue &q, sycl::buffer< T > &source_buf, sycl::buffer< u32 > &index_map, u32 len)
remap a buffer according to a given index map: result[i] = source[index_map[i]] for i in [0,len[
sycl::buffer< u32 > gen_buffer_index(sycl::queue &q, u32 len)
generate a buffer such that for i in [0,len[, buf[i] = i
sycl::buffer< T > index_remap_nvar(sycl::queue &q, sycl::buffer< T > &source_buf, sycl::buffer< u32 > &index_map, u32 len, u32 nvar)
remap a buffer (with multiple variables per index) according to a given index map: result[i*nvar + a] = source[index_map[i]*nvar + a] for i in [0,len[ and a in [0,nvar[
sycl::buffer< typename std::invoke_result_t< Fct, u32 > > gen_buffer_device(sycl::queue &q, u32 len, Fct &&func)
generate a buffer from a lambda expression based on the indexes
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...