Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
algorithm.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
18#include "shambase/memory.hpp"
22
23namespace shamalgs::algorithm {
24
25 sycl::buffer<u32> gen_buffer_index(sycl::queue &q, u32 len) {
26 return gen_buffer_device(q, len, [](u32 i) -> u32 {
27 return i;
28 });
29 }
30
31 template<class T>
32 sycl::buffer<T> index_remap(
33 sycl::queue &q, sycl::buffer<T> &buf, sycl::buffer<u32> &index_map, u32 len) {
34
35 sycl::buffer<T> ret(len);
36
37 q.submit([&](sycl::handler &cgh) {
38 sycl::accessor in{buf, cgh, sycl::read_only};
39 sycl::accessor out{ret, cgh, sycl::write_only, sycl::no_init};
40 sycl::accessor permut{index_map, cgh, sycl::read_only};
41
42 cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
43 out[item] = in[permut[item]];
44 });
45 });
46
47 return std::move(ret);
48 }
49
50 template<class T>
51 sycl::buffer<T> index_remap_nvar(
52 sycl::queue &q, sycl::buffer<T> &buf, sycl::buffer<u32> &index_map, u32 len, u32 nvar) {
53
54 sycl::buffer<T> ret(len * nvar);
55
56 q.submit([&](sycl::handler &cgh) {
57 sycl::accessor in{buf, cgh, sycl::read_only};
58 sycl::accessor out{ret, cgh, sycl::write_only, sycl::no_init};
59 sycl::accessor permut{index_map, cgh, sycl::read_only};
60
61 u32 nvar_loc = nvar;
62
63 cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
64 u32 in_id = permut[item] * nvar_loc;
65 u32 out_id = item.get_linear_id() * nvar_loc;
66
67 for (u32 a = 0; a < nvar_loc; a++) {
68 out[out_id + a] = in[in_id + a];
69 }
70 });
71 });
72
73 return std::move(ret);
74 }
75
76 template<class T>
77 void index_remap(
78 const sham::DeviceScheduler_ptr &sched_ptr,
81 sham::DeviceBuffer<u32> &index_map,
82 u32 len) {
83
84 sham::DeviceQueue &q = shambase::get_check_ref(sched_ptr).get_queue();
85
87
88 const T *in = source.get_read_access(el);
89 T *out = dest.get_write_access(el);
90 const u32 *permut = index_map.get_read_access(el);
91
92 auto e = q.submit(el, [&](sycl::handler &cgh) {
93 cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
94 out[item] = in[permut[item]];
95 });
96 });
97
98 source.complete_event_state(e);
100 index_map.complete_event_state(e);
101 }
102
103 template<class T>
104 void index_remap_nvar(
105 const sham::DeviceScheduler_ptr &sched_ptr,
106 sham::DeviceBuffer<T> &source,
108 sham::DeviceBuffer<u32> &index_map,
109 u32 len,
110 u32 nvar) {
111
112 sham::DeviceQueue &q = shambase::get_check_ref(sched_ptr).get_queue();
113
115
116 const T *in = source.get_read_access(el);
117 T *out = dest.get_write_access(el);
118 const u32 *permut = index_map.get_read_access(el);
119
120 auto e = q.submit(el, [&](sycl::handler &cgh) {
121 u32 nvar_loc = nvar;
122
123 cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
124 u32 in_id = permut[item] * nvar_loc;
125 u32 out_id = item.get_linear_id() * nvar_loc;
126
127 for (u32 a = 0; a < nvar_loc; a++) {
128 out[out_id + a] = in[in_id + a];
129 }
130 });
131 });
132
133 source.complete_event_state(e);
134 dest.complete_event_state(e);
135 index_map.complete_event_state(e);
136 }
137
138#define XMAC_TYPES \
139 X(f32) \
140 X(f32_2) \
141 X(f32_3) \
142 X(f32_4) \
143 X(f32_8) \
144 X(f32_16) \
145 X(f64) \
146 X(f64_2) \
147 X(f64_3) \
148 X(f64_4) \
149 X(f64_8) \
150 X(f64_16) \
151 X(u32) \
152 X(u64) \
153 X(u32_3) \
154 X(u64_3) \
155 X(i64_3) \
156 X(i64)
157
158#define X(_arg_) \
159 template sycl::buffer<_arg_> index_remap( \
160 sycl::queue &q, sycl::buffer<_arg_> &buf, sycl::buffer<u32> &index_map, u32 len); \
161 \
162 template sycl::buffer<_arg_> index_remap_nvar( \
163 sycl::queue &q, \
164 sycl::buffer<_arg_> &buf, \
165 sycl::buffer<u32> &index_map, \
166 u32 len, \
167 u32 nvar); \
168 \
169 template void index_remap( \
170 const sham::DeviceScheduler_ptr &sched, \
171 sham::DeviceBuffer<_arg_> &source, \
172 sham::DeviceBuffer<_arg_> &dest, \
173 sham::DeviceBuffer<u32> &index_map, \
174 u32 len); \
175 \
176 template void index_remap_nvar( \
177 const sham::DeviceScheduler_ptr &sched, \
178 sham::DeviceBuffer<_arg_> &source, \
179 sham::DeviceBuffer<_arg_> &dest, \
180 sham::DeviceBuffer<u32> &index_map, \
181 u32 len, \
182 u32 nvar);
183
184 XMAC_TYPES
185
186#undef X
187
188} // namespace shamalgs::algorithm
std::uint32_t u32
32 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
main include file for the shamalgs algorithms
namespace to store algorithms implemented by shamalgs
Definition algorithm.hpp:29
sycl::buffer< T > index_remap(sycl::queue &q, sycl::buffer< T > &source_buf, sycl::buffer< u32 > &index_map, u32 len)
remap a buffer according to a given index map: result[i] = source_buf[index_map[i]]
Definition algorithm.cpp:32
sycl::buffer< u32 > gen_buffer_index(sycl::queue &q, u32 len)
generate a buffer such that for i in [0,len[, buf[i] = i
Definition algorithm.cpp:25
sycl::buffer< T > index_remap_nvar(sycl::queue &q, sycl::buffer< T > &source_buf, sycl::buffer< u32 > &index_map, u32 len, u32 nvar)
remap a buffer (with multiple variables per index) according to a given index map: result[i*nvar + a] = source_buf[index_map[i]*nvar + a] for a in [0,nvar[
Definition algorithm.cpp:51
sycl::buffer< typename std::invoke_result_t< Fct, u32 > > gen_buffer_device(sycl::queue &q, u32 len, Fct &&func)
generate a buffer from a lambda expression based on the indexes
Definition algorithm.hpp:65
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110