Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
memory.hpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
10#pragma once
11
19#include "shambase/string.hpp"
21#include "shambackends/sycl.hpp"
24#include "shambackends/vec.hpp"
25#include <fmt/base.h>
26
27namespace shamalgs::memory {
28
38 template<class T>
39 T extract_element(sycl::queue &q, sycl::buffer<T> &buf, u32 idx);
40
50 template<class T>
52 T val;
53
54 sham::EventList depends_list;
55 auto acc = buf.get_read_access(depends_list);
56
57 T *dest = &val;
58
59 q.submit(depends_list, [&, idx](sycl::handler &cgh) {
60 cgh.copy(acc + idx, dest, 1);
61 }).wait_and_throw();
62 ;
63
64 buf.complete_event_state(sycl::event{});
65
66 return val;
67 }
68
69 template<class T>
70 void set_element(
71 sycl::queue &q, sycl::buffer<T> &buf, u32 idx, T val, bool discard_write = false) {
72
73 if (discard_write) {
74 q.submit([&, idx, val](sycl::handler &cgh) {
75 sycl::accessor acc{buf, cgh, sycl::write_only, sycl::no_init};
76 cgh.single_task([=]() {
77 acc[idx] = val;
78 });
79 });
80 } else {
81 q.submit([&, idx, val](sycl::handler &cgh) {
82 sycl::accessor acc{buf, cgh, sycl::write_only};
83 cgh.single_task([=]() {
84 acc[idx] = val;
85 });
86 });
87 }
88 }
89
97 template<class T>
98 sycl::buffer<T> vec_to_buf(const std::vector<T> &buf);
99
108 template<class T>
109 std::vector<T> buf_to_vec(sycl::buffer<T> &buf, u32 len);
110
118 template<class T>
119 inline void move_buffer_on_queue(sycl::queue &q, sycl::buffer<T> &buf) {
120 sycl::buffer<T> tmp(1);
121 q.submit([&](sycl::handler &cgh) {
122 sycl::accessor a{buf, cgh, sycl::read_write};
123 sycl::accessor b{tmp, cgh, sycl::write_only, sycl::no_init};
124
125 cgh.single_task([=]() {
126 b[0] = a[0];
127 });
128 });
129 }
130
139 template<class T>
140 inline void buf_fill(sycl::queue &q, sycl::buffer<T> &buf, T value) {
141 StackEntry stack_loc{};
142 q.submit([&, value](sycl::handler &cgh) {
143 sycl::accessor acc{buf, cgh, sycl::write_only};
144 shambase::parallel_for(cgh, buf.size(), "buf_fill", [=](u64 id_a) {
145 acc[id_a] = value;
146 });
147 });
148 }
149
158 template<class T>
159 inline void buf_fill_discard(sycl::queue &q, sycl::buffer<T> &buf, T value) {
160 StackEntry stack_loc{};
161 q.submit([&, value](sycl::handler &cgh) {
162 sycl::accessor acc{buf, cgh, sycl::write_only, sycl::no_init};
163
164 shambase::parallel_for(cgh, buf.size(), "buff_fill_discard", [=](u64 id_a) {
165 acc[id_a] = value;
166 });
167 });
168 }
169
180 template<class T>
181 inline void print_buf(sycl::buffer<T> &buf, u32 len, u32 column_count, std::string_view fmt) {
182
183 sycl::host_accessor acc{buf, sycl::read_only};
184
185 std::string accum;
186
187 for (u32 i = 0; i < len; i++) {
188
189 if (i % column_count == 0) {
190 if (i == 0) {
191 accum += shambase::format("{:8} : ", i);
192 } else {
193 accum += shambase::format("\n{:8} : ", i);
194 }
195 }
196
197 accum += shambase::vformat(fmt, fmt::make_format_args(acc[i]));
198 }
199
200 logger::raw_ln(accum);
201 }
202
203 template<class T>
204 void copybuf_discard(sycl::queue &q, sycl::buffer<T> &source, sycl::buffer<T> &dest, u32 cnt) {
205 q.submit([&](sycl::handler &cgh) {
206 sycl::accessor src{source, cgh, sycl::read_only};
207 sycl::accessor dst{dest, cgh, sycl::write_only, sycl::no_init};
208
209 shambase::parallel_for(cgh, cnt, "copybuf_discard", [=](u64 i) {
210 dst[i] = src[i];
211 });
212 });
213 }
214
215 template<class T>
216 void copybuf(sycl::queue &q, sycl::buffer<T> &source, sycl::buffer<T> &dest, u32 cnt) {
217 q.submit([&](sycl::handler &cgh) {
218 sycl::accessor src{source, cgh, sycl::read_only};
219 sycl::accessor dst{dest, cgh, sycl::write_only};
220
221 shambase::parallel_for(cgh, cnt, "copybuf", [=](u64 i) {
222 dst[i] = src[i];
223 });
224 });
225 }
226
227 template<class T>
228 void add_with_factor_to(
229 sycl::queue &q, sycl::buffer<T> &buf, T factor, sycl::buffer<T> &op, u32 cnt) {
230 q.submit([&](sycl::handler &cgh) {
231 sycl::accessor acc{buf, cgh, sycl::read_write};
232 sycl::accessor dd{op, cgh, sycl::read_only};
233
234 T fac = factor;
235
236 shambase::parallel_for(cgh, cnt, "add_with_factor_to", [=](u64 i) {
237 acc[i] += fac * dd[i];
238 });
239 });
240 }
241
242 template<class T>
243 void write_with_offset_into(
244 sycl::queue &q,
245 sycl::buffer<T> &buf_ctn,
246 sycl::buffer<T> &buf_in,
247 u32 offset,
248 u32 element_count) {
249 q.submit([&](sycl::handler &cgh) {
250 sycl::accessor source{buf_in, cgh, sycl::read_only};
251 sycl::accessor dest{buf_ctn, cgh, sycl::write_only, sycl::no_init};
252 u32 off = offset;
253 cgh.parallel_for(sycl::range{element_count}, [=](sycl::item<1> item) {
254 dest[item.get_id(0) + off] = source[item];
255 });
256 });
257 }
258
270 template<class T>
271 void write_with_offset_into(
273 sham::DeviceBuffer<T> &buf_ctn,
274 sham::DeviceBuffer<T> &buf_in,
275 u32 offset,
276 u32 element_count) {
277
278 sham::EventList depends_list;
279 auto source = buf_in.get_read_access(depends_list);
280 auto dest = buf_ctn.get_write_access(depends_list);
281
282 auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
283 u32 off = offset;
284 cgh.parallel_for(sycl::range{element_count}, [=](sycl::item<1> item) {
285 dest[item.get_id(0) + off] = source[item];
286 });
287 });
288
289 buf_in.complete_event_state(e);
290 buf_ctn.complete_event_state(e);
291 }
292
303 template<class T>
304 void write_with_offset_into(
306 sycl::buffer<T> &buf_ctn,
307 sham::DeviceBuffer<T> &buf_in,
308 u32 offset,
309 u32 element_count) {
310
311 sham::EventList depends_list;
312 auto source = buf_in.get_read_access(depends_list);
313
314 auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
315 sycl::accessor dest{buf_ctn, cgh, sycl::write_only, sycl::no_init};
316 u32 off = offset;
317 cgh.parallel_for(sycl::range{element_count}, [=](sycl::item<1> item) {
318 dest[item.get_id(0) + off] = source[item];
319 });
320 });
321
322 buf_in.complete_event_state(e);
323 }
324
325 template<class T>
326 void write_with_offset_into(
327 sycl::queue &q, sycl::buffer<T> &buf_ctn, T val, u32 offset, u32 element_count) {
328 q.submit([&, val](sycl::handler &cgh) {
329 sycl::accessor dest{buf_ctn, cgh, sycl::write_only, sycl::no_init};
330 u32 off = offset;
331 cgh.parallel_for(sycl::range{element_count}, [=](sycl::item<1> item) {
332 dest[item.get_id(0) + off] = val;
333 });
334 });
335 }
336
337 template<class T>
338 std::unique_ptr<sycl::buffer<T>> duplicate(
339 sycl::queue &q, const std::unique_ptr<sycl::buffer<T>> &buf_in) {
340 if (buf_in) {
341 auto buf = std::make_unique<sycl::buffer<T>>(buf_in->size());
342 copybuf_discard(q, *buf_in, *buf, buf_in->size());
343 return std::move(buf);
344 }
345 return {};
346 }
347
348 template<class T>
349 sycl::buffer<T> vector_to_buf(sycl::queue &q, std::vector<T> &&vec) {
350
351 u32 cnt = vec.size();
352 sycl::buffer<T> ret(cnt);
353
354 sycl::buffer<T> alias(vec.data(), cnt);
355
356 shamalgs::memory::copybuf_discard(q, alias, ret, cnt);
357
358// HIPSYCL segfault otherwise because looks like the destructor of the sycl buffer
359// doesn't wait for the end of the queue resulting in out of bound access
360#ifdef SYCL_COMP_ACPP
361 q.wait();
362#endif
363
364 return std::move(ret);
365 }
366
375 template<class T>
376 sycl::buffer<T> vector_to_buf(sycl::queue &q, std::vector<T> &vec) {
377
378 u32 cnt = vec.size();
379 sycl::buffer<T> ret(cnt);
380
381 sycl::buffer<T> alias(vec.data(), cnt);
382
383 shamalgs::memory::copybuf_discard(q, alias, ret, cnt);
384
385// HIPSYCL segfault otherwise because looks like the destructor of the sycl buffer
386// doesn't wait for the end of the queue resulting in out of bound access
387#ifdef SYCL_COMP_ACPP
388 q.wait();
389#endif
390
391 return std::move(ret);
392 }
393
394} // namespace shamalgs::memory
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
memory manipulation algorithms
void buf_fill_discard(sycl::queue &q, sycl::buffer< T > &buf, T value)
Fill a buffer with a given value (sycl::no_init mode)
Definition memory.hpp:159
void buf_fill(sycl::queue &q, sycl::buffer< T > &buf, T value)
Fill a buffer with a given value.
Definition memory.hpp:140
T extract_element(sycl::queue &q, sycl::buffer< T > &buf, u32 idx)
extract a value of a buffer
Definition memory.cpp:24
void print_buf(sycl::buffer< T > &buf, u32 len, u32 column_count, std::string_view fmt)
Print the content of a sycl::buffer
Definition memory.hpp:181
sycl::buffer< T > vec_to_buf(const std::vector< T > &buf)
Convert a std::vector to a sycl::buffer
Definition memory.cpp:29
void move_buffer_on_queue(sycl::queue &q, sycl::buffer< T > &buf)
enqueue a do nothing kernel to force the buffer to move
Definition memory.hpp:119
std::vector< T > buf_to_vec(sycl::buffer< T > &buf, u32 len)
Convert a sycl::buffer to a std::vector
Definition memory.cpp:34
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.