Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
append_subset_to.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
21
22#define XMAC_LIST_ENABLED_FIELD \
23 X(f32) \
24 X(f32_2) \
25 X(f32_3) \
26 X(f32_4) \
27 X(f32_8) \
28 X(f32_16) \
29 X(f64) \
30 X(f64_2) \
31 X(f64_3) \
32 X(f64_4) \
33 X(f64_8) \
34 X(f64_16) \
35 X(u32) \
36 X(u64) \
37 X(u32_3) \
38 X(u64_3) \
39 X(i64_3) \
40 X(i64)
41
42namespace shamalgs::primitives {
43
44 template<class T>
45 void append_subset_to(
46 const sham::DeviceBuffer<T> &buf,
47 const sham::DeviceBuffer<u32> &idxs_buf,
48 u32 nvar,
49 sham::DeviceBuffer<T> &buf_other,
50 u32 start_enque) {
51
52 const u64 idx_count = idxs_buf.get_size();
53
54 if (idx_count == 0) {
55 return;
56 }
57
58 // TODO: flatten the kernel to squeeze more perf
59 sham::kernel_call(
60 idxs_buf.get_queue(),
61 sham::MultiRef{idxs_buf, buf},
62 sham::MultiRef{buf_other},
63 idx_count,
64 [nvar_loc = nvar, start_enque_loc = start_enque](
65 u32 gid,
66 const u32 *__restrict acc_idxs,
67 const T *__restrict acc_curr,
68 T *__restrict acc_other) {
69 u32 idx_extr = acc_idxs[gid] * nvar_loc;
70 u32 idx_push = start_enque_loc + gid * nvar_loc;
71
72 for (u32 a = 0; a < nvar_loc; a++) {
73 acc_other[idx_push + a] = acc_curr[idx_extr + a];
74 }
75 });
76 }
77
78#ifndef DOXYGEN
79 #define X(a) \
80 template void append_subset_to<a>( \
81 const sham::DeviceBuffer<a> &buf, \
82 const sham::DeviceBuffer<u32> &idxs_buf, \
83 u32 nvar, \
84 sham::DeviceBuffer<a> &buf_other, \
85 u32 start_enque);
86
87 XMAC_LIST_ENABLED_FIELD
88 #undef X
89#endif
90} // namespace shamalgs::primitives
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
DeviceQueue & get_queue() const
Gets the DeviceQueue associated with the held allocation.
size_t get_size() const
Gets the number of elements in the buffer.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
Namespace for primitive algorithms (e.g. sort, scan, reductions, ...)
void append_subset_to(const sham::DeviceBuffer< T > &buf, const sham::DeviceBuffer< u32 > &idxs_buf, u32 nvar, sham::DeviceBuffer< T > &buf_other, u32 start_enque)
Appends a subset of elements from one buffer to another.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
Utilities for safe type narrowing conversions.
A class that references multiple buffers or similar objects.