Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
pyCommonUtils.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
25#include "shamcomm/logs.hpp"
28#include <pybind11/cast.h>
29#include <pybind11/complex.h>
30#include <shambackends/sycl.hpp>
31#include <vector>
32
33// Define the operator += for sham::DeviceBuffer, implementation to be done later.
34namespace sham {
35 template<typename T>
36 DeviceBuffer<T> &operator+=(DeviceBuffer<T> &lhs, const DeviceBuffer<T> &rhs) {
38 rhs.get_queue(),
39 sham::MultiRef{rhs},
40 sham::MultiRef{lhs},
41 lhs.get_size(),
42 [](u32 n, const T *rhs, T *lhs) {
43 lhs[n] += rhs[n];
44 });
45 return lhs;
46 }
47
48 template<typename T>
49 DeviceBuffer<T> &operator/=(DeviceBuffer<T> &lhs, const DeviceBuffer<T> &rhs) {
51 rhs.get_queue(),
52 sham::MultiRef{rhs},
53 sham::MultiRef{lhs},
54 lhs.get_size(),
55 [](u32 n, const T *rhs, T *lhs) {
56 auto r = rhs[n];
57 if (r != 0) {
58 lhs[n] /= r;
59 } else {
60 lhs[n] = std::numeric_limits<f64>::quiet_NaN();
61 }
62 });
63 return lhs;
64 }
65
66} // namespace sham
67
68Register_pymod(shammodelcommonlibinit) {
69
70 m.def(
71 "compute_histogram",
72 [](std::vector<f64> bin_edges,
75 bool do_average) -> std::vector<f64> {
76 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
77
78 u32 nx = bin_edges.size() - 1;
79 std::vector<f64> bin_edge_inf(nx);
80 std::vector<f64> bin_edge_sup(nx);
81
82 for (size_t i = 0; i < nx; i++) {
83 bin_edge_inf[i] = bin_edges[i];
84 bin_edge_sup[i] = bin_edges[i + 1];
85 }
86
87 sham::DeviceBuffer<f64> ret(bin_edge_inf.size(), dev_sched);
88 ret.fill(0);
89
90 sham::DeviceBuffer<f64> bin_inf(bin_edge_inf.size(), dev_sched);
91 sham::DeviceBuffer<f64> bin_sup(bin_edge_inf.size(), dev_sched);
92 bin_inf.copy_from_stdvec(bin_edge_inf);
93 bin_sup.copy_from_stdvec(bin_edge_sup);
94
95 shambase::DistributedData<u32> obj_cnts = x_field.get_obj_cnts();
96
97 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
98 ret += shamalgs::primitives::compute_histogram<f64>(
99 dev_sched,
100 bin_inf,
101 bin_sup,
102 obj_cnt,
103 [](const f64 &bin_edge_inf,
104 const f64 &bin_edge_sup,
105 const f64 &x_val,
106 const f64 &y_val,
107 bool &has_value) {
108 has_value = x_val >= bin_edge_inf && x_val < bin_edge_sup;
109 return has_value ? y_val : 0;
110 },
111 x_field.get_buf(id_patch),
112 y_field.get_buf(id_patch));
113 });
114
115 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
116
117 if (do_average) {
118
119 sham::DeviceBuffer<f64> norm(bin_edge_inf.size(), dev_sched);
120 norm.fill(0);
121
122 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
123 sham::DeviceBuffer<f64> unit_buf(obj_cnt, dev_sched);
124 unit_buf.fill(1);
125
126 norm += shamalgs::primitives::compute_histogram<f64>(
127 dev_sched,
128 bin_inf,
129 bin_sup,
130 obj_cnt,
131 [](const f64 &bin_edge_inf,
132 const f64 &bin_edge_sup,
133 const f64 &x_val,
134 const f64 &y_val,
135 bool &has_value) {
136 has_value = x_val >= bin_edge_inf && x_val < bin_edge_sup;
137 return has_value ? y_val : 0;
138 },
139 x_field.get_buf(id_patch),
140 unit_buf);
141 });
142
143 shamalgs::collective::reduce_buffer_in_place_sum(norm, MPI_COMM_WORLD);
144
145 ret /= norm;
146 }
147
148 return ret.copy_to_stdvec();
149 },
150 py::kw_only{},
151 py::arg("bin_edges"),
152 py::arg("x_field"),
153 py::arg("y_field"),
154 py::arg("do_average") = false);
155
156 m.def(
157 "compute_histogram_convolve_x",
158 [](std::vector<f64> bin_edges,
162 bool do_average) -> std::vector<f64> {
163 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
164
165 u32 nx = bin_edges.size() - 1;
166 std::vector<f64> bin_edge_inf(nx);
167 std::vector<f64> bin_edge_sup(nx);
168
169 for (size_t i = 0; i < nx; i++) {
170 bin_edge_inf[i] = bin_edges[i];
171 bin_edge_sup[i] = bin_edges[i + 1];
172 }
173
174 sham::DeviceBuffer<f64> ret(bin_edge_inf.size(), dev_sched);
175 ret.fill(0);
176
177 sham::DeviceBuffer<f64> bin_inf(bin_edge_inf.size(), dev_sched);
178 sham::DeviceBuffer<f64> bin_sup(bin_edge_inf.size(), dev_sched);
179 bin_inf.copy_from_stdvec(bin_edge_inf);
180 bin_sup.copy_from_stdvec(bin_edge_sup);
181
182 shambase::DistributedData<u32> obj_cnts = x_field.get_obj_cnts();
183
184 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
185 ret += shamalgs::primitives::compute_histogram<f64>(
186 dev_sched,
187 bin_inf,
188 bin_sup,
189 obj_cnt,
190 [](const f64 &bin_edge_inf,
191 const f64 &bin_edge_sup,
192 const f64 &x_val,
193 const f64 &y_val,
194 const f64 &size_val,
195 bool &has_value) {
196 has_value
197 = x_val >= bin_edge_inf - size_val && x_val < bin_edge_sup + size_val;
198 return has_value ? y_val : 0;
199 },
200 x_field.get_buf(id_patch),
201 y_field.get_buf(id_patch),
202 size_field.get_buf(id_patch));
203 });
204
205 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
206
207 if (do_average) {
208
209 sham::DeviceBuffer<f64> norm(bin_edge_inf.size(), dev_sched);
210 norm.fill(0);
211
212 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
213 sham::DeviceBuffer<f64> unit_buf(obj_cnt, dev_sched);
214 unit_buf.fill(1);
215
216 norm += shamalgs::primitives::compute_histogram<f64>(
217 dev_sched,
218 bin_inf,
219 bin_sup,
220 obj_cnt,
221 [](const f64 &bin_edge_inf,
222 const f64 &bin_edge_sup,
223 const f64 &x_val,
224 const f64 &y_val,
225 const f64 &size_val,
226 bool &has_value) {
227 has_value = x_val >= bin_edge_inf - size_val
228 && x_val < bin_edge_sup + size_val;
229 return has_value ? y_val : 0;
230 },
231 x_field.get_buf(id_patch),
232 unit_buf,
233 size_field.get_buf(id_patch));
234 });
235
236 shamalgs::collective::reduce_buffer_in_place_sum(norm, MPI_COMM_WORLD);
237
238 ret /= norm;
239 }
240
241 return ret.copy_to_stdvec();
242 },
243 py::kw_only{},
244 py::arg("bin_edges"),
245 py::arg("x_field"),
246 py::arg("y_field"),
247 py::arg("size_field"),
248 py::arg("do_average") = false);
249
250 m.def(
251 "compute_histogram_2d",
252 [](std::vector<f64> bin_edges_x,
253 std::vector<f64> bin_edges_y,
255 shamrock::solvergraph::Field<f64> &y_field) -> std::vector<u64> {
256 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
257
258 u32 nx = bin_edges_x.size() - 1;
259 u32 ny = bin_edges_y.size() - 1;
260
261 sham::DeviceBuffer<u64> ret(nx * ny, dev_sched);
262 ret.fill(0);
263
264 shambase::DistributedData<u32> obj_cnts = x_field.get_obj_cnts();
265
266 sham::DeviceBuffer<f64> binsx(bin_edges_x.size(), dev_sched);
267 sham::DeviceBuffer<f64> binsy(bin_edges_y.size(), dev_sched);
268 binsx.copy_from_stdvec(bin_edges_x);
269 binsy.copy_from_stdvec(bin_edges_y);
270
271 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
273 dev_sched->get_queue(),
275 binsx, binsy, x_field.get_buf(id_patch), y_field.get_buf(id_patch)},
276 sham::MultiRef{ret},
277 obj_cnt,
278 [nx, ny](
279 u32 id,
280 const f64 *__restrict x_bins,
281 const f64 *__restrict y_bins,
282 const f64 *__restrict x_field,
283 const f64 *__restrict y_field,
284 u64 *__restrict pic) {
285 auto get_pic_coord = [&](u32 ix, u32 iy) {
286 return ix + iy * nx;
287 };
288
289 f64 x_val = x_field[id];
290 f64 y_val = y_field[id];
291
292 bool is_in_x_range = x_bins[0] <= x_val && x_val <= x_bins[nx];
293 bool is_in_y_range = y_bins[0] <= y_val && y_val <= y_bins[ny];
294
295 if (!(is_in_x_range && is_in_y_range)) {
296 return;
297 }
298
300 x_bins, 0, nx + 1, x_val);
302 y_bins, 0, ny + 1, y_val);
303
304 if (ix >= nx || iy >= ny) {
305 return;
306 }
307
308 using atomic_ref_T = sycl::atomic_ref<
309 u64,
310 sycl::memory_order_relaxed,
311 sycl::memory_scope_device,
312 sycl::access::address_space::global_space>;
313
314 atomic_ref_T pic_ref(pic[get_pic_coord(ix, iy)]);
315 pic_ref++;
316 });
317 });
318
319 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
320
321 return ret.copy_to_stdvec();
322 },
323 py::kw_only{},
324 py::arg("bin_edges_x"),
325 py::arg("bin_edges_y"),
326 py::arg("x_field"),
327 py::arg("y_field"));
328}
Header file describing a Node Instance.
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
Represents a collection of objects distributed across patches identified by a u64 id.
void for_each(std::function< void(u64, T &)> &&f)
Applies a function to each object in the collection.
namespace for backends this one is named only sham since shambackends is too long to write
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
constexpr u32 binary_search_upper_bound(const Tkey *__restrict__ key, u32 first, u32 last, const Tkey &value)
GPU compatible implementation of std::upper_bound.
Pybind11 include and definitions.
#define Register_pymod(placeholdername)
Register a python module init function using static initialisation.
A class that references multiple buffers or similar objects.
GPU compatible implementation of std::upper_bound.