Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
pyCommonUtils.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
15
25#include "shamcomm/logs.hpp"
28#include <pybind11/cast.h>
29#include <pybind11/complex.h>
30#include <shambackends/sycl.hpp>
31#include <vector>
32
33// Define the operator += for sham::DeviceBuffer, implementation to be done later.
34namespace sham {
35 template<typename T>
36 DeviceBuffer<T> &operator+=(DeviceBuffer<T> &lhs, const DeviceBuffer<T> &rhs) {
38 rhs.get_queue(),
39 sham::MultiRef{rhs},
40 sham::MultiRef{lhs},
41 lhs.get_size(),
42 [](u32 n, const T *rhs, T *lhs) {
43 lhs[n] += rhs[n];
44 });
45 return lhs;
46 }
47
48 template<typename T>
49 DeviceBuffer<T> &operator/=(DeviceBuffer<T> &lhs, const DeviceBuffer<T> &rhs) {
51 rhs.get_queue(),
52 sham::MultiRef{rhs},
53 sham::MultiRef{lhs},
54 lhs.get_size(),
55 [](u32 n, const T *rhs, T *lhs) {
56 auto r = rhs[n];
57 if (r != 0) {
58 lhs[n] /= r;
59 } else {
60 lhs[n] = std::numeric_limits<f64>::quiet_NaN();
61 }
62 });
63 return lhs;
64 }
65
66} // namespace sham
67
69 auto &m = root_module;
70
71 m.def(
72 "compute_histogram",
73 [](std::vector<f64> bin_edges,
76 bool do_average) -> std::vector<f64> {
77 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
78
79 u32 nx = bin_edges.size() - 1;
80 std::vector<f64> bin_edge_inf(nx);
81 std::vector<f64> bin_edge_sup(nx);
82
83 for (size_t i = 0; i < nx; i++) {
84 bin_edge_inf[i] = bin_edges[i];
85 bin_edge_sup[i] = bin_edges[i + 1];
86 }
87
88 sham::DeviceBuffer<f64> ret(bin_edge_inf.size(), dev_sched);
89 ret.fill(0);
90
91 sham::DeviceBuffer<f64> bin_inf(bin_edge_inf.size(), dev_sched);
92 sham::DeviceBuffer<f64> bin_sup(bin_edge_inf.size(), dev_sched);
93 bin_inf.copy_from_stdvec(bin_edge_inf);
94 bin_sup.copy_from_stdvec(bin_edge_sup);
95
96 shambase::DistributedData<u32> obj_cnts = x_field.get_obj_cnts();
97
98 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
99 ret += shamalgs::primitives::compute_histogram<f64>(
100 dev_sched,
101 bin_inf,
102 bin_sup,
103 obj_cnt,
104 [](const f64 &bin_edge_inf,
105 const f64 &bin_edge_sup,
106 const f64 &x_val,
107 const f64 &y_val,
108 bool &has_value) {
109 has_value = x_val >= bin_edge_inf && x_val < bin_edge_sup;
110 return has_value ? y_val : 0;
111 },
112 x_field.get_buf(id_patch),
113 y_field.get_buf(id_patch));
114 });
115
116 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
117
118 if (do_average) {
119
120 sham::DeviceBuffer<f64> norm(bin_edge_inf.size(), dev_sched);
121 norm.fill(0);
122
123 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
124 sham::DeviceBuffer<f64> unit_buf(obj_cnt, dev_sched);
125 unit_buf.fill(1);
126
127 norm += shamalgs::primitives::compute_histogram<f64>(
128 dev_sched,
129 bin_inf,
130 bin_sup,
131 obj_cnt,
132 [](const f64 &bin_edge_inf,
133 const f64 &bin_edge_sup,
134 const f64 &x_val,
135 const f64 &y_val,
136 bool &has_value) {
137 has_value = x_val >= bin_edge_inf && x_val < bin_edge_sup;
138 return has_value ? y_val : 0;
139 },
140 x_field.get_buf(id_patch),
141 unit_buf);
142 });
143
144 shamalgs::collective::reduce_buffer_in_place_sum(norm, MPI_COMM_WORLD);
145
146 ret /= norm;
147 }
148
149 return ret.copy_to_stdvec();
150 },
151 py::kw_only{},
152 py::arg("bin_edges"),
153 py::arg("x_field"),
154 py::arg("y_field"),
155 py::arg("do_average") = false);
156
157 m.def(
158 "compute_histogram_convolve_x",
159 [](std::vector<f64> bin_edges,
163 bool do_average) -> std::vector<f64> {
164 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
165
166 u32 nx = bin_edges.size() - 1;
167 std::vector<f64> bin_edge_inf(nx);
168 std::vector<f64> bin_edge_sup(nx);
169
170 for (size_t i = 0; i < nx; i++) {
171 bin_edge_inf[i] = bin_edges[i];
172 bin_edge_sup[i] = bin_edges[i + 1];
173 }
174
175 sham::DeviceBuffer<f64> ret(bin_edge_inf.size(), dev_sched);
176 ret.fill(0);
177
178 sham::DeviceBuffer<f64> bin_inf(bin_edge_inf.size(), dev_sched);
179 sham::DeviceBuffer<f64> bin_sup(bin_edge_inf.size(), dev_sched);
180 bin_inf.copy_from_stdvec(bin_edge_inf);
181 bin_sup.copy_from_stdvec(bin_edge_sup);
182
183 shambase::DistributedData<u32> obj_cnts = x_field.get_obj_cnts();
184
185 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
186 ret += shamalgs::primitives::compute_histogram<f64>(
187 dev_sched,
188 bin_inf,
189 bin_sup,
190 obj_cnt,
191 [](const f64 &bin_edge_inf,
192 const f64 &bin_edge_sup,
193 const f64 &x_val,
194 const f64 &y_val,
195 const f64 &size_val,
196 bool &has_value) {
197 has_value
198 = x_val >= bin_edge_inf - size_val && x_val < bin_edge_sup + size_val;
199 return has_value ? y_val : 0;
200 },
201 x_field.get_buf(id_patch),
202 y_field.get_buf(id_patch),
203 size_field.get_buf(id_patch));
204 });
205
206 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
207
208 if (do_average) {
209
210 sham::DeviceBuffer<f64> norm(bin_edge_inf.size(), dev_sched);
211 norm.fill(0);
212
213 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
214 sham::DeviceBuffer<f64> unit_buf(obj_cnt, dev_sched);
215 unit_buf.fill(1);
216
217 norm += shamalgs::primitives::compute_histogram<f64>(
218 dev_sched,
219 bin_inf,
220 bin_sup,
221 obj_cnt,
222 [](const f64 &bin_edge_inf,
223 const f64 &bin_edge_sup,
224 const f64 &x_val,
225 const f64 &y_val,
226 const f64 &size_val,
227 bool &has_value) {
228 has_value = x_val >= bin_edge_inf - size_val
229 && x_val < bin_edge_sup + size_val;
230 return has_value ? y_val : 0;
231 },
232 x_field.get_buf(id_patch),
233 unit_buf,
234 size_field.get_buf(id_patch));
235 });
236
237 shamalgs::collective::reduce_buffer_in_place_sum(norm, MPI_COMM_WORLD);
238
239 ret /= norm;
240 }
241
242 return ret.copy_to_stdvec();
243 },
244 py::kw_only{},
245 py::arg("bin_edges"),
246 py::arg("x_field"),
247 py::arg("y_field"),
248 py::arg("size_field"),
249 py::arg("do_average") = false);
250
251 m.def(
252 "compute_histogram_2d",
253 [](std::vector<f64> bin_edges_x,
254 std::vector<f64> bin_edges_y,
256 shamrock::solvergraph::Field<f64> &y_field) -> std::vector<u64> {
257 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
258
259 u32 nx = bin_edges_x.size() - 1;
260 u32 ny = bin_edges_y.size() - 1;
261
262 sham::DeviceBuffer<u64> ret(nx * ny, dev_sched);
263 ret.fill(0);
264
265 shambase::DistributedData<u32> obj_cnts = x_field.get_obj_cnts();
266
267 sham::DeviceBuffer<f64> binsx(bin_edges_x.size(), dev_sched);
268 sham::DeviceBuffer<f64> binsy(bin_edges_y.size(), dev_sched);
269 binsx.copy_from_stdvec(bin_edges_x);
270 binsy.copy_from_stdvec(bin_edges_y);
271
272 obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
274 dev_sched->get_queue(),
276 binsx, binsy, x_field.get_buf(id_patch), y_field.get_buf(id_patch)},
277 sham::MultiRef{ret},
278 obj_cnt,
279 [nx, ny](
280 u32 id,
281 const f64 *__restrict x_bins,
282 const f64 *__restrict y_bins,
283 const f64 *__restrict x_field,
284 const f64 *__restrict y_field,
285 u64 *__restrict pic) {
286 auto get_pic_coord = [&](u32 ix, u32 iy) {
287 return ix + iy * nx;
288 };
289
290 f64 x_val = x_field[id];
291 f64 y_val = y_field[id];
292
293 bool is_in_x_range = x_bins[0] <= x_val && x_val <= x_bins[nx];
294 bool is_in_y_range = y_bins[0] <= y_val && y_val <= y_bins[ny];
295
296 if (!(is_in_x_range && is_in_y_range)) {
297 return;
298 }
299
301 x_bins, 0, nx + 1, x_val);
303 y_bins, 0, ny + 1, y_val);
304
305 if (ix >= nx || iy >= ny) {
306 return;
307 }
308
309 using atomic_ref_T = sycl::atomic_ref<
310 u64,
311 sycl::memory_order_relaxed,
312 sycl::memory_scope_device,
313 sycl::access::address_space::global_space>;
314
315 atomic_ref_T pic_ref(pic[get_pic_coord(ix, iy)]);
316 pic_ref++;
317 });
318 });
319
320 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
321
322 return ret.copy_to_stdvec();
323 },
324 py::kw_only{},
325 py::arg("bin_edges_x"),
326 py::arg("bin_edges_y"),
327 py::arg("x_field"),
328 py::arg("y_field"));
329}
Header file describing a Node Instance.
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory).
Represents a collection of objects distributed across patches identified by a u64 id.
void for_each(std::function< void(u64, T &)> &&f)
Applies a function to each object in the collection.
namespace for backends this one is named only sham since shambackends is too long to write
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
constexpr u32 binary_search_upper_bound(const Tkey *__restrict__ key, u32 first, u32 last, const Tkey &value)
GPU compatible implementation of std::upper_bound.
Pybind11 include and definitions.
#define ON_PYTHON_INIT
Register a Python module init function using static initialization.
A class that references multiple buffers or similar objects.
GPU compatible implementation of std::upper_bound.