Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
AnalysisDisc.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
25#include "shamcomm/wrapper.hpp"
29#include <shambackends/sycl.hpp>
30#include <numeric>
31#include <vector>
32
33template<class Tvec, template<class> class SPHKernel>
// NOTE(review): the qualified function-name line (listing line 34) was dropped by the doc
// extractor — presumably `auto AnalysisDisc<Tvec, SPHKernel>::compute_analysis_basis(`.
// Purpose: build the per-particle radius and angular-momentum buffers, radially bin them,
// and compute the surface density Sigma; results are packed into an analysis_basis.
// Parameters: pmass = mass of one gas particle, [Rmin, Rmax] = radial binning range,
// Nbin = number of radial bins, context = simulation context providing the scheduler.
35    Tscal pmass, Tscal Rmin, Tscal Rmax, u32 Nbin, const ShamrockCtx &context) -> analysis_basis {
36
    // NOTE(review): the buffer is constructed with Nbin elements but immediately replaced by
    // linspace(Rmin, Rmax, Nbin + 1), which yields Nbin + 1 edges — presumably the move
    // assignment resizes the buffer, making the initial size irrelevant; confirm, otherwise
    // construct directly from the linspace result.
37    sham::DeviceBuffer<Tscal> bin_edges(Nbin, shamsys::instance::get_compute_scheduler_ptr());
38    bin_edges = shamalgs::primitives::linspace(Rmin, Rmax, Nbin + 1);
39
40    using namespace shamrock;
41    using namespace shamrock::patch;
42
43    PatchScheduler &sched = shambase::get_check_ref(context.sched);
44    PatchDataLayerLayout &pdl = sched.pdl_old();
45    const u32 ixyz = pdl.get_field_idx<Tvec>("xyz");
46    const u32 ivxyz = pdl.get_field_idx<Tvec>("vxyz");
47
    // Count rank-local particles and patches; parts_per_patch records each patch's particle
48    // get Npart
    // count so that an exclusive scan below gives each patch's start offset in the flat buffers.
49    u64 Npart = 0;
50    u64 Npatch = 0;
51    std::vector<u64> parts_per_patch{};
52    scheduler().for_each_patchdata_nonempty([&](Patch cur_p, shamrock::scheduler::PatchData &pdat) {
53        parts_per_patch.push_back(pdat.get_obj_cnt());
54        Npart += pdat.get_obj_cnt();
55        Npatch++;
56    });
57
    // Exclusive prefix sum: patch_start_index[k] = first flat-buffer index of patch k.
    // NOTE(review): u64 counts are scanned into a u32 vector with an int init value —
    // narrows silently if a rank holds >= 2^32 particles; verify that is out of scope.
58    std::vector<u32> patch_start_index(Npatch);
59    std::exclusive_scan(
60        parts_per_patch.begin(), parts_per_patch.end(), patch_start_index.begin(), 0);
61
    // Flat, rank-local per-particle outputs: cylindrical-free radius |r| and the three
    // components of the specific angular momentum times pmass, J = m (r x v).
62    sham::DeviceBuffer<Tscal> buf_radius(Npart, shamsys::instance::get_compute_scheduler_ptr());
63    sham::DeviceBuffer<Tscal> buf_Jx(Npart, shamsys::instance::get_compute_scheduler_ptr());
64    sham::DeviceBuffer<Tscal> buf_Jy(Npart, shamsys::instance::get_compute_scheduler_ptr());
65    sham::DeviceBuffer<Tscal> buf_Jz(Npart, shamsys::instance::get_compute_scheduler_ptr());
66
67    u32 i = 0;
68    scheduler().for_each_patchdata_nonempty(
69        [&, this](Patch cur_p, shamrock::scheduler::PatchData &pdat) {
70            u32 len = pdat.get_obj_cnt();
71            u32 start_write = patch_start_index[i];
72            sham::DeviceBuffer<Tvec> &buf_xyz = pdat.get_field_buf_ref<Tvec>(ixyz);
73            sham::DeviceBuffer<Tvec> &buf_vxyz = pdat.get_field_buf_ref<Tvec>(ivxyz);
74            sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
75
            // NOTE(review): the call-name line (listing line 76) was dropped by the doc
            // extractor — presumably `sham::kernel_call(` per the symbol index.
77                q,
78                sham::MultiRef{buf_xyz, buf_vxyz},
79                sham::MultiRef{buf_radius, buf_Jx, buf_Jy, buf_Jz},
80                len,
81                [pmass, start_write](
82                    u32 i,
83                    const Tvec *__restrict xyz,
84                    const Tvec *__restrict vxyz,
85                    Tscal *__restrict buf_radius,
86                    Tscal *__restrict buf_Jx,
87                    Tscal *__restrict buf_Jy,
88                    Tscal *__restrict buf_Jz) {
89                    using namespace shamrock::sph;
90                    Tvec pos = xyz[i];
91                    Tvec vel = vxyz[i];
92
                    // Spherical radius |r| and angular momentum J = pmass * (r x v).
                    // NOTE(review): sycl::cross(pos, vel) is recomputed three times — a
                    // hoisted temporary would avoid redundant work if not optimized away.
93                    Tscal radius = sycl::sqrt(sycl::dot(pos, pos));
94                    Tscal Jx = pmass * sycl::cross(pos, vel)[0];
95                    Tscal Jy = pmass * sycl::cross(pos, vel)[1];
96                    Tscal Jz = pmass * sycl::cross(pos, vel)[2];
97
                    // Write into the rank-global flat buffers at this patch's offset.
98                    buf_radius[i + start_write] = radius;
99                    buf_Jx[i + start_write] = Jx;
100                    buf_Jy[i + start_write] = Jy;
101                    buf_Jz[i + start_write] = Jz;
102                });
103
104            i++;
105        });
106
    // NOTE(review): the assignment lines (listing lines 107/110/113/116) were dropped —
    // per the symbol index these are presumably:
    //   auto histo     = shamalgs::numeric::device_histogram_full_mpi(...);   (counts per radial bin)
    //   auto binned_Jx = shamalgs::numeric::binned_average_mpi(...);          (and Jy, Jz below)
    // Confirm against the repository source.
108        shamsys::instance::get_compute_scheduler_ptr(), bin_edges, Nbin, buf_radius, Npart);
109
111        shamsys::instance::get_compute_scheduler_ptr(), bin_edges, Nbin, buf_Jx, buf_radius, Npart);
112
114        shamsys::instance::get_compute_scheduler_ptr(), bin_edges, Nbin, buf_Jy, buf_radius, Npart);
115
117        shamsys::instance::get_compute_scheduler_ptr(), bin_edges, Nbin, buf_Jz, buf_radius, Npart);
118
    // Surface density: each particle deposits pmass / (area of the annulus of width delta
    // centered on the particle's own radius) into its bin; bins are keyed by radius.
    // The radius buffer is both the binned value and the key, hence the copy.
119    auto buf_radius_key = buf_radius.copy();
120    auto Sigma = shamalgs::numeric::binned_compute<Tscal>(
121        shamsys::instance::get_compute_scheduler_ptr(),
122        bin_edges,
123        Nbin,
124        buf_radius,
125        buf_radius_key,
126        Npart,
        // bin_count is unused here: the contribution of each particle is accumulated directly.
127        [pmass, Rmin, Rmax, Nbin](auto for_each_values, u32 bin_count) {
128            Tscal sigma_bin = 0;
129            Tscal delta = (Rmax - Rmin) / Nbin;
130            Tscal delta_halfed = delta / 2;
131            for_each_values([&](Tscal val) {
                // Annulus area pi * ((r + d/2)^2 - (r - d/2)^2) centered on the particle
                // radius `val` (not on the bin center) — NOTE(review): confirm this is the
                // intended convention; it matches a per-particle deposition scheme.
132                Tscal area = shambase::constants::pi<Tscal>
133                    * ((val + delta_halfed) * (val + delta_halfed)
134                        - (val - delta_halfed) * (val - delta_halfed));
135                sigma_bin += pmass / area;
136            });
137            return sigma_bin;
138        });
    // Sum the rank-local Sigma contributions across all MPI ranks, in place.
139    shamalgs::collective::reduce_buffer_in_place_sum(Sigma, MPI_COMM_WORLD);
140
    // Member order must match the analysis_basis declaration (defined elsewhere in this file).
141    return analysis_basis{
142        std::move(buf_radius),
143        std::move(bin_edges),
144        std::move(histo.bins_center),
145        std::move(histo.counts),
146        std::move(binned_Jx),
147        std::move(binned_Jy),
148        std::move(binned_Jz),
149        std::move(Sigma)};
150}
151
152template<class Tvec, template<class> class SPHKernel>
// NOTE(review): the qualified function-name line (listing line 153) was dropped by the doc
// extractor — presumably `auto AnalysisDisc<Tvec, SPHKernel>::compute_analysis_stage0(`.
// Purpose: from the binned angular momenta in `basis`, compute per-bin unit angular-momentum
// vectors l = J/|J|, project each particle's position onto its bin's l to get z' (zmean),
// bin-average z', then bin-average the squared deviation (z' - <z'>)^2 as Hsq (disc scale
// height squared, before the final sqrt done by the caller/consumer).
154    analysis_basis &basis, u32 Nbin) -> analysis_stage0 {
155    // compute unit l
156    // still do it on device because data is there still
157    sham::DeviceBuffer<Tscal> &buf_binned_Jx = basis.binned_Jx;
158    sham::DeviceBuffer<Tscal> &buf_binned_Jy = basis.binned_Jy;
159    sham::DeviceBuffer<Tscal> &buf_binned_Jz = basis.binned_Jz;
160    sham::DeviceBuffer<Tscal> buf_binned_lx(Nbin, shamsys::instance::get_compute_scheduler_ptr());
161    sham::DeviceBuffer<Tscal> buf_binned_ly(Nbin, shamsys::instance::get_compute_scheduler_ptr());
162    sham::DeviceBuffer<Tscal> buf_binned_lz(Nbin, shamsys::instance::get_compute_scheduler_ptr());
163
164    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
165
    // NOTE(review): the call-name line (listing line 166) was dropped — presumably
    // `sham::kernel_call(`. Normalizes each bin's J; zeroes l where |J| is below epsilon
    // to avoid division by (near-)zero in empty bins.
167        q,
168        sham::MultiRef{buf_binned_Jx, buf_binned_Jy, buf_binned_Jz},
169        sham::MultiRef{buf_binned_lx, buf_binned_ly, buf_binned_lz},
170        Nbin,
171        [](u32 i,
172           const Tscal *__restrict Jx,
173           const Tscal *__restrict Jy,
174           const Tscal *__restrict Jz,
175           Tscal *__restrict lx,
176           Tscal *__restrict ly,
177           Tscal *__restrict lz) {
178            Tscal J_norm = sycl::sqrt(Jx[i] * Jx[i] + Jy[i] * Jy[i] + Jz[i] * Jz[i]);
179            if (J_norm < shambase::get_epsilon<Tscal>()) {
180                lx[i] = 0;
181                ly[i] = 0;
182                lz[i] = 0;
183            } else {
184                lx[i] = Jx[i] / J_norm;
185                ly[i] = Jy[i] / J_norm;
186                lz[i] = Jz[i] / J_norm;
187            }
188        });
    // NOTE(review): binned_Jx/Jy/Jz come from binned_average_mpi and so appear to be already
    // globally reduced; if every rank then computes identical unit vectors, this MPI sum
    // scales l by the communicator size (and l is no longer unit-norm). Later uses divide
    // consistently only if this is intentional — verify against the original source.
189    shamalgs::collective::reduce_buffer_in_place_sum(buf_binned_lx, MPI_COMM_WORLD);
190    shamalgs::collective::reduce_buffer_in_place_sum(buf_binned_ly, MPI_COMM_WORLD);
191    shamalgs::collective::reduce_buffer_in_place_sum(buf_binned_lz, MPI_COMM_WORLD);
192
193    // compute zmean
194    using namespace shamrock;
195    using namespace shamrock::patch;
196    PatchScheduler &sched = shambase::get_check_ref(context.sched);
197    PatchDataLayerLayout &pdl = sched.pdl_old();
198    const u32 ixyz = pdl.get_field_idx<Tvec>("xyz");
    // ivxyz is looked up but only xyz is used below — NOTE(review): possibly dead; verify.
199    const u32 ivxyz = pdl.get_field_idx<Tvec>("vxyz");
200
    // Same rank-local counting/offset pattern as compute_analysis_basis.
201    // get Npart
202    u64 Npart = 0;
203    u64 Npatch = 0;
204    std::vector<u64> parts_per_patch{};
205    // on all patches on all ranks, allreduce is included
206    scheduler().for_each_patchdata_nonempty([&](Patch cur_p, shamrock::scheduler::PatchData &pdat) {
207        parts_per_patch.push_back(pdat.get_obj_cnt());
208        Npart += pdat.get_obj_cnt();
209        Npatch++;
210    });
211
212    std::vector<u32> patch_start_index(Npatch);
213    std::exclusive_scan(
214        parts_per_patch.begin(), parts_per_patch.end(), patch_start_index.begin(), 0);
215
216    u32 i = 0;
217    sham::DeviceBuffer<Tscal> buf_zmean(Npart, shamsys::instance::get_compute_scheduler_ptr());
218    // compute zmean: loop on all patches of all ranks
219    scheduler().for_each_patchdata_nonempty([&](Patch cur_p, shamrock::scheduler::PatchData &pdat) {
220        u32 len = pdat.get_obj_cnt();
221        u32 start_write = patch_start_index[i];
222        sham::DeviceBuffer<Tvec> &buf_xyz = pdat.get_field_buf_ref<Tvec>(ixyz);
223        sham::DeviceBuffer<Tvec> &buf_vxyz = pdat.get_field_buf_ref<Tvec>(ivxyz);
224        sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
225
        // NOTE(review): listing lines 226/228 (the call name, presumably `sham::kernel_call(`,
        // and the opening `sham::MultiRef{`) were dropped by the doc extractor.
227            q,
229                buf_xyz,
230                basis.bin_edges,
231                basis.buf_radius,
232                buf_binned_lx,
233                buf_binned_ly,
234                buf_binned_lz},
235            sham::MultiRef{buf_zmean},
236            len,
237            [this, Nbin, start_write](
238                u32 i,
239                const Tvec *__restrict xyz,
240                const Tscal *__restrict bin_edges,
241                const Tscal *__restrict radius,
242                const Tscal *__restrict lx,
243                const Tscal *__restrict ly,
244                const Tscal *__restrict lz,
245                Tscal *__restrict buf_zmean) {
246                using namespace shamrock::sph;
247                Tvec pos = xyz[i];
                // NOTE(review): `radius` is the rank-global per-particle buffer, yet it is
                // indexed with the patch-local i (not i + start_write) — likely mixes up
                // particles across patches for every patch after the first; verify, the
                // read index should presumably match the write index below.
248                Tscal radiusi = radius[i];
249                u32 bini = mybin(radiusi, bin_edges, Nbin);
250
251                // zdash
                // z' = l . r : height of the particle above its bin's mean orbital plane.
252                buf_zmean[i + start_write]
253                    = lx[bini] * pos[0] + ly[bini] * pos[1] + lz[bini] * pos[2];
254            });
255
256        i++;
257    });
258
259    // now that we have zmean, bin it
260    auto binned_zmean = shamalgs::numeric::binned_average_mpi(
261        shamsys::instance::get_compute_scheduler_ptr(),
262        basis.bin_edges,
263        Nbin,
264        buf_zmean,
265        basis.buf_radius,
266        Npart);
267
268    // now compute H
269    u32 j = 0;
270    sham::DeviceBuffer<Tscal> buf_H(Npart, shamsys::instance::get_compute_scheduler_ptr());
271    // compute buf_H: loop on all patches of all ranks
272    scheduler().for_each_patchdata_nonempty([&](Patch cur_p, shamrock::scheduler::PatchData &pdat) {
273        u32 len = pdat.get_obj_cnt();
274        u32 start_write = patch_start_index[j];
275        sham::DeviceBuffer<Tvec> &buf_xyz = pdat.get_field_buf_ref<Tvec>(ixyz);
276        sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
277
        // NOTE(review): the call-name line (listing line 278) was dropped — presumably
        // `sham::kernel_call(`.
279            q,
280            sham::MultiRef{buf_xyz, basis.bin_edges, binned_zmean, buf_zmean},
281            sham::MultiRef{buf_H},
282            len,
283            [this, Nbin, start_write](
284                u32 i,
285                const Tvec *__restrict xyz,
286                const Tscal *__restrict bin_edges,
287                const Tscal *__restrict binned_zmean,
288                const Tscal *__restrict buf_zmean,
289                Tscal *__restrict buf_H) {
290                using namespace shamrock::sph;
291
292                Tvec pos = xyz[i];
293                Tscal radius = sycl::sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
294                u32 bini = mybin(radius, bin_edges, Nbin);
295
                // Squared deviation of z' from its bin mean.
                // NOTE(review): buf_zmean was written at [i + start_write] above but is read
                // here at the patch-local [i] — for every patch after the first this pairs a
                // particle with another patch's z'. Suspected bug: should likely read
                // buf_zmean[i + start_write]; confirm against the repository source.
296                buf_H[i + start_write]
297                    = (buf_zmean[i] - binned_zmean[bini]) * (buf_zmean[i] - binned_zmean[bini]);
298            });
299
300        j++;
301    });
302
    // NOTE(review): the assignment line (listing line 303) was dropped — presumably
    // `auto binned_Hsq = shamalgs::numeric::binned_average_mpi(` per the symbol index.
304        shamsys::instance::get_compute_scheduler_ptr(),
305        basis.bin_edges,
306        Nbin,
307        buf_H,
308        basis.buf_radius,
309        Npart);
310
    // Member order must match the analysis_stage0 declaration (lx, ly, lz, zmean, Hsq).
311    return analysis_stage0{
312        std::move(buf_binned_lx),
313        std::move(buf_binned_ly),
314        std::move(buf_binned_lz),
315        std::move(buf_zmean),
316        std::move(binned_Hsq)};
317}
318
319template<class Tvec, template<class> class SPHKernel>
// NOTE(review): the qualified function-name line (listing line 320) was dropped by the doc
// extractor — presumably `auto AnalysisDisc<Tvec, SPHKernel>::compute_analysis_stage1(`.
// Purpose: from the per-bin unit angular-momentum vectors of stage0, compute per-bin disc
// geometry diagnostics: tilt = acos(lz), twist = atan(ly/lz), and the warp amplitude
// psi = |dl/dr| estimated by a central difference over neighbouring bins.
321    analysis_basis &basis, analysis_stage0 &stage0, u32 Nbin) -> analysis_stage1 {
322
323    sham::DeviceBuffer<Tscal> &buf_lx = stage0.lx;
324    sham::DeviceBuffer<Tscal> &buf_ly = stage0.ly;
325    sham::DeviceBuffer<Tscal> &buf_lz = stage0.lz;
    // NOTE(review): basis.radius is indexed per-bin below (i in [0, Nbin)), yet in
    // compute_analysis_basis the first analysis_basis member was the per-particle radius
    // buffer (Npart elements). Verify that `radius` here really holds per-bin radii
    // (e.g. bin centers) — otherwise the psi central difference uses the wrong abscissa.
326    sham::DeviceBuffer<Tscal> &buf_radius = basis.radius;
327
328    sham::DeviceBuffer<Tscal> buf_tilt(Nbin, shamsys::instance::get_compute_scheduler_ptr());
329    sham::DeviceBuffer<Tscal> buf_twist(Nbin, shamsys::instance::get_compute_scheduler_ptr());
330    sham::DeviceBuffer<Tscal> buf_psi(Nbin, shamsys::instance::get_compute_scheduler_ptr());
331
332    auto &q = shamsys::instance::get_compute_scheduler().get_queue();
333
    // NOTE(review): the call-name line (listing line 334) was dropped — presumably
    // `sham::kernel_call(`. One work item per radial bin.
335        q,
336        sham::MultiRef{buf_radius, buf_lx, buf_ly, buf_lz},
337        sham::MultiRef{buf_tilt, buf_twist, buf_psi},
338        Nbin,
339        [=](u32 i,
340            const Tscal *__restrict radius,
341            const Tscal *__restrict lx,
342            const Tscal *__restrict ly,
343            const Tscal *__restrict lz,
344            Tscal *__restrict tilt,
345            Tscal *__restrict twist,
346            Tscal *__restrict psi) {
            // NOTE(review): `r` is never used in this lambda — dead local; candidate for removal.
347            Tscal r = radius[i];
348
            // Default all outputs to zero; only interior bins with non-degenerate lz get values.
349            tilt[i] = 0.;
350            twist[i] = 0.;
351            psi[i] = 0.;
            // Last bin index is excluded from both computations (i < Nmax guard).
352            const u32 Nmax = Nbin - 1;
            // lz == 0 exactly (e.g. the empty-bin zero vector from stage0) leaves tilt/twist at 0
            // and avoids the ly/lz division below.
353            if (sycl::fabs(lz[i]) > 0. && i < Nmax) {
354                tilt[i] = sycl::acos(lz[i]); // @@@ same as phantom
355                twist[i] = sycl::atan(ly[i] / lz[i]); // shambase::constants::pi<Tscal> * 0.5 *
356            }
357
            // Central difference d l / d r over neighbouring bins; first and last bins keep psi = 0.
358            if (i > 0 && i < Nmax) {
359                Tscal radius_diff = radius[i + 1] - radius[i - 1];
360                Tscal psi_x = (lx[i + 1] - lx[i - 1]) / radius_diff;
361                Tscal psi_y = (ly[i + 1] - ly[i - 1]) / radius_diff;
362                Tscal psi_z = (lz[i + 1] - lz[i - 1]) / radius_diff;
363                psi[i] = sycl::sqrt(psi_x * psi_x + psi_y * psi_y + psi_z * psi_z);
364            }
365        });
366
367    return analysis_stage1{std::move(buf_tilt), std::move(buf_twist), std::move(buf_psi)};
368}
369
370template<class Tvec, template<class> class SPHKernel>
// NOTE(review): the qualified function-name line (listing line 371) was dropped by the doc
// extractor — presumably `auto AnalysisDisc<Tvec, SPHKernel>::compute_analysis(`.
// Purpose: top-level driver — runs basis (radius, J, Sigma), stage0 (l, zmean, Hsq) and
// stage1 (tilt, twist, psi) in sequence and assembles the final analysis result.
// Parameters: [Rmin, Rmax] radial range, Nbin number of radial bins, ctx simulation context.
372    Tscal Rmin, Tscal Rmax, u32 Nbin, const ShamrockCtx &ctx) -> analysis {
373
    // Particle mass comes from the solver configuration (all gas particles share one mass).
374    const Tscal pmass = solver_config.gpart_mass;
    // Each stage consumes the previous one's buffers by reference; the moves below are only
    // valid because no stage is reused afterwards.
375    analysis_basis basis = compute_analysis_basis(pmass, Rmin, Rmax, Nbin, ctx);
376    analysis_stage0 stage0 = compute_analysis_stage0(basis, Nbin);
377    analysis_stage1 stage1 = compute_analysis_stage1(basis, stage0, Nbin);
378
    // Member order must match the analysis struct declaration (defined elsewhere in this file).
379    return analysis{
380        std::move(basis.radius),
381        std::move(basis.counter),
382        std::move(basis.Sigma),
383        std::move(stage0.lx),
384        std::move(stage0.ly),
385        std::move(stage0.lz),
386        std::move(stage1.tilt),
387        std::move(stage1.twist),
388        std::move(stage1.psi),
389        std::move(stage0.Hsq)};
390}
391
392using namespace shammath;
396
constexpr const char * vxyz
3-velocity field
constexpr const char * xyz
Position field (3D coordinates)
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
The MPI scheduler.
A buffer allocated in USM (Unified Shared Memory)
A SYCL queue associated with a device and a context.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
Creating an array of N values between two values.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
sham::DeviceBuffer< T > binned_average_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len, const sham::DeviceBuffer< u32 > &bin_counts_global)
Compute the average of values in each bin across all MPI ranks (with pre-computed global counts).
Definition numeric.hpp:659
histogram_result< T > device_histogram_full_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the histogram and bin properties (center, width) for a set of values and bin edges.
Definition numeric.hpp:753
sham::DeviceBuffer< Tval > linspace(Tval Rmin, Tval Rmax, u32 N)
Create an array of N values between two values.
Definition linspace.hpp:37
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110
namespace for math utility
Definition AABB.hpp:26
namespace for the main framework
Definition __init__.py:1
sph kernels
A class that references multiple buffers or similar objects.
Patch object that contain generic patch information.
Definition Patch.hpp:33