33template<
class Tvec,
template<
class>
class SPHKernel>
35 Tscal pmass, Tscal Rmin, Tscal Rmax,
u32 Nbin,
const ShamrockCtx &context) -> analysis_basis {
41 using namespace shamrock::patch;
51 std::vector<u64> parts_per_patch{};
53 parts_per_patch.push_back(pdat.get_obj_cnt());
54 Npart += pdat.get_obj_cnt();
58 std::vector<u32> patch_start_index(Npatch);
60 parts_per_patch.begin(), parts_per_patch.end(), patch_start_index.begin(), 0);
68 scheduler().for_each_patchdata_nonempty(
70 u32 len = pdat.get_obj_cnt();
71 u32 start_write = patch_start_index[i];
83 const Tvec *__restrict
xyz,
84 const Tvec *__restrict
vxyz,
85 Tscal *__restrict buf_radius,
86 Tscal *__restrict buf_Jx,
87 Tscal *__restrict buf_Jy,
88 Tscal *__restrict buf_Jz) {
89 using namespace shamrock::sph;
93 Tscal radius = sycl::sqrt(sycl::dot(pos, pos));
94 Tscal Jx = pmass * sycl::cross(pos, vel)[0];
95 Tscal Jy = pmass * sycl::cross(pos, vel)[1];
96 Tscal Jz = pmass * sycl::cross(pos, vel)[2];
98 buf_radius[i + start_write] = radius;
99 buf_Jx[i + start_write] = Jx;
100 buf_Jy[i + start_write] = Jy;
101 buf_Jz[i + start_write] = Jz;
108 shamsys::instance::get_compute_scheduler_ptr(), bin_edges, Nbin, buf_radius, Npart);
111 shamsys::instance::get_compute_scheduler_ptr(), bin_edges, Nbin, buf_Jx, buf_radius, Npart);
114 shamsys::instance::get_compute_scheduler_ptr(), bin_edges, Nbin, buf_Jy, buf_radius, Npart);
117 shamsys::instance::get_compute_scheduler_ptr(), bin_edges, Nbin, buf_Jz, buf_radius, Npart);
119 auto buf_radius_key = buf_radius.copy();
120 auto Sigma = shamalgs::numeric::binned_compute<Tscal>(
121 shamsys::instance::get_compute_scheduler_ptr(),
127 [pmass, Rmin, Rmax, Nbin](
auto for_each_values,
u32 bin_count) {
129 Tscal delta = (Rmax - Rmin) / Nbin;
130 Tscal delta_halfed = delta / 2;
131 for_each_values([&](Tscal val) {
132 Tscal area = shambase::constants::pi<Tscal>
133 * ((val + delta_halfed) * (val + delta_halfed)
134 - (val - delta_halfed) * (val - delta_halfed));
135 sigma_bin += pmass / area;
139 shamalgs::collective::reduce_buffer_in_place_sum(Sigma, MPI_COMM_WORLD);
141 return analysis_basis{
142 std::move(buf_radius),
143 std::move(bin_edges),
144 std::move(histo.bins_center),
145 std::move(histo.counts),
146 std::move(binned_Jx),
147 std::move(binned_Jy),
148 std::move(binned_Jz),
152template<
class Tvec,
template<
class>
class SPHKernel>
154 analysis_basis &basis,
u32 Nbin) -> analysis_stage0 {
172 const Tscal *__restrict Jx,
173 const Tscal *__restrict Jy,
174 const Tscal *__restrict Jz,
175 Tscal *__restrict lx,
176 Tscal *__restrict ly,
177 Tscal *__restrict lz) {
178 Tscal J_norm = sycl::sqrt(Jx[i] * Jx[i] + Jy[i] * Jy[i] + Jz[i] * Jz[i]);
184 lx[i] = Jx[i] / J_norm;
185 ly[i] = Jy[i] / J_norm;
186 lz[i] = Jz[i] / J_norm;
189 shamalgs::collective::reduce_buffer_in_place_sum(buf_binned_lx, MPI_COMM_WORLD);
190 shamalgs::collective::reduce_buffer_in_place_sum(buf_binned_ly, MPI_COMM_WORLD);
191 shamalgs::collective::reduce_buffer_in_place_sum(buf_binned_lz, MPI_COMM_WORLD);
195 using namespace shamrock::patch;
204 std::vector<u64> parts_per_patch{};
207 parts_per_patch.push_back(pdat.get_obj_cnt());
208 Npart += pdat.get_obj_cnt();
212 std::vector<u32> patch_start_index(Npatch);
214 parts_per_patch.begin(), parts_per_patch.end(), patch_start_index.begin(), 0);
220 u32 len = pdat.get_obj_cnt();
221 u32 start_write = patch_start_index[i];
237 [
this, Nbin, start_write](
239 const Tvec *__restrict
xyz,
240 const Tscal *__restrict bin_edges,
241 const Tscal *__restrict radius,
242 const Tscal *__restrict lx,
243 const Tscal *__restrict ly,
244 const Tscal *__restrict lz,
245 Tscal *__restrict buf_zmean) {
246 using namespace shamrock::sph;
248 Tscal radiusi = radius[i];
249 u32 bini = mybin(radiusi, bin_edges, Nbin);
252 buf_zmean[i + start_write]
253 = lx[bini] * pos[0] + ly[bini] * pos[1] + lz[bini] * pos[2];
261 shamsys::instance::get_compute_scheduler_ptr(),
273 u32 len = pdat.get_obj_cnt();
274 u32 start_write = patch_start_index[j];
280 sham::MultiRef{buf_xyz, basis.bin_edges, binned_zmean, buf_zmean},
283 [
this, Nbin, start_write](
285 const Tvec *__restrict
xyz,
286 const Tscal *__restrict bin_edges,
287 const Tscal *__restrict binned_zmean,
288 const Tscal *__restrict buf_zmean,
289 Tscal *__restrict buf_H) {
290 using namespace shamrock::sph;
293 Tscal radius = sycl::sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
294 u32 bini = mybin(radius, bin_edges, Nbin);
296 buf_H[i + start_write]
297 = (buf_zmean[i] - binned_zmean[bini]) * (buf_zmean[i] - binned_zmean[bini]);
304 shamsys::instance::get_compute_scheduler_ptr(),
311 return analysis_stage0{
312 std::move(buf_binned_lx),
313 std::move(buf_binned_ly),
314 std::move(buf_binned_lz),
315 std::move(buf_zmean),
316 std::move(binned_Hsq)};
319template<
class Tvec,
template<
class>
class SPHKernel>
321 analysis_basis &basis, analysis_stage0 &stage0,
u32 Nbin) -> analysis_stage1 {
332 auto &q = shamsys::instance::get_compute_scheduler().
get_queue();
340 const Tscal *__restrict radius,
341 const Tscal *__restrict lx,
342 const Tscal *__restrict ly,
343 const Tscal *__restrict lz,
344 Tscal *__restrict tilt,
345 Tscal *__restrict twist,
346 Tscal *__restrict psi) {
352 const u32 Nmax = Nbin - 1;
353 if (sycl::fabs(lz[i]) > 0. && i < Nmax) {
354 tilt[i] = sycl::acos(lz[i]);
355 twist[i] = sycl::atan(ly[i] / lz[i]);
358 if (i > 0 && i < Nmax) {
359 Tscal radius_diff = radius[i + 1] - radius[i - 1];
360 Tscal psi_x = (lx[i + 1] - lx[i - 1]) / radius_diff;
361 Tscal psi_y = (ly[i + 1] - ly[i - 1]) / radius_diff;
362 Tscal psi_z = (lz[i + 1] - lz[i - 1]) / radius_diff;
363 psi[i] = sycl::sqrt(psi_x * psi_x + psi_y * psi_y + psi_z * psi_z);
367 return analysis_stage1{std::move(buf_tilt), std::move(buf_twist), std::move(buf_psi)};
370template<
class Tvec,
template<
class>
class SPHKernel>
372 Tscal Rmin, Tscal Rmax,
u32 Nbin,
const ShamrockCtx &ctx) -> analysis {
374 const Tscal pmass = solver_config.gpart_mass;
375 analysis_basis basis = compute_analysis_basis(pmass, Rmin, Rmax, Nbin, ctx);
376 analysis_stage0 stage0 = compute_analysis_stage0(basis, Nbin);
377 analysis_stage1 stage1 = compute_analysis_stage1(basis, stage0, Nbin);
380 std::move(basis.radius),
381 std::move(basis.counter),
382 std::move(basis.Sigma),
383 std::move(stage0.lx),
384 std::move(stage0.ly),
385 std::move(stage0.lz),
386 std::move(stage1.tilt),
387 std::move(stage1.twist),
388 std::move(stage1.psi),
389 std::move(stage0.Hsq)};
constexpr const char * vxyz
3-velocity field
constexpr const char * xyz
Position field (3D coordinates)
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
A SYCL queue associated with a device and a context.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
Creating an array of N values between two values.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
sham::DeviceBuffer< T > binned_average_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len, const sham::DeviceBuffer< u32 > &bin_counts_global)
Compute the average of values in each bin across all MPI ranks (with pre-computed global counts).
histogram_result< T > device_histogram_full_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the histogram and bin properties (center, width) for a set of values and bin edges.
sham::DeviceBuffer< Tval > linspace(Tval Rmin, Tval Rmax, u32 N)
Create an array of N values between two values.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
namespace for math utility
namespace for the main framework
A class that references multiple buffers or similar objects.
Patch object that contain generic patch information.