Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
DiffOperatorDtDivv.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
18#include "shambase/memory.hpp"
25
26#define ZVEC shambase::VectorProperties<Tvec>::get_zero()
27
// ---------------------------------------------------------------------------
// NOTE(review): this block is a documentation-site extraction of the original
// DiffOperatorDtDivv.cpp. The leading numeral on each line is the doc page's
// own line number fused into the text by the extraction, and several original
// lines are missing entirely (doc lines 29, 50, 66, 84 — see the gap notes
// below). Code is left byte-identical; only comments are added.
//
// Purpose (from the visible code): for each non-empty patch, compute per
// particle the time derivative of the velocity divergence, dt(div v), stored
// in the "dtdivv" field. When `also_do_div_curl_v` is true, additionally
// compute and store "divv" and "curlv" from direct SPH sums. The velocity and
// acceleration gradients entering dtdivv use an exact (matrix-corrected)
// estimate: a 3x3 moment matrix is accumulated over neighbours, inverted with
// shammath::compute_inv_33, and applied to the accumulated neighbour sums.
// ---------------------------------------------------------------------------
28template<class Tvec, template<class> class SPHKernel>
// NOTE(review): doc line 29 — the return type and qualified function name —
// was lost in extraction; only the last parameter and opening brace survive.
30    bool also_do_div_curl_v) {
31
32    StackEntry stack_loc{};
33
34    shamlog_debug_ln("SPH", "Updating dt divv");
35
36    // Uniform gas-particle mass from the solver configuration.
37    Tscal gpart_mass = solver_config.gpart_mass;
38
39    using namespace shamrock;
40    using namespace shamrock::patch;
41
42    PatchDataLayerLayout &pdl = scheduler().pdl_old();
43    const u32 iaxyz = pdl.get_field_idx<Tvec>("axyz");
44
45    sph::BasicSPHGhostHandler<Tvec> &ghost_handle = storage.ghost_handler.get();
46
47    // Merged (local + ghost) patch data, keyed by patch id.
48    shambase::DistributedData<PatchDataLayer> &mpdats = storage.merged_patchdata_ghost.get();
49
50    auto &merged_xyzh = storage.merged_xyzh.get();
51
52    // NOTE(review): doc line 50 — the start of the `ghost_layout` declaration
53    // (presumably a layout reference obtained via get_check_ref below) — was
54    // lost in extraction; only the initializer continuation survives.
51        = shambase::get_check_ref(storage.ghost_layout.get());
55    // Field indices inside the ghost (interface) layout.
52    u32 ihpart_interf = ghost_layout.get_field_idx<Tscal>("hpart");
53    u32 iuint_interf = ghost_layout.get_field_idx<Tscal>("uint");
54    u32 ivxyz_interf = ghost_layout.get_field_idx<Tvec>("vxyz");
55    u32 iaxyz_interf = ghost_layout.get_field_idx<Tvec>("axyz");
56    u32 iomega_interf = ghost_layout.get_field_idx<Tscal>("omega");
57
58    // Output field indices in the patch's own layout.
58    const u32 idtdivv = pdl.get_field_idx<Tscal>("dtdivv");
59
60    const u32 idivv = pdl.get_field_idx<Tscal>("divv");
61    const u32 icurlv = pdl.get_field_idx<Tvec>("curlv");
62
63    // Per-patch work: bind merged buffers, then launch one SYCL kernel over
64    // the patch's own particles (neighbours may be ghosts from the merge).
63    scheduler().for_each_patchdata_nonempty([&](Patch cur_p, PatchDataLayer &pdat) {
64        PatchDataLayer &mpdat = mpdats.get(cur_p.id_patch);
65
66        // NOTE(review): doc line 66 — the start of the `buf_xyz` declaration —
67        // was lost in extraction; the continuation shows it binds to field 0
68        // (positions) of the merged xyzh data for this patch.
67            = merged_xyzh.get(cur_p.id_patch).template get_field_buf_ref<Tvec>(0);
68        sham::DeviceBuffer<Tvec> &buf_vxyz = mpdat.get_field_buf_ref<Tvec>(ivxyz_interf);
69        sham::DeviceBuffer<Tvec> &buf_axyz = mpdat.get_field_buf_ref<Tvec>(iaxyz_interf);
70
71        sham::DeviceBuffer<Tscal> &buf_hpart = mpdat.get_field_buf_ref<Tscal>(ihpart_interf);
72        sham::DeviceBuffer<Tscal> &buf_omega = mpdat.get_field_buf_ref<Tscal>(iomega_interf);
73        // NOTE(review): buf_uint is bound here but never accessed below.
73        sham::DeviceBuffer<Tscal> &buf_uint = mpdat.get_field_buf_ref<Tscal>(iuint_interf);
74
75        // Output buffers live on the (un-merged) patch data.
75        sham::DeviceBuffer<Tscal> &buf_divv = pdat.get_field_buf_ref<Tscal>(idivv);
76        sham::DeviceBuffer<Tvec> &buf_curlv = pdat.get_field_buf_ref<Tvec>(icurlv);
77        sham::DeviceBuffer<Tscal> &buf_dtdivv = pdat.get_field_buf_ref<Tscal>(idtdivv);
78
79        // NOTE(review): range_npart is unused; parallel_for below takes
80        // pdat.get_obj_cnt() directly.
79        sycl::range range_npart{pdat.get_obj_cnt()};
80
81        // Precomputed neighbour cache for this patch.
81        tree::ObjectCache &pcache
82            = shambase::get_check_ref(storage.neigh_cache).get_cache(cur_p.id_patch);
83
85
86        // Two near-identical kernels follow; the second additionally
87        // accumulates direct SPH sums for div v and curl v. NOTE(review):
88        // heavy duplication between the branches — factoring candidate once
89        // the full file is available.
86        if (!also_do_div_curl_v) {
87            NamedStackEntry tmppp{"compute dtdivv"};
88
89            sham::DeviceQueue &queue = shamsys::instance::get_compute_scheduler().get_queue();
90
91            // Gather read/write accessors; each registers the buffer in the
92            // event dependency list for the submit below.
91            sham::EventList depends_list;
92
93            auto xyz = buf_xyz.get_read_access(depends_list);
94            auto vxyz = buf_vxyz.get_read_access(depends_list);
95            auto axyz = buf_axyz.get_read_access(depends_list);
96            auto hpart = buf_hpart.get_read_access(depends_list);
97            auto omega = buf_omega.get_read_access(depends_list);
98            auto dtdivv = buf_dtdivv.get_write_access(depends_list);
99            auto ploop_ptrs = pcache.get_read_access(depends_list);
100
101            auto e = queue.submit(depends_list, [&](sycl::handler &cgh) {
102                const Tscal pmass = gpart_mass;
103
104                tree::ObjectCacheIterator particle_looper(ploop_ptrs);
105
106                // Squared kernel support radius (in units of h).
106                constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
107
108                shambase::parallel_for(cgh, pdat.get_obj_cnt(), "compute dtdivv", [=](i32 id_a) {
109                    using namespace shamrock::sph;
110
111                    // NOTE(review): sum_axyz, sum_du_a and sum_nabla_a are
112                    // dead locals in this kernel (never read).
111                    Tvec sum_axyz = ZVEC;
112                    Tscal sum_du_a = 0;
113                    Tscal h_a = hpart[id_a];
114                    Tvec xyz_a = xyz[id_a];
115                    Tvec vxyz_a = vxyz[id_a];
116                    Tvec axyz_a = axyz[id_a];
117                    Tscal omega_a = omega[id_a];
118
119                    // Density from the smoothing length (SPH h-rho relation).
119                    Tscal rho_a = rho_h(pmass, h_a, Kernel::hfactd);
120                    // Tscal rho_a_sq = rho_a * rho_a;
121                    // Tscal rho_a_inv = 1. / rho_a;
122                    // NOTE(review): unused in this branch (only needed to
123                    // normalise the divv/curlv direct sums of the other one).
122                    Tscal inv_rho_omega_a = 1. / (omega_a * rho_a);
123
124                    Tscal sum_nabla_a = 0;
125
126                    // 3x3 moment matrix (rows stored as Tvec) and the
127                    // neighbour-sum matrices for the velocity and
128                    // acceleration gradients.
126                    std::array<Tvec, dim> Rij_a{ZVEC, ZVEC, ZVEC};
127
128                    std::array<Tvec, dim> Rij_a_dvk_dxj{ZVEC, ZVEC, ZVEC};
129                    std::array<Tvec, dim> Rij_a_dak_dxj{ZVEC, ZVEC, ZVEC};
130
131                    particle_looper.for_each_object(id_a, [&](u32 id_b) {
132                        // Accumulate the gradient moment sums over
133                        // neighbours. (Original comment "compute only
134                        // omega_a" was stale.)
133                        Tvec r_ab = xyz_a - xyz[id_b];
134                        Tscal rab2 = sycl::dot(r_ab, r_ab);
135                        Tscal h_b = hpart[id_b];
136
137                        // Skip pairs outside both kernel supports.
137                        if (rab2 > h_a * h_a * Rker2 && rab2 > h_b * h_b * Rker2) {
138                            return;
139                        }
140
141                        Tscal rab = sycl::sqrt(rab2);
142                        Tvec vxyz_b = vxyz[id_b];
143                        Tvec axyz_b = axyz[id_b];
144                        Tvec v_ab = vxyz_a - vxyz_b;
145                        Tvec a_ab = axyz_a - axyz_b;
146
147                        Tvec r_ab_unit = r_ab / rab;
148
149                        // Guard against r_ab == 0 (self pair / coincident
150                        // particles) after the division above.
149                        if (rab < 1e-9) {
150                            r_ab_unit = {0, 0, 0};
151                        }
152
153                        // Kernel gradient evaluated with h_a, times mass.
153                        Tvec dWab_a = Kernel::dW_3d(rab, h_a) * r_ab_unit;
154
155                        Tvec mdWab_b = dWab_a * pmass;
156
157                        static_assert(dim == 3, "this is only implemented for dim 3");
158                        Rij_a[0] -= r_ab.x() * mdWab_b;
159                        Rij_a[1] -= r_ab.y() * mdWab_b;
160                        Rij_a[2] -= r_ab.z() * mdWab_b;
161
162                        Rij_a_dvk_dxj[0] -= v_ab * mdWab_b.x();
163                        Rij_a_dvk_dxj[1] -= v_ab * mdWab_b.y();
164                        Rij_a_dvk_dxj[2] -= v_ab * mdWab_b.z();
165
166                        Rij_a_dak_dxj[0] -= a_ab * mdWab_b.x();
167                        Rij_a_dak_dxj[1] -= a_ab * mdWab_b.y();
168                        Rij_a_dak_dxj[2] -= a_ab * mdWab_b.z();
169
170                        // sum_nabla_a += sycl::dot(a_ab, mdWab_b);
171                    });
172
173                    // Invert the moment matrix and apply it to the sums:
174                    // yields the matrix-corrected gradients dv_i/dx_k and
175                    // da_i/dx_k.
173                    std::array<Tvec, 3> invRij = shammath::compute_inv_33(Rij_a);
174
175                    std::array<Tvec, 3> dvi_dxk = shammath::mat_prod_33(invRij, Rij_a_dvk_dxj);
176                    std::array<Tvec, 3> dai_dxk = shammath::mat_prod_33(invRij, Rij_a_dak_dxj);
177
178                    // Traces give div(a) and div(v).
178                    Tscal div_ai = dai_dxk[0].x() + dai_dxk[1].y() + dai_dxk[2].z();
179                    // NOTE(review): div_vi and curl_vi are computed but
180                    // unused in this branch (their stores are commented out
181                    // below) — dead code.
179                    Tscal div_vi = dvi_dxk[0].x() + dvi_dxk[1].y() + dvi_dxk[2].z();
180                    Tvec curl_vi
181                        = {dvi_dxk[1].z() - dvi_dxk[2].y(),
182                           dvi_dxk[2].x() - dvi_dxk[0].z(),
183                           dvi_dxk[0].y() - dvi_dxk[1].x()};
184
185                    // Double contraction of the velocity gradient — appears
186                    // to be sum over i,k of (dv_i/dx_k)(dv_k/dx_i); TODO
187                    // confirm the row/column convention of dvi_dxk.
185                    Tscal tens_nablav
186                        = dvi_dxk[0].x() * dvi_dxk[0].x() + dvi_dxk[1].x() * dvi_dxk[0].y()
187                        + dvi_dxk[2].x() * dvi_dxk[0].z() + dvi_dxk[0].y() * dvi_dxk[1].x()
188                        + dvi_dxk[1].y() * dvi_dxk[1].y() + dvi_dxk[2].y() * dvi_dxk[1].z()
189                        + dvi_dxk[0].z() * dvi_dxk[2].x() + dvi_dxk[1].z() * dvi_dxk[2].y()
190                        + dvi_dxk[2].z() * dvi_dxk[2].z();
191
192                    // divv[id_a] = div_vi;
193                    // curlv[id_a] = curl_vi;
194                    // dt(div v) = div(a) - (double contraction above).
194                    dtdivv[id_a] = div_ai - tens_nablav;
195                });
196            });
197
198            // Register the kernel event on every accessed buffer and on the
199            // neighbour cache so later users wait on it.
198            buf_xyz.complete_event_state(e);
199            buf_vxyz.complete_event_state(e);
200            buf_axyz.complete_event_state(e);
201            buf_hpart.complete_event_state(e);
202            buf_omega.complete_event_state(e);
203            buf_dtdivv.complete_event_state(e);
204            sham::EventList resulting_events;
205            resulting_events.add_event(e);
206            pcache.complete_event_state(resulting_events);
207
208        } else {

209
210            // Same kernel as above, plus direct SPH estimates of div v and
211            // curl v normalised by 1/(rho * Omega).
210            NamedStackEntry tmppp{"compute dtdivv + divcurl v"};
211
212            sham::DeviceQueue &queue = shamsys::instance::get_compute_scheduler().get_queue();
213
214            sham::EventList depends_list;
215
216            auto xyz = buf_xyz.get_read_access(depends_list);
217            auto vxyz = buf_vxyz.get_read_access(depends_list);
218            auto axyz = buf_axyz.get_read_access(depends_list);
219            auto hpart = buf_hpart.get_read_access(depends_list);
220            auto omega = buf_omega.get_read_access(depends_list);
221            auto divv = buf_divv.get_write_access(depends_list);
222            auto curlv = buf_curlv.get_write_access(depends_list);
223            auto dtdivv = buf_dtdivv.get_write_access(depends_list);
224            auto ploop_ptrs = pcache.get_read_access(depends_list);
225
226            auto e = queue.submit(depends_list, [&](sycl::handler &cgh) {
227                const Tscal pmass = gpart_mass;
228
229                tree::ObjectCacheIterator particle_looper(ploop_ptrs);
230
231                constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
232
233                shambase::parallel_for(
234                    cgh, pdat.get_obj_cnt(), "compute dtdivv + divcurl v", [=](i32 id_a) {
235                        using namespace shamrock::sph;
236
237                        // NOTE(review): sum_axyz, sum_du_a and sum_nabla_a
238                        // are dead locals here too.
237                        Tvec sum_axyz = ZVEC;
238                        Tscal sum_du_a = 0;
239                        Tscal h_a = hpart[id_a];
240                        Tvec xyz_a = xyz[id_a];
241                        Tvec vxyz_a = vxyz[id_a];
242                        Tvec axyz_a = axyz[id_a];
243                        Tscal omega_a = omega[id_a];
244
245                        Tscal rho_a = rho_h(pmass, h_a, Kernel::hfactd);
246                        // Tscal rho_a_sq = rho_a * rho_a;
247                        // Tscal rho_a_inv = 1. / rho_a;
248                        // Grad-h corrected normalisation for the direct sums.
248                        Tscal inv_rho_omega_a = 1. / (omega_a * rho_a);
249
250                        Tscal sum_nabla_a = 0;
251
252                        std::array<Tvec, dim> Rij_a{ZVEC, ZVEC, ZVEC};
253
254                        std::array<Tvec, dim> Rij_a_dvk_dxj{ZVEC, ZVEC, ZVEC};
255                        std::array<Tvec, dim> Rij_a_dak_dxj{ZVEC, ZVEC, ZVEC};
256
257                        // Direct SPH accumulators for div v and curl v.
257                        Tscal sum_nabla_v = 0;
258                        Tvec sum_nabla_cross_v{};
259
260                        particle_looper.for_each_object(id_a, [&](u32 id_b) {
261                            // Accumulate gradient moment sums plus the direct
262                            // div/curl sums. (Original comment "compute only
263                            // omega_a" was stale.)
262                            Tvec r_ab = xyz_a - xyz[id_b];
263                            Tscal rab2 = sycl::dot(r_ab, r_ab);
264                            Tscal h_b = hpart[id_b];
265
266                            if (rab2 > h_a * h_a * Rker2 && rab2 > h_b * h_b * Rker2) {
267                                return;
268                            }
269
270                            Tscal rab = sycl::sqrt(rab2);
271                            Tvec vxyz_b = vxyz[id_b];
272                            Tvec axyz_b = axyz[id_b];
273                            Tvec v_ab = vxyz_a - vxyz_b;
274                            Tvec a_ab = axyz_a - axyz_b;
275
276                            Tvec r_ab_unit = r_ab / rab;
277
278                            if (rab < 1e-9) {
279                                r_ab_unit = ZVEC;
280                            }
281
282                            Tvec dWab_a = Kernel::dW_3d(rab, h_a) * r_ab_unit;
283
284                            Tvec mdWab_b = dWab_a * pmass;
285
286                            static_assert(dim == 3, "this is only implemented for dim 3");
287                            Rij_a[0] -= r_ab.x() * mdWab_b;
288                            Rij_a[1] -= r_ab.y() * mdWab_b;
289                            Rij_a[2] -= r_ab.z() * mdWab_b;
290
291                            Rij_a_dvk_dxj[0] -= v_ab * mdWab_b.x();
292                            Rij_a_dvk_dxj[1] -= v_ab * mdWab_b.y();
293                            Rij_a_dvk_dxj[2] -= v_ab * mdWab_b.z();
294
295                            Rij_a_dak_dxj[0] -= a_ab * mdWab_b.x();
296                            Rij_a_dak_dxj[1] -= a_ab * mdWab_b.y();
297                            Rij_a_dak_dxj[2] -= a_ab * mdWab_b.z();
298
299                            // sum_nabla_a += sycl::dot(a_ab, mdWab_b);
300                            sum_nabla_v += pmass * sycl::dot(v_ab, dWab_a);
301                            sum_nabla_cross_v += pmass * sycl::cross(v_ab, dWab_a);
302                        });
303
304                        std::array<Tvec, 3> invRij = shammath::compute_inv_33(Rij_a);
305
306                        std::array<Tvec, 3> dvi_dxk = shammath::mat_prod_33(invRij, Rij_a_dvk_dxj);
307                        std::array<Tvec, 3> dai_dxk = shammath::mat_prod_33(invRij, Rij_a_dak_dxj);
308
309                        Tscal div_ai = dai_dxk[0].x() + dai_dxk[1].y() + dai_dxk[2].z();
310                        // NOTE(review): div_vi and curl_vi (matrix-corrected
311                        // estimates) are dead here — the stores below use the
312                        // direct SPH sums instead.
310                        Tscal div_vi = dvi_dxk[0].x() + dvi_dxk[1].y() + dvi_dxk[2].z();
311                        Tvec curl_vi
312                            = {dvi_dxk[1].z() - dvi_dxk[2].y(),
313                               dvi_dxk[2].x() - dvi_dxk[0].z(),
314                               dvi_dxk[0].y() - dvi_dxk[1].x()};
315
316                        Tscal tens_nablav
317                            = dvi_dxk[0].x() * dvi_dxk[0].x() + dvi_dxk[1].x() * dvi_dxk[0].y()
318                            + dvi_dxk[2].x() * dvi_dxk[0].z() + dvi_dxk[0].y() * dvi_dxk[1].x()
319                            + dvi_dxk[1].y() * dvi_dxk[1].y() + dvi_dxk[2].y() * dvi_dxk[1].z()
320                            + dvi_dxk[0].z() * dvi_dxk[2].x() + dvi_dxk[1].z() * dvi_dxk[2].y()
321                            + dvi_dxk[2].z() * dvi_dxk[2].z();
322
323                        // divv[id_a] = div_vi;
324                        // curlv[id_a] = curl_vi;
325                        // Direct SPH sums, normalised by -1/(rho_a Omega_a).
325                        divv[id_a] = -inv_rho_omega_a * sum_nabla_v;
326                        curlv[id_a] = -inv_rho_omega_a * sum_nabla_cross_v;
327                        dtdivv[id_a] = div_ai - tens_nablav;
328                    });
329            });
330
331            buf_xyz.complete_event_state(e);
332            buf_vxyz.complete_event_state(e);
333            buf_axyz.complete_event_state(e);
334            buf_hpart.complete_event_state(e);
335            buf_omega.complete_event_state(e);
336            buf_divv.complete_event_state(e);
337            buf_curlv.complete_event_state(e);
338            buf_dtdivv.complete_event_state(e);
339            sham::EventList resulting_events;
340            resulting_events.add_event(e);
341            pcache.complete_event_state(resulting_events);
342        }
343    });
344}
345
346using namespace shammath;
350
constexpr const char * axyz
3-acceleration field
constexpr const char * vxyz
3-velocity field
constexpr const char * xyz
Position field (3D coordinates)
constexpr const char * hpart
Smoothing length field.
constexpr const char * omega
Grad-h correction factor $\Omega$ (smoothing-length gradient correction term).
std::uint32_t u32
32 bit unsigned integer
std::int32_t i32
32 bit signed integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
void add_event(sycl::event e)
Add an event to the list of events.
Definition EventList.hpp:87
Represents a collection of objects distributed across patches identified by a u64 id.
T & get(u64 id)
Returns a reference to an object in the collection.
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_error if the pointer does not hold an object.
Definition memory.hpp:110
namespace for math utility
Definition AABB.hpp:26
namespace for the main framework
Definition __init__.py:1
sph kernels
Patch object that contain generic patch information.
Definition Patch.hpp:33
u64 id_patch
unique key that identify the patch
Definition Patch.hpp:86