Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
Solver.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
27#include "shambase/memory.hpp"
28#include "shambase/string.hpp"
29#include "shambase/time.hpp"
34#include "shambackends/math.hpp"
35#include "shamcomm/logs.hpp"
65#include <memory>
66#include <stdexcept>
67#include <vector>
68
// NOTE(review): Doxygen-listing artifact — original source line 70 (the
// member-function signature) was dropped by the HTML extraction, so the
// function name is not visible; from the body this initialises the
// solvergraph storage edges of shammodels::gsph::Solver. TODO confirm name.
69template<class Tvec, template<class> class Kern>
71
// Particle-count index edges: local counts and counts including ghosts.
72 storage.part_counts = std::make_shared<shamrock::solvergraph::Indexes<u32>>(
73 edges::part_counts, "N_{\\rm part}");
74
75 storage.part_counts_with_ghost = std::make_shared<shamrock::solvergraph::Indexes<u32>>(
76 edges::part_counts_with_ghost, "N_{\\rm part, with ghost}");
77
// Callback edge resolving which rank owns a given patch id (queries the scheduler).
78 storage.patch_rank_owner = std::make_shared<shamrock::solvergraph::RankGetter>(
79 [&](u64 patch_id) -> u32 {
80 return scheduler().get_patch_rank_owner(patch_id);
81 },
82 "patch_rank_owner",
83 "rank");
84
85 // Merged ghost spans
86 storage.positions_with_ghosts = std::make_shared<shamrock::solvergraph::FieldRefs<Tvec>>(
87 edges::positions_with_ghosts, "\\mathbf{r}");
88 storage.hpart_with_ghosts
89 = std::make_shared<shamrock::solvergraph::FieldRefs<Tscal>>(edges::hpart_with_ghosts, "h");
90
// Neighbour cache edge — reuses the SPH solvergraph NeighCache node type.
91 storage.neigh_cache
92 = std::make_shared<shammodels::sph::solvergraph::NeighCache>(edges::neigh_cache, "neigh");
93
94 // Register ghost handler in solvergraph for explicit data dependency tracking
95 storage.ghost_handler = storage.solver_graph.register_edge(
96 "ghost_handler", solvergraph::GhostHandlerEdge<Tvec>("ghost_handler", "\\mathcal{G}"));
97
// Per-particle scalar solver fields (nvar = 1 each): omega, density, pressure, soundspeed.
98 storage.omega = std::make_shared<shamrock::solvergraph::Field<Tscal>>(1, "omega", "\\Omega");
99 storage.density = std::make_shared<shamrock::solvergraph::Field<Tscal>>(1, "density", "\\rho");
100 storage.pressure = std::make_shared<shamrock::solvergraph::Field<Tscal>>(1, "pressure", "P");
101 storage.soundspeed
102 = std::make_shared<shamrock::solvergraph::Field<Tscal>>(1, "soundspeed", "c_s");
103
104 // Initialize gradient fields for MUSCL reconstruction
105 // These are only used when reconstruct_config.is_muscl() == true
106 storage.grad_density
107 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1, "grad_density", "\\nabla\\rho");
108 storage.grad_pressure
109 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1, "grad_pressure", "\\nabla P");
110 storage.grad_vx
111 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1, "grad_vx", "\\nabla v_x");
112 storage.grad_vy
113 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1, "grad_vy", "\\nabla v_y");
114 storage.grad_vz
115 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1, "grad_vz", "\\nabla v_z");
116}
117
// NOTE(review): signature line 119 lost in extraction — from the trailing
// parameters and body this is the VTK dump entry point; it simply delegates
// to the modules::VTKDump module with the solver's context and config.
118template<class Tvec, template<class> class Kern>
120 std::string filename, bool add_patch_world_id) {
121
// One-shot module instance: construct, dump, discard.
122 modules::VTKDump<Tvec, Kern>(context, solver_config).do_dump(filename, add_patch_world_id);
123}
124
// NOTE(review): lines 126 (signature) and 129 (the declaration/construction
// of `_sptree`, presumably a SerialPatchTree built from the scheduler) were
// dropped by the extraction — confirm against the repository source.
125template<class Tvec, template<class> class Kern>
127 StackEntry stack_loc{};
128
// Attach device buffers to the freshly built tree, then move it into storage.
130 _sptree.attach_buf();
131 storage.serial_patch_tree.set(std::move(_sptree));
132}
133
// NOTE(review): signature line 135 lost in extraction. The body reads
// `time_val`, so the function presumably takes the current time (or reads a
// member) — TODO confirm. It selects and installs the ghost handler matching
// the configured boundary condition (Free / Periodic / ShearingPeriodic).
134template<class Tvec, template<class> class Kern>
136 StackEntry stack_loc{};
137
// Ghost-handler-side BC types ...
138 using CfgClass = gsph::GSPHGhostHandlerConfig<Tvec>;
139 using BCConfig = typename CfgClass::Variant;
140
141 using BCFree = typename CfgClass::Free;
142 using BCPeriodic = typename CfgClass::Periodic;
143 using BCShearingPeriodic = typename CfgClass::ShearingPeriodic;
144
// ... and the solver-config-side BC variant alternatives they map from.
145 using SolverConfigBC = typename Config::BCConfig;
146 using SolverBCFree = typename SolverConfigBC::Free;
147 using SolverBCPeriodic = typename SolverConfigBC::Periodic;
148 using SolverBCShearingPeriodic = typename SolverConfigBC::ShearingPeriodic;
149
150 // Boundary condition selection - similar to SPH solver
151 // Note: Wall boundaries use Periodic with dynamic wall particles
152 if (SolverBCFree *c = std::get_if<SolverBCFree>(&solver_config.boundary_config.config)) {
153 shambase::get_check_ref(storage.ghost_handler)
154 .set(
155 GhostHandle{
156 scheduler(), BCFree{}, storage.patch_rank_owner, storage.xyzh_ghost_layout});
157 } else if (
158 SolverBCPeriodic *c
159 = std::get_if<SolverBCPeriodic>(&solver_config.boundary_config.config)) {
160 shambase::get_check_ref(storage.ghost_handler)
161 .set(
162 GhostHandle{
163 scheduler(),
164 BCPeriodic{},
165 storage.patch_rank_owner,
166 storage.xyzh_ghost_layout});
167 } else if (
168 SolverBCShearingPeriodic *c
169 = std::get_if<SolverBCShearingPeriodic>(&solver_config.boundary_config.config)) {
170 // Shearing periodic boundaries (Stone 2010) - reuse SPH implementation
// Offset position by the accumulated shear (shear_speed * time_val), offset velocity by shear_speed.
171 shambase::get_check_ref(storage.ghost_handler)
172 .set(
173 GhostHandle{
174 scheduler(),
175 BCShearingPeriodic{
176 c->shear_base, c->shear_dir, c->shear_speed * time_val, c->shear_speed},
177 storage.patch_rank_owner,
178 storage.xyzh_ghost_layout});
179 } else {
// Unrecognised variant alternative: fail loudly rather than run with wrong ghosts.
180 shambase::throw_with_loc<std::runtime_error>("GSPH: Unsupported boundary condition type.");
181 }
182}
183
// NOTE(review): signature line 185 lost in extraction — builds the ghost
// patch interface cache from the installed ghost handler and the serial
// patch tree, padded by the coarse-cycle smoothing-length tolerance.
184template<class Tvec, template<class> class Kern>
186 StackEntry stack_loc{};
187
188 using GSPHUtils = GSPHUtilities<Tvec, Kernel>;
189 GSPHUtils gsph_utils(scheduler());
190
// htol_up_coarse_cycle widens the interface so h can grow between rebuilds.
191 storage.ghost_patch_cache.set(gsph_utils.build_interf_cache(
192 shambase::get_check_ref(storage.ghost_handler).get(),
193 storage.serial_patch_tree.get(),
194 solver_config.htol_up_coarse_cycle));
195}
196
// NOTE(review): signature line 198 lost in extraction — teardown counterpart
// of the ghost-cache build above; just drops the cached interface data.
197template<class Tvec, template<class> class Kern>
199 StackEntry stack_loc{};
200 storage.ghost_patch_cache.reset();
201}
202
// NOTE(review): signature line 204 lost in extraction — builds the merged
// (local + ghost) position/h data, then publishes per-patch counts and
// field references into the solvergraph edges created at init.
203template<class Tvec, template<class> class Kern>
205 StackEntry stack_loc{};
206
// Communicate and merge positions/h across patch interfaces.
207 storage.merged_xyzh.set(
208 shambase::get_check_ref(storage.ghost_handler)
209 .get()
210 .build_comm_merge_positions(storage.ghost_patch_cache.get()));
211
212 // Get field indices from xyzh_ghost_layout
213 const u32 ixyz_ghost
214 = storage.xyzh_ghost_layout->template get_field_idx<Tvec>(gsph::names::common::xyz);
215 const u32 ihpart_ghost
216 = storage.xyzh_ghost_layout->template get_field_idx<Tscal>(gsph::names::common::hpart);
217
218 // Set element counts
// Local count comes from the scheduler's patchdata, not from the merged layer.
219 shambase::get_check_ref(storage.part_counts).indexes
220 = storage.merged_xyzh.get().template map<u32>(
221 [&](u64 id, shamrock::patch::PatchDataLayer &mpdat) {
222 return scheduler().patch_data.get_pdat(id).get_obj_cnt();
223 });
224
225 // Set element counts with ghost
// Ghost-inclusive count is the merged layer's own object count.
226 shambase::get_check_ref(storage.part_counts_with_ghost).indexes
227 = storage.merged_xyzh.get().template map<u32>(
228 [&](u64 id, shamrock::patch::PatchDataLayer &mpdat) {
229 return mpdat.get_obj_cnt();
230 });
231
232 // Attach spans to block coords
233 shambase::get_check_ref(storage.positions_with_ghosts)
234 .set_refs(
235 storage.merged_xyzh.get().template map<std::reference_wrapper<PatchDataField<Tvec>>>(
236 [&, ixyz_ghost](u64 id, shamrock::patch::PatchDataLayer &mpdat) {
237 return std::ref(mpdat.get_field<Tvec>(ixyz_ghost));
238 }));
239
240 shambase::get_check_ref(storage.hpart_with_ghosts)
241 .set_refs(
242 storage.merged_xyzh.get().template map<std::reference_wrapper<PatchDataField<Tscal>>>(
243 [&, ihpart_ghost](u64 id, shamrock::patch::PatchDataLayer &mpdat) {
244 return std::ref(mpdat.get_field<Tscal>(ihpart_ghost));
245 }));
246}
247
// NOTE(review): signature line 249 lost in extraction — builds one BVH / radix
// tree (RTree) per patch over the merged (local + ghost) particle positions
// and stores the collection in storage.merged_pos_trees.
248template<class Tvec, template<class> class Kern>
250 StackEntry stack_loc{};
251
252 auto &merged_xyzh = storage.merged_xyzh.get();
253 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
254
255 // Get field index from xyzh_ghost_layout
256 const u32 ixyz_ghost
257 = storage.xyzh_ghost_layout->template get_field_idx<Tvec>(gsph::names::common::xyz);
258
259 shambase::DistributedData<RTree> trees = merged_xyzh.template map<RTree>(
260 [&, ixyz_ghost](u64 id, shamrock::patch::PatchDataLayer &merged) {
261 PatchDataField<Tvec> &pos = merged.template get_field<Tvec>(ixyz_ghost);
262 Tvec bmax = pos.compute_max();
263 Tvec bmin = pos.compute_min();
264
265 shammath::AABB<Tvec> aabb(bmin, bmax);
266
267 Tscal infty = std::numeric_limits<Tscal>::infinity();
268
269 // Ensure that no particle is on the boundary of the AABB
// std::nextafter nudges each bound one ULP outward so min/max particles fall strictly inside.
270 aabb.lower[0] = std::nextafter(aabb.lower[0], -infty);
271 aabb.lower[1] = std::nextafter(aabb.lower[1], -infty);
272 aabb.lower[2] = std::nextafter(aabb.lower[2], -infty);
273 aabb.upper[0] = std::nextafter(aabb.upper[0], infty);
274 aabb.upper[1] = std::nextafter(aabb.upper[1], infty);
275 aabb.upper[2] = std::nextafter(aabb.upper[2], infty);
276
277 auto bvh = RTree::make_empty(dev_sched);
278 bvh.rebuild_from_positions(
279 pos.get_buf(), pos.get_obj_cnt(), aabb, solver_config.tree_reduction_level);
280
281 return bvh;
282 });
283
284 storage.merged_pos_trees.set(std::move(trees));
285}
286
// NOTE(review): signature line 288 lost in extraction — drops the per-patch
// position trees built above.
287template<class Tvec, template<class> class Kern>
289 StackEntry stack_loc{};
290 storage.merged_pos_trees.reset();
291}
292
// NOTE(review): signature line 294 lost in extraction — computes, per tree
// node, the maximum interaction radius of contained particles (scaled by the
// h tolerance), used to prune tree walks during neighbour search.
293template<class Tvec, template<class> class Kern>
295 StackEntry stack_loc{};
296
297 auto &xyzh_merged = storage.merged_xyzh.get();
298 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
299
300 storage.rtree_rint_field.set(
301 storage.merged_pos_trees.get().template map<shamtree::KarrasRadixTreeField<Tscal>>(
302 [&](u64 id, RTree &rtree) -> shamtree::KarrasRadixTreeField<Tscal> {
303 shamrock::patch::PatchDataLayer &tmp = xyzh_merged.get(id);
// NOTE(review): hard-coded field index 1 — presumably hpart in the xyzh
// ghost layout (index 0 = xyz); consider using get_field_idx as elsewhere.
304 auto &buf = tmp.get_field_buf_ref<Tscal>(1);
305 auto buf_int = shamtree::new_empty_karras_radix_tree_field<Tscal>();
306
// Propagate per-leaf max(h) up the tree structure.
307 auto ret = shamtree::compute_tree_field_max_field<Tscal>(
308 rtree.structure,
309 rtree.reduced_morton_set.get_leaf_cell_iterator(),
310 std::move(buf_int),
311 buf);
312
313 // Increase the size by tolerance factor
314 sham::kernel_call(
315 dev_sched->get_queue(),
316 sham::MultiRef{},
317 sham::MultiRef{ret.buf_field},
318 ret.buf_field.get_size(),
319 [htol = solver_config.htol_up_coarse_cycle](u32 i, Tscal *h_tree) {
320 h_tree[i] *= htol;
321 });
322
// NOTE(review): `return std::move(ret)` inhibits NRVO; harmless if the
// field type is move-only, otherwise a plain `return ret;` is preferable.
323 return std::move(ret);
324 }));
325}
326
// NOTE(review): signature line 328 lost in extraction — drops the per-node
// interaction-radius tree field.
327template<class Tvec, template<class> class Kern>
329 storage.rtree_rint_field.reset();
330}
331
// NOTE(review): signature line 333 lost in extraction — builds the per-patch
// neighbour cache with the classic two-pass (count, prefix-sum, fill) scheme
// over the merged-position trees, then accumulates the build time.
332template<class Tvec, template<class> class Kern>
334 StackEntry stack_loc{};
335
336 shambase::Timer time_neigh;
337 time_neigh.start();
338
// Same tolerance used when the trees/rint field were built, so the cache is consistent.
339 Tscal h_tolerance = solver_config.htol_up_coarse_cycle;
340
341 // Build neighbor cache using tree traversal - same approach as SPH module
342 auto build_neigh_cache = [&](u64 patch_id) -> shamrock::tree::ObjectCache {
343 auto &mfield = storage.merged_xyzh.get().get(patch_id);
344
// NOTE(review): hard-coded indices 0/1 — presumably xyz and hpart in the
// xyzh ghost layout; confirm against the layout built in the init function.
345 sham::DeviceBuffer<Tvec> &buf_xyz = mfield.template get_field_buf_ref<Tvec>(0);
346 sham::DeviceBuffer<Tscal> &buf_hpart = mfield.template get_field_buf_ref<Tscal>(1);
347
348 sham::DeviceBuffer<Tscal> &tree_field_rint
349 = storage.rtree_rint_field.get().get(patch_id).buf_field;
350
351 RTree &tree = storage.merged_pos_trees.get().get(patch_id);
352 auto obj_it = tree.get_object_iterator();
353
// Neighbours are cached only for local (non-ghost) particles.
354 u32 obj_cnt = shambase::get_check_ref(storage.part_counts).indexes.get(patch_id);
355
356 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
357
358 // Allocate neighbor count buffer
359 sham::DeviceBuffer<u32> neigh_count(
360 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
361
362 shamsys::instance::get_compute_queue().wait_and_throw();
363
364 // First pass: count neighbors
365 {
366 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
367 sham::EventList depends_list;
368
369 auto xyz = buf_xyz.get_read_access(depends_list);
370 auto hpart = buf_hpart.get_read_access(depends_list);
371 auto rint_tree = tree_field_rint.get_read_access(depends_list);
372 auto neigh_cnt = neigh_count.get_write_access(depends_list);
373 auto particle_looper = obj_it.get_read_access(depends_list);
374
375 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
376 shambase::parallel_for(cgh, obj_cnt, "gsph_count_neighbors", [=](u64 gid) {
377 u32 id_a = (u32) gid;
378
// Search box of particle a: its (tolerance-scaled) kernel support radius.
379 Tscal rint_a = hpart[id_a] * h_tolerance;
380 Tvec xyz_a = xyz[id_a];
381
382 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
383 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
384
385 u32 cnt = 0;
386
387 particle_looper.rtree_for(
388 [&](u32 node_id, shammath::AABB<Tvec> node_aabb) -> bool {
389 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
390
391 using namespace walker::interaction_crit;
392
// Descend only into cells that can interact with particle a.
393 return sph_radix_cell_crit(
394 xyz_a,
395 inter_box_a_min,
396 inter_box_a_max,
397 node_aabb.lower,
398 node_aabb.upper,
399 int_r_max_cell);
400 },
401 [&](u32 id_b) {
402 Tvec dr = xyz_a - xyz[id_b];
403 Tscal rab2 = sycl::dot(dr, dr);
404 Tscal rint_b = hpart[id_b] * h_tolerance;
405
// Symmetric criterion: pair interacts if within either particle's support.
406 bool no_interact
407 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
408
409 cnt += (no_interact) ? 0 : 1;
410 });
411
412 neigh_cnt[id_a] = cnt;
413 });
414 });
415
416 buf_xyz.complete_event_state(e);
417 buf_hpart.complete_event_state(e);
418 neigh_count.complete_event_state(e);
419 tree_field_rint.complete_event_state(e);
420 obj_it.complete_event_state(e);
421 }
422
423 // Use tree::prepare_object_cache to do prefix sum and allocate buffers
// NOTE(review): line 424 (the `pcache` declaration receiving this call) was
// dropped by the extraction — the variable is used below. TODO confirm.
425 = shamrock::tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
426
427 // Second pass: fill neighbor indices
428 {
429 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
430 sham::EventList depends_list;
431
432 auto xyz = buf_xyz.get_read_access(depends_list);
433 auto hpart = buf_hpart.get_read_access(depends_list);
434 auto rint_tree = tree_field_rint.get_read_access(depends_list);
435 auto scanned_neigh_cnt = pcache.scanned_cnt.get_read_access(depends_list);
436 auto neigh = pcache.index_neigh_map.get_write_access(depends_list);
437 auto particle_looper = obj_it.get_read_access(depends_list);
438
439 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
440 shambase::parallel_for(cgh, obj_cnt, "gsph_fill_neighbors", [=](u64 gid) {
441 u32 id_a = (u32) gid;
442
// Must recompute exactly the same traversal as pass one so counts match writes.
443 Tscal rint_a = hpart[id_a] * h_tolerance;
444 Tvec xyz_a = xyz[id_a];
445
446 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
447 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
448
// Start writing at this particle's exclusive-scan offset.
449 u32 write_idx = scanned_neigh_cnt[id_a];
450
451 particle_looper.rtree_for(
452 [&](u32 node_id, shammath::AABB<Tvec> node_aabb) -> bool {
453 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
454
455 using namespace walker::interaction_crit;
456
457 return sph_radix_cell_crit(
458 xyz_a,
459 inter_box_a_min,
460 inter_box_a_max,
461 node_aabb.lower,
462 node_aabb.upper,
463 int_r_max_cell);
464 },
465 [&](u32 id_b) {
466 Tvec dr = xyz_a - xyz[id_b];
467 Tscal rab2 = sycl::dot(dr, dr);
468 Tscal rint_b = hpart[id_b] * h_tolerance;
469
470 bool no_interact
471 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
472
473 if (!no_interact) {
474 neigh[write_idx++] = id_b;
475 }
476 });
477 });
478 });
479
480 buf_xyz.complete_event_state(e);
481 buf_hpart.complete_event_state(e);
482 tree_field_rint.complete_event_state(e);
483 pcache.scanned_cnt.complete_event_state(e);
484 pcache.index_neigh_map.complete_event_state(e);
485 obj_it.complete_event_state(e);
486 }
487
488 return pcache;
489 };
490
// Drop any previous cache allocation before rebuilding patch by patch.
491 shambase::get_check_ref(storage.neigh_cache).free_alloc();
492
493 using namespace shamrock::patch;
494 scheduler().for_each_patchdata_nonempty([&](Patch cur_p, PatchDataLayer &pdat) {
495 auto &ncache = shambase::get_check_ref(storage.neigh_cache);
496 ncache.neigh_cache.add_obj(cur_p.id_patch, build_neigh_cache(cur_p.id_patch));
497 });
498
499 time_neigh.end();
500 storage.timings_details.neighbors += time_neigh.elasped_sec();
501}
502
// NOTE(review): signature line 504 lost in extraction — clears the neighbour
// cache by assigning a default-constructed value.
503template<class Tvec, template<class> class Kern>
505 storage.neigh_cache->neigh_cache = {};
506}
507
// Pre-step hook of the GSPH solver. Currently a stub: it only records the
// call site (StackEntry) and emits a debug log with the current time and
// timestep — no particle data is touched here.
508template<class Tvec, template<class> class Kern>
509void shammodels::gsph::Solver<Tvec, Kern>::gsph_prestep(Tscal time_val, Tscal dt) {
510 StackEntry stack_loc{};
511
512 shamlog_debug_ln("GSPH", "Prestep at t =", time_val, "dt =", dt);
513}
514
// NOTE(review): lines 516 (signature — body reads `time_val`, so it
// presumably receives the current time) and 523 (declaration of `reatrib`,
// used at the end; presumably a ReattributeDataUtility) were dropped by the
// extraction — confirm against the repository source. Wraps particles back
// into the box per the configured BC, then reassigns them to patches.
515template<class Tvec, template<class> class Kern>
517 StackEntry stack_loc{};
518
519 shamlog_debug_ln("GSPH", "apply position boundary");
520
521 PatchScheduler &sched = scheduler();
522 shamrock::SchedulerUtility integrators(sched);
524
525 auto &pdl = sched.pdl_old();
526 const u32 ixyz = pdl.get_field_idx<Tvec>(gsph::names::common::xyz);
527 const u32 ivxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::vxyz);
528 auto [bmin, bmax] = sched.get_box_volume<Tvec>();
529
530 using SolverConfigBC = typename Config::BCConfig;
531 using SolverBCFree = typename SolverConfigBC::Free;
532 using SolverBCPeriodic = typename SolverConfigBC::Periodic;
533 using SolverBCShearingPeriodic = typename SolverConfigBC::ShearingPeriodic;
534
535 if (SolverBCFree *c = std::get_if<SolverBCFree>(&solver_config.boundary_config.config)) {
// Free boundaries: positions are left untouched; log once on rank 0.
536 if (shamcomm::world_rank() == 0) {
537 logger::info_ln("PositionUpdated", "free boundaries skipping geometry update");
538 }
539 } else if (
540 SolverBCPeriodic *c
541 = std::get_if<SolverBCPeriodic>(&solver_config.boundary_config.config)) {
542 integrators.fields_apply_periodicity(ixyz, std::pair{bmin, bmax});
543 } else if (
544 SolverBCShearingPeriodic *c
545 = std::get_if<SolverBCShearingPeriodic>(&solver_config.boundary_config.config)) {
546 // Apply shearing periodic boundaries (Stone 2010) - reuse SPH implementation
547 integrators.fields_apply_shearing_periodicity(
548 ixyz,
549 ivxyz,
550 std::pair{bmin, bmax},
551 c->shear_base,
552 c->shear_dir,
553 c->shear_speed * time_val,
554 c->shear_speed);
555 } else {
556 shambase::throw_with_loc<std::runtime_error>("GSPH: Unsupported boundary condition type.");
557 }
558
// Particles that crossed patch borders are handed to their new owning patch.
559 reatrib.reatribute_patch_objects(storage.serial_patch_tree.get(), gsph::names::common::xyz);
560}
561
// NOTE(review): lines 563 (signature — body reads `dt`), 592 and 611 (the
// `sham::kernel_call(` opening lines of the two device kernels below) were
// dropped by the extraction — confirm against the repository source.
// Leapfrog KDK predictor: half-kick velocities with old accelerations, drift
// positions, and half-step internal energy when the EOS carries u.
562template<class Tvec, template<class> class Kern>
564 StackEntry stack_loc{};
565 using namespace shamrock::patch;
566
567 PatchDataLayerLayout &pdl = scheduler().pdl_old();
568 const u32 ixyz = pdl.get_field_idx<Tvec>(gsph::names::common::xyz);
569 const u32 ivxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::vxyz);
570 const u32 iaxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::axyz);
571
// Internal-energy fields exist only for EOS configurations with uint.
572 const bool has_uint = solver_config.has_field_uint();
573 const u32 iuint = has_uint ? pdl.get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;
574 const u32 iduint = has_uint ? pdl.get_field_idx<Tscal>(gsph::names::newtonian::duint) : 0;
575
576 Tscal half_dt = dt / 2;
577
578 // Predictor step: leapfrog kick-drift-kick
579 scheduler().for_each_patchdata_nonempty([&](const Patch p, PatchDataLayer &pdat) {
580 u32 cnt = pdat.get_obj_cnt();
581 if (cnt == 0)
582 return;
583
584 auto &xyz_field = pdat.get_field<Tvec>(ixyz);
585 auto &vxyz_field = pdat.get_field<Tvec>(ivxyz);
586 auto &axyz_field = pdat.get_field<Tvec>(iaxyz);
587
588 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
589
590 // Leapfrog KDK: first half-kick, then drift
591 // The second half-kick (corrector) happens AFTER force recomputation
593 dev_sched->get_queue(),
594 sham::MultiRef{axyz_field.get_buf()},
595 sham::MultiRef{xyz_field.get_buf(), vxyz_field.get_buf()},
596 cnt,
597 [half_dt, dt](u32 i, const Tvec *axyz, Tvec *xyz, Tvec *vxyz) {
598 // First kick: v += a*dt/2 (using OLD acceleration)
599 vxyz[i] += axyz[i] * half_dt;
600 // Drift: x += v*dt
601 xyz[i] += vxyz[i] * dt;
602 });
603
604 // Internal energy integration (if adiabatic EOS)
605 // Predictor: u += du*dt/2 (first half-step)
606 // The second half-step happens in the corrector after force recomputation
607 if (has_uint) {
608 auto &uint_field = pdat.get_field<Tscal>(iuint);
609 auto &duint_field = pdat.get_field<Tscal>(iduint);
610
612 dev_sched->get_queue(),
613 sham::MultiRef{duint_field.get_buf()},
614 sham::MultiRef{uint_field.get_buf()},
615 cnt,
616 [half_dt](u32 i, const Tscal *duint, Tscal *uint) {
617 // u += du*dt/2 (first half-step)
618 uint[i] += duint[i] * half_dt;
619 });
620 }
621 });
622}
623
// NOTE(review): lines 625 (signature) and 636 (the declaration receiving
// `get_check_ref(storage.ghost_layout.get())`, presumably a
// `PatchDataLayerLayout &ghost_layout`) were dropped by the extraction.
// Builds the minimal xyz+h ghost layout, then lets the solver config
// populate the full ghost layout.
624template<class Tvec, template<class> class Kern>
626 StackEntry stack_loc{};
627
628 // Initialize xyzh_ghost_layout for BasicSPHGhostHandler (position + smoothing length)
629 storage.xyzh_ghost_layout = std::make_shared<shamrock::patch::PatchDataLayerLayout>();
630 storage.xyzh_ghost_layout->template add_field<Tvec>(gsph::names::common::xyz, 1);
631 storage.xyzh_ghost_layout->template add_field<Tscal>(gsph::names::common::hpart, 1);
632
633 // Reset first in case it was set from a previous timestep
634 storage.ghost_layout = std::make_shared<shamrock::patch::PatchDataLayerLayout>();
635
637 = shambase::get_check_ref(storage.ghost_layout.get());
638
// Field set of the ghost layout depends on the solver configuration (uint, MUSCL gradients, ...).
639 solver_config.set_ghost_layout(ghost_layout);
640}
641
// NOTE(review): several hyperlinked lines were dropped by the extraction:
// 643 (signature), 686-687 (presumably the `omega`/`density` field handles
// used below), 694/696/698 (declarations of grad_vx_ptr/grad_vy_ptr/
// grad_vz_ptr), 717 and 758 (a lambda parameter line, presumably the
// `sham::DeviceBuffer<u32> &buf_idx` used in the bodies), 767 (declaration
// of `interf_pdat`), and 780 (the merge lambda's first parameter line).
// TODO confirm all of these against the repository source.
// Builds per-interface ghost patchdata, fills it from sender fields,
// offsets velocities for shearing boundaries, exchanges it over MPI, and
// merges it with the local data into storage.merged_patchdata_ghost.
642template<class Tvec, template<class> class Kern>
644 StackEntry stack_loc{};
645
646 shambase::Timer timer_interf;
647 timer_interf.start();
648
649 using namespace shamrock;
650 using namespace shamrock::patch;
651
// Indices of the source fields in the local (old) patchdata layout.
652 PatchDataLayerLayout &pdl = scheduler().pdl_old();
653 const u32 ixyz = pdl.get_field_idx<Tvec>(gsph::names::common::xyz);
654 const u32 ivxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::vxyz);
655 const u32 ihpart = pdl.get_field_idx<Tscal>(gsph::names::common::hpart);
656
657 const bool has_uint = solver_config.has_field_uint();
658 const u32 iuint = has_uint ? pdl.get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;
659
// Indices of the destination fields in the ghost layout built at init.
660 auto &ghost_layout_ptr = storage.ghost_layout;
661 shamrock::patch::PatchDataLayerLayout &ghost_layout = shambase::get_check_ref(ghost_layout_ptr);
662 u32 ihpart_interf = ghost_layout.get_field_idx<Tscal>(gsph::names::common::hpart);
663 u32 ivxyz_interf = ghost_layout.get_field_idx<Tvec>(gsph::names::newtonian::vxyz);
664 u32 iomega_interf = ghost_layout.get_field_idx<Tscal>(gsph::names::newtonian::omega);
665 u32 idensity_interf = ghost_layout.get_field_idx<Tscal>(gsph::names::newtonian::density);
666 u32 iuint_interf
667 = has_uint ? ghost_layout.get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;
668
669 // Gradient field indices (for MUSCL reconstruction)
670 const bool has_grads = solver_config.requires_gradients();
671 u32 igrad_d_interf
672 = has_grads ? ghost_layout.get_field_idx<Tvec>(gsph::names::newtonian::grad_density) : 0;
673 u32 igrad_p_interf
674 = has_grads ? ghost_layout.get_field_idx<Tvec>(gsph::names::newtonian::grad_pressure) : 0;
675 u32 igrad_vx_interf
676 = has_grads ? ghost_layout.get_field_idx<Tvec>(gsph::names::newtonian::grad_vx) : 0;
677 u32 igrad_vy_interf
678 = has_grads ? ghost_layout.get_field_idx<Tvec>(gsph::names::newtonian::grad_vy) : 0;
679 u32 igrad_vz_interf
680 = has_grads ? ghost_layout.get_field_idx<Tvec>(gsph::names::newtonian::grad_vz) : 0;
681
682 using InterfaceBuildInfos = typename gsph::GSPHGhostHandler<Tvec>::InterfaceBuildInfos;
683
684 gsph::GSPHGhostHandler<Tvec> &ghost_handle
685 = shambase::get_check_ref(storage.ghost_handler).get();
688
689 // Get gradient fields (for MUSCL)
// Pointers stay null when gradients are not configured; guarded by has_grads below.
690 shamrock::solvergraph::Field<Tvec> *grad_density_ptr
691 = has_grads ? &shambase::get_check_ref(storage.grad_density) : nullptr;
692 shamrock::solvergraph::Field<Tvec> *grad_pressure_ptr
693 = has_grads ? &shambase::get_check_ref(storage.grad_pressure) : nullptr;
695 = has_grads ? &shambase::get_check_ref(storage.grad_vx) : nullptr;
697 = has_grads ? &shambase::get_check_ref(storage.grad_vy) : nullptr;
699 = has_grads ? &shambase::get_check_ref(storage.grad_vz) : nullptr;
700
701 // Build interface data from ghost cache
// Stage 1: allocate one (empty, reserved) PatchDataLayer per interface.
702 auto pdat_interf = ghost_handle.template build_interface_native<PatchDataLayer>(
703 storage.ghost_patch_cache.get(),
704 [&](u64 sender, u64, InterfaceBuildInfos binfo, sham::DeviceBuffer<u32> &buf_idx, u32 cnt) {
705 PatchDataLayer pdat(ghost_layout_ptr);
706 pdat.reserve(cnt);
707 return pdat;
708 });
709
710 // Populate interface data with field values
// Stage 2: copy the selected particle subsets from the sender's fields.
711 ghost_handle.template modify_interface_native<PatchDataLayer>(
712 storage.ghost_patch_cache.get(),
713 pdat_interf,
714 [&](u64 sender,
715 u64,
716 InterfaceBuildInfos binfo,
718 u32 cnt,
719 PatchDataLayer &pdat) {
720 PatchDataLayer &sender_patch = scheduler().patch_data.get_pdat(sender);
721 PatchDataField<Tscal> &sender_omega = omega.get(sender);
722 PatchDataField<Tscal> &sender_density = density.get(sender);
723
724 sender_patch.get_field<Tscal>(ihpart).append_subset_to(
725 buf_idx, cnt, pdat.get_field<Tscal>(ihpart_interf));
726 sender_patch.get_field<Tvec>(ivxyz).append_subset_to(
727 buf_idx, cnt, pdat.get_field<Tvec>(ivxyz_interf));
728 sender_omega.append_subset_to(buf_idx, cnt, pdat.get_field<Tscal>(iomega_interf));
729 sender_density.append_subset_to(buf_idx, cnt, pdat.get_field<Tscal>(idensity_interf));
730
731 if (has_uint) {
732 sender_patch.get_field<Tscal>(iuint).append_subset_to(
733 buf_idx, cnt, pdat.get_field<Tscal>(iuint_interf));
734 }
735
736 // Communicate gradient fields for MUSCL reconstruction
737 if (has_grads) {
738 grad_density_ptr->get(sender).append_subset_to(
739 buf_idx, cnt, pdat.get_field<Tvec>(igrad_d_interf));
740 grad_pressure_ptr->get(sender).append_subset_to(
741 buf_idx, cnt, pdat.get_field<Tvec>(igrad_p_interf));
742 grad_vx_ptr->get(sender).append_subset_to(
743 buf_idx, cnt, pdat.get_field<Tvec>(igrad_vx_interf));
744 grad_vy_ptr->get(sender).append_subset_to(
745 buf_idx, cnt, pdat.get_field<Tvec>(igrad_vy_interf));
746 grad_vz_ptr->get(sender).append_subset_to(
747 buf_idx, cnt, pdat.get_field<Tvec>(igrad_vz_interf));
748 }
749 });
750
751 // Apply velocity offset for periodic boundaries
// Stage 3: shearing-periodic interfaces carry a non-zero velocity offset.
752 ghost_handle.template modify_interface_native<PatchDataLayer>(
753 storage.ghost_patch_cache.get(),
754 pdat_interf,
755 [&](u64 sender,
756 u64,
757 InterfaceBuildInfos binfo,
759 u32 cnt,
760 PatchDataLayer &pdat) {
761 if (sycl::length(binfo.offset_speed) > 0) {
762 pdat.get_field<Tvec>(ivxyz_interf).apply_offset(binfo.offset_speed);
763 }
764 });
765
766 // Communicate ghost data across MPI ranks
768 = ghost_handle.communicate_pdat(ghost_layout_ptr, std::move(pdat_interf));
769
770 // Count total ghost particles per patch
771 std::map<u64, u64> sz_interf_map;
772 interf_pdat.for_each([&](u64 s, u64 r, PatchDataLayer &pdat_interf) {
773 sz_interf_map[r] += pdat_interf.get_obj_cnt();
774 });
775
776 // Merge local and ghost data
// Stage 4: build the ghost-layout copy of the local data, then append received ghosts.
777 storage.merged_patchdata_ghost.set(
778 ghost_handle.template merge_native<PatchDataLayer, PatchDataLayer>(
779 std::move(interf_pdat),
781 PatchDataLayer pdat_new(ghost_layout_ptr);
782
783 u32 or_elem = pdat.get_obj_cnt();
784 pdat_new.reserve(or_elem + sz_interf_map[p.id_patch]);
785
786 PatchDataField<Tscal> &cur_omega = omega.get(p.id_patch);
787 PatchDataField<Tscal> &cur_density = density.get(p.id_patch);
788
789 // Insert local particle data
790 pdat_new.get_field<Tscal>(ihpart_interf).insert(pdat.get_field<Tscal>(ihpart));
791 pdat_new.get_field<Tvec>(ivxyz_interf).insert(pdat.get_field<Tvec>(ivxyz));
792 pdat_new.get_field<Tscal>(iomega_interf).insert(cur_omega);
793 pdat_new.get_field<Tscal>(idensity_interf).insert(cur_density);
794
795 if (has_uint) {
796 pdat_new.get_field<Tscal>(iuint_interf).insert(pdat.get_field<Tscal>(iuint));
797 }
798
799 // Insert local gradient data for MUSCL reconstruction
800 if (has_grads) {
801 pdat_new.get_field<Tvec>(igrad_d_interf)
802 .insert(grad_density_ptr->get(p.id_patch));
803 pdat_new.get_field<Tvec>(igrad_p_interf)
804 .insert(grad_pressure_ptr->get(p.id_patch));
805 pdat_new.get_field<Tvec>(igrad_vx_interf).insert(grad_vx_ptr->get(p.id_patch));
806 pdat_new.get_field<Tvec>(igrad_vy_interf).insert(grad_vy_ptr->get(p.id_patch));
807 pdat_new.get_field<Tvec>(igrad_vz_interf).insert(grad_vz_ptr->get(p.id_patch));
808 }
809
// Sanity check: all fields must agree on the object count after merging.
810 pdat_new.check_field_obj_cnt_match();
811 return pdat_new;
812 },
813 [](PatchDataLayer &pdat, PatchDataLayer &pdat_interf) {
814 pdat.insert_elements(pdat_interf);
815 }));
816
817 timer_interf.end();
818 storage.timings_details.interface += timer_interf.elasped_sec();
819}
820
// NOTE(review): signature line 822 lost in extraction — drops the merged
// (local + ghost) patchdata built by the communication step above.
821template<class Tvec, template<class> class Kern>
823 storage.merged_patchdata_ghost.reset();
824}
825
826template<class Tvec, template<class> class Kern>
828 StackEntry stack_loc{};
829
830 using namespace shamrock;
831 using namespace shamrock::patch;
832
833 const Tscal pmass = solver_config.gpart_mass;
834
835 // Verify particle mass is valid
836 if (shamcomm::world_rank() == 0) {
837 if (pmass <= Tscal(0) || pmass < Tscal(1e-100) || !std::isfinite(pmass)) {
838 logger::warn_ln("GSPH", "Invalid particle mass in compute_omega: pmass =", pmass);
839 }
840 }
841
843 shamrock::solvergraph::Field<Tscal> &density_field = shambase::get_check_ref(storage.density);
844
845 // Create sizes directly from scheduler to ensure we have all patches
846 std::shared_ptr<shamrock::solvergraph::Indexes<u32>> sizes
847 = std::make_shared<shamrock::solvergraph::Indexes<u32>>(edges::sizes, "N");
848 scheduler().for_each_patchdata_nonempty([&](const Patch p, PatchDataLayer &pdat) {
849 sizes->indexes.add_obj(p.id_patch, pdat.get_obj_cnt());
850 });
851
852 // Ensure fields are allocated for all patches with correct sizes
853 omega_field.ensure_sizes(sizes->indexes);
854 density_field.ensure_sizes(sizes->indexes);
855
856 // Get patchdata layout for hpart field
857 PatchDataLayerLayout &pdl = scheduler().pdl_old();
858 const u32 ihpart = pdl.get_field_idx<Tscal>(gsph::names::common::hpart);
859
860 // =========================================================================
861 // OUTER-LOOP SMOOTHING LENGTH ITERATION (FIX FOR CACHE CONSISTENCY BUG)
862 // =========================================================================
863 // The original implementation had an inner-loop Newton-Raphson iteration
864 // inside a GPU kernel. This caused issues because:
865 // 1. Neighbor cache was built with OLD h values (+ 10% tolerance)
866 // 2. Inner iteration could change h by more than 10%
867 // 3. Particles that should be neighbors weren't found in the cache
868 // 4. Result: underestimated density at discontinuities -> wrong forces
869 //
870 // The fix uses the SPH-style outer-loop approach:
871 // 1. Create GSPH IterateSmoothingLengthDensity module (ONE step per call)
872 // 2. Wrap in LoopSmoothingLengthIter for multiple iterations
873 // 3. If h grows beyond tolerance, signal for cache rebuild
874 // =========================================================================
875
876 auto &merged_xyzh = storage.merged_xyzh.get();
877
878 // Create field references for the iteration module
879 // Position spans (from merged xyzh)
880 std::shared_ptr<shamrock::solvergraph::FieldRefs<Tvec>> pos_merged
881 = std::make_shared<shamrock::solvergraph::FieldRefs<Tvec>>(edges::pos_merged, "r");
883
884 // Old h spans (from merged xyzh - read only during iteration)
885 std::shared_ptr<shamrock::solvergraph::FieldRefs<Tscal>> hold
886 = std::make_shared<shamrock::solvergraph::FieldRefs<Tscal>>(edges::h_old, "h^{old}");
888
889 // New h spans (local patchdata - written during iteration)
890 std::shared_ptr<shamrock::solvergraph::FieldRefs<Tscal>> hnew
891 = std::make_shared<shamrock::solvergraph::FieldRefs<Tscal>>(edges::h_new, "h^{new}");
893
894 // Get field indices from xyzh_ghost_layout for merged data access
895 const u32 ixyz_ghost
896 = storage.xyzh_ghost_layout->template get_field_idx<Tvec>(gsph::names::common::xyz);
897 const u32 ihpart_ghost
898 = storage.xyzh_ghost_layout->template get_field_idx<Tscal>(gsph::names::common::hpart);
899
900 // Populate field references
901 scheduler().for_each_patchdata_nonempty(
902 [&, ixyz_ghost, ihpart_ghost](const Patch p, PatchDataLayer &pdat) {
903 auto &mfield = merged_xyzh.get(p.id_patch);
904
905 // Position from merged data (includes ghosts for neighbor search)
906 pos_refs.add_obj(p.id_patch, std::ref(mfield.template get_field<Tvec>(ixyz_ghost)));
907
908 // h_old from merged data
909 hold_refs.add_obj(p.id_patch, std::ref(mfield.template get_field<Tscal>(ihpart_ghost)));
910
911 // h_new to local patchdata (this is updated during iteration)
912 hnew_refs.add_obj(p.id_patch, std::ref(pdat.get_field<Tscal>(ihpart)));
913 });
914
915 pos_merged->set_refs(pos_refs);
916 hold->set_refs(hold_refs);
917 hnew->set_refs(hnew_refs);
918
919 // Initialize hnew with hold values
920 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
921 scheduler().for_each_patchdata_nonempty([&](const Patch p, PatchDataLayer &pdat) {
922 u32 cnt = pdat.get_obj_cnt();
923 if (cnt == 0)
924 return;
925
926 auto &mfield = merged_xyzh.get(p.id_patch);
927 auto &buf_hpart_merged = mfield.template get_field_buf_ref<Tscal>(1);
928 auto &buf_hpart_local = pdat.get_field_buf_ref<Tscal>(ihpart);
929
931 dev_sched->get_queue(),
932 sham::MultiRef{buf_hpart_merged},
933 sham::MultiRef{buf_hpart_local},
934 cnt,
935 [](u32 i, const Tscal *h_old, Tscal *h_new) {
936 h_new[i] = h_old[i];
937 });
938 });
939
940 // Create epsilon field for convergence tracking
941 shamrock::SchedulerUtility utility(scheduler());
942 ComputeField<Tscal> _epsilon_h = utility.make_compute_field<Tscal>("epsilon_h", 1);
943
944 // Initialize epsilon to large value (not converged)
945 scheduler().for_each_patchdata_nonempty([&](const Patch p, PatchDataLayer &pdat) {
946 u32 cnt = pdat.get_obj_cnt();
947 if (cnt == 0)
948 return;
949
950 auto &eps_buf = _epsilon_h.get_buf_check(p.id_patch);
951
953 dev_sched->get_queue(),
955 sham::MultiRef{eps_buf},
956 cnt,
957 [](u32 i, Tscal *eps) {
958 eps[i] = Tscal(1.0); // Start with large epsilon
959 });
960 });
961
962 // Create epsilon field references
963 std::shared_ptr<shamrock::solvergraph::FieldRefs<Tscal>> eps_h
964 = std::make_shared<shamrock::solvergraph::FieldRefs<Tscal>>(edges::eps_h, "\\epsilon_h");
966 scheduler().for_each_patchdata_nonempty([&](const Patch p, PatchDataLayer &pdat) {
967 auto &field = _epsilon_h.get_field(p.id_patch);
968 eps_h_refs.add_obj(p.id_patch, std::ref(field));
969 });
970 eps_h->set_refs(eps_h_refs);
971
972 // Use SPH's IterateSmoothingLengthDensity module (reuse, no duplication)
973 std::shared_ptr<sph::modules::IterateSmoothingLengthDensity<Tvec, Kernel>> smth_h_iter
974 = std::make_shared<sph::modules::IterateSmoothingLengthDensity<Tvec, Kernel>>(
975 solver_config.gpart_mass,
976 solver_config.htol_up_coarse_cycle,
977 solver_config.htol_up_fine_cycle);
978
979 // SPH's module only iterates h, no density/omega outputs
980 smth_h_iter->set_edges(sizes, storage.neigh_cache, pos_merged, hold, hnew, eps_h);
981
982 // Create convergence flag
983 std::shared_ptr<shamrock::solvergraph::ScalarEdge<bool>> is_converged
984 = std::make_shared<shamrock::solvergraph::ScalarEdge<bool>>("is_converged", "converged");
985
986 // Use LoopSmoothingLengthIter from SPH module for outer loop iteration
988 smth_h_iter, solver_config.epsilon_h, solver_config.h_iter_per_subcycles, false);
989 loop_smth_h_iter.set_edges(eps_h, is_converged);
990
991 // Run the outer loop iteration
992 loop_smth_h_iter.evaluate();
993
994 // Check convergence
995 if (!is_converged->value) {
996 // Get convergence statistics
997 Tscal local_max_eps = shamrock::solvergraph::get_rank_max(*eps_h);
998 Tscal global_max_eps = shamalgs::collective::allreduce_max(local_max_eps);
999
1000 // Count particles that need cache rebuild (eps == -1)
1001 u64 cnt_unconverged = 0;
1002 scheduler().for_each_patchdata_nonempty([&](const Patch p, PatchDataLayer &pdat) {
1003 auto res = _epsilon_h.get_field(p.id_patch).get_ids_buf_where([](auto access, u32 id) {
1004 return access[id] < Tscal(0);
1005 });
1006 cnt_unconverged += std::get<1>(res);
1007 });
1008 u64 global_cnt_unconverged = shamalgs::collective::allreduce_sum(cnt_unconverged);
1009
1010 if (shamcomm::world_rank() == 0) {
1011 if (global_cnt_unconverged > 0) {
1012 logger::warn_ln(
1013 "GSPH",
1014 "Smoothing length iteration: ",
1015 global_cnt_unconverged,
1016 " particles need cache rebuild (h grew beyond tolerance)");
1017 } else {
1018 logger::warn_ln(
1019 "GSPH",
1020 "Smoothing length iteration did not converge, max eps =",
1021 global_max_eps);
1022 }
1023 }
1024 }
1025
1026 // =========================================================================
1027 // COMPUTE DENSITY AND OMEGA AFTER H CONVERGENCE
1028 // =========================================================================
1029 // Now that h has converged, compute the final density and omega values.
1030 // This is done ONCE here instead of on every iteration (more efficient).
1031 // =========================================================================
1032
1033 static constexpr Tscal Rkern = Kernel::Rkern;
1034
1035 auto &neigh_cache = storage.neigh_cache->neigh_cache;
1036
1037 scheduler().for_each_patchdata_nonempty([&](const Patch p, PatchDataLayer &pdat) {
1038 u32 cnt = pdat.get_obj_cnt();
1039 if (cnt == 0)
1040 return;
1041
1042 auto &mfield = merged_xyzh.get(p.id_patch);
1043 auto &pcache = neigh_cache.get(p.id_patch);
1044
1045 // Get position and h from merged data (includes ghosts for neighbor search)
1046 auto &buf_xyz = mfield.template get_field_buf_ref<Tvec>(0);
1047 auto &buf_hpart = pdat.get_field_buf_ref<Tscal>(ihpart);
1048
1049 // Get density and omega output fields
1050 auto &dens_field = density_field.get_field(p.id_patch);
1051 auto &omeg_field = omega_field.get_field(p.id_patch);
1052
1053 sham::DeviceQueue &q = dev_sched->get_queue();
1054 sham::EventList depends_list;
1055
1056 auto ploop_ptrs = pcache.get_read_access(depends_list);
1057 auto xyz_acc = buf_xyz.get_read_access(depends_list);
1058 auto h_acc = buf_hpart.get_read_access(depends_list);
1059 auto density_acc = dens_field.get_buf().get_write_access(depends_list);
1060 auto omega_acc = omeg_field.get_buf().get_write_access(depends_list);
1061
1062 auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
1063 shamrock::tree::ObjectCacheIterator particle_looper(ploop_ptrs);
1064
1065 shambase::parallel_for(cgh, cnt, "gsph_compute_density_omega", [=](u64 gid) {
1066 u32 id_a = (u32) gid;
1067
1068 Tvec xyz_a = xyz_acc[id_a];
1069 Tscal h_a = h_acc[id_a];
1070 Tscal dint = h_a * h_a * Rkern * Rkern;
1071
1072 // SPH density summation
1073 Tscal rho_sum = Tscal(0);
1074 Tscal sumdWdh = Tscal(0);
1075
1076 particle_looper.for_each_object(id_a, [&](u32 id_b) {
1077 Tvec dr = xyz_a - xyz_acc[id_b];
1078 Tscal rab2 = sycl::dot(dr, dr);
1079
1080 if (rab2 > dint) {
1081 return;
1082 }
1083
1084 Tscal rab = sycl::sqrt(rab2);
1085
1086 rho_sum += pmass * Kernel::W_3d(rab, h_a);
1087 sumdWdh += pmass * Kernel::dhW_3d(rab, h_a);
1088 });
1089
1090 // Store density
1091 density_acc[id_a] = sycl::max(rho_sum, Tscal(1e-30));
1092
1093 // Compute omega (grad-h correction factor)
1094 // Omega = 1 + h/(dim*rho) * (drho/dh)
1095 // This matches SPH's ComputeOmega and is used in sph_pressure_symetric
1096 // which divides by (rho^2 * omega), so we need Omega not 1/Omega
1097 Tscal omega_val = Tscal(1);
1098 if (rho_sum > Tscal(1e-30)) {
1099 omega_val = Tscal(1) + h_a / (Tscal(dim) * rho_sum) * sumdWdh;
1100 omega_val = sycl::clamp(omega_val, Tscal(0.5), Tscal(2.0));
1101 }
1102 omega_acc[id_a] = omega_val;
1103 });
1104 });
1105
1106 // Complete event states for all accessed buffers
1107 pcache.complete_event_state({e});
1108 buf_xyz.complete_event_state(e);
1109 buf_hpart.complete_event_state(e);
1110 dens_field.get_buf().complete_event_state(e);
1111 omeg_field.get_buf().complete_event_state(e);
1112 });
1113}
1114
// Evaluates the equation of state (pressure + sound speed) for every particle,
// including ghosts, from the SPH-summation density previously communicated in
// the merged ghost patchdata. Outputs go to storage.pressure / storage.soundspeed,
// sized with ghosts so downstream force kernels can read EOS values for neighbors.
template<class Tvec, template<class> class Kern>
// NOTE(review): the member-function signature line is not visible in this
// extract; from the call site this is the EOS step (compute_eos_fields) —
// confirm against the full file.
    StackEntry stack_loc{};

    using namespace shamrock;
    using namespace shamrock::patch;

    // GSPH EOS: Following reference implementation (g_pre_interaction.cpp)
    // P = (\gamma - 1) * \rho * u where \rho is from SPH summation
    // c = sqrt(\gamma * (\gamma - 1) * u) -- from internal energy, not from P/\rho

    auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
    const Tscal gamma = solver_config.get_eos_gamma();
    const bool has_uint = solver_config.has_field_uint();

    // Get ghost layout field indices
    // (the declaration line introducing `ghost_layout` is not visible in this extract)
        = shambase::get_check_ref(storage.ghost_layout.get());
    u32 idensity_interf = ghost_layout.get_field_idx<Tscal>(gsph::names::newtonian::density);
    // uint index is only meaningful when the solver carries an internal-energy field
    u32 iuint_interf
        = has_uint ? ghost_layout.get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;

    shamrock::solvergraph::Field<Tscal> &pressure_field = shambase::get_check_ref(storage.pressure);
    shamrock::solvergraph::Field<Tscal> &soundspeed_field
        = shambase::get_check_ref(storage.soundspeed);

    // Size buffers to part_counts_with_ghost (includes ghosts!)
    shambase::DistributedData<u32> &counts_with_ghosts
        = shambase::get_check_ref(storage.part_counts_with_ghost).indexes;

    pressure_field.ensure_sizes(counts_with_ghosts);
    soundspeed_field.ensure_sizes(counts_with_ghosts);

    // Iterate over merged_patchdata_ghost (includes local + ghost particles)
    storage.merged_patchdata_ghost.get().for_each([&](u64 id, PatchDataLayer &mpdat) {
        u32 total_elements
            = shambase::get_check_ref(storage.part_counts_with_ghost).indexes.get(id);
        if (total_elements == 0)
            return;

        // Use SPH-summation density from communicated ghost data
        sham::DeviceBuffer<Tscal> &buf_density = mpdat.get_field_buf_ref<Tscal>(idensity_interf);
        auto &pressure_buf = pressure_field.get_field(id).get_buf();
        auto &soundspeed_buf = soundspeed_field.get_field(id).get_buf();

        sham::DeviceQueue &q = dev_sched->get_queue();
        sham::EventList depends_list;

        auto density = buf_density.get_read_access(depends_list);
        auto pressure = pressure_buf.get_write_access(depends_list);
        auto soundspeed = soundspeed_buf.get_write_access(depends_list);

        // Internal-energy pointer is only registered when the field exists
        const Tscal *uint_ptr = nullptr;
        if (has_uint) {
            uint_ptr = mpdat.get_field_buf_ref<Tscal>(iuint_interf).get_read_access(depends_list);
        }

        auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
            shambase::parallel_for(cgh, total_elements, "compute_eos_gsph", [=](u64 gid) {
                u32 i = (u32) gid;

                // Use SPH-summation density (from compute_omega, communicated to ghosts)
                Tscal rho = density[i];
                // Floor avoids divide-by-zero / NaN propagation from empty regions
                rho = sycl::max(rho, Tscal(1e-30));

                if (has_uint && uint_ptr != nullptr) {
                    // Adiabatic EOS (reference: g_pre_interaction.cpp line 107)
                    // P = (\gamma - 1) * \rho * u
                    Tscal u = uint_ptr[i];
                    u = sycl::max(u, Tscal(1e-30));
                    Tscal P = (gamma - Tscal(1.0)) * rho * u;

                    // Sound speed from internal energy (reference: solver.cpp line 2661)
                    // c = sqrt(\gamma * (\gamma - 1) * u)
                    Tscal cs = sycl::sqrt(gamma * (gamma - Tscal(1.0)) * u);

                    // Clamp to reasonable values
                    P = sycl::clamp(P, Tscal(1e-30), Tscal(1e30));
                    cs = sycl::clamp(cs, Tscal(1e-10), Tscal(1e10));

                    pressure[i] = P;
                    soundspeed[i] = cs;
                } else {
                    // Isothermal case
                    // NOTE(review): cs is hard-coded to 1.0 here rather than taken
                    // from a configured isothermal sound speed — confirm this is
                    // intentional (only valid in code units where cs == 1).
                    Tscal cs = Tscal(1.0);
                    Tscal P = cs * cs * rho;

                    pressure[i] = P;
                    soundspeed[i] = cs;
                }
            });
        });

        // Complete all buffer event states
        buf_density.complete_event_state(e);
        if (has_uint) {
            mpdat.get_field_buf_ref<Tscal>(iuint_interf).complete_event_state(e);
        }
        pressure_buf.complete_event_state(e);
        soundspeed_buf.complete_event_state(e);
    });
}
1217
// Per-step invalidation hook for the EOS outputs. Intentionally empty: the
// pressure/soundspeed solvergraph fields are fully overwritten by the EOS
// evaluation each timestep, so no explicit clearing is required here.
template<class Tvec, template<class> class Kern>
// NOTE(review): the function signature line is not visible in this extract —
// confirm the member name against the full file.
    // Reset computed EOS fields - they're recomputed each timestep
}
1222
// Copies the per-step density / pressure / soundspeed solvergraph fields back
// into the scheduler patchdata, element-for-element, for each non-empty patch.
// This makes the EOS outputs part of the persisted particle state (restart
// files, VTK dumps) instead of living only in transient solver storage.
template<class Tvec, template<class> class Kern>
// NOTE(review): the member-function signature line is not visible in this
// extract — confirm the member name against the full file.
    StackEntry stack_loc{};

    using namespace shamrock;
    using namespace shamrock::patch;

    // Copy density, pressure, and soundspeed from solvergraph fields to patchdata
    // This ensures the values persist across restarts and can be read by VTKDump

    // Resolve destination field indices in the (old) patchdata layout
    PatchDataLayerLayout &pdl = scheduler().pdl_old();
    u32 idensity = pdl.get_field_idx<Tscal>(names::newtonian::density);
    u32 ipressure = pdl.get_field_idx<Tscal>(names::newtonian::pressure);
    u32 isoundspeed = pdl.get_field_idx<Tscal>(names::newtonian::soundspeed);

    // Source solvergraph fields (must be populated by earlier pipeline stages)
    auto &density_field = shambase::get_check_ref(storage.density);
    auto &pressure_field = shambase::get_check_ref(storage.pressure);
    auto &soundspeed_field = shambase::get_check_ref(storage.soundspeed);

    scheduler().for_each_patchdata_nonempty([&](Patch cur_p, PatchDataLayer &pdat) {
        u32 npart = pdat.get_obj_cnt();
        if (npart == 0) {
            return;
        }

        // Get patchdata buffers
        sham::DeviceBuffer<Tscal> &buf_rho = pdat.get_field_buf_ref<Tscal>(idensity);
        sham::DeviceBuffer<Tscal> &buf_P = pdat.get_field_buf_ref<Tscal>(ipressure);
        sham::DeviceBuffer<Tscal> &buf_cs = pdat.get_field_buf_ref<Tscal>(isoundspeed);

        // Get solvergraph field buffers (source data)
        // NOTE: source fields may be sized with ghosts; only the first npart
        // (local) entries are copied below.
        sham::DeviceBuffer<Tscal> &buf_rho_in = density_field.get_field(cur_p.id_patch).get_buf();
        sham::DeviceBuffer<Tscal> &buf_P_in = pressure_field.get_field(cur_p.id_patch).get_buf();
        sham::DeviceBuffer<Tscal> &buf_cs_in = soundspeed_field.get_field(cur_p.id_patch).get_buf();

        auto &q = shamsys::instance::get_compute_scheduler().get_queue();
        sham::EventList depends_list;

        auto rho_in = buf_rho_in.get_read_access(depends_list);
        auto P_in = buf_P_in.get_read_access(depends_list);
        auto cs_in = buf_cs_in.get_read_access(depends_list);
        auto rho = buf_rho.get_write_access(depends_list);
        auto P = buf_P.get_write_access(depends_list);
        auto cs = buf_cs.get_write_access(depends_list);

        // Straight element-wise device copy of the three fields
        auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
            cgh.parallel_for(sycl::range<1>{npart}, [=](sycl::item<1> item) {
                rho[item] = rho_in[item];
                P[item] = P_in[item];
                cs[item] = cs_in[item];
            });
        });

        // Release all six buffers once the copy kernel completes
        buf_rho_in.complete_event_state(e);
        buf_P_in.complete_event_state(e);
        buf_cs_in.complete_event_state(e);
        buf_rho.complete_event_state(e);
        buf_P.complete_event_state(e);
        buf_cs.complete_event_state(e);
    });
}
1284
// Computes per-particle SPH gradients of density, pressure, and the three
// velocity components, used by the MUSCL reconstruction of the GSPH Riemann
// solver. No-op unless the config requests gradients. Results are written to
// storage.grad_density / grad_pressure / grad_vx / grad_vy / grad_vz, sized
// to the local (ghost-free) particle counts.
template<class Tvec, template<class> class Kern>
// NOTE(review): the member-function signature line is not visible in this
// extract; from the call site this is compute_gradients — confirm.
    StackEntry stack_loc{};

    // Only compute gradients for MUSCL reconstruction
    if (!solver_config.requires_gradients()) {
        return;
    }

    using namespace shamrock;
    using namespace shamrock::patch;

    const Tscal pmass = solver_config.gpart_mass;
    const Tscal gamma = solver_config.get_eos_gamma();

    auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();

    PatchDataLayerLayout &pdl = scheduler().pdl_old();
    const u32 ihpart = pdl.get_field_idx<Tscal>(gsph::names::common::hpart);
    const u32 ivxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::vxyz);
    const bool has_uint = solver_config.has_field_uint();
    const u32 iuint = has_uint ? pdl.get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;

    // Get gradient fields from storage
    shamrock::solvergraph::Field<Tvec> &grad_density_field
        = shambase::get_check_ref(storage.grad_density);
    shamrock::solvergraph::Field<Tvec> &grad_pressure_field
        = shambase::get_check_ref(storage.grad_pressure);
    shamrock::solvergraph::Field<Tvec> &grad_vx_field = shambase::get_check_ref(storage.grad_vx);
    shamrock::solvergraph::Field<Tvec> &grad_vy_field = shambase::get_check_ref(storage.grad_vy);
    shamrock::solvergraph::Field<Tvec> &grad_vz_field = shambase::get_check_ref(storage.grad_vz);

    // Get density field for SPH-summation density
    shamrock::solvergraph::Field<Tscal> &density_field = shambase::get_check_ref(storage.density);

    // Ensure gradient fields have correct sizes (local counts, no ghosts)
    shambase::DistributedData<u32> &counts = shambase::get_check_ref(storage.part_counts).indexes;

    grad_density_field.ensure_sizes(counts);
    grad_pressure_field.ensure_sizes(counts);
    grad_vx_field.ensure_sizes(counts);
    grad_vy_field.ensure_sizes(counts);
    grad_vz_field.ensure_sizes(counts);

    auto &merged_xyzh = storage.merged_xyzh.get();
    auto &neigh_cache = storage.neigh_cache->neigh_cache;

    static constexpr Tscal Rkern = Kernel::Rkern;

    // Compute gradients following reference implementation (g_pre_interaction.cpp)
    // grad_d = \sigma_j m_j \nabla W_ij
    // grad_p = (grad_d * u_i + du) * (\gamma - 1) where du = \sigma_j m_j (u_j - u_i) \nabla W_ij
    // grad_v = \sigma_j m_j (v_j - v_i) \nabla W_ij / \rho_i
    scheduler().for_each_patchdata_nonempty([&](const Patch p, PatchDataLayer &pdat) {
        u32 cnt = pdat.get_obj_cnt();
        if (cnt == 0)
            return;

        auto &mfield = merged_xyzh.get(p.id_patch);
        auto &pcache = neigh_cache.get(p.id_patch);

        // Get position, h, velocity from merged data
        // NOTE(review): buf_vxyz (and uint below) come from the LOCAL patchdata,
        // while the neighbor cache was built over the merged (ghost-including)
        // particle set; if id_b can refer to a ghost, v_acc[id_b] / uint_ptr[id_b]
        // would read past the local buffer — verify buffer extents vs cache ids.
        auto &buf_xyz = mfield.template get_field_buf_ref<Tvec>(0);
        auto &buf_hpart = mfield.template get_field_buf_ref<Tscal>(1);
        auto &buf_vxyz = pdat.get_field_buf_ref<Tvec>(ivxyz);

        // Get density (local particles only)
        auto &dens_field = density_field.get_field(p.id_patch);

        // Get gradient output fields
        auto &grad_d_field = grad_density_field.get_field(p.id_patch);
        auto &grad_p_field = grad_pressure_field.get_field(p.id_patch);
        auto &grad_vx_buf = grad_vx_field.get_field(p.id_patch);
        auto &grad_vy_buf = grad_vy_field.get_field(p.id_patch);
        auto &grad_vz_buf = grad_vz_field.get_field(p.id_patch);

        sham::DeviceQueue &q = dev_sched->get_queue();
        sham::EventList depends_list;

        auto ploop_ptrs = pcache.get_read_access(depends_list);
        auto xyz_acc = buf_xyz.get_read_access(depends_list);
        auto h_acc = buf_hpart.get_read_access(depends_list);
        auto v_acc = buf_vxyz.get_read_access(depends_list);
        auto dens_acc = dens_field.get_buf().get_read_access(depends_list);
        auto grad_d_acc = grad_d_field.get_buf().get_write_access(depends_list);
        auto grad_p_acc = grad_p_field.get_buf().get_write_access(depends_list);
        auto grad_vx_acc = grad_vx_buf.get_buf().get_write_access(depends_list);
        auto grad_vy_acc = grad_vy_buf.get_buf().get_write_access(depends_list);
        auto grad_vz_acc = grad_vz_buf.get_buf().get_write_access(depends_list);

        // Get internal energy if adiabatic
        const Tscal *uint_ptr = nullptr;
        if (has_uint) {
            uint_ptr = pdat.get_field_buf_ref<Tscal>(iuint).get_read_access(depends_list);
        }

        auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
            shamrock::tree::ObjectCacheIterator particle_looper(ploop_ptrs);

            shambase::parallel_for(cgh, cnt, "gsph_compute_gradients", [=](u64 gid) {
                u32 id_a = (u32) gid;

                Tvec xyz_a = xyz_acc[id_a];
                Tscal h_a = h_acc[id_a];
                Tvec v_a = v_acc[id_a];
                // Floor density to avoid 1/0 in the final normalization
                Tscal rho_a = sycl::max(dens_acc[id_a], Tscal(1e-30));
                // Squared kernel support radius for particle a
                Tscal dint = h_a * h_a * Rkern * Rkern;

                // Get internal energy for particle a
                Tscal u_a = Tscal(0);
                if (uint_ptr != nullptr) {
                    u_a = uint_ptr[id_a];
                }

                // Initialize gradient accumulators
                Tvec grad_d = {0, 0, 0};  // Density gradient
                Tvec grad_u = {0, 0, 0};  // Internal energy difference gradient
                Tvec grad_vx = {0, 0, 0}; // Velocity component gradients
                Tvec grad_vy = {0, 0, 0};
                Tvec grad_vz = {0, 0, 0};

                particle_looper.for_each_object(id_a, [&](u32 id_b) {
                    Tvec dr = xyz_a - xyz_acc[id_b];
                    Tscal rab2 = sycl::dot(dr, dr);

                    // Skip pairs outside a's support, and self-interaction
                    if (rab2 > dint || id_a == id_b) {
                        return;
                    }

                    Tscal rab = sycl::sqrt(rab2);

                    // Kernel gradient: \nabla W = (dW/dr) * (r/|r|)
                    // NOTE(review): only h_a is used (gather form, no h_b
                    // symmetrization) — presumably matching the GSPH reference;
                    // confirm against g_pre_interaction.cpp.
                    Tscal dWdr = Kernel::dW_3d(rab, h_a);
                    Tvec gradW = dr * (dWdr * sham::inv_sat_positive(rab));

                    // Accumulate gradients (reference: g_pre_interaction.cpp lines 130-147)
                    grad_d += gradW * pmass;

                    // Internal energy gradient for pressure
                    Tscal u_b = (uint_ptr != nullptr) ? uint_ptr[id_b] : Tscal(0);
                    grad_u += gradW * (pmass * (u_b - u_a));

                    // Velocity gradients
                    Tvec v_b = v_acc[id_b];
                    grad_vx += gradW * (pmass * (v_b[0] - v_a[0]));
                    grad_vy += gradW * (pmass * (v_b[1] - v_a[1]));
                    grad_vz += gradW * (pmass * (v_b[2] - v_a[2]));
                });

                // Store density gradient
                grad_d_acc[id_a] = grad_d;

                // Compute pressure gradient: \nabla P = (\nabla \rho * u + du) * (\gamma - 1)
                // (reference: g_pre_interaction.cpp line 143)
                Tvec grad_p = (grad_d * u_a + grad_u) * (gamma - Tscal(1));
                grad_p_acc[id_a] = grad_p;

                // Normalize velocity gradients by density
                // (reference: g_pre_interaction.cpp lines 144-147)
                Tscal rho_inv = sham::inv_sat_positive(rho_a);
                grad_vx_acc[id_a] = grad_vx * rho_inv;
                grad_vy_acc[id_a] = grad_vy * rho_inv;
                grad_vz_acc[id_a] = grad_vz * rho_inv;
            });
        });

        // Complete event states
        pcache.complete_event_state({e});
        buf_xyz.complete_event_state(e);
        buf_hpart.complete_event_state(e);
        buf_vxyz.complete_event_state(e);
        dens_field.get_buf().complete_event_state(e);
        grad_d_field.get_buf().complete_event_state(e);
        grad_p_field.get_buf().complete_event_state(e);
        grad_vx_buf.get_buf().complete_event_state(e);
        grad_vy_buf.get_buf().complete_event_state(e);
        grad_vz_buf.get_buf().complete_event_state(e);
        if (has_uint) {
            pdat.get_field_buf_ref<Tscal>(iuint).complete_event_state(e);
        }
    });
}
1467
// Snapshots the current accelerations (and du/dt when an internal-energy field
// exists) into storage.old_axyz / storage.old_duint before new derivatives are
// computed, so the leapfrog corrector can combine old and new values.
template<class Tvec, template<class> class Kern>
// NOTE(review): the member-function signature line is not visible in this
// extract; from the call site this is prepare_corrector — confirm. Several
// lambda-header and sham::kernel_call( lines are likewise not visible below.
    StackEntry stack_loc{};

    shamrock::SchedulerUtility utility(scheduler());
    shamrock::patch::PatchDataLayerLayout &pdl = scheduler().pdl_old();

    const u32 iaxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::axyz);

    // Create compute field to store old acceleration
    auto old_axyz = utility.make_compute_field<Tvec>(gsph::names::internal::old_axyz, 1);

    // Copy current acceleration to old_axyz
    auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();

    scheduler().for_each_patchdata_nonempty(
            u32 cnt = pdat.get_obj_cnt();
            if (cnt == 0)
                return;

            auto &axyz_field = pdat.get_field<Tvec>(iaxyz);
            auto &old_axyz_field = old_axyz.get_field(p.id_patch);

            // Copy using kernel_call
            // (element-wise device copy: old_axyz <- axyz)
                dev_sched->get_queue(),
                sham::MultiRef{axyz_field.get_buf()},
                sham::MultiRef{old_axyz_field.get_buf()},
                cnt,
                [](u32 i, const Tvec *src, Tvec *dst) {
                    dst[i] = src[i];
                });
        });

    // Hand the snapshot over to solver storage (consumed by apply_corrector)
    storage.old_axyz.set(std::move(old_axyz));

    if (solver_config.has_field_uint()) {
        const u32 iduint = pdl.get_field_idx<Tscal>(gsph::names::newtonian::duint);
        auto old_duint = utility.make_compute_field<Tscal>(gsph::names::internal::old_duint, 1);

        scheduler().for_each_patchdata_nonempty(
                u32 cnt = pdat.get_obj_cnt();
                if (cnt == 0)
                    return;

                auto &duint_field = pdat.get_field<Tscal>(iduint);
                auto &old_duint_field = old_duint.get_field(p.id_patch);

                // Copy using kernel_call
                // (element-wise device copy: old_duint <- duint)
                    dev_sched->get_queue(),
                    sham::MultiRef{duint_field.get_buf()},
                    sham::MultiRef{old_duint_field.get_buf()},
                    cnt,
                    [](u32 i, const Tscal *src, Tscal *dst) {
                        dst[i] = src[i];
                    });
            });

        storage.old_duint.set(std::move(old_duint));
    }
}
1532
// Delegates the force/derivative computation (axyz, and duint when present)
// to the GSPH UpdateDerivs module, which applies the Riemann-solver-based
// GSPH momentum and energy equations.
template<class Tvec, template<class> class Kern>
// NOTE(review): the member-function signature line is not visible in this
// extract; from the call site this is update_derivs — confirm.
    StackEntry stack_loc{};
    // GSPH derivative update using Riemann solver
    gsph::modules::UpdateDerivs<Tvec, Kern>(context, solver_config, storage).update_derivs();
}
1539
// Computes the next timestep from the CFL conditions:
//   sound:  dt_c = C_cour  * h / c_s
//   force:  dt_f = C_force * sqrt(h / |a|)
// Takes the per-particle minimum, reduces over patches on this rank, then
// over all MPI ranks, and applies finite/positivity guards plus a hard floor.
template<class Tvec, template<class> class Kern>
typename shammodels::gsph::Solver<Tvec, Kern>::Tscal shammodels::gsph::Solver<Tvec, Kern>::
// NOTE(review): the line carrying the member name (compute_dt_cfl, per the
// call site) is not visible in this extract.
    StackEntry stack_loc{};

    using namespace shamrock;
    using namespace shamrock::patch;

    auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();

    PatchDataLayerLayout &pdl = scheduler().pdl_old();
    const u32 ihpart = pdl.get_field_idx<Tscal>(gsph::names::common::hpart);
    const u32 iaxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::axyz);

    shamrock::solvergraph::Field<Tscal> &soundspeed_field
        = shambase::get_check_ref(storage.soundspeed);

    // CFL safety factors from the solver configuration
    Tscal C_cour = solver_config.cfl_config.cfl_cour;
    Tscal C_force = solver_config.cfl_config.cfl_force;

    // Use ComputeField for proper reduction support
    shamrock::SchedulerUtility utility(scheduler());
    ComputeField<Tscal> cfl_dt = utility.make_compute_field<Tscal>("cfl_dt", 1);

    scheduler().for_each_patchdata_nonempty([&](Patch cur_p, PatchDataLayer &pdat) {
        u32 cnt = pdat.get_obj_cnt();
        if (cnt == 0)
            return;

        auto &buf_hpart = pdat.get_field_buf_ref<Tscal>(ihpart);
        auto &buf_axyz = pdat.get_field_buf_ref<Tvec>(iaxyz);
        auto &buf_cs = soundspeed_field.get_field(cur_p.id_patch).get_buf();
        auto &cfl_dt_buf = cfl_dt.get_buf_check(cur_p.id_patch);

        sham::DeviceQueue &q = dev_sched->get_queue();
        sham::EventList depends_list;

        auto hpart = buf_hpart.get_read_access(depends_list);
        auto axyz = buf_axyz.get_read_access(depends_list);
        auto cs = buf_cs.get_read_access(depends_list);
        auto cfl_dt_acc = cfl_dt_buf.get_write_access(depends_list);

        auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
            shambase::parallel_for(cgh, cnt, "gsph_compute_cfl_dt", [=](u64 gid) {
                u32 i = (u32) gid;

                Tscal h_i = hpart[i];
                Tscal cs_i = cs[i];
                Tscal abs_a = sycl::length(axyz[i]);

                // Guard against invalid values (NaN/Inf)
                if (!sycl::isfinite(h_i) || h_i <= Tscal(0))
                    h_i = Tscal(1e-10);
                if (!sycl::isfinite(cs_i) || cs_i <= Tscal(0))
                    cs_i = Tscal(1e-10);
                if (!sycl::isfinite(abs_a))
                    abs_a = Tscal(1e30);

                // Sound CFL condition: dt = C_cour * h / c_s
                // Following Kitajima et al. (2025) simple form for GSPH
                Tscal dt_c = C_cour * h_i / cs_i;

                // Force condition: dt = C_force * sqrt(h / |a|)
                // 1e-30 offset avoids division by zero when |a| == 0
                Tscal dt_f = C_force * sycl::sqrt(h_i / (abs_a + Tscal(1e-30)));

                Tscal dt_min = sycl::min(dt_c, dt_f);

                // Ensure a valid finite timestep with minimum floor
                if (!sycl::isfinite(dt_min) || dt_min <= Tscal(0)) {
                    dt_min = Tscal(1e-10); // Minimum timestep floor
                }

                cfl_dt_acc[i] = dt_min;
            });
        });

        buf_hpart.complete_event_state(e);
        buf_axyz.complete_event_state(e);
        buf_cs.complete_event_state(e);
        cfl_dt_buf.complete_event_state(e);
    });

    // Compute minimum across all patches on this rank
    Tscal rank_dt = cfl_dt.compute_rank_min();

    // Guard against invalid reduction result
    if (!std::isfinite(rank_dt) || rank_dt <= Tscal(0)) {
        rank_dt = Tscal(1e-6); // Reasonable floor for SPH simulations
    }

    // Global reduction across MPI ranks
    Tscal global_min_dt = shamalgs::collective::allreduce_min(rank_dt);

    // Final safety floor to prevent simulation stalling
    // For typical SPH simulations, timestep should be O(h/cs) ~ O(1e-4)
    // Use 1e-6 as minimum floor to prevent extreme stalling
    // NOTE(review): this hard-coded floor is unit-system dependent; if the true
    // CFL-stable dt is below 1e-6 in code units, the floor overrides the CFL
    // limit and can destabilize the integration — confirm intent.
    const Tscal dt_min_floor = Tscal(1e-6);
    if (!std::isfinite(global_min_dt) || global_min_dt < dt_min_floor) {
        global_min_dt = dt_min_floor;
    }

    return global_min_dt;
}
1643
// Completes the leapfrog step: applies the second half-kick
//   v += 0.5 * dt * a_new        (and u += 0.5 * dt * du_new when present)
// then releases the old-derivative snapshots taken by prepare_corrector.
// Returns true unconditionally (no step-rejection path in this version).
template<class Tvec, template<class> class Kern>
// NOTE(review): the member-function signature line is not visible in this
// extract; from the call site this is apply_corrector(dt, Npart) — confirm.
// Several lambda-header and sham::kernel_call( lines are likewise not visible.
    StackEntry stack_loc{};

    shamrock::patch::PatchDataLayerLayout &pdl = scheduler().pdl_old();

    const u32 ivxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::vxyz);
    const u32 iaxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::axyz);

    Tscal half_dt = Tscal{0.5} * dt;

    // Corrector: v = v + 0.5*a_new*dt (completing the leapfrog kick)
    // Predictor already added 0.5*a_old*dt, so total is 0.5*(a_old + a_new)*dt
    scheduler().for_each_patchdata_nonempty(
            u32 cnt = pdat.get_obj_cnt();
            if (cnt == 0)
                return;

            auto &vxyz = pdat.get_field<Tvec>(ivxyz);
            auto &axyz = pdat.get_field<Tvec>(iaxyz);

            auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();

            // Element-wise half-kick on the velocity field
                dev_sched->get_queue(),
                sham::MultiRef{axyz.get_buf()},
                sham::MultiRef{vxyz.get_buf()},
                cnt,
                [half_dt](u32 i, const Tvec *axyz_new, Tvec *vxyz) {
                    vxyz[i] += half_dt * axyz_new[i];
                });
        });

    if (solver_config.has_field_uint()) {
        const u32 iuint = pdl.get_field_idx<Tscal>(gsph::names::newtonian::uint);
        const u32 iduint = pdl.get_field_idx<Tscal>(gsph::names::newtonian::duint);

        scheduler().for_each_patchdata_nonempty(
                u32 cnt = pdat.get_obj_cnt();
                if (cnt == 0)
                    return;

                auto &uint_field = pdat.get_field<Tscal>(iuint);
                auto &duint = pdat.get_field<Tscal>(iduint);

                auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();

                // Corrector: u = u + 0.5*du_new*dt (completing the leapfrog)
                    dev_sched->get_queue(),
                    sham::MultiRef{duint.get_buf()},
                    sham::MultiRef{uint_field.get_buf()},
                    cnt,
                    [half_dt](u32 i, const Tscal *duint_new, Tscal *uint) {
                        uint[i] += half_dt * duint_new[i];
                    });
            });

        // Snapshot of du/dt no longer needed after the kick
        storage.old_duint.reset();
    }

    // Snapshot of accelerations no longer needed after the kick
    storage.old_axyz.reset();

    return true;
}
1711
template<class Tvec, template<class> class Kern>
// NOTE(review): the definition belonging to this template header is not
// visible in this extract (likely a one-line member definition) — confirm
// against the full source file.

1715template<class Tvec, template<class> class Kern>
1717
1718 // Validate configuration before running
1719 solver_config.check_config_runtime();
1720
1721 Tscal t_current = solver_config.get_time();
1722 Tscal dt = solver_config.get_dt();
1723
1724 StackEntry stack_loc{};
1725
1726 if (shamcomm::world_rank() == 0) {
1727 shamcomm::logs::raw_ln(
1728 shambase::format(
1729 "---------------- GSPH t = {}, dt = {} ----------------", t_current, dt));
1730 }
1731
1732 shambase::Timer tstep;
1733 tstep.start();
1734
1735 // Load balancing step
1736 scheduler().scheduler_step(true, true);
1737 scheduler().scheduler_step(false, false);
1738
1740
1741 using namespace shamrock;
1742 using namespace shamrock::patch;
1743
1744 u64 Npart_all = scheduler().get_total_obj_count();
1745
1746 // =========================================================================
1747 // CORRECTED SIMULATION LOOP ORDER (matching reference SPH code)
1748 // =========================================================================
1749 // The key insight from the reference code is that density/EOS must be
1750 // computed AFTER the predictor step, on the NEW positions. Otherwise,
1751 // the forces are computed using stale EOS values.
1752 //
1753 // Loop order:
1754 // 1. PREDICTOR: move particles using OLD accelerations
1755 // 2. BOUNDARY: apply periodic/free boundary conditions
1756 // 3. TREE BUILD: build spatial trees on NEW positions
1757 // 4. DENSITY/EOS: compute density, pressure, soundspeed on NEW positions
1758 // 5. FORCES: compute accelerations using FRESH EOS
1759 // 6. CORRECTOR: refine velocities using average of old/new accelerations
1760 // 7. CFL: compute next timestep
1761 // =========================================================================
1762
1763 // STEP 1: PREDICTOR - move particles using OLD accelerations
1764 // (On first iteration, accelerations are zero, so this is just position drift)
1765 do_predictor_leapfrog(dt);
1766
1767 // STEP 2: BOUNDARY - apply boundary conditions to NEW positions
1768 // Build serial patch tree first (needed for boundary application)
1769 gen_serial_patch_tree();
1770 apply_position_boundary(t_current + dt);
1771
1772 // STEP 3: TREE BUILD - build trees on NEW positions
1773 // Generate ghost handler for the new positions
1774 gen_ghost_handler(t_current + dt);
1775
1776 // Build ghost cache for interface exchange
1777 build_ghost_cache();
1778
1779 // Merge positions with ghosts
1780 merge_position_ghost();
1781
1782 // Build trees over merged positions
1783 build_merged_pos_trees();
1784
1785 // Compute interaction ranges
1786 compute_presteps_rint();
1787
1788 // Build neighbor cache
1789 start_neighbors_cache();
1790
1791 // STEP 4: DENSITY/OMEGA - compute on NEW positions
1792 // Compute omega (grad-h correction factor) - needed for force computation
1793 compute_omega();
1794
1795 // STEP 4b: GRADIENTS - compute for MUSCL reconstruction (if enabled)
1796 // Computed BEFORE ghost communication so gradients are included in ghost data
1797 // Gradients are computed on local particles using neighbor data
1798 compute_gradients();
1799
1800 // Initialize ghost layout BEFORE communication
1801 // (includes gradients if MUSCL is enabled)
1802 init_ghost_layout();
1803
1804 // Communicate ghost fields (hpart, uint, vxyz, omega, and gradients if MUSCL)
1805 // This MUST happen BEFORE compute_eos_fields so EOS can be computed for ghosts
1806 communicate_merge_ghosts_fields();
1807
1808 // STEP 4c: EOS - compute AFTER ghost communication (CRITICAL!)
1809 // This ensures P and cs are computed for ALL particles (local + ghost)
1810 // Following SPH pattern: EOS is computed on merged_patchdata_ghost
1811 compute_eos_fields();
1812
1813 // STEP 5: FORCES - compute accelerations using FRESH EOS
1814 // Save old accelerations for corrector
1815 prepare_corrector();
1816
1817 // Update derivatives using GSPH Riemann solver
1818 update_derivs();
1819
1820 // STEP 6: CORRECTOR - refine velocities
1821 apply_corrector(dt, Npart_all);
1822
1823 // STEP 7: CFL - compute next timestep
1824 Tscal dt_next = compute_dt_cfl();
1825
1826 // Ensure dt doesn't grow too fast (max 2x per step), but allow any value if dt was 0
1827 if (dt > Tscal(0)) {
1828 dt_next = sham::min(dt_next, Tscal(2) * dt);
1829 }
1830
1831 // Copy EOS fields to patchdata for persistence and VTKDump access
1832 copy_eos_to_patchdata();
1833
1834 // Cleanup for next iteration
1835 reset_neighbors_cache();
1836 reset_presteps_rint();
1837 clear_merged_pos_trees();
1838 reset_merge_ghosts_fields();
1839 storage.merged_xyzh.reset();
1840 clear_ghost_cache();
1841 reset_serial_patch_tree();
1842 reset_ghost_handler();
1843 storage.ghost_layout.reset();
1844
1845 // Update time
1846 solver_config.set_time(t_current + dt);
1847 solver_config.set_next_dt(dt_next);
1848
1849 solve_logs.step_count++;
1850
1851 tstep.end();
1852
1853 // Prepare timing log
1854 TimestepLog log;
1855 log.rank = shamcomm::world_rank();
1856 log.rate = Tscal(Npart_all) / tstep.elasped_sec();
1857 log.npart = Npart_all;
1858 log.tcompute = tstep.elasped_sec();
1859
1860 return log;
1861}
1862
1863// Template instantiations
1864using namespace shammath;
1865
1866// M-spline kernels (Monaghan)
1870
1871// Wendland kernels (C2, C4, C6) - recommended for GSPH (Inutsuka 2002)
Constants for field names in GSPH solver, organized by physics mode.
constexpr const char * duint
Time derivative of internal energy du/dt.
constexpr const char * pos_merged
Position merged references (for h-iteration)
constexpr const char * xyz
Position field (3D coordinates)
constexpr const char * density
Density \rho (derived from h)
constexpr const char * eps_h
Epsilon h references (for h-iteration convergence)
constexpr const char * sizes
Temporary sizes for h-iteration.
constexpr const char * soundspeed
Sound speed c_s (derived from EOS)
constexpr const char * pressure
Pressure P (derived from EOS)
constexpr const char * hpart
Smoothing length field.
constexpr const char * neigh_cache
Neighbor cache.
constexpr const char * omega
Grad-h correction factor \Omega.
GSPH-specific utilities for ghost handling.
Declares the IterateSmoothingLengthDensity module for iterating smoothing length based on the SPH density.
Declares the LoopSmoothingLengthIter module for looping over the smoothing length iteration until convergence.
Header file describing a Node Instance.
sycl::queue & get_compute_queue(u32 id=0)
MPI scheduler.
Header file for the patch struct and related function.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
The MPI scheduler.
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
Container for objects shared between two distributed data elements.
void for_each(std::function< void(u64, u64, T &)> &&f)
Apply a function to all stored objects.
Represents a collection of objects distributed across patches identified by a u64 id.
Class Timer measures the time elapsed since the timer was started.
Definition time.hpp:96
void end()
Stops the timer and stores the elapsed time in nanoseconds.
Definition time.hpp:111
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
Definition time.hpp:123
void start()
Starts the timer.
Definition time.hpp:106
The GSPH Solver class.
Definition Solver.hpp:68
void copy_eos_to_patchdata()
Copy EOS fields from solvergraph to patchdata for persistence.
Definition Solver.cpp:1224
void compute_gradients()
Compute gradients for MUSCL reconstruction.
Definition Solver.cpp:1286
TimestepLog evolve_once()
Definition Solver.cpp:1716
void update_derivs()
Update derivatives using GSPH Riemann solver.
Definition Solver.cpp:1534
Tscal compute_dt_cfl()
Compute CFL timestep constraint.
Definition Solver.cpp:1542
GSPH derivative update module.
void update_derivs()
Update all derivatives using GSPH Riemann solver approach.
Utility class used to move the objects between patches.
ComputeField< T > make_compute_field(std::string new_name, u32 nvar)
create a compute field and init it to zeros
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
virtual void ensure_sizes(const shambase::DistributedData< u32 > &sizes)
Ensure that the sizes of the patches in the field match the given sizes (can resize the underlying fields).
Definition Field.hpp:92
PatchDataField< T > & get_field(u64 id) const
Get the underlying PatchDataField at the given id.
A data structure representing a Karras Radix Tree Field.
This header file contains utility functions related to exception handling in the code.
MPI string gather / allgather helpers (declarations; implementations in shamalgs/src/collective/gather.cpp).
Configuration for the Godunov SPH (GSPH) solver.
GSPH Solver class.
GSPH derivative update module.
VTK dump module for GSPH solver.
T inv_sat_positive(T v, T minvsat=T{1e-9}, T satval=T{0.}) noexcept
inverse saturated (positive numbers only)
Definition math.hpp:841
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
void append_subset_to(const sham::DeviceBuffer< T > &buf, const sham::DeviceBuffer< u32 > &idxs_buf, u32 nvar, sham::DeviceBuffer< T > &buf_other, u32 start_enque)
Appends a subset of elements from one buffer to another.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_error if the pointer is null.
Definition memory.hpp:110
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
Definition worldInfo.cpp:40
namespace for math utility
Definition AABB.hpp:26
namespace for the main framework
Definition __init__.py:1
sph kernels
A class that references multiple buffers or similar objects.
Axis-Aligned bounding box.
Definition AABB.hpp:99
T lower
Lower bound of the AABB.
Definition AABB.hpp:104
T upper
Upper bound of the AABB.
Definition AABB.hpp:105
Patch object that contain generic patch information.
Definition Patch.hpp:33
u64 id_patch
unique key that identify the patch
Definition Patch.hpp:86
Functions related to the MPI communicator.