69template<
class Tvec,
template<
class>
class Kern>
72 storage.part_counts = std::make_shared<shamrock::solvergraph::Indexes<u32>>(
73 edges::part_counts,
"N_{\\rm part}");
75 storage.part_counts_with_ghost = std::make_shared<shamrock::solvergraph::Indexes<u32>>(
76 edges::part_counts_with_ghost,
"N_{\\rm part, with ghost}");
78 storage.patch_rank_owner = std::make_shared<shamrock::solvergraph::RankGetter>(
80 return scheduler().get_patch_rank_owner(patch_id);
86 storage.positions_with_ghosts = std::make_shared<shamrock::solvergraph::FieldRefs<Tvec>>(
87 edges::positions_with_ghosts,
"\\mathbf{r}");
88 storage.hpart_with_ghosts
89 = std::make_shared<shamrock::solvergraph::FieldRefs<Tscal>>(edges::hpart_with_ghosts,
"h");
92 = std::make_shared<shammodels::sph::solvergraph::NeighCache>(edges::neigh_cache,
"neigh");
95 storage.ghost_handler = storage.solver_graph.register_edge(
96 "ghost_handler", solvergraph::GhostHandlerEdge<Tvec>(
"ghost_handler",
"\\mathcal{G}"));
98 storage.omega = std::make_shared<shamrock::solvergraph::Field<Tscal>>(1,
"omega",
"\\Omega");
99 storage.density = std::make_shared<shamrock::solvergraph::Field<Tscal>>(1,
"density",
"\\rho");
100 storage.pressure = std::make_shared<shamrock::solvergraph::Field<Tscal>>(1,
"pressure",
"P");
102 = std::make_shared<shamrock::solvergraph::Field<Tscal>>(1,
"soundspeed",
"c_s");
107 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1,
"grad_density",
"\\nabla\\rho");
108 storage.grad_pressure
109 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1,
"grad_pressure",
"\\nabla P");
111 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1,
"grad_vx",
"\\nabla v_x");
113 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1,
"grad_vy",
"\\nabla v_y");
115 = std::make_shared<shamrock::solvergraph::Field<Tvec>>(1,
"grad_vz",
"\\nabla v_z");
118template<
class Tvec,
template<
class>
class Kern>
120 std::string filename,
bool add_patch_world_id) {
122 modules::VTKDump<Tvec, Kern>(context, solver_config).do_dump(filename, add_patch_world_id);
125template<
class Tvec,
template<
class>
class Kern>
130 _sptree.attach_buf();
131 storage.serial_patch_tree.set(std::move(_sptree));
134template<
class Tvec,
template<
class>
class Kern>
138 using CfgClass = gsph::GSPHGhostHandlerConfig<Tvec>;
139 using BCConfig =
typename CfgClass::Variant;
141 using BCFree =
typename CfgClass::Free;
142 using BCPeriodic =
typename CfgClass::Periodic;
143 using BCShearingPeriodic =
typename CfgClass::ShearingPeriodic;
145 using SolverConfigBC =
typename Config::BCConfig;
146 using SolverBCFree =
typename SolverConfigBC::Free;
147 using SolverBCPeriodic =
typename SolverConfigBC::Periodic;
148 using SolverBCShearingPeriodic =
typename SolverConfigBC::ShearingPeriodic;
152 if (SolverBCFree *c = std::get_if<SolverBCFree>(&solver_config.boundary_config.config)) {
156 scheduler(), BCFree{}, storage.patch_rank_owner, storage.xyzh_ghost_layout});
159 = std::get_if<SolverBCPeriodic>(&solver_config.boundary_config.config)) {
165 storage.patch_rank_owner,
166 storage.xyzh_ghost_layout});
168 SolverBCShearingPeriodic *c
169 = std::get_if<SolverBCShearingPeriodic>(&solver_config.boundary_config.config)) {
176 c->shear_base, c->shear_dir, c->shear_speed * time_val, c->shear_speed},
177 storage.patch_rank_owner,
178 storage.xyzh_ghost_layout});
184template<
class Tvec,
template<
class>
class Kern>
188 using GSPHUtils = GSPHUtilities<Tvec, Kernel>;
189 GSPHUtils gsph_utils(scheduler());
191 storage.ghost_patch_cache.set(gsph_utils.build_interf_cache(
193 storage.serial_patch_tree.get(),
194 solver_config.htol_up_coarse_cycle));
197template<
class Tvec,
template<
class>
class Kern>
200 storage.ghost_patch_cache.reset();
203template<
class Tvec,
template<
class>
class Kern>
207 storage.merged_xyzh.set(
210 .build_comm_merge_positions(storage.ghost_patch_cache.get()));
214 = storage.xyzh_ghost_layout->template get_field_idx<Tvec>(gsph::names::common::xyz);
215 const u32 ihpart_ghost
216 = storage.xyzh_ghost_layout->template get_field_idx<Tscal>(gsph::names::common::hpart);
220 = storage.merged_xyzh.get().template map<u32>(
222 return scheduler().patch_data.get_pdat(
id).get_obj_cnt();
227 = storage.merged_xyzh.get().template map<u32>(
229 return mpdat.get_obj_cnt();
237 return std::ref(mpdat.get_field<Tvec>(ixyz_ghost));
244 return std::ref(mpdat.get_field<Tscal>(ihpart_ghost));
248template<
class Tvec,
template<
class>
class Kern>
252 auto &merged_xyzh = storage.merged_xyzh.get();
253 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
257 = storage.xyzh_ghost_layout->template get_field_idx<Tvec>(gsph::names::common::xyz);
262 Tvec bmax = pos.compute_max();
263 Tvec bmin = pos.compute_min();
267 Tscal infty = std::numeric_limits<Tscal>::infinity();
270 aabb.lower[0] = std::nextafter(aabb.lower[0], -infty);
271 aabb.lower[1] = std::nextafter(aabb.lower[1], -infty);
272 aabb.lower[2] = std::nextafter(aabb.lower[2], -infty);
273 aabb.upper[0] = std::nextafter(aabb.upper[0], infty);
274 aabb.upper[1] = std::nextafter(aabb.upper[1], infty);
275 aabb.upper[2] = std::nextafter(aabb.upper[2], infty);
277 auto bvh = RTree::make_empty(dev_sched);
278 bvh.rebuild_from_positions(
279 pos.get_buf(), pos.get_obj_cnt(), aabb, solver_config.tree_reduction_level);
284 storage.merged_pos_trees.set(std::move(trees));
287template<
class Tvec,
template<
class>
class Kern>
290 storage.merged_pos_trees.reset();
293template<
class Tvec,
template<
class>
class Kern>
297 auto &xyzh_merged = storage.merged_xyzh.get();
298 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
300 storage.rtree_rint_field.set(
303 shamrock::patch::PatchDataLayer &tmp = xyzh_merged.get(id);
304 auto &buf = tmp.get_field_buf_ref<Tscal>(1);
305 auto buf_int = shamtree::new_empty_karras_radix_tree_field<Tscal>();
307 auto ret = shamtree::compute_tree_field_max_field<Tscal>(
309 rtree.reduced_morton_set.get_leaf_cell_iterator(),
315 dev_sched->get_queue(),
318 ret.buf_field.get_size(),
319 [htol = solver_config.htol_up_coarse_cycle](
u32 i, Tscal *h_tree) {
323 return std::move(ret);
327template<
class Tvec,
template<
class>
class Kern>
329 storage.rtree_rint_field.reset();
332template<
class Tvec,
template<
class>
class Kern>
339 Tscal h_tolerance = solver_config.htol_up_coarse_cycle;
343 auto &mfield = storage.merged_xyzh.get().get(patch_id);
349 = storage.rtree_rint_field.get().get(patch_id).buf_field;
351 RTree &tree = storage.merged_pos_trees.get().get(patch_id);
352 auto obj_it = tree.get_object_iterator();
356 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
360 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
372 auto neigh_cnt = neigh_count.get_write_access(depends_list);
373 auto particle_looper = obj_it.get_read_access(depends_list);
375 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
376 shambase::parallel_for(cgh, obj_cnt,
"gsph_count_neighbors", [=](
u64 gid) {
379 Tscal rint_a =
hpart[id_a] * h_tolerance;
380 Tvec xyz_a =
xyz[id_a];
382 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
383 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
387 particle_looper.rtree_for(
389 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
391 using namespace walker::interaction_crit;
393 return sph_radix_cell_crit(
402 Tvec dr = xyz_a -
xyz[id_b];
403 Tscal rab2 = sycl::dot(dr, dr);
404 Tscal rint_b =
hpart[id_b] * h_tolerance;
407 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
409 cnt += (no_interact) ? 0 : 1;
412 neigh_cnt[id_a] = cnt;
418 neigh_count.complete_event_state(e);
420 obj_it.complete_event_state(e);
425 = shamrock::tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
435 auto scanned_neigh_cnt = pcache.scanned_cnt.
get_read_access(depends_list);
437 auto particle_looper = obj_it.get_read_access(depends_list);
439 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
440 shambase::parallel_for(cgh, obj_cnt,
"gsph_fill_neighbors", [=](
u64 gid) {
443 Tscal rint_a =
hpart[id_a] * h_tolerance;
444 Tvec xyz_a =
xyz[id_a];
446 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
447 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
449 u32 write_idx = scanned_neigh_cnt[id_a];
451 particle_looper.rtree_for(
453 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
455 using namespace walker::interaction_crit;
457 return sph_radix_cell_crit(
466 Tvec dr = xyz_a -
xyz[id_b];
467 Tscal rab2 = sycl::dot(dr, dr);
468 Tscal rint_b =
hpart[id_b] * h_tolerance;
471 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
474 neigh[write_idx++] = id_b;
485 obj_it.complete_event_state(e);
493 using namespace shamrock::patch;
496 ncache.neigh_cache.add_obj(cur_p.
id_patch, build_neigh_cache(cur_p.
id_patch));
500 storage.timings_details.neighbors += time_neigh.
elasped_sec();
503template<
class Tvec,
template<
class>
class Kern>
505 storage.neigh_cache->neigh_cache = {};
508template<
class Tvec,
template<
class>
class Kern>
512 shamlog_debug_ln(
"GSPH",
"Prestep at t =", time_val,
"dt =", dt);
515template<
class Tvec,
template<
class>
class Kern>
519 shamlog_debug_ln(
"GSPH",
"apply position boundary");
525 auto &pdl = sched.pdl_old();
527 const u32 ivxyz = pdl.get_field_idx<Tvec>(gsph::names::newtonian::vxyz);
528 auto [bmin, bmax] = sched.get_box_volume<Tvec>();
530 using SolverConfigBC =
typename Config::BCConfig;
531 using SolverBCFree =
typename SolverConfigBC::Free;
532 using SolverBCPeriodic =
typename SolverConfigBC::Periodic;
533 using SolverBCShearingPeriodic =
typename SolverConfigBC::ShearingPeriodic;
535 if (SolverBCFree *c = std::get_if<SolverBCFree>(&solver_config.boundary_config.config)) {
537 logger::info_ln(
"PositionUpdated",
"free boundaries skipping geometry update");
541 = std::get_if<SolverBCPeriodic>(&solver_config.boundary_config.config)) {
542 integrators.fields_apply_periodicity(ixyz, std::pair{bmin, bmax});
544 SolverBCShearingPeriodic *c
545 = std::get_if<SolverBCShearingPeriodic>(&solver_config.boundary_config.config)) {
547 integrators.fields_apply_shearing_periodicity(
550 std::pair{bmin, bmax},
553 c->shear_speed * time_val,
559 reatrib.reatribute_patch_objects(storage.serial_patch_tree.get(), gsph::names::common::xyz);
562template<
class Tvec,
template<
class>
class Kern>
565 using namespace shamrock::patch;
572 const bool has_uint = solver_config.has_field_uint();
573 const u32 iuint = has_uint ? pdl.
get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;
574 const u32 iduint = has_uint ? pdl.
get_field_idx<Tscal>(gsph::names::newtonian::duint) : 0;
576 Tscal half_dt = dt / 2;
580 u32 cnt = pdat.get_obj_cnt();
584 auto &xyz_field = pdat.get_field<Tvec>(ixyz);
585 auto &vxyz_field = pdat.get_field<Tvec>(ivxyz);
586 auto &axyz_field = pdat.get_field<Tvec>(iaxyz);
588 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
593 dev_sched->get_queue(),
597 [half_dt, dt](
u32 i,
const Tvec *axyz, Tvec *xyz, Tvec *vxyz) {
599 vxyz[i] += axyz[i] * half_dt;
601 xyz[i] += vxyz[i] * dt;
608 auto &uint_field = pdat.get_field<Tscal>(iuint);
609 auto &duint_field = pdat.get_field<Tscal>(iduint);
612 dev_sched->get_queue(),
616 [half_dt](
u32 i,
const Tscal *duint, Tscal *uint) {
618 uint[i] += duint[i] * half_dt;
624template<
class Tvec,
template<
class>
class Kern>
629 storage.xyzh_ghost_layout = std::make_shared<shamrock::patch::PatchDataLayerLayout>();
630 storage.xyzh_ghost_layout->template add_field<Tvec>(gsph::names::common::xyz, 1);
631 storage.xyzh_ghost_layout->template add_field<Tscal>(gsph::names::common::hpart, 1);
634 storage.ghost_layout = std::make_shared<shamrock::patch::PatchDataLayerLayout>();
639 solver_config.set_ghost_layout(ghost_layout);
642template<
class Tvec,
template<
class>
class Kern>
647 timer_interf.
start();
650 using namespace shamrock::patch;
657 const bool has_uint = solver_config.has_field_uint();
658 const u32 iuint = has_uint ? pdl.
get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;
660 auto &ghost_layout_ptr = storage.ghost_layout;
662 u32 ihpart_interf = ghost_layout.
get_field_idx<Tscal>(gsph::names::common::hpart);
663 u32 ivxyz_interf = ghost_layout.
get_field_idx<Tvec>(gsph::names::newtonian::vxyz);
664 u32 iomega_interf = ghost_layout.
get_field_idx<Tscal>(gsph::names::newtonian::omega);
665 u32 idensity_interf = ghost_layout.
get_field_idx<Tscal>(gsph::names::newtonian::density);
667 = has_uint ? ghost_layout.
get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;
670 const bool has_grads = solver_config.requires_gradients();
672 = has_grads ? ghost_layout.
get_field_idx<Tvec>(gsph::names::newtonian::grad_density) : 0;
674 = has_grads ? ghost_layout.
get_field_idx<Tvec>(gsph::names::newtonian::grad_pressure) : 0;
676 = has_grads ? ghost_layout.
get_field_idx<Tvec>(gsph::names::newtonian::grad_vx) : 0;
678 = has_grads ? ghost_layout.
get_field_idx<Tvec>(gsph::names::newtonian::grad_vy) : 0;
680 = has_grads ? ghost_layout.
get_field_idx<Tvec>(gsph::names::newtonian::grad_vz) : 0;
682 using InterfaceBuildInfos =
typename gsph::GSPHGhostHandler<Tvec>::InterfaceBuildInfos;
684 gsph::GSPHGhostHandler<Tvec> &ghost_handle
702 auto pdat_interf = ghost_handle.template build_interface_native<PatchDataLayer>(
703 storage.ghost_patch_cache.get(),
705 PatchDataLayer pdat(ghost_layout_ptr);
711 ghost_handle.template modify_interface_native<PatchDataLayer>(
712 storage.ghost_patch_cache.get(),
716 InterfaceBuildInfos binfo,
720 PatchDataLayer &sender_patch = scheduler().patch_data.get_pdat(sender);
721 PatchDataField<Tscal> &sender_omega = omega.get(sender);
722 PatchDataField<Tscal> &sender_density = density.get(sender);
724 sender_patch.get_field<Tscal>(ihpart).append_subset_to(
725 buf_idx, cnt, pdat.get_field<Tscal>(ihpart_interf));
726 sender_patch.get_field<Tvec>(ivxyz).append_subset_to(
727 buf_idx, cnt, pdat.get_field<Tvec>(ivxyz_interf));
728 sender_omega.append_subset_to(buf_idx, cnt, pdat.get_field<Tscal>(iomega_interf));
729 sender_density.append_subset_to(buf_idx, cnt, pdat.get_field<Tscal>(idensity_interf));
732 sender_patch.get_field<Tscal>(iuint).append_subset_to(
733 buf_idx, cnt, pdat.get_field<Tscal>(iuint_interf));
739 buf_idx, cnt, pdat.get_field<Tvec>(igrad_d_interf));
740 grad_pressure_ptr->get(sender).append_subset_to(
741 buf_idx, cnt, pdat.get_field<Tvec>(igrad_p_interf));
742 grad_vx_ptr->get(sender).append_subset_to(
743 buf_idx, cnt, pdat.get_field<Tvec>(igrad_vx_interf));
744 grad_vy_ptr->get(sender).append_subset_to(
745 buf_idx, cnt, pdat.get_field<Tvec>(igrad_vy_interf));
746 grad_vz_ptr->get(sender).append_subset_to(
747 buf_idx, cnt, pdat.get_field<Tvec>(igrad_vz_interf));
752 ghost_handle.template modify_interface_native<PatchDataLayer>(
753 storage.ghost_patch_cache.get(),
757 InterfaceBuildInfos binfo,
761 if (sycl::length(binfo.offset_speed) > 0) {
762 pdat.get_field<Tvec>(ivxyz_interf).apply_offset(binfo.offset_speed);
768 = ghost_handle.communicate_pdat(ghost_layout_ptr, std::move(pdat_interf));
771 std::map<u64, u64> sz_interf_map;
773 sz_interf_map[r] += pdat_interf.get_obj_cnt();
777 storage.merged_patchdata_ghost.set(
778 ghost_handle.template merge_native<PatchDataLayer, PatchDataLayer>(
779 std::move(interf_pdat),
781 PatchDataLayer pdat_new(ghost_layout_ptr);
783 u32 or_elem = pdat.get_obj_cnt();
784 pdat_new.reserve(or_elem + sz_interf_map[p.id_patch]);
786 PatchDataField<Tscal> &cur_omega = omega.get(p.id_patch);
787 PatchDataField<Tscal> &cur_density = density.get(p.id_patch);
790 pdat_new.get_field<Tscal>(ihpart_interf).insert(pdat.get_field<Tscal>(ihpart));
791 pdat_new.get_field<Tvec>(ivxyz_interf).insert(pdat.get_field<Tvec>(ivxyz));
792 pdat_new.get_field<Tscal>(iomega_interf).insert(cur_omega);
793 pdat_new.get_field<Tscal>(idensity_interf).insert(cur_density);
796 pdat_new.get_field<Tscal>(iuint_interf).insert(pdat.get_field<Tscal>(iuint));
801 pdat_new.get_field<Tvec>(igrad_d_interf)
802 .insert(grad_density_ptr->get(p.id_patch));
803 pdat_new.get_field<Tvec>(igrad_p_interf)
804 .insert(grad_pressure_ptr->get(p.id_patch));
805 pdat_new.get_field<Tvec>(igrad_vx_interf).insert(grad_vx_ptr->get(p.id_patch));
806 pdat_new.get_field<Tvec>(igrad_vy_interf).insert(grad_vy_ptr->get(p.id_patch));
807 pdat_new.get_field<Tvec>(igrad_vz_interf).insert(grad_vz_ptr->get(p.id_patch));
810 pdat_new.check_field_obj_cnt_match();
814 pdat.insert_elements(pdat_interf);
818 storage.timings_details.interface += timer_interf.
elasped_sec();
821template<
class Tvec,
template<
class>
class Kern>
823 storage.merged_patchdata_ghost.reset();
826template<
class Tvec,
template<
class>
class Kern>
831 using namespace shamrock::patch;
833 const Tscal pmass = solver_config.gpart_mass;
837 if (pmass <= Tscal(0) || pmass < Tscal(1e-100) || !std::isfinite(pmass)) {
838 logger::warn_ln(
"GSPH",
"Invalid particle mass in compute_omega: pmass =", pmass);
846 std::shared_ptr<shamrock::solvergraph::Indexes<u32>>
sizes
847 = std::make_shared<shamrock::solvergraph::Indexes<u32>>(edges::sizes,
"N");
849 sizes->indexes.add_obj(p.id_patch, pdat.get_obj_cnt());
876 auto &merged_xyzh = storage.merged_xyzh.get();
880 std::shared_ptr<shamrock::solvergraph::FieldRefs<Tvec>>
pos_merged
881 = std::make_shared<shamrock::solvergraph::FieldRefs<Tvec>>(edges::pos_merged,
"r");
885 std::shared_ptr<shamrock::solvergraph::FieldRefs<Tscal>> hold
886 = std::make_shared<shamrock::solvergraph::FieldRefs<Tscal>>(edges::h_old,
"h^{old}");
890 std::shared_ptr<shamrock::solvergraph::FieldRefs<Tscal>> hnew
891 = std::make_shared<shamrock::solvergraph::FieldRefs<Tscal>>(edges::h_new,
"h^{new}");
896 = storage.xyzh_ghost_layout->template get_field_idx<Tvec>(gsph::names::common::xyz);
897 const u32 ihpart_ghost
898 = storage.xyzh_ghost_layout->template get_field_idx<Tscal>(gsph::names::common::hpart);
901 scheduler().for_each_patchdata_nonempty(
903 auto &mfield = merged_xyzh.get(p.id_patch);
906 pos_refs.add_obj(p.id_patch, std::ref(mfield.template get_field<Tvec>(ixyz_ghost)));
909 hold_refs.add_obj(p.id_patch, std::ref(mfield.template get_field<Tscal>(ihpart_ghost)));
912 hnew_refs.add_obj(p.id_patch, std::ref(pdat.get_field<Tscal>(ihpart)));
916 hold->set_refs(hold_refs);
917 hnew->set_refs(hnew_refs);
920 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
922 u32 cnt = pdat.get_obj_cnt();
926 auto &mfield = merged_xyzh.get(p.id_patch);
927 auto &buf_hpart_merged = mfield.template get_field_buf_ref<Tscal>(1);
928 auto &buf_hpart_local = pdat.get_field_buf_ref<Tscal>(ihpart);
931 dev_sched->get_queue(),
935 [](
u32 i,
const Tscal *h_old, Tscal *h_new) {
946 u32 cnt = pdat.get_obj_cnt();
950 auto &eps_buf = _epsilon_h.get_buf_check(p.id_patch);
953 dev_sched->get_queue(),
957 [](
u32 i, Tscal *eps) {
963 std::shared_ptr<shamrock::solvergraph::FieldRefs<Tscal>>
eps_h
964 = std::make_shared<shamrock::solvergraph::FieldRefs<Tscal>>(edges::eps_h,
"\\epsilon_h");
967 auto &field = _epsilon_h.get_field(p.id_patch);
968 eps_h_refs.add_obj(p.id_patch, std::ref(field));
970 eps_h->set_refs(eps_h_refs);
973 std::shared_ptr<sph::modules::IterateSmoothingLengthDensity<Tvec, Kernel>> smth_h_iter
974 = std::make_shared<sph::modules::IterateSmoothingLengthDensity<Tvec, Kernel>>(
975 solver_config.gpart_mass,
976 solver_config.htol_up_coarse_cycle,
977 solver_config.htol_up_fine_cycle);
980 smth_h_iter->set_edges(sizes, storage.neigh_cache, pos_merged, hold, hnew, eps_h);
983 std::shared_ptr<shamrock::solvergraph::ScalarEdge<bool>> is_converged
984 = std::make_shared<shamrock::solvergraph::ScalarEdge<bool>>(
"is_converged",
"converged");
988 smth_h_iter, solver_config.epsilon_h, solver_config.h_iter_per_subcycles,
false);
989 loop_smth_h_iter.set_edges(eps_h, is_converged);
992 loop_smth_h_iter.evaluate();
995 if (!is_converged->value) {
997 Tscal local_max_eps = shamrock::solvergraph::get_rank_max(*eps_h);
998 Tscal global_max_eps = shamalgs::collective::allreduce_max(local_max_eps);
1001 u64 cnt_unconverged = 0;
1003 auto res = _epsilon_h.get_field(p.id_patch).get_ids_buf_where([](
auto access,
u32 id) {
1004 return access[id] < Tscal(0);
1006 cnt_unconverged += std::get<1>(res);
1008 u64 global_cnt_unconverged = shamalgs::collective::allreduce_sum(cnt_unconverged);
1011 if (global_cnt_unconverged > 0) {
1014 "Smoothing length iteration: ",
1015 global_cnt_unconverged,
1016 " particles need cache rebuild (h grew beyond tolerance)");
1020 "Smoothing length iteration did not converge, max eps =",
1033 static constexpr Tscal Rkern = Kernel::Rkern;
1035 auto &
neigh_cache = storage.neigh_cache->neigh_cache;
1038 u32 cnt = pdat.get_obj_cnt();
1042 auto &mfield = merged_xyzh.get(p.id_patch);
1046 auto &buf_xyz = mfield.template get_field_buf_ref<Tvec>(0);
1047 auto &buf_hpart = pdat.get_field_buf_ref<Tscal>(ihpart);
1050 auto &dens_field = density_field.
get_field(p.id_patch);
1051 auto &omeg_field = omega_field.
get_field(p.id_patch);
1056 auto ploop_ptrs = pcache.get_read_access(depends_list);
1059 auto density_acc = dens_field.get_buf().get_write_access(depends_list);
1060 auto omega_acc = omeg_field.get_buf().get_write_access(depends_list);
1062 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
1065 shambase::parallel_for(cgh, cnt,
"gsph_compute_density_omega", [=](
u64 gid) {
1068 Tvec xyz_a = xyz_acc[id_a];
1069 Tscal h_a = h_acc[id_a];
1070 Tscal dint = h_a * h_a * Rkern * Rkern;
1073 Tscal rho_sum = Tscal(0);
1074 Tscal sumdWdh = Tscal(0);
1076 particle_looper.for_each_object(id_a, [&](
u32 id_b) {
1077 Tvec dr = xyz_a - xyz_acc[id_b];
1078 Tscal rab2 = sycl::dot(dr, dr);
1084 Tscal rab = sycl::sqrt(rab2);
1086 rho_sum += pmass * Kernel::W_3d(rab, h_a);
1087 sumdWdh += pmass * Kernel::dhW_3d(rab, h_a);
1091 density_acc[id_a] = sycl::max(rho_sum, Tscal(1e-30));
1097 Tscal omega_val = Tscal(1);
1098 if (rho_sum > Tscal(1e-30)) {
1099 omega_val = Tscal(1) + h_a / (Tscal(dim) * rho_sum) * sumdWdh;
1100 omega_val = sycl::clamp(omega_val, Tscal(0.5), Tscal(2.0));
1102 omega_acc[id_a] = omega_val;
1107 pcache.complete_event_state({e});
1110 dens_field.get_buf().complete_event_state(e);
1111 omeg_field.get_buf().complete_event_state(e);
1115template<
class Tvec,
template<
class>
class Kern>
1120 using namespace shamrock::patch;
1126 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
1127 const Tscal gamma = solver_config.get_eos_gamma();
1128 const bool has_uint = solver_config.has_field_uint();
1133 u32 idensity_interf = ghost_layout.
get_field_idx<Tscal>(gsph::names::newtonian::density);
1135 = has_uint ? ghost_layout.
get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;
1149 storage.merged_patchdata_ghost.get().for_each([&](
u64 id,
PatchDataLayer &mpdat) {
1152 if (total_elements == 0)
1157 auto &pressure_buf = pressure_field.
get_field(
id).get_buf();
1158 auto &soundspeed_buf = soundspeed_field.
get_field(
id).get_buf();
1164 auto pressure = pressure_buf.get_write_access(depends_list);
1165 auto soundspeed = soundspeed_buf.get_write_access(depends_list);
1167 const Tscal *uint_ptr =
nullptr;
1169 uint_ptr = mpdat.get_field_buf_ref<Tscal>(iuint_interf).get_read_access(depends_list);
1172 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
1173 shambase::parallel_for(cgh, total_elements,
"compute_eos_gsph", [=](
u64 gid) {
1178 rho = sycl::max(rho, Tscal(1e-30));
1180 if (has_uint && uint_ptr !=
nullptr) {
1183 Tscal u = uint_ptr[i];
1184 u = sycl::max(u, Tscal(1e-30));
1185 Tscal P = (gamma - Tscal(1.0)) * rho * u;
1189 Tscal cs = sycl::sqrt(gamma * (gamma - Tscal(1.0)) * u);
1192 P = sycl::clamp(P, Tscal(1e-30), Tscal(1e30));
1193 cs = sycl::clamp(cs, Tscal(1e-10), Tscal(1e10));
1199 Tscal cs = Tscal(1.0);
1200 Tscal P = cs * cs * rho;
1211 mpdat.get_field_buf_ref<Tscal>(iuint_interf).complete_event_state(e);
1213 pressure_buf.complete_event_state(e);
1214 soundspeed_buf.complete_event_state(e);
1218template<
class Tvec,
template<
class>
class Kern>
1223template<
class Tvec,
template<
class>
class Kern>
1228 using namespace shamrock::patch;
1243 u32 npart = pdat.get_obj_cnt();
1258 auto &q = shamsys::instance::get_compute_scheduler().
get_queue();
1261 auto rho_in = buf_rho_in.get_read_access(depends_list);
1265 auto P = buf_P.get_write_access(depends_list);
1266 auto cs = buf_cs.get_write_access(depends_list);
1268 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
1269 cgh.parallel_for(sycl::range<1>{npart}, [=](sycl::item<1> item) {
1270 rho[item] = rho_in[item];
1271 P[item] = P_in[item];
1272 cs[item] = cs_in[item];
1276 buf_rho_in.complete_event_state(e);
1280 buf_P.complete_event_state(e);
1281 buf_cs.complete_event_state(e);
1285template<
class Tvec,
template<
class>
class Kern>
1290 if (!solver_config.requires_gradients()) {
1295 using namespace shamrock::patch;
1297 const Tscal pmass = solver_config.gpart_mass;
1298 const Tscal gamma = solver_config.get_eos_gamma();
1300 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
1305 const bool has_uint = solver_config.has_field_uint();
1306 const u32 iuint = has_uint ? pdl.
get_field_idx<Tscal>(gsph::names::newtonian::uint) : 0;
1329 auto &merged_xyzh = storage.merged_xyzh.get();
1330 auto &neigh_cache = storage.neigh_cache->neigh_cache;
1332 static constexpr Tscal Rkern = Kernel::Rkern;
1339 u32 cnt = pdat.get_obj_cnt();
1343 auto &mfield = merged_xyzh.get(p.id_patch);
1344 auto &pcache = neigh_cache.get(p.id_patch);
1347 auto &buf_xyz = mfield.template get_field_buf_ref<Tvec>(0);
1348 auto &buf_hpart = mfield.template get_field_buf_ref<Tscal>(1);
1349 auto &buf_vxyz = pdat.get_field_buf_ref<Tvec>(ivxyz);
1352 auto &dens_field = density_field.
get_field(p.id_patch);
1355 auto &grad_d_field = grad_density_field.
get_field(p.id_patch);
1356 auto &grad_p_field = grad_pressure_field.
get_field(p.id_patch);
1357 auto &grad_vx_buf = grad_vx_field.
get_field(p.id_patch);
1358 auto &grad_vy_buf = grad_vy_field.
get_field(p.id_patch);
1359 auto &grad_vz_buf = grad_vz_field.
get_field(p.id_patch);
1364 auto ploop_ptrs = pcache.get_read_access(depends_list);
1367 auto v_acc = buf_vxyz.get_read_access(depends_list);
1368 auto dens_acc = dens_field.get_buf().get_read_access(depends_list);
1369 auto grad_d_acc = grad_d_field.get_buf().get_write_access(depends_list);
1370 auto grad_p_acc = grad_p_field.get_buf().get_write_access(depends_list);
1371 auto grad_vx_acc = grad_vx_buf.get_buf().get_write_access(depends_list);
1372 auto grad_vy_acc = grad_vy_buf.get_buf().get_write_access(depends_list);
1373 auto grad_vz_acc = grad_vz_buf.get_buf().get_write_access(depends_list);
1376 const Tscal *uint_ptr =
nullptr;
1378 uint_ptr = pdat.get_field_buf_ref<Tscal>(iuint).get_read_access(depends_list);
1381 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
1384 shambase::parallel_for(cgh, cnt,
"gsph_compute_gradients", [=](
u64 gid) {
1387 Tvec xyz_a = xyz_acc[id_a];
1388 Tscal h_a = h_acc[id_a];
1389 Tvec v_a = v_acc[id_a];
1390 Tscal rho_a = sycl::max(dens_acc[id_a], Tscal(1e-30));
1391 Tscal dint = h_a * h_a * Rkern * Rkern;
1394 Tscal u_a = Tscal(0);
1395 if (uint_ptr !=
nullptr) {
1396 u_a = uint_ptr[id_a];
1400 Tvec grad_d = {0, 0, 0};
1401 Tvec grad_u = {0, 0, 0};
1402 Tvec grad_vx = {0, 0, 0};
1403 Tvec grad_vy = {0, 0, 0};
1404 Tvec grad_vz = {0, 0, 0};
1406 particle_looper.for_each_object(id_a, [&](
u32 id_b) {
1407 Tvec dr = xyz_a - xyz_acc[id_b];
1408 Tscal rab2 = sycl::dot(dr, dr);
1410 if (rab2 > dint || id_a == id_b) {
1414 Tscal rab = sycl::sqrt(rab2);
1417 Tscal dWdr = Kernel::dW_3d(rab, h_a);
1421 grad_d += gradW * pmass;
1424 Tscal u_b = (uint_ptr !=
nullptr) ? uint_ptr[id_b] : Tscal(0);
1425 grad_u += gradW * (pmass * (u_b - u_a));
1428 Tvec v_b = v_acc[id_b];
1429 grad_vx += gradW * (pmass * (v_b[0] - v_a[0]));
1430 grad_vy += gradW * (pmass * (v_b[1] - v_a[1]));
1431 grad_vz += gradW * (pmass * (v_b[2] - v_a[2]));
1435 grad_d_acc[id_a] = grad_d;
1439 Tvec grad_p = (grad_d * u_a + grad_u) * (gamma - Tscal(1));
1440 grad_p_acc[id_a] = grad_p;
1445 grad_vx_acc[id_a] = grad_vx * rho_inv;
1446 grad_vy_acc[id_a] = grad_vy * rho_inv;
1447 grad_vz_acc[id_a] = grad_vz * rho_inv;
1452 pcache.complete_event_state({e});
1455 buf_vxyz.complete_event_state(e);
1456 dens_field.get_buf().complete_event_state(e);
1457 grad_d_field.get_buf().complete_event_state(e);
1458 grad_p_field.get_buf().complete_event_state(e);
1459 grad_vx_buf.get_buf().complete_event_state(e);
1460 grad_vy_buf.get_buf().complete_event_state(e);
1461 grad_vz_buf.get_buf().complete_event_state(e);
1463 pdat.get_field_buf_ref<Tscal>(iuint).complete_event_state(e);
1468template<
class Tvec,
template<
class>
class Kern>
1478 auto old_axyz = utility.make_compute_field<Tvec>(gsph::names::internal::old_axyz, 1);
1481 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
1483 scheduler().for_each_patchdata_nonempty(
1485 u32 cnt = pdat.get_obj_cnt();
1489 auto &axyz_field = pdat.get_field<Tvec>(iaxyz);
1490 auto &old_axyz_field = old_axyz.get_field(p.id_patch);
1494 dev_sched->get_queue(),
1498 [](
u32 i,
const Tvec *src, Tvec *dst) {
1503 storage.old_axyz.set(std::move(old_axyz));
1505 if (solver_config.has_field_uint()) {
1506 const u32 iduint = pdl.
get_field_idx<Tscal>(gsph::names::newtonian::duint);
1507 auto old_duint = utility.make_compute_field<Tscal>(gsph::names::internal::old_duint, 1);
1509 scheduler().for_each_patchdata_nonempty(
1511 u32 cnt = pdat.get_obj_cnt();
1515 auto &duint_field = pdat.get_field<Tscal>(iduint);
1516 auto &old_duint_field = old_duint.get_field(p.id_patch);
1520 dev_sched->get_queue(),
1524 [](
u32 i,
const Tscal *src, Tscal *dst) {
1529 storage.old_duint.set(std::move(old_duint));
1533template<
class Tvec,
template<
class>
class Kern>
1540template<
class Tvec,
template<
class>
class Kern>
1546 using namespace shamrock::patch;
1548 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
1557 Tscal C_cour = solver_config.cfl_config.cfl_cour;
1558 Tscal C_force = solver_config.cfl_config.cfl_force;
1565 u32 cnt = pdat.get_obj_cnt();
1569 auto &buf_hpart = pdat.get_field_buf_ref<Tscal>(ihpart);
1570 auto &buf_axyz = pdat.get_field_buf_ref<Tvec>(iaxyz);
1571 auto &buf_cs = soundspeed_field.get_field(cur_p.
id_patch).get_buf();
1572 auto &cfl_dt_buf = cfl_dt.get_buf_check(cur_p.
id_patch);
1578 auto axyz = buf_axyz.get_read_access(depends_list);
1579 auto cs = buf_cs.get_read_access(depends_list);
1580 auto cfl_dt_acc = cfl_dt_buf.get_write_access(depends_list);
1582 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
1583 shambase::parallel_for(cgh, cnt,
"gsph_compute_cfl_dt", [=](
u64 gid) {
1586 Tscal h_i = hpart[i];
1588 Tscal abs_a = sycl::length(axyz[i]);
1591 if (!sycl::isfinite(h_i) || h_i <= Tscal(0))
1593 if (!sycl::isfinite(cs_i) || cs_i <= Tscal(0))
1594 cs_i = Tscal(1e-10);
1595 if (!sycl::isfinite(abs_a))
1596 abs_a = Tscal(1e30);
1600 Tscal dt_c = C_cour * h_i / cs_i;
1603 Tscal dt_f = C_force * sycl::sqrt(h_i / (abs_a + Tscal(1e-30)));
1605 Tscal dt_min = sycl::min(dt_c, dt_f);
1608 if (!sycl::isfinite(dt_min) || dt_min <= Tscal(0)) {
1609 dt_min = Tscal(1e-10);
1612 cfl_dt_acc[i] = dt_min;
1617 buf_axyz.complete_event_state(e);
1618 buf_cs.complete_event_state(e);
1619 cfl_dt_buf.complete_event_state(e);
1623 Tscal rank_dt = cfl_dt.compute_rank_min();
1626 if (!std::isfinite(rank_dt) || rank_dt <= Tscal(0)) {
1627 rank_dt = Tscal(1e-6);
1631 Tscal global_min_dt = shamalgs::collective::allreduce_min(rank_dt);
1636 const Tscal dt_min_floor = Tscal(1e-6);
1637 if (!std::isfinite(global_min_dt) || global_min_dt < dt_min_floor) {
1638 global_min_dt = dt_min_floor;
1641 return global_min_dt;
1644template<
class Tvec,
template<
class>
class Kern>
1653 Tscal half_dt = Tscal{0.5} * dt;
1657 scheduler().for_each_patchdata_nonempty(
1659 u32 cnt = pdat.get_obj_cnt();
1663 auto &vxyz = pdat.get_field<Tvec>(ivxyz);
1664 auto &axyz = pdat.get_field<Tvec>(iaxyz);
1666 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
1669 dev_sched->get_queue(),
1673 [half_dt](
u32 i,
const Tvec *axyz_new, Tvec *vxyz) {
1674 vxyz[i] += half_dt * axyz_new[i];
1678 if (solver_config.has_field_uint()) {
1680 const u32 iduint = pdl.
get_field_idx<Tscal>(gsph::names::newtonian::duint);
1682 scheduler().for_each_patchdata_nonempty(
1684 u32 cnt = pdat.get_obj_cnt();
1688 auto &uint_field = pdat.get_field<Tscal>(iuint);
1689 auto &
duint = pdat.get_field<Tscal>(iduint);
1691 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
1695 dev_sched->get_queue(),
1699 [half_dt](
u32 i,
const Tscal *duint_new, Tscal *uint) {
1700 uint[i] += half_dt * duint_new[i];
1704 storage.old_duint.reset();
1707 storage.old_axyz.reset();
1712template<
class Tvec,
template<
class>
class Kern>
1715template<
class Tvec,
template<
class>
class Kern>
1719 solver_config.check_config_runtime();
1721 Tscal t_current = solver_config.get_time();
1722 Tscal dt = solver_config.get_dt();
1727 shamcomm::logs::raw_ln(
1729 "---------------- GSPH t = {}, dt = {} ----------------", t_current, dt));
1736 scheduler().scheduler_step(
true,
true);
1737 scheduler().scheduler_step(
false,
false);
1742 using namespace shamrock::patch;
1744 u64 Npart_all = scheduler().get_total_obj_count();
1765 do_predictor_leapfrog(dt);
1769 gen_serial_patch_tree();
1770 apply_position_boundary(t_current + dt);
1774 gen_ghost_handler(t_current + dt);
1777 build_ghost_cache();
1780 merge_position_ghost();
1783 build_merged_pos_trees();
1786 compute_presteps_rint();
1789 start_neighbors_cache();
1798 compute_gradients();
1802 init_ghost_layout();
1806 communicate_merge_ghosts_fields();
1811 compute_eos_fields();
1815 prepare_corrector();
1821 apply_corrector(dt, Npart_all);
1824 Tscal dt_next = compute_dt_cfl();
1827 if (dt > Tscal(0)) {
1828 dt_next = sham::min(dt_next, Tscal(2) * dt);
1832 copy_eos_to_patchdata();
1835 reset_neighbors_cache();
1836 reset_presteps_rint();
1837 clear_merged_pos_trees();
1838 reset_merge_ghosts_fields();
1839 storage.merged_xyzh.reset();
1840 clear_ghost_cache();
1841 reset_serial_patch_tree();
1842 reset_ghost_handler();
1843 storage.ghost_layout.reset();
1846 solver_config.set_time(t_current + dt);
1847 solver_config.set_next_dt(dt_next);
1849 solve_logs.step_count++;
1856 log.rate = Tscal(Npart_all) / tstep.
elasped_sec();
1857 log.npart = Npart_all;
Constants for field names in GSPH solver, organized by physics mode.
constexpr const char * duint
Time derivative of internal energy du/dt.
constexpr const char * pos_merged
Position merged references (for h-iteration)
constexpr const char * xyz
Position field (3D coordinates)
constexpr const char * density
Density \rho (derived from h)
constexpr const char * eps_h
Epsilon h references (for h-iteration convergence)
constexpr const char * sizes
Temporary sizes for h-iteration.
constexpr const char * soundspeed
Sound speed c_s (derived from EOS)
constexpr const char * pressure
Pressure P (derived from EOS)
constexpr const char * hpart
Smoothing length field.
constexpr const char * neigh_cache
Neighbor cache.
constexpr const char * omega
Grad-h correction factor \Omega.
GSPH-specific utilities for ghost handling.
Declares the IterateSmoothingLengthDensity module for iterating smoothing length based on the SPH den...
Declares the LoopSmoothingLengthIter module for looping over the smoothing length iteration until con...
Header file describing a Node Instance.
sycl::queue & get_compute_queue(u32 id=0)
Header file for the patch struct and related function.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Container for objects shared between two distributed data elements.
void for_each(std::function< void(u64, u64, T &)> &&f)
Apply a function to all stored objects.
Represents a collection of objects distributed across patches identified by a u64 id.
Class Timer measures the time elapsed since the timer was started.
void end()
Stops the timer and stores the elapsed time in nanoseconds.
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
void start()
Starts the timer.
void copy_eos_to_patchdata()
Copy EOS fields from solvergraph to patchdata for persistence.
void compute_gradients()
Compute gradients for MUSCL reconstruction.
TimestepLog evolve_once()
void update_derivs()
Update derivatives using GSPH Riemann solver.
Tscal compute_dt_cfl()
Compute CFL timestep constraint.
GSPH derivative update module.
void update_derivs()
Update all derivatives using GSPH Riemann solver approach.
Utility class used to move the objects between patches.
ComputeField< T > make_compute_field(std::string new_name, u32 nvar)
create a compute field and init it to zeros
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
virtual void ensure_sizes(const shambase::DistributedData< u32 > &sizes)
Ensure that the sizes of the patches in the field match the given sizes (Can resize the underlying fi...
PatchDataField< T > & get_field(u64 id) const
Get the underlying PatchDataField at the given id.
A data structure representing a Karras Radix Tree Field.
This header file contains utility functions related to exception handling in the code.
MPI string gather / allgather helpers (declarations; implementations in shamalgs/src/collective/gathe...
Configuration for the Godunov SPH (GSPH) solver.
GSPH derivative update module.
VTK dump module for GSPH solver.
T inv_sat_positive(T v, T minvsat=T{1e-9}, T satval=T{0.}) noexcept
inverse saturated (positive numbers only)
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
void append_subset_to(const sham::DeviceBuffer< T > &buf, const sham::DeviceBuffer< u32 > &idxs_buf, u32 nvar, sham::DeviceBuffer< T > &buf_other, u32 start_enque)
Appends a subset of elements from one buffer to another.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
namespace for math utility
namespace for the main framework
A class that references multiple buffers or similar objects.
Axis-Aligned bounding box.
T lower
Lower bound of the AABB.
T upper
Upper bound of the AABB.
Patch object that contain generic patch information.
u64 id_patch
unique key that identify the patch
Functions related to the MPI communicator.