165 sycl::vec<T, 3> shift;
166 sycl::vec<T, 3> shift_speed;
170using ShearPeriodicInfo =
175 i32_3 ioff, ShearPeriodicInfo<T> shear, sycl::vec<T, 3> bsize) {
177 i32 dx = ioff.x() * shear.shear_base.x();
178 i32 dy = ioff.y() * shear.shear_base.y();
179 i32 dz = ioff.z() * shear.shear_base.z();
181 i32 d = dx + dy + dz;
183 sycl::vec<T, 3> shift
184 = {(d * shear.shear_dir.x()) * shear.shear_value + bsize.x() * ioff.x(),
185 (d * shear.shear_dir.y()) * shear.shear_value + bsize.y() * ioff.y(),
186 (d * shear.shear_dir.z()) * shear.shear_value + bsize.z() * ioff.z()};
187 sycl::vec<T, 3> shift_speed
188 = {(d * shear.shear_dir.x()) * shear.shear_speed,
189 (d * shear.shear_dir.y()) * shear.shear_speed,
190 (d * shear.shear_dir.z()) * shear.shear_speed};
192 return {shift, shift_speed};
196inline void for_each_patch_shift(
197 ShearPeriodicInfo<T> shearinfo,
198 sycl::vec<T, 3> bsize,
201 i32_3 loop_offset = {0, 0, 0};
203 std::vector<i32_3> list_possible;
209 i32 repetition_x = 1 + abs(shearinfo.shear_dir.x());
210 i32 repetition_y = 1 + abs(shearinfo.shear_dir.y());
211 i32 repetition_z = 1 + abs(shearinfo.shear_dir.z());
213 T sz = bsize.x() * shearinfo.shear_dir.x() + bsize.y() * shearinfo.shear_dir.y()
214 + bsize.z() * shearinfo.shear_dir.z();
216 for (
i32 xoff = -repetition_x; xoff <= repetition_x; xoff++) {
217 for (
i32 yoff = -repetition_y; yoff <= repetition_y; yoff++) {
218 for (
i32 zoff = -repetition_z; zoff <= repetition_z; zoff++) {
220 i32 dx = xoff * shearinfo.shear_base.x();
221 i32 dy = yoff * shearinfo.shear_base.y();
222 i32 dz = zoff * shearinfo.shear_base.z();
224 i32 d = dx + dy + dz;
226 i32 df = -int(d * shearinfo.shear_value / sz);
229 = {shearinfo.shear_dir.x() * df,
230 shearinfo.shear_dir.y() * df,
231 shearinfo.shear_dir.z() * df};
239 list_possible.resize(list_possible.size() + 1);
240 list_possible[list_possible.size() - 1]
241 = i32_3{xoff + off_d.x(), yoff + off_d.y(), zoff + off_d.z()};
248 for (i32_3 off : list_possible) {
250 auto shift = compute_shift_infos(off, shearinfo, bsize);
268 using namespace shamrock::patch;
271 i32 repetition_x = 1;
272 i32 repetition_y = 1;
273 i32 repetition_z = 1;
283 using BCConfig =
typename CfgClass::Variant;
292 if (BCPeriodic *cfg = std::get_if<BCPeriodic>(&ghost_config)) {
293 sycl::host_accessor acc_tf{
296 for (
i32 xoff = -repetition_x; xoff <= repetition_x; xoff++) {
297 for (
i32 yoff = -repetition_y; yoff <= repetition_y; yoff++) {
298 for (
i32 zoff = -repetition_z; zoff <= repetition_z; zoff++) {
301 vec periodic_offset =
vec{xoff * bsize.x(), yoff * bsize.y(), zoff * bsize.z()};
303 sycl::host_accessor tree{
305 sycl::host_accessor lpid{
308#pragma omp parallel for
309 for (
u32 i = 0; i < sched.patch_list.local.size(); i++) {
312 CoordRange<vec> sender_bsize = patch_coord_transf.to_obj_coord(psender);
314 = sender_bsize.add_offset(periodic_offset);
316 flt sender_volume = sender_bsize.get_volume();
318 flt sender_h_max = int_range_max.get(psender.
id_patch);
322 sptree.host_for_each_leafs_internal(
323 [&](
u64 tree_id, PtNode n) {
324 flt receiv_h_max = acc_tf[tree_id];
326 n.box_min - receiv_h_max, n.box_max + receiv_h_max};
328 return receiv_exp.get_intersect(sender_bsize_off)
331 [&](
u64 id_found, PtNode n) {
332 if ((id_found == psender.
id_patch) && (xoff == 0) && (yoff == 0)
339 int_range_max.get(id_found));
342 receiv_exp.add_offset(-periodic_offset));
352 interf_volume.get_volume() / sender_volume});
361 }
else if (BCShearingPeriodic *cfg = std::get_if<BCShearingPeriodic>(&ghost_config)) {
362 sycl::host_accessor acc_tf{
365 for_each_patch_shift<flt>(*cfg, bsize, [&](i32_3 ioff,
ShiftInfo<flt> shift) {
370 vec offset = shift.shift;
372 sycl::host_accessor tree{
374 sycl::host_accessor lpid{
377#pragma omp parallel for
378 for (
u32 i = 0; i < sched.patch_list.local.size(); i++) {
382 CoordRange<vec> sender_bsize = patch_coord_transf.to_obj_coord(psender);
385 flt sender_volume = sender_bsize.get_volume();
387 flt sender_h_max = int_range_max.get(psender.
id_patch);
391 sptree.host_for_each_leafs_internal(
392 [&](
u64 tree_id, PtNode n) {
393 flt receiv_h_max = acc_tf[tree_id];
395 n.box_min - receiv_h_max, n.box_max + receiv_h_max};
397 return receiv_exp.get_intersect(sender_bsize_off).is_not_empty();
399 [&](
u64 id_found, PtNode n) {
400 if ((id_found == psender.
id_patch) && (xoff == 0) && (yoff == 0)
407 int_range_max.get(id_found));
410 = sender_bsize.get_intersect(receiv_exp.add_offset(-offset));
420 interf_volume.get_volume() / sender_volume});
432 sycl::host_accessor acc_tf{
435 vec periodic_offset =
vec{0, 0, 0};
438 sycl::host_accessor lpid{
441#pragma omp parallel for
442 for (
u32 i = 0; i < sched.patch_list.local.size(); i++) {
445 CoordRange<vec> sender_bsize = patch_coord_transf.to_obj_coord(psender);
446 CoordRange<vec> sender_bsize_off = sender_bsize.add_offset(periodic_offset);
448 flt sender_volume = sender_bsize.get_volume();
450 flt sender_h_max = int_range_max.get(psender.
id_patch);
454 sptree.host_for_each_leafs_internal(
455 [&](
u64 tree_id, PtNode n) {
456 flt receiv_h_max = acc_tf[tree_id];
458 n.box_min - receiv_h_max, n.box_max + receiv_h_max};
460 return receiv_exp.get_intersect(sender_bsize_off).is_not_empty();
462 [&](
u64 id_found, PtNode n) {
469 int_range_max.get(id_found));
472 = sender_bsize.get_intersect(receiv_exp.add_offset(-periodic_offset));
482 interf_volume.get_volume() / sender_volume});
515 using namespace shamrock::patch;
519 std::map<u64, f64> send_count_stats;
526 [](
auto access,
u32 id,
vec vmin,
vec vmax) {
527 return Patch::is_in_patch_converted(access[
id], vmin, vmax);
529 build.cut_volume.lower,
530 build.cut_volume.upper);
532 u32 pcnt = idxs_res.get_size();
539 f64 ratio =
f64(pcnt) /
f64(src.get_obj_cnt());
554 send_count_stats[sender] += ratio;
557 bool has_warn =
false;
559 std::string warn_log =
"";
561 for (
auto &[k, v] : send_count_stats) {
563 warn_log += shambase::format(
"\n patch {} high interf/patch volume: {}", k, v);
569 warn_log =
"\n This can lead to high mpi "
570 "overhead, try to increase the patch split crit"
575 logger::warn_ln(
"InterfaceGen",
"High interface/patch volume ratio." + warn_log);
586 static u32 cnt_dump_debug = 0;
588 std::string loc_graph =
"";
589 interf_info.
for_each([&loc_graph](
u64 send,
u64 recv, InterfaceIdTable &info) {
590 loc_graph += shambase::format(
" p{} -> p{}\n", send, recv);
593 sched.for_each_patch_data(
595 if (pdat.get_obj_cnt() > 0) {
596 loc_graph += shambase::format(
597 " p{} [label= \"id={} N={}\"]\n",
id,
id, pdat.get_obj_cnt());
601 std::string dot_graph =
"";
604 dot_graph =
"strict digraph {\n" + dot_graph +
"}";
607 std::string fname = shambase::format(
"ghost_graph_{}.dot", cnt_dump_debug);
608 logger::info_ln(
"SPH Ghost",
"writing", fname);
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
A buffer allocated in USM (Unified Shared Memory)
void for_each(std::function< void(u64, u64, T &)> &&f)
Apply a function to all stored objects.
iterator add_obj(u64 left_id, u64 right_id, T &&obj)
Add an object associated with a patch pair.
Class Timer measures the time elapsed since the timer was started.
void start()
Starts the timer.
Vector class based on std::array storage and mdspan.
shambase::DistributedDataShared< InterfaceIdTable > gen_id_table_interfaces(GeneratorMap &&gen)
precompute interfaces members and cache result in the return
GeneratorMap find_interfaces(SerialPatchTree< vec > &sptree, shamrock::patch::PatchtreeField< flt > &int_range_max_tree, shamrock::patch::PatchField< flt > &int_range_max)
Find interfaces and their metadata.
PatchDataLayer container class, the layout is described in patchdata_layout.
Store the information related to the size of the simulation box to convert patch integer coordinates ...
T get_bounding_box_size() const
Get the size of the stored bounding box of the domain.
PatchCoordTransform< T > get_patch_transform() const
Get a PatchCoordTransform object that describes the conversion between patch coordinates and domain c...
This header file contains utility functions related to exception handling in the code.
MPI string gather / allgather helpers (declarations; implementations in shamalgs/src/collective/gathe...
void gather_str(const std::string &send_vec, std::string &recv_vec)
Gathers a string from all nodes and stores the result in a std::string.
void write_string_to_file(std::string filename, std::string s)
dump a string to a file
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
namespace for math utility
namespace for the sph model
Boundary conditions configuration.
Patch object that contains generic patch information.
bool is_err_mode() const
check if a patch is in error mode
u64 id_patch
unique key that identifies the patch
Functions related to the MPI communicator.