// Members of the shift descriptor (the enclosing struct header is outside this excerpt;
// presumably ShiftInfo<T>, the type whose `.shift` member is read further down — confirm).
// Positional offset.
165 sycl::vec<T, 3> shift;
// Companion velocity offset; the visible producer builds it from the shear speed.
166 sycl::vec<T, 3> shift_speed;
// Alias used as the type of the shear parameters below; its right-hand side is on a line not shown here.
170using ShearPeriodicInfo =
175 i32_3 ioff, ShearPeriodicInfo<T> shear, sycl::vec<T, 3> bsize) {
// Builds the position shift and velocity shift for the image at integer offset `ioff`.
// NOTE(review): the start of the signature is not visible; this looks like the body of
// compute_shift_infos, which is called later with the same three arguments — confirm.
//
// Per-axis products of the image offset with the shear base vector.
177 i32 dx = ioff.x() * shear.shear_base.x();
178 i32 dy = ioff.y() * shear.shear_base.y();
179 i32 dz = ioff.z() * shear.shear_base.z();
// d is the integer dot product ioff . shear_base; it scales every shear term below.
181 i32 d = dx + dy + dz;
// Position shift = shear displacement (d * shear_dir * shear_value) + plain periodic
// translation (bsize * ioff), component by component.
183 sycl::vec<T, 3> shift
184 = {(d * shear.shear_dir.x()) * shear.shear_value + bsize.x() * ioff.x(),
185 (d * shear.shear_dir.y()) * shear.shear_value + bsize.y() * ioff.y(),
186 (d * shear.shear_dir.z()) * shear.shear_value + bsize.z() * ioff.z()};
// Velocity shift = d * shear_dir * shear_speed; it has no bsize term.
187 sycl::vec<T, 3> shift_speed
188 = {(d * shear.shear_dir.x()) * shear.shear_speed,
189 (d * shear.shear_dir.y()) * shear.shear_speed,
190 (d * shear.shear_dir.z()) * shear.shear_speed};
// Returned as a {shift, shift_speed} pair.
192 return {shift, shift_speed};
// Enumerates candidate integer image offsets for a shearing-periodic box and computes the
// shift information of each one through compute_shift_infos.
// NOTE(review): the remaining parameter(s) and the code that consumes `shift` are on lines not
// shown here; the visible call site passes a lambda taking (i32_3, ShiftInfo), so the result is
// presumably forwarded to that callback — confirm.
196inline void for_each_patch_shift(
197 ShearPeriodicInfo<T> shearinfo,
198 sycl::vec<T, 3> bsize,
201 i32_3 loop_offset = {0, 0, 0};
// Candidate offsets collected by the triple loop, processed afterwards.
203 std::vector<i32_3> list_possible;
// Scan range per axis: one image on each side, widened by |shear_dir| along that axis.
209 i32 repetition_x = 1 + sham::abs(shearinfo.shear_dir.x());
210 i32 repetition_y = 1 + sham::abs(shearinfo.shear_dir.y());
211 i32 repetition_z = 1 + sham::abs(shearinfo.shear_dir.z());
// sz = bsize . shear_dir, i.e. the box extent projected on the shear direction.
// NOTE(review): sz is used as a divisor below; it is zero when shear_dir is the null vector —
// confirm that this is excluded upstream.
213 T sz = bsize.x() * shearinfo.shear_dir.x() + bsize.y() * shearinfo.shear_dir.y()
214 + bsize.z() * shearinfo.shear_dir.z();
216 for (
i32 xoff = -repetition_x; xoff <= repetition_x; xoff++) {
217 for (
i32 yoff = -repetition_y; yoff <= repetition_y; yoff++) {
218 for (
i32 zoff = -repetition_z; zoff <= repetition_z; zoff++) {
// Same integer dot product (offset . shear_base) as in the shift computation.
220 i32 dx = xoff * shearinfo.shear_base.x();
221 i32 dy = yoff * shearinfo.shear_base.y();
222 i32 dz = zoff * shearinfo.shear_base.z();
224 i32 d = dx + dy + dz;
// df: negated count of whole `sz` lengths contained in the shear displacement d * shear_value
// (int() truncates toward zero). The initializer that follows scales shear_dir by df; the
// declaration it belongs to is on a line not shown (presumably `off_d`, used below).
226 i32 df = -int(d * shearinfo.shear_value / sz);
229 = {shearinfo.shear_dir.x() * df,
230 shearinfo.shear_dir.y() * df,
231 shearinfo.shear_dir.z() * df};
// Append the corrected offset (loop offset + off_d).
// NOTE(review): resize-then-assign-last is equivalent to a single push_back.
239 list_possible.resize(list_possible.size() + 1);
240 list_possible[list_possible.size() - 1]
241 = i32_3{xoff + off_d.x(), yoff + off_d.y(), zoff + off_d.z()};
// Second pass: compute the shift information for every collected offset.
248 for (i32_3 off : list_possible) {
250 auto shift = compute_shift_infos(off, shearinfo, bsize);
// Interface search between patches. NOTE(review): the signature is outside the visible lines;
// this appears to be the body of find_interfaces — confirm.
// For each boundary-condition variant, every local sender patch is tested against the leaves of
// `sptree`; the visible fragments show an entry built with the overlap-to-sender volume ratio.
268 using namespace shamrock::patch;
// Images scanned on each side per axis in the periodic case (offsets -1, 0, +1).
271 i32 repetition_x = 1;
272 i32 repetition_y = 1;
273 i32 repetition_z = 1;
// Container for the interfaces found below (NOTE(review): presumably the returned value — confirm).
280 GeneratorMap interf_map;
283 using BCConfig =
typename CfgClass::Variant;
// Case 1: plain periodic boundaries.
292 if (BCPeriodic *cfg = std::get_if<BCPeriodic>(&ghost_config)) {
293 sycl::host_accessor acc_tf{
296 for (
i32 xoff = -repetition_x; xoff <= repetition_x; xoff++) {
297 for (
i32 yoff = -repetition_y; yoff <= repetition_y; yoff++) {
298 for (
i32 zoff = -repetition_z; zoff <= repetition_z; zoff++) {
// Translation of this periodic image: integer offset times `bsize`, per axis.
301 vec periodic_offset =
vec{xoff * bsize.x(), yoff * bsize.y(), zoff * bsize.z()};
303 sycl::host_accessor tree{
305 sycl::host_accessor lpid{
// NOTE(review): iterations of this loop run concurrently under OpenMP; any write to a shared
// container (e.g. interf_map) inside it must be synchronised — those lines are not visible, confirm.
308#pragma omp parallel for
309 for (
u32 i = 0; i < sched.patch_list.local.size(); i++) {
// Sender patch bounds in object coordinates; the offset copy is the one tested against receivers.
312 CoordRange<vec> sender_bsize = patch_coord_transf.to_obj_coord(psender);
314 = sender_bsize.add_offset(periodic_offset);
// Denominator of the overlap ratio computed further down.
316 flt sender_volume = sender_bsize.get_volume();
318 flt sender_h_max = int_range_max.get(psender.
id_patch);
320 using PtNode =
typename SerialPatchTree<vec>::PtNode;
// Tree walk with two lambdas: the first returns an intersection test for a node, the second
// receives (id_found, node) — presumably for each leaf that passed the test.
322 sptree.host_for_each_leafs_internal(
323 [&](
u64 tree_id, PtNode n) {
// The receiver box is enlarged by its own h_max on both sides before the overlap test.
324 flt receiv_h_max = acc_tf[tree_id];
326 n.box_min - receiv_h_max, n.box_max + receiv_h_max};
328 return receiv_exp.get_intersect(sender_bsize_off)
331 [&](
u64 id_found, PtNode n) {
// Detects the sender meeting itself with a zero offset; the rest of the condition and the
// action taken are on lines not shown.
332 if ((id_found == psender.
id_patch) && (xoff == 0) && (yoff == 0)
339 int_range_max.get(id_found));
342 receiv_exp.add_offset(-periodic_offset));
352 interf_volume.get_volume() / sender_volume});
361 }
// Case 2: shearing-periodic boundaries; image offsets and shifts come from for_each_patch_shift.
else if (BCShearingPeriodic *cfg = std::get_if<BCShearingPeriodic>(&ghost_config)) {
362 sycl::host_accessor acc_tf{
365 for_each_patch_shift<flt>(*cfg, bsize, [&](i32_3 ioff,
ShiftInfo<flt> shift) {
// The shear-aware position shift plays the role of periodic_offset from the first case.
370 vec offset = shift.shift;
372 sycl::host_accessor tree{
374 sycl::host_accessor lpid{
// NOTE(review): same OpenMP remark as in the periodic case — confirm shared writes are protected.
377#pragma omp parallel for
378 for (
u32 i = 0; i < sched.patch_list.local.size(); i++) {
382 CoordRange<vec> sender_bsize = patch_coord_transf.to_obj_coord(psender);
385 flt sender_volume = sender_bsize.get_volume();
387 flt sender_h_max = int_range_max.get(psender.
id_patch);
389 using PtNode =
typename SerialPatchTree<vec>::PtNode;
391 sptree.host_for_each_leafs_internal(
392 [&](
u64 tree_id, PtNode n) {
393 flt receiv_h_max = acc_tf[tree_id];
395 n.box_min - receiv_h_max, n.box_max + receiv_h_max};
// Keep the node only when the enlarged receiver box overlaps the shifted sender box.
397 return receiv_exp.get_intersect(sender_bsize_off).is_not_empty();
399 [&](
u64 id_found, PtNode n) {
// NOTE(review): xoff/yoff are not parameters of the enclosing lambda; presumably they are
// unpacked from `ioff` on lines not shown — confirm.
400 if ((id_found == psender.
id_patch) && (xoff == 0) && (yoff == 0)
407 int_range_max.get(id_found));
// Overlap expressed in the sender's frame: the receiver box is moved back by -offset.
410 = sender_bsize.get_intersect(receiv_exp.add_offset(-offset));
420 interf_volume.get_volume() / sender_volume});
// Case 3 (branch header not shown): the same search with a zero offset, so no image translation
// is applied. NOTE(review): presumably the non-periodic fallback — confirm.
432 sycl::host_accessor acc_tf{
435 vec periodic_offset =
vec{0, 0, 0};
438 sycl::host_accessor lpid{
// NOTE(review): same OpenMP remark as above.
441#pragma omp parallel for
442 for (
u32 i = 0; i < sched.patch_list.local.size(); i++) {
445 CoordRange<vec> sender_bsize = patch_coord_transf.to_obj_coord(psender);
446 CoordRange<vec> sender_bsize_off = sender_bsize.add_offset(periodic_offset);
448 flt sender_volume = sender_bsize.get_volume();
450 flt sender_h_max = int_range_max.get(psender.
id_patch);
452 using PtNode =
typename SerialPatchTree<vec>::PtNode;
454 sptree.host_for_each_leafs_internal(
455 [&](
u64 tree_id, PtNode n) {
456 flt receiv_h_max = acc_tf[tree_id];
458 n.box_min - receiv_h_max, n.box_max + receiv_h_max};
460 return receiv_exp.get_intersect(sender_bsize_off).is_not_empty();
462 [&](
u64 id_found, PtNode n) {
469 int_range_max.get(id_found));
472 = sender_bsize.get_intersect(receiv_exp.add_offset(-periodic_offset));
482 interf_volume.get_volume() / sender_volume});
// Builds per-interface index tables and gathers statistics on how much of each sender patch is
// sent. NOTE(review): the signature is outside the visible lines; this appears to be the body of
// gen_id_table_interfaces — confirm.
515 using namespace shamrock::patch;
// Per-sender accumulator of the sent-object ratios computed below (key: sender id).
519 std::map<u64, f64> send_count_stats;
526 [](
auto access,
u32 id,
vec vmin,
vec vmax) {
529 build.cut_volume.lower,
530 build.cut_volume.upper);
// Number of entries in the index result obtained for this interface.
532 u32 pcnt = idxs_res.get_size();
// Fraction of the source patch's objects that this interface selects.
// NOTE(review): the divisor is src.get_obj_cnt(); confirm that empty sources cannot reach here.
539 f64 ratio =
f64(pcnt) /
f64(src.get_obj_cnt());
// Ratios from all interfaces of the same sender are summed.
554 send_count_stats[sender] += ratio;
557 bool has_warn =
false;
559 std::string warn_log =
"";
// One log line per sender; the condition that selects which senders are reported (and sets
// has_warn) is on lines not shown.
561 for (
auto &[k, v] : send_count_stats) {
563 warn_log += shambase::format(
"\n patch {} high interf/patch volume: {}", k, v);
// NOTE(review): this is an assignment, not an append. Unless the hidden continuation of the
// expression re-appends warn_log, the per-patch lines built above are discarded — confirm.
569 warn_log =
"\n This can lead to high mpi "
570 "overhead, try to increase the patch split crit"
// Emits the warning through the logger under the "InterfaceGen" module name.
575 logger::warn_ln(
"InterfaceGen",
"High interface/patch volume ratio." + warn_log);
// Debug helper that describes the ghost interfaces as a Graphviz "strict digraph" and prepares a
// numbered "ghost_graph_<n>.dot" file name. The parameter list and the end of the function
// (including any file write) are on lines not shown.
582void BasicSPHGhostHandler<vec>::gen_debug_patch_ghost(
// Function-local static used to number the dump files; where it is incremented is not visible.
586 static u32 cnt_dump_debug = 0;
// DOT fragment assembled from the data visible to this call.
588 std::string loc_graph =
"";
// One directed edge "p<send> -> p<recv>" per stored interface.
589 interf_info.
for_each([&loc_graph](
u64 send,
u64 recv, InterfaceIdTable &info) {
590 loc_graph += shambase::format(
" p{} -> p{}\n", send, recv);
// One labelled node per patch that holds at least one object (label: id and object count).
593 sched.for_each_patch_data(
595 if (pdat.get_obj_cnt() > 0) {
596 loc_graph += shambase::format(
597 " p{} [label= \"id={} N={}\"]\n",
id,
id, pdat.get_obj_cnt());
// dot_graph starts empty; how loc_graph is merged into it happens on lines not shown
// (NOTE(review): presumably a gather across MPI ranks — confirm).
601 std::string dot_graph =
"";
// Wrap the collected statements into a complete DOT document.
604 dot_graph =
"strict digraph {\n" + dot_graph +
"}";
607 std::string fname = shambase::format(
"ghost_graph_{}.dot", cnt_dump_debug);
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
A buffer allocated in USM (Unified Shared Memory).
Container for objects shared between two distributed data elements.
void for_each(std::function< void(u64, u64, T &)> &&f)
Apply a function to all stored objects.
iterator add_obj(u64 left_id, u64 right_id, T &&obj)
Add an object associated with a patch pair.
Class Timer measures the time elapsed since the timer was started.
void start()
Starts the timer.
Vector class based on std::array storage and mdspan.
shambase::DistributedDataShared< InterfaceIdTable > gen_id_table_interfaces(GeneratorMap &&gen)
precompute interfaces members and cache result in the return
GeneratorMap find_interfaces(SerialPatchTree< vec > &sptree, shamrock::patch::PatchtreeField< flt > &int_range_max_tree, shamrock::patch::PatchField< flt > &int_range_max)
Find interfaces and their metadata.
PatchDataLayer container class, the layout is described in patchdata_layout.
Store the information related to the size of the simulation box to convert patch integer coordinates ...
T get_bounding_box_size() const
Get the size of the stored bounding box of the domain.
PatchCoordTransform< T > get_patch_transform() const
Get a PatchCoordTransform object that describes the conversion between patch coordinates and domain c...
This header file contains utility functions related to exception handling in the code.
MPI string gather / allgather helpers (declarations; implementations in shamalgs/src/collective/gathe...
void gather_str(const std::string &send_vec, std::string &recv_vec)
Gathers a string from all nodes and store the result in a std::string.
void write_string_to_file(std::string filename, std::string s)
dump a string to a file
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
namespace for math utility
namespace for the sph model
void info_ln(std::string module_name, Types... var2)
Prints a log message with multiple arguments followed by a newline.
void warn_ln(std::string module_name, Types... var2)
Prints a log message with multiple arguments followed by a newline.
shambase::details::BasicStackEntry StackEntry
Alias for shambase::details::BasicStackEntry.
Boundary conditions configuration.
Patch object that contain generic patch information.
bool is_err_mode() const
check if a patch is in error mode
static bool is_in_patch_converted(sycl::vec< T, 3 > val, sycl::vec< T, 3 > min_val, sycl::vec< T, 3 > max_val)
check if particle is in the asked range, given the output of @convert_coord
u64 id_patch
unique key that identify the patch
Functions related to the MPI communicator.