Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
SPHSetup.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
20#include "shambase/memory.hpp"
21#include "shambase/string.hpp"
22#include "shambase/tabulate.hpp"
28#include "shamcomm/logs.hpp"
30#include "shamcomm/wrapper.hpp"
47#include <mpi.h>
48#include <vector>
49
50template<class Tvec, template<class> class SPHKernel>
51inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
52 SPHSetup<Tvec, SPHKernel>::make_generator_lattice_hcp(
53 Tscal dr, std::pair<Tvec, Tvec> box, bool discontinuous) {
54 if (discontinuous) {
55 return std::shared_ptr<ISPHSetupNode>(
56 new GeneratorLatticeHCP<Tvec, true>(context, dr, box));
57 } else {
58 return std::shared_ptr<ISPHSetupNode>(
59 new GeneratorLatticeHCP<Tvec, false>(context, dr, box));
60 }
61}
62
63template<class Tvec, template<class> class SPHKernel>
64inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
65 SPHSetup<Tvec, SPHKernel>::make_generator_lattice_cubic(Tscal dr, std::pair<Tvec, Tvec> box) {
66 return std::shared_ptr<ISPHSetupNode>(new GeneratorLatticeCubic<Tvec>(context, dr, box));
67}
68
69template<class Tvec, template<class> class SPHKernel>
70inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
71 SPHSetup<Tvec, SPHKernel>::make_generator_disc_mc(
72 Tscal part_mass,
73 Tscal disc_mass,
74 Tscal r_in,
75 Tscal r_out,
76 std::function<Tscal(Tscal)> sigma_profile,
77 std::function<Tscal(Tscal)> H_profile,
78 std::function<Tscal(Tscal)> rot_profile,
79 std::function<Tscal(Tscal)> cs_profile,
80 std::mt19937_64 eng,
81 Tscal init_h_factor) {
82 return std::shared_ptr<ISPHSetupNode>(new GeneratorMCDisc<Tvec, SPHKernel>(
83 context,
84 solver_config,
85 part_mass,
86 disc_mass,
87 r_in,
88 r_out,
89 sigma_profile,
90 H_profile,
91 rot_profile,
92 cs_profile,
93 eng,
94 init_h_factor));
95}
96
97template<class Tvec, template<class> class SPHKernel>
98inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
99 SPHSetup<Tvec, SPHKernel>::make_generator_from_context(ShamrockCtx &context_other) {
100 return std::shared_ptr<ISPHSetupNode>(
101 new GeneratorFromOtherContext<Tvec>(context, context_other));
102}
103
104template<class Tvec, template<class> class SPHKernel>
105inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
106 SPHSetup<Tvec, SPHKernel>::make_combiner_add(SetupNodePtr parent1, SetupNodePtr parent2) {
107 return std::shared_ptr<ISPHSetupNode>(new CombinerAdd<Tvec>(context, parent1, parent2));
108}
109
/// Serial (one-shot) setup application: repeatedly pulls batches of particles
/// from the setup graph `setup` and pushes them straight into the scheduler,
/// load balancing after each insertion, then runs a few final balancing passes.
/// NOTE(review): the signature head of this member definition was lost when
/// this file was extracted; the visible parameters are
/// (SetupNodePtr setup, bool part_reordering, std::optional<u32> insert_step).
template<class Tvec, template<class> class SPHKernel>
    SetupNodePtr setup, bool part_reordering, std::optional<u32> insert_step) {

    // Guard: a null setup graph is a caller error.
    if (!bool(setup)) {
        shambase::throw_with_loc<std::invalid_argument>("The setup shared pointer is empty");
    }

    shambase::Timer time_setup;
    time_setup.start();
    StackEntry stack_loc{};

    PatchScheduler &sched = shambase::get_check_ref(context.sched);

    // Recompute the per-patch load-balancing value (invoked after each insertion).
    auto compute_load = [&]() {
        modules::ComputeLoadBalanceValue<Tvec, SPHKernel>(context, solver_config, storage)
            .update_load_balancing();
    };

    // True if this rank currently owns at least one local patch.
    auto has_pdat = [&]() {
        bool ret = false;
        using namespace shamrock::patch;
        sched.for_each_local_patchdata([&](const Patch &p, PatchDataLayer &pdat) {
            ret = true;
        });
        return ret;
    };

    shamrock::DataInserterUtility inserter(sched);
    // Batch size per generation step, defaulted from the patch split criterion.
    u32 _insert_step = sched.crit_patch_split * 8;
    if (bool(insert_step)) {
        _insert_step = insert_step.value();
    }

    while (!setup->is_done()) {

        // Generate the next batch (0 requested on ranks that hold no patch yet).
        shamrock::patch::PatchDataLayer pdat = setup->next_n((has_pdat()) ? _insert_step : 0);

        if (solver_config.track_particles_id) {
            // This bit set the tracking id of the particles
            // But be carefull this assume that the particle injection order
            // is independant from the MPI world size. It should be the case for most setups
            // but some generator could miss this assumption.
            // If that is the case please report the issue

            u64 loc_inj = pdat.get_obj_cnt();

            u64 offset_init = 0;
            // NOTE(review): the collective-call name preceding these arguments
            // was lost in extraction (presumably an MPI exclusive scan over the
            // local injection counts — TODO confirm against the repository).
                &loc_inj, &offset_init, 1, get_mpi_type<u64>(), MPI_SUM, MPI_COMM_WORLD);

            // we must add the number of already injected part such that the
            // offset start at the right spot.
            // The only thing that bothers me is that this can not handle the case where multiple
            // setups of things like that are applied. But in principle no sane person would do such
            // a thing...
            offset_init += injected_parts;

            auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
            auto &q = shambase::get_check_ref(dev_sched).get_queue();

            if (loc_inj > 0) {
                sham::DeviceBuffer<u64> part_ids(loc_inj, dev_sched);

                // Fill part_ids[i] = offset_init + i on device.
                // NOTE(review): the kernel-launch helper name was lost in
                // extraction (only its argument list remains below).
                    q,
                    sham::MultiRef{part_ids},
                    loc_inj,
                    [offset_init](u32 i, u64 *__restrict part_ids) {
                        part_ids[i] = i + offset_init;
                    });

                // Write the computed ids into the batch's "part_id" field.
                pdat.get_field<u64>(pdat.pdl().get_field_idx<u64>("part_id"))
                    .overwrite(part_ids, loc_inj);
            }
        }

        // Push the batch into the scheduler (sorted into patches by "xyz"),
        // rebalancing through `compute_load`.
        u64 injected
            = inserter.push_patch_data<Tvec>(pdat, "xyz", sched.crit_patch_split * 8, compute_load);

        injected_parts += injected;
    }

    // A few extra balancing passes once everything has been inserted.
    u32 final_balancing_steps = 3;
    for (u32 i = 0; i < final_balancing_steps; i++) {
        ON_RANK_0(
            logger::info_ln(
                "SPH setup", "Final load balancing step", i, "of", final_balancing_steps));
        inserter.balance_load(compute_load);
    }

    if (part_reordering) {
        modules::ParticleReordering<Tvec, u32, SPHKernel>(context, solver_config, storage)
            .reorder_particles();
    }

    time_setup.end();
    if (shamcomm::world_rank() == 0) {
        logger::info_ln("SPH setup", "the setup took :", time_setup.elasped_sec(), "s");
    }
}
212
213struct SetupLog {
214 struct State {
215 std::vector<u64> count_per_rank;
216 std::vector<std::tuple<u32, u32, u64>> msg_list;
217 } state;
218
219 u64 step_counter = 0;
220
221 nlohmann::json json_data = nlohmann::json::array();
222
223 void log_state() {
224 nlohmann::json step_data;
225 step_data["step_counter"] = step_counter;
226 step_data["count_per_rank"] = state.count_per_rank;
227 step_data["msg_list"] = state.msg_list;
228 json_data.push_back(step_data);
229 }
230
231 void dump_state() {
232 std::string fname = "setup_log_step.json";
233 if (shamcomm::world_rank() == 0) {
234 logger::normal_ln("SPH setup", "dumping setup log to ", fname);
235 }
236
237 std::ofstream file(fname);
238 file << json_data.dump(4);
239 file.close();
240
241 step_counter++;
242 }
243
244 void update_count_per_rank(u64 count) {
245 std::vector<u64> tmp{count};
246 std::vector<u64> recv_count_per_rank;
247 shamalgs::collective::vector_allgatherv(tmp, recv_count_per_rank, MPI_COMM_WORLD);
248 state.count_per_rank = recv_count_per_rank;
249 log_state();
250 if (step_counter % 20 == 0)
251 dump_state();
252 }
253
254 void update_msg_list(std::vector<std::tuple<u32, u32, u64>> &msg_list) {
255 state.msg_list = msg_list;
256 log_state();
257 if (step_counter % 20 == 0)
258 dump_state();
259 }
260};
261
/// Golden ratio; used below to derive deterministic but decorrelated RNG seeds
/// for the message-list shuffles of the injection loop.
inline constexpr f64 golden_number = 1.61803398874989484820458683436563;
263
/// Progressive ("fast") setup application. Phase 1 drains the setup graph into
/// a local staging buffer `to_insert`; phase 2 injects staged particles into
/// the scheduler through rate-limited sparse MPI exchanges until every rank's
/// staging buffer is empty; a perf report is printed at the end.
/// NOTE(review): several identifier/call-name lines of this definition were
/// lost when this file was extracted (the signature head included); the
/// orphaned argument lists are kept verbatim and flagged where they occur.
template<class Tvec, template<class> class SPHKernel>
    SetupNodePtr setup,
    bool part_reordering,
    std::optional<u32> gen_count_per_step,
    std::optional<u32> insert_count_per_step,
    std::optional<u64> max_msg_count_per_rank_per_step,
    std::optional<u64> max_data_count_per_rank_per_step,
    std::optional<u64> max_msg_size,
    bool do_setup_log,
    bool speculative_balancing) {

    // NOTE(review): a statement (presumably capturing `mem_perf_infos_start`,
    // read at the end of this function) was lost in extraction here.

    if (!bool(setup)) {
        shambase::throw_with_loc<std::invalid_argument>("The setup shared pointer is empty");
    }

    // Optional JSON logger of per-step counts / message lists.
    std::optional<SetupLog> setup_log
        = (do_setup_log) ? std::make_optional<SetupLog>() : std::nullopt;

    shambase::Timer time_setup;
    time_setup.start();
    PatchScheduler &sched = shambase::get_check_ref(context.sched);
    shamrock::DataInserterUtility inserter(sched);

    // Tunables, all defaulted from the scheduler split criterion unless
    // explicitly overridden by the caller.
    u32 insert_step = sched.crit_patch_split * 2;
    if (bool(insert_count_per_step)) {
        insert_step = insert_count_per_step.value();
    }

    u32 gen_step = std::max(sched.crit_patch_split / 8, 1_u64);
    if (bool(gen_count_per_step)) {
        gen_step = gen_count_per_step.value();
    }

    u64 msg_limit = 1024;
    if (bool(max_msg_count_per_rank_per_step)) {
        msg_limit = max_msg_count_per_rank_per_step.value();
    }
    u64 data_count_limit = insert_step;
    if (bool(max_data_count_per_rank_per_step)) {
        data_count_limit = max_data_count_per_rank_per_step.value();
    }
    u64 max_message_size = std::max(insert_step / 16, 1_u32);
    if (bool(max_msg_size)) {
        max_message_size = max_msg_size.value();
    }

    // Staging buffer of generated particles not yet owned by any patch.
    shamrock::patch::PatchDataLayer to_insert(sched.get_layout_ptr_old());

    u64 speculative_last_npatch = 0;
    shambase::DistributedData<u64> speculative_load_values = {};

    // Load-balance value provider. In speculative mode the load of each patch
    // also counts staged (not-yet-inserted) particles falling in its AABB, so
    // the balancer anticipates where the data will end up.
    auto compute_load = [&]() {
        if (speculative_balancing) {

            StackEntry stack_loc{};

            auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();

            u64 npatch = scheduler().patch_list.global.size();

            // check if the number of patches has changed, rebuild otherwise
            if (npatch != speculative_last_npatch) {

                shambase::details::NamedBasicStackEntry stack_loc2{"compute_load"};

                if (shamcomm::world_rank() == 0) {
                    logger::normal_ln(
                        "SPH setup",
                        "number of patches has changed, rebuilding speculative load values");
                }

                // reset the load values
                speculative_last_npatch = npatch;
                speculative_load_values.reset();

                // Compute the AABB of all the patches

                std::vector<Tvec> patch_aabb_min(npatch);
                std::vector<Tvec> patch_aabb_max(npatch);

                auto &global_patch_list = scheduler().patch_list.global;
                // NOTE(review): the declaration on the preceding source line
                // (the patch coordinate transform `ptransf`, judging by its
                // use just below) was lost in extraction.
                    = sched.get_sim_box().get_patch_transform<Tvec>();

                for (size_t i = 0; i < global_patch_list.size(); i++) {
                    const shamrock::patch::Patch &p = global_patch_list[i];
                    if (!p.is_err_mode()) {
                        shammath::CoordRange<Tvec> patch_coord = ptransf.to_obj_coord(p);
                        patch_aabb_min[i] = patch_coord.lower;
                        patch_aabb_max[i] = patch_coord.upper;
                    }
                }

                sham::DeviceBuffer<Tvec> buf_patch_aabb_min(npatch, dev_sched);
                sham::DeviceBuffer<Tvec> buf_patch_aabb_max(npatch, dev_sched);

                buf_patch_aabb_min.copy_from_stdvec(patch_aabb_min);
                buf_patch_aabb_max.copy_from_stdvec(patch_aabb_max);

                // count the number of particles in each patch

                sham::DeviceBuffer<u64> local_load_values(npatch, dev_sched);
                local_load_values.fill(0);

                PatchDataField<Tvec> &xyz = to_insert.get_field<Tvec>(0);

                if (xyz.get_obj_cnt() > 0) {
                    // NOTE(review): the kernel-launch helper name was lost in
                    // extraction (only its argument list remains below).
                        shamsys::instance::get_compute_scheduler().get_queue(),
                        sham::MultiRef{xyz.get_buf(), buf_patch_aabb_min, buf_patch_aabb_max},
                        sham::MultiRef{local_load_values},
                        xyz.get_obj_cnt(),
                        [npatch](
                            u32 i,
                            const Tvec *__restrict xyz,
                            const Tvec *__restrict patch_aabb_min,
                            const Tvec *__restrict patch_aabb_max,
                            u64 *__restrict local_load_values) {
                            Tvec pos = xyz[i];
                            // O(npatch) scan per particle; counts are folded in
                            // with device-scope relaxed atomics.
                            for (size_t j = 0; j < npatch; j++) {
                                // NOTE(review): the AABB/range declaration on
                                // the preceding source line was lost in
                                // extraction.
                                        = {patch_aabb_min[j], patch_aabb_max[j]};
                                if (patch_coord.contain_pos(pos)) {
                                    sycl::atomic_ref<
                                        u64,
                                        sycl::memory_order::relaxed,
                                        sycl::memory_scope::device>
                                        atomic_local_load_values(local_load_values[j]);
                                    atomic_local_load_values++;
                                }
                            }
                        });
                }

                // recover data

                auto local_load_values_host = local_load_values.copy_to_stdvec();

                std::vector<u64> reduced_load_values(npatch);

                // reduce the load values

                // NOTE(review): the MPI reduction call name (a SUM allreduce,
                // per its arguments) was lost in extraction.
                    local_load_values_host.data(),
                    reduced_load_values.data(),
                    npatch,
                    get_mpi_type<u64>(),
                    MPI_SUM,
                    MPI_COMM_WORLD);

                // convert to DistributedData

                for (size_t i = 0; i < npatch; i++) {
                    speculative_load_values.add_obj(
                        global_patch_list[i].id_patch, u64(reduced_load_values[i]));
                }

                // Add the already injected parts to the load values

                auto &patch_list = scheduler().patch_list;

                for (u64 id : scheduler().owned_patch_id) {
                    // NOTE(review): the variable declaration on the preceding
                    // source line was lost in extraction.
                            = patch_list.local[patch_list.id_patch_to_local_idx[id]];
                    speculative_load_values.get(id)
                        += scheduler().patch_data.owned_data.get(id).get_obj_cnt();
                }
            }

            // update load values

            scheduler().update_local_load_value([&](shamrock::patch::Patch p) {
                return speculative_load_values.get(p.id_patch);
            });

        } else {
            modules::ComputeLoadBalanceValue<Tvec, SPHKernel>(context, solver_config, storage)
                .update_load_balancing();
        }
    };

    // True if this rank currently owns at least one local patch.
    auto has_pdat = [&]() {
        bool ret = false;
        using namespace shamrock::patch;
        sched.for_each_local_patchdata([&](const Patch &p, PatchDataLayer &pdat) {
            ret = true;
        });
        return ret;
    };

    shambase::Timer time_part_gen;
    time_part_gen.start();

    if (shamcomm::world_rank() == 0) {
        logger::normal_ln("SPH setup", "generating particles ...");
    }

    // ---- phase 1: drain the setup graph into the staging buffer ----
    while (!setup->is_done()) {
        shambase::Timer timer_gen;
        timer_gen.start();

        shamrock::patch::PatchDataLayer tmp = setup->next_n(gen_step);

        if (solver_config.track_particles_id) {
            // This bit set the tracking id of the particles
            // But be carefull this assume that the particle injection order
            // is independant from the MPI world size. It should be the case for most setups
            // but some generator could miss this assumption.
            // If that is the case please report the issue

            u64 loc_inj = tmp.get_obj_cnt();

            u64 offset_init = 0;
            // NOTE(review): the collective-call name preceding these arguments
            // was lost in extraction (presumably an MPI exclusive scan over the
            // local injection counts — TODO confirm against the repository).
                &loc_inj, &offset_init, 1, get_mpi_type<u64>(), MPI_SUM, MPI_COMM_WORLD);

            // we must add the number of already injected part such that the
            // offset start at the right spot.
            // The only thing that bothers me is that this can not handle the case where multiple
            // setups of things like that are applied. But in principle no sane person would do such
            // a thing...
            offset_init += injected_parts;

            auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
            auto &q = shambase::get_check_ref(dev_sched).get_queue();

            if (loc_inj > 0) {
                sham::DeviceBuffer<u64> part_ids(loc_inj, dev_sched);

                // Fill part_ids[i] = offset_init + i on device.
                // NOTE(review): the kernel-launch helper name was lost in
                // extraction (only its argument list remains below).
                    q,
                    sham::MultiRef{part_ids},
                    loc_inj,
                    [offset_init](u32 i, u64 *__restrict part_ids) {
                        part_ids[i] = i + offset_init;
                    });

                tmp.get_field<u64>(tmp.pdl().get_field_idx<u64>("part_id"))
                    .overwrite(part_ids, loc_inj);
            }
        }

        to_insert.insert_elements(tmp);

        u64 sum_push = shamalgs::collective::allreduce_sum<u64>(tmp.get_obj_cnt());
        u64 sum_all = shamalgs::collective::allreduce_sum<u64>(to_insert.get_obj_cnt());

        u64 min_rank = shamalgs::collective::allreduce_min<u64>(to_insert.get_obj_cnt());
        u64 max_rank = shamalgs::collective::allreduce_max<u64>(to_insert.get_obj_cnt());

        timer_gen.end();

        if (shamcomm::world_rank() == 0) {
            f64 part_per_sec = f64(sum_push) / f64(timer_gen.elasped_sec());
            logger::normal_ln(
                "SPH setup",
                shambase::format(
                    "Nstep = {} ( {:.1e} ) Ntotal = {} ( {:.1e} rank min = {:.1e} max = {:.1e}) "
                    "rate = {:e} N.s^-1",
                    sum_push,
                    f64(sum_push),
                    sum_all,
                    f64(sum_all),
                    part_per_sec,
                    f64(min_rank),
                    f64(max_rank)));
        }

        if (setup_log) {
            setup_log.value().update_count_per_rank(to_insert.get_obj_cnt());
        }

        injected_parts += sum_push;
    }

    time_part_gen.end();
    if (shamcomm::world_rank() == 0) {
        logger::normal_ln(
            "SPH setup", "the generation step took :", time_part_gen.elasped_sec(), "s");
    }

    if (shamcomm::world_rank() == 0) {
        logger::normal_ln(
            "SPH setup", "final particle count =", injected_parts, "beginning injection ...");
    }

    // NOTE(review): a statement (presumably capturing `mem_perf_infos_start`)
    // may have been lost in extraction just above.
    f64 mpi_timer_start = shamcomm::mpi::get_timer("total");

    // injection part (holy shit this is hard)

    shambase::Timer time_part_inject;
    time_part_inject.start();

    // Print a one-line global progress report of the injection phase.
    auto log_inject_status = [&](std::string log_suffix = "") {
        u64 sum_all = shamalgs::collective::allreduce_sum<u64>(to_insert.get_obj_cnt());

        u32 rank_without_patch
            = shamalgs::collective::allreduce_sum<u32>(sched.patch_list.local.size() == 0 ? 1 : 0);

        if (shamcomm::world_rank() == 0) {
            logger::normal_ln(
                "SPH setup",
                shambase::format(
                    "injected {:12} / {:} => {:5.1f}% | ranks with patchs = {:d} / {:d} {}",
                    injected_parts - sum_all,
                    injected_parts,
                    f64(injected_parts - sum_all) / f64(injected_parts) * 100.0,
                    shamcomm::world_size() - rank_without_patch,
                    // NOTE(review): one format argument (presumably the world
                    // size, to match the second {:d}) was lost in extraction.
                    log_suffix));
        }

        if (setup_log) {
            setup_log.value().update_count_per_rank(to_insert.get_obj_cnt());
        }
    };

    // Insert staged particles that already fall inside patches owned by this
    // rank, at most `insert_step` per patch per pass, rebalancing between
    // passes, until no rank is capped anymore.
    auto inject_in_local_domains =
        [&sched, &inserter, &compute_load, &insert_step, &log_inject_status](
        // NOTE(review): the tail of this lambda's parameter list was lost in
        // extraction (it takes the staging PatchDataLayer `to_insert` by
        // reference, judging by the call site below).

        bool has_been_limited = true;

        auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
        sham::DeviceBuffer<u32> mask_get_ids_where(0, dev_sched);

        while (has_been_limited) {
            has_been_limited = false;
            using namespace shamrock::patch;

            // inject in local domains first
            PatchCoordTransform<Tvec> ptransf = sched.get_sim_box().get_patch_transform<Tvec>();
            sched.for_each_local_patchdata([&](const Patch &p, PatchDataLayer &pdat) {
                shammath::CoordRange<Tvec> patch_coord = ptransf.to_obj_coord(p);

                PatchDataField<Tvec> &xyz = to_insert.get_field<Tvec>(0);

                auto ids = xyz.get_ids_where_recycle_buffer(
                    mask_get_ids_where,
                    [](auto access, u32 id, shammath::CoordRange<Tvec> patch_coord) {
                        Tvec tmp = access[id];
                        return patch_coord.contain_pos(tmp);
                    },
                    patch_coord);

                // Cap the batch; remember we were capped so another pass runs.
                if (ids.get_size() > insert_step) {
                    ids.resize(insert_step);
                    has_been_limited = true;
                }

                if (ids.get_size() > 0) {
                    to_insert.extract_elements(ids, pdat);
                }
            });

            sched.check_patchdata_locality_correctness();

            inserter.balance_load(compute_load);

            // Repeat the pass if ANY rank was capped.
            has_been_limited
                = !shamalgs::collective::are_all_rank_true(!has_been_limited, MPI_COMM_WORLD);

            if (has_been_limited) {
                // since we will restart this one let's print
                log_inject_status(" -> local loop <-");
            }
        }
    };

    // Classify every staged particle by the rank owning its destination patch;
    // returns rank -> list of staged indices. `timer_result` receives the
    // elapsed time of the classification.
    auto get_index_per_ranks = [&](f64 &timer_result) {

        shambase::Timer time_get_index_per_ranks;
        time_get_index_per_ranks.start();

        // NOTE(review): the declaration of `sptree` (a serial patch tree, per
        // its use below) on the preceding source line was lost in extraction.
        sptree.attach_buf();

        // find where each particle should be inserted
        PatchDataField<Tvec> &pos_field = to_insert.get_field<Tvec>(0);

        if (pos_field.get_nvar() != 1) {
            // NOTE(review): the statement inside this guard (presumably a
            // throw) was lost in extraction.
        }

        sycl::buffer<u64> new_id_buf = sptree.compute_patch_owner(
            shamsys::instance::get_compute_scheduler_ptr(),
            pos_field.get_buf(),
            pos_field.get_obj_cnt());

        std::unordered_map<i32, std::vector<u32>> index_per_ranks;
        bool err_id_in_newid = false;
        {
            sycl::host_accessor nid{new_id_buf, sycl::read_only};
            for (u32 i = 0; i < pos_field.get_obj_cnt(); i++) {
                u64 patch_id = nid[i];
                bool err = patch_id == u64_max;
                err_id_in_newid = err_id_in_newid || (err);

                i32 rank = sched.get_patch_rank_owner(patch_id);
                index_per_ranks[rank].push_back(i);
            }
        }

        if (err_id_in_newid) {
            // NOTE(review): the throw call preceding this message string was
            // lost in extraction.
                "a new id could not be computed");
        }

        time_get_index_per_ranks.end();
        timer_result = time_get_index_per_ranks.elasped_sec();

        return index_per_ranks;
    };

    f64 total_time_rank_getter = 0;
    f64 max_time_rank_getter = 0;

    // NOTE(review): a declaration (presumably the sparse-communication cache
    // `comm_cache`, used further down) was lost in extraction just above.
    u32 step_count = 0;
    // ---- phase 2: loop until the staging buffer is empty on all ranks ----
    while (!shamalgs::collective::are_all_rank_true(to_insert.is_empty(), MPI_COMM_WORLD)) {

        // assume that the sched is synchronized and that there is at least a patch.
        // TODO actually check that

        using namespace shamrock::patch;

        auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();

        inject_in_local_domains(to_insert);

        f64 timer_get_index_per_ranks = 0;
        std::unordered_map<i32, std::vector<u32>> index_per_ranks
            = get_index_per_ranks(timer_get_index_per_ranks);
        total_time_rank_getter += timer_get_index_per_ranks;
        max_time_rank_getter = std::max(max_time_rank_getter, timer_get_index_per_ranks);

        // allgather the list of messages
        // format:(u32_2(sender_rank, receiver_rank), u64(indices_size))
        std::vector<u64> send_msg;
        for (auto &[rank, indices] : index_per_ranks) {
            send_msg.push_back(sham::pack32(shamcomm::world_rank(), rank));
            send_msg.push_back(indices.size());
        }

        // Cap the allgather payload so the synchronization step itself cannot
        // blow up at large rank counts; the dropped entries are retried on a
        // later iteration of the outer loop.
        u64 max_send = (1 << 24) / shamcomm::world_size();
        bool sync_limited = false;
        if (send_msg.size() > max_send) {

            // here we must pack the send_msg infos in structs in order to keep
            // them together during shuffle

            struct tmp {
                u64 ranks, size;
            };

            // build the vector of structs
            std::vector<tmp> tmp_vec;
            tmp_vec.reserve(send_msg.size() / 2);
            for (u64 i = 0; i < send_msg.size(); i += 2) {
                tmp_vec.push_back({send_msg[i], send_msg[i + 1]});
            }

            // shuffle the messages infos
            u64 local_seed = u64(golden_number * 1000 * step_count + shamcomm::world_rank());
            std::mt19937_64 eng_local_msg(local_seed);
            std::shuffle(tmp_vec.begin(), tmp_vec.end(), eng_local_msg);

            // build the new send_msg
            std::vector<u64> send_msg_new;
            send_msg_new.reserve(max_send);
            for (auto &t : tmp_vec) {
                if (send_msg_new.size() >= max_send) {
                    break;
                }
                send_msg_new.push_back(t.ranks);
                send_msg_new.push_back(t.size);
            }

            send_msg = send_msg_new;
            sync_limited = true;
        }

        std::vector<u64> recv_msg;
        shamalgs::collective::vector_allgatherv(send_msg, recv_msg, MPI_COMM_WORLD);

        std::vector<std::tuple<u32, u32, u64>> msg_list;
        for (u64 i = 0; i < recv_msg.size(); i += 2) {
            u32_2 sender_receiver = sham::unpack32(recv_msg[i]);
            u64 indices_size = recv_msg[i + 1];

            u32 sender_rank = sender_receiver.x();
            u32 receiver_rank = sender_receiver.y();

            if (sender_rank == receiver_rank) {
                continue; // only mean that it was not fully inserted in the patch
            }

            msg_list.push_back(std::make_tuple(sender_rank, receiver_rank, indices_size));
        }

        if (setup_log) {
            setup_log.value().update_msg_list(msg_list);
        }

        // shuffle msg_list according to seed golden_number*1000*step_count
        std::mt19937 eng_global_msg(u64(golden_number * 1000 * step_count));
        std::shuffle(msg_list.begin(), msg_list.end(), eng_global_msg);

        // now that we are in sync we can determine who should send to who

        std::vector<u64> msg_count_rank(shamcomm::world_size());
        std::vector<u64> comm_size_rank(shamcomm::world_size());

        std::vector<std::tuple<u32, u32, u64>> rank_msg_list;

        bool was_count_limited = false;
        bool was_size_limited = false;
        bool was_msg_size_limited = false;

        // Greedy selection of messages honoring the per-rank message-count and
        // data-volume budgets. Every rank runs the same deterministic loop on
        // the same (seeded-shuffled) list, so the resulting schedule agrees
        // globally without further communication.
        for (auto &[sender_rank, receiver_rank, indices_size] : msg_list) {

            bool msg_count_limit_not_reached = msg_count_rank.at(receiver_rank) < msg_limit
                && msg_count_rank.at(sender_rank) < msg_limit;

            bool recv_size_limit_not_reached = comm_size_rank.at(receiver_rank) < data_count_limit
                && comm_size_rank.at(sender_rank) < data_count_limit;

            was_count_limited = was_count_limited || !msg_count_limit_not_reached;
            was_size_limited = was_size_limited || !recv_size_limit_not_reached;

            bool can_send_recv = msg_count_limit_not_reached && recv_size_limit_not_reached;

            u64 msg_size = std::min(indices_size, max_message_size);
            msg_size = std::min(msg_size, data_count_limit);
            was_msg_size_limited = was_msg_size_limited || (msg_size < indices_size);

            if (can_send_recv) {
                if (sender_rank == shamcomm::world_rank()
                    || receiver_rank == shamcomm::world_rank()) {
                    if (msg_size > 0) {
                        rank_msg_list.push_back(
                            std::make_tuple(sender_rank, receiver_rank, msg_size));
                    }
                }
            }

            msg_count_rank.at(receiver_rank) += 1;
            msg_count_rank.at(sender_rank) += 1;
            comm_size_rank.at(receiver_rank) += msg_size;
            comm_size_rank.at(sender_rank) += msg_size;
        }

        // logger::raw_ln(
        //     shamcomm::world_rank(),
        //     was_count_limited,
        //     was_size_limited,
        //     msg_count_rank,
        //     comm_size_rank);

        // logger::info_ln(
        //     "SPH setup", "rank", shamcomm::world_rank(), "rank_msg_list", rank_msg_list);

        // extract the data
        // NOTE(review): the declaration of `send_data` (distributed shared
        // data of PatchDataLayer, per its use below) was lost in extraction.
        sham::DeviceBuffer idx_to_rem = sham::DeviceBuffer<u32>(0, dev_sched);
        for (auto &[sender_rank, receiver_rank, indices_size] : rank_msg_list) {
            if (sender_rank == shamcomm::world_rank()) {
                std::vector<u32> &idx_to_extract = index_per_ranks[receiver_rank];
                sham::DeviceBuffer _tmp = sham::DeviceBuffer<u32>(idx_to_extract.size(), dev_sched);
                _tmp.copy_from_stdvec(idx_to_extract);

                if (_tmp.get_size() > indices_size) {
                    _tmp.resize(indices_size);
                }

                PatchDataLayer _tmp_pdat = PatchDataLayer(sched.get_layout_ptr_old());
                to_insert.append_subset_to(_tmp, _tmp.get_size(), _tmp_pdat);

                idx_to_rem.append(_tmp);

                send_data.add_obj(sender_rank, receiver_rank, std::move(_tmp_pdat));
            }
        }

        to_insert.remove_ids(idx_to_rem, idx_to_rem.get_size());

        // comm the data to the right ranks
        // NOTE(review): the declaration of `recv_dat` (per its use below) was
        // lost in extraction just above.

        shamalgs::collective::serialize_sparse_comm<PatchDataLayer>(
            dev_sched,
            std::move(send_data),
            recv_dat,
            [&](u64 id) {
                return id; // here the ids in the DDshared are the MPI ranks
            },
            [&](PatchDataLayer &pdat) {
                shamalgs::SerializeHelper ser(dev_sched);
                ser.allocate(pdat.serialize_buf_byte_size());
                pdat.serialize_buf(ser);
                return ser.finalize();
            },
            [&](sham::DeviceBuffer<u8> &&buf) {
                // exchange the buffer held by the distrib data and give it to the
                // serializer
                shamalgs::SerializeHelper ser(dev_sched, std::forward<sham::DeviceBuffer<u8>>(buf));
                return PatchDataLayer::deserialize_buf(ser, sched.get_layout_ptr_old());
            },
            comm_cache);

        // insert the data into the data to be inserted
        recv_dat.for_each([&](u64 sender, u64 receiver, PatchDataLayer &pdat) {
            to_insert.insert_elements(pdat);
        });

        // Reduce the "was limited" flags so every rank prints the same status.
        was_count_limited
            = !shamalgs::collective::are_all_rank_true(!was_count_limited, MPI_COMM_WORLD);
        was_size_limited
            = !shamalgs::collective::are_all_rank_true(!was_size_limited, MPI_COMM_WORLD);
        was_msg_size_limited
            = !shamalgs::collective::are_all_rank_true(!was_msg_size_limited, MPI_COMM_WORLD);
        bool was_sync_limited
            = !shamalgs::collective::are_all_rank_true(!sync_limited, MPI_COMM_WORLD);

        std::string log_suffix = "";
        if (was_count_limited) {
            log_suffix += " (msg count limited)";
        }
        if (was_size_limited) {
            log_suffix += " (total msg size limited)";
        }
        if (was_msg_size_limited) {
            log_suffix += " (msg size limited)";
        }
        if (was_sync_limited) {
            log_suffix += " (sync limited)";
        }
        log_suffix += shambase::format(" (msg count : {})", recv_msg.size());
        log_inject_status(" <- global loop ->" + log_suffix);

        f64 worst_time_get_index_per_ranks
            = shamalgs::collective::allreduce_max<f64>(timer_get_index_per_ranks);

        step_count++;
    }

    if (setup_log) {
        setup_log.value().dump_state();
    }

    shamcomm::mpi::Barrier(MPI_COMM_WORLD);
    time_part_inject.end();
    if (shamcomm::world_rank() == 0) {
        logger::normal_ln(
            "SPH setup", "the injection step took :", time_part_inject.elasped_sec(), "s");
    }

    // NOTE(review): the statement capturing `mem_perf_infos_end` (read below)
    // was lost in extraction just above.

    f64 delta_mpi_timer = shamcomm::mpi::get_timer("total") - mpi_timer_start;
    f64 t_dev_alloc
        = (mem_perf_infos_end.time_alloc_device - mem_perf_infos_start.time_alloc_device)
        + (mem_perf_infos_end.time_free_device - mem_perf_infos_start.time_free_device);
    f64 t_host_alloc = (mem_perf_infos_end.time_alloc_host - mem_perf_infos_start.time_alloc_host)
        + (mem_perf_infos_end.time_free_host - mem_perf_infos_start.time_free_host);

    { // perf infos
        std::vector<f64> time_rank_getter_all_ranks
            = shamalgs::collective::gather(total_time_rank_getter);
        std::vector<f64> max_time_rank_getter_all_ranks
            = shamalgs::collective::gather(max_time_rank_getter);
        std::vector<f64> mpi_timer_all_ranks = shamalgs::collective::gather(delta_mpi_timer);
        std::vector<f64> alloc_time_device_all_ranks = shamalgs::collective::gather(t_dev_alloc);
        std::vector<f64> alloc_time_host_all_ranks = shamalgs::collective::gather(t_host_alloc);
        std::vector<size_t> max_mem_device_all_ranks
            = shamalgs::collective::gather(mem_perf_infos_end.max_allocated_byte_device);
        std::vector<size_t> max_mem_host_all_ranks
            = shamalgs::collective::gather(mem_perf_infos_end.max_allocated_byte_host);

        if (shamcomm::world_rank() == 0) {
            f64 time_part_inject_sec = time_part_inject.elasped_sec();
            f64 sum_t = time_part_inject_sec * shamcomm::world_size();

            f64 sum_time_rank_getter = std::accumulate(
                time_rank_getter_all_ranks.begin(), time_rank_getter_all_ranks.end(), 0.0);
            f64 max_time_rank_getter = *std::max_element(
                max_time_rank_getter_all_ranks.begin(), max_time_rank_getter_all_ranks.end());
            f64 sum_mpi
                = std::accumulate(mpi_timer_all_ranks.begin(), mpi_timer_all_ranks.end(), 0.0);
            f64 sum_alloc_device = std::accumulate(
                alloc_time_device_all_ranks.begin(), alloc_time_device_all_ranks.end(), 0.0);
            f64 sum_alloc_host = std::accumulate(
                alloc_time_host_all_ranks.begin(), alloc_time_host_all_ranks.end(), 0.0);
            size_t sum_mem_device_total = std::accumulate(
                max_mem_device_all_ranks.begin(), max_mem_device_all_ranks.end(), 0_u64);
            size_t sum_mem_host_total = std::accumulate(
                max_mem_host_all_ranks.begin(), max_mem_host_all_ranks.end(), 0_u64);

            static constexpr u32 cols_count = 6;

            using Table = shambase::table;

            Table table(6);

            table.add_double_rule();
            table.add_data(
                {"rank", "rank get (sum/max)", "MPI", "alloc d% h%", "mem (max) d", "mem (max) h"},
                Table::center);
            table.add_double_rule();
            for (u32 i = 0; i < shamcomm::world_size(); i++) {
                table.add_data(
                    {shambase::format("{:<4}", i),
                     shambase::format(
                         "{:.2f}s / {:.2f}s",
                         time_rank_getter_all_ranks[i],
                         max_time_rank_getter_all_ranks[i]),
                     shambase::format("{:.2f}s", mpi_timer_all_ranks[i]),
                     shambase::format(
                         "{:>.1f}% {:<.1f}%",
                         100 * (alloc_time_device_all_ranks[i] / time_part_inject_sec),
                         100 * (alloc_time_host_all_ranks[i] / time_part_inject_sec)),
                     shambase::format("{}", shambase::readable_sizeof(max_mem_device_all_ranks[i])),
                     shambase::format("{}", shambase::readable_sizeof(max_mem_host_all_ranks[i]))},
                    Table::right);
            }
            if (shamcomm::world_size() > 1) {
                table.add_rulled_data({"", "<avg> / <max>", "<avg>", "<avg>", "<sum>", "<sum>"});
                table.add_data(
                    {"all",
                     shambase::format(
                         "{:.2f}s / {:.2f}s",
                         sum_time_rank_getter / shamcomm::world_size(),
                         max_time_rank_getter),
                     shambase::format("{:.2f}s", sum_mpi / shamcomm::world_size()),
                     shambase::format(
                         "{:>.1f}% {:<.1f}%",
                         100 * (sum_alloc_device / sum_t),
                         100 * (sum_alloc_host / sum_t)),
                     shambase::format("{}", shambase::readable_sizeof(sum_mem_device_total)),
                     shambase::format("{}", shambase::readable_sizeof(sum_mem_host_total))},
                    Table::right);
            }
            table.add_rule();
            logger::info_ln("SPH setup", "injection perf report:" + table.render());
        }
    }

    if (part_reordering) {
        modules::ParticleReordering<Tvec, u32, SPHKernel>(context, solver_config, storage)
            .reorder_particles();
    }

    time_setup.end();
    if (shamcomm::world_rank() == 0) {
        logger::normal_ln("SPH setup", "the setup took :", time_setup.elasped_sec(), "s");
    }
}
1028
1029template<class Tvec, template<class> class SPHKernel>
1030inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
1031 SPHSetup<Tvec, SPHKernel>::make_modifier_warp_disc(
1032 SetupNodePtr parent, Tscal Rwarp, Tscal Hwarp, Tscal inclination, Tscal posangle) {
1033 return std::shared_ptr<ISPHSetupNode>(new ModifierApplyDiscWarp<Tvec, SPHKernel>(
1034 context, solver_config, parent, Rwarp, Hwarp, inclination, posangle));
1035}
1036
1037template<class Tvec, template<class> class SPHKernel>
1038inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
1039 SPHSetup<Tvec, SPHKernel>::make_modifier_custom_warp(
1040 SetupNodePtr parent,
1041 std::function<Tscal(Tscal)> inc_profile,
1042 std::function<Tscal(Tscal)> psi_profile,
1043 std::function<Tvec(Tscal)> k_profile) {
1044 return std::shared_ptr<ISPHSetupNode>(new ModifierApplyCustomWarp<Tvec, SPHKernel>(
1045 context, solver_config, parent, inc_profile, psi_profile, k_profile));
1046}
1047
1048template<class Tvec, template<class> class SPHKernel>
1049inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
1050 SPHSetup<Tvec, SPHKernel>::make_modifier_add_offset(
1051 SetupNodePtr parent, Tvec offset_postion, Tvec offset_velocity) {
1052
1053 return std::shared_ptr<ISPHSetupNode>(
1054 new ModifierOffset<Tvec>(context, parent, offset_postion, offset_velocity));
1055}
1056
1057template<class Tvec, template<class> class SPHKernel>
1058inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::SPHSetup<
1059 Tvec,
1060 SPHKernel>::make_modifier_filter(SetupNodePtr parent, std::function<bool(Tvec)> filter) {
1061
1062 return std::shared_ptr<ISPHSetupNode>(
1063 new ModifierFilter<Tvec, SPHKernel>(context, parent, filter));
1064}
1065
1066template<class Tvec, template<class> class SPHKernel>
1067inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
1068 SPHSetup<Tvec, SPHKernel>::make_modifier_split_part(
1069 SetupNodePtr parent, u64 n_split, u64 seed, Tscal h_scaling) {
1070 return std::shared_ptr<ISPHSetupNode>(
1071 new ModifierSplitPart<Tvec>(context, parent, n_split, seed, h_scaling));
1072}
1073
1074using namespace shammath;
1078
constexpr const char * xyz
Position field (3D coordinates)
Header file describing a Node Instance.
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
Collective boolean reduction to check if all ranks have true as input.
bool are_all_rank_true(bool input, MPI_Comm comm)
return true only if all ranks have true as input
The MPI scheduler.
u64 crit_patch_split
splitting limit (if load value > crit_patch_split => patch split)
SchedulerPatchList patch_list
handle the list of the patches of the scheduler
std::vector< shamrock::patch::Patch > local
contain the list of patch owned by the current node
A buffer allocated in USM (Unified Shared Memory)
void copy_from_stdvec(const std::vector< T > &vec)
Copy the content of a std::vector into the buffer.
void resize(size_t new_size, bool keep_data=true)
Resizes the buffer to a given size.
void append(const DeviceBuffer &other)
Append the content of another buffer to this one.
size_t get_size() const
Gets the number of elements in the buffer.
Container for objects shared between two distributed data elements.
void for_each(std::function< void(u64, u64, T &)> &&f)
Apply a function to all stored objects.
iterator add_obj(u64 left_id, u64 right_id, T &&obj)
Add an object associated with a patch pair.
Represents a collection of objects distributed across patches identified by a u64 id.
iterator add_obj(u64 id, T &&obj)
Adds a new object to the collection.
T & get(u64 id)
Returns a reference to an object in the collection.
void reset()
Reset the collection to its initial state.
Class Timer measures the time elapsed since the timer was started.
Definition time.hpp:96
void end()
Stops the timer and stores the elapsed time in nanoseconds.
Definition time.hpp:111
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
Definition time.hpp:123
void start()
Starts the timer.
Definition time.hpp:106
Class to insert data in the PatchScheduler.
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
PatchCoordTransform< T > get_patch_transform() const
Get a PatchCoordTransform object that describes the conversion between patch coordinates and domain c...
Definition SimBox.hpp:285
std::vector< int > vector_allgatherv(const std::vector< T > &send_vec, const MPI_Datatype &send_type, std::vector< T > &recv_vec, const MPI_Datatype &recv_type, const MPI_Comm comm)
allgatherv on vector with size query (size querying variant of vector_allgatherv_ks) //TODO add fault...
Definition exchanges.hpp:98
MemPerfInfos get_mem_perf_info()
Retrieve the memory performance information.
Boolean reduction algorithm for checking if all elements are non-zero.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
std::string readable_sizeof(double size)
given a sizeof value return a readable string Example : readable_sizeof(1024*1024*1024) -> "1....
Definition string.hpp:139
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
Definition worldInfo.cpp:40
i32 world_size()
Gives the size of the MPI communicator.
Definition worldInfo.cpp:38
namespace for math utility
Definition AABB.hpp:26
constexpr u64 u64_max
u64 max value
void err(std::string module_name, Types... var2)
Prints a log message with multiple arguments.
Definition logs.hpp:133
#define __shamrock_stack_entry()
Macro to create a stack entry.
Structure to store the performance informations about memory allocation and deallocation.
f64 time_alloc_host
Time spent allocating memory on the host.
size_t max_allocated_byte_host
max bytes allocated on the host
f64 time_free_device
Time spent deallocating memory on the device.
size_t max_allocated_byte_device
max bytes allocated on the device
f64 time_alloc_device
Time spent allocating memory on the device.
f64 time_free_host
Time spent deallocating memory on the host.
A class that references multiple buffers or similar objects.
Patch object that contain generic patch information.
Definition Patch.hpp:33
Functions related to the MPI communicator.
#define ON_RANK_0(x)
Macro to execute code only on rank 0.
Definition worldInfo.hpp:73
void Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
MPI wrapper for MPI_Exscan.
Definition wrapper.cpp:166
void Barrier(MPI_Comm comm)
MPI wrapper for MPI_Barrier.
Definition wrapper.cpp:194
f64 get_timer(std::string timername)
get a timer value
Definition wrapper.cpp:44
void Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
MPI wrapper for MPI_Allreduce.
Definition wrapper.cpp:119