50template<
class Tvec,
template<
class>
class SPHKernel>
51inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
52 SPHSetup<Tvec, SPHKernel>::make_generator_lattice_hcp(
53 Tscal dr, std::pair<Tvec, Tvec> box,
bool discontinuous) {
55 return std::shared_ptr<ISPHSetupNode>(
56 new GeneratorLatticeHCP<Tvec, true>(context, dr, box));
58 return std::shared_ptr<ISPHSetupNode>(
59 new GeneratorLatticeHCP<Tvec, false>(context, dr, box));
63template<
class Tvec,
template<
class>
class SPHKernel>
64inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
65 SPHSetup<Tvec, SPHKernel>::make_generator_lattice_cubic(Tscal dr, std::pair<Tvec, Tvec> box) {
66 return std::shared_ptr<ISPHSetupNode>(
new GeneratorLatticeCubic<Tvec>(context, dr, box));
// ---------------------------------------------------------------------------
// make_generator_disc_mc: factory for a Monte-Carlo disc generator node
// (GeneratorMCDisc<Tvec, SPHKernel>).
// Visible parameters are radial scalar profile callbacks (sigma_profile,
// H_profile, rot_profile, cs_profile) and an initial smoothing-length factor
// (init_h_factor).
// NOTE(review): this listing elides original lines 72-75 and 80 (additional
// signature parameters) and 83-96 (the GeneratorMCDisc constructor arguments
// and closing braces) — consult the upstream source before editing; do not
// assume the parameter list shown here is complete.
// ---------------------------------------------------------------------------
69template<
class Tvec,
template<
class>
class SPHKernel>
70inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
71 SPHSetup<Tvec, SPHKernel>::make_generator_disc_mc(
76 std::function<Tscal(Tscal)> sigma_profile,
77 std::function<Tscal(Tscal)> H_profile,
78 std::function<Tscal(Tscal)> rot_profile,
79 std::function<Tscal(Tscal)> cs_profile,
81 Tscal init_h_factor) {
82 return std::shared_ptr<ISPHSetupNode>(
new GeneratorMCDisc<Tvec, SPHKernel>(
97template<
class Tvec,
template<
class>
class SPHKernel>
98inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
99 SPHSetup<Tvec, SPHKernel>::make_generator_from_context(
ShamrockCtx &context_other) {
100 return std::shared_ptr<ISPHSetupNode>(
101 new GeneratorFromOtherContext<Tvec>(context, context_other));
104template<
class Tvec,
template<
class>
class SPHKernel>
105inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
106 SPHSetup<Tvec, SPHKernel>::make_combiner_add(SetupNodePtr parent1, SetupNodePtr parent2) {
107 return std::shared_ptr<ISPHSetupNode>(
new CombinerAdd<Tvec>(context, parent1, parent2));
110template<
class Tvec,
template<
class>
class SPHKernel>
112 SetupNodePtr setup,
bool part_reordering, std::optional<u32> insert_step) {
124 auto compute_load = [&]() {
125 modules::ComputeLoadBalanceValue<Tvec, SPHKernel>(context, solver_config, storage)
126 .update_load_balancing();
129 auto has_pdat = [&]() {
131 using namespace shamrock::patch;
140 if (
bool(insert_step)) {
141 _insert_step = insert_step.value();
144 while (!setup->is_done()) {
148 if (solver_config.track_particles_id) {
155 u64 loc_inj = pdat.get_obj_cnt();
159 &loc_inj, &offset_init, 1, get_mpi_type<u64>(), MPI_SUM, MPI_COMM_WORLD);
166 offset_init += injected_parts;
168 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
179 [offset_init](
u32 i,
u64 *__restrict part_ids) {
180 part_ids[i] = i + offset_init;
184 .overwrite(part_ids, loc_inj);
189 = inserter.push_patch_data<Tvec>(pdat,
"xyz", sched.
crit_patch_split * 8, compute_load);
191 injected_parts += injected;
194 u32 final_balancing_steps = 3;
195 for (
u32 i = 0; i < final_balancing_steps; i++) {
198 "SPH setup",
"Final load balancing step", i,
"of", final_balancing_steps));
199 inserter.balance_load(compute_load);
202 if (part_reordering) {
203 modules::ParticleReordering<Tvec, u32, SPHKernel>(context, solver_config, storage)
204 .reorder_particles();
209 logger::info_ln(
"SPH setup",
"the setup took :", time_setup.
elasped_sec(),
"s");
215 std::vector<u64> count_per_rank;
216 std::vector<std::tuple<u32, u32, u64>> msg_list;
219 u64 step_counter = 0;
221 nlohmann::json json_data = nlohmann::json::array();
224 nlohmann::json step_data;
225 step_data[
"step_counter"] = step_counter;
226 step_data[
"count_per_rank"] = state.count_per_rank;
227 step_data[
"msg_list"] = state.msg_list;
228 json_data.push_back(step_data);
232 std::string fname =
"setup_log_step.json";
234 logger::normal_ln(
"SPH setup",
"dumping setup log to ", fname);
237 std::ofstream file(fname);
238 file << json_data.dump(4);
244 void update_count_per_rank(
u64 count) {
245 std::vector<u64> tmp{count};
246 std::vector<u64> recv_count_per_rank;
248 state.count_per_rank = recv_count_per_rank;
250 if (step_counter % 20 == 0)
254 void update_msg_list(std::vector<std::tuple<u32, u32, u64>> &msg_list) {
255 state.msg_list = msg_list;
257 if (step_counter % 20 == 0)
/// Golden ratio ((1 + sqrt(5)) / 2); used further down in this file to derive
/// the pseudo-random seed of the global message-list shuffle
/// (`std::mt19937 eng_global_msg(u64(golden_number * 1000 * step_count))`).
262inline constexpr f64 golden_number = 1.61803398874989484820458683436563;
264template<
class Tvec,
template<
class>
class SPHKernel>
267 bool part_reordering,
268 std::optional<u32> gen_count_per_step,
269 std::optional<u32> insert_count_per_step,
270 std::optional<u64> max_msg_count_per_rank_per_step,
271 std::optional<u64> max_data_count_per_rank_per_step,
272 std::optional<u64> max_msg_size,
274 bool speculative_balancing) {
282 std::optional<SetupLog> setup_log
283 = (do_setup_log) ? std::make_optional<SetupLog>() : std::nullopt;
291 if (
bool(insert_count_per_step)) {
292 insert_step = insert_count_per_step.value();
296 if (
bool(gen_count_per_step)) {
297 gen_step = gen_count_per_step.value();
300 u64 msg_limit = 1024;
301 if (
bool(max_msg_count_per_rank_per_step)) {
302 msg_limit = max_msg_count_per_rank_per_step.value();
304 u64 data_count_limit = insert_step;
305 if (
bool(max_data_count_per_rank_per_step)) {
306 data_count_limit = max_data_count_per_rank_per_step.value();
308 u64 max_message_size = std::max(insert_step / 16, 1_u32);
309 if (
bool(max_msg_size)) {
310 max_message_size = max_msg_size.value();
315 u64 speculative_last_npatch = 0;
318 auto compute_load = [&]() {
319 if (speculative_balancing) {
323 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
325 u64 npatch = scheduler().patch_list.global.size();
328 if (npatch != speculative_last_npatch) {
335 "number of patches has changed, rebuilding speculative load values");
339 speculative_last_npatch = npatch;
340 speculative_load_values.
reset();
344 std::vector<Tvec> patch_aabb_min(npatch);
345 std::vector<Tvec> patch_aabb_max(npatch);
347 auto &global_patch_list = scheduler().patch_list.global;
351 for (
size_t i = 0; i < global_patch_list.size(); i++) {
353 if (!p.is_err_mode()) {
355 patch_aabb_min[i] = patch_coord.lower;
356 patch_aabb_max[i] = patch_coord.upper;
363 buf_patch_aabb_min.copy_from_stdvec(patch_aabb_min);
364 buf_patch_aabb_max.copy_from_stdvec(patch_aabb_max);
369 local_load_values.fill(0);
373 if (
xyz.get_obj_cnt() > 0) {
375 shamsys::instance::get_compute_scheduler().get_queue(),
381 const Tvec *__restrict xyz,
382 const Tvec *__restrict patch_aabb_min,
383 const Tvec *__restrict patch_aabb_max,
384 u64 *__restrict local_load_values) {
386 for (
size_t j = 0; j < npatch; j++) {
388 = {patch_aabb_min[j], patch_aabb_max[j]};
389 if (patch_coord.contain_pos(pos)) {
392 sycl::memory_order::relaxed,
393 sycl::memory_scope::device>
394 atomic_local_load_values(local_load_values[j]);
395 atomic_local_load_values++;
403 auto local_load_values_host = local_load_values.copy_to_stdvec();
405 std::vector<u64> reduced_load_values(npatch);
410 local_load_values_host.data(),
411 reduced_load_values.data(),
419 for (
size_t i = 0; i < npatch; i++) {
420 speculative_load_values.
add_obj(
421 global_patch_list[i].id_patch,
u64(reduced_load_values[i]));
426 auto &patch_list = scheduler().patch_list;
428 for (
u64 id : scheduler().owned_patch_id) {
430 = patch_list.local[patch_list.id_patch_to_local_idx[id]];
431 speculative_load_values.
get(
id)
432 += scheduler().patch_data.owned_data.get(
id).get_obj_cnt();
439 return speculative_load_values.
get(p.id_patch);
443 modules::ComputeLoadBalanceValue<Tvec, SPHKernel>(context, solver_config, storage)
444 .update_load_balancing();
448 auto has_pdat = [&]() {
450 using namespace shamrock::patch;
458 time_part_gen.
start();
461 logger::normal_ln(
"SPH setup",
"generating particles ...");
464 while (!setup->is_done()) {
470 if (solver_config.track_particles_id) {
477 u64 loc_inj = tmp.get_obj_cnt();
481 &loc_inj, &offset_init, 1, get_mpi_type<u64>(), MPI_SUM, MPI_COMM_WORLD);
488 offset_init += injected_parts;
490 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
501 [offset_init](
u32 i,
u64 *__restrict part_ids) {
502 part_ids[i] = i + offset_init;
506 .overwrite(part_ids, loc_inj);
510 to_insert.insert_elements(tmp);
512 u64 sum_push = shamalgs::collective::allreduce_sum<u64>(tmp.get_obj_cnt());
513 u64 sum_all = shamalgs::collective::allreduce_sum<u64>(to_insert.get_obj_cnt());
515 u64 min_rank = shamalgs::collective::allreduce_min<u64>(to_insert.get_obj_cnt());
516 u64 max_rank = shamalgs::collective::allreduce_max<u64>(to_insert.get_obj_cnt());
525 "Nstep = {} ( {:.1e} ) Ntotal = {} ( {:.1e} rank min = {:.1e} max = {:.1e}) "
526 "rate = {:e} N.s^-1",
537 setup_log.value().update_count_per_rank(to_insert.get_obj_cnt());
540 injected_parts += sum_push;
546 "SPH setup",
"the generation step took :", time_part_gen.
elasped_sec(),
"s");
551 "SPH setup",
"final particle count =", injected_parts,
"beginning injection ...");
560 time_part_inject.
start();
562 auto log_inject_status = [&](std::string log_suffix =
"") {
563 u64 sum_all = shamalgs::collective::allreduce_sum<u64>(to_insert.get_obj_cnt());
565 u32 rank_without_patch
566 = shamalgs::collective::allreduce_sum<u32>(sched.
patch_list.
local.size() == 0 ? 1 : 0);
572 "injected {:12} / {:} => {:5.1f}% | ranks with patchs = {:d} / {:d} {}",
573 injected_parts - sum_all,
575 f64(injected_parts - sum_all) /
f64(injected_parts) * 100.0,
582 setup_log.value().update_count_per_rank(to_insert.get_obj_cnt());
586 auto inject_in_local_domains =
587 [&sched, &inserter, &compute_load, &insert_step, &log_inject_status](
591 bool has_been_limited =
true;
593 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
596 while (has_been_limited) {
597 has_been_limited =
false;
598 using namespace shamrock::patch;
607 auto ids =
xyz.get_ids_where_recycle_buffer(
610 Tvec tmp = access[id];
611 return patch_coord.contain_pos(tmp);
615 if (ids.get_size() > insert_step) {
616 ids.resize(insert_step);
617 has_been_limited =
true;
620 if (ids.get_size() > 0) {
621 to_insert.extract_elements(ids, pdat);
625 sched.check_patchdata_locality_correctness();
627 inserter.balance_load(compute_load);
632 if (has_been_limited) {
634 log_inject_status(
" -> local loop <-");
639 auto get_index_per_ranks = [&](
f64 &timer_result) {
643 time_get_index_per_ranks.
start();
651 if (pos_field.get_nvar() != 1) {
655 sycl::buffer<u64> new_id_buf = sptree.compute_patch_owner(
656 shamsys::instance::get_compute_scheduler_ptr(),
658 pos_field.get_obj_cnt());
660 std::unordered_map<i32, std::vector<u32>> index_per_ranks;
661 bool err_id_in_newid =
false;
663 sycl::host_accessor nid{new_id_buf, sycl::read_only};
664 for (
u32 i = 0; i < pos_field.get_obj_cnt(); i++) {
665 u64 patch_id = nid[i];
667 err_id_in_newid = err_id_in_newid || (
err);
669 i32 rank = sched.get_patch_rank_owner(patch_id);
670 index_per_ranks[rank].push_back(i);
674 if (err_id_in_newid) {
676 "a new id could not be computed");
679 time_get_index_per_ranks.
end();
680 timer_result = time_get_index_per_ranks.
elasped_sec();
682 return index_per_ranks;
685 f64 total_time_rank_getter = 0;
686 f64 max_time_rank_getter = 0;
690 while (!shamalgs::collective::are_all_rank_true(to_insert.is_empty(), MPI_COMM_WORLD)) {
695 using namespace shamrock::patch;
697 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
699 inject_in_local_domains(to_insert);
701 f64 timer_get_index_per_ranks = 0;
702 std::unordered_map<i32, std::vector<u32>> index_per_ranks
703 = get_index_per_ranks(timer_get_index_per_ranks);
704 total_time_rank_getter += timer_get_index_per_ranks;
705 max_time_rank_getter = std::max(max_time_rank_getter, timer_get_index_per_ranks);
709 std::vector<u64> send_msg;
710 for (
auto &[rank, indices] : index_per_ranks) {
712 send_msg.push_back(indices.size());
716 bool sync_limited =
false;
717 if (send_msg.size() > max_send) {
727 std::vector<tmp> tmp_vec;
728 tmp_vec.reserve(send_msg.size() / 2);
729 for (
u64 i = 0; i < send_msg.size(); i += 2) {
730 tmp_vec.push_back({send_msg[i], send_msg[i + 1]});
735 std::mt19937_64 eng_local_msg(local_seed);
736 std::shuffle(tmp_vec.begin(), tmp_vec.end(), eng_local_msg);
739 std::vector<u64> send_msg_new;
740 send_msg_new.reserve(max_send);
741 for (
auto &t : tmp_vec) {
742 if (send_msg_new.size() >= max_send) {
745 send_msg_new.push_back(t.ranks);
746 send_msg_new.push_back(t.size);
749 send_msg = send_msg_new;
753 std::vector<u64> recv_msg;
756 std::vector<std::tuple<u32, u32, u64>> msg_list;
757 for (
u64 i = 0; i < recv_msg.size(); i += 2) {
758 u32_2 sender_receiver = sham::unpack32(recv_msg[i]);
759 u64 indices_size = recv_msg[i + 1];
761 u32 sender_rank = sender_receiver.x();
762 u32 receiver_rank = sender_receiver.y();
764 if (sender_rank == receiver_rank) {
768 msg_list.push_back(std::make_tuple(sender_rank, receiver_rank, indices_size));
772 setup_log.value().update_msg_list(msg_list);
776 std::mt19937 eng_global_msg(
u64(golden_number * 1000 * step_count));
777 std::shuffle(msg_list.begin(), msg_list.end(), eng_global_msg);
784 std::vector<std::tuple<u32, u32, u64>> rank_msg_list;
786 bool was_count_limited =
false;
787 bool was_size_limited =
false;
788 bool was_msg_size_limited =
false;
790 for (
auto &[sender_rank, receiver_rank, indices_size] : msg_list) {
792 bool msg_count_limit_not_reached = msg_count_rank.at(receiver_rank) < msg_limit
793 && msg_count_rank.at(sender_rank) < msg_limit;
795 bool recv_size_limit_not_reached = comm_size_rank.at(receiver_rank) < data_count_limit
796 && comm_size_rank.at(sender_rank) < data_count_limit;
798 was_count_limited = was_count_limited || !msg_count_limit_not_reached;
799 was_size_limited = was_size_limited || !recv_size_limit_not_reached;
801 bool can_send_recv = msg_count_limit_not_reached && recv_size_limit_not_reached;
803 u64 msg_size = std::min(indices_size, max_message_size);
804 msg_size = std::min(msg_size, data_count_limit);
805 was_msg_size_limited = was_msg_size_limited || (msg_size < indices_size);
811 rank_msg_list.push_back(
812 std::make_tuple(sender_rank, receiver_rank, msg_size));
817 msg_count_rank.at(receiver_rank) += 1;
818 msg_count_rank.at(sender_rank) += 1;
819 comm_size_rank.at(receiver_rank) += msg_size;
820 comm_size_rank.at(sender_rank) += msg_size;
836 for (
auto &[sender_rank, receiver_rank, indices_size] : rank_msg_list) {
838 std::vector<u32> &idx_to_extract = index_per_ranks[receiver_rank];
842 if (_tmp.
get_size() > indices_size) {
843 _tmp.
resize(indices_size);
847 to_insert.append_subset_to(_tmp, _tmp.
get_size(), _tmp_pdat);
851 send_data.
add_obj(sender_rank, receiver_rank, std::move(_tmp_pdat));
855 to_insert.remove_ids(idx_to_rem, idx_to_rem.
get_size());
860 shamalgs::collective::serialize_sparse_comm<PatchDataLayer>(
862 std::move(send_data),
869 ser.allocate(pdat.serialize_buf_byte_size());
870 pdat.serialize_buf(ser);
871 return ser.finalize();
877 return PatchDataLayer::deserialize_buf(ser, sched.get_layout_ptr_old());
883 to_insert.insert_elements(pdat);
892 bool was_sync_limited
895 std::string log_suffix =
"";
896 if (was_count_limited) {
897 log_suffix +=
" (msg count limited)";
899 if (was_size_limited) {
900 log_suffix +=
" (total msg size limited)";
902 if (was_msg_size_limited) {
903 log_suffix +=
" (msg size limited)";
905 if (was_sync_limited) {
906 log_suffix +=
" (sync limited)";
908 log_suffix += shambase::format(
" (msg count : {})", recv_msg.size());
909 log_inject_status(
" <- global loop ->" + log_suffix);
911 f64 worst_time_get_index_per_ranks
912 = shamalgs::collective::allreduce_max<f64>(timer_get_index_per_ranks);
918 setup_log.value().dump_state();
922 time_part_inject.
end();
925 "SPH setup",
"the injection step took :", time_part_inject.
elasped_sec(),
"s");
938 std::vector<f64> time_rank_getter_all_ranks
939 = shamalgs::collective::gather(total_time_rank_getter);
940 std::vector<f64> max_time_rank_getter_all_ranks
941 = shamalgs::collective::gather(max_time_rank_getter);
942 std::vector<f64> mpi_timer_all_ranks = shamalgs::collective::gather(delta_mpi_timer);
943 std::vector<f64> alloc_time_device_all_ranks = shamalgs::collective::gather(t_dev_alloc);
944 std::vector<f64> alloc_time_host_all_ranks = shamalgs::collective::gather(t_host_alloc);
945 std::vector<size_t> max_mem_device_all_ranks
947 std::vector<size_t> max_mem_host_all_ranks
954 f64 sum_time_rank_getter = std::accumulate(
955 time_rank_getter_all_ranks.begin(), time_rank_getter_all_ranks.end(), 0.0);
956 f64 max_time_rank_getter = *std::max_element(
957 max_time_rank_getter_all_ranks.begin(), max_time_rank_getter_all_ranks.end());
959 = std::accumulate(mpi_timer_all_ranks.begin(), mpi_timer_all_ranks.end(), 0.0);
960 f64 sum_alloc_device = std::accumulate(
961 alloc_time_device_all_ranks.begin(), alloc_time_device_all_ranks.end(), 0.0);
962 f64 sum_alloc_host = std::accumulate(
963 alloc_time_host_all_ranks.begin(), alloc_time_host_all_ranks.end(), 0.0);
964 size_t sum_mem_device_total = std::accumulate(
965 max_mem_device_all_ranks.begin(), max_mem_device_all_ranks.end(), 0_u64);
966 size_t sum_mem_host_total = std::accumulate(
967 max_mem_host_all_ranks.begin(), max_mem_host_all_ranks.end(), 0_u64);
969 static constexpr u32 cols_count = 6;
975 table.add_double_rule();
977 {
"rank",
"rank get (sum/max)",
"MPI",
"alloc d% h%",
"mem (max) d",
"mem (max) h"},
979 table.add_double_rule();
982 {shambase::format(
"{:<4}", i),
985 time_rank_getter_all_ranks[i],
986 max_time_rank_getter_all_ranks[i]),
987 shambase::format(
"{:.2f}s", mpi_timer_all_ranks[i]),
990 100 * (alloc_time_device_all_ranks[i] / time_part_inject_sec),
991 100 * (alloc_time_host_all_ranks[i] / time_part_inject_sec)),
997 table.add_rulled_data({
"",
"<avg> / <max>",
"<avg>",
"<avg>",
"<sum>",
"<sum>"});
1001 "{:.2f}s / {:.2f}s",
1003 max_time_rank_getter),
1006 "{:>.1f}% {:<.1f}%",
1007 100 * (sum_alloc_device / sum_t),
1008 100 * (sum_alloc_host / sum_t)),
1014 logger::info_ln(
"SPH setup",
"injection perf report:" + table.render());
1018 if (part_reordering) {
1019 modules::ParticleReordering<Tvec, u32, SPHKernel>(context, solver_config, storage)
1020 .reorder_particles();
1025 logger::normal_ln(
"SPH setup",
"the setup took :", time_setup.
elasped_sec(),
"s");
1029template<
class Tvec,
template<
class>
class SPHKernel>
1030inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
1031 SPHSetup<Tvec, SPHKernel>::make_modifier_warp_disc(
1032 SetupNodePtr parent, Tscal Rwarp, Tscal Hwarp, Tscal inclination, Tscal posangle) {
1033 return std::shared_ptr<ISPHSetupNode>(
new ModifierApplyDiscWarp<Tvec, SPHKernel>(
1034 context, solver_config, parent, Rwarp, Hwarp, inclination, posangle));
1037template<
class Tvec,
template<
class>
class SPHKernel>
1038inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
1039 SPHSetup<Tvec, SPHKernel>::make_modifier_custom_warp(
1040 SetupNodePtr parent,
1041 std::function<Tscal(Tscal)> inc_profile,
1042 std::function<Tscal(Tscal)> psi_profile,
1043 std::function<Tvec(Tscal)> k_profile) {
1044 return std::shared_ptr<ISPHSetupNode>(
new ModifierApplyCustomWarp<Tvec, SPHKernel>(
1045 context, solver_config, parent, inc_profile, psi_profile, k_profile));
1048template<
class Tvec,
template<
class>
class SPHKernel>
1049inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
1050 SPHSetup<Tvec, SPHKernel>::make_modifier_add_offset(
1051 SetupNodePtr parent, Tvec offset_postion, Tvec offset_velocity) {
1053 return std::shared_ptr<ISPHSetupNode>(
1054 new ModifierOffset<Tvec>(context, parent, offset_postion, offset_velocity));
1057template<
class Tvec,
template<
class>
class SPHKernel>
1060 SPHKernel>::make_modifier_filter(SetupNodePtr parent, std::function<
bool(Tvec)> filter) {
1062 return std::shared_ptr<ISPHSetupNode>(
1063 new ModifierFilter<Tvec, SPHKernel>(context, parent, filter));
1066template<
class Tvec,
template<
class>
class SPHKernel>
1067inline std::shared_ptr<shammodels::sph::modules::ISPHSetupNode> shammodels::sph::modules::
1068 SPHSetup<Tvec, SPHKernel>::make_modifier_split_part(
1069 SetupNodePtr parent,
u64 n_split,
u64 seed, Tscal h_scaling) {
1070 return std::shared_ptr<ISPHSetupNode>(
1071 new ModifierSplitPart<Tvec>(context, parent, n_split, seed, h_scaling));
constexpr const char * xyz
Position field (3D coordinates)
Header file describing a Node Instance.
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
Collective boolean reduction to check if all ranks have true as input.
bool are_all_rank_true(bool input, MPI_Comm comm)
return true only if all ranks have true as input
u64 crit_patch_split
splitting limit (if load value > crit_patch_split => patch split)
SchedulerPatchList patch_list
handle the list of the patches of the scheduler
std::vector< shamrock::patch::Patch > local
contains the list of patches owned by the current node
A buffer allocated in USM (Unified Shared Memory)
void copy_from_stdvec(const std::vector< T > &vec)
Copy the content of a std::vector into the buffer.
void resize(size_t new_size, bool keep_data=true)
Resizes the buffer to a given size.
void append(const DeviceBuffer &other)
Append the content of another buffer to this one.
size_t get_size() const
Gets the number of elements in the buffer.
Container for objects shared between two distributed data elements.
void for_each(std::function< void(u64, u64, T &)> &&f)
Apply a function to all stored objects.
iterator add_obj(u64 left_id, u64 right_id, T &&obj)
Add an object associated with a patch pair.
Represents a collection of objects distributed across patches identified by a u64 id.
iterator add_obj(u64 id, T &&obj)
Adds a new object to the collection.
T & get(u64 id)
Returns a reference to an object in the collection.
void reset()
Reset the collection to its initial state.
Class Timer measures the time elapsed since the timer was started.
void end()
Stops the timer and stores the elapsed time in nanoseconds.
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
void start()
Starts the timer.
Class to insert data in the PatchScheduler.
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
PatchCoordTransform< T > get_patch_transform() const
Get a PatchCoordTransform object that describes the conversion between patch coordinates and domain c...
std::vector< int > vector_allgatherv(const std::vector< T > &send_vec, const MPI_Datatype &send_type, std::vector< T > &recv_vec, const MPI_Datatype &recv_type, const MPI_Comm comm)
allgatherv on vector with size query (size querying variant of vector_allgatherv_ks) //TODO add fault...
MemPerfInfos get_mem_perf_info()
Retrieve the memory performance information.
Boolean reduction algorithm for checking if all elements are non-zero.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
std::string readable_sizeof(double size)
given a sizeof value return a readable string. Example : readable_sizeof(1024*1024*1024) -> "1....
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_error...
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
i32 world_size()
Gives the size of the MPI communicator.
namespace for math utility
constexpr u64 u64_max
u64 max value
void err(std::string module_name, Types... var2)
Prints a log message with multiple arguments.
#define __shamrock_stack_entry()
Macro to create a stack entry.
Structure to store the performance informations about memory allocation and deallocation.
f64 time_alloc_host
Time spent allocating memory on the host.
size_t max_allocated_byte_host
max bytes allocated on the host
f64 time_free_device
Time spent deallocating memory on the device.
size_t max_allocated_byte_device
max bytes allocated on the device
f64 time_alloc_device
Time spent allocating memory on the device.
f64 time_free_host
Time spent deallocating memory on the host.
A class that references multiple buffers or similar objects.
Patch object that contain generic patch information.
Functions related to the MPI communicator.
#define ON_RANK_0(x)
Macro to execute code only on rank 0.
void Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
MPI wrapper for MPI_Exscan.
void Barrier(MPI_Comm comm)
MPI wrapper for MPI_Barrier.
f64 get_timer(std::string timername)
get a timer value
void Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
MPI wrapper for MPI_Allreduce.