39void PatchScheduler::init_mpi_required_types() {
46void PatchScheduler::free_mpi_required_types() {
54void PatchScheduler::make_patch_base_grid(std::array<u32, dim> patch_count) {
56 static_assert(dim == 3,
"this is not implemented for dim != 3");
58 u32 max_lin_patch_count = 0;
59 for (
u32 i = 0; i < dim; i++) {
60 max_lin_patch_count = sycl::max(max_lin_patch_count, patch_count[i]);
65 u64 sz_root_patch = PatchScheduler::max_axis_patch_coord_length / coord_div_fact;
67 std::vector<shamrock::patch::PatchCoord<3>> coords;
68 for (
u32 x = 0; x < patch_count[0]; x++) {
69 for (
u32 y = 0; y < patch_count[1]; y++) {
70 for (
u32 z = 0; z < patch_count[2]; z++) {
73 coord.coord_min[0] = sz_root_patch * (x);
74 coord.coord_min[1] = sz_root_patch * (y);
75 coord.coord_min[2] = sz_root_patch * (z);
76 coord.coord_max[0] = sz_root_patch * (x + 1) - 1;
77 coord.coord_max[1] = sz_root_patch * (y + 1) - 1;
78 coord.coord_max[2] = sz_root_patch * (z + 1) - 1;
80 coords.push_back(coord);
86 bounds.coord_min[0] = 0;
87 bounds.coord_min[1] = 0;
88 bounds.coord_min[2] = 0;
89 bounds.coord_max[0] = sz_root_patch * patch_count[0] - 1;
90 bounds.coord_max[1] = sz_root_patch * patch_count[1] - 1;
91 bounds.coord_max[2] = sz_root_patch * patch_count[2] - 1;
93 get_sim_box().set_patch_coord_bounding_box(bounds);
98template void PatchScheduler::make_patch_base_grid<3>(std::array<u32, 3> patch_count);
103 using namespace shamrock::patch;
105 std::vector<u64> ret;
107 for (
auto coord : coords) {
109 u32 node_owner_id = 0;
115 root.coord_min[0] = coord.coord_min[0];
116 root.coord_min[1] = coord.coord_min[1];
117 root.coord_min[2] = coord.coord_min[2];
118 root.coord_max[0] = coord.coord_max[0];
119 root.coord_max[1] = coord.coord_max[1];
120 root.coord_max[2] = coord.coord_max[2];
128 shamlog_debug_sycl_ln(
"Scheduler",
"adding patch data");
130 shamlog_debug_sycl_ln(
132 "patch data wasn't added rank =",
168 shamlog_debug_ln(
"Scheduler",
"pushing data obj cnt =", pdat.get_obj_cnt());
175 variant_main.visit([&](
auto &arg) {
176 using base_t =
typename std::remove_reference<
decltype(arg)>::type::field_T;
181 shamlog_debug_sycl_ln(
182 "Scheduler",
"pushing data in patch ", id_patch,
"search range :", bmin, bmax);
186 throw std::runtime_error(
"this does not yet work with dimension different from 3");
193 using namespace shamrock::patch;
196 coord.coord_min[0] = 0;
197 coord.coord_min[1] = 0;
198 coord.coord_min[2] = 0;
199 coord.coord_max[0] = max_axis_patch_coord;
200 coord.coord_max[1] = max_axis_patch_coord;
201 coord.coord_max[2] = max_axis_patch_coord;
208PatchScheduler::PatchScheduler(
209 const std::shared_ptr<shamrock::patch::PatchDataLayerLayout> &pdl_ptr,
215 {{0, 0, 0}, {max_axis_patch_coord, max_axis_patch_coord, max_axis_patch_coord}}) {
217 crit_patch_split = crit_split;
218 crit_patch_merge = crit_merge;
221PatchScheduler::~PatchScheduler() {}
223bool PatchScheduler::should_resize_box(
bool node_in) {
231void PatchScheduler::sync_build_LB(
bool global_patch_sync,
bool balance_load) {
235 if (global_patch_sync)
252std::tuple<f32_3, f32_3> PatchScheduler::get_box_tranform() {
253 if (!pdl_old().check_main_field_type<f32_3>())
255 "cannot query single precision box the main field is not of f32_3 type");
259 f32_3 translate_factor = bmin;
262 return {translate_factor, scale_factor};
266std::tuple<f64_3, f64_3> PatchScheduler::get_box_tranform() {
267 if (!pdl_old().check_main_field_type<f64_3>())
269 "cannot query single precision box the main field is not of f64_3 type");
273 f64_3 translate_factor = bmin;
276 return {translate_factor, scale_factor};
280std::tuple<f32_3, f32_3> PatchScheduler::get_box_volume() {
281 if (!pdl_old().check_main_field_type<f32_3>())
283 "cannot query single precision box the main field is not of f32_3 type");
289std::tuple<f64_3, f64_3> PatchScheduler::get_box_volume() {
290 if (!pdl_old().check_main_field_type<f64_3>())
292 "cannot query single precision box the main field is not of f64_3 type");
298std::tuple<i64_3, i64_3> PatchScheduler::get_box_volume() {
299 if (!pdl_old().check_main_field_type<i64_3>())
301 "cannot query single precision box the main field is not of i64_3 type");
312 if (!is_mpi_sycl_interop_active())
314 "sycl mpi interop not initialized");
317 shamlog_debug_ln(
"Scheduler",
"running scheduler step");
319 struct SchedulerStepTimers {
322 std::optional<shambase::Timer> global_idx_map_build = {};
323 std::optional<shambase::Timer> patch_tree_count_reduce = {};
324 std::optional<shambase::Timer> gen_merge_split_rq = {};
325 std::optional<u32_2> split_merge_cnt = {};
326 std::optional<shambase::Timer> apply_splits = {};
327 std::optional<shambase::Timer> load_balance_compute = {};
328 std::optional<u32> load_balance_move_op_cnt = {};
329 std::optional<shambase::Timer> load_balance_apply = {};
334 std::string str =
"";
335 str +=
"Scheduler step timings : ";
336 str += shambase::format(
337 "\n metadata sync : {:<10} ({:2.1f}%)",
340 if (patch_tree_count_reduce) {
341 str += shambase::format(
342 "\n patch tree reduce : {:<10} ({:2.1f}%)",
343 patch_tree_count_reduce->get_time_str(),
344 100 * (patch_tree_count_reduce->nanosec / total));
346 if (gen_merge_split_rq) {
347 str += shambase::format(
348 "\n gen split merge : {:<10} ({:2.1f}%)",
349 gen_merge_split_rq->get_time_str(),
350 100 * (gen_merge_split_rq->nanosec / total));
352 if (split_merge_cnt) {
353 str += shambase::format(
354 "\n split / merge op : {}/{}",
355 split_merge_cnt->x(),
356 split_merge_cnt->y());
359 str += shambase::format(
360 "\n apply split merge : {:<10} ({:2.1f}%)",
361 apply_splits->get_time_str(),
362 100 * (apply_splits->nanosec / total));
364 if (load_balance_compute) {
365 str += shambase::format(
366 "\n LB compute : {:<10} ({:2.1f}%)",
367 load_balance_compute->get_time_str(),
368 100 * (load_balance_compute->nanosec / total));
370 if (load_balance_move_op_cnt) {
371 str += shambase::format(
372 "\n LB move op cnt : {}", *load_balance_move_op_cnt);
374 if (load_balance_apply) {
375 str += shambase::format(
376 "\n LB apply : {:<10} ({:2.1f}%)",
377 load_balance_apply->get_time_str(),
378 100 * (load_balance_apply->nanosec / total));
380 logger::info_ln(
"Scheduler", str);
385 timers.global_timer.start();
389 timers.metadata_sync.start();
391 timers.metadata_sync.end();
395 std::unordered_set<u64> split_rq;
396 std::unordered_set<u64> merge_rq;
398 if (do_split_merge) {
403 timers.global_idx_map_build->
start();
406 timers.global_idx_map_build->end();
412 timers.patch_tree_count_reduce->
start();
414 timers.patch_tree_count_reduce->end();
420 timers.gen_merge_split_rq->
start();
423 timers.gen_merge_split_rq->end();
425 timers.split_merge_cnt = u32_2{split_rq.size(), merge_rq.size()};
444 timers.apply_splits->
start();
445 split_patches(split_rq);
446 timers.apply_splits->end();
453 set_patch_pack_values(merge_rq);
456 if (do_load_balancing) {
459 timers.load_balance_compute->
start();
463 timers.load_balance_compute->end();
465 timers.load_balance_move_op_cnt = change_list.change_ops.size();
468 timers.load_balance_apply->
start();
471 timers.load_balance_apply->end();
476 if (do_split_merge) {
478 merge_patches(merge_rq);
491 if (split_rq.size() > 0 || merge_rq.size() > 0) {
497 timers.global_timer.end();
498 timers.print_stats();
568std::string PatchScheduler::dump_status() {
570 using namespace shamrock::patch;
572 std::stringstream ss;
574 ss <<
"----- MPI Scheduler dump -----\n\n";
575 ss <<
" -> SchedulerPatchList\n";
580 ss <<
" global content : \n";
583 ss <<
" -> " << p.id_patch <<
" : " << p.load_value <<
" " << p.node_owner_id <<
" "
584 << p.pack_node_index <<
" "
585 <<
"( [" << p.coord_min[0] <<
"," << p.coord_max[0] <<
"] "
586 <<
" [" << p.coord_min[1] <<
"," << p.coord_max[1] <<
"] "
587 <<
" [" << p.coord_min[2] <<
"," << p.coord_max[2] <<
"] )\n";
589 ss <<
" local content : \n";
592 ss <<
" -> id : " << p.id_patch <<
" : " << p.load_value <<
" " << p.node_owner_id
593 <<
" " << p.pack_node_index <<
" "
594 <<
"( [" << p.coord_min[0] <<
"," << p.coord_max[0] <<
"] "
595 <<
" [" << p.coord_min[1] <<
"," << p.coord_max[1] <<
"] "
596 <<
" [" << p.coord_min[2] <<
"," << p.coord_max[2] <<
"] )\n";
599 ss << shambase::format(
601 ss << shambase::format(
604 ss <<
" -> SchedulerPatchData\n";
605 ss <<
" owned data : \n";
608 ss <<
"patch id : " << patch_id <<
" len = " << pdat.get_obj_cnt() <<
"\n";
621 ss <<
" -> SchedulerPatchTree\n";
624 ss << shambase::format(
625 " -> id : {} -> ({}) <=> {} [{}, {}] (cl={} il={} l={} pid={})\n",
627 pnode.tree_node.childs_nid,
628 pnode.linked_patchid,
629 pnode.patch_coord.coord_min,
630 pnode.patch_coord.coord_max,
631 pnode.tree_node.child_are_all_leafs,
632 pnode.tree_node.is_leaf,
633 pnode.tree_node.level,
634 pnode.tree_node.parent_nid);
642 if (pdl_old().check_main_field_type<f32_3>()) {
644 ret = shambase::format(
"coord = {} {}", bmin, bmax);
645 }
else if (pdl_old().check_main_field_type<f64_3>()) {
647 ret = shambase::format(
"coord = {} {}", bmin, bmax);
648 }
else if (pdl_old().check_main_field_type<u32_3>()) {
650 ret = shambase::format(
"coord = {} {}", bmin, bmax);
651 }
else if (pdl_old().check_main_field_type<u64_3>()) {
653 ret = shambase::format(
"coord = {} {}", bmin, bmax);
656 "the main field does not match any");
666 using namespace shamrock::patch;
671 main_field.check_err_range(
672 [&](vec val, vec vmin, vec vmax) {
673 return Patch::is_in_patch_converted(val, vmin, vmax);
677 shambase::format(
"patch id = {}", pid));
681void PatchScheduler::check_patchdata_locality_correctness() {
685 if (pdl_old().check_main_field_type<f32_3>()) {
686 check_locality_t<f32_3>(*
this);
687 }
else if (pdl_old().check_main_field_type<f64_3>()) {
688 check_locality_t<f64_3>(*
this);
689 }
else if (pdl_old().check_main_field_type<u32_3>()) {
690 check_locality_t<u32_3>(*
this);
691 }
else if (pdl_old().check_main_field_type<u64_3>()) {
692 check_locality_t<u64_3>(*
this);
693 }
else if (pdl_old().check_main_field_type<i64_3>()) {
694 check_locality_t<i64_3>(*
this);
697 "the main field does not match any");
701void PatchScheduler::split_patches(std::unordered_set<u64> split_rq) {
703 for (
u64 tree_id : split_rq) {
711 auto [idx_p0, idx_p1, idx_p2, idx_p3, idx_p4, idx_p5, idx_p6, idx_p7]
714 u64 old_patch_id = splitted_node.linked_patchid;
716 splitted_node.linked_patchid =
u64_max;
746 logger::err_ln(
"SchedulerPatchData",
"catched range issue with patchdata split");
748 logger::raw_ln(
" old patch", old_patch.
id_patch, format_patch_coord(old_patch));
750 logger::err_ln(
"Scheduler",
"global patch list :");
752 logger::raw_ln(
" patch", p.id_patch, format_patch_coord(p));
756 "\n Initial error : "
762inline void PatchScheduler::merge_patches(std::unordered_set<u64> merge_rq) {
764 for (
u64 tree_id : merge_rq) {
809 to_merge_node.linked_patchid = patch_id0;
813inline void PatchScheduler::set_patch_pack_values(std::unordered_set<u64> merge_rq) {
815 for (
u64 tree_id : merge_rq) {
826 for (
u8 i = 1; i < 8; i++) {
831 .pack_node_index = idx_pack;
836void PatchScheduler::dump_local_patches(std::string filename) {
838 using namespace shamrock::patch;
840 std::ofstream fout(filename);
842 if (pdl_old().check_main_field_type<f32_3>()) {
844 std::tuple<f32_3, f32_3> box_transform = get_box_tranform<f32_3>();
849 = f32_3{p.coord_min[0], p.coord_min[1], p.coord_min[2]} * std::get<1>(box_transform)
850 + std::get<0>(box_transform);
851 f32_3 box_max = (f32_3{p.coord_max[0], p.coord_max[1], p.coord_max[2]} + 1)
852 * std::get<1>(box_transform)
853 + std::get<0>(box_transform);
855 fout << p.id_patch <<
"|" << p.load_value <<
"|" << p.node_owner_id <<
"|"
856 << p.pack_node_index <<
"|" << box_min.x() <<
"|" << box_max.x() <<
"|"
857 << box_min.y() <<
"|" << box_max.y() <<
"|" << box_min.z() <<
"|" << box_max.z()
863 }
else if (pdl_old().check_main_field_type<f64_3>()) {
865 std::tuple<f64_3, f64_3> box_transform = get_box_tranform<f64_3>();
870 = f64_3{p.coord_min[0], p.coord_min[1], p.coord_min[2]} * std::get<1>(box_transform)
871 + std::get<0>(box_transform);
872 f64_3 box_max = (f64_3{p.coord_max[0], p.coord_max[1], p.coord_max[3]} + 1)
873 * std::get<1>(box_transform)
874 + std::get<0>(box_transform);
876 fout << p.id_patch <<
"|" << p.load_value <<
"|" << p.node_owner_id <<
"|"
877 << p.pack_node_index <<
"|" << box_min.x() <<
"|" << box_max.x() <<
"|"
878 << box_min.y() <<
"|" << box_max.y() <<
"|" << box_min.z() <<
"|" << box_max.z()
886 "the chosen type for the main field is not handled");
891 std::unique_ptr<shamcomm::CommunicationBuffer> buf;
896void send_messages(std::vector<Message> &msgs, std::vector<MPI_Request> &rqs) {
897 for (
auto &msg : msgs) {
898 rqs.push_back(MPI_Request{});
899 u32 rq_index = rqs.size() - 1;
900 auto &rq = rqs[rq_index];
902 u64 bsize = msg.buf->get_size();
903 if (bsize % 8 != 0) {
905 "the following mpi comm assume that we can send longs to pack 8byte");
907 u64 lcount = bsize / 8;
923void recv_probe_messages(std::vector<Message> &msgs, std::vector<MPI_Request> &rqs) {
925 for (
auto &msg : msgs) {
926 rqs.push_back(MPI_Request{});
927 u32 rq_index = rqs.size() - 1;
928 auto &rq = rqs[rq_index];
935 msg.buf = std::make_unique<shamcomm::CommunicationBuffer>(
936 cnt * 8, shamsys::instance::get_compute_scheduler_ptr());
939 msg.buf->get_ptr(), cnt, get_mpi_type<u64>(), msg.rank, msg.tag, MPI_COMM_WORLD, &rq);
943std::vector<std::unique_ptr<shamrock::patch::PatchDataLayer>> PatchScheduler::gather_data(
946 using namespace shamrock::patch;
953 ser.allocate(pdat.serialize_buf_byte_size());
954 pdat.serialize_buf(ser);
955 return ser.finalize();
961 shamsys::instance::get_compute_scheduler_ptr(),
963 return shamrock::patch::PatchDataLayer::deserialize_buf(ser, get_layout_ptr_old());
966 std::vector<Message> send_payloads;
968 for (
u32 i = 0; i < plist.size(); i++) {
969 auto &cpatch = plist[i];
971 auto &patchdata = pdata.get(cpatch.id_patch);
975 send_payloads.push_back(
977 std::make_unique<shamcomm::CommunicationBuffer>(
978 std::move(tmp), shamsys::instance::get_compute_scheduler_ptr()),
984 std::vector<MPI_Request> rqs;
985 send_messages(send_payloads, rqs);
987 std::vector<Message> recv_payloads;
990 for (
u32 i = 0; i < plist.size(); i++) {
991 recv_payloads.push_back(
993 std::unique_ptr<shamcomm::CommunicationBuffer>{},
994 i32(plist[i].node_owner_id),
1000 recv_probe_messages(recv_payloads, rqs);
1002 std::vector<MPI_Status> st_lst(rqs.size());
1005 std::vector<std::unique_ptr<PatchDataLayer>> ret;
1006 for (
auto &recv_msg : recv_payloads) {
1012 ret.push_back(std::make_unique<PatchDataLayer>(deserializer(std::move(buf))));
1018nlohmann::json PatchScheduler::serialize_patch_metadata() {
1020 nlohmann::json jsim_box;
1026 {
"patchdata_layout", pdl_old()},
1027 {
"sim_box", jsim_box},
function to run load balancing with the Hilbert curve
Header file describing a Node Instance.
double f64
Alias for double.
std::uint8_t u8
8 bit unsigned integer
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::uint16_t u16
16 bit unsigned integer
std::int32_t i32
32 bit integer
void for_each_patch_data(Function &&fct)
for-each macro for patchdata, example usage
SchedulerPatchData patch_data
handle the data of the patches of the scheduler
u64 crit_patch_split
splitting limit (if load value > crit_patch_split => patch split)
PatchTree patch_tree
handle the tree structure of the patches
void scheduler_step(bool do_split_merge, bool do_load_balancing)
scheduler step
SchedulerPatchList patch_list
handle the list of the patches of the scheduler
std::unordered_set< u64 > owned_patch_id
(owned_patch_id = patch_list.build_local())
std::vector< u64 > add_root_patches(std::vector< shamrock::patch::PatchCoord< 3 > > coords)
add a root patch to the scheduler
u64 crit_patch_merge
merging limit (if load value < crit_patch_merge => patch merge)
void allpush_data(shamrock::patch::PatchDataLayer &pdat)
push data in the scheduler. The content of pdat has to be the same for each node
void add_root_patch()
add patch to the scheduler
std::vector< shamrock::patch::Patch > local
contain the list of patch owned by the current node
void reset_local_pack_index()
reset Patch's pack index value
std::unordered_map< u64, u64 > id_patch_to_local_idx
id_patch_to_local_idx[patch_id] = index in local patch list
std::vector< shamrock::patch::Patch > global
contain the list of all patches in the simulation
void build_global()
rebuild global from the local list of each tables
void invalidate_load_values()
Invalidate current load values (to be used after a change to the patches is made)
u64 _next_patch_id
The next available patch id.
std::tuple< u64, u64, u64, u64, u64, u64, u64, u64 > split_patch(u64 id_patch)
split the Patch having id_patch as id and return the index of the 8 subpatches in the global vector
std::unordered_set< u64 > build_local()
select the patches owned by this node to rebuild the local list
std::unordered_map< u64, u64 > id_patch_to_global_idx
id_patch_to_global_idx[patch_id] = index in global patch list
void merge_patch(u64 idx0, u64 idx1, u64 idx2, u64 idx3, u64 idx4, u64 idx5, u64 idx6, u64 idx7)
merge the 8 given patches index in the global vector
void build_local_idx_map()
recompute id_patch_to_local_idx
void check_load_values_valid(SourceLocation loc=SourceLocation{})
Check if the load values are valid, throw otherwise.
void build_global_idx_map()
recompute id_patch_to_global_idx
A buffer allocated in USM (Unified Shared Memory)
Class Timer measures the time elapsed since the timer was started.
std::string get_time_str() const
Converts the stored nanosecond time to a string representation.
void start()
Starts the timer.
f64 nanosec
Time in nanosecond.
Shamrock communication buffers.
static sham::DeviceBuffer< u8 > convert_usm(CommunicationBuffer &&buf)
destroy the buffer and recover the held object
const var_t & get_main_field_any() const
Get the main field description as a variant object.
PatchDataLayer container class, the layout is described in patchdata_layout.
void insert_elements_in_range(PatchDataLayer &pdat, T bmin, T bmax)
insert elements of pdat only if they are within the range
std::tuple< T, T > get_bounding_box() const
Get the stored bounding box of the domain.
void to_json(nlohmann::json &j)
Serializes a SimulationBoxInfo object to a JSON object.
std::tuple< T, T > patch_coord_to_domain(const Patch &p) const
get the patch coordinates on the domain
static LoadBalancingChangeList make_change_list(std::vector< shamrock::patch::Patch > &global_patch_list)
generate the change list from the list of patch to run the load balancing
static constexpr u64 max_box_sz
maximal value along an axis for the patch coordinate
std::array< u64, 8 > childs_nid
Array of child node ids.
Node information in the patchtree + held patch info.
std::unordered_set< u64 > get_merge_request(u64 crit_load_merge)
Get list of nodes id to merge.
std::unordered_set< u64 > get_split_request(u64 crit_load_split)
Get list of nodes id to split.
void partial_values_reduction(std::vector< Patch > &plist, const std::unordered_map< u64, u64 > &id_patch_to_global_idx)
update values in leafs and parent_of_only_leaf_key only
void merge_node_dm1(u64 idparent)
merge the children of idparent (idparent should have only leaves as children)
std::unordered_map< u64, Node > tree
store the tree using a map
void split_node(u64 id)
split a leaf node
void merge_patchdata(u64 new_key, const std::array< u64, 8 > old_keys)
merge 8 old patchdata into one
shamrock::patch::SimulationBoxInfo sim_box
simulation box geometry info
void apply_change_list(const shamrock::scheduler::LoadBalancingChangeList &change_list, SchedulerPatchList &patch_list)
apply a load balancing change list to shuffle patchdata around the cluster
void split_patchdata(u64 key_orginal, const std::array< shamrock::patch::Patch, 8 > patches)
split a patchdata into 8 children according to the 8 patches given in arguments
shambase::DistributedData< PatchData > owned_data
map container for patchdata owned by the current node (layout : id_patch,data)
This header file contains utility functions related to exception handling in the code.
constexpr T roundup_pow2_clz(T v) noexcept
round up to the next power of two 0 is rounded up to 1 as it is not a pow of 2 every input above the ...
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
std::string increase_indent(std::string in, std::string delim="\n ")
Increase indentation of a string.
auto extract_pointer(std::unique_ptr< T > &o, SourceLocation loc=SourceLocation()) -> T
extract content out of unique_ptr
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
constexpr u64 u64_max
u64 max value
constexpr i32 i32_max
i32 max value
header for PatchData related function and declaration
This file contains the definition for the stacktrace related functionality.
Patch object that contain generic patch information.
u64 pack_node_index
this value mean "to pack with index xxx in the global patch table" and not "to pack with id_pach == x...
u32 node_owner_id
node rank owner of this patch
u64 load_value
if synchronized contain the load value of the patch
u64 id_patch
unique key that identify the patch
header file to manage sycl
void Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count)
MPI wrapper for MPI_Get_count.
void Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request)
MPI wrapper for MPI_Irecv.
void Probe(int source, int tag, MPI_Comm comm, MPI_Status *status)
MPI wrapper for MPI_Probe.
void Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
MPI wrapper for MPI_Allreduce.
void Waitall(int count, MPI_Request array_of_requests[], MPI_Status *array_of_statuses)
MPI wrapper for MPI_Waitall.
void Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)
MPI wrapper for MPI_Isend.