24#include <unordered_map>
29 inline std::vector<std::string> _gather_strings_internal(
30 const std::vector<std::string> &inputs,
const std::string &delimiter,
bool is_allgather) {
31 std::string accum_loc =
"";
32 for (
auto &s : inputs) {
33 accum_loc += s + delimiter;
35 std::string recv =
"";
50 std::unordered_map<std::string, int> _string_histogram_all_fetch(
51 const std::vector<std::string> &inputs, std::string delimiter,
bool is_allgather) {
53 auto splitted = _gather_strings_internal(inputs, delimiter, is_allgather);
56 std::unordered_map<std::string, int> histogram;
57 for (
size_t i = 0; i < splitted.size(); i++) {
58 histogram[splitted[i]] += 1;
66 inline auto hash_inputs(
const std::vector<std::string> &inputs) {
67 std::vector<u64_2> fnv1a_in(inputs.size());
68 for (
size_t i = 0; i < inputs.size(); i++) {
80 auto data_to_case_info(
const std::vector<u64_2> &data) {
81 std::unordered_map<u64, CaseInfo> hash_case_info = {};
82 for (
size_t i = 0; i < data.size(); i++) {
83 auto hash = data[i].x();
84 auto rank = data[i].y();
86 if (hash_case_info.find(hash) == hash_case_info.end()) {
87 hash_case_info[hash] = {rank, 1};
89 hash_case_info[hash].count += 1;
90 hash_case_info[hash].min_rank_id = std::min(hash_case_info[hash].min_rank_id, rank);
93 return hash_case_info;
96 std::unordered_map<std::string, int> _string_histogram_hash_fetch(
97 const std::vector<std::string> &inputs, std::string delimiter,
bool is_allgather) {
100 std::vector<u64_2> fnv1a_in = hash_inputs(inputs);
103 std::vector<u64_2> fnv1a_recv;
107 std::unordered_map<u64, CaseInfo> hash_case_info = data_to_case_info(fnv1a_recv);
110 std::vector<std::string> restricted_inputs = {};
111 for (
size_t i = 0; i < inputs.size(); i++) {
112 auto hash = fnv1a_in[i].x();
114 restricted_inputs.push_back(inputs[i]);
118 auto histogram = _string_histogram_all_fetch(restricted_inputs, delimiter, is_allgather);
121 for (
auto &[word, cnt] : histogram) {
123 cnt =
static_cast<int>(hash_case_info[fnv].count);
132 const std::vector<std::string> &inputs, std::string delimiter,
bool hash_based) {
135 return _string_histogram_hash_fetch(inputs, delimiter,
false);
138 return _string_histogram_all_fetch(inputs, delimiter,
false);
142 const std::vector<std::string> &inputs, std::string delimiter,
bool hash_based) {
145 return _string_histogram_hash_fetch(inputs, delimiter,
true);
148 return _string_histogram_all_fetch(inputs, delimiter,
true);
std::uint64_t u64
64 bit unsigned integer
std::vector< int > vector_allgatherv(const std::vector< T > &send_vec, const MPI_Datatype &send_type, std::vector< T > &recv_vec, const MPI_Datatype &recv_type, const MPI_Comm comm)
allgatherv on vector with size query (size querying variant of vector_allgatherv_ks) //TODO add fault...
MPI string gather / allgather helpers (declarations; implementations in shamalgs/src/collective/gathe...
void allgather_str(const std::string &send_vec, std::string &recv_vec)
Allgathers a string from all nodes and concatenates it in a std::string.
void gather_str(const std::string &send_vec, std::string &recv_vec)
Gathers a string from all nodes and stores the result in a std::string.
u64 fnv1a_hash(const char *data, size_t size)
Compute the FNV-1a hash of a given data.
std::vector< std::string > split_str(std::string s, std::string delimiter)
Splits a string into a vector of substrings according to a delimiter.
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
This file contains the definition for the stacktrace related functionality.
MPI string gather / allgather helpers (declarations; implementations in shamalgs/src/collective/gathe...
std::unordered_map< std::string, int > string_histogram(const std::vector< std::string > &inputs, std::string delimiter, bool hash_based)
Constructs a histogram from a vector of strings, counting occurrences of each unique string.
std::unordered_map< std::string, int > all_string_histogram(const std::vector< std::string > &inputs, std::string delimiter, bool hash_based)
Same as string_histogram, but with the result returned on every rank.
Functions related to the MPI communicator.