Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
timestep_report.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
22#include "shambase/string.hpp"
23#include "shambase/tabulate.hpp"
26#include "shamcomm/logs.hpp"
29#include <numeric>
30#include <string>
31#include <variant>
32#include <vector>
33
34std::string shammodels::report_perf_timestep(
35 f64 rate,
36 u64 nobj,
37 u64 npatch,
38 f64 tcompute,
39 f64 mpi_timer,
40 f64 alloc_time_device,
41 f64 alloc_time_host,
42 size_t max_mem_device,
43 size_t max_mem_host,
44 shamsys::SystemMetrics system_metrics,
45 bool report_power_usage) {
46
48
49 std::vector<f64> rate_all_ranks = shamalgs::collective::gather(rate);
50 std::vector<u64> nobj_all_ranks = shamalgs::collective::gather(nobj);
51 std::vector<u64> npatch_all_ranks = shamalgs::collective::gather(npatch);
52 std::vector<f64> tcompute_all_ranks = shamalgs::collective::gather(tcompute);
53 std::vector<f64> mpi_timer_all_ranks = shamalgs::collective::gather(mpi_timer);
54 std::vector<f64> alloc_time_device_all_ranks = shamalgs::collective::gather(alloc_time_device);
55 std::vector<f64> alloc_time_host_all_ranks = shamalgs::collective::gather(alloc_time_host);
56 std::vector<size_t> max_mem_device_all_ranks = shamalgs::collective::gather(max_mem_device);
57 std::vector<size_t> max_mem_host_all_ranks = shamalgs::collective::gather(max_mem_host);
58
59 auto rank_metrics = (report_power_usage) ? shamsys::gather_rank_metrics(system_metrics)
60 : std::vector<shamsys::SystemMetrics>{};
61
62 if (shamcomm::world_rank() != 0) {
63 return "";
64 }
65
66 // be careful with overflows
67 u64 obj_total = std::accumulate(nobj_all_ranks.begin(), nobj_all_ranks.end(), 0_u64);
68 u64 npatch_total = std::accumulate(npatch_all_ranks.begin(), npatch_all_ranks.end(), 0_u64);
69 f64 max_t = *std::max_element(tcompute_all_ranks.begin(), tcompute_all_ranks.end());
70 f64 sum_t = std::accumulate(tcompute_all_ranks.begin(), tcompute_all_ranks.end(), 0.0);
71 f64 sum_mpi = std::accumulate(mpi_timer_all_ranks.begin(), mpi_timer_all_ranks.end(), 0.0);
72 f64 sum_alloc_device = std::accumulate(
73 alloc_time_device_all_ranks.begin(), alloc_time_device_all_ranks.end(), 0.0);
74 f64 sum_alloc_host
75 = std::accumulate(alloc_time_host_all_ranks.begin(), alloc_time_host_all_ranks.end(), 0.0);
76 size_t sum_mem_device_total
77 = std::accumulate(max_mem_device_all_ranks.begin(), max_mem_device_all_ranks.end(), 0_u64);
78 size_t sum_mem_host_total
79 = std::accumulate(max_mem_host_all_ranks.begin(), max_mem_host_all_ranks.end(), 0_u64);
80
81 shamsys::SystemMetrics aggregated_metrics = shamsys::aggregate_rank_metrics(rank_metrics);
82
83 std::vector<shamsys::FormattedSystemMetrics> formatted_rank_metrics{};
84 shamsys::FormattedSystemMetrics formatted_aggregated_metrics = {};
85 if (report_power_usage) {
86 for (const auto &metric : rank_metrics) {
87 formatted_rank_metrics.push_back(shamsys::format_system_metrics(metric));
88 }
89 formatted_aggregated_metrics = shamsys::format_system_metrics(aggregated_metrics);
90 }
91
92 u32 cols_count = 9_u32;
93 if (report_power_usage) {
94 if (shamsys::support_rank_energy_consummed()) {
95 cols_count += 1_u32;
96 }
97 if (shamsys::support_gpu_energy_consummed()) {
98 cols_count += 1_u32;
99 }
100 if (shamsys::support_cpu_energy_consummed()) {
101 cols_count += 1_u32;
102 }
103 if (shamsys::support_dram_energy_consummed()) {
104 cols_count += 1_u32;
105 }
106 }
107
108 using Table = shambase::table;
109
110 Table table(cols_count);
111
112 std::vector<std::string> header
113 = {"rank",
114 "rate (N/s)",
115 "Nobj",
116 "Npatch",
117 "tstep",
118 "MPI",
119 "alloc d% h%",
120 "mem (max) d",
121 "mem (max) h"};
122 if (report_power_usage) {
123 if (shamsys::support_rank_energy_consummed()) {
124 header.push_back("power");
125 }
126 if (shamsys::support_gpu_energy_consummed()) {
127 header.push_back("gpu power");
128 }
129 if (shamsys::support_cpu_energy_consummed()) {
130 header.push_back("cpu power");
131 }
132 if (shamsys::support_dram_energy_consummed()) {
133 header.push_back("dram power");
134 }
135 }
136
137 table.add_double_rule();
138 table.add_data(header, Table::center);
139 table.add_double_rule();
140 for (u32 i = 0; i < shamcomm::world_size(); i++) {
141 std::vector<std::string> row = {
142 shambase::format("{:<4}", i),
143 shambase::format("{:.4e}", rate_all_ranks[i]),
144 shambase::format("{:}", nobj_all_ranks[i]),
145 shambase::format("{:}", npatch_all_ranks[i]),
146 shambase::format("{:.3e}", tcompute_all_ranks[i]),
147 shambase::format("{:.1f}%", 100 * (mpi_timer_all_ranks[i] / tcompute_all_ranks[i])),
148 shambase::format(
149 "{:>.1f}% {:<.1f}%",
150 100 * (alloc_time_device_all_ranks[i] / tcompute_all_ranks[i]),
151 100 * (alloc_time_host_all_ranks[i] / tcompute_all_ranks[i])),
152 shambase::format("{}", shambase::readable_sizeof(max_mem_device_all_ranks[i])),
153 shambase::format("{}", shambase::readable_sizeof(max_mem_host_all_ranks[i])),
154 };
155 if (report_power_usage) {
156 if (shamsys::support_rank_energy_consummed()) {
157 row.push_back(formatted_rank_metrics[i].rank_power.value_or("N/A"));
158 }
159 if (shamsys::support_gpu_energy_consummed()) {
160 row.push_back(formatted_rank_metrics[i].gpu_power.value_or("N/A"));
161 }
162 if (shamsys::support_cpu_energy_consummed()) {
163 row.push_back(formatted_rank_metrics[i].cpu_power.value_or("N/A"));
164 }
165 if (shamsys::support_dram_energy_consummed()) {
166 row.push_back(formatted_rank_metrics[i].dram_power.value_or("N/A"));
167 }
168 }
169 table.add_data(row, Table::right);
170 }
171 if (shamcomm::world_size() > 1) {
172 std::vector<std::string> ruled
173 = {"", "<sum N/max t>", "<sum>", "<sum>", "<max>", "<avg>", "<avg>", "<sum>", "<sum>"};
174 if (report_power_usage) {
175 if (shamsys::support_rank_energy_consummed()) {
176 ruled.push_back("<sum>");
177 }
178 if (shamsys::support_gpu_energy_consummed()) {
179 ruled.push_back("<sum>");
180 }
181 if (shamsys::support_cpu_energy_consummed()) {
182 ruled.push_back("<sum>");
183 }
184 if (shamsys::support_dram_energy_consummed()) {
185 ruled.push_back("<sum>");
186 }
187 }
188 table.add_rulled_data(ruled);
189 std::vector<std::string> all_row = {
190 "all",
191 shambase::format("{:.4e}", f64(obj_total) / max_t),
192 shambase::format("{:}", obj_total),
193 shambase::format("{:}", npatch_total),
194 shambase::format("{:.3e}", max_t),
195 shambase::format("{:.1f}%", 100 * (sum_mpi / sum_t)),
196 shambase::format(
197 "{:>.1f}% {:<.1f}%",
198 100 * (sum_alloc_device / sum_t),
199 100 * (sum_alloc_host / sum_t)),
200 shambase::format("{}", shambase::readable_sizeof(sum_mem_device_total)),
201 shambase::format("{}", shambase::readable_sizeof(sum_mem_host_total)),
202 };
203 if (report_power_usage) {
204 if (shamsys::support_rank_energy_consummed()) {
205 all_row.push_back(formatted_aggregated_metrics.rank_power.value_or("N/A"));
206 }
207 if (shamsys::support_gpu_energy_consummed()) {
208 all_row.push_back(formatted_aggregated_metrics.gpu_power.value_or("N/A"));
209 }
210 if (shamsys::support_cpu_energy_consummed()) {
211 all_row.push_back(formatted_aggregated_metrics.cpu_power.value_or("N/A"));
212 }
213 if (shamsys::support_dram_energy_consummed()) {
214 all_row.push_back(formatted_aggregated_metrics.dram_power.value_or("N/A"));
215 }
216 }
217 table.add_data(all_row, Table::right);
218 }
219 table.add_rule();
220
221 return "Timestep perf report:" + table.render();
222}
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
MPI string gather / allgather helpers (declarations; implementations in shamalgs/src/collective/gathe...
std::string readable_sizeof(double size)
Given a sizeof value, return a readable string. Example: readable_sizeof(1024*1024*1024) -> "1....
Definition string.hpp:139
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
Definition worldInfo.cpp:40
i32 world_size()
Gives the size of the MPI communicator.
Definition worldInfo.cpp:38
FormattedSystemMetrics format_system_metrics(const SystemMetrics &input)
Only to be used on deltas, not the raw one.
This file contains the definition for the stacktrace related functionality.
#define __shamrock_stack_entry()
Macro to create a stack entry.
Functions related to the MPI communicator.