Note
Go to the end to download the full example code.
Weak scale test for SPH
This example tests the weak-scaling performance of the SPH solver. It is based on a Sedov blast wave simulation in which the number of particles is scaled with the number of processes. Run it only if you have enough memory available.
from statistics import mean, stdev
import time

import shamrock

# Weak-scaling benchmark script: for each base particle count, build a periodic
# HCP lattice whose size grows with the MPI world size, inject internal energy
# around the origin, then replay the same solver step several times and report
# the best measured rate together with the system metrics and MPI timers of
# that step.

# Number of timed replays of the step performed for each configuration.
N_BENCH_STEPS = 10

result_text = ""

for N_target_base in [32e6]:
    shamrock.backends.reset_mem_info_max()

    gamma = 5.0 / 3.0
    rho_g = 1

    bmin = (-0.6, -0.6, -0.6)
    bmax = (0.6, 0.6, 0.6)

    # Weak scaling: the requested particle count is proportional to the number
    # of MPI ranks.
    compute_multiplier = shamrock.sys.world_size()
    # compute_multiplier = 12
    scheduler_split_val = int(2e7)
    scheduler_merge_val = int(1)

    N_target = N_target_base * compute_multiplier
    xm, ym, zm = bmin
    xM, yM, zM = bmax
    vol_b = (xM - xm) * (yM - ym) * (zM - zm)

    if shamrock.sys.world_rank() == 0:
        print("N_target_base", N_target_base)
        print("compute_multiplier", compute_multiplier)
        print("scheduler_split_val", scheduler_split_val)
        print("scheduler_merge_val", scheduler_merge_val)
        print("N_target", N_target)
        print("vol_b", vol_b)

    part_vol = vol_b / N_target

    # lattice volume
    part_vol_lattice = 0.74 * part_vol

    # Radius of a sphere of volume part_vol_lattice. The rounded value of pi is
    # kept on purpose so that the generated lattice is unchanged.
    dr = (part_vol_lattice / ((4.0 / 3.0) * 3.1416)) ** (1.0 / 3.0)

    ctx = shamrock.Context()
    ctx.pdata_layout_new()

    model = shamrock.get_Model_SPH(context=ctx, vector_type="f64_3", sph_kernel="M4")

    cfg = model.gen_default_config()
    # cfg.set_artif_viscosity_Constant(alpha_u = 1, alpha_AV = 1, beta_AV = 2)
    # cfg.set_artif_viscosity_VaryingMM97(alpha_min = 0.1,alpha_max = 1,sigma_decay = 0.1, alpha_u = 1, beta_AV = 2)
    cfg.set_artif_viscosity_VaryingCD10(
        alpha_min=0.0, alpha_max=1, sigma_decay=0.1, alpha_u=1, beta_AV=2
    )
    cfg.set_boundary_periodic()
    cfg.set_eos_adiabatic(gamma)
    cfg.print_status()
    model.set_solver_config(cfg)
    model.init_scheduler(scheduler_split_val, scheduler_merge_val)

    # Replace the requested box by the one returned for the HCP lattice, and
    # refresh the box extents used for the volume below.
    bmin, bmax = model.get_ideal_hcp_box(dr, bmin, bmax)
    xm, ym, zm = bmin
    xM, yM, zM = bmax

    model.resize_simulation_box(bmin, bmax)

    setup = model.get_setup()
    gen = setup.make_generator_lattice_hcp(dr, bmin, bmax, discontinuous=False)

    # Kind of optimized for Aurora
    setup.apply_setup(
        gen,
        gen_step=int(scheduler_split_val / 8),
        insert_step=int(scheduler_split_val * 2),
        msg_count_limit=1024,
        rank_comm_size_limit=int(scheduler_split_val) * 2,
        max_msg_size=int(scheduler_split_val / 8),
        do_setup_log=False,
        speculative_balancing=True,
    )

    xc, yc, zc = model.get_closest_part_to((0, 0, 0))

    if shamrock.sys.world_rank() == 0:
        print("closest part to (0,0,0) is in :", xc, yc, zc)

    # Volume of the adjusted box (bmin/bmax were replaced above).
    vol_b = (xM - xm) * (yM - ym) * (zM - zm)

    totmass = rho_g * vol_b

    pmass = model.total_mass_to_part_mass(totmass)

    # Zero the "uint" field over the whole box, then add the injected value
    # within a radius of 16 * dr around the origin.
    model.set_value_in_a_box("uint", "f64", 0, bmin, bmax)

    rinj = 16 * dr
    u_inj = 1
    model.add_kernel_value("uint", "f64", u_inj, (0, 0, 0), rinj)

    tot_u = pmass * model.get_sum("uint", "f64")
    if shamrock.sys.world_rank() == 0:
        print("total u :", tot_u)

    model.set_particle_mass(pmass)

    model.set_cfl_cour(0.1)
    model.set_cfl_force(0.1)

    shamrock.backends.reset_mem_info_max()

    # converge smoothing length and compute initial dt
    model.timestep()

    # Now run the actual benchmark for N_BENCH_STEPS steps
    res_rates = []
    res_cnts = []
    res_system_metrics = []
    res_mpi_timers = []

    # Callbacks measuring the solver MPI usage: the MPI timers are fetched
    # once at the beginning and once at the end of each step, and the
    # difference is recorded after the step.
    before_mpi_timers, after_mpi_timers = None, None

    def callback_before_mpi_timer():
        """Store the MPI timers as they are when the step begins."""
        global before_mpi_timers
        before_mpi_timers = shamrock.comm.get_timers()

    def callback_after_mpi_timer():
        """Store the MPI timers as they are when the step ends."""
        global after_mpi_timers
        after_mpi_timers = shamrock.comm.get_timers()

    model.add_timestep_callback(
        step_begin=callback_before_mpi_timer, step_end=callback_after_mpi_timer
    )

    for i in range(N_BENCH_STEPS):
        if shamrock.sys.world_rank() == 0:
            print("running step ", i + 1, "/", N_BENCH_STEPS, " ...")

        shamrock.sys.mpi_barrier()

        # To replay the same step
        model.set_next_dt(0.0)
        model.timestep()

        if shamrock.sys.world_rank() == 0:
            print("collecting results ...")

        res_rates.append(model.solver_logs_last_rate())
        res_cnts.append(model.solver_logs_last_obj_count())
        res_system_metrics.append(model.solver_logs_last_system_metrics())
        res_mpi_timers.append(shamrock.comm.mpi_timers_delta(before_mpi_timers, after_mpi_timers))

        if shamrock.sys.world_rank() == 0:
            print("sleeping 1 second ...")

        time.sleep(1)

        if shamrock.sys.world_rank() == 0:
            print("done sleeping 1 second ...")

    # The result is the best rate over the N_BENCH_STEPS steps. Every other
    # reported quantity (object count, system metrics, MPI timers) is taken
    # from that same step so that the derived step time is self-consistent.
    res_rate = max(res_rates)
    max_rate_index = res_rates.index(res_rate)
    res_cnt = res_cnts[max_rate_index]
    max_rate_system_metrics = res_system_metrics[max_rate_index]
    max_mpi_timers = res_mpi_timers[max_rate_index]
    step_time = res_cnt / res_rate

    if shamrock.sys.world_rank() == 0:
        world_size = shamrock.sys.world_size()

        result_text += f"--- final score for N_target_base={N_target_base} ---\n"
        result_text += f"world size : {world_size}\n"
        result_text += f"result rate : {res_rate}\n"
        result_text += f"result cnt : {res_cnt}\n"
        result_text += f"cnt/rank : {res_cnt / world_size}\n"
        result_text += f"result rate per rank : {res_rate / world_size}\n"
        result_text += (
            f"rates infos : max={res_rate}, min={min(res_rates)}, "
            f"mean={mean(res_rates)}, stddev={stdev(res_rates)}\n"
        )
        result_text += f"res_rates = {res_rates}\n"
        result_text += f"res_cnts = {res_cnts}\n"
        result_text += f"step time = {step_time}\n"

        dic_out = {
            "world_size": world_size,
            "rate": res_rate,
            "cnt": res_cnt,
            "step_time": step_time,
            "mpi_timers": max_mpi_timers,
        }

        # Print the system metrics of the best step. Two separate passes are
        # kept so that all raw values are listed before all derived powers.
        metrics_duration = max_rate_system_metrics["duration"]
        result_text += "system metrics:\n"
        for key, value in max_rate_system_metrics.items():
            if key != "duration":
                result_text += f"{key}: {value} J\n"
                dic_out[key] = value

        for key, value in max_rate_system_metrics.items():
            if key != "duration":
                result_text += f"avg power {key} / step time : {value / metrics_duration} W\n"
                dic_out[f"power_{key}"] = value / metrics_duration

        dic_out["system_metric_duration"] = metrics_duration

        result_text += f"dic_out = {dic_out}\n"

        print("current results:")
        print(result_text)

if shamrock.sys.world_rank() == 0:
    print(result_text)
Estimated memory usage: 0 MB