Weak scale test for SPH

This example tests the weak-scaling performance of the SPH solver. It is based on a Sedov blast wave simulation in which the number of particles is scaled with the number of processes. Run it only if you have enough memory available.

 10 from statistics import mean, stdev
 11
 12 import shamrock
 13
 14 result_text = ""
 15
 16 for N_target_base in [32e6]:
 17     shamrock.backends.reset_mem_info_max()
 18
 19     gamma = 5.0 / 3.0
 20     rho_g = 1
 21     target_tot_u = 1
 22
 23     bmin = (-0.6, -0.6, -0.6)
 24     bmax = (0.6, 0.6, 0.6)
 25
 26     compute_multiplier = shamrock.sys.world_size()
 27     # compute_multiplier = 12
 28     scheduler_split_val = int(2e7)
 29     scheduler_merge_val = int(1)
 30
 31     N_target = N_target_base * compute_multiplier
 32     xm, ym, zm = bmin
 33     xM, yM, zM = bmax
 34     vol_b = (xM - xm) * (yM - ym) * (zM - zm)
 35
 36     if shamrock.sys.world_rank() == 0:
 37         print("N_target_base", N_target_base)
 38         print("compute_multiplier", compute_multiplier)
 39         print("scheduler_split_val", scheduler_split_val)
 40         print("scheduler_merge_val", scheduler_merge_val)
 41         print("N_target", N_target)
 42         print("vol_b", vol_b)
 43
 44     part_vol = vol_b / N_target
 45
 46     # lattice volume
 47     part_vol_lattice = 0.74 * part_vol
 48
 49     dr = (part_vol_lattice / ((4.0 / 3.0) * 3.1416)) ** (1.0 / 3.0)
 50
 51     pmass = -1
 52
 53     ctx = shamrock.Context()
 54     ctx.pdata_layout_new()
 55
 56     model = shamrock.get_Model_SPH(context=ctx, vector_type="f64_3", sph_kernel="M4")
 57
 58     cfg = model.gen_default_config()
 59     # cfg.set_artif_viscosity_Constant(alpha_u = 1, alpha_AV = 1, beta_AV = 2)
 60     # cfg.set_artif_viscosity_VaryingMM97(alpha_min = 0.1,alpha_max = 1,sigma_decay = 0.1, alpha_u = 1, beta_AV = 2)
 61     cfg.set_artif_viscosity_VaryingCD10(
 62         alpha_min=0.0, alpha_max=1, sigma_decay=0.1, alpha_u=1, beta_AV=2
 63     )
 64     cfg.set_boundary_periodic()
 65     cfg.set_eos_adiabatic(gamma)
 66     cfg.print_status()
 67     model.set_solver_config(cfg)
 68     model.init_scheduler(scheduler_split_val, scheduler_merge_val)
 69
 70     bmin, bmax = model.get_ideal_hcp_box(dr, bmin, bmax)
 71     xm, ym, zm = bmin
 72     xM, yM, zM = bmax
 73
 74     model.resize_simulation_box(bmin, bmax)
 75
 76     setup = model.get_setup()
 77     gen = setup.make_generator_lattice_hcp(dr, bmin, bmax, discontinuous=False)
 78
 79     # Kind of optimized for Aurora
 80     setup.apply_setup(
 81         gen,
 82         gen_step=int(scheduler_split_val / 8),
 83         insert_step=int(scheduler_split_val * 2),
 84         msg_count_limit=1024,
 85         rank_comm_size_limit=int(scheduler_split_val) * 2,
 86         max_msg_size=int(scheduler_split_val / 8),
 87         do_setup_log=False,
 88         speculative_balancing=True,
 89     )
 90
 91     xc, yc, zc = model.get_closest_part_to((0, 0, 0))
 92
 93     if shamrock.sys.world_rank() == 0:
 94         print("closest part to (0,0,0) is in :", xc, yc, zc)
 95
 96     vol_b = (xM - xm) * (yM - ym) * (zM - zm)
 97
 98     totmass = rho_g * vol_b
 99     # print("Total mass :", totmass)
100
101     pmass = model.total_mass_to_part_mass(totmass)
102
103     model.set_value_in_a_box("uint", "f64", 0, bmin, bmax)
104
105     rinj = 16 * dr
106     u_inj = 1
107     model.add_kernel_value("uint", "f64", u_inj, (0, 0, 0), rinj)
108
109     tot_u = pmass * model.get_sum("uint", "f64")
110     if shamrock.sys.world_rank() == 0:
111         print("total u :", tot_u)
112
113     # print("Current part mass :", pmass)
114     model.set_particle_mass(pmass)
115
116     model.set_cfl_cour(0.1)
117     model.set_cfl_force(0.1)
118
119     shamrock.backends.reset_mem_info_max()
120
121     # converge smoothing length and compute initial dt
122     model.timestep()
123
124     # Now run the actual benchmark for 5 steps
125     res_rates = []
126     res_cnts = []
127     res_system_metrics = []
128     res_mpi_timers = []
129
130     """
131     Here we insert callbacks to measure solver MPI usage by fetching the timers twice at the begining and end of the step
132     """
133     before_mpi_timers, after_mpi_timers = None, None
134
135     def callback_before_mpi_timer():
136         global before_mpi_timers
137         # print(shamrock.sys.world_rank(), "register before_mpi_timers")
138         before_mpi_timers = shamrock.comm.get_timers()
139
140     def callback_after_mpi_timer():
141         global after_mpi_timers
142         # print(shamrock.sys.world_rank(), "register after_mpi_timers")
143         after_mpi_timers = shamrock.comm.get_timers()
144
145     model.add_timestep_callback(
146         step_begin=callback_before_mpi_timer, step_end=callback_after_mpi_timer
147     )
148
149     for i in range(10):
150         if shamrock.sys.world_rank() == 0:
151             print("running step ", i + 1, "/", 10, " ...")
152
153         shamrock.sys.mpi_barrier()
154
155         # To replay the same step
156         model.set_next_dt(0.0)
157         model.timestep()
158
159         if shamrock.sys.world_rank() == 0:
160             print("collecting results ...")
161
162         tmp_res_rate, tmp_res_cnt, tmp_system_metrics = (
163             model.solver_logs_last_rate(),
164             model.solver_logs_last_obj_count(),
165             model.solver_logs_last_system_metrics(),
166         )
167         res_rates.append(tmp_res_rate)
168         res_cnts.append(tmp_res_cnt)
169         res_system_metrics.append(tmp_system_metrics)
170         res_mpi_timers.append(shamrock.comm.mpi_timers_delta(before_mpi_timers, after_mpi_timers))
171
172         if shamrock.sys.world_rank() == 0:
173             print("sleeping 1 second ...")
174
175         import time
176
177         time.sleep(1)
178
179         if shamrock.sys.world_rank() == 0:
180             print("done sleeping 1 second ...")
181
182     # result is the best rate of the 5 steps
183     res_rate, res_cnt = max(res_rates), res_cnts[0]
184
185     # index of the max rate
186     max_rate_index = res_rates.index(max(res_rates))
187     max_rate_system_metrics = res_system_metrics[max_rate_index]
188     max_mpi_timers = res_mpi_timers[max_rate_index]
189     step_time = res_cnt / res_rate
190
191     if shamrock.sys.world_rank() == 0:
192         result_text += f"--- final score for N_target_base={N_target_base} ---"
193         result_text += f"world size  : {shamrock.sys.world_size()}\n"
194         result_text += f"result rate : {res_rate}\n"
195         result_text += f"result cnt  : {res_cnt}\n"
196         result_text += f"cnt/rank    : {res_cnt / shamrock.sys.world_size()}\n"
197         result_text += f"result rate per rank : {res_rate / shamrock.sys.world_size()}\n"
198         result_text += f"rates infos : max={max(res_rates)}, min={min(res_rates)}, mean={mean(res_rates)}, stddev={stdev(res_rates)}\n"
199         result_text += f"res_rates = {res_rates}\n"
200         result_text += f"res_cnts = {res_cnts}\n"
201         result_text += f"step time = {step_time}\n"
202
203         dic_out = {
204             "world_size": shamrock.sys.world_size(),
205             "rate": res_rate,
206             "cnt": res_cnt,
207             "step_time": step_time,
208             "mpi_timers": max_mpi_timers,
209         }
210
211         # print the system metrics
212         metrics_duration = max_rate_system_metrics["duration"]
213         result_text += "system metrics:\n"
214         for key, value in max_rate_system_metrics.items():
215             if not key == "duration":
216                 result_text += f"{key}: {value} J\n"
217                 dic_out[key] = value
218
219         for key, value in max_rate_system_metrics.items():
220             if not key == "duration":
221                 result_text += f"avg power {key} / step time : {value / metrics_duration} W\n"
222                 dic_out[f"power_{key}"] = value / metrics_duration
223
224         dic_out["system_metric_duration"] = metrics_duration
225
226         result_text += f"dic_out = {dic_out}\n"
227
228         print("current results:")
229         print(result_text)
230
231 if shamrock.sys.world_rank() == 0:
232     print(result_text)

Estimated memory usage: 0 MB

Gallery generated by Sphinx-Gallery