Note
Go to the end to download the full example code.
Weak scale test for SPH
This example tests the weak-scaling performance of the SPH solver. It is based on a Sedov blast wave simulation in which the number of particles is scaled with the number of processes. Run it only if you have enough memory available.
from statistics import mean, stdev

import shamrock

# Accumulated human-readable benchmark report; filled in by rank 0 inside the
# benchmark loop and printed once at the end of the run.
result_text = ""
# Weak-scaling benchmark: for each base particle count, build a Sedov blast
# setup whose total particle count scales with the number of MPI ranks, run
# 5 timed solver steps, and append a per-size report to `result_text`.
for N_target_base in [32e6]:
    gamma = 5.0 / 3.0
    rho_g = 1
    target_tot_u = 1

    bmin = (-0.6, -0.6, -0.6)
    bmax = (0.6, 0.6, 0.6)

    # Weak scaling: total particle count grows with the number of ranks.
    compute_multiplier = shamrock.sys.world_size()
    # compute_multiplier = 12
    scheduler_split_val = int(2e7)
    scheduler_merge_val = int(1)

    N_target = N_target_base * compute_multiplier
    xm, ym, zm = bmin
    xM, yM, zM = bmax
    vol_b = (xM - xm) * (yM - ym) * (zM - zm)

    # Only rank 0 logs, to avoid duplicated output across MPI ranks.
    if shamrock.sys.world_rank() == 0:
        print("N_target_base", N_target_base)
        print("compute_multiplier", compute_multiplier)
        print("scheduler_split_val", scheduler_split_val)
        print("scheduler_merge_val", scheduler_merge_val)
        print("N_target", N_target)
        print("vol_b", vol_b)

    part_vol = vol_b / N_target

    # lattice volume (0.74 ~ HCP packing fraction)
    part_vol_lattice = 0.74 * part_vol

    # Lattice spacing from the equivalent-sphere volume of one particle.
    dr = (part_vol_lattice / ((4.0 / 3.0) * 3.1416)) ** (1.0 / 3.0)

    pmass = -1

    ctx = shamrock.Context()
    ctx.pdata_layout_new()

    model = shamrock.get_Model_SPH(context=ctx, vector_type="f64_3", sph_kernel="M4")

    cfg = model.gen_default_config()
    # cfg.set_artif_viscosity_Constant(alpha_u = 1, alpha_AV = 1, beta_AV = 2)
    # cfg.set_artif_viscosity_VaryingMM97(alpha_min = 0.1,alpha_max = 1,sigma_decay = 0.1, alpha_u = 1, beta_AV = 2)
    cfg.set_artif_viscosity_VaryingCD10(
        alpha_min=0.0, alpha_max=1, sigma_decay=0.1, alpha_u=1, beta_AV=2
    )
    cfg.set_boundary_periodic()
    cfg.set_eos_adiabatic(gamma)
    cfg.set_max_neigh_cache_size(int(100e9))
    cfg.print_status()
    model.set_solver_config(cfg)
    model.init_scheduler(scheduler_split_val, scheduler_merge_val)

    # Snap the box to one that holds an integer number of HCP lattice cells.
    bmin, bmax = model.get_ideal_hcp_box(dr, bmin, bmax)
    xm, ym, zm = bmin
    xM, yM, zM = bmax

    model.resize_simulation_box(bmin, bmax)

    setup = model.get_setup()
    gen = setup.make_generator_lattice_hcp(dr, bmin, bmax)

    # Kind of optimized for Aurora
    setup.apply_setup(
        gen,
        gen_step=int(scheduler_split_val / 8),
        insert_step=int(scheduler_split_val * 2),
        msg_count_limit=1024,
        rank_comm_size_limit=int(scheduler_split_val) * 2,
        max_msg_size=int(scheduler_split_val / 8),
        do_setup_log=False,
    )

    xc, yc, zc = model.get_closest_part_to((0, 0, 0))

    if shamrock.sys.world_rank() == 0:
        print("closest part to (0,0,0) is in :", xc, yc, zc)

    # Recompute the volume with the snapped box bounds.
    vol_b = (xM - xm) * (yM - ym) * (zM - zm)

    totmass = rho_g * vol_b
    # print("Total mass :", totmass)

    pmass = model.total_mass_to_part_mass(totmass)

    # Zero the internal energy everywhere, then inject the blast energy
    # with a kernel-shaped bump of radius rinj around the origin.
    model.set_value_in_a_box("uint", "f64", 0, bmin, bmax)

    rinj = 8 * dr
    u_inj = 1
    model.add_kernel_value("uint", "f64", u_inj, (0, 0, 0), rinj)

    tot_u = pmass * model.get_sum("uint", "f64")
    if shamrock.sys.world_rank() == 0:
        print("total u :", tot_u)

    # print("Current part mass :", pmass)
    model.set_particle_mass(pmass)

    model.set_cfl_cour(0.1)
    model.set_cfl_force(0.1)

    # Start with a very small CFL multiplier and let it stiffen quickly.
    model.set_cfl_multipler(1e-4)
    model.set_cfl_mult_stiffness(1e6)

    # converge smoothing length and compute initial dt (warm-up, untimed)
    model.timestep()

    # Now run the actual benchmark for 5 steps
    res_rates = []
    res_cnts = []
    res_system_metrics = []

    for i in range(5):
        # Synchronize all ranks so each step's rate is measured cleanly.
        shamrock.sys.mpi_barrier()
        model.timestep()

        tmp_res_rate, tmp_res_cnt, tmp_system_metrics = (
            model.solver_logs_last_rate(),
            model.solver_logs_last_obj_count(),
            model.solver_logs_last_system_metrics(),
        )
        res_rates.append(tmp_res_rate)
        res_cnts.append(tmp_res_cnt)
        res_system_metrics.append(tmp_system_metrics)

    # result is the best rate of the 5 steps
    res_rate, res_cnt = max(res_rates), res_cnts[0]

    # index of the max rate (used to pick the matching system metrics)
    max_rate_index = res_rates.index(max(res_rates))
    max_rate_system_metrics = res_system_metrics[max_rate_index]

    # seconds per step at the best observed rate
    step_time = res_cnt / res_rate

    if shamrock.sys.world_rank() == 0:
        # FIX: the header line was missing its trailing newline, which fused
        # the "world size" line onto the header in the report.
        result_text += f"--- final score for N_target_base={N_target_base} ---\n"
        result_text += f"world size : {shamrock.sys.world_size()}\n"
        result_text += f"result rate : {res_rate}\n"
        result_text += f"result cnt : {res_cnt}\n"
        result_text += f"cnt/rank : {res_cnt / shamrock.sys.world_size()}\n"
        result_text += f"result rate per rank : {res_rate / shamrock.sys.world_size()}\n"
        result_text += f"rates infos : max={max(res_rates)}, min={min(res_rates)}, mean={mean(res_rates)}, stddev={stdev(res_rates)}\n"
        result_text += f"res_rates = {res_rates}\n"
        result_text += f"res_cnts = {res_cnts}\n"
        result_text += f"step time = {step_time}\n"

        # print the system metrics (values are energies in joules;
        # dividing by the step time gives an average power draw)
        result_text += "system metrics:\n"
        for key, value in max_rate_system_metrics.items():
            result_text += f"{key}: {value} J\n"

        for key, value in max_rate_system_metrics.items():
            result_text += f"avg power {key} / step time : {value / step_time} W\n"

        print("current results:")
        print(result_text)
172
# Print the accumulated report once, from rank 0 only, so the final score is
# not duplicated across MPI ranks.
if shamrock.sys.world_rank() == 0:
    print(result_text)
Estimated memory usage: 0 MB