Note
Go to the end to download the full example code.
Compute histogram performance benchmarks
This example benchmarks the performance of the histogram computation for each of the algorithms available in Shamrock.
9 import random
10 import time
11
12 import matplotlib.colors as colors
13 import matplotlib.pyplot as plt
14 import numpy as np
15
16 import shamrock
17
# The shamrock executable needs to handle specific MPI logic itself, so when it
# is the one running this script (instead of the python interpreter) the system
# must not be initialized from here.
needs_init = not shamrock.sys.is_initialized()
if needs_init:
    shamrock.change_loglevel(1)
    shamrock.sys.init("0:0")
# Control object used throughout the script to query and switch the
# implementation behind the compute_histogram algorithm.
impl_control = shamrock.algs.compute_histogram_impl()

alg_name = impl_control.get_alg_name()
print(alg_name)
compute_histogram
False
# Keep the default config around: the results plot at the end of the script
# highlights it among the benchmarked configs.
default_config = impl_control.get_default_config()

# Report the current, default and available configs, querying them in this
# exact order so that any log output emitted by the getters stays in place.
print(f"Current config: {impl_control.get_config()}")
print(f"Default config: {default_config}")
print(f"Available configs: {impl_control.get_avail_configs()}")
Info: no autotuning registered for compute_histogram [Algs][rank=0]
Info: no autotuning registered for compute_histogram [Algs][rank=0]
Info: switching config for alg compute_histogram to cfg=naive_gpu [Algs][rank=0]
Current config: naive_gpu
Default config: naive_gpu
Available configs: ['reference', 'naive_gpu', 'gpu_team_fetching', 'gpu_oversubscribe']
def _to_device_buffer(buffer_type, values):
    """Create a ``buffer_type`` device buffer sized for and filled with ``values``."""
    device_buffer = buffer_type()
    device_buffer.resize(len(values))
    device_buffer.copy_from_stdvec(values)
    return device_buffer


# 2049 evenly spaced edges on [0, 1] define 2048 contiguous bins; every bin is
# described by its lower (inf) and upper (sup) edge.
bin_edges = np.linspace(0, 1, 2049)
bin_edge_inf, bin_edge_sup = bin_edges[:-1], bin_edges[1:]

# One million random sample positions to be binned.
positions = np.random.default_rng().random(int(1e6))

# Single-precision copies of the same data for the f32 benchmark.
bin_edge_inf_f32, bin_edge_sup_f32, positions_f32 = (
    arr.astype(np.float32) for arr in (bin_edge_inf, bin_edge_sup, positions)
)

# Double-precision device buffers.
buf_bin_edge_inf = _to_device_buffer(shamrock.backends.DeviceBuffer_f64, bin_edge_inf)
buf_bin_edge_sup = _to_device_buffer(shamrock.backends.DeviceBuffer_f64, bin_edge_sup)
buf_positions = _to_device_buffer(shamrock.backends.DeviceBuffer_f64, positions)

# Single-precision device buffers.
buf_bin_edge_inf_f32 = _to_device_buffer(shamrock.backends.DeviceBuffer_f32, bin_edge_inf_f32)
buf_bin_edge_sup_f32 = _to_device_buffer(shamrock.backends.DeviceBuffer_f32, bin_edge_sup_f32)
buf_positions_f32 = _to_device_buffer(shamrock.backends.DeviceBuffer_f32, positions_f32)
# Timings per config name, stored in milliseconds (the benchmark return values
# are multiplied by 1000 before being stored).
results_f64, results_f32 = {}, {}

for config in impl_control.get_avail_configs():
    # Switch the implementation, then time it on the f64 and the f32 inputs.
    impl_control.set_config(config)

    time_f64 = shamrock.algs.benchmark_compute_histogram_basic_f64(
        buf_bin_edge_inf,
        buf_bin_edge_sup,
        buf_positions,
    )
    time_f32 = shamrock.algs.benchmark_compute_histogram_basic_f32(
        buf_bin_edge_inf_f32,
        buf_bin_edge_sup_f32,
        buf_positions_f32,
    )

    print(f"Config: {config}, Time f64: {time_f64 * 1000}ms, Time f32: {time_f32 * 1000}ms")

    results_f64[config], results_f32[config] = time_f64 * 1000, time_f32 * 1000
Info: switching config for alg compute_histogram to cfg=reference [Algs][rank=0]
Config: reference, Time f64: 2371.757527ms, Time f32: 2366.8604170000003ms
Info: switching config for alg compute_histogram to cfg=naive_gpu [Algs][rank=0]
Config: naive_gpu, Time f64: 894.8318885000001ms, Time f32: 893.2677315000001ms
Info: switching config for alg compute_histogram to cfg=gpu_team_fetching [Algs][rank=0]
Config: gpu_team_fetching, Time f64: 1212.12335ms, Time f32: 1252.430123ms
Info: switching config for alg compute_histogram to cfg=gpu_oversubscribe [Algs][rank=0]
Config: gpu_oversubscribe, Time f64: 2985.355484ms, Time f32: 2296.522044ms
Plot the histogram
# Compute the histogram of the f64 positions. This uses whichever config is
# active at this point, i.e. the last one selected by the benchmark loop.
result = shamrock.algs.compute_histogram_basic_f64(
    buf_bin_edge_inf,
    buf_bin_edge_sup,
    buf_positions,
)

# Copy the result out of the device buffer and plot it against the bin index.
histogram_values = result.copy_to_stdvec()
plt.plot(histogram_values)
plt.show()

Plot the benchmark results
plt.figure(layout="constrained")

# Grouped bar chart: one group per config, with the f64 bar to the left of the
# group centre and the f32 bar to the right.
configs = list(results_f64)
x = np.arange(len(configs))
bar_w = 0.35
for offset, timings, label in (
    (-bar_w / 2, results_f64, "f64"),
    (bar_w / 2, results_f32, "f32"),
):
    plt.bar(x + offset, [timings[c] for c in configs], bar_w, label=label)

plt.xticks(x, configs, rotation=45, ha="right")
ax = plt.gca()

# Highlight the default config by colouring its tick label red.
for tick_label, cfg in zip(ax.get_xticklabels(), configs):
    if cfg == default_config:
        tick_label.set_color("red")

plt.ylabel("Time (ms)")
plt.yscale("log")

# Round the automatic limits outward to whole decades; the 1.1 factor leaves a
# little headroom above the top decade.
_ymin, _ymax = ax.get_ylim()
lower_decade = 10 ** int(np.floor(np.log10(_ymin)))
upper_decade = 10 ** int(np.ceil(np.log10(_ymax)))
plt.ylim(lower_decade, upper_decade * 1.1)

plt.title("Compute histogram performance benchmarks")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

Total running time of the script: (0 minutes 34.897 seconds)
Estimated memory usage: 157 MB