is_all_true performance benchmarks

This example benchmarks the performance of is_all_true for each of the implementations available in Shamrock.

 import random
 import time

 import matplotlib.colors as colors
 import matplotlib.pyplot as plt
 import numpy as np

 import shamrock

 # If we use the shamrock executable to run this script instead of the python interpreter,
 # we should not initialize the system as the shamrock executable needs to handle specific MPI logic
 if not shamrock.sys.is_initialized():
     # Only reached when nothing has initialized shamrock yet.
     shamrock.change_loglevel(1)
     # NOTE(review): "0:0" is presumably a device selection string - confirm
     # against the shamrock.sys.init documentation.
     shamrock.sys.init("0:0")

Main benchmark functions

 def benchmark_is_all_true_random(N, nb_repeat=10):
     """Benchmark is_all_true on mock u8 buffers of N elements.

     A new buffer is created for every repetition with
     ``shamrock.algs.mock_buffer_u8`` and timed with
     ``shamrock.algs.benchmark_is_all_true``.

     Args:
         N: number of elements in each buffer.
         nb_repeat: number of repetitions (must be >= 1, otherwise ``min``
             raises ``ValueError`` on the empty list of timings).

     Returns:
         Tuple ``(min, max, mean)`` of the values returned by
         ``shamrock.algs.benchmark_is_all_true`` (presumably seconds - the
         caller prints them with an "s" suffix).
     """
     # Seed once per call rather than once per repetition: re-seeding inside
     # the loop made random.randint return the same value every time, so all
     # repetitions benchmarked the very same buffer. Results stay reproducible
     # from one call to the next.
     random.seed(111)

     times = []
     for _ in range(nb_repeat):
         # NOTE(review): first argument is presumably the generator seed and
         # (0, 1) the value range of the mock data - confirm with shamrock docs.
         buffer_seed = random.randint(0, 1000000)
         buf = shamrock.algs.mock_buffer_u8(buffer_seed, N, 0, 1)
         times.append(shamrock.algs.benchmark_is_all_true(buf, N))
     return min(times), max(times), sum(times) / nb_repeat


 def benchmark_is_all_true_ones(N, nb_repeat=10):
     """Benchmark is_all_true on a u8 device buffer of N elements filled with 1.

     Args:
         N: number of elements in the buffer.
         nb_repeat: how many times the measurement is repeated.

     Returns:
         Tuple ``(min, max, mean)`` of the values returned by
         ``shamrock.algs.benchmark_is_all_true`` over all repetitions.
     """

     def _single_run():
         # Fresh buffer for each measurement, resized to N then filled with 1.
         device_buf = shamrock.backends.DeviceBuffer_u8()
         device_buf.resize(N)
         device_buf.fill(1)
         return shamrock.algs.benchmark_is_all_true(device_buf, N)

     samples = [_single_run() for _ in range(nb_repeat)]
     fastest, slowest = min(samples), max(samples)
     return fastest, slowest, sum(samples) / nb_repeat


 def benchmark_is_all_true_zeros(N, nb_repeat=10):
     """Benchmark is_all_true on a u8 device buffer of N elements filled with 0.

     Args:
         N: number of elements in the buffer.
         nb_repeat: how many times the measurement is repeated.

     Returns:
         Tuple ``(min, max, mean)`` of the values returned by
         ``shamrock.algs.benchmark_is_all_true`` over all repetitions.
     """

     def _single_run():
         # Fresh buffer for each measurement, resized to N then filled with 0.
         device_buf = shamrock.backends.DeviceBuffer_u8()
         device_buf.resize(N)
         device_buf.fill(0)
         return shamrock.algs.benchmark_is_all_true(device_buf, N)

     samples = [_single_run() for _ in range(nb_repeat)]
     fastest, slowest = min(samples), max(samples)
     return fastest, slowest, sum(samples) / nb_repeat

Run the performance test for all parameters

 def run_performance_sweep():
     """Run the three is_all_true benchmarks over a range of buffer sizes.

     For every size the random, all-ones and all-zeros benchmarks are run
     (in that order) and their mean timings are collected. Progress is
     printed to stdout, one line per size.

     Returns:
         Tuple ``(particle_counts, results_random, results_ones,
         results_zeros)`` where ``particle_counts`` is the list of sizes and
         the three result lists hold the mean timing for each size.
     """
     # 20 logarithmically spaced sizes between 1e2 and 1e7, truncated to int.
     particle_counts = np.logspace(2, 7, 20).astype(int).tolist()

     # One mean timing per size, for each data pattern.
     results_random = []
     results_ones = []
     results_zeros = []

     print(f"Particle counts: {particle_counts}")

     total_runs = len(particle_counts)

     for current_run, N in enumerate(particle_counts, start=1):
         print(
             f"[{current_run:2d}/{total_runs}] Running N={N:5d}...",
             end=" ",
         )

         # perf_counter is monotonic, so the elapsed time cannot be skewed by
         # system clock adjustments the way time.time() can.
         start_time = time.perf_counter()
         _, _, mean_time = benchmark_is_all_true_random(N)
         results_random.append(mean_time)
         _, _, mean_time = benchmark_is_all_true_ones(N)
         results_ones.append(mean_time)
         _, _, mean_time = benchmark_is_all_true_zeros(N)
         results_zeros.append(mean_time)
         elapsed = time.perf_counter() - start_time

         # NOTE: mean_time here is the mean of the last benchmark run above
         # (all zeros); elapsed covers all three benchmarks.
         print(f"mean={mean_time:.3f}s (took {elapsed:.1f}s)")

     return particle_counts, results_random, results_ones, results_zeros

List the current implementation

 # Fetch the is_all_true implementation currently reported by shamrock and display it.
 current_impl = shamrock.algs.get_current_impl_is_all_true()

 print(current_impl)

impl_param(impl_name="host", params="")

List all implementations available

 # Fetch the default list of is_all_true implementations and display it.
 # Each entry exposes `impl_name` and `params`, which the benchmark loop
 # passes to shamrock.algs.set_impl_is_all_true.
 all_default_impls = shamrock.algs.get_default_impl_list_is_all_true()

 print(all_default_impls)

[impl_param(impl_name="host", params=""), impl_param(impl_name="sum_reduction", params="")]

Run the performance benchmarks for all implementations

 # Benchmark every default implementation and add its three curves to the figure.
 for impl in all_default_impls:
     shamrock.algs.set_impl_is_all_true(impl.impl_name, impl.params)

     print(f"Running is_all_true performance benchmarks for {impl}...")

     # Run the performance sweep
     particle_counts, results_random, results_ones, results_zeros = run_performance_sweep()

     # (timings, line format, legend suffix) for each data pattern
     curves = (
         (results_random, "--", " (random set)"),
         (results_ones, "--+", " (all ones)"),
         (results_zeros, "--o", " (all zeros)"),
     )
     for timings, line_fmt, legend_suffix in curves:
         plt.plot(particle_counts, timings, line_fmt, label=impl.impl_name + legend_suffix)


 # Reference line: time needed to process N elements at 1e8 elements per second.
 # Uses the sizes returned by the last sweep.
 reference_time = np.array(particle_counts) / 1e8
 plt.plot(
     particle_counts,
     reference_time,
     color="grey",
     linestyle="-",
     alpha=0.7,
     label="100M obj/sec",
 )


 # Axis labels and title
 plt.xlabel("Number of elements")
 plt.ylabel("Time (s)")
 plt.title("is_all_true performance benchmarks")

 # Log-log axes
 plt.xscale("log")
 plt.yscale("log")

 plt.grid(True)

 plt.legend()
 plt.show()

Info: setting is_all_true implementation to impl : host                              [tree][rank=0]
Running is_all_true performance benchmarks for impl_param(impl_name="host", params="")...
Particle counts: [100, 183, 335, 615, 1128, 2069, 3792, 6951, 12742, 23357, 42813, 78475, 143844, 263665, 483293, 885866, 1623776, 2976351, 5455594, 10000000]
[ 1/20] Running N=  100... mean=0.000s (took 0.0s)
[ 2/20] Running N=  183... mean=0.000s (took 0.0s)
[ 3/20] Running N=  335... mean=0.000s (took 0.0s)
[ 4/20] Running N=  615... mean=0.000s (took 0.0s)
[ 5/20] Running N= 1128... mean=0.000s (took 0.0s)
[ 6/20] Running N= 2069... mean=0.000s (took 0.0s)
[ 7/20] Running N= 3792... mean=0.000s (took 0.0s)
[ 8/20] Running N= 6951... mean=0.000s (took 0.0s)
[ 9/20] Running N=12742... mean=0.000s (took 0.0s)
[10/20] Running N=23357... mean=0.000s (took 0.0s)
[11/20] Running N=42813... mean=0.000s (took 0.0s)
[12/20] Running N=78475... mean=0.000s (took 0.0s)
[13/20] Running N=143844... mean=0.000s (took 0.0s)
[14/20] Running N=263665... mean=0.000s (took 0.0s)
[15/20] Running N=483293... mean=0.000s (took 0.1s)
[16/20] Running N=885866... mean=0.000s (took 0.1s)
[17/20] Running N=1623776... mean=0.000s (took 0.3s)
[18/20] Running N=2976351... mean=0.000s (took 0.4s)
[19/20] Running N=5455594... mean=0.000s (took 0.6s)
[20/20] Running N=10000000... mean=0.001s (took 1.1s)
Info: setting is_all_true implementation to impl : sum_reduction                     [tree][rank=0]
Running is_all_true performance benchmarks for impl_param(impl_name="sum_reduction", params="")...
Particle counts: [100, 183, 335, 615, 1128, 2069, 3792, 6951, 12742, 23357, 42813, 78475, 143844, 263665, 483293, 885866, 1623776, 2976351, 5455594, 10000000]
[ 1/20] Running N=  100... mean=0.000s (took 0.0s)
[ 2/20] Running N=  183... mean=0.000s (took 0.0s)
[ 3/20] Running N=  335... mean=0.000s (took 0.0s)
[ 4/20] Running N=  615... mean=0.000s (took 0.0s)
[ 5/20] Running N= 1128... mean=0.000s (took 0.0s)
[ 6/20] Running N= 2069... mean=0.000s (took 0.0s)
[ 7/20] Running N= 3792... mean=0.000s (took 0.0s)
[ 8/20] Running N= 6951... mean=0.000s (took 0.0s)
[ 9/20] Running N=12742... mean=0.000s (took 0.0s)
[10/20] Running N=23357... mean=0.000s (took 0.0s)
[11/20] Running N=42813... mean=0.000s (took 0.0s)
[12/20] Running N=78475... mean=0.000s (took 0.0s)
[13/20] Running N=143844... mean=0.000s (took 0.0s)
[14/20] Running N=263665... mean=0.000s (took 0.0s)
[15/20] Running N=483293... mean=0.000s (took 0.1s)
[16/20] Running N=885866... mean=0.001s (took 0.1s)
[17/20] Running N=1623776... mean=0.001s (took 0.3s)
[18/20] Running N=2976351... mean=0.002s (took 0.5s)
[19/20] Running N=5455594... mean=0.004s (took 1.0s)
[20/20] Running N=10000000... mean=0.011s (took 1.8s)

Total running time of the script: (0 minutes 7.454 seconds)

Estimated memory usage: 174 MB

Gallery generated by Sphinx-Gallery

is_all_true performance benchmarks#

This Page