Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
numeric.hpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
10#pragma once
11
20#include "shambase/assert.hpp"
21#include "shambase/memory.hpp"
26#include "shambackends/sycl.hpp"
27
32namespace shamalgs::numeric {
33
43 template<class T>
44 sycl::buffer<T> scan_exclusive(sycl::queue &q, sycl::buffer<T> &buf1, u32 len);
45
54 template<class T>
56 sham::DeviceScheduler_ptr sched, sham::DeviceBuffer<T> &buf1, u32 len);
57
58 template<class T>
59 sycl::buffer<T> scan_inclusive(sycl::queue &q, sycl::buffer<T> &buf1, u32 len);
60
61 template<class T>
62 void scan_exclusive_in_place(sycl::queue &q, sycl::buffer<T> &buf, u32 len);
63
64 template<class T>
65 void scan_inclusive_in_place(sycl::queue &q, sycl::buffer<T> &buf, u32 len);
66
75 std::tuple<std::optional<sycl::buffer<u32>>, u32> stream_compact(
76 sycl::queue &q, sycl::buffer<u32> &buf_flags, u32 len);
77
87 const sham::DeviceScheduler_ptr &sched, sham::DeviceBuffer<u32> &buf_flags, u32 len);
88
89 template<class T>
92 sham::DeviceBuffer<T> bins_center;
93 sham::DeviceBuffer<T> bins_width;
94 };
95
130 template<class Tret, class T>
132 const sham::DeviceScheduler_ptr &sched,
133 const sham::DeviceBuffer<T> &bin_edges,
134 u64 nbins,
135 const sham::DeviceBuffer<T> &values,
136 u32 len);
137
138 template<class T>
139 inline sham::DeviceBuffer<u64> device_histogram_u64(
140 const sham::DeviceScheduler_ptr &sched,
141 const sham::DeviceBuffer<T> &bin_edges,
142 u64 nbins,
143 const sham::DeviceBuffer<T> &values,
144 u32 len) {
145 return device_histogram<u64, T>(sched, bin_edges, nbins, values, len);
146 }
147
148 template<class T>
149 inline sham::DeviceBuffer<u32> device_histogram_u32(
150 const sham::DeviceScheduler_ptr &sched,
151 const sham::DeviceBuffer<T> &bin_edges,
152 u64 nbins,
153 const sham::DeviceBuffer<T> &values,
154 u32 len) {
155 return device_histogram<u32, T>(sched, bin_edges, nbins, values, len);
156 }
157
173 template<class T>
175 const sham::DeviceScheduler_ptr &sched,
176 const sham::DeviceBuffer<T> &bin_edges,
177 u64 nbins,
178 const sham::DeviceBuffer<T> &values,
179 u32 len) {
180
181 SHAM_ASSERT(nbins > 1); // at least a sup and a inf
182 SHAM_ASSERT(bin_edges.get_size() == nbins + 1);
183
184 auto &q = shambase::get_check_ref(sched).get_queue();
185
186 sham::DeviceBuffer<u64> counts = device_histogram(sched, bin_edges, nbins, values, len);
187
188 sham::DeviceBuffer<T> bins_center(nbins, sched);
189 sham::DeviceBuffer<T> bins_width(nbins, sched);
190
192 q,
193 sham::MultiRef{bin_edges},
194 sham::MultiRef{bins_center, bins_width},
195 nbins,
196 [](u32 i,
197 const T *__restrict bin_edges,
198 T *__restrict bins_center,
199 T *__restrict bins_width) {
200 bins_center[i] = (bin_edges[i] + bin_edges[i + 1]) / 2;
201 bins_width[i] = bin_edges[i + 1] - bin_edges[i];
202 });
203
204 return {std::move(counts), std::move(bins_center), std::move(bins_width)};
205 }
206
215 template<class T>
221
250 template<class T>
252 const sham::DeviceScheduler_ptr &sched,
253 const sham::DeviceBuffer<T> &bin_edges,
254 u64 nbins,
255 const sham::DeviceBuffer<T> &values, // ie f(r)
256 const sham::DeviceBuffer<T> &keys, // ie r
257 u32 len);
258
295 template<class Tret, class T, class Fct>
297 const sham::DeviceScheduler_ptr &sched,
298 const sham::DeviceBuffer<T> &bin_edges,
299 u64 nbins,
300 const sham::DeviceBuffer<T> &values, // ie f(r)
301 const sham::DeviceBuffer<T> &keys, // ie r
302 u32 len,
303 Fct &&fct) {
304
305 auto bin_compute_in = binned_init_compute(sched, bin_edges, nbins, values, keys, len);
306 auto &valid_values = bin_compute_in.valid_values;
307 auto &offsets_bins = bin_compute_in.offsets_bins;
308
309 auto &q = shambase::get_check_ref(sched).get_queue();
310
311 sham::DeviceBuffer<Tret> bin_compute(nbins, sched);
312
314 q,
315 sham::MultiRef{valid_values, offsets_bins},
316 sham::MultiRef{bin_compute},
317 nbins,
318 [fct](
319 u32 i,
320 const T *__restrict valid_values,
321 const u32 *__restrict offsets_bins,
322 Tret *__restrict bin_averages) {
323 u32 bin_start = offsets_bins[i];
324 u32 bin_end = offsets_bins[i + 1];
325 u32 bin_count = bin_end - bin_start;
326
327 auto for_each_values = [&](auto func) {
328 for (u32 j = bin_start; j < bin_end; j++) {
329 func(valid_values[j]);
330 }
331 };
332
333 bin_averages[i] = fct(for_each_values, bin_count);
334 });
335
336 return bin_compute;
337 }
338
365 template<class T>
367 const sham::DeviceScheduler_ptr &sched,
368 const sham::DeviceBuffer<T> &bin_edges, // r bins
369 u64 nbins,
370 const sham::DeviceBuffer<T> &values, // ie f(r)
371 const sham::DeviceBuffer<T> &keys, // ie r
372 u32 len) {
373
374 return binned_compute<T, T>(
375 sched, bin_edges, nbins, values, keys, len, [](auto for_each_values, u32 bin_count) {
376 T sum = 0;
377 for_each_values([&](T val) {
378 sum += val;
379 });
380 return sum;
381 });
382 }
383
410 template<class T>
412 const sham::DeviceScheduler_ptr &sched,
413 const sham::DeviceBuffer<T> &bin_edges, // r bins
414 u64 nbins,
415 const sham::DeviceBuffer<T> &values, // ie f(r)
416 const sham::DeviceBuffer<T> &keys, // ie r
417 u32 len) {
418
419 return binned_compute<T, T>(
420 sched,
421 bin_edges,
422 nbins,
423 values,
424 keys,
425 len,
426 [](auto for_each_values, u32 bin_count) -> T {
427 T sum = T{};
428 for_each_values([&](T val) {
429 sum += val;
430 });
431 if (bin_count == 0) {
432 return T{};
433 } else {
434 return sum / bin_count;
435 }
436 });
437 }
438
474 template<class Tret, class T>
476 const sham::DeviceScheduler_ptr &sched,
477 const sham::DeviceBuffer<T> &bin_edges,
478 u64 nbins,
479 const sham::DeviceBuffer<T> &values,
480 u32 len) {
481
482 auto local_counts = device_histogram<Tret, T>(sched, bin_edges, nbins, values, len);
483
484 // local counts are now the global counts
485 shamalgs::collective::reduce_buffer_in_place_sum(local_counts, MPI_COMM_WORLD);
486
487 return local_counts;
488 }
489
520 template<class T>
522 const sham::DeviceScheduler_ptr &sched,
523 const sham::DeviceBuffer<T> &bin_edges,
524 u64 nbins,
525 const sham::DeviceBuffer<T> &values,
526 u32 len) {
527 return device_histogram_mpi<u64, T>(sched, bin_edges, nbins, values, len);
528 }
529
560 template<class T>
562 const sham::DeviceScheduler_ptr &sched,
563 const sham::DeviceBuffer<T> &bin_edges,
564 u64 nbins,
565 const sham::DeviceBuffer<T> &values,
566 u32 len) {
567 return device_histogram_mpi<u32, T>(sched, bin_edges, nbins, values, len);
568 }
569
605 template<class T>
607 const sham::DeviceScheduler_ptr &sched,
608 const sham::DeviceBuffer<T> &bin_edges, // r bins
609 u64 nbins,
610 const sham::DeviceBuffer<T> &values, // ie f(r)
611 const sham::DeviceBuffer<T> &keys, // ie r
612 u32 len) {
613
614 auto local_result = binned_sum(sched, bin_edges, nbins, values, keys, len);
615
616 // local result is now the global result
617 shamalgs::collective::reduce_buffer_in_place_sum(local_result, MPI_COMM_WORLD);
618
619 return local_result;
620 }
621
658 template<class T>
660 const sham::DeviceScheduler_ptr &sched,
661 const sham::DeviceBuffer<T> &bin_edges, // r bins
662 u64 nbins,
663 const sham::DeviceBuffer<T> &values, // ie f(r)
664 const sham::DeviceBuffer<T> &keys, // ie r
665 u32 len,
666 const sham::DeviceBuffer<u32> &bin_counts_global) {
667
668 auto bin_sums = binned_sum_mpi(sched, bin_edges, nbins, values, keys, len);
669
671 shambase::get_check_ref(sched).get_queue(),
672 sham::MultiRef{bin_counts_global},
673 sham::MultiRef{bin_sums},
674 nbins,
675 [](u32 i, const u32 *__restrict bin_counts, T *__restrict bin_sums) {
676 u32 bin_count = bin_counts[i];
677 if (bin_count == 0) {
678 bin_sums[i] = T{};
679 } else {
680 bin_sums[i] /= bin_count;
681 }
682 });
683
684 return bin_sums;
685 }
686
722 template<class T>
724 const sham::DeviceScheduler_ptr &sched,
725 const sham::DeviceBuffer<T> &bin_edges, // r bins
726 u64 nbins,
727 const sham::DeviceBuffer<T> &values, // ie f(r)
728 const sham::DeviceBuffer<T> &keys, // ie r
729 u32 len) {
730
731 auto bin_counts = device_histogram_u32_mpi(sched, bin_edges, nbins, keys, len);
732
733 // call the version with global bin counts pre-computed
734 return binned_average_mpi(sched, bin_edges, nbins, values, keys, len, bin_counts);
735 }
736
752 template<class T>
754 const sham::DeviceScheduler_ptr &sched,
755 const sham::DeviceBuffer<T> &bin_edges,
756 u64 nbins,
757 const sham::DeviceBuffer<T> &values,
758 u32 len) {
759
760 SHAM_ASSERT(nbins > 1); // at least a sup and a inf
761 SHAM_ASSERT(bin_edges.get_size() == nbins + 1);
762
763 auto &q = shambase::get_check_ref(sched).get_queue();
764
766 = device_histogram_u64_mpi(sched, bin_edges, nbins, values, len);
767
768 sham::DeviceBuffer<T> bins_center(nbins, sched);
769 sham::DeviceBuffer<T> bins_width(nbins, sched);
770
772 q,
773 sham::MultiRef{bin_edges},
774 sham::MultiRef{bins_center, bins_width},
775 nbins,
776 [](u32 i,
777 const T *__restrict bin_edges,
778 T *__restrict bins_center,
779 T *__restrict bins_width) {
780 bins_center[i] = (bin_edges[i] + bin_edges[i + 1]) / 2;
781 bins_width[i] = bin_edges[i + 1] - bin_edges[i];
782 });
783
784 return {std::move(counts), std::move(bins_center), std::move(bins_width)};
785 }
786
787} // namespace shamalgs::numeric
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
Definition assert.hpp:67
A buffer allocated in USM (Unified Shared Memory)
size_t get_size() const
Gets the number of elements in the buffer.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
namespace containing the numeric algorithms of shamalgs
BinnedCompute< T > binned_init_compute(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
Prepare binned data for per-bin computation.
Definition numeric.cpp:183
sham::DeviceBuffer< u32 > device_histogram_u32_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the u32 histogram of values between bin_edges across all MPI ranks.
Definition numeric.hpp:561
sham::DeviceBuffer< Tret > device_histogram(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the histogram of values between bin_edges.
Definition numeric.cpp:95
sham::DeviceBuffer< Tret > device_histogram_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the histogram of values between bin_edges across all MPI ranks.
Definition numeric.hpp:475
sham::DeviceBuffer< T > binned_sum(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
Compute the sum of values in each bin.
Definition numeric.hpp:366
sham::DeviceBuffer< T > binned_sum_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
Compute the sum of values in each bin across all MPI ranks.
Definition numeric.hpp:606
sham::DeviceBuffer< T > binned_average_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len, const sham::DeviceBuffer< u32 > &bin_counts_global)
Compute the average of values in each bin across all MPI ranks (with pre-computed global counts).
Definition numeric.hpp:659
sycl::buffer< T > scan_exclusive(sycl::queue &q, sycl::buffer< T > &buf1, u32 len)
Computes the exclusive sum of elements in a SYCL buffer.
Definition numeric.cpp:35
std::tuple< std::optional< sycl::buffer< u32 > >, u32 > stream_compact(sycl::queue &q, sycl::buffer< u32 > &buf_flags, u32 len)
Stream compaction algorithm.
Definition numeric.cpp:84
sham::DeviceBuffer< T > binned_average(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
Compute the average of values in each bin.
Definition numeric.hpp:411
histogram_result< T > device_histogram_full_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the histogram and bin properties (center, width) for a set of values and bin edges.
Definition numeric.hpp:753
sham::DeviceBuffer< u64 > device_histogram_u64_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the u64 histogram of values between bin_edges across all MPI ranks.
Definition numeric.hpp:521
sham::DeviceBuffer< Tret > binned_compute(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len, Fct &&fct)
Perform a custom reduction or computation over values in each bin.
Definition numeric.hpp:296
histogram_result< T > device_histogram_full(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the histogram and bin properties (center, width) for a set of values and bin edges.
Definition numeric.hpp:174
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110
A class that references multiple buffers or similar objects.
Structure holding the result of binning values for further computation.
Definition numeric.hpp:216
sham::DeviceBuffer< u32 > offsets_bins
Offsets for each bin (size nbins+1).
Definition numeric.hpp:219
sham::DeviceBuffer< T > valid_values
Values that are within the bin range, sorted by bin.
Definition numeric.hpp:218