Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
Classes | Functions
shamalgs::numeric Namespace Reference

namespace containing the numeric algorithms of shamalgs More...

Classes

struct  BinnedCompute
 Structure holding the result of binning values for further computation. More...
 
struct  histogram_result
 

Functions

template<class T >
sycl::buffer< T > scan_exclusive (sycl::queue &q, sycl::buffer< T > &buf1, u32 len)
 Computes the exclusive sum of elements in a SYCL buffer.
 
template<class T >
sham::DeviceBuffer< T > scan_exclusive (sham::DeviceScheduler_ptr sched, sham::DeviceBuffer< T > &buf1, u32 len)
 Compute the exclusive sum of a buffer on the device.
 
template<class T >
sycl::buffer< T > scan_inclusive (sycl::queue &q, sycl::buffer< T > &buf1, u32 len)
 
template<class T >
void scan_exclusive_in_place (sycl::queue &q, sycl::buffer< T > &buf, u32 len)
 
template<class T >
void scan_inclusive_in_place (sycl::queue &q, sycl::buffer< T > &buf, u32 len)
 
std::tuple< std::optional< sycl::buffer< u32 > >, u32 > stream_compact (sycl::queue &q, sycl::buffer< u32 > &buf_flags, u32 len)
 Stream compaction algorithm.
 
sham::DeviceBuffer< u32 > stream_compact (const sham::DeviceScheduler_ptr &sched, sham::DeviceBuffer< u32 > &buf_flags, u32 len)
 Stream compaction algorithm.
 
template<class Tret , class T >
sham::DeviceBuffer< Tret > device_histogram (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
 Compute the histogram of values between bin_edges.
 
template<class T >
sham::DeviceBuffer< u64 > device_histogram_u64 (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
 
template<class T >
sham::DeviceBuffer< u32 > device_histogram_u32 (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
 
template<class T >
histogram_result< T > device_histogram_full (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
 Compute the histogram and bin properties (center, width) for a set of values and bin edges.
 
template<class T >
BinnedCompute< T > binned_init_compute (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
 Prepare binned data for per-bin computation.
 
template<class Tret , class T , class Fct >
sham::DeviceBuffer< Tret > binned_compute (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len, Fct &&fct)
 Perform a custom reduction or computation over values in each bin.
 
template<class T >
sham::DeviceBuffer< T > binned_sum (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
 Compute the sum of values in each bin.
 
template<class T >
sham::DeviceBuffer< T > binned_average (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
 Compute the average of values in each bin.
 
template<class Tret , class T >
sham::DeviceBuffer< Tret > device_histogram_mpi (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
 Compute the histogram of values between bin_edges across all MPI ranks.
 
template<class T >
sham::DeviceBuffer< u64 > device_histogram_u64_mpi (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
 Compute the u64 histogram of values between bin_edges across all MPI ranks.
 
template<class T >
sham::DeviceBuffer< u32 > device_histogram_u32_mpi (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
 Compute the u32 histogram of values between bin_edges across all MPI ranks.
 
template<class T >
sham::DeviceBuffer< T > binned_sum_mpi (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
 Compute the sum of values in each bin across all MPI ranks.
 
template<class T >
sham::DeviceBuffer< T > binned_average_mpi (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len, const sham::DeviceBuffer< u32 > &bin_counts_global)
 Compute the average of values in each bin across all MPI ranks (with pre-computed global counts).
 
template<class T >
sham::DeviceBuffer< T > binned_average_mpi (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
 Compute the average of values in each bin across all MPI ranks.
 
template<class T >
histogram_result< T > device_histogram_full_mpi (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
 Compute the histogram and bin properties (center, width) for a set of values and bin edges.
 
template sycl::buffer< u32 > scan_exclusive (sycl::queue &q, sycl::buffer< u32 > &buf1, u32 len)
 
template sham::DeviceBuffer< u32 > scan_exclusive (sham::DeviceScheduler_ptr sched, sham::DeviceBuffer< u32 > &buf1, u32 len)
 
template sycl::buffer< u32 > scan_inclusive (sycl::queue &q, sycl::buffer< u32 > &buf1, u32 len)
 
template void scan_exclusive_in_place (sycl::queue &q, sycl::buffer< u32 > &buf1, u32 len)
 
template void scan_inclusive_in_place (sycl::queue &q, sycl::buffer< u32 > &buf1, u32 len)
 
template sham::DeviceBuffer< u64 > device_histogram< u64, f64 > (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< f64 > &bin_edges, u64 nbins, const sham::DeviceBuffer< f64 > &values, u32 len)
 
template sham::DeviceBuffer< u64 > device_histogram< u64, f32 > (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< f32 > &bin_edges, u64 nbins, const sham::DeviceBuffer< f32 > &values, u32 len)
 
template sham::DeviceBuffer< u32 > device_histogram< u32, f64 > (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< f64 > &bin_edges, u64 nbins, const sham::DeviceBuffer< f64 > &values, u32 len)
 
template sham::DeviceBuffer< u32 > device_histogram< u32, f32 > (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< f32 > &bin_edges, u64 nbins, const sham::DeviceBuffer< f32 > &values, u32 len)
 
template BinnedCompute< f64 > binned_init_compute (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< f64 > &bin_edges, u64 nbins, const sham::DeviceBuffer< f64 > &values, const sham::DeviceBuffer< f64 > &keys, u32 len)
 
template BinnedCompute< f32 > binned_init_compute (const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< f32 > &bin_edges, u64 nbins, const sham::DeviceBuffer< f32 > &values, const sham::DeviceBuffer< f32 > &keys, u32 len)
 

Detailed Description

namespace containing the numeric algorithms of shamalgs

Function Documentation

◆ binned_average()

template<class T >
sham::DeviceBuffer< T > shamalgs::numeric::binned_average ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
const sham::DeviceBuffer< T > &  keys,
u32  len 
)

Compute the average of values in each bin.

This function calculates the average of all values in each bin, using the keys to assign values to bins. It returns a buffer containing the average for each bin.

Template Parameters
T: The data type of the values and keys.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1).
nbins: The number of bins.
values: The values to be averaged (e.g., f(r)).
keys: The keys used for binning (e.g., r).
len: The number of elements in values/keys.
Returns
sham::DeviceBuffer<T> Buffer of averages, one per bin.

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
sham::DeviceBuffer<double> bin_edges = ...;
u64 nbins = bin_edges.get_size() - 1;
auto averages = shamalgs::numeric::binned_average(dev_sched, bin_edges, nbins, values,
keys, values.get_size());
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
size_t get_size() const
Gets the number of elements in the buffer.
sham::DeviceBuffer< T > binned_average(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
Compute the average of values in each bin.
Definition numeric.hpp:411

Definition at line 411 of file numeric.hpp.

◆ binned_average_mpi() [1/2]

template<class T >
sham::DeviceBuffer< T > shamalgs::numeric::binned_average_mpi ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
const sham::DeviceBuffer< T > &  keys,
u32  len 
)

Compute the average of values in each bin across all MPI ranks.

This function computes the global binned average across all MPI ranks. It first computes the global histogram counts using the keys, then computes the global binned sum, and finally divides the sums by the counts to obtain the averages. This is a convenience function that automatically computes the global bin counts.

Template Parameters
T: The data type of the values, keys, and bin edges.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1). Must be sorted in ascending order and identical across all ranks.
nbins: The number of bins.
values: The local values for this rank to be averaged (e.g., f(r)).
keys: The local keys for this rank used for binning (e.g., r).
len: The number of elements in local values/keys arrays.
Returns
sham::DeviceBuffer<T> Buffer of global averages, one per bin.

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
u64 nbins = bin_edges.get_size() - 1;
sham::DeviceBuffer<double> local_values = ...; // Different on each rank
sham::DeviceBuffer<double> local_keys = ...; // Different on each rank
auto global_averages = shamalgs::numeric::binned_average_mpi(
dev_sched, bin_edges, nbins, local_values, local_keys, local_values.get_size());
sham::DeviceBuffer< T > binned_average_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len, const sham::DeviceBuffer< u32 > &bin_counts_global)
Compute the average of values in each bin across all MPI ranks (with pre-computed global counts).
Definition numeric.hpp:659

If rank 0 has keys={0.5,1.5}, values={10,30} and rank 1 has keys={0.3,1.7}, values={20,10} with bin_edges={0.0,1.0,2.0}, the result={15,20} ((10+20)/2 in bin 0, (30+10)/2 in bin 1)

Definition at line 723 of file numeric.hpp.

+ Here is the call graph for this function:

◆ binned_average_mpi() [2/2]

template<class T >
sham::DeviceBuffer< T > shamalgs::numeric::binned_average_mpi ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
const sham::DeviceBuffer< T > &  keys,
u32  len,
const sham::DeviceBuffer< u32 > &  bin_counts_global 
)

Compute the average of values in each bin across all MPI ranks (with pre-computed global counts).

This function computes the global binned average using pre-computed global bin counts. It first computes the global binned sum across all MPI ranks and then divides by the provided global counts to obtain the average. This variant is useful when the global bin counts are already known or computed separately.

Template Parameters
T: The data type of the values, keys, and bin edges.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1). Must be sorted in ascending order and identical across all ranks.
nbins: The number of bins.
values: The local values for this rank to be averaged (e.g., f(r)).
keys: The local keys for this rank used for binning (e.g., r).
len: The number of elements in local values/keys arrays.
bin_counts_global: The global counts for each bin across all ranks.
Returns
sham::DeviceBuffer<T> Buffer of global averages, one per bin.

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
u64 nbins = bin_edges.get_size() - 1;
sham::DeviceBuffer<double> local_values = ...; // Different on each rank
sham::DeviceBuffer<double> local_keys = ...; // Different on each rank
sham::DeviceBuffer<u32> global_counts = ...; // Pre-computed global counts
auto global_averages = shamalgs::numeric::binned_average_mpi(
dev_sched, bin_edges, nbins, local_values, local_keys,
local_values.get_size(), global_counts);

Definition at line 659 of file numeric.hpp.

+ Here is the call graph for this function:

◆ binned_compute()

template<class Tret , class T , class Fct >
sham::DeviceBuffer< Tret > shamalgs::numeric::binned_compute ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
const sham::DeviceBuffer< T > &  keys,
u32  len,
Fct &&  fct 
)

Perform a custom reduction or computation over values in each bin.

This function applies a user-provided function to all values in each bin, allowing for flexible per-bin reductions (e.g., sum, mean, min, max, etc.).

Template Parameters
T: The data type of the values and keys.
Tret: The return type of the per-bin computation.
Fct: The type of the function to apply per bin. The function should have the signature: Tret f(for_each_values, u32 bin_count), where for_each_values is a callable that applies a function to each value in the bin.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1).
nbins: The number of bins.
values: The values to be binned (e.g., f(r)).
keys: The keys used for binning (e.g., r).
len: The number of elements in values/keys.
fct: The function to apply to each bin's values.
Returns
sham::DeviceBuffer<Tret> Buffer of computed values, one per bin.

Example (per-bin sum):

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
u64 nbins = bin_edges.get_size() - 1;
auto sums = shamalgs::numeric::binned_compute<double, double>(
dev_sched, bin_edges, nbins, values, keys, values.get_size(),
[](auto for_each_values, u32 bin_count) {
double sum = 0;
for_each_values([&](double v) { sum += v; });
return sum;
});
std::uint32_t u32
32 bit unsigned integer

Definition at line 296 of file numeric.hpp.

+ Here is the call graph for this function:

◆ binned_init_compute()

template<class T >
BinnedCompute< T > shamalgs::numeric::binned_init_compute ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
const sham::DeviceBuffer< T > &  keys,
u32  len 
)

Prepare binned data for per-bin computation.

Filters and sorts the input values and keys into bins defined by bin_edges, returning the valid values and the offsets for each bin. This is useful for custom per-bin reductions or statistics.

Template Parameters
T: The data type of the values and keys.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1).
nbins: The number of bins.
values: The values to be binned (e.g., f(r)).
keys: The keys used for binning (e.g., r).
len: The number of elements in values/keys.
Returns
BinnedCompute<T> Structure containing valid values and bin offsets.

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
sham::DeviceBuffer<double> bin_edges = ...;
sham::DeviceBuffer<double> values = ...; // f(r)
sham::DeviceBuffer<double> keys = ...; // r
u64 nbins = bin_edges.get_size() - 1;
auto binned = shamalgs::numeric::binned_init_compute(dev_sched, bin_edges, nbins, values,
keys, values.get_size());
// binned.valid_values, binned.offsets_bins
BinnedCompute< T > binned_init_compute(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
Prepare binned data for per-bin computation.
Definition numeric.cpp:183

Definition at line 183 of file numeric.cpp.

+ Here is the call graph for this function:

◆ binned_sum()

template<class T >
sham::DeviceBuffer< T > shamalgs::numeric::binned_sum ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
const sham::DeviceBuffer< T > &  keys,
u32  len 
)

Compute the sum of values in each bin.

This function computes the sum of all values in each bin, using the keys to assign values to bins.

Template Parameters
T: The data type of the values and keys.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1).
nbins: The number of bins.
values: The values to be summed (e.g., f(r)).
keys: The keys used for binning (e.g., r).
len: The number of elements in values/keys.
Returns
sham::DeviceBuffer<T> Buffer of sums, one per bin.

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
sham::DeviceBuffer<double> bin_edges = ...;
u64 nbins = bin_edges.get_size() - 1;
auto sums = shamalgs::numeric::binned_sum(dev_sched, bin_edges, nbins, values, keys,
values.get_size());
sham::DeviceBuffer< T > binned_sum(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
Compute the sum of values in each bin.
Definition numeric.hpp:366

Definition at line 366 of file numeric.hpp.

◆ binned_sum_mpi()

template<class T >
sham::DeviceBuffer< T > shamalgs::numeric::binned_sum_mpi ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
const sham::DeviceBuffer< T > &  keys,
u32  len 
)

Compute the sum of values in each bin across all MPI ranks.

This function computes the local binned sum on each MPI rank and then performs an MPI reduction to sum the results across all ranks, producing the global binned sum. Each rank processes its own subset of keys and values, and the final result contains the combined sums from all processes.

Template Parameters
T: The data type of the values, keys, and bin edges.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1). Must be sorted in ascending order and identical across all ranks.
nbins: The number of bins.
values: The local values for this rank to be summed (e.g., f(r)).
keys: The local keys for this rank used for binning (e.g., r).
len: The number of elements in local values/keys arrays.
Returns
sham::DeviceBuffer<T> Buffer of global sums, one per bin.

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
u64 nbins = bin_edges.get_size() - 1;
sham::DeviceBuffer<double> local_values = ...; // Different on each rank
sham::DeviceBuffer<double> local_keys = ...; // Different on each rank
auto global_sums = shamalgs::numeric::binned_sum_mpi(
dev_sched, bin_edges, nbins, local_values, local_keys, local_values.get_size());
sham::DeviceBuffer< T > binned_sum_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, const sham::DeviceBuffer< T > &keys, u32 len)
Compute the sum of values in each bin across all MPI ranks.
Definition numeric.hpp:606

If rank 0 has keys={0.5,1.5}, values={10,20} and rank 1 has keys={0.3,1.7}, values={5,15} with bin_edges={0.0,1.0,2.0}, the result={15,35} (10+5 in bin 0, 20+15 in bin 1)

Definition at line 606 of file numeric.hpp.

+ Here is the call graph for this function:

◆ device_histogram()

template<class Tret , class T >
sham::DeviceBuffer< Tret > shamalgs::numeric::device_histogram ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
u32  len 
)

Compute the histogram of values between bin_edges.

This function computes the histogram of the input values, counting how many values fall into each bin defined by the bin_edges array. Only values within [bin_edges[0], bin_edges[nbins]) are counted; values outside this range are ignored.

Template Parameters
Tret: The data type for the return counts (e.g., u32, u64).
T: The data type of the values and bin edges (e.g., float, double).
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1). Must be sorted in ascending order.
nbins: The number of bins (must be > 0, nbins = bin_edges.size() - 1).
values: The values to compute the histogram on.
len: The length of the values array.
Returns
sham::DeviceBuffer<Tret> The counts in each bin (length == nbins).

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
u64 nbins = bin_edges.get_size() - 1;
sham::DeviceBuffer<u64> d_counts = shamalgs::numeric::device_histogram<u64>(
dev_sched, bin_edges, nbins, values, values.get_size());

With bin_edges = {0.0, 1.0, 2.0, 3.0, 4.0} (4 bins: [0,1), [1,2), [2,3), [3,4)) and values = {0.5, 1.5, 2.5, 3.5, 2.1, 1.9, 0.1, 3.9}, the result = {2, 2, 2, 2}.

Definition at line 95 of file numeric.cpp.

+ Here is the call graph for this function:

◆ device_histogram_full()

template<class T >
histogram_result< T > shamalgs::numeric::device_histogram_full ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
u32  len 
)

Compute the histogram and bin properties (center, width) for a set of values and bin edges.

This function returns the histogram counts, the center of each bin, and the width of each bin.

Template Parameters
T: The data type of the values and bin edges.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1).
nbins: The number of bins.
values: The values to compute the histogram on.
len: The length of the values array.
Returns
histogram_result<T> Structure containing counts, bin centers, and bin widths.

Definition at line 174 of file numeric.hpp.

+ Here is the call graph for this function:

◆ device_histogram_full_mpi()

template<class T >
histogram_result< T > shamalgs::numeric::device_histogram_full_mpi ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
u32  len 
)

Compute the histogram and bin properties (center, width) for a set of values and bin edges.

This function returns the histogram counts, the center of each bin, and the width of each bin.

Template Parameters
T: The data type of the values and bin edges.
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1).
nbins: The number of bins.
values: The values to compute the histogram on.
len: The length of the values array.
Returns
histogram_result<T> Structure containing counts, bin centers, and bin widths.

Definition at line 753 of file numeric.hpp.

+ Here is the call graph for this function:

◆ device_histogram_mpi()

template<class Tret , class T >
sham::DeviceBuffer< Tret > shamalgs::numeric::device_histogram_mpi ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
u32  len 
)

Compute the histogram of values between bin_edges across all MPI ranks.

This function computes the local histogram on each MPI rank and then performs an MPI reduction to sum the counts across all ranks, producing the global histogram. Each rank processes its own subset of data, and the final result contains the combined counts from all processes.

Template Parameters
Tret: The data type for the return counts (e.g., u32, u64).
T: The data type of the values and bin edges (e.g., float, double).
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1). Must be sorted in ascending order and identical across all ranks.
nbins: The number of bins (must be > 0, nbins = bin_edges.size() - 1).
values: The local values for this rank to compute the histogram on.
len: The length of the local values array.
Returns
sham::DeviceBuffer<Tret> The global counts in each bin (length == nbins).

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
u64 nbins = bin_edges.get_size() - 1;
sham::DeviceBuffer<double> local_values = ...; // Different on each rank
sham::DeviceBuffer<u64> global_counts = shamalgs::numeric::device_histogram_mpi<u64>(
dev_sched, bin_edges, nbins, local_values, local_values.get_size());

If rank 0 has values = {0.5, 1.5} and rank 1 has values = {2.5, 3.5} with bin_edges = {0.0, 1.0, 2.0, 3.0, 4.0}, the result = {1, 1, 1, 1}

Definition at line 475 of file numeric.hpp.

◆ device_histogram_u32()

template<class T >
sham::DeviceBuffer< u32 > shamalgs::numeric::device_histogram_u32 ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
u32  len 
)
inline

Definition at line 149 of file numeric.hpp.

◆ device_histogram_u32_mpi()

template<class T >
sham::DeviceBuffer< u32 > shamalgs::numeric::device_histogram_u32_mpi ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
u32  len 
)
inline

Compute the u32 histogram of values between bin_edges across all MPI ranks.

Convenience wrapper for device_histogram_mpi with u32 return type. This function computes the local histogram on each MPI rank and then performs an MPI reduction to sum the counts across all ranks, producing the global histogram with 32-bit unsigned integer counts.

Template Parameters
T: The data type of the values and bin edges (e.g., float, double).
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1). Must be sorted in ascending order and identical across all ranks.
nbins: The number of bins (must be > 0, nbins = bin_edges.size() - 1).
values: The local values for this rank to compute the histogram on.
len: The length of the local values array.
Returns
sham::DeviceBuffer<u32> The global counts in each bin (length == nbins).

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
u64 nbins = bin_edges.get_size() - 1;
sham::DeviceBuffer<double> local_values = ...; // Different on each rank
sham::DeviceBuffer<u32> global_counts = shamalgs::numeric::device_histogram_u32_mpi(
dev_sched, bin_edges, nbins, local_values, local_values.get_size());
sham::DeviceBuffer< u32 > device_histogram_u32_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the u32 histogram of values between bin_edges across all MPI ranks.
Definition numeric.hpp:561

Definition at line 561 of file numeric.hpp.

◆ device_histogram_u64()

template<class T >
sham::DeviceBuffer< u64 > shamalgs::numeric::device_histogram_u64 ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
u32  len 
)
inline

Definition at line 139 of file numeric.hpp.

◆ device_histogram_u64_mpi()

template<class T >
sham::DeviceBuffer< u64 > shamalgs::numeric::device_histogram_u64_mpi ( const sham::DeviceScheduler_ptr &  sched,
const sham::DeviceBuffer< T > &  bin_edges,
u64  nbins,
const sham::DeviceBuffer< T > &  values,
u32  len 
)
inline

Compute the u64 histogram of values between bin_edges across all MPI ranks.

Convenience wrapper for device_histogram_mpi with u64 return type. This function computes the local histogram on each MPI rank and then performs an MPI reduction to sum the counts across all ranks, producing the global histogram with 64-bit unsigned integer counts.

Template Parameters
T: The data type of the values and bin edges (e.g., float, double).
Parameters
sched: The device scheduler to run on.
bin_edges: The edges of the bins (length == nbins + 1). Must be sorted in ascending order and identical across all ranks.
nbins: The number of bins (must be > 0, nbins = bin_edges.size() - 1).
values: The local values for this rank to compute the histogram on.
len: The length of the local values array.
Returns
sham::DeviceBuffer<u64> The global counts in each bin (length == nbins).

Example:

auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
u64 nbins = bin_edges.get_size() - 1;
sham::DeviceBuffer<double> local_values = ...; // Different on each rank
sham::DeviceBuffer<u64> global_counts = shamalgs::numeric::device_histogram_u64_mpi(
dev_sched, bin_edges, nbins, local_values, local_values.get_size());
sham::DeviceBuffer< u64 > device_histogram_u64_mpi(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &bin_edges, u64 nbins, const sham::DeviceBuffer< T > &values, u32 len)
Compute the u64 histogram of values between bin_edges across all MPI ranks.
Definition numeric.hpp:521

Definition at line 521 of file numeric.hpp.

◆ scan_exclusive() [1/2]

template<class T >
sham::DeviceBuffer< T > shamalgs::numeric::scan_exclusive ( sham::DeviceScheduler_ptr  sched,
sham::DeviceBuffer< T > &  buf1,
u32  len 
)

Compute the exclusive sum of a buffer on the device.

Parameters
sched: The scheduler to use for the computation.
buf1: The buffer to sum.
len: The number of elements to include in the sum.
Returns
A new buffer which is the output of the sum

Definition at line 48 of file numeric.cpp.

◆ scan_exclusive() [2/2]

template<class T >
sycl::buffer< T > shamalgs::numeric::scan_exclusive ( sycl::queue &  q,
sycl::buffer< T > &  buf1,
u32  len 
)

Computes the exclusive sum of elements in a SYCL buffer.

Template Parameters
T: The data type of elements in the buffer.
Parameters
q: The SYCL queue to use for computation.
buf1: The input buffer whose exclusive sum is to be computed.
len: The number of elements in the buffer.
Returns
A new SYCL buffer containing the exclusive sum of the input buffer.

Definition at line 35 of file numeric.cpp.

◆ scan_exclusive_in_place()

template<class T >
void shamalgs::numeric::scan_exclusive_in_place ( sycl::queue &  q,
sycl::buffer< T > &  buf,
u32  len 
)

Definition at line 67 of file numeric.cpp.

◆ scan_inclusive()

template<class T >
sycl::buffer< T > shamalgs::numeric::scan_inclusive ( sycl::queue &  q,
sycl::buffer< T > &  buf1,
u32  len 
)

Definition at line 62 of file numeric.cpp.

◆ scan_inclusive_in_place()

template<class T >
void shamalgs::numeric::scan_inclusive_in_place ( sycl::queue &  q,
sycl::buffer< T > &  buf,
u32  len 
)

Definition at line 72 of file numeric.cpp.

◆ stream_compact() [1/2]

sham::DeviceBuffer< u32 > shamalgs::numeric::stream_compact ( const sham::DeviceScheduler_ptr &  sched,
sham::DeviceBuffer< u32 > &  buf_flags,
u32  len 
)

Stream compaction algorithm.

Parameters
sched: The device scheduler to run on.
buf_flags: Buffer containing only zeros and ones.
len: The length of the buffer considered.
Returns
sham::DeviceBuffer<u32> Table of the indices to extract.

Definition at line 89 of file numeric.cpp.

◆ stream_compact() [2/2]

std::tuple< std::optional< sycl::buffer< u32 > >, u32 > shamalgs::numeric::stream_compact ( sycl::queue &  q,
sycl::buffer< u32 > &  buf_flags,
u32  len 
)

Stream compaction algorithm.

Parameters
q: The queue to run on.
buf_flags: Buffer containing only zeros and ones.
len: The length of the buffer considered.
Returns
std::tuple<std::optional<sycl::buffer<u32>>, u32> Table of the indices to extract (as an optional buffer) and its size.

Definition at line 84 of file numeric.cpp.