#include <pybind11/cast.h>
#include <pybind11/complex.h>
#include <pybind11/stl.h> // assumed addition: needed for the std::vector<f64> conversions below
// ... (remaining includes elided in the listing)
template<class T>
DeviceBuffer<T> &operator+=(DeviceBuffer<T> &lhs, const DeviceBuffer<T> &rhs) {
    // Element-wise in-place addition on the device. The kernel_call arguments
    // were elided in the listing and are reconstructed here (assumed, not verbatim).
    sham::kernel_call(
        shamsys::instance::get_compute_scheduler_ptr()->get_queue(),
        sham::MultiRef{rhs}, sham::MultiRef{lhs}, lhs.get_size(),
        [](u32 n, const T *rhs, T *lhs) { lhs[n] += rhs[n]; });
    return lhs;
}
template<class T>
DeviceBuffer<T> &operator/=(DeviceBuffer<T> &lhs, const DeviceBuffer<T> &rhs) {
    // Element-wise in-place division; a zero divisor yields NaN instead of
    // undefined behaviour. kernel_call arguments reconstructed as above.
    sham::kernel_call(
        shamsys::instance::get_compute_scheduler_ptr()->get_queue(),
        sham::MultiRef{rhs}, sham::MultiRef{lhs}, lhs.get_size(),
        [](u32 n, const T *rhs, T *lhs) {
            lhs[n] = (rhs[n] == 0) ? std::numeric_limits<f64>::quiet_NaN()
                                   : lhs[n] / rhs[n];
        });
    return lhs;
}
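// Why these operators exist: the histogram bindings below accumulate a
// per-bin weighted sum (`ret`) and, when averaging is requested, a per-bin
// normalisation (`norm`) across all patches and MPI ranks; `ret /= norm`
// then turns the sums into means, with NaN marking empty bins.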
    // ... (module init boilerplate elided in the listing)
    m.def(
        "compute_histogram", // name assumed; the listing starts mid-definition
        [](std::vector<f64> bin_edges,
           /* x_field, y_field parameters elided in the listing */
           bool do_average) -> std::vector<f64> {
            auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();

            // Split the nx+1 edges into per-bin lower/upper bounds
            u32 nx = bin_edges.size() - 1;
            std::vector<f64> bin_edge_inf(nx);
            std::vector<f64> bin_edge_sup(nx);
            for (size_t i = 0; i < nx; i++) {
                bin_edge_inf[i] = bin_edges[i];
                bin_edge_sup[i] = bin_edges[i + 1];
            }

            // ... (DeviceBuffer declarations for bin_inf, bin_sup and the
            // accumulators ret, norm elided in the listing)
            bin_inf.copy_from_stdvec(bin_edge_inf);
            bin_sup.copy_from_stdvec(bin_edge_sup);
            // Accumulate the per-patch histograms into `ret`
            obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
                ret += shamalgs::primitives::compute_histogram<f64>(
                    // ... (scheduler, bin buffers and counts elided)
                    [](const f64 &bin_edge_inf,
                       const f64 &bin_edge_sup,
                       const f64 &x_val, // x/y parameters reconstructed
                       const f64 &y_val) {
                        // contribute y_val to the bin whose half-open
                        // interval [inf, sup) contains x_val
                        bool has_value = x_val >= bin_edge_inf && x_val < bin_edge_sup;
                        return has_value ? y_val : 0;
                    },
                    x_field.get_buf(id_patch),
                    y_field.get_buf(id_patch));
            });
            // Sum the per-rank partial histograms across MPI
            shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
            // Optional averaging: accumulate the per-bin normalisation with
            // the same predicate, then divide (guard reconstructed, assumed)
            if (do_average) {
                obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
                    norm += shamalgs::primitives::compute_histogram<f64>(
                        // ... (scheduler, bin buffers and counts elided)
                        [](const f64 &bin_edge_inf,
                           const f64 &bin_edge_sup,
                           const f64 &x_val,
                           const f64 &y_val) {
                            bool has_value = x_val >= bin_edge_inf && x_val < bin_edge_sup;
                            return has_value ? y_val : 0;
                        },
                        x_field.get_buf(id_patch)
                        /* , weight buffer argument elided in the listing */);
                });
                shamalgs::collective::reduce_buffer_in_place_sum(norm, MPI_COMM_WORLD);
                ret /= norm; // reconstructed; empty bins become NaN (see operator/=)
            }
            return ret.copy_to_stdvec();
        },
        py::arg("bin_edges"),
        // ... (x_field / y_field py::arg lines elided in the listing)
        py::arg("do_average") = false);
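    // -----------------------------------------------------------------------
    // Reference semantics (illustrative sketch, not part of the original
    // source): what the "compute_histogram" binding above computes for a
    // single patch, on the host, ignoring device execution and the MPI
    // reduction. The normalisation here simply counts entries per bin, one
    // plausible reading of the elided weight buffer. All names are
    // hypothetical; requires <vector> and <limits>.
    inline std::vector<f64> histogram_reference_sketch(
        const std::vector<f64> &bin_edges,
        const std::vector<f64> &x,
        const std::vector<f64> &y,
        bool do_average) {
        size_t nx = bin_edges.size() - 1;
        std::vector<f64> sum(nx, 0.), cnt(nx, 0.);
        for (size_t j = 0; j < x.size(); ++j) {
            for (size_t i = 0; i < nx; ++i) {
                // same half-open predicate as the device kernel above
                if (x[j] >= bin_edges[i] && x[j] < bin_edges[i + 1]) {
                    sum[i] += y[j];
                    cnt[i] += 1.;
                }
            }
        }
        if (do_average) {
            for (size_t i = 0; i < nx; ++i) {
                // mirror operator/= above: empty bins become NaN
                sum[i] = (cnt[i] == 0.) ? std::numeric_limits<f64>::quiet_NaN()
                                        : sum[i] / cnt[i];
            }
        }
        return sum;
    }
    // -----------------------------------------------------------------------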
157 "compute_histogram_convolve_x",
158 [](std::vector<f64> bin_edges,
162 bool do_average) -> std::vector<f64> {
163 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
165 u32 nx = bin_edges.size() - 1;
166 std::vector<f64> bin_edge_inf(nx);
167 std::vector<f64> bin_edge_sup(nx);
169 for (
size_t i = 0; i < nx; i++) {
170 bin_edge_inf[i] = bin_edges[i];
171 bin_edge_sup[i] = bin_edges[i + 1];
179 bin_inf.copy_from_stdvec(bin_edge_inf);
180 bin_sup.copy_from_stdvec(bin_edge_sup);
            // Accumulate per-patch histograms; each bin is widened by the
            // per-entry size so nearby entries also contribute
            obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
                ret += shamalgs::primitives::compute_histogram<f64>(
                    // ... (scheduler, bin buffers and counts elided)
                    [](const f64 &bin_edge_inf,
                       const f64 &bin_edge_sup,
                       const f64 &x_val, // x/y/size parameters reconstructed
                       const f64 &y_val,
                       const f64 &size_val) {
                        bool has_value
                            = x_val >= bin_edge_inf - size_val && x_val < bin_edge_sup + size_val;
                        return has_value ? y_val : 0;
                    },
                    x_field.get_buf(id_patch),
                    y_field.get_buf(id_patch),
                    size_field.get_buf(id_patch));
            });
            shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
            if (do_average) { // guard reconstructed, assumed
                obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
                    norm += shamalgs::primitives::compute_histogram<f64>(
                        // ... (scheduler, bin buffers and counts elided)
                        [](const f64 &bin_edge_inf,
                           const f64 &bin_edge_sup,
                           const f64 &x_val,
                           const f64 &y_val,
                           const f64 &size_val) {
                            bool has_value = x_val >= bin_edge_inf - size_val
                                             && x_val < bin_edge_sup + size_val;
                            return has_value ? y_val : 0;
                        },
                        x_field.get_buf(id_patch),
                        /* weight buffer argument elided in the listing */
                        size_field.get_buf(id_patch));
                });
                shamalgs::collective::reduce_buffer_in_place_sum(norm, MPI_COMM_WORLD);
                ret /= norm; // reconstructed; empty bins become NaN
            }
            return ret.copy_to_stdvec();
        },
        py::arg("bin_edges"),
        // ... (x_field / y_field py::arg lines elided in the listing)
        py::arg("size_field"),
        py::arg("do_average") = false);
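    // Note on the convolve variant: compared to "compute_histogram", each
    // entry additionally carries a size (size_field, e.g. a smoothing
    // length); it contributes y_val to every bin whose interval, widened by
    // size_val on each side, contains x_val, i.e.
    // x_val >= inf - size_val && x_val < sup + size_val. The normalisation
    // pass uses the same widened predicate so averages remain consistent.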
251 "compute_histogram_2d",
252 [](std::vector<f64> bin_edges_x,
253 std::vector<f64> bin_edges_y,
256 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
258 u32 nx = bin_edges_x.size() - 1;
259 u32 ny = bin_edges_y.size() - 1;
268 binsx.copy_from_stdvec(bin_edges_x);
269 binsy.copy_from_stdvec(bin_edges_y);
            obj_cnts.for_each([&](u64 id_patch, const unsigned int &obj_cnt) {
                sham::kernel_call( // call opening reconstructed, assumed
                    dev_sched->get_queue(),
                    sham::MultiRef{
                        binsx, binsy, x_field.get_buf(id_patch), y_field.get_buf(id_patch)},
                    sham::MultiRef{ret}, // output ref reconstructed, assumed
                    obj_cnt,
                    [nx, ny](u32 id, // capture/index reconstructed, assumed
                        const f64 *__restrict x_bins,
                        const f64 *__restrict y_bins,
                        const f64 *__restrict x_field,
                        const f64 *__restrict y_field,
                        u64 *__restrict pic) {
                        auto get_pic_coord = [&](u32 ix, u32 iy) {
                            return ix + nx * iy; // flattening assumed; body elided
                        };

                        f64 x_val = x_field[id];
                        f64 y_val = y_field[id];

                        // Reject samples outside the binned domain
                        bool is_in_x_range = x_bins[0] <= x_val && x_val <= x_bins[nx];
                        bool is_in_y_range = y_bins[0] <= y_val && y_val <= y_bins[ny];

                        if (!(is_in_x_range && is_in_y_range)) {
                            return;
                        }

                        // upper_bound gives the first edge index with
                        // edge > value; the containing bin is one less
                        // (the adjustment line is elided in the listing)
                        u32 ix = shamalgs::primitives::binary_search_upper_bound(
                            x_bins, 0, nx + 1, x_val);
                        u32 iy = shamalgs::primitives::binary_search_upper_bound(
                            y_bins, 0, ny + 1, y_val);

                        if (ix >= nx || iy >= ny) {
                            return;
                        }

                        // Relaxed device-scope atomic add: many work-items
                        // may hit the same bin concurrently
                        using atomic_ref_T = sycl::atomic_ref<
                            u64,
                            sycl::memory_order_relaxed,
                            sycl::memory_scope_device,
                            sycl::access::address_space::global_space>;

                        atomic_ref_T pic_ref(pic[get_pic_coord(ix, iy)]);
                        pic_ref += 1; // increment reconstructed, assumed
                    });
            });
            // Sum the per-rank 2D counts across MPI
            shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);

            return ret.copy_to_stdvec();
        },
        py::arg("bin_edges_x"),
        py::arg("bin_edges_y"),
        // ... (remaining py::arg lines and registration closing elided)
// ---------------------------------------------------------------------------
// Symbol reference (hover documentation recovered from the generated listing):
//
//   f64 : alias for double.
//   u32 : std::uint32_t, 32 bit unsigned integer.
//   u64 : std::uint64_t, 64 bit unsigned integer.
//   DeviceBuffer : a buffer allocated in USM (Unified Shared Memory).
//   DistributedData : represents a collection of objects distributed across
//       patches identified by a u64 id.
//   void for_each(std::function<void(u64, T &)> &&f) : applies a function to
//       each object in the collection.
//   sham : namespace for the backends; it is named just `sham` since
//       `shambackends` is too long to write.
//   void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n,
//       Functor &&func, SourceLocation &&callsite = SourceLocation{}) :
//       submits a kernel to a SYCL queue.
//   sham::MultiRef : a class that references multiple buffers or similar
//       objects.
//   constexpr u32 binary_search_upper_bound(const Tkey *__restrict__ key,
//       u32 first, u32 last, const Tkey &value) : GPU compatible
//       implementation of std::upper_bound.
//   Register_pymod(placeholdername) : registers a python module init function
//       using static initialisation (from the pybind11 include and
//       definitions header).
//   shamsys/NodeInstance.hpp : header file describing a Node Instance.
// ---------------------------------------------------------------------------
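// Example of the kernel_call pattern summarised above (illustrative sketch,
// kept as a comment because the DeviceBuffer constructor arguments are not
// shown in this listing; `q`, `src`, `dst`, and `n` are hypothetical):
//
//   sham::DeviceBuffer<f64> src = /* ... */, dst = /* ... */;
//   sham::kernel_call(
//       q,                      // the DeviceQueue to submit to
//       sham::MultiRef{src},    // read-only inputs
//       sham::MultiRef{dst},    // read-write outputs
//       n,                      // number of work-items
//       [](u32 i, const f64 *src, f64 *dst) { dst[i] = 2 * src[i]; });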