28#include <pybind11/cast.h>
29#include <pybind11/complex.h>
42 [](
u32 n,
const T *rhs, T *lhs) {
55 [](
u32 n,
const T *rhs, T *lhs) {
60 lhs[n] = std::numeric_limits<f64>::quiet_NaN();
69 auto &m = root_module;
73 [](std::vector<f64> bin_edges,
76 bool do_average) -> std::vector<f64> {
77 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
79 u32 nx = bin_edges.size() - 1;
80 std::vector<f64> bin_edge_inf(nx);
81 std::vector<f64> bin_edge_sup(nx);
83 for (
size_t i = 0; i < nx; i++) {
84 bin_edge_inf[i] = bin_edges[i];
85 bin_edge_sup[i] = bin_edges[i + 1];
93 bin_inf.copy_from_stdvec(bin_edge_inf);
94 bin_sup.copy_from_stdvec(bin_edge_sup);
98 obj_cnts.
for_each([&](
u64 id_patch,
const unsigned int &obj_cnt) {
99 ret += shamalgs::primitives::compute_histogram<f64>(
104 [](
const f64 &bin_edge_inf,
105 const f64 &bin_edge_sup,
109 has_value = x_val >= bin_edge_inf && x_val < bin_edge_sup;
110 return has_value ? y_val : 0;
112 x_field.get_buf(id_patch),
113 y_field.get_buf(id_patch));
116 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
123 obj_cnts.
for_each([&](
u64 id_patch,
const unsigned int &obj_cnt) {
127 norm += shamalgs::primitives::compute_histogram<f64>(
132 [](
const f64 &bin_edge_inf,
133 const f64 &bin_edge_sup,
137 has_value = x_val >= bin_edge_inf && x_val < bin_edge_sup;
138 return has_value ? y_val : 0;
140 x_field.get_buf(id_patch),
144 shamalgs::collective::reduce_buffer_in_place_sum(norm, MPI_COMM_WORLD);
149 return ret.copy_to_stdvec();
152 py::arg(
"bin_edges"),
155 py::arg(
"do_average") =
false);
158 "compute_histogram_convolve_x",
159 [](std::vector<f64> bin_edges,
163 bool do_average) -> std::vector<f64> {
164 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
166 u32 nx = bin_edges.size() - 1;
167 std::vector<f64> bin_edge_inf(nx);
168 std::vector<f64> bin_edge_sup(nx);
170 for (
size_t i = 0; i < nx; i++) {
171 bin_edge_inf[i] = bin_edges[i];
172 bin_edge_sup[i] = bin_edges[i + 1];
180 bin_inf.copy_from_stdvec(bin_edge_inf);
181 bin_sup.copy_from_stdvec(bin_edge_sup);
185 obj_cnts.
for_each([&](
u64 id_patch,
const unsigned int &obj_cnt) {
186 ret += shamalgs::primitives::compute_histogram<f64>(
191 [](
const f64 &bin_edge_inf,
192 const f64 &bin_edge_sup,
198 = x_val >= bin_edge_inf - size_val && x_val < bin_edge_sup + size_val;
199 return has_value ? y_val : 0;
201 x_field.get_buf(id_patch),
202 y_field.get_buf(id_patch),
203 size_field.get_buf(id_patch));
206 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
213 obj_cnts.
for_each([&](
u64 id_patch,
const unsigned int &obj_cnt) {
217 norm += shamalgs::primitives::compute_histogram<f64>(
222 [](
const f64 &bin_edge_inf,
223 const f64 &bin_edge_sup,
228 has_value = x_val >= bin_edge_inf - size_val
229 && x_val < bin_edge_sup + size_val;
230 return has_value ? y_val : 0;
232 x_field.get_buf(id_patch),
234 size_field.get_buf(id_patch));
237 shamalgs::collective::reduce_buffer_in_place_sum(norm, MPI_COMM_WORLD);
242 return ret.copy_to_stdvec();
245 py::arg(
"bin_edges"),
248 py::arg(
"size_field"),
249 py::arg(
"do_average") =
false);
252 "compute_histogram_2d",
253 [](std::vector<f64> bin_edges_x,
254 std::vector<f64> bin_edges_y,
257 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
259 u32 nx = bin_edges_x.size() - 1;
260 u32 ny = bin_edges_y.size() - 1;
269 binsx.copy_from_stdvec(bin_edges_x);
270 binsy.copy_from_stdvec(bin_edges_y);
272 obj_cnts.
for_each([&](
u64 id_patch,
const unsigned int &obj_cnt) {
274 dev_sched->get_queue(),
276 binsx, binsy, x_field.get_buf(id_patch), y_field.get_buf(id_patch)},
281 const f64 *__restrict x_bins,
282 const f64 *__restrict y_bins,
283 const f64 *__restrict x_field,
284 const f64 *__restrict y_field,
285 u64 *__restrict pic) {
286 auto get_pic_coord = [&](u32 ix, u32 iy) {
290 f64 x_val = x_field[
id];
291 f64 y_val = y_field[
id];
293 bool is_in_x_range = x_bins[0] <= x_val && x_val <= x_bins[nx];
294 bool is_in_y_range = y_bins[0] <= y_val && y_val <= y_bins[ny];
296 if (!(is_in_x_range && is_in_y_range)) {
301 x_bins, 0, nx + 1, x_val);
303 y_bins, 0, ny + 1, y_val);
305 if (ix >= nx || iy >= ny) {
309 using atomic_ref_T = sycl::atomic_ref<
311 sycl::memory_order_relaxed,
312 sycl::memory_scope_device,
313 sycl::access::address_space::global_space>;
315 atomic_ref_T pic_ref(pic[get_pic_coord(ix, iy)]);
320 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
322 return ret.copy_to_stdvec();
325 py::arg(
"bin_edges_x"),
326 py::arg(
"bin_edges_y"),
Header file describing a Node Instance.
double f64
Alias for double.
std::uint32_t u32
32-bit unsigned integer
std::uint64_t u64
64-bit unsigned integer
A buffer allocated in USM (Unified Shared Memory).
Represents a collection of objects distributed across patches identified by a u64 id.
void for_each(std::function< void(u64, T &)> &&f)
Applies a function to each object in the collection.
Namespace for the backends; it is named simply `sham` because `shambackends` is too long to write.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
constexpr u32 binary_search_upper_bound(const Tkey *__restrict__ key, u32 first, u32 last, const Tkey &value)
GPU compatible implementation of std::upper_bound.
Pybind11 include and definitions.
#define ON_PYTHON_INIT
Register a Python module init function using static initialization.
A class that references multiple buffers or similar objects.
GPU compatible implementation of std::upper_bound.