29 sham::DeviceBuffer<Tvec> pixel_to_positions(
30 Tvec center, Tvec delta_x, Tvec delta_y,
u32 nx,
u32 ny) {
32 sham::DeviceBuffer<Tvec> ret{nx * ny, shamsys::instance::get_compute_scheduler_ptr()};
34 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().
get_queue();
37 q, sham::MultiRef{}, sham::MultiRef{ret}, nx * ny, [=](
u32 gid, Tvec *position) {
40 f64 fx = ((
f64(ix) + 0.5) / nx) - 0.5;
41 f64 fy = ((
f64(iy) + 0.5) / ny) - 0.5;
42 position[gid] = center + delta_x * fx + delta_y * fy;
49 sham::DeviceBuffer<shammath::Ray<Tvec>> pixel_to_orthographic_rays(
50 Tvec center, Tvec delta_x, Tvec delta_y,
u32 nx,
u32 ny) {
52 using Tscal = shambase::VecComponent<Tvec>;
54 sham::DeviceBuffer<shammath::Ray<Tvec>> ret{
55 nx * ny, shamsys::instance::get_compute_scheduler_ptr()};
57 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().
get_queue();
59 Tvec e_z = sycl::cross(delta_x, delta_y);
60 Tscal len = sycl::length(e_z);
63 "The cross product of delta_x and delta_y is zero\n"
85 [=](
u32 gid, shammath::Ray<Tvec> *ray) {
88 f64 fx = ((
f64(ix) + 0.5) / nx) - 0.5;
89 f64 fy = ((
f64(iy) + 0.5) / ny) - 0.5;
90 Tvec pos_render = center + delta_x * fx + delta_y * fy;
92 ray[gid] = shammath::Ray<Tvec>(pos_render, e_z);
98 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
99 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_slice(
100 std::string field_name,
101 const sham::DeviceBuffer<Tvec> &positions,
102 std::optional<std::function<py::array_t<Tfield>(
size_t, pybind11::dict &)>> custom_getter)
103 -> sham::DeviceBuffer<Tfield> {
107 "sph::CartesianRender",
109 "compute_slice field_name: {}, positions count: {}",
120 [&](
auto field_getter) -> sham::DeviceBuffer<Tfield> {
121 return compute_slice(field_getter, positions);
128 "sph::CartesianRender",
129 shambase::format(
"compute_slice took {}", t.
get_time_str()));
135 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
136 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_column_integ(
137 std::string field_name,
138 const sham::DeviceBuffer<shammath::Ray<Tvec>> &rays,
139 std::optional<std::function<py::array_t<Tfield>(
size_t, pybind11::dict &)>> custom_getter)
140 -> sham::DeviceBuffer<Tfield> {
144 "sph::CartesianRender",
146 "compute_column_integ field_name: {}, rays count: {}",
157 [&](
auto field_getter) -> sham::DeviceBuffer<Tfield> {
158 return compute_column_integ(field_getter, rays);
165 "sph::CartesianRender",
166 shambase::format(
"compute_column_integ took {}", t.
get_time_str()));
172 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
173 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_azymuthal_integ(
174 std::string field_name,
175 const sham::DeviceBuffer<shammath::RingRay<Tvec>> &ring_rays,
176 std::optional<std::function<py::array_t<Tfield>(
size_t, pybind11::dict &)>> custom_getter)
177 -> sham::DeviceBuffer<Tfield> {
181 "sph::CartesianRender",
183 "compute_azymuthal_integ field_name: {}, ring_rays count: {}",
185 ring_rays.get_size()));
194 [&](
auto field_getter) -> sham::DeviceBuffer<Tfield> {
195 return compute_azymuthal_integ(field_getter, ring_rays);
202 "sph::CartesianRender",
203 shambase::format(
"compute_azymuthal_integ took {}", t.
get_time_str()));
209 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
210 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_slice(
211 std::function<field_getter_t> field_getter,
const sham::DeviceBuffer<Tvec> &positions)
212 -> sham::DeviceBuffer<Tfield> {
214 sham::DeviceBuffer<Tfield> ret{
215 positions.
get_size(), shamsys::instance::get_compute_scheduler_ptr()};
216 ret.
fill(sham::VectorProperties<Tfield>::get_zero());
218 using u_morton =
u32;
219 using RTree = RadixTree<u_morton, Tvec>;
221 shamrock::patch::PatchCoordTransform<Tvec> transf
222 = scheduler().get_sim_box().template get_patch_transform<Tvec>();
224 scheduler().for_each_patchdata_nonempty([&](
const shamrock::patch::Patch cur_p,
225 shamrock::patch::PatchDataLayer &pdat) {
226 shammath::CoordRange<Tvec> box = transf.to_obj_coord(cur_p);
228 PatchDataField<Tvec> &main_field = pdat.get_field<Tvec>(0);
230 auto &buf_xyz = pdat.get_field<Tvec>(0).get_buf();
232 = pdat.get_field<Tscal>(pdat.pdl().get_field_idx<Tscal>(
"hpart")).get_buf();
234 auto &buf_field_to_render = field_getter(cur_p, pdat);
236 u32 obj_cnt = main_field.get_obj_cnt();
239 shamsys::instance::get_compute_scheduler_ptr(),
240 {box.lower, box.upper},
243 solver_config.tree_reduction_level);
245 tree.compute_cell_ibounding_box(shamsys::instance::get_compute_queue());
246 tree.convert_bounding_box(shamsys::instance::get_compute_queue());
248 RadixTreeField<Tscal> hmax_tree = tree.compute_int_boxes(
249 shamsys::instance::get_compute_queue(),
250 pdat.get_field<Tscal>(pdat.pdl().
get_field_idx<Tscal>(
"hpart")).get_buf(),
253 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().
get_queue();
255 sham::EventList depends_list;
260 auto xyz = buf_xyz.get_read_access(depends_list);
261 auto hpart = buf_hpart.get_read_access(depends_list);
262 auto torender = buf_field_to_render.get_read_access(depends_list);
264 sycl::event e2 = q.
submit(depends_list, [&, render_field](sycl::handler &cgh) {
265 shamrock::tree::ObjectIterator particle_looper(tree, cgh);
270 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
272 Tscal partmass = solver_config.gpart_mass;
274 shambase::parallel_for(
275 cgh, positions.
get_size(),
"compute slice render", [=](
u32 gid) {
276 Tvec pos_render = pixel_positions[gid];
278 Tfield ret = sham::VectorProperties<Tfield>::get_zero();
280 particle_looper.rtree_for(
281 [&](u32 node_id, Tvec bmin, Tvec bmax) -> bool {
282 Tscal rint_cell = hmax[node_id] * Kernel::Rkern;
285 = shammath::CoordRange<Tvec>{bmin, bmax}.expand_all(rint_cell);
287 return interbox.contain_pos(pos_render);
290 Tvec dr = pos_render - xyz[id_b];
291 Tscal rab2 = sycl::dot(dr, dr);
292 Tscal h_b = hpart[id_b];
294 if (rab2 > h_b * h_b * Rker2) {
298 Tscal rab = sycl::sqrt(rab2);
300 Tfield val = torender[id_b];
302 Tscal rho_b = shamrock::sph::rho_h(partmass, h_b, Kernel::hfactd);
304 ret += partmass * val * Kernel::W_3d(rab, h_b) / rho_b;
307 render_field[gid] += ret;
312 buf_hpart.complete_event_state(e2);
313 buf_field_to_render.complete_event_state(e2);
318 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
323 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
324 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_column_integ(
325 std::function<field_getter_t> field_getter,
329 rays.get_size(), shamsys::instance::get_compute_scheduler_ptr()};
330 ret.
fill(sham::VectorProperties<Tfield>::get_zero());
332 using u_morton =
u32;
336 = scheduler().get_sim_box().template get_patch_transform<Tvec>();
344 auto &buf_xyz = pdat.get_field<Tvec>(0).get_buf();
346 = pdat.get_field<Tscal>(pdat.pdl().get_field_idx<Tscal>(
"hpart")).get_buf();
348 auto &buf_field_to_render = field_getter(cur_p, pdat);
350 u32 obj_cnt = main_field.get_obj_cnt();
353 shamsys::instance::get_compute_scheduler_ptr(),
354 {box.lower, box.upper},
357 solver_config.tree_reduction_level);
364 pdat.get_field<Tscal>(pdat.pdl().
get_field_idx<Tscal>(
"hpart")).get_buf(),
374 auto xyz = buf_xyz.get_read_access(depends_list);
375 auto hpart = buf_hpart.get_read_access(depends_list);
376 auto torender = buf_field_to_render.get_read_access(depends_list);
378 sycl::event e2 = q.
submit(depends_list, [&, render_field](sycl::handler &cgh) {
384 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
386 Tscal partmass = solver_config.gpart_mass;
388 shambase::parallel_for(cgh, rays.get_size(),
"compute slice render", [=](
u32 gid) {
389 Tfield ret = sham::VectorProperties<Tfield>::get_zero();
391 shammath::Ray<Tvec> ray = image_rays[gid];
393 particle_looper.rtree_for(
394 [&](u32 node_id, Tvec bmin, Tvec bmax) -> bool {
395 Tscal rint_cell = hmax[node_id] * Kernel::Rkern;
397 auto interbox = shammath::AABB<Tvec>{bmin, bmax}.expand_all(rint_cell);
399 return interbox.intersect_ray(ray);
402 Tvec dr = ray.origin - xyz[id_b];
404 dr -= ray.direction * sycl::dot(dr, ray.direction);
406 Tscal rab2 = sycl::dot(dr, dr);
407 Tscal h_b = hpart[id_b];
409 if (rab2 > h_b * h_b * Rker2) {
413 Tscal rab = sycl::sqrt(rab2);
415 Tfield val = torender[id_b];
417 Tscal rho_b = shamrock::sph::rho_h(partmass, h_b, Kernel::hfactd);
419 ret += partmass * val * Kernel::Y_3d(rab, h_b, 4) / rho_b;
422 render_field[gid] += ret;
427 buf_hpart.complete_event_state(e2);
428 buf_field_to_render.complete_event_state(e2);
430 rays.complete_event_state(e2);
433 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
438 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
439 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_azymuthal_integ(
440 std::function<field_getter_t> field_getter,
445 ring_rays.get_size(), shamsys::instance::get_compute_scheduler_ptr()};
446 ret.
fill(sham::VectorProperties<Tfield>::get_zero());
448 using u_morton =
u32;
452 = scheduler().get_sim_box().template get_patch_transform<Tvec>();
460 auto &buf_xyz = pdat.get_field<Tvec>(0).get_buf();
462 = pdat.get_field<Tscal>(pdat.pdl().get_field_idx<Tscal>(
"hpart")).get_buf();
464 auto &buf_field_to_render = field_getter(cur_p, pdat);
466 u32 obj_cnt = main_field.get_obj_cnt();
469 shamsys::instance::get_compute_scheduler_ptr(),
470 {box.lower, box.upper},
473 solver_config.tree_reduction_level);
480 pdat.get_field<Tscal>(pdat.pdl().
get_field_idx<Tscal>(
"hpart")).get_buf(),
490 auto xyz = buf_xyz.get_read_access(depends_list);
491 auto hpart = buf_hpart.get_read_access(depends_list);
492 auto torender = buf_field_to_render.get_read_access(depends_list);
494 sycl::event e2 = q.
submit(depends_list, [&, render_field](sycl::handler &cgh) {
500 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
502 Tscal partmass = solver_config.gpart_mass;
504 shambase::parallel_for(
505 cgh, ring_rays.get_size(),
"compute slice render", [=](
u32 gid) {
506 Tfield ret = sham::VectorProperties<Tfield>::get_zero();
508 shammath::RingRay<Tvec> ring_ray = ring_rays_ptr[gid];
509 Tvec ez = ring_ray.get_ez();
511 particle_looper.rtree_for(
512 [&](u32 node_id, Tvec bmin, Tvec bmax) -> bool {
513 Tscal rint_cell = hmax[node_id] * Kernel::Rkern;
516 = shammath::AABB<Tvec>{bmin, bmax}.expand_all(rint_cell);
518 return interbox.intersect_ring_ray_approx(ring_ray);
521 Tvec r_center = ring_ray.center - xyz[id_b];
523 Tscal z_val = sycl::dot(r_center, ez);
524 Tscal x_val = sycl::dot(r_center, ring_ray.e_x);
525 Tscal y_val = sycl::dot(r_center, ring_ray.e_y);
526 Tscal r_val = sycl::sqrt(x_val * x_val + y_val * y_val);
528 Tscal delta_r = r_val - ring_ray.radius;
530 Tscal rab2_ring = z_val * z_val + delta_r * delta_r;
531 Tscal h_b = hpart[id_b];
533 if (rab2_ring > h_b * h_b * Rker2) {
537 Tscal rab = sycl::sqrt(rab2_ring);
539 Tfield val = torender[id_b];
541 Tscal rho_b = shamrock::sph::rho_h(partmass, h_b, Kernel::hfactd);
544 ret += partmass * val * Kernel::Y_3d(rab, h_b, 4) / rho_b;
547 render_field[gid] += ret;
552 buf_hpart.complete_event_state(e2);
553 buf_field_to_render.complete_event_state(e2);
555 ring_rays.complete_event_state(e2);
558 shamalgs::collective::reduce_buffer_in_place_sum(ret, MPI_COMM_WORLD);
563 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
564 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_slice(
565 std::function<field_getter_t> field_getter,
572 auto positions = pixel_to_positions(center, delta_x, delta_y, nx, ny);
574 return compute_slice(field_getter, positions);
577 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
578 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_column_integ(
579 std::function<field_getter_t> field_getter,
586 auto rays = pixel_to_orthographic_rays(center, delta_x, delta_y, nx, ny);
588 return compute_column_integ(field_getter, rays);
591 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
592 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_slice(
593 std::string field_name,
599 std::optional<std::function<pybind11::array_t<Tfield>(
size_t, pybind11::dict &)>>
601 auto positions = pixel_to_positions(center, delta_x, delta_y, nx, ny);
602 return compute_slice(field_name, positions, custom_getter);
605 template<
class Tvec,
class Tfield,
template<
class>
class SPHKernel>
606 auto CartesianRender<Tvec, Tfield, SPHKernel>::compute_column_integ(
607 std::string field_name,
613 std::optional<std::function<pybind11::array_t<Tfield>(
size_t, pybind11::dict &)>>
615 auto rays = pixel_to_orthographic_rays(center, delta_x, delta_y, nx, ny);
616 return compute_column_integ(field_name, rays, custom_getter);
constexpr const char * xyz
Position field (3D coordinates).
constexpr const char * hpart
Smoothing length field.
sycl::queue & get_compute_queue(u32 id=0)
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory).
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
size_t get_size() const
Gets the number of elements in the buffer.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
std::string get_time_str() const
Converts the stored nanosecond time to a string representation.
void start()
Starts the timer.
void stop()
Stops the timer and stores the elapsed time in nanoseconds.
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
This header file contains utility functions related to exception handling in the code.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
ExcptTypes make_except_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Create an exception with a message and a location.
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
namespace for math utility
namespace for the sph model modules
Ray representation for intersection testing.
Ring ray representation for intersection testing.
Patch object that contain generic patch information.