29template<
class Tvec,
class Tmorton,
template<
class>
class SPHKernel>
33 using GhostHandle = sph::BasicSPHGhostHandler<Tvec>;
34 using GhostHandleCache =
typename GhostHandle::CacheMap;
43 auto build_neigh_cache = [&](
u64 patch_id) {
44 shamlog_debug_ln(
"BasicSPH",
"build particle cache id =", patch_id);
48 auto &mfield = storage.merged_xyzh.get().get(patch_id);
54 = storage.rtree_rint_field.get().get(patch_id).buf_field;
56 RTree &tree = storage.merged_pos_trees.get().get(patch_id);
57 auto obj_it = tree.get_object_iterator();
61 sycl::range range_npart{obj_cnt};
63 Tscal h_tolerance = solver_config.htol_up_coarse_cycle;
70 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
74 shamlog_debug_sycl_ln(
"Cache",
"generate cache for N=", obj_cnt);
82 auto neigh_cnt = neigh_count.get_write_access(depends_list);
83 auto particle_looper = obj_it.get_read_access(depends_list);
85 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
86 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
88 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 1", [=](
u64 gid) {
91 Tscal rint_a =
hpart[id_a] * h_tolerance;
93 Tvec xyz_a =
xyz[id_a];
95 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
96 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
100 particle_looper.rtree_for(
102 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
104 using namespace walker::interaction_crit;
106 return sph_radix_cell_crit(
117 Tvec dr = xyz_a -
xyz[id_b];
118 Tscal rab2 = sycl::dot(dr, dr);
119 Tscal rint_b =
hpart[id_b] * h_tolerance;
122 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
124 cnt += (no_interact) ? 0 : 1;
127 neigh_cnt[id_a] = cnt;
133 neigh_count.complete_event_state(e);
135 obj_it.complete_event_state(e);
138 tree::ObjectCache pcache = tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
148 auto scanned_neigh_cnt = pcache.scanned_cnt.
get_read_access(depends_list);
150 auto particle_looper = obj_it.get_read_access(depends_list);
152 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
153 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
155 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 2", [=](
u64 gid) {
158 Tscal rint_a =
hpart[id_a] * h_tolerance;
160 Tvec xyz_a =
xyz[id_a];
162 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
163 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
165 u32 cnt = scanned_neigh_cnt[id_a];
167 particle_looper.rtree_for(
169 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
171 using namespace walker::interaction_crit;
173 return sph_radix_cell_crit(
184 Tvec dr = xyz_a -
xyz[id_b];
185 Tscal rab2 = sycl::dot(dr, dr);
186 Tscal rint_b =
hpart[id_b] * h_tolerance;
189 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
194 cnt += (no_interact) ? 0 : 1;
204 obj_it.complete_event_state(e);
212 using namespace shamrock::patch;
215 ncache.neigh_cache.add_obj(cur_p.
id_patch, build_neigh_cache(cur_p.
id_patch));
219 storage.timings_details.neighbors += time_neigh.
elasped_sec();
222template<
class Tvec,
class Tmorton,
template<
class>
class SPHKernel>
227 using GhostHandle = sph::BasicSPHGhostHandler<Tvec>;
228 using GhostHandleCache =
typename GhostHandle::CacheMap;
237 auto build_neigh_cache = [&](
u64 patch_id) {
238 shamlog_debug_ln(
"BasicSPH",
"build particle cache id =", patch_id);
242 auto &mfield = storage.merged_xyzh.get().get(patch_id);
248 = storage.rtree_rint_field.get().get(patch_id).buf_field;
250 RTree &tree = storage.merged_pos_trees.get().get(patch_id);
251 auto obj_it = tree.get_object_iterator();
252 auto leaf_it = tree.get_traverser();
254 u32 leaf_cnt = tree.get_leaf_cell_count();
255 u32 intnode_cnt = tree.get_internal_cell_count();
259 sycl::range range_nleaf{leaf_cnt};
260 sycl::range range_nobj{obj_cnt};
263 Tscal h_tolerance = solver_config.htol_up_coarse_cycle;
270 leaf_cnt, shamsys::instance::get_compute_scheduler_ptr());
274 shamlog_debug_sycl_ln(
"Cache",
"generate cache for Nleaf=", leaf_cnt);
283 auto neigh_cnt = neigh_count_leaf.get_write_access(depends_list);
284 auto leaf_looper = leaf_it.get_read_access(depends_list);
286 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
287 u32 offset_leaf = intnode_cnt;
289 shambase::parallel_for(cgh, leaf_cnt,
"compute neigh cache 1", [=](
u64 gid) {
292 Tscal leaf_a_rint = rint_tree[offset_leaf + gid] * Kernel::Rkern;
293 Tvec leaf_a_bmin = leaf_looper.aabb_min[offset_leaf + gid];
294 Tvec leaf_a_bmax = leaf_looper.aabb_max[offset_leaf + gid];
295 Tvec leaf_a_bmin_ext = leaf_a_bmin - leaf_a_rint;
296 Tvec leaf_a_bmax_ext = leaf_a_bmax + leaf_a_rint;
300 leaf_looper.rtree_for(
302 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
304 Tvec ext_bmin = node_aabb.
lower - int_r_max_cell;
305 Tvec ext_bmax = node_aabb.
upper + int_r_max_cell;
307 return BBAA::cella_neigh_b(leaf_a_bmin, leaf_a_bmax, ext_bmin, ext_bmax)
308 || BBAA::cella_neigh_b(
318 neigh_cnt[id_a] = cnt;
325 neigh_count_leaf.complete_event_state(e);
326 leaf_it.complete_event_state(e);
346 = tree::prepare_object_cache(std::move(neigh_count_leaf), leaf_cnt);
359 auto scanned_neigh_cnt = pleaf_cache.scanned_cnt.
get_read_access(depends_list);
361 auto leaf_looper = leaf_it.get_read_access(depends_list);
363 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
364 u32 offset_leaf = intnode_cnt;
366 shambase::parallel_for(cgh, leaf_cnt,
"compute neigh cache 2", [=](
u64 gid) {
369 Tscal leaf_a_rint = rint_tree[offset_leaf + gid] * Kernel::Rkern;
370 Tvec leaf_a_bmin = leaf_looper.aabb_min[offset_leaf + gid];
371 Tvec leaf_a_bmax = leaf_looper.aabb_max[offset_leaf + gid];
372 Tvec leaf_a_bmin_ext = leaf_a_bmin - leaf_a_rint;
373 Tvec leaf_a_bmax_ext = leaf_a_bmax + leaf_a_rint;
375 u32 cnt = scanned_neigh_cnt[id_a];
377 leaf_looper.rtree_for(
379 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
381 Tvec ext_bmin = node_aabb.
lower - int_r_max_cell;
382 Tvec ext_bmax = node_aabb.
upper + int_r_max_cell;
384 return BBAA::cella_neigh_b(leaf_a_bmin, leaf_a_bmax, ext_bmin, ext_bmax)
385 || BBAA::cella_neigh_b(
403 leaf_it.complete_event_state(e);
406 sycl::buffer<u32> leaf_part_id(obj_cnt);
413 auto leaf_looper = leaf_it.get_read_access(depends_list);
415 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
416 sycl::accessor found_id{leaf_part_id, cgh, sycl::write_only, sycl::no_init};
417 u32 offset_leaf = intnode_cnt;
419 shambase::parallel_for(cgh, obj_cnt,
"search particles parent leaf", [=](
u64 gid) {
422 Tvec r_a =
xyz[id_a];
427 leaf_looper.rtree_for(
429 bool ret = BBAA::is_coord_in_range_incl_max(
440 found_id_ = leaf_b - offset_leaf;
445 found_id[id_a] = found_id_;
450 leaf_it.complete_event_state(e);
466 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
470 shamlog_debug_sycl_ln(
"Cache",
"generate cache for N=", obj_cnt);
478 auto acc_neigh_leaf_looper = pleaf_cache.get_read_access(depends_list);
479 auto neigh_cnt = neigh_count.get_write_access(depends_list);
480 auto particle_looper = obj_it.cell_iterator.get_read_access(depends_list);
482 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
485 sycl::accessor leaf_owner{leaf_part_id, cgh, sycl::read_only};
487 u32 offset_leaf = intnode_cnt;
490 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
492 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 1", [=](
u64 gid) {
495 Tscal rint_a =
hpart[id_a] * h_tolerance;
497 Tvec xyz_a =
xyz[id_a];
501 u32 leaf_own_a = leaf_owner[id_a];
503 neigh_leaf_looper.for_each_object(leaf_own_a, [&](
u32 leaf_b) {
506 particle_looper.for_each_in_leaf_cell(leaf_b - offset_leaf, [&](
u32 id_b) {
507 Tvec dr = xyz_a -
xyz[id_b];
508 Tscal rab2 = sycl::dot(dr, dr);
509 Tscal rint_b =
hpart[id_b] * h_tolerance;
512 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
514 cnt += (no_interact) ? 0 : 1;
518 neigh_cnt[id_a] = cnt;
524 pleaf_cache.complete_event_state(e);
525 neigh_count.complete_event_state(e);
526 obj_it.cell_iterator.complete_event_state(e);
529 tree::ObjectCache pcache = tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
539 auto acc_neigh_leaf_looper = pleaf_cache.get_read_access(depends_list);
540 auto scanned_neigh_cnt = pcache.scanned_cnt.
get_read_access(depends_list);
542 auto particle_looper = obj_it.cell_iterator.get_read_access(depends_list);
544 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
547 sycl::accessor leaf_owner{leaf_part_id, cgh, sycl::read_only};
549 u32 offset_leaf = intnode_cnt;
551 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
553 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 2", [=](
u64 gid) {
556 Tscal rint_a =
hpart[id_a] * h_tolerance;
558 Tvec xyz_a =
xyz[id_a];
560 u32 cnt = scanned_neigh_cnt[id_a];
562 u32 leaf_own_a = leaf_owner[id_a];
564 neigh_leaf_looper.for_each_object(leaf_own_a, [&](
u32 leaf_b) {
567 particle_looper.for_each_in_leaf_cell(leaf_b - offset_leaf, [&](
u32 id_b) {
568 Tvec dr = xyz_a -
xyz[id_b];
569 Tscal rab2 = sycl::dot(dr, dr);
570 Tscal rint_b =
hpart[id_b] * h_tolerance;
573 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
578 cnt += (no_interact) ? 0 : 1;
586 pleaf_cache.complete_event_state(e);
589 obj_it.cell_iterator.complete_event_state(e);
596 using namespace shamrock::patch;
599 ncache.neigh_cache.add_obj(cur_p.
id_patch, build_neigh_cache(cur_p.
id_patch));
603 storage.timings_details.neighbors += time_neigh.
elasped_sec();
constexpr const char * xyz
Position field (3D coordinates)
constexpr const char * hpart
Smoothing length field.
sycl::queue & get_compute_queue(u32 id=0)
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Class Timer measures the time elapsed since the timer was started.
void end()
Stops the timer and stores the elapsed time in nanoseconds.
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
void start()
Starts the timer.
PatchDataLayer container class, the layout is described in patchdata_layout.
A Compressed Leaf Bounding Volume Hierarchy (CLBVH) for neighborhood queries.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
namespace for math utility
namespace for the main framework
constexpr i32 i32_max
i32 max value
Axis-Aligned bounding box.
T lower
Lower bound of the AABB.
T upper
Upper bound of the AABB.
Patch object that contain generic patch information.
u64 id_patch
unique key that identify the patch