29template<
class Tvec,
class Tmorton,
template<
class>
class SPHKernel>
30void shammodels::sph::modules::NeighbourCache<Tvec, Tmorton, SPHKernel>::start_neighbors_cache() {
33 using GhostHandle = sph::BasicSPHGhostHandler<Tvec>;
34 using GhostHandleCache =
typename GhostHandle::CacheMap;
43 auto build_neigh_cache = [&](
u64 patch_id) {
44 shamlog_debug_ln(
"BasicSPH",
"build particle cache id =", patch_id);
48 auto &mfield = storage.merged_xyzh.get().get(patch_id);
54 = storage.rtree_rint_field.get().get(patch_id).buf_field;
56 RTree &tree = storage.merged_pos_trees.get().get(patch_id);
57 auto obj_it = tree.get_object_iterator();
61 sycl::range range_npart{obj_cnt};
63 Tscal h_tolerance = solver_config.htol_up_coarse_cycle;
70 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
74 shamlog_debug_sycl_ln(
"Cache",
"generate cache for N=", obj_cnt);
82 auto neigh_cnt = neigh_count.get_write_access(depends_list);
83 auto particle_looper = obj_it.get_read_access(depends_list);
85 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
86 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
88 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 1", [=](
u64 gid) {
91 Tscal rint_a =
hpart[id_a] * h_tolerance;
93 Tvec xyz_a =
xyz[id_a];
95 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
96 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
100 particle_looper.rtree_for(
102 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
104 using namespace walker::interaction_crit;
106 return sph_radix_cell_crit(
117 Tvec dr = xyz_a -
xyz[id_b];
118 Tscal rab2 = sycl::dot(dr, dr);
119 Tscal rint_b =
hpart[id_b] * h_tolerance;
122 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
124 cnt += (no_interact) ? 0 : 1;
127 neigh_cnt[id_a] = cnt;
133 neigh_count.complete_event_state(e);
135 obj_it.complete_event_state(e);
138 tree::ObjectCache pcache = tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
148 auto scanned_neigh_cnt = pcache.scanned_cnt.
get_read_access(depends_list);
150 auto particle_looper = obj_it.get_read_access(depends_list);
152 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
153 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
155 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 2", [=](
u64 gid) {
158 Tscal rint_a =
hpart[id_a] * h_tolerance;
160 Tvec xyz_a =
xyz[id_a];
162 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
163 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
165 u32 cnt = scanned_neigh_cnt[id_a];
167 particle_looper.rtree_for(
169 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
171 using namespace walker::interaction_crit;
173 return sph_radix_cell_crit(
184 Tvec dr = xyz_a -
xyz[id_b];
185 Tscal rab2 = sycl::dot(dr, dr);
186 Tscal rint_b =
hpart[id_b] * h_tolerance;
189 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
194 cnt += (no_interact) ? 0 : 1;
204 obj_it.complete_event_state(e);
212 using namespace shamrock::patch;
215 ncache.neigh_cache.add_obj(cur_p.
id_patch, build_neigh_cache(cur_p.
id_patch));
219 storage.timings_details.neighbors += time_neigh.
elapsed_sec();
222template<
class Tvec,
class Tmorton,
template<
class>
class SPHKernel>
227 using GhostHandle = sph::BasicSPHGhostHandler<Tvec>;
228 using GhostHandleCache =
typename GhostHandle::CacheMap;
237 auto build_neigh_cache = [&](
u64 patch_id) {
238 shamlog_debug_ln(
"BasicSPH",
"build particle cache id =", patch_id);
242 auto &mfield = storage.merged_xyzh.get().get(patch_id);
248 = storage.rtree_rint_field.get().get(patch_id).buf_field;
250 RTree &tree = storage.merged_pos_trees.get().get(patch_id);
251 auto obj_it = tree.get_object_iterator();
252 auto leaf_it = tree.get_traverser();
254 u32 leaf_cnt = tree.get_leaf_cell_count();
255 u32 intnode_cnt = tree.get_internal_cell_count();
259 sycl::range range_nleaf{leaf_cnt};
260 sycl::range range_nobj{obj_cnt};
263 Tscal h_tolerance = solver_config.htol_up_coarse_cycle;
270 leaf_cnt, shamsys::instance::get_compute_scheduler_ptr());
274 shamlog_debug_sycl_ln(
"Cache",
"generate cache for Nleaf=", leaf_cnt);
283 auto neigh_cnt = neigh_count_leaf.get_write_access(depends_list);
284 auto leaf_looper = leaf_it.get_read_access(depends_list);
286 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
287 u32 offset_leaf = intnode_cnt;
289 shambase::parallel_for(cgh, leaf_cnt,
"compute neigh cache 1", [=](
u64 gid) {
292 Tscal leaf_a_rint = rint_tree[offset_leaf + gid] * Kernel::Rkern;
293 Tvec leaf_a_bmin = leaf_looper.aabb_min[offset_leaf + gid];
294 Tvec leaf_a_bmax = leaf_looper.aabb_max[offset_leaf + gid];
295 Tvec leaf_a_bmin_ext = leaf_a_bmin - leaf_a_rint;
296 Tvec leaf_a_bmax_ext = leaf_a_bmax + leaf_a_rint;
300 leaf_looper.rtree_for(
302 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
304 Tvec ext_bmin = node_aabb.
lower - int_r_max_cell;
305 Tvec ext_bmax = node_aabb.
upper + int_r_max_cell;
307 return BBAA::cella_neigh_b(leaf_a_bmin, leaf_a_bmax, ext_bmin, ext_bmax)
308 || BBAA::cella_neigh_b(
318 neigh_cnt[id_a] = cnt;
325 neigh_count_leaf.complete_event_state(e);
326 leaf_it.complete_event_state(e);
346 = tree::prepare_object_cache(std::move(neigh_count_leaf), leaf_cnt);
359 auto scanned_neigh_cnt = pleaf_cache.scanned_cnt.
get_read_access(depends_list);
361 auto leaf_looper = leaf_it.get_read_access(depends_list);
363 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
364 u32 offset_leaf = intnode_cnt;
366 shambase::parallel_for(cgh, leaf_cnt,
"compute neigh cache 2", [=](
u64 gid) {
369 Tscal leaf_a_rint = rint_tree[offset_leaf + gid] * Kernel::Rkern;
370 Tvec leaf_a_bmin = leaf_looper.aabb_min[offset_leaf + gid];
371 Tvec leaf_a_bmax = leaf_looper.aabb_max[offset_leaf + gid];
372 Tvec leaf_a_bmin_ext = leaf_a_bmin - leaf_a_rint;
373 Tvec leaf_a_bmax_ext = leaf_a_bmax + leaf_a_rint;
375 u32 cnt = scanned_neigh_cnt[id_a];
377 leaf_looper.rtree_for(
379 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
381 Tvec ext_bmin = node_aabb.
lower - int_r_max_cell;
382 Tvec ext_bmax = node_aabb.
upper + int_r_max_cell;
384 return BBAA::cella_neigh_b(leaf_a_bmin, leaf_a_bmax, ext_bmin, ext_bmax)
385 || BBAA::cella_neigh_b(
403 leaf_it.complete_event_state(e);
406 sycl::buffer<u32> leaf_part_id(obj_cnt);
413 auto leaf_looper = leaf_it.get_read_access(depends_list);
415 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
416 sycl::accessor found_id{leaf_part_id, cgh, sycl::write_only, sycl::no_init};
417 u32 offset_leaf = intnode_cnt;
419 shambase::parallel_for(cgh, obj_cnt,
"search particles parent leaf", [=](
u64 gid) {
422 Tvec r_a =
xyz[id_a];
427 leaf_looper.rtree_for(
429 bool ret = BBAA::is_coord_in_range_incl_max(
440 found_id_ = leaf_b - offset_leaf;
445 found_id[id_a] = found_id_;
450 leaf_it.complete_event_state(e);
466 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
470 shamlog_debug_sycl_ln(
"Cache",
"generate cache for N=", obj_cnt);
478 auto acc_neigh_leaf_looper = pleaf_cache.get_read_access(depends_list);
479 auto neigh_cnt = neigh_count.get_write_access(depends_list);
480 auto particle_looper = obj_it.cell_iterator.get_read_access(depends_list);
482 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
485 sycl::accessor leaf_owner{leaf_part_id, cgh, sycl::read_only};
487 u32 offset_leaf = intnode_cnt;
490 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
492 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 1", [=](
u64 gid) {
495 Tscal rint_a =
hpart[id_a] * h_tolerance;
497 Tvec xyz_a =
xyz[id_a];
501 u32 leaf_own_a = leaf_owner[id_a];
503 neigh_leaf_looper.for_each_object(leaf_own_a, [&](
u32 leaf_b) {
506 particle_looper.for_each_in_leaf_cell(leaf_b - offset_leaf, [&](
u32 id_b) {
507 Tvec dr = xyz_a -
xyz[id_b];
508 Tscal rab2 = sycl::dot(dr, dr);
509 Tscal rint_b =
hpart[id_b] * h_tolerance;
512 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
514 cnt += (no_interact) ? 0 : 1;
518 neigh_cnt[id_a] = cnt;
524 pleaf_cache.complete_event_state(e);
525 neigh_count.complete_event_state(e);
526 obj_it.cell_iterator.complete_event_state(e);
529 tree::ObjectCache pcache = tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
539 auto acc_neigh_leaf_looper = pleaf_cache.get_read_access(depends_list);
540 auto scanned_neigh_cnt = pcache.scanned_cnt.
get_read_access(depends_list);
542 auto particle_looper = obj_it.cell_iterator.get_read_access(depends_list);
544 auto e = q.
submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
547 sycl::accessor leaf_owner{leaf_part_id, cgh, sycl::read_only};
549 u32 offset_leaf = intnode_cnt;
551 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
553 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 2", [=](
u64 gid) {
556 Tscal rint_a =
hpart[id_a] * h_tolerance;
558 Tvec xyz_a =
xyz[id_a];
560 u32 cnt = scanned_neigh_cnt[id_a];
562 u32 leaf_own_a = leaf_owner[id_a];
564 neigh_leaf_looper.for_each_object(leaf_own_a, [&](
u32 leaf_b) {
567 particle_looper.for_each_in_leaf_cell(leaf_b - offset_leaf, [&](
u32 id_b) {
568 Tvec dr = xyz_a -
xyz[id_b];
569 Tscal rab2 = sycl::dot(dr, dr);
570 Tscal rint_b =
hpart[id_b] * h_tolerance;
573 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
578 cnt += (no_interact) ? 0 : 1;
586 pleaf_cache.complete_event_state(e);
589 obj_it.cell_iterator.complete_event_state(e);
596 using namespace shamrock::patch;
599 ncache.neigh_cache.add_obj(cur_p.
id_patch, build_neigh_cache(cur_p.
id_patch));
603 storage.timings_details.neighbors += time_neigh.
elapsed_sec();
constexpr const char * xyz
Position field (3D coordinates).
constexpr const char * hpart
Smoothing length field.
sycl::queue & get_compute_queue(u32 id=0)
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
A buffer allocated in USM (Unified Shared Memory).
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage a list of SYCL events.
Class Timer measures the time elapsed since the timer was started.
f64 elapsed_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
void start()
Starts the timer.
void stop()
Stops the timer and stores the elapsed time in nanoseconds.
PatchDataLayer container class, the layout is described in patchdata_layout.
A Compressed Leaf Bounding Volume Hierarchy (CLBVH) for neighborhood queries.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
namespace for math utility
namespace for the main framework
constexpr i32 i32_max
i32 max value
shambase::details::NamedBasicStackEntry NamedStackEntry
Alias for shambase::details::NamedBasicStackEntry.
shambase::details::BasicStackEntry StackEntry
Alias for shambase::details::BasicStackEntry.
Axis-Aligned bounding box.
T lower
Lower bound of the AABB.
T upper
Upper bound of the AABB.
Patch object that contain generic patch information.
u64 id_patch
unique key that identify the patch