49template<
class Umorton,
class Tvec>
56 static constexpr bool pos_is_int
57 = std::is_same<Tvec, u16_3>::value || std::is_same<Tvec, u32_3>::value
58 || std::is_same<Tvec, u64_3>::value || std::is_same<Tvec, i16_3>::value
59 || std::is_same<Tvec, i32_3>::value || std::is_same<Tvec, i64_3>::value;
61 static constexpr bool pos_is_float
62 = std::is_same<Tvec, f32_3>::value || std::is_same<Tvec, f64_3>::value;
68 using coord_t =
typename shambase::VectorProperties<Tvec>::component_type;
70 static constexpr u32 tree_depth = Morton::significant_bits + 1;
72 std::tuple<Tvec, Tvec> bounding_box;
83 inline bool is_tree_built() {
return tree_struct.is_built(); }
85 inline bool are_range_int_built() {
return tree_cell_ranges.are_range_int_built(); }
87 inline bool are_range_float_built() {
return tree_cell_ranges.are_range_float_built(); }
97 cmp = cmp && sham::equals(std::get<0>(t1.bounding_box), std::get<0>(t2.bounding_box));
98 cmp = cmp && sham::equals(std::get<1>(t1.bounding_box), std::get<1>(t2.bounding_box));
99 cmp = cmp && t1.tree_morton_codes == t2.tree_morton_codes;
100 cmp = cmp && t1.tree_reduced_morton_codes == t2.tree_reduced_morton_codes;
101 cmp = cmp && t1.tree_struct == t2.tree_struct;
102 cmp = cmp && t1.tree_cell_ranges == t2.tree_cell_ranges;
109 void compute_cell_ibounding_box(sycl::queue &queue);
110 void convert_bounding_box(sycl::queue &queue);
112 inline std::unique_ptr<sycl::buffer<Umorton>> build_new_morton_buf(
113 sycl::buffer<Tvec> &pos_buf,
u32 obj_cnt) {
115 return tree_morton_codes.build_raw(
116 shamsys::instance::get_compute_queue(),
124 std::tuple<Tvec, Tvec> treebox,
125 const std::unique_ptr<sycl::buffer<Tvec>> &pos_buf,
131 std::tuple<Tvec, Tvec> treebox,
132 sycl::buffer<Tvec> &pos_buf,
137 sham::DeviceScheduler_ptr dev_sched,
138 std::tuple<Tvec, Tvec> treebox,
144 : bounding_box(other.bounding_box), tree_morton_codes{other.tree_morton_codes},
145 tree_reduced_morton_codes(other.tree_reduced_morton_codes),
146 tree_struct{other.tree_struct}, tree_cell_ranges(other.tree_cell_ranges) {}
148 [[nodiscard]]
inline u64 memsize()
const {
151 sum +=
sizeof(bounding_box);
153 auto add_ptr = [&](
auto &a) {
155 sum += a->byte_size();
159 sum += tree_morton_codes.memsize();
160 sum += tree_reduced_morton_codes.memsize();
161 sum += tree_struct.memsize();
162 sum += tree_cell_ranges.memsize();
168 const auto &cur = *
this;
172 inline std::unique_ptr<RadixTree> duplicate_to_ptr() {
173 const auto &cur = *
this;
174 return std::make_unique<RadixTree>(cur);
180 cmp = cmp && (sham::equals(std::get<0>(bounding_box), std::get<0>(other.bounding_box)));
181 cmp = cmp && (sham::equals(std::get<1>(bounding_box), std::get<1>(other.bounding_box)));
182 cmp = cmp && (tree_cell_ranges == other.tree_cell_ranges);
184 && (tree_reduced_morton_codes.tree_leaf_count
185 == other.tree_reduced_morton_codes.tree_leaf_count);
186 cmp = cmp && (tree_struct == other.tree_struct);
194 template<
class T,
class LambdaComputeLeaf,
class LambdaCombinator>
198 LambdaComputeLeaf &&compute_leaf,
199 LambdaCombinator &&combine)
const;
201 template<
class LambdaForEachCell>
202 std::pair<std::set<u32>, std::set<u32>> get_walk_res_set(LambdaForEachCell &&interact_cd)
const;
204 template<
class LambdaForEachCell>
205 void for_each_leaf(sycl::queue &queue, LambdaForEachCell &&par_for_each_cell)
const;
207 std::tuple<coord_t, coord_t> get_min_max_cell_side_length();
211 std::unique_ptr<sycl::buffer<u32>> new_node_id_to_old;
213 std::unique_ptr<sycl::buffer<u32>> pdat_extract_id;
216 CuttedTree cut_tree(sycl::queue &queue, sycl::buffer<u8> &valid_node);
219 void print_tree_field(sycl::buffer<T> &buf_field);
225 sycl::accessor<u32, 1, sycl::access::mode::read, sycl::target::device> particle_index_map;
226 sycl::accessor<u32, 1, sycl::access::mode::read, sycl::target::device> reduc_index_map;
230 : particle_index_map(rtree.tree_morton_codes.buf_particle_index_map
231 ->template get_access<sycl::access::mode::read>(cgh)),
232 reduc_index_map(rtree.tree_reduced_morton_codes.buf_reduc_index_map
233 ->template get_access<sycl::access::mode::read>(cgh)) {}
236 inline void iter_object_in_leaf(
u32 leaf_id, Func &&func_it)
const noexcept {
238 uint min_ids = reduc_index_map[leaf_id];
239 uint max_ids = reduc_index_map[leaf_id + 1];
241 for (
unsigned int id_s = min_ids; id_s < max_ids; id_s++) {
244 uint id_b = particle_index_map[id_s];
252 inline LeafIterator get_leaf_access(sycl::handler &device_handler) {
257template<
class u_morton,
class vec3>
258template<
class T,
class LambdaComputeLeaf,
class LambdaCombinator>
264 LambdaComputeLeaf &&compute_leaf,
265 LambdaCombinator &&combine)
const {
270 shamlog_debug_sycl_ln(
"RadixTree",
"compute_field");
272 ret.radix_tree_field_buf = std::make_unique<sycl::buffer<T>>(
273 tree_struct.internal_cell_count + tree_reduced_morton_codes.tree_leaf_count);
274 sycl::range<1> range_leaf_cell{tree_reduced_morton_codes.tree_leaf_count};
276 queue.submit([&](sycl::handler &cgh) {
277 u32 offset_leaf = tree_struct.internal_cell_count;
280 = sycl::accessor{*ret.radix_tree_field_buf, cgh, sycl::write_only, sycl::no_init};
282 auto cell_particle_ids = tree_reduced_morton_codes.buf_reduc_index_map
283 ->template get_access<sycl::access::mode::read>(cgh);
284 auto particle_index_map = tree_morton_codes.buf_particle_index_map
285 ->template get_access<sycl::access::mode::read>(cgh);
287 compute_leaf(cgh, [&](
auto &&lambda_loop) {
288 cgh.parallel_for(range_leaf_cell, [=](sycl::item<1> item) {
289 u32 gid = (
u32) item.get_id(0);
291 u32 min_ids = cell_particle_ids[gid];
292 u32 max_ids = cell_particle_ids[gid + 1];
295 [&](
auto &&particle_it) {
296 for (
unsigned int id_s = min_ids; id_s < max_ids; id_s++) {
297 particle_it(particle_index_map[id_s]);
302 return nvar * (offset_leaf + gid);
308 sycl::range<1> range_tree{tree_struct.internal_cell_count};
309 auto ker_reduc_hmax = [&](sycl::handler &cgh) {
310 u32 offset_leaf = tree_struct.internal_cell_count;
313 = ret.radix_tree_field_buf->template get_access<sycl::access::mode::read_write>(cgh);
315 auto rchild_id = tree_struct.buf_rchild_id->get_access<sycl::access::mode::read>(cgh);
316 auto lchild_id = tree_struct.buf_lchild_id->get_access<sycl::access::mode::read>(cgh);
317 auto rchild_flag = tree_struct.buf_rchild_flag->get_access<sycl::access::mode::read>(cgh);
318 auto lchild_flag = tree_struct.buf_lchild_flag->get_access<sycl::access::mode::read>(cgh);
320 cgh.parallel_for(range_tree, [=](sycl::item<1> item) {
321 u32 gid = (
u32) item.get_id(0);
323 u32 lid = lchild_id[gid] + offset_leaf * lchild_flag[gid];
324 u32 rid = rchild_id[gid] + offset_leaf * rchild_flag[gid];
327 [&](
u32 nvar_id) -> T {
328 return tree_field[nvar * lid + nvar_id];
330 [&](
u32 nvar_id) -> T {
331 return tree_field[nvar * rid + nvar_id];
340 for (
u32 i = 0; i < tree_depth; i++) {
341 queue.submit(ker_reduc_hmax);
344 return std::move(ret);
347template<
class u_morton,
class vec3>
348template<
class LambdaForEachCell>
350 LambdaForEachCell &&interact_cd)
const {
352 std::set<u32> leaf_list;
353 std::set<u32> rejected_list;
355 auto particle_index_map = sycl::host_accessor{*tree_morton_codes.buf_particle_index_map};
356 auto cell_index_map = sycl::host_accessor{*tree_reduced_morton_codes.buf_reduc_index_map};
357 auto rchild_id = sycl::host_accessor{*tree_struct.buf_rchild_id};
358 auto lchild_id = sycl::host_accessor{*tree_struct.buf_lchild_id};
359 auto rchild_flag = sycl::host_accessor{*tree_struct.buf_rchild_flag};
360 auto lchild_flag = sycl::host_accessor{*tree_struct.buf_lchild_flag};
364 u32 leaf_offset = tree_struct.internal_cell_count;
366 u32 stack_cursor = tree_depth - 1;
367 std::array<u32, tree_depth> id_stack;
368 id_stack[stack_cursor] = 0;
370 while (stack_cursor < tree_depth) {
372 u32 current_node_id = id_stack[stack_cursor];
373 id_stack[stack_cursor] = tree_depth;
376 if (interact_cd(current_node_id)) {
379 if (current_node_id >= leaf_offset) {
381 leaf_list.insert(current_node_id);
386 u32 lid = lchild_id[current_node_id] + leaf_offset * lchild_flag[current_node_id];
387 u32 rid = rchild_id[current_node_id] + leaf_offset * rchild_flag[current_node_id];
389 id_stack[stack_cursor - 1] = rid;
392 id_stack[stack_cursor - 1] = lid;
398 rejected_list.insert(current_node_id);
402 return std::pair<std::set<u32>, std::set<u32>>{std::move(leaf_list), std::move(rejected_list)};
405template<
class u_morton,
class vec3>
406template<
class LambdaForEachCell>
408 sycl::queue &queue, LambdaForEachCell &&par_for_each_cell)
const {
410 queue.submit([&](sycl::handler &cgh) {
411 auto particle_index_map = tree_morton_codes.buf_particle_index_map
412 ->template get_access<sycl::access::mode::read>(cgh);
413 auto cell_index_map = tree_reduced_morton_codes.buf_reduc_index_map
414 ->template get_access<sycl::access::mode::read>(cgh);
416 = tree_struct.buf_rchild_id->template get_access<sycl::access::mode::read>(cgh);
418 = tree_struct.buf_lchild_id->template get_access<sycl::access::mode::read>(cgh);
420 = tree_struct.buf_rchild_flag->template get_access<sycl::access::mode::read>(cgh);
422 = tree_struct.buf_lchild_flag->template get_access<sycl::access::mode::read>(cgh);
424 sycl::range<1> range_leaf = sycl::range<1>{tree_reduced_morton_codes.tree_leaf_count};
426 u32 leaf_offset = tree_struct.internal_cell_count;
428 auto par_for = [&](
auto &&for_each_leaf) {
429 cgh.parallel_for(range_leaf, [=](sycl::item<1> item) {
430 u32 id_cell_a = (
u32) item.get_id(0) + leaf_offset;
432 auto iter_obj_cell = [&](
u32 cell_id,
auto &&func_it) {
433 uint min_ids = cell_index_map[cell_id - leaf_offset];
434 uint max_ids = cell_index_map[cell_id + 1 - leaf_offset];
436 for (
unsigned int id_s = min_ids; id_s < max_ids; id_s++) {
439 uint id_b = particle_index_map[id_s];
446 auto walk_loop = [&](
u32 id_cell_a,
auto &&for_other_cell) {
447 u32 stack_cursor = tree_depth - 1;
448 std::array<u32, tree_depth> id_stack;
449 id_stack[stack_cursor] = 0;
451 while (stack_cursor < tree_depth) {
453 u32 current_node_id = id_stack[stack_cursor];
454 id_stack[stack_cursor] = tree_depth;
457 auto walk_logic = [&](
const bool &cur_id_valid,
458 auto &&func_leaf_found,
459 auto &&func_node_rejected) {
463 if (current_node_id >= leaf_offset) {
470 u32 lid = lchild_id[current_node_id]
471 + leaf_offset * lchild_flag[current_node_id];
472 u32 rid = rchild_id[current_node_id]
473 + leaf_offset * rchild_flag[current_node_id];
475 id_stack[stack_cursor - 1] = rid;
478 id_stack[stack_cursor - 1] = lid;
484 func_node_rejected();
488 for_other_cell(current_node_id, walk_logic);
492 for_each_leaf(id_cell_a, walk_loop, iter_obj_cell);
496 par_for_each_cell(cgh, par_for);
500template<
class u_morton,
class vec3>
502 -> std::tuple<coord_t, coord_t> {
504 u32 len = tree_reduced_morton_codes.tree_leaf_count;
506 sycl::buffer<coord_t> min_side_length{len};
507 sycl::buffer<coord_t> max_side_length{len};
511 q.submit([&](sycl::handler &cgh) {
512 u32 offset_leaf = tree_struct.internal_cell_count;
514 sycl::accessor pos_min_cell{*tree_cell_ranges.buf_pos_min_cell_flt, cgh, sycl::read_only};
515 sycl::accessor pos_max_cell{*tree_cell_ranges.buf_pos_max_cell_flt, cgh, sycl::read_only};
517 sycl::accessor s_lengh_min{min_side_length, cgh, sycl::write_only, sycl::no_init};
518 sycl::accessor s_lengh_max{max_side_length, cgh, sycl::write_only, sycl::no_init};
520 sycl::range<1> range_tree{tree_reduced_morton_codes.tree_leaf_count};
522 cgh.parallel_for(range_tree, [=](sycl::item<1> item) {
523 u32 gid = (
u32) item.get_id(0);
525 vec3 min = pos_min_cell[gid + offset_leaf];
526 vec3 max = pos_max_cell[gid + offset_leaf];
530 if constexpr (pos_is_float) {
531 s_lengh_min[gid] = sycl::fmin(sycl::fmin(sz.x(), sz.y()), sz.z());
532 s_lengh_max[gid] = sycl::fmax(sycl::fmax(sz.x(), sz.y()), sz.z());
535 if constexpr (pos_is_int) {
536 s_lengh_min[gid] = sycl::min(sycl::min(sz.x(), sz.y()), sz.z());
537 s_lengh_max[gid] = sycl::max(sycl::max(sz.x(), sz.y()), sz.z());
542 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
547 tmp_min_side_length.copy_from_sycl_buffer(min_side_length);
548 tmp_max_side_length.copy_from_sycl_buffer(max_side_length);
558 template<
class u_morton,
class vec3>
565 mpi_sycl_interop::comm_type comm_mode;
566 mpi_sycl_interop::op_type comm_op;
570 std::vector<Request<u_morton>> rq_u_morton;
571 std::vector<Request<u32>> rq_u32;
572 std::vector<Request<u8>> rq_u8;
573 std::vector<Request<vec3>> rq_vec;
575 std::vector<Request<typename RTree::ipos_t>> rq_vec3i;
578 : rtree(rtree), comm_mode(mpi_sycl_interop::current_mode), comm_op(comm_op) {}
580 inline void finalize() {
581 mpi_sycl_interop::waitall(rq_u_morton);
582 mpi_sycl_interop::waitall(rq_u32);
583 mpi_sycl_interop::waitall(rq_u8);
584 mpi_sycl_interop::waitall(rq_vec3i);
585 mpi_sycl_interop::waitall(rq_vec);
587 if (comm_op == mpi_sycl_interop::Recv_Probe) {
588 rtree.tree_morton_codes.obj_cnt = rtree.tree_morton_codes.buf_morton->size();
589 rtree.tree_reduced_morton_codes.tree_leaf_count
590 = rtree.tree_reduced_morton_codes.buf_tree_morton->size();
591 rtree.tree_struct.internal_cell_count = rtree.tree_struct.buf_lchild_id->size();
594 sycl::host_accessor bmin{*rtree.tree_cell_ranges.buf_pos_min_cell_flt};
595 sycl::host_accessor bmax{*rtree.tree_cell_ranges.buf_pos_max_cell_flt};
597 rtree.bounding_box = {bmin[0], bmax[0]};
603 sycl::host_accessor indmap{
604 *rtree.tree_reduced_morton_codes.buf_reduc_index_map};
605 rtree.tree_struct.one_cell_mode
606 = (indmap[rtree.tree_reduced_morton_codes.buf_reduc_index_map->size() - 1]
613 template<
class u_morton,
class vec3>
615 for (
auto &rq : rqs) {
620 template<
class u_morton,
class vec3>
621 inline u64 comm_isend(
623 std::vector<RadixTreeMPIRequest<u_morton, vec3>> &rqs,
630 rqs.push_back(RadixTreeMPIRequest<u_morton, vec3>(rtree, mpi_sycl_interop::op_type::Send));
632 auto &rq = rqs.back();
634 ret_len += mpi_sycl_interop::isend(
635 rq.rtree.tree_morton_codes.buf_morton,
636 rq.rtree.tree_morton_codes.obj_cnt,
641 ret_len += mpi_sycl_interop::isend(
642 rq.rtree.tree_morton_codes.buf_particle_index_map,
643 rq.rtree.tree_morton_codes.obj_cnt,
649 ret_len += mpi_sycl_interop::isend(
650 rq.rtree.tree_reduced_morton_codes.buf_reduc_index_map,
651 rq.rtree.tree_reduced_morton_codes.tree_leaf_count + 1,
657 ret_len += mpi_sycl_interop::isend(
658 rq.rtree.tree_reduced_morton_codes.buf_tree_morton,
659 rq.rtree.tree_reduced_morton_codes.tree_leaf_count,
664 ret_len += mpi_sycl_interop::isend(
665 rq.rtree.tree_struct.buf_lchild_id,
666 rq.rtree.tree_struct.internal_cell_count,
671 ret_len += mpi_sycl_interop::isend(
672 rq.rtree.tree_struct.buf_rchild_id,
673 rq.rtree.tree_struct.internal_cell_count,
678 ret_len += mpi_sycl_interop::isend(
679 rq.rtree.tree_struct.buf_lchild_flag,
680 rq.rtree.tree_struct.internal_cell_count,
685 ret_len += mpi_sycl_interop::isend(
686 rq.rtree.tree_struct.buf_rchild_flag,
687 rq.rtree.tree_struct.internal_cell_count,
692 ret_len += mpi_sycl_interop::isend(
693 rq.rtree.tree_struct.buf_endrange,
694 rq.rtree.tree_struct.internal_cell_count,
700 ret_len += mpi_sycl_interop::isend(
701 rq.rtree.tree_cell_ranges.buf_pos_min_cell,
702 rq.rtree.tree_struct.internal_cell_count
703 + rq.rtree.tree_reduced_morton_codes.tree_leaf_count,
708 ret_len += mpi_sycl_interop::isend(
709 rq.rtree.tree_cell_ranges.buf_pos_max_cell,
710 rq.rtree.tree_struct.internal_cell_count
711 + rq.rtree.tree_reduced_morton_codes.tree_leaf_count,
717 ret_len += mpi_sycl_interop::isend(
718 rq.rtree.tree_cell_ranges.buf_pos_min_cell_flt,
719 rq.rtree.tree_struct.internal_cell_count
720 + rq.rtree.tree_reduced_morton_codes.tree_leaf_count,
725 ret_len += mpi_sycl_interop::isend(
726 rq.rtree.tree_cell_ranges.buf_pos_max_cell_flt,
727 rq.rtree.tree_struct.internal_cell_count
728 + rq.rtree.tree_reduced_morton_codes.tree_leaf_count,
737 template<
class u_morton,
class vec3>
738 inline u64 comm_irecv_probe(
740 std::vector<RadixTreeMPIRequest<u_morton, vec3>> &rqs,
746 RadixTreeMPIRequest<u_morton, vec3>(rtree, mpi_sycl_interop::op_type::Recv_Probe));
748 auto &rq = rqs.back();
752 ret_len += mpi_sycl_interop::irecv_probe(
753 rq.rtree.tree_morton_codes.buf_morton, rq.rq_u_morton, rank_source, tag, comm);
754 ret_len += mpi_sycl_interop::irecv_probe(
755 rq.rtree.tree_morton_codes.buf_particle_index_map, rq.rq_u32, rank_source, tag, comm);
757 ret_len += mpi_sycl_interop::irecv_probe(
758 rq.rtree.tree_reduced_morton_codes.buf_reduc_index_map,
764 ret_len += mpi_sycl_interop::irecv_probe(
765 rq.rtree.tree_reduced_morton_codes.buf_tree_morton,
770 ret_len += mpi_sycl_interop::irecv_probe(
771 rq.rtree.tree_struct.buf_lchild_id, rq.rq_u32, rank_source, tag, comm);
772 ret_len += mpi_sycl_interop::irecv_probe(
773 rq.rtree.tree_struct.buf_rchild_id, rq.rq_u32, rank_source, tag, comm);
774 ret_len += mpi_sycl_interop::irecv_probe(
775 rq.rtree.tree_struct.buf_lchild_flag, rq.rq_u8, rank_source, tag, comm);
776 ret_len += mpi_sycl_interop::irecv_probe(
777 rq.rtree.tree_struct.buf_rchild_flag, rq.rq_u8, rank_source, tag, comm);
778 ret_len += mpi_sycl_interop::irecv_probe(
779 rq.rtree.tree_struct.buf_endrange, rq.rq_u32, rank_source, tag, comm);
781 ret_len += mpi_sycl_interop::irecv_probe(
782 rq.rtree.tree_cell_ranges.buf_pos_min_cell, rq.rq_vec3i, rank_source, tag, comm);
783 ret_len += mpi_sycl_interop::irecv_probe(
784 rq.rtree.tree_cell_ranges.buf_pos_max_cell, rq.rq_vec3i, rank_source, tag, comm);
786 ret_len += mpi_sycl_interop::irecv_probe(
787 rq.rtree.tree_cell_ranges.buf_pos_min_cell_flt, rq.rq_vec, rank_source, tag, comm);
788 ret_len += mpi_sycl_interop::irecv_probe(
789 rq.rtree.tree_cell_ranges.buf_pos_max_cell_flt, rq.rq_vec, rank_source, tag, comm);
800 namespace interaction_crit {
801 template<
class vec3,
class flt>
802 inline bool sph_radix_cell_crit(
806 vec3 cur_cell_box_min,
807 vec3 cur_cell_box_max,
810 vec3 inter_box_b_min = cur_cell_box_min - box_int_sz;
811 vec3 inter_box_b_max = cur_cell_box_max + box_int_sz;
813 return BBAA::cella_neigh_b(
814 part_a_box_min, part_a_box_max, cur_cell_box_min, cur_cell_box_max)
815 || BBAA::cella_neigh_b(xyz_a, xyz_a, inter_box_b_min, inter_box_b_max);
818 template<
class vec3,
class flt>
819 inline bool sph_cell_cell_crit(
827 vec3 inter_box_a_min = cella_min - rint_a;
828 vec3 inter_box_a_max = cella_max + rint_a;
830 vec3 inter_box_b_min = cellb_min - rint_b;
831 vec3 inter_box_b_max = cellb_max + rint_b;
833 return BBAA::cella_neigh_b(inter_box_a_min, inter_box_a_max, cellb_min, cellb_max)
834 || BBAA::cella_neigh_b(inter_box_b_min, inter_box_b_max, cella_min, cella_max);
838 template<
class u_morton,
class vec3>
841 sycl::accessor<u32, 1, sycl::access::mode::read, sycl::target::device> particle_index_map;
842 sycl::accessor<u32, 1, sycl::access::mode::read, sycl::target::device> cell_index_map;
843 sycl::accessor<u32, 1, sycl::access::mode::read, sycl::target::device> rchild_id;
844 sycl::accessor<u32, 1, sycl::access::mode::read, sycl::target::device> lchild_id;
845 sycl::accessor<u8, 1, sycl::access::mode::read, sycl::target::device> rchild_flag;
846 sycl::accessor<u8, 1, sycl::access::mode::read, sycl::target::device> lchild_flag;
847 sycl::accessor<vec3, 1, sycl::access::mode::read, sycl::target::device> pos_min_cell;
848 sycl::accessor<vec3, 1, sycl::access::mode::read, sycl::target::device> pos_max_cell;
851 static constexpr u32 _nindex = 4294967295;
856 : particle_index_map(rtree.tree_morton_codes.buf_particle_index_map
857 ->template get_access<sycl::access::mode::read>(cgh)),
858 cell_index_map(rtree.tree_reduced_morton_codes.buf_reduc_index_map
859 ->template get_access<sycl::access::mode::read>(cgh)),
861 rtree.tree_struct.buf_rchild_id->template get_access<sycl::access::mode::read>(
864 rtree.tree_struct.buf_lchild_id->template get_access<sycl::access::mode::read>(
867 rtree.tree_struct.buf_rchild_flag->template get_access<sycl::access::mode::read>(
870 rtree.tree_struct.buf_lchild_flag->template get_access<sycl::access::mode::read>(
872 pos_min_cell(rtree.tree_cell_ranges.buf_pos_min_cell_flt
873 ->template get_access<sycl::access::mode::read>(cgh)),
874 pos_max_cell(rtree.tree_cell_ranges.buf_pos_max_cell_flt
875 ->template get_access<sycl::access::mode::read>(cgh)),
876 leaf_offset(rtree.tree_struct.internal_cell_count) {}
879 template<
class Rta,
class Functor_iter>
880 inline void iter_object_in_cell(
const Rta &acc,
const u32 &cell_id, Functor_iter &&func_it) {
882 uint min_ids = acc.cell_index_map[cell_id - acc.leaf_offset];
883 uint max_ids = acc.cell_index_map[cell_id + 1 - acc.leaf_offset];
885 for (
unsigned int id_s = min_ids; id_s < max_ids; id_s++) {
888 uint id_b = acc.particle_index_map[id_s];
939 template<
class Rta,
class Functor_
int_cd,
class Functor_iter,
class Functor_iter_excl>
940 inline void rtree_for_cell(
942 Functor_int_cd &&func_int_cd,
943 Functor_iter &&func_it,
944 Functor_iter_excl &&func_excl) {
945 u32 stack_cursor = Rta::tree_depth - 1;
946 std::array<u32, Rta::tree_depth> id_stack;
947 id_stack[stack_cursor] = 0;
949 while (stack_cursor < Rta::tree_depth) {
951 u32 current_node_id = id_stack[stack_cursor];
952 id_stack[stack_cursor] = Rta::_nindex;
955 bool cur_id_valid = func_int_cd(current_node_id);
960 if (current_node_id >= acc.leaf_offset) {
962 func_it(current_node_id);
967 u32 lid = acc.lchild_id[current_node_id]
968 + acc.leaf_offset * acc.lchild_flag[current_node_id];
969 u32 rid = acc.rchild_id[current_node_id]
970 + acc.leaf_offset * acc.rchild_flag[current_node_id];
972 id_stack[stack_cursor - 1] = rid;
975 id_stack[stack_cursor - 1] = lid;
980 func_excl(current_node_id);
985 template<
class Rta,
class Functor_
int_cd,
class Functor_iter,
class Functor_iter_excl>
986 inline void rtree_for(
988 Functor_int_cd &&func_int_cd,
989 Functor_iter &&func_it,
990 Functor_iter_excl &&func_excl) {
991 u32 stack_cursor = Rta::tree_depth - 1;
992 std::array<u32, Rta::tree_depth> id_stack;
993 id_stack[stack_cursor] = 0;
995 while (stack_cursor < Rta::tree_depth) {
997 u32 current_node_id = id_stack[stack_cursor];
998 id_stack[stack_cursor] = Rta::_nindex;
1001 bool cur_id_valid = func_int_cd(current_node_id);
1006 if (current_node_id >= acc.leaf_offset) {
1021 iter_object_in_cell(acc, current_node_id, func_it);
1026 u32 lid = acc.lchild_id[current_node_id]
1027 + acc.leaf_offset * acc.lchild_flag[current_node_id];
1028 u32 rid = acc.rchild_id[current_node_id]
1029 + acc.leaf_offset * acc.rchild_flag[current_node_id];
1031 id_stack[stack_cursor - 1] = rid;
1034 id_stack[stack_cursor - 1] = lid;
1039 func_excl(current_node_id);
1046 class Functor_int_cd,
1048 class Functor_iter_excl,
1050 inline void rtree_for_fill_cache(Rta &acc, arr_type &cell_cache, Functor_int_cd &&func_int_cd) {
1052 constexpr u32 cache_sz = cell_cache.size();
1055 auto push_in_cache = [&cell_cache, &cache_pos](
u32 id) {
1056 cell_cache[cache_pos] = id;
1060 u32 stack_cursor = Rta::tree_depth - 1;
1061 std::array<u32, Rta::tree_depth> id_stack;
1062 id_stack[stack_cursor] = 0;
1064 auto get_el_cnt_in_stack = [&]() ->
u32 {
1065 return Rta::tree_depth - stack_cursor;
1068 while ((stack_cursor < Rta::tree_depth) && (cache_pos + get_el_cnt_in_stack < cache_sz)) {
1070 u32 current_node_id = id_stack[stack_cursor];
1071 id_stack[stack_cursor] = Rta::_nindex;
1074 bool cur_id_valid = func_int_cd(current_node_id);
1079 if (current_node_id >= acc.leaf_offset) {
1082 push_in_cache(current_node_id);
1087 u32 lid = acc.lchild_id[current_node_id]
1088 + acc.leaf_offset * acc.lchild_flag[current_node_id];
1089 u32 rid = acc.rchild_id[current_node_id]
1090 + acc.leaf_offset * acc.rchild_flag[current_node_id];
1092 id_stack[stack_cursor - 1] = rid;
1095 id_stack[stack_cursor - 1] = lid;
1104 while (stack_cursor < Rta::tree_depth) {
1105 u32 current_node_id = id_stack[stack_cursor];
1106 id_stack[stack_cursor] = Rta::_nindex;
1108 push_in_cache(current_node_id);
1111 if (cache_pos < cache_sz) {
1118 class Functor_int_cd,
1120 class Functor_iter_excl,
1122 inline void rtree_for(
1123 Rta &acc, arr_type &cell_cache, Functor_int_cd &&func_int_cd, Functor_iter &&func_it) {
1125 constexpr u32 cache_sz = cell_cache.size();
1127 std::array<u32, Rta::tree_depth> id_stack;
1129 auto walk_step = [&](
u32 start_id) {
1130 u32 stack_cursor = Rta::tree_depth - 1;
1131 id_stack[stack_cursor] = start_id;
1133 while (stack_cursor < Rta::tree_depth) {
1135 u32 current_node_id = id_stack[stack_cursor];
1136 id_stack[stack_cursor] = Rta::_nindex;
1139 bool cur_id_valid = func_int_cd(current_node_id);
1144 if (current_node_id >= acc.leaf_offset) {
1147 uint min_ids = acc.cell_index_map[current_node_id - acc.leaf_offset];
1148 uint max_ids = acc.cell_index_map[current_node_id + 1 - acc.leaf_offset];
1150 for (
unsigned int id_s = min_ids; id_s < max_ids; id_s++) {
1153 uint id_b = acc.particle_index_map[id_s];
1162 u32 lid = acc.lchild_id[current_node_id]
1163 + acc.leaf_offset * acc.lchild_flag[current_node_id];
1164 u32 rid = acc.rchild_id[current_node_id]
1165 + acc.leaf_offset * acc.rchild_flag[current_node_id];
1167 id_stack[stack_cursor - 1] = rid;
1170 id_stack[stack_cursor - 1] = lid;
1180 for (
u32 cache_pos = 0; cache_pos < cache_sz && cell_cache[cache_pos] !=
u32_max;
1182 walk_step(cache_pos);
constexpr const char * uint
Specific internal energy u.
Header file describing a Node Instance.
sycl::queue & get_compute_queue(u32 id=0)
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
A buffer allocated in USM (Unified Shared Memory)
Morton curve implementation.
T min(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &buf1, u32 start_id, u32 end_id)
Find the minimum element in a device buffer within a specified range.
T max(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &buf1, u32 start_id, u32 end_id)
Find the maximum element in a device buffer within a specified range.
constexpr u32 u32_max
u32 max value
main include file for memory algorithms