Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
compute_ranges.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
16#include "shambase/integer.hpp"
17#include "shambackends/math.hpp"
19
20template<class u_morton>
21void sycl_compute_cell_ranges(
22
23 sycl::queue &queue,
24
25 u32 leaf_cnt,
26 u32 internal_cnt,
27 std::unique_ptr<sycl::buffer<u_morton>> &buf_morton,
28 std::unique_ptr<sycl::buffer<u32>> &buf_lchild_id,
29 std::unique_ptr<sycl::buffer<u32>> &buf_rchild_id,
30 std::unique_ptr<sycl::buffer<u8>> &buf_lchild_flag,
31 std::unique_ptr<sycl::buffer<u8>> &buf_rchild_flag,
32 std::unique_ptr<sycl::buffer<u32>> &buf_endrange,
33
34 std::unique_ptr<sycl::buffer<typename shamrock::sfc::MortonCodes<u_morton, 3>::int_vec_repr>>
35 &buf_pos_min_cell,
36 std::unique_ptr<sycl::buffer<typename shamrock::sfc::MortonCodes<u_morton, 3>::int_vec_repr>>
37 &buf_pos_max_cell) {
38
39 sycl::range<1> range_radix_tree{internal_cnt};
40
41 constexpr u32 group_size = 256;
42 u32 group_cnt = shambase::group_count(internal_cnt, group_size);
43 group_cnt = group_cnt + (group_cnt % 4);
44 u32 corrected_len = group_cnt * group_size;
45
46 auto ker_compute_cell_ranges = [&](sycl::handler &cgh) {
47 auto morton_map = buf_morton->template get_access<sycl::access::mode::read>(cgh);
48 auto end_range_map = buf_endrange->get_access<sycl::access::mode::read>(cgh);
49
50 auto pos_min_cell
51 = buf_pos_min_cell->template get_access<sycl::access::mode::discard_write>(
52 cgh); // was "write" before changed to fix warning
53 auto pos_max_cell
54 = buf_pos_max_cell->template get_access<sycl::access::mode::discard_write>(
55 cgh); // was "write" before changed to fix warning
56
57 auto rchild_flag = buf_rchild_flag->get_access<sycl::access::mode::read>(cgh);
58 auto lchild_flag = buf_lchild_flag->get_access<sycl::access::mode::read>(cgh);
59 auto rchild_id = buf_rchild_id->get_access<sycl::access::mode::read>(cgh);
60 auto lchild_id = buf_lchild_id->get_access<sycl::access::mode::read>(cgh);
61
62 u32 internal_cell_cnt = internal_cnt;
63
64 // Executing kernel
65 cgh.parallel_for(sycl::nd_range<1>{corrected_len, group_size}, [=](sycl::nd_item<1> id) {
66 u32 local_id = id.get_local_id(0);
67 u32 group_tile_id = id.get_group_linear_id();
68 u32 gid = group_tile_id * group_size + local_id;
69
70 if (gid >= internal_cell_cnt)
71 return;
72
73 uint clz_ = sham::clz_xor(morton_map[gid], morton_map[end_range_map[gid]]);
74
76
77 auto get_mask = [](u32 clz_) -> u_morton {
78 if constexpr (std::is_same<u_morton, u64>::value) {
79 constexpr u64 mask_i = 0xFFFFFFFFFFFFFFFF;
80 return mask_i << (64U - clz_);
81 }
82
83 if constexpr (std::is_same<u_morton, u32>::value) {
84 constexpr u32 mask_i = 0xFFFFFFFF;
85 return mask_i << (32 - clz_);
86 }
87 };
88
89 auto clz_offset = Morton::get_offset(clz_);
90 auto clz_offset_1 = Morton::get_offset(clz_ + 1);
91
92 auto min_cell = Morton::morton_to_icoord(morton_map[gid] & get_mask(clz_));
93
94 pos_min_cell[gid] = min_cell;
95
96 pos_max_cell[gid] = clz_offset + min_cell;
97
98 if (rchild_flag[gid]) {
99
100 auto tmp = clz_offset - clz_offset_1;
101
102 pos_min_cell[rchild_id[gid] + internal_cell_cnt] = min_cell + tmp;
103 pos_max_cell[rchild_id[gid] + internal_cell_cnt] = clz_offset_1 + min_cell + tmp;
104 }
105
106 if (lchild_flag[gid]) {
107 pos_min_cell[lchild_id[gid] + internal_cell_cnt] = min_cell;
108 pos_max_cell[lchild_id[gid] + internal_cell_cnt] = clz_offset_1 + min_cell;
109 }
110 });
111 };
112
113 queue.submit(ker_compute_cell_ranges);
114}
115
116template void sycl_compute_cell_ranges(
117 sycl::queue &queue,
118 u32 leaf_cnt,
119 u32 internal_cnt,
120 std::unique_ptr<sycl::buffer<u32>> &buf_morton,
121 std::unique_ptr<sycl::buffer<u32>> &buf_lchild_id,
122 std::unique_ptr<sycl::buffer<u32>> &buf_rchild_id,
123 std::unique_ptr<sycl::buffer<u8>> &buf_lchild_flag,
124 std::unique_ptr<sycl::buffer<u8>> &buf_rchild_flag,
125 std::unique_ptr<sycl::buffer<u32>> &buf_endrange,
126
127 std::unique_ptr<sycl::buffer<u16_3>> &buf_pos_min_cell,
128 std::unique_ptr<sycl::buffer<u16_3>> &buf_pos_max_cell);
129
130template void sycl_compute_cell_ranges(
131 sycl::queue &queue,
132 u32 leaf_cnt,
133 u32 internal_cnt,
134 std::unique_ptr<sycl::buffer<u64>> &buf_morton,
135 std::unique_ptr<sycl::buffer<u32>> &buf_lchild_id,
136 std::unique_ptr<sycl::buffer<u32>> &buf_rchild_id,
137 std::unique_ptr<sycl::buffer<u8>> &buf_lchild_flag,
138 std::unique_ptr<sycl::buffer<u8>> &buf_rchild_flag,
139 std::unique_ptr<sycl::buffer<u32>> &buf_endrange,
140
141 std::unique_ptr<sycl::buffer<u32_3>> &buf_pos_min_cell,
142 std::unique_ptr<sycl::buffer<u32_3>> &buf_pos_max_cell);
constexpr const char * uint
Specific internal energy u.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
constexpr T clz_xor(T a, T b) noexcept
give the length of the common prefix
Definition math.hpp:783
constexpr u32 group_count(u32 len, u32 group_size)
Calculates the number of groups based on the length and group size.
Definition integer.hpp:125