Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
KarrasRadixTree.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
25#include <utility>
26
28#define SGN(x) (x == 0) ? 0 : ((x > 0) ? 1 : -1)
29
45template<class u_morton>
47 sham::DeviceQueue &queue,
48 u32 internal_cell_count,
50 sham::DeviceBuffer<u32> &out_buf_lchild_id,
51 sham::DeviceBuffer<u32> &out_buf_rchild_id,
52 sham::DeviceBuffer<u8> &out_buf_lchild_flag,
53 sham::DeviceBuffer<u8> &out_buf_rchild_flag,
54 sham::DeviceBuffer<u32> &out_buf_endrange) {
55
56 // Early return if the tree is a single leaf as there is no tree structure in this case.
57 if (internal_cell_count == 0) {
58 return;
59 }
60
62 queue,
63 sham::MultiRef{in_morton},
65 out_buf_lchild_id,
66 out_buf_rchild_id,
67 out_buf_lchild_flag,
68 out_buf_rchild_flag,
69 out_buf_endrange},
70 internal_cell_count,
71
72 [morton_length = internal_cell_count + 1](
73 u32 i,
74 const u_morton *__restrict morton,
75 u32 *__restrict lchild_id,
76 u32 *__restrict rchild_id,
77 u8 *__restrict lchild_flag,
78 u8 *__restrict rchild_flag,
79 u32 *__restrict end_range_cell) {
80 auto DELTA = [=](i32 x, i32 y) {
81 return sham::karras_delta(x, y, morton_length, morton);
82 };
83
84 int ddelta = DELTA(i, i + 1) - DELTA(i, i - 1);
85
86 int d = SGN(ddelta);
87
88 // Compute upper bound for the length of the range
89 int delta_min = DELTA(i, i - d);
90 int lmax = 2;
91 while (DELTA(i, i + lmax * d) > delta_min) {
92 lmax *= 2;
93 }
94
95 // Find the other end using
96 int l = 0;
97 int t = lmax / 2;
98 while (t > 0) {
99 if (DELTA(i, i + (l + t) * d) > delta_min) {
100 l = l + t;
101 }
102 t = t / 2;
103 }
104 u32 j = i + l * d;
105
106 end_range_cell[i] = j;
107
108 // Find the split position using binary search
109 int delta_node = DELTA(i, j);
110 int s = 0;
111
112 // TODO why float
113 float div = 2;
114 t = sycl::ceil(l / div);
115 while (true) {
116 int tmp_ = i + (s + t) * d;
117 if (DELTA(i, tmp_) > delta_node) {
118 s = s + t;
119 }
120 if (t <= 1)
121 break;
122 div *= 2;
123 t = sycl::ceil(l / div);
124 }
125 int gamma = i + s * d + sycl::min(d, 0);
126
127 if (sycl::min(i, j) == gamma) {
128 lchild_id[i] = gamma;
129 lchild_flag[i] = 1; // leaf
130 } else {
131 lchild_id[i] = gamma;
132 lchild_flag[i] = 0; // leaf
133 }
134
135 if (sycl::max(i, j) == gamma + 1) {
136 rchild_id[i] = gamma + 1;
137 rchild_flag[i] = 1; // leaf
138 } else {
139 rchild_id[i] = gamma + 1;
140 rchild_flag[i] = 0; // leaf
141 }
142 });
143}
144
145namespace shamtree {
146
147 template<class Tmorton>
148 inline u32 get_tree_depth() {
150 }
151
152 template<class Tmorton>
154 sham::DeviceScheduler_ptr dev_sched,
155 u32 morton_count,
156 sham::DeviceBuffer<Tmorton> &morton_codes,
157 KarrasRadixTree &&recycled_tree) {
158
159 u32 internal_cell_count = morton_count - 1;
160
161 recycled_tree.tree_depth = get_tree_depth<Tmorton>();
162
163 recycled_tree.buf_lchild_id.resize(internal_cell_count);
164 recycled_tree.buf_rchild_id.resize(internal_cell_count);
165 recycled_tree.buf_lchild_flag.resize(internal_cell_count);
166 recycled_tree.buf_rchild_flag.resize(internal_cell_count);
167 recycled_tree.buf_endrange.resize(internal_cell_count);
168
170 dev_sched->get_queue(),
171 internal_cell_count,
172 morton_codes,
173 recycled_tree.buf_lchild_id,
174 recycled_tree.buf_rchild_id,
175 recycled_tree.buf_lchild_flag,
176 recycled_tree.buf_rchild_flag,
177 recycled_tree.buf_endrange);
178
179 return std::forward<KarrasRadixTree>(recycled_tree);
180 }
181
183 // Shortcut without caching
185
186 template<class Tmorton>
188 sham::DeviceScheduler_ptr dev_sched,
189 u32 morton_count,
190 sham::DeviceBuffer<Tmorton> &morton_codes) {
191
192 u32 internal_cell_count = morton_count - 1;
193
194 sham::DeviceBuffer<u32> buf_lchild_id(internal_cell_count, dev_sched);
195 sham::DeviceBuffer<u32> buf_rchild_id(internal_cell_count, dev_sched);
196 sham::DeviceBuffer<u8> buf_lchild_flag(internal_cell_count, dev_sched);
197 sham::DeviceBuffer<u8> buf_rchild_flag(internal_cell_count, dev_sched);
198 sham::DeviceBuffer<u32> buf_endrange(internal_cell_count, dev_sched);
199
200 KarrasRadixTree tree(
201 std::move(buf_lchild_id),
202 std::move(buf_rchild_id),
203 std::move(buf_lchild_flag),
204 std::move(buf_rchild_flag),
205 std::move(buf_endrange),
206 get_tree_depth<Tmorton>());
207
208 return karras_tree_from_morton_set(dev_sched, morton_count, morton_codes, std::move(tree));
209 }
210
212 // Explicit instantiations
214
215#ifndef DOXYGEN // To avoid doxygen complaining like always ...
216 template KarrasRadixTree karras_tree_from_morton_set(
217 sham::DeviceScheduler_ptr dev_sched,
218 u32 morton_count,
219 sham::DeviceBuffer<u32> &morton_codes,
220 KarrasRadixTree &&recycled_tree);
221
222 template KarrasRadixTree karras_tree_from_morton_set(
223 sham::DeviceScheduler_ptr dev_sched,
224 u32 morton_count,
225 sham::DeviceBuffer<u32> &morton_codes);
226
227 template KarrasRadixTree karras_tree_from_morton_set(
228 sham::DeviceScheduler_ptr dev_sched,
229 u32 morton_count,
230 sham::DeviceBuffer<u64> &morton_codes,
231 KarrasRadixTree &&recycled_tree);
232
233 template KarrasRadixTree karras_tree_from_morton_set(
234 sham::DeviceScheduler_ptr dev_sched,
235 u32 morton_count,
236 sham::DeviceBuffer<u64> &morton_codes);
237#endif
238
240
241 std::vector<u32> lchild_id = {};
242 std::vector<u8> lchild_flag = {};
243 std::vector<u32> rchild_id = {};
244 std::vector<u8> rchild_flag = {};
245 std::vector<u32> endrange = {};
246
247 lchild_id = tree.buf_lchild_id.copy_to_stdvec();
248 rchild_id = tree.buf_rchild_id.copy_to_stdvec();
249 lchild_flag = tree.buf_lchild_flag.copy_to_stdvec();
250 rchild_flag = tree.buf_rchild_flag.copy_to_stdvec();
251 endrange = tree.buf_endrange.copy_to_stdvec();
252
253 std::string dot_graph = "";
254
255 dot_graph += "digraph G {\n";
256 dot_graph += "rankdir=LR;\n";
257
258 for (u32 i = 0; i < tree.get_internal_cell_count(); ++i) {
259
260 if (lchild_flag[i] == 0) {
261 dot_graph
262 += "i" + std::to_string(i) + " -> i" + std::to_string(lchild_id[i]) + ";\n";
263 } else {
264 dot_graph
265 += "i" + std::to_string(i) + " -> l" + std::to_string(lchild_id[i]) + ";\n";
266 }
267
268 if (rchild_flag[i] == 0) {
269 dot_graph
270 += "i" + std::to_string(i) + " -> i" + std::to_string(rchild_id[i]) + ";\n";
271 } else {
272 dot_graph
273 += "i" + std::to_string(i) + " -> l" + std::to_string(rchild_id[i]) + ";\n";
274 }
275 }
276
277 dot_graph += "}\n";
278 return dot_graph;
279 }
280
281} // namespace shamtree
#define SGN(x)
Macro to get the sign of a number.
void __karras_alg(sham::DeviceQueue &queue, u32 internal_cell_count, sham::DeviceBuffer< u_morton > &in_morton, sham::DeviceBuffer< u32 > &out_buf_lchild_id, sham::DeviceBuffer< u32 > &out_buf_rchild_id, sham::DeviceBuffer< u8 > &out_buf_lchild_flag, sham::DeviceBuffer< u8 > &out_buf_rchild_flag, sham::DeviceBuffer< u32 > &out_buf_endrange)
Karras 2012 algorithm with addition endrange buffer.
KarrasRadixTree karras_tree_from_morton_set(sham::DeviceScheduler_ptr dev_sched, u32 morton_count, sham::DeviceBuffer< Tmorton > &morton_codes, KarrasRadixTree &&recycled_tree)
Constructs a KarrasRadixTree from a set of reduced Morton codes.
std::string karras_tree_to_dot_graph(KarrasRadixTree &recycled_tree)
Get tree as dot graph.
std::uint8_t u8
8 bit unsigned integer
std::uint32_t u32
32 bit unsigned integer
std::int32_t i32
32 bit integer
A buffer allocated in USM (Unified Shared Memory)
std::vector< T > copy_to_stdvec() const
Copy the content of the buffer to a std::vector.
A SYCL queue associated with a device and a context.
A data structure representing a Karras Radix Tree.
sham::DeviceBuffer< u8 > buf_rchild_flag
right child flag (size = internal_count)
sham::DeviceBuffer< u32 > buf_rchild_id
right child id (size = internal_count)
sham::DeviceBuffer< u32 > buf_endrange
endrange (size = internal_count)
sham::DeviceBuffer< u32 > buf_lchild_id
left child id (size = internal_count)
u32 get_internal_cell_count() const
Get internal cell count.
sham::DeviceBuffer< u8 > buf_lchild_flag
left child flag (size = internal_count)
Morton curve implementation.
i32 karras_delta(i32 x, i32 y, u32 morton_length, Acc m) noexcept
delta operator defined in Karras 2012
Definition math.hpp:826
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
Traits for C++ types.
A class that references multiple buffers or similar objects.