AMRTree.cpp
// -------------------------------------------------------//
//
// SHAMROCK code for hydrodynamics
// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
//
// -------------------------------------------------------//

template<class Tvec, class TgridVec>
void AMRTree<Tvec, TgridVec>::build_trees() { // (assumed signature, reconstructed)

    StackEntry stack_loc{};

    using MergedPDat = shamrock::MergedPatchData;
    using RTree = typename Storage::RTree;

    MergedPDat &mpdat = storage.merged_patchdata_ghost.get();

    auto &ghost_layout = shambase::get_check_ref(storage.ghost_layout.get());

    u32 reduc_level = 0;

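    // Reduce the block min (field 0) and max (field 1) coordinates of each
    // merged patch into its axis-aligned bounding box.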
    storage.merge_patch_bounds.set(
        mpdat.map<shammath::AABB<TgridVec>>([&](u64 id, shamrock::MergedPatchData &merged) {
            shamlog_debug_ln("AMR", "compute bound merged patch", id);

            TgridVec min_bound = merged.pdat.get_field<TgridVec>(0).compute_min();
            TgridVec max_bound = merged.pdat.get_field<TgridVec>(1).compute_max();

            return shammath::AABB<TgridVec>{min_bound, max_bound};
        }));

    shambase::DistributedData<shammath::AABB<TgridVec>> &bounds = storage.merge_patch_bounds.get();
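
    // Build one radix tree per merged patch, spanning the (rounded) patch AABB.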
    shambase::DistributedData<RTree> trees
        = mpdat.map<RTree>([&](u64 id, shamrock::MergedPatchData &merged) {
            shamlog_debug_ln("AMR", "compute tree for merged patch", id);

            auto aabb = bounds.get(id);

            TgridVec bmin = aabb.lower;
            TgridVec bmax = aabb.upper;

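            // Round each axis of the box up to a power-of-two extent; the tree
            // is built on integer Morton codes (see tree_reduced_morton_codes),
            // which are naturally defined on a power-of-two grid.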
            TgridVec diff = bmax - bmin;
            diff.x() = shambase::roundup_pow2(diff.x());
            diff.y() = shambase::roundup_pow2(diff.y());
            diff.z() = shambase::roundup_pow2(diff.z());
            bmax = bmin + diff;

            auto &field_pos = merged.pdat.get_field<TgridVec>(0);

            RTree tree(
                shamsys::instance::get_compute_scheduler_ptr(),
                {bmin, bmax},
                field_pos.get_buf(),
                field_pos.get_obj_cnt(),
                reduc_level);

            return tree;
        });

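    // Compute the integer bounding box of every tree cell, then convert the
    // boxes to the tree's floating-point cell range representation.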
    trees.for_each([](u64 id, RTree &tree) {
        tree.compute_cell_ibounding_box(shamsys::instance::get_compute_queue());
        tree.convert_bounding_box(shamsys::instance::get_compute_queue());
    });

    storage.trees.set(std::move(trees));
}

template<class Tvec, class TgridVec>
void AMRTree<Tvec, TgridVec>::correct_bounding_box() { // (assumed signature, reconstructed)

    StackEntry stack_loc{};

    using MergedPDat = shamrock::MergedPatchData;
    using RTree = typename Storage::RTree;

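    // Recompute every tree node's AABB from the per-block min/max fields:
    // blocks are reduced into leaf boxes, leaf boxes are swept up to the root,
    // and the result overwrites the tree's stored cell ranges.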
    storage.trees.get().for_each([&](u64 id, RTree &tree) {
        u32 leaf_count = tree.tree_reduced_morton_codes.tree_leaf_count;
        u32 internal_cell_count = tree.tree_struct.internal_cell_count;
        u32 tot_count = leaf_count + internal_cell_count;

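        // One working box per tree node: internal nodes occupy indices
        // [0, internal_cell_count), leaves [internal_cell_count, tot_count).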
        sycl::buffer<TgridVec> tmp_min_cell(tot_count);
        sycl::buffer<TgridVec> tmp_max_cell(tot_count);

        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(id);
        sham::DeviceBuffer<TgridVec> &buf_cell_min = mpdat.pdat.get_field_buf_ref<TgridVec>(0);
        sham::DeviceBuffer<TgridVec> &buf_cell_max = mpdat.pdat.get_field_buf_ref<TgridVec>(1);

        sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();

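        // Pass 1: reduce the blocks contained in each leaf into one leaf AABB.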
        sham::EventList depends_list;
        auto acc_bmin = buf_cell_min.get_read_access(depends_list);
        auto acc_bmax = buf_cell_max.get_read_access(depends_list);

        auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
            shamrock::tree::ObjectIterator cell_looper(tree, cgh);

            u32 leaf_offset = tree.tree_struct.internal_cell_count;

            sycl::accessor comp_min{tmp_min_cell, cgh, sycl::write_only, sycl::no_init};
            sycl::accessor comp_max{tmp_max_cell, cgh, sycl::write_only, sycl::no_init};

            // min/max reduction identities (reconstructed; the
            // shambase::VectorProperties helper is an assumption)
            TgridVec imin = shambase::VectorProperties<TgridVec>::get_max();
            TgridVec imax = shambase::VectorProperties<TgridVec>::get_min();

            shambase::parallel_for(cgh, leaf_count, "compute leaf boxes", [=](u64 leaf_id) {
                TgridVec min = imin;
                TgridVec max = imax;

                cell_looper.iter_object_in_cell(leaf_id + leaf_offset, [&](u32 block_id) {
                    TgridVec bmin = acc_bmin[block_id];
                    TgridVec bmax = acc_bmax[block_id];

                    min = sham::min(min, bmin);
                    max = sham::max(max, bmax);
                });

                comp_min[leaf_offset + leaf_id] = min;
                comp_max[leaf_offset + leaf_id] = max;
            });
        });

        buf_cell_min.complete_event_state(e);
        buf_cell_max.complete_event_state(e);

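        // Pass 2: merge child boxes into their parents; one sweep per level,
        // so tree_depth sweeps propagate the leaf boxes up to the root.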
        auto ker_reduc_hmax = [&](sycl::handler &cgh) {
            u32 offset_leaf = internal_cell_count;

            sycl::accessor comp_min{tmp_min_cell, cgh, sycl::read_write};
            sycl::accessor comp_max{tmp_max_cell, cgh, sycl::read_write};

            sycl::accessor rchild_id{
                shambase::get_check_ref(tree.tree_struct.buf_rchild_id), cgh, sycl::read_only};
            sycl::accessor lchild_id{
                shambase::get_check_ref(tree.tree_struct.buf_lchild_id), cgh, sycl::read_only};
            sycl::accessor rchild_flag{
                shambase::get_check_ref(tree.tree_struct.buf_rchild_flag), cgh, sycl::read_only};
            sycl::accessor lchild_flag{
                shambase::get_check_ref(tree.tree_struct.buf_lchild_flag), cgh, sycl::read_only};

            shambase::parallel_for(cgh, internal_cell_count, "propagate up", [=](u64 gid) {
                u32 lid = lchild_id[gid] + offset_leaf * lchild_flag[gid];
                u32 rid = rchild_id[gid] + offset_leaf * rchild_flag[gid];

                TgridVec bminl = comp_min[lid];
                TgridVec bminr = comp_min[rid];
                TgridVec bmaxl = comp_max[lid];
                TgridVec bmaxr = comp_max[rid];

                TgridVec bmin = sham::min(bminl, bminr);
                TgridVec bmax = sham::max(bmaxl, bmaxr);

                comp_min[gid] = bmin;
                comp_max[gid] = bmax;
            });
        };

        for (u32 i = 0; i < tree.tree_depth; i++) {
            shamsys::instance::get_compute_queue().submit(ker_reduc_hmax);
        }

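        // Pass 3: write the corrected boxes back into the tree's cell ranges.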
        sycl::buffer<TgridVec> &tree_bmin
            = shambase::get_check_ref(tree.tree_cell_ranges.buf_pos_min_cell_flt);
        sycl::buffer<TgridVec> &tree_bmax
            = shambase::get_check_ref(tree.tree_cell_ranges.buf_pos_max_cell_flt);

        shamsys::instance::get_compute_queue().submit([&](sycl::handler &cgh) {
            shamrock::tree::ObjectIterator cell_looper(tree, cgh);

            u32 leaf_offset = tree.tree_struct.internal_cell_count;

            sycl::accessor comp_bmin{tmp_min_cell, cgh, sycl::read_only};
            sycl::accessor comp_bmax{tmp_max_cell, cgh, sycl::read_only};

            sycl::accessor tree_buf_min{tree_bmin, cgh, sycl::read_write};
            sycl::accessor tree_buf_max{tree_bmax, cgh, sycl::read_write};

            shambase::parallel_for(cgh, tot_count, "write in tree range", [=](u64 nid) {
                TgridVec load_min = comp_bmin[nid];
                TgridVec load_max = comp_bmax[nid];

                // if(
                //     (!shambase::vec_equals(load_min,tree_buf_min[nid]))
                //     || !shambase::vec_equals(load_max,tree_buf_max[nid])
                // )
                // {
                //     sycl::ext::oneapi::experimental::printf(
                //         "%ld : (%ld %ld %ld) -> (%ld %ld %ld) & (%ld %ld %ld) -> (%ld %ld %ld)\n",
                //         nid,
                //         tree_buf_min[nid].x(),tree_buf_min[nid].y(),tree_buf_min[nid].z(),
                //         load_min.x(),load_min.y(),load_min.z(),
                //         tree_buf_max[nid].x(),tree_buf_max[nid].y(),tree_buf_max[nid].z(),
                //         load_max.x(),load_max.y(),load_max.z()
                //     );
                // }

                tree_buf_min[nid] = load_min;
                tree_buf_max[nid] = load_max;
            });
        });
    });
}

template<class Tvec, class TgridVec>
void AMRTree<Tvec, TgridVec>::build_neigh_cache() { // (assumed signature, reconstructed)

    using MergedPDat = shamrock::MergedPatchData;
    using RTree = typename Storage::RTree;

    shambase::Timer time_neigh;
    time_neigh.start();

    StackEntry stack_loc{};

    // Build the per-patch neighbor cache; the handler keeps the cached data
    // under the given memory budget (here u64(10e9) bytes).
    storage.neighbors_cache.set(shamrock::tree::ObjectCacheHandler(u64(10e9), [&](u64 patch_id) {
        shamlog_debug_ln("BasicSPH", "build particle cache id =", patch_id);

        NamedStackEntry cache_build_stack_loc{"build cache"};

        MergedPDat &mfield = storage.merged_patchdata_ghost.get().get(patch_id);

        sham::DeviceBuffer<TgridVec> &buf_cell_min = mfield.pdat.get_field_buf_ref<TgridVec>(0);
        sham::DeviceBuffer<TgridVec> &buf_cell_max = mfield.pdat.get_field_buf_ref<TgridVec>(1);

        RTree &tree = storage.trees.get().get(patch_id);

        u32 obj_cnt = mfield.total_elements;

        NamedStackEntry stack_loc1{"init cache"};

        using namespace shamrock;

        sham::DeviceBuffer<u32> neigh_count(
            obj_cnt, shamsys::instance::get_compute_scheduler_ptr());

        shamsys::instance::get_compute_queue().wait_and_throw();

        shamlog_debug_sycl_ln("Cache", "generate cache for N=", obj_cnt);

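        // The cache is built in two passes: first count each cell's neighbors,
        // then scan the counts into offsets and fill the flat neighbor list.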
        sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
        {
            sham::EventList depends_list;
            auto cell_min = buf_cell_min.get_read_access(depends_list);
            auto cell_max = buf_cell_max.get_read_access(depends_list);
            auto neigh_cnt = neigh_count.get_write_access(depends_list);

            auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
                tree::ObjectIterator cell_looper(tree, cgh);

                // tree::LeafCacheObjectIterator particle_looper(tree,*xyz_cell_id,leaf_cache,cgh);

                shambase::parallel_for(cgh, obj_cnt, "compute neigh cache 1", [=](u64 gid) {
                    u32 id_a = (u32) gid;

                    shammath::AABB<TgridVec> cell_aabb{cell_min[id_a], cell_max[id_a]};

                    u32 cnt = 0;

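                    // Tree walk: descend into any node whose box overlaps this
                    // cell's AABB, and count the cells that touch it through a
                    // shared surface.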
                    cell_looper.rtree_for(
                        [&](u32 node_id, TgridVec bmin, TgridVec bmax) -> bool {
                            return shammath::AABB<TgridVec>{bmin, bmax}
                                .get_intersect(cell_aabb)
                                .is_surface_or_volume();
                        },
                        [&](u32 id_b) {
                            bool no_interact
                                = !shammath::AABB<TgridVec>{cell_min[id_b], cell_max[id_b]}
                                       .get_intersect(cell_aabb)
                                       .is_surface();

                            cnt += (no_interact) ? 0 : 1;
                        });

                    // if(cell_aabb.lower.x() < 0 && cell_aabb.lower.y() == 10){
                    //     sycl::ext::oneapi::experimental::printf("%d (%ld %ld %ld) : %d\n", id_a,
                    //         cell_aabb.lower.x(),cell_aabb.lower.y(),cell_aabb.lower.z()
                    //         ,cnt);
                    // }

                    neigh_cnt[id_a] = cnt;
                });
            });

            buf_cell_min.complete_event_state(e);
            buf_cell_max.complete_event_state(e);
            neigh_count.complete_event_state(e);
        }
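        // prepare_object_cache scans the per-cell counts into offsets and
        // allocates the flattened neighbor index map.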
        tree::ObjectCache pcache = tree::prepare_object_cache(std::move(neigh_count), obj_cnt);

        NamedStackEntry stack_loc2{"fill cache"};

        {
            sham::EventList depends_list;
            auto cell_min = buf_cell_min.get_read_access(depends_list);
            auto cell_max = buf_cell_max.get_read_access(depends_list);
            auto scanned_neigh_cnt = pcache.scanned_cnt.get_read_access(depends_list);
            auto neigh = pcache.index_neigh_map.get_write_access(depends_list);

            auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
                tree::ObjectIterator cell_looper(tree, cgh);

                // tree::LeafCacheObjectIterator particle_looper(tree,*xyz_cell_id,leaf_cache,cgh);
                shambase::parallel_for(cgh, obj_cnt, "compute neigh cache 2", [=](u64 gid) {
                    u32 id_a = (u32) gid;

                    shammath::AABB<TgridVec> cell_aabb{cell_min[id_a], cell_max[id_a]};

                    u32 cnt = scanned_neigh_cnt[id_a];

                    cell_looper.rtree_for(
                        [&](u32 node_id, TgridVec bmin, TgridVec bmax) -> bool {
                            return shammath::AABB<TgridVec>{bmin, bmax}
                                .get_intersect(cell_aabb)
                                .is_surface_or_volume();
                        },
                        [&](u32 id_b) {
                            bool no_interact
                                = !shammath::AABB<TgridVec>{cell_min[id_b], cell_max[id_b]}
                                       .get_intersect(cell_aabb)
                                       .is_surface();

                            if (!no_interact) {
                                neigh[cnt] = id_b;
                            }
                            cnt += (no_interact) ? 0 : 1;
                        });
                });
            });

            buf_cell_min.complete_event_state(e);
            buf_cell_max.complete_event_state(e);
            pcache.scanned_cnt.complete_event_state(e);
            pcache.index_neigh_map.complete_event_state(e);
        }

        return pcache;
    }));
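
    // Preload the cache of every local non-empty patch.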
    using namespace shamrock::patch;
    scheduler().for_each_patchdata_nonempty([&](Patch cur_p, PatchDataLayer &pdat) {
        storage.neighbors_cache.get().preload(cur_p.id_patch);
    });

    time_neigh.end();
    storage.timings_details.neighbors += time_neigh.elasped_sec();
}