Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
FaceFlagger.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
24
25// this flags faces but not your face
26
27template<class Tvec, class TgridVec>
29
30 using namespace shamrock::patch;
31 using namespace shamrock;
32 using namespace shammath;
33 using MergedPDat = shamrock::MergedPatchData;
34
35 shambase::DistributedData<sycl::buffer<u8>> face_normals_dat_lookup;
36
37 scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
38 MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
39
40 sham::DeviceBuffer<TgridVec> &buf_cell_min = mpdat.pdat.get_field_buf_ref<TgridVec>(0);
41 sham::DeviceBuffer<TgridVec> &buf_cell_max = mpdat.pdat.get_field_buf_ref<TgridVec>(1);
42
43 tree::ObjectCache &pcache = storage.neighbors_cache.get().get_cache(p.id_patch);
44
45 sycl::buffer<u8> face_normals_lookup(pcache.sum_neigh_cnt);
46
47 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
48
49 sham::EventList depends_list;
50 auto cell_min = buf_cell_min.get_read_access(depends_list);
51 auto cell_max = buf_cell_max.get_read_access(depends_list);
52 auto cloop_ptrs = pcache.get_read_access(depends_list);
53
54 auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
55 tree::ObjectCacheIterator cell_looper(cloop_ptrs);
56
57 sycl::accessor normals_lookup{
58 face_normals_lookup, cgh, sycl::write_only, sycl::no_init};
59
60 shambase::parallel_for(cgh, mpdat.total_elements, "flag_neigh", [=](u64 id_a) {
61 TgridVec cell2_a = (cell_min[id_a] + cell_max[id_a]);
62
63 cell_looper.for_each_object_with_id(id_a, [&](u32 id_b, u64 id_list) {
64 TgridVec cell2_b = (cell_min[id_b] + cell_max[id_b]);
65 TgridVec cell2_d = cell2_b - cell2_a;
66
67 TgridVec d_norm = sycl::abs(cell2_d).template convert<Tgridscal>();
68
69 // I mean if you are up to such
70 Tgridscal max_compo = sycl::max(sycl::max(d_norm.x(), d_norm.y()), d_norm.z());
71
72 // what a readable piece of code
73 // there can be only ONE that is the true answers
74 const u8 lookup = ((cell2_d.x() == -max_compo) ? 0 : 0)
75 + ((cell2_d.x() == max_compo) ? 1 : 0)
76 + ((cell2_d.y() == -max_compo) ? 2 : 0)
77 + ((cell2_d.y() == max_compo) ? 3 : 0)
78 + ((cell2_d.z() == -max_compo) ? 4 : 0)
79 + ((cell2_d.z() == max_compo) ? 5 : 0);
80
81 // if(cell_min[id_a].x() < 0 && cell_min[id_a].y() == 10){
82 // sycl::ext::oneapi::experimental::printf("%d (%ld %ld %ld) : %d\n", id_a,
83 // cell_min[id_a].x(),cell_min[id_a].y(),cell_min[id_a].z()
84 // ,u32(lookup));
85 // }
86
87 // F this bit bit of code
88 // i'm so done with this crap
89 // godbolts gods command's you to inline !
90 normals_lookup[id_list] = lookup;
91 });
92
93 // Chaptgpt beautifull poem about the beautifullness of the SIMD instructions
94 //
95 // Oh, SIMD instructions, you tangled mess,
96 // A source of frustration, I must confess.
97 // You promised speed, you boasted grace,
98 // Yet you leave my code a tangled case.
99 //
100 // With your cryptic syntax and obscure ways,
101 // You lead me into a bewildering maze.
102 // I try to optimize, to harness your might,
103 // But your convoluted logic gives me a fright.
104 //
105 // You claim to be efficient, a boon to behold,
106 // Yet your pitfalls and traps leave me cold.
107 // I chase after vectors, I chase after speed,
108 // But your complexities multiply with every need.
109 //
110 // Oh, SIMD instructions, you deceptive charm,
111 // You leave my patience disarmed.
112 // A Pandora's box of headaches and woes,
113 // In your shadowy realm, my confidence slows.
114 //
115 // So here's to you, SIMD, with a bitter disdain,
116 // Your alluring facade hides nothing but pain.
117 // You promised elegance, you promised glee,
118 // But all I find is chaos, as you laugh at me.
119 });
120 });
121
122 buf_cell_min.complete_event_state(e);
123 buf_cell_max.complete_event_state(e);
124
125 sham::EventList resulting_events;
126 resulting_events.add_event(e);
127 pcache.complete_event_state(resulting_events);
128
129 // store the buffer in distrib data
130 face_normals_dat_lookup.add_obj(p.id_patch, std::move(face_normals_lookup));
131 });
132
133 storage.face_normals_lookup.set(std::move(face_normals_dat_lookup));
134}
135
136template<class Tvec, class TgridVec>
138
139 using namespace shamrock::patch;
140 using namespace shamrock;
141 using namespace shammath;
142 using MergedPDat = shamrock::MergedPatchData;
143
145
146 scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
147 shamrock::tree::ObjectCache &cache = storage.neighbors_cache.get().get_cache(p.id_patch);
148
149 sycl::buffer<u8> &face_normals_lookup = storage.face_normals_lookup.get().get(p.id_patch);
150
151 auto build_flist = [&](u8 lookup) -> OrientedNeighFaceList<Tvec> {
152 return {isolate_lookups(cache, face_normals_lookup, lookup), lookup_to_normal(lookup)};
153 };
154
155 auto build_neigh_list = [&]() -> NeighFaceList<Tvec> {
156 return {
157 build_flist(0),
158 build_flist(1),
159 build_flist(2),
160 build_flist(3),
161 build_flist(4),
162 build_flist(5)};
163 };
164
165 neigh_lst.add_obj(p.id_patch, build_neigh_list());
166 });
167
168 storage.neighbors_cache.reset();
169 storage.face_normals_lookup.reset();
170
171 storage.face_lists.set(std::move(neigh_lst));
172}
173
175 // since it's AMR with only delta = 1 in level
176 // only cases are :
177 // - same level : block_id <-> block_id map
178 // - increase level : block_id <-> block_id map
179 // - decrease level : block_id <-> block_id + divcoord map
180 // divcoord are to see in which suboct of
181 // the block the neigh is in.
182
183 u64 id_patch;
184
185 struct {
186
187 sycl::buffer<u32> block_ids;
188
189 } level_p1;
190
191 struct {
192
193 sycl::buffer<u32> block_ids;
194
195 sycl::buffer<u32> cell_xm;
196 sycl::buffer<u32> cell_xp;
197 sycl::buffer<u32> cell_ym;
198 sycl::buffer<u32> cell_yp;
199 sycl::buffer<u32> cell_zm;
200 sycl::buffer<u32> cell_zp;
201
202 } level_m1;
203
204 struct {
205
206 // ids of the blocks having
207 sycl::buffer<u32> block_ids;
208
209 // neigh[block_id*block_size + cell_id]
210 // -> neighbourgh cell (block_id*block_size + cell_id)
211 sycl::buffer<u32> cell_xm;
212 sycl::buffer<u32> cell_xp;
213 sycl::buffer<u32> cell_ym;
214 sycl::buffer<u32> cell_yp;
215 sycl::buffer<u32> cell_zm;
216 sycl::buffer<u32> cell_zp;
217
218 } level_same;
219};
220
221template<class Tvec, class TgridVec>
223
224template<class Tvec, class TgridVec>
226 shamrock::tree::ObjectCache &cache, sycl::buffer<u8> &face_normals_lookup, u8 lookup_value) {
227
228 u32 obj_cnt = cache.cnt_neigh.get_size();
229
230 sham::DeviceBuffer<u32> face_count(obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
231
232 {
233 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
234 sham::EventList depends_list;
235
236 auto cloop_ptrs = cache.get_read_access(depends_list);
237 auto face_cnts = face_count.get_write_access(depends_list);
238
239 auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
240 shamrock::tree::ObjectCacheIterator cell_looper(cloop_ptrs);
241
242 sycl::accessor normals_lookup{face_normals_lookup, cgh, sycl::read_only};
243
244 u8 wanted_lookup = lookup_value;
245
246 shambase::parallel_for(cgh, obj_cnt, "compute neigh cache 1", [=](u64 gid) {
247 u32 id_a = (u32) gid;
248
249 u32 cnt = 0;
250 cell_looper.for_each_object_with_id(id_a, [&](u32 id_b, u32 id_list) {
251 cnt += (normals_lookup[id_list] == wanted_lookup) ? 1 : 0;
252 });
253
254 face_cnts[id_a] = cnt;
255 });
256 });
257
258 sham::EventList resulting_events;
259 resulting_events.add_event(e);
260
261 cache.complete_event_state(resulting_events);
262 face_count.complete_event_state(resulting_events);
263 }
264
266 = shamrock::tree::prepare_object_cache(std::move(face_count), obj_cnt);
267
269
270 {
271 NamedStackEntry stack_loc2{"fill cache"};
272 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
273 sham::EventList depends_list;
274
275 auto cloop_ptrs = cache.get_read_access(depends_list);
276 auto scanned_neigh_cnt = pcache.scanned_cnt.get_read_access(depends_list);
277 auto neigh = pcache.index_neigh_map.get_write_access(depends_list);
278
279 // logger::raw_ln(obj_cnt, pcache.cnt_neigh.size(),pcache.scanned_cnt.size(),
280 // pcache.index_neigh_map.size());
281
282 auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
283 shamrock::tree::ObjectCacheIterator cell_looper(cloop_ptrs);
284
285 sycl::accessor normals_lookup{face_normals_lookup, cgh, sycl::read_only};
286
287 u8 wanted_lookup = lookup_value;
288
289 shambase::parallel_for(cgh, obj_cnt, "compute neigh cache 2", [=](u64 gid) {
290 u32 id_a = (u32) gid;
291 u32 cnt = scanned_neigh_cnt[id_a];
292
293 // sycl::ext::oneapi::experimental::printf("%d %d\n", id_a,cnt);
294
295 cell_looper.for_each_object_with_id(id_a, [&](u32 id_b, u32 id_list) {
296 bool lookup_match = normals_lookup[id_list] == wanted_lookup;
297 if (lookup_match) {
298 // sycl::ext::oneapi::experimental::printf("%d %d %d %d\n",
299 // id_a,cnt,id_b,id_list);
300 neigh[cnt] = id_b;
301 cnt++;
302 }
303 });
304 });
305 });
306
307 pcache.scanned_cnt.complete_event_state(e);
308 pcache.index_neigh_map.complete_event_state(e);
309
310 sham::EventList resulting_events;
311 resulting_events.add_event(e);
312 cache.complete_event_state(resulting_events);
313 }
314
315 shamlog_debug_sycl_ln(
316 "AMR::FaceFlagger", "lookup :", lookup_value, "found N =", pcache.sum_neigh_cnt);
317
318 return pcache;
319}
320
sycl::queue & get_compute_queue(u32 id=0)
Source location utility.
std::uint8_t u8
8 bit unsigned integer
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
size_t get_size() const
Gets the number of elements in the buffer.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
void add_event(sycl::event e)
Add an event to the list of events.
Definition EventList.hpp:87
Represents a collection of objects distributed across patches identified by a u64 id.
iterator add_obj(u64 id, T &&obj)
Adds a new object to the collection.
void reset()
Reset the collection to its initial state.
flag faces with a lookup index for the orientation
void flag_faces()
flag faces with a lookup index performs at around 2G cell per seconds on a RTX A5000
PatchDataLayer container class, the layout is described in patchdata_layout.
namespace for math utility
Definition AABB.hpp:26
namespace for the main framework
Definition __init__.py:1
Patch object that contain generic patch information.
Definition Patch.hpp:33