27template<
class Tvec,
class Tgr
idVec>
30 using namespace shamrock::patch;
38 MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
45 sycl::buffer<u8> face_normals_lookup(pcache.sum_neigh_cnt);
51 auto cell_max = buf_cell_max.get_read_access(depends_list);
52 auto cloop_ptrs = pcache.get_read_access(depends_list);
54 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
57 sycl::accessor normals_lookup{
58 face_normals_lookup, cgh, sycl::write_only, sycl::no_init};
60 shambase::parallel_for(cgh, mpdat.total_elements,
"flag_neigh", [=](
u64 id_a) {
61 TgridVec cell2_a = (cell_min[id_a] + cell_max[id_a]);
63 cell_looper.for_each_object_with_id(id_a, [&](u32 id_b, u64 id_list) {
64 TgridVec cell2_b = (cell_min[id_b] + cell_max[id_b]);
65 TgridVec cell2_d = cell2_b - cell2_a;
67 TgridVec d_norm = sycl::abs(cell2_d).template convert<Tgridscal>();
70 Tgridscal max_compo = sycl::max(sycl::max(d_norm.x(), d_norm.y()), d_norm.z());
74 const u8 lookup = ((cell2_d.x() == -max_compo) ? 0 : 0)
75 + ((cell2_d.x() == max_compo) ? 1 : 0)
76 + ((cell2_d.y() == -max_compo) ? 2 : 0)
77 + ((cell2_d.y() == max_compo) ? 3 : 0)
78 + ((cell2_d.z() == -max_compo) ? 4 : 0)
79 + ((cell2_d.z() == max_compo) ? 5 : 0);
90 normals_lookup[id_list] = lookup;
122 buf_cell_min.complete_event_state(e);
123 buf_cell_max.complete_event_state(e);
127 pcache.complete_event_state(resulting_events);
130 face_normals_dat_lookup.
add_obj(p.id_patch, std::move(face_normals_lookup));
133 storage.face_normals_lookup.set(std::move(face_normals_dat_lookup));
136template<
class Tvec,
class Tgr
idVec>
139 using namespace shamrock::patch;
149 sycl::buffer<u8> &face_normals_lookup = storage.face_normals_lookup.get().get(p.id_patch);
152 return {isolate_lookups(cache, face_normals_lookup, lookup), lookup_to_normal(lookup)};
155 auto build_neigh_list = [&]() -> NeighFaceList<Tvec> {
165 neigh_lst.
add_obj(p.id_patch, build_neigh_list());
168 storage.neighbors_cache.
reset();
169 storage.face_normals_lookup.reset();
171 storage.face_lists.set(std::move(neigh_lst));
187 sycl::buffer<u32> block_ids;
193 sycl::buffer<u32> block_ids;
195 sycl::buffer<u32> cell_xm;
196 sycl::buffer<u32> cell_xp;
197 sycl::buffer<u32> cell_ym;
198 sycl::buffer<u32> cell_yp;
199 sycl::buffer<u32> cell_zm;
200 sycl::buffer<u32> cell_zp;
207 sycl::buffer<u32> block_ids;
211 sycl::buffer<u32> cell_xm;
212 sycl::buffer<u32> cell_xp;
213 sycl::buffer<u32> cell_ym;
214 sycl::buffer<u32> cell_yp;
215 sycl::buffer<u32> cell_zm;
216 sycl::buffer<u32> cell_zp;
221template<
class Tvec,
class Tgr
idVec>
224template<
class Tvec,
class Tgr
idVec>
236 auto cloop_ptrs = cache.get_read_access(depends_list);
237 auto face_cnts = face_count.get_write_access(depends_list);
239 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
242 sycl::accessor normals_lookup{face_normals_lookup, cgh, sycl::read_only};
244 u8 wanted_lookup = lookup_value;
246 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 1", [=](
u64 gid) {
250 cell_looper.for_each_object_with_id(id_a, [&](
u32 id_b,
u32 id_list) {
251 cnt += (normals_lookup[id_list] == wanted_lookup) ? 1 : 0;
254 face_cnts[id_a] = cnt;
261 cache.complete_event_state(resulting_events);
262 face_count.complete_event_state(resulting_events);
266 = shamrock::tree::prepare_object_cache(std::move(face_count), obj_cnt);
275 auto cloop_ptrs = cache.get_read_access(depends_list);
276 auto scanned_neigh_cnt = pcache.scanned_cnt.
get_read_access(depends_list);
282 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
285 sycl::accessor normals_lookup{face_normals_lookup, cgh, sycl::read_only};
287 u8 wanted_lookup = lookup_value;
289 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 2", [=](
u64 gid) {
291 u32 cnt = scanned_neigh_cnt[id_a];
295 cell_looper.for_each_object_with_id(id_a, [&](
u32 id_b,
u32 id_list) {
296 bool lookup_match = normals_lookup[id_list] == wanted_lookup;
312 cache.complete_event_state(resulting_events);
315 shamlog_debug_sycl_ln(
316 "AMR::FaceFlagger",
"lookup :", lookup_value,
"found N =", pcache.sum_neigh_cnt);
sycl::queue & get_compute_queue(u32 id=0)
std::uint8_t u8
8 bit unsigned integer
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
size_t get_size() const
Gets the number of elements in the buffer.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
void add_event(sycl::event e)
Add an event to the list of events.
Represents a collection of objects distributed across patches identified by a u64 id.
iterator add_obj(u64 id, T &&obj)
Adds a new object to the collection.
void reset()
Reset the collection to its initial state.
flag faces with a lookup index for the orientation
void flag_faces()
Flag faces with a lookup index; performs at around 2G cells per second on an RTX A5000.
PatchDataLayer container class; the layout is described in patchdata_layout.
namespace for math utility
namespace for the main framework
Patch object that contains generic patch information.