19template<
class Tvec,
class Tgr
idVec>
25 using RTree =
typename Storage::RTree;
27 MergedPDat &mpdat = storage.merged_patchdata_ghost.
get();
34 storage.merge_patch_bounds.set(
36 shamlog_debug_ln(
"AMR",
"compute bound merged patch", id);
38 TgridVec min_bound = merged.pdat.get_field<TgridVec>(0).compute_min();
39 TgridVec max_bound = merged.pdat.get_field<TgridVec>(1).compute_max();
41 return shammath::AABB<TgridVec>{min_bound, max_bound};
48 shamlog_debug_ln(
"AMR",
"compute tree for merged patch",
id);
50 auto aabb = bounds.
get(
id);
52 TgridVec bmin = aabb.lower;
53 TgridVec bmax = aabb.upper;
55 TgridVec diff = bmax - bmin;
61 auto &field_pos = merged.pdat.get_field<TgridVec>(0);
64 shamsys::instance::get_compute_scheduler_ptr(),
67 field_pos.get_obj_cnt(),
74 tree.compute_cell_ibounding_box(shamsys::instance::get_compute_queue());
75 tree.convert_bounding_box(shamsys::instance::get_compute_queue());
78 storage.trees.set(std::move(trees));
81template<
class Tvec,
class Tgr
idVec>
87 using RTree =
typename Storage::RTree;
89 storage.trees.get().for_each([&](
u64 id, RTree &tree) {
90 u32 leaf_count = tree.tree_reduced_morton_codes.tree_leaf_count;
91 u32 internal_cell_count = tree.tree_struct.internal_cell_count;
92 u32 tot_count = leaf_count + internal_cell_count;
94 sycl::buffer<TgridVec> tmp_min_cell(tot_count);
95 sycl::buffer<TgridVec> tmp_max_cell(tot_count);
97 MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(
id);
105 auto acc_bmax = buf_cell_max.get_read_access(depends_list);
107 auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
110 u32 leaf_offset = tree.tree_struct.internal_cell_count;
112 sycl::accessor comp_min{tmp_min_cell, cgh, sycl::write_only, sycl::no_init};
113 sycl::accessor comp_max{tmp_max_cell, cgh, sycl::write_only, sycl::no_init};
118 shambase::parallel_for(cgh, leaf_count,
"compute leaf boxes", [=](
u64 leaf_id) {
122 cell_looper.iter_object_in_cell(leaf_id + leaf_offset, [&](
u32 block_id) {
123 TgridVec bmin = acc_bmin[block_id];
124 TgridVec bmax = acc_bmax[block_id];
126 min = sham::min(min, bmin);
127 max = sham::max(max, bmax);
130 comp_min[leaf_offset + leaf_id] = min;
131 comp_max[leaf_offset + leaf_id] = max;
136 buf_cell_max.complete_event_state(e);
138 auto ker_reduc_hmax = [&](sycl::handler &cgh) {
139 u32 offset_leaf = internal_cell_count;
141 sycl::accessor comp_min{tmp_min_cell, cgh, sycl::read_write};
142 sycl::accessor comp_max{tmp_max_cell, cgh, sycl::read_write};
144 sycl::accessor rchild_id{
146 sycl::accessor lchild_id{
148 sycl::accessor rchild_flag{
150 sycl::accessor lchild_flag{
153 shambase::parallel_for(cgh, internal_cell_count,
"propagate up", [=](
u64 gid) {
154 u32 lid = lchild_id[gid] + offset_leaf * lchild_flag[gid];
155 u32 rid = rchild_id[gid] + offset_leaf * rchild_flag[gid];
157 TgridVec bminl = comp_min[lid];
158 TgridVec bminr = comp_min[rid];
159 TgridVec bmaxl = comp_max[lid];
160 TgridVec bmaxr = comp_max[rid];
162 TgridVec bmin = sham::min(bminl, bminr);
163 TgridVec bmax = sham::max(bmaxl, bmaxr);
165 comp_min[gid] = bmin;
166 comp_max[gid] = bmax;
170 for (
u32 i = 0; i < tree.tree_depth; i++) {
174 sycl::buffer<TgridVec> &tree_bmin
176 sycl::buffer<TgridVec> &tree_bmax
182 u32 leaf_offset = tree.tree_struct.internal_cell_count;
184 sycl::accessor comp_bmin{tmp_min_cell, cgh, sycl::read_only};
185 sycl::accessor comp_bmax{tmp_max_cell, cgh, sycl::read_only};
187 sycl::accessor tree_buf_min{tree_bmin, cgh, sycl::read_write};
188 sycl::accessor tree_buf_max{tree_bmax, cgh, sycl::read_write};
190 shambase::parallel_for(cgh, tot_count,
"write in tree range", [=](
u64 nid) {
191 TgridVec load_min = comp_bmin[nid];
192 TgridVec load_max = comp_bmax[nid];
211 tree_buf_min[nid] = load_min;
212 tree_buf_max[nid] = load_max;
218template<
class Tvec,
class Tgr
idVec>
222 using RTree =
typename Storage::RTree;
231 shamlog_debug_ln(
"BasicSPH",
"build particle cache id =", patch_id);
235 MergedPDat &mfield = storage.merged_patchdata_ghost.get().get(patch_id);
240 RTree &tree = storage.trees.get().get(patch_id);
242 u32 obj_cnt = mfield.total_elements;
249 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
253 shamlog_debug_sycl_ln(
"Cache",
"generate cache for N=", obj_cnt);
259 auto cell_max = buf_cell_max.get_read_access(depends_list);
260 auto neigh_cnt = neigh_count.get_write_access(depends_list);
262 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
267 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 1", [=](
u64 gid) {
274 cell_looper.rtree_for(
275 [&](
u32 node_id, TgridVec bmin, TgridVec bmax) ->
bool {
278 .is_surface_or_volume();
286 cnt += (no_interact) ? 0 : 1;
295 neigh_cnt[id_a] = cnt;
300 buf_cell_max.complete_event_state(e);
301 neigh_count.complete_event_state(e);
303 tree::ObjectCache pcache = tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
311 auto cell_max = buf_cell_max.get_read_access(depends_list);
312 auto scanned_neigh_cnt = pcache.scanned_cnt.
get_read_access(depends_list);
315 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
319 shambase::parallel_for(cgh, obj_cnt,
"compute neigh cache 2", [=](
u64 gid) {
324 u32 cnt = scanned_neigh_cnt[id_a];
326 cell_looper.rtree_for(
327 [&](
u32 node_id, TgridVec bmin, TgridVec bmax) ->
bool {
330 .is_surface_or_volume();
341 cnt += (no_interact) ? 0 : 1;
347 buf_cell_max.complete_event_state(e);
355 using namespace shamrock::patch;
357 storage.neighbors_cache.get().preload(cur_p.
id_patch);
361 storage.timings_details.neighbors += time_neigh.
elasped_sec();
sycl::queue & get_compute_queue(u32 id=0)
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Represents a collection of objects distributed across patches identified by a u64 id.
void for_each(std::function< void(u64, T &)> &&f)
Applies a function to each object in the collection.
DistributedData< Tmap > map(std::function< Tmap(u64, T &)> map_func)
Apply a function to all objects in the collection and return a new collection containing the results.
T & get(u64 id)
Returns a reference to an object in the collection.
Class Timer measures the time elapsed since the timer was started.
void end()
Stops the timer and stores the elapsed time in nanoseconds.
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
void start()
Starts the timer.
PatchDataLayer container class, the layout is described in patchdata_layout.
constexpr T roundup_pow2(T v) noexcept
Round up to the next power of two. Source: https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_error if the pointer does not hold an object.
namespace for the main framework
Axis-Aligned bounding box.
bool is_surface() const noexcept
Checks if the AABB is a surface.
AABB get_intersect(AABB other) const noexcept
Compute the intersection of two AABB.
Patch object that contains generic patch information.
u64 id_patch
Unique key that identifies the patch.