Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
NeighbourCache.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
19#include "shambase/assert.hpp"
20#include "shambase/memory.hpp"
28
// NOTE(review): this is a Doxygen text dump — the original source line numbers are
// fused at the start of each line, and the function signature (original line 30)
// is missing from this extract, so the enclosing function name is not visible here.
//
// Purpose (from the visible body): for every non-empty patch, build a per-particle
// neighbour cache (tree::ObjectCache) by walking the merged-position radix tree once
// per particle, in the classic two-pass scheme:
//   pass 1 — count the neighbours of each particle;
//   pass 2 — after a scan of the counts (prepare_object_cache), re-walk the tree and
//            write the neighbour ids at each particle's scanned offset.
// A pair (a, b) is kept when it interacts in EITHER direction (gather or scatter),
// i.e. rab^2 <= (rint_a*Rkern)^2 or rab^2 <= (rint_b*Rkern)^2.
29template<class Tvec, class Tmorton, template<class> class SPHKernel>
31
32 // interface_control
33 using GhostHandle = sph::BasicSPHGhostHandler<Tvec>;
34 using GhostHandleCache = typename GhostHandle::CacheMap;
36
// Wall-clock timing of the whole build; accumulated into
// storage.timings_details.neighbors at the end of the function.
37 shambase::Timer time_neigh;
38 time_neigh.start();
39
40 StackEntry stack_loc{};
41
42 // do cache
// Builds and returns the neighbour cache for one patch id. Captures storage,
// solver_config and the scheduler by reference from the enclosing function.
43 auto build_neigh_cache = [&](u64 patch_id) {
44 shamlog_debug_ln("BasicSPH", "build particle cache id =", patch_id);
45
46 NamedStackEntry cache_build_stack_loc{"build cache"};
47
// Merged (local + ghost) fields of this patch: slot 0 = positions, slot 1 = h.
48 auto &mfield = storage.merged_xyzh.get().get(patch_id);
49
50 sham::DeviceBuffer<Tvec> &buf_xyz = mfield.template get_field_buf_ref<Tvec>(0);
51 sham::DeviceBuffer<Tscal> &buf_hpart = mfield.template get_field_buf_ref<Tscal>(1);
52
// Per-tree-node maximum interaction radius, used by the cell-opening criterion.
53 sham::DeviceBuffer<Tscal> &tree_field_rint
54 = storage.rtree_rint_field.get().get(patch_id).buf_field;
55
56 RTree &tree = storage.merged_pos_trees.get().get(patch_id);
57 auto obj_it = tree.get_object_iterator();
58
59 u32 obj_cnt = shambase::get_check_ref(storage.part_counts).indexes.get(patch_id);
60
// NOTE(review): range_npart appears unused below (parallel_for is called with
// obj_cnt directly) — candidate for removal.
61 sycl::range range_npart{obj_cnt};
62
// Interaction radii are inflated by the h-tolerance so the cache remains valid
// while smoothing lengths are iterated upward during the coarse cycle.
63 Tscal h_tolerance = solver_config.htol_up_coarse_cycle;
64
65 NamedStackEntry stack_loc1{"init cache"};
66
67 using namespace shamrock;
68
// Pass-1 output: neighbour count of each particle.
69 sham::DeviceBuffer<u32> neigh_count(
70 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
71
72 shamsys::instance::get_compute_queue().wait_and_throw();
73
74 shamlog_debug_sycl_ln("Cache", "generate cache for N=", obj_cnt);
// ---- pass 1: count neighbours per particle --------------------------------
75 {
76 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
77 sham::EventList depends_list;
78
79 auto xyz = buf_xyz.get_read_access(depends_list);
80 auto hpart = buf_hpart.get_read_access(depends_list);
81 auto rint_tree = tree_field_rint.get_read_access(depends_list);
82 auto neigh_cnt = neigh_count.get_write_access(depends_list);
83 auto particle_looper = obj_it.get_read_access(depends_list);
84
85 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
86 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
87
88 shambase::parallel_for(cgh, obj_cnt, "compute neigh cache 1", [=](u64 gid) {
89 u32 id_a = (u32) gid;
90
// Interaction radius of particle a (h inflated by the tolerance).
91 Tscal rint_a = hpart[id_a] * h_tolerance;
92
93 Tvec xyz_a = xyz[id_a];
94
// Search box of particle a, extended by its kernel support radius.
95 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
96 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
97
98 u32 cnt = 0;
99
100 particle_looper.rtree_for(
// Cell-opening test: descend while the node can interact with particle a.
101 [&](u32 node_id, shammath::AABB<Tvec> node_aabb) -> bool {
102 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
103
104 using namespace walker::interaction_crit;
105
106 return sph_radix_cell_crit(
107 xyz_a,
108 inter_box_a_min,
109 inter_box_a_max,
110 node_aabb.lower,
111 node_aabb.upper,
112 int_r_max_cell);
113 },
// Leaf visitor: count the pair unless it is out of range in BOTH directions.
114 [&](u32 id_b) {
115 // particle_looper.for_each_object(id_a,[&](u32 id_b){
116 // compute only omega_a
117 Tvec dr = xyz_a - xyz[id_b];
118 Tscal rab2 = sycl::dot(dr, dr);
119 Tscal rint_b = hpart[id_b] * h_tolerance;
120
121 bool no_interact
122 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
123
124 cnt += (no_interact) ? 0 : 1;
125 });
126
127 neigh_cnt[id_a] = cnt;
128 });
129 });
130
// Release the USM accessors by attaching the kernel's completion event.
131 buf_xyz.complete_event_state(e);
132 buf_hpart.complete_event_state(e);
133 neigh_count.complete_event_state(e);
134 tree_field_rint.complete_event_state(e);
135 obj_it.complete_event_state(e);
136 }
137
// Scan of the per-particle counts -> offsets, and allocation of the index map.
138 tree::ObjectCache pcache = tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
139
// ---- pass 2: re-walk the tree, writing neighbour ids at the scanned offsets.
// The walk MUST visit pairs in the same way as pass 1 so the counts match.
140 NamedStackEntry stack_loc2{"fill cache"};
141 {
142 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
143 sham::EventList depends_list;
144
145 auto xyz = buf_xyz.get_read_access(depends_list);
146 auto hpart = buf_hpart.get_read_access(depends_list);
147 auto rint_tree = tree_field_rint.get_read_access(depends_list);
148 auto scanned_neigh_cnt = pcache.scanned_cnt.get_read_access(depends_list);
149 auto neigh = pcache.index_neigh_map.get_write_access(depends_list);
150 auto particle_looper = obj_it.get_read_access(depends_list);
151
152 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
153 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
154
155 shambase::parallel_for(cgh, obj_cnt, "compute neigh cache 2", [=](u64 gid) {
156 u32 id_a = (u32) gid;
157
158 Tscal rint_a = hpart[id_a] * h_tolerance;
159
160 Tvec xyz_a = xyz[id_a];
161
162 Tvec inter_box_a_min = xyz_a - rint_a * Kernel::Rkern;
163 Tvec inter_box_a_max = xyz_a + rint_a * Kernel::Rkern;
164
// Write cursor starts at this particle's scanned offset.
165 u32 cnt = scanned_neigh_cnt[id_a];
166
167 particle_looper.rtree_for(
168 [&](u32 node_id, shammath::AABB<Tvec> node_aabb) -> bool {
169 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
170
171 using namespace walker::interaction_crit;
172
173 return sph_radix_cell_crit(
174 xyz_a,
175 inter_box_a_min,
176 inter_box_a_max,
177 node_aabb.lower,
178 node_aabb.upper,
179 int_r_max_cell);
180 },
181 [&](u32 id_b) {
182 // particle_looper.for_each_object(id_a,[&](u32 id_b){
183 // compute only omega_a
184 Tvec dr = xyz_a - xyz[id_b];
185 Tscal rab2 = sycl::dot(dr, dr);
186 Tscal rint_b = hpart[id_b] * h_tolerance;
187
188 bool no_interact
189 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
190
191 if (!no_interact) {
192 neigh[cnt] = id_b;
193 }
194 cnt += (no_interact) ? 0 : 1;
195 });
196 });
197 });
198
199 buf_xyz.complete_event_state(e);
200 buf_hpart.complete_event_state(e);
201 tree_field_rint.complete_event_state(e);
202 pcache.scanned_cnt.complete_event_state(e);
203 pcache.index_neigh_map.complete_event_state(e);
204 obj_it.complete_event_state(e);
205 }
206
207 return pcache;
208 };
209
// Drop any previously-built caches before rebuilding them for all patches.
210 shambase::get_check_ref(storage.neigh_cache).free_alloc();
211
212 using namespace shamrock::patch;
213 scheduler().for_each_patchdata_nonempty([&](Patch cur_p, PatchDataLayer &pdat) {
214 auto &ncache = shambase::get_check_ref(storage.neigh_cache);
215 ncache.neigh_cache.add_obj(cur_p.id_patch, build_neigh_cache(cur_p.id_patch));
216 });
217
218 time_neigh.end();
// NOTE: elasped_sec is the (historically misspelled) Timer API name — see the
// shambase::Timer declaration; do not "fix" the spelling at the call site alone.
219 storage.timings_details.neighbors += time_neigh.elasped_sec();
220}
221
// NOTE(review): Doxygen text dump — original line numbers fused at the start of each
// line; the function signature (original lines 223-225) is missing from this extract.
//
// Purpose (from the visible body): two-stage variant of the neighbour-cache build.
// Instead of a full tree walk per particle, it:
//   1. builds a leaf->neighbour-leaves cache (count then fill, AABB overlap test
//      extended by the per-node max interaction radius, symmetric in both directions);
//   2. finds, for every particle, the index of the leaf cell containing it;
//   3. counts, then 4. fills, each particle's neighbours by scanning only the
//      particles of the leaves neighbouring its owning leaf.
// The kept-pair criterion is the same as the per-particle version: interact if in
// range in EITHER direction (gather or scatter).
222template<class Tvec, class Tmorton, template<class> class SPHKernel>
225
226 // interface_control
227 using GhostHandle = sph::BasicSPHGhostHandler<Tvec>;
228 using GhostHandleCache = typename GhostHandle::CacheMap;
230
// Wall-clock timing; accumulated into storage.timings_details.neighbors at the end.
231 shambase::Timer time_neigh;
232 time_neigh.start();
233
234 StackEntry stack_loc{};
235
236 // do cache
// Builds and returns the neighbour cache for one patch id (captures by reference).
237 auto build_neigh_cache = [&](u64 patch_id) {
238 shamlog_debug_ln("BasicSPH", "build particle cache id =", patch_id);
239
240 NamedStackEntry cache_build_stack_loc{"build cache"};
241
// Merged (local + ghost) fields of this patch: slot 0 = positions, slot 1 = h.
242 auto &mfield = storage.merged_xyzh.get().get(patch_id);
243
244 sham::DeviceBuffer<Tvec> &buf_xyz = mfield.template get_field_buf_ref<Tvec>(0);
245 sham::DeviceBuffer<Tscal> &buf_hpart = mfield.template get_field_buf_ref<Tscal>(1);
246
// Per-tree-node maximum interaction radius for the cell-opening test.
247 sham::DeviceBuffer<Tscal> &tree_field_rint
248 = storage.rtree_rint_field.get().get(patch_id).buf_field;
249
250 RTree &tree = storage.merged_pos_trees.get().get(patch_id);
251 auto obj_it = tree.get_object_iterator();
252 auto leaf_it = tree.get_traverser();
253
// Tree-node indexing below assumes leaves are stored after the internal nodes,
// i.e. leaf i lives at node index intnode_cnt + i (offset_leaf in the kernels).
254 u32 leaf_cnt = tree.get_leaf_cell_count();
255 u32 intnode_cnt = tree.get_internal_cell_count();
256
257 u32 obj_cnt = shambase::get_check_ref(storage.part_counts).indexes.get(patch_id);
258
// NOTE(review): range_nleaf / range_nobj appear unused below (parallel_for is
// called with leaf_cnt / obj_cnt directly) — candidates for removal.
259 sycl::range range_nleaf{leaf_cnt};
260 sycl::range range_nobj{obj_cnt};
261 using namespace shamrock;
262
// Interaction radii inflated by the h-tolerance of the coarse smoothing cycle.
263 Tscal h_tolerance = solver_config.htol_up_coarse_cycle;
264
265 NamedStackEntry stack_loc1{"init cache"};
266
267 // start by counting number of leaf neighbours
268
// Stage-1 pass-1 output: number of neighbouring leaves of each leaf.
269 sham::DeviceBuffer<u32> neigh_count_leaf(
270 leaf_cnt, shamsys::instance::get_compute_scheduler_ptr());
271
272 shamsys::instance::get_compute_queue().wait_and_throw();
273
274 shamlog_debug_sycl_ln("Cache", "generate cache for Nleaf=", leaf_cnt);
275
// ---- stage 1, pass 1: count neighbouring leaves per leaf --------------------
276 {
277 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
278 sham::EventList depends_list;
279
280 auto xyz = buf_xyz.get_read_access(depends_list);
281 auto hpart = buf_hpart.get_read_access(depends_list);
282 auto rint_tree = tree_field_rint.get_read_access(depends_list);
283 auto neigh_cnt = neigh_count_leaf.get_write_access(depends_list);
284 auto leaf_looper = leaf_it.get_read_access(depends_list);
285
286 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
287 u32 offset_leaf = intnode_cnt;
288
289 shambase::parallel_for(cgh, leaf_cnt, "compute neigh cache 1", [=](u64 gid) {
290 u32 id_a = (u32) gid;
291
// Leaf a's AABB and its copy extended by the leaf's max interaction radius.
292 Tscal leaf_a_rint = rint_tree[offset_leaf + gid] * Kernel::Rkern;
293 Tvec leaf_a_bmin = leaf_looper.aabb_min[offset_leaf + gid];
294 Tvec leaf_a_bmax = leaf_looper.aabb_max[offset_leaf + gid];
295 Tvec leaf_a_bmin_ext = leaf_a_bmin - leaf_a_rint;
296 Tvec leaf_a_bmax_ext = leaf_a_bmax + leaf_a_rint;
297
298 u32 cnt = 0;
299
300 leaf_looper.rtree_for(
// Open a node if leaf a overlaps the node extended by the node's interaction
// radius, OR leaf a extended by its own radius overlaps the node (symmetric
// test — mirrors the either-direction pair criterion of the particle stage).
301 [&](u32 node_id, shammath::AABB<Tvec> node_aabb) -> bool {
302 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
303
304 Tvec ext_bmin = node_aabb.lower - int_r_max_cell;
305 Tvec ext_bmax = node_aabb.upper + int_r_max_cell;
306
307 return BBAA::cella_neigh_b(leaf_a_bmin, leaf_a_bmax, ext_bmin, ext_bmax)
308 || BBAA::cella_neigh_b(
309 leaf_a_bmin_ext,
310 leaf_a_bmax_ext,
311 node_aabb.lower,
312 node_aabb.upper);
313 },
314 [&](u32 leaf_b) {
315 cnt++;
316 });
317
318 neigh_cnt[id_a] = cnt;
319 });
320 });
321
// Release USM accessors by attaching the kernel completion event.
322 buf_xyz.complete_event_state(e);
323 buf_hpart.complete_event_state(e);
324 tree_field_rint.complete_event_state(e);
325 neigh_count_leaf.complete_event_state(e);
326 leaf_it.complete_event_state(e);
327 }
328
// Commented-out host-side debug dump of oversized leaf neighbour counts.
329 //{
330 // u32 offset_leaf = intnode_cnt;
331 // sycl::host_accessor neigh_cnt{neigh_count_leaf};
332 // sycl::host_accessor pos_min_cell
333 // {shambase::get_check_ref(tree.tree_cell_ranges.buf_pos_min_cell_flt)};
334 // sycl::host_accessor pos_max_cell
335 // {shambase::get_check_ref(tree.tree_cell_ranges.buf_pos_max_cell_flt)};
336 //
337 // for (u32 i = 0; i < 1000; i++) {
338 // if(neigh_cnt[i] > 30){
339 // logger::raw_ln(i, neigh_cnt[i], pos_max_cell[i+offset_leaf] -
340 // pos_min_cell[i+offset_leaf]);
341 // }
342 // }
343 //}
344
// Scan of the leaf counts -> per-leaf offsets + allocation of the leaf index map.
345 tree::ObjectCache pleaf_cache
346 = tree::prepare_object_cache(std::move(neigh_count_leaf), leaf_cnt);
347
348 // fill ids of leaf neighbours
349
// ---- stage 1, pass 2: write neighbouring-leaf ids (same traversal as pass 1,
// so the counts are guaranteed to match). Stored ids are RAW node indices
// (offset_leaf is NOT subtracted here — consumers subtract it, see below).
350 NamedStackEntry stack_loc2{"fill cache"};
351
352 {
353 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
354 sham::EventList depends_list;
355
356 auto xyz = buf_xyz.get_read_access(depends_list);
357 auto hpart = buf_hpart.get_read_access(depends_list);
358 auto rint_tree = tree_field_rint.get_read_access(depends_list);
359 auto scanned_neigh_cnt = pleaf_cache.scanned_cnt.get_read_access(depends_list);
360 auto neigh = pleaf_cache.index_neigh_map.get_write_access(depends_list);
361 auto leaf_looper = leaf_it.get_read_access(depends_list);
362
363 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
364 u32 offset_leaf = intnode_cnt;
365
366 shambase::parallel_for(cgh, leaf_cnt, "compute neigh cache 2", [=](u64 gid) {
367 u32 id_a = (u32) gid;
368
369 Tscal leaf_a_rint = rint_tree[offset_leaf + gid] * Kernel::Rkern;
370 Tvec leaf_a_bmin = leaf_looper.aabb_min[offset_leaf + gid];
371 Tvec leaf_a_bmax = leaf_looper.aabb_max[offset_leaf + gid];
372 Tvec leaf_a_bmin_ext = leaf_a_bmin - leaf_a_rint;
373 Tvec leaf_a_bmax_ext = leaf_a_bmax + leaf_a_rint;
374
// Write cursor starts at this leaf's scanned offset.
375 u32 cnt = scanned_neigh_cnt[id_a];
376
377 leaf_looper.rtree_for(
378 [&](u32 node_id, shammath::AABB<Tvec> node_aabb) -> bool {
379 Tscal int_r_max_cell = rint_tree[node_id] * Kernel::Rkern;
380
381 Tvec ext_bmin = node_aabb.lower - int_r_max_cell;
382 Tvec ext_bmax = node_aabb.upper + int_r_max_cell;
383
384 return BBAA::cella_neigh_b(leaf_a_bmin, leaf_a_bmax, ext_bmin, ext_bmax)
385 || BBAA::cella_neigh_b(
386 leaf_a_bmin_ext,
387 leaf_a_bmax_ext,
388 node_aabb.lower,
389 node_aabb.upper);
390 },
391 [&](u32 leaf_b) {
392 neigh[cnt] = leaf_b;
393 cnt++;
394 });
395 });
396 });
397
398 buf_xyz.complete_event_state(e);
399 buf_hpart.complete_event_state(e);
400 tree_field_rint.complete_event_state(e);
401 pleaf_cache.scanned_cnt.complete_event_state(e);
402 pleaf_cache.index_neigh_map.complete_event_state(e);
403 leaf_it.complete_event_state(e);
404 }
// ---- stage 2: find the owning leaf of every particle ------------------------
405 // search in which leaf each parts are
406 sycl::buffer<u32> leaf_part_id(obj_cnt);
407
408 {
409 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
410 sham::EventList depends_list;
411
412 auto xyz = buf_xyz.get_read_access(depends_list);
413 auto leaf_looper = leaf_it.get_read_access(depends_list);
414
415 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
416 sycl::accessor found_id{leaf_part_id, cgh, sycl::write_only, sycl::no_init};
417 u32 offset_leaf = intnode_cnt;
418 // sycl::stream out {4096,4096,cgh};
419 shambase::parallel_for(cgh, obj_cnt, "search particles parent leaf", [=](u64 gid) {
420 u32 id_a = (u32) gid;
421
422 Tvec r_a = xyz[id_a];
423
// Sentinel deliberately left at i32_max when no leaf contains the particle,
// so a later out-of-bound access makes the failure loud instead of silent.
424 u32 found_id_ = i32_max; // to ensure a crash because of out of bound
425 // access if not found
426
427 leaf_looper.rtree_for(
// Descend only into nodes whose AABB contains the particle position
// (max bound inclusive, so particles sitting on a face are not lost).
428 [&](u32 node_id, shammath::AABB<Tvec> node_aabb) -> bool {
429 bool ret = BBAA::is_coord_in_range_incl_max(
430 r_a, node_aabb.lower, node_aabb.upper);
431
432 // error : i= 44245 r=
433 // (0.3495433344162232,-0.005627362002766546,-0.21312104638358176)
434 // leaf_id= 2147483647 if(id_a == 44245) {out << node_id << " "
435 // << bmin
436 // << " " << bmax << " " << ret << "\n";};
437 return ret;
438 },
// Convert the raw node index to a leaf index in [0, leaf_cnt).
439 [&](u32 leaf_b) {
440 found_id_ = leaf_b - offset_leaf;
441 });
442
// NOTE(review): this bound equals leaf_cnt only if intnode_cnt == leaf_cnt - 1
// (binary radix-tree invariant) — presumably intended as found_id_ < leaf_cnt;
// confirm against the tree implementation.
443 SHAM_ASSERT(found_id_ < offset_leaf + 1);
444
445 found_id[id_a] = found_id_;
446 });
447 });
448
449 buf_xyz.complete_event_state(e);
450 leaf_it.complete_event_state(e);
451 }
452
// Commented-out host-side sanity check of the particle->leaf assignment.
453 //{
454 // sycl::host_accessor xyz{buf_xyz};
455 // sycl::host_accessor acc {leaf_part_id};
456 //
457 // for(u32 i = 0; i < obj_cnt; i++){
458 // u32 leaf_id = acc[i];
459 // if(leaf_id >= leaf_cnt){
460 // logger::raw_ln("error : i=",i,"r=",xyz[i],"leaf_id=",leaf_id);
461 // }
462 // }
463 //}
464
// ---- stage 3: count neighbours per particle via the leaf cache --------------
465 sham::DeviceBuffer<u32> neigh_count(
466 obj_cnt, shamsys::instance::get_compute_scheduler_ptr());
467
468 shamsys::instance::get_compute_queue().wait_and_throw();
469
470 shamlog_debug_sycl_ln("Cache", "generate cache for N=", obj_cnt);
471
472 {
473 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
474 sham::EventList depends_list;
475
476 auto xyz = buf_xyz.get_read_access(depends_list);
477 auto hpart = buf_hpart.get_read_access(depends_list);
478 auto acc_neigh_leaf_looper = pleaf_cache.get_read_access(depends_list);
479 auto neigh_cnt = neigh_count.get_write_access(depends_list);
480 auto particle_looper = obj_it.cell_iterator.get_read_access(depends_list);
481
482 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
483 tree::ObjectCacheIterator neigh_leaf_looper(acc_neigh_leaf_looper);
484
485 sycl::accessor leaf_owner{leaf_part_id, cgh, sycl::read_only};
486
487 u32 offset_leaf = intnode_cnt;
488 // sycl::stream out {4096,1024,cgh};
489
490 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
491
492 shambase::parallel_for(cgh, obj_cnt, "compute neigh cache 1", [=](u64 gid) {
493 u32 id_a = (u32) gid;
494
495 Tscal rint_a = hpart[id_a] * h_tolerance;
496
497 Tvec xyz_a = xyz[id_a];
498
499 u32 cnt = 0;
500
501 u32 leaf_own_a = leaf_owner[id_a];
502
// Iterate over the leaves neighbouring a's owning leaf (ids in the leaf cache
// are raw node indices, hence the offset_leaf subtraction below)...
503 neigh_leaf_looper.for_each_object(leaf_own_a, [&](u32 leaf_b) {
504 SHAM_ASSERT(leaf_b >= offset_leaf);
505
// ...then over the particles of each such leaf, with the same either-direction
// pair criterion as the per-particle variant.
506 particle_looper.for_each_in_leaf_cell(leaf_b - offset_leaf, [&](u32 id_b) {
507 Tvec dr = xyz_a - xyz[id_b];
508 Tscal rab2 = sycl::dot(dr, dr);
509 Tscal rint_b = hpart[id_b] * h_tolerance;
510
511 bool no_interact
512 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
513
514 cnt += (no_interact) ? 0 : 1;
515 });
516 });
517
518 neigh_cnt[id_a] = cnt;
519 });
520 });
521
522 buf_xyz.complete_event_state(e);
523 buf_hpart.complete_event_state(e);
524 pleaf_cache.complete_event_state(e);
525 neigh_count.complete_event_state(e);
526 obj_it.cell_iterator.complete_event_state(e);
527 }
528
// Scan of the particle counts -> offsets + allocation of the final index map.
529 tree::ObjectCache pcache = tree::prepare_object_cache(std::move(neigh_count), obj_cnt);
530
// ---- stage 4: fill the per-particle neighbour ids (same iteration order as
// stage 3, writing at the scanned offsets) -----------------------------------
531 NamedStackEntry stack_loc3{"fill cache"};
532
533 {
534 sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
535 sham::EventList depends_list;
536
537 auto xyz = buf_xyz.get_read_access(depends_list);
538 auto hpart = buf_hpart.get_read_access(depends_list);
539 auto acc_neigh_leaf_looper = pleaf_cache.get_read_access(depends_list);
540 auto scanned_neigh_cnt = pcache.scanned_cnt.get_read_access(depends_list);
541 auto neigh = pcache.index_neigh_map.get_write_access(depends_list);
542 auto particle_looper = obj_it.cell_iterator.get_read_access(depends_list);
543
544 auto e = q.submit(depends_list, [&, h_tolerance](sycl::handler &cgh) {
545 tree::ObjectCacheIterator neigh_leaf_looper(acc_neigh_leaf_looper);
546
547 sycl::accessor leaf_owner{leaf_part_id, cgh, sycl::read_only};
548
549 u32 offset_leaf = intnode_cnt;
550
551 constexpr Tscal Rker2 = Kernel::Rkern * Kernel::Rkern;
552
553 shambase::parallel_for(cgh, obj_cnt, "compute neigh cache 2", [=](u64 gid) {
554 u32 id_a = (u32) gid;
555
556 Tscal rint_a = hpart[id_a] * h_tolerance;
557
558 Tvec xyz_a = xyz[id_a];
559
// Write cursor starts at this particle's scanned offset.
560 u32 cnt = scanned_neigh_cnt[id_a];
561
562 u32 leaf_own_a = leaf_owner[id_a];
563
564 neigh_leaf_looper.for_each_object(leaf_own_a, [&](u32 leaf_b) {
565 SHAM_ASSERT(leaf_b >= offset_leaf);
566
567 particle_looper.for_each_in_leaf_cell(leaf_b - offset_leaf, [&](u32 id_b) {
568 Tvec dr = xyz_a - xyz[id_b];
569 Tscal rab2 = sycl::dot(dr, dr);
570 Tscal rint_b = hpart[id_b] * h_tolerance;
571
572 bool no_interact
573 = rab2 > rint_a * rint_a * Rker2 && rab2 > rint_b * rint_b * Rker2;
574
575 if (!no_interact) {
576 neigh[cnt] = id_b;
577 }
578 cnt += (no_interact) ? 0 : 1;
579 });
580 });
581 });
582 });
583
584 buf_xyz.complete_event_state(e);
585 buf_hpart.complete_event_state(e);
586 pleaf_cache.complete_event_state(e);
587 pcache.scanned_cnt.complete_event_state(e);
588 pcache.index_neigh_map.complete_event_state(e);
589 obj_it.cell_iterator.complete_event_state(e);
590 }
591 return pcache;
592 };
593
// Drop any previously-built caches before rebuilding them for all patches.
594 shambase::get_check_ref(storage.neigh_cache).free_alloc();
595
596 using namespace shamrock::patch;
597 scheduler().for_each_patchdata_nonempty([&](Patch cur_p, PatchDataLayer &pdat) {
598 auto &ncache = shambase::get_check_ref(storage.neigh_cache);
599 ncache.neigh_cache.add_obj(cur_p.id_patch, build_neigh_cache(cur_p.id_patch));
600 });
601
602 time_neigh.end();
// NOTE: elasped_sec is the (historically misspelled) Timer API name — see the
// shambase::Timer declaration; do not "fix" the spelling at the call site alone.
603 storage.timings_details.neighbors += time_neigh.elasped_sec();
604}
605
606using namespace shammath;
610
constexpr const char * xyz
Position field (3D coordinates)
constexpr const char * hpart
Smoothing length field.
sycl::queue & get_compute_queue(u32 id=0)
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
Definition assert.hpp:67
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
Class Timer measures the time elapsed since the timer was started.
Definition time.hpp:96
void end()
Stops the timer and stores the elapsed time in nanoseconds.
Definition time.hpp:111
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
Definition time.hpp:123
void start()
Starts the timer.
Definition time.hpp:106
PatchDataLayer container class, the layout is described in patchdata_layout.
A Compressed Leaf Bounding Volume Hierarchy (CLBVH) for neighborhood queries.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_error if the pointer is empty.
Definition memory.hpp:110
namespace for math utility
Definition AABB.hpp:26
namespace for the main framework
Definition __init__.py:1
constexpr i32 i32_max
i32 max value
sph kernels
Axis-Aligned bounding box.
Definition AABB.hpp:99
T lower
Lower bound of the AABB.
Definition AABB.hpp:104
T upper
Upper bound of the AABB.
Definition AABB.hpp:105
Patch object that contain generic patch information.
Definition Patch.hpp:33
u64 id_patch
unique key that identify the patch
Definition Patch.hpp:86