Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
RadixTree.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
16#include "shambase/floats.hpp"
17#include "shambase/integer.hpp"
18#include "shamalgs/memory.hpp"
24#include <tuple>
25#include <vector>
26
27template<class u_morton, class vec3>
29 sycl::queue &queue,
30 std::tuple<vec3, vec3> treebox,
31 sycl::buffer<vec3> &pos_buf,
32 u32 cnt_obj,
33 u32 reduc_level) {
34 if (cnt_obj > i32_max - 1) {
36 "number of element in patch above i32_max-1");
37 }
38
39 shamlog_debug_sycl_ln("RadixTree", "box dim :", std::get<0>(treebox), std::get<1>(treebox));
40
41 bounding_box = treebox;
42
43 tree_morton_codes.build(queue, shammath::CoordRange<vec3>{treebox}, cnt_obj, pos_buf);
44
45 bool one_cell_mode;
46
47 tree_reduced_morton_codes.build(
48 queue, tree_morton_codes.obj_cnt, reduc_level, tree_morton_codes, one_cell_mode);
49
50 if (!one_cell_mode) {
51 tree_struct.build(
52 queue,
53 tree_reduced_morton_codes.tree_leaf_count - 1,
54 *tree_reduced_morton_codes.buf_tree_morton);
55 } else {
56 tree_struct.build_one_cell_mode();
57 }
58}
59
60template<class u_morton, class vec3>
62 sycl::queue &queue,
63 std::tuple<vec3, vec3> treebox,
64 const std::unique_ptr<sycl::buffer<vec3>> &pos_buf,
65 u32 cnt_obj,
66 u32 reduc_level)
67 : RadixTree(queue, treebox, shambase::get_check_ref(pos_buf), cnt_obj, reduc_level) {}
68
69template<class u_morton, class Tvec>
71 sham::DeviceScheduler_ptr dev_sched,
72 std::tuple<Tvec, Tvec> treebox,
74 u32 cnt_obj,
75 u32 reduc_level) {
76
77 sycl::queue &queue = dev_sched->get_queue().q;
78
79 if (cnt_obj > i32_max - 1) {
81 "number of element in patch above i32_max-1");
82 }
83
84 shamlog_debug_sycl_ln("RadixTree", "box dim :", std::get<0>(treebox), std::get<1>(treebox));
85
86 bounding_box = treebox;
87
88 tree_morton_codes.build(dev_sched, shammath::CoordRange<Tvec>{treebox}, cnt_obj, pos_buf);
89
90 bool one_cell_mode;
91
92 tree_reduced_morton_codes.build(
93 queue, tree_morton_codes.obj_cnt, reduc_level, tree_morton_codes, one_cell_mode);
94
95 if (!one_cell_mode) {
96 tree_struct.build(
97 queue,
98 tree_reduced_morton_codes.tree_leaf_count - 1,
99 *tree_reduced_morton_codes.buf_tree_morton);
100 } else {
101 tree_struct.build_one_cell_mode();
102 }
103}
104
105template<class u_morton, class vec3>
107 StackEntry stack_loc{};
108
109 serializer.write(std::get<0>(bounding_box));
110 serializer.write(std::get<1>(bounding_box));
111 tree_morton_codes.serialize(serializer);
112 tree_reduced_morton_codes.serialize(serializer);
113 tree_struct.serialize(serializer);
114 tree_cell_ranges.serialize(serializer);
115}
116
117template<class u_morton, class pos_t>
120 return H::serialize_byte_size<pos_t>() * 2 + tree_morton_codes.serialize_byte_size()
121 + tree_reduced_morton_codes.serialize_byte_size() + tree_struct.serialize_byte_size()
122 + tree_cell_ranges.serialize_byte_size();
123}
124
125template<class u_morton, class pos_t>
127 shamalgs::SerializeHelper &serializer) {
128 StackEntry stack_loc{};
129
130 RadixTree ret;
131
132 serializer.load(std::get<0>(ret.bounding_box));
133 serializer.load(std::get<1>(ret.bounding_box));
134
135 using namespace shamrock::tree;
136
137 ret.tree_morton_codes = TreeMortonCodes<u_morton>::deserialize(serializer);
138 ret.tree_reduced_morton_codes = TreeReducedMortonCodes<u_morton>::deserialize(serializer);
139 ret.tree_struct = TreeStructure<u_morton>::deserialize(serializer);
140 ret.tree_cell_ranges = TreeCellRanges<u_morton, pos_t>::deserialize(serializer);
141
142 return ret;
143}
144
145template<class u_morton, class vec3>
147 StackEntry stack_loc{};
148 tree_cell_ranges.build1(queue, tree_reduced_morton_codes, tree_struct);
149}
150
151template<class morton_t, class pos_t>
153 StackEntry stack_loc{};
154 u32 total_count = tree_struct.internal_cell_count + tree_reduced_morton_codes.tree_leaf_count;
155 tree_cell_ranges.build2(queue, total_count, bounding_box);
156}
157
158template<class u_morton, class vec>
160 sycl::queue &queue, sham::DeviceBuffer<coord_t> &int_rad_buf, coord_t tolerance)
162
163 shamlog_debug_sycl_ln("RadixTree", "compute int boxes");
164
165 auto buf_cell_interact_rad = RadixTreeField<coord_t>::make_empty(
166 1, tree_struct.internal_cell_count + tree_reduced_morton_codes.tree_leaf_count);
167 sycl::range<1> range_leaf_cell{tree_reduced_morton_codes.tree_leaf_count};
168
169 auto &buf_cell_int_rad_buf = buf_cell_interact_rad.radix_tree_field_buf;
170
171 sham::DeviceQueue q = shamsys::instance::get_compute_scheduler().get_queue();
172 sham::EventList depends_list;
173
174 auto h = int_rad_buf.get_read_access(depends_list);
175
176 auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
177 u32 offset_leaf = tree_struct.internal_cell_count;
178
179 auto h_max_cell
180 = buf_cell_int_rad_buf->template get_access<sycl::access::mode::discard_write>(cgh);
181
182 auto cell_particle_ids = tree_reduced_morton_codes.buf_reduc_index_map
183 ->template get_access<sycl::access::mode::read>(cgh);
184 auto particle_index_map = tree_morton_codes.buf_particle_index_map
185 ->template get_access<sycl::access::mode::read>(cgh);
186
187 coord_t tol = tolerance;
188
189 cgh.parallel_for(range_leaf_cell, [=](sycl::item<1> item) {
190 u32 gid = (u32) item.get_id(0);
191
192 u32 min_ids = cell_particle_ids[gid];
193 u32 max_ids = cell_particle_ids[gid + 1];
194 f32 h_tmp = 0;
195
196 for (unsigned int id_s = min_ids; id_s < max_ids; id_s++) {
197
198 f32 h_a = h[particle_index_map[id_s]] * tol;
199 h_tmp = (h_tmp > h_a ? h_tmp : h_a);
200 }
201
202 h_max_cell[offset_leaf + gid] = h_tmp;
203 });
204 });
205
206 int_rad_buf.complete_event_state(e);
207
208#if false
209 // debug code to track the DPCPP + prime number worker issue
210 {
211
212 //172827
213 //86413
214 //<<<(43207,1,1),(2,1,1)>>>
215 //gid = 86412
216
217 shamalgs::memory::print_buf(*tree_struct.buf_rchild_id, tree_struct.internal_cell_count, 16, "{} ");
218 shamalgs::memory::print_buf(*tree_struct.buf_lchild_id, tree_struct.internal_cell_count, 16, "{} ");
219 shamalgs::memory::print_buf(*tree_struct.buf_rchild_flag, tree_struct.internal_cell_count, 16, "{} ");
220 shamalgs::memory::print_buf(*tree_struct.buf_lchild_flag, tree_struct.internal_cell_count, 16, "{} ");
221
222
223 sycl::host_accessor rchild_id {*tree_struct.buf_rchild_id ,sycl::read_only};
224 sycl::host_accessor lchild_id {*tree_struct.buf_lchild_id ,sycl::read_only};
225 sycl::host_accessor rchild_flag {*tree_struct.buf_rchild_flag,sycl::read_only};
226 sycl::host_accessor lchild_flag {*tree_struct.buf_lchild_flag,sycl::read_only};
227
228 u32 gid = 86412;
229 u32 lid_0 = lchild_id[gid];
230 u32 rid_0 = rchild_id[gid];
231 u32 lfl_0 = lchild_flag[gid];
232 u32 rfl_0 = rchild_flag[gid];
233 u32 offset_leaf = tree_struct.internal_cell_count;
234 u32 lid = lchild_id[gid] + offset_leaf * lchild_flag[gid];
235 u32 rid = rchild_id[gid] + offset_leaf * rchild_flag[gid];
236
237 logger::raw_ln("gid",gid);
238 logger::raw_ln("lid_0",lid_0);
239 logger::raw_ln("rid_0",rid_0);
240 logger::raw_ln("lfl_0",lfl_0);
241 logger::raw_ln("rfl_0",rfl_0);
242 logger::raw_ln("offset_leaf",offset_leaf);
243 logger::raw_ln("lid",lid);
244 logger::raw_ln("rid",rid);
245 logger::raw_ln("sz =", buf_cell_int_rad_buf->size());
246 logger::raw_ln("internal_cell_count =", tree_struct.internal_cell_count);
247 logger::raw_ln("tree_leaf_count =", tree_reduced_morton_codes.tree_leaf_count);
248 }
249#endif
250
251 sycl::range<1> range_tree{tree_struct.internal_cell_count};
252
253 for (u32 i = 0; i < tree_depth; i++) {
254 queue.submit([&](sycl::handler &cgh) {
255 u32 offset_leaf = tree_struct.internal_cell_count;
256
257 sycl::accessor h_max_cell{*buf_cell_int_rad_buf, cgh, sycl::read_write};
258
259 sycl::accessor rchild_id{*tree_struct.buf_rchild_id, cgh, sycl::read_only};
260 sycl::accessor lchild_id{*tree_struct.buf_lchild_id, cgh, sycl::read_only};
261 sycl::accessor rchild_flag{*tree_struct.buf_rchild_flag, cgh, sycl::read_only};
262 sycl::accessor lchild_flag{*tree_struct.buf_lchild_flag, cgh, sycl::read_only};
263
264 u32 len = tree_struct.internal_cell_count;
265 constexpr u32 group_size = 64;
266 u32 max_len = len;
267 u32 group_cnt = shambase::group_count(len, group_size);
268 u32 corrected_len = group_cnt * group_size;
269
270 cgh.parallel_for(
271 sycl::nd_range<1>{corrected_len, group_size}, [=](sycl::nd_item<1> id) {
272 u32 local_id = id.get_local_id(0);
273 u32 group_tile_id = id.get_group_linear_id();
274 u32 gid = group_tile_id * group_size + local_id;
275
276 if (gid >= max_len)
277 return;
278
279 u32 lid = lchild_id[gid] + offset_leaf * lchild_flag[gid];
280 u32 rid = rchild_id[gid] + offset_leaf * rchild_flag[gid];
281
282 coord_t h_l = h_max_cell[lid];
283 coord_t h_r = h_max_cell[rid];
284
285 h_max_cell[gid] = (h_r > h_l ? h_r : h_l);
286 });
287 });
288 }
289
290 {
291 if (shamalgs::reduction::has_nan(
292 queue,
293 *buf_cell_int_rad_buf,
294 tree_struct.internal_cell_count + tree_reduced_morton_codes.tree_leaf_count)) {
296 *buf_cell_int_rad_buf,
297 tree_struct.internal_cell_count + tree_reduced_morton_codes.tree_leaf_count,
298 8,
299 "{} ");
301 "the structure of the tree as issue in ids");
302 }
303 }
304
305 return std::move(buf_cell_interact_rad);
306}
307
308template<class T>
309std::string print_member(const T &a);
310
311template<>
312std::string print_member(const u8 &a) {
313 return shambase::format_printf("%d", u32(a));
314}
315
316template<>
317std::string print_member(const u32 &a) {
318 return shambase::format_printf("%d", a);
319}
320
321template<class u_morton, class vec3>
322template<class T>
323void RadixTree<u_morton, vec3>::print_tree_field(sycl::buffer<T> &buf_field) {
324
325 sycl::host_accessor acc{buf_field, sycl::read_only};
326
327 u32 total_count = tree_struct.internal_cell_count + tree_reduced_morton_codes.tree_leaf_count;
328
329 u32 offset_leaf = tree_struct.internal_cell_count;
330
331 sycl::host_accessor rchild_id{*tree_struct.buf_rchild_id};
332 sycl::host_accessor lchild_id{*tree_struct.buf_lchild_id};
333 sycl::host_accessor rchild_flag{*tree_struct.buf_rchild_flag};
334 sycl::host_accessor lchild_flag{*tree_struct.buf_lchild_flag};
335
336 // start allow utf-8
337 auto printer = [&]() {
338 auto get_print_step
339 = [&](u32 gid, std::string prefix, bool is_left, auto &step_ref) -> std::string {
340 std::string ret_val = "";
341
342 if (!is_left) {
343 ret_val += prefix;
344 }
345
346 std::string val = " (" + print_member(acc[gid]) + ") ";
347 std::string val_empt = std::string(val.size(), ' ');
348
349 ret_val += (is_left ? "╦══" : "╚══");
350 ret_val += val;
351
352 if (gid < offset_leaf) {
353 u32 lid = lchild_id[gid] + offset_leaf * lchild_flag[gid];
354 u32 rid = rchild_id[gid] + offset_leaf * rchild_flag[gid];
355
356 ret_val += step_ref(
357 lid, prefix + (is_left ? "║ " + val_empt : " " + val_empt), true, step_ref);
358 ret_val += step_ref(
359 rid, prefix + (is_left ? "║ " + val_empt : " " + val_empt), false, step_ref);
360 } else {
361 ret_val += "\n";
362 }
363
364 return ret_val;
365 };
366
367 logger::raw_ln(get_print_step(0, "", false, get_print_step));
368 };
369 // end allow utf-8
370
371 printer();
372}
373
374template void RadixTree<u32, f64_3>::print_tree_field(sycl::buffer<u32> &buf_field);
375template void RadixTree<u32, f32_3>::print_tree_field(sycl::buffer<u32> &buf_field);
376template void RadixTree<u64, f64_3>::print_tree_field(sycl::buffer<u32> &buf_field);
377template void RadixTree<u64, f32_3>::print_tree_field(sycl::buffer<u32> &buf_field);
378
379template void RadixTree<u32, u32_3>::print_tree_field(sycl::buffer<u32> &buf_field);
380template void RadixTree<u32, u64_3>::print_tree_field(sycl::buffer<u32> &buf_field);
381template void RadixTree<u64, u32_3>::print_tree_field(sycl::buffer<u32> &buf_field);
382template void RadixTree<u64, u64_3>::print_tree_field(sycl::buffer<u32> &buf_field);
383template void RadixTree<u64, i64_3>::print_tree_field(sycl::buffer<u32> &buf_field);
384
385template<class u_morton, class vec3>
387 sycl::queue &queue, sycl::buffer<u8> &valid_node) {
388
389 u32 total_count = tree_struct.internal_cell_count + tree_reduced_morton_codes.tree_leaf_count;
390 sycl::range<1> range_tree{total_count};
391
392 {
393
394 // flag 1 valid
395 // flag 0 to be deleted
396 // flag 2 anything below should be deleted (2 if initialy 0 & parent = 1)
397 // basically 2 is le thing that would end up in the excluded lambda part
398
399 { // cascade zeros down the tree
400
401 sycl::buffer<u8> valid_node_new = sycl::buffer<u8>(total_count);
402
403 for (u32 it = 0; it < tree_depth; it++) {
404
405 shamlog_debug_sycl_ln("Radixtree", "cascading zeros step : ", it);
406 queue.submit([&](sycl::handler &cgh) {
407 sycl::accessor acc_valid_node_old{valid_node, cgh, sycl::read_only};
408 sycl::accessor acc_valid_node_new{
409 valid_node_new, cgh, sycl::write_only, sycl::no_init};
410
411 sycl::accessor acc_lchild_id{*tree_struct.buf_lchild_id, cgh, sycl::read_only};
412 sycl::accessor acc_rchild_id{*tree_struct.buf_rchild_id, cgh, sycl::read_only};
413 sycl::accessor acc_lchild_flag{
414 *tree_struct.buf_lchild_flag, cgh, sycl::read_only};
415 sycl::accessor acc_rchild_flag{
416 *tree_struct.buf_rchild_flag, cgh, sycl::read_only};
417
418 u32 leaf_offset = tree_struct.internal_cell_count;
419
420 cgh.parallel_for(
421 sycl::range<1>(tree_struct.internal_cell_count), [=](sycl::item<1> item) {
422 u32 lid = acc_lchild_id[item] + leaf_offset * acc_lchild_flag[item];
423 u32 rid = acc_rchild_id[item] + leaf_offset * acc_rchild_flag[item];
424
425 u8 old_nid_falg = acc_valid_node_old[item];
426
427 if (item.get_linear_id() == 0) {
428 acc_valid_node_new[item] = old_nid_falg;
429 }
430
431 if (old_nid_falg == 0 || old_nid_falg == 2) {
432 acc_valid_node_new[lid] = 0;
433 acc_valid_node_new[rid] = 0;
434 } else {
435 u8 old_lid_falg = acc_valid_node_old[lid];
436 u8 old_rid_falg = acc_valid_node_old[rid];
437
438 if (old_lid_falg == 0) {
439 old_lid_falg = 2;
440 }
441 if (old_rid_falg == 0) {
442 old_rid_falg = 2;
443 }
444
445 acc_valid_node_new[lid] = old_lid_falg;
446 acc_valid_node_new[rid] = old_rid_falg;
447 }
448 });
449 });
450
451 std::swap(valid_node, valid_node_new);
452 }
453 }
454
455 //{
456 // shamlog_debug_sycl_ln("Radixtree", "valid_node_state");
457 // print_tree_field(valid_node);
458 // logger::raw_ln("");
459 //}
460
461 sycl::buffer<u8> valid_tree_morton(tree_reduced_morton_codes.tree_leaf_count);
462
463 auto print_valid_morton = [&] {
464 shamlog_debug_sycl_ln("Radixtree", "valid_tree_morton");
465
466 sycl::buffer<u32> print_map(total_count);
467
468 {
469
470 sycl::host_accessor acc{print_map};
471 sycl::host_accessor acc_leaf{valid_tree_morton};
472
473 for (u32 i = 0; i < tree_reduced_morton_codes.tree_leaf_count; i++) {
474 acc[i + tree_struct.internal_cell_count] = acc_leaf[i];
475 }
476
477 for (u32 i = 0; i < tree_struct.internal_cell_count; i++) {
478 acc[i] = acc_leaf[i];
479 }
480 }
481
482 print_tree_field(print_map);
483
484 logger::raw_ln("");
485 };
486
487 queue.submit([&](sycl::handler &cgh) {
488 sycl::accessor acc_valid_tree_morton{
489 valid_tree_morton, cgh, sycl::write_only, sycl::no_init};
490
491 sycl::accessor acc_valid_node{valid_node, cgh, sycl::read_only};
492
493 u32 leaf_offset = tree_struct.internal_cell_count;
494
495 cgh.parallel_for(
496 sycl::range<1>(tree_reduced_morton_codes.tree_leaf_count), [=](sycl::item<1> item) {
497 u8 leaf_val = acc_valid_node[item.get_linear_id() + leaf_offset];
498
499 if (item.get_linear_id() < leaf_offset) {
500 if (acc_valid_node[item] == 2) {
501 leaf_val = 2;
502 }
503 }
504
505 acc_valid_tree_morton[item] = leaf_val;
506 });
507 });
508
509 // print_valid_morton();
510
511 // generate the new tree
512
513 RadixTree ret;
514
515 ret.bounding_box = bounding_box;
516
517 std::vector<u32> extract_id;
518
519 {
520
521 std::vector<u_morton> new_buf_morton;
522 std::vector<u32> new_buf_particle_index_map;
523 std::vector<u32> new_reduc_index_map;
524
525 u32 leaf_offset = tree_struct.internal_cell_count;
526
527 sycl::host_accessor cell_index_map{
528 *tree_reduced_morton_codes.buf_reduc_index_map, sycl::read_only};
529 sycl::host_accessor particle_index_map{
530 *tree_morton_codes.buf_particle_index_map, sycl::read_only};
531
532 sycl::host_accessor acc_valid_tree_morton{valid_tree_morton, sycl::read_only};
533
534 sycl::host_accessor acc_morton{*tree_morton_codes.buf_morton, sycl::read_only};
535
536 u32 cnt = 0;
537
538 for (u32 i = 0; i < tree_reduced_morton_codes.tree_leaf_count; i++) {
539 if (acc_valid_tree_morton[i] != 0) {
540
541 {
542 // loop on particle indexes
543 uint min_ids = cell_index_map[i];
544 uint max_ids = cell_index_map[i + 1];
545
546 new_reduc_index_map.push_back(cnt);
547
548 for (unsigned int id_s = min_ids; id_s < max_ids; id_s++) {
549
550 // recover old index before morton sort
551 uint id_b = particle_index_map[id_s];
552
553 // iteration function
554 {
555 extract_id.push_back(id_b);
556 new_buf_morton.push_back(acc_morton[id_b]);
557 new_buf_particle_index_map.push_back(cnt);
558
559 cnt++;
560 }
561 }
562 }
563 }
564 }
565
566 new_reduc_index_map.push_back(cnt);
567
568 std::vector<u_morton> new_morton_tree;
569
570 {
571 sycl::host_accessor acc_tree_morton{*tree_reduced_morton_codes.buf_tree_morton};
572
573 sycl::host_accessor acc_valid_tree_morton{valid_tree_morton, sycl::read_only};
574
575 for (u32 i = 0; i < tree_reduced_morton_codes.tree_leaf_count; i++) {
576 if (acc_valid_tree_morton[i] != 0) {
577 new_morton_tree.push_back(acc_tree_morton[i]);
578 }
579 }
580 }
581
582 ret.tree_reduced_morton_codes.tree_leaf_count = new_morton_tree.size();
583 ret.tree_struct.internal_cell_count = ret.tree_reduced_morton_codes.tree_leaf_count - 1;
584
585 ret.tree_morton_codes.buf_morton
586 = std::make_unique<sycl::buffer<u_morton>>(new_buf_morton.size());
587 {
588 sycl::host_accessor acc{
589 *ret.tree_morton_codes.buf_morton, sycl::write_only, sycl::no_init};
590 for (u32 i = 0; i < new_buf_morton.size(); i++) {
591 acc[i] = new_buf_morton[i];
592 }
593 }
594
595 ret.tree_morton_codes.buf_particle_index_map
596 = std::make_unique<sycl::buffer<u32>>(new_buf_particle_index_map.size());
597 {
598 sycl::host_accessor acc{
599 *ret.tree_morton_codes.buf_particle_index_map, sycl::write_only, sycl::no_init};
600 for (u32 i = 0; i < new_buf_particle_index_map.size(); i++) {
601 acc[i] = new_buf_particle_index_map[i];
602 }
603 }
604
605 if (ret.tree_reduced_morton_codes.tree_leaf_count > 1) {
606
607 ret.tree_reduced_morton_codes.buf_reduc_index_map
608 = std::make_unique<sycl::buffer<u32>>(new_reduc_index_map.size());
609 {
610 sycl::host_accessor acc{
611 *ret.tree_reduced_morton_codes.buf_reduc_index_map,
612 sycl::write_only,
613 sycl::no_init};
614 for (u32 i = 0; i < new_reduc_index_map.size(); i++) {
615 acc[i] = new_reduc_index_map[i];
616 }
617 }
618
619 ret.tree_reduced_morton_codes.buf_tree_morton
620 = std::make_unique<sycl::buffer<u_morton>>(new_morton_tree.size());
621 {
622 sycl::host_accessor acc{
623 *ret.tree_reduced_morton_codes.buf_tree_morton,
624 sycl::write_only,
625 sycl::no_init};
626 for (u32 i = 0; i < new_morton_tree.size(); i++) {
627 acc[i] = new_morton_tree[i];
628 }
629 }
630
631 ret.tree_struct.build(
632 queue,
633 ret.tree_struct.internal_cell_count,
634 *ret.tree_reduced_morton_codes.buf_tree_morton);
635
636 } else {
637 throw ShamrockSyclException("not implemented");
638 }
639 }
640
641 ret.compute_cell_ibounding_box(queue);
642 ret.convert_bounding_box(queue);
643
644#if false
645 std::unique_ptr<sycl::buffer<u32>> new_node_id_to_old_naive = std::make_unique<sycl::buffer<u32>>(ret.tree_leaf_count + ret.tree_internal_count);
646
647 {
648 auto & new_node_id_to_old = new_node_id_to_old_naive;
649
650 //junk fill
651 {
652 sycl::host_accessor acc{* new_node_id_to_old, sycl::write_only, sycl::no_init};
653 for (u32 i = 0 ; i < new_node_id_to_old->size(); i++) {
654 acc[i] = u32_max;
655 }
656 }
657
658
659 sycl::host_accessor acc_new_node_id_to_old {*new_node_id_to_old,sycl::write_only, sycl::no_init};
660
661 sycl::host_accessor new_tree_acc_pos_min_cell{*ret.buf_pos_min_cell,sycl::read_only};
662 sycl::host_accessor new_tree_acc_pos_max_cell{*ret.buf_pos_max_cell,sycl::read_only};
663
664 sycl::host_accessor old_tree_acc_pos_min_cell{*buf_pos_min_cell,sycl::read_only};
665 sycl::host_accessor old_tree_acc_pos_max_cell{*buf_pos_max_cell,sycl::read_only};
666
667 for(u32 i = 0 ; i < ret.tree_leaf_count + ret.tree_internal_count; i++){
668
669 vec3i cur_pos_min_cell_a = new_tree_acc_pos_min_cell[i];
670 vec3i cur_pos_max_cell_a = new_tree_acc_pos_max_cell[i];
671
672 for(u32 j = 0 ; j < tree_leaf_count + tree_internal_count; j++){
673
674 vec3i cur_pos_min_cell_b = old_tree_acc_pos_min_cell[j];
675 vec3i cur_pos_max_cell_b = old_tree_acc_pos_max_cell[j];
676
677
678 auto is_same_box = [&]() -> bool {
679 return
680 (cur_pos_min_cell_a.x() == cur_pos_min_cell_b.x()) &&
681 (cur_pos_min_cell_a.y() == cur_pos_min_cell_b.y()) &&
682 (cur_pos_min_cell_a.z() == cur_pos_min_cell_b.z()) &&
683 (cur_pos_max_cell_a.x() == cur_pos_max_cell_b.x()) &&
684 (cur_pos_max_cell_a.y() == cur_pos_max_cell_b.y()) &&
685 (cur_pos_max_cell_a.z() == cur_pos_max_cell_b.z()) ;
686 };
687
688 if(is_same_box()){
689
690 u32 store_val = j;
691
692 logger::raw_ln("i ->",cur_pos_min_cell_a,cur_pos_max_cell_a , "| ptr ->",cur_pos_min_cell_b,cur_pos_max_cell_b);
693
694
695 if(store_val >= tree_internal_count){
696 store_val -= tree_internal_count;
697 }
698
699 acc_new_node_id_to_old[i] = store_val;
700
701 break;
702 }
703
704
705 }
706 }
707 }
708
709 ret.print_tree_field(*new_node_id_to_old_naive);
710 std::unique_ptr<sycl::buffer<u32>> new_node_id_to_old_v1 = std::make_unique<sycl::buffer<u32>>(ret.tree_leaf_count + ret.tree_internal_count);
711
712 {
713 auto & new_node_id_to_old = new_node_id_to_old_v1;
714
715 //junk fill
716 {
717 sycl::host_accessor acc{* new_node_id_to_old, sycl::write_only, sycl::no_init};
718 for (u32 i = 0 ; i < new_node_id_to_old->size(); i++) {
719 acc[i] = u32_max;
720 }
721 }
722
723
724 sycl::host_accessor acc_new_node_id_to_old {*new_node_id_to_old,sycl::write_only, sycl::no_init};
725
726 sycl::host_accessor new_tree_acc_pos_min_cell{*ret.buf_pos_min_cell,sycl::read_only};
727 sycl::host_accessor new_tree_acc_pos_max_cell{*ret.buf_pos_max_cell,sycl::read_only};
728
729 sycl::host_accessor old_tree_acc_pos_min_cell{*buf_pos_min_cell,sycl::read_only};
730 sycl::host_accessor old_tree_acc_pos_max_cell{*buf_pos_max_cell,sycl::read_only};
731
732 sycl::host_accessor old_tree_lchild_id {*buf_lchild_id ,sycl::read_only};
733 sycl::host_accessor old_tree_rchild_id {*buf_rchild_id ,sycl::read_only};
734 sycl::host_accessor old_tree_lchild_flag {*buf_lchild_flag,sycl::read_only};
735 sycl::host_accessor old_tree_rchild_flag {*buf_rchild_flag,sycl::read_only};
736
737 u32 old_tree_leaf_offset = tree_internal_count;
738
739
740 for(u32 i = 0 ; i < ret.tree_leaf_count + ret.tree_internal_count; i++){
741
742 //logger::raw_ln();
743
744 vec3i cur_pos_min_cell_a = new_tree_acc_pos_min_cell[i];
745 vec3i cur_pos_max_cell_a = new_tree_acc_pos_max_cell[i];
746
747 u32 cur_id = 0;
748 vec3i cur_pos_min_cell_b = old_tree_acc_pos_min_cell[cur_id];
749 vec3i cur_pos_max_cell_b = old_tree_acc_pos_max_cell[cur_id];
750
751 while(true){
752
753 //logger::raw_ln("i ->",cur_pos_min_cell_a,cur_pos_max_cell_a , "| ptr ->",cur_pos_min_cell_b,cur_pos_max_cell_b);
754
755 auto is_same_box = [&]() -> bool {
756 return
757 (cur_pos_min_cell_a.x() == cur_pos_min_cell_b.x()) &&
758 (cur_pos_min_cell_a.y() == cur_pos_min_cell_b.y()) &&
759 (cur_pos_min_cell_a.z() == cur_pos_min_cell_b.z()) &&
760 (cur_pos_max_cell_a.x() == cur_pos_max_cell_b.x()) &&
761 (cur_pos_max_cell_a.y() == cur_pos_max_cell_b.y()) &&
762 (cur_pos_max_cell_a.z() == cur_pos_max_cell_b.z()) ;
763 };
764
765 auto potential_cell = [&](vec3i other_min, vec3i other_max) -> bool {
766 return
767 (cur_pos_min_cell_a.x() >= other_min.x()) &&
768 (cur_pos_min_cell_a.y() >= other_min.y()) &&
769 (cur_pos_min_cell_a.z() >= other_min.z()) &&
770 (cur_pos_max_cell_a.x() <= other_max.x()) &&
771 (cur_pos_max_cell_a.y() <= other_max.y()) &&
772 (cur_pos_max_cell_a.z() <= other_max.z()) ;
773 };
774
775 if(is_same_box()){
776
777 //logger::raw_ln("id : ",i,"found ",cur_id);
778
779 u32 store_val = cur_id;
780
781 if(store_val >= tree_internal_count){
782 store_val -= tree_internal_count;
783 }
784
785 acc_new_node_id_to_old[i] = store_val;
786
787 break;
788 }
789
790
791 u32 lid = old_tree_lchild_id[cur_id] + old_tree_leaf_offset * old_tree_lchild_flag[cur_id];
792 u32 rid = old_tree_rchild_id[cur_id] + old_tree_leaf_offset * old_tree_rchild_flag[cur_id];
793
794 vec3i cur_pos_min_cell_bl = old_tree_acc_pos_min_cell[lid];
795 vec3i cur_pos_max_cell_bl = old_tree_acc_pos_max_cell[lid];
796
797 vec3i cur_pos_min_cell_br = old_tree_acc_pos_min_cell[rid];
798 vec3i cur_pos_max_cell_br = old_tree_acc_pos_max_cell[rid];
799
800 bool l_ok = potential_cell(cur_pos_min_cell_bl,cur_pos_max_cell_bl);
801 bool r_ok = potential_cell(cur_pos_min_cell_br,cur_pos_max_cell_br);
802
803 //logger::raw_ln("options l=",lid,cur_pos_min_cell_bl,cur_pos_max_cell_bl,l_ok);
804 //logger::raw_ln("options r=",rid,cur_pos_min_cell_br,cur_pos_max_cell_br,r_ok);
805
806 if(l_ok){
807
808 cur_pos_min_cell_b = cur_pos_min_cell_bl;
809 cur_pos_max_cell_b = cur_pos_max_cell_bl;
810
811 cur_id = lid;
812 //logger::raw_ln("id : ",i,"moving to ",cur_id);
813
814 }else if(r_ok){
815 cur_pos_min_cell_b = cur_pos_min_cell_br;
816 cur_pos_max_cell_b = cur_pos_max_cell_br;
817
818 cur_id = rid;
819 //logger::raw_ln("id : ",i,"moving to ",cur_id);
820
821 }else{
822 throw "";
823 }
824
825
826
827
828
829
830 }
831
832 }
833 }
834
835 ret.print_tree_field(*new_node_id_to_old_v1);
836
837#endif
838
839 std::unique_ptr<sycl::buffer<u32>> new_node_id_to_old_v2
840 = std::make_unique<sycl::buffer<u32>>(
841 ret.tree_reduced_morton_codes.tree_leaf_count
842 + ret.tree_struct.internal_cell_count);
843
844 {
845 auto &new_node_id_to_old = new_node_id_to_old_v2;
846
847 // junk fill
848 {
849 sycl::host_accessor acc{*new_node_id_to_old, sycl::write_only, sycl::no_init};
850 for (u32 i = 0; i < new_node_id_to_old->size(); i++) {
851 acc[i] = u32_max;
852 }
853 }
854
855 shamsys::instance::get_compute_queue().submit([&](sycl::handler &cgh) {
856 sycl::accessor acc_new_node_id_to_old{
857 *new_node_id_to_old, cgh, sycl::write_only, sycl::no_init};
858
859 sycl::accessor new_tree_acc_pos_min_cell{
860 *ret.tree_cell_ranges.buf_pos_min_cell, cgh, sycl::read_write};
861 sycl::accessor new_tree_acc_pos_max_cell{
862 *ret.tree_cell_ranges.buf_pos_max_cell, cgh, sycl::read_write};
863
864 sycl::accessor old_tree_acc_pos_min_cell{
865 *tree_cell_ranges.buf_pos_min_cell, cgh, sycl::read_only};
866 sycl::accessor old_tree_acc_pos_max_cell{
867 *tree_cell_ranges.buf_pos_max_cell, cgh, sycl::read_only};
868
869 sycl::accessor old_tree_lchild_id{*tree_struct.buf_lchild_id, cgh, sycl::read_only};
870 sycl::accessor old_tree_rchild_id{*tree_struct.buf_rchild_id, cgh, sycl::read_only};
871 sycl::accessor old_tree_lchild_flag{
872 *tree_struct.buf_lchild_flag, cgh, sycl::read_only};
873 sycl::accessor old_tree_rchild_flag{
874 *tree_struct.buf_rchild_flag, cgh, sycl::read_only};
875
876 u32 old_tree_leaf_offset = tree_struct.internal_cell_count;
877
878 sycl::range<1> range_node = sycl::range<1>{
879 ret.tree_reduced_morton_codes.tree_leaf_count
880 + ret.tree_struct.internal_cell_count};
881
882 // auto out = sycl::stream(128, 128, cgh);
883
884 cgh.parallel_for(range_node, [=](sycl::item<1> item) {
885 // logger::raw_ln("\n \n ----------------\n \nnode : ",item.get_id(0));
886
887 ipos_t cur_pos_min_cell_a = new_tree_acc_pos_min_cell[item];
888 ipos_t cur_pos_max_cell_a = new_tree_acc_pos_max_cell[item];
889
890 u32 cur_id = 0;
891 ipos_t cur_pos_min_cell_b = old_tree_acc_pos_min_cell[cur_id];
892 ipos_t cur_pos_max_cell_b = old_tree_acc_pos_max_cell[cur_id];
893
894 while (true) {
895
896 // logger::raw_ln("i ->",cur_pos_min_cell_a,cur_pos_max_cell_a , "| ptr
897 // ->",cur_pos_min_cell_b,cur_pos_max_cell_b);
898
899 auto is_same_box = [&]() -> bool {
900 return (cur_pos_min_cell_a.x() == cur_pos_min_cell_b.x())
901 && (cur_pos_min_cell_a.y() == cur_pos_min_cell_b.y())
902 && (cur_pos_min_cell_a.z() == cur_pos_min_cell_b.z())
903 && (cur_pos_max_cell_a.x() == cur_pos_max_cell_b.x())
904 && (cur_pos_max_cell_a.y() == cur_pos_max_cell_b.y())
905 && (cur_pos_max_cell_a.z() == cur_pos_max_cell_b.z());
906 };
907
908 auto potential_cell = [&](ipos_t other_min, ipos_t other_max) -> bool {
909 return (cur_pos_min_cell_a.x() >= other_min.x())
910 && (cur_pos_min_cell_a.y() >= other_min.y())
911 && (cur_pos_min_cell_a.z() >= other_min.z())
912 && (cur_pos_max_cell_a.x() <= other_max.x())
913 && (cur_pos_max_cell_a.y() <= other_max.y())
914 && (cur_pos_max_cell_a.z() <= other_max.z());
915 };
916
917 auto contain_cell = [&](ipos_t other_min, ipos_t other_max) -> bool {
918 return (cur_pos_min_cell_a.x() <= other_min.x())
919 && (cur_pos_min_cell_a.y() <= other_min.y())
920 && (cur_pos_min_cell_a.z() <= other_min.z())
921 && (cur_pos_max_cell_a.x() >= other_max.x())
922 && (cur_pos_max_cell_a.y() >= other_max.y())
923 && (cur_pos_max_cell_a.z() >= other_max.z());
924 };
925
926 if (is_same_box()) {
927
928 // logger::raw_ln("found ",cur_id);
929
930 u32 store_val = cur_id;
931
932 // if(store_val >= old_tree_leaf_offset){
933 // store_val -= old_tree_leaf_offset;
934 // }
935
936 acc_new_node_id_to_old[item] = store_val;
937
938 break;
939 }
940
941 u32 lid = old_tree_lchild_id[cur_id]
942 + old_tree_leaf_offset * old_tree_lchild_flag[cur_id];
943 u32 rid = old_tree_rchild_id[cur_id]
944 + old_tree_leaf_offset * old_tree_rchild_flag[cur_id];
945
946 ipos_t cur_pos_min_cell_bl = old_tree_acc_pos_min_cell[lid];
947 ipos_t cur_pos_max_cell_bl = old_tree_acc_pos_max_cell[lid];
948
949 ipos_t cur_pos_min_cell_br = old_tree_acc_pos_min_cell[rid];
950 ipos_t cur_pos_max_cell_br = old_tree_acc_pos_max_cell[rid];
951
952 bool l_ok = potential_cell(cur_pos_min_cell_bl, cur_pos_max_cell_bl);
953 bool r_ok = potential_cell(cur_pos_min_cell_br, cur_pos_max_cell_br);
954
955 // logger::raw_ln("options
956 // l=",lid,cur_pos_min_cell_bl,cur_pos_max_cell_bl,l_ok);
957 // logger::raw_ln("options
958 // r=",rid,cur_pos_min_cell_br,cur_pos_max_cell_br,r_ok);
959
960 if (l_ok) {
961
962 cur_pos_min_cell_b = cur_pos_min_cell_bl;
963 cur_pos_max_cell_b = cur_pos_max_cell_bl;
964
965 cur_id = lid;
966 // logger::raw_ln("moving to ",cur_id);
967
968 } else if (r_ok) {
969 cur_pos_min_cell_b = cur_pos_min_cell_br;
970 cur_pos_max_cell_b = cur_pos_max_cell_br;
971
972 cur_id = rid;
973 // logger::raw_ln("moving to ",cur_id);
974
975 } else {
976
977 // if nothing is neither the same or a super set of our cell it means
978 // that
979 // our cell is a superset of one of the child hence the following check
980 bool l_contain = contain_cell(cur_pos_min_cell_bl, cur_pos_max_cell_bl);
981 bool r_contain = contain_cell(cur_pos_min_cell_br, cur_pos_max_cell_br);
982
983 // logger::raw_ln("options
984 // l=",lid,cur_pos_min_cell_bl,cur_pos_max_cell_bl,l_contain);
985 // logger::raw_ln("options
986 // r=",rid,cur_pos_min_cell_br,cur_pos_max_cell_br,r_contain);
987
988 if (l_contain) {
989 // logger::raw_ln("found ",cur_id);
990
991 u32 store_val = cur_id;
992 acc_new_node_id_to_old[item] = store_val;
993
994 // TODO : check that no particules are outside of the bound when
995 // restricted by this line
996 new_tree_acc_pos_min_cell[item] = cur_pos_min_cell_bl;
997 new_tree_acc_pos_max_cell[item] = cur_pos_max_cell_bl;
998
999 break;
1000 } else if (r_contain) {
1001 // logger::raw_ln("found ",cur_id);
1002
1003 u32 store_val = cur_id;
1004 acc_new_node_id_to_old[item] = store_val;
1005
1006 // TODO : check that no particules are outside of the bound when
1007 // restricted by this line
1008 new_tree_acc_pos_min_cell[item] = cur_pos_min_cell_br;
1009 new_tree_acc_pos_max_cell[item] = cur_pos_max_cell_br;
1010
1011 break;
1012 } else {
1013 // out << "[CRASH] Tree cut had a weird behavior during old cell
1014 // search : \n"; throw "";
1015
1016 u32 store_val = cur_id;
1017
1018 // if(store_val >= old_tree_leaf_offset){
1019 // store_val -= old_tree_leaf_offset;
1020 // }
1021
1022 acc_new_node_id_to_old[item] = store_val;
1023
1024 break;
1025 }
1026 }
1027 }
1028 });
1029 });
1030 }
1031
1032 // because we have updated the cell ranges in the tree cut
1034 queue,
1035 ret.tree_reduced_morton_codes.tree_leaf_count + ret.tree_struct.internal_cell_count,
1036 std::get<0>(ret.bounding_box),
1037 std::get<1>(ret.bounding_box),
1038 ret.tree_cell_ranges.buf_pos_min_cell,
1039 ret.tree_cell_ranges.buf_pos_max_cell,
1040 ret.tree_cell_ranges.buf_pos_min_cell_flt,
1041 ret.tree_cell_ranges.buf_pos_max_cell_flt);
1042
1043 // ret.print_tree_field(*new_node_id_to_old_v2);
1044
1045 shamlog_debug_ln(
1046 "TreeCutter",
1047 "tree cut cells:",
1048 tree_struct.internal_cell_count,
1049 "->",
1050 ret.tree_struct.internal_cell_count,
1051 "obj:",
1052 tree_morton_codes.obj_cnt,
1053 "->",
1054 extract_id.size());
1055
1056 return CuttedTree{
1057 std::move(ret),
1058 std::move(new_node_id_to_old_v2),
1059 std::make_unique<sycl::buffer<u32>>(shamalgs::memory::vector_to_buf(
1060 shamsys::instance::get_compute_queue(), std::move(extract_id)))};
1061 }
1062}
1063
1064template class RadixTree<u32, f32_3>;
1065template class RadixTree<u64, f32_3>;
1066
1067template class RadixTree<u32, f64_3>;
1068template class RadixTree<u64, f64_3>;
1069
1070template class RadixTree<u32, u32_3>;
1071template class RadixTree<u64, u32_3>;
1072
1073template class RadixTree<u32, u64_3>;
1074template class RadixTree<u64, u64_3>;
1075
1076template class RadixTree<u32, i64_3>;
1077template class RadixTree<u64, i64_3>;
constexpr const char * uint
Specific internal energy u.
Header file describing a Node Instance.
sycl::queue & get_compute_queue(u32 id=0)
Utility to build morton codes for the radix tree.
float f32
Alias for float.
std::uint8_t u8
8 bit unsigned integer
std::uint32_t u32
32 bit unsigned integer
The radix tree.
Definition RadixTree.hpp:50
A buffer allocated in USM (Unified Shared Memory)
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
void print_buf(sycl::buffer< T > &buf, u32 len, u32 column_count, std::string_view fmt)
Print the content of a sycl::buffer
Definition memory.hpp:181
namespace for basic c++ utilities
constexpr u32 group_count(u32 len, u32 group_size)
Calculates the number of groups based on the length and group size.
Definition integer.hpp:125
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110
constexpr u32 u32_max
u32 max value
constexpr i32 i32_max
i32 max value
main include file for memory algorithms
header file to manage sycl