// Excerpt of a small helper whose signature is outside this view.
// Tscal names the component type obtained from the vector type Tvec.
35 using Tscal = shambase::VecComponent<Tvec>;
// Delegates to shamtree::details::mac with the two operands and theta_crit.
38 return shamtree::details::mac(a, b, theta_crit);
// Templated on allow_leaf_lowering so that the kernels further down can pick
// their traversal strategy with `if constexpr`.
41 template<
bool allow_leaf_lowering>
// Parameters visible in this excerpt: the device scheduler handle, the
// theta_crit value handed to mac(), and the ordered_result flag.
43 sham::DeviceScheduler_ptr dev_sched,
45 shambase::VecComponent<Tvec> theta_crit,
46 bool ordered_result) {
// Both interaction lists are moved (not copied) into the aggregate being built.
// NOTE(review): presumably this aggregate is the `result` object used below - confirm.
55 .node_interactions_m2l = std::move(node_interactions_m2l),
56 .node_interactions_p2p = std::move(node_interactions_p2p)};
// Lambda (captures by reference) that builds the ordering data and stores it
// in result.ordered_result.
58 auto add_ordering = [&]() {
// Two calls to reorder_scan_dtt_result; their arguments are outside this excerpt.
// NOTE(review): presumably one call per interaction list (M2L, then P2P) - confirm.
63 shamtree::details::reorder_scan_dtt_result(
68 shamtree::details::reorder_scan_dtt_result(
// The two offset buffers are moved into the aggregate.
74 .offset_m2l = std::move(offset_m2l), .offset_p2p = std::move(offset_p2p)};
// Hand the ordering over to the result.
76 result.ordered_result = std::move(ordering);
// Seed the work list: entry 0 is the pair {0, 0}.
// NOTE(review): presumably index 0 is the tree root, i.e. root-vs-root - confirm.
92 task_current.set_val_at_idx(0, {0, 0});
// Helper taking a buffer and a requested size. It only acts when the buffer
// currently holds fewer than sz elements; the body of the `if` is outside this
// excerpt (presumably a resize to sz - confirm).
105 auto resize_max = [](
auto &buf,
u32 sz) {
106 if (buf.get_size() < sz) {
// Main loop: one pass per iteration, for as long as task_current is non-empty
// (see the task_current.resize(count_task) call further down).
111 while (task_current.
get_size() > 0) {
// task_count is defined on a line outside this excerpt.
// NOTE(review): presumably it equals task_current.get_size(), as the log label suggests - confirm.
113 shamlog_debug_ln(
"dtt_scan_multipass",
"task_current.get_size() :", task_count);
// Buffer sizes for this pass:
//  - flag buffers get task_count + 1 entries; the scatter kernels below read
//    entries i and i + 1 of the scanned arrays, hence the extra slot;
//  - task_next reserves 4 slots per task (a task writes at i * 4 + k, k < 4);
//  - pushed_m2l / pushed_p2p hold at most one pair per task (written at index i).
116 u32 has_pushed_task_sz = task_count + 1;
117 u32 task_next_sz = 4 * task_count;
118 u32 has_pushed_m2l_sz = task_count + 1;
119 u32 pushed_m2l_sz = task_count;
120 u32 has_pushed_p2p_sz = task_count + 1;
121 u32 pushed_p2p_sz = task_count;
// resize_max only acts when a buffer is smaller than the requested size.
123 resize_max(has_pushed_task, has_pushed_task_sz);
124 resize_max(task_next, task_next_sz);
125 resize_max(has_pushed_m2l, has_pushed_m2l_sz);
126 resize_max(pushed_m2l, pushed_m2l_sz);
127 resize_max(has_pushed_p2p, has_pushed_p2p_sz);
128 resize_max(pushed_p2p, pushed_p2p_sz);
// Reset the flag buffers with value 0 before the kernel sets / increments them
// (presumably fill(value, count) covers the first `count` entries - confirm).
130 has_pushed_task.
fill(0, has_pushed_task_sz);
131 has_pushed_m2l.
fill(0, has_pushed_m2l_sz);
132 has_pushed_p2p.
fill(0, has_pushed_p2p_sz);
// Accessor type exposed by the object iterator.
137 using ObjItAcc =
typename ObjectIterator::acc;
// Kernel arguments: task_current is read-only; the remaining pointers are the
// per-task outputs (flags and pushed pairs). All are declared __restrict__.
153 const u32_2 *__restrict__ task_current,
155 u32 *__restrict__ has_pushed_task,
156 u32_2 *__restrict__ task_next,
157 u32 *__restrict__ has_pushed_m2l,
158 u32_2 *__restrict__ pushed_m2l,
159 u32 *__restrict__ has_pushed_p2p,
160 u32_2 *__restrict__ pushed_p2p) {
// Task i is a pair of cell ids; `a` and `b` are extracted on lines outside
// this excerpt (presumably t.x() and t.y() - confirm).
161 u32_2 t = task_current[i];
// Bounding boxes (min, max) of cells a and b, read from the tree traverser.
166 obj_it.tree_traverser.aabb_min[a], obj_it.tree_traverser.aabb_max[a]};
168 obj_it.tree_traverser.aabb_min[b], obj_it.tree_traverser.aabb_max[b]};
// crit is true when mac() returns false for this pair of boxes.
170 bool crit = mac(aabb_a, aabb_b, theta_crit) ==
false;
173 auto &ttrav = obj_it.tree_traverser.tree_traverser;
// Strategy 1 (allow_leaf_lowering): a pair may mix a leaf with an inner cell.
175 if constexpr (allow_leaf_lowering) {
176 bool is_a_leaf = ttrav.is_id_leaf(a);
177 bool is_b_leaf = ttrav.is_id_leaf(b);
// Both sides are leaves: record the pair as a P2P interaction.
179 if (is_a_leaf && is_b_leaf) {
180 pushed_p2p[i] = {a, b};
181 has_pushed_p2p[i] = 1;
// Otherwise descend: a leaf side stands in for both of its "children", while
// an inner cell contributes its left and right child.
184 u32 child_a_1 = (is_a_leaf) ? a : ttrav.get_left_child(a);
185 u32 child_a_2 = (is_a_leaf) ? a : ttrav.get_right_child(a);
186 u32 child_b_1 = (is_b_leaf) ? b : ttrav.get_left_child(b);
187 u32 child_b_2 = (is_b_leaf) ? b : ttrav.get_right_child(b);
// The second child of a leaf side is disabled so the same pair is not pushed
// twice. run_a_1 / run_b_1 are defined on lines outside this excerpt.
190 bool run_a_2 = !is_a_leaf;
192 bool run_b_2 = !is_b_leaf;
// Enabled child pairs are written to this task's slots at i * 4 + push_count.
// NOTE(review): push_count is presumably incremented after each write on lines
// not shown here - confirm.
196 if (run_a_1 && run_b_1) {
197 task_next[i * 4 + push_count] = {child_a_1, child_b_1};
200 if (run_a_2 && run_b_1) {
201 task_next[i * 4 + push_count] = {child_a_2, child_b_1};
204 if (run_a_1 && run_b_2) {
205 task_next[i * 4 + push_count] = {child_a_1, child_b_2};
208 if (run_a_2 && run_b_2) {
209 task_next[i * 4 + push_count] = {child_a_2, child_b_2};
// Publish how many follow-up tasks this task produced.
212 has_pushed_task[i] += push_count;
// Strategy 2 (no leaf lowering): all four children are fetched unconditionally.
216 u32 child_a_1 = ttrav.get_left_child(a);
217 u32 child_a_2 = ttrav.get_right_child(a);
218 u32 child_b_1 = ttrav.get_left_child(b);
219 u32 child_b_2 = ttrav.get_right_child(b);
221 bool child_a_1_leaf = ttrav.is_id_leaf(child_a_1);
222 bool child_a_2_leaf = ttrav.is_id_leaf(child_a_2);
223 bool child_b_1_leaf = ttrav.is_id_leaf(child_b_1);
224 bool child_b_2_leaf = ttrav.is_id_leaf(child_b_2);
// If a child is a leaf the pair {a, b} itself is recorded as P2P (the end of
// the condition is outside this excerpt; presumably it also tests child_b_2_leaf).
226 if (child_a_1_leaf || child_a_2_leaf || child_b_1_leaf
228 pushed_p2p[i] = {a, b};
229 has_pushed_p2p[i] = 1;
// Otherwise push the four child pairs into this task's four slots.
231 task_next[i * 4 + 0] = {child_a_1, child_b_1};
232 task_next[i * 4 + 1] = {child_a_1, child_b_2};
233 task_next[i * 4 + 2] = {child_a_2, child_b_1};
234 task_next[i * 4 + 3] = {child_a_2, child_b_2};
235 has_pushed_task[i] += 4;
// Record {a, b} as an M2L interaction for this task.
// NOTE(review): the enclosing branch is not visible; presumably this is the
// path taken when crit is false - confirm.
240 pushed_m2l[i] = {a, b};
241 has_pushed_m2l[i] = 1;
// Argument lists of three calls applied to the flag buffers and their sizes
// (the callee names are outside this excerpt).
248 has_pushed_task, has_pushed_task_sz);
250 has_pushed_m2l, has_pushed_m2l_sz);
252 has_pushed_p2p, has_pushed_p2p_sz);
// Three further calls that also take the device scheduler.
// NOTE(review): presumably prefix sums producing the scan_task / scan_m2l /
// scan_p2p arrays read by the kernels below - confirm.
256 dev_sched, has_pushed_task, has_pushed_task_sz);
258 dev_sched, has_pushed_m2l, has_pushed_m2l_sz);
260 dev_sched, has_pushed_p2p, has_pushed_p2p_sz);
// The work list is resized to count_task (computed on lines outside this
// excerpt; presumably the total number of tasks pushed during this pass).
280 task_current.resize(count_task);
// Compaction kernel: copies the tasks pushed by task i from its 4-slot window
// in task_next to a contiguous range of task_current.
290 const u32_2 *__restrict__ task_next,
291 const u32 *__restrict__ scan_task,
292 u32_2 *__restrict__ task_current) {
// delta is the difference between consecutive scan entries and idx the output
// offset for task i.
293 u32 scan_task_i = scan_task[i];
294 u32 scan_task_ip1 = scan_task[i + 1];
295 u32 delta = scan_task_ip1 - scan_task_i;
297 u32 idx = scan_task_i;
// With leaf lowering, copy exactly delta entries, asserting that both cell ids
// are below max_cell_idx.
299 if constexpr (allow_leaf_lowering) {
300 for (
u32 l = 0; l < delta; l++) {
301 SHAM_ASSERT(task_next[i * 4 + l].x() < max_cell_idx);
302 SHAM_ASSERT(task_next[i * 4 + l].y() < max_cell_idx);
303 task_current[idx + l] = task_next[i * 4 + l];
// Without leaf lowering, four entries are copied.
// NOTE(review): no guard on delta is visible in this excerpt. If delta can be 0
// here, these writes would overlap another task's range or run past
// count_task - confirm that a guard exists on the lines not shown.
306 task_current[idx + 0] = task_next[i * 4 + 0];
307 task_current[idx + 1] = task_next[i * 4 + 1];
308 task_current[idx + 2] = task_next[i * 4 + 2];
309 task_current[idx + 3] = task_next[i * 4 + 3];
// M2L scatter kernel: when the scan difference for task i is exactly 1, its
// pair is written to interacts_m2l at res_sz_node_node + scan_m2l[i].
// NOTE(review): res_sz_node_node is presumably the number of M2L entries
// stored before this pass - confirm.
322 const u32_2 *__restrict__ pushed_m2l,
323 const u32 *__restrict__ scan_m2l,
324 u32_2 *__restrict__ interacts_m2l) {
325 u32 scan_m2l_i = scan_m2l[i];
326 u32 scan_m2l_ip1 = scan_m2l[i + 1];
327 if (scan_m2l_ip1 - scan_m2l_i == 1) {
328 interacts_m2l[res_sz_node_node + scan_m2l_i] = pushed_m2l[i];
// P2P scatter kernel: same scheme, writing to interact_p2p at
// res_sz_leaf_leaf + scan_p2p[i].
340 const u32_2 *__restrict__ pushed_p2p,
341 const u32 *__restrict__ scan_p2p,
342 u32_2 *__restrict__ interact_p2p) {
343 u32 scan_p2p_i = scan_p2p[i];
344 u32 scan_p2p_ip1 = scan_p2p[i + 1];
345 if (scan_p2p_ip1 - scan_p2p_i == 1) {
346 interact_p2p[res_sz_leaf_leaf + scan_p2p_i] = pushed_p2p[i];
// Entry point fragment (its name and some parameters are outside this
// excerpt). It turns the runtime allow_leaf_lowering flag into the template
// argument of dtt_internal and forwards the other arguments unchanged.
357 sham::DeviceScheduler_ptr dev_sched,
359 shambase::VecComponent<Tvec> theta_crit,
361 bool allow_leaf_lowering) {
// Runtime flag -> compile-time specialisation.
362 if (allow_leaf_lowering) {
363 return dtt_internal<true>(dev_sched, bvh, theta_crit, ordered_result);
365 return dtt_internal<false>(dev_sched, bvh, theta_crit, ordered_result);