// NOTE(review): fragmentary view — the enclosing function signature is elided
// from this chunk. Visible body: defines the tree scalar type from the vector
// component type, then forwards the cell pair (a, b) to the shared multipole
// acceptance criterion (MAC) implementation with opening angle theta_crit.
35 using Tscal = shambase::VecComponent<Tvec>;
38 return shamtree::details::mac(a, b, theta_crit);
// dtt_internal<allow_leaf_lowering>:
// Multipass, breadth-first dual-tree traversal (DTT) over a BVH. Each pass
// classifies every pending cell pair (a, b) into one of:
//   - an M2L interaction (far field: the MAC accepts the pair),
//   - a P2P interaction (near field: leaf-level pair),
//   - up to 4 child-pair tasks to re-examine in the next pass.
// Per-pass pushes are recorded in flag arrays that are scan-compacted into
// the result buffers and the next task queue.
// NOTE(review): this chunk is a partial view of the function — lines between
// the numbered statements are elided; comments below describe only what is
// visible and hedge where control flow depends on elided code.
41 template<
bool allow_leaf_lowering>
// Visible parameter subset: the device scheduler that runs the kernels, the
// MAC opening angle theta_crit, and whether to sort the interaction lists.
43 sham::DeviceScheduler_ptr dev_sched,
45 shambase::VecComponent<Tvec> theta_crit,
46 bool ordered_result) {
// The result aggregate takes ownership of both interaction buffers.
55 std::move(node_interactions_m2l), std::move(node_interactions_p2p)};
// Optional post-processing: reorder both interaction lists so the output does
// not depend on the traversal/compaction order.
57 auto add_ordering = [&]() {
62 shamtree::details::reorder_scan_dtt_result(
64 result.node_interactions_m2l,
67 shamtree::details::reorder_scan_dtt_result(
69 result.node_interactions_p2p,
74 result.ordered_result = std::move(ordering);
// Presumably a degenerate-tree early exit: emit the single root/root P2P pair
// {0, 0} — TODO(review): confirm against the elided surrounding condition.
81 result.node_interactions_p2p.resize(1);
82 result.node_interactions_p2p.set_val_at_idx(0, {0, 0});
// Seed the task queue with the root/root pair; the loop below refines it.
90 task_current.set_val_at_idx(0, {0, 0});
// Grow-only resize: scratch buffers keep their capacity across passes so a
// shrinking task count does not trigger reallocation.
103 auto resize_max = [](
auto &buf,
u32 sz) {
104 if (buf.get_size() < sz) {
// Main traversal loop: one pass per refinement level, until no pair task
// remains in the queue.
109 while (task_current.
get_size() > 0) {
111 shamlog_debug_ln(
"dtt_scan_multipass",
"task_current.get_size() :", task_count);
// Per-pass buffer sizes. The has_pushed_* flag arrays get one extra slot
// (task_count + 1) so the scan kernels below can read scan[i + 1] for the
// last task; each task can spawn at most 4 child pairs, hence 4 * task_count.
114 u32 has_pushed_task_sz = task_count + 1;
115 u32 task_next_sz = 4 * task_count;
116 u32 has_pushed_m2l_sz = task_count + 1;
117 u32 pushed_m2l_sz = task_count;
118 u32 has_pushed_p2p_sz = task_count + 1;
119 u32 pushed_p2p_sz = task_count;
121 resize_max(has_pushed_task, has_pushed_task_sz);
122 resize_max(task_next, task_next_sz);
123 resize_max(has_pushed_m2l, has_pushed_m2l_sz);
124 resize_max(pushed_m2l, pushed_m2l_sz);
125 resize_max(has_pushed_p2p, has_pushed_p2p_sz);
126 resize_max(pushed_p2p, pushed_p2p_sz);
// Zero only the flag arrays; the payload buffers (task_next, pushed_*) need
// no clear since the compaction kernels read only flagged slots.
128 has_pushed_task.
fill(0, has_pushed_task_sz);
129 has_pushed_m2l.
fill(0, has_pushed_m2l_sz);
130 has_pushed_p2p.
fill(0, has_pushed_p2p_sz);
135 using ObjItAcc =
typename ObjectIterator::acc;
// Classification kernel: one work-item per pending pair task i.
// __restrict__ asserts the buffers do not alias (enables vectorization);
// correctness relies on them being distinct allocations.
151 const u32_2 *__restrict__ task_current,
153 u32 *__restrict__ has_pushed_task,
154 u32_2 *__restrict__ task_next,
155 u32 *__restrict__ has_pushed_m2l,
156 u32_2 *__restrict__ pushed_m2l,
157 u32 *__restrict__ has_pushed_p2p,
158 u32_2 *__restrict__ pushed_p2p) {
// t = (a, b): the pair of tree cell ids handled by this work-item.
159 u32_2 t = task_current[i];
// Build the AABBs of both cells from the tree traverser's bound arrays.
164 obj_it.tree_traverser.aabb_min[a], obj_it.tree_traverser.aabb_max[a]};
166 obj_it.tree_traverser.aabb_min[b], obj_it.tree_traverser.aabb_max[b]};
// crit == true when the MAC REJECTS the pair (mac(...) == false), i.e. the
// cells are too close for a multipole interaction. Presumably the refinement
// branches below run under crit — the intervening lines are elided; confirm.
168 bool crit = mac(aabb_a, aabb_b, theta_crit) ==
false;
171 auto &ttrav = obj_it.tree_traverser.tree_traverser;
// Leaf-lowering variant: a leaf stays in play and is paired against the other
// side's children until both sides are leaves.
173 if constexpr (allow_leaf_lowering) {
174 bool is_a_leaf = ttrav.is_id_leaf(a);
175 bool is_b_leaf = ttrav.is_id_leaf(b);
// Both sides are leaves: record a direct particle-particle interaction.
177 if (is_a_leaf && is_b_leaf) {
178 pushed_p2p[i] = {a, b};
179 has_pushed_p2p[i] = 1;
// Otherwise subdivide the non-leaf side(s). A leaf "lowers" itself by
// standing in for both of its would-be children; the run_*_2 flags mask the
// duplicate combination that would otherwise be emitted.
182 u32 child_a_1 = (is_a_leaf) ? a : ttrav.get_left_child(a);
183 u32 child_a_2 = (is_a_leaf) ? a : ttrav.get_right_child(a);
184 u32 child_b_1 = (is_b_leaf) ? b : ttrav.get_left_child(b);
185 u32 child_b_2 = (is_b_leaf) ? b : ttrav.get_right_child(b);
188 bool run_a_2 = !is_a_leaf;
190 bool run_b_2 = !is_b_leaf;
// Emit the surviving child-pair combinations contiguously into this task's
// 4-wide slot of task_next; push_count (declared in elided lines) counts them.
194 if (run_a_1 && run_b_1) {
195 task_next[i * 4 + push_count] = {child_a_1, child_b_1};
198 if (run_a_2 && run_b_1) {
199 task_next[i * 4 + push_count] = {child_a_2, child_b_1};
202 if (run_a_1 && run_b_2) {
203 task_next[i * 4 + push_count] = {child_a_1, child_b_2};
206 if (run_a_2 && run_b_2) {
207 task_next[i * 4 + push_count] = {child_a_2, child_b_2};
210 has_pushed_task[i] += push_count;
// Non-lowering variant: split the pair into the full 2x2 set of children —
// unless any child is a leaf, in which case the whole (a, b) pair is sent to
// P2P without descending further.
214 u32 child_a_1 = ttrav.get_left_child(a);
215 u32 child_a_2 = ttrav.get_right_child(a);
216 u32 child_b_1 = ttrav.get_left_child(b);
217 u32 child_b_2 = ttrav.get_right_child(b);
219 bool child_a_1_leaf = ttrav.is_id_leaf(child_a_1);
220 bool child_a_2_leaf = ttrav.is_id_leaf(child_a_2);
221 bool child_b_1_leaf = ttrav.is_id_leaf(child_b_1);
222 bool child_b_2_leaf = ttrav.is_id_leaf(child_b_2);
224 if (child_a_1_leaf || child_a_2_leaf || child_b_1_leaf
226 pushed_p2p[i] = {a, b};
227 has_pushed_p2p[i] = 1;
// No leaf among the children: enqueue all four child pairs for the next pass.
229 task_next[i * 4 + 0] = {child_a_1, child_b_1};
230 task_next[i * 4 + 1] = {child_a_1, child_b_2};
231 task_next[i * 4 + 2] = {child_a_2, child_b_1};
232 task_next[i * 4 + 3] = {child_a_2, child_b_2};
233 has_pushed_task[i] += 4;
// MAC accepted (far field): record an M2L (multipole-to-local) interaction.
238 pushed_m2l[i] = {a, b};
239 has_pushed_m2l[i] = 1;
// Scan the three flag arrays; the compaction kernels below read scan[i] and
// scan[i + 1] to derive each task's compacted write offset and push count.
246 has_pushed_task, has_pushed_task_sz);
248 has_pushed_m2l, has_pushed_m2l_sz);
250 has_pushed_p2p, has_pushed_p2p_sz);
254 dev_sched, has_pushed_task, has_pushed_task_sz);
256 dev_sched, has_pushed_m2l, has_pushed_m2l_sz);
258 dev_sched, has_pushed_p2p, has_pushed_p2p_sz);
// Sizes of the result lists before this pass; new entries append after them.
265 u32 res_sz_node_node = result.node_interactions_m2l.
get_size();
266 u32 res_sz_leaf_leaf = result.node_interactions_p2p.get_size();
274 result.node_interactions_m2l.expand(count_m2l);
275 result.node_interactions_p2p.expand(count_p2p);
278 task_current.resize(count_task);
// Compaction kernel rebuilding the next pass's task queue: scan_task[i] is
// the write offset for task i's children, delta the number it pushed.
288 const u32_2 *__restrict__ task_next,
289 const u32 *__restrict__ scan_task,
290 u32_2 *__restrict__ task_current) {
291 u32 scan_task_i = scan_task[i];
292 u32 scan_task_ip1 = scan_task[i + 1];
293 u32 delta = scan_task_ip1 - scan_task_i;
295 u32 idx = scan_task_i;
// With leaf lowering a task pushes 0..4 children, so copy exactly delta of
// them (asserting cell ids are in range); without lowering a pushing task
// always wrote exactly 4, so the copy is unrolled.
297 if constexpr (allow_leaf_lowering) {
298 for (
u32 l = 0; l < delta; l++) {
299 SHAM_ASSERT(task_next[i * 4 + l].x() < max_cell_idx);
300 SHAM_ASSERT(task_next[i * 4 + l].y() < max_cell_idx);
301 task_current[idx + l] = task_next[i * 4 + l];
304 task_current[idx + 0] = task_next[i * 4 + 0];
305 task_current[idx + 1] = task_next[i * 4 + 1];
306 task_current[idx + 2] = task_next[i * 4 + 2];
307 task_current[idx + 3] = task_next[i * 4 + 3];
// Compaction kernel for M2L: task i pushed iff its scan delta is 1; its pair
// lands at the compacted offset, after the pre-pass size res_sz_node_node.
320 const u32_2 *__restrict__ pushed_m2l,
321 const u32 *__restrict__ scan_m2l,
322 u32_2 *__restrict__ interacts_m2l) {
323 u32 scan_m2l_i = scan_m2l[i];
324 u32 scan_m2l_ip1 = scan_m2l[i + 1];
325 if (scan_m2l_ip1 - scan_m2l_i == 1) {
326 interacts_m2l[res_sz_node_node + scan_m2l_i] = pushed_m2l[i];
// Compaction kernel for P2P: same scheme, appending after res_sz_leaf_leaf.
338 const u32_2 *__restrict__ pushed_p2p,
339 const u32 *__restrict__ scan_p2p,
340 u32_2 *__restrict__ interact_p2p) {
341 u32 scan_p2p_i = scan_p2p[i];
342 u32 scan_p2p_ip1 = scan_p2p[i + 1];
343 if (scan_p2p_ip1 - scan_p2p_i == 1) {
344 interact_p2p[res_sz_leaf_leaf + scan_p2p_i] = pushed_p2p[i];
// Public entry point: converts the runtime allow_leaf_lowering flag into the
// compile-time template argument of dtt_internal, so the per-pair kernels can
// use if constexpr instead of a runtime branch.
// NOTE(review): the signature head (return type / name / bvh and
// ordered_result parameters) and the else line are elided from this view.
355 sham::DeviceScheduler_ptr dev_sched,
357 shambase::VecComponent<Tvec> theta_crit,
359 bool allow_leaf_lowering) {
360 if (allow_leaf_lowering) {
// Leaf-lowering traversal: leaves may pair against children of inner nodes.
361 return dtt_internal<true>(dev_sched, bvh, theta_crit, ordered_result);
// Classic traversal: pairs containing a leaf child fall back to P2P directly.
363 return dtt_internal<false>(dev_sched, bvh, theta_crit, ordered_result);