Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
smoothing_lenght_impl.hpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
10//%Impl status : Good
11
12#pragma once
13
20
22// #include "shamrock/legacy/patch/patchdata_buffer.hpp"
24#include "shamrock/sph/kernels.hpp"
25#include "shamrock/sph/sphpart.hpp"
27
28namespace impl {
29
30 template<class flt>
31 void sycl_init_h_iter_bufs(
32 sycl::queue &queue,
33 u32 or_element_cnt,
34
35 u32 ihpart,
36 shamrock::patch::PatchData &pdat_merge,
37 sycl::buffer<flt> &hnew,
38 sycl::buffer<flt> &omega,
39 sycl::buffer<flt> &eps_h
40
41 );
42
43 template<>
44 inline void sycl_init_h_iter_bufs<f32>(
45 sycl::queue &queue,
46 u32 or_element_cnt,
47
48 u32 ihpart,
49 shamrock::patch::PatchData &pdat_merge,
50 sycl::buffer<f32> &hnew,
51 sycl::buffer<f32> &omega,
52 sycl::buffer<f32> &eps_h
53
54 ) {
55
56 sycl::range range_npart{or_element_cnt};
57
58 queue.submit([&](sycl::handler &cgh) {
59 auto acc_hpart
60 = pdat_merge.get_field<f32>(ihpart).get_buf()->get_access<sycl::access::mode::read>(
61 cgh);
62 auto eps = eps_h.get_access<sycl::access::mode::discard_write>(cgh);
63 auto h = hnew.get_access<sycl::access::mode::discard_write>(cgh);
64
65 cgh.parallel_for<class Init_iterate_h_f32>(range_npart, [=](sycl::item<1> item) {
66 u32 id_a = (u32) item.get_id(0);
67
68 h[id_a] = acc_hpart[id_a];
69 eps[id_a] = 100;
70 });
71 });
72 }
73
74 template<>
75 inline void sycl_init_h_iter_bufs<f64>(
76 sycl::queue &queue,
77 u32 or_element_cnt,
78
79 u32 ihpart,
80 shamrock::patch::PatchData &pdat_merge,
81 sycl::buffer<f64> &hnew,
82 sycl::buffer<f64> &omega,
83 sycl::buffer<f64> &eps_h
84
85 ) {
86
87 sycl::range range_npart{or_element_cnt};
88
89 queue.submit([&](sycl::handler &cgh) {
90 auto acc_hpart
91 = pdat_merge.get_field<f64>(ihpart).get_buf()->get_access<sycl::access::mode::read>(
92 cgh);
93 auto eps = eps_h.get_access<sycl::access::mode::discard_write>(cgh);
94 auto h = hnew.get_access<sycl::access::mode::discard_write>(cgh);
95
96 cgh.parallel_for<class Init_iterate_h_f64>(range_npart, [=](sycl::item<1> item) {
97 u32 id_a = (u32) item.get_id(0);
98
99 h[id_a] = acc_hpart[id_a];
100 eps[id_a] = 100;
101 });
102 });
103 }
104
105#if false
106
107 template<class flt>
108 [[deprecated]]
109 inline void sycl_init_h_iter_bufs(
110 sycl::queue & queue,
111 u32 or_element_cnt,
112
113 u32 ihpart,
114 PatchDataBuffer & pdat_buf_merge,
115 sycl::buffer<flt> & hnew,
116 sycl::buffer<flt> & omega,
117 sycl::buffer<flt> & eps_h
118
119 );
120
121 template<>
122 inline void sycl_init_h_iter_bufs<f32>(
123 sycl::queue & queue,
124 u32 or_element_cnt,
125
126 u32 ihpart,
127 PatchDataBuffer & pdat_buf_merge,
128 sycl::buffer<f32> & hnew,
129 sycl::buffer<f32> & omega,
130 sycl::buffer<f32> & eps_h
131
132 ){
133
134 sycl::range range_npart{or_element_cnt};
135
136 queue.submit([&](sycl::handler &cgh) {
137
138 auto acc_hpart = pdat_buf_merge.fields_f32[ihpart]->get_access<sycl::access::mode::read>(cgh);
139 auto eps = eps_h.get_access<sycl::access::mode::discard_write>(cgh);
140 auto h = hnew.get_access<sycl::access::mode::discard_write>(cgh);
141
142 cgh.parallel_for<class Init_iterate_h>( range_npart, [=](sycl::item<1> item) {
143
144 u32 id_a = (u32) item.get_id(0);
145
146 h[id_a] = acc_hpart[id_a];
147 eps[id_a] = 100;
148
149 });
150
151 });
152 }
153
154#endif
155
156 template<class A, class B, class C>
158 template<class A, class B, class C>
160
161 template<class morton_prec, class Kernel>
163 public:
164 template<class flt>
165 static void sycl_h_iter_step(
166 sycl::queue &queue,
167 u32 or_element_cnt,
168
169 u32 ihpart,
170 u32 ixyz,
171
172 flt gpart_mass,
173 flt htol_up_tol,
174 flt htol_up_iter,
175
176 RadixTree<morton_prec, sycl::vec<flt, 3>> &radix_t,
177 RadixTreeField<flt> &int_rad,
178
179 shamrock::patch::PatchData &pdat_merge,
180 sycl::buffer<flt> &hnew,
181 sycl::buffer<flt> &omega,
182 sycl::buffer<flt> &eps_h);
183
184 template<>
185 inline static void sycl_h_iter_step<f32>(
186 sycl::queue &queue,
187 u32 or_element_cnt,
188
189 u32 ihpart,
190 u32 ixyz,
191
192 f32 gpart_mass,
193 f32 htol_up_tol,
194 f32 htol_up_iter,
195
197 RadixTreeField<f32> &int_rad,
198
199 shamrock::patch::PatchData &pdat_merge,
200 sycl::buffer<f32> &hnew,
201 sycl::buffer<f32> &omega,
202 sycl::buffer<f32> &eps_h) {
203
205
206 sycl::range range_npart{or_element_cnt};
207
208 queue.submit([&](sycl::handler &cgh) {
209 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
210 auto eps = eps_h.get_access<sycl::access::mode::read_write>(cgh);
211
212 auto acc_hpart = pdat_merge.get_field<f32>(ihpart)
213 .get_buf()
214 ->get_access<sycl::access::mode::read>(cgh);
215 auto r = pdat_merge.get_field<f32_3>(ixyz)
216 .get_buf()
217 ->get_access<sycl::access::mode::read>(cgh);
218
219 Rta tree_acc(radix_t, cgh);
220
221 auto cell_int_r
222 = int_rad.radix_tree_field_buf->template get_access<sycl::access::mode::read>(
223 cgh);
224
225 const f32 part_mass = gpart_mass;
226
227 const f32 h_max_tot_max_evol = htol_up_tol;
228 const f32 h_max_evol_p = htol_up_iter;
229 const f32 h_max_evol_m = 1 / htol_up_iter;
230
232 range_npart, [=](sycl::item<1> item) {
233 u32 id_a = (u32) item.get_id(0);
234
235 if (eps[id_a] > 1e-6) {
236
237 f32_3 xyz_a = r[id_a]; // could be recovered from lambda
238
239 f32 h_a = h_new[id_a];
240 // f32 h_a2 = h_a*h_a;
241
242 f32_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
243 f32_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
244
245 f32 rho_sum = 0;
246 f32 sumdWdh = 0;
247
248 walker::rtree_for(
249 tree_acc,
250 [&tree_acc,
251 &xyz_a,
252 &inter_box_a_min,
253 &inter_box_a_max,
254 &cell_int_r](u32 node_id) {
255 f32_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
256 f32_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
257 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
258
259 using namespace walker::interaction_crit;
260
261 return sph_radix_cell_crit(
262 xyz_a,
263 inter_box_a_min,
264 inter_box_a_max,
265 cur_pos_min_cell_b,
266 cur_pos_max_cell_b,
267 int_r_max_cell);
268 },
269 [&r, &xyz_a, &h_a, &rho_sum, &part_mass, &sumdWdh](u32 id_b) {
270 // f32_3 dr = xyz_a - r[id_b];
271 f32 rab = sycl::distance(xyz_a, r[id_b]);
272
273 if (rab > h_a * Kernel::Rkern) {
274 return;
275 }
276
277 // f32 rab = sycl::sqrt(rab2);
278
279 rho_sum += part_mass * Kernel::W(rab, h_a);
280 sumdWdh += part_mass * Kernel::dhW(rab, h_a);
281 },
282 [](u32 node_id) {});
283
284 using namespace shamrock::sph;
285
286 f32 rho_ha = rho_h(part_mass, h_a, Kernel::hfactd);
287 f32 new_h = newton_iterate_new_h(rho_ha, rho_sum, sumdWdh, h_a);
288
289 if (new_h < h_a * h_max_evol_m)
290 new_h = h_max_evol_m * h_a;
291 if (new_h > h_a * h_max_evol_p)
292 new_h = h_max_evol_p * h_a;
293
294 f32 ha_0 = acc_hpart[id_a];
295
296 if (new_h < ha_0 * h_max_tot_max_evol) {
297 h_new[id_a] = new_h;
298 eps[id_a] = sycl::fabs(new_h - h_a) / ha_0;
299 } else {
300 h_new[id_a] = ha_0 * h_max_tot_max_evol;
301 eps[id_a] = -1;
302 }
303 }
304 });
305 });
306 }
307
308 template<>
309 inline static void sycl_h_iter_step<f64>(
310 sycl::queue &queue,
311 u32 or_element_cnt,
312
313 u32 ihpart,
314 u32 ixyz,
315
316 f64 gpart_mass,
317 f64 htol_up_tol,
318 f64 htol_up_iter,
319
321 RadixTreeField<f64> &int_rad,
322
323 shamrock::patch::PatchData &pdat_merge,
324 sycl::buffer<f64> &hnew,
325 sycl::buffer<f64> &omega,
326 sycl::buffer<f64> &eps_h) {
327
329
330 sycl::range range_npart{or_element_cnt};
331
332 queue.submit([&](sycl::handler &cgh) {
333 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
334 auto eps = eps_h.get_access<sycl::access::mode::read_write>(cgh);
335
336 auto acc_hpart = pdat_merge.get_field<f64>(ihpart)
337 .get_buf()
338 ->get_access<sycl::access::mode::read>(cgh);
339 auto r = pdat_merge.get_field<f64_3>(ixyz)
340 .get_buf()
341 ->get_access<sycl::access::mode::read>(cgh);
342
343 Rta tree_acc(radix_t, cgh);
344
345 auto cell_int_r
346 = int_rad.radix_tree_field_buf->template get_access<sycl::access::mode::read>(
347 cgh);
348
349 const f64 part_mass = gpart_mass;
350
351 const f64 h_max_tot_max_evol = htol_up_tol;
352 const f64 h_max_evol_p = htol_up_iter;
353 const f64 h_max_evol_m = 1 / htol_up_iter;
354
356 range_npart, [=](sycl::item<1> item) {
357 u32 id_a = (u32) item.get_id(0);
358
359 if (eps[id_a] > 1e-6) {
360
361 f64_3 xyz_a = r[id_a]; // could be recovered from lambda
362
363 f64 h_a = h_new[id_a];
364 // f64 h_a2 = h_a*h_a;
365
366 f64_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
367 f64_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
368
369 f64 rho_sum = 0;
370 f64 sumdWdh = 0;
371
372 walker::rtree_for(
373 tree_acc,
374 [&tree_acc,
375 &xyz_a,
376 &inter_box_a_min,
377 &inter_box_a_max,
378 &cell_int_r](u32 node_id) {
379 f64_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
380 f64_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
381 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
382
383 using namespace walker::interaction_crit;
384
385 return sph_radix_cell_crit(
386 xyz_a,
387 inter_box_a_min,
388 inter_box_a_max,
389 cur_pos_min_cell_b,
390 cur_pos_max_cell_b,
391 int_r_max_cell);
392 },
393 [&r, &xyz_a, &h_a, &rho_sum, &part_mass, &sumdWdh](u32 id_b) {
394 // f64_3 dr = xyz_a - r[id_b];
395 f64 rab = sycl::distance(xyz_a, r[id_b]);
396
397 if (rab > h_a * Kernel::Rkern) {
398 return;
399 }
400
401 // f64 rab = sycl::sqrt(rab2);
402
403 rho_sum += part_mass * Kernel::W(rab, h_a);
404 sumdWdh += part_mass * Kernel::dhW(rab, h_a);
405 },
406 [](u32 node_id) {});
407
408 using namespace shamrock::sph;
409
410 f64 rho_ha = rho_h(part_mass, h_a, Kernel::hfactd);
411
412 f64 f_iter = rho_sum - rho_ha;
413 f64 df_iter = sumdWdh + 3 * rho_ha / h_a;
414
415 // f64 omega_a = 1 + (h_a/(3*rho_ha))*sumdWdh;
416 // f64 new_h = h_a - (rho_ha - rho_sum)/((-3*rho_ha/h_a)*omega_a);
417
418 f64 new_h = h_a - f_iter / df_iter;
419
420 if (new_h < h_a * h_max_evol_m)
421 new_h = h_max_evol_m * h_a;
422 if (new_h > h_a * h_max_evol_p)
423 new_h = h_max_evol_p * h_a;
424
425 f64 ha_0 = acc_hpart[id_a];
426
427 if (new_h < ha_0 * h_max_tot_max_evol) {
428 h_new[id_a] = new_h;
429 eps[id_a] = sycl::fabs(new_h - h_a) / ha_0;
430 } else {
431 h_new[id_a] = ha_0 * h_max_tot_max_evol;
432 eps[id_a] = -1;
433 }
434 }
435 });
436 });
437 }
438
439 template<class flt>
440 static void sycl_h_iter_omega(
441 sycl::queue &queue,
442 u32 or_element_cnt,
443
444 u32 ihpart,
445 u32 ixyz,
446
447 flt gpart_mass,
448 flt htol_up_tol,
449 flt htol_up_iter,
450
451 RadixTree<morton_prec, sycl::vec<flt, 3>> &radix_t,
452 RadixTreeField<flt> &int_rad,
453
454 shamrock::patch::PatchData &pdat_merge,
455 sycl::buffer<flt> &hnew,
456 sycl::buffer<flt> &omega,
457 sycl::buffer<flt> &eps_h);
458
459 template<>
460 inline static void sycl_h_iter_omega<f32>(
461 sycl::queue &queue,
462 u32 or_element_cnt,
463
464 u32 ihpart,
465 u32 ixyz,
466
467 f32 gpart_mass,
468 f32 htol_up_tol,
469 f32 htol_up_iter,
470
472 RadixTreeField<f32> &int_rad,
473
474 shamrock::patch::PatchData &pdat_merge,
475 sycl::buffer<f32> &hnew,
476 sycl::buffer<f32> &omega,
477 sycl::buffer<f32> &eps_h) {
478
480
481 sycl::range range_npart{or_element_cnt};
482
483 queue.submit([&](sycl::handler &cgh) {
484 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
485 auto omga = omega.get_access<sycl::access::mode::discard_write>(cgh);
486
487 auto r = pdat_merge.get_field<f32_3>(ixyz)
488 .get_buf()
489 ->get_access<sycl::access::mode::read>(cgh);
490
492 Rta tree_acc(radix_t, cgh);
493
494 auto cell_int_r
495 = int_rad.radix_tree_field_buf->template get_access<sycl::access::mode::read>(
496 cgh);
497
498 const f32 part_mass = gpart_mass;
499
500 const f32 h_max_tot_max_evol = htol_up_tol;
501 const f32 h_max_evol_p = htol_up_tol;
502 const f32 h_max_evol_m = 1 / htol_up_tol;
503
505 range_npart, [=](sycl::item<1> item) {
506 u32 id_a = (u32) item.get_id(0);
507
508 f32_3 xyz_a = r[id_a]; // could be recovered from lambda
509
510 f32 h_a = h_new[id_a];
511 // f32 h_a2 = h_a*h_a;
512
513 f32_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
514 f32_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
515
516 f32 rho_sum = 0;
517 f32 part_omega_sum = 0;
518
519 walker::rtree_for(
520 tree_acc,
521 [&tree_acc, &xyz_a, &inter_box_a_min, &inter_box_a_max, &cell_int_r](
522 u32 node_id) {
523 f32_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
524 f32_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
525 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
526
527 using namespace walker::interaction_crit;
528
529 return sph_radix_cell_crit(
530 xyz_a,
531 inter_box_a_min,
532 inter_box_a_max,
533 cur_pos_min_cell_b,
534 cur_pos_max_cell_b,
535 int_r_max_cell);
536 },
537 [&r, &xyz_a, &h_a, &rho_sum, &part_mass, &part_omega_sum](u32 id_b) {
538 // f32_3 dr = xyz_a - r[id_b];
539 f32 rab = sycl::distance(xyz_a, r[id_b]);
540
541 if (rab > h_a * Kernel::Rkern)
542 return;
543
544 // f32 rab = sycl::sqrt(rab2);
545
546 rho_sum += part_mass * Kernel::W(rab, h_a);
547 part_omega_sum += part_mass * Kernel::dhW(rab, h_a);
548 },
549 [](u32 node_id) {});
550
551 using namespace shamrock::sph;
552 f32 rho_ha = rho_h(part_mass, h_a, Kernel::hfactd);
553 omga[id_a] = 1 + (h_a / (3 * rho_ha)) * part_omega_sum;
554 });
555 });
556 }
557
558 template<>
559 inline static void sycl_h_iter_omega<f64>(
560 sycl::queue &queue,
561 u32 or_element_cnt,
562
563 u32 ihpart,
564 u32 ixyz,
565
566 f64 gpart_mass,
567 f64 htol_up_tol,
568 f64 htol_up_iter,
569
571 RadixTreeField<f64> &int_rad,
572
573 shamrock::patch::PatchData &pdat_merge,
574 sycl::buffer<f64> &hnew,
575 sycl::buffer<f64> &omega,
576 sycl::buffer<f64> &eps_h) {
577
579
580 sycl::range range_npart{or_element_cnt};
581
582 queue.submit([&](sycl::handler &cgh) {
583 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
584 auto omga = omega.get_access<sycl::access::mode::discard_write>(cgh);
585
586 auto r = pdat_merge.get_field<f64_3>(ixyz)
587 .get_buf()
588 ->get_access<sycl::access::mode::read>(cgh);
589
591 Rta tree_acc(radix_t, cgh);
592
593 auto cell_int_r
594 = int_rad.radix_tree_field_buf->template get_access<sycl::access::mode::read>(
595 cgh);
596
597 const f64 part_mass = gpart_mass;
598
599 const f64 h_max_tot_max_evol = htol_up_tol;
600 const f64 h_max_evol_p = htol_up_tol;
601 const f64 h_max_evol_m = 1 / htol_up_tol;
602
604 range_npart, [=](sycl::item<1> item) {
605 u32 id_a = (u32) item.get_id(0);
606
607 f64_3 xyz_a = r[id_a]; // could be recovered from lambda
608
609 f64 h_a = h_new[id_a];
610 // f64 h_a2 = h_a*h_a;
611
612 f64_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
613 f64_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
614
615 f64 rho_sum = 0;
616 f64 part_omega_sum = 0;
617
618 walker::rtree_for(
619 tree_acc,
620 [&tree_acc, &xyz_a, &inter_box_a_min, &inter_box_a_max, &cell_int_r](
621 u32 node_id) {
622 f64_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
623 f64_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
624 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
625
626 using namespace walker::interaction_crit;
627
628 return sph_radix_cell_crit(
629 xyz_a,
630 inter_box_a_min,
631 inter_box_a_max,
632 cur_pos_min_cell_b,
633 cur_pos_max_cell_b,
634 int_r_max_cell);
635 },
636 [&r, &xyz_a, &h_a, &rho_sum, &part_mass, &part_omega_sum](u32 id_b) {
637 // f32_3 dr = xyz_a - r[id_b];
638 f64 rab = sycl::distance(xyz_a, r[id_b]);
639
640 if (rab > h_a * Kernel::Rkern)
641 return;
642
643 // f32 rab = sycl::sqrt(rab2);
644
645 rho_sum += part_mass * Kernel::W(rab, h_a);
646 part_omega_sum += part_mass * Kernel::dhW(rab, h_a);
647 },
648 [](u32 node_id) {});
649
650 using namespace shamrock::sph;
651 f64 rho_ha = rho_h(part_mass, h_a, Kernel::hfactd);
652 omga[id_a] = 1 + (h_a / (3 * rho_ha)) * part_omega_sum;
653 });
654 });
655 }
656
657#if false
658
659 template<class flt>
660 [[deprecated]]
661 static void sycl_h_iter_step(
662 sycl::queue & queue,
663 u32 or_element_cnt,
664
665 u32 ihpart,
666 u32 ixyz,
667
668 flt gpart_mass,
669 flt htol_up_tol,
670 flt htol_up_iter,
671
672 Radix_Tree<morton_prec, sycl::vec<flt,3>> & radix_t,
673
674 PatchDataBuffer & pdat_buf_merge,
675 sycl::buffer<flt> & hnew,
676 sycl::buffer<flt> & omega,
677 sycl::buffer<flt> & eps_h
678 );
679
680 template<>
681 inline static void sycl_h_iter_step<f32>(
682 sycl::queue & queue,
683 u32 or_element_cnt,
684
685 u32 ihpart,
686 u32 ixyz,
687
688 f32 gpart_mass,
689 f32 htol_up_tol,
690 f32 htol_up_iter,
691
692 Radix_Tree<morton_prec, f32_3> & radix_t,
693
694 PatchDataBuffer & pdat_buf_merge,
695 sycl::buffer<f32> & hnew,
696 sycl::buffer<f32> & omega,
697 sycl::buffer<f32> & eps_h
698 ){
699
701
702 sycl::range range_npart{or_element_cnt};
703
704 queue.submit([&](sycl::handler &cgh) {
705
706 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
707 auto eps = eps_h.get_access<sycl::access::mode::read_write>(cgh);
708
709 auto acc_hpart = pdat_buf_merge.fields_f32.at(ihpart)->get_access<sycl::access::mode::read>(cgh);
710 auto r = pdat_buf_merge.fields_f32_3[ixyz]->get_access<sycl::access::mode::read>(cgh);
711
712
713 Rta tree_acc(radix_t, cgh);
714
715
716
717 auto cell_int_r = radix_t.buf_cell_interact_rad->template get_access<sycl::access::mode::read>(cgh);
718
719 const f32 part_mass = gpart_mass;
720
721 const f32 h_max_tot_max_evol = htol_up_tol;
722 const f32 h_max_evol_p = htol_up_iter;
723 const f32 h_max_evol_m = 1/htol_up_iter;
724
725 cgh.parallel_for<Kernel_Iterh<f32, morton_prec, Kernel>>(range_npart, [=](sycl::item<1> item) {
726 u32 id_a = (u32)item.get_id(0);
727
728
729 if(eps[id_a] > 1e-6){
730
731 f32_3 xyz_a = r[id_a]; // could be recovered from lambda
732
733 f32 h_a = h_new[id_a];
734 //f32 h_a2 = h_a*h_a;
735
736 f32_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
737 f32_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
738
739 f32 rho_sum = 0;
740 f32 sumdWdh = 0;
741
742 walker::rtree_for(
743 tree_acc,
744 [&tree_acc,&xyz_a,&inter_box_a_min,&inter_box_a_max,&cell_int_r](u32 node_id) {
745 f32_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
746 f32_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
747 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
748
749 using namespace walker::interaction_crit;
750
751 return sph_radix_cell_crit(xyz_a, inter_box_a_min, inter_box_a_max, cur_pos_min_cell_b,
752 cur_pos_max_cell_b, int_r_max_cell);
753 },
754 [&r,&xyz_a,&h_a,&rho_sum,&part_mass,&sumdWdh](u32 id_b) {
755 //f32_3 dr = xyz_a - r[id_b];
756 f32 rab = sycl::distance( xyz_a , r[id_b]);
757
758 if(rab > h_a*Kernel::Rkern) return;
759
760 //f32 rab = sycl::sqrt(rab2);
761
762 rho_sum += part_mass*Kernel::W(rab,h_a);
763 sumdWdh += part_mass*Kernel::dhW(rab,h_a);
764
765 },
766 [](u32 node_id) {});
767
768
769
770 f32 rho_ha = rho_h(part_mass, h_a);
771
772 f32 f_iter = rho_sum - rho_ha;
773 f32 df_iter = sumdWdh + 3*rho_ha/h_a;
774
775 //f32 omega_a = 1 + (h_a/(3*rho_ha))*sumdWdh;
776 //f32 new_h = h_a - (rho_ha - rho_sum)/((-3*rho_ha/h_a)*omega_a);
777
778 f32 new_h = h_a - f_iter/df_iter;
779
780
781 if(new_h < h_a*h_max_evol_m) new_h = h_max_evol_m*h_a;
782 if(new_h > h_a*h_max_evol_p) new_h = h_max_evol_p*h_a;
783
784
785 f32 ha_0 = acc_hpart[id_a];
786
787
788 if (new_h < ha_0*h_max_tot_max_evol) {
789 h_new[id_a] = new_h;
790 eps[id_a] = sycl::fabs(new_h - h_a)/ha_0;
791 }else{
792 h_new[id_a] = ha_0*h_max_tot_max_evol;
793 eps[id_a] = -1;
794 }
795 }
796
797 });
798
799 });
800 }
801
802
803
804
805
806
807
808
809
810
811
812
813 template<class flt>
814 [[deprecated]]
815 static void _sycl_h_iter_omega(
816 sycl::queue & queue,
817 u32 or_element_cnt,
818
819 u32 ihpart,
820 u32 ixyz,
821
822 flt gpart_mass,
823 flt htol_up_tol,
824 flt htol_up_iter,
825
826 Radix_Tree<morton_prec, sycl::vec<flt,3>> & radix_t,
827
828 PatchDataBuffer & pdat_buf_merge,
829 sycl::buffer<flt> & hnew,
830 sycl::buffer<flt> & omega,
831 sycl::buffer<flt> & eps_h
832 );
833
834 template<>
835 inline static void _sycl_h_iter_omega<f32>(
836 sycl::queue & queue,
837 u32 or_element_cnt,
838
839 u32 ihpart,
840 u32 ixyz,
841
842 f32 gpart_mass,
843 f32 htol_up_tol,
844 f32 htol_up_iter,
845
846 Radix_Tree<morton_prec, f32_3> & radix_t,
847
848 PatchDataBuffer & pdat_buf_merge,
849 sycl::buffer<f32> & hnew,
850 sycl::buffer<f32> & omega,
851 sycl::buffer<f32> & eps_h
852 ){
853
855
856 sycl::range range_npart{or_element_cnt};
857
858
859 queue.submit([&](sycl::handler &cgh) {
860
861 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
862 auto omga = omega.get_access<sycl::access::mode::discard_write>(cgh);
863
864 auto r = pdat_buf_merge.fields_f32_3.at(ixyz)->get_access<sycl::access::mode::read>(cgh);
865
867 Rta tree_acc(radix_t, cgh);
868
869
870
871 auto cell_int_r =radix_t.buf_cell_interact_rad->template get_access<sycl::access::mode::read>(cgh);
872
873 const f32 part_mass = gpart_mass;
874
875 const f32 h_max_tot_max_evol = htol_up_tol;
876 const f32 h_max_evol_p = htol_up_tol;
877 const f32 h_max_evol_m = 1/htol_up_tol;
878
879 cgh.parallel_for<Kernel_Finalize_omega<f32, morton_prec, Kernel>>(range_npart, [=](sycl::item<1> item) {
880 u32 id_a = (u32)item.get_id(0);
881
882 f32_3 xyz_a = r[id_a]; // could be recovered from lambda
883
884 f32 h_a = h_new[id_a];
885 //f32 h_a2 = h_a*h_a;
886
887 f32_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
888 f32_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
889
890 f32 rho_sum = 0;
891 f32 part_omega_sum = 0;
892
893 walker::rtree_for(
894 tree_acc,
895 [&tree_acc,&xyz_a,&inter_box_a_min,&inter_box_a_max,&cell_int_r](u32 node_id) {
896 f32_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
897 f32_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
898 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
899
900 using namespace walker::interaction_crit;
901
902 return sph_radix_cell_crit(xyz_a, inter_box_a_min, inter_box_a_max, cur_pos_min_cell_b,
903 cur_pos_max_cell_b, int_r_max_cell);
904 },
905 [&r,&xyz_a,&h_a,&rho_sum,&part_mass,&part_omega_sum](u32 id_b) {
906 //f32_3 dr = xyz_a - r[id_b];
907 f32 rab = sycl::distance( xyz_a , r[id_b]);
908
909 if(rab > h_a*Kernel::Rkern) return;
910
911 //f32 rab = sycl::sqrt(rab2);
912
913 rho_sum += part_mass*Kernel::W(rab,h_a);
914 part_omega_sum += part_mass * Kernel::dhW(rab,h_a);
915
916 },
917 [](u32 node_id) {});
918
919
920
921 f32 rho_ha = rho_h(part_mass, h_a);
922 omga[id_a] = 1 + (h_a/(3*rho_ha))*part_omega_sum;
923
924
925 });
926
927 });
928 }
929
930#endif
931 };
932
933} // namespace impl
constexpr const char * eps_h
Epsilon h references (for h-iteration convergence).
double f64
Alias for double.
float f32
Alias for float.
std::uint32_t u32
32 bit unsigned integer
The radix tree.
Definition RadixTree.hpp:50
header for PatchData related function and declaration