165 static void sycl_h_iter_step(
176 RadixTree<morton_prec, sycl::vec<flt, 3>> &radix_t,
179 shamrock::patch::PatchData &pdat_merge,
180 sycl::buffer<flt> &hnew,
181 sycl::buffer<flt> &omega,
182 sycl::buffer<flt> &eps_h);
185 inline static void sycl_h_iter_step<f32>(
199 shamrock::patch::PatchData &pdat_merge,
200 sycl::buffer<f32> &hnew,
201 sycl::buffer<f32> &omega,
202 sycl::buffer<f32> &eps_h) {
206 sycl::range range_npart{or_element_cnt};
208 queue.submit([&](sycl::handler &cgh) {
209 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
210 auto eps = eps_h.get_access<sycl::access::mode::read_write>(cgh);
212 auto acc_hpart = pdat_merge.get_field<
f32>(ihpart)
214 ->get_access<sycl::access::mode::read>(cgh);
215 auto r = pdat_merge.get_field<f32_3>(ixyz)
217 ->get_access<sycl::access::mode::read>(cgh);
219 Rta tree_acc(radix_t, cgh);
222 = int_rad.radix_tree_field_buf->template get_access<sycl::access::mode::read>(
225 const f32 part_mass = gpart_mass;
227 const f32 h_max_tot_max_evol = htol_up_tol;
228 const f32 h_max_evol_p = htol_up_iter;
229 const f32 h_max_evol_m = 1 / htol_up_iter;
232 range_npart, [=](sycl::item<1> item) {
233 u32 id_a = (
u32) item.get_id(0);
235 if (eps[id_a] > 1e-6) {
237 f32_3 xyz_a = r[id_a];
239 f32 h_a = h_new[id_a];
242 f32_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
243 f32_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
254 &cell_int_r](
u32 node_id) {
255 f32_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
256 f32_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
257 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
259 using namespace walker::interaction_crit;
261 return sph_radix_cell_crit(
269 [&r, &xyz_a, &h_a, &rho_sum, &part_mass, &sumdWdh](
u32 id_b) {
271 f32 rab = sycl::distance(xyz_a, r[id_b]);
273 if (rab > h_a * Kernel::Rkern) {
279 rho_sum += part_mass * Kernel::W(rab, h_a);
280 sumdWdh += part_mass * Kernel::dhW(rab, h_a);
284 using namespace shamrock::sph;
286 f32 rho_ha = rho_h(part_mass, h_a, Kernel::hfactd);
287 f32 new_h = newton_iterate_new_h(rho_ha, rho_sum, sumdWdh, h_a);
289 if (new_h < h_a * h_max_evol_m)
290 new_h = h_max_evol_m * h_a;
291 if (new_h > h_a * h_max_evol_p)
292 new_h = h_max_evol_p * h_a;
294 f32 ha_0 = acc_hpart[id_a];
296 if (new_h < ha_0 * h_max_tot_max_evol) {
298 eps[id_a] = sycl::fabs(new_h - h_a) / ha_0;
300 h_new[id_a] = ha_0 * h_max_tot_max_evol;
309 inline static void sycl_h_iter_step<f64>(
323 shamrock::patch::PatchData &pdat_merge,
324 sycl::buffer<f64> &hnew,
325 sycl::buffer<f64> &omega,
326 sycl::buffer<f64> &eps_h) {
330 sycl::range range_npart{or_element_cnt};
332 queue.submit([&](sycl::handler &cgh) {
333 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
334 auto eps = eps_h.get_access<sycl::access::mode::read_write>(cgh);
336 auto acc_hpart = pdat_merge.get_field<
f64>(ihpart)
338 ->get_access<sycl::access::mode::read>(cgh);
339 auto r = pdat_merge.get_field<f64_3>(ixyz)
341 ->get_access<sycl::access::mode::read>(cgh);
343 Rta tree_acc(radix_t, cgh);
346 = int_rad.radix_tree_field_buf->template get_access<sycl::access::mode::read>(
349 const f64 part_mass = gpart_mass;
351 const f64 h_max_tot_max_evol = htol_up_tol;
352 const f64 h_max_evol_p = htol_up_iter;
353 const f64 h_max_evol_m = 1 / htol_up_iter;
356 range_npart, [=](sycl::item<1> item) {
357 u32 id_a = (
u32) item.get_id(0);
359 if (eps[id_a] > 1e-6) {
361 f64_3 xyz_a = r[id_a];
363 f64 h_a = h_new[id_a];
366 f64_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
367 f64_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
378 &cell_int_r](
u32 node_id) {
379 f64_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
380 f64_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
381 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
383 using namespace walker::interaction_crit;
385 return sph_radix_cell_crit(
393 [&r, &xyz_a, &h_a, &rho_sum, &part_mass, &sumdWdh](
u32 id_b) {
395 f64 rab = sycl::distance(xyz_a, r[id_b]);
397 if (rab > h_a * Kernel::Rkern) {
403 rho_sum += part_mass * Kernel::W(rab, h_a);
404 sumdWdh += part_mass * Kernel::dhW(rab, h_a);
408 using namespace shamrock::sph;
410 f64 rho_ha = rho_h(part_mass, h_a, Kernel::hfactd);
412 f64 f_iter = rho_sum - rho_ha;
413 f64 df_iter = sumdWdh + 3 * rho_ha / h_a;
418 f64 new_h = h_a - f_iter / df_iter;
420 if (new_h < h_a * h_max_evol_m)
421 new_h = h_max_evol_m * h_a;
422 if (new_h > h_a * h_max_evol_p)
423 new_h = h_max_evol_p * h_a;
425 f64 ha_0 = acc_hpart[id_a];
427 if (new_h < ha_0 * h_max_tot_max_evol) {
429 eps[id_a] = sycl::fabs(new_h - h_a) / ha_0;
431 h_new[id_a] = ha_0 * h_max_tot_max_evol;
440 static void sycl_h_iter_omega(
451 RadixTree<morton_prec, sycl::vec<flt, 3>> &radix_t,
454 shamrock::patch::PatchData &pdat_merge,
455 sycl::buffer<flt> &hnew,
456 sycl::buffer<flt> &omega,
457 sycl::buffer<flt> &eps_h);
460 inline static void sycl_h_iter_omega<f32>(
474 shamrock::patch::PatchData &pdat_merge,
475 sycl::buffer<f32> &hnew,
476 sycl::buffer<f32> &omega,
477 sycl::buffer<f32> &eps_h) {
481 sycl::range range_npart{or_element_cnt};
483 queue.submit([&](sycl::handler &cgh) {
484 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
485 auto omga = omega.get_access<sycl::access::mode::discard_write>(cgh);
487 auto r = pdat_merge.get_field<f32_3>(ixyz)
489 ->get_access<sycl::access::mode::read>(cgh);
492 Rta tree_acc(radix_t, cgh);
495 = int_rad.radix_tree_field_buf->template get_access<sycl::access::mode::read>(
498 const f32 part_mass = gpart_mass;
500 const f32 h_max_tot_max_evol = htol_up_tol;
501 const f32 h_max_evol_p = htol_up_tol;
502 const f32 h_max_evol_m = 1 / htol_up_tol;
505 range_npart, [=](sycl::item<1> item) {
506 u32 id_a = (
u32) item.get_id(0);
508 f32_3 xyz_a = r[id_a];
510 f32 h_a = h_new[id_a];
513 f32_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
514 f32_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
517 f32 part_omega_sum = 0;
521 [&tree_acc, &xyz_a, &inter_box_a_min, &inter_box_a_max, &cell_int_r](
523 f32_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
524 f32_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
525 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
527 using namespace walker::interaction_crit;
529 return sph_radix_cell_crit(
537 [&r, &xyz_a, &h_a, &rho_sum, &part_mass, &part_omega_sum](
u32 id_b) {
539 f32 rab = sycl::distance(xyz_a, r[id_b]);
541 if (rab > h_a * Kernel::Rkern)
546 rho_sum += part_mass * Kernel::W(rab, h_a);
547 part_omega_sum += part_mass * Kernel::dhW(rab, h_a);
551 using namespace shamrock::sph;
552 f32 rho_ha = rho_h(part_mass, h_a, Kernel::hfactd);
553 omga[id_a] = 1 + (h_a / (3 * rho_ha)) * part_omega_sum;
559 inline static void sycl_h_iter_omega<f64>(
573 shamrock::patch::PatchData &pdat_merge,
574 sycl::buffer<f64> &hnew,
575 sycl::buffer<f64> &omega,
576 sycl::buffer<f64> &eps_h) {
580 sycl::range range_npart{or_element_cnt};
582 queue.submit([&](sycl::handler &cgh) {
583 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
584 auto omga = omega.get_access<sycl::access::mode::discard_write>(cgh);
586 auto r = pdat_merge.get_field<f64_3>(ixyz)
588 ->get_access<sycl::access::mode::read>(cgh);
591 Rta tree_acc(radix_t, cgh);
594 = int_rad.radix_tree_field_buf->template get_access<sycl::access::mode::read>(
597 const f64 part_mass = gpart_mass;
599 const f64 h_max_tot_max_evol = htol_up_tol;
600 const f64 h_max_evol_p = htol_up_tol;
601 const f64 h_max_evol_m = 1 / htol_up_tol;
604 range_npart, [=](sycl::item<1> item) {
605 u32 id_a = (
u32) item.get_id(0);
607 f64_3 xyz_a = r[id_a];
609 f64 h_a = h_new[id_a];
612 f64_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
613 f64_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
616 f64 part_omega_sum = 0;
620 [&tree_acc, &xyz_a, &inter_box_a_min, &inter_box_a_max, &cell_int_r](
622 f64_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
623 f64_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
624 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
626 using namespace walker::interaction_crit;
628 return sph_radix_cell_crit(
636 [&r, &xyz_a, &h_a, &rho_sum, &part_mass, &part_omega_sum](
u32 id_b) {
638 f64 rab = sycl::distance(xyz_a, r[id_b]);
640 if (rab > h_a * Kernel::Rkern)
645 rho_sum += part_mass * Kernel::W(rab, h_a);
646 part_omega_sum += part_mass * Kernel::dhW(rab, h_a);
650 using namespace shamrock::sph;
651 f64 rho_ha = rho_h(part_mass, h_a, Kernel::hfactd);
652 omga[id_a] = 1 + (h_a / (3 * rho_ha)) * part_omega_sum;
661 static void sycl_h_iter_step(
672 Radix_Tree<morton_prec, sycl::vec<flt,3>> & radix_t,
674 PatchDataBuffer & pdat_buf_merge,
675 sycl::buffer<flt> & hnew,
676 sycl::buffer<flt> & omega,
677 sycl::buffer<flt> & eps_h
681 inline static void sycl_h_iter_step<f32>(
692 Radix_Tree<morton_prec, f32_3> & radix_t,
694 PatchDataBuffer & pdat_buf_merge,
695 sycl::buffer<f32> & hnew,
696 sycl::buffer<f32> & omega,
697 sycl::buffer<f32> & eps_h
702 sycl::range range_npart{or_element_cnt};
704 queue.submit([&](sycl::handler &cgh) {
706 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
707 auto eps = eps_h.get_access<sycl::access::mode::read_write>(cgh);
709 auto acc_hpart = pdat_buf_merge.fields_f32.at(ihpart)->get_access<sycl::access::mode::read>(cgh);
710 auto r = pdat_buf_merge.fields_f32_3[ixyz]->get_access<sycl::access::mode::read>(cgh);
713 Rta tree_acc(radix_t, cgh);
717 auto cell_int_r = radix_t.buf_cell_interact_rad->template get_access<sycl::access::mode::read>(cgh);
719 const f32 part_mass = gpart_mass;
721 const f32 h_max_tot_max_evol = htol_up_tol;
722 const f32 h_max_evol_p = htol_up_iter;
723 const f32 h_max_evol_m = 1/htol_up_iter;
726 u32 id_a = (
u32)item.get_id(0);
729 if(eps[id_a] > 1e-6){
731 f32_3 xyz_a = r[id_a];
733 f32 h_a = h_new[id_a];
736 f32_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
737 f32_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
744 [&tree_acc,&xyz_a,&inter_box_a_min,&inter_box_a_max,&cell_int_r](
u32 node_id) {
745 f32_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
746 f32_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
747 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
749 using namespace walker::interaction_crit;
751 return sph_radix_cell_crit(xyz_a, inter_box_a_min, inter_box_a_max, cur_pos_min_cell_b,
752 cur_pos_max_cell_b, int_r_max_cell);
754 [&r,&xyz_a,&h_a,&rho_sum,&part_mass,&sumdWdh](
u32 id_b) {
756 f32 rab = sycl::distance( xyz_a , r[id_b]);
758 if(rab > h_a*Kernel::Rkern)
return;
762 rho_sum += part_mass*Kernel::W(rab,h_a);
763 sumdWdh += part_mass*Kernel::dhW(rab,h_a);
770 f32 rho_ha = rho_h(part_mass, h_a);
772 f32 f_iter = rho_sum - rho_ha;
773 f32 df_iter = sumdWdh + 3*rho_ha/h_a;
778 f32 new_h = h_a - f_iter/df_iter;
781 if(new_h < h_a*h_max_evol_m) new_h = h_max_evol_m*h_a;
782 if(new_h > h_a*h_max_evol_p) new_h = h_max_evol_p*h_a;
785 f32 ha_0 = acc_hpart[id_a];
788 if (new_h < ha_0*h_max_tot_max_evol) {
790 eps[id_a] = sycl::fabs(new_h - h_a)/ha_0;
792 h_new[id_a] = ha_0*h_max_tot_max_evol;
815 static void _sycl_h_iter_omega(
826 Radix_Tree<morton_prec, sycl::vec<flt,3>> & radix_t,
828 PatchDataBuffer & pdat_buf_merge,
829 sycl::buffer<flt> & hnew,
830 sycl::buffer<flt> & omega,
831 sycl::buffer<flt> & eps_h
835 inline static void _sycl_h_iter_omega<f32>(
846 Radix_Tree<morton_prec, f32_3> & radix_t,
848 PatchDataBuffer & pdat_buf_merge,
849 sycl::buffer<f32> & hnew,
850 sycl::buffer<f32> & omega,
851 sycl::buffer<f32> & eps_h
856 sycl::range range_npart{or_element_cnt};
859 queue.submit([&](sycl::handler &cgh) {
861 auto h_new = hnew.get_access<sycl::access::mode::read_write>(cgh);
862 auto omga = omega.get_access<sycl::access::mode::discard_write>(cgh);
864 auto r = pdat_buf_merge.fields_f32_3.at(ixyz)->get_access<sycl::access::mode::read>(cgh);
867 Rta tree_acc(radix_t, cgh);
871 auto cell_int_r =radix_t.buf_cell_interact_rad->template get_access<sycl::access::mode::read>(cgh);
873 const f32 part_mass = gpart_mass;
875 const f32 h_max_tot_max_evol = htol_up_tol;
876 const f32 h_max_evol_p = htol_up_tol;
877 const f32 h_max_evol_m = 1/htol_up_tol;
880 u32 id_a = (
u32)item.get_id(0);
882 f32_3 xyz_a = r[id_a];
884 f32 h_a = h_new[id_a];
887 f32_3 inter_box_a_min = xyz_a - h_a * Kernel::Rkern;
888 f32_3 inter_box_a_max = xyz_a + h_a * Kernel::Rkern;
891 f32 part_omega_sum = 0;
895 [&tree_acc,&xyz_a,&inter_box_a_min,&inter_box_a_max,&cell_int_r](
u32 node_id) {
896 f32_3 cur_pos_min_cell_b = tree_acc.pos_min_cell[node_id];
897 f32_3 cur_pos_max_cell_b = tree_acc.pos_max_cell[node_id];
898 float int_r_max_cell = cell_int_r[node_id] * Kernel::Rkern;
900 using namespace walker::interaction_crit;
902 return sph_radix_cell_crit(xyz_a, inter_box_a_min, inter_box_a_max, cur_pos_min_cell_b,
903 cur_pos_max_cell_b, int_r_max_cell);
905 [&r,&xyz_a,&h_a,&rho_sum,&part_mass,&part_omega_sum](
u32 id_b) {
907 f32 rab = sycl::distance( xyz_a , r[id_b]);
909 if(rab > h_a*Kernel::Rkern)
return;
913 rho_sum += part_mass*Kernel::W(rab,h_a);
914 part_omega_sum += part_mass * Kernel::dhW(rab,h_a);
921 f32 rho_ha = rho_h(part_mass, h_a);
922 omga[id_a] = 1 + (h_a/(3*rho_ha))*part_omega_sum;