29template<
class Tvec,
class Tgr
idVec>
35 using namespace shamrock::patch;
39 const u32 ndust = solver_config.dust_config.ndust;
43 = utility.make_compute_field<Tscal>(
"rho_next_bf_drag", AMRBlock::block_size);
45 = utility.make_compute_field<Tvec>(
"rhov_next_bf_drag", AMRBlock::block_size);
47 = utility.make_compute_field<Tscal>(
"rhoe_next_bf_drag", AMRBlock::block_size);
49 = utility.make_compute_field<Tscal>(
"rho_d_next_bf_drag", ndust * AMRBlock::block_size);
51 = utility.make_compute_field<Tvec>(
"rhov_d_next_bf_drag", ndust * AMRBlock::block_size);
72 scheduler().for_each_patchdata_nonempty([&, dt, ndust](
76 "[AMR evolve time step before drag ]",
"evolve field with no drag patch", p.id_patch);
99 u32 cell_count = pdat.get_obj_cnt() * AMRBlock::block_size;
107 auto rhov = buf_rhov.get_read_access(depend_list);
108 auto rhoe = buf_rhoe.get_read_access(depend_list);
110 auto acc_rho = rho_patch.get_write_access(depend_list);
114 auto e1 = q.
submit(depend_list, [&, dt](sycl::handler &cgh) {
115 shambase::parallel_for(cgh, cell_count,
"evolve field with no drag", [=](
u32 id_a) {
116 acc_rho[id_a] = rho[id_a] + dt * acc_dt_rho_patch[id_a];
117 acc_rhov[id_a] = rhov[id_a] + dt * acc_dt_rhov_patch[id_a];
118 acc_rhoe[id_a] = rhoe[id_a] + dt * acc_dt_rhoe_patch[id_a];
127 buf_rhov.complete_event_state(e1);
128 buf_rhoe.complete_event_state(e1);
130 rho_patch.complete_event_state(e1);
135 auto acc_dt_rho_d_patch = dt_rho_d_patch.
get_read_access(depend_list1);
136 auto acc_dt_rhov_d_patch = dt_rhov_d_patch.
get_read_access(depend_list1);
138 auto rho_d = buf_rho_d.get_read_access(depend_list1);
139 auto rhov_d = buf_rhov_d.get_read_access(depend_list1);
144 auto e2 = q.
submit(depend_list1, [&, dt, ndust](sycl::handler &cgh) {
145 shambase::parallel_for(
146 cgh, ndust * cell_count,
"dust evolve field no drag", [=](
u32 id_a) {
147 acc_rho_d[id_a] = rho_d[id_a] + dt * acc_dt_rho_d_patch[id_a];
148 acc_rhov_d[id_a] = rhov_d[id_a] + dt * acc_dt_rhov_d_patch[id_a];
155 buf_rho_d.complete_event_state(e2);
156 buf_rhov_d.complete_event_state(e2);
162 storage.rho_next_no_drag.set(std::move(cfield_rho_next_bf_drag));
163 storage.rhov_next_no_drag.set(std::move(cfield_rhov_next_bf_drag));
164 storage.rhoe_next_no_drag.set(std::move(cfield_rhoe_next_bf_drag));
165 storage.rho_d_next_no_drag.set(std::move(cfield_rho_d_next_bf_drag));
166 storage.rhov_d_next_no_drag.set(std::move(cfield_rhov_d_next_bf_drag));
169template<
class Tvec,
class Tgr
idVec>
174 using namespace shamrock::patch;
195 const u32 ndust = solver_config.dust_config.ndust;
197 auto alphas_vector = solver_config.drag_config.alphas;
198 std::vector<Tscal> inv_dt_alphas(ndust);
199 bool enable_frictional_heating = solver_config.drag_config.enable_frictional_heating;
200 u32 friction_control = (enable_frictional_heating ==
false) ? 1 : 0;
202 scheduler().for_each_patchdata_nonempty([&, dt, ndust, friction_control](
205 shamlog_debug_ln(
"[AMR enable drag ]",
"irk1 drag patch", p.id_patch);
209 u32 cell_count = pdat.get_obj_cnt() * AMRBlock::block_size;
225 alphas_buf.copy_from_stdvec(alphas_vector);
231 auto acc_rho_d_new_patch = rho_d_new_patch.
get_read_access(depend_list);
232 auto acc_rhov_d_new_patch = rhov_d_new_patch.
get_read_access(depend_list);
235 auto acc_rhov_old = rhov_old.get_write_access(depend_list);
236 auto acc_rhoe_old = rhoe_old.get_write_access(depend_list);
237 auto acc_rho_d_old = rho_d_old.get_write_access(depend_list);
238 auto acc_rhov_d_old = rhov_d_old.get_write_access(depend_list);
240 auto acc_alphas = alphas_buf.get_read_access(depend_list);
242 auto e = q.
submit(depend_list, [&, dt, ndust, friction_control](sycl::handler &cgh) {
243 shambase::parallel_for(cgh, cell_count,
"add_drag [irk1]", [=](
u32 id_a) {
244 Tvec tmp_mom_1 = acc_rhov_new_patch[id_a];
245 Tscal tmp_rho = acc_rho_old[id_a];
247 for (
u32 i = 0; i < ndust; i++) {
248 const Tscal inv_dt_alphas = 1.0 / (1.0 + acc_alphas[i] * dt);
249 const Tscal dt_alphas = dt * acc_alphas[i];
253 + dt_alphas * inv_dt_alphas * acc_rhov_d_new_patch[id_a * ndust + i];
254 tmp_rho = tmp_rho + dt_alphas * inv_dt_alphas * acc_rho_d_old[id_a * ndust + i];
257 Tscal tmp_inv_rho = 1.0 / tmp_rho;
258 Tvec tmp_vel = tmp_inv_rho * tmp_mom_1;
261 Tscal inv_rho_g = 1.0 / acc_rho_new_patch[id_a];
262 Tvec vg_bf = inv_rho_g * acc_rhov_new_patch[id_a];
263 Tvec vg_af = inv_rho_g * acc_rho_old[id_a] * tmp_vel;
267 * ((acc_rho_old[id_a] * tmp_vel[0] - acc_rhov_new_patch[id_a][0])
268 * (vg_bf[0] + vg_af[0])
269 + (acc_rho_old[id_a] * tmp_vel[1] - acc_rhov_new_patch[id_a][1])
270 * (vg_bf[1] + vg_af[1])
271 + (acc_rho_old[id_a] * tmp_vel[2] - acc_rhov_new_patch[id_a][2])
272 * (vg_bf[2] + vg_af[2]));
273 Tscal dissipation = 0.0;
274 for (
u32 i = 0; i < ndust; i++) {
275 const Tscal inv_dt_alphas = 1.0 / (1.0 + acc_alphas[i] * dt);
276 const Tscal dt_alphas = dt * acc_alphas[i];
277 Tscal inv_rho_d = 1.0 / acc_rho_d_new_patch[id_a * ndust + i];
278 Tvec vd_bf = inv_rho_d * acc_rhov_d_new_patch[id_a * ndust + i];
279 Tvec vd_af = inv_rho_d * inv_dt_alphas
280 * (acc_rhov_d_new_patch[id_a * ndust + i]
281 + dt_alphas * acc_rho_d_old[id_a * ndust + i] * tmp_vel);
282 dissipation += 0.5 * dt_alphas * inv_dt_alphas
283 * ((acc_rho_d_old[id_a * ndust + i] * tmp_vel[0]
284 - acc_rhov_d_new_patch[id_a * ndust + i][0])
285 * (vd_af[0] + vd_bf[0])
286 + (acc_rho_d_old[id_a * ndust + i] * tmp_vel[1]
287 - acc_rhov_d_new_patch[id_a * ndust + i][1])
288 * (vd_af[1] + vd_bf[1])
289 + (acc_rho_d_old[id_a * ndust + i] * tmp_vel[2]
290 - acc_rhov_d_new_patch[id_a * ndust + i][2])
291 * (vd_af[2] + vd_bf[2]));
294 Eg += acc_rhoe_new_patch[id_a] + (1 - friction_control) * work_drag
295 - friction_control * dissipation;
296 acc_rhov_old[id_a] = tmp_vel * acc_rho_old[id_a];
297 acc_rhoe_old[id_a] = Eg;
298 acc_rho_old[id_a] = acc_rho_new_patch[id_a];
299 for (
u32 i = 0; i < ndust; i++) {
300 const Tscal inv_dt_alphas = 1.0 / (1.0 + acc_alphas[i] * dt);
301 const Tscal dt_alphas = dt * acc_alphas[i];
302 acc_rhov_d_old[id_a * ndust + i]
304 * (acc_rhov_d_new_patch[id_a * ndust + i]
305 + dt_alphas * acc_rho_d_old[id_a * ndust + i] * tmp_vel);
306 acc_rho_d_old[id_a * ndust + i] = acc_rho_d_new_patch[id_a * ndust + i];
318 rhov_old.complete_event_state(e);
319 rhoe_old.complete_event_state(e);
320 rho_d_old.complete_event_state(e);
321 rhov_d_old.complete_event_state(e);
323 alphas_buf.complete_event_state(e);
327template<
class Tvec,
class Tgr
idVec>
332 using namespace shamrock::patch;
353 const u32 ndust = solver_config.dust_config.ndust;
356 auto alphas_vector = solver_config.drag_config.alphas;
357 std::vector<Tscal> inv_dt_alphas(ndust);
358 bool enable_frictional_heating = solver_config.drag_config.enable_frictional_heating;
359 u32 friction_control = (enable_frictional_heating ==
false) ? 1 : 0;
361 scheduler().for_each_patchdata_nonempty([&, dt, ndust, friction_control](
364 shamlog_debug_ln(
"[Ramses]",
"expo drag on patch", p.id_patch);
368 u32 cell_count = pdat.get_obj_cnt() * AMRBlock::block_size;
384 alphas_buf.copy_from_stdvec(alphas_vector);
390 auto acc_rho_d_new_patch = rho_d_new_patch.
get_read_access(depend_list);
391 auto acc_rhov_d_new_patch = rhov_d_new_patch.
get_read_access(depend_list);
394 auto acc_rhov_old = rhov_old.get_write_access(depend_list);
395 auto acc_rhoe_old = rhoe_old.get_write_access(depend_list);
396 auto acc_rho_d_old = rho_d_old.get_write_access(depend_list);
397 auto acc_rhov_d_old = rhov_d_old.get_write_access(depend_list);
399 auto acc_alphas = alphas_buf.get_read_access(depend_list);
401 size_t mat_size = ndust + 1;
402 size_t mat_size_squared = mat_size * mat_size;
405 size_t loc_acc_size = mat_size_squared * group_size;
407 size_t loc_mem_size = 5 *
sizeof(Tscal) * loc_acc_size;
409 if (group_size < 8) {
411 5 * mat_size_squared * cell_count, shamsys::instance::get_compute_scheduler_ptr());
412 Tscal *exp_scratch_ptr_base = scratch_expo.get_write_access(depend_list);
413 auto e = q.
submit(depend_list, [&, dt, ndust, friction_control](sycl::handler &cgh) {
414 shambase::parallel_for(
415 cgh, cell_count,
"add_drag [expo-global-mem]", [=](
u32 id_a) {
421 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
423 mat_set_nul<Tscal>(jacobian);
425 for (
auto j = 1; j < jacobian.extent(1); j++)
426 jacobian(0, j) = acc_alphas[j - 1];
428 for (
auto i = 1; i < jacobian.extent(0); i++) {
429 jacobian(i, 0) = acc_alphas[i - 1]
430 * (acc_rho_d_new_patch[
id * ndust + (i - 1)]
431 / acc_rho_new_patch[
id]);
432 jacobian(0, 0) -= jacobian(i, 0);
435 for (
auto i = 1; i < jacobian.extent(0); i++)
436 jacobian(i, i) = -acc_alphas[i - 1];
440 for (
auto i = 0; i < ndust; i++) {
442 + (acc_rho_d_new_patch[id_a * ndust + i]
443 / acc_rho_new_patch[id_a]))
446 mu *= (-dt / (ndust + 1));
449 Tscal *ptr_A = exp_scratch_ptr_base + (id_a * 5 * mat_size_squared);
450 Tscal *ptr_B = exp_scratch_ptr_base + (id_a * 5 * mat_size_squared)
452 Tscal *ptr_F = exp_scratch_ptr_base + (id_a * 5 * mat_size_squared)
453 + 2 * mat_size_squared;
454 Tscal *ptr_I = exp_scratch_ptr_base + (id_a * 5 * mat_size_squared)
455 + 3 * mat_size_squared;
456 Tscal *ptr_Id = exp_scratch_ptr_base + (id_a * 5 * mat_size_squared)
457 + 4 * mat_size_squared;
462 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
463 mdspan_A(ptr_A, mat_size, mat_size);
466 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
467 mdspan_B(ptr_B, mat_size, mat_size);
470 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
471 mdspan_F(ptr_F, mat_size, mat_size);
474 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
475 mdspan_I(ptr_I, mat_size, mat_size);
478 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
479 mdspan_Id(ptr_Id, mat_size, mat_size);
481 get_jacobian(id_a, mdspan_A);
484 shammath::mat_set_identity<Tscal>(mdspan_Id);
485 shammath::mat_axpy_beta<Tscal, Tscal>(-mu, mdspan_Id, dt, mdspan_A);
489 shammath::mat_exp<Tscal, Tscal>(
490 K_exp, mdspan_A, mdspan_F, mdspan_B, mdspan_I, mdspan_Id, ndust + 1);
493 shammath::mat_mul_scalar<Tscal>(mdspan_A, sycl::exp(mu));
496 Tvec r = {0., 0., 0.}, dd = {0., 0., 0.};
497 r += mdspan_A(0, 0) * acc_rhov_new_patch[id_a];
499 for (
auto j = 1; j < ndust + 1; j++) {
500 r += mdspan_A(0, j) * acc_rhov_d_new_patch[id_a * ndust + (j - 1)];
503 dd = r - acc_rhov_new_patch[id_a];
505 Tscal dissipation = 0, drag_work = 0;
508 Tscal inv_rho = 1.0 / (acc_rho_new_patch[id_a]);
510 Tvec v_bf = inv_rho * acc_rhov_new_patch[id_a];
511 Tvec v_af = inv_rho * r;
514 * (dd[0] * (v_bf[0] + v_af[0]) + dd[1] * (v_bf[1] + v_af[1])
515 + dd[2] * (v_bf[2] + v_af[2]));
518 acc_rhov_old[id_a] = r;
519 acc_rho_old[id_a] = acc_rho_new_patch[id_a];
521 for (
auto d_id = 1; d_id <= ndust; d_id++) {
523 r += mdspan_A(d_id, 0) * acc_rhov_new_patch[id_a];
525 for (
auto j = 1; j <= ndust; j++) {
527 r += mdspan_A(d_id, j)
528 * acc_rhov_d_new_patch[id_a * ndust + (j - 1)];
531 dd = r - acc_rhov_d_new_patch[id_a * ndust + (d_id - 1)];
533 inv_rho = 1.0 / (acc_rho_d_new_patch[id_a * ndust + (d_id - 1)]);
535 v_bf = inv_rho * acc_rhov_d_new_patch[id_a * ndust + (d_id - 1)];
542 * (dd[0] * (v_bf[0] + v_af[0]) + dd[1] * (v_bf[1] + v_af[1])
543 + dd[2] * (v_bf[2] + v_af[2]));
546 acc_rhov_d_old[id_a * ndust + (d_id - 1)] = r;
547 acc_rho_d_old[id_a * ndust + (d_id - 1)]
548 = acc_rho_d_new_patch[id_a * ndust + (d_id - 1)];
552 acc_rhoe_old[id_a] = acc_rhoe_new_patch[id_a]
553 + (1 - friction_control) * drag_work
554 - friction_control * dissipation;
565 rhov_old.complete_event_state(e);
566 rhoe_old.complete_event_state(e);
567 rho_d_old.complete_event_state(e);
568 rhov_d_old.complete_event_state(e);
570 alphas_buf.complete_event_state(e);
571 scratch_expo.complete_event_state(e);
577 "not enough local memory for expo drag integrator:\n"
578 "loc_mem_size: {} > max_local_mem: {}\n"
589 auto e = q.
submit(depend_list, [&, dt, ndust, friction_control](sycl::handler &cgh) {
591 sycl::local_accessor<Tscal> local_A(loc_acc_size, cgh);
592 sycl::local_accessor<Tscal> local_B(loc_acc_size, cgh);
593 sycl::local_accessor<Tscal> local_F(loc_acc_size, cgh);
594 sycl::local_accessor<Tscal> local_I(loc_acc_size, cgh);
595 sycl::local_accessor<Tscal> local_Id(loc_acc_size, cgh);
597 logger::debug_sycl_ln(
598 "SYCL", shambase::format(
"parallel_for add_drag [expo-shared-mem]"));
601 u32 loc_id =
id.get_local_id();
602 u32 id_a =
id.get_global_id();
603 if (id_a >= cell_count)
611 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
613 mat_set_nul<Tscal>(jacobian);
615 for (
auto j = 1; j < jacobian.extent(1); j++)
616 jacobian(0, j) = acc_alphas[j - 1];
618 for (
auto i = 1; i < jacobian.extent(0); i++) {
619 jacobian(i, 0) = acc_alphas[i - 1]
620 * (acc_rho_d_new_patch[
id * ndust + (i - 1)]
621 / acc_rho_new_patch[
id]);
622 jacobian(0, 0) -= jacobian(i, 0);
625 for (
auto i = 1; i < jacobian.extent(0); i++)
626 jacobian(i, i) = -acc_alphas[i - 1];
631 for (
auto i = 0; i < ndust; i++) {
633 + (acc_rho_d_new_patch[id_a * ndust + i]
634 / acc_rho_new_patch[id_a]))
637 mu *= (-dt / (ndust + 1));
640 Tscal *ptr_loc_A = &(local_A[0]) + mat_size_squared * loc_id;
641 Tscal *ptr_loc_B = &(local_B[0]) + mat_size_squared * loc_id;
642 Tscal *ptr_loc_F = &(local_F[0]) + mat_size_squared * loc_id;
643 Tscal *ptr_loc_I = &(local_I[0]) + mat_size_squared * loc_id;
644 Tscal *ptr_loc_Id = &(local_Id[0]) + mat_size_squared * loc_id;
649 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
650 mdspan_A(ptr_loc_A, mat_size, mat_size);
653 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
654 mdspan_B(ptr_loc_B, mat_size, mat_size);
657 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
658 mdspan_F(ptr_loc_F, mat_size, mat_size);
661 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
662 mdspan_I(ptr_loc_I, mat_size, mat_size);
665 std::extents<size_t, std::dynamic_extent, std::dynamic_extent>>
666 mdspan_Id(ptr_loc_Id, mat_size, mat_size);
670 get_jacobian(id_a, mdspan_A);
673 shammath::mat_set_identity<Tscal>(mdspan_Id);
674 shammath::mat_axpy_beta<Tscal, Tscal>(-mu, mdspan_Id, dt, mdspan_A);
678 shammath::mat_exp<Tscal, Tscal>(
679 K_exp, mdspan_A, mdspan_F, mdspan_B, mdspan_I, mdspan_Id, ndust + 1);
682 shammath::mat_mul_scalar<Tscal>(mdspan_A, sycl::exp(mu));
685 Tvec r = {0., 0., 0.}, dd = {0., 0., 0.};
686 r += mdspan_A(0, 0) * acc_rhov_new_patch[id_a];
688 for (
auto j = 1; j < ndust + 1; j++) {
689 r += mdspan_A(0, j) * acc_rhov_d_new_patch[id_a * ndust + (j - 1)];
692 dd = r - acc_rhov_new_patch[id_a];
694 Tscal dissipation = 0, drag_work = 0;
697 Tscal inv_rho = 1.0 / (acc_rho_new_patch[id_a]);
699 Tvec v_bf = inv_rho * acc_rhov_new_patch[id_a];
700 Tvec v_af = inv_rho * r;
703 * (dd[0] * (v_bf[0] + v_af[0]) + dd[1] * (v_bf[1] + v_af[1])
704 + dd[2] * (v_bf[2] + v_af[2]));
707 acc_rhov_old[id_a] = r;
708 acc_rho_old[id_a] = acc_rho_new_patch[id_a];
710 for (
auto d_id = 1; d_id <= ndust; d_id++) {
712 r += mdspan_A(d_id, 0) * acc_rhov_new_patch[id_a];
714 for (
auto j = 1; j <= ndust; j++) {
716 r += mdspan_A(d_id, j)
717 * acc_rhov_d_new_patch[id_a * ndust + (j - 1)];
720 dd = r - acc_rhov_d_new_patch[id_a * ndust + (d_id - 1)];
722 inv_rho = 1.0 / (acc_rho_d_new_patch[id_a * ndust + (d_id - 1)]);
724 v_bf = inv_rho * acc_rhov_d_new_patch[id_a * ndust + (d_id - 1)];
731 * (dd[0] * (v_bf[0] + v_af[0]) + dd[1] * (v_bf[1] + v_af[1])
732 + dd[2] * (v_bf[2] + v_af[2]));
735 acc_rhov_d_old[id_a * ndust + (d_id - 1)] = r;
736 acc_rho_d_old[id_a * ndust + (d_id - 1)]
737 = acc_rho_d_new_patch[id_a * ndust + (d_id - 1)];
741 acc_rhoe_old[id_a] = acc_rhoe_new_patch[id_a]
742 + (1 - friction_control) * drag_work
743 - friction_control * dissipation;
754 rhov_old.complete_event_state(e);
755 rhoe_old.complete_event_state(e);
756 rho_d_old.complete_event_state(e);
757 rhov_d_old.complete_event_state(e);
759 alphas_buf.complete_event_state(e);
Header file describing a Node Instance.
std::uint32_t u32
32 bit unsigned integer
std::int32_t i32
32 bit signed integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
DeviceProperties & get_device_prop()
Retrieves the properties of the associated device.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
u32 get_field_idx(const std::string &field_name) const
Get the field id if matching name & type.
PatchDataLayer container class, the layout is described in patchdata_layout.
This header file contains utility functions related to exception handling in the code.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
sycl::nd_range< 1 > make_range(u32 length, const u32 group_size=32)
Generate a sycl nd range out of a group size and length.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_error if the pointer is empty.
namespace for math utility
namespace for the main framework
usize local_mem_size
The amount of shared local memory on the device in bytes.
Patch object that contain generic patch information.