Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
ValueLoader.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
20
22//
// Shift a block-local field one cell in -x: for every cell whose local
// x-coordinate is > 0, write the value of its -x neighbour (same AMR block)
// into buf_dest. Cells on the block's -x face are left untouched here.
// NOTE(doxygen dump): the function-name line (original line 25) is elided in
// this view; the dispatcher below calls this as load_patch_internal_block_xm.
// @param nobj     number of AMR blocks (kernel range is nobj * Block::block_size)
// @param nvar     accepted but never read in this kernel — TODO confirm intent
// @param buf_src  source field buffer (read access)
// @param buf_dest destination buffer (written only where x-lid > 0)
24template<class Tvec, class TgridVec, class T>
26    u32 nobj, u32 nvar, sham::DeviceBuffer<T> &buf_src, sham::DeviceBuffer<T> &buf_dest) {
27
28    StackEntry stack_loc{};
29    using Block = typename Config::AMRBlock;
30
31    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
32
33    sham::EventList depends_list;
34    auto val_out = buf_dest.get_write_access(depends_list);
35    auto src = buf_src.get_read_access(depends_list);
36
37    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
38        shambase::parallel_for(cgh, nobj * Block::block_size, "compute xm val (1)", [=](u64 id_a) {
39            const u32 base_idx = id_a;
40            const u32 lid = id_a % Block::block_size;
41
42            static_assert(dim == 3, "implemented only in dim 3");
43            std::array<u32, 3> lid_coord = Block::get_coord(lid);
44
45            if (lid_coord[0] > 0) {
46                lid_coord[0] -= 1;
                // base_idx - lid is the first cell of this block; add the
                // linear index of the shifted local coordinate.
47                val_out[base_idx] = src[base_idx - lid + Block::get_index(lid_coord)];
48            }
49        });
50    });
51
52    buf_dest.complete_event_state(e);
53    buf_src.complete_event_state(e);
54}
55
// Shift a block-local field one cell in +x: for every cell with local
// x-coordinate < Nside - 1, write the value of its +x neighbour (same AMR
// block) into buf_dest. Cells on the block's +x face are left untouched here.
// NOTE(doxygen dump): the function-name line (original line 57) is elided;
// the dispatcher below calls this as load_patch_internal_block_xp.
// @param nvar accepted but never read in this kernel — TODO confirm intent
56template<class Tvec, class TgridVec, class T>
58    u32 nobj, u32 nvar, sham::DeviceBuffer<T> &buf_src, sham::DeviceBuffer<T> &buf_dest) {
59
60    StackEntry stack_loc{};
61    using Block = typename Config::AMRBlock;
62
63    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
64
65    sham::EventList depends_list;
66    auto val_out = buf_dest.get_write_access(depends_list);
67    auto src = buf_src.get_read_access(depends_list);
68
69    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
70        shambase::parallel_for(cgh, nobj * Block::block_size, "compute xp val (1)", [=](u64 id_a) {
71            const u32 base_idx = id_a;
72            const u32 lid = id_a % Block::block_size;
73
74            static_assert(dim == 3, "implemented only in dim 3");
75            std::array<u32, 3> lid_coord = Block::get_coord(lid);
76
77            if (lid_coord[0] < Block::Nside - 1) {
78                lid_coord[0] += 1;
79                val_out[base_idx] = src[base_idx - lid + Block::get_index(lid_coord)];
80            }
81        });
82    });
83
84    buf_dest.complete_event_state(e);
85    buf_src.complete_event_state(e);
86}
87
// Shift a block-local field one cell in -y (see the xm variant above for the
// shared pattern; only the shifted axis, index 1, differs).
// NOTE(doxygen dump): the function-name line (original line 89) is elided;
// the dispatcher below calls this as load_patch_internal_block_ym.
// @param nvar accepted but never read in this kernel — TODO confirm intent
88template<class Tvec, class TgridVec, class T>
90    u32 nobj, u32 nvar, sham::DeviceBuffer<T> &buf_src, sham::DeviceBuffer<T> &buf_dest) {
91
92    StackEntry stack_loc{};
93    using Block = typename Config::AMRBlock;
94
95    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
96
97    sham::EventList depends_list;
98    auto val_out = buf_dest.get_write_access(depends_list);
99    auto src = buf_src.get_read_access(depends_list);
100
101    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
102        shambase::parallel_for(cgh, nobj * Block::block_size, "compute ym val (1)", [=](u64 id_a) {
103            const u32 base_idx = id_a;
104            const u32 lid = id_a % Block::block_size;
105
106            static_assert(dim == 3, "implemented only in dim 3");
107            std::array<u32, 3> lid_coord = Block::get_coord(lid);
108
109            if (lid_coord[1] > 0) {
110                lid_coord[1] -= 1;
111                val_out[base_idx] = src[base_idx - lid + Block::get_index(lid_coord)];
112            }
113        });
114    });
115
116    buf_dest.complete_event_state(e);
117    buf_src.complete_event_state(e);
118}
119
// Shift a block-local field one cell in +y (same pattern as the xp variant,
// applied to axis index 1).
// NOTE(doxygen dump): the function-name line (original line 121) is elided;
// the dispatcher below calls this as load_patch_internal_block_yp.
// @param nvar accepted but never read in this kernel — TODO confirm intent
120template<class Tvec, class TgridVec, class T>
122    u32 nobj, u32 nvar, sham::DeviceBuffer<T> &buf_src, sham::DeviceBuffer<T> &buf_dest) {
123
124    StackEntry stack_loc{};
125    using Block = typename Config::AMRBlock;
126
127    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
128
129    sham::EventList depends_list;
130    auto val_out = buf_dest.get_write_access(depends_list);
131    auto src = buf_src.get_read_access(depends_list);
132
133    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
134        shambase::parallel_for(cgh, nobj * Block::block_size, "compute yp val (1)", [=](u64 id_a) {
135            const u32 base_idx = id_a;
136            const u32 lid = id_a % Block::block_size;
137
138            static_assert(dim == 3, "implemented only in dim 3");
139            std::array<u32, 3> lid_coord = Block::get_coord(lid);
140
141            if (lid_coord[1] < Block::Nside - 1) {
142                lid_coord[1] += 1;
143                val_out[base_idx] = src[base_idx - lid + Block::get_index(lid_coord)];
144            }
145        });
146    });
147
148    buf_dest.complete_event_state(e);
149    buf_src.complete_event_state(e);
150}
151
// Shift a block-local field one cell in -z (same pattern as the xm variant,
// applied to axis index 2).
// NOTE(doxygen dump): the function-name line (original line 153) is elided;
// the dispatcher below calls this as load_patch_internal_block_zm.
// @param nvar accepted but never read in this kernel — TODO confirm intent
152template<class Tvec, class TgridVec, class T>
154    u32 nobj, u32 nvar, sham::DeviceBuffer<T> &buf_src, sham::DeviceBuffer<T> &buf_dest) {
155
156    StackEntry stack_loc{};
157    using Block = typename Config::AMRBlock;
158
159    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
160
161    sham::EventList depends_list;
162    auto val_out = buf_dest.get_write_access(depends_list);
163    auto src = buf_src.get_read_access(depends_list);
164
165    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
        // NOTE(review): copy-paste defect — this is the zm kernel but the
        // profiling/debug label below says "compute ym val (1)"; it should
        // presumably read "compute zm val (1)". Harmless to results, but it
        // mislabels this kernel in traces.
166        shambase::parallel_for(cgh, nobj * Block::block_size, "compute ym val (1)", [=](u64 id_a) {
167            const u32 base_idx = id_a;
168            const u32 lid = id_a % Block::block_size;
169
170            static_assert(dim == 3, "implemented only in dim 3");
171            std::array<u32, 3> lid_coord = Block::get_coord(lid);
172
173            if (lid_coord[2] > 0) {
174                lid_coord[2] -= 1;
175                val_out[base_idx] = src[base_idx - lid + Block::get_index(lid_coord)];
176            }
177        });
178    });
179
180    buf_dest.complete_event_state(e);
181    buf_src.complete_event_state(e);
182}
183
// Shift a block-local field one cell in +z (same pattern as the xp variant,
// applied to axis index 2).
// NOTE(doxygen dump): the function-name line (original line 185) is elided;
// the dispatcher below calls this as load_patch_internal_block_zp.
// @param nvar accepted but never read in this kernel — TODO confirm intent
184template<class Tvec, class TgridVec, class T>
186    u32 nobj, u32 nvar, sham::DeviceBuffer<T> &buf_src, sham::DeviceBuffer<T> &buf_dest) {
187
188    StackEntry stack_loc{};
189    using Block = typename Config::AMRBlock;
190
191    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
192
193    sham::EventList depends_list;
194    auto val_out = buf_dest.get_write_access(depends_list);
195    auto src = buf_src.get_read_access(depends_list);
196
197    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
        // NOTE(review): copy-paste defect — this is the zp kernel but the
        // profiling/debug label below says "compute ym val (1)"; it should
        // presumably read "compute zp val (1)".
198        shambase::parallel_for(cgh, nobj * Block::block_size, "compute ym val (1)", [=](u64 id_a) {
199            const u32 base_idx = id_a;
200            const u32 lid = id_a % Block::block_size;
201
202            static_assert(dim == 3, "implemented only in dim 3");
203            std::array<u32, 3> lid_coord = Block::get_coord(lid);
204
205            if (lid_coord[2] < Block::Nside - 1) {
206                lid_coord[2] += 1;
207                val_out[base_idx] = src[base_idx - lid + Block::get_index(lid_coord)];
208            }
209        });
210    });
211
212    buf_dest.complete_event_state(e);
213    buf_src.complete_event_state(e);
214}
215
// Dispatch to the per-direction in-block loaders above based on a unit offset
// in {±1, 0, 0} / {0, ±1, 0} / {0, 0, ±1}. Any other offset raises; the
// non-3D branch is unimplemented.
// NOTE(doxygen dump): the function-name line (original line 217) and the
// error-throwing call lines (original lines 253 and 257 — presumably
// shambase::throw_with_loc / shambase::throw_unimplemented, matching the
// rendered tooltips) are elided in this view.
216template<class Tvec, class TgridVec, class T>
218    std::array<Tgridscal, dim> offset,
219    u32 nobj,
220    u32 nvar,
221    sham::DeviceBuffer<T> &buf_src,
222    sham::DeviceBuffer<T> &buf_dest) {
223
224    StackEntry stack_loc{};
225    using Block = typename Config::AMRBlock;
226
227    if constexpr (dim == 3) {
228        if (offset[0] == -1 && offset[1] == 0 && offset[2] == 0) {
229
230            load_patch_internal_block_xm(nobj, nvar, buf_src, buf_dest);
231
232        } else if (offset[0] == 0 && offset[1] == -1 && offset[2] == 0) {
233
234            load_patch_internal_block_ym(nobj, nvar, buf_src, buf_dest);
235
236        } else if (offset[0] == 0 && offset[1] == 0 && offset[2] == -1) {
237
238            load_patch_internal_block_zm(nobj, nvar, buf_src, buf_dest);
239
240        } else if (offset[0] == 1 && offset[1] == 0 && offset[2] == 0) {
241
242            load_patch_internal_block_xp(nobj, nvar, buf_src, buf_dest);
243
244        } else if (offset[0] == 0 && offset[1] == 1 && offset[2] == 0) {
245
246            load_patch_internal_block_yp(nobj, nvar, buf_src, buf_dest);
247
248        } else if (offset[0] == 0 && offset[1] == 0 && offset[2] == 1) {
249
250            load_patch_internal_block_zp(nobj, nvar, buf_src, buf_dest);
251
252        } else {
254                "offset : ({},{},{}) is invalid", offset[0], offset[1], offset[2]));
255        }
256    } else {
258        }
259}
260
262//
264
// For cells on a block's -x face, load the facing cell of the -x neighbour
// block into buf_dest — but only when that neighbour is at the SAME refinement
// level (detected by comparing cell volumes Va == nV). Finer/coarser
// neighbours are skipped here (handled by the level_up/level_down paths).
// NOTE(doxygen dump): the function-name line (original line 266) and the
// face_lists parameter line (original line 271) are elided in this view.
// @param offset unused in this function body — TODO confirm intent
// @param nvar   unused in this function body — TODO confirm intent
265template<class Tvec, class TgridVec, class T>
267
268    std::array<Tgridscal, dim> offset,
269    sham::DeviceBuffer<TgridVec> &buf_cell_min,
270    sham::DeviceBuffer<TgridVec> &buf_cell_max,
272    u32 nobj,
273    u32 nvar,
274    sham::DeviceBuffer<T> &buf_src,
275    sham::DeviceBuffer<T> &buf_dest
276
277) {
278    StackEntry stack_loc{};
279
280    using Block = typename Config::AMRBlock;
281    using namespace shamrock;
282
283    OrientedNeighFaceList<Tvec> &face_xm = face_lists.xm();
284
285    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
286
287    sham::EventList depends_list;
288    auto val_out = buf_dest.get_write_access(depends_list);
289    auto src = buf_src.get_read_access(depends_list);
290    auto cell_min = buf_cell_min.get_read_access(depends_list);
291    auto cell_max = buf_cell_max.get_read_access(depends_list);
292
293    auto fptr = face_xm.neigh_info.get_read_access(depends_list);
294
295    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
296        tree::ObjectCacheIterator faces_xm(fptr);
297
298        shambase::parallel_for(cgh, nobj * Block::block_size, "compute xm val (2)", [=](u64 id_a) {
299            const u32 base_idx = id_a;
300            const u32 block_id = id_a / Block::block_size;
301            const u32 lid = id_a % Block::block_size;
302
303            std::array<u32, 3> lid_coord = Block::get_coord(lid);
304
            // Only cells on the -x face of their block receive a value.
305            if (lid_coord[0] == 0) {
306                auto tmp = cell_max[block_id] - cell_min[block_id];
307                i32 Va = tmp.x() * tmp.y() * tmp.z();
308
309                static_assert(dim == 3, "implemented only in dim 3");
310                faces_xm.for_each_object(block_id, [&](u32 block_id_b) {
311                    auto tmp = cell_max[block_id_b] - cell_min[block_id_b];
312                    i32 nV = tmp.x() * tmp.y() * tmp.z();
313
314                    if (nV == Va) { // same level
                        // Read from the +x face (x = Nside - 1) of the
                        // neighbour block, same y/z local coordinates.
315                        val_out[base_idx] = src
316                            [block_id_b * Block::block_size
317                             + Block::get_index({Block::Nside - 1, lid_coord[1], lid_coord[2]})];
318                    }
319                });
320            }
321        });
322    });
323
324    buf_dest.complete_event_state(e);
325    buf_src.complete_event_state(e);
326    buf_cell_min.complete_event_state(e);
327    buf_cell_max.complete_event_state(e);
328
329    sham::EventList resulting_events;
330    resulting_events.add_event(e);
331    face_xm.neigh_info.complete_event_state(resulting_events);
332}
333
// +x-face counterpart of the xm loader above: for cells on a block's +x face,
// copy the facing cell (x = 0) of the same-level +x neighbour block.
// NOTE(doxygen dump): the function-name line (original line 335) and the
// face_lists parameter line (original line 340) are elided in this view.
// @param offset unused in this function body — TODO confirm intent
// @param nvar   unused in this function body — TODO confirm intent
334template<class Tvec, class TgridVec, class T>
336
337    std::array<Tgridscal, dim> offset,
338    sham::DeviceBuffer<TgridVec> &buf_cell_min,
339    sham::DeviceBuffer<TgridVec> &buf_cell_max,
341    u32 nobj,
342    u32 nvar,
343    sham::DeviceBuffer<T> &buf_src,
344    sham::DeviceBuffer<T> &buf_dest
345
346) {
347    StackEntry stack_loc{};
348
349    using Block = typename Config::AMRBlock;
350    using namespace shamrock;
351
352    OrientedNeighFaceList<Tvec> &face_xp = face_lists.xp();
353
354    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
355
356    sham::EventList depends_list;
357    auto val_out = buf_dest.get_write_access(depends_list);
358    auto src = buf_src.get_read_access(depends_list);
359    auto cell_min = buf_cell_min.get_read_access(depends_list);
360    auto cell_max = buf_cell_max.get_read_access(depends_list);
361
362    auto fptr = face_xp.neigh_info.get_read_access(depends_list);
363
364    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
365        tree::ObjectCacheIterator faces_xp(fptr);
366
        // NOTE(review): copy-paste defect — this is the xp kernel but the
        // profiling/debug label below says "compute xm val (2)"; it should
        // presumably read "compute xp val (2)".
367        shambase::parallel_for(cgh, nobj * Block::block_size, "compute xm val (2)", [=](u64 id_a) {
368            const u32 base_idx = id_a;
369            const u32 block_id = id_a / Block::block_size;
370            const u32 lid = id_a % Block::block_size;
371
372            std::array<u32, 3> lid_coord = Block::get_coord(lid);
373
374            if (lid_coord[0] == Block::Nside - 1) {
375                auto tmp = cell_max[block_id] - cell_min[block_id];
376                i32 Va = tmp.x() * tmp.y() * tmp.z();
377
378                static_assert(dim == 3, "implemented only in dim 3");
379                faces_xp.for_each_object(block_id, [&](u32 block_id_b) {
380                    auto tmp = cell_max[block_id_b] - cell_min[block_id_b];
381                    i32 nV = tmp.x() * tmp.y() * tmp.z();
382
383                    if (nV == Va) { // same level
384                        auto val = src
385                            [block_id_b * Block::block_size
386                             + Block::get_index({0, lid_coord[1], lid_coord[2]})];
387
388                        // if constexpr (std::is_same_v<T, Tvec>){
389                        //     sycl::ext::oneapi::experimental::printf("%d %f %f %f\n",block_id_b *
390                        //     Block::block_size +
391                        //                Block::get_index({0, lid_coord[1],
392                        //                lid_coord[2]}),val.x(),val.y(),val.z());
393                        // }
394
395                        val_out[base_idx] = val;
396                    }
397                });
398            }
399        });
400    });
401
402    buf_dest.complete_event_state(e);
403    buf_src.complete_event_state(e);
404    buf_cell_min.complete_event_state(e);
405    buf_cell_max.complete_event_state(e);
406
407    sham::EventList resulting_events;
408    resulting_events.add_event(e);
409    face_xp.neigh_info.complete_event_state(resulting_events);
410}
411
// -y-face counterpart of the xm loader: for cells on a block's -y face, copy
// the facing cell (y = Nside - 1) of the same-level -y neighbour block.
// NOTE(doxygen dump): the function-name line (original line 413) and the
// face_lists parameter line (original line 418) are elided in this view.
// @param offset unused in this function body — TODO confirm intent
// @param nvar   unused in this function body — TODO confirm intent
412template<class Tvec, class TgridVec, class T>
414
415    std::array<Tgridscal, dim> offset,
416    sham::DeviceBuffer<TgridVec> &buf_cell_min,
417    sham::DeviceBuffer<TgridVec> &buf_cell_max,
419    u32 nobj,
420    u32 nvar,
421    sham::DeviceBuffer<T> &buf_src,
422    sham::DeviceBuffer<T> &buf_dest
423
424) {
425    StackEntry stack_loc{};
426
427    using Block = typename Config::AMRBlock;
428    using namespace shamrock;
429
430    OrientedNeighFaceList<Tvec> &face_ym = face_lists.ym();
431
432    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
433
434    sham::EventList depends_list;
435    auto val_out = buf_dest.get_write_access(depends_list);
436    auto src = buf_src.get_read_access(depends_list);
437    auto cell_min = buf_cell_min.get_read_access(depends_list);
438    auto cell_max = buf_cell_max.get_read_access(depends_list);
439
440    auto fptr = face_ym.neigh_info.get_read_access(depends_list);
441
442    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
443        tree::ObjectCacheIterator faces_ym(fptr);
444
445        shambase::parallel_for(cgh, nobj * Block::block_size, "compute ym val (2)", [=](u64 id_a) {
446            const u32 base_idx = id_a;
447            const u32 block_id = id_a / Block::block_size;
448            const u32 lid = id_a % Block::block_size;
449
450            std::array<u32, 3> lid_coord = Block::get_coord(lid);
451
452            if (lid_coord[1] == 0) {
453                auto tmp = cell_max[block_id] - cell_min[block_id];
454                i32 Va = tmp.x() * tmp.y() * tmp.z();
455
456                static_assert(dim == 3, "implemented only in dim 3");
457                faces_ym.for_each_object(block_id, [&](u32 block_id_b) {
458                    auto tmp = cell_max[block_id_b] - cell_min[block_id_b];
459                    i32 nV = tmp.x() * tmp.y() * tmp.z();
460
461                    if (nV == Va) { // same level
462                        val_out[base_idx] = src
463                            [block_id_b * Block::block_size
464                             + Block::get_index({lid_coord[0], Block::Nside - 1, lid_coord[2]})];
465                    }
466                });
467            }
468        });
469    });
470
471    buf_dest.complete_event_state(e);
472    buf_src.complete_event_state(e);
473    buf_cell_min.complete_event_state(e);
474    buf_cell_max.complete_event_state(e);
475
476    sham::EventList resulting_events;
477    resulting_events.add_event(e);
478    face_ym.neigh_info.complete_event_state(resulting_events);
479}
480
// +y-face counterpart: for cells on a block's +y face, copy the facing cell
// (y = 0) of the same-level +y neighbour block.
// NOTE(doxygen dump): the function-name line (original line 482) and the
// face_lists parameter line (original line 487) are elided in this view.
// @param offset unused in this function body — TODO confirm intent
// @param nvar   unused in this function body — TODO confirm intent
481template<class Tvec, class TgridVec, class T>
483
484    std::array<Tgridscal, dim> offset,
485    sham::DeviceBuffer<TgridVec> &buf_cell_min,
486    sham::DeviceBuffer<TgridVec> &buf_cell_max,
488    u32 nobj,
489    u32 nvar,
490    sham::DeviceBuffer<T> &buf_src,
491    sham::DeviceBuffer<T> &buf_dest
492
493) {
494    StackEntry stack_loc{};
495
496    using Block = typename Config::AMRBlock;
497    using namespace shamrock;
498
499    OrientedNeighFaceList<Tvec> &face_yp = face_lists.yp();
500
501    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
502
503    sham::EventList depends_list;
504    auto val_out = buf_dest.get_write_access(depends_list);
505    auto src = buf_src.get_read_access(depends_list);
506    auto cell_min = buf_cell_min.get_read_access(depends_list);
507    auto cell_max = buf_cell_max.get_read_access(depends_list);
508
509    auto fptr = face_yp.neigh_info.get_read_access(depends_list);
510
511    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
512        tree::ObjectCacheIterator faces_yp(fptr);
513
        // NOTE(review): copy-paste defect — this is the yp kernel but the
        // profiling/debug label below says "compute ym val (2)"; it should
        // presumably read "compute yp val (2)".
514        shambase::parallel_for(cgh, nobj * Block::block_size, "compute ym val (2)", [=](u64 id_a) {
515            const u32 base_idx = id_a;
516            const u32 block_id = id_a / Block::block_size;
517            const u32 lid = id_a % Block::block_size;
518
519            std::array<u32, 3> lid_coord = Block::get_coord(lid);
520
521            if (lid_coord[1] == Block::Nside - 1) {
522                auto tmp = cell_max[block_id] - cell_min[block_id];
523                i32 Va = tmp.x() * tmp.y() * tmp.z();
524
525                static_assert(dim == 3, "implemented only in dim 3");
526                faces_yp.for_each_object(block_id, [&](u32 block_id_b) {
527                    auto tmp = cell_max[block_id_b] - cell_min[block_id_b];
528                    i32 nV = tmp.x() * tmp.y() * tmp.z();
529
530                    if (nV == Va) { // same level
531                        val_out[base_idx] = src
532                            [block_id_b * Block::block_size
533                             + Block::get_index({lid_coord[0], 0, lid_coord[2]})];
534                    }
535                });
536            }
537        });
538    });
539
540    buf_dest.complete_event_state(e);
541    buf_src.complete_event_state(e);
542    buf_cell_min.complete_event_state(e);
543    buf_cell_max.complete_event_state(e);
544
545    sham::EventList resulting_events;
546    resulting_events.add_event(e);
547    face_yp.neigh_info.complete_event_state(resulting_events);
548}
549
// -z-face counterpart: for cells on a block's -z face, copy the facing cell
// (z = Nside - 1) of the same-level -z neighbour block.
// NOTE(doxygen dump): the function-name line (original line 551) and the
// face_lists parameter line (original line 556) are elided in this view.
// @param offset unused in this function body — TODO confirm intent
// @param nvar   unused in this function body — TODO confirm intent
550template<class Tvec, class TgridVec, class T>
552
553    std::array<Tgridscal, dim> offset,
554    sham::DeviceBuffer<TgridVec> &buf_cell_min,
555    sham::DeviceBuffer<TgridVec> &buf_cell_max,
557    u32 nobj,
558    u32 nvar,
559    sham::DeviceBuffer<T> &buf_src,
560    sham::DeviceBuffer<T> &buf_dest
561
562) {
563    StackEntry stack_loc{};
564
565    using Block = typename Config::AMRBlock;
566    using namespace shamrock;
567
568    OrientedNeighFaceList<Tvec> &face_zm = face_lists.zm();
569
570    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
571
572    sham::EventList depends_list;
573    auto val_out = buf_dest.get_write_access(depends_list);
574    auto src = buf_src.get_read_access(depends_list);
575    auto cell_min = buf_cell_min.get_read_access(depends_list);
576    auto cell_max = buf_cell_max.get_read_access(depends_list);
577
578    auto fptr = face_zm.neigh_info.get_read_access(depends_list);
579
580    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
581        tree::ObjectCacheIterator faces_zm(fptr);
582
583        shambase::parallel_for(cgh, nobj * Block::block_size, "compute zm val (2)", [=](u64 id_a) {
584            const u32 base_idx = id_a;
585            const u32 block_id = id_a / Block::block_size;
586            const u32 lid = id_a % Block::block_size;
587
588            std::array<u32, 3> lid_coord = Block::get_coord(lid);
589
590            if (lid_coord[2] == 0) {
591                auto tmp = cell_max[block_id] - cell_min[block_id];
592                i32 Va = tmp.x() * tmp.y() * tmp.z();
593
594                static_assert(dim == 3, "implemented only in dim 3");
595                faces_zm.for_each_object(block_id, [&](u32 block_id_b) {
596                    auto tmp = cell_max[block_id_b] - cell_min[block_id_b];
597                    i32 nV = tmp.x() * tmp.y() * tmp.z();
598
599                    if (nV == Va) { // same level
600                        val_out[base_idx] = src
601                            [block_id_b * Block::block_size
602                             + Block::get_index({lid_coord[0], lid_coord[1], Block::Nside - 1})];
603                    }
604                });
605            }
606        });
607    });
608
609    buf_dest.complete_event_state(e);
610    buf_src.complete_event_state(e);
611    buf_cell_min.complete_event_state(e);
612    buf_cell_max.complete_event_state(e);
613
614    sham::EventList resulting_events;
615    resulting_events.add_event(e);
616    face_zm.neigh_info.complete_event_state(resulting_events);
617}
618
// +z-face counterpart: for cells on a block's +z face, copy the facing cell
// (z = 0) of the same-level +z neighbour block.
// NOTE(doxygen dump): the function-name line (original line 620) and the
// face_lists parameter line (original line 625) are elided in this view.
// @param offset unused in this function body — TODO confirm intent
// @param nvar   unused in this function body — TODO confirm intent
619template<class Tvec, class TgridVec, class T>
621
622    std::array<Tgridscal, dim> offset,
623    sham::DeviceBuffer<TgridVec> &buf_cell_min,
624    sham::DeviceBuffer<TgridVec> &buf_cell_max,
626    u32 nobj,
627    u32 nvar,
628    sham::DeviceBuffer<T> &buf_src,
629    sham::DeviceBuffer<T> &buf_dest
630
631) {
632    StackEntry stack_loc{};
633
634    using Block = typename Config::AMRBlock;
635    using namespace shamrock;
636
637    OrientedNeighFaceList<Tvec> &face_zp = face_lists.zp();
638
639    sham::DeviceQueue &q = shamsys::instance::get_compute_scheduler().get_queue();
640
641    sham::EventList depends_list;
642    auto val_out = buf_dest.get_write_access(depends_list);
643    auto src = buf_src.get_read_access(depends_list);
644    auto cell_min = buf_cell_min.get_read_access(depends_list);
645    auto cell_max = buf_cell_max.get_read_access(depends_list);
646
647    auto fptr = face_zp.neigh_info.get_read_access(depends_list);
648
649    auto e = q.submit(depends_list, [&](sycl::handler &cgh) {
650        tree::ObjectCacheIterator faces_zp(fptr);
651
        // NOTE(review): copy-paste defect — this is the zp kernel but the
        // profiling/debug label below says "compute zm val (2)"; it should
        // presumably read "compute zp val (2)".
652        shambase::parallel_for(cgh, nobj * Block::block_size, "compute zm val (2)", [=](u64 id_a) {
653            const u32 base_idx = id_a;
654            const u32 block_id = id_a / Block::block_size;
655            const u32 lid = id_a % Block::block_size;
656
657            std::array<u32, 3> lid_coord = Block::get_coord(lid);
658
659            if (lid_coord[2] == Block::Nside - 1) {
660                auto tmp = cell_max[block_id] - cell_min[block_id];
661                i32 Va = tmp.x() * tmp.y() * tmp.z();
662
663                static_assert(dim == 3, "implemented only in dim 3");
664                faces_zp.for_each_object(block_id, [&](u32 block_id_b) {
665                    auto tmp = cell_max[block_id_b] - cell_min[block_id_b];
666                    i32 nV = tmp.x() * tmp.y() * tmp.z();
667
668                    if (nV == Va) { // same level
669                        val_out[base_idx] = src
670                            [block_id_b * Block::block_size
671                             + Block::get_index({lid_coord[0], lid_coord[1], 0})];
672                    }
673                });
674            }
675        });
676    });
677
678    buf_dest.complete_event_state(e);
679    buf_src.complete_event_state(e);
680    buf_cell_min.complete_event_state(e);
681    buf_cell_max.complete_event_state(e);
682
683    sham::EventList resulting_events;
684    resulting_events.add_event(e);
685    face_zp.neigh_info.complete_event_state(resulting_events);
686}
687
// Dispatch to the per-direction same-level neighbour loaders above based on a
// unit offset vector; invalid offsets raise, non-3D is unimplemented.
// NOTE(doxygen dump): the function-name line (original line 689), the
// face_lists parameter line (original line 694), and the throwing call lines
// (original lines 741 and 745) are elided in this view.
688template<class Tvec, class TgridVec, class T>
690
691    std::array<Tgridscal, dim> offset,
692    sham::DeviceBuffer<TgridVec> &buf_cell_min,
693    sham::DeviceBuffer<TgridVec> &buf_cell_max,
695    u32 nobj,
696    u32 nvar,
697    sham::DeviceBuffer<T> &buf_src,
698    sham::DeviceBuffer<T> &buf_dest
699
700) {
701    StackEntry stack_loc{};
702    using Block = typename Config::AMRBlock;
703
704    using namespace shamrock::patch;
705    using namespace shamrock;
706    using namespace shammath;
707    using MergedPDat = shamrock::MergedPatchData;
708
709    if constexpr (dim == 3) {
710        if (offset[0] == -1 && offset[1] == 0 && offset[2] == 0) {
711
712            load_patch_neigh_same_level_xm(
713                offset, buf_cell_min, buf_cell_max, face_lists, nobj, nvar, buf_src, buf_dest);
714
715        } else if (offset[0] == 0 && offset[1] == -1 && offset[2] == 0) {
716
717            load_patch_neigh_same_level_ym(
718                offset, buf_cell_min, buf_cell_max, face_lists, nobj, nvar, buf_src, buf_dest);
719
720        } else if (offset[0] == 0 && offset[1] == 0 && offset[2] == -1) {
721
722            load_patch_neigh_same_level_zm(
723                offset, buf_cell_min, buf_cell_max, face_lists, nobj, nvar, buf_src, buf_dest);
724
725        } else if (offset[0] == 1 && offset[1] == 0 && offset[2] == 0) {
726
727            load_patch_neigh_same_level_xp(
728                offset, buf_cell_min, buf_cell_max, face_lists, nobj, nvar, buf_src, buf_dest);
729
730        } else if (offset[0] == 0 && offset[1] == 1 && offset[2] == 0) {
731
732            load_patch_neigh_same_level_yp(
733                offset, buf_cell_min, buf_cell_max, face_lists, nobj, nvar, buf_src, buf_dest);
734
735        } else if (offset[0] == 0 && offset[1] == 0 && offset[2] == 1) {
736
737            load_patch_neigh_same_level_zp(
738                offset, buf_cell_min, buf_cell_max, face_lists, nobj, nvar, buf_src, buf_dest);
739
740        } else {
742                "offset : ({},{},{}) is invalid", offset[0], offset[1], offset[2]));
743        }
744    } else {
746        }
747}
748
750//
752
// Ghost-zone loading from FINER neighbour blocks (one level up).
// NOTE(review): this looks like an unimplemented stub — each branch only
// binds a face-list reference (face_xm, face_ym, ...) that is never used and
// goes out of scope immediately; no data is read from buf_src or written to
// buf_dest. Cells adjacent to finer neighbours are therefore left untouched
// by this function — confirm whether that is intentional (e.g. handled
// elsewhere) or pending work.
// NOTE(doxygen dump): the function-name line (original line 754), the
// face_lists parameter line (original line 759), and the throwing call lines
// (original lines 801 and 805) are elided in this view.
753template<class Tvec, class TgridVec, class T>
755
756    std::array<Tgridscal, dim> offset,
757    sham::DeviceBuffer<TgridVec> &buf_cell_min,
758    sham::DeviceBuffer<TgridVec> &buf_cell_max,
760    u32 nobj,
761    u32 nvar,
762    sham::DeviceBuffer<T> &buf_src,
763    sham::DeviceBuffer<T> &buf_dest
764
765) {
766
767    StackEntry stack_loc{};
768    using Block = typename Config::AMRBlock;
769
770    using namespace shamrock::patch;
771    using namespace shamrock;
772    using namespace shammath;
773    using MergedPDat = shamrock::MergedPatchData;
774
775    if constexpr (dim == 3) {
776        if (offset[0] == -1 && offset[1] == 0 && offset[2] == 0) {
777
778            OrientedNeighFaceList<Tvec> &face_xm = face_lists.xm();
779
780        } else if (offset[0] == 0 && offset[1] == -1 && offset[2] == 0) {
781
782            OrientedNeighFaceList<Tvec> &face_ym = face_lists.ym();
783
784        } else if (offset[0] == 0 && offset[1] == 0 && offset[2] == -1) {
785
786            OrientedNeighFaceList<Tvec> &face_zm = face_lists.zm();
787
788        } else if (offset[0] == 1 && offset[1] == 0 && offset[2] == 0) {
789
790            OrientedNeighFaceList<Tvec> &face_xp = face_lists.xp();
791
792        } else if (offset[0] == 0 && offset[1] == 1 && offset[2] == 0) {
793
794            OrientedNeighFaceList<Tvec> &face_yp = face_lists.yp();
795
796        } else if (offset[0] == 0 && offset[1] == 0 && offset[2] == 1) {
797
798            OrientedNeighFaceList<Tvec> &face_zp = face_lists.zp();
799
800        } else {
802                "offset : ({},{},{}) is invalid", offset[0], offset[1], offset[2]));
803        }
804    } else {
806        }
807}
808
810//
812
// Ghost-zone loading from COARSER neighbour blocks (one level down).
// NOTE(review): like load_patch_neigh_level_up, this looks like an
// unimplemented stub — each branch only binds an unused face-list reference
// and no data is transferred from buf_src to buf_dest. Confirm intent.
// NOTE(doxygen dump): the function-name line (original line 814), the
// face_lists parameter line (original line 819), and the throwing call lines
// (original lines 860 and 864) are elided in this view.
813template<class Tvec, class TgridVec, class T>
815
816    std::array<Tgridscal, dim> offset,
817    sham::DeviceBuffer<TgridVec> &buf_cell_min,
818    sham::DeviceBuffer<TgridVec> &buf_cell_max,
820    u32 nobj,
821    u32 nvar,
822    sham::DeviceBuffer<T> &buf_src,
823    sham::DeviceBuffer<T> &buf_dest
824
825) {
826    StackEntry stack_loc{};
827    using Block = typename Config::AMRBlock;
828
829    using namespace shamrock::patch;
830    using namespace shamrock;
831    using namespace shammath;
832    using MergedPDat = shamrock::MergedPatchData;
833
834    if constexpr (dim == 3) {
835        if (offset[0] == -1 && offset[1] == 0 && offset[2] == 0) {
836
837            OrientedNeighFaceList<Tvec> &face_xm = face_lists.xm();
838
839        } else if (offset[0] == 0 && offset[1] == -1 && offset[2] == 0) {
840
841            OrientedNeighFaceList<Tvec> &face_ym = face_lists.ym();
842
843        } else if (offset[0] == 0 && offset[1] == 0 && offset[2] == -1) {
844
845            OrientedNeighFaceList<Tvec> &face_zm = face_lists.zm();
846
847        } else if (offset[0] == 1 && offset[1] == 0 && offset[2] == 0) {
848
849            OrientedNeighFaceList<Tvec> &face_xp = face_lists.xp();
850
851        } else if (offset[0] == 0 && offset[1] == 1 && offset[2] == 0) {
852
853            OrientedNeighFaceList<Tvec> &face_yp = face_lists.yp();
854
855        } else if (offset[0] == 0 && offset[1] == 0 && offset[2] == 1) {
856
857            OrientedNeighFaceList<Tvec> &face_zp = face_lists.zp();
858
859        } else {
861                "offset : ({},{},{}) is invalid", offset[0], offset[1], offset[2]));
862        }
863    } else {
865        }
866}
867
869//
871
// Build a ComputeField holding the field `field_name` shifted by `offset`,
// including ghost-zone values: per patch it (1) fills interior cells from the
// same block, (2) fills face cells from same-level neighbours, then (3) and
// (4) invoke the level_up / level_down loaders (which, as written above,
// currently transfer no data). Returns the temporary compute field.
// NOTE(doxygen dump): the function-name/return-type lines (original lines
// 873-874) and the declaration lines for the `tmp` field (line 887), the
// ghost-layout reference (line 892), and the face-list references (lines 915,
// 938, 961) are elided in this view.
872template<class Tvec, class TgridVec, class T>
875    std::string field_name, std::array<Tgridscal, dim> offset, std::string result_name) {
876
877    StackEntry stack_loc{};
878
879    using namespace shamrock::patch;
880    using namespace shamrock;
881    using namespace shammath;
882    using MergedPDat = shamrock::MergedPatchData;
883    using Flagger = FaceFlagger<Tvec, TgridVec>;
884    using Block = typename Config::AMRBlock;
885
886    shamrock::SchedulerUtility utility(scheduler());
888        = utility.make_compute_field<T>(result_name, Block::block_size, [&](u64 id) {
889              return storage.merged_patchdata_ghost.get().get(id).total_elements;
890          });
891
893        = shambase::get_check_ref(storage.ghost_layout.get());
894    u32 ifield = ghost_layout.get_field_idx<T>(field_name);
895    u32 nvar = ghost_layout.get_field<T>(ifield).nvar;
896
    // Pass 1: interior cells, shifted within each block.
897    scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
898        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
899
900        sham::DeviceBuffer<T> &buf_src = mpdat.pdat.get_field_buf_ref<T>(ifield);
901        sham::DeviceBuffer<T> &buf_dest = tmp.get_buf_check(p.id_patch);
902
903        load_patch_internal_block(offset, mpdat.total_elements, nvar, buf_src, buf_dest);
904    });
905
    // Pass 2: face cells fed by same-level neighbour blocks.
906    scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
907        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
908
909        sham::DeviceBuffer<TgridVec> &buf_cell_min = mpdat.pdat.get_field_buf_ref<TgridVec>(0);
910        sham::DeviceBuffer<TgridVec> &buf_cell_max = mpdat.pdat.get_field_buf_ref<TgridVec>(1);
911
912        sham::DeviceBuffer<T> &buf_src = mpdat.pdat.get_field_buf_ref<T>(ifield);
913        sham::DeviceBuffer<T> &buf_dest = tmp.get_buf_check(p.id_patch);
914
916            = storage.face_lists.get().get(p.id_patch);
917
918        load_patch_neigh_same_level(
919            offset,
920            buf_cell_min,
921            buf_cell_max,
922            face_lists,
923            mpdat.total_elements,
924            nvar,
925            buf_src,
926            buf_dest);
927    });
928
    // Pass 3: finer neighbours (level up) — currently a no-op stub, see above.
929    scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
930        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
931
932        sham::DeviceBuffer<TgridVec> &buf_cell_min = mpdat.pdat.get_field_buf_ref<TgridVec>(0);
933        sham::DeviceBuffer<TgridVec> &buf_cell_max = mpdat.pdat.get_field_buf_ref<TgridVec>(1);
934
935        sham::DeviceBuffer<T> &buf_src = mpdat.pdat.get_field_buf_ref<T>(ifield);
936        sham::DeviceBuffer<T> &buf_dest = tmp.get_buf_check(p.id_patch);
937
939            = storage.face_lists.get().get(p.id_patch);
940
941        load_patch_neigh_level_up(
942            offset,
943            buf_cell_min,
944            buf_cell_max,
945            face_lists,
946            mpdat.total_elements,
947            nvar,
948            buf_src,
949            buf_dest);
950    });
951
    // Pass 4: coarser neighbours (level down) — currently a no-op stub.
952    scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
953        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
954
955        sham::DeviceBuffer<TgridVec> &buf_cell_min = mpdat.pdat.get_field_buf_ref<TgridVec>(0);
956        sham::DeviceBuffer<TgridVec> &buf_cell_max = mpdat.pdat.get_field_buf_ref<TgridVec>(1);
957
958        sham::DeviceBuffer<T> &buf_src = mpdat.pdat.get_field_buf_ref<T>(ifield);
959        sham::DeviceBuffer<T> &buf_dest = tmp.get_buf_check(p.id_patch);
960
962            = storage.face_lists.get().get(p.id_patch);
963
964        load_patch_neigh_level_down(
965            offset,
966            buf_cell_min,
967            buf_cell_max,
968            face_lists,
969            mpdat.total_elements,
970            nvar,
971            buf_src,
972            buf_dest);
973    });
974
975    return tmp;
976}
977
// Overload of load_value_with_gz taking an existing ComputeField as the
// source instead of a named patchdata field; the per-pass structure is
// identical to the field-name overload above, with nvar taken from
// compute_field.get_field(p.id_patch).get_nvar() per patch.
// NOTE(doxygen dump): the function-name/return-type lines (original lines
// 979-980), the `tmp` declaration line (995), and the face-list reference
// lines (1023, 1046, 1069) are elided in this view.
978template<class Tvec, class TgridVec, class T>
981    shamrock::ComputeField<T> &compute_field,
982    std::array<Tgridscal, dim> offset,
983    std::string result_name) {
984
985    StackEntry stack_loc{};
986
987    using namespace shamrock::patch;
988    using namespace shamrock;
989    using namespace shammath;
990    using MergedPDat = shamrock::MergedPatchData;
991    using Flagger = FaceFlagger<Tvec, TgridVec>;
992    using Block = typename Config::AMRBlock;
993
994    shamrock::SchedulerUtility utility(scheduler());
996        = utility.make_compute_field<T>(result_name, Block::block_size, [&](u64 id) {
997              return storage.merged_patchdata_ghost.get().get(id).total_elements;
998          });
999
    // Pass 1: interior cells, shifted within each block.
1000    scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
1001        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
1002
1003        sham::DeviceBuffer<T> &buf_src = compute_field.get_buf_check(p.id_patch);
1004        sham::DeviceBuffer<T> &buf_dest = tmp.get_buf_check(p.id_patch);
1005
1006        load_patch_internal_block(
1007            offset,
1008            mpdat.total_elements,
1009            compute_field.get_field(p.id_patch).get_nvar(),
1010            buf_src,
1011            buf_dest);
1012    });
1013
    // Pass 2: face cells fed by same-level neighbour blocks.
1014    scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
1015        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
1016
1017        sham::DeviceBuffer<TgridVec> &buf_cell_min = mpdat.pdat.get_field_buf_ref<TgridVec>(0);
1018        sham::DeviceBuffer<TgridVec> &buf_cell_max = mpdat.pdat.get_field_buf_ref<TgridVec>(1);
1019
1020        sham::DeviceBuffer<T> &buf_src = compute_field.get_buf_check(p.id_patch);
1021        sham::DeviceBuffer<T> &buf_dest = tmp.get_buf_check(p.id_patch);
1022
1024            = storage.face_lists.get().get(p.id_patch);
1025
1026        load_patch_neigh_same_level(
1027            offset,
1028            buf_cell_min,
1029            buf_cell_max,
1030            face_lists,
1031            mpdat.total_elements,
1032            compute_field.get_field(p.id_patch).get_nvar(),
1033            buf_src,
1034            buf_dest);
1035    });
1036
    // Pass 3: finer neighbours (level up) — currently a no-op stub, see above.
1037    scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
1038        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
1039
1040        sham::DeviceBuffer<TgridVec> &buf_cell_min = mpdat.pdat.get_field_buf_ref<TgridVec>(0);
1041        sham::DeviceBuffer<TgridVec> &buf_cell_max = mpdat.pdat.get_field_buf_ref<TgridVec>(1);
1042
1043        sham::DeviceBuffer<T> &buf_src = compute_field.get_buf_check(p.id_patch);
1044        sham::DeviceBuffer<T> &buf_dest = tmp.get_buf_check(p.id_patch);
1045
1047            = storage.face_lists.get().get(p.id_patch);
1048
1049        load_patch_neigh_level_up(
1050            offset,
1051            buf_cell_min,
1052            buf_cell_max,
1053            face_lists,
1054            mpdat.total_elements,
1055            compute_field.get_field(p.id_patch).get_nvar(),
1056            buf_src,
1057            buf_dest);
1058    });
1059
    // Pass 4: coarser neighbours (level down) — currently a no-op stub.
1060    scheduler().for_each_patchdata_nonempty([&](Patch p, PatchDataLayer &pdat) {
1061        MergedPDat &mpdat = storage.merged_patchdata_ghost.get().get(p.id_patch);
1062
1063        sham::DeviceBuffer<TgridVec> &buf_cell_min = mpdat.pdat.get_field_buf_ref<TgridVec>(0);
1064        sham::DeviceBuffer<TgridVec> &buf_cell_max = mpdat.pdat.get_field_buf_ref<TgridVec>(1);
1065
1066        sham::DeviceBuffer<T> &buf_src = compute_field.get_buf_check(p.id_patch);
1067        sham::DeviceBuffer<T> &buf_dest = tmp.get_buf_check(p.id_patch);
1068
1070            = storage.face_lists.get().get(p.id_patch);
1071
1072        load_patch_neigh_level_down(
1073            offset,
1074            buf_cell_min,
1075            buf_cell_max,
1076            face_lists,
1077            mpdat.total_elements,
1078            compute_field.get_field(p.id_patch).get_nvar(),
1079            buf_src,
1080            buf_dest);
1081    });
1082
1083    return tmp;
1084}
1085
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
void add_event(sycl::event e)
Add an event to the list of events.
Definition EventList.hpp:87
flag faces with a lookup index for the orientation
shamrock::ComputeField< T > load_value_with_gz(std::string field_name, std::array< Tgridscal, dim > offset, std::string result_name)
ComputeField< T > make_compute_field(std::string new_name, u32 nvar)
create a compute field and init it to zeros
PatchDataLayer container class, the layout is described in patchdata_layout.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.
namespace for math utility
Definition AABB.hpp:26
namespace for the main framework
Definition __init__.py:1
utility class to handle AMR blocks
Definition AMRBlock.hpp:35
Patch object that contain generic patch information.
Definition Patch.hpp:33