Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
DeviceBuffer.hpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
10#pragma once
11
19#include "shambase/assert.hpp"
20#include "shambase/memory.hpp"
28#include <memory>
29
30namespace sham {
31
32 template<class T>
33 inline sham::EventList safe_copy(
34 sham::DeviceQueue &q, sham::EventList &depends_list, const T *src, T *dest, size_t count) {
36 u64 max_copy_len = (1 << 30) / sizeof(T); // 1GB to avoid memcpy above i32_max bytes
37
38 sham::EventList events;
39
40 for (size_t i = 0; i < count; i += max_copy_len) {
41 i32 copy_len
42 = shambase::narrow_or_throw<i32>(std::min<size_t>(max_copy_len, count - i));
43 sycl::event e = q.submit(depends_list, [&](sycl::handler &cgh) {
44 cgh.copy(src + i, dest + i, copy_len);
45 });
46 events.add_event(e);
47 }
48
49 return events;
50 }
51
52 template<class T>
53 inline sham::EventList safe_fill(
54 sham::DeviceQueue &q, sham::EventList &depends_list, T *dest, size_t count, T value) {
56 u64 max_copy_len = (1 << 30); // 1G elements, this garanteee indexing below i32_max
57
58 sham::EventList events;
59
60 for (size_t i = 0; i < count; i += max_copy_len) {
61 i32 copy_len
62 = shambase::narrow_or_throw<i32>(std::min<size_t>(max_copy_len, count - i));
63 sycl::event e = q.submit(depends_list, [&](sycl::handler &cgh) {
64 cgh.parallel_for(sycl::range<1>(copy_len), [dest, i, value](sycl::item<1> gid) {
65 dest[i + gid.get_linear_id()] = value;
66 });
67 });
68 events.add_event(e);
69 }
70
71 return events;
72 }
73
74 template<class T, class Fct>
75 inline sham::EventList safe_fill_lambda(
76 sham::DeviceQueue &q, sham::EventList &depends_list, T *dest, size_t count, Fct &&fct) {
78 u64 max_copy_len = (1 << 30); // 1G elements, this garanteee indexing below i32_max
79
80 sham::EventList events;
81
82 for (size_t i = 0; i < count; i += max_copy_len) {
83 i32 copy_len
84 = shambase::narrow_or_throw<i32>(std::min<size_t>(max_copy_len, count - i));
85 sycl::event e = q.submit(depends_list, [&](sycl::handler &cgh) {
86 cgh.parallel_for(sycl::range<1>(copy_len), [dest, i, fct](sycl::item<1> gid) {
87 dest[i + gid.get_linear_id()] = fct(i + gid.get_linear_id());
88 });
89 });
90 events.add_event(e);
91 }
92
93 return events;
94 }
95
96 template<class T, USMKindTarget target = host, USMKindTarget orgin_target = device>
97 class BufferMirror;
98
105 template<class T, USMKindTarget target = device>
107
118 static size_t upgrade_multiple(size_t sz, size_t mult) {
119 if (sz % mult)
120 return sz + (mult - sz % mult);
121 return sz;
122 };
123
124 public:
130 static std::optional<size_t> get_alignment(const DeviceScheduler_ptr &dev_sched) {
131 return upgrade_multiple(
132 alignof(T),
133 std::max(
134 shambase::get_check_ref(dev_sched)
135 .get_queue()
136 .get_device_prop()
137 .mem_base_addr_align,
138 8_u32));
139 }
140
147 static size_t alloc_request_size_fct(size_t sz, const DeviceScheduler_ptr &dev_sched) {
148 size_t ret = sz * sizeof(T);
149
150 auto align = get_alignment(dev_sched);
151 if (align) {
152 ret = upgrade_multiple(ret, *align);
153 SHAM_ASSERT(ret % *align == 0);
154 }
155
156 SHAM_ASSERT((sz == 0) ? (ret == 0) : (ret >= sz * sizeof(T)));
157
158 return ret;
159 }
160
// NOTE(review): several constructor signature lines were elided by the doc
// extractor (doc lines 161-173, 196-207, 213-226, 232-240); only the
// visible bodies/initializer lists are annotated below.
// Constructor from a size and an existing USM pointer holder: takes
// ownership of _hold and creates a fresh event handler.
174 : hold(std::forward<USMPtrHolder<target>>(_hold)), size(sz),
175 events_hndl(std::make_unique<details::BufferEventHandler>()) {}
176
// Allocating constructor: requests alloc_request_size_fct(sz) bytes with
// the device-preferred alignment, then delegates to the holder constructor.
189 DeviceBuffer(size_t sz, DeviceScheduler_ptr dev_sched)
190 : DeviceBuffer(
191 sz,
192 details::create_usm_ptr<target>(
193 alloc_request_size_fct(sz, dev_sched), dev_sched, get_alignment(dev_sched))) {
194 }
195
// Construct from a SYCL buffer: allocate to syclbuf.size(), then copy.
208 DeviceBuffer(sycl::buffer<T> &syclbuf, std::shared_ptr<DeviceScheduler> dev_sched)
209 : DeviceBuffer(syclbuf.size(), dev_sched) {
210 copy_from_sycl_buffer(syclbuf);
211 }
212
// Construct from a SYCL buffer with an explicit element count sz
// (the signature line was elided in this extraction).
227 sycl::buffer<T> &syclbuf, size_t sz, std::shared_ptr<DeviceScheduler> dev_sched)
228 : DeviceBuffer(sz, dev_sched) {
229 copy_from_sycl_buffer(syclbuf, sz);
230 }
231
// Rvalue-buffer overloads delegate to the lvalue constructors above (the
// SYCL buffer is read from, not stolen).
241 DeviceBuffer(sycl::buffer<T> &&syclbuf, std::shared_ptr<DeviceScheduler> dev_sched)
242 : DeviceBuffer(syclbuf, dev_sched) {}
243
257 sycl::buffer<T> &&syclbuf, size_t sz, std::shared_ptr<DeviceScheduler> dev_sched)
258 : DeviceBuffer(syclbuf, sz, dev_sched) {}
259
// Copying a DeviceBuffer is forbidden; use copy()/copy_to() explicitly.
263 DeviceBuffer(const DeviceBuffer &other) = delete;
264
268 DeviceBuffer &operator=(const DeviceBuffer &other) = delete;
269
// Move constructor: steals the holder and the event handler; the moved-from
// object is left with a null events_hndl.
276 DeviceBuffer(DeviceBuffer &&other) noexcept
277 : hold(std::move(other.hold)), size(other.size),
278 events_hndl(std::move(other.events_hndl)) {}
279
// Move assignment (signature line elided by the extractor, doc line 286):
// swaps holder and event handler with `other` and copies the size.
287 std::swap(hold, other.hold);
288 std::swap(events_hndl, other.events_hndl);
289 size = other.size;
290 return *this;
291 }
292
// Destructor (signature line elided, doc line ~299). A null event handler
// means the object was moved from; otherwise the pointer holder and handler
// are handed over to the memory handler, which frees the USM allocation
// once all registered events have completed.
300 if (!bool(events_hndl)) {
301 // If this is not allocated it must be a moved object
302 if (hold.get_raw_ptr() != nullptr) {
// NOTE(review): the throw statement preceding this message string was
// elided by the extractor (doc line 303).
304 "you have an event handler but not pointer, like how ???");
305 }
306 return;
307 }
308 // This object is empty, it was probably moved
309 if (hold.get_raw_ptr() == nullptr && events_hndl->is_empty()) {
310 return;
311 }
312 // give the ptr holder and event handler to the memory handler
313 details::release_usm_ptr(std::move(hold), shambase::extract_pointer(events_hndl));
314 }
315
317 // Event handling
319
// Register a read dependency: events that must complete before the returned
// pointer may be read are appended to depends_list.
331 [[nodiscard]] inline const T *get_read_access(
332 sham::EventList &depends_list, SourceLocation src_loc = SourceLocation{}) const {
333 shambase::get_check_ref(events_hndl).read_access(depends_list, src_loc);
334 return hold.template ptr_cast<T>();
335 }
336
// Same as get_read_access, but registers a write dependency and returns a
// mutable pointer.
349 [[nodiscard]] inline T *get_write_access(
350 sham::EventList &depends_list, SourceLocation src_loc = SourceLocation{}) {
351 shambase::get_check_ref(events_hndl).write_access(depends_list, src_loc);
352 return hold.template ptr_cast<T>();
353 }
354
// Close an access opened by get_read_access/get_write_access by recording
// the event(s) that finish the operation.
368 void complete_event_state(sycl::event e) const {
369 shambase::get_check_ref(events_hndl).complete_state(e);
370 }
371
385 void complete_event_state(const std::vector<sycl::event> &e) const {
386 shambase::get_check_ref(events_hndl).complete_state(e);
387 }
388
// EventList overload (its signature line was elided by the extractor,
// doc line 402).
403 shambase::get_check_ref(events_hndl).complete_state(e);
404 }
405
// Block until every event registered on this buffer has completed.
413 void synchronize() const { shambase::get_check_ref(events_hndl).wait_all(); }
414
416 // Event handling (End)
418
420
422 // Queue / Scheduler getters
424
// Scheduler associated with the held allocation.
430 [[nodiscard]] inline DeviceScheduler &get_dev_scheduler() const {
431 return hold.get_dev_scheduler();
432 }
433
// Shared pointer to the scheduler (mutable and const overloads).
439 [[nodiscard]] inline std::shared_ptr<DeviceScheduler> &get_dev_scheduler_ptr() {
440 return hold.get_dev_scheduler_ptr();
441 }
442
448 [[nodiscard]] inline const std::shared_ptr<DeviceScheduler> &get_dev_scheduler_ptr() const {
449 return hold.get_dev_scheduler_ptr();
450 }
451
// Queue of the scheduler associated with the held allocation.
457 [[nodiscard]] inline DeviceQueue &get_queue() const {
458 return hold.get_dev_scheduler().get_queue();
459 }
460
462 // Queue / Scheduler getters (END)
464
466
468 // Size getters
470
// Number of elements (logical size, not allocated capacity).
476 [[nodiscard]] inline size_t get_size() const { return size; }
477
// Allocated capacity in bytes (may exceed get_size() * sizeof(T)).
483 [[nodiscard]] inline size_t get_mem_usage() const { return hold.get_bytesize(); }
484
490 [[nodiscard]] inline bool is_empty() const { return size == 0; }
491
493 // Size getters (END)
495
497
499 // Copy fcts
501
510 [[nodiscard]] inline std::vector<T> copy_to_stdvec() const {
511 std::vector<T> ret(size);
512
513 if (size > 0) {
514 sham::EventList depends_list;
515 const T *ptr = get_read_access(depends_list);
516
517 sham::EventList events
518 = safe_copy(get_queue(), depends_list, ptr, ret.data(), size);
519
520 events.wait_and_throw();
521 complete_event_state(sycl::event{});
522 }
523
524 return ret;
525 }
526
// Download elements [begin, end) into a std::vector, after bounds checks.
// NOTE(review): the throw_with_loc/format call lines were elided by the doc
// extractor (doc lines 544 and 549) — only the message strings remain.
540 [[nodiscard]] inline std::vector<T> copy_to_stdvec_idx_range(
541 size_t begin, size_t end) const {
542
543 if (end > size) {
545 "copy_to_stdvec_idx_range: end > size\n end = {},\n size = {}", end, size));
546 }
547
548 if (begin > end) {
// NOTE(review): message says "begin >= end" but the check is begin > end
// (begin == end is allowed and yields an empty vector) — confirm intent.
550 "copy_to_stdvec_idx_range: begin >= end\n begin = {},\n end = {}",
551 begin,
552 end));
553 }
554
// NOTE(review): range length is narrowed to u32 — a range of more than
// 2^32-1 elements would silently truncate here; verify against callers.
555 u32 size_cp = end - begin;
556 std::vector<T> ret(size_cp);
557
558 if (size_cp > 0) {
559 sham::EventList depends_list;
560 const T *ptr = get_read_access(depends_list);
561
562 sham::EventList events
563 = safe_copy(get_queue(), depends_list, ptr + begin, ret.data(), size_cp);
564
565 events.wait_and_throw();
566 complete_event_state(sycl::event{});
567 }
568
569 return ret;
570 }
571
// Copy elements [begin, end) of this buffer into dest starting at
// dest_offset. Both buffers' event states are updated with the copy events.
// NOTE(review): the `dest` parameter line (doc line 586) and the
// throw_with_loc/format call lines (doc lines 590, 595, 603, 611, 621) were
// elided by the doc extractor; only the message strings remain visible.
582 template<USMKindTarget dest_target>
583 inline void copy_range_offset(
584 size_t begin,
585 size_t end,
587 size_t dest_offset) const {
588
589 if (begin > end) {
591 "copy_range_offset: begin > end\n begin = {},\n end = {}", begin, end));
592 }
593
594 if (end > get_size()) {
596 "copy_range_offset: end index is out of bounds\n end = {},\n source buffer "
597 "size = {}",
598 end,
599 get_size()));
600 }
601
602 if (dest_offset > dest.get_size()) {
604 "copy_range_offset: dest_offset > dest.get_size()\n dest_offset = {},\n "
605 "dest.get_size() = {}",
606 dest_offset,
607 dest.get_size()));
608 }
609
610 if (end - begin > (dest.get_size() - dest_offset)) {
612 "copy_range_offset: end - begin > dest.get_size() - dest_offset\n end - begin "
613 "= {},\n "
614 "dest.get_size() - dest_offset = {},\n dest_offset = {}",
615 end - begin,
616 dest.get_size() - dest_offset,
617 dest_offset));
618 }
619
// self-copy is rejected (overlapping-access hazard on the event handler)
620 if (static_cast<const void *>(this) == static_cast<const void *>(&dest)) {
622 "the source and destination buffers must not be the same");
623 }
624
// empty range: nothing to do
625 if (begin == end) {
626 return;
627 }
628
629 size_t len = end - begin;
630
631 sham::EventList depends_list;
632 const T *ptr_src = get_read_access(depends_list) + begin;
633 T *ptr_dest = dest.get_write_access(depends_list) + dest_offset;
634
635 sham::EventList events = safe_copy(get_queue(), depends_list, ptr_src, ptr_dest, len);
636
// asynchronous: both buffers record the copy events, no wait here
637 complete_event_state(events);
638 dest.complete_event_state(events);
639 }
640
651 template<USMKindTarget dest_target>
652 inline void copy_range(
653 size_t begin, size_t end, sham::DeviceBuffer<T, dest_target> &dest) const {
654
655 copy_range_offset(begin, end, dest, 0);
656 }
657
// Upload a std::vector of exactly matching size into the buffer; blocks
// until the copy is done.
// NOTE(review): the throw_with_loc/format call line (doc line 669) was
// elided by the doc extractor.
666 inline void copy_from_stdvec(const std::vector<T> &vec) {
667
668 if (size != vec.size()) {
670 "copy_from_stdvec: size mismatch\n size = {},\n vec.size() = {}",
671 size,
672 vec.size()));
673 }
674
675 if (size > 0) {
676 sham::EventList depends_list;
677 T *ptr = get_write_access(depends_list);
678
679 sham::EventList events
680 = safe_copy(get_queue(), depends_list, vec.data(), ptr, size);
681
682 events.wait_and_throw();
683 complete_event_state(sycl::event{});
684 }
685 }
686
// Upload the first sz elements of a std::vector into the buffer; blocks
// until the copy is done. sz may be smaller than either size.
// NOTE(review): the throw_with_loc/format call line (doc line 699) was
// elided by the doc extractor.
696 inline void copy_from_stdvec(const std::vector<T> &vec, size_t sz) {
697
698 if (sz > vec.size() || sz > size) {
700 "copy_from_stdvec: size mismatch (sz > vec.size() || sz > size)\n size = "
701 "{},\n vec.size() = {},\n sz = {}",
702 size,
703 vec.size(),
704 sz));
705 }
706
707 if (sz > 0) {
708 sham::EventList depends_list;
709 T *ptr = get_write_access(depends_list);
710
711 sham::EventList events = safe_copy(get_queue(), depends_list, vec.data(), ptr, sz);
712
713 events.wait_and_throw();
714 complete_event_state(sycl::event{});
715 }
716 }
717
// Copy the buffer's content into a newly created sycl::buffer of the same
// size.
// NOTE(review): doc lines 731 and 741 were elided by the extractor — line
// 741 presumably waits on / records event `e`; confirm against the repo.
726 [[nodiscard]] inline sycl::buffer<T> copy_to_sycl_buffer() const {
727 sycl::buffer<T> ret(size);
728
729 if (size > 0) {
730
732
733 sham::EventList depends_list;
734 const T *ptr = get_read_access(depends_list);
735
736 sycl::event e = get_queue().submit(depends_list, [&](sycl::handler &cgh) {
737 sycl::accessor acc(ret, cgh, sycl::write_only, sycl::no_init);
738 cgh.copy(ptr, acc);
739 });
740
742 }
743
744 return ret;
745 }
746
// Copy the content of a SYCL buffer of exactly matching size into this
// buffer.
// NOTE(review): doc lines 758, 765 and 775 were elided by the extractor —
// the throw call and the handling of event `e`; confirm against the repo.
755 inline void copy_from_sycl_buffer(sycl::buffer<T> &buf) {
756
757 if (size != buf.size()) {
759 "copy_from_sycl_buffer: size mismatch\n size = {},\n buf.size() = {}",
760 size,
761 buf.size()));
762 }
763
764 if (size > 0) {
766
767 sham::EventList depends_list;
768 T *ptr = get_write_access(depends_list);
769
770 sycl::event e = get_queue().submit(depends_list, [&](sycl::handler &cgh) {
771 sycl::accessor acc(buf, cgh, sycl::read_only);
772 cgh.copy(acc, ptr);
773 });
774
776 }
777 }
778
// Copy the first sz elements of a SYCL buffer into this buffer, using a
// kernel (per-element copy) rather than cgh.copy.
// NOTE(review): doc lines 791, 800 and 817 were elided by the extractor
// (throw calls and event completion).
788 inline void copy_from_sycl_buffer(sycl::buffer<T> &buf, size_t sz) {
789
790 if (sz > buf.size() || sz > size) {
792 "copy_from_sycl_buffer: size mismatch (sz > buf.size() || sz > size)\n size = "
793 "{},\n buf.size() = {},\n sz = {}",
794 size,
795 buf.size(),
796 sz));
797 }
798
// the kernel below indexes with u32, hence the explicit u32_max guard
799 if (sz > u32_max) {
801 "copy_from_sycl_buffer: size mismatch (sz > u32_max)\n sz = {}", sz));
802 }
803
// NOTE(review): guard tests `size > 0` while the copy length is `sz` —
// sibling overloads guard on the copy length (`sz > 0`); when sz == 0 and
// size > 0 this submits a zero-length kernel. Confirm whether `sz > 0`
// was intended.
804 if (size > 0) {
805 sham::EventList depends_list;
806 T *ptr = get_write_access(depends_list);
807
808 sycl::event e = get_queue().submit(depends_list, [&](sycl::handler &cgh) {
809 sycl::accessor acc(buf, cgh, sycl::read_only);
810
811 shambase::parallel_for(
812 cgh, shambase::narrow_or_throw<i32>(sz), "copy field", [=](u32 gid) {
813 ptr[gid] = acc[gid];
814 });
815 });
816
818 }
819 }
820
// Copy the buffer into a new DeviceBuffer with a different USM target.
// NOTE(review): doc line 831 — presumably the declaration of the returned
// buffer `ret` — was elided by the doc extractor; confirm against the repo.
829 template<USMKindTarget new_target>
830 [[nodiscard]] inline DeviceBuffer<T, new_target> copy_to() const {
832
833 if (size > 0) {
834 sham::EventList depends_list;
835 const T *ptr_src = get_read_access(depends_list);
836 T *ptr_dest = ret.get_write_access(depends_list);
837
838 sham::EventList events
839 = safe_copy(get_queue(), depends_list, ptr_src, ptr_dest, size);
840
// asynchronous: both buffers record the copy events, no wait here
841 complete_event_state(events);
842 ret.complete_event_state(events);
843 }
844
845 return ret;
846 }
847
// Copy copy_size elements from another buffer (any USM target) into this
// one. Asynchronous: both buffers record the copy events.
// NOTE(review): the throw_with_loc/format call line (doc line 862) was
// elided by the doc extractor.
858 template<USMKindTarget new_target>
859 inline void copy_from(const DeviceBuffer<T, new_target> &other, size_t copy_size) {
860
861 if (!(copy_size <= get_size() && copy_size <= other.get_size())) {
863 "The size of the copy must be smaller than the size of the buffer involved\n "
864 "copy_size: {}\n get_size(): {}\n other.get_size(): {}",
865 copy_size,
866 get_size(),
867 other.get_size()));
868 }
869
870 if (copy_size > 0) {
871 sham::EventList depends_list;
872 T *ptr_dest = get_write_access(depends_list);
873 const T *ptr_src = other.get_read_access(depends_list);
874
875 sham::EventList events
876 = safe_copy(get_queue(), depends_list, ptr_src, ptr_dest, copy_size);
877
878 complete_event_state(events);
879 other.complete_event_state(events);
880 }
881 }
882
// Copy the full content of an equally-sized buffer into this one.
// NOTE(review): the throw_with_loc/format call line (doc line 895) was
// elided by the doc extractor.
891 template<USMKindTarget new_target>
892 inline void copy_from(const DeviceBuffer<T, new_target> &other) {
893
894 if (get_size() != other.get_size()) {
896 "The other field must be of the same size\n get_size = {},\n other.get_size "
897 "= {}",
898 get_size(),
899 other.get_size()));
900 }
901
902 copy_from(other, get_size());
903 }
904
913 inline DeviceBuffer<T, target> copy() const { return copy_to<target>(); }
914
// mirror_to: create a BufferMirror of this buffer on mirror_target.
// NOTE(review): the declaration body (doc lines 922-924) was elided by the
// doc extractor; see the BufferMirror tooltip for the signature.
921 template<USMKindTarget mirror_target>
925
927 // Copy fcts (END)
929
931
933 // Filler fcts
935
// Fill the index range [idx_range[0], idx_range[1]) with `value`.
// Asynchronous: the fill events are recorded on the buffer.
// NOTE(review): doc lines 957 (throw call) and 965 were elided by the doc
// extractor.
951 inline void fill(T value, std::array<size_t, 2> idx_range) {
952
953 size_t start_index = idx_range[0];
954 size_t idx_count = idx_range[1] - start_index;
955
956 if (!(start_index + idx_count <= get_size())) {
958 "!(start_index + idx_count <= get_size())\n start_index = {},\n idx_count = "
959 "{},\n get_size() = {}",
960 start_index,
961 idx_count,
962 get_size()));
963 }
964
966
967 sham::EventList depends_list;
968 T *ptr = get_write_access(depends_list);
969
970 sham::EventList events
971 = safe_fill(get_queue(), depends_list, ptr + start_index, idx_count, value);
972
973 complete_event_state(events);
974 }
// Fill the first idx_count elements with `value`.
986 inline void fill(T value, size_t idx_count) { fill(value, {0, idx_count}); }
987
// Fill the whole buffer with `value`.
997 inline void fill(T value) { fill(value, get_size()); }
998
999 template<class Fct>
1000 inline void fill_lambda(Fct &&fct) {
1001 if (get_size() == 0) {
1002 return;
1003 }
1004
1005 sham::EventList depends_list;
1006 T *__restrict ptr = get_write_access(depends_list);
1007
1008 sham::EventList events
1009 = safe_fill_lambda(get_queue(), depends_list, ptr, get_size(), fct);
1010
1011 complete_event_state(events);
1012 }
1013
1015 // Filler fcts (END)
1017
1019
1021 // Getter fcts
1023
// Read back a single element at index idx (blocking single-element copy).
// NOTE(review): the throw_with_loc/format call line (doc line 1036) was
// elided by the doc extractor.
1032 T get_val_at_idx(size_t idx) const {
1033 T ret;
1034
1035 if (idx >= size) {
1037 "get_val_at_idx: idx >= size\n idx = {},\n size = {}", idx, size));
1038 }
1039
1040 sham::EventList depends_list;
1041 const T *ptr = get_read_access(depends_list);
1042
1043 sycl::event e = get_queue().submit(depends_list, [&](sycl::handler &cgh) {
1044 cgh.copy(ptr + idx, &ret, 1);
1045 });
1046
1047 e.wait_and_throw();
1048 complete_event_state(sycl::event{});
1049
1050 return ret;
1051 }
1052
// Write a single element at index idx (blocking single-element copy).
// NOTE(review): the throw_with_loc/format call line (doc line 1056) was
// elided by the doc extractor.
1053 void set_val_at_idx(size_t idx, T val) {
1054
1055 if (idx >= size) {
1057 "set_val_at_idx: idx >= size\n idx = {},\n size = {}", idx, size));
1058 }
1059
1060 sham::EventList depends_list;
1061 T *ptr = get_write_access(depends_list);
1062
1063 sycl::event e = get_queue().submit(depends_list, [&](sycl::handler &cgh) {
1064 cgh.copy(&val, ptr + idx, 1);
1065 });
1066
1067 e.wait_and_throw();
1068 complete_event_state(sycl::event{});
1069 }
1071 // Getter fcts (END)
1073
1075
1077 // Size manipulation
1079
1080 inline size_t get_max_alloc_size() const {
1081 auto &dev_prop = hold.get_dev_scheduler().get_queue().get_device_prop();
1082
1083 if constexpr (target == device) {
1084 return dev_prop.max_mem_alloc_size_dev;
1085 } else if constexpr (target == host) {
1086 return dev_prop.max_mem_alloc_size_host;
1087 } else if constexpr (target == shared) {
1088 return sycl::min(dev_prop.max_mem_alloc_size_dev, dev_prop.max_mem_alloc_size_host);
1089 } else {
1090 static_assert(
1091 shambase::always_false_v<decltype(target)>,
1092 "get_max_alloc_size: invalid target");
1093 }
1094 }
1095
// Resize the buffer to new_size elements. Grows with a 1.5x headroom
// (capped at the device's max allocation), shrinks when the new request is
// below half the current capacity, otherwise just updates `size`.
// NOTE(review): the throw_with_loc/format call line (doc line 1129) was
// elided by the doc extractor.
1103 inline void resize(size_t new_size, bool keep_data = true) {
1104
1105 auto dev_sched = hold.get_dev_scheduler_ptr();
1106
1107 StackEntry __st{};
1108
1109 if (alloc_request_size_fct(new_size, dev_sched) > hold.get_bytesize()) {
1110 // expand storage
1111
1112 size_t max_alloc_size = get_max_alloc_size();
1113 std::optional<size_t> alignment = get_alignment(dev_sched);
1114 size_t min_size_new_alloc = alloc_request_size_fct(new_size, dev_sched);
// 1.5x growth factor to amortize repeated expansions
1115 size_t wanted_size_new_alloc
1116 = alloc_request_size_fct(new_size + new_size / 2, dev_sched);
1117 size_t max_possible_alloc = max_alloc_size;
1118
// round the device cap down to an aligned size
1119 if (alignment) {
1120 max_possible_alloc = max_possible_alloc - (max_possible_alloc % *alignment);
1121 }
1122
1123 size_t new_storage_size = wanted_size_new_alloc;
1124 if (new_storage_size > max_alloc_size) {
1125 new_storage_size = sycl::max(max_possible_alloc, min_size_new_alloc);
1126 }
1127
// even the minimal request does not fit on the device -> error
1128 if (new_storage_size > max_alloc_size) {
1130 "new_storage_size > max_alloc_size\n"
1131 " new_storage_size = {}\n"
1132 " max_alloc_size = {}\n"
1133 " min_size_new_alloc = {}\n"
1134 " wanted_size_new_alloc = {}",
1135 new_storage_size,
1136 max_alloc_size,
1137 min_size_new_alloc,
1138 wanted_size_new_alloc));
1139 }
1140
1141 DeviceBuffer new_buf(
1142 new_size,
1143 details::create_usm_ptr<target>(
1144 new_storage_size, get_dev_scheduler_ptr(), get_alignment(dev_sched)));
1145
1146 // copy data
1147 if (keep_data) {
1148 new_buf.copy_from(*this, get_size());
1149 }
1150
1151 // override old buffer
1152 std::swap(new_buf, *this);
1153
// NOTE(review): capacity comparison uses a floating-point factor (* 0.5);
// an integer form (bytes / 2) would avoid any rounding subtlety.
1154 } else if (alloc_request_size_fct(new_size, dev_sched) < hold.get_bytesize() * 0.5) {
1155 // shrink storage
1156
1157 size_t new_storage_size = alloc_request_size_fct(new_size, dev_sched);
1158
1159 DeviceBuffer new_buf(
1160 new_size,
1161 details::create_usm_ptr<target>(
1162 new_storage_size, get_dev_sched
uler_ptr(), get_alignment(dev_sched)));
1163
1164 // copy data
// NOTE(review): copy_from(*this, new_size) requires new_size <= this
// buffer's current size; if this branch can be reached with
// new_size > size (over-allocated holder), copy_from would throw —
// confirm the capacity invariant rules that out.
1165 if (keep_data) {
1166 new_buf.copy_from(*this, new_size);
1167 }
1168
1169 // override old buffer
1170 std::swap(new_buf, *this);
1171
1172 } else {
1173 size = new_size;
1174 // no need to resize
1175 }
1176 }
1177
// Resize without preserving the current content.
1179 inline void resize_discard_data(size_t new_size) { resize(new_size, false); }
1180
// Release the allocation entirely (alias for resize_discard_data(0)).
1184 inline void free_alloc() { resize_discard_data(0); }
1185
// Grow the buffer by add_sz elements, keeping the data.
1194 inline void expand(u32 add_sz) { resize(get_size() + add_sz); }
1195
// Shrink the buffer by sub_sz elements, keeping the remaining data.
// NOTE(review): the throw_with_loc/format call line (doc line 1206) was
// elided by the doc extractor.
1204 inline void shrink(u32 sub_sz) {
1205 if (sub_sz > get_size()) {
1207 "shrink called with sub_sz > get_size()\n sub_sz: {}\n get_size(): {}",
1208 sub_sz,
1209 get_size()));
1210 }
1211 resize(get_size() - sub_sz);
1212 }
1213
1223 inline void append(const DeviceBuffer &other) {
1224 if (this == &other) {
1225 shambase::throw_with_loc<std::invalid_argument>("cannot append a buffer to itself");
1226 }
1227
1228 u32 other_size = other.get_size();
1229 if (other_size == 0) {
1230 return; // early exit if the other buffer is empty
1231 }
1232 u32 old_size = get_size();
1233
1234 // allocate space
1235 expand(other_size);
1236
1237 sham::EventList depends_list;
1238 T *ptr = get_write_access(depends_list);
1239 const T *other_ptr = other.get_read_access(depends_list);
1240
1241 sham::EventList events
1242 = safe_copy(get_queue(), depends_list, other_ptr, ptr + old_size, other_size);
1243
1244 complete_event_state(events);
1245 other.complete_event_state(events);
1246 }
1247
1249 // Size manipulation (END)
1251
1252 // I'm not sure if enabling this one is a good idea
// reserve: intended to pre-grow the allocation without changing get_size();
// the implementation is currently compiled out (#if false), so this is a
// no-op.
1263 inline void reserve(size_t add_sz) {
1264#if false
1265 size_t old_sz = get_size();
1266 resize(old_sz + add_sz);
1267 size = old_sz;
1268#endif
1269 }
1270
1271 private:
// NOTE(review): the declaration of the USM pointer holder member `hold`
// (doc lines 1272-1279) was elided by the doc extractor.
1276
// Number of elements currently in the buffer (logical size, not capacity).
1280 size_t size = 0;
1281
// Tracks pending read/write events on the allocation; moved-from buffers
// hold a null handler (checked in the destructor).
1292 std::unique_ptr<details::BufferEventHandler> events_hndl;
1293 };
1294
1295} // namespace sham
This file contains the declaration of the USMPtrHolder class.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
Definition assert.hpp:67
A class template for creating a mirrored buffer.
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
void copy_from_stdvec(const std::vector< T > &vec)
Copy the content of a std::vector into the buffer.
DeviceBuffer(sycl::buffer< T > &&syclbuf, size_t sz, std::shared_ptr< DeviceScheduler > dev_sched)
Construct a new Device Buffer object by moving from a SYCL buffer with a given size.
void complete_event_state(sham::EventList &e) const
Complete the event state of the buffer.
void fill(T value)
Fill the buffer with a given value.
DeviceQueue & get_queue() const
Gets the DeviceQueue associated with the held allocation.
static size_t alloc_request_size_fct(size_t sz, const DeviceScheduler_ptr &dev_sched)
Convert a size in number of elements to a size in bytes.
void resize(size_t new_size, bool keep_data=true)
Resizes the buffer to a given size.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
DeviceBuffer & operator=(DeviceBuffer &&other) noexcept
Move assignment operator for DeviceBuffer.
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
std::shared_ptr< DeviceScheduler > & get_dev_scheduler_ptr()
Gets the Device scheduler pointer corresponding to the held allocation.
void copy_range_offset(size_t begin, size_t end, sham::DeviceBuffer< T, dest_target > &dest, size_t dest_offset) const
Copy a range of elements from the buffer to another buffer.
DeviceBuffer(sycl::buffer< T > &&syclbuf, std::shared_ptr< DeviceScheduler > dev_sched)
Construct a new Device Buffer object by moving from a SYCL buffer.
std::vector< T > copy_to_stdvec() const
Copy the content of the buffer to a std::vector.
void free_alloc()
Alias for resize_discard_data(0).
BufferMirror< T, mirror_target, target > mirror_to()
Creates a new buffer that is a mirror of the current one. Upon destruction of the mirror the changes ...
DeviceBuffer(size_t sz, USMPtrHolder< target > &&_hold)
Construct a new Device Buffer object with a given USM pointer.
void copy_from(const DeviceBuffer< T, new_target > &other, size_t copy_size)
Copies the content of another buffer to this one.
void copy_from_sycl_buffer(sycl::buffer< T > &buf, size_t sz)
Copy the content of a SYCL buffer into the buffer.
void copy_range(size_t begin, size_t end, sham::DeviceBuffer< T, dest_target > &dest) const
Copy a range of elements from the buffer to another buffer.
DeviceBuffer(const DeviceBuffer &other)=delete
Deleted copy constructor.
size_t get_mem_usage() const
Gets the amount of memory used by the buffer.
void complete_event_state(const std::vector< sycl::event > &e) const
Complete the event state of the buffer.
void synchronize() const
Wait for all the events associated with the buffer to be completed.
static std::optional< size_t > get_alignment(const DeviceScheduler_ptr &dev_sched)
Get the memory alignment of the type T in bytes.
DeviceBuffer(DeviceBuffer &&other) noexcept
Move constructor for DeviceBuffer.
void copy_from_sycl_buffer(sycl::buffer< T > &buf)
Copy the content of a SYCL buffer into the buffer.
const std::shared_ptr< DeviceScheduler > & get_dev_scheduler_ptr() const
Gets the Device scheduler pointer corresponding to the held allocation.
T get_val_at_idx(size_t idx) const
Get the value at a given index in the buffer.
DeviceBuffer< T, new_target > copy_to() const
Copy the content of the buffer to a new buffer with a different USM target.
void append(const DeviceBuffer &other)
Append the content of another buffer to this one.
void copy_from(const DeviceBuffer< T, new_target > &other)
Copies the data from another buffer to this one.
void fill(T value, size_t idx_count)
Fill the first idx_count elements of the buffer with a given value.
size_t get_size() const
Gets the number of elements in the buffer.
~DeviceBuffer()
Destructor for DeviceBuffer.
DeviceBuffer(sycl::buffer< T > &syclbuf, size_t sz, std::shared_ptr< DeviceScheduler > dev_sched)
Construct a new Device Buffer object from a SYCL buffer with a given size.
void copy_from_stdvec(const std::vector< T > &vec, size_t sz)
Copy the content of a std::vector into the buffer.
DeviceScheduler & get_dev_scheduler() const
Gets the Device scheduler corresponding to the held allocation.
std::vector< T > copy_to_stdvec_idx_range(size_t begin, size_t end) const
Copies a specified range of elements from the buffer to a std::vector.
void reserve(size_t add_sz)
Reserves space in the buffer for add_sz elements, but doesn't change the buffer's size.
DeviceBuffer(size_t sz, DeviceScheduler_ptr dev_sched)
Construct a new Device Buffer object.
void expand(u32 add_sz)
Expand the buffer by add_sz elements.
void resize_discard_data(size_t new_size)
same as resize but data will not be copied if reallocation is needed
void shrink(u32 sub_sz)
Shrink the buffer by sub_sz elements.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
DeviceBuffer & operator=(const DeviceBuffer &other)=delete
Deleted copy assignment operator.
sycl::buffer< T > copy_to_sycl_buffer() const
Copy the content of the buffer to a new SYCL buffer.
bool is_empty() const
Check if the buffer is empty.
DeviceBuffer(sycl::buffer< T > &syclbuf, std::shared_ptr< DeviceScheduler > dev_sched)
Construct a new Device Buffer object from a SYCL buffer.
DeviceBuffer< T, target > copy() const
Copy the current buffer.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage the scheduling of kernels on a device.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
void add_event(sycl::event e)
Add an event to the list of events.
Definition EventList.hpp:87
void wait_and_throw()
Wait for all events in the list to be finished and throw an exception if one has occurred.
Definition EventList.hpp:72
Class for holding a USM pointer.
This file contains the declaration of the memory handling and its methods.
Namespace for internal details of the logs module.
Namespace for backends; named simply sham because shambackends is too long to write.
@ host
Host memory.
@ device
Device memory.
@ shared
Shared memory.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110
auto extract_pointer(std::unique_ptr< T > &o, SourceLocation loc=SourceLocation()) -> T
extract content out of unique_ptr
Definition memory.hpp:227
constexpr bool always_false_v
Helper variable template that is always false. Especially useful to perform static asserts based on t...
STL namespace.
Utilities for safe type narrowing conversions.
constexpr u32 u32_max
u32 max value
This file contains the class definition for BufferEventHandler.
Traits for C++ types.
#define __shamrock_stack_entry()
Macro to create a stack entry.
provide information about the source location