36 u64 max_copy_len = (1 << 30) /
sizeof(T);
40 for (
size_t i = 0; i < count; i += max_copy_len) {
43 sycl::event e = q.
submit(depends_list, [&](sycl::handler &cgh) {
44 cgh.copy(src + i, dest + i, copy_len);
56 u64 max_copy_len = (1 << 30);
60 for (
size_t i = 0; i < count; i += max_copy_len) {
63 sycl::event e = q.
submit(depends_list, [&](sycl::handler &cgh) {
64 cgh.parallel_for(sycl::range<1>(copy_len), [dest, i, value](sycl::item<1> gid) {
65 dest[i + gid.get_linear_id()] = value;
74 template<
class T,
class Fct>
78 u64 max_copy_len = (1 << 30);
82 for (
size_t i = 0; i < count; i += max_copy_len) {
85 sycl::event e = q.
submit(depends_list, [&](sycl::handler &cgh) {
86 cgh.parallel_for(sycl::range<1>(copy_len), [dest, i, fct](sycl::item<1> gid) {
87 dest[i + gid.get_linear_id()] = fct(i + gid.get_linear_id());
96 template<
class T, USMKindTarget target = host, USMKindTarget orgin_target = device>
105 template<
class T, USMKindTarget target = device>
118 static size_t upgrade_multiple(
size_t sz,
size_t mult) {
120 return sz + (mult - sz % mult);
130 static std::optional<size_t>
get_alignment(
const DeviceScheduler_ptr &dev_sched) {
131 return upgrade_multiple(
137 .mem_base_addr_align,
148 size_t ret = sz *
sizeof(T);
152 ret = upgrade_multiple(ret, *align);
156 SHAM_ASSERT((sz == 0) ? (ret == 0) : (ret >= sz *
sizeof(T)));
175 events_hndl(
std::make_unique<
details::BufferEventHandler>()) {}
192 details::create_usm_ptr<target>(
208 DeviceBuffer(sycl::buffer<T> &syclbuf, std::shared_ptr<DeviceScheduler> dev_sched)
227 sycl::buffer<T> &syclbuf,
size_t sz, std::shared_ptr<DeviceScheduler> dev_sched)
241 DeviceBuffer(sycl::buffer<T> &&syclbuf, std::shared_ptr<DeviceScheduler> dev_sched)
257 sycl::buffer<T> &&syclbuf,
size_t sz, std::shared_ptr<DeviceScheduler> dev_sched)
277 : hold(std::move(other.hold)), size(other.size),
278 events_hndl(std::move(other.events_hndl)) {}
287 std::swap(hold, other.hold);
288 std::swap(events_hndl, other.events_hndl);
300 if (!
bool(events_hndl)) {
302 if (hold.get_raw_ptr() !=
nullptr) {
304 "you have an event handler but not pointer, like how ???");
309 if (hold.get_raw_ptr() ==
nullptr && events_hndl->is_empty()) {
334 return hold.template ptr_cast<T>();
352 return hold.template ptr_cast<T>();
431 return hold.get_dev_scheduler();
440 return hold.get_dev_scheduler_ptr();
449 return hold.get_dev_scheduler_ptr();
458 return hold.get_dev_scheduler().get_queue();
476 [[nodiscard]]
inline size_t get_size()
const {
return size; }
483 [[nodiscard]]
inline size_t get_mem_usage()
const {
return hold.get_bytesize(); }
490 [[nodiscard]]
inline bool is_empty()
const {
return size == 0; }
511 std::vector<T> ret(size);
518 = safe_copy(
get_queue(), depends_list, ptr, ret.data(), size);
541 size_t begin,
size_t end)
const {
545 "copy_to_stdvec_idx_range: end > size\n end = {},\n size = {}", end, size));
550 "copy_to_stdvec_idx_range: begin >= end\n begin = {},\n end = {}",
555 u32 size_cp = end - begin;
556 std::vector<T> ret(size_cp);
563 = safe_copy(
get_queue(), depends_list, ptr + begin, ret.data(), size_cp);
582 template<USMKindTarget dest_target>
587 size_t dest_offset)
const {
591 "copy_range_offset: begin > end\n begin = {},\n end = {}", begin, end));
596 "copy_range_offset: end index is out of bounds\n end = {},\n source buffer "
602 if (dest_offset > dest.
get_size()) {
604 "copy_range_offset: dest_offset > dest.get_size()\n dest_offset = {},\n "
605 "dest.get_size() = {}",
610 if (end - begin > (dest.
get_size() - dest_offset)) {
612 "copy_range_offset: end - begin > dest.get_size() - dest_offset\n end - begin "
614 "dest.get_size() - dest_offset = {},\n dest_offset = {}",
620 if (
static_cast<const void *
>(
this) ==
static_cast<const void *
>(&dest)) {
622 "the source and destination buffers must not be the same");
629 size_t len = end - begin;
651 template<USMKindTarget dest_target>
668 if (size != vec.size()) {
670 "copy_from_stdvec: size mismatch\n size = {},\n vec.size() = {}",
680 = safe_copy(
get_queue(), depends_list, vec.data(), ptr, size);
698 if (sz > vec.size() || sz > size) {
700 "copy_from_stdvec: size mismatch (sz > vec.size() || sz > size)\n size = "
701 "{},\n vec.size() = {},\n sz = {}",
727 sycl::buffer<T> ret(size);
736 sycl::event e =
get_queue().
submit(depends_list, [&](sycl::handler &cgh) {
737 sycl::accessor acc(ret, cgh, sycl::write_only, sycl::no_init);
757 if (size != buf.size()) {
759 "copy_from_sycl_buffer: size mismatch\n size = {},\n buf.size() = {}",
770 sycl::event e =
get_queue().
submit(depends_list, [&](sycl::handler &cgh) {
771 sycl::accessor acc(buf, cgh, sycl::read_only);
790 if (sz > buf.size() || sz > size) {
792 "copy_from_sycl_buffer: size mismatch (sz > buf.size() || sz > size)\n size = "
793 "{},\n buf.size() = {},\n sz = {}",
801 "copy_from_sycl_buffer: size mismatch (sz > u32_max)\n sz = {}", sz));
808 sycl::event e =
get_queue().
submit(depends_list, [&](sycl::handler &cgh) {
809 sycl::accessor acc(buf, cgh, sycl::read_only);
811 shambase::parallel_for(
829 template<USMKindTarget new_target>
839 = safe_copy(
get_queue(), depends_list, ptr_src, ptr_dest, size);
858 template<USMKindTarget new_target>
863 "The size of the copy must be smaller than the size of the buffer involved\n "
864 "copy_size: {}\n get_size(): {}\n other.get_size(): {}",
876 = safe_copy(
get_queue(), depends_list, ptr_src, ptr_dest, copy_size);
891 template<USMKindTarget new_target>
896 "The other field must be of the same size\n get_size = {},\n other.get_size "
921 template<USMKindTarget mirror_target>
951 inline void fill(T value, std::array<size_t, 2> idx_range) {
953 size_t start_index = idx_range[0];
954 size_t idx_count = idx_range[1] - start_index;
956 if (!(start_index + idx_count <=
get_size())) {
958 "!(start_index + idx_count <= get_size())\n start_index = {},\n idx_count = "
959 "{},\n get_size() = {}",
971 = safe_fill(
get_queue(), depends_list, ptr + start_index, idx_count, value);
986 inline void fill(T value,
size_t idx_count) {
fill(value, {0, idx_count}); }
1000 inline void fill_lambda(Fct &&fct) {
1037 "get_val_at_idx: idx >= size\n idx = {},\n size = {}", idx, size));
1043 sycl::event e =
get_queue().
submit(depends_list, [&](sycl::handler &cgh) {
1044 cgh.copy(ptr + idx, &ret, 1);
1053 void set_val_at_idx(
size_t idx, T val) {
1057 "set_val_at_idx: idx >= size\n idx = {},\n size = {}", idx, size));
1063 sycl::event e =
get_queue().
submit(depends_list, [&](sycl::handler &cgh) {
1064 cgh.copy(&val, ptr + idx, 1);
1080 inline size_t get_max_alloc_size()
const {
1081 auto &dev_prop = hold.get_dev_scheduler().get_queue().get_device_prop();
1083 if constexpr (target ==
device) {
1084 return dev_prop.max_mem_alloc_size_dev;
1085 }
else if constexpr (target ==
host) {
1086 return dev_prop.max_mem_alloc_size_host;
1087 }
else if constexpr (target ==
shared) {
1088 return sycl::min(dev_prop.max_mem_alloc_size_dev, dev_prop.max_mem_alloc_size_host);
1092 "get_max_alloc_size: invalid target");
1103 inline void resize(
size_t new_size,
bool keep_data =
true) {
1105 auto dev_sched = hold.get_dev_scheduler_ptr();
1112 size_t max_alloc_size = get_max_alloc_size();
1115 size_t wanted_size_new_alloc
1117 size_t max_possible_alloc = max_alloc_size;
1120 max_possible_alloc = max_possible_alloc - (max_possible_alloc % *alignment);
1123 size_t new_storage_size = wanted_size_new_alloc;
1124 if (new_storage_size > max_alloc_size) {
1125 new_storage_size = sycl::max(max_possible_alloc, min_size_new_alloc);
1128 if (new_storage_size > max_alloc_size) {
1130 "new_storage_size > max_alloc_size\n"
1131 " new_storage_size = {}\n"
1132 " max_alloc_size = {}\n"
1133 " min_size_new_alloc = {}\n"
1134 " wanted_size_new_alloc = {}",
1138 wanted_size_new_alloc));
1143 details::create_usm_ptr<target>(
1152 std::swap(new_buf, *
this);
1161 details::create_usm_ptr<target>(
1170 std::swap(new_buf, *
this);
1207 "shrink called with sub_sz > get_size()\n sub_sz: {}\n get_size(): {}",
1224 if (
this == &other) {
1229 if (other_size == 0) {
1242 = safe_copy(
get_queue(), depends_list, other_ptr, ptr + old_size, other_size);
1292 std::unique_ptr<details::BufferEventHandler> events_hndl;
This file contains the declaration of the USMPtrHolder class.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
A class template for creating a mirrored buffer.
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
void copy_from_stdvec(const std::vector< T > &vec)
Copy the content of a std::vector into the buffer.
DeviceBuffer(sycl::buffer< T > &&syclbuf, size_t sz, std::shared_ptr< DeviceScheduler > dev_sched)
Construct a new Device Buffer object by moving from a SYCL buffer with a given size.
void complete_event_state(sham::EventList &e) const
Complete the event state of the buffer.
void fill(T value)
Fill the buffer with a given value.
DeviceQueue & get_queue() const
Gets the DeviceQueue associated with the held allocation.
static size_t alloc_request_size_fct(size_t sz, const DeviceScheduler_ptr &dev_sched)
Convert a size in number of elements to a size in bytes.
void resize(size_t new_size, bool keep_data=true)
Resizes the buffer to a given size.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
DeviceBuffer & operator=(DeviceBuffer &&other) noexcept
Move assignment operator for DeviceBuffer.
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
std::shared_ptr< DeviceScheduler > & get_dev_scheduler_ptr()
Gets the Device scheduler pointer corresponding to the held allocation.
void copy_range_offset(size_t begin, size_t end, sham::DeviceBuffer< T, dest_target > &dest, size_t dest_offset) const
Copy a range of elements from the buffer to another buffer.
DeviceBuffer(sycl::buffer< T > &&syclbuf, std::shared_ptr< DeviceScheduler > dev_sched)
Construct a new Device Buffer object by moving from a SYCL buffer.
std::vector< T > copy_to_stdvec() const
Copy the content of the buffer to a std::vector.
void free_alloc()
Alias for resize_discard_data(0).
BufferMirror< T, mirror_target, target > mirror_to()
Creates a new buffer that is a mirror of the current one. Upon destruction of the mirror the changes ...
DeviceBuffer(size_t sz, USMPtrHolder< target > &&_hold)
Construct a new Device Buffer object with a given USM pointer.
void copy_from(const DeviceBuffer< T, new_target > &other, size_t copy_size)
Copies the content of another buffer to this one.
void copy_from_sycl_buffer(sycl::buffer< T > &buf, size_t sz)
Copy the content of a SYCL buffer into the buffer.
void copy_range(size_t begin, size_t end, sham::DeviceBuffer< T, dest_target > &dest) const
Copy a range of elements from the buffer to another buffer.
DeviceBuffer(const DeviceBuffer &other)=delete
Deleted copy constructor.
size_t get_mem_usage() const
Gets the amount of memory used by the buffer.
void complete_event_state(const std::vector< sycl::event > &e) const
Complete the event state of the buffer.
void synchronize() const
Wait for all the events associated with the buffer to be completed.
static std::optional< size_t > get_alignment(const DeviceScheduler_ptr &dev_sched)
Get the memory alignment of the type T in bytes.
DeviceBuffer(DeviceBuffer &&other) noexcept
Move constructor for DeviceBuffer.
void copy_from_sycl_buffer(sycl::buffer< T > &buf)
Copy the content of a SYCL buffer into the buffer.
const std::shared_ptr< DeviceScheduler > & get_dev_scheduler_ptr() const
Gets the Device scheduler pointer corresponding to the held allocation.
T get_val_at_idx(size_t idx) const
Get the value at a given index in the buffer.
DeviceBuffer< T, new_target > copy_to() const
Copy the content of the buffer to a new buffer with a different USM target.
void append(const DeviceBuffer &other)
Append the content of another buffer to this one.
void copy_from(const DeviceBuffer< T, new_target > &other)
Copies the data from another buffer to this one.
void fill(T value, size_t idx_count)
Fill the first idx_count elements of the buffer with a given value.
size_t get_size() const
Gets the number of elements in the buffer.
~DeviceBuffer()
Destructor for DeviceBuffer.
DeviceBuffer(sycl::buffer< T > &syclbuf, size_t sz, std::shared_ptr< DeviceScheduler > dev_sched)
Construct a new Device Buffer object from a SYCL buffer with a given size.
void copy_from_stdvec(const std::vector< T > &vec, size_t sz)
Copy the content of a std::vector into the buffer.
DeviceScheduler & get_dev_scheduler() const
Gets the Device scheduler corresponding to the held allocation.
std::vector< T > copy_to_stdvec_idx_range(size_t begin, size_t end) const
Copies a specified range of elements from the buffer to a std::vector.
void reserve(size_t add_sz)
Reserves space in the buffer for add_sz elements, but doesn't change the buffer's size.
DeviceBuffer(size_t sz, DeviceScheduler_ptr dev_sched)
Construct a new Device Buffer object.
void expand(u32 add_sz)
Expand the buffer by add_sz elements.
void resize_discard_data(size_t new_size)
same as resize but data will not be copied if reallocation is needed
void shrink(u32 sub_sz)
Shrink the buffer by sub_sz elements.
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
DeviceBuffer & operator=(const DeviceBuffer &other)=delete
Deleted copy assignment operator.
sycl::buffer< T > copy_to_sycl_buffer() const
Copy the content of the buffer to a new SYCL buffer.
bool is_empty() const
Check if the buffer is empty.
DeviceBuffer(sycl::buffer< T > &syclbuf, std::shared_ptr< DeviceScheduler > dev_sched)
Construct a new Device Buffer object from a SYCL buffer.
DeviceBuffer< T, target > copy() const
Copy the current buffer.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage the scheduling of kernels on a device.
Class to manage a list of SYCL events.
void add_event(sycl::event e)
Add an event to the list of events.
void wait_and_throw()
Wait for all events in the list to be finished and throw an exception if one has occurred.
Class for holding a USM pointer.
This file contains the declaration of the memory handling and its methods.
Namespace for internal details of the logs module.
namespace for backends this one is named only sham since shambackends is too long to write
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
auto extract_pointer(std::unique_ptr< T > &o, SourceLocation loc=SourceLocation()) -> T
extract content out of unique_ptr
constexpr bool always_false_v
Helper variable template that is always false. Especially useful to perform static asserts based on t...
Utilities for safe type narrowing conversions.
constexpr u32 u32_max
u32 max value
This file contains the class definition for BufferEventHandler.
#define __shamrock_stack_entry()
Macro to create a stack entry.
provide information about the source location