28 void register_alloc_device(
size_t size,
f64 timed) {
38 "Device alloc time", shambase::details::get_wtime(), mem_perf_infos.
time_alloc_device);
41 void register_alloc_shared(
size_t size,
f64 timed) {
52 "Shared alloc time", shambase::details::get_wtime(), mem_perf_infos.
time_alloc_shared);
55 void register_alloc_host(
size_t size,
f64 timed) {
65 "Host alloc time", shambase::details::get_wtime(), mem_perf_infos.
time_alloc_host);
68 void register_free_device(
size_t size,
f64 timed) {
76 "Device free time", shambase::details::get_wtime(), mem_perf_infos.
time_free_device);
79 void register_free_shared(
size_t size,
f64 timed) {
87 "Shared free time", shambase::details::get_wtime(), mem_perf_infos.
time_free_shared);
90 void register_free_host(
size_t size,
f64 timed) {
98 "Host free time", shambase::details::get_wtime(), mem_perf_infos.
time_free_host);
101 template<sham::USMKindTarget target>
102 std::string get_mode_name();
105 std::string get_mode_name<sham::device>() {
110 std::string get_mode_name<sham::shared>() {
115 std::string get_mode_name<sham::host>() {
121namespace sham::details {
131 std::string log_mem_perf_info(
const std::shared_ptr<DeviceScheduler> &dev_sched) {
133 return shambase::format(
141 max_allocated_byte_host = {}
142 max_allocated_byte_device = {}
143 max_allocated_byte_shared = {}
144 allocated_byte_host = {}
145 allocated_byte_device = {}
146 allocated_byte_shared = {}
150 dev_sched->ctx->device->dev.get_info<sycl::info::device::name>(),
159 template<USMKindTarget target>
161 void *usm_ptr,
size_t sz,
const std::shared_ptr<DeviceScheduler> &dev_sched) {
165 f64 start_time = shambase::details::get_wtime();
167 shamcomm::logs::debug_alloc_ln(
169 "free usm pointer size :",
174 get_mode_name<target>());
176 sycl::context &sycl_ctx = dev_sched->ctx->ctx;
177 sycl::free(usm_ptr, sycl_ctx);
179 f64 end_time = shambase::details::get_wtime();
181 if constexpr (target ==
device) {
182 register_free_device(sz, end_time - start_time);
183 }
else if constexpr (target ==
shared) {
184 register_free_shared(sz, end_time - start_time);
185 }
else if constexpr (target ==
host) {
186 register_free_host(sz, end_time - start_time);
190 template<USMKindTarget target>
193 const std::shared_ptr<DeviceScheduler> &dev_sched,
194 std::optional<size_t> alignment) {
200 "memoryHandle",
"alloc usm pointer size :", sz,
" | mode =", get_mode_name<target>());
203 sycl::context &sycl_ctx = ds.ctx->ctx;
204 sycl::device &dev = ds.ctx->device->dev;
206 void *usm_ptr =
nullptr;
208 auto catch_alloc_except = [&](
auto alloc_lambda) {
210 usm_ptr = alloc_lambda();
211 }
catch (std::exception &ex) {
212 std::string log = shambase::format(
213 "Alloc failed with exception : {}\nShamrock mem infos : {}",
215 log_mem_perf_info(dev_sched));
221 if constexpr (target ==
device) {
222 if (sz > ds.get_queue().get_device_prop().max_mem_alloc_size_dev) {
223 std::string err_log = shambase::format(
224 "You are trying to allocate more than the maximum allocation size allowed by "
227 " size = {} | max_alloc_size = {}",
229 ds.get_queue().get_device_prop().max_mem_alloc_size_dev);
232 }
else if constexpr (target ==
shared) {
233 size_t max_alloc_size_dev = ds.get_queue().get_device_prop().max_mem_alloc_size_dev;
234 size_t max_alloc_size_host = ds.get_queue().get_device_prop().max_mem_alloc_size_host;
235 if (sz > sycl::min(max_alloc_size_dev, max_alloc_size_host)) {
236 std::string err_log = shambase::format(
237 "You are trying to allocate more than the maximum allocation size allowed by "
240 " size = {} | max_alloc_size = {}",
242 sycl::min(max_alloc_size_dev, max_alloc_size_host));
245 }
else if constexpr (target ==
host) {
246 if (sz > ds.get_queue().get_device_prop().max_mem_alloc_size_host) {
247 std::string err_log = shambase::format(
248 "You are trying to allocate more than the maximum allocation size allowed by "
251 " size = {} | max_alloc_size = {}",
253 ds.get_queue().get_device_prop().max_mem_alloc_size_host);
262 if (*alignment % ds.get_queue().get_device_prop().mem_base_addr_align != 0) {
264 "The alignment of the USM pointer is not aligned with minimum device "
266 " alignment = {} | device alignment = {} | alignment % device alignment = {}",
268 ds.get_queue().get_device_prop().mem_base_addr_align,
269 *alignment % ds.get_queue().get_device_prop().mem_base_addr_align));
272 if (sz % *alignment != 0) {
274 "The size of the USM pointer is not aligned with the given alignment\n"
275 " size = {} | alignment = {} | size % alignment = {}",
283 if constexpr (target ==
device) {
284 catch_alloc_except([&] {
285 return sycl::aligned_alloc_device(*alignment, sz, dev, sycl_ctx);
287 }
else if constexpr (target ==
shared) {
288 catch_alloc_except([&] {
289 return sycl::aligned_alloc_shared(*alignment, sz, dev, sycl_ctx);
291 }
else if constexpr (target ==
host) {
292 catch_alloc_except([&] {
293 return sycl::aligned_alloc_host(*alignment, sz, sycl_ctx);
299 if constexpr (target ==
device) {
300 catch_alloc_except([&] {
301 return sycl::malloc_device(sz, dev, sycl_ctx);
303 }
else if constexpr (target ==
shared) {
304 catch_alloc_except([&] {
305 return sycl::malloc_shared(sz, dev, sycl_ctx);
307 }
else if constexpr (target ==
host) {
308 catch_alloc_except([&] {
309 return sycl::malloc_host(sz, sycl_ctx);
316 if (usm_ptr ==
nullptr) {
317 std::string err_msg =
"";
319 err_msg = shambase::format(
320 "USM allocation failed, details : sz={}, target={}, alignment={}, alloc "
323 get_mode_name<target>(),
327 err_msg = shambase::format(
328 "USM allocation failed, details : sz={}, target={}, alloc result = {}",
330 get_mode_name<target>(),
339 "memoryHandle",
"pointer created : ptr =", usm_ptr,
"alignment =", *alignment);
343 "The pointer is not aligned with the given alignment");
349 "memoryHandle",
"pointer created : ptr =", usm_ptr,
"alignment = None");
354 if constexpr (target ==
device) {
355 register_alloc_device(sz, end_time - start_time);
356 }
else if constexpr (target ==
shared) {
357 register_alloc_shared(sz, end_time - start_time);
358 }
else if constexpr (target ==
host) {
359 register_alloc_host(sz, end_time - start_time);
366 template void internal_free<host>(
367 void *usm_ptr,
size_t sz,
const std::shared_ptr<DeviceScheduler> &dev_sched);
368 template void *internal_alloc<host>(
370 const std::shared_ptr<DeviceScheduler> &dev_sched,
371 std::optional<size_t> alignment);
372 template void internal_free<device>(
373 void *usm_ptr,
size_t sz,
const std::shared_ptr<DeviceScheduler> &dev_sched);
374 template void *internal_alloc<device>(
376 const std::shared_ptr<DeviceScheduler> &dev_sched,
377 std::optional<size_t> alignment);
378 template void internal_free<shared>(
379 void *usm_ptr,
size_t sz,
const std::shared_ptr<DeviceScheduler> &dev_sched);
380 template void *internal_alloc<shared>(
382 const std::shared_ptr<DeviceScheduler> &dev_sched,
383 std::optional<size_t> alignment);
double f64
Alias for double.
This file contains the methods to actually allocate memory.
void * internal_alloc(size_t sz, const std::shared_ptr< DeviceScheduler > &dev_sched, std::optional< size_t > alignment)
Allocate a USM pointer with at least the given size in bytes.
MemPerfInfos get_mem_perf_info()
Retrieve the memory performance information.
void reset_mem_info_max()
Reset the memory information for the maximum allocated bytes.
bool is_aligned(const void *ptr, size_t alignment) noexcept
Check if a pointer is aligned with the given alignment.
std::string readable_sizeof(double size)
Given a sizeof value, return a readable string. Example: readable_sizeof(1024*1024*1024) -> "1....
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
i32 world_size()
Gives the size of the MPI communicator.
void register_counter_val(const std::string &name, f64 time, f64 val)
Register a counter value.
void debug_alloc_ln(std::string module_name, Types... var2)
Prints a log message with multiple arguments followed by a newline.
f64 get_wtime()
Returns the current wall clock time in seconds.
Structure to store performance information about memory allocation and deallocation.
f64 time_alloc_host
Time spent allocating memory on the host.
size_t max_allocated_byte_host
Maximum bytes allocated on the host.
size_t allocated_byte_shared
Bytes allocated in shared memory.
f64 time_free_device
Time spent deallocating memory on the device.
f64 time_free_shared
Time spent deallocating memory in shared memory.
size_t max_allocated_byte_device
Maximum bytes allocated on the device.
f64 time_alloc_device
Time spent allocating memory on the device.
size_t allocated_byte_device
Bytes allocated on the device.
size_t allocated_byte_host
Bytes allocated on the host.
f64 time_free_host
Time spent deallocating memory on the host.
f64 time_alloc_shared
Time spent allocating memory in shared memory.
size_t max_allocated_byte_shared
Maximum bytes allocated in shared memory.
Functions related to the MPI communicator.