Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
internal_alloc.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
16#include "shambase/memory.hpp"
18#include "shambase/string.hpp"
20#include "shamcomm/logs.hpp"
22#include <exception>
23
24namespace {
25
// File-local accumulator for the allocation statistics. It is updated by the
// register_alloc_* / register_free_* helpers of this anonymous namespace and handed out
// by value through sham::details::get_mem_perf_info().
26 sham::MemPerfInfos mem_perf_infos;
27
// Accounts for a device allocation of `size` bytes. `timed` is the duration measured by the
// caller (a difference of two get_wtime() readings) and is added to the cumulated device
// allocation time.
28 void register_alloc_device(size_t size, f64 timed) {
29
30 mem_perf_infos.allocated_byte_device += size;
31 mem_perf_infos.time_alloc_device += timed;
// High-water mark: raised whenever the live byte count exceeds the recorded maximum.
32 mem_perf_infos.max_allocated_byte_device = std::max(
33 mem_perf_infos.max_allocated_byte_device, mem_perf_infos.allocated_byte_device);
34
// NOTE(review): the two lines below are (name, timestamp, value) argument lists whose call
// heads are not visible in this listing; presumably register_counter_val(...), TODO confirm.
36 "Device Memory", shambase::details::get_wtime(), mem_perf_infos.allocated_byte_device);
38 "Device alloc time", shambase::details::get_wtime(), mem_perf_infos.time_alloc_device);
39 }
40
// Accounts for a shared-memory allocation of `size` bytes. `timed` is the duration measured by
// the caller (a difference of two get_wtime() readings) and is added to the cumulated shared
// allocation time.
41 void register_alloc_shared(size_t size, f64 timed) {
42
43 mem_perf_infos.allocated_byte_shared += size;
44 mem_perf_infos.time_alloc_shared += timed;
// High-water mark: raised whenever the live byte count exceeds the recorded maximum.
45 mem_perf_infos.max_allocated_byte_shared = std::max(
46 mem_perf_infos.max_allocated_byte_shared, mem_perf_infos.allocated_byte_shared);
47
49
// NOTE(review): the two lines below are (name, timestamp, value) argument lists whose call
// heads are not visible in this listing; presumably register_counter_val(...), TODO confirm.
50 "Shared Memory", shambase::details::get_wtime(), mem_perf_infos.allocated_byte_shared);
52 "Shared alloc time", shambase::details::get_wtime(), mem_perf_infos.time_alloc_shared);
53 }
54
// Accounts for a host allocation of `size` bytes. `timed` is the duration measured by the
// caller (a difference of two get_wtime() readings) and is added to the cumulated host
// allocation time.
55 void register_alloc_host(size_t size, f64 timed) {
56
57 mem_perf_infos.allocated_byte_host += size;
58 mem_perf_infos.time_alloc_host += timed;
// High-water mark: raised whenever the live byte count exceeds the recorded maximum.
59 mem_perf_infos.max_allocated_byte_host
60 = std::max(mem_perf_infos.max_allocated_byte_host, mem_perf_infos.allocated_byte_host);
61
// NOTE(review): the two lines below are (name, timestamp, value) argument lists whose call
// heads are not visible in this listing; presumably register_counter_val(...), TODO confirm.
63 "Host Memory", shambase::details::get_wtime(), mem_perf_infos.allocated_byte_host);
65 "Host alloc time", shambase::details::get_wtime(), mem_perf_infos.time_alloc_host);
66 }
67
// Accounts for the release of `size` bytes of device memory. `timed` is the duration measured
// by the caller and is added to the cumulated device free time. The recorded maximum is left
// untouched.
68 void register_free_device(size_t size, f64 timed) {
69
// NOTE(review): the counter looks unsigned (size_t), so a `size` larger than the recorded
// amount would wrap around rather than go negative; verify callers pass matching sizes.
70 mem_perf_infos.allocated_byte_device -= size;
71 mem_perf_infos.time_free_device += timed;
72
// NOTE(review): the two lines below are (name, timestamp, value) argument lists whose call
// heads are not visible in this listing; presumably register_counter_val(...), TODO confirm.
74 "Device Memory", shambase::details::get_wtime(), mem_perf_infos.allocated_byte_device);
76 "Device free time", shambase::details::get_wtime(), mem_perf_infos.time_free_device);
77 }
78
// Accounts for the release of `size` bytes of shared memory. `timed` is the duration measured
// by the caller and is added to the cumulated shared free time. The recorded maximum is left
// untouched.
79 void register_free_shared(size_t size, f64 timed) {
80
// NOTE(review): the counter looks unsigned (size_t), so a `size` larger than the recorded
// amount would wrap around rather than go negative; verify callers pass matching sizes.
81 mem_perf_infos.allocated_byte_shared -= size;
82 mem_perf_infos.time_free_shared += timed;
83
// NOTE(review): the two lines below are (name, timestamp, value) argument lists whose call
// heads are not visible in this listing; presumably register_counter_val(...), TODO confirm.
85 "Shared Memory", shambase::details::get_wtime(), mem_perf_infos.allocated_byte_shared);
87 "Shared free time", shambase::details::get_wtime(), mem_perf_infos.time_free_shared);
88 }
89
// Accounts for the release of `size` bytes of host memory. `timed` is the duration measured by
// the caller and is added to the cumulated host free time. The recorded maximum is left
// untouched.
90 void register_free_host(size_t size, f64 timed) {
91
// NOTE(review): the counter looks unsigned (size_t), so a `size` larger than the recorded
// amount would wrap around rather than go negative; verify callers pass matching sizes.
92 mem_perf_infos.allocated_byte_host -= size;
93 mem_perf_infos.time_free_host += timed;
94
// NOTE(review): the two lines below are (name, timestamp, value) argument lists whose call
// heads are not visible in this listing; presumably register_counter_val(...), TODO confirm.
96 "Host Memory", shambase::details::get_wtime(), mem_perf_infos.allocated_byte_host);
98 "Host free time", shambase::details::get_wtime(), mem_perf_infos.time_free_host);
99 }
100
101 template<sham::USMKindTarget target>
102 std::string get_mode_name();
103
104 template<>
105 std::string get_mode_name<sham::device>() {
106 return "device";
107 }
108
109 template<>
110 std::string get_mode_name<sham::shared>() {
111 return "shared";
112 }
113
114 template<>
115 std::string get_mode_name<sham::host>() {
116 return "host";
117 }
118
119} // namespace
120
121namespace sham::details {
122
123 MemPerfInfos get_mem_perf_info() { return mem_perf_infos; }
124
// Resets the three recorded maxima (host, device, shared) to the current live byte counts,
// so that peak tracking restarts from the present state.
// NOTE(review): the signature line of this routine is not visible in this listing; the symbol
// index of the page lists `void reset_mem_info_max()`, presumably this function (TODO confirm).
126 mem_perf_infos.max_allocated_byte_host = mem_perf_infos.allocated_byte_host;
127 mem_perf_infos.max_allocated_byte_device = mem_perf_infos.allocated_byte_device;
128 mem_perf_infos.max_allocated_byte_shared = mem_perf_infos.allocated_byte_shared;
129 }
130
// Builds a multi-line, human readable report (world size/rank, device name and the six
// allocation counters) with shambase::format and returns it as a string. It is appended to the
// allocation-failure messages of this file.
// NOTE(review): dev_sched and the ctx/device pointers behind it are dereferenced without a
// null check.
131 std::string log_mem_perf_info(const std::shared_ptr<DeviceScheduler> &dev_sched) {
132
133 return shambase::format(
134 R"log(
135 World infos :
136 World size = {}
137 World rank = {}
138 Device infos :
139 Device name = {}
140 Allocs :
141 max_allocated_byte_host = {}
142 max_allocated_byte_device = {}
143 max_allocated_byte_shared = {}
144 allocated_byte_host = {}
145 allocated_byte_device = {}
146 allocated_byte_shared = {}
147 )log",
// NOTE(review): the template above has nine `{}` placeholders but only the device-name
// argument is visible in this listing; the other eight argument lines are missing here.
150 dev_sched->ctx->device->dev.get_info<sycl::info::device::name>(),
157 }
158
159 template<USMKindTarget target>
160 void internal_free(
161 void *usm_ptr, size_t sz, const std::shared_ptr<DeviceScheduler> &dev_sched) {
162
163 StackEntry __st{};
164
165 f64 start_time = shambase::details::get_wtime();
166
167 shamcomm::logs::debug_alloc_ln(
168 "memoryHandle",
169 "free usm pointer size :",
170 sz,
171 " | ptr =",
172 usm_ptr,
173 " | mode =",
174 get_mode_name<target>());
175
176 sycl::context &sycl_ctx = dev_sched->ctx->ctx;
177 sycl::free(usm_ptr, sycl_ctx);
179 f64 end_time = shambase::details::get_wtime();
180
181 if constexpr (target == device) {
182 register_free_device(sz, end_time - start_time);
183 } else if constexpr (target == shared) {
184 register_free_shared(sz, end_time - start_time);
185 } else if constexpr (target == host) {
186 register_free_host(sz, end_time - start_time);
187 }
188 }
189
// Allocates `sz` bytes of USM memory of kind `target` (device, shared or host) in the SYCL
// context of `dev_sched`, optionally with an explicit `alignment`, and records the allocation
// in the file-local statistics.
//
// Visible flow: compare `sz` with the per-target maximum allocation size, validate `alignment`
// when one is given, call the matching sycl::malloc_* / sycl::aligned_alloc_* function, reject
// a null result, check the alignment of the returned pointer, then register the allocation
// together with the elapsed time.
//
// Returns the allocated pointer. A null allocation result throws std::runtime_error through
// shambase::throw_with_loc, with the memory statistics appended to the message.
//
// NOTE(review): several statement heads (the log/throw calls owning the orphaned argument
// lists below, and the definition of `end_time`) are not visible in this listing; the
// comments in this function only describe what is shown.
190 template<USMKindTarget target>
191 void *internal_alloc(
192 size_t sz,
193 const std::shared_ptr<DeviceScheduler> &dev_sched,
194 std::optional<size_t> alignment) {
195
196 StackEntry __st{};
197 f64 start_time = shambase::details::get_wtime();
198
// Argument list of a log call whose head is not visible in this listing.
200 "memoryHandle", "alloc usm pointer size :", sz, " | mode =", get_mode_name<target>());
201
// get_check_ref yields a reference to the pointee; presumably it throws on a null pointer
// like the documented unique_ptr overload (TODO confirm).
202 auto &ds = shambase::get_check_ref(dev_sched);
203 sycl::context &sycl_ctx = ds.ctx->ctx;
204 sycl::device &dev = ds.ctx->device->dev;
205
206 void *usm_ptr = nullptr;
207
// Helper that runs an allocation callable and stores its result in usm_ptr. A std::exception
// raised by the allocation is caught and a message embedding the memory statistics is built.
// NOTE(review): the statement consuming `log` is not visible in this listing; `ex` could be
// caught by const reference.
208 auto catch_alloc_except = [&](auto alloc_lambda) {
209 try {
210 usm_ptr = alloc_lambda();
211 } catch (std::exception &ex) {
212 std::string log = shambase::format(
213 "Alloc failed with exception : {}\nShamrock mem infos : {}",
214 ex.what(),
215 log_mem_perf_info(dev_sched));
217 }
218 };
219
// Per target, `sz` is compared with the limits reported by the queue's device properties;
// shared memory uses the smaller of the device and host limits.
220 // check max alloc sizes
221 if constexpr (target == device) {
222 if (sz > ds.get_queue().get_device_prop().max_mem_alloc_size_dev) {
223 std::string err_log = shambase::format(
224 "You are trying to allocate more than the maximum allocation size allowed by "
225 "the "
226 "device\n"
227 " size = {} | max_alloc_size = {}",
228 sz,
229 ds.get_queue().get_device_prop().max_mem_alloc_size_dev);
231 }
232 } else if constexpr (target == shared) {
233 size_t max_alloc_size_dev = ds.get_queue().get_device_prop().max_mem_alloc_size_dev;
234 size_t max_alloc_size_host = ds.get_queue().get_device_prop().max_mem_alloc_size_host;
235 if (sz > sycl::min(max_alloc_size_dev, max_alloc_size_host)) {
236 std::string err_log = shambase::format(
237 "You are trying to allocate more than the maximum allocation size allowed by "
238 "the "
239 "device\n"
240 " size = {} | max_alloc_size = {}",
241 sz,
242 sycl::min(max_alloc_size_dev, max_alloc_size_host));
244 }
245 } else if constexpr (target == host) {
246 if (sz > ds.get_queue().get_device_prop().max_mem_alloc_size_host) {
247 std::string err_log = shambase::format(
248 "You are trying to allocate more than the maximum allocation size allowed by "
249 "the "
250 "host\n"
251 " size = {} | max_alloc_size = {}",
252 sz,
253 ds.get_queue().get_device_prop().max_mem_alloc_size_host);
255 }
256 } else {
258 }
259
// An alignment was requested: it is tested against the device's mem_base_addr_align, and
// `sz` against the alignment, before the aligned allocation function is called.
260 if (alignment) {
261
262 if (*alignment % ds.get_queue().get_device_prop().mem_base_addr_align != 0) {
264 "The alignment of the USM pointer is not aligned with minimum device "
265 "alignment\n"
266 " alignment = {} | device alignment = {} | alignment % device alignment = {}",
267 *alignment,
268 ds.get_queue().get_device_prop().mem_base_addr_align,
269 *alignment % ds.get_queue().get_device_prop().mem_base_addr_align));
270 }
271
272 if (sz % *alignment != 0) {
274 "The size of the USM pointer is not aligned with the given alignment\n"
275 " size = {} | alignment = {} | size % alignment = {}",
276 sz,
277 *alignment,
278 sz % *alignment));
279 }
280
281 // TODO upgrade alignment to 256-bit for CUDA ?
282
283 if constexpr (target == device) {
284 catch_alloc_except([&] {
285 return sycl::aligned_alloc_device(*alignment, sz, dev, sycl_ctx);
286 });
287 } else if constexpr (target == shared) {
288 catch_alloc_except([&] {
289 return sycl::aligned_alloc_shared(*alignment, sz, dev, sycl_ctx);
290 });
291 } else if constexpr (target == host) {
292 catch_alloc_except([&] {
293 return sycl::aligned_alloc_host(*alignment, sz, sycl_ctx);
294 });
295 } else {
297 }
298 } else {
// No alignment requested: plain sycl::malloc_* call for the target.
299 if constexpr (target == device) {
300 catch_alloc_except([&] {
301 return sycl::malloc_device(sz, dev, sycl_ctx);
302 });
303 } else if constexpr (target == shared) {
304 catch_alloc_except([&] {
305 return sycl::malloc_shared(sz, dev, sycl_ctx);
306 });
307 } else if constexpr (target == host) {
308 catch_alloc_except([&] {
309 return sycl::malloc_host(sz, sycl_ctx);
310 });
311 } else {
313 }
314 }
315
// A null pointer at this point is treated as an allocation failure: the message carries the
// request details, and the memory statistics report is appended before throwing.
316 if (usm_ptr == nullptr) {
317 std::string err_msg = "";
318 if (alignment) {
319 err_msg = shambase::format(
320 "USM allocation failed, details : sz={}, target={}, alignment={}, alloc "
321 "result = {}",
322 sz,
323 get_mode_name<target>(),
324 *alignment,
325 usm_ptr);
326 } else {
327 err_msg = shambase::format(
328 "USM allocation failed, details : sz={}, target={}, alloc result = {}",
329 sz,
330 get_mode_name<target>(),
331 usm_ptr);
332 }
333 shambase::throw_with_loc<std::runtime_error>(err_msg + log_mem_perf_info(dev_sched));
334 }
335
// The new pointer is logged; with an explicit alignment the returned address is additionally
// checked with shambase::is_aligned.
336 if (alignment) {
337
339 "memoryHandle", "pointer created : ptr =", usm_ptr, "alignment =", *alignment);
340
341 if (!shambase::is_aligned(usm_ptr, *alignment)) {
343 "The pointer is not aligned with the given alignment");
344 }
345
346 } else {
347
349 "memoryHandle", "pointer created : ptr =", usm_ptr, "alignment = None");
350 }
351
353
// Account for the allocation in the counters of the matching target.
// NOTE(review): `end_time` is defined on a line that is not visible in this listing.
354 if constexpr (target == device) {
355 register_alloc_device(sz, end_time - start_time);
356 } else if constexpr (target == shared) {
357 register_alloc_shared(sz, end_time - start_time);
358 } else if constexpr (target == host) {
359 register_alloc_host(sz, end_time - start_time);
360 }
361
362 return usm_ptr;
363 }
364
365#ifndef DOXYGEN
366 template void internal_free<host>(
367 void *usm_ptr, size_t sz, const std::shared_ptr<DeviceScheduler> &dev_sched);
368 template void *internal_alloc<host>(
369 size_t sz,
370 const std::shared_ptr<DeviceScheduler> &dev_sched,
371 std::optional<size_t> alignment);
372 template void internal_free<device>(
373 void *usm_ptr, size_t sz, const std::shared_ptr<DeviceScheduler> &dev_sched);
374 template void *internal_alloc<device>(
375 size_t sz,
376 const std::shared_ptr<DeviceScheduler> &dev_sched,
377 std::optional<size_t> alignment);
378 template void internal_free<shared>(
379 void *usm_ptr, size_t sz, const std::shared_ptr<DeviceScheduler> &dev_sched);
380 template void *internal_alloc<shared>(
381 size_t sz,
382 const std::shared_ptr<DeviceScheduler> &dev_sched,
383 std::optional<size_t> alignment);
384#endif
385
386} // namespace sham::details
double f64
Alias for double.
This file contains the methods to actually allocate memory.
void * internal_alloc(size_t sz, const std::shared_ptr< DeviceScheduler > &dev_sched, std::optional< size_t > alignment)
Allocate a USM pointer with at least the given size in bytes.
MemPerfInfos get_mem_perf_info()
Retrieve the memory performance information.
void reset_mem_info_max()
Reset the memory information for the maximum allocated bytes.
@ host
Host memory.
@ device
Device memory.
@ shared
Shared memory.
bool is_aligned(const void *ptr, size_t alignment) noexcept
Check if a pointer is aligned with the given alignment.
Definition ptr.hpp:29
std::string readable_sizeof(double size)
Given a sizeof value, return a readable string. Example: readable_sizeof(1024*1024*1024) -> "1....
Definition string.hpp:139
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
Definition worldInfo.cpp:40
i32 world_size()
Gives the size of the MPI communicator.
Definition worldInfo.cpp:38
void register_counter_val(const std::string &name, f64 time, f64 val)
Register a counter value.
void debug_alloc_ln(std::string module_name, Types... var2)
Prints a log message with multiple arguments followed by a newline.
Definition logs.hpp:133
f64 get_wtime()
Returns the current wall clock time in seconds.
Structure to store performance information about memory allocation and deallocation.
f64 time_alloc_host
Time spent allocating memory on the host.
size_t max_allocated_byte_host
max bytes allocated on the host
size_t allocated_byte_shared
Bytes allocated in shared memory.
f64 time_free_device
Time spent deallocating memory on the device.
f64 time_free_shared
Time spent deallocating memory in shared memory.
size_t max_allocated_byte_device
max bytes allocated on the device
f64 time_alloc_device
Time spent allocating memory on the device.
size_t allocated_byte_device
Bytes allocated on the device.
size_t allocated_byte_host
Bytes allocated on the host.
f64 time_free_host
Time spent deallocating memory on the host.
f64 time_alloc_shared
Time spent allocating memory in shared memory.
size_t max_allocated_byte_shared
max bytes allocated in shared memory
Functions related to the MPI communicator.