Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
Device.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
19#include "shambase/memory.hpp"
21#include "shambase/popen.hpp"
22#include "shambase/string.hpp"
24#include "shamcmdopt/env.hpp"
25#include "shamcomm/logs.hpp"
26#include "shamcomm/mpiInfo.hpp"
27#include <fmt/ranges.h>
28#include <nlohmann/json.hpp>
29
30auto SHAM_MAX_ALLOC_SIZE
31 = shamcmdopt::getenv_str_register("SHAM_MAX_ALLOC_SIZE", "shamrock max alloc size if set");
32
33namespace {
34
35 std::optional<std::string> get_cpu_name() {
36#ifdef __linux__
37 std::string lscpu_json = "";
38 try {
39 lscpu_json = shambase::popen_fetch_output("lscpu -J");
40 } catch (const std::exception &e) {
41 return std::nullopt;
42 }
43
44 nlohmann::json lscpu_json_parsed;
45 try {
46 lscpu_json_parsed = nlohmann::json::parse(lscpu_json);
47 } catch (const std::exception &e) {
48 return std::nullopt;
49 }
50
51 try {
52 const auto &entries = lscpu_json_parsed.at("lscpu");
53 if (!entries.is_array()) {
54 return std::nullopt;
55 }
56 std::optional<std::string> model_name;
57 std::optional<std::string> socket_count;
58 for (const auto &entry : entries) {
59 const std::string field = entry.at("field").get<std::string>();
60 const std::string data = entry.at("data").get<std::string>();
61 if (field == "Model name:") {
62 model_name = data;
63 } else if (field == "Socket(s):") {
64 socket_count = data;
65 }
66 }
67 if (!model_name.has_value() || !socket_count.has_value() || model_name->empty()
68 || socket_count->empty()) {
69 return std::nullopt;
70 }
71
72 if (*socket_count == "1") {
73 return *model_name;
74 } else {
75 return fmt::format("{} x {}", *model_name, *socket_count);
76 }
77
78 } catch (const std::exception &e) {
79 return std::nullopt;
80 }
81#elif defined(__APPLE__)
82 // in that case the command is simply sysctl -n machdep.cpu.brand_string
83 std::string brand_string = "";
84 try {
85 brand_string = shambase::popen_fetch_output("sysctl -n machdep.cpu.brand_string");
86 shambase::replace_all(brand_string, "\n", "");
87 return brand_string;
88 } catch (const std::exception &e) {
89 return std::nullopt;
90 }
91 return brand_string;
92#else
93 return std::nullopt;
94#endif
95 }
96
97} // namespace
98
99namespace sham {
100
106 Backend get_device_backend(const sycl::device &dev) {
107 std::string pname = dev.get_platform().get_info<sycl::info::platform::name>();
108
109 // The platform name may include information about the device
110 // and/or the backend. We look for some keywords to determine
111 // the backend.
112 if (shambase::contain_substr(pname, "CUDA")) {
113 return Backend::CUDA; // NVIDIA CUDA
114 }
115 if (shambase::contain_substr(pname, "NVIDIA")) {
116 return Backend::CUDA;
117 }
118 if (shambase::contain_substr(pname, "ROCM")) {
119 return Backend::ROCM; // AMD ROCm
120 }
121 if (shambase::contain_substr(pname, "AMD")) {
122 return Backend::ROCM;
123 }
124 if (shambase::contain_substr(pname, "HIP")) {
125 return Backend::ROCM; // AMD ROCm
126 }
127 if (shambase::contain_substr(pname, "OpenMP")) {
128 return Backend::OPENMP; // OpenMP
129 }
130
131 return Backend::UNKNOWN; // Unknown backend
132 }
133
144 DeviceType get_device_type(const sycl::device &dev) {
145 auto DeviceType = dev.get_info<sycl::info::device::device_type>();
146 switch (DeviceType) {
147 case sycl::info::device_type::cpu: return DeviceType::CPU;
148 case sycl::info::device_type::gpu: return DeviceType::GPU;
149 default : return DeviceType::UNKNOWN;
150 }
151 }
152
/**
 * @brief Fetches a property of a SYCL device (for cases where multiple prop would have the
 * same name).
 *
 * Declares `std::optional<info_type> n` holding `sycl::info::device::info_` of `dev`. If the
 * query throws, a message is appended to `warnings` and `n` is left empty.
 *
 * The expansion site must provide `dev` (the queried device), `warnings` (a container of
 * strings supporting push_back) and `name` (a std::string used in the warning message).
 */
#define FETCH_PROPN(info_, info_type, n)                                                          \
    std::optional<info_type> n = [&]() -> std::optional<info_type> {                              \
        try {                                                                                      \
            return {dev.get_info<sycl::info::device::info_>()};                                    \
        } catch (...) {                                                                            \
            warnings.push_back(                                                                    \
                "dev.get_info<sycl::info::device::" #info_ ">() raised an exception for device "  \
                + name);                                                                           \
            return std::nullopt;                                                                   \
        }                                                                                          \
    }();

/**
 * @brief Fetches a property of a SYCL device.
 *
 * Same as FETCH_PROPN, with the declared variable named after the property itself.
 */
#define FETCH_PROP(info_, info_type) FETCH_PROPN(info_, info_type, info_)

/**
 * @brief Fetches a property of a SYCL device (for cases where multiple prop would have the
 * same name).
 *
 * Variant of FETCH_PROPN where `info_` is the full descriptor (nothing is prepended), so
 * descriptors living outside `sycl::info::device` can be queried. Same requirements on the
 * expansion site: `dev`, `warnings` and `name` must be in scope.
 */
#define FETCH_PROPN_FULL(info_, info_type, n)                                                     \
    std::optional<info_type> n = [&]() -> std::optional<info_type> {                              \
        try {                                                                                      \
            return {dev.get_info<info_>()};                                                        \
        } catch (...) {                                                                            \
            warnings.push_back(                                                                    \
                "dev.get_info<" #info_ ">() raised an exception for device " + name);             \
            return std::nullopt;                                                                   \
        }                                                                                          \
    }();
190
198 DeviceProperties fetch_properties(const sycl::device &dev) {
199
200 std::vector<std::string> warnings;
201
202 // Just to ensure that this one is not empty
203 std::string name = "?";
204 FETCH_PROPN(name, std::string, dev_name);
205 if (dev_name) {
206 name = *dev_name;
207 }
208
209#if SYCL_COMP_ACPP
210 if (get_device_backend(dev) == Backend::OPENMP) {
211 auto cpu_name = get_cpu_name();
212 if (cpu_name) {
213 name = *cpu_name;
214 }
215 }
216#endif
217
218 FETCH_PROP(vendor, std::string)
219
220 FETCH_PROP(device_type, sycl::info::device_type)
221 FETCH_PROP(vendor_id, uint32_t)
222 FETCH_PROP(max_compute_units, uint32_t)
223 FETCH_PROP(max_work_item_dimensions, uint32_t)
224 FETCH_PROPN(max_work_item_sizes<1>, sycl::id<1>, max_work_item_sizes_1d)
225 FETCH_PROPN(max_work_item_sizes<2>, sycl::id<2>, max_work_item_sizes_2d)
226 FETCH_PROPN(max_work_item_sizes<3>, sycl::id<3>, max_work_item_sizes_3d)
227 FETCH_PROP(max_work_group_size, size_t)
228 FETCH_PROP(max_num_sub_groups, uint32_t)
229 FETCH_PROP(sub_group_independent_forward_progress, bool)
230 FETCH_PROP(sub_group_sizes, std::vector<size_t>)
231
232 FETCH_PROP(preferred_vector_width_char, uint32_t)
233 FETCH_PROP(preferred_vector_width_short, uint32_t)
234 FETCH_PROP(preferred_vector_width_int, uint32_t)
235 FETCH_PROP(preferred_vector_width_long, uint32_t)
236 FETCH_PROP(preferred_vector_width_float, uint32_t)
237 FETCH_PROP(preferred_vector_width_double, uint32_t)
238 FETCH_PROP(preferred_vector_width_half, uint32_t)
239 FETCH_PROP(native_vector_width_char, uint32_t)
240 FETCH_PROP(native_vector_width_short, uint32_t)
241 FETCH_PROP(native_vector_width_int, uint32_t)
242 FETCH_PROP(native_vector_width_long, uint32_t)
243 FETCH_PROP(native_vector_width_float, uint32_t)
244 FETCH_PROP(native_vector_width_double, uint32_t)
245 FETCH_PROP(native_vector_width_half, uint32_t)
246
247 FETCH_PROP(max_clock_frequency, uint32_t)
248 FETCH_PROP(address_bits, uint32_t)
249 FETCH_PROP(max_mem_alloc_size, uint64_t)
250
251 // Image a really second class objects in SYCL right now ...
252 // FETCH_PROP(max_read_image_args, uint32_t)
253 // FETCH_PROP(max_write_image_args, uint32_t)
254 // FETCH_PROP(image2d_max_width, size_t)
255 // FETCH_PROP(image2d_max_height, size_t)
256 // FETCH_PROP(image3d_max_width, size_t)
257 // FETCH_PROP(image3d_max_height, size_t)
258 // FETCH_PROP(image3d_max_depth, size_t)
259 // FETCH_PROP(image_max_buffer_size, size_t)
260 // FETCH_PROP(max_samplers, uint32_t)
261
262 FETCH_PROP(max_parameter_size, size_t)
263 FETCH_PROP(mem_base_addr_align, uint32_t)
264 FETCH_PROP(half_fp_config, std::vector<sycl::info::fp_config>)
265 FETCH_PROP(single_fp_config, std::vector<sycl::info::fp_config>)
266 FETCH_PROP(double_fp_config, std::vector<sycl::info::fp_config>)
267 FETCH_PROP(global_mem_cache_type, sycl::info::global_mem_cache_type)
268 FETCH_PROP(global_mem_cache_line_size, uint32_t)
269 FETCH_PROP(global_mem_cache_size, uint64_t)
270 FETCH_PROP(global_mem_size, uint64_t)
271 FETCH_PROP(local_mem_type, sycl::info::local_mem_type)
272 FETCH_PROP(local_mem_size, uint64_t)
273 FETCH_PROP(error_correction_support, bool)
274#ifdef SYCL_COMP_INTEL_LLVM
275 FETCH_PROP(atomic_memory_order_capabilities, std::vector<sycl::memory_order>)
276 FETCH_PROP(atomic_fence_order_capabilities, std::vector<sycl::memory_order>)
277 FETCH_PROP(atomic_memory_scope_capabilities, std::vector<sycl::memory_scope>)
278 FETCH_PROP(atomic_fence_scope_capabilities, std::vector<sycl::memory_scope>)
279#endif
280 FETCH_PROP(profiling_timer_resolution, size_t)
281 FETCH_PROP(is_available, bool)
282 FETCH_PROP(execution_capabilities, std::vector<sycl::info::execution_capability>)
283 // FETCH_PROP(built_in_kernel_ids,std::vector<sycl::kernel_id>)
284 // FETCH_PROP(built_in_kernels, std::vector<std::string>)
285 // FETCH_PROP(platform, sycl::platform)
286
287 FETCH_PROP(driver_version, std::string)
288 FETCH_PROP(version, std::string)
289#ifdef SYCL_COMP_INTEL_LLVM
290 FETCH_PROP(backend_version, std::string)
291#endif
292 // FETCH_PROP(aspects, std::vector<sycl::aspect>)
293 // FETCH_PROP(printf_buffer_size, size_t)
294#ifdef SYCL_COMP_INTEL_LLVM
295 // FETCH_PROP(parent_device, device)
296#endif
297 FETCH_PROP(partition_max_sub_devices, uint32_t)
298 FETCH_PROP(partition_properties, std::vector<sycl::info::partition_property>)
299 FETCH_PROP(partition_affinity_domains, std::vector<sycl::info::partition_affinity_domain>)
300 FETCH_PROP(partition_type_property, sycl::info::partition_property)
301 FETCH_PROP(partition_type_affinity_domain, sycl::info::partition_affinity_domain)
302
303 auto physmem = sham::getPhysicalMemory();
304
305// On acpp 2^64-1 is returned, so we need to correct it
306// see : https://github.com/AdaptiveCpp/AdaptiveCpp/issues/1573
307#ifdef SYCL_COMP_ACPP
308 if (get_device_backend(dev) == Backend::OPENMP) {
309 // Correct memory size
310 if (physmem) {
311 global_mem_size = {*physmem};
312 }
313 }
314#endif
315
316 // with acpp 8 bit is returned for most backends so we default to 8 bytes (64 bits)
317 if (*mem_base_addr_align && mem_base_addr_align == 8) {
318 warnings.push_back(
319 shambase::format(
320 "mem_base_addr_align for is {} bits. I will assume that this is an "
321 "issue and default to 64 bits (8 bytes) instead.",
322 *mem_base_addr_align));
323 mem_base_addr_align = CHAR_BIT * 8;
324 }
325
326 // Some backends do not report sub_group_sizes, so we default to {1}
327 u32 default_work_group_size = 1;
328 if (!sub_group_sizes) {
329 sub_group_sizes = std::vector<size_t>{default_work_group_size};
330 warnings.push_back(
331 shambase::format(
332 "cannot fetch sub_group_sizes, defaulting to {}", default_work_group_size));
333 }
334 default_work_group_size = shambase::get_check_ref(sub_group_sizes)[0];
335
336 size_t max_alloc_dev = shambase::get_check_ref(max_mem_alloc_size);
337 size_t max_alloc_host = ((physmem) ? *physmem : i64_max);
338 if (SHAM_MAX_ALLOC_SIZE) {
339 try {
340 const auto max_alloc = std::stoull(SHAM_MAX_ALLOC_SIZE.value());
341 max_alloc_dev = max_alloc;
342 max_alloc_host = max_alloc;
343 } catch (const std::exception &e) {
344 warnings.push_back(
345 shambase::format(
346 "Could not parse SHAM_MAX_ALLOC_SIZE value '{}'. Error: {}. "
347 "Ignoring override.",
348 SHAM_MAX_ALLOC_SIZE.value(),
349 e.what()));
350 }
351 }
352
353 DeviceProperties ret = {
354 Vendor::UNKNOWN, // We cannot determine the vendor
355 get_device_backend(dev), // Query the backend based on the platform name
356 get_device_type(dev),
357 name,
358 dev.get_platform().get_info<sycl::info::platform::name>(),
359 shambase::get_check_ref(global_mem_size),
360 shambase::get_check_ref(global_mem_cache_line_size),
361 shambase::get_check_ref(global_mem_cache_size),
362 shambase::get_check_ref(local_mem_size),
363 shambase::get_check_ref(max_compute_units),
364 max_alloc_dev,
365 max_alloc_host,
366 // the SYCL standard returns the alignment in bits, we convert to bytes for convenience
367 shambase::get_check_ref(mem_base_addr_align) / CHAR_BIT,
368 shambase::get_check_ref(sub_group_sizes),
369 default_work_group_size,
370 std::nullopt,
371 warnings};
372
373 { // PCI id infos
374#if defined(SYCL_EXT_INTEL_DEVICE_INFO) && SYCL_EXT_INTEL_DEVICE_INFO >= 5
375 FETCH_PROPN_FULL(sycl::ext::intel::info::device::pci_address, std::string, pci_address)
376 if (pci_address) {
377 ret.pci_address = *pci_address;
378 }
379#endif
380 }
381
382 return ret;
383 }
384
395 const sycl::device &dev, const DeviceProperties &prop) {
396 bool dgpu_capable = false;
397
398 // If CUDA-aware MPI is enabled, and the device is a CUDA device,
399 // then we can use it
400 if (shamcomm::is_direct_comm_aware(shamcomm::get_mpi_cuda_aware_status())
401 && (prop.backend == Backend::CUDA)) {
402 dgpu_capable = true;
403 }
404
405 // Same for ROCm-aware MPI and ROCm devices
406 if (shamcomm::is_direct_comm_aware(shamcomm::get_mpi_rocm_aware_status())
407 && (prop.backend == Backend::ROCM)) {
408 dgpu_capable = true;
409 }
410
411 // And for OpenMP since the data is on host is it by definition aware
412 if (prop.backend == Backend::OPENMP) {
413 dgpu_capable = true;
414 }
415
416 // For other cases we can still force the DGPU state by setting a forced state
417 if (auto forcing = shamcomm::should_force_dgpu_state()) {
418 dgpu_capable = shamcomm::is_direct_comm_aware(*forcing);
419 }
420
421 return DeviceMPIProperties{dgpu_capable};
422 }
423
432 std::vector<sycl::device> get_sycl_device_list() {
433 std::vector<sycl::device> devs; // The list of devices to be returned
434 const auto &Platforms = sycl::platform::get_platforms();
435 for (const auto &Platform : Platforms) {
436 const auto &Devices = Platform.get_devices();
437 for (const auto &Device : Devices) {
438 devs.push_back(Device);
439 }
440 }
441 return devs;
442 }
443
453 Device sycl_dev_to_sham_dev(usize i, const sycl::device &dev) {
454 DeviceProperties prop = fetch_properties(dev); // Get the properties of the device
455 DeviceMPIProperties propmpi = {false}; // Get the MPI properties
456 return Device{
457 i, // The index of the device
458 dev, // The SYCL device
459 prop, // The properties of the device
460 propmpi // The MPI properties of the device
461 };
462 }
463
472 std::vector<std::unique_ptr<Device>> get_device_list() {
473 std::vector<sycl::device> devs = get_sycl_device_list();
474 std::vector<std::unique_ptr<Device>> ret; // The return list of unique pointers to Device
475 ret.reserve(devs.size());
476
477 for (const sycl::device &dev : devs) {
478 usize i = ret.size(); // Get the current index of the device
479 ret.push_back(std::make_unique<Device>(sycl_dev_to_sham_dev(i, dev)));
480 }
481
482 return ret;
483 }
484
486
488 shamcomm::logs::raw_ln(" Device info :");
489 switch (prop.backend) {
490 case sham::Backend::OPENMP : shamcomm::logs::raw_ln(" - Backend : OpenMP"); break;
491 case sham::Backend::CUDA : shamcomm::logs::raw_ln(" - Backend : CUDA"); break;
492 case sham::Backend::ROCM : shamcomm::logs::raw_ln(" - Backend : ROCM"); break;
493 case sham::Backend::UNKNOWN: shamcomm::logs::raw_ln(" - Backend : Unknown"); break;
494 }
495 switch (prop.vendor) {
496 case sham::Vendor::AMD : shamcomm::logs::raw_ln(" - Vendor : AMD"); break;
497 case sham::Vendor::APPLE : shamcomm::logs::raw_ln(" - Vendor : Apple"); break;
498 case sham::Vendor::INTEL : shamcomm::logs::raw_ln(" - Vendor : Intel"); break;
499 case sham::Vendor::NVIDIA : shamcomm::logs::raw_ln(" - Vendor : Nvidia"); break;
500 case sham::Vendor::UNKNOWN: shamcomm::logs::raw_ln(" - Vendor : Unknown"); break;
501 }
502 logger::raw_ln(" - Global mem size :", shambase::readable_sizeof(prop.global_mem_size));
503 logger::raw_ln(
505 logger::raw_ln(
507 logger::raw_ln(" - Local mem size :", shambase::readable_sizeof(prop.local_mem_size));
508 logger::raw_ln(" - Direct MPI capable :", mpi_prop.is_mpi_direct_capable);
509 }
510
511} // namespace sham
#define FETCH_PROPN_FULL(info_, info_type, n)
Fetches a property of a SYCL device (for cases where multiple prop would have the same name)
Definition Device.cpp:180
#define FETCH_PROPN(info_, info_type, n)
Fetches a property of a SYCL device (for cases where multiple prop would have the same name)
Definition Device.cpp:167
#define FETCH_PROP(info_, info_type)
Fetches a property of a SYCL device.
Definition Device.cpp:154
std::uint32_t u32
32 bit unsigned integer
std::size_t usize
size_t alias
Represents a SYCL device.
Definition Device.hpp:147
DeviceMPIProperties mpi_prop
Properties of the device regarding MPI.
Definition Device.hpp:170
sycl::device dev
The SYCL device object.
Definition Device.hpp:157
DeviceProperties prop
Properties of the device.
Definition Device.hpp:165
void update_mpi_prop()
Update the MPI properties of the device.
Definition Device.cpp:485
void print_info()
Print info about the device.
Definition Device.cpp:487
Provide information about MPI capabilities.
Namespace for backends; it is named only sham since shambackends is too long to write.
DeviceProperties fetch_properties(const sycl::device &dev)
Fetches the properties of a SYCL device.
Definition Device.cpp:198
Device sycl_dev_to_sham_dev(usize i, const sycl::device &dev)
Convert a SYCL device to a shamrock backend device.
Definition Device.cpp:453
std::optional< std::size_t > getPhysicalMemory()
Get the amount of physical memory (RAM) available on the system, in bytes.
Definition sysinfo.cpp:51
std::vector< sycl::device > get_sycl_device_list()
Get a list of all SYCL devices.
Definition Device.cpp:432
DeviceMPIProperties fetch_mpi_properties(const sycl::device &dev, const DeviceProperties &prop)
Fetches the MPI-related properties of a SYCL device.
Definition Device.cpp:394
std::vector< std::unique_ptr< Device > > get_device_list()
Get a list of all available devices.
Definition Device.cpp:472
DeviceType get_device_type(const sycl::device &dev)
Returns the type of a SYCL device.
Definition Device.cpp:144
Backend get_device_backend(const sycl::device &dev)
Returns the type of backend of a SYCL device.
Definition Device.cpp:106
DeviceType
The type of a device.
Definition Device.hpp:67
std::string readable_sizeof(double size)
Given a sizeof value, return a readable string. Example: readable_sizeof(1024*1024*1024) -> "1....
Definition string.hpp:139
void replace_all(std::string &inout, std::string_view what, std::string_view with)
Replace all occurrences of a search string with another.
Definition string.hpp:183
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110
std::string popen_fetch_output(const char *command)
Run a command and return the output.
Definition popen.cpp:23
bool contain_substr(std::string str, std::string what)
Check if a substring is present in a given string.
Definition string.hpp:258
std::optional< std::string > getenv_str_register(const char *env_var, std::string desc)
Get the content of the environment variable if it exists and register its documentation.
Definition env.hpp:70
std::optional< StateMPI_Aware > should_force_dgpu_state()
Whether the DGPU state should be forced.
Definition mpiInfo.cpp:113
StateMPI_Aware get_mpi_cuda_aware_status()
Get the MPI CUDA aware capability.
Definition mpiInfo.cpp:52
StateMPI_Aware get_mpi_rocm_aware_status()
Get the MPI ROCM aware capability.
Definition mpiInfo.cpp:60
constexpr i64 i64_max
i64 max value
Properties of a device.
Definition Device.hpp:84
usize global_mem_size
The amount of global memory on the device in bytes.
Definition Device.hpp:101
usize global_mem_cache_size
The amount of global memory cache on the device in bytes.
Definition Device.hpp:107
std::optional< std::string > pci_address
PCI address of the device.
Definition Device.hpp:131
usize global_mem_cache_line_size
The size of the cache line used by the device in bytes.
Definition Device.hpp:104
Vendor vendor
The vendor of the device.
Definition Device.hpp:86
usize local_mem_size
The amount of shared local memory on the device in bytes.
Definition Device.hpp:110
Backend backend
The backend of the device.
Definition Device.hpp:89