51namespace shamsys::instance::details {
53 void print_device_list() {
58 for_each_device([&](
u32 key_global,
const sycl::platform &plat,
const sycl::device &dev) {
59 auto PlatformName = plat.get_info<sycl::info::platform::name>();
60 auto DeviceName = dev.get_info<sycl::info::device::name>();
67 "| {:>4} | {:>2} | {:>29.29} | {:>24.24} | {:>6} |",
80 std::string
print =
"Cluster SYCL Info : \n";
81 print += (
"----------------------------------------------------------------------------"
83 print += (
"| rank | id | Device name | Platform name | "
85 print += (
"----------------------------------------------------------------------------"
88 print += (
"----------------------------------------------------------------------------"
90 printf(
"%s\n",
print.data());
98 bool initialized =
false;
100 std::shared_ptr<sham::Device> device_compute;
101 std::shared_ptr<sham::Device> device_alt;
103 std::shared_ptr<sham::DeviceContext> ctx_compute;
104 std::shared_ptr<sham::DeviceContext> ctx_alt;
106 std::shared_ptr<sham::DeviceScheduler> sched_compute;
107 std::shared_ptr<sham::DeviceScheduler> sched_alt;
109 std::string callback_mem_perf_info() {
112 return "Memory usage & performance info:\n"
113 + sham::details::log_mem_perf_info(sched_compute);
116 void init_device_scheduling() {
118 ctx_compute = std::make_shared<sham::DeviceContext>(device_compute);
119 ctx_alt = std::make_shared<sham::DeviceContext>(device_alt);
121 sched_compute = std::make_shared<sham::DeviceScheduler>(ctx_compute);
122 sched_alt = std::make_shared<sham::DeviceScheduler>(ctx_alt);
124 test_device_scheduler(sched_compute);
125 test_device_scheduler(sched_alt);
127 shambase::add_callstack_gen_info_generator(callback_mem_perf_info);
135 void init_queues(std::string search_key) {
140 device_alt = std::move(devs.device_alt);
141 device_compute = std::move(devs.device_compute);
143 init_device_scheduling();
150 device_compute.reset();
156 sched_compute.reset();
161namespace shamsys::instance {
163 u32 compute_queue_eu_count = 64;
165 u32 get_compute_queue_eu_count(
u32 id) {
return compute_queue_eu_count; }
172 std::string print_buf =
"";
174 std::optional<u32> loc = shamcomm::node_local_rank();
176 print_buf = shambase::format(
177 "| {:>4} | {:>8} | {:>12} | {:>16} |\n",
183 print_buf = shambase::format(
184 "| {:>4} | {:>8} | {:>12} | {:>16} |\n",
192 shamalgs::collective::gather_str(print_buf, recv);
195 std::string print =
"Queue map : \n";
196 print += (
"----------------------------------------------------\n");
197 print += (
"| rank | local id | alt queue id | compute queue id |\n");
198 print += (
"----------------------------------------------------\n");
200 print += (
"----------------------------------------------------");
201 printf(
"%s\n\n", print.data());
207 void print_device_list_debug() {
210 std::string print_buf =
"device avail : \n";
213 [&](
u32 key_global,
const sycl::platform &plat,
const sycl::device &dev) {
214 auto PlatformName = plat.get_info<sycl::info::platform::name>();
215 auto DeviceName = dev.get_info<sycl::info::device::name>();
217 std::string devname = DeviceName;
218 std::string platname = PlatformName;
219 std::string devtype =
"truc";
221 print_buf += std::to_string(key_global) +
" " + devname +
" " + platname +
"\n";
224 shamlog_debug_sycl_ln(
"InitSYCL", print_buf);
229 void start_sycl_auto(std::string search_key) {
232 tmp::print_device_list_debug();
234 if (syclinit::initialized) {
235 throw ShamsysInstanceException(
"Sycl is already initialized");
239 shamlog_debug_ln(
"Sys",
"start sycl queues ...");
242 syclinit::init_queues(search_key);
249 mpi::init(&mpi_info.argc, &mpi_info.argv);
266 error = mpi::comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL);
268 if (error != MPI_SUCCESS) {
275 "[{:03}]: \x1B[32mMPI_Init : node n {:03} | world size : {} | name = {}\033[0m",
281 mpi::barrier(MPI_COMM_WORLD);
284 shamlog_debug_ln(
"NodeInstance",
"------------ MPI init ok ------------");
285 shamlog_debug_ln(
"NodeInstance",
"creating MPI type for interop");
287 create_sycl_mpi_types();
289 shamlog_debug_ln(
"NodeInstance",
"MPI type for interop created");
290 shamlog_debug_ln(
"NodeInstance",
"------------ MPI / SYCL init ok ------------");
292 mpidtypehandler::init_mpidtype();
296 "SHAM_MPI_INIT_STRATEGY",
298 "Select the MPI init strategy (mpifirst, syclfirst) [default: syclfirst]");
302 if (init_strategy ==
"syclfirst") {
303 start_sycl_auto(search_key);
305 }
else if (init_strategy ==
"mpifirst") {
307 start_sycl_auto(search_key);
316 void init(
int argc,
char *argv[]) {
318 std::optional<shamcomm::StateMPI_Aware> forced_state = std::nullopt;
338 logger::err_ln(
"NodeInstance",
"Please specify a sycl configuration (--sycl-cfg x:x)");
346 mpidtypehandler::free_mpidtype();
348 free_sycl_mpi_types();
351 logger::print_faint_row();
352 logger::raw_ln(
" - MPI finalize \nExiting ...\n");
353 logger::raw_ln(
" Hopefully it was quick :')\n");
363 syclinit::finalize();
378 std::shared_ptr<sham::DeviceScheduler> get_compute_scheduler_ptr() {
379 return syclinit::sched_compute;
382 std::shared_ptr<sham::DeviceScheduler> get_alt_scheduler_ptr() {
return syclinit::sched_alt; }
384 void print_device_info(
const sycl::device &Device) {
385 std::cout <<
" - " << Device.get_info<sycl::info::device::name>() <<
" "
387 Device.get_info<sycl::info::device::global_mem_size>())
391 void print_device_list() { details::print_device_list(); }
401 void check_dgpu_available() {
403 using namespace shambase::term_colors;
405 u32 loc_use_direct_gpu
408 u32 num_dgpu_use = shamalgs::collective::allreduce_sum(loc_use_direct_gpu);
415 }
else if (num_dgpu_use > 0) {
418 " - MPI use Direct Comm : {} ({} of {})",
424 shambase::format(
" - MPI use Direct Comm : {}",
col8b_red() +
"No" +
reset()));
Shamrock communication buffers.
This header does the MPI include and wrap MPI calls.
Header file describing a Node Instance.
void start_mpi(MPIInitInfo mpi_info)
Start MPI.
void print_queue_map()
Print SYCL queue map.
sycl::queue & get_compute_queue(u32 id=0)
void init_sycl_mpi(std::string search_key, MPIInitInfo mpi_info)
Start SYCL & MPI.
sycl::queue & get_alt_queue(u32 id=0)
Get the alternative queue.
bool is_initialized()
to check whether the NodeInstance is initialized
void close()
Close the NodeInstance, a.k.a. finalize both MPI & SYCL.
void close_mpi()
Finalize MPI.
std::uint32_t u32
32 bit unsigned integer
Class to manage the scheduling of kernels on a device.
Exception type for the NodeInstance.
This header file contains utility functions related to exception handling in the code.
MPI string gather / allgather helpers (declarations; implementations in shamalgs/src/collective/gathe...
void gather_str(const std::string &send_vec, std::string &recv_vec)
Gathers a string from all nodes and store the result in a std::string.
Functions related to the MPI communicator.
Provide information about MPI capabilities.
Use this header to include MPI properly.
void print_buf(sycl::buffer< T > &buf, u32 len, u32 column_count, std::string_view fmt)
Print the content of a sycl::buffer
void print()
Prints a log message with no arguments.
std::string readable_sizeof(double size)
Given a sizeof value, return a readable string. Example : readable_sizeof(1024*1024*1024) -> "1....
std::string trunc_str(std::string s, u32 max_len)
Truncate a string to a specified length, adding an ellipsis if necessary.
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
std::string getDevice_type(const sycl::device &Device)
Get the Device Type Name.
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.
std::string getenv_str_default_register(const char *env_var, std::string default_val, std::string desc)
Get the content of the environment variable if it exists and register its documentation,...
bool has_option(const std::string_view &option_name)
Check if an option is present.
std::string_view get_option(const std::string_view &option_name)
Get the value of an option.
void print_mpi_comm_info()
Print the MPI communicator information.
void fetch_world_info()
Gets the information about the MPI communicator.
i32 world_rank()
Gives the rank of the current process in the MPI communicator.
@ ForcedYes
Feature forced on by the user.
@ ForcedNo
Feature forced off by the user.
i32 world_size()
Gives the size of the MPI communicator.
void fetch_mpi_capabilities(std::optional< StateMPI_Aware > forced_state)
Fetch the MPI capabilities.
std::string get_process_name()
Get the process name.
bool is_mpi_initialized()
Check if MPI is initialized.
void print_mpi_capabilities()
Print the MPI capabilities.
DeviceSelectRet_t select_devices(std::string sycl_cfg)
Select the devices for the queues.
void change_log_format()
Change the log formatter according to the SHAMLOGFORMATTER and SHAMLOG_ERR_ON_EXCEPT environment vari...
u32 for_each_device(std::function< void(u32, const sycl::platform &, const sycl::device &)> fct)
Iterate over all SYCL devices and perform a given function.
This file contains the definition for the stacktrace related functionality.
Struct containing MPI Init information. Usage.
const std::string reset()
Get the reset terminal escape char.
const std::string col8b_yellow()
Get the yellow terminal escape char.
const std::string col8b_green()
Get the green terminal escape char.
const std::string col8b_red()
Get the red terminal escape char.
This file contains tty info getters.
Functions related to the MPI communicator.