Shamrock/doxygen/sycl__utils_8hpp_source.html

// -------------------------------------------------------//

//

// SHAMROCK code for hydrodynamics

// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>

// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1

// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information

//

// -------------------------------------------------------//


#pragma once


#include "shambase/exception.hpp"

#include "shambase/integer.hpp"

#include "shambase/stacktrace.hpp"

#include "shambackends/sycl.hpp"

#include "shambackends/vec.hpp"

#include "shamcomm/logs.hpp"

#include <stdexcept>


#ifdef SHAMROCK_USE_NVTX

    #include <nvtx3/nvtx3.hpp>

#endif


namespace shambase {


    template<class T>


    void check_buffer_size(

        sycl::buffer<T> &buf, u64 max_range, const SourceLocation loc = SourceLocation()) {

        if (buf.size() < max_range) {

            throw make_except_with_loc<std::invalid_argument>("buffer is too small", loc);

        }

    }


    inline std::string getDevice_type(const sycl::device &Device) {

        auto DeviceType = Device.get_info<sycl::info::device::device_type>();

        switch (DeviceType) {

        case sycl::info::device_type::cpu        : return "CPU";

        case sycl::info::device_type::gpu        : return "GPU";

        case sycl::info::device_type::host       : return "HOST";

        case sycl::info::device_type::accelerator: return "ACCELERATOR";

        default                                  : return "UNKNOWN";

        }

    }


    inline sycl::nd_range<1> make_range(u32 length, const u32 group_size = 32) {

        u32 group_cnt = shambase::group_count(length, group_size);

        u32 len       = group_cnt * group_size;

        return sycl::nd_range<1>{len, group_size};

    }


    enum ParallelForWrapMode { PARALLEL_FOR, PARALLEL_FOR_ROUND, ND_RANGE };


#ifdef SHAMROCK_LOOP_DEFAULT_PARALLEL_FOR

    constexpr ParallelForWrapMode default_loop_mode = PARALLEL_FOR;

#endif


#ifdef SHAMROCK_LOOP_DEFAULT_PARALLEL_FOR_ROUND

    constexpr ParallelForWrapMode default_loop_mode = PARALLEL_FOR_ROUND;

#endif


#ifdef SHAMROCK_LOOP_DEFAULT_ND_RANGE

    constexpr ParallelForWrapMode default_loop_mode = ND_RANGE;

#endif


    constexpr u32 default_gsize    = SHAMROCK_LOOP_GSIZE;

    constexpr u32 default_gsize_2d = 16;

    constexpr u32 default_gsize_3d = 4;


    template<

        u32 group_size           = default_gsize,

        ParallelForWrapMode mode = default_loop_mode,

        class LambdaKernel>

    inline void parallel_for(sycl::handler &cgh, u32 length, const char *name, LambdaKernel &&ker) {


#ifdef SHAMROCK_USE_NVTX

        nvtxRangePush(name);

#endif


        shamlog_debug_sycl_ln("SYCL", shambase::format("parallel_for {} N={}", name, length));


        if constexpr (mode == PARALLEL_FOR) {


            cgh.parallel_for(sycl::range<1>{length}, [=](sycl::item<1> id) {

                ker(id.get_linear_id());

            });


        } else if constexpr (mode == PARALLEL_FOR_ROUND) {


            u32 len = shambase::group_count(length, group_size) * group_size;


            cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {

                u64 gid = id.get_linear_id();

                if (gid >= length)

                    return;


                ker(gid);

            });


        } else if constexpr (mode == ND_RANGE) {


            cgh.parallel_for(make_range(length, group_size), [=](sycl::nd_item<1> id) {

                u64 gid = id.get_global_linear_id();

                if (gid >= length)

                    return;


                ker(gid);

            });


        } else {

            throw_unimplemented();

        }


#ifdef SHAMROCK_USE_NVTX

        nvtxRangePop();

#endif

    }


    template<

        u32 group_size           = default_gsize_2d,

        ParallelForWrapMode mode = default_loop_mode,

        class LambdaKernel>

    inline void parallel_for_2d(

        sycl::handler &cgh, u32 length_x, u32 length_y, const char *name, LambdaKernel &&ker) {


#ifdef SHAMROCK_USE_NVTX

        nvtxRangePush(name);

#endif


        shamlog_debug_sycl_ln(

            "SYCL", shambase::format("parallel_for {} N={} {}", name, length_x, length_y));


        if constexpr (mode == PARALLEL_FOR) {


            cgh.parallel_for(sycl::range<2>{length_x, length_y}, [=](sycl::item<2> id) {

                ker(id.get_id(0), id.get_id(1));

            });


        } else if constexpr (mode == PARALLEL_FOR_ROUND) {


            u32 len_x = shambase::group_count(length_x, group_size) * group_size;

            u32 len_y = shambase::group_count(length_y, group_size) * group_size;


            cgh.parallel_for(sycl::range<2>{len_x, len_y}, [=](sycl::item<2> id) {

                if (id.get_id(0) >= length_x || id.get_id(1) >= length_y)

                    return;


                ker(id.get_id(0), id.get_id(1));

            });


        } else if constexpr (mode == ND_RANGE) {


            sycl::nd_range<1> rx = make_range(length_x, group_size);

            sycl::nd_range<1> ry = make_range(length_y, group_size);


            sycl::range<2> tmp_s{rx.get_global_range().size(), ry.get_global_range().size()};

            sycl::range<2> tmp_g{rx.get_group_range().size(), ry.get_group_range().size()};


            cgh.parallel_for(sycl::nd_range<2>{tmp_s, tmp_g}, [=](sycl::nd_item<2> id) {

                if (id.get_global_id(0) >= length_x || id.get_global_id(1) >= length_y)

                    return;


                ker(id.get_global_id(0), id.get_global_id(1));

            });


        } else {

            throw_unimplemented();

        }


#ifdef SHAMROCK_USE_NVTX

        nvtxRangePop();

#endif

    }


    template<

        u32 group_size           = default_gsize_3d,

        ParallelForWrapMode mode = default_loop_mode,

        class LambdaKernel>

    inline void parallel_for_3d(

        sycl::handler &cgh,

        u32 length_x,

        u32 length_y,

        u32 length_z,

        const char *name,

        LambdaKernel &&ker) {


#ifdef SHAMROCK_USE_NVTX

        nvtxRangePush(name);

#endif


        shamlog_debug_sycl_ln(

            "SYCL",

            shambase::format("parallel_for {} N={} {} {}", name, length_x, length_y, length_z));


        if constexpr (mode == PARALLEL_FOR) {


            cgh.parallel_for(sycl::range<3>{length_x, length_y, length_z}, [=](sycl::item<3> id) {

                ker(id.get_id(0), id.get_id(1), id.get_id(2));

            });


        } else if constexpr (mode == PARALLEL_FOR_ROUND) {


            u32 len_x = shambase::group_count(length_x, group_size) * group_size;

            u32 len_y = shambase::group_count(length_y, group_size) * group_size;

            u32 len_z = shambase::group_count(length_z, group_size) * group_size;


            cgh.parallel_for(sycl::range<3>{len_x, len_y, len_z}, [=](sycl::item<3> id) {

                if (id.get_id(0) >= length_x || id.get_id(1) >= length_y

                    || id.get_id(2) >= length_z)

                    return;


                ker(id.get_id(0), id.get_id(1), id.get_id(2));

            });


        } else if constexpr (mode == ND_RANGE) {


            sycl::nd_range<1> rx = make_range(length_x, group_size);

            sycl::nd_range<1> ry = make_range(length_y, group_size);

            sycl::nd_range<1> rz = make_range(length_z, group_size);


            sycl::range<3> tmp_s{

                rx.get_global_range().size(),

                ry.get_global_range().size(),

                rz.get_global_range().size()};

            sycl::range<3> tmp_g{

                rx.get_group_range().size(),

                ry.get_group_range().size(),

                rz.get_group_range().size()};


            cgh.parallel_for(sycl::nd_range<3>{tmp_s, tmp_g}, [=](sycl::nd_item<3> id) {

                if (id.get_global_id(0) >= length_x || id.get_global_id(1) >= length_y

                    || id.get_global_id(2) >= length_z)

                    return;


                ker(id.get_global_id(0), id.get_global_id(1), id.get_global_id(2));

            });


        } else {

            throw_unimplemented();

        }


#ifdef SHAMROCK_USE_NVTX

        nvtxRangePop();

#endif

    }


    template<ParallelForWrapMode mode = default_loop_mode, class LambdaKernel>

    inline void parallel_for_gsize(

        sycl::handler &cgh, u32 length, u32 group_size, const char *name, LambdaKernel &&ker) {


#ifdef SHAMROCK_USE_NVTX

        nvtxRangePush(name);

#endif


        if constexpr (mode == PARALLEL_FOR) {


            cgh.parallel_for(sycl::range<1>{length}, [=](sycl::item<1> id) {

                ker(id.get_linear_id());

            });


        } else if constexpr (mode == PARALLEL_FOR_ROUND) {


            u32 len = shambase::group_count(length, group_size) * group_size;


            cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {

                u64 gid = id.get_linear_id();

                if (gid >= length)

                    return;


                ker(gid);

            });


        } else if constexpr (mode == ND_RANGE) {


            cgh.parallel_for(make_range(length, group_size), [=](sycl::nd_item<1> id) {

                u64 gid = id.get_global_linear_id();

                if (gid >= length)

                    return;


                ker(gid);

            });


        } else {

            throw_unimplemented();

        }


#ifdef SHAMROCK_USE_NVTX

        nvtxRangePop();

#endif

    }


    inline void check_queue_state(sycl::queue &q, SourceLocation loc = SourceLocation()) {

        shamlog_debug_sycl_ln("SYCL", "checking queue state", loc.format_one_line());

        q.wait_and_throw();

        shamlog_debug_sycl_ln("SYCL", "checking queue state : OK !");

    }


} // namespace shambase

u32
std::uint32_t u32
32 bit unsigned integer
Definition aliases_int.hpp:27

u64
std::uint64_t u64
64 bit unsigned integer
Definition aliases_int.hpp:26

exception.hpp
This header file contains utility functions related to exception handling in the code.

integer.hpp

shambase
namespace for basic c++ utilities
Definition sycl_builtins.hpp:25

shambase::group_count
constexpr u32 group_count(u32 len, u32 group_size)
Calculates the number of groups based on the length and group size.
Definition integer.hpp:125

shambase::throw_with_loc
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
Definition exception.hpp:132

shambase::make_range
sycl::nd_range< 1 > make_range(u32 length, const u32 group_size=32)
Generate a sycl nd range out of a group size and length.
Definition sycl_utils.hpp:74

shambase::check_buffer_size
void check_buffer_size(sycl::buffer< T > &buf, u64 max_range, const SourceLocation loc=SourceLocation())
check that the size of a sycl buffer is below or equal to the value of max range throw if it is not t...
Definition sycl_utils.hpp:43

shambase::getDevice_type
std::string getDevice_type(const sycl::device &Device)
Get the Device Type Name.
Definition sycl_utils.hpp:56

shambase::throw_unimplemented
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.
Definition exception.hpp:153

logs.hpp

stacktrace.hpp
This file contains the definition for the stacktrace related functionality.

SourceLocation
provide information about the source location
Definition SourceLocation.hpp:30

sycl.hpp

vec.hpp