Shamrock/doxygen/algorithm_8cpp_source.html

// -------------------------------------------------------//

//

// SHAMROCK code for hydrodynamics

// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>

// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1

// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information

//

// -------------------------------------------------------//


#include "shamalgs/details/algorithm/algorithm.hpp"

#include "shambase/memory.hpp"

#include "shambackends/DeviceBuffer.hpp"

#include "shambackends/DeviceScheduler.hpp"

#include "shambackends/kernel_call.hpp"


namespace shamalgs::algorithm {


    /**
     * @brief Build a buffer of `len` entries in which entry i holds the value i.
     *
     * Delegates to gen_buffer_device with an identity generator.
     *
     * @param q queue forwarded to gen_buffer_device
     * @param len number of entries requested
     * @return the buffer produced by gen_buffer_device
     */
    sycl::buffer<u32> gen_buffer_index(sycl::queue &q, u32 len) {
        // Identity generator: the value stored at a given index is the index itself.
        sycl::buffer<u32> indices = gen_buffer_device(q, len, [](u32 idx) -> u32 {
            return idx;
        });

        return indices;
    }


    /**
     * @brief Gather the entries of a buffer according to an index map.
     *
     * Submits a kernel on `q` that computes, for every i in [0, len[,
     * `ret[i] = buf[index_map[i]]`, and returns the newly created buffer.
     *
     * @note The values read from `index_map` are not validated here; they are
     *       presumably expected to be valid indices into `buf` (to be confirmed
     *       against callers).
     *
     * @param q queue the kernel is submitted to
     * @param buf source buffer, accessed read-only by the kernel
     * @param index_map for each output index, the index of the source entry to read
     * @param len number of entries of the returned buffer
     * @return a new buffer of `len` elements holding the remapped values
     */
    template<class T>
    sycl::buffer<T> index_remap(
        sycl::queue &q, sycl::buffer<T> &buf, sycl::buffer<u32> &index_map, u32 len) {

        sycl::buffer<T> ret(len);

        q.submit([&](sycl::handler &cgh) {
            sycl::accessor in{buf, cgh, sycl::read_only};
            // Every entry of `ret` is overwritten, so its initial content is discarded.
            sycl::accessor out{ret, cgh, sycl::write_only, sycl::no_init};
            sycl::accessor permut{index_map, cgh, sycl::read_only};

            cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
                out[item] = in[permut[item]];
            });
        });

        // Return the local by name: wrapping it in std::move would prevent copy elision.
        return ret;
    }


    /**
     * @brief Gather groups of `nvar` consecutive entries according to an index map.
     *
     * Submits a kernel on `q` that computes, for every i in [0, len[ and every
     * a in [0, nvar[, `ret[i * nvar + a] = buf[index_map[i] * nvar + a]`,
     * and returns the newly created buffer.
     *
     * @note The values read from `index_map` are not validated here; they are
     *       presumably expected to address valid groups of `buf` (to be
     *       confirmed against callers). Offsets are computed in u32.
     *
     * @param q queue the kernel is submitted to
     * @param buf source buffer, accessed read-only by the kernel
     * @param index_map for each output group, the index of the source group to read
     * @param len number of groups to remap
     * @param nvar number of consecutive entries per group
     * @return a new buffer of `len * nvar` elements holding the remapped values
     */
    template<class T>
    sycl::buffer<T> index_remap_nvar(
        sycl::queue &q, sycl::buffer<T> &buf, sycl::buffer<u32> &index_map, u32 len, u32 nvar) {

        sycl::buffer<T> ret(len * nvar);

        q.submit([&](sycl::handler &cgh) {
            sycl::accessor in{buf, cgh, sycl::read_only};
            // Every entry of `ret` is overwritten, so its initial content is discarded.
            sycl::accessor out{ret, cgh, sycl::write_only, sycl::no_init};
            sycl::accessor permut{index_map, cgh, sycl::read_only};

            // Local copy so that the kernel captures the value by copy.
            u32 nvar_loc = nvar;

            cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
                // First entry of the source group and of the destination group.
                u32 in_id  = permut[item] * nvar_loc;
                u32 out_id = item.get_linear_id() * nvar_loc;

                for (u32 a = 0; a < nvar_loc; a++) {
                    out[out_id + a] = in[in_id + a];
                }
            });
        });

        // Return the local by name: wrapping it in std::move would prevent copy elision.
        return ret;
    }


    /**
     * @brief Gather the entries of a device buffer into another according to an index map.
     *
     * Submits a kernel on the queue of the given scheduler that computes, for every
     * i in [0, len[, `dest[i] = source[index_map[i]]`. The resulting event is
     * registered on the three buffers through complete_event_state.
     *
     * @param sched_ptr scheduler whose queue receives the kernel
     * @param source buffer the values are read from
     * @param dest buffer the remapped values are written to
     * @param index_map for each output index, the index of the source entry to read
     * @param len number of entries to remap
     */
    template<class T>
    void index_remap(
        const sham::DeviceScheduler_ptr &sched_ptr,
        sham::DeviceBuffer<T> &source,
        sham::DeviceBuffer<T> &dest,
        sham::DeviceBuffer<u32> &index_map,
        u32 len) {

        sham::DeviceQueue &queue = shambase::get_check_ref(sched_ptr).get_queue();

        // Collects the events the kernel has to wait on before touching the buffers.
        sham::EventList depends_list;

        const T *src_ptr     = source.get_read_access(depends_list);
        T *dst_ptr           = dest.get_write_access(depends_list);
        const u32 *remap_ptr = index_map.get_read_access(depends_list);

        auto kernel_done = queue.submit(depends_list, [&](sycl::handler &cgh) {
            const sycl::range<1> work_range(len);

            cgh.parallel_for(work_range, [=](sycl::item<1> item) {
                dst_ptr[item] = src_ptr[remap_ptr[item]];
            });
        });

        // Hand the kernel event back to every buffer an access was requested on.
        source.complete_event_state(kernel_done);
        dest.complete_event_state(kernel_done);
        index_map.complete_event_state(kernel_done);
    }


    /**
     * @brief Gather groups of `nvar` consecutive entries of a device buffer into another.
     *
     * Submits a kernel on the queue of the given scheduler that computes, for every
     * i in [0, len[ and every a in [0, nvar[,
     * `dest[i * nvar + a] = source[index_map[i] * nvar + a]`. The resulting event is
     * registered on the three buffers through complete_event_state.
     *
     * @param sched_ptr scheduler whose queue receives the kernel
     * @param source buffer the values are read from
     * @param dest buffer the remapped values are written to
     * @param index_map for each output group, the index of the source group to read
     * @param len number of groups to remap
     * @param nvar number of consecutive entries per group
     */
    template<class T>
    void index_remap_nvar(
        const sham::DeviceScheduler_ptr &sched_ptr,
        sham::DeviceBuffer<T> &source,
        sham::DeviceBuffer<T> &dest,
        sham::DeviceBuffer<u32> &index_map,
        u32 len,
        u32 nvar) {

        sham::DeviceQueue &queue = shambase::get_check_ref(sched_ptr).get_queue();

        // Collects the events the kernel has to wait on before touching the buffers.
        sham::EventList depends_list;

        const T *src_ptr     = source.get_read_access(depends_list);
        T *dst_ptr           = dest.get_write_access(depends_list);
        const u32 *remap_ptr = index_map.get_read_access(depends_list);

        auto kernel_done = queue.submit(depends_list, [&](sycl::handler &cgh) {
            // Local copy so that the kernel captures the value by copy.
            u32 group_size = nvar;

            cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) {
                // First entry of the source group and of the destination group.
                u32 src_base = remap_ptr[item] * group_size;
                u32 dst_base = item.get_linear_id() * group_size;

                for (u32 var = 0; var < group_size; var++) {
                    dst_ptr[dst_base + var] = src_ptr[src_base + var];
                }
            });
        });

        // Hand the kernel event back to every buffer an access was requested on.
        source.complete_event_state(kernel_done);
        dest.complete_event_state(kernel_done);
        index_map.complete_event_state(kernel_done);
    }


// X-macro list of element types: expands to one X(type) entry per listed type.
// X must be defined before XMAC_TYPES is expanded.
#define XMAC_TYPES                                                                                 \

    X(f32)                                                                                         \

    X(f32_2)                                                                                       \

    X(f32_3)                                                                                       \

    X(f32_4)                                                                                       \

    X(f32_8)                                                                                       \

    X(f32_16)                                                                                      \

    X(f64)                                                                                         \

    X(f64_2)                                                                                       \

    X(f64_3)                                                                                       \

    X(f64_4)                                                                                       \

    X(f64_8)                                                                                       \

    X(f64_16)                                                                                      \

    X(u32)                                                                                         \

    X(u64)                                                                                         \

    X(u32_3)                                                                                       \

    X(u64_3)                                                                                       \

    X(i64_3)                                                                                       \

    X(i64)


// For a given element type _arg_, explicitly instantiate the four templates of this
// file: the sycl::buffer and sham::DeviceBuffer overloads of index_remap and
// index_remap_nvar.
#define X(_arg_)                                                                                   \

    template sycl::buffer<_arg_> index_remap(                                                      \

        sycl::queue &q, sycl::buffer<_arg_> &buf, sycl::buffer<u32> &index_map, u32 len);          \

                                                                                                   \

    template sycl::buffer<_arg_> index_remap_nvar(                                                 \

        sycl::queue &q,                                                                            \

        sycl::buffer<_arg_> &buf,                                                                  \

        sycl::buffer<u32> &index_map,                                                              \

        u32 len,                                                                                   \

        u32 nvar);                                                                                 \

                                                                                                   \

    template void index_remap(                                                                     \

        const sham::DeviceScheduler_ptr &sched,                                                    \

        sham::DeviceBuffer<_arg_> &source,                                                         \

        sham::DeviceBuffer<_arg_> &dest,                                                           \

        sham::DeviceBuffer<u32> &index_map,                                                        \

        u32 len);                                                                                  \

                                                                                                   \

    template void index_remap_nvar(                                                                \

        const sham::DeviceScheduler_ptr &sched,                                                    \

        sham::DeviceBuffer<_arg_> &source,                                                         \

        sham::DeviceBuffer<_arg_> &dest,                                                           \

        sham::DeviceBuffer<u32> &index_map,                                                        \

        u32 len,                                                                                   \

        u32 nvar);


    // Emit the explicit instantiations for every type listed in XMAC_TYPES.
    XMAC_TYPES


// X is only needed for the expansion above.
#undef X


} // namespace shamalgs::algorithm

DeviceBuffer.hpp

DeviceScheduler.hpp

u32
std::uint32_t u32
32 bit unsigned integer
Definition aliases_int.hpp:27

sham::DeviceBuffer
A buffer allocated in USM (Unified Shared Memory).
Definition DeviceBuffer.hpp:106

sham::DeviceBuffer::complete_event_state
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
Definition DeviceBuffer.hpp:368

sham::DeviceBuffer::get_write_access
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
Definition DeviceBuffer.hpp:349

sham::DeviceBuffer::get_read_access
const T * get_read_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{}) const
Get a read-only pointer to the buffer's data.
Definition DeviceBuffer.hpp:331

sham::DeviceQueue
A SYCL queue associated with a device and a context.
Definition DeviceQueue.hpp:32

sham::DeviceQueue::submit
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Definition DeviceQueue.hpp:101

sham::EventList
Class to manage a list of SYCL events.
Definition EventList.hpp:31

algorithm.hpp
main include file for the shamalgs algorithms

kernel_call.hpp

shamalgs::algorithm
namespace to store algorithms implemented by shamalgs
Definition algorithm.hpp:29

shamalgs::algorithm::index_remap
sycl::buffer< T > index_remap(sycl::queue &q, sycl::buffer< T > &source_buf, sycl::buffer< u32 > &index_map, u32 len)
remap a buffer according to a given index map: result[i] = source_buf[index_map[i]]
Definition algorithm.cpp:32

shamalgs::algorithm::gen_buffer_index
sycl::buffer< u32 > gen_buffer_index(sycl::queue &q, u32 len)
generate a buffer such that for i in [0,len[, buf[i] = i
Definition algorithm.cpp:25

shamalgs::algorithm::index_remap_nvar
sycl::buffer< T > index_remap_nvar(sycl::queue &q, sycl::buffer< T > &source_buf, sycl::buffer< u32 > &index_map, u32 len, u32 nvar)
remap a buffer (with multiple variables per index) according to a given index map: result[i * nvar + a] = source_buf[index_map[i] * nvar + a] for a in [0,nvar[
Definition algorithm.cpp:51

shamalgs::algorithm::gen_buffer_device
sycl::buffer< typename std::invoke_result_t< Fct, u32 > > gen_buffer_device(sycl::queue &q, u32 len, Fct &&func)
generate a buffer from a lambda expression based on the indexes
Definition algorithm.hpp:65

shambase::get_check_ref
T & get_check_ref(const std::unique_ptr< T > &ptr, SourceLocation loc=SourceLocation())
Takes a std::unique_ptr and returns a reference to the object it holds. It throws a std::runtime_erro...
Definition memory.hpp:110

memory.hpp