Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
saxpy.hpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
10#pragma once
11
18#include "shambase/assert.hpp"
19#include "shambase/time.hpp"
22#include "shambackends/math.hpp"
23
24namespace sham::benchmarks {
25
/**
 * @brief Single-element saxpy update: y[i] = a * x[i] + y[i].
 *
 * Intended to be called once per work-item; indices outside [0, n) are
 * ignored so the caller may launch more work-items than there are elements.
 *
 * @tparam T element type; must support `a * x + y`.
 * @param i index of the element to update.
 * @param n number of valid elements in x and y; values <= 0 make the call a no-op.
 * @param a scaling factor applied to x[i].
 * @param x input array (only read here); declared __restrict, so it must not alias y.
 * @param y input/output array, updated in place at index i.
 */
template<class T>
inline void saxpy(u32 i, int n, T a, T *__restrict x, T *__restrict y) {
    // Comparing u32 with int directly converts a negative n into a huge
    // unsigned bound, which would let every index through. Reject non-positive
    // n first, then compare in the unsigned domain explicitly.
    if (n > 0 && i < static_cast<u32>(n)) {
        y[i] = a * x[i] + y[i];
    }
}
39
51
// Benchmark driver for the saxpy kernel: fills two device buffers of N
// elements, runs the kernel five times, keeps the smallest measured time and,
// when requested, checks the output of the first run.
//
// NOTE(review): this listing omits the declarator line (listing line 70); the
// cross-reference index of this page gives the full signature as
// saxpy_result saxpy_bench(DeviceScheduler_ptr sched, int N, T init_x,
// T init_y, T a, int load_size, bool check_correctness).
//
// Parameters, as used in the body below:
//   sched             - provides the queue the kernel is submitted to and is
//                       passed to both buffer constructors.
//   N                 - number of elements in x and y, and the kernel range.
//   init_x, init_y    - values the x and y buffers are filled with.
//   a                 - scaling factor forwarded to saxpy.
//   load_size         - multiplied by N to form the byte counts in the result;
//                       presumably the size of one T in bytes - TODO confirm.
//   check_correctness - when true, compares the first run's output against
//                       a * init_x + init_y.
69 template<class T>
71 DeviceScheduler_ptr sched,
72 int N,
73 T init_x,
74 T init_y,
75 T a,
76 int load_size,
77 bool check_correctness) {
78
79 sham::DeviceQueue &q = sched->get_queue();
80
// Starts at get_max<double>() (presumably the largest double) so the
// sham::min() in the loop always replaces it with a measured time.
81 double seconds = shambase::get_max<double>();
82
83 sham::DeviceBuffer<T> x{size_t(N), sched};
84 sham::DeviceBuffer<T> y{size_t(N), sched};
85
86 x.fill(init_x);
87 y.fill(init_y);
88
// Sanity checks on the allocated sizes. The statements that consume these
// format arguments (listing lines 90 and 95) are not part of this listing;
// the page index references throw_with_loc, so they presumably throw.
89 if (x.get_size() < N) {
91 "x.get_size() < N\n x.get_size() = {},\n N = {}", x.get_size(), N));
92 }
93
94 if (y.get_size() < N) {
96 "y.get_size() < N\n y.get_size() = {},\n N = {}", y.get_size(), N));
97 }
98
// Host copy of y taken after the first kernel run (see the i == 0 branch).
99 std::vector<T> y_res = {};
100
// Five timed repetitions; only the minimum time is kept.
101 for (int i = 0; i < 5; i++) {
102
103 sham::EventList depends_list;
104
// NOTE(review): saxpy only reads x, yet write access is requested for it too.
105 auto x_ptr = x.get_write_access(depends_list);
106 auto y_ptr = y.get_write_access(depends_list);
107
// Wait for the buffers' pending events before starting the timer, so the
// kernel can be submitted with an empty dependency list.
108 depends_list.wait();
109
110 sham::EventList empty_list{};
111
// NOTE(review): the declaration of the timer t (listing line 112) is not
// part of this listing.
113 t.start();
114 auto e = q.submit(empty_list, [&](sycl::handler &cgh) {
115 cgh.parallel_for(sycl::range<1>{size_t(N)}, [=](sycl::item<1> item) {
116 // printf("%d\n", item.get_linear_id());
117 saxpy(item.get_linear_id(), N, a, x_ptr, y_ptr);
118 });
119 });
// The timed interval covers submission plus the blocking wait on the kernel.
120 e.wait();
121 t.end();
122
// The kernel event has already been waited on above, so the accesses are
// closed with default-constructed events.
123 x.complete_event_state(sycl::event{});
124 y.complete_event_state(sycl::event{});
125
126 seconds = sham::min(seconds, t.elasped_sec());
127
// y is updated in place, so each further run adds another a * x to it. Only
// after the first run does y equal a * init_x + init_y, which is the value
// checked below.
128 if (i == 0) {
129 y_res = y.copy_to_stdvec();
130 }
131 }
132
133 T expected = a * init_x + init_y;
134
135 if (check_correctness) {
// Largest absolute deviation from the expected value over all elements.
136 T maxError = {};
137 for (int i = 0; i < N; i++) {
138 T delt = y_res[i] - expected;
139
// The 3- and 4-component float marray types are reduced component by
// component; every other T goes through sham::max / sham::abs directly.
140 if constexpr (std::is_same_v<T, sycl::marray<float, 3>>) {
141 maxError[0] = sham::max(maxError[0], sham::abs(delt[0]));
142 maxError[1] = sham::max(maxError[1], sham::abs(delt[1]));
143 maxError[2] = sham::max(maxError[2], sham::abs(delt[2]));
144 } else if constexpr (std::is_same_v<T, sycl::marray<float, 4>>) {
145 maxError[0] = sham::max(maxError[0], sham::abs(delt[0]));
146 maxError[1] = sham::max(maxError[1], sham::abs(delt[1]));
147 maxError[2] = sham::max(maxError[2], sham::abs(delt[2]));
148 maxError[3] = sham::max(maxError[3], sham::abs(delt[3]));
149 } else {
150 maxError = sham::max(maxError, sham::abs(delt));
151 }
152 }
153
// The accumulated error is compared against a value-initialised T; no
// tolerance is passed here.
154 SHAM_ASSERT(sham::equals(maxError, T{}));
155 }
156
// Result fields. The first initialiser (listing line 158) is not part of
// this listing; the page index names that field func_name.
// The bandwidth term counts three accesses per element (saxpy reads x[i] and
// y[i] and writes y[i]) and divides by 1e9. The byte count covers the two
// buffers of N elements.
157 return {
159 seconds,
160 double(N) * load_size * 3 / seconds / 1e9,
161 u64(N) * u64(load_size) * 2_u64};
162 }
163
164} // namespace sham::benchmarks
double f64
Alias for double.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
Definition assert.hpp:67
A buffer allocated in USM (Unified Shared Memory)
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage a list of SYCL events.
Definition EventList.hpp:31
void wait()
Wait for all events in the list to be finished.
Definition EventList.hpp:57
Class Timer measures the time elapsed since the timer was started.
Definition time.hpp:96
void end()
Stops the timer and stores the elapsed time in nanoseconds.
Definition time.hpp:111
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
Definition time.hpp:123
void start()
Starts the timer.
Definition time.hpp:106
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
void saxpy(u32 i, int n, T a, T *__restrict x, T *__restrict y)
saxpy function for benchmarking.
Definition saxpy.hpp:35
saxpy_result saxpy_bench(DeviceScheduler_ptr sched, int N, T init_x, T init_y, T a, int load_size, bool check_correctness)
Runs the saxpy benchmark on a device queue and returns its best time, bandwidth and byte count.
Definition saxpy.hpp:70
provide information about the source location
Structure containing the results of a saxpy benchmark.
Definition saxpy.hpp:41
f64 seconds
Computation time in seconds.
Definition saxpy.hpp:45
f64 bandwidth
Bandwidth in gigabytes per second (bytes transferred divided by 1e9).
Definition saxpy.hpp:47
u64 byte_used
Byte count used in the test.
Definition saxpy.hpp:49
std::string func_name
Name of the function.
Definition saxpy.hpp:43
constexpr const char * function_name() const noexcept
Returns the function name of the source location.