24namespace sham::benchmarks {
35 inline void saxpy(
u32 i,
int n, T a, T *__restrict x, T *__restrict y) {
37 y[i] = a * x[i] + y[i];
71 DeviceScheduler_ptr sched,
77 bool check_correctness) {
81 double seconds = shambase::get_max<double>();
91 "x.get_size() < N\n x.get_size() = {},\n N = {}", x.
get_size(), N));
96 "y.get_size() < N\n y.get_size() = {},\n N = {}", y.
get_size(), N));
99 std::vector<T> y_res = {};
101 for (
int i = 0; i < 5; i++) {
114 auto e = q.
submit(empty_list, [&](sycl::handler &cgh) {
115 cgh.parallel_for(sycl::range<1>{size_t(N)}, [=](sycl::item<1> item) {
117 saxpy(item.get_linear_id(), N, a, x_ptr, y_ptr);
133 T expected = a * init_x + init_y;
135 if (check_correctness) {
137 for (
int i = 0; i < N; i++) {
138 T delt = y_res[i] - expected;
140 if constexpr (std::is_same_v<T, sycl::marray<float, 3>>) {
141 maxError[0] = sham::max(maxError[0], sham::abs(delt[0]));
142 maxError[1] = sham::max(maxError[1], sham::abs(delt[1]));
143 maxError[2] = sham::max(maxError[2], sham::abs(delt[2]));
144 }
else if constexpr (std::is_same_v<T, sycl::marray<float, 4>>) {
145 maxError[0] = sham::max(maxError[0], sham::abs(delt[0]));
146 maxError[1] = sham::max(maxError[1], sham::abs(delt[1]));
147 maxError[2] = sham::max(maxError[2], sham::abs(delt[2]));
148 maxError[3] = sham::max(maxError[3], sham::abs(delt[3]));
150 maxError = sham::max(maxError, sham::abs(delt));
160 .bandwidth = double(N) * load_size * 3 / seconds / 1e9,
161 .byte_used =
u64(N) *
u64(load_size) * 2_u64};
double f64
Alias for double.
std::uint32_t u32
32-bit unsigned integer.
std::uint64_t u64
64-bit unsigned integer.
Shamrock assertion utility.
#define SHAM_ASSERT(x)
Shorthand for SHAM_ASSERT_NAMED without a message.
A buffer allocated in USM (Unified Shared Memory).
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
std::vector< T > copy_to_stdvec() const
Copy the content of the buffer to a std::vector.
size_t get_size() const
Gets the number of elements in the buffer.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage a list of SYCL events.
void wait()
Wait for all events in the list to be finished.
Class Timer measures the time elapsed since the timer was started.
f64 elapsed_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
void start()
Starts the timer.
void stop()
Stops the timer and stores the elapsed time in nanoseconds.
ExcptTypes make_except_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Create an exception with a message and a location.
void saxpy(u32 i, int n, T a, T *__restrict x, T *__restrict y)
Single-element saxpy kernel used by the benchmark: computes y[i] = a * x[i] + y[i].
saxpy_result saxpy_bench(DeviceScheduler_ptr sched, int N, T init_x, T init_y, T a, int load_size, bool check_correctness)
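Runs the saxpy benchmark on the given device scheduler and returns the results as a saxpy_result (time in seconds, bandwidth, bytes used).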
Provides information about the source location.
Structure containing the results of a saxpy benchmark.
f64 seconds
Computation time in seconds.
f64 bandwidth
Bandwidth in gigabytes per second (GB/s, 1 GB = 1e9 bytes).
u64 byte_used
Byte count used in the test.
std::string func_name
Name of the function.