71 DeviceScheduler_ptr sched,
77 bool check_correctness) {
89 if (x.get_size() < N) {
91 "x.get_size() < N\n x.get_size() = {},\n N = {}", x.get_size(), N));
94 if (y.get_size() < N) {
96 "y.get_size() < N\n y.get_size() = {},\n N = {}", y.get_size(), N));
99 std::vector<T> y_res = {};
101 for (
int i = 0; i < 5; i++) {
105 auto x_ptr = x.get_write_access(depends_list);
106 auto y_ptr = y.get_write_access(depends_list);
114 auto e = q.
submit(empty_list, [&](sycl::handler &cgh) {
115 cgh.parallel_for(sycl::range<1>{size_t(N)}, [=](sycl::item<1> item) {
117 saxpy(item.get_linear_id(), N, a, x_ptr, y_ptr);
123 x.complete_event_state(sycl::event{});
124 y.complete_event_state(sycl::event{});
129 y_res = y.copy_to_stdvec();
133 T expected = a * init_x + init_y;
135 if (check_correctness) {
137 for (
int i = 0; i < N; i++) {
138 T delt = y_res[i] - expected;
140 if constexpr (std::is_same_v<T, sycl::marray<float, 3>>) {
141 maxError[0] = sham::max(maxError[0], sham::abs(delt[0]));
142 maxError[1] = sham::max(maxError[1], sham::abs(delt[1]));
143 maxError[2] = sham::max(maxError[2], sham::abs(delt[2]));
144 }
else if constexpr (std::is_same_v<T, sycl::marray<float, 4>>) {
145 maxError[0] = sham::max(maxError[0], sham::abs(delt[0]));
146 maxError[1] = sham::max(maxError[1], sham::abs(delt[1]));
147 maxError[2] = sham::max(maxError[2], sham::abs(delt[2]));
148 maxError[3] = sham::max(maxError[3], sham::abs(delt[3]));
150 maxError = sham::max(maxError, sham::abs(delt));
160 double(N) * load_size * 3 / seconds / 1e9,
161 u64(N) *
u64(load_size) * 2_u64};
void saxpy(u32 i, int n, T a, T *__restrict x, T *__restrict y)
saxpy function for benchmarking.
saxpy_result saxpy_bench(DeviceScheduler_ptr sched, int N, T init_x, T init_y, T a, int load_size, bool check_correctness)
Benchmark driver that runs the saxpy kernel on the device and returns a saxpy_result.