24namespace sham::benchmarks {
41 inline void fma_chains(
u32 i,
int nrotation, T y0, T *__restrict in, T *__restrict out) {
55 for (
int j = 0; j < nrotation; j++) {
86 DeviceScheduler_ptr sched,
int N,
f64 time_threshold) {
109 auto run_bench = [&q, &N, &x_ptr, &y_ptr, y0](
u32 nrotation) ->
f64 {
114 auto e = q.
submit(empty_list, [=](sycl::handler &cgh) {
115 cgh.parallel_for(sycl::range<1>{size_t(N)}, [=](sycl::item<1> item) {
116 fma_chains(item.get_linear_id(), nrotation, y0, x_ptr, y_ptr);
128 double ref = run_bench(0);
132 sec = run_bench(nrotation);
134 if (sec >= time_threshold || nrotation >= 256 * 256 * 4) {
146 u64 flop_per_thread =
u64(nrotation) * 2_u64 * 16_u64;
147 double flop_count = double(N) * flop_per_thread;
148 double flops = flop_count / (sec);
154 .nrotations = nrotation};
double f64
Alias for double.
std::uint32_t u32
32-bit unsigned integer.
std::uint64_t u64
64-bit unsigned integer.
Shamrock assertion utility.
A buffer allocated in USM (Unified Shared Memory).
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage a list of SYCL events.
void wait()
Wait for all events in the list to be finished.
Class Timer measures the time elapsed since the timer was started.
f64 elapsed_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
void start()
Starts the timer.
void stop()
Stops the timer and stores the elapsed time in nanoseconds.
fma_chains_result fma_chains_bench(DeviceScheduler_ptr sched, int N, f64 time_threshold)
Run the fma_chains benchmark.
void fma_chains(u32 i, int nrotation, T y0, T *__restrict in, T *__restrict out)
Kernel for the fma_chains benchmark.
Provides information about the source location.
Structure containing the results of an fma_chains benchmark.
f64 flops
Floating-point operations per second (FLOP/s).
std::string func_name
Name of the function.
f64 seconds
Computation time in seconds.
u32 nrotations
Number of rotations performed.