24namespace sham::benchmarks {
41 inline void fma_chains(
u32 i,
int nrotation, T y0, T *__restrict in, T *__restrict out) {
55 for (
int j = 0; j < nrotation; j++) {
86 DeviceScheduler_ptr sched,
int N,
f64 time_threshold) {
109 auto run_bench = [&q, &N, &x_ptr, &y_ptr, y0](
u32 nrotation) ->
f64 {
114 auto e = q.
submit(empty_list, [=](sycl::handler &cgh) {
115 cgh.parallel_for(sycl::range<1>{size_t(N)}, [=](sycl::item<1> item) {
116 fma_chains(item.get_linear_id(), nrotation, y0, x_ptr, y_ptr);
128 double ref = run_bench(0);
132 sec = run_bench(nrotation);
134 if (sec >= time_threshold || nrotation >= 256 * 256 * 4) {
146 u64 flop_per_thread =
u64(nrotation) * 2_u64 * 16_u64;
147 double flop_count = double(N) * flop_per_thread;
148 double flops = flop_count / (sec);
double f64
Alias for double.
std::uint32_t u32
32-bit unsigned integer.
std::uint64_t u64
64-bit unsigned integer.
Shamrock assertion utility.
A buffer allocated in USM (Unified Shared Memory)
void complete_event_state(sycl::event e) const
Complete the event state of the buffer.
T * get_write_access(sham::EventList &depends_list, SourceLocation src_loc=SourceLocation{})
Get a read-write pointer to the buffer's data.
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
Class to manage a list of SYCL events.
void wait()
Wait for all events in the list to be finished.
Class Timer measures the time elapsed since the timer was started.
void end()
Stops the timer and stores the elapsed time in nanoseconds.
f64 elasped_sec() const
Converts the stored nanosecond time to a floating point representation in seconds.
void start()
Starts the timer.
fma_chains_result fma_chains_bench(DeviceScheduler_ptr sched, int N, f64 time_threshold)
Run the fma_chains benchmark.
void fma_chains(u32 i, int nrotation, T y0, T *__restrict in, T *__restrict out)
Kernel for the fma_chains benchmark.
Provides information about the source location.
Structure containing the results of an fma_chains benchmark.
f64 flops
Floating-point operations per second (FLOP/s).
std::string func_name
Name of the function.
f64 seconds
Computation time in seconds.
u32 nrotations
Number of rotations performed.
constexpr const char * function_name() const noexcept
Returns the function name of the source location.