Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
get_device_clock.hpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
10#pragma once
11
20
22// Get device internal clock
24
25#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
26 #define SHAMROCK_INTRISICS_GET_DEVICE_CLOCK_AVAILABLE
27
28namespace sham {
29 // yeah ok what the heck is this
30 // I don't know how to call cuda functions from intel/oneapi device code
31 // so I'm just going to use the ptx intrinsics ...
32 // But assembly is a piece of crap, so i dug some weird intrinsics out clang's
33 // not really documented stuff, like try to google this function you will have fun
34 DEVICE_ATTRIBUTE_ON_ACPP inline u64 get_device_clock() {
35 #if __has_builtin(__nvvm_read_ptx_sreg_globaltimer)
36 return __nvvm_read_ptx_sreg_globaltimer();
37 #else
38 u64 clock;
39 asm("mov.u64 %0, %%globaltimer;" : "=l"(clock));
40 return clock;
41 #endif
42 }
43} // namespace sham
44
45#elif defined(_IS_ACPP_SSCP)
46 #define SHAMROCK_INTRISICS_GET_DEVICE_CLOCK_AVAILABLE
47
48namespace sham {
49 inline u64 get_device_clock() {
50 u64 ret_val = -1;
51
52 namespace jit = sycl::AdaptiveCpp_jit;
53
54 __acpp_if_target_sscp(
55 jit::compile_if(
56 jit::reflect<jit::reflection_query::compiler_backend>()
57 == jit::compiler_backend::host,
58 [&]() {
59 ret_val = std::chrono::high_resolution_clock::now().time_since_epoch().count();
60 }););
61
62 #if __has_builtin(__nvvm_read_ptx_sreg_globaltimer)
63 __acpp_if_target_sscp(
64 jit::compile_if(
65 jit::reflect<jit::reflection_query::target_vendor_id>() == jit::vendor_id::nvidia,
66 [&]() {
67 ret_val = __nvvm_read_ptx_sreg_globaltimer();
68 }););
69
70 #else
71 __acpp_if_target_sscp(
72 jit::compile_if(
73 jit::reflect<jit::reflection_query::target_vendor_id>() == jit::vendor_id::nvidia,
74 [&]() {
75 u64 clock;
76 asm("mov.u64 %0, %%globaltimer;" : "=l"(clock));
77 ret_val = clock;
78 }););
79 #endif
80
81 return ret_val;
82 }
83} // namespace sham
84
85#elif defined(_IS_ACPP_SMCP_HOST)
86 #define SHAMROCK_INTRISICS_GET_DEVICE_CLOCK_AVAILABLE
87
88namespace sham {
89 inline u64 get_device_clock() {
90 return std::chrono::high_resolution_clock::now().time_since_epoch().count();
91 }
92} // namespace sham
93
94#else
95namespace sham {
101} // namespace sham
102#endif
std::uint64_t u64
64 bit unsigned integer
This file implements the GPU core timeline tool from A. Richermoz, F. Neyret 2024.
This file implements the GPU core timeline tool from A. Richermoz, F. Neyret 2024.
Namespace for backends; this one is named only sham, since shambackends is too long to write.
u64 get_device_clock()
Return the number of clock cycles elapsed since an arbitrary starting point on the device.