Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
math.hpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
10#pragma once
11
20#include "shambase/integer.hpp"
22#include "shambackends/sycl.hpp"
23#include "shambackends/vec.hpp"
24#include <vector>
25
26namespace sham::syclbackport {
27
28#ifndef SYCL2020_FEATURE_ISINF
29 #ifdef SYCL_COMP_ACPP
30 template<class T>
31 HIPSYCL_UNIVERSAL_TARGET bool fallback_is_inf(T value) {
32
33 __hipsycl_if_target_host(return std::isinf(value);)
34
35 __hipsycl_if_target_hiplike(return isinf(value);)
36
37 __hipsycl_if_target_spirv(static_assert(false, "this case is not implemented");)
38 }
39 #endif
40#endif
41
42} // namespace sham::syclbackport
43
44namespace sham {
45
47 // product_accumulate
49
50 template<class T>
51 inline constexpr T product_accumulate(T v) noexcept {
52 return v;
53 }
54
55 template<class T, int n, std::enable_if_t<n == 2, int> = 0>
56 inline constexpr T product_accumulate(sycl::vec<T, n> v) noexcept {
57 return v.x() * v.y();
58 }
59
60 template<class T, int n, std::enable_if_t<n == 3, int> = 0>
61 inline constexpr T product_accumulate(sycl::vec<T, n> v) noexcept {
62 return v.x() * v.y() * v.z();
63 }
64
65 template<class T, int n, std::enable_if_t<n == 4, int> = 0>
66 inline constexpr T product_accumulate(sycl::vec<T, n> v) noexcept {
67 return v.x() * v.y() * v.z() * v.w();
68 }
69
70 template<class T, int n, std::enable_if_t<n == 8, int> = 0>
71 inline constexpr T product_accumulate(sycl::vec<T, n> v) noexcept {
72 return v.s0() * v.s1() * v.s2() * v.s3() * v.s4() * v.s5() * v.s6() * v.s7();
73 }
74
75 template<class T, int n, std::enable_if_t<n == 16, int> = 0>
76 inline constexpr T product_accumulate(sycl::vec<T, n> v) noexcept {
77 return v.s0() * v.s1() * v.s2() * v.s3() * v.s4() * v.s5() * v.s6() * v.s7() * v.s8()
78 * v.s9() * v.sA() * v.sB() * v.sC() * v.sD() * v.sE() * v.sF();
79 }
80
81 template<class T>
82 inline constexpr T sum_accumulate(T v) noexcept {
83 return v;
84 }
85
86 template<class T, int n, std::enable_if_t<n == 2, int> = 0>
87 inline constexpr T sum_accumulate(sycl::vec<T, n> v) noexcept {
88 return v.x() + v.y();
89 }
90
91 template<class T, int n, std::enable_if_t<n == 3, int> = 0>
92 inline constexpr T sum_accumulate(sycl::vec<T, n> v) noexcept {
93 return v.x() + v.y() + v.z();
94 }
95
96 template<class T, int n, std::enable_if_t<n == 4, int> = 0>
97 inline constexpr T sum_accumulate(sycl::vec<T, n> v) noexcept {
98 return v.x() + v.y() + v.z() + v.w();
99 }
100
101 template<class T, int n, std::enable_if_t<n == 8, int> = 0>
102 inline constexpr T sum_accumulate(sycl::vec<T, n> v) noexcept {
103 return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + v.s7();
104 }
105
106 template<class T, int n, std::enable_if_t<n == 16, int> = 0>
107 inline constexpr T sum_accumulate(sycl::vec<T, n> v) noexcept {
108 return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + v.s7() + v.s8()
109 + v.s9() + v.sA() + v.sB() + v.sC() + v.sD() + v.sE() + v.sF();
110 }
111
113 // all_component_are_negative
115
116 template<class T, std::enable_if_t<std::is_signed<T>::value, int> = 0>
117 inline constexpr bool all_component_are_negative(T a) {
118 return a < 0;
119 }
120
121 template<class T, int n, std::enable_if_t<n == 2 && std::is_signed<T>::value, int> = 0>
122 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v) noexcept {
123 return (v.x() < 0) && (v.y() < 0);
124 }
125
126 template<class T, int n, std::enable_if_t<n == 3 && std::is_signed<T>::value, int> = 0>
127 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v) noexcept {
128 return (v.x() < 0) && (v.y() < 0) && (v.z() < 0);
129 }
130
131 template<class T, int n, std::enable_if_t<n == 4 && std::is_signed<T>::value, int> = 0>
132 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v) noexcept {
133 return (v.x() < 0) && (v.y() < 0) && (v.z() < 0) && (v.w() < 0);
134 }
135
136 template<class T, int n, std::enable_if_t<n == 8 && std::is_signed<T>::value, int> = 0>
137 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v) noexcept {
138 return (v.s0() < 0) && (v.s1() < 0) && (v.s2() < 0) && (v.s3() < 0) && (v.s4() < 0)
139 && (v.s5() < 0) && (v.s6() < 0) && (v.s7() < 0);
140 }
141
142 template<class T, int n, std::enable_if_t<n == 16 && std::is_signed<T>::value, int> = 0>
143 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v) noexcept {
144 return (v.s0() < 0) && (v.s1() < 0) && (v.s2() < 0) && (v.s3() < 0) && (v.s4() < 0)
145 && (v.s5() < 0) && (v.s6() < 0) && (v.s7() < 0) && (v.s8() < 0) && (v.s9() < 0)
146 && (v.sA() < 0) && (v.sB() < 0) && (v.sC() < 0) && (v.sD() < 0) && (v.sE() < 0)
147 && (v.sF() < 0);
148 }
149
151 // vec_compare_geq
153
154 template<class T>
155 inline constexpr bool vec_compare_geq(T a, T b) {
156 return a >= b;
157 }
158
159 template<class T, int n, std::enable_if_t<n == 2, int> = 0>
160 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
161 return (v.x() >= w.x()) && (v.y() >= w.y());
162 }
163
164 template<class T, int n, std::enable_if_t<n == 3, int> = 0>
165 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
166 return (v.x() >= w.x()) && (v.y() >= w.y()) && (v.z() >= w.z());
167 }
168
169 template<class T, int n, std::enable_if_t<n == 4, int> = 0>
170 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
171 return (v.x() >= w.x()) && (v.y() >= w.y()) && (v.z() >= w.z()) && (v.w() >= w.w());
172 }
173
174 template<class T, int n, std::enable_if_t<n == 8, int> = 0>
175 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
176 return (v.s0() >= w.s0()) && (v.s1() >= w.s1()) && (v.s2() >= w.s2()) && (v.s3() >= w.s3())
177 && (v.s4() >= w.s4()) && (v.s5() >= w.s5()) && (v.s6() >= w.s6())
178 && (v.s7() >= w.s7());
179 }
180
181 template<class T, int n, std::enable_if_t<n == 16, int> = 0>
182 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
183 return (v.s0() >= w.s0()) && (v.s1() >= w.s1()) && (v.s2() >= w.s2()) && (v.s3() >= w.s3())
184 && (v.s4() >= w.s4()) && (v.s5() >= w.s5()) && (v.s6() >= w.s6())
185 && (v.s7() >= w.s7()) && (v.s8() >= w.s8()) && (v.s9() >= w.s9())
186 && (v.sA() >= w.sA()) && (v.sB() >= w.sB()) && (v.sC() >= w.sC())
187 && (v.sD() >= w.sD()) && (v.sE() >= w.sE()) && (v.sF() >= w.sF());
188 }
189
191 // vec_compare_leq
193
194 template<class T>
195 inline constexpr bool vec_compare_leq(T a, T b) {
196 return a <= b;
197 }
198
199 template<class T, int n, std::enable_if_t<n == 2, int> = 0>
200 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
201 return (v.x() <= w.x()) && (v.y() <= w.y());
202 }
203
204 template<class T, int n, std::enable_if_t<n == 3, int> = 0>
205 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
206 return (v.x() <= w.x()) && (v.y() <= w.y()) && (v.z() <= w.z());
207 }
208
209 template<class T, int n, std::enable_if_t<n == 4, int> = 0>
210 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
211 return (v.x() <= w.x()) && (v.y() <= w.y()) && (v.z() <= w.z()) && (v.w() <= w.w());
212 }
213
214 template<class T, int n, std::enable_if_t<n == 8, int> = 0>
215 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
216 return (v.s0() <= w.s0()) && (v.s1() <= w.s1()) && (v.s2() <= w.s2()) && (v.s3() <= w.s3())
217 && (v.s4() <= w.s4()) && (v.s5() <= w.s5()) && (v.s6() <= w.s6())
218 && (v.s7() <= w.s7());
219 }
220
221 template<class T, int n, std::enable_if_t<n == 16, int> = 0>
222 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
223 return (v.s0() <= w.s0()) && (v.s1() <= w.s1()) && (v.s2() <= w.s2()) && (v.s3() <= w.s3())
224 && (v.s4() <= w.s4()) && (v.s5() <= w.s5()) && (v.s6() <= w.s6())
225 && (v.s7() <= w.s7()) && (v.s8() <= w.s8()) && (v.s9() <= w.s9())
226 && (v.sA() <= w.sA()) && (v.sB() <= w.sB()) && (v.sC() <= w.sC())
227 && (v.sD() <= w.sD()) && (v.sE() <= w.sE()) && (v.sF() <= w.sF());
228 }
229
231 // vec_compare_g
233
234 template<class T>
235 inline constexpr bool vec_compare_g(T a, T b) {
236 return a > b;
237 }
238
239 template<class T, int n, std::enable_if_t<n == 2, int> = 0>
240 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
241 return (v.x() > w.x()) && (v.y() > w.y());
242 }
243
244 template<class T, int n, std::enable_if_t<n == 3, int> = 0>
245 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
246 return (v.x() > w.x()) && (v.y() > w.y()) && (v.z() > w.z());
247 }
248
249 template<class T, int n, std::enable_if_t<n == 4, int> = 0>
250 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
251 return (v.x() > w.x()) && (v.y() > w.y()) && (v.z() > w.z()) && (v.w() > w.w());
252 }
253
254 template<class T, int n, std::enable_if_t<n == 8, int> = 0>
255 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
256 return (v.s0() > w.s0()) && (v.s1() > w.s1()) && (v.s2() > w.s2()) && (v.s3() > w.s3())
257 && (v.s4() > w.s4()) && (v.s5() > w.s5()) && (v.s6() > w.s6()) && (v.s7() > w.s7());
258 }
259
260 template<class T, int n, std::enable_if_t<n == 16, int> = 0>
261 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w) noexcept {
262 return (v.s0() > w.s0()) && (v.s1() > w.s1()) && (v.s2() > w.s2()) && (v.s3() > w.s3())
263 && (v.s4() > w.s4()) && (v.s5() > w.s5()) && (v.s6() > w.s6()) && (v.s7() > w.s7())
264 && (v.s8() > w.s8()) && (v.s9() > w.s9()) && (v.sA() > w.sA()) && (v.sB() > w.sB())
265 && (v.sC() > w.sC()) && (v.sD() > w.sD()) && (v.sE() > w.sE()) && (v.sF() > w.sF());
266 }
267
269 // component_have_a_zero
271
272 template<class T>
273 inline constexpr bool component_have_a_zero(T a) {
274 return a == 0;
275 }
276
277 template<class T, int n, std::enable_if_t<n == 2, int> = 0>
278 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v) noexcept {
279 return (v.x() == 0) || (v.y() == 0);
280 }
281
282 template<class T, int n, std::enable_if_t<n == 3, int> = 0>
283 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v) noexcept {
284 return (v.x() == 0) || (v.y() == 0) || (v.z() == 0);
285 }
286
287 template<class T, int n, std::enable_if_t<n == 4, int> = 0>
288 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v) noexcept {
289 return (v.x() == 0) || (v.y() == 0) || (v.z() == 0) || (v.w() == 0);
290 }
291
292 template<class T, int n, std::enable_if_t<n == 8, int> = 0>
293 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v) noexcept {
294 return (v.s0() == 0) || (v.s1() == 0) || (v.s2() == 0) || (v.s3() == 0) || (v.s4() == 0)
295 || (v.s5() == 0) || (v.s6() == 0) || (v.s7() == 0);
296 }
297
298 template<class T, int n, std::enable_if_t<n == 16, int> = 0>
299 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v) noexcept {
300 return (v.s0() == 0) || (v.s1() == 0) || (v.s2() == 0) || (v.s3() == 0) || (v.s4() == 0)
301 || (v.s5() == 0) || (v.s6() == 0) || (v.s7() == 0) || (v.s8() == 0) || (v.s9() == 0)
302 || (v.sA() == 0) || (v.sB() == 0) || (v.sC() == 0) || (v.sD() == 0) || (v.sE() == 0)
303 || (v.sF() == 0);
304 }
305
307 // component_have_only_one_zero
309
310 template<class T>
311 inline constexpr bool component_have_only_one_zero(T a) {
312 return a == 0;
313 }
314
315 template<class T, int n, std::enable_if_t<n == 2, int> = 0>
316 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v) noexcept {
317 return (v.x() == 0) != (v.y() == 0);
318 }
319
320 template<class T, int n, std::enable_if_t<n == 3, int> = 0>
321 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v) noexcept {
322 return 1 == int{(v.x() == 0)} + int{(v.y() == 0)} + int{(v.z() == 0)};
323 }
324
325 template<class T, int n, std::enable_if_t<n == 4, int> = 0>
326 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v) noexcept {
327 return 1 == int{(v.x() == 0)} + int{(v.y() == 0)} + int{(v.z() == 0)} + int{(v.w() == 0)};
328 }
329
330 template<class T, int n, std::enable_if_t<n == 8, int> = 0>
331 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v) noexcept {
332 return 1
333 == int{(v.s0() == 0)} + int{(v.s1() == 0)} + int{(v.s2() == 0)} + int{(v.s3() == 0)}
334 + int{(v.s4() == 0)} + int{(v.s5() == 0)} + int{(v.s6() == 0)}
335 + int{(v.s7() == 0)};
336 }
337
338 template<class T, int n, std::enable_if_t<n == 16, int> = 0>
339 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v) noexcept {
340 return 1
341 == int{(v.s0() == 0)} + int{(v.s1() == 0)} + int{(v.s2() == 0)} + int{(v.s3() == 0)}
342 + int{(v.s4() == 0)} + int{(v.s5() == 0)} + int{(v.s6() == 0)}
343 + int{(v.s7() == 0)} + int{(v.s8() == 0)} + int{(v.s9() == 0)}
344 + int{(v.sA() == 0)} + int{(v.sB() == 0)} + int{(v.sC() == 0)}
345 + int{(v.sD() == 0)} + int{(v.sE() == 0)} + int{(v.sF() == 0)};
346 }
347
349 // component_have_at_most_one_zero
351
352 template<class T>
353 inline constexpr bool component_have_at_most_one_zero(T a) {
354 return true;
355 }
356
357 template<class T, int n, std::enable_if_t<n == 2, int> = 0>
358 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v) noexcept {
359 return 2 > int{(v.x() == 0)} + int{(v.y() == 0)};
360 }
361
362 template<class T, int n, std::enable_if_t<n == 3, int> = 0>
363 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v) noexcept {
364 return 2 > int{(v.x() == 0)} + int{(v.y() == 0)} + int{(v.z() == 0)};
365 }
366
367 template<class T, int n, std::enable_if_t<n == 4, int> = 0>
368 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v) noexcept {
369 return 2 > int{(v.x() == 0)} + int{(v.y() == 0)} + int{(v.z() == 0)} + int{(v.w() == 0)};
370 }
371
372 template<class T, int n, std::enable_if_t<n == 8, int> = 0>
373 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v) noexcept {
374 return 2 > int{(v.s0() == 0)} + int{(v.s1() == 0)} + int{(v.s2() == 0)} + int{(v.s3() == 0)}
375 + int{(v.s4() == 0)} + int{(v.s5() == 0)} + int{(v.s6() == 0)}
376 + int{(v.s7() == 0)};
377 }
378
379 template<class T, int n, std::enable_if_t<n == 16, int> = 0>
380 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v) noexcept {
381 return 2 > int{(v.s0() == 0)} + int{(v.s1() == 0)} + int{(v.s2() == 0)} + int{(v.s3() == 0)}
382 + int{(v.s4() == 0)} + int{(v.s5() == 0)} + int{(v.s6() == 0)}
383 + int{(v.s7() == 0)} + int{(v.s8() == 0)} + int{(v.s9() == 0)}
384 + int{(v.sA() == 0)} + int{(v.sB() == 0)} + int{(v.sC() == 0)}
385 + int{(v.sD() == 0)} + int{(v.sE() == 0)} + int{(v.sF() == 0)};
386 }
387
388} // namespace sham
389
390namespace sham::details {
391 template<class T>
392 inline T g_sycl_min(T a, T b) {
393
394 static_assert(shambase::VectorProperties<T>::has_info, "no info about this type");
395
397 return sycl::fmin(a, b);
398 } else if constexpr (shambase::VectorProperties<T>::is_int_based) {
399 return sycl::min(a, b);
401 return sycl::min(a, b);
402 }
403 }
404
405 template<class T>
406 inline T g_sycl_max(T a, T b) {
407
408 static_assert(shambase::VectorProperties<T>::has_info, "no info about this type");
409
411 return sycl::fmax(a, b);
412 } else if constexpr (shambase::VectorProperties<T>::is_int_based) {
413 return sycl::max(a, b);
415 return sycl::max(a, b);
416 }
417 }
418
419 template<class T>
420 inline T g_sycl_abs(T a) {
421
422 static_assert(shambase::VectorProperties<T>::has_info, "no info about this type");
423
425 return sycl::fabs(a);
426 } else if constexpr (shambase::VectorProperties<T>::is_int_based) {
427 return sycl::abs(a);
429 return sycl::abs(a);
430 }
431 }
432
433 template<class T>
434 inline shambase::VecComponent<T> g_sycl_dot(T a, T b) {
435
436 static_assert(shambase::VectorProperties<T>::has_info, "no info about this type");
437
438 if constexpr (
441
442 return sycl::dot(a, b);
443
444 } else {
445
446 return sum_accumulate(a * b);
447 }
448 }
449
450 template<class T>
451 inline constexpr bool vec_equals(sycl::vec<T, 2> a, sycl::vec<T, 2> b) noexcept {
452 bool eqx = a.x() == b.x();
453 bool eqy = a.y() == b.y();
454 return eqx && eqy;
455 }
456
457 template<class T>
458 inline constexpr bool vec_equals(sycl::vec<T, 3> a, sycl::vec<T, 3> b) noexcept {
459 bool eqx = a.x() == b.x();
460 bool eqy = a.y() == b.y();
461 bool eqz = a.z() == b.z();
462 return eqx && eqy && eqz;
463 }
464
465 template<class T>
466 inline constexpr bool vec_equals(sycl::vec<T, 4> a, sycl::vec<T, 4> b) noexcept {
467 bool eqx = a.x() == b.x();
468 bool eqy = a.y() == b.y();
469 bool eqz = a.z() == b.z();
470 bool eqw = a.w() == b.w();
471 return eqx && eqy && eqz && eqw;
472 }
473
474 template<class T>
475 inline constexpr bool vec_equals(sycl::vec<T, 8> a, sycl::vec<T, 8> b) noexcept {
476 bool eqs0 = a.s0() == b.s0();
477 bool eqs1 = a.s1() == b.s1();
478 bool eqs2 = a.s2() == b.s2();
479 bool eqs3 = a.s3() == b.s3();
480 bool eqs4 = a.s4() == b.s4();
481 bool eqs5 = a.s5() == b.s5();
482 bool eqs6 = a.s6() == b.s6();
483 bool eqs7 = a.s7() == b.s7();
484 return eqs0 && eqs1 && eqs2 && eqs3 && eqs4 && eqs5 && eqs6 && eqs7;
485 }
486
487 template<class T>
488 inline constexpr bool vec_equals(sycl::vec<T, 16> a, sycl::vec<T, 16> b) noexcept {
489 bool eqs0 = a.s0() == b.s0();
490 bool eqs1 = a.s1() == b.s1();
491 bool eqs2 = a.s2() == b.s2();
492 bool eqs3 = a.s3() == b.s3();
493 bool eqs4 = a.s4() == b.s4();
494 bool eqs5 = a.s5() == b.s5();
495 bool eqs6 = a.s6() == b.s6();
496 bool eqs7 = a.s7() == b.s7();
497
498 bool eqs8 = a.s8() == b.s8();
499 bool eqs9 = a.s9() == b.s9();
500 bool eqsA = a.sA() == b.sA();
501 bool eqsB = a.sB() == b.sB();
502 bool eqsC = a.sC() == b.sC();
503 bool eqsD = a.sD() == b.sD();
504 bool eqsE = a.sE() == b.sE();
505 bool eqsF = a.sF() == b.sF();
506
507 return eqs0 && eqs1 && eqs2 && eqs3 && eqs4 && eqs5 && eqs6 && eqs7 && eqs8 && eqs9 && eqsA
508 && eqsB && eqsC && eqsD && eqsE && eqsF;
509 }
510
511 template<class T>
512 inline constexpr bool vec_equals(T a, T b) noexcept {
513 bool eqx = a == b;
514 return eqx;
515 }
516} // namespace sham::details
517
518namespace sham {
519
520 template<class T>
521 inline T min(T a, T b) {
522 return sham::details::g_sycl_min(a, b);
523 }
524
525 template<class T>
526 inline T max(T a, T b) {
527 return sham::details::g_sycl_max(a, b);
528 }
529
530 template<class T>
531 inline shambase::VecComponent<T> max_component(T a) {
532
533 using Tscal = shambase::VecComponent<T>;
534
535 if constexpr (std::is_same_v<T, sycl::vec<Tscal, 2>>) {
536 return sycl::max(a.x(), a.y());
537 } else if constexpr (std::is_same_v<T, sycl::vec<Tscal, 3>>) {
538 return sycl::max(a.x(), sycl::max(a.y(), a.z()));
539 } else if constexpr (std::is_same_v<T, sycl::vec<Tscal, 4>>) {
540 return sycl::max(sycl::max(a.x(), a.y()), sycl::max(a.z(), a.w()));
541 } else if constexpr (std::is_same_v<T, sycl::vec<Tscal, 8>>) {
542 return sycl::max(
543 sycl::max(sycl::max(a.s0(), a.s1()), sycl::max(a.s2(), a.s3())),
544 sycl::max(sycl::max(a.s4(), a.s5()), sycl::max(a.s6(), a.s7())));
545 } else if constexpr (std::is_same_v<T, sycl::vec<Tscal, 16>>) {
546 return sycl::max(
547 sycl::max(
548 sycl::max(sycl::max(a.s0(), a.s1()), sycl::max(a.s2(), a.s3())),
549 sycl::max(sycl::max(a.s4(), a.s5()), sycl::max(a.s6(), a.s7()))),
550 sycl::max(
551 sycl::max(sycl::max(a.s8(), a.s9()), sycl::max(a.sA(), a.sB())),
552 sycl::max(sycl::max(a.sC(), a.sD()), sycl::max(a.sE(), a.sF()))));
553 } else {
554 static_assert(
555 shambase::always_false_v<T>, "max_component is not implemented for this type");
556 }
557 }
558
559 template<class T>
560 inline shambase::VecComponent<T> dot(T a, T b) {
561 return sham::details::g_sycl_dot(a, b);
562 }
563
564 template<class T>
565 inline shambase::VecComponent<T> length2(T a) {
566 return sham::dot(a, a);
567 }
568
569 template<class T>
570 inline T max_8points(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) {
571 return max(max(max(v0, v1), max(v2, v3)), max(max(v4, v5), max(v6, v7)));
572 }
573
574 template<class T>
575 inline T min_8points(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) {
576 return min(min(min(v0, v1), min(v2, v3)), min(min(v4, v5), min(v6, v7)));
577 }
578
579 template<class T>
580 inline T abs(T a) {
581 return sham::details::g_sycl_abs(a);
582 }
583
584 template<class T>
585 inline T positive_part(T a) {
586 return (sham::abs(a) + a) / 2;
587 }
588
589 template<class T>
590 inline T negative_part(T a) {
591 return (sham::abs(a) - a) / 2;
592 }
593
594 template<class T>
595 inline bool equals(T a, T b) {
596 return details::vec_equals(a, b);
597 }
598
600 template<class T>
601 inline bool equals(const std::vector<T> &a, const std::vector<T> &b) {
602 if (a.size() != b.size()) {
603 return false;
604 }
605 for (u32 i = 0; i < a.size(); i++) {
606 if (!sham::equals(a[i], b[i])) {
607 return false;
608 }
609 }
610 return true;
611 }
612
613 inline auto pack32(u32 a, u32 b) -> u64 { return (u64(a) << 32U) + b; };
614
615 inline auto unpack32(u64 v) -> sycl::vec<u32, 2> { return {u32(v >> 32U), u32(v)}; };
616
617 template<class T>
618 inline T m1pown(u32 n) {
619 return (n % 2 == 0) ? T(1) : -T(1);
620 }
621
622 template<class T>
623 inline bool has_nan(T v) {
624 auto tmp = !sycl::isnan(v);
625 return !(tmp);
626 }
627
628 template<class T>
629 inline bool has_inf(T v) {
630#ifdef SYCL2020_FEATURE_ISINF
631 auto tmp = !sycl::isinf(v);
632 return !(tmp);
633#else
634 auto tmp = !syclbackport::fallback_is_inf(v);
635 return !(tmp);
636#endif
637 }
638
639 template<class T>
640 inline bool has_nan_or_inf(T v) {
641#ifdef SYCL2020_FEATURE_ISINF
642 auto tmp = !(sycl::isnan(v) || sycl::isinf(v));
643 return !(tmp);
644#else
645 auto tmp = !(sycl::isnan(v) || syclbackport::fallback_is_inf(v));
646 return !(tmp);
647#endif
648 }
649
659 template<class T, int n>
660 inline bool has_nan(sycl::vec<T, n> v) {
661 bool has = false;
662#pragma unroll
663 for (i32 i = 0; i < n; i++) {
664 has = has || (sycl::isnan(v[i]));
665 }
666 return has;
667 }
668
678 template<class T, int n>
679 inline bool has_inf(sycl::vec<T, n> v) {
680 bool has = false;
681#pragma unroll
682 for (i32 i = 0; i < n; i++) {
683#ifdef SYCL2020_FEATURE_ISINF
684 has = has || (sycl::isinf(v[i]));
685#else
686 has = has || (syclbackport::fallback_is_inf(v[i]));
687#endif
688 }
689 return has;
690 }
691
701 template<class T, int n>
702 inline bool has_nan_or_inf(sycl::vec<T, n> v) {
703 bool has = false;
704#pragma unroll
705 for (i32 i = 0; i < n; i++) {
706#ifdef SYCL2020_FEATURE_ISINF
707 has = has || (sycl::isnan(v[i]) || sycl::isinf(v[i]));
708#else
709 has = has || (sycl::isnan(v[i]) || syclbackport::fallback_is_inf(v[i]));
710#endif
711 }
712 return has;
713 }
714
723 template<i32 power, class T>
724 inline constexpr T pow_constexpr(T a) noexcept {
725
726 if constexpr (power < 0) {
727 return pow_constexpr<-power>(T{1} / a);
728 } else if constexpr (power == 0) {
729 return T{1};
730 } else if constexpr (power == 1) {
731 return a;
732 } else if constexpr (power % 2 == 0) {
733 T tmp = pow_constexpr<power / 2>(a);
734 return tmp * tmp;
735 } else if constexpr (power % 2 == 1) {
736 T tmp = pow_constexpr<(power - 1) / 2>(a);
737 return tmp * tmp * a;
738 }
739 }
740
741 template<class T>
742 inline constexpr T clz(T a) noexcept {
743#ifdef SYCL2020_FEATURE_CLZ
744 return sycl::clz(a);
745#else
746 #ifdef SYCL_COMP_ACPP
747
748 if constexpr (std::is_same_v<T, u32>) {
749
750 __hipsycl_if_target_host(return __builtin_clz(a);)
751
752 __hipsycl_if_target_hiplike(return __clz(a);)
753
754 __hipsycl_if_target_spirv(return __spirv_ocl_clz(a);)
755
756 __hipsycl_if_target_sscp(return sycl::clz(a);)
757 }
758
759 if constexpr (std::is_same_v<T, u64>) {
760
761 __hipsycl_if_target_host(return __builtin_clzll(a);)
762
763 __hipsycl_if_target_hiplike(return __clzll(a);)
764
765 __hipsycl_if_target_spirv(return __spirv_ocl_clz(a);)
766
767 __hipsycl_if_target_sscp(return sycl::clz(a);)
768 }
769
770 #endif
771#endif
772 }
773
782 template<class T, std::enable_if_t<std::is_integral_v<T>, int> = 0>
783 inline constexpr T clz_xor(T a, T b) noexcept {
784 return sham::clz(a ^ b);
785 }
786
790 template<class T, std::enable_if_t<std::is_integral_v<T>, int> = 0>
791 inline constexpr T log2_pow2_num(T v) noexcept {
792 return shambase::bitsizeof<T> - sham::clz(v) - 1;
793 }
794
804 template<class T, std::enable_if_t<std::is_integral_v<T> || (!std::is_signed_v<T>), int> = 0>
805 inline constexpr T roundup_pow2_clz(T v) noexcept {
806
807 constexpr T max_signed_p1 = (shambase::get_max<T>() >> 1) + 1;
808
809 bool is_pow2 = shambase::is_pow_of_two(v);
810 bool is_above_max = v > max_signed_p1;
811
812 return (is_above_max) ? 0 : ((is_pow2) ? v : 1U << (shambase::bitsizeof<T> - sham::clz(v)));
813 };
814
825 template<class Acc>
826 inline i32 karras_delta(i32 x, i32 y, u32 morton_length, Acc m) noexcept {
827 return ((y > morton_length - 1 || y < 0) ? -1 : int(clz_xor(m[x], m[y])));
828 }
829
840 template<class T>
841 inline T inv_sat_positive(T v, T minvsat = T{1e-9}, T satval = T{0.}) noexcept {
842 return (v >= minvsat) ? T{1.} / v : satval;
843 }
844
855 template<class T>
856 inline T inv_sat(T v, T minvsat = T{1e-9}, T satval = T{0.}) noexcept {
857 return (std::abs(v) >= minvsat) ? T{1.} / v : satval;
858 }
859
869 template<class T>
870 inline T inv_sat_zero(T v, T satval = T{0.}) noexcept {
871 // return div only if v != 0 and is not NaN
872 return (v != T{0} && v == v) ? T{1.} / v : satval;
873 }
874
875 namespace details {
876 template<class Tdest, class Tsource>
877 inline Tdest convert_internal(Tsource coord) {
878 return static_cast<Tdest>(coord);
879 }
880
881 template<class Tdest, class Tsource, int N>
882 inline sycl::vec<Tdest, N> convert_internal(sycl::vec<Tsource, N> coord) {
883 sycl::vec<Tdest, N> result;
884 for (int i = 0; i < N; ++i) {
885 result[i] = static_cast<Tdest>(coord[i]);
886 }
887 return result;
888 }
889 } // namespace details
890
892 template<class Tdest, class Tsource>
893 inline Tdest convert(Tsource coord) {
894 return details::convert_internal<shambase::VecComponent<Tdest>>(coord);
895 }
896
897} // namespace sham
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
Namespace for internal details of the logs module.
namespace for backends this one is named only sham since shambackends is too long to write
constexpr T pow_constexpr(T a) noexcept
generalized pow constexpr
Definition math.hpp:724
constexpr T clz_xor(T a, T b) noexcept
give the length of the common prefix
Definition math.hpp:783
T inv_sat_positive(T v, T minvsat=T{1e-9}, T satval=T{0.}) noexcept
inverse saturated (positive numbers only)
Definition math.hpp:841
Tdest convert(Tsource coord)
Helper to avoid differences between SYCL implementations of convert, it always static cast.
Definition math.hpp:893
i32 karras_delta(i32 x, i32 y, u32 morton_length, Acc m) noexcept
delta operator defined in Karras 2012
Definition math.hpp:826
constexpr T log2_pow2_num(T v) noexcept
compute the log2 of the number v being a power of 2
Definition math.hpp:791
constexpr T roundup_pow2_clz(T v) noexcept
round up to the next power of two 0 is rounded up to 1 as it is not a pow of 2 every input above the ...
Definition math.hpp:805
T inv_sat(T v, T minvsat=T{1e-9}, T satval=T{0.}) noexcept
inverse saturated
Definition math.hpp:856
T inv_sat_zero(T v, T satval=T{0.}) noexcept
inverse saturated (zero version)
Definition math.hpp:870
bool equals(sycl::queue &q, sycl::buffer< T > &buf1, sycl::buffer< T > &buf2, u32 cnt)
Compare elements between two sycl::buffers for equality.
Definition equals.hpp:77
constexpr bool is_pow_of_two(T v) noexcept
determine if v is a power of two and check if v==0 Source : https://graphics.stanford....
Definition integer.hpp:49
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
Traits for C++ types.