26namespace sham::syclbackport {
28#ifndef SYCL2020_FEATURE_ISINF
31 HIPSYCL_UNIVERSAL_TARGET
bool fallback_is_inf(T value) {
33 __hipsycl_if_target_host(
return std::isinf(value);)
35 __hipsycl_if_target_hiplike(
return isinf(value);)
37 __hipsycl_if_target_spirv(
static_assert(
false,
"this case is not implemented");)
51 inline constexpr T product_accumulate(T v)
noexcept {
55 template<
class T,
int n, std::enable_if_t<n == 2,
int> = 0>
56 inline constexpr T product_accumulate(sycl::vec<T, n> v)
noexcept {
60 template<
class T,
int n, std::enable_if_t<n == 3,
int> = 0>
61 inline constexpr T product_accumulate(sycl::vec<T, n> v)
noexcept {
62 return v.x() * v.y() * v.z();
65 template<
class T,
int n, std::enable_if_t<n == 4,
int> = 0>
66 inline constexpr T product_accumulate(sycl::vec<T, n> v)
noexcept {
67 return v.x() * v.y() * v.z() * v.w();
70 template<
class T,
int n, std::enable_if_t<n == 8,
int> = 0>
71 inline constexpr T product_accumulate(sycl::vec<T, n> v)
noexcept {
72 return v.s0() * v.s1() * v.s2() * v.s3() * v.s4() * v.s5() * v.s6() * v.s7();
75 template<
class T,
int n, std::enable_if_t<n == 16,
int> = 0>
76 inline constexpr T product_accumulate(sycl::vec<T, n> v)
noexcept {
77 return v.s0() * v.s1() * v.s2() * v.s3() * v.s4() * v.s5() * v.s6() * v.s7() * v.s8()
78 * v.s9() * v.sA() * v.sB() * v.sC() * v.sD() * v.sE() * v.sF();
82 inline constexpr T sum_accumulate(T v)
noexcept {
86 template<
class T,
int n, std::enable_if_t<n == 2,
int> = 0>
87 inline constexpr T sum_accumulate(sycl::vec<T, n> v)
noexcept {
91 template<
class T,
int n, std::enable_if_t<n == 3,
int> = 0>
92 inline constexpr T sum_accumulate(sycl::vec<T, n> v)
noexcept {
93 return v.x() + v.y() + v.z();
96 template<
class T,
int n, std::enable_if_t<n == 4,
int> = 0>
97 inline constexpr T sum_accumulate(sycl::vec<T, n> v)
noexcept {
98 return v.x() + v.y() + v.z() + v.w();
101 template<
class T,
int n, std::enable_if_t<n == 8,
int> = 0>
102 inline constexpr T sum_accumulate(sycl::vec<T, n> v)
noexcept {
103 return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + v.s7();
106 template<
class T,
int n, std::enable_if_t<n == 16,
int> = 0>
107 inline constexpr T sum_accumulate(sycl::vec<T, n> v)
noexcept {
108 return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + v.s7() + v.s8()
109 + v.s9() + v.sA() + v.sB() + v.sC() + v.sD() + v.sE() + v.sF();
116 template<class T, std::enable_if_t<std::is_signed<T>::value,
int> = 0>
117 inline constexpr bool all_component_are_negative(T a) {
121 template<class T, int n, std::enable_if_t<n == 2 && std::is_signed<T>::value,
int> = 0>
122 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v)
noexcept {
123 return (v.x() < 0) && (v.y() < 0);
126 template<class T, int n, std::enable_if_t<n == 3 && std::is_signed<T>::value,
int> = 0>
127 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v)
noexcept {
128 return (v.x() < 0) && (v.y() < 0) && (v.z() < 0);
131 template<class T, int n, std::enable_if_t<n == 4 && std::is_signed<T>::value,
int> = 0>
132 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v)
noexcept {
133 return (v.x() < 0) && (v.y() < 0) && (v.z() < 0) && (v.w() < 0);
136 template<class T, int n, std::enable_if_t<n == 8 && std::is_signed<T>::value,
int> = 0>
137 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v)
noexcept {
138 return (v.s0() < 0) && (v.s1() < 0) && (v.s2() < 0) && (v.s3() < 0) && (v.s4() < 0)
139 && (v.s5() < 0) && (v.s6() < 0) && (v.s7() < 0);
142 template<class T, int n, std::enable_if_t<n == 16 && std::is_signed<T>::value,
int> = 0>
143 inline constexpr bool all_component_are_negative(sycl::vec<T, n> v)
noexcept {
144 return (v.s0() < 0) && (v.s1() < 0) && (v.s2() < 0) && (v.s3() < 0) && (v.s4() < 0)
145 && (v.s5() < 0) && (v.s6() < 0) && (v.s7() < 0) && (v.s8() < 0) && (v.s9() < 0)
146 && (v.sA() < 0) && (v.sB() < 0) && (v.sC() < 0) && (v.sD() < 0) && (v.sE() < 0)
155 inline constexpr bool vec_compare_geq(T a, T b) {
159 template<
class T,
int n, std::enable_if_t<n == 2,
int> = 0>
160 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
161 return (v.x() >= w.x()) && (v.y() >= w.y());
164 template<
class T,
int n, std::enable_if_t<n == 3,
int> = 0>
165 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
166 return (v.x() >= w.x()) && (v.y() >= w.y()) && (v.z() >= w.z());
169 template<
class T,
int n, std::enable_if_t<n == 4,
int> = 0>
170 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
171 return (v.x() >= w.x()) && (v.y() >= w.y()) && (v.z() >= w.z()) && (v.w() >= w.w());
174 template<
class T,
int n, std::enable_if_t<n == 8,
int> = 0>
175 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
176 return (v.s0() >= w.s0()) && (v.s1() >= w.s1()) && (v.s2() >= w.s2()) && (v.s3() >= w.s3())
177 && (v.s4() >= w.s4()) && (v.s5() >= w.s5()) && (v.s6() >= w.s6())
178 && (v.s7() >= w.s7());
181 template<
class T,
int n, std::enable_if_t<n == 16,
int> = 0>
182 inline constexpr bool vec_compare_geq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
183 return (v.s0() >= w.s0()) && (v.s1() >= w.s1()) && (v.s2() >= w.s2()) && (v.s3() >= w.s3())
184 && (v.s4() >= w.s4()) && (v.s5() >= w.s5()) && (v.s6() >= w.s6())
185 && (v.s7() >= w.s7()) && (v.s8() >= w.s8()) && (v.s9() >= w.s9())
186 && (v.sA() >= w.sA()) && (v.sB() >= w.sB()) && (v.sC() >= w.sC())
187 && (v.sD() >= w.sD()) && (v.sE() >= w.sE()) && (v.sF() >= w.sF());
195 inline constexpr bool vec_compare_leq(T a, T b) {
199 template<
class T,
int n, std::enable_if_t<n == 2,
int> = 0>
200 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
201 return (v.x() <= w.x()) && (v.y() <= w.y());
204 template<
class T,
int n, std::enable_if_t<n == 3,
int> = 0>
205 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
206 return (v.x() <= w.x()) && (v.y() <= w.y()) && (v.z() <= w.z());
209 template<
class T,
int n, std::enable_if_t<n == 4,
int> = 0>
210 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
211 return (v.x() <= w.x()) && (v.y() <= w.y()) && (v.z() <= w.z()) && (v.w() <= w.w());
214 template<
class T,
int n, std::enable_if_t<n == 8,
int> = 0>
215 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
216 return (v.s0() <= w.s0()) && (v.s1() <= w.s1()) && (v.s2() <= w.s2()) && (v.s3() <= w.s3())
217 && (v.s4() <= w.s4()) && (v.s5() <= w.s5()) && (v.s6() <= w.s6())
218 && (v.s7() <= w.s7());
221 template<
class T,
int n, std::enable_if_t<n == 16,
int> = 0>
222 inline constexpr bool vec_compare_leq(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
223 return (v.s0() <= w.s0()) && (v.s1() <= w.s1()) && (v.s2() <= w.s2()) && (v.s3() <= w.s3())
224 && (v.s4() <= w.s4()) && (v.s5() <= w.s5()) && (v.s6() <= w.s6())
225 && (v.s7() <= w.s7()) && (v.s8() <= w.s8()) && (v.s9() <= w.s9())
226 && (v.sA() <= w.sA()) && (v.sB() <= w.sB()) && (v.sC() <= w.sC())
227 && (v.sD() <= w.sD()) && (v.sE() <= w.sE()) && (v.sF() <= w.sF());
235 inline constexpr bool vec_compare_g(T a, T b) {
239 template<
class T,
int n, std::enable_if_t<n == 2,
int> = 0>
240 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
241 return (v.x() > w.x()) && (v.y() > w.y());
244 template<
class T,
int n, std::enable_if_t<n == 3,
int> = 0>
245 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
246 return (v.x() > w.x()) && (v.y() > w.y()) && (v.z() > w.z());
249 template<
class T,
int n, std::enable_if_t<n == 4,
int> = 0>
250 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
251 return (v.x() > w.x()) && (v.y() > w.y()) && (v.z() > w.z()) && (v.w() > w.w());
254 template<
class T,
int n, std::enable_if_t<n == 8,
int> = 0>
255 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
256 return (v.s0() > w.s0()) && (v.s1() > w.s1()) && (v.s2() > w.s2()) && (v.s3() > w.s3())
257 && (v.s4() > w.s4()) && (v.s5() > w.s5()) && (v.s6() > w.s6()) && (v.s7() > w.s7());
260 template<
class T,
int n, std::enable_if_t<n == 16,
int> = 0>
261 inline constexpr bool vec_compare_g(sycl::vec<T, n> v, sycl::vec<T, n> w)
noexcept {
262 return (v.s0() > w.s0()) && (v.s1() > w.s1()) && (v.s2() > w.s2()) && (v.s3() > w.s3())
263 && (v.s4() > w.s4()) && (v.s5() > w.s5()) && (v.s6() > w.s6()) && (v.s7() > w.s7())
264 && (v.s8() > w.s8()) && (v.s9() > w.s9()) && (v.sA() > w.sA()) && (v.sB() > w.sB())
265 && (v.sC() > w.sC()) && (v.sD() > w.sD()) && (v.sE() > w.sE()) && (v.sF() > w.sF());
273 inline constexpr bool component_have_a_zero(T a) {
277 template<
class T,
int n, std::enable_if_t<n == 2,
int> = 0>
278 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v)
noexcept {
279 return (v.x() == 0) || (v.y() == 0);
282 template<
class T,
int n, std::enable_if_t<n == 3,
int> = 0>
283 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v)
noexcept {
284 return (v.x() == 0) || (v.y() == 0) || (v.z() == 0);
287 template<
class T,
int n, std::enable_if_t<n == 4,
int> = 0>
288 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v)
noexcept {
289 return (v.x() == 0) || (v.y() == 0) || (v.z() == 0) || (v.w() == 0);
292 template<
class T,
int n, std::enable_if_t<n == 8,
int> = 0>
293 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v)
noexcept {
294 return (v.s0() == 0) || (v.s1() == 0) || (v.s2() == 0) || (v.s3() == 0) || (v.s4() == 0)
295 || (v.s5() == 0) || (v.s6() == 0) || (v.s7() == 0);
298 template<
class T,
int n, std::enable_if_t<n == 16,
int> = 0>
299 inline constexpr bool component_have_a_zero(sycl::vec<T, n> v)
noexcept {
300 return (v.s0() == 0) || (v.s1() == 0) || (v.s2() == 0) || (v.s3() == 0) || (v.s4() == 0)
301 || (v.s5() == 0) || (v.s6() == 0) || (v.s7() == 0) || (v.s8() == 0) || (v.s9() == 0)
302 || (v.sA() == 0) || (v.sB() == 0) || (v.sC() == 0) || (v.sD() == 0) || (v.sE() == 0)
311 inline constexpr bool component_have_only_one_zero(T a) {
315 template<
class T,
int n, std::enable_if_t<n == 2,
int> = 0>
316 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v)
noexcept {
317 return (v.x() == 0) != (v.y() == 0);
320 template<
class T,
int n, std::enable_if_t<n == 3,
int> = 0>
321 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v)
noexcept {
322 return 1 ==
int{(v.x() == 0)} +
int{(v.y() == 0)} +
int{(v.z() == 0)};
325 template<
class T,
int n, std::enable_if_t<n == 4,
int> = 0>
326 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v)
noexcept {
327 return 1 ==
int{(v.x() == 0)} +
int{(v.y() == 0)} +
int{(v.z() == 0)} +
int{(v.w() == 0)};
330 template<
class T,
int n, std::enable_if_t<n == 8,
int> = 0>
331 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v)
noexcept {
333 ==
int{(v.s0() == 0)} +
int{(v.s1() == 0)} +
int{(v.s2() == 0)} +
int{(v.s3() == 0)}
334 +
int{(v.s4() == 0)} +
int{(v.s5() == 0)} +
int{(v.s6() == 0)}
335 +
int{(v.s7() == 0)};
338 template<
class T,
int n, std::enable_if_t<n == 16,
int> = 0>
339 inline constexpr bool component_have_only_one_zero(sycl::vec<T, n> v)
noexcept {
341 ==
int{(v.s0() == 0)} +
int{(v.s1() == 0)} +
int{(v.s2() == 0)} +
int{(v.s3() == 0)}
342 +
int{(v.s4() == 0)} +
int{(v.s5() == 0)} +
int{(v.s6() == 0)}
343 +
int{(v.s7() == 0)} +
int{(v.s8() == 0)} +
int{(v.s9() == 0)}
344 +
int{(v.sA() == 0)} +
int{(v.sB() == 0)} +
int{(v.sC() == 0)}
345 +
int{(v.sD() == 0)} +
int{(v.sE() == 0)} +
int{(v.sF() == 0)};
353 inline constexpr bool component_have_at_most_one_zero(T a) {
357 template<
class T,
int n, std::enable_if_t<n == 2,
int> = 0>
358 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v)
noexcept {
359 return 2 >
int{(v.x() == 0)} +
int{(v.y() == 0)};
362 template<
class T,
int n, std::enable_if_t<n == 3,
int> = 0>
363 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v)
noexcept {
364 return 2 >
int{(v.x() == 0)} +
int{(v.y() == 0)} +
int{(v.z() == 0)};
367 template<
class T,
int n, std::enable_if_t<n == 4,
int> = 0>
368 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v)
noexcept {
369 return 2 >
int{(v.x() == 0)} +
int{(v.y() == 0)} +
int{(v.z() == 0)} +
int{(v.w() == 0)};
372 template<
class T,
int n, std::enable_if_t<n == 8,
int> = 0>
373 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v)
noexcept {
374 return 2 >
int{(v.s0() == 0)} +
int{(v.s1() == 0)} +
int{(v.s2() == 0)} +
int{(v.s3() == 0)}
375 +
int{(v.s4() == 0)} +
int{(v.s5() == 0)} +
int{(v.s6() == 0)}
376 +
int{(v.s7() == 0)};
379 template<
class T,
int n, std::enable_if_t<n == 16,
int> = 0>
380 inline constexpr bool component_have_at_most_one_zero(sycl::vec<T, n> v)
noexcept {
381 return 2 >
int{(v.s0() == 0)} +
int{(v.s1() == 0)} +
int{(v.s2() == 0)} +
int{(v.s3() == 0)}
382 +
int{(v.s4() == 0)} +
int{(v.s5() == 0)} +
int{(v.s6() == 0)}
383 +
int{(v.s7() == 0)} +
int{(v.s8() == 0)} +
int{(v.s9() == 0)}
384 +
int{(v.sA() == 0)} +
int{(v.sB() == 0)} +
int{(v.sC() == 0)}
385 +
int{(v.sD() == 0)} +
int{(v.sE() == 0)} +
int{(v.sF() == 0)};
390namespace sham::details {
392 inline T g_sycl_min(T a, T b) {
397 return sycl::fmin(a, b);
399 return sycl::min(a, b);
401 return sycl::min(a, b);
406 inline T g_sycl_max(T a, T b) {
411 return sycl::fmax(a, b);
413 return sycl::max(a, b);
415 return sycl::max(a, b);
420 inline T g_sycl_abs(T a) {
425 return sycl::fabs(a);
434 inline shambase::VecComponent<T> g_sycl_dot(T a, T b) {
442 return sycl::dot(a, b);
446 return sum_accumulate(a * b);
451 inline constexpr bool vec_equals(sycl::vec<T, 2> a, sycl::vec<T, 2> b)
noexcept {
452 bool eqx = a.x() == b.x();
453 bool eqy = a.y() == b.y();
458 inline constexpr bool vec_equals(sycl::vec<T, 3> a, sycl::vec<T, 3> b)
noexcept {
459 bool eqx = a.x() == b.x();
460 bool eqy = a.y() == b.y();
461 bool eqz = a.z() == b.z();
462 return eqx && eqy && eqz;
466 inline constexpr bool vec_equals(sycl::vec<T, 4> a, sycl::vec<T, 4> b)
noexcept {
467 bool eqx = a.x() == b.x();
468 bool eqy = a.y() == b.y();
469 bool eqz = a.z() == b.z();
470 bool eqw = a.w() == b.w();
471 return eqx && eqy && eqz && eqw;
475 inline constexpr bool vec_equals(sycl::vec<T, 8> a, sycl::vec<T, 8> b)
noexcept {
476 bool eqs0 = a.s0() == b.s0();
477 bool eqs1 = a.s1() == b.s1();
478 bool eqs2 = a.s2() == b.s2();
479 bool eqs3 = a.s3() == b.s3();
480 bool eqs4 = a.s4() == b.s4();
481 bool eqs5 = a.s5() == b.s5();
482 bool eqs6 = a.s6() == b.s6();
483 bool eqs7 = a.s7() == b.s7();
484 return eqs0 && eqs1 && eqs2 && eqs3 && eqs4 && eqs5 && eqs6 && eqs7;
488 inline constexpr bool vec_equals(sycl::vec<T, 16> a, sycl::vec<T, 16> b)
noexcept {
489 bool eqs0 = a.s0() == b.s0();
490 bool eqs1 = a.s1() == b.s1();
491 bool eqs2 = a.s2() == b.s2();
492 bool eqs3 = a.s3() == b.s3();
493 bool eqs4 = a.s4() == b.s4();
494 bool eqs5 = a.s5() == b.s5();
495 bool eqs6 = a.s6() == b.s6();
496 bool eqs7 = a.s7() == b.s7();
498 bool eqs8 = a.s8() == b.s8();
499 bool eqs9 = a.s9() == b.s9();
500 bool eqsA = a.sA() == b.sA();
501 bool eqsB = a.sB() == b.sB();
502 bool eqsC = a.sC() == b.sC();
503 bool eqsD = a.sD() == b.sD();
504 bool eqsE = a.sE() == b.sE();
505 bool eqsF = a.sF() == b.sF();
507 return eqs0 && eqs1 && eqs2 && eqs3 && eqs4 && eqs5 && eqs6 && eqs7 && eqs8 && eqs9 && eqsA
508 && eqsB && eqsC && eqsD && eqsE && eqsF;
512 inline constexpr bool vec_equals(T a, T b)
noexcept {
521 inline T min(T a, T b) {
522 return sham::details::g_sycl_min(a, b);
526 inline T max(T a, T b) {
527 return sham::details::g_sycl_max(a, b);
531 inline shambase::VecComponent<T> max_component(T a) {
533 using Tscal = shambase::VecComponent<T>;
535 if constexpr (std::is_same_v<T, sycl::vec<Tscal, 2>>) {
536 return sycl::max(a.x(), a.y());
537 }
else if constexpr (std::is_same_v<T, sycl::vec<Tscal, 3>>) {
538 return sycl::max(a.x(), sycl::max(a.y(), a.z()));
539 }
else if constexpr (std::is_same_v<T, sycl::vec<Tscal, 4>>) {
540 return sycl::max(sycl::max(a.x(), a.y()), sycl::max(a.z(), a.w()));
541 }
else if constexpr (std::is_same_v<T, sycl::vec<Tscal, 8>>) {
543 sycl::max(sycl::max(a.s0(), a.s1()), sycl::max(a.s2(), a.s3())),
544 sycl::max(sycl::max(a.s4(), a.s5()), sycl::max(a.s6(), a.s7())));
545 }
else if constexpr (std::is_same_v<T, sycl::vec<Tscal, 16>>) {
548 sycl::max(sycl::max(a.s0(), a.s1()), sycl::max(a.s2(), a.s3())),
549 sycl::max(sycl::max(a.s4(), a.s5()), sycl::max(a.s6(), a.s7()))),
551 sycl::max(sycl::max(a.s8(), a.s9()), sycl::max(a.sA(), a.sB())),
552 sycl::max(sycl::max(a.sC(), a.sD()), sycl::max(a.sE(), a.sF()))));
560 inline shambase::VecComponent<T> dot(T a, T b) {
561 return sham::details::g_sycl_dot(a, b);
565 inline shambase::VecComponent<T> length2(T a) {
566 return sham::dot(a, a);
570 inline T max_8points(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) {
571 return max(max(max(v0, v1), max(v2, v3)), max(max(v4, v5), max(v6, v7)));
575 inline T min_8points(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) {
576 return min(min(min(v0, v1), min(v2, v3)), min(min(v4, v5), min(v6, v7)));
581 return sham::details::g_sycl_abs(a);
585 inline T positive_part(T a) {
586 return (sham::abs(a) + a) / 2;
590 inline T negative_part(T a) {
591 return (sham::abs(a) - a) / 2;
595 inline bool equals(T a, T b) {
596 return details::vec_equals(a, b);
601 inline bool equals(
const std::vector<T> &a,
const std::vector<T> &b) {
602 if (a.size() != b.size()) {
605 for (
u32 i = 0; i < a.size(); i++) {
606 if (!sham::equals(a[i], b[i])) {
613 inline auto pack32(
u32 a,
u32 b) ->
u64 {
return (
u64(a) << 32U) + b; };
615 inline auto unpack32(
u64 v) -> sycl::vec<u32, 2> {
return {
u32(v >> 32U),
u32(v)}; };
618 inline T m1pown(
u32 n) {
619 return (n % 2 == 0) ? T(1) : -T(1);
623 inline bool has_nan(T v) {
624 auto tmp = !sycl::isnan(v);
629 inline bool has_inf(T v) {
630#ifdef SYCL2020_FEATURE_ISINF
631 auto tmp = !sycl::isinf(v);
634 auto tmp = !syclbackport::fallback_is_inf(v);
640 inline bool has_nan_or_inf(T v) {
641#ifdef SYCL2020_FEATURE_ISINF
642 auto tmp = !(sycl::isnan(v) || sycl::isinf(v));
645 auto tmp = !(sycl::isnan(v) || syclbackport::fallback_is_inf(v));
659 template<
class T,
int n>
660 inline bool has_nan(sycl::vec<T, n> v) {
663 for (
i32 i = 0; i < n; i++) {
664 has = has || (sycl::isnan(v[i]));
678 template<
class T,
int n>
679 inline bool has_inf(sycl::vec<T, n> v) {
682 for (
i32 i = 0; i < n; i++) {
683#ifdef SYCL2020_FEATURE_ISINF
684 has = has || (sycl::isinf(v[i]));
686 has = has || (syclbackport::fallback_is_inf(v[i]));
701 template<
class T,
int n>
702 inline bool has_nan_or_inf(sycl::vec<T, n> v) {
705 for (
i32 i = 0; i < n; i++) {
706#ifdef SYCL2020_FEATURE_ISINF
707 has = has || (sycl::isnan(v[i]) || sycl::isinf(v[i]));
709 has = has || (sycl::isnan(v[i]) || syclbackport::fallback_is_inf(v[i]));
723 template<i32 power,
class T>
726 if constexpr (power < 0) {
728 }
else if constexpr (power == 0) {
730 }
else if constexpr (power == 1) {
732 }
else if constexpr (power % 2 == 0) {
735 }
else if constexpr (power % 2 == 1) {
737 return tmp * tmp * a;
742 inline constexpr T clz(T a)
noexcept {
743#ifdef SYCL2020_FEATURE_CLZ
746 #ifdef SYCL_COMP_ACPP
748 if constexpr (std::is_same_v<T, u32>) {
750 __hipsycl_if_target_host(
return __builtin_clz(a);)
752 __hipsycl_if_target_hiplike(
return __clz(a);)
754 __hipsycl_if_target_spirv(
return __spirv_ocl_clz(a);)
756 __hipsycl_if_target_sscp(
return sycl::clz(a);)
759 if constexpr (std::is_same_v<T, u64>) {
761 __hipsycl_if_target_host(
return __builtin_clzll(a);)
763 __hipsycl_if_target_hiplike(
return __clzll(a);)
765 __hipsycl_if_target_spirv(
return __spirv_ocl_clz(a);)
767 __hipsycl_if_target_sscp(
return sycl::clz(a);)
782 template<
class T, std::enable_if_t<std::is_
integral_v<T>,
int> = 0>
783 inline constexpr T
clz_xor(T a, T b)
noexcept {
784 return sham::clz(a ^ b);
790 template<
class T, std::enable_if_t<std::is_
integral_v<T>,
int> = 0>
804 template<
class T, std::enable_if_t<std::is_
integral_v<T> || (!std::is_
signed_v<T>),
int> = 0>
810 bool is_above_max = v > max_signed_p1;
827 return ((y > morton_length - 1 || y < 0) ? -1 : int(
clz_xor(m[x], m[y])));
842 return (v >= minvsat) ? T{1.} / v : satval;
856 inline T
inv_sat(T v, T minvsat = T{1e-9}, T satval = T{0.})
noexcept {
857 return (std::abs(v) >= minvsat) ? T{1.} / v : satval;
872 return (v != T{0} && v == v) ? T{1.} / v : satval;
/**
 * @brief Convert a single value to another type with `static_cast`.
 *
 * @tparam Tdest   destination type
 * @tparam Tsource source type
 * @param coord value to convert
 * @return `coord` converted to `Tdest`
 */
template<class Tdest, class Tsource>
inline Tdest convert_internal(Tsource coord) {
    const Tdest converted = static_cast<Tdest>(coord);
    return converted;
}
881 template<
class Tdest,
class Tsource,
int N>
882 inline sycl::vec<Tdest, N> convert_internal(sycl::vec<Tsource, N> coord) {
883 sycl::vec<Tdest, N> result;
884 for (
int i = 0; i < N; ++i) {
885 result[i] =
static_cast<Tdest
>(coord[i]);
892 template<
class Tdest,
class Tsource>
894 return details::convert_internal<shambase::VecComponent<Tdest>>(coord);
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int32_t i32
32 bit integer
Namespace for internal details of the logs module.
Namespace for backends; it is named simply `sham` because `shambackends` is too long to write.
constexpr T pow_constexpr(T a) noexcept
generalized pow constexpr
constexpr T clz_xor(T a, T b) noexcept
give the length of the common prefix
T inv_sat_positive(T v, T minvsat=T{1e-9}, T satval=T{0.}) noexcept
inverse saturated (positive numbers only)
Tdest convert(Tsource coord)
Helper to avoid differences between SYCL implementations of convert; it always uses static_cast.
i32 karras_delta(i32 x, i32 y, u32 morton_length, Acc m) noexcept
delta operator defined in Karras 2012
constexpr T log2_pow2_num(T v) noexcept
compute the log2 of the number v being a power of 2
constexpr T roundup_pow2_clz(T v) noexcept
round up to the next power of two 0 is rounded up to 1 as it is not a pow of 2 every input above the ...
T inv_sat(T v, T minvsat=T{1e-9}, T satval=T{0.}) noexcept
inverse saturated
T inv_sat_zero(T v, T satval=T{0.}) noexcept
inverse saturated (zero version)
bool equals(sycl::queue &q, sycl::buffer< T > &buf1, sycl::buffer< T > &buf2, u32 cnt)
Compare elements between two sycl::buffers for equality.
constexpr bool is_pow_of_two(T v) noexcept
determine if v is a power of two and check if v==0 Source : https://graphics.stanford....
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.