Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
matrix_exponential.hpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
10#pragma once
11
21#include "matrix_op.hpp"
22#include "shambackends/sycl.hpp"
23
24namespace shammath {
25
41 inline constexpr auto sequence_mk() {
42 std::array<i32, 9> seq = {0};
43 seq[0] = 2;
44 seq[1] = 4;
45 seq[2] = 6;
46 seq[3] = 9;
47 seq[4] = 12;
48 seq[5] = 16;
49 seq[6] = 20;
50 seq[7] = 25;
51 seq[8] = 30;
52
53 return seq;
54 }
55
60 inline constexpr auto sequence_qk() {
61 std::array<i32, 9> seq = {0};
62 seq[0] = 1;
63 seq[1] = 2;
64 seq[2] = 2;
65 seq[3] = 3;
66 seq[4] = 3;
67 seq[5] = 4;
68 seq[6] = 4;
69 seq[7] = 5;
70 seq[8] = 5;
71 return seq;
72 }
73
77 inline constexpr auto sequence_rk() {
78 std::array<i32, 9> seq = {0};
79 seq[0] = 2;
80 seq[1] = 2;
81 seq[2] = 3;
82 seq[3] = 3;
83 seq[4] = 4;
84 seq[5] = 4;
85 seq[6] = 5;
86 seq[7] = 5;
87 seq[8] = 6;
88 return seq;
89 }
90
94 inline constexpr auto sequence_theta_mk() {
95 std::array<f64, 9> seq = {0};
96 seq[0] = 2.5810e-8;
97 seq[1] = 3.3972e-4;
98 seq[2] = 9.0657e-3;
99 seq[3] = 8.9578e-2;
100 seq[4] = 2.9962e-1;
101 seq[5] = 7.80e-1;
102 seq[6] = 1.4383;
103 seq[7] = 2.4286;
104 seq[8] = 3.5397;
105 return seq;
106 }
107
111 inline constexpr auto sequence_nheta_mk() {
112 std::array<f64, 9> seq = {0};
113 seq[0] = 8.7334e-6;
114 seq[1] = 1.6778e-3;
115 seq[2] = 1.7720e-3;
116 seq[3] = 1.1354e-1;
117 seq[4] = 3.2690e-1;
118 seq[5] = 7.8738e-1;
119 seq[6] = 1.4383;
120 seq[7] = 2.42860;
121 seq[8] = 3.5397;
122 return seq;
123 }
124
128 inline constexpr auto define_bexp_coef() {
129 std::array<f64, 30> coefs = {0};
130 coefs[0] = 1.0;
131 coefs[1] = 1.0;
132 coefs[2] = 0.5;
133 coefs[3] = 0.16666666666666666;
134 coefs[4] = 0.041666666666666664;
135 coefs[5] = 0.008333333333333333;
136 coefs[6] = 0.001388888888888889;
137 coefs[7] = 0.0001984126984126984;
138 coefs[8] = 2.48015873015873e-05;
139 coefs[9] = 2.7557319223985893e-06;
140 coefs[10] = 2.755731922398589e-07;
141 coefs[11] = 2.505210838544172e-08;
142 coefs[12] = 2.08767569878681e-09;
143 coefs[13] = 1.6059043836821613e-10;
144 coefs[14] = 1.1470745597729725e-11;
145 coefs[15] = 7.647163731819816e-13;
146 coefs[16] = 4.779477332387385e-14;
147 coefs[17] = 2.8114572543455206e-15;
148 coefs[18] = 1.5619206968586225e-16;
149 coefs[19] = 8.22063524662433e-18;
150 coefs[20] = 4.110317623312165e-19;
151 coefs[21] = 1.9572941063391263e-20;
152 coefs[22] = 8.896791392450574e-22;
153 coefs[23] = 3.868170170630684e-23;
154 coefs[24] = 1.6117375710961184e-24;
155 coefs[25] = 6.446950284384474e-26;
156 coefs[26] = 2.4795962632247976e-27;
157 coefs[27] = 9.183689863795546e-29;
158 coefs[28] = 3.279889237069838e-30;
159 coefs[29] = 1.1309962886447716e-31;
160
161 return coefs;
162 }
163
177 template<class T, class Extents1, class Layout1, class Accessor1>
178 inline void order_scale(
179 const i32 K,
180 std::array<i32, 9> &seq_mk,
181 std::array<f64, 9> &seq_theta_mk,
182 const std::mdspan<T, Extents1, Layout1, Accessor1> &A,
183 const size_t size_A,
184 i32 &k_star,
185 i32 &m_star,
186 i32 &s_star) {
187 m_star = seq_mk[K - 1];
188 k_star = K;
189 s_star = 0;
190
191 T norm_A = 0;
192 mat_L1_norm<T>(A, norm_A);
193
194 i32 s_tilde = static_cast<i32>(sycl::ceil(
195 sham::max(
196 static_cast<f64>(0.0),
197 static_cast<f64>(sycl::log2(norm_A / seq_theta_mk[K - 1])))));
198 s_star = s_tilde;
199 i32 k = 2;
200 bool cond = false;
201 for (; k < (K + 1) && !cond; k++) {
202 cond = (norm_A <= seq_theta_mk[k - 1]) && (norm_A <= seq_theta_mk[K - 1]);
203 m_star = cond * seq_mk[k - 1] + !cond * m_star;
204 k_star = cond * k + !cond * k_star;
205 }
206 i32 k_choice = sham::min(K, k); // if we break the preceding loop then use k else K
207 auto ld_7 = [&](i32 s_val) {
208 k_star = k_choice - 1;
209 s_star = sham::max(static_cast<i32>(0), s_val);
210 m_star = seq_mk[k_star - 1];
211 };
212
213 auto ld_8 = [&](i32 s_val) {
214 k_star = k_choice - 2;
215 s_star = sham::max(static_cast<i32>(1), s_val + 1);
216 m_star = seq_mk[k_star - 1];
217 };
218
219 auto ld_9 = [&](i32 s_val) {
220 k_star = k_choice - 3;
221 s_star = sham::max(static_cast<i32>(2), s_val + 2);
222 m_star = seq_mk[k_star - 1];
223 };
224
225 f64 cmp_1 = norm_A / (1 << s_tilde);
226
227 i32 val_2 = ((k_choice >= 8) && (cmp_1 <= 2 * seq_theta_mk[k_choice - 3])) * 2;
228 i32 val_3 = ((k_choice >= 9) && (cmp_1 <= 4 * seq_theta_mk[k_choice - 4])) * 3;
229 i32 val_1 = ((k_choice >= 7) && (cmp_1 <= seq_theta_mk[k_choice - 2]));
230
231 i32 val = sham::max(val_1, sham::max(val_2, val_3));
232
233 auto process_val = [&](int val) {
234 if (val == 1) {
235 ld_7(s_tilde);
236 } else if (val == 2) {
237 ld_8(s_tilde);
238 } else if (val == 3) {
239 ld_9(s_tilde);
240 }
241 };
242
243 process_val(val);
244 }
245
256 template<
257 typename T,
258 class U,
259 class Extents1,
260 class Extents2,
261 class Extents3,
262 class Extents4,
263 class Extents5,
264 class Layout1,
265 class Layout2,
266 class Layout3,
267 class Layout4,
268 class Layout5,
269 class Accessor1,
270 class Accessor2,
271 class Accessor3,
272 class Accessor4,
273 class Accessor5>
274 inline void taylor_eval(
275 const i32 r,
276 const i32 q,
277 std::array<f64, 30> &bi_seq,
278 const size_t size,
279 const std::mdspan<T, Extents1, Layout1, Accessor1> &A,
280 const std::mdspan<T, Extents2, Layout2, Accessor2> &F,
281 const std::mdspan<T, Extents3, Layout3, Accessor3> &B,
282 const std::mdspan<T, Extents4, Layout4, Accessor4> &I,
283 const std::mdspan<T, Extents5, Layout5, Accessor5> &Id) {
284 mat_set_nul<T>(F);
285
286 for (auto k = r - 1; k >= 0; k--) {
287 mat_set_identity<T>(I);
288 mat_set_identity<T>(Id);
289 mat_set_nul<T>(B);
290 i32 cc = 0;
291
292 for (auto j = 1; j <= q; j++) {
293 mat_copy<T>(I, Id);
294 mat_gemm<T, U>(1, A, Id, 0, I);
295 cc = q * k + j;
296 mat_axpy_beta<T, U>(bi_seq[cc], I, 1, B);
297 }
298 mat_set_identity<T>(Id);
299
300 i32 cond = (k >= 1);
301 mat_axpy_beta<T, U>(1, B, 1, F);
302 mat_axpy_beta<T, U>(1 - cond, Id, cond, I);
303
304 mat_gemm<T, U>(1, F, I, 0, B);
305 mat_copy<T>(B, F);
306 }
307 mat_plus_equal_scalar_id<T, U>(F, 1);
308 }
309
318 template<
319 typename T,
320 class U,
321 class Extents1,
322 class Extents2,
323 class Extents3,
324 class Extents4,
325 class Extents5,
326 class Layout1,
327 class Layout2,
328 class Layout3,
329 class Layout4,
330 class Layout5,
331 class Accessor1,
332 class Accessor2,
333 class Accessor3,
334 class Accessor4,
335 class Accessor5>
336 inline void mat_exp(
337 const i32 K,
338 const std::mdspan<T, Extents1, Layout1, Accessor1> &A,
339 const std::mdspan<T, Extents2, Layout2, Accessor2> &F,
340 const std::mdspan<T, Extents3, Layout3, Accessor3> &B,
341 const std::mdspan<T, Extents4, Layout4, Accessor4> &I,
342 const std::mdspan<T, Extents5, Layout5, Accessor5> &Id,
343 const size_t size_A) {
344 auto seq_mk = sequence_mk();
345 auto seq_qk = sequence_qk();
346 auto seq_rk = sequence_rk();
347 auto seq_ntheta_mk = sequence_nheta_mk();
348 auto seq_bi = define_bexp_coef();
349
350 i32 k_star{0}, m_star{0}, s_star{0};
351 // computation of k*, s*, m*
352 order_scale<T>(K, seq_mk, seq_ntheta_mk, A, size_A, k_star, m_star, s_star);
353 i32 r = seq_rk[k_star - 1];
354 i32 q = seq_qk[k_star - 1];
355 // scaling step
356 i32 pw = (1 << s_star);
357 f64 scale_factor = 1.0 / pw;
358 mat_mul_scalar<T>(A, scale_factor);
359
360 // Taylor polynomial evaluation
361 taylor_eval<T, U>(r, q, seq_bi, size_A, A, F, B, I, Id);
362
363 // squaring step
364 mat_set_identity<T>(Id);
365 mat_set_identity<T>(I);
366
367 for (auto j = 1; j <= pw; j++) {
368 mat_copy<T>(I, Id);
369 mat_gemm<T, U>(1, F, Id, 0, I);
370 }
371 mat_copy<T>(I, A);
372 }
373} // namespace shammath
double f64
Alias for double.
std::int32_t i32
32 bit integer
namespace for math utility
Definition AABB.hpp:26
void taylor_eval(const i32 r, const i32 q, std::array< f64, 30 > &bi_seq, const size_t size, const std::mdspan< T, Extents1, Layout1, Accessor1 > &A, const std::mdspan< T, Extents2, Layout2, Accessor2 > &F, const std::mdspan< T, Extents3, Layout3, Accessor3 > &B, const std::mdspan< T, Extents4, Layout4, Accessor4 > &I, const std::mdspan< T, Extents5, Layout5, Accessor5 > &Id)
This function compute the Taylor polynomial up to order m_star.
constexpr auto sequence_nheta_mk()
precomputed optimal sequence based on backward error analysis
void order_scale(const i32 K, std::array< i32, 9 > &seq_mk, std::array< f64, 9 > &seq_theta_mk, const std::mdspan< T, Extents1, Layout1, Accessor1 > &A, const size_t size_A, i32 &k_star, i32 &m_star, i32 &s_star)
this function compute the Taylor's polynomial order (m_star) the optimal number of matrix product dur...
constexpr auto define_bexp_coef()
1/(i!)
constexpr auto sequence_rk()
precomputed optimal Paterson-Stockmeyer polynomial degrees
constexpr auto sequence_qk()
precomputed optimal Paterson-Stockmeyer intergers (it's used to compute the matrix power)
constexpr auto sequence_theta_mk()
precomputed optimal sequence based on backward error analysis
void mat_exp(const i32 K, const std::mdspan< T, Extents1, Layout1, Accessor1 > &A, const std::mdspan< T, Extents2, Layout2, Accessor2 > &F, const std::mdspan< T, Extents3, Layout3, Accessor3 > &B, const std::mdspan< T, Extents4, Layout4, Accessor4 > &I, const std::mdspan< T, Extents5, Layout5, Accessor5 > &Id, const size_t size_A)
matrix scaling-squaring Talylor-based matrix exponential
constexpr auto sequence_mk()
precomputed optimal Taylor's polynomial orders
Traits for C++ types.