Shamrock 2025.10.0
Astrophysical Code
Loading...
Searching...
No Matches
convert_ranges.cpp
Go to the documentation of this file.
1// -------------------------------------------------------//
2//
3// SHAMROCK code for hydrodynamics
4// Copyright (c) 2021-2026 Timothée David--Cléris <tim.shamrock@proton.me>
5// SPDX-License-Identifier: CeCILL Free Software License Agreement v2.1
6// Shamrock is licensed under the CeCILL 2.1 License, see LICENSE for more information
7//
8// -------------------------------------------------------//
9
16#include "shambase/integer.hpp"
18
19template<>
20void sycl_convert_cell_range<u32, f32_3>(
21 sycl::queue &queue,
22
23 u32 leaf_cnt,
24 u32 internal_cnt,
25 f32_3 bounding_box_min,
26 f32_3 bounding_box_max,
27 std::unique_ptr<sycl::buffer<u16_3>> &buf_pos_min_cell,
28 std::unique_ptr<sycl::buffer<u16_3>> &buf_pos_max_cell,
29 std::unique_ptr<sycl::buffer<f32_3>> &buf_pos_min_cell_flt,
30 std::unique_ptr<sycl::buffer<f32_3>> &buf_pos_max_cell_flt) {
31
32 using f3_xyzh = f32_3;
33
34 sycl::range<1> range_cell{leaf_cnt + internal_cnt};
35
36 constexpr u32 group_size = 256;
37 u32 max_len = leaf_cnt + internal_cnt;
38 u32 group_cnt = shambase::group_count(leaf_cnt + internal_cnt, group_size);
39 group_cnt = group_cnt + (group_cnt % 4);
40 u32 corrected_len = group_cnt * group_size;
41
42 auto ker_convert_cell_ranges = [&, max_len](sycl::handler &cgh) {
43 f3_xyzh b_box_min = bounding_box_min;
44 f3_xyzh b_box_max = bounding_box_max;
45
46 auto pos_min_cell = buf_pos_min_cell->get_access<sycl::access::mode::read>(cgh);
47 auto pos_max_cell = buf_pos_max_cell->get_access<sycl::access::mode::read>(cgh);
48
49 // auto pos_min_cell_flt =
50 // buf_pos_min_cell_flt->get_access<sycl::access::mode::discard_write>(cgh); auto
51 // pos_max_cell_flt =
52 // buf_pos_max_cell_flt->get_access<sycl::access::mode::discard_write>(cgh);
53
54 auto pos_min_cell_flt = sycl::accessor{
55 *buf_pos_min_cell_flt, cgh, sycl::write_only, sycl::property::no_init{}};
56 auto pos_max_cell_flt = sycl::accessor{
57 *buf_pos_max_cell_flt, cgh, sycl::write_only, sycl::property::no_init{}};
58
59 cgh.parallel_for<class Convert_cell_range_u32_f32>(
60 sycl::nd_range<1>{corrected_len, group_size}, [=](sycl::nd_item<1> id) {
61 u32 local_id = id.get_local_id(0);
62 u32 group_tile_id = id.get_group_linear_id();
63 u32 gid = group_tile_id * group_size + local_id;
64
65 if (gid >= max_len)
66 return;
67
68 pos_min_cell_flt[gid].s0() = f32(pos_min_cell[gid].s0()) * (1 / 1024.f);
69 pos_max_cell_flt[gid].s0() = f32(pos_max_cell[gid].s0()) * (1 / 1024.f);
70
71 pos_min_cell_flt[gid].s1() = f32(pos_min_cell[gid].s1()) * (1 / 1024.f);
72 pos_max_cell_flt[gid].s1() = f32(pos_max_cell[gid].s1()) * (1 / 1024.f);
73
74 pos_min_cell_flt[gid].s2() = f32(pos_min_cell[gid].s2()) * (1 / 1024.f);
75 pos_max_cell_flt[gid].s2() = f32(pos_max_cell[gid].s2()) * (1 / 1024.f);
76
77 pos_min_cell_flt[gid] *= b_box_max - b_box_min;
78 pos_min_cell_flt[gid] += b_box_min;
79
80 pos_max_cell_flt[gid] *= b_box_max - b_box_min;
81 pos_max_cell_flt[gid] += b_box_min;
82 });
83 };
84
85 queue.submit(ker_convert_cell_ranges);
86}
87
88template<>
89void sycl_convert_cell_range<u64, f32_3>(
90 sycl::queue &queue,
91
92 u32 leaf_cnt,
93 u32 internal_cnt,
94 f32_3 bounding_box_min,
95 f32_3 bounding_box_max,
96 std::unique_ptr<sycl::buffer<u32_3>> &buf_pos_min_cell,
97 std::unique_ptr<sycl::buffer<u32_3>> &buf_pos_max_cell,
98 std::unique_ptr<sycl::buffer<f32_3>> &buf_pos_min_cell_flt,
99 std::unique_ptr<sycl::buffer<f32_3>> &buf_pos_max_cell_flt) {
100
101 using f3_xyzh = f32_3;
102
103 sycl::range<1> range_cell{leaf_cnt + internal_cnt};
104
105 constexpr u32 group_size = 256;
106 u32 max_len = leaf_cnt + internal_cnt;
107 u32 group_cnt = shambase::group_count(leaf_cnt + internal_cnt, group_size);
108 group_cnt = group_cnt + (group_cnt % 4);
109 u32 corrected_len = group_cnt * group_size;
110
111 auto ker_convert_cell_ranges = [&, max_len](sycl::handler &cgh) {
112 f3_xyzh b_box_min = bounding_box_min;
113 f3_xyzh b_box_max = bounding_box_max;
114
115 auto pos_min_cell = buf_pos_min_cell->get_access<sycl::access::mode::read>(cgh);
116 auto pos_max_cell = buf_pos_max_cell->get_access<sycl::access::mode::read>(cgh);
117
118 // auto pos_min_cell_flt =
119 // buf_pos_min_cell_flt->get_access<sycl::access::mode::discard_write>(cgh); auto
120 // pos_max_cell_flt =
121 // buf_pos_max_cell_flt->get_access<sycl::access::mode::discard_write>(cgh);
122
123 auto pos_min_cell_flt = sycl::accessor{
124 *buf_pos_min_cell_flt, cgh, sycl::write_only, sycl::property::no_init{}};
125 auto pos_max_cell_flt = sycl::accessor{
126 *buf_pos_max_cell_flt, cgh, sycl::write_only, sycl::property::no_init{}};
127
128 cgh.parallel_for<class Convert_cell_range_u64_f32>(
129 sycl::nd_range<1>{corrected_len, group_size}, [=](sycl::nd_item<1> id) {
130 u32 local_id = id.get_local_id(0);
131 u32 group_tile_id = id.get_group_linear_id();
132 u32 gid = group_tile_id * group_size + local_id;
133
134 if (gid >= max_len)
135 return;
136
137 pos_min_cell_flt[gid].s0() = f32(pos_min_cell[gid].s0()) * (1 / 2097152.f);
138 pos_max_cell_flt[gid].s0() = f32(pos_max_cell[gid].s0()) * (1 / 2097152.f);
139
140 pos_min_cell_flt[gid].s1() = f32(pos_min_cell[gid].s1()) * (1 / 2097152.f);
141 pos_max_cell_flt[gid].s1() = f32(pos_max_cell[gid].s1()) * (1 / 2097152.f);
142
143 pos_min_cell_flt[gid].s2() = f32(pos_min_cell[gid].s2()) * (1 / 2097152.f);
144 pos_max_cell_flt[gid].s2() = f32(pos_max_cell[gid].s2()) * (1 / 2097152.f);
145
146 pos_min_cell_flt[gid] *= b_box_max - b_box_min;
147 pos_min_cell_flt[gid] += b_box_min;
148
149 pos_max_cell_flt[gid] *= b_box_max - b_box_min;
150 pos_max_cell_flt[gid] += b_box_min;
151 });
152 };
153
154 queue.submit(ker_convert_cell_ranges);
155}
156
157template<>
158void sycl_convert_cell_range<u32, f64_3>(
159 sycl::queue &queue,
160
161 u32 leaf_cnt,
162 u32 internal_cnt,
163 f64_3 bounding_box_min,
164 f64_3 bounding_box_max,
165 std::unique_ptr<sycl::buffer<u16_3>> &buf_pos_min_cell,
166 std::unique_ptr<sycl::buffer<u16_3>> &buf_pos_max_cell,
167 std::unique_ptr<sycl::buffer<f64_3>> &buf_pos_min_cell_flt,
168 std::unique_ptr<sycl::buffer<f64_3>> &buf_pos_max_cell_flt) {
169
170 using f3_xyzh = f64_3;
171
172 sycl::range<1> range_cell{leaf_cnt + internal_cnt};
173
174 constexpr u32 group_size = 256;
175 u32 max_len = leaf_cnt + internal_cnt;
176 u32 group_cnt = shambase::group_count(leaf_cnt + internal_cnt, group_size);
177 group_cnt = group_cnt + (group_cnt % 4);
178 u32 corrected_len = group_cnt * group_size;
179
180 auto ker_convert_cell_ranges = [&, max_len](sycl::handler &cgh) {
181 f3_xyzh b_box_min = bounding_box_min;
182 f3_xyzh b_box_max = bounding_box_max;
183
184 auto pos_min_cell = buf_pos_min_cell->get_access<sycl::access::mode::read>(cgh);
185 auto pos_max_cell = buf_pos_max_cell->get_access<sycl::access::mode::read>(cgh);
186
187 // auto pos_min_cell_flt =
188 // buf_pos_min_cell_flt->get_access<sycl::access::mode::discard_write>(cgh); auto
189 // pos_max_cell_flt =
190 // buf_pos_max_cell_flt->get_access<sycl::access::mode::discard_write>(cgh);
191
192 auto pos_min_cell_flt = sycl::accessor{
193 *buf_pos_min_cell_flt, cgh, sycl::write_only, sycl::property::no_init{}};
194 auto pos_max_cell_flt = sycl::accessor{
195 *buf_pos_max_cell_flt, cgh, sycl::write_only, sycl::property::no_init{}};
196
197 cgh.parallel_for<class Convert_cell_range_u32_f64>(
198 sycl::nd_range<1>{corrected_len, group_size}, [=](sycl::nd_item<1> id) {
199 u32 local_id = id.get_local_id(0);
200 u32 group_tile_id = id.get_group_linear_id();
201 u32 gid = group_tile_id * group_size + local_id;
202
203 if (gid >= max_len)
204 return;
205
206 pos_min_cell_flt[gid].s0() = f64(pos_min_cell[gid].s0()) * (1 / 1024.);
207 pos_max_cell_flt[gid].s0() = f64(pos_max_cell[gid].s0()) * (1 / 1024.);
208
209 pos_min_cell_flt[gid].s1() = f64(pos_min_cell[gid].s1()) * (1 / 1024.);
210 pos_max_cell_flt[gid].s1() = f64(pos_max_cell[gid].s1()) * (1 / 1024.);
211
212 pos_min_cell_flt[gid].s2() = f64(pos_min_cell[gid].s2()) * (1 / 1024.);
213 pos_max_cell_flt[gid].s2() = f64(pos_max_cell[gid].s2()) * (1 / 1024.);
214
215 pos_min_cell_flt[gid] *= b_box_max - b_box_min;
216 pos_min_cell_flt[gid] += b_box_min;
217
218 pos_max_cell_flt[gid] *= b_box_max - b_box_min;
219 pos_max_cell_flt[gid] += b_box_min;
220 });
221 };
222
223 queue.submit(ker_convert_cell_ranges);
224}
225
226template<>
227void sycl_convert_cell_range<u64, f64_3>(
228 sycl::queue &queue,
229
230 u32 leaf_cnt,
231 u32 internal_cnt,
232 f64_3 bounding_box_min,
233 f64_3 bounding_box_max,
234 std::unique_ptr<sycl::buffer<u32_3>> &buf_pos_min_cell,
235 std::unique_ptr<sycl::buffer<u32_3>> &buf_pos_max_cell,
236 std::unique_ptr<sycl::buffer<f64_3>> &buf_pos_min_cell_flt,
237 std::unique_ptr<sycl::buffer<f64_3>> &buf_pos_max_cell_flt) {
238
239 using f3_xyzh = f64_3;
240
241 sycl::range<1> range_cell{leaf_cnt + internal_cnt};
242
243 constexpr u32 group_size = 256;
244 u32 max_len = leaf_cnt + internal_cnt;
245 u32 group_cnt = shambase::group_count(leaf_cnt + internal_cnt, group_size);
246 group_cnt = group_cnt + (group_cnt % 4);
247 u32 corrected_len = group_cnt * group_size;
248
249 auto ker_convert_cell_ranges = [&, max_len](sycl::handler &cgh) {
250 f3_xyzh b_box_min = bounding_box_min;
251 f3_xyzh b_box_max = bounding_box_max;
252
253 auto pos_min_cell = buf_pos_min_cell->get_access<sycl::access::mode::read>(cgh);
254 auto pos_max_cell = buf_pos_max_cell->get_access<sycl::access::mode::read>(cgh);
255
256 // auto pos_min_cell_flt =
257 // buf_pos_min_cell_flt->get_access<sycl::access::mode::discard_write>(cgh); auto
258 // pos_max_cell_flt =
259 // buf_pos_max_cell_flt->get_access<sycl::access::mode::discard_write>(cgh);
260
261 auto pos_min_cell_flt = sycl::accessor{
262 *buf_pos_min_cell_flt, cgh, sycl::write_only, sycl::property::no_init{}};
263 auto pos_max_cell_flt = sycl::accessor{
264 *buf_pos_max_cell_flt, cgh, sycl::write_only, sycl::property::no_init{}};
265
266 cgh.parallel_for<class Convert_cell_range_u64_f64>(
267 sycl::nd_range<1>{corrected_len, group_size}, [=](sycl::nd_item<1> id) {
268 u32 local_id = id.get_local_id(0);
269 u32 group_tile_id = id.get_group_linear_id();
270 u32 gid = group_tile_id * group_size + local_id;
271
272 if (gid >= max_len)
273 return;
274
275 pos_min_cell_flt[gid].s0() = f64(pos_min_cell[gid].s0()) * (1 / 2097152.);
276 pos_max_cell_flt[gid].s0() = f64(pos_max_cell[gid].s0()) * (1 / 2097152.);
277
278 pos_min_cell_flt[gid].s1() = f64(pos_min_cell[gid].s1()) * (1 / 2097152.);
279 pos_max_cell_flt[gid].s1() = f64(pos_max_cell[gid].s1()) * (1 / 2097152.);
280
281 pos_min_cell_flt[gid].s2() = f64(pos_min_cell[gid].s2()) * (1 / 2097152.);
282 pos_max_cell_flt[gid].s2() = f64(pos_max_cell[gid].s2()) * (1 / 2097152.);
283
284 pos_min_cell_flt[gid] *= b_box_max - b_box_min;
285 pos_min_cell_flt[gid] += b_box_min;
286
287 pos_max_cell_flt[gid] *= b_box_max - b_box_min;
288 pos_max_cell_flt[gid] += b_box_min;
289 });
290 };
291
292 queue.submit(ker_convert_cell_ranges);
293}
double f64
Alias for double.
float f32
Alias for float.
std::uint32_t u32
32 bit unsigned integer
constexpr u32 group_count(u32 len, u32 group_size)
Calculates the number of groups based on the length and group size.
Definition integer.hpp:125