26 auto edges = get_edges();
28 if (edges.sizes.indexes.get_ids().size() != 1) {
30 "Self gravity direct mode only supports one patch so far, current number "
32 + std::to_string(edges.sizes.indexes.get_ids().size()));
35 const Tscal G = edges.constant_G.data;
36 const Tscal gpart_mass = edges.gpart_mass.data;
38 const Tscal gravitational_softening = epsilon * epsilon;
40 edges.sizes.indexes.for_each([&](
u64 id,
const u64 &n) {
45 std::vector<Tvec> xyz_vec = xyz.get_buf().copy_to_stdvec();
46 std::vector<Tvec> axyz_ext_vec = axyz_ext.get_buf().copy_to_stdvec();
48 for (
u64 i = 0; i < n; i++) {
50 for (
u64 j = 0; j < n; j++) {
55 Tvec R = xyz_vec[j] - xyz_vec[i];
56 const Tscal r_inv = sycl::rsqrt(
57 R.x() * R.x() + R.y() * R.y() + R.z() * R.z()
58 + gravitational_softening);
59 force += gpart_mass * r_inv * r_inv * r_inv * R;
61 axyz_ext_vec[i] += force * G;
68 const u32 group_size = 32;
70 const u32 corrected_len = group_cnt * group_size;
72 sham::kernel_call_hndl(
73 shamsys::instance::get_compute_scheduler_ptr()->get_queue(),
77 [corrected_len, group_size, G, gpart_mass, gravitational_softening](
78 u32 Npart,
const Tvec *__restrict xyz, Tvec *__restrict axyz_ext) {
79 auto range = sycl::nd_range<1>{corrected_len, group_size};
81 return [=](sycl::handler &cgh) {
82 using vec4 = sycl::vec<Tscal, 4>;
85 = sycl::local_accessor<vec4>{sycl::range<1>{group_size}, cgh};
87 cgh.parallel_for(range, [=](sycl::nd_item<1> tid) {
88 const u64 global_id = tid.get_global_linear_id();
89 const u64 local_id = tid.get_local_linear_id();
93 const Tvec my_particle
94 = (global_id < Npart) ? xyz[global_id] : Tvec{0.0f};
96 for (
u32 offset = 0; offset < Npart; offset += group_size) {
98 if (offset + local_id < Npart) {
99 position_scratch[local_id]
100 = vec4{xyz[offset + local_id], gpart_mass};
102 position_scratch[local_id] = vec4{0.0f};
105 sycl::group_barrier(tid.get_group());
107 for (
u32 i = 0; i < group_size; ++i) {
109 = position_scratch[i].template swizzle<0, 1, 2>();
110 const Tvec R = p - my_particle;
112 const Tscal r_inv = sycl::rsqrt(
113 R.x() * R.x() + R.y() * R.y() + R.z() * R.z()
114 + gravitational_softening);
116 if (global_id != offset + i) {
117 force += position_scratch[i].w() * r_inv * r_inv * r_inv
122 sycl::group_barrier(tid.get_group());
125 if (global_id < Npart) {
126 axyz_ext[global_id] += force * G;