48 auto fast_extract_ptr = [](
u32 idx,
u32 length,
auto cnt) {
49 T end_ = cnt[length - 1];
59 const u32 nvar = from.get_nvar();
60 const u32 idx_val = pidx * nvar;
63 u32 from_sz = from.get_val_cnt();
69 auto &buf_to = to.get_buf();
70 auto &buf_from = from.get_buf();
73 T *acc_to = buf_to.get_write_access(depends_list);
74 T *acc_from = buf_from.get_write_access(depends_list);
78 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
79 const u32 nvar_loc = nvar;
82 for (
u32 i = nvar_loc - 1; i < nvar_loc; i--) {
83 acc_to[idx_out_val + i]
84 = (fast_extract_ptr(idx_val + i, from_sz, acc_from));
89 buf_to.complete_event_state(e);
90 buf_from.complete_event_state(e);
96 sub_extract(pidx, *
this, to);
104 "source and destination for extract_elements cannot be the same");
115 match = match && (field_name == f2.field_name);
116 match = match && (nvar == f2.nvar);
118 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
130 if (pfield.nvar != nvar)
132 "field must be similar for extraction");
134 if (
static_cast<size_t>(sz) != idxs_buf.
get_size())
136 "the size of the idxs buffer does not match the size of the subset");
140 const u32 nvar = get_nvar();
153 buffer.copy_from_sycl_buffer(idxs_buf);
166 append_subset_to(idxs_buf, sz, pfield);
178 u32 ins_pos = get_val_cnt();
181 auto sptr = shamsys::instance::get_compute_scheduler_ptr();
182 auto &q = sptr->get_queue();
185 T *acc = get_buf().get_write_access(depends_list);
187 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
188 auto id_ins = ins_pos;
196 get_buf().complete_event_state(e);
205 if (get_obj_cnt() > 0) {
207 auto sptr = shamsys::instance::get_compute_scheduler_ptr();
208 auto &q = sptr->get_queue();
211 T *acc = get_buf().get_write_access(depends_list);
213 auto e = q.
submit(depends_list, [&](sycl::handler &cgh) {
217 sycl::range<1>{get_val_cnt()}, [=](sycl::id<1> idx) {
221 get_buf().complete_event_state(e);
230 get_buf().append(f2.get_buf());
236 if (!buf.is_empty()) {
238 auto sched_ptr = shamsys::instance::get_compute_scheduler_ptr();
240 auto get_new_buf = [&]() {
255 if (len != get_obj_cnt()) {
257 "the match of the new index map does not match with the patchdatafield obj count: {} "
263 index_remap_resize(index_map, len);
268 if (permut.size() != get_nvar()) {
270 "the number of permut is not equal to the patchdatafield nvar: {} != {}",
275 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
276 auto &q = dev_sched->get_queue();
279 permut.size(), shamsys::instance::get_compute_scheduler_ptr());
289 [nvar = nvar](
u32 i,
const T *src,
const u32 *permut, T *dst) {
290 u32 obj_id = i / nvar;
291 u32 var_id = i % nvar;
293 u32 new_var_id = permut[var_id];
295 dst[obj_id * nvar + new_var_id] = src[i];
302 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
303 auto &q = dev_sched->get_queue();
305 if (len > get_obj_cnt()) {
307 "the number of ids to remove is greater than the patchdatafield obj count: {} > {}",
316 auto nobj = get_obj_cnt();
317 auto remaining = nobj - len;
327 [](
u32 i,
const u32 *idx,
u32 *idx_map) {
333 if (keep_ids.get_size() != remaining) {
339 std::sort(ids_to_rem_vec.begin(), ids_to_rem_vec.end());
341 bool has_duplicates =
false;
344 has_duplicates = std::adjacent_find(ids_to_rem_vec.begin(), ids_to_rem_vec.end())
345 != ids_to_rem_vec.end();
350 u32 keep_flags_sum = std::accumulate(keep_flags_vec.begin(), keep_flags_vec.end(),
u32(0));
352 std::string log = shambase::format(
353 "the number of remaining ids {} is different from the expected {}",
357 log +=
"\n\nAdditional information:\n";
358 if (has_duplicates) {
359 log +=
" ids_to_rem has duplicates = true\n";
361 log +=
" ids_to_rem has duplicates = false\n";
363 log += shambase::format(
" keep flags sum = {}\n", keep_flags_sum);
368 index_remap_resize(keep_ids, remaining);
373 u64 seed,
u32 obj_cnt, std::string name,
u32 nvar, T vmin, T vmax) {
375 std::vector<T> buf = shamalgs::primitives::mock_vector<T>(seed, obj_cnt * nvar, vmin, vmax);
377 ret.get_buf().copy_from_stdvec(buf);
386 seed, obj_cnt, name, nvar, Prop::get_min(), Prop::get_max());
392 u32 obj_cnt = get_obj_cnt();
393 serializer.write(obj_cnt);
394 shamlog_debug_sycl_ln(
"PatchDataField",
"serialize patchdatafield len=", obj_cnt);
396 serializer.write_buf(buf, get_val_cnt());
405 serializer.load(cnt);
406 shamlog_debug_sycl_ln(
"PatchDataField",
"deserialize patchdatafield len=", cnt);
410 serializer.load_buf(buf, cnt * nvar);
421 return H::serialize_byte_size<u32>() + H::serialize_byte_size<T>(get_val_cnt());
427 serializer.write(nvar);
428 serializer.write(field_name);
429 serialize_buf(serializer);
435 return (H::serialize_byte_size<u32>()) + H::serialize_byte_size(field_name)
436 + serialize_buf_byte_size();
443 serializer.load(nvar);
444 std::string field_name;
445 serializer.load(field_name);
447 return deserialize_buf(serializer, field_name, nvar);
457 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
468 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
479 auto dev_sched = shamsys::instance::get_compute_scheduler_ptr();
497 auto tmp = buf.copy_to_sycl_buffer();
499 return shamalgs::reduction::has_nan(shamsys::instance::get_compute_queue(), tmp, get_val_cnt());
505 auto tmp = buf.copy_to_sycl_buffer();
507 return shamalgs::reduction::has_inf(shamsys::instance::get_compute_queue(), tmp, get_val_cnt());
513 auto tmp = buf.copy_to_sycl_buffer();
515 return shamalgs::reduction::has_nan_or_inf(
516 shamsys::instance::get_compute_queue(), tmp, get_val_cnt());
524 #define X(a) template class PatchDataField<a>;
525XMAC_LIST_ENABLED_FIELD
535const u32 obj_mock_cnt = 6000;
542 std::vector<f32> out(obj_cnt * nvar);
543 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
545 for (
u32 i = 0; i < get_val_cnt(); i++) {
546 out[i] =
f32(distf64(eng));
549 buf.copy_from_stdvec(out);
555 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
557 std::vector<f32_2> out(obj_cnt * nvar);
559 for (
u32 i = 0; i < get_val_cnt(); i++) {
560 out[i] = f32_2{distf64(eng), distf64(eng)};
562 buf.copy_from_stdvec(out);
568 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
570 std::vector<f32_3> out(obj_cnt * nvar);
572 for (
u32 i = 0; i < get_val_cnt(); i++) {
573 out[i] = f32_3{distf64(eng), distf64(eng), distf64(eng)};
575 buf.copy_from_stdvec(out);
581 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
583 std::vector<f32_4> out(obj_cnt * nvar);
585 for (
u32 i = 0; i < get_val_cnt(); i++) {
586 out[i] = f32_4{distf64(eng), distf64(eng), distf64(eng), distf64(eng)};
588 buf.copy_from_stdvec(out);
594 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
596 std::vector<f32_8> out(obj_cnt * nvar);
598 for (
u32 i = 0; i < get_val_cnt(); i++) {
609 buf.copy_from_stdvec(out);
615 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
617 std::vector<f32_16> out(obj_cnt * nvar);
619 for (
u32 i = 0; i < get_val_cnt(); i++) {
638 buf.copy_from_stdvec(out);
644 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
646 std::vector<f64> out(obj_cnt * nvar);
648 for (
u32 i = 0; i < get_val_cnt(); i++) {
649 out[i] =
f64(distf64(eng));
651 buf.copy_from_stdvec(out);
657 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
659 std::vector<f64_2> out(obj_cnt * nvar);
661 for (
u32 i = 0; i < get_val_cnt(); i++) {
662 out[i] = f64_2{distf64(eng), distf64(eng)};
664 buf.copy_from_stdvec(out);
670 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
672 std::vector<f64_3> out(obj_cnt * nvar);
674 for (
u32 i = 0; i < get_val_cnt(); i++) {
675 out[i] = f64_3{distf64(eng), distf64(eng), distf64(eng)};
677 buf.copy_from_stdvec(out);
683 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
685 std::vector<f64_4> out(obj_cnt * nvar);
687 for (
u32 i = 0; i < get_val_cnt(); i++) {
688 out[i] = f64_4{distf64(eng), distf64(eng), distf64(eng), distf64(eng)};
690 buf.copy_from_stdvec(out);
696 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
698 std::vector<f64_8> out(obj_cnt * nvar);
700 for (
u32 i = 0; i < get_val_cnt(); i++) {
711 buf.copy_from_stdvec(out);
717 std::uniform_real_distribution<f64> distf64(1, obj_mock_cnt);
719 std::vector<f64_16> out(obj_cnt * nvar);
721 for (
u32 i = 0; i < get_val_cnt(); i++) {
740 buf.copy_from_stdvec(out);
746 std::uniform_int_distribution<u32> distu32(1, obj_mock_cnt);
748 std::vector<u32> out(obj_cnt * nvar);
750 for (
u32 i = 0; i < get_val_cnt(); i++) {
751 out[i] = distu32(eng);
753 buf.copy_from_stdvec(out);
758 std::uniform_int_distribution<u64> distu64(1, obj_mock_cnt);
760 std::vector<u64> out(obj_cnt * nvar);
762 for (
u32 i = 0; i < get_val_cnt(); i++) {
763 out[i] = distu64(eng);
765 buf.copy_from_stdvec(out);
771 std::uniform_int_distribution<u32> distu32(1, obj_mock_cnt);
773 std::vector<u32_3> out(obj_cnt * nvar);
775 for (
u32 i = 0; i < get_val_cnt(); i++) {
776 out[i] = u32_3{distu32(eng), distu32(eng), distu32(eng)};
778 buf.copy_from_stdvec(out);
783 std::uniform_int_distribution<u64> distu64(1, obj_mock_cnt);
785 std::vector<u64_3> out(obj_cnt * nvar);
787 for (
u32 i = 0; i < get_val_cnt(); i++) {
788 out[i] = u64_3{distu64(eng), distu64(eng), distu64(eng)};
790 buf.copy_from_stdvec(out);
796 std::uniform_int_distribution<i64> disti64(1, obj_mock_cnt);
798 std::vector<i64_3> out(obj_cnt * nvar);
800 for (
u32 i = 0; i < get_val_cnt(); i++) {
801 out[i] = i64_3{disti64(eng), disti64(eng), disti64(eng)};
803 buf.copy_from_stdvec(out);
809 std::uniform_int_distribution<i64> disti64(1, obj_mock_cnt);
811 std::vector<i64> out(obj_cnt * nvar);
813 for (
u32 i = 0; i < get_val_cnt(); i++) {
814 out[i] =
i64{disti64(eng)};
816 buf.copy_from_stdvec(out);
Header file describing a Node Instance.
double f64
Alias for double.
float f32
Alias for float.
std::uint32_t u32
32 bit unsigned integer
std::uint64_t u64
64 bit unsigned integer
std::int64_t i64
64 bit signed integer
shamalgs::SerializeSize serialize_full_byte_size()
Give the byte size used by serialize_full.
void permut_vars(const std::vector< u32 > &permut)
Permute the variables of the field according to the given permutation (permut).
void index_remap(sham::DeviceBuffer< u32 > &index_map, u32 len)
this function remaps the patchdatafield like so val[id] = val[index_map[id]] index map describe : at ...
void remove_ids(const sham::DeviceBuffer< u32 > &indexes, u32 len)
remove the ids from the field
static PatchDataField deserialize_buf(shamalgs::SerializeHelper &serializer, std::string field_name, u32 nvar)
Deserialize a field; inverse of serialize_buf.
void append_subset_to(const std::vector< u32 > &idxs, PatchDataField &pfield)
Copy all objects in idxs to pfield.
shamalgs::SerializeSize serialize_buf_byte_size()
record the size usage of the serialization using serialize_buf
u32 get_val_cnt() const
Get the number of values stored in the field.
static PatchDataField deserialize_full(shamalgs::SerializeHelper &serializer)
Deserialize a field; inverse of serialize_full.
void index_remap_resize(sham::DeviceBuffer< u32 > &index_map, u32 len)
this function remaps the patchdatafield like so val[id] = val[index_map[id]] index map describe : at ...
void serialize_buf(shamalgs::SerializeHelper &serializer)
Minimal serialization, assuming the user knows the layout of the field.
void serialize_full(shamalgs::SerializeHelper &serializer)
serialize everything in the class
A buffer allocated in USM (Unified Shared Memory)
void copy_from_stdvec(const std::vector< T > &vec)
Copy the content of a std::vector into the buffer.
void fill(T value, std::array< size_t, 2 > idx_range)
Fill a subpart of the buffer with a given value.
std::vector< T > copy_to_stdvec() const
Copy the content of the buffer to a std::vector.
size_t get_size() const
Gets the number of elements in the buffer.
std::vector< T > copy_to_stdvec_idx_range(size_t begin, size_t end) const
Copies a specified range of elements from the buffer to a std::vector.
DeviceBuffer< T, target > copy() const
Copy the current buffer.
A SYCL queue associated with a device and a context.
sycl::event submit(Fct &&fct)
Submits a kernel to the SYCL queue.
DeviceQueue & get_queue(u32 id=0)
Get a reference to a DeviceQueue.
Class to manage a list of SYCL events.
Provides functions to compute the sum of dot products of elements in a device buffer with themselves.
Element-wise equality comparison algorithms for buffers.
This header file contains utility functions related to exception handling in the code.
Utility functions for generating random mock values.
Utility functions for generating random mock vectors.
void kernel_call(sham::DeviceQueue &q, RefIn in, RefOut in_out, u32 n, Functor &&func, SourceLocation &&callsite=SourceLocation{})
Submit a kernel to a SYCL queue.
sycl::buffer< T > index_remap(sycl::queue &q, sycl::buffer< T > &source_buf, sycl::buffer< u32 > &index_map, u32 len)
remap a buffer according to a given index map result[i] = result[index_map[i]]
sycl::buffer< T > index_remap_nvar(sycl::queue &q, sycl::buffer< T > &source_buf, sycl::buffer< u32 > &index_map, u32 len, u32 nvar)
remap a buffer (with multiple variable per index) according to a given index map result[i] = result[i...
std::tuple< std::optional< sycl::buffer< u32 > >, u32 > stream_compact(sycl::queue &q, sycl::buffer< u32 > &buf_flags, u32 len)
Stream compaction algorithm.
T sum(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &buf1, u32 start_id, u32 end_id)
Compute the sum of elements in a device buffer within a specified range.
bool equals(sycl::queue &q, sycl::buffer< T > &buf1, sycl::buffer< T > &buf2, u32 cnt)
Compare elements between two sycl::buffers for equality.
shambase::VecComponent< T > dot_sum(sham::DeviceBuffer< T > &buf1, u32 start_id, u32 end_id)
Compute the sum of dot products of elements in a device buffer with themselves.
T min(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &buf1, u32 start_id, u32 end_id)
Find the minimum element in a device buffer within a specified range.
T max(const sham::DeviceScheduler_ptr &sched, const sham::DeviceBuffer< T > &buf1, u32 start_id, u32 end_id)
Find the maximum element in a device buffer within a specified range.
void append_subset_to(const sham::DeviceBuffer< T > &buf, const sham::DeviceBuffer< u32 > &idxs_buf, u32 nvar, sham::DeviceBuffer< T > &buf_other, u32 start_enque)
Appends a subset of elements from one buffer to another.
void throw_with_loc(std::string message, SourceLocation loc=SourceLocation{})
Throw an exception and append the source location to it.
void throw_unimplemented(SourceLocation loc=SourceLocation{})
Throw a std::runtime_error saying that the function is unimplemented.
Utilities for safe type narrowing conversions.
A class that references multiple buffers or similar objects.