// Host-side byte storage, initially empty. In this file it is indexed with head_host by
// the scalar write()/load() paths, and its size() is the bound used by check_head_move_host.
135 std::vector<u8> storage_header = {};
// Alignment value forwarded as the template argument to details::align_repr and
// details::serialize_byte_size (presumably a byte count — confirm against details::).
139 static constexpr u64 alignment = 8;
// Pre-flight check for advancing head_device; invoked as
// check_head_move_device<T>(offset, len) by the buffer read/write paths in this file.
//   off : amount the head is about to advance; asserted equal to
//         align_repr(len * Helper::szrepr).
//   len : element count used to recompute the expected aligned offset.
// When head_device + off would pass storage.get_size(), the overflow report below is
// built; the statement that raises it is not visible in this excerpt.
142 inline void check_head_move_device(
u64 off,
u64 len) {
145 SHAM_ASSERT(off == align_repr(len * Helper::szrepr));
147 if (head_device + off > storage.
get_size()) {
149 "Serializer device buffer overflow: cannot move device head.\n"
150 " storage size : {}\n"
151 " current head_device : {}\n"
152 " requested head_device : {}\n"
155 " Helper::szrepr : {}",
// Pre-flight check for advancing head_host; invoked as
// check_head_move_host<T>(offset, 1) by the scalar write()/load() paths in this file.
//   off : amount the head is about to advance; asserted equal to
//         align_repr(len * Helper::szrepr).
//   len : element count used to recompute the expected aligned offset.
// When head_host + off would pass storage_header.size(), the overflow report below is
// built; the statement that raises it is not visible in this excerpt.
166 inline void check_head_move_host(
u64 off,
u64 len) {
169 SHAM_ASSERT(off == align_repr(len * Helper::szrepr));
171 if (head_host + off > storage_header.size()) {
173 "Serializer host buffer overflow: cannot move host head.\n"
174 " storage_header size : {}\n"
175 " current head_host : {}\n"
176 " requested head_host : {}\n"
179 " Helper::szrepr : {}",
180 storage_header.size(),
189 inline static u64 align_repr(
u64 offset) {
return details::align_repr<alignment>(offset); }
// Declaration only; the definition is not part of this excerpt.
191 static u64 pre_head_length();
// Shared handle to the device scheduler; the buffer read/write paths in this file submit
// their kernels on dev_sched->get_queue().
193 std::shared_ptr<sham::DeviceScheduler> dev_sched;
196 std::shared_ptr<sham::DeviceScheduler> &get_device_scheduler() {
return dev_sched; }
// Delegates to details::serialize_byte_size<alignment, T>() with no arguments
// (the enclosing signature is not visible in this excerpt).
209 return details::serialize_byte_size<alignment, T>();
// Delegates to details::serialize_byte_size<alignment, T>(len)
// (the enclosing signature is not visible in this excerpt).
214 return details::serialize_byte_size<alignment, T>(len);
// String overload: forwards s to details::serialize_byte_size<alignment> and returns its
// SerializeSize result.
// NOTE(review): s is taken by value, so each call copies the string — confirm whether a
// const reference would be acceptable to callers.
217 inline static SerializeSize serialize_byte_size(std::string s) {
218 return details::serialize_byte_size<alignment>(s);
// Scalar write on the host side: snapshots head_host, computes the aligned size of one
// value (align_repr(Helper::szrepr)), bounds-checks that move against storage_header, and
// stores val at the snapshotted position via Helper::store.
// The statement advancing head_host is not visible in this excerpt.
222 inline void write(T val) {
227 u64 current_head = head_host;
229 u64 offset = align_repr(Helper::szrepr);
230 check_head_move_host<T>(offset, 1);
232 Helper::store(&(storage_header)[current_head], val);
// Scalar read on the host side: snapshots head_host, computes the aligned size of one
// value (align_repr(Helper::szrepr)), bounds-checks that move against storage_header, and
// assigns to val the result of Helper::load at the snapshotted position.
// The statement advancing head_host is not visible in this excerpt.
238 inline void load(T &val) {
243 u64 current_head = head_host;
244 u64 offset = align_repr(Helper::szrepr);
245 check_head_move_host<T>(offset, 1);
250 val = Helper::load(&(storage_header)[current_head]);
// String write: first writes the length as a u32 through the scalar write(), then stages
// the characters in a sycl::buffer<char> of s.size() elements through a write-only,
// no_init host accessor, and finally hands that buffer to write_buf.
// The loop body that fills the accessor is not visible in this excerpt.
// NOTE(review): the length is narrowed to u32 and the loop index is u32 while s.size() is
// wider on 64-bit targets — confirm strings of 2^32 characters or more cannot reach here.
256 inline void write(std::string s) {
258 write(
u32(s.size()));
260 sycl::buffer<char> buf(s.size());
262 sycl::host_accessor acc{buf, sycl::write_only, sycl::no_init};
263 for (
u32 i = 0; i < s.size(); i++) {
267 write_buf(buf, s.size());
// String read: creates a sycl::buffer<char> of len elements, opens a read-only host
// accessor on it, and iterates i over [0, len).
// Where len comes from, how the buffer is filled, and the loop body are not visible in
// this excerpt (presumably the mirror image of write(std::string) — confirm).
270 inline void load(std::string &s) {
276 sycl::buffer<char> buf(len);
279 sycl::host_accessor acc{buf, sycl::read_only};
280 for (
u32 i = 0; i < len; i++) {
// Writes the first len elements of a sycl::buffer<T> into the device-side storage.
//   buf : source buffer, accessed read-only inside the kernel.
//   len : number of elements to write; also the parallel_for range.
// Steps visible here: snapshot head_device, compute the aligned span
// align_repr(len * Helper::szrepr), bounds-check it with check_head_move_device, submit a
// kernel on dev_sched's queue in which work-item i stores accbuf[i] at
// current_head + i * Helper::szrepr inside accbufbyte, then advance head_device by the
// aligned span. The definitions of accbufbyte and depends_list are not visible in this
// excerpt.
287 inline void write_buf(sycl::buffer<T> &buf,
u64 len) {
291 u64 current_head = head_device;
293 u64 offset = align_repr(len * Helper::szrepr);
294 check_head_move_device<T>(offset, len);
300 auto e = dev_sched->get_queue().submit(
301 depends_list, [&, current_head](sycl::handler &cgh) {
302 sycl::accessor accbuf{buf, cgh, sycl::read_only};
304 cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
305 u64 head = current_head +
id.get_linear_id() * Helper::szrepr;
306 Helper::store(&accbufbyte[head], accbuf[
id]);
312 head_device += offset;
// Reads len elements from the device-side storage into a sycl::buffer<T>.
//   buf : destination buffer, accessed write-only with no_init inside the kernel.
//   len : number of elements to read; also the parallel_for range.
// Steps visible here: snapshot head_device, compute the aligned span
// align_repr(len * Helper::szrepr), bounds-check it with check_head_move_device, submit a
// kernel on dev_sched's queue in which work-item i sets accbuf[i] to the value
// Helper::load reads at current_head + i * Helper::szrepr inside accbufbyte, then advance
// head_device by the aligned span. The definitions of accbufbyte and depends_list are not
// visible in this excerpt.
316 inline void load_buf(sycl::buffer<T> &buf,
u64 len) {
320 u64 current_head = head_device;
322 u64 offset = align_repr(len * Helper::szrepr);
323 check_head_move_device<T>(offset, len);
329 auto e = dev_sched->get_queue().submit(
330 depends_list, [&, current_head](sycl::handler &cgh) {
331 sycl::accessor accbuf{buf, cgh, sycl::write_only, sycl::no_init};
333 cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
334 u64 head = current_head +
id.get_linear_id() * Helper::szrepr;
335 accbuf[id] = Helper::load(&accbufbyte[head]);
341 head_device += offset;
// Body of a device write path whose signature is not visible in this excerpt
// (presumably a write_buf overload for a buffer type exposing get_read_access — confirm).
// Steps visible here: snapshot head_device, compute the aligned span
// align_repr(len * Helper::szrepr), bounds-check it with check_head_move_device, obtain a
// const T* from buf.get_read_access(depends_list), submit a kernel in which work-item i
// stores accbuf[i] at current_head + i * Helper::szrepr inside accbufbyte, pass the
// resulting event to buf.complete_event_state, then advance head_device by the aligned span.
349 u64 current_head = head_device;
351 u64 offset = align_repr(len * Helper::szrepr);
352 check_head_move_device<T>(offset, len);
355 const T *accbuf = buf.get_read_access(depends_list);
358 auto e = dev_sched->get_queue().submit(
359 depends_list, [&, current_head](sycl::handler &cgh) {
360 cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
361 u64 head = current_head +
id.get_linear_id() * Helper::szrepr;
362 Helper::store(&accbufbyte[head], accbuf[
id]);
366 buf.complete_event_state(e);
369 head_device += offset;
// Body of a device read path whose signature is not visible in this excerpt; the error
// text below names it SerializeHelper::load_buf.
// Steps visible here: snapshot head_device, compute the aligned span
// align_repr(len * Helper::szrepr), bounds-check it with check_head_move_device, report an
// error when buf.get_size() < len (the raising statement is not visible), obtain a T* from
// buf.get_write_access(depends_list), submit a kernel in which work-item i sets accbuf[i]
// to the value Helper::load reads at current_head + i * Helper::szrepr inside accbufbyte,
// pass the resulting event to buf.complete_event_state, then advance head_device by the
// aligned span.
377 u64 current_head = head_device;
379 u64 offset = align_repr(len * Helper::szrepr);
380 check_head_move_device<T>(offset, len);
382 if (buf.get_size() < len) {
384 "SerializeHelper::load_buf: (buf.get_size() < len)\n buf.get_size()={}\n "
391 T *accbuf = buf.get_write_access(depends_list);
394 auto e = dev_sched->get_queue().submit(
395 depends_list, [&, current_head](sycl::handler &cgh) {
396 cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
397 u64 head = current_head +
id.get_linear_id() * Helper::szrepr;
398 accbuf[id] = Helper::load(&accbufbyte[head]);
402 buf.complete_event_state(e);
405 head_device += offset;