130 class SerializeHelper {
// Host-side byte storage; the scalar write()/load() helpers index into it with head_host.
135 std::vector<u8> storage_header = {};
// Template argument forwarded to details::align_repr and details::serialize_byte_size.
// NOTE(review): presumably an alignment expressed in bytes — confirm against details::align_repr.
139 static constexpr u64 alignment = 8;
/// Validates a pending move of the device head before device storage is touched.
/// @param off distance the head is about to advance; checked with SHAM_ASSERT to equal
///            align_repr(len * Helper::szrepr)
/// @param len element count from which `off` was derived
/// NOTE(review): the raising call, its format arguments and the closing braces are not
/// visible in this excerpt; `Helper` is declared on a line that is not shown either.
142 inline void check_head_move_device(
u64 off,
u64 len) {
// Callers are expected to pass the already-aligned size; anything else trips the assertion.
145 SHAM_ASSERT(off == align_repr(len * Helper::szrepr));
// Overflow branch: the requested head position would exceed storage.get_size().
// NOTE(review): head_device + off is not guarded against wrap-around — confirm value ranges.
147 if (head_device + off > storage.get_size()) {
// Diagnostic text for that branch; each {} is a format placeholder.
149 "Serializer device buffer overflow: cannot move device head.\n"
150 " storage size : {}\n"
151 " current head_device : {}\n"
152 " requested head_device : {}\n"
155 " Helper::szrepr : {}",
/// Host-side counterpart of the device head check: validates a pending move of head_host
/// against the size of storage_header.
/// @param off distance the head is about to advance; checked with SHAM_ASSERT to equal
///            align_repr(len * Helper::szrepr)
/// @param len element count from which `off` was derived
/// NOTE(review): the raising call, the remaining format arguments and the closing braces
/// are not visible in this excerpt.
166 inline void check_head_move_host(
u64 off,
u64 len) {
// The offset must already be the aligned size of `len` elements.
169 SHAM_ASSERT(off == align_repr(len * Helper::szrepr));
// Overflow branch: the requested head position would exceed storage_header.size().
171 if (head_host + off > storage_header.size()) {
// Diagnostic text for that branch; each {} is a format placeholder.
173 "Serializer host buffer overflow: cannot move host head.\n"
174 " storage_header size : {}\n"
175 " current head_host : {}\n"
176 " requested head_host : {}\n"
179 " Helper::szrepr : {}",
// First visible format argument: the current size of the host storage.
180 storage_header.size(),
189 inline static u64 align_repr(
u64 offset) {
return details::align_repr<alignment>(offset); }
// Declared here; the definition is not part of this excerpt.
// NOTE(review): presumably the length of a header stored ahead of the payload — confirm.
191 static u64 pre_head_length();
// Device scheduler whose queue receives the copy kernels submitted by write_buf()/load_buf().
193 std::shared_ptr<sham::DeviceScheduler> dev_sched;
196 std::shared_ptr<sham::DeviceScheduler> &get_device_scheduler() {
return dev_sched; }
// Constructor taking only the device scheduler (definition not in this excerpt).
198 SerializeHelper(std::shared_ptr<sham::DeviceScheduler> dev_sched);
// Tail of a second declaration whose opening line (and possibly other parameters) is not visible here.
201 std::shared_ptr<sham::DeviceScheduler> dev_sched,
// Opt-in flag, off by default. NOTE(review): exact meaning of "large int size" is defined elsewhere — confirm.
203 bool allow_large_int_size =
false);
// Declaration only; the definition is not part of this excerpt.
// szinfo: a SerializeSize value, the type returned by the serialize_byte_size() helpers.
// allow_large_int_size: opt-in flag, defaults to false.
// NOTE(review): presumably sizes the host/device storage from szinfo — confirm in the definition.
205 void allocate(
SerializeSize szinfo,
bool allow_large_int_size =
false);
// Return statement of a serialize_byte_size overload whose signature is not visible here:
// forwards to details::serialize_byte_size for type T with no length argument.
211 return details::serialize_byte_size<alignment, T>();
// Return statement of a second overload (signature not visible): same helper, with a `len` argument.
// NOTE(review): presumably `len` is an element count — confirm against the signature.
216 return details::serialize_byte_size<alignment, T>(len);
// String overload: forwards `s` to details::serialize_byte_size<alignment> and returns its SerializeSize.
// NOTE(review): `s` is taken by value, so each call copies the string; the closing brace is not visible here.
219 inline static SerializeSize serialize_byte_size(std::string s) {
220 return details::serialize_byte_size<alignment>(s);
/// Writes a single value into the host-side storage (storage_header) at the current host head.
/// @param val value to store through Helper::store
/// NOTE(review): the template header, the `Helper` alias and the statement advancing head_host
/// are not visible in this excerpt.
224 inline void write(T val) {
// Position at which this value starts.
229 u64 current_head = head_host;
// Aligned footprint of one value of this type.
231 u64 offset = align_repr(Helper::szrepr);
// Bounds check for exactly one element.
232 check_head_move_host<T>(offset, 1);
// Encode val into storage_header starting at current_head.
234 Helper::store(&(storage_header)[current_head], val);
/// Reads a single value from the host-side storage (storage_header) at the current host head.
/// @param val output: receives the result of Helper::load
/// NOTE(review): the template header, the `Helper` alias and the statement advancing head_host
/// are not visible in this excerpt.
240 inline void load(T &val) {
// Position at which this value starts.
245 u64 current_head = head_host;
// Aligned footprint of one value of this type.
246 u64 offset = align_repr(Helper::szrepr);
// Bounds check for exactly one element.
247 check_head_move_host<T>(offset, 1);
// Decode the value stored in storage_header at current_head.
252 val = Helper::load(&(storage_header)[current_head]);
/// Serializes a string: first its length through the scalar write(), then its characters
/// through write_buf() on a temporary sycl::buffer<char>.
/// @param s string to serialize (taken by value)
/// NOTE(review): the loop body and the closing braces are not visible in this excerpt.
258 inline void write(std::string s) {
// The length is recorded as a u32.
// NOTE(review): u32(s.size()) narrows — a string of 2^32 or more chars would record a truncated length.
260 write(
u32(s.size()));
// Staging buffer with one char per character of s.
262 sycl::buffer<char> buf(s.size());
// Host accessor opened write-only with no_init: previous buffer contents are not requested.
264 sycl::host_accessor acc{buf, sycl::write_only, sycl::no_init};
// Per-character loop over s (body not visible; presumably copies s[i] into acc[i]).
265 for (
u32 i = 0; i < s.size(); i++) {
// Push the staged characters through the buffer write path.
269 write_buf(buf, s.size());
/// Deserializes a string into `s` using a temporary sycl::buffer<char> of `len` characters.
/// @param s output string
/// NOTE(review): the statements obtaining `len`, filling `buf` and the loop body are not
/// visible in this excerpt; presumably `len` is read back with the scalar load() — confirm.
272 inline void load(std::string &s) {
// Staging buffer sized to the string length.
278 sycl::buffer<char> buf(len);
// Read-only host view of the staged characters.
281 sycl::host_accessor acc{buf, sycl::read_only};
// Per-character loop over the `len` entries (body not visible).
282 for (
u32 i = 0; i < len; i++) {
/// Copies `len` elements from a sycl::buffer into `storage`, starting at the current device head.
/// @param buf source buffer, read inside the kernel through a read-only accessor
/// @param len number of elements to store
/// NOTE(review): the template header, the `Helper` alias, the declaration of `depends_list`
/// and the closing braces are not visible in this excerpt.
289 inline void write_buf(sycl::buffer<T> &buf,
u64 len) {
// Start position of this write inside storage.
293 u64 current_head = head_device;
// Aligned footprint of `len` elements.
295 u64 offset = align_repr(len * Helper::szrepr);
// Bounds check against storage.get_size().
296 check_head_move_device<T>(offset, len);
// Obtain write access to storage, passing depends_list to the call.
300 auto accbufbyte = storage.get_write_access(depends_list);
// Submit the copy on the scheduler's queue, with depends_list as first argument.
302 auto e = dev_sched->get_queue().submit(
// current_head is captured by value; everything else by reference.
303 depends_list, [&, current_head](sycl::handler &cgh) {
304 sycl::accessor accbuf{buf, cgh, sycl::read_only};
// One work-item per element; the kernel lambda captures by copy.
306 cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
// Elements are packed Helper::szrepr apart, starting at current_head.
307 u64 head = current_head +
id.get_linear_id() * Helper::szrepr;
308 Helper::store(&accbufbyte[head], accbuf[
id]);
// Hand the submission event back to storage.
// NOTE(review): presumably so later accesses can depend on it — confirm.
312 storage.complete_event_state(e);
// Advance the device head by the aligned size.
314 head_device += offset;
/// Reads `len` elements from `storage`, starting at the current device head, into a sycl::buffer.
/// @param buf destination buffer, written inside the kernel through a write-only/no_init accessor
/// @param len number of elements to load
/// NOTE(review): the template header, the `Helper` alias, the declaration of `depends_list`
/// and the closing braces are not visible in this excerpt.
318 inline void load_buf(sycl::buffer<T> &buf,
u64 len) {
// Start position of this read inside storage.
322 u64 current_head = head_device;
// Aligned footprint of `len` elements.
324 u64 offset = align_repr(len * Helper::szrepr);
// Bounds check against storage.get_size().
325 check_head_move_device<T>(offset, len);
// Obtain read access to storage, passing depends_list to the call.
329 auto accbufbyte = storage.get_read_access(depends_list);
// Submit the copy on the scheduler's queue, with depends_list as first argument.
331 auto e = dev_sched->get_queue().submit(
// current_head is captured by value; everything else by reference.
332 depends_list, [&, current_head](sycl::handler &cgh) {
333 sycl::accessor accbuf{buf, cgh, sycl::write_only, sycl::no_init};
// One work-item per element; the kernel lambda captures by copy.
335 cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
// Elements are packed Helper::szrepr apart, starting at current_head.
336 u64 head = current_head +
id.get_linear_id() * Helper::szrepr;
337 accbuf[id] = Helper::load(&accbufbyte[head]);
// Hand the submission event back to storage.
// NOTE(review): presumably so later accesses can depend on it — confirm.
341 storage.complete_event_state(e);
// Advance the device head by the aligned size.
343 head_device += offset;
// Body of a buffer-to-storage copy whose signature is not visible in this excerpt.
// NOTE(review): presumably the write_buf overload taking a device buffer, since `buf` exposes
// get_read_access()/complete_event_state() — confirm.
// NOTE(review): no check of buf.get_size() against `len` is visible here, unlike the check
// guarded by "SerializeHelper::load_buf" elsewhere in this class — confirm whether one exists.
// Start position of this write inside storage.
351 u64 current_head = head_device;
// Aligned footprint of `len` elements.
353 u64 offset = align_repr(len * Helper::szrepr);
// Bounds check against storage.get_size().
354 check_head_move_device<T>(offset, len);
// Raw read pointer on the source and write access on storage, both obtained with depends_list.
357 const T *accbuf = buf.get_read_access(depends_list);
358 auto accbufbyte = storage.get_write_access(depends_list);
// Submit the copy on the scheduler's queue, with depends_list as first argument.
360 auto e = dev_sched->get_queue().submit(
// current_head is captured by value; everything else by reference.
361 depends_list, [&, current_head](sycl::handler &cgh) {
// One work-item per element; the kernel lambda captures the pointers by copy.
362 cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
// Elements are packed Helper::szrepr apart, starting at current_head.
363 u64 head = current_head +
id.get_linear_id() * Helper::szrepr;
364 Helper::store(&accbufbyte[head], accbuf[
id]);
// Hand the submission event back to both the source buffer and storage.
368 buf.complete_event_state(e);
369 storage.complete_event_state(e);
// Advance the device head by the aligned size.
371 head_device += offset;
// Body of a storage-to-buffer copy whose signature is not visible in this excerpt; the error
// text below names it SerializeHelper::load_buf.
// Start position of this read inside storage.
379 u64 current_head = head_device;
// Aligned footprint of `len` elements.
381 u64 offset = align_repr(len * Helper::szrepr);
// Bounds check against storage.get_size().
382 check_head_move_device<T>(offset, len);
// Destination capacity check: the branch is taken when buf holds fewer than `len` elements.
// NOTE(review): the raising call and the rest of the message are not visible here.
384 if (buf.get_size() < len) {
386 "SerializeHelper::load_buf: (buf.get_size() < len)\n buf.get_size()={}\n "
// Raw write pointer on the destination and read access on storage, both obtained with depends_list.
393 T *accbuf = buf.get_write_access(depends_list);
394 auto accbufbyte = storage.get_read_access(depends_list);
// Submit the copy on the scheduler's queue, with depends_list as first argument.
396 auto e = dev_sched->get_queue().submit(
// current_head is captured by value; everything else by reference.
397 depends_list, [&, current_head](sycl::handler &cgh) {
// One work-item per element; the kernel lambda captures the pointers by copy.
398 cgh.parallel_for(sycl::range<1>{len}, [=](sycl::item<1> id) {
// Elements are packed Helper::szrepr apart, starting at current_head.
399 u64 head = current_head +
id.get_linear_id() * Helper::szrepr;
400 accbuf[id] = Helper::load(&accbufbyte[head]);
// Hand the submission event back to both the destination buffer and storage.
404 buf.complete_event_state(e);
405 storage.complete_event_state(e);
// Advance the device head by the aligned size.
407 head_device += offset;