From a9b52940ff2d567dc6c26b28d86eab54f5876819 Mon Sep 17 00:00:00 2001 From: Sylvain BERTRAND Date: Sun, 20 Sep 2020 22:00:32 +0000 Subject: [PATCH] npv: correct coarse sync of vulkan image acquisition --- npv/TODO | 2 +- npv/nyanvk/consts.h | 5 +++++ npv/nyanvk/syms_dev.h | 16 ++++++++++++++-- npv/nyanvk/types.h | 5 +++++ npv/video/local/code.frag.c | 33 ++++++++++++++++++++++++++++++--- npv/video/namespace/main.c | 4 ++++ npv/video/public/code.frag.c | 2 ++ npv/vk/api_usage.h | 13 +++++++++++-- npv/vk/local/code.frag.c | 15 +++++++++++++++ npv/vk/namespace/main.c | 2 ++ npv/vk/public/code.frag.c | 1 + npv/vk/public/state.frag.h | 1 + 12 files changed, 91 insertions(+), 8 deletions(-) diff --git a/npv/TODO b/npv/TODO index 8d590d7..57e3367 100644 --- a/npv/TODO +++ b/npv/TODO @@ -1,9 +1,9 @@ not ordered: - we were lied to: presentation/decoding timestamps from a demuxer can be discontinuous without any warning. -- "buffering" indicatorr? subtitles? - forced monotonic video frame selection? may be, probably, more robust in regard to latency spikes. +- "buffering" indicator (no)? subtitles (mmmmh...)? - use vulkan shaders (no glsl or hlsl), compute or not, in order to perform some yuvX pixel formats to srgb format conversions. 
do NOT use the vulkan yuvX samplers as they are mostly dirty hack tricks built into spirv diff --git a/npv/nyanvk/consts.h b/npv/nyanvk/consts.h index 1094587..522aad5 100644 --- a/npv/nyanvk/consts.h +++ b/npv/nyanvk/consts.h @@ -43,6 +43,7 @@ enum { vk_struct_type_dev_create_info = 3, vk_struct_type_submit_info = 4, vk_struct_type_mem_alloc_info = 5, + vk_struct_type_fence_create_info = 8, vk_struct_type_sem_create_info = 9, vk_struct_type_img_create_info = 14, vk_struct_type_imgview_create_info = 15, @@ -295,4 +296,8 @@ enum { vk_front_face_clockwise = 1, vk_front_face_enum_max = 0x7fffffff }; +enum { + vk_fence_create_signaled_bit = 0x00000001, + vk_fence_create_flag_bits_enum_max = 0x7fffffff +}; #endif diff --git a/npv/nyanvk/syms_dev.h b/npv/nyanvk/syms_dev.h index 4333e86..8a8874f 100644 --- a/npv/nyanvk/syms_dev.h +++ b/npv/nyanvk/syms_dev.h @@ -168,12 +168,24 @@ u32 vtxs_n, \ u32 instances_n, \ u32 first_vtx, \ - u32 first_instance);\ + u32 first_instance); \ void (*dl_vk_cmd_clr_color_img)(\ struct vk_cb_t *cb,\ struct vk_img_t *img,\ u32 img_layout,\ union vk_clr_color_val_t *color,\ u32 ranges_n,\ - struct vk_img_subrsrc_range_t *ranges); + struct vk_img_subrsrc_range_t *ranges); \ + s32 (*dl_vk_create_fence)(\ + struct vk_dev_t *dev, \ + struct vk_fence_create_info_t *info, \ + void * allocator, \ + struct vk_fence_t **fence); \ + s32 (*dl_vk_get_fence_status)(\ + struct vk_dev_t *dev, \ + struct vk_fence_t *fence); \ + s32 (*dl_vk_reset_fences)( \ + struct vk_dev_t *dev, \ + u32 fences_n, \ + struct vk_fence_t **fences); #endif diff --git a/npv/nyanvk/types.h b/npv/nyanvk/types.h index 72da47b..b264ce3 100644 --- a/npv/nyanvk/types.h +++ b/npv/nyanvk/types.h @@ -633,4 +633,9 @@ struct vk_rp_begin_info_t { u32 clr_vals_n; union vk_clr_val_t *clr_vals; }; +struct vk_fence_create_info_t { + u32 type; + void *next; + u32 flags; +}; #endif diff --git a/npv/video/local/code.frag.c b/npv/video/local/code.frag.c index 285cfab..48ef6ab 100644 --- 
a/npv/video/local/code.frag.c +++ b/npv/video/local/code.frag.c @@ -119,7 +119,7 @@ STATIC void img_mem_barrier_run_once(struct vk_img_mem_barrier_t *b) submit_info.type = vk_struct_type_submit_info; submit_info.cbs_n = 1; submit_info.cbs = &npv_vk_surf_p.dev.cbs[0]; - vk_q_submit(&submit_info); + vk_q_submit(&submit_info, 0); IF_FATALVVK("%d:queue:%p:unable to submit the initial layout transition command buffer\n", r, npv_vk_surf_p.dev.q); /*--------------------------------------------------------------------*/ vk_q_wait_idle(); @@ -528,7 +528,6 @@ STATIC void blit_setup(u8 swpchn_img) IF_FATALVVK("%d:swapchain img:%u:command buffer:%p:unable to end recording\n", r, swpchn_img, npv_vk_surf_p.dev.cbs[swpchn_img]); blit_l[swpchn_img].update_requested = false; } -/*NSPC*/ STATIC void blits_request_update(void) { u8 i; @@ -541,6 +540,20 @@ STATIC void blits_request_update(void) ++i; } } +STATIC bool are_sems_available(void) +{ + s32 r; + /* coarse synchronization (we could use a pool of semaphores/fences); NOTE(review): the fence is reset here, ahead of the queue submit which signals it again -- confirm a true return is always followed by that submit, even when the image acquisition reports NOT_READY */ + vk_get_fence_status(); + if (r == vk_not_ready) + return false; + else if (r != vk_success) + npv_vk_fatal("%d:device:%p:unable to get fence %p status\n", r, npv_vk_surf_p.dev.vk, npv_vk_surf_p.dev.fence); + /* vk_success */ + vk_reset_fences(); + IF_FATALVVK("%d:device:%p:unable to reset the fence %p\n", r, npv_vk_surf_p.dev.vk, npv_vk_surf_p.dev.fence); + return true; +} #define READY 0 #define NOT_READY 1 STATIC u8 swpchn_next_img(u32 *swpchn_img) { loop @@ -554,6 +567,11 @@ STATIC u8 swpchn_next_img(u32 *swpchn_img) { loop info.timeout = 0; info.devs = 0x00000001; /* no device group then 1 */ info.sem = npv_vk_surf_p.dev.sems[npv_vk_sem_acquire_img_done]; + /* + * XXX: for this vk func, the wait sem _MUST_ be unsignaled _AND_ have + * all its "wait" operations completed. state we secure with our usage + * of a fence. 
+ */ vk_acquire_next_img(&info, swpchn_img); if (r == vk_not_ready) return NOT_READY; @@ -581,6 +599,10 @@ STATIC u8 send_to_pe(u32 swpchn_img) memset(&submit_info, 0, sizeof(submit_info)); submit_info.type = vk_struct_type_submit_info; submit_info.wait_sems_n = 1; + /* + * the "semaphore wait operation" will unsignal this semaphore once the + * "wait" is done. + */ submit_info.wait_sems = &npv_vk_surf_p.dev.sems[npv_vk_sem_acquire_img_done]; wait_dst_stage = vk_pl_stage_bottom_of_pipe_bit; @@ -589,12 +611,17 @@ STATIC u8 send_to_pe(u32 swpchn_img) submit_info.cbs = &npv_vk_surf_p.dev.cbs[swpchn_img]; submit_info.signal_sems_n = 1; submit_info.signal_sems = &npv_vk_surf_p.dev.sems[npv_vk_sem_blit_done]; - vk_q_submit(&submit_info); + /* XXX: coarse synchronization happens here */ + vk_q_submit(&submit_info, npv_vk_surf_p.dev.fence); IF_FATALVVK("%d:queue:%p:unable to submit the image pre-recorded command buffer\n", r, npv_vk_surf_p.dev.q); /*--------------------------------------------------------------------*/ idxs[0] = swpchn_img; memset(&present_info, 0, sizeof(present_info)); present_info.type = vk_struct_type_present_info; + /* + * the "semaphore wait operation" will unsignal this semaphore once the + * "wait" is done. 
+ */ present_info.wait_sems_n = 1; present_info.wait_sems = &npv_vk_surf_p.dev.sems[npv_vk_sem_blit_done]; present_info.swpchns_n = 1; diff --git a/npv/video/namespace/main.c b/npv/video/namespace/main.c index 7844aa7..2780159 100644 --- a/npv/video/namespace/main.c +++ b/npv/video/namespace/main.c @@ -4,10 +4,12 @@ #define avutil_video_fr_ref_alloc av_frame_alloc #define avutil_video_fr_unref av_frame_unref /*----------------------------------------------------------------------------*/ +#define are_sems_available npv_video_are_sems_available #define blit_compute_offsets npv_video_blit_compute_offsets #define blit_l npv_video_blit_l #define blit_vp_t npv_video_blit_vp_t #define blit_setup npv_video_blit_setup +#define blits_request_update npv_video_blits_request_update #define dec_a_grow npv_video_dec_a_grow #define dec_ctx_mutex_l npv_video_dec_ctx_mutex_l #define dec_l npv_video_dec_l @@ -49,10 +51,12 @@ #undef avutil_video_fr_ref_alloc #undef avutil_video_fr_unref /*----------------------------------------------------------------------------*/ +#undef are_sems_available #undef blit_compute_offsets #undef blit_l #undef blit_vp_t #undef blit_setup +#undef blits_request_update #undef dec_a_grow #undef dec_ctx_mutex_l #undef dec_l diff --git a/npv/video/public/code.frag.c b/npv/video/public/code.frag.c index 3c2fc4b..c8b403b 100644 --- a/npv/video/public/code.frag.c +++ b/npv/video/public/code.frag.c @@ -234,6 +234,8 @@ STATIC void timer_evt(void) if (npv_paused_p) npv_video_osd_rop_restore(); npv_video_osd_rop_blend(now); + if (!are_sems_available()) + return; loop { /* because the swpchn can change for many reasons */ r = swpchn_next_img(&swpchn_img); if (r == NOT_READY) diff --git a/npv/vk/api_usage.h b/npv/vk/api_usage.h index 8ee52a2..5d59ce4 100644 --- a/npv/vk/api_usage.h +++ b/npv/vk/api_usage.h @@ -63,8 +63,8 @@ r = npv_vk_surf_p.dev.dl_vk_end_cb(__VA_ARGS__) #define vk_cmd_pl_barrier(cb, b) \ npv_vk_surf_p.dev.dl_vk_cmd_pl_barrier(cb, vk_pl_stage_top_of_pipe_bit, 
vk_pl_stage_top_of_pipe_bit, 0, 0, 0, 0, 0, 1, b) -#define vk_q_submit(info) \ -r = npv_vk_surf_p.dev.dl_vk_q_submit(npv_vk_surf_p.dev.q, 1, info, 0) +#define vk_q_submit(info, fence) \ +r = npv_vk_surf_p.dev.dl_vk_q_submit(npv_vk_surf_p.dev.q, 1, info, fence) #define vk_q_wait_idle() \ r = npv_vk_surf_p.dev.dl_vk_q_wait_idle(npv_vk_surf_p.dev.q) @@ -90,6 +90,15 @@ r = npv_vk_surf_p.dev.dl_vk_q_present(npv_vk_surf_p.dev.q, info) r = npv_vk_surf_p.dev.dl_vk_create_sem(npv_vk_surf_p.dev.vk, info, 0, sem) #define vk_cmd_clr_color_img npv_vk_surf_p.dev.dl_vk_cmd_clr_color_img + +#define vk_create_fence(info) \ +r = npv_vk_surf_p.dev.dl_vk_create_fence(npv_vk_surf_p.dev.vk, info, 0, &npv_vk_surf_p.dev.fence) + +#define vk_get_fence_status() \ +r = npv_vk_surf_p.dev.dl_vk_get_fence_status(npv_vk_surf_p.dev.vk, npv_vk_surf_p.dev.fence) + +#define vk_reset_fences() \ +r = npv_vk_surf_p.dev.dl_vk_reset_fences(npv_vk_surf_p.dev.vk, 1, &npv_vk_surf_p.dev.fence) /******************************************************************************/ /* cherry picked from nyanvk/syms_global.h */ #define VK_GLOBAL_SYMS \ diff --git a/npv/vk/local/code.frag.c b/npv/vk/local/code.frag.c index f996199..19afef6 100644 --- a/npv/vk/local/code.frag.c +++ b/npv/vk/local/code.frag.c @@ -84,6 +84,9 @@ STATIC void dev_syms(void) DEV_SYM(vkQueuePresentKHR, vk_q_present); DEV_SYM(vkCreateSemaphore, vk_create_sem); DEV_SYM(vkCmdClearColorImage, vk_cmd_clr_color_img); + DEV_SYM(vkCreateFence, vk_create_fence); + DEV_SYM(vkGetFenceStatus, vk_get_fence_status); + DEV_SYM(vkResetFences, vk_reset_fences); } #undef DEVICE_SYM /*----------------------------------------------------------------------------*/ @@ -888,6 +891,18 @@ STATIC void sems_create(void) ++sem; } } +STATIC void fence_create(void) +{ + s32 r; + struct vk_fence_create_info_t info; + + memset(&info, 0, sizeof(info)); + info.type = vk_struct_type_fence_create_info; + info.flags = vk_fence_create_signaled_bit; + vk_create_fence(&info); + 
IF_FATALVK("%d:device:%p:unable to create the synchronization fence\n", r, surf_p.dev.vk); + pout("device:%p:synchronization fence created %p\n", surf_p.dev.vk, surf_p.dev.fence); +} STATIC void swpchn_imgs_cbs_init_once(void) { s32 r; diff --git a/npv/vk/namespace/main.c b/npv/vk/namespace/main.c index 893a047..47fd1e3 100644 --- a/npv/vk/namespace/main.c +++ b/npv/vk/namespace/main.c @@ -5,6 +5,7 @@ #define dev_init npv_vk_dev_init #define dev_syms npv_vk_devs_syms #define dev_type_str npv_vk_dev_type_str +#define fence_create npv_vk_fence_create #define instance_create npv_vk_instance_create #define instance_exts_dump npv_vk_instance_exts_dump #define instance_l npv_vk_instance_l @@ -66,6 +67,7 @@ #undef dev_init #undef dev_syms #undef dev_type_str +#undef fence_create #undef instance_create #undef instance_exts_dump #undef instance_l diff --git a/npv/vk/public/code.frag.c b/npv/vk/public/code.frag.c index 021e14f..fa46cab 100644 --- a/npv/vk/public/code.frag.c +++ b/npv/vk/public/code.frag.c @@ -45,6 +45,7 @@ STATIC void surf_init_once(xcb_connection_t *c, u32 win_id) surf_create(c, win_id); dev_init(); sems_create(); + fence_create(); /*====================================================================*/ swpchn_init_once(); swpchn_imgs_cbs_init_once(); diff --git a/npv/vk/public/state.frag.h b/npv/vk/public/state.frag.h index 8c40139..d8e7006 100644 --- a/npv/vk/public/state.frag.h +++ b/npv/vk/public/state.frag.h @@ -26,6 +26,7 @@ struct dev_t { struct vk_dev_t *vk; struct phydev_t phydev; struct swpchn_t swpchn; + struct vk_fence_t *fence; struct vk_q_t *q; struct vk_cp_t *cp; struct vk_cb_t *cbs[swpchn_imgs_n_max]; -- 2.11.4.GIT