From ae93aaa1f36c81faee712557398b84c2148ae746 Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Fri, 16 Mar 2018 09:06:33 +0100 Subject: [PATCH] chroma: copy: add a way to shift bits for 16 bits conversion --- modules/hw/vaapi/chroma.c | 4 +- modules/video_chroma/copy.c | 350 ++++++++++++++++++++++++++++++++------------ modules/video_chroma/copy.h | 8 +- 3 files changed, 264 insertions(+), 98 deletions(-) diff --git a/modules/hw/vaapi/chroma.c b/modules/hw/vaapi/chroma.c index 3e5aef35db..5fc965283a 100644 --- a/modules/hw/vaapi/chroma.c +++ b/modules/hw/vaapi/chroma.c @@ -110,7 +110,7 @@ FillPictureFromVAImage(picture_t *dest, break; case VLC_CODEC_I420_10B: Copy420_16_SP_to_P(dest, src_planes, src_pitches, - src_img->height, cache); + src_img->height, 0, cache); break; default: vlc_assert_unreachable(); @@ -219,7 +219,7 @@ FillVAImageFromPicture(VAImage *dest_img, uint8_t *dest_buf, case VLC_CODEC_I420_10B: assert(dest_pic->format.i_chroma == VLC_CODEC_VAAPI_420_10BPP); Copy420_16_P_to_SP(dest_pic, src_planes, src_pitches, - src->format.i_height, cache); + src->format.i_height, 0, cache); break; case VLC_CODEC_P010: { diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c index de4c241d25..1895218037 100644 --- a/modules/video_chroma/copy.c +++ b/modules/video_chroma/copy.c @@ -36,6 +36,9 @@ #include #include "copy.h" +static void CopyPlane(uint8_t *dst, size_t dst_pitch, + const uint8_t *src, size_t src_pitch, + unsigned height, int bitshift); #define ASSERT_PLANE(i) assert(src[i]); \ assert(src_pitch[i]) @@ -77,24 +80,48 @@ void CopyCleanCache(copy_cache_t *cache) /* Copy 16/64 bytes from srcp to dstp loading data with the SSE>=2 instruction * load and storing data with the SSE>=2 instruction store. */ -#define COPY16(dstp, srcp, load, store) \ + +#define COPY16_SHIFTR(x) \ + "psrlw "x", %%xmm1\n" +#define COPY16_SHIFTL(x) \ + "psllw "x", %%xmm1\n" + +#define COPY16_S(dstp, srcp, load, store, shiftstr) \ asm volatile ( \ load " 0(%[src]), %%xmm1\n" \ + shiftstr \ store " %%xmm1, 0(%[dst])\n" \ : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1") -#define COPY64(dstp, srcp, load, store) \ +#define COPY16(dstp, srcp, load, store) COPY16_S(dstp, srcp, load, store, "") + +#define COPY64_SHIFTR(x) \ + "psrlw "x", %%xmm1\n" \ + "psrlw "x", %%xmm2\n" \ + "psrlw "x", %%xmm3\n" \ + "psrlw "x", %%xmm4\n" +#define COPY64_SHIFTL(x) \ + "psllw "x", %%xmm1\n" \ + "psllw "x", %%xmm2\n" \ + "psllw "x", %%xmm3\n" \ + "psllw "x", %%xmm4\n" + +#define COPY64_S(dstp, srcp, load, store, shiftstr) \ asm volatile ( \ load " 0(%[src]), %%xmm1\n" \ load " 16(%[src]), %%xmm2\n" \ load " 32(%[src]), %%xmm3\n" \ load " 48(%[src]), %%xmm4\n" \ + shiftstr \ store " %%xmm1, 0(%[dst])\n" \ store " %%xmm2, 16(%[dst])\n" \ store " %%xmm3, 32(%[dst])\n" \ store " %%xmm4, 48(%[dst])\n" \ : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1", "xmm2", "xmm3", "xmm4") +#define COPY64(dstp, srcp, load, store) \ + COPY64_S(dstp, srcp, load, store, "") + #ifdef COPY_TEST_NOOPTIM # undef vlc_CPU_SSE4_1 # define vlc_CPU_SSE4_1() (0) @@ -113,45 +140,70 @@ void CopyCleanCache(copy_cache_t *cache) VLC_SSE static void CopyFromUswc(uint8_t *dst, size_t dst_pitch, const uint8_t *src, size_t src_pitch, - unsigned width, unsigned height) + unsigned width, unsigned height, int bitshift) { assert(((intptr_t)dst & 0x0f) == 0 && (dst_pitch & 0x0f) == 0); asm volatile ("mfence"); - for (unsigned y = 0; y < height; y++) { - const unsigned unaligned = (-(uintptr_t)src) & 0x0f; - unsigned x = unaligned; - -#ifdef CAN_COMPILE_SSE4_1 - if (vlc_CPU_SSE4_1()) { - if (!unaligned) { - for (; x+63 < width; x += 64) - COPY64(&dst[x], &src[x], "movntdqa", "movdqa"); - } else { - COPY16(dst, src, "movdqu", "movdqa"); - for (; x+63 < width; x += 64) - COPY64(&dst[x], &src[x], "movntdqa", "movdqu"); - } - } else -#endif - { - if (!unaligned) { - for (; x+63 < width; x += 64) - COPY64(&dst[x], &src[x], "movdqa", "movdqa"); - } else { - COPY16(dst, src, "movdqu", "movdqa"); - for (; x+63 < width; x += 64) - COPY64(&dst[x], &src[x], "movdqa", "movdqu"); - } - } - - for (; x < width; x++) - dst[x] = src[x]; +#define SSE_USWC_COPY(shiftstr16, shiftstr64) \ + for (unsigned y = 0; y < height; y++) { \ + const unsigned unaligned = (-(uintptr_t)src) & 0x0f; \ + unsigned x = unaligned; \ + if (vlc_CPU_SSE4_1()) { \ + if (!unaligned) { \ + for (; x+63 < width; x += 64) \ + COPY64_S(&dst[x], &src[x], "movntdqa", "movdqa", shiftstr64); \ + } else { \ + COPY16_S(dst, src, "movdqu", "movdqa", shiftstr16); \ + for (; x+63 < width; x += 64) \ + COPY64_S(&dst[x], &src[x], "movntdqa", "movdqu", shiftstr64); \ + } \ + } else { \ + if (!unaligned) { \ + for (; x+63 < width; x += 64) \ + COPY64_S(&dst[x], &src[x], "movdqa", "movdqa", shiftstr64); \ + } else { \ + COPY16_S(dst, src, "movdqu", "movdqa", shiftstr16); \ + for (; x+63 < width; x += 64) \ + COPY64_S(&dst[x], &src[x], "movdqa", "movdqu", shiftstr64); \ + } \ + } \ + /* The following should not happen since buffers are generally well aligned */ \ + if (x < width) \ + CopyPlane(&dst[x], dst_pitch - x, &src[x], src_pitch - x, 1, bitshift); \ + src += src_pitch; \ + dst += dst_pitch; \ + } - src += src_pitch; - dst += dst_pitch; + switch (bitshift) + { + case 0: + SSE_USWC_COPY("", "") + break; + case -6: + SSE_USWC_COPY(COPY16_SHIFTL("$6"), COPY64_SHIFTL("$6")) + break; + case 6: + SSE_USWC_COPY(COPY16_SHIFTR("$6"), COPY64_SHIFTR("$6")) + break; + case 2: + SSE_USWC_COPY(COPY16_SHIFTR("$2"), COPY64_SHIFTR("$2")) + break; + case -2: + SSE_USWC_COPY(COPY16_SHIFTL("$2"), COPY64_SHIFTL("$2")) + break; + case 4: + SSE_USWC_COPY(COPY16_SHIFTR("$4"), COPY64_SHIFTR("$4")) + break; + case -4: + SSE_USWC_COPY(COPY16_SHIFTL("$2"), COPY64_SHIFTL("$2")) + break; + default: + vlc_assert_unreachable(); } +#undef SSE_USWC_COPY + asm volatile ("mfence"); } @@ -412,21 +464,21 @@ static void SSE_SplitUV(uint8_t *dstu, size_t dstu_pitch, static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch, const uint8_t *src, size_t src_pitch, uint8_t *cache, size_t cache_size, - unsigned height) + unsigned height, int bitshift) { const unsigned w16 = (src_pitch+15) & ~15; const unsigned hstep = cache_size / w16; assert(hstep > 0); /* If SSE4.1: CopyFromUswc is faster than memcpy */ - if (!vlc_CPU_SSE4_1() && src_pitch == dst_pitch) + if (!vlc_CPU_SSE4_1() && bitshift == 0 && src_pitch == dst_pitch) memcpy(dst, src, src_pitch * height); else for (unsigned y = 0; y < height; y += hstep) { const unsigned hblock = __MIN(hstep, height - y); /* Copy a bunch of line into our cache */ - CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock); + CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock, bitshift); /* Copy from our cache to the destination */ Copy2d(dst, dst_pitch, cache, w16, src_pitch, hblock); @@ -442,7 +494,7 @@ SSE_InterleavePlanes(uint8_t *dst, size_t dst_pitch, const uint8_t *srcu, size_t srcu_pitch, const uint8_t *srcv, size_t srcv_pitch, uint8_t *cache, size_t cache_size, - unsigned int height, uint8_t pixel_size) + unsigned int height, uint8_t pixel_size, int bitshift) { assert(srcu_pitch == srcv_pitch); unsigned int const w16 = (srcu_pitch+15) & ~15; @@ -454,9 +506,9 @@ SSE_InterleavePlanes(uint8_t *dst, size_t dst_pitch, unsigned int const hblock = __MIN(hstep, height - y); /* Copy a bunch of line into our cache */ - CopyFromUswc(cache, w16, srcu, srcu_pitch, srcu_pitch, hblock); + CopyFromUswc(cache, w16, srcu, srcu_pitch, srcu_pitch, hblock, bitshift); CopyFromUswc(cache+w16*hblock, w16, srcv, srcv_pitch, - srcv_pitch, hblock); + srcv_pitch, hblock, bitshift); /* Copy from our cache to the destination */ SSE_InterleaveUV(dst, dst_pitch, cache, w16, @@ -474,7 +526,7 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch, uint8_t *dstv, size_t dstv_pitch, const uint8_t *src, size_t src_pitch, uint8_t *cache, size_t cache_size, - unsigned height, uint8_t pixel_size) + unsigned height, uint8_t pixel_size, int bitshift) { const unsigned w16 = (src_pitch+15) & ~15; const unsigned hstep = cache_size / w16; @@ -484,7 +536,7 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch, const unsigned hblock = __MIN(hstep, height - y); /* Copy a bunch of line into our cache */ - CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock); + CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock, bitshift); /* Copy from our cache to the destination */ SSE_SplitUV(dstu, dstu_pitch, dstv, dstv_pitch, @@ -506,7 +558,7 @@ static void SSE_Copy420_P_to_P(picture_t *dst, const uint8_t *src[static 3], SSE_CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch, src[n], src_pitch[n], cache->buffer, cache->size, - (height+d-1)/d); + (height+d-1)/d, 0); } asm volatile ("emms"); } @@ -517,37 +569,38 @@ static void SSE_Copy420_SP_to_SP(picture_t *dst, const uint8_t *src[static 2], const copy_cache_t *cache) { SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, src[0], src_pitch[0], - cache->buffer, cache->size, height); + cache->buffer, cache->size, height, 0); SSE_CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch, src[1], src_pitch[1], - cache->buffer, cache->size, height / 2); + cache->buffer, cache->size, height / 2, 0); asm volatile ("emms"); } static void SSE_Copy420_SP_to_P(picture_t *dest, const uint8_t *src[static 2], const size_t src_pitch[static 2], unsigned int height, - const copy_cache_t *cache, uint8_t pixel_size) + uint8_t pixel_size, int bitshift, const copy_cache_t *cache) { SSE_CopyPlane(dest->p[0].p_pixels, dest->p[0].i_pitch, - src[0], src_pitch[0], cache->buffer, cache->size, height); + src[0], src_pitch[0], cache->buffer, cache->size, height, bitshift); + SSE_SplitPlanes(dest->p[1].p_pixels, dest->p[1].i_pitch, dest->p[2].p_pixels, dest->p[2].i_pitch, src[1], src_pitch[1], cache->buffer, cache->size, - height / 2, pixel_size); + height / 2, pixel_size, bitshift); asm volatile ("emms"); } static void SSE_Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3], const size_t src_pitch[static 3], - unsigned height, const copy_cache_t *cache, - uint8_t pixel_size) + unsigned height, uint8_t pixel_size, + int bitshift, const copy_cache_t *cache) { SSE_CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, src[0], src_pitch[0], - cache->buffer, cache->size, height); + cache->buffer, cache->size, height, bitshift); SSE_InterleavePlanes(dst->p[1].p_pixels, dst->p[1].i_pitch, src[U_PLANE], src_pitch[U_PLANE], src[V_PLANE], src_pitch[V_PLANE], - cache->buffer, cache->size, height / 2, pixel_size); + cache->buffer, cache->size, height / 2, pixel_size, bitshift); asm volatile ("emms"); } #undef COPY64 @@ -555,9 +608,26 @@ static void SSE_Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3], static void CopyPlane(uint8_t *dst, size_t dst_pitch, const uint8_t *src, size_t src_pitch, - unsigned height) + unsigned height, int bitshift) { - if (src_pitch == dst_pitch) + if (bitshift != 0) + { + for (unsigned y = 0; y < height; y++) + { + uint16_t *dst16 = (uint16_t *) dst; + const uint16_t *src16 = (const uint16_t *) src; + + if (bitshift > 0) + for (unsigned x = 0; x < (src_pitch / 2); x++) + *dst16++ = (*src16++) >> (bitshift & 0xf); + else + for (unsigned x = 0; x < (src_pitch / 2); x++) + *dst16++ = (*src16++) << ((-bitshift) & 0xf); + src += src_pitch; + dst += dst_pitch; + } + } + else if (src_pitch == dst_pitch) memcpy(dst, src, src_pitch * height); else for (unsigned y = 0; y < height; y++) { @@ -580,9 +650,9 @@ void Copy420_SP_to_SP(picture_t *dst, const uint8_t *src[static 2], #endif CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, - src[0], src_pitch[0], height); + src[0], src_pitch[0], height, 0); CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch, - src[1], src_pitch[1], height/2); + src[1], src_pitch[1], height/2, 0); } #define SPLIT_PLANES(type, pitch_den) do { \ @@ -597,6 +667,30 @@ void Copy420_SP_to_SP(picture_t *dst, const uint8_t *src[static 2], } \ } while(0) +#define SPLIT_PLANES_SHIFTR(type, pitch_den, bitshift) do { \ + for (unsigned y = 0; y < height; y++) { \ + for (unsigned x = 0; x < src_pitch / pitch_den; x++) { \ + ((type *) dstu)[x] = (((const type *) src)[2*x+0]) >> (bitshift); \ + ((type *) dstv)[x] = (((const type *) src)[2*x+1]) >> (bitshift); \ + } \ + src += src_pitch; \ + dstu += dstu_pitch; \ + dstv += dstv_pitch; \ + } \ +} while(0) + +#define SPLIT_PLANES_SHIFTL(type, pitch_den, bitshift) do { \ + for (unsigned y = 0; y < height; y++) { \ + for (unsigned x = 0; x < src_pitch / pitch_den; x++) { \ + ((type *) dstu)[x] = (((const type *) src)[2*x+0]) << (bitshift); \ + ((type *) dstv)[x] = (((const type *) src)[2*x+1]) << (bitshift); \ + } \ + src += src_pitch; \ + dstu += dstu_pitch; \ + dstv += dstv_pitch; \ + } \ +} while(0) + static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch, uint8_t *dstv, size_t dstv_pitch, const uint8_t *src, size_t src_pitch, unsigned height) @@ -606,9 +700,15 @@ static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch, static void SplitPlanes16(uint8_t *dstu, size_t dstu_pitch, uint8_t *dstv, size_t dstv_pitch, - const uint8_t *src, size_t src_pitch, unsigned height) + const uint8_t *src, size_t src_pitch, unsigned height, + int bitshift) { - SPLIT_PLANES(uint16_t, 4); + if (bitshift == 0) + SPLIT_PLANES(uint16_t, 4); + else if (bitshift > 0) + SPLIT_PLANES_SHIFTR(uint16_t, 4, bitshift & 0xf); + else + SPLIT_PLANES_SHIFTL(uint16_t, 4, (-bitshift) & 0xf); } void Copy420_SP_to_P(picture_t *dst, const uint8_t *src[static 2], @@ -618,13 +718,13 @@ void Copy420_SP_to_P(picture_t *dst, const uint8_t *src[static 2], ASSERT_2PLANES; #ifdef CAN_COMPILE_SSE2 if (vlc_CPU_SSE2()) - return SSE_Copy420_SP_to_P(dst, src, src_pitch, height, cache, 1); + return SSE_Copy420_SP_to_P(dst, src, src_pitch, height, 1, 0, cache); #else VLC_UNUSED(cache); #endif CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, - src[0], src_pitch[0], height); + src[0], src_pitch[0], height, 0); SplitPlanes(dst->p[1].p_pixels, dst->p[1].i_pitch, dst->p[2].p_pixels, dst->p[2].i_pitch, src[1], src_pitch[1], height/2); @@ -632,21 +732,23 @@ void Copy420_SP_to_P(picture_t *dst, const uint8_t *src[static 2], void Copy420_16_SP_to_P(picture_t *dst, const uint8_t *src[static 2], const size_t src_pitch[static 2], unsigned height, - const copy_cache_t *cache) + int bitshift, const copy_cache_t *cache) { ASSERT_2PLANES; + assert(bitshift >= -6 && bitshift <= 6 && (bitshift % 2 == 0)); + #ifdef CAN_COMPILE_SSE3 if (vlc_CPU_SSSE3()) - return SSE_Copy420_SP_to_P(dst, src, src_pitch, height, cache, 2); + return SSE_Copy420_SP_to_P(dst, src, src_pitch, height, 2, bitshift, cache); #else VLC_UNUSED(cache); #endif CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, - src[0], src_pitch[0], height); + src[0], src_pitch[0], height, bitshift); SplitPlanes16(dst->p[1].p_pixels, dst->p[1].i_pitch, dst->p[2].p_pixels, dst->p[2].i_pitch, - src[1], src_pitch[1], height/2); + src[1], src_pitch[1], height/2, bitshift); } #define INTERLEAVE_UV() do { \ @@ -661,6 +763,30 @@ void Copy420_16_SP_to_P(picture_t *dst, const uint8_t *src[static 2], } \ }while(0) +#define INTERLEAVE_UV_SHIFTR(bitshitf) do { \ + for ( unsigned int line = 0; line < copy_lines; line++ ) { \ + for ( unsigned int col = 0; col < copy_pitch; col++ ) { \ + *dstUV++ = (*srcU++) >> (bitshitf); \ + *dstUV++ = (*srcV++) >> (bitshitf); \ + } \ + dstUV += i_extra_pitch_uv; \ + srcU += i_extra_pitch_u; \ + srcV += i_extra_pitch_v; \ + } \ +}while(0) + +#define INTERLEAVE_UV_SHIFTL(bitshitf) do { \ + for ( unsigned int line = 0; line < copy_lines; line++ ) { \ + for ( unsigned int col = 0; col < copy_pitch; col++ ) { \ + *dstUV++ = (*srcU++) << (bitshitf); \ + *dstUV++ = (*srcV++) << (bitshitf); \ + } \ + dstUV += i_extra_pitch_uv; \ + srcU += i_extra_pitch_u; \ + srcV += i_extra_pitch_v; \ + } \ +}while(0) + void Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3], const size_t src_pitch[static 3], unsigned height, const copy_cache_t *cache) @@ -668,13 +794,13 @@ void Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3], ASSERT_3PLANES; #ifdef CAN_COMPILE_SSE2 if (vlc_CPU_SSE2()) - return SSE_Copy420_P_to_SP(dst, src, src_pitch, height, cache, 1); + return SSE_Copy420_P_to_SP(dst, src, src_pitch, height, 1, 0, cache); #else (void) cache; #endif CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, - src[0], src_pitch[0], height); + src[0], src_pitch[0], height, 0); const unsigned copy_lines = height / 2; const unsigned copy_pitch = src_pitch[1]; @@ -691,18 +817,19 @@ void Copy420_P_to_SP(picture_t *dst, const uint8_t *src[static 3], void Copy420_16_P_to_SP(picture_t *dst, const uint8_t *src[static 3], const size_t src_pitch[static 3], unsigned height, - const copy_cache_t *cache) + int bitshift, const copy_cache_t *cache) { ASSERT_3PLANES; + assert(bitshift >= -6 && bitshift <= 6 && (bitshift % 2 == 0)); #ifdef CAN_COMPILE_SSE2 if (vlc_CPU_SSSE3()) - return SSE_Copy420_P_to_SP(dst, src, src_pitch, height, cache, 2); + return SSE_Copy420_P_to_SP(dst, src, src_pitch, height, 2, bitshift, cache); #else (void) cache; #endif CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, - src[0], src_pitch[0], height); + src[0], src_pitch[0], height, bitshift); const unsigned copy_lines = height / 2; const unsigned copy_pitch = src_pitch[1] / 2; @@ -714,7 +841,13 @@ void Copy420_16_P_to_SP(picture_t *dst, const uint8_t *src[static 3], uint16_t *dstUV = (void*) dst->p[1].p_pixels; const uint16_t *srcU = (const uint16_t *) src[U_PLANE]; const uint16_t *srcV = (const uint16_t *) src[V_PLANE]; - INTERLEAVE_UV(); + + if (bitshift == 0) + INTERLEAVE_UV(); + else if (bitshift > 0) + INTERLEAVE_UV_SHIFTR(bitshift & 0xf); + else + INTERLEAVE_UV_SHIFTL((-bitshift) & 0xf); } void CopyFromI420_10ToP010(picture_t *dst, const uint8_t *src[static 3], @@ -771,11 +904,11 @@ void Copy420_P_to_P(picture_t *dst, const uint8_t *src[static 3], #endif CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch, - src[0], src_pitch[0], height); + src[0], src_pitch[0], height, 0); CopyPlane(dst->p[1].p_pixels, dst->p[1].i_pitch, - src[1], src_pitch[1], height / 2); + src[1], src_pitch[1], height / 2, 0); CopyPlane(dst->p[2].p_pixels, dst->p[2].i_pitch, - src[2], src_pitch[2], height / 2); + src[2], src_pitch[2], height / 2, 0); } int picture_UpdatePlanes(picture_t *picture, uint8_t *data, unsigned pitch) @@ -837,8 +970,14 @@ int picture_UpdatePlanes(picture_t *picture, uint8_t *data, unsigned pitch) struct test_dst { vlc_fourcc_t chroma; - void (*conv)(picture_t *, const uint8_t *[], const size_t [], unsigned, - const copy_cache_t *); + int bitshift; + union + { + void (*conv)(picture_t *, const uint8_t *[], const size_t [], unsigned, + const copy_cache_t *); + void (*conv16)(picture_t *, const uint8_t *[], const size_t [], unsigned, int, + const copy_cache_t *); + }; }; struct test_conv @@ -849,18 +988,18 @@ struct test_conv static const struct test_conv convs[] = { { .src_chroma = VLC_CODEC_NV12, - .dsts = { { VLC_CODEC_I420, Copy420_SP_to_P }, - { VLC_CODEC_NV12, Copy420_SP_to_SP } }, + .dsts = { { VLC_CODEC_I420, 0, .conv = Copy420_SP_to_P }, + { VLC_CODEC_NV12, 0, .conv = Copy420_SP_to_SP } }, }, { .src_chroma = VLC_CODEC_I420, - .dsts = { { VLC_CODEC_I420, Copy420_P_to_P }, - { VLC_CODEC_NV12, Copy420_P_to_SP } }, + .dsts = { { VLC_CODEC_I420, 0, .conv = Copy420_P_to_P }, + { VLC_CODEC_NV12, 0, .conv = Copy420_P_to_SP } }, }, { .src_chroma = VLC_CODEC_P010, - .dsts = { { VLC_CODEC_I420_10B, Copy420_16_SP_to_P } }, + .dsts = { { VLC_CODEC_I420_10L, 6, .conv16 = Copy420_16_SP_to_P } }, }, - { .src_chroma = VLC_CODEC_I420_10B, - .dsts = { { VLC_CODEC_P010, Copy420_16_P_to_SP } }, + { .src_chroma = VLC_CODEC_I420_10L, + .dsts = { { VLC_CODEC_P010, -6, .conv16 = Copy420_16_P_to_SP } }, }, }; #define NB_CONVS ARRAY_SIZE(convs) @@ -889,8 +1028,8 @@ static const struct test_size sizes[] = { static void piccheck(picture_t *pic, const vlc_chroma_description_t *dsc, bool init) { -#define ASSERT_COLOR() do { \ - fprintf(stderr, "error: pixel doesn't match @ plane: %d: %d x %d: %X\n", i, x, y, *(--p)); \ +#define ASSERT_COLOR(good) do { \ + fprintf(stderr, "error: pixel doesn't match @ plane: %d: %d x %d: 0x%X vs 0x%X\n", i, x, y, *(--p), good); \ assert(!"error: pixel doesn't match"); \ } while(0) @@ -907,7 +1046,7 @@ static void piccheck(picture_t *pic, const vlc_chroma_description_t *dsc, if (init) \ *(p++) = color_UV; \ else if (*(p++) != color_UV) \ - ASSERT_COLOR(); \ + ASSERT_COLOR(color_UV); \ } \ else \ { \ @@ -916,24 +1055,42 @@ static void piccheck(picture_t *pic, const vlc_chroma_description_t *dsc, if (init) \ *(p++) = colors_P[i]; \ else if (*(p++) != colors_P[i]) \ - ASSERT_COLOR(); \ + ASSERT_COLOR(colors_P[i]); \ } \ } \ } \ } while (0) assert(pic->i_planes == 2 || pic->i_planes == 3); - const uint8_t colors_8_P[3] = { 0x42, 0xF1, 0x36 }; - const uint16_t color_8_UV = ntoh16(0xF136); - - const uint16_t colors_16_P[3] = { ntoh16(0x1042), ntoh16(0xF114), ntoh16(0x3645) }; - const uint32_t color_16_UV = ntoh32(0xF1143645); - assert(dsc->pixel_size == 1 || dsc->pixel_size == 2); + if (dsc->pixel_size == 1) + { + const uint8_t colors_8_P[3] = { 0x42, 0xF1, 0x36 }; + const uint16_t color_8_UV = ntoh16(0xF136); PICCHECK(uint8_t, uint16_t, colors_8_P, color_8_UV, 1); + } else + { + const unsigned mask = (1 << dsc->pixel_bits) - 1; + uint16_t colors_16_P[3] = { 0x1042 &mask, 0xF114 &mask, 0x3645 &mask}; + + switch (pic->format.i_chroma) + { + case VLC_CODEC_P010: + for (size_t i = 0; i < 3; ++i) + colors_16_P[i] <<= 6; + break; + case VLC_CODEC_I420_10L: + break; + default: + vlc_assert_unreachable(); + } + + uint32_t color_16_UV = (colors_16_P[2] << 16) | colors_16_P[1]; + PICCHECK(uint16_t, uint32_t, colors_16_P, color_16_UV, 2); + } } static void pic_rsc_destroy(picture_t *pic) @@ -1022,8 +1179,13 @@ int main(void) size->i_visible_width, size->i_visible_height, (const char *) &src->format.i_chroma, (const char *) &dst->format.i_chroma); - test_dst->conv(dst, src_planes, src_pitches, - src->format.i_visible_height, &cache); + if (test_dst->bitshift == 0) + test_dst->conv(dst, src_planes, src_pitches, + src->format.i_visible_height, &cache); + else + test_dst->conv16(dst, src_planes, src_pitches, + src->format.i_visible_height, test_dst->bitshift, + &cache); piccheck(dst, dst_dsc, false); picture_Release(dst); } diff --git a/modules/video_chroma/copy.h b/modules/video_chroma/copy.h index 296d1a6734..b2ced779c8 100644 --- a/modules/video_chroma/copy.h +++ b/modules/video_chroma/copy.h @@ -56,13 +56,17 @@ void Copy420_SP_to_P(picture_t *dst, const uint8_t *src[static 2], const size_t src_pitch[static 2], unsigned height, const copy_cache_t *cache); +/* Copy planes from I420_10 to P010. A positive bitshift value will shift bits + * to the right, a negative value will shift to the left. */ void Copy420_16_P_to_SP(picture_t *dst, const uint8_t *src[static 3], const size_t src_pitch[static 3], unsigned height, - const copy_cache_t *cache); + int bitshift, const copy_cache_t *cache); +/* Copy planes from P010 to I420_10. A positive bitshift value will shift bits + * to the right, a negative value will shift to the left. */ void Copy420_16_SP_to_P(picture_t *dst, const uint8_t *src[static 2], const size_t src_pitch[static 2], unsigned height, - const copy_cache_t *cache); + int bitshift, const copy_cache_t *cache); /* XXX: Not optimized copy (no SEE) */ void CopyFromI420_10ToP010(picture_t *dst, const uint8_t *src[static 3], -- 2.11.4.GIT