Update translations from 2.2.x branch
[vlc.git] / modules / packetizer / startcode_helper.h
blob04d79a280b84b931795fad2e1cc6615f066fa817
/*****************************************************************************
 * startcode_helper.h: Startcodes helpers
 *****************************************************************************
 * Copyright (C) 2016 VideoLAN Authors
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/
20 #ifndef VLC_STARTCODE_HELPER_H_
21 #define VLC_STARTCODE_HELPER_H_
#include <string.h>

#include <vlc_cpu.h>

#if !defined(CAN_COMPILE_SSE2) && defined(HAVE_SSE2_INTRINSICS)
# include <emmintrin.h>
#endif
/* Efficiently searches for an Annex B start code (0x00 0x00 0x01),
 * using a trick that is about 4 times faster than a byte-by-byte lookup. */
/* TRY_MATCH(p, a): probe the four candidate positions p+a .. p+a+3 for the
 * 3-byte start code 0x00 0x00 0x01. On a hit the macro RETURNS FROM THE
 * ENCLOSING FUNCTION with a pointer to the first byte of the start code;
 * otherwise execution simply continues after the macro.
 *
 * Only bytes a+1 and a+3 are tested up front: a start code beginning at a+0
 * or a+1 always has a zero at a+1, and one beginning at a+2 or a+3 always
 * has a zero at a+3.
 *
 * The macro may read p[a+0] .. p[a+5], so the caller must guarantee that six
 * bytes are readable starting at p+a. Both arguments are evaluated several
 * times and must therefore be free of side effects. */
#define TRY_MATCH(p,a) do {\
     if ((p)[(a)+1] == 0) {\
            if ((p)[(a)+0] == 0 && (p)[(a)+2] == 1)\
                return (p)+(a);\
            if ((p)[(a)+2] == 0 && (p)[(a)+3] == 1)\
                return (p)+(a)+1;\
     }\
     if ((p)[(a)+3] == 0) {\
            if ((p)[(a)+2] == 0 && (p)[(a)+4] == 1)\
                return (p)+(a)+2;\
            if ((p)[(a)+4] == 0 && (p)[(a)+5] == 1)\
                return (p)+(a)+3;\
     }\
} while (0)
#if defined(CAN_COMPILE_SSE2) || defined(HAVE_SSE2_INTRINSICS)

/* SSE2 search for an Annex B start code (0x00 0x00 0x01) in [p, end).
 *
 * Returns a pointer to the first start code that is followed by at least one
 * more byte inside the buffer, or NULL when there is none.
 *
 * The buffer is scanned byte by byte until p is 16-byte aligned, then 16
 * bytes at a time: each chunk is compared against zero, and TRY_MATCH is only
 * run on the 4-byte groups that contain at least one zero byte. Whatever is
 * left after the last full chunk is scanned byte by byte again. */
__attribute__ ((__target__ ("sse2")))
static inline const uint8_t * startcode_FindAnnexB_SSE2( const uint8_t *p, const uint8_t *end )
{
    /* Candidate positions are p < end - 3, so nothing can match in fewer
     * than 4 bytes. Bailing out here also keeps `end - 3` inside the buffer. */
    if( end - p < 4 )
        return NULL;
    end -= 3;

    /* First align to 16 */
    /* Skipping this step and doing unaligned loads isn't faster */
    size_t head = 16 - ((uintptr_t)p & 15);
    for( ; head > 0 && p < end; head--, p++ )
    {
        if( p[0] == 0 && p[1] == 0 && p[2] == 1 )
            return p;
    }

    if( p == end )
        return NULL;

    /* p is now 16-byte aligned; round the limit down to a 16-byte boundary. */
    const uint8_t *alignedend = end - ((uintptr_t)end & 15);
    if( alignedend > p )
    {
#ifndef CAN_COMPILE_SSE2
        const __m128i zeros = _mm_set1_epi8( 0x00 );
#endif
        for( ; p < alignedend; p += 16 )
        {
            uint32_t match;
#ifdef CAN_COMPILE_SSE2
            /* The zero register is set up inside the same asm statement that
             * uses it: register contents are not guaranteed to survive
             * between separate asm statements. The "memory" clobber tells
             * the compiler that the statement reads the buffer behind p. */
            asm volatile(
                "pxor      %%xmm1,   %%xmm1\n"
                "movdqa    0(%[v]),  %%xmm0\n"
                "pcmpeqb   %%xmm1,   %%xmm0\n"
                "pmovmskb  %%xmm0,   %[match]\n"
                : [match]"=r"(match)
                : [v]"r"(p)
                : "xmm0", "xmm1", "memory"
            );
#else
            __m128i v = _mm_load_si128( (const __m128i *)p );
            __m128i res = _mm_cmpeq_epi8( zeros, v );
            match = _mm_movemask_epi8( res ); /* bit i is set when p[i] == 0 */
#endif
            /* Each nibble of the mask covers one group of four bytes. */
            if( match & 0x000F )
            {
                TRY_MATCH(p, 0);
            }
            if( match & 0x00F0 )
            {
                TRY_MATCH(p, 4);
            }
            if( match & 0x0F00 )
            {
                TRY_MATCH(p, 8);
            }
            if( match & 0xF000 )
            {
                TRY_MATCH(p, 12);
            }
        }
    }

    /* Unaligned tail */
    for( ; p < end; p++ )
    {
        if( p[0] == 0 && p[1] == 0 && p[2] == 1 )
            return p;
    }

    return NULL;
}

#endif
/* Searches [p, end) for an Annex B start code (0x00 0x00 0x01).
 *
 * Returns a pointer to the first start code that is followed by at least one
 * more byte inside the buffer, or NULL when there is none. When SSE2 support
 * is compiled in and reported by vlc_CPU_SSE2(), the search is delegated to
 * startcode_FindAnnexB_SSE2().
 *
 * The generic code is adapted from libav's ff_avc_find_startcode_internal,
 * and the zero-byte-in-word test is believed to originate from
 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 */
static inline const uint8_t * startcode_FindAnnexB( const uint8_t *p, const uint8_t *end )
{
#if defined(CAN_COMPILE_SSE2) || defined(HAVE_SSE2_INTRINSICS)
    if (vlc_CPU_SSE2())
        return startcode_FindAnnexB_SSE2(p, end);
#endif
    /* Candidate positions are p < end - 3, so nothing can match in fewer
     * than 4 bytes. Bailing out here also keeps `end - 3` inside the buffer. */
    if (end - p < 4)
        return NULL;

    const uint8_t *last = end - 3;
    size_t head = 4 - ((uintptr_t)p & 3);

    /* Byte-wise scan up to the next 4-byte boundary. */
    for (; head > 0 && p < last; head--, p++) {
        if (p[0] == 0 && p[1] == 0 && p[2] == 1)
            return p;
    }

    /* Word-wise scan. TRY_MATCH may read up to p[5], hence the margin of
     * more than 6 remaining bytes. */
    for (; end - p > 6; p += 4) {
        uint32_t x;
        /* memcpy instead of a pointer cast: no aliasing violation */
        memcpy(&x, p, sizeof(x));
        /* Non-zero only when at least one byte of x is zero. */
        if ((x - 0x01010101U) & (~x) & 0x80808080U) {
            /* matching DW isn't faster */
            TRY_MATCH(p, 0);
        }
    }

    /* Byte-wise scan of whatever is left. */
    for (; p < last; p++) {
        if (p[0] == 0 && p[1] == 0 && p[2] == 1)
            return p;
    }

    return NULL;
}
/* Special variation that also reports a start code sitting at the very end
 * of a short buffer (prefix only, no data byte after it).
 *
 * Buffers of 3 or 4 bytes are checked here, probing only the positions whose
 * three bytes lie entirely inside [p, end). Buffers longer than 4 bytes are
 * handed to startcode_FindAnnexB(). Returns NULL when nothing is found. */
static inline const uint8_t * startcode_FindAnyAnnexB( const uint8_t *p, const uint8_t *end )
{
    const size_t i_size = end - p;

    if( i_size > 4 )
        return startcode_FindAnnexB( p, end );

    if( i_size >= 3 && p[0] == 0 && p[1] == 0 && p[2] == 1 )
        return p;

    /* With 4 bytes the only other complete candidate starts at p + 1.
     * Anything starting later would need bytes at or beyond end. */
    if( i_size == 4 && p[1] == 0 && p[2] == 0 && p[3] == 1 )
        return p + 1;

    return NULL;
}
163 #undef TRY_MATCH
165 #endif