demux: mp4: avoid audio cuts on seek
[vlc.git] / modules / video_filter / deinterlace / yadif_template.h
blobdf4897db35cdef6d3318f145ffa13428c5201e80
1 /*
2 * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 /* For some reason clang doesn't like that %%rip macro */
/*
 * MANGLE(a) produces the asm-template text used to reference the constant `a`:
 *  - x86-64 (non-Apple): "a(%%rip)", i.e. the symbol is named directly in the
 *    template, so MANGLEVARIABLES only has to pass `mode` as an operand.
 *  - otherwise: "%[a]", a named asm operand; MANGLEVARIABLES then also
 *    supplies pw_1 and pb_1 as "m" (memory) operands.
 * MANGLEVARIABLES ends with a comma because it is spliced into the middle of
 * the input-operand list of the asm statement in FILTER.
 * NOTE(review): pw_1 and pb_1 are defined outside this file.
 */
23 #if defined(__x86_64__) && !defined(__APPLE__)
24 #define MANGLE(a) "" #a "(%%rip)"
25 #define MANGLEVARIABLES [mode] "g"(mode),
26 #else
27 #define MANGLE(a) "" "%["#a"]"
28 #define MANGLEVARIABLES [pw_1] "m"(pw_1),\
29 [pb_1] "m"(pb_1),\
30 [mode] "g"(mode),
31 #endif
/*
 * Register-width abstraction for the template:
 *   COMPILE_TEMPLATE_SSE defined -> XMM registers, STEP = 8 pixels/iteration
 *   otherwise                    -> MMX registers, STEP = 4 pixels/iteration
 *
 *   MM          register-name prefix ("%%xmm" / "%%mm"); MM"3" names register 3
 *   MOV         narrow move of STEP bytes (movq / movd)
 *   MOVQ        full-register move (movdqa / movq); movdqa needs 16-byte
 *               aligned memory operands
 *   MOVQU       full-register load without an alignment requirement
 *   LOAD        load STEP bytes and zero-extend them to 16-bit words;
 *               relies on register 7 being zero (FILTER clears it with pxor)
 *   PSRL1/2     shift a register right by one / two bytes
 *   PSHUF(a,b)  AT&T operand order: register `a` receives register `b` moved
 *               down by two bytes (in the MMX variant only the low two words
 *               of the result follow that pattern)
 */
33 #ifdef COMPILE_TEMPLATE_SSE
34 #define REGMM "xmm"
35 #define MM "%%"REGMM
36 #define MOV "movq"
37 #define MOVQ "movdqa"
38 #define MOVQU "movdqu"
39 #define STEP 8
40 #define LOAD(mem,dst) \
41 MOV" "mem", "dst" \n\t"\
42 "punpcklbw "MM"7, "dst" \n\t"
43 #define PSRL1(reg) "psrldq $1, "reg" \n\t"
44 #define PSRL2(reg) "psrldq $2, "reg" \n\t"
45 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
46 "psrldq $2, "src" \n\t"
47 #else
48 #define REGMM "mm"
49 #define MM "%%"REGMM
50 #define MOV "movd"
51 #define MOVQ "movq"
52 #define MOVQU "movq"
53 #define STEP 4
54 #define LOAD(mem,dst) \
55 MOV" "mem", "dst" \n\t"\
56 "punpcklbw "MM"7, "dst" \n\t"
57 #define PSRL1(reg) "psrlq $8, "reg" \n\t"
58 #define PSRL2(reg) "psrlq $16, "reg" \n\t"
59 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
60 #endif
/*
 * PABS(tmp,dst): per-word absolute value of dst (signed 16-bit lanes).
 * With COMPILE_TEMPLATE_SSSE3 this is a single pabsw and tmp is left alone;
 * otherwise it is emulated as dst = max(dst, 0 - dst), which clobbers tmp.
 */
62 #ifdef COMPILE_TEMPLATE_SSSE3
63 #define PABS(tmp,dst) \
64 "pabsw "dst", "dst" \n\t"
65 #else
66 #define PABS(tmp,dst) \
67 "pxor "tmp", "tmp" \n\t"\
68 "psubw "dst", "tmp" \n\t"\
69 "pmaxsw "tmp", "dst" \n\t"
70 #endif
/*
 * CHECK(pj,mj): evaluate one diagonal edge direction.
 * Loads bytes starting at offset pj from cur+mrefs and at offset mj from
 * cur+prefs, then leaves:
 *   register 5 = (cur[x-refs+j] + cur[x+refs-j]) >> 1, widened to words
 *                (pavgb rounds up; the pxor/pand/psubusb sequence removes the
 *                rounding bit again to obtain a truncating average)
 *   register 2 = score: sum of three neighbouring absolute byte differences,
 *                each computed as max(a -sat b, b -sat a)
 * Clobbers registers 3 and 4; expects register 7 to be zero.
 * NOTE(review): pb_1 is defined outside this file - presumably a vector of
 * bytes equal to 1; confirm.
 */
73 #define CHECK(pj,mj) \
74 MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
75 MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
76 MOVQ" "MM"2, "MM"4 \n\t"\
77 MOVQ" "MM"2, "MM"5 \n\t"\
78 "pxor "MM"3, "MM"4 \n\t"\
79 "pavgb "MM"3, "MM"5 \n\t"\
80 "pand "MANGLE(pb_1)", "MM"4 \n\t"\
81 "psubusb "MM"4, "MM"5 \n\t"\
82 PSRL1(MM"5") \
83 "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
84 MOVQ" "MM"2, "MM"4 \n\t"\
85 "psubusb "MM"3, "MM"2 \n\t"\
86 "psubusb "MM"4, "MM"3 \n\t"\
87 "pmaxub "MM"3, "MM"2 \n\t"\
88 MOVQ" "MM"2, "MM"3 \n\t"\
89 MOVQ" "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
90 PSRL1(MM"3") /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
91 PSRL2(MM"4") /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
92 "punpcklbw "MM"7, "MM"2 \n\t"\
93 "punpcklbw "MM"7, "MM"3 \n\t"\
94 "punpcklbw "MM"7, "MM"4 \n\t"\
95 "paddw "MM"3, "MM"2 \n\t"\
96 "paddw "MM"4, "MM"2 \n\t" /* score */
/*
 * CHECK1: accept the direction just scored by CHECK wherever it is better.
 * For each word lane with score (register 2) < spatial_score (register 0):
 *   spatial_score = score, spatial_pred (register 1) = candidate in register 5.
 * The comparison mask is also copied to register 6 for the following CHECK2.
 * Clobbers registers 3 and 5.
 */
98 #define CHECK1 \
99 MOVQ" "MM"0, "MM"3 \n\t"\
100 "pcmpgtw "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
101 "pminsw "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
102 MOVQ" "MM"3, "MM"6 \n\t"\
103 "pand "MM"3, "MM"5 \n\t"\
104 "pandn "MM"1, "MM"3 \n\t"\
105 "por "MM"5, "MM"3 \n\t"\
106 MOVQ" "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
/*
 * CHECK2: same selection as CHECK1, but a lane can only win if the preceding
 * CHECK1 also won there. Register 6 holds CHECK1's mask (all ones where it
 * improved, zero elsewhere); adding pw_1 and shifting left by 14 is meant to
 * yield 0 for improved lanes and a large penalty for the others, which is
 * saturating-added to the score before the comparison.
 * Register 6 is consumed (overwritten with the penalty); registers 3 and 5
 * are clobbered.
 * NOTE(review): assumes pw_1 (defined outside this file) is a vector of
 * 16-bit ones - confirm.
 */
108 #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
109 hurts both quality and speed, but matches the C version. */\
110 "paddw "MANGLE(pw_1)", "MM"6 \n\t"\
111 "psllw $14, "MM"6 \n\t"\
112 "paddsw "MM"6, "MM"2 \n\t"\
113 MOVQ" "MM"0, "MM"3 \n\t"\
114 "pcmpgtw "MM"2, "MM"3 \n\t"\
115 "pminsw "MM"2, "MM"0 \n\t"\
116 "pand "MM"3, "MM"5 \n\t"\
117 "pandn "MM"1, "MM"3 \n\t"\
118 "por "MM"5, "MM"3 \n\t"\
119 MOVQ" "MM"3, "MM"1 \n\t"
/*
 * Filter one line with the yadif algorithm, STEP pixels per loop iteration.
 *
 *   dst      output; the low STEP bytes of register 1 are stored each iteration
 *   prev, cur, next
 *            input pointers; each is read at [p + mrefs] and [p + prefs], and
 *            the two selected as prev2/next2 also at [p], [p + 2*mrefs] and
 *            [p + 2*prefs]
 *   w        pixel count; the loop runs while x < w in steps of STEP, so when
 *            w is not a multiple of STEP up to STEP-1 extra bytes are
 *            processed and stored
 *   prefs, mrefs
 *            offsets of the x+refs and x-refs lines (per the inline comments)
 *   parity   non-zero: prev2 = prev, next2 = cur; zero: prev2 = cur, next2 = next
 *   mode     values below 2 enable the extra check against the lines at
 *            +/- 2*refs; otherwise that part is jumped over (jge 1f)
 *
 * tmp is a 16-byte aligned scratch area carved out of tmpU; it holds the
 * word vectors c, d, e and diff at byte offsets 0, 16, 32 and 48.
 *
 * NOTE(review): the CHECK() invocations read cur at byte offsets from -3 to
 * +1 on the neighbouring lines, so memory slightly outside [x, x+STEP) must
 * be readable - confirm the callers' padding.
 * NOTE(review): the main asm statement lists inputs only; no clobbers are
 * visible for the vector registers, the flags (cmpl) or the memory written
 * through tmp - confirm this is intentional.
 */
121 #if defined(__MINGW32__) && defined(_WIN32) && !defined(_WIN64)
122 __attribute__((__force_align_arg_pointer__))
123 #endif
124 VLC_TARGET static void RENAME(yadif_filter_line)(uint8_t *dst,
125 uint8_t *prev, uint8_t *cur, uint8_t *next,
126 int w, int prefs, int mrefs, int parity, int mode)
128 uint8_t tmpU[5*16];
129 uint8_t *tmp= (uint8_t*)(((uintptr_t)(tmpU+15)) & ~15);
130 int x;
/*
 * FILTER expands to the whole per-line loop. It is a macro rather than plain
 * code because prev2/next2 are string macros pasted into asm operand names,
 * so the body is instantiated once for each parity below.
 */
132 #define FILTER\
133 for(x=0; x<w; x+=STEP){\
134 __asm__ volatile(\
135 "pxor "MM"7, "MM"7 \n\t"\
136 LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
137 LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
138 LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
139 LOAD("(%["next2"])", MM"3") /* next2[x] */\
140 MOVQ" "MM"3, "MM"4 \n\t"\
141 "paddw "MM"2, "MM"3 \n\t"\
142 "psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
143 MOVQ" "MM"0, (%[tmp]) \n\t" /* c */\
144 MOVQ" "MM"3, 16(%[tmp]) \n\t" /* d */\
145 MOVQ" "MM"1, 32(%[tmp]) \n\t" /* e */\
146 "psubw "MM"4, "MM"2 \n\t"\
147 PABS( MM"4", MM"2") /* temporal_diff0 */\
148 LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
149 LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
150 "psubw "MM"0, "MM"3 \n\t"\
151 "psubw "MM"1, "MM"4 \n\t"\
152 PABS( MM"5", MM"3")\
153 PABS( MM"5", MM"4")\
154 "paddw "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
155 "psrlw $1, "MM"2 \n\t"\
156 "psrlw $1, "MM"3 \n\t"\
157 "pmaxsw "MM"3, "MM"2 \n\t"\
158 LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
159 LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
160 "psubw "MM"0, "MM"3 \n\t"\
161 "psubw "MM"1, "MM"4 \n\t"\
162 PABS( MM"5", MM"3")\
163 PABS( MM"5", MM"4")\
164 "paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
165 "psrlw $1, "MM"3 \n\t"\
166 "pmaxsw "MM"3, "MM"2 \n\t"\
167 MOVQ" "MM"2, 48(%[tmp]) \n\t" /* diff */\
169 "paddw "MM"0, "MM"1 \n\t"\
170 "paddw "MM"0, "MM"0 \n\t"\
171 "psubw "MM"1, "MM"0 \n\t"\
172 "psrlw $1, "MM"1 \n\t" /* spatial_pred */\
173 PABS( MM"2", MM"0") /* ABS(c-e) */\
175 MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
176 MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
177 MOVQ" "MM"2, "MM"4 \n\t"\
178 "psubusb "MM"3, "MM"2 \n\t"\
179 "psubusb "MM"4, "MM"3 \n\t"\
180 "pmaxub "MM"3, "MM"2 \n\t"\
181 PSHUF(MM"3", MM"2") \
182 "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
183 "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
184 "paddw "MM"2, "MM"0 \n\t"\
185 "paddw "MM"3, "MM"0 \n\t"\
186 "psubw "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
188 CHECK(-2,0)\
189 CHECK1\
190 CHECK(-3,1)\
191 CHECK2\
192 CHECK(0,-2)\
193 CHECK1\
194 CHECK(1,-3)\
195 CHECK2\
197 /* if(p->mode<2) ... */\
198 MOVQ" 48(%[tmp]), "MM"6 \n\t" /* diff */\
199 "cmpl $2, %[mode] \n\t"\
200 "jge 1f \n\t"\
201 LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
202 LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
203 LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
204 LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
205 "paddw "MM"4, "MM"2 \n\t"\
206 "paddw "MM"5, "MM"3 \n\t"\
207 "psrlw $1, "MM"2 \n\t" /* b */\
208 "psrlw $1, "MM"3 \n\t" /* f */\
209 MOVQ" (%[tmp]), "MM"4 \n\t" /* c */\
210 MOVQ" 16(%[tmp]), "MM"5 \n\t" /* d */\
211 MOVQ" 32(%[tmp]), "MM"7 \n\t" /* e */\
212 "psubw "MM"4, "MM"2 \n\t" /* b-c */\
213 "psubw "MM"7, "MM"3 \n\t" /* f-e */\
214 MOVQ" "MM"5, "MM"0 \n\t"\
215 "psubw "MM"4, "MM"5 \n\t" /* d-c */\
216 "psubw "MM"7, "MM"0 \n\t" /* d-e */\
217 MOVQ" "MM"2, "MM"4 \n\t"\
218 "pminsw "MM"3, "MM"2 \n\t"\
219 "pmaxsw "MM"4, "MM"3 \n\t"\
220 "pmaxsw "MM"5, "MM"2 \n\t"\
221 "pminsw "MM"5, "MM"3 \n\t"\
222 "pmaxsw "MM"0, "MM"2 \n\t" /* max */\
223 "pminsw "MM"0, "MM"3 \n\t" /* min */\
224 "pxor "MM"4, "MM"4 \n\t"\
225 "pmaxsw "MM"3, "MM"6 \n\t"\
226 "psubw "MM"2, "MM"4 \n\t" /* -max */\
227 "pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
228 "1: \n\t"\
230 MOVQ" 16(%[tmp]), "MM"2 \n\t" /* d */\
231 MOVQ" "MM"2, "MM"3 \n\t"\
232 "psubw "MM"6, "MM"2 \n\t" /* d-diff */\
233 "paddw "MM"6, "MM"3 \n\t" /* d+diff */\
234 "pmaxsw "MM"2, "MM"1 \n\t"\
235 "pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
236 "packuswb "MM"1, "MM"1 \n\t"\
238 ::[prev] "r"(prev),\
239 [cur] "r"(cur),\
240 [next] "r"(next),\
241 [prefs]"r"((x86_reg)prefs),\
242 [mrefs]"r"((x86_reg)mrefs),\
243 MANGLEVARIABLES\
244 [tmp] "r"(tmp)\
246 __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
247 dst += STEP;\
248 prev+= STEP;\
249 cur += STEP;\
250 next+= STEP;\
253 if (parity) {
254 #define prev2 "prev"
255 #define next2 "cur"
256 FILTER
257 #undef prev2
258 #undef next2
259 } else {
260 #define prev2 "cur"
261 #define next2 "next"
262 FILTER
263 #undef prev2
264 #undef next2
/*
 * Drop every helper macro defined above, presumably so that this template can
 * be included again with a different COMPILE_TEMPLATE_xxx / RENAME setup.
 * MANGLE and MANGLEVARIABLES are not undefined here.
 */
267 #undef STEP
268 #undef REGMM
269 #undef MM
270 #undef MOV
271 #undef MOVQ
272 #undef MOVQU
273 #undef PSHUF
274 #undef PSRL1
275 #undef PSRL2
276 #undef LOAD
277 #undef PABS
278 #undef CHECK
279 #undef CHECK1
280 #undef CHECK2
281 #undef FILTER