Fixed a crash caused by yadif on Windows XP (again #5793)
[vlc/solaris.git] / modules / video_filter / deinterlace / yadif_template.h
blob0a4b63bd980a3aa23c89bf9a41cac2de850e0cb6
1 /*
2 * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
/*
 * Instruction-set selection for the assembly template below.
 *
 * With COMPILE_TEMPLATE_SSE defined the code uses the 128-bit xmm registers
 * and handles STEP = 8 pixels per iteration; otherwise it uses the 64-bit
 * mm registers and handles STEP = 4 pixels per iteration.
 *
 *   REGMM  register name prefix, also used for the clobber list
 *   MM     the same prefix with the doubled percent sign needed inside an
 *          extended asm template
 *   MOV    move used to load or store STEP packed bytes
 *   MOVQ   full-register move (movdqa in the SSE build, which requires
 *          16-byte aligned memory operands)
 *   MOVQU  full-register move used for loads at unaligned addresses
 */
21 #ifdef COMPILE_TEMPLATE_SSE
22 #define REGMM "xmm"
23 #define MM "%%"REGMM
24 #define MOV "movq"
25 #define MOVQ "movdqa"
26 #define MOVQU "movdqu"
27 #define STEP 8
/* Load STEP bytes from mem into dst and interleave them with register 7.
 * FILTER clears register 7 with pxor before the first LOAD, so the bytes
 * end up zero-extended to 16-bit words. */
28 #define LOAD(mem,dst) \
29 MOV" "mem", "dst" \n\t"\
30 "punpcklbw "MM"7, "dst" \n\t"
/* Shift the whole register right by one / two bytes. */
31 #define PSRL1(reg) "psrldq $1, "reg" \n\t"
32 #define PSRL2(reg) "psrldq $2, "reg" \n\t"
/* NOTE: the parameter names are misleading. In AT&T operand order the
 * register passed as the first argument (src) is the one written: it
 * receives a copy of the second argument (dst) shifted right by two bytes.
 * The second argument is left unchanged. */
33 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
34 "psrldq $2, "src" \n\t"
35 #else
36 #define REGMM "mm"
37 #define MM "%%"REGMM
38 #define MOV "movd"
39 #define MOVQ "movq"
40 #define MOVQU "movq"
41 #define STEP 4
/* Same contract as the SSE LOAD above, on four bytes. */
42 #define LOAD(mem,dst) \
43 MOV" "mem", "dst" \n\t"\
44 "punpcklbw "MM"7, "dst" \n\t"
/* 8 and 16 bit shifts of the 64-bit register, i.e. one and two bytes. */
45 #define PSRL1(reg) "psrlq $8, "reg" \n\t"
46 #define PSRL2(reg) "psrlq $16, "reg" \n\t"
/* As in the SSE build, the first argument (src) is the register written.
 * Selector 9 places words 1 and 2 of the second argument in the two low
 * word lanes, which are the only lanes the following punpcklbw consumes. */
47 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
48 #endif
/*
 * PABS(tmp,dst): replace every signed 16-bit lane of dst with its absolute
 * value.
 *
 * The SSSE3 build uses pabsw and leaves tmp untouched. The fallback computes
 * tmp = 0 - dst and then dst = max(dst, tmp), so tmp is clobbered and must
 * be a register whose contents are no longer needed.
 */
50 #ifdef COMPILE_TEMPLATE_SSSE3
51 #define PABS(tmp,dst) \
52 "pabsw "dst", "dst" \n\t"
53 #else
54 #define PABS(tmp,dst) \
55 "pxor "tmp", "tmp" \n\t"\
56 "psubw "dst", "tmp" \n\t"\
57 "pmaxsw "tmp", "dst" \n\t"
58 #endif
/*
 * CHECK(pj,mj): evaluate one diagonal interpolation direction.
 *
 * pj and mj are stringized and used as byte displacements on the unaligned
 * loads from cur+mrefs and cur+prefs respectively.
 *
 * Inputs:  register 7 is used as the zero source for punpcklbw.
 * Outputs: register 5 = (a + b) >> 1 of the two rows one byte past the load
 *          address, widened to words. pavgb rounds up, so the low bit of
 *          a xor b, masked with pb_1, is subtracted to round down instead.
 *          NOTE(review): this relies on pb_1 holding 1 in every byte, as
 *          its name suggests - confirm at its definition.
 *          register 2 = score, the sum of the absolute row differences at
 *          byte offsets 0, 1 and 2 from the load address, as words.
 * Clobbers registers 3 and 4.
 */
61 #define CHECK(pj,mj) \
62 MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
63 MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
64 MOVQ" "MM"2, "MM"4 \n\t"\
65 MOVQ" "MM"2, "MM"5 \n\t"\
66 "pxor "MM"3, "MM"4 \n\t"\
67 "pavgb "MM"3, "MM"5 \n\t"\
68 "pand %[pb_1], "MM"4 \n\t"\
69 "psubusb "MM"4, "MM"5 \n\t"\
70 PSRL1(MM"5") \
71 "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
72 MOVQ" "MM"2, "MM"4 \n\t"\
73 "psubusb "MM"3, "MM"2 \n\t"\
74 "psubusb "MM"4, "MM"3 \n\t"\
75 "pmaxub "MM"3, "MM"2 \n\t"\
76 MOVQ" "MM"2, "MM"3 \n\t"\
77 MOVQ" "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
78 PSRL1(MM"3") /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
79 PSRL2(MM"4") /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
80 "punpcklbw "MM"7, "MM"2 \n\t"\
81 "punpcklbw "MM"7, "MM"3 \n\t"\
82 "punpcklbw "MM"7, "MM"4 \n\t"\
83 "paddw "MM"3, "MM"2 \n\t"\
84 "paddw "MM"4, "MM"2 \n\t" /* score */
/*
 * CHECK1: accept the direction just scored by CHECK where it improves on
 * the best score so far.
 *
 * Per word lane, with register 0 = spatial_score, register 1 = spatial_pred,
 * register 2 = score and register 5 = candidate prediction:
 *   mask          = spatial_score > score
 *   spatial_score = min(spatial_score, score)
 *   spatial_pred  = mask ? candidate : spatial_pred
 * The mask is also copied to register 6 for the CHECK2 that follows.
 * Registers 3 and 5 are clobbered (register 5 keeps only the accepted lanes).
 */
86 #define CHECK1 \
87 MOVQ" "MM"0, "MM"3 \n\t"\
88 "pcmpgtw "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
89 "pminsw "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
90 MOVQ" "MM"3, "MM"6 \n\t"\
91 "pand "MM"3, "MM"5 \n\t"\
92 "pandn "MM"1, "MM"3 \n\t"\
93 "por "MM"5, "MM"3 \n\t"\
94 MOVQ" "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
/*
 * CHECK2: same selection as CHECK1, but only effective in lanes where the
 * preceding CHECK1 accepted its direction.
 *
 * Register 6 holds the CHECK1 mask (all ones where accepted, zero where
 * rejected). Adding pw_1 and shifting left by 14 turns that into a penalty
 * that is added to the score with signed saturation before the comparison.
 * NOTE(review): if pw_1 holds 1 in every word lane, as its name suggests,
 * the penalty is 0 for accepted lanes and 0x4000 for rejected lanes -
 * confirm at the definition of pw_1.
 * Unlike CHECK1 the new mask is not saved. Registers 3, 5 and 6 are
 * clobbered.
 */
96 #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
97 hurts both quality and speed, but matches the C version. */\
98 "paddw %[pw_1], "MM"6 \n\t"\
99 "psllw $14, "MM"6 \n\t"\
100 "paddsw "MM"6, "MM"2 \n\t"\
101 MOVQ" "MM"0, "MM"3 \n\t"\
102 "pcmpgtw "MM"2, "MM"3 \n\t"\
103 "pminsw "MM"2, "MM"0 \n\t"\
104 "pand "MM"3, "MM"5 \n\t"\
105 "pandn "MM"1, "MM"3 \n\t"\
106 "por "MM"5, "MM"3 \n\t"\
107 MOVQ" "MM"3, "MM"1 \n\t"
/*
 * Filter one line with inline MMX/SSE assembly.
 *
 * Each loop iteration filters STEP pixels, stores STEP bytes to dst and then
 * advances dst, prev, cur and next by STEP.
 *
 * dst     output line.
 * prev, cur, next
 *         pointers to the same line position in three input buffers.
 *         NOTE(review): presumably the previous, current and next pictures,
 *         going by the names only - confirm with the caller.
 * w       pixel count. The loop runs while x < w in steps of STEP, so when w
 *         is not a multiple of STEP the last iteration still reads and
 *         writes a full STEP pixels.
 * prefs, mrefs
 *         byte offsets added to the line pointers to reach neighbouring
 *         rows. The inline comments label them x+refs and x-refs, so mrefs
 *         is presumably -prefs (TODO confirm with the caller).
 * parity  nonzero: prev2/next2 resolve to the prev/cur operands;
 *         zero: they resolve to the cur/next operands.
 * mode    values below 2 run the extra check that also reads the rows at
 *         2*mrefs and 2*prefs; otherwise the jge to label 1 skips it.
 *
 * The CHECK loads use displacements from -3 to +1 around the current
 * position, so the rows addressed through cur are read a few bytes before
 * and after the STEP pixels being filtered.
 */
109 VLC_TARGET static void RENAME(yadif_filter_line)(uint8_t *dst,
110 uint8_t *prev, uint8_t *cur, uint8_t *next,
111 int w, int prefs, int mrefs, int parity, int mode)
/*
 * Scratch area for the asm. tmpA is tmp rounded up to the next multiple of
 * 16. FILTER accesses it with MOVQ at offsets 0, 16, 32 and 48 (c, d, e and
 * diff); in the SSE build MOVQ is movdqa, which requires 16-byte aligned
 * memory. 5*16 bytes cover those 64 bytes plus up to 15 bytes of alignment
 * padding.
 * NOTE(review): the round trip goes through uint64_t rather than uintptr_t,
 * which is a pointer/integer size mismatch on targets with 32-bit pointers;
 * consider uintptr_t.
 */
113 uint8_t tmp[5*16];
114 uint8_t *tmpA= (uint8_t*)(((uint64_t)(tmp+15)) & ~15);
115 int x;
/*
 * FILTER expands to the complete per-line loop. prev2 and next2 must be
 * defined as string literals naming asm operands before it is expanded.
 * Register roles across the asm: 0 = c, then spatial_score; 1 = e, then
 * spatial_pred and finally the packed result; 6 = diff in the tail;
 * 7 = zero for LOAD and CHECK until it is reused for e in the mode < 2 path.
 * NOTE(review): the second asm statement stores register 1 as left behind by
 * the first one; no operand or clobber visible here expresses that
 * dependency to the compiler.
 */
117 #define FILTER\
118 for(x=0; x<w; x+=STEP){\
119 __asm__ volatile(\
120 "pxor "MM"7, "MM"7 \n\t"\
121 LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
122 LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
123 LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
124 LOAD("(%["next2"])", MM"3") /* next2[x] */\
125 MOVQ" "MM"3, "MM"4 \n\t"\
126 "paddw "MM"2, "MM"3 \n\t"\
127 "psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
128 MOVQ" "MM"0, (%[tmpA]) \n\t" /* c */\
129 MOVQ" "MM"3, 16(%[tmpA]) \n\t" /* d */\
130 MOVQ" "MM"1, 32(%[tmpA]) \n\t" /* e */\
131 "psubw "MM"4, "MM"2 \n\t"\
132 PABS( MM"4", MM"2") /* temporal_diff0 */\
133 LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
134 LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
135 "psubw "MM"0, "MM"3 \n\t"\
136 "psubw "MM"1, "MM"4 \n\t"\
137 PABS( MM"5", MM"3")\
138 PABS( MM"5", MM"4")\
139 "paddw "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
140 "psrlw $1, "MM"2 \n\t"\
141 "psrlw $1, "MM"3 \n\t"\
142 "pmaxsw "MM"3, "MM"2 \n\t"\
143 LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
144 LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
145 "psubw "MM"0, "MM"3 \n\t"\
146 "psubw "MM"1, "MM"4 \n\t"\
147 PABS( MM"5", MM"3")\
148 PABS( MM"5", MM"4")\
149 "paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
150 "psrlw $1, "MM"3 \n\t"\
151 "pmaxsw "MM"3, "MM"2 \n\t"\
152 MOVQ" "MM"2, 48(%[tmpA]) \n\t" /* diff */\
154 "paddw "MM"0, "MM"1 \n\t"\
155 "paddw "MM"0, "MM"0 \n\t"\
156 "psubw "MM"1, "MM"0 \n\t"\
157 "psrlw $1, "MM"1 \n\t" /* spatial_pred */\
158 PABS( MM"2", MM"0") /* ABS(c-e) */\
160 MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
161 MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
162 MOVQ" "MM"2, "MM"4 \n\t"\
163 "psubusb "MM"3, "MM"2 \n\t"\
164 "psubusb "MM"4, "MM"3 \n\t"\
165 "pmaxub "MM"3, "MM"2 \n\t"\
166 PSHUF(MM"3", MM"2") \
167 "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
168 "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
169 "paddw "MM"2, "MM"0 \n\t"\
170 "paddw "MM"3, "MM"0 \n\t"\
171 "psubw %[pw_1], "MM"0 \n\t" /* spatial_score */\
173 CHECK(-2,0)\
174 CHECK1\
175 CHECK(-3,1)\
176 CHECK2\
177 CHECK(0,-2)\
178 CHECK1\
179 CHECK(1,-3)\
180 CHECK2\
182 /* if(p->mode<2) ... */\
183 MOVQ" 48(%[tmpA]), "MM"6 \n\t" /* diff */\
184 "cmpl $2, %[mode] \n\t"\
185 "jge 1f \n\t"\
186 LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
187 LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
188 LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
189 LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
190 "paddw "MM"4, "MM"2 \n\t"\
191 "paddw "MM"5, "MM"3 \n\t"\
192 "psrlw $1, "MM"2 \n\t" /* b */\
193 "psrlw $1, "MM"3 \n\t" /* f */\
194 MOVQ" (%[tmpA]), "MM"4 \n\t" /* c */\
195 MOVQ" 16(%[tmpA]), "MM"5 \n\t" /* d */\
196 MOVQ" 32(%[tmpA]), "MM"7 \n\t" /* e */\
197 "psubw "MM"4, "MM"2 \n\t" /* b-c */\
198 "psubw "MM"7, "MM"3 \n\t" /* f-e */\
199 MOVQ" "MM"5, "MM"0 \n\t"\
200 "psubw "MM"4, "MM"5 \n\t" /* d-c */\
201 "psubw "MM"7, "MM"0 \n\t" /* d-e */\
202 MOVQ" "MM"2, "MM"4 \n\t"\
203 "pminsw "MM"3, "MM"2 \n\t"\
204 "pmaxsw "MM"4, "MM"3 \n\t"\
205 "pmaxsw "MM"5, "MM"2 \n\t"\
206 "pminsw "MM"5, "MM"3 \n\t"\
207 "pmaxsw "MM"0, "MM"2 \n\t" /* max */\
208 "pminsw "MM"0, "MM"3 \n\t" /* min */\
209 "pxor "MM"4, "MM"4 \n\t"\
210 "pmaxsw "MM"3, "MM"6 \n\t"\
211 "psubw "MM"2, "MM"4 \n\t" /* -max */\
212 "pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
213 "1: \n\t"\
215 MOVQ" 16(%[tmpA]), "MM"2 \n\t" /* d */\
216 MOVQ" "MM"2, "MM"3 \n\t"\
217 "psubw "MM"6, "MM"2 \n\t" /* d-diff */\
218 "paddw "MM"6, "MM"3 \n\t" /* d+diff */\
219 "pmaxsw "MM"2, "MM"1 \n\t"\
220 "pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
221 "packuswb "MM"1, "MM"1 \n\t"\
224 :[tmpA] "r"(tmpA),\
225 [prev] "r"(prev),\
226 [cur] "r"(cur),\
227 [next] "r"(next),\
228 [prefs]"r"((x86_reg)prefs),\
229 [mrefs]"r"((x86_reg)mrefs),\
230 [pw_1] "m"(pw_1),\
231 [pb_1] "m"(pb_1),\
232 [mode] "g"(mode)\
233 :REGMM"0",REGMM"1",REGMM"2",REGMM"3",REGMM"4",REGMM"5",REGMM"6",REGMM"7"\
235 __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
236 dst += STEP;\
237 prev+= STEP;\
238 cur += STEP;\
239 next+= STEP;\
242 if (parity) {
243 #define prev2 "prev"
244 #define next2 "cur"
245 FILTER
246 #undef prev2
247 #undef next2
248 } else {
249 #define prev2 "cur"
250 #define next2 "next"
251 FILTER
252 #undef prev2
253 #undef next2
/*
 * Remove every helper macro defined by this template.
 * NOTE(review): presumably so the header can be included again with a
 * different COMPILE_TEMPLATE_* and RENAME configuration - confirm against
 * the including file.
 */
256 #undef STEP
257 #undef REGMM
258 #undef MM
259 #undef MOV
260 #undef MOVQ
261 #undef MOVQU
262 #undef PSHUF
263 #undef PSRL1
264 #undef PSRL2
265 #undef LOAD
266 #undef PABS
267 #undef CHECK
268 #undef CHECK1
269 #undef CHECK2
270 #undef FILTER