10 #include "img_format.h"
/*
 * Common signature of the row-packing routines in this file.
 *
 *   dst    - output row being written
 *   y,u,v  - input luma / chroma rows
 *   w      - width argument (the packers below iterate w/2, w/8 or w/16 times)
 *   us,vs  - int offsets applied to u / v; the linear packers read
 *            u[us+us] and v[vs+vs], and ilpack() passes 0 for the
 *            nearest-neighbour packer.
 */
14 typedef void (pack_func_t
)(unsigned char *dst
, unsigned char *y
,
15 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
);
/*
 * Portable C packer used for the "nn" mode (no chroma blending parameters).
 * Takes dst, y, u, v and w only; the loop runs w/2 times, counting j down
 * to zero.  The loop body is not visible in this excerpt.
 *
 * NOTE(review): this function has five parameters, yet open() stores it in
 * a pack_func_t pointer through a cast and ilpack() calls that pointer with
 * seven arguments.  Calling through an incompatible function type is
 * undefined behaviour in ISO C - confirm whether that is accepted here.
 */
22 static void pack_nn_C(unsigned char *dst
, unsigned char *y
,
23 unsigned char *u
, unsigned char *v
, int w
)
26 for (j
= w
/2; j
; j
--) {
/*
 * Portable C packer, linear chroma interpolation, variant 0.
 *
 * For each of the w/2 iterations the visible stores write
 *     (u[2*us] + 7*u[0]) >> 3   and   (v[2*vs] + 7*v[0]) >> 3
 * to successive dst bytes, i.e. a 7:1 blend of the current chroma sample
 * with the one 2*us (resp. 2*vs) bytes away, truncated by the shift.
 * us/vs may be negative as far as this code is concerned; the caller
 * decides the direction.
 *
 * The statements between the two visible stores, and any pointer
 * increments, are not shown in this excerpt (presumably the luma copies -
 * confirm against the full file).
 */
34 static void pack_li_0_C(unsigned char *dst
, unsigned char *y
,
35 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
)
38 for (j
= w
/2; j
; j
--) {
/* chroma U: 1/8 of the far sample + 7/8 of the near sample */
40 *dst
++ = (u
[us
+us
] + 7*u
[0])>>3;
/* chroma V: same weighting */
42 *dst
++ = (v
[vs
+vs
] + 7*v
[0])>>3;
/*
 * Portable C packer, linear chroma interpolation, variant 1.
 *
 * Same structure as pack_li_0_C but with a 5:3 weighting: the visible
 * stores write
 *     (3*u[2*us] + 5*u[0]) >> 3   and   (3*v[2*vs] + 5*v[0]) >> 3
 * to successive dst bytes, for each of the w/2 iterations.
 *
 * The statements between the two visible stores, and any pointer
 * increments, are not shown in this excerpt.
 */
47 static void pack_li_1_C(unsigned char *dst
, unsigned char *y
,
48 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
)
51 for (j
= w
/2; j
; j
--) {
/* chroma U: 3/8 of the far sample + 5/8 of the near sample */
53 *dst
++ = (3*u
[us
+us
] + 5*u
[0])>>3;
/* chroma V: same weighting */
55 *dst
++ = (3*v
[vs
+vs
] + 5*v
[0])>>3;
/*
 * MMX version of pack_nn_C.
 *
 * Asm operands: %0 = y, %1 = u, %2 = v, %3 = dst, %4 = w/8.
 * The visible instructions handle 8 luma bytes at a time and store
 * 16 output bytes at (%3) and 8(%3).  Loop control and pointer advances
 * are not visible in this excerpt.
 *
 * After the asm, pack_nn_C() is called with width w&7 for the remainder.
 *
 * NOTE(review): the tail call receives the C variables dst, y, u, v.
 * Nothing visible updates those variables after the asm, so the tail may
 * start at the beginning of the row rather than after the pixels the asm
 * handled - confirm against the full operand/clobber lists.
 */
61 static void pack_nn_MMX(unsigned char *dst
, unsigned char *y
,
62 unsigned char *u
, unsigned char *v
, int w
)
/* mm1 and mm2 both get the same 8 luma bytes; mm4 = chroma U, mm6 = chroma V */
67 "movq (%0), %%mm1 \n\t"
68 "movq (%0), %%mm2 \n\t"
69 "movq (%1), %%mm4 \n\t"
70 "movq (%2), %%mm6 \n\t"
/* mm4 = U0 V0 U1 V1 U2 V2 U3 V3 (low 4 bytes of U and V interleaved) */
71 "punpcklbw %%mm6, %%mm4 \n\t"
/* mm1 = Y0 U0 Y1 V0 Y2 U1 Y3 V1,  mm2 = Y4 U2 Y5 V2 Y6 U3 Y7 V3 */
72 "punpcklbw %%mm4, %%mm1 \n\t"
73 "punpckhbw %%mm4, %%mm2 \n\t"
78 "movq %%mm1, (%3) \n\t"
79 "movq %%mm2, 8(%3) \n\t"
85 : "r" (y
), "r" (u
), "r" (v
), "r" (dst
), "r" (w
/8)
/* remainder columns (w modulo 8) go through the C implementation */
88 pack_nn_C(dst
, y
, u
, v
, (w
&7));
91 #if HAVE_EBX_AVAILABLE
/*
 * MMX version of pack_li_0_C: chroma = (far + 7*near) >> 3.
 *
 * Register bindings taken from the visible operand list:
 *   REG_S = y, REG_D = dst, REG_a = u, REG_b = v, "c" = w/16,
 *   "d" = (x86_reg)us, operand %6 ("r") = (x86_reg)vs.
 * REG_BP is saved with push/pop and used as the V offset.
 *
 * Each pass of the visible code consumes 16 luma bytes and 8 bytes from
 * each chroma row, and stores 32 output bytes (REG_S += 16,
 * REG_a/REG_b += 8, REG_D += 32).  The loop label and branch are not
 * visible in this excerpt.
 *
 * NOTE(review): the asm adds to REG_S/REG_D/REG_a/REG_b, which are bound
 * to C variables in the only operand list visible here.  If that list is
 * the input list, modifying those registers is not permitted by GCC's
 * extended-asm rules, and the C-level dst/y/u/v passed to the tail call
 * below would still point at the start of the row - confirm.
 */
92 static void pack_li_0_MMX(unsigned char *dst
, unsigned char *y
,
93 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
)
96 "push %%"REG_BP
" \n\t"
/*
 * Two ways of loading the offsets appear below: "mov %6" takes vs from a
 * register operand, while the movl pair dereferences REG_d as memory.
 * Presumably these sit in different preprocessor branches that are not
 * shown here - confirm.
 */
98 "mov %6, %%"REG_BP
" \n\t"
100 "movl 4(%%"REG_d
"), %%"REG_BP
" \n\t"
101 "movl (%%"REG_d
"), %%"REG_d
" \n\t"
/* mm0 = 0, used to widen bytes to 16-bit words */
103 "pxor %%mm0, %%mm0 \n\t"
/* ---- first 8 luma bytes: mm1/mm2 = Y0..Y7 ---- */
107 "movq (%%"REG_S
"), %%mm1 \n\t"
108 "movq (%%"REG_S
"), %%mm2 \n\t"
/* far chroma samples u[2*us], v[2*vs]; low 4 bytes widened to words */
110 "movq (%%"REG_a
",%%"REG_d
",2), %%mm4 \n\t"
111 "movq (%%"REG_b
",%%"REG_BP
",2), %%mm6 \n\t"
112 "punpcklbw %%mm0, %%mm4 \n\t"
113 "punpcklbw %%mm0, %%mm6 \n\t"
/* near chroma samples u[0], v[0]; low 4 bytes widened to words */
114 "movq (%%"REG_a
"), %%mm3 \n\t"
115 "movq (%%"REG_b
"), %%mm5 \n\t"
116 "punpcklbw %%mm0, %%mm3 \n\t"
117 "punpcklbw %%mm0, %%mm5 \n\t"
/* seven additions each: mm4 = far_u + 7*near_u, mm6 = far_v + 7*near_v */
118 "paddw %%mm3, %%mm4 \n\t"
119 "paddw %%mm5, %%mm6 \n\t"
120 "paddw %%mm3, %%mm4 \n\t"
121 "paddw %%mm5, %%mm6 \n\t"
122 "paddw %%mm3, %%mm4 \n\t"
123 "paddw %%mm5, %%mm6 \n\t"
124 "paddw %%mm3, %%mm4 \n\t"
125 "paddw %%mm5, %%mm6 \n\t"
126 "paddw %%mm3, %%mm4 \n\t"
127 "paddw %%mm5, %%mm6 \n\t"
128 "paddw %%mm3, %%mm4 \n\t"
129 "paddw %%mm5, %%mm6 \n\t"
130 "paddw %%mm3, %%mm4 \n\t"
131 "paddw %%mm5, %%mm6 \n\t"
/* divide by 8, narrow back to bytes, then interleave as U V U V ... */
132 "psrlw $3, %%mm4 \n\t"
133 "psrlw $3, %%mm6 \n\t"
134 "packuswb %%mm4, %%mm4 \n\t"
135 "packuswb %%mm6, %%mm6 \n\t"
136 "punpcklbw %%mm6, %%mm4 \n\t"
/* mm1 = Y0 U0 Y1 V0 Y2 U1 Y3 V1,  mm2 = Y4 U2 Y5 V2 Y6 U3 Y7 V3 */
137 "punpcklbw %%mm4, %%mm1 \n\t"
138 "punpckhbw %%mm4, %%mm2 \n\t"
140 "movq %%mm1, (%%"REG_D
") \n\t"
141 "movq %%mm2, 8(%%"REG_D
") \n\t"
/* ---- second 8 luma bytes: same steps, using the high 4 chroma bytes ---- */
143 "movq 8(%%"REG_S
"), %%mm1 \n\t"
144 "movq 8(%%"REG_S
"), %%mm2 \n\t"
146 "movq (%%"REG_a
",%%"REG_d
",2), %%mm4 \n\t"
147 "movq (%%"REG_b
",%%"REG_BP
",2), %%mm6 \n\t"
148 "punpckhbw %%mm0, %%mm4 \n\t"
149 "punpckhbw %%mm0, %%mm6 \n\t"
150 "movq (%%"REG_a
"), %%mm3 \n\t"
151 "movq (%%"REG_b
"), %%mm5 \n\t"
152 "punpckhbw %%mm0, %%mm3 \n\t"
153 "punpckhbw %%mm0, %%mm5 \n\t"
154 "paddw %%mm3, %%mm4 \n\t"
155 "paddw %%mm5, %%mm6 \n\t"
156 "paddw %%mm3, %%mm4 \n\t"
157 "paddw %%mm5, %%mm6 \n\t"
158 "paddw %%mm3, %%mm4 \n\t"
159 "paddw %%mm5, %%mm6 \n\t"
160 "paddw %%mm3, %%mm4 \n\t"
161 "paddw %%mm5, %%mm6 \n\t"
162 "paddw %%mm3, %%mm4 \n\t"
163 "paddw %%mm5, %%mm6 \n\t"
164 "paddw %%mm3, %%mm4 \n\t"
165 "paddw %%mm5, %%mm6 \n\t"
166 "paddw %%mm3, %%mm4 \n\t"
167 "paddw %%mm5, %%mm6 \n\t"
168 "psrlw $3, %%mm4 \n\t"
169 "psrlw $3, %%mm6 \n\t"
170 "packuswb %%mm4, %%mm4 \n\t"
171 "packuswb %%mm6, %%mm6 \n\t"
172 "punpcklbw %%mm6, %%mm4 \n\t"
173 "punpcklbw %%mm4, %%mm1 \n\t"
174 "punpckhbw %%mm4, %%mm2 \n\t"
/* advance: 16 luma bytes, 8 bytes in each chroma row, 32 output bytes */
176 "add $16, %%"REG_S
" \n\t"
177 "add $8, %%"REG_a
" \n\t"
178 "add $8, %%"REG_b
" \n\t"
180 "movq %%mm1, 16(%%"REG_D
") \n\t"
181 "movq %%mm2, 24(%%"REG_D
") \n\t"
182 "add $32, %%"REG_D
" \n\t"
187 "pop %%"REG_BP
" \n\t"
189 : "S" (y
), "D" (dst
), "a" (u
), "b" (v
), "c" (w
/16),
191 "d" ((x86_reg
)us
), "r" ((x86_reg
)vs
)
/* remainder columns (w modulo 16) go through the C implementation */
197 pack_li_0_C(dst
, y
, u
, v
, (w
&15), us
, vs
);
/*
 * MMX version of pack_li_1_C: chroma = (3*far + 5*near) >> 3.
 *
 * Register bindings taken from the visible operand list:
 *   REG_S = y, REG_D = dst, REG_a = u, REG_b = v, "c" = w/16,
 *   "d" = (x86_reg)us, operand %6 ("r") = (x86_reg)vs.
 * REG_BP is saved with push/pop and used as the V offset.
 *
 * Each pass of the visible code consumes 16 luma bytes and 8 bytes from
 * each chroma row, and stores 32 output bytes.  The loop label and branch
 * are not visible in this excerpt.
 *
 * NOTE(review): the asm adds to REG_S/REG_D/REG_a/REG_b, which are bound
 * to C variables in the only operand list visible here.  If that list is
 * the input list, modifying those registers is not permitted by GCC's
 * extended-asm rules, and the C-level dst/y/u/v passed to the tail call
 * below would still point at the start of the row - confirm.
 */
200 static void pack_li_1_MMX(unsigned char *dst
, unsigned char *y
,
201 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
)
204 "push %%"REG_BP
" \n\t"
/*
 * Two ways of loading the offsets appear below: "mov %6" takes vs from a
 * register operand, while the movl pair dereferences REG_d as memory.
 * Presumably these sit in different preprocessor branches that are not
 * shown here - confirm.
 */
206 "mov %6, %%"REG_BP
" \n\t"
208 "movl 4(%%"REG_d
"), %%"REG_BP
" \n\t"
209 "movl (%%"REG_d
"), %%"REG_d
" \n\t"
/* mm0 = 0, used to widen bytes to 16-bit words */
211 "pxor %%mm0, %%mm0 \n\t"
/* ---- first 8 luma bytes: mm1/mm2 = Y0..Y7 ---- */
215 "movq (%%"REG_S
"), %%mm1 \n\t"
216 "movq (%%"REG_S
"), %%mm2 \n\t"
/* far chroma samples u[2*us], v[2*vs]; low 4 bytes widened to words */
218 "movq (%%"REG_a
",%%"REG_d
",2), %%mm4 \n\t"
219 "movq (%%"REG_b
",%%"REG_BP
",2), %%mm6 \n\t"
220 "punpcklbw %%mm0, %%mm4 \n\t"
221 "punpcklbw %%mm0, %%mm6 \n\t"
/* near chroma samples u[0], v[0]; low 4 bytes widened to words */
222 "movq (%%"REG_a
"), %%mm3 \n\t"
223 "movq (%%"REG_b
"), %%mm5 \n\t"
224 "punpcklbw %%mm0, %%mm3 \n\t"
225 "punpcklbw %%mm0, %%mm5 \n\t"
/* triple the far samples via the mm7 scratch copy: mm4 = 3*far_u, mm6 = 3*far_v */
226 "movq %%mm4, %%mm7 \n\t"
227 "paddw %%mm4, %%mm4 \n\t"
228 "paddw %%mm7, %%mm4 \n\t"
229 "movq %%mm6, %%mm7 \n\t"
230 "paddw %%mm6, %%mm6 \n\t"
231 "paddw %%mm7, %%mm6 \n\t"
/* five additions each: mm4 = 3*far_u + 5*near_u, mm6 = 3*far_v + 5*near_v */
232 "paddw %%mm3, %%mm4 \n\t"
233 "paddw %%mm5, %%mm6 \n\t"
234 "paddw %%mm3, %%mm4 \n\t"
235 "paddw %%mm5, %%mm6 \n\t"
236 "paddw %%mm3, %%mm4 \n\t"
237 "paddw %%mm5, %%mm6 \n\t"
238 "paddw %%mm3, %%mm4 \n\t"
239 "paddw %%mm5, %%mm6 \n\t"
240 "paddw %%mm3, %%mm4 \n\t"
241 "paddw %%mm5, %%mm6 \n\t"
/* divide by 8, narrow back to bytes, then interleave as U V U V ... */
242 "psrlw $3, %%mm4 \n\t"
243 "psrlw $3, %%mm6 \n\t"
244 "packuswb %%mm4, %%mm4 \n\t"
245 "packuswb %%mm6, %%mm6 \n\t"
246 "punpcklbw %%mm6, %%mm4 \n\t"
/* mm1 = Y0 U0 Y1 V0 Y2 U1 Y3 V1,  mm2 = Y4 U2 Y5 V2 Y6 U3 Y7 V3 */
247 "punpcklbw %%mm4, %%mm1 \n\t"
248 "punpckhbw %%mm4, %%mm2 \n\t"
250 "movq %%mm1, (%%"REG_D
") \n\t"
251 "movq %%mm2, 8(%%"REG_D
") \n\t"
/* ---- second 8 luma bytes: same steps, using the high 4 chroma bytes ---- */
253 "movq 8(%%"REG_S
"), %%mm1 \n\t"
254 "movq 8(%%"REG_S
"), %%mm2 \n\t"
256 "movq (%%"REG_a
",%%"REG_d
",2), %%mm4 \n\t"
257 "movq (%%"REG_b
",%%"REG_BP
",2), %%mm6 \n\t"
258 "punpckhbw %%mm0, %%mm4 \n\t"
259 "punpckhbw %%mm0, %%mm6 \n\t"
260 "movq (%%"REG_a
"), %%mm3 \n\t"
261 "movq (%%"REG_b
"), %%mm5 \n\t"
262 "punpckhbw %%mm0, %%mm3 \n\t"
263 "punpckhbw %%mm0, %%mm5 \n\t"
264 "movq %%mm4, %%mm7 \n\t"
265 "paddw %%mm4, %%mm4 \n\t"
266 "paddw %%mm7, %%mm4 \n\t"
267 "movq %%mm6, %%mm7 \n\t"
268 "paddw %%mm6, %%mm6 \n\t"
269 "paddw %%mm7, %%mm6 \n\t"
270 "paddw %%mm3, %%mm4 \n\t"
271 "paddw %%mm5, %%mm6 \n\t"
272 "paddw %%mm3, %%mm4 \n\t"
273 "paddw %%mm5, %%mm6 \n\t"
274 "paddw %%mm3, %%mm4 \n\t"
275 "paddw %%mm5, %%mm6 \n\t"
276 "paddw %%mm3, %%mm4 \n\t"
277 "paddw %%mm5, %%mm6 \n\t"
278 "paddw %%mm3, %%mm4 \n\t"
279 "paddw %%mm5, %%mm6 \n\t"
280 "psrlw $3, %%mm4 \n\t"
281 "psrlw $3, %%mm6 \n\t"
282 "packuswb %%mm4, %%mm4 \n\t"
283 "packuswb %%mm6, %%mm6 \n\t"
284 "punpcklbw %%mm6, %%mm4 \n\t"
285 "punpcklbw %%mm4, %%mm1 \n\t"
286 "punpckhbw %%mm4, %%mm2 \n\t"
/* advance: 16 luma bytes, 8 bytes in each chroma row, 32 output bytes */
288 "add $16, %%"REG_S
" \n\t"
289 "add $8, %%"REG_a
" \n\t"
290 "add $8, %%"REG_b
" \n\t"
292 "movq %%mm1, 16(%%"REG_D
") \n\t"
293 "movq %%mm2, 24(%%"REG_D
") \n\t"
294 "add $32, %%"REG_D
" \n\t"
299 "pop %%"REG_BP
" \n\t"
301 : "S" (y
), "D" (dst
), "a" (u
), "b" (v
), "c" (w
/16),
303 "d" ((x86_reg
)us
), "r" ((x86_reg
)vs
)
/* remainder columns (w modulo 16) go through the C implementation */
309 pack_li_1_C(dst
, y
, u
, v
, (w
&15), us
, vs
);
311 #endif /* HAVE_EBX_AVAILABLE */
/*
 * File-scope pointers to the packer implementations in use.  open() sets
 * them to the C versions first and replaces them with the MMX versions
 * when gCpuCaps.hasMMX is set; ilpack() calls pack_nn directly.
 */
314 static pack_func_t
*pack_nn
;
315 static pack_func_t
*pack_li_0
;
316 static pack_func_t
*pack_li_1
;
/*
 * Pack a whole planar image into dst, one row per packer call.
 *
 *   dst, dststride - output buffer and its row stride
 *   src, srcstride - three input planes and their strides
 *                    (srcstride[0..2] become ys, us, vs)
 *   w, h           - image width and height
 *   pack           - two packers; the loop picks one per row
 *
 * Visible structure: two rows through pack_nn (the second with u+us and
 * v+vs), then rows 2 .. h-3 through pack[b], then two final rows through
 * pack_nn.  The declarations of i, a and b, the setup of y/u/v from src,
 * and most of the loop body are not visible in this excerpt.
 *
 * NOTE(review): the four pack_nn calls are not visibly guarded by a check
 * on h, so an image with fewer than four rows may be over-written or
 * over-read - confirm against the full file.
 */
318 static void ilpack(unsigned char *dst
, unsigned char *src
[3],
319 int dststride
, int srcstride
[3], int w
, int h
, pack_func_t
*pack
[2])
322 unsigned char *y
, *u
, *v
;
323 int ys
= srcstride
[0], us
= srcstride
[1], vs
= srcstride
[2];
/* first two output rows: no blending, second one uses the next chroma row */
330 pack_nn(dst
, y
, u
, v
, w
, 0, 0);
331 y
+= ys
; dst
+= dststride
;
332 pack_nn(dst
, y
, u
+us
, v
+vs
, w
, 0, 0);
333 y
+= ys
; dst
+= dststride
;
334 for (i
=2; i
<h
-2; i
++) {
/* b follows the pattern 1,0,0,1 for i = 2,3,4,5 and repeats every 4 rows */
336 b
= (i
&1) ^ ((i
&2)>>1);
/* chroma offsets are the plane strides scaled by a (a is set on a line not shown) */
337 pack
[b
](dst
, y
, u
, v
, w
, us
*a
, vs
*a
);
/* last two output rows: no blending again */
348 pack_nn(dst
, y
, u
, v
, w
, 0, 0);
349 y
+= ys
; dst
+= dststride
; u
+= us
; v
+= vs
;
350 pack_nn(dst
, y
, u
, v
, w
, 0, 0);
/*
 * Filter callback: request an IMGFMT_YUY2 output image from the next
 * filter, fill it from mpi with ilpack() using the packers stored in
 * vf->priv->pack, and pass it on with vf_next_put_image().
 * Returns whatever vf_next_put_image() returns.
 *
 * The remaining vf_get_image() arguments and any checks on dmpi are not
 * visible in this excerpt.
 */
354 static int put_image(struct vf_instance_s
* vf
, mp_image_t
*mpi
, double pts
)
358 // hope we'll get DR buffer:
359 dmpi
=vf_get_image(vf
->next
, IMGFMT_YUY2
,
360 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
/* only plane 0 / stride 0 of the output is used; all input planes are passed */
363 ilpack(dmpi
->planes
[0], mpi
->planes
, dmpi
->stride
[0], mpi
->stride
, mpi
->w
, mpi
->h
, vf
->priv
->pack
);
365 return vf_next_put_image(vf
,dmpi
, pts
);
/*
 * Filter callback: forward the configuration to the next filter unchanged,
 * except that the output format is always IMGFMT_YUY2 (the outfmt
 * argument is not used in the visible call).
 * Returns whatever vf_next_config() returns.
 */
368 static int config(struct vf_instance_s
* vf
,
369 int width
, int height
, int d_width
, int d_height
,
370 unsigned int flags
, unsigned int outfmt
)
372 /* FIXME - also support UYVY output? */
373 return vf_next_config(vf
, width
, height
, d_width
, d_height
, flags
, IMGFMT_YUY2
);
/*
 * Filter callback: the visible return asks the next filter whether it
 * accepts IMGFMT_YUY2.  The test on fmt that selects this path, and the
 * value returned otherwise, are not visible in this excerpt.
 */
377 static int query_format(struct vf_instance_s
* vf
, unsigned int fmt
)
379 /* FIXME - really any YUV 4:2:0 input format should work */
384 return vf_next_query_format(vf
,IMGFMT_YUY2
);
/*
 * Filter constructor: install the callbacks, allocate the zeroed private
 * state, read an optional integer mode from args, choose C or MMX packers,
 * and fill vf->priv->pack[] according to the mode.
 *
 * Visible mode handling: one case sets both pack[] entries to pack_nn;
 * another path logs "unknown mode" and, per that message, falls back to
 * linear; the linear setup stores pack_li_0 / pack_li_1.  The case labels
 * and the return value are not visible in this excerpt.
 *
 * NOTE(review): no NULL check on the calloc() result is visible before
 * vf->priv->mode is used; one source line between them is not shown -
 * confirm.
 * NOTE(review): the sscanf() return value is not checked in the visible
 * code, so a non-numeric args string leaves mode at its previous value.
 */
389 static int open(vf_instance_t
*vf
, char* args
)
392 vf
->query_format
=query_format
;
393 vf
->put_image
=put_image
;
394 vf
->priv
= calloc(1, sizeof(struct vf_priv_s
));
396 if (args
) sscanf(args
, "%d", &vf
->priv
->mode
);
/*
 * Start with the portable C packers.
 * NOTE(review): pack_nn_C and pack_nn_MMX take five parameters; the casts
 * below force them into the seven-parameter pack_func_t type, and calling
 * through an incompatible function type is undefined behaviour in ISO C.
 */
398 pack_nn
= (pack_func_t
*)pack_nn_C
;
399 pack_li_0
= pack_li_0_C
;
400 pack_li_1
= pack_li_1_C
;
/* switch to the MMX packers when the CPU reports MMX support */
402 if(gCpuCaps
.hasMMX
) {
403 pack_nn
= (pack_func_t
*)pack_nn_MMX
;
/* the linear MMX packers only exist when HAVE_EBX_AVAILABLE is set */
404 #if HAVE_EBX_AVAILABLE
405 pack_li_0
= pack_li_0_MMX
;
406 pack_li_1
= pack_li_1_MMX
;
411 switch(vf
->priv
->mode
) {
413 vf
->priv
->pack
[0] = vf
->priv
->pack
[1] = pack_nn
;
416 mp_msg(MSGT_VFILTER
, MSGL_WARN
,
417 "ilpack: unknown mode %d (fallback to linear)\n",
420 vf
->priv
->pack
[0] = pack_li_0
;
421 vf
->priv
->pack
[1] = pack_li_1
;
428 const vf_info_t vf_info_ilpack
= {
429 "4:2:0 planar -> 4:2:2 packed reinterlacer",