10 #include "img_format.h"
14 typedef void (pack_func_t
)(unsigned char *dst
, unsigned char *y
,
15 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
);
22 static void pack_nn_C(unsigned char *dst
, unsigned char *y
,
23 unsigned char *u
, unsigned char *v
, int w
)
26 for (j
= w
/2; j
; j
--) {
34 static void pack_li_0_C(unsigned char *dst
, unsigned char *y
,
35 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
)
38 for (j
= w
/2; j
; j
--) {
40 *dst
++ = (u
[us
+us
] + 7*u
[0])>>3;
42 *dst
++ = (v
[vs
+vs
] + 7*v
[0])>>3;
47 static void pack_li_1_C(unsigned char *dst
, unsigned char *y
,
48 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
)
51 for (j
= w
/2; j
; j
--) {
53 *dst
++ = (3*u
[us
+us
] + 5*u
[0])>>3;
55 *dst
++ = (3*v
[vs
+vs
] + 5*v
[0])>>3;
61 static void pack_nn_MMX(unsigned char *dst
, unsigned char *y
,
62 unsigned char *u
, unsigned char *v
, int w
)
67 "movq (%0), %%mm1 \n\t"
68 "movq (%0), %%mm2 \n\t"
69 "movq (%1), %%mm4 \n\t"
70 "movq (%2), %%mm6 \n\t"
71 "punpcklbw %%mm6, %%mm4 \n\t"
72 "punpcklbw %%mm4, %%mm1 \n\t"
73 "punpckhbw %%mm4, %%mm2 \n\t"
78 "movq %%mm1, (%3) \n\t"
79 "movq %%mm2, 8(%3) \n\t"
85 : "r" (y
), "r" (u
), "r" (v
), "r" (dst
), "r" (w
/8)
88 pack_nn_C(dst
, y
, u
, v
, (w
&7));
91 static void pack_li_0_MMX(unsigned char *dst
, unsigned char *y
,
92 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
)
95 "push %%"REG_BP
" \n\t"
97 "mov %6, %%"REG_BP
" \n\t"
99 "movl 4(%%"REG_d
"), %%"REG_BP
" \n\t"
100 "movl (%%"REG_d
"), %%"REG_d
" \n\t"
102 "pxor %%mm0, %%mm0 \n\t"
106 "movq (%%"REG_S
"), %%mm1 \n\t"
107 "movq (%%"REG_S
"), %%mm2 \n\t"
109 "movq (%%"REG_a
",%%"REG_d
",2), %%mm4 \n\t"
110 "movq (%%"REG_b
",%%"REG_BP
",2), %%mm6 \n\t"
111 "punpcklbw %%mm0, %%mm4 \n\t"
112 "punpcklbw %%mm0, %%mm6 \n\t"
113 "movq (%%"REG_a
"), %%mm3 \n\t"
114 "movq (%%"REG_b
"), %%mm5 \n\t"
115 "punpcklbw %%mm0, %%mm3 \n\t"
116 "punpcklbw %%mm0, %%mm5 \n\t"
117 "paddw %%mm3, %%mm4 \n\t"
118 "paddw %%mm5, %%mm6 \n\t"
119 "paddw %%mm3, %%mm4 \n\t"
120 "paddw %%mm5, %%mm6 \n\t"
121 "paddw %%mm3, %%mm4 \n\t"
122 "paddw %%mm5, %%mm6 \n\t"
123 "paddw %%mm3, %%mm4 \n\t"
124 "paddw %%mm5, %%mm6 \n\t"
125 "paddw %%mm3, %%mm4 \n\t"
126 "paddw %%mm5, %%mm6 \n\t"
127 "paddw %%mm3, %%mm4 \n\t"
128 "paddw %%mm5, %%mm6 \n\t"
129 "paddw %%mm3, %%mm4 \n\t"
130 "paddw %%mm5, %%mm6 \n\t"
131 "psrlw $3, %%mm4 \n\t"
132 "psrlw $3, %%mm6 \n\t"
133 "packuswb %%mm4, %%mm4 \n\t"
134 "packuswb %%mm6, %%mm6 \n\t"
135 "punpcklbw %%mm6, %%mm4 \n\t"
136 "punpcklbw %%mm4, %%mm1 \n\t"
137 "punpckhbw %%mm4, %%mm2 \n\t"
139 "movq %%mm1, (%%"REG_D
") \n\t"
140 "movq %%mm2, 8(%%"REG_D
") \n\t"
142 "movq 8(%%"REG_S
"), %%mm1 \n\t"
143 "movq 8(%%"REG_S
"), %%mm2 \n\t"
145 "movq (%%"REG_a
",%%"REG_d
",2), %%mm4 \n\t"
146 "movq (%%"REG_b
",%%"REG_BP
",2), %%mm6 \n\t"
147 "punpckhbw %%mm0, %%mm4 \n\t"
148 "punpckhbw %%mm0, %%mm6 \n\t"
149 "movq (%%"REG_a
"), %%mm3 \n\t"
150 "movq (%%"REG_b
"), %%mm5 \n\t"
151 "punpckhbw %%mm0, %%mm3 \n\t"
152 "punpckhbw %%mm0, %%mm5 \n\t"
153 "paddw %%mm3, %%mm4 \n\t"
154 "paddw %%mm5, %%mm6 \n\t"
155 "paddw %%mm3, %%mm4 \n\t"
156 "paddw %%mm5, %%mm6 \n\t"
157 "paddw %%mm3, %%mm4 \n\t"
158 "paddw %%mm5, %%mm6 \n\t"
159 "paddw %%mm3, %%mm4 \n\t"
160 "paddw %%mm5, %%mm6 \n\t"
161 "paddw %%mm3, %%mm4 \n\t"
162 "paddw %%mm5, %%mm6 \n\t"
163 "paddw %%mm3, %%mm4 \n\t"
164 "paddw %%mm5, %%mm6 \n\t"
165 "paddw %%mm3, %%mm4 \n\t"
166 "paddw %%mm5, %%mm6 \n\t"
167 "psrlw $3, %%mm4 \n\t"
168 "psrlw $3, %%mm6 \n\t"
169 "packuswb %%mm4, %%mm4 \n\t"
170 "packuswb %%mm6, %%mm6 \n\t"
171 "punpcklbw %%mm6, %%mm4 \n\t"
172 "punpcklbw %%mm4, %%mm1 \n\t"
173 "punpckhbw %%mm4, %%mm2 \n\t"
175 "add $16, %%"REG_S
" \n\t"
176 "add $8, %%"REG_a
" \n\t"
177 "add $8, %%"REG_b
" \n\t"
179 "movq %%mm1, 16(%%"REG_D
") \n\t"
180 "movq %%mm2, 24(%%"REG_D
") \n\t"
181 "add $32, %%"REG_D
" \n\t"
186 "pop %%"REG_BP
" \n\t"
188 : "S" (y
), "D" (dst
), "a" (u
), "b" (v
), "c" (w
/16),
190 "d" ((x86_reg
)us
), "r" ((x86_reg
)vs
)
196 pack_li_0_C(dst
, y
, u
, v
, (w
&15), us
, vs
);
199 static void pack_li_1_MMX(unsigned char *dst
, unsigned char *y
,
200 unsigned char *u
, unsigned char *v
, int w
, int us
, int vs
)
203 "push %%"REG_BP
" \n\t"
205 "mov %6, %%"REG_BP
" \n\t"
207 "movl 4(%%"REG_d
"), %%"REG_BP
" \n\t"
208 "movl (%%"REG_d
"), %%"REG_d
" \n\t"
210 "pxor %%mm0, %%mm0 \n\t"
214 "movq (%%"REG_S
"), %%mm1 \n\t"
215 "movq (%%"REG_S
"), %%mm2 \n\t"
217 "movq (%%"REG_a
",%%"REG_d
",2), %%mm4 \n\t"
218 "movq (%%"REG_b
",%%"REG_BP
",2), %%mm6 \n\t"
219 "punpcklbw %%mm0, %%mm4 \n\t"
220 "punpcklbw %%mm0, %%mm6 \n\t"
221 "movq (%%"REG_a
"), %%mm3 \n\t"
222 "movq (%%"REG_b
"), %%mm5 \n\t"
223 "punpcklbw %%mm0, %%mm3 \n\t"
224 "punpcklbw %%mm0, %%mm5 \n\t"
225 "movq %%mm4, %%mm7 \n\t"
226 "paddw %%mm4, %%mm4 \n\t"
227 "paddw %%mm7, %%mm4 \n\t"
228 "movq %%mm6, %%mm7 \n\t"
229 "paddw %%mm6, %%mm6 \n\t"
230 "paddw %%mm7, %%mm6 \n\t"
231 "paddw %%mm3, %%mm4 \n\t"
232 "paddw %%mm5, %%mm6 \n\t"
233 "paddw %%mm3, %%mm4 \n\t"
234 "paddw %%mm5, %%mm6 \n\t"
235 "paddw %%mm3, %%mm4 \n\t"
236 "paddw %%mm5, %%mm6 \n\t"
237 "paddw %%mm3, %%mm4 \n\t"
238 "paddw %%mm5, %%mm6 \n\t"
239 "paddw %%mm3, %%mm4 \n\t"
240 "paddw %%mm5, %%mm6 \n\t"
241 "psrlw $3, %%mm4 \n\t"
242 "psrlw $3, %%mm6 \n\t"
243 "packuswb %%mm4, %%mm4 \n\t"
244 "packuswb %%mm6, %%mm6 \n\t"
245 "punpcklbw %%mm6, %%mm4 \n\t"
246 "punpcklbw %%mm4, %%mm1 \n\t"
247 "punpckhbw %%mm4, %%mm2 \n\t"
249 "movq %%mm1, (%%"REG_D
") \n\t"
250 "movq %%mm2, 8(%%"REG_D
") \n\t"
252 "movq 8(%%"REG_S
"), %%mm1 \n\t"
253 "movq 8(%%"REG_S
"), %%mm2 \n\t"
255 "movq (%%"REG_a
",%%"REG_d
",2), %%mm4 \n\t"
256 "movq (%%"REG_b
",%%"REG_BP
",2), %%mm6 \n\t"
257 "punpckhbw %%mm0, %%mm4 \n\t"
258 "punpckhbw %%mm0, %%mm6 \n\t"
259 "movq (%%"REG_a
"), %%mm3 \n\t"
260 "movq (%%"REG_b
"), %%mm5 \n\t"
261 "punpckhbw %%mm0, %%mm3 \n\t"
262 "punpckhbw %%mm0, %%mm5 \n\t"
263 "movq %%mm4, %%mm7 \n\t"
264 "paddw %%mm4, %%mm4 \n\t"
265 "paddw %%mm7, %%mm4 \n\t"
266 "movq %%mm6, %%mm7 \n\t"
267 "paddw %%mm6, %%mm6 \n\t"
268 "paddw %%mm7, %%mm6 \n\t"
269 "paddw %%mm3, %%mm4 \n\t"
270 "paddw %%mm5, %%mm6 \n\t"
271 "paddw %%mm3, %%mm4 \n\t"
272 "paddw %%mm5, %%mm6 \n\t"
273 "paddw %%mm3, %%mm4 \n\t"
274 "paddw %%mm5, %%mm6 \n\t"
275 "paddw %%mm3, %%mm4 \n\t"
276 "paddw %%mm5, %%mm6 \n\t"
277 "paddw %%mm3, %%mm4 \n\t"
278 "paddw %%mm5, %%mm6 \n\t"
279 "psrlw $3, %%mm4 \n\t"
280 "psrlw $3, %%mm6 \n\t"
281 "packuswb %%mm4, %%mm4 \n\t"
282 "packuswb %%mm6, %%mm6 \n\t"
283 "punpcklbw %%mm6, %%mm4 \n\t"
284 "punpcklbw %%mm4, %%mm1 \n\t"
285 "punpckhbw %%mm4, %%mm2 \n\t"
287 "add $16, %%"REG_S
" \n\t"
288 "add $8, %%"REG_a
" \n\t"
289 "add $8, %%"REG_b
" \n\t"
291 "movq %%mm1, 16(%%"REG_D
") \n\t"
292 "movq %%mm2, 24(%%"REG_D
") \n\t"
293 "add $32, %%"REG_D
" \n\t"
298 "pop %%"REG_BP
" \n\t"
300 : "S" (y
), "D" (dst
), "a" (u
), "b" (v
), "c" (w
/16),
302 "d" ((x86_reg
)us
), "r" ((x86_reg
)vs
)
308 pack_li_1_C(dst
, y
, u
, v
, (w
&15), us
, vs
);
312 static pack_func_t
*pack_nn
;
313 static pack_func_t
*pack_li_0
;
314 static pack_func_t
*pack_li_1
;
316 static void ilpack(unsigned char *dst
, unsigned char *src
[3],
317 int dststride
, int srcstride
[3], int w
, int h
, pack_func_t
*pack
[2])
320 unsigned char *y
, *u
, *v
;
321 int ys
= srcstride
[0], us
= srcstride
[1], vs
= srcstride
[2];
328 pack_nn(dst
, y
, u
, v
, w
, 0, 0);
329 y
+= ys
; dst
+= dststride
;
330 pack_nn(dst
, y
, u
+us
, v
+vs
, w
, 0, 0);
331 y
+= ys
; dst
+= dststride
;
332 for (i
=2; i
<h
-2; i
++) {
334 b
= (i
&1) ^ ((i
&2)>>1);
335 pack
[b
](dst
, y
, u
, v
, w
, us
*a
, vs
*a
);
346 pack_nn(dst
, y
, u
, v
, w
, 0, 0);
347 y
+= ys
; dst
+= dststride
; u
+= us
; v
+= vs
;
348 pack_nn(dst
, y
, u
, v
, w
, 0, 0);
352 static int put_image(struct vf_instance_s
* vf
, mp_image_t
*mpi
, double pts
)
356 // hope we'll get DR buffer:
357 dmpi
=vf_get_image(vf
->next
, IMGFMT_YUY2
,
358 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
361 ilpack(dmpi
->planes
[0], mpi
->planes
, dmpi
->stride
[0], mpi
->stride
, mpi
->w
, mpi
->h
, vf
->priv
->pack
);
363 return vf_next_put_image(vf
,dmpi
, pts
);
366 static int config(struct vf_instance_s
* vf
,
367 int width
, int height
, int d_width
, int d_height
,
368 unsigned int flags
, unsigned int outfmt
)
370 /* FIXME - also support UYVY output? */
371 return vf_next_config(vf
, width
, height
, d_width
, d_height
, flags
, IMGFMT_YUY2
);
375 static int query_format(struct vf_instance_s
* vf
, unsigned int fmt
)
377 /* FIXME - really any YUV 4:2:0 input format should work */
382 return vf_next_query_format(vf
,IMGFMT_YUY2
);
387 static int open(vf_instance_t
*vf
, char* args
)
390 vf
->query_format
=query_format
;
391 vf
->put_image
=put_image
;
392 vf
->priv
= calloc(1, sizeof(struct vf_priv_s
));
394 if (args
) sscanf(args
, "%d", &vf
->priv
->mode
);
396 pack_nn
= (pack_func_t
*)pack_nn_C
;
397 pack_li_0
= pack_li_0_C
;
398 pack_li_1
= pack_li_1_C
;
400 if(gCpuCaps
.hasMMX
) {
401 pack_nn
= (pack_func_t
*)pack_nn_MMX
;
402 pack_li_0
= pack_li_0_MMX
;
403 pack_li_1
= pack_li_1_MMX
;
407 switch(vf
->priv
->mode
) {
409 vf
->priv
->pack
[0] = vf
->priv
->pack
[1] = pack_nn
;
412 mp_msg(MSGT_VFILTER
, MSGL_WARN
,
413 "ilpack: unknown mode %d (fallback to linear)\n",
416 vf
->priv
->pack
[0] = pack_li_0
;
417 vf
->priv
->pack
[1] = pack_li_1
;
424 const vf_info_t vf_info_ilpack
= {
425 "4:2:0 planar -> 4:2:2 packed reinterlacer",