2 * This file is part of MPlayer.
4 * MPlayer is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * MPlayer is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 #include "cpudetect.h"
28 #include "img_format.h"
32 #include "libvo/fastmemcpy.h"
38 mp_image_t
*buffered_mpi
;
/*
 * Copies rows from src to dest while smoothing vertical outliers: a sample
 * that is a strict local extremum relative to the samples ss before and ss
 * after it is replaced by a weighted average of its six neighbours in those
 * two rows; every other sample is copied unchanged.
 *
 * dest, ds - destination pointer and row distance (used as dest - ds)
 * src, ss  - source pointer and row distance (used as src[x +/- ss])
 * w        - number of bytes handled per row
 * h        - the row loop below executes h/2 times
 * field    - not referenced in the lines visible here; presumably selects
 *            which of the two whole-row copies runs -- TODO confirm
 *
 * NOTE(review): this excerpt omits some original lines (local declarations,
 * braces, pointer advancement), so only the visible statements are described.
 */
42 static void deint(unsigned char *dest
, int ds
, unsigned char *src
, int ss
, int w
, int h
, int field
)
/* Copy w bytes from the row before src into the row before dest. */
49 fast_memcpy(dest
- ds
, src
- ss
, w
);
/* The row loop body runs h/2 times. */
54 for (y
=h
/2; y
> 0; y
--) {
/* Columns 1 .. w-2 only: this inner loop never writes column 0 or w-1. */
56 for (x
=1; x
<w
-1; x
++) {
/* True when src[x] is strictly greater than, or strictly less than, both
 * the sample ss before it and the sample ss after it. */
57 if (((src
[x
-ss
] < src
[x
]) && (src
[x
+ss
] < src
[x
])) ||
58 ((src
[x
-ss
] > src
[x
]) && (src
[x
+ss
] > src
[x
]))) {
59 //dest[x] = (src[x+ss] + src[x-ss])>>1;
/* Weighted average over the two neighbouring rows: weight 2 for the samples
 * at x, weight 1 for the four samples at x-1 and x+1; total weight 8. */
60 dest
[x
] = ((src
[x
+ss
]<<1) + (src
[x
-ss
]<<1)
61 + src
[x
+ss
+1] + src
[x
-ss
+1]
62 + src
[x
+ss
-1] + src
[x
-ss
-1])>>3;
/* Not an extremum: keep the source sample. */
64 else dest
[x
] = src
[x
];
/* Copy one row of w bytes unmodified. */
71 fast_memcpy(dest
, src
, w
);
/*
 * 3DNow! variant of the linear quarter-pel filter: each output byte is a
 * roughly 3:1 blend of s[j] and s[j+ssd].
 *
 * d, s  - destination and source pointers
 * w     - bytes per row; w>>3 groups of 8 go through the asm, the remaining
 *         w&7 bytes through the scalar loop
 * h, ds, ss - not referenced in the lines visible here
 * up    - when zero, one row of w bytes is copied unfiltered at the end
 *
 * NOTE(review): ssd, i, j, crap1 and crap2 are declared in lines not shown
 * in this excerpt, as are the asm loop label and counter instructions.
 */
75 static void qpel_li_3DNOW(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* mm0 = 8 bytes at s, mm1 = 8 bytes at s + ssd. */
88 "movq (%%"REG_S
"), %%mm0 \n\t"
89 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
/* pavgusb is applied twice: mm1 = avg(mm0, avg(mm0, mm1)), which weights
 * the result about 3:1 towards mm0. */
90 "pavgusb %%mm0, %%mm1 \n\t"
91 "add $8, %%"REG_S
" \n\t"
92 "pavgusb %%mm0, %%mm1 \n\t"
93 "movq %%mm1, (%%"REG_D
") \n\t"
94 "add $8, %%"REG_D
" \n\t"
/* Outputs: crap1/crap2 receive the advanced S/D registers.
 * Inputs: "c" = number of 8-byte groups, "S" = s, "D" = d, "a" = ssd. */
97 : "=S"(crap1
), "=D"(crap2
)
98 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes: truncating (s[j+ssd] + 3*s[j]) / 4. */
100 for (j
=w
-(w
&7); j
<w
; j
++)
101 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, copy one row of w bytes unfiltered. */
105 if (!up
) fast_memcpy(d
, s
, w
);
/* emms: leave MMX state before returning. */
106 __asm__
volatile("emms \n\t" : : : "memory");
/*
 * MMX2 variant of the linear quarter-pel filter: each output byte is a
 * roughly 3:1 blend of s[j] and s[j+ssd], computed with pavgb.
 *
 * d, s  - destination and source pointers
 * w     - bytes per row; w>>3 groups of 8 go through the asm, the remaining
 *         w&7 bytes through the scalar loop
 * h     - the row loop executes h-1 times
 * ds, ss - not referenced in the lines visible here
 * up    - when zero, one row of w bytes is copied unfiltered at the end
 *
 * NOTE(review): ssd, i, j, crap1 and crap2 are declared in lines not shown
 * in this excerpt, as are the asm loop label and counter instructions.
 */
111 static void qpel_li_MMX2(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* Unfiltered copy of one row; the guarding condition is not visible here
 * (presumably the up != 0 counterpart of the copy at the end -- confirm). */
117 fast_memcpy(d
, s
, w
);
/* h-1 filtered rows. */
121 for (i
=h
-1; i
; i
--) {
/* mm7 is zeroed but not read by the instructions shown below. */
123 "pxor %%mm7, %%mm7 \n\t"
/* mm0 = 8 bytes at s, mm1 = 8 bytes at s + ssd. */
125 "movq (%%"REG_S
"), %%mm0 \n\t"
126 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
/* pavgb is applied twice: mm1 = avg(mm0, avg(mm0, mm1)), i.e. about 3:1
 * in favour of mm0. */
127 "pavgb %%mm0, %%mm1 \n\t"
128 "add $8, %%"REG_S
" \n\t"
129 "pavgb %%mm0, %%mm1 \n\t"
130 "movq %%mm1, (%%"REG_D
") \n\t"
131 "add $8, %%"REG_D
" \n\t"
/* Outputs: crap1/crap2 receive the advanced S/D registers.
 * Inputs: "c" = number of 8-byte groups, "S" = s, "D" = d, "a" = ssd. */
134 : "=S"(crap1
), "=D"(crap2
)
135 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes: truncating (s[j+ssd] + 3*s[j]) / 4. */
137 for (j
=w
-(w
&7); j
<w
; j
++)
138 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, copy one row of w bytes unfiltered. */
142 if (!up
) fast_memcpy(d
, s
, w
);
/* emms: leave MMX state before returning. */
143 __asm__
volatile("emms \n\t" : : : "memory");
/*
 * Plain-MMX variant of the linear quarter-pel filter. Bytes are widened to
 * 16-bit words, (s[j+ssd] + 3*s[j]) >> 2 is computed per word, and the
 * result is packed back to bytes.
 *
 * d, s  - destination and source pointers
 * w     - bytes per row; w>>3 groups of 8 go through the asm, the remaining
 *         w&7 bytes through the scalar loop
 * h     - the row loop executes h-1 times
 * ds, ss - not referenced in the lines visible here
 * up    - when zero, one row of w bytes is copied unfiltered at the end
 *
 * NOTE(review): ssd, i, j, crap1 and crap2 are declared in lines not shown
 * in this excerpt, as are the asm loop label and counter instructions.
 */
148 static void qpel_li_MMX(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* Unfiltered copy of one row; the guarding condition is not visible here
 * (presumably the up != 0 counterpart of the copy at the end -- confirm). */
154 fast_memcpy(d
, s
, w
);
/* h-1 filtered rows. */
158 for (i
=h
-1; i
; i
--) {
/* mm7 = 0, used as the high byte when widening bytes to words. */
160 "pxor %%mm7, %%mm7 \n\t"
/* mm0/mm1 = the 8 bytes at s (for the low/high halves),
 * mm2/mm3 = the 8 bytes at s + ssd. */
162 "movq (%%"REG_S
"), %%mm0 \n\t"
163 "movq (%%"REG_S
"), %%mm1 \n\t"
164 "movq (%%"REG_S
",%%"REG_a
"), %%mm2 \n\t"
165 "movq (%%"REG_S
",%%"REG_a
"), %%mm3 \n\t"
166 "add $8, %%"REG_S
" \n\t"
/* Zero-extend: low four bytes into mm0/mm2, high four bytes into mm1/mm3. */
167 "punpcklbw %%mm7, %%mm0 \n\t"
168 "punpckhbw %%mm7, %%mm1 \n\t"
169 "punpcklbw %%mm7, %%mm2 \n\t"
170 "punpckhbw %%mm7, %%mm3 \n\t"
/* Three additions of the s words onto the s+ssd words: s[ssd] + 3*s. */
171 "paddw %%mm0, %%mm2 \n\t"
172 "paddw %%mm1, %%mm3 \n\t"
173 "paddw %%mm0, %%mm2 \n\t"
174 "paddw %%mm1, %%mm3 \n\t"
175 "paddw %%mm0, %%mm2 \n\t"
176 "paddw %%mm1, %%mm3 \n\t"
/* Divide by 4 (truncating). */
177 "psrlw $2, %%mm2 \n\t"
178 "psrlw $2, %%mm3 \n\t"
/* NOTE(review): packsswb saturates to the signed byte range, so word values
 * above 127 would be stored as 127. qpel_4tap_MMX uses packuswb for the
 * same step; packuswb looks intended here too -- confirm. */
179 "packsswb %%mm3, %%mm2 \n\t"
180 "movq %%mm2, (%%"REG_D
") \n\t"
181 "add $8, %%"REG_D
" \n\t"
/* Outputs: crap1/crap2 receive the advanced S/D registers.
 * Inputs: "c" = number of 8-byte groups, "S" = s, "D" = d, "a" = ssd. */
184 : "=S"(crap1
), "=D"(crap2
)
185 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes: truncating (s[j+ssd] + 3*s[j]) / 4. */
187 for (j
=w
-(w
&7); j
<w
; j
++)
188 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, copy one row of w bytes unfiltered. */
192 if (!up
) fast_memcpy(d
, s
, w
);
/* emms: leave MMX state before returning. */
193 __asm__
volatile("emms \n\t" : : : "memory");
/*
 * MMX variant of the 4-tap quarter-pel filter. The asm takes the negated
 * row offset through the "b" register constraint, and the function is
 * compiled only when HAVE_EBX_AVAILABLE is set.
 *
 * Per 8-byte group the asm computes, with unsigned saturating add/subtract,
 *   (29*s[j+ssd] + 110*s[j] - 9*s[j-ssd] - 3*s[j+2*ssd]) >> 7
 * and packs the words to bytes with unsigned saturation.
 *
 * d, s  - destination and source pointers
 * w     - bytes per row; w>>3 groups of 8 go through the asm, the remaining
 *         w&7 bytes through the scalar loop
 * h     - the 4-tap row loop executes h-3 times
 * ds, ss - not referenced in the lines visible here
 * up    - when zero, one row of w bytes is copied unfiltered at the end
 *
 * NOTE(review): ssd, i, j, crap1 and crap2 are declared in lines not shown
 * in this excerpt, as are the asm loop label and counter instructions and
 * the loops around the two linear-filter statements.
 */
196 #if HAVE_EBX_AVAILABLE
197 static void qpel_4tap_MMX(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* Tap magnitudes, four 16-bit copies each so one movq loads a full register:
 * 29, 110, 9, 3. */
200 static const short filter
[] = {
201 29, 29, 29, 29, 110, 110, 110, 110,
202 9, 9, 9, 9, 3, 3, 3, 3,
/* Unfiltered copy of one row; the guarding condition is not visible here. */
207 fast_memcpy(d
, s
, w
);
/* Linear (3:1, truncating) filter statement; its enclosing loop is not
 * visible here. */
211 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* h-3 rows go through the 4-tap filter. */
213 for (i
=h
-3; i
; i
--) {
/* mm0 = 0 for byte-to-word widening; mm4..mm7 = the four tap vectors
 * (29, 110, 9, 3) loaded from the filter table. */
215 "pxor %%mm0, %%mm0 \n\t"
216 "movq (%%"REG_d
"), %%mm4 \n\t"
217 "movq 8(%%"REG_d
"), %%mm5 \n\t"
218 "movq 16(%%"REG_d
"), %%mm6 \n\t"
219 "movq 24(%%"REG_d
"), %%mm7 \n\t"
/* Low four bytes: mm1 = 29*s[+ssd] + 110*s[0] - 9*s[-ssd] - 3*s[+2*ssd],
 * then >> 7. */
222 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
223 "movq (%%"REG_S
"), %%mm2 \n\t"
224 "movq (%%"REG_S
",%%"REG_b
"), %%mm3 \n\t"
225 "punpcklbw %%mm0, %%mm1 \n\t"
226 "punpcklbw %%mm0, %%mm2 \n\t"
227 "pmullw %%mm4, %%mm1 \n\t"
228 "punpcklbw %%mm0, %%mm3 \n\t"
229 "pmullw %%mm5, %%mm2 \n\t"
230 "paddusw %%mm2, %%mm1 \n\t"
231 "pmullw %%mm6, %%mm3 \n\t"
232 "movq (%%"REG_S
",%%"REG_a
",2), %%mm2 \n\t"
233 "psubusw %%mm3, %%mm1 \n\t"
234 "punpcklbw %%mm0, %%mm2 \n\t"
235 "pmullw %%mm7, %%mm2 \n\t"
236 "psubusw %%mm2, %%mm1 \n\t"
237 "psrlw $7, %%mm1 \n\t"
/* High four bytes: the same computation, accumulated in mm2. */
239 "movq (%%"REG_S
",%%"REG_a
"), %%mm2 \n\t"
240 "movq (%%"REG_S
"), %%mm3 \n\t"
241 "punpckhbw %%mm0, %%mm2 \n\t"
242 "punpckhbw %%mm0, %%mm3 \n\t"
243 "pmullw %%mm4, %%mm2 \n\t"
244 "pmullw %%mm5, %%mm3 \n\t"
245 "paddusw %%mm3, %%mm2 \n\t"
246 "movq (%%"REG_S
",%%"REG_b
"), %%mm3 \n\t"
247 "punpckhbw %%mm0, %%mm3 \n\t"
248 "pmullw %%mm6, %%mm3 \n\t"
249 "psubusw %%mm3, %%mm2 \n\t"
250 "movq (%%"REG_S
",%%"REG_a
",2), %%mm3 \n\t"
251 "punpckhbw %%mm0, %%mm3 \n\t"
252 "add $8, %%"REG_S
" \n\t"
253 "pmullw %%mm7, %%mm3 \n\t"
254 "psubusw %%mm3, %%mm2 \n\t"
255 "psrlw $7, %%mm2 \n\t"
/* Pack both halves to bytes with unsigned saturation and store 8 bytes. */
257 "packuswb %%mm2, %%mm1 \n\t"
258 "movq %%mm1, (%%"REG_D
") \n\t"
259 "add $8, %%"REG_D
" \n\t"
/* Outputs: crap1/crap2 receive the advanced S/D registers.
 * Inputs: "c" = number of 8-byte groups, "S" = s, "D" = d, "a" = ssd,
 * "b" = -ssd, "d" = address of the filter table. */
262 : "=S"(crap1
), "=D"(crap2
)
263 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
), "b"((long)-ssd
), "d"(filter
)
/* Scalar tail for the last w&7 bytes.
 * NOTE(review): this uses a centre tap of 111 where the asm table has 110,
 * adds no rounding term, and stores the result without clamping, so values
 * outside 0..255 wrap when converted to unsigned char. qpel_4tap_C adds 64
 * and clamps for the same formula -- confirm which behaviour is intended. */
265 for (j
=w
-(w
&7); j
<w
; j
++)
266 d
[j
] = (-9*s
[j
-ssd
] + 111*s
[j
] + 29*s
[j
+ssd
] - 3*s
[j
+ssd
+ssd
])>>7;
/* Linear (3:1, truncating) filter statement; its enclosing loop is not
 * visible here. */
271 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, copy one row of w bytes unfiltered. */
273 if (!up
) fast_memcpy(d
, s
, w
);
/* emms: leave MMX state before returning. */
274 __asm__
volatile("emms \n\t" : : : "memory");
276 #endif /* HAVE_EBX_AVAILABLE */
/**
 * Clamp a value to the 8-bit sample range 0..255.
 *
 * Written with plain comparisons rather than shift tricks: left-shifting a
 * negative int is undefined behaviour, right-shifting one is
 * implementation-defined, and a mask-based version is only correct for
 * inputs below 512. Comparisons are correct for every int.
 *
 * @param a value to clamp
 * @return 0 if a < 0, 255 if a > 255, otherwise a
 */
static inline int clamp(int a)
{
    if (a < 0)
        return 0;
    return a > 255 ? 255 : a;
}
/*
 * Portable C version of the linear quarter-pel filter: each output byte is
 * the truncating 3:1 blend (s[j+ssd] + 3*s[j]) >> 2.
 *
 * d, s  - destination and source pointers
 * w     - bytes copied per row by fast_memcpy
 * h     - the row loop executes h-1 times
 * ds, ss - not referenced in the lines visible here
 * up    - when zero, one row of w bytes is copied unfiltered at the end
 *
 * NOTE(review): ssd, i and j are declared in lines not shown in this
 * excerpt, as are the inner column loop and the pointer advancement.
 */
286 static void qpel_li_C(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* Unfiltered copy of one row; the guarding condition is not visible here
 * (presumably the up != 0 counterpart of the copy at the end -- confirm). */
291 fast_memcpy(d
, s
, w
);
/* h-1 filtered rows. */
295 for (i
=h
-1; i
; i
--) {
/* 3:1 blend of s[j] with the sample ssd away, truncating (no rounding). */
297 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, copy one row of w bytes unfiltered. */
301 if (!up
) fast_memcpy(d
, s
, w
);
/*
 * Portable C version of the 4-tap quarter-pel filter.
 *
 * Rows handled by the h-3 loop use
 *   clamp((-9*s[j-ssd] + 111*s[j] + 29*s[j+ssd] - 3*s[j+2*ssd] + 64) >> 7)
 * (taps sum to 128, +64 rounds to nearest). Two further statements use the
 * rounded linear blend (s[j+ssd] + 3*s[j] + 2) >> 2.
 *
 * d, s  - destination and source pointers
 * w     - bytes copied per row by fast_memcpy
 * h     - the 4-tap row loop executes h-3 times
 * ds, ss - not referenced in the lines visible here
 * up    - when zero, one row of w bytes is copied unfiltered at the end
 *
 * NOTE(review): ssd, i and j are declared in lines not shown in this
 * excerpt, as are the column loops and the pointer advancement.
 */
304 static void qpel_4tap_C(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* Unfiltered copy of one row; the guarding condition is not visible here. */
309 fast_memcpy(d
, s
, w
);
/* Rounded linear blend; unlike qpel_li_C this adds 2 before the shift. */
313 d
[j
] = (s
[j
+ssd
] + 3*s
[j
] + 2)>>2;
/* h-3 rows go through the 4-tap filter. */
315 for (i
=h
-3; i
; i
--) {
/* 4-tap filter with rounding; the shifted sum is passed through clamp(). */
317 d
[j
] = clamp((-9*s
[j
-ssd
] + 111*s
[j
] + 29*s
[j
+ssd
] - 3*s
[j
+ssd
+ssd
] + 64)>>7);
/* Rounded linear blend again, after the 4-tap loop. */
321 d
[j
] = (s
[j
+ssd
] + 3*s
[j
] + 2)>>2;
/* When up is zero, copy one row of w bytes unfiltered. */
323 if (!up
) fast_memcpy(d
, s
, w
);
/*
 * File-scope function pointers for the linear and 4-tap quarter-pel
 * filters. Both share the signature of the qpel_*_C / qpel_*_MMX
 * implementations; vf_open() assigns them.
 */
326 static void (*qpel_li
)(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
);
327 static void (*qpel_4tap
)(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
);
/* Forward declaration: put_image() calls this before its definition. */
329 static int continue_buffered_image(struct vf_instance
*vf
);
/*
 * Stores the incoming image and its timestamp in vf->priv, resets the
 * buffered_i counter to 0, and hands off to continue_buffered_image().
 *
 * Returns whatever continue_buffered_image() returns.
 */
331 static int put_image(struct vf_instance
*vf
, mp_image_t
*mpi
, double pts
)
/* Remember the frame so continue_buffered_image() can pick it up. */
333 vf
->priv
->buffered_mpi
= mpi
;
334 vf
->priv
->buffered_pts
= pts
;
/* Start at output index 0 for this frame. */
335 vf
->priv
->buffered_i
= 0;
336 return continue_buffered_image(vf
);
/*
 * Produces one output image from the frame stored in vf->priv by
 * put_image() and passes it to vf_next_put_image().
 *
 * i (vf->priv->buffered_i) is the index of the output being produced; it is
 * combined with the field order as (i ^ !tff) to pick the starting row
 * (0 or 1) within the source frame. Three output paths are visible:
 *   - an EXPORT image of half height that aliases every second source row,
 *   - a TEMP image of full height filled by my_memcpy_pic() and deint(),
 *   - a TEMP image of half height filled through the qpel function pointer.
 *
 * NOTE(review): this excerpt omits the declarations of dmpi, ret, bpp and
 * tff, the case labels of both switch statements, closing braces and the
 * return statement, so the mapping from mode values to paths is not shown.
 * NOTE(review): none of the visible vf_get_image() results is checked
 * before dmpi is dereferenced.
 */
339 static int continue_buffered_image(struct vf_instance
*vf
)
/* Reload the state saved by put_image(). */
341 int i
=vf
->priv
->buffered_i
;
342 double pts
= vf
->priv
->buffered_pts
;
343 mp_image_t
*mpi
= vf
->priv
->buffered_mpi
;
/* Filter selected for this call (assigned in lines not visible here). */
346 void (*qpel
)(unsigned char *, unsigned char *, int, int, int, int, int);
/* Registers this function with vf_queue_frame(); any guarding condition is
 * not visible in this excerpt. */
351 vf_queue_frame(vf
, continue_buffered_image
);
/* Offset the timestamp by a fixed 0.02 per output index, independent of the
 * actual frame rate (the existing XXX marks this as a known approximation). */
352 pts
+= i
* .02; // XXX not right
/* For non-planar images, derive bytes per pixel from bits per pixel. */
354 if (!(mpi
->flags
& MP_IMGFLAG_PLANAR
)) bpp
= mpi
->bpp
/8;
/* Negative parity: take the field order from the image flags when the image
 * says its fields are ordered. */
355 if (vf
->priv
->parity
< 0) {
356 if (mpi
->fields
& MP_IMGFIELD_ORDERED
)
/* tff holds the raw masked flag here, not 0/1; later uses apply ! to it. */
357 tff
= mpi
->fields
& MP_IMGFIELD_TOP_FIRST
;
/* Non-negative parity: tff is the inverse of its lowest bit. */
361 else tff
= (vf
->priv
->parity
&1)^1;
363 switch (vf
->priv
->mode
) {
368 // TODO: add 3tap filter
376 switch (vf
->priv
->mode
) {
/* Path 1: EXPORT image of half height. No pixels are copied; the planes
 * point into the source at row (i ^ !tff) and the stride is doubled so
 * only every second row is seen. Note ! binds tighter than ^. */
379 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
380 MP_IMGTYPE_EXPORT
, MP_IMGFLAG_ACCEPT_STRIDE
,
381 mpi
->width
, mpi
->height
/2);
382 dmpi
->planes
[0] = mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0];
383 dmpi
->stride
[0] = 2*mpi
->stride
[0];
/* Same aliasing for planes 1 and 2 of planar images. */
384 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
385 dmpi
->planes
[1] = mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1];
386 dmpi
->planes
[2] = mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2];
387 dmpi
->stride
[1] = 2*mpi
->stride
[1];
388 dmpi
->stride
[2] = 2*mpi
->stride
[2];
390 ret
|= vf_next_put_image(vf
, dmpi
, pts
);
/* Path 2: TEMP image of full height. The rows starting at (i ^ !tff) are
 * copied with doubled strides, then deint() is run over the plane. */
396 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
397 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
398 mpi
->width
, mpi
->height
);
399 my_memcpy_pic(dmpi
->planes
[0] + (i
^!tff
)*dmpi
->stride
[0],
400 mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0],
401 mpi
->w
*bpp
, mpi
->h
/2, dmpi
->stride
[0]*2, mpi
->stride
[0]*2);
/* NOTE(review): the copy above uses mpi->w*bpp bytes per row but deint() is
 * given mpi->w -- confirm this is intended for non-planar formats. */
402 deint(dmpi
->planes
[0], dmpi
->stride
[0], mpi
->planes
[0], mpi
->stride
[0], mpi
->w
, mpi
->h
, (i
^!tff
));
/* Planes 1 and 2 of planar images, using the chroma dimensions. */
403 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
404 my_memcpy_pic(dmpi
->planes
[1] + (i
^!tff
)*dmpi
->stride
[1],
405 mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1],
406 mpi
->chroma_width
, mpi
->chroma_height
/2,
407 dmpi
->stride
[1]*2, mpi
->stride
[1]*2);
408 my_memcpy_pic(dmpi
->planes
[2] + (i
^!tff
)*dmpi
->stride
[2],
409 mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2],
410 mpi
->chroma_width
, mpi
->chroma_height
/2,
411 dmpi
->stride
[2]*2, mpi
->stride
[2]*2);
412 deint(dmpi
->planes
[1], dmpi
->stride
[1], mpi
->planes
[1], mpi
->stride
[1],
413 mpi
->chroma_width
, mpi
->chroma_height
, (i
^!tff
));
414 deint(dmpi
->planes
[2], dmpi
->stride
[2], mpi
->planes
[2], mpi
->stride
[2],
415 mpi
->chroma_width
, mpi
->chroma_height
, (i
^!tff
));
417 ret
|= vf_next_put_image(vf
, dmpi
, pts
);
/* Path 3: TEMP image of half height, filled by the selected qpel filter.
 * The source starts at row (i ^ !tff) with a doubled stride, and the same
 * (i ^ !tff) value is passed as the filter's "up" argument. */
425 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
426 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
427 mpi
->width
, mpi
->height
/2);
428 qpel(dmpi
->planes
[0], mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0],
429 mpi
->w
*bpp
, mpi
->h
/2, dmpi
->stride
[0], mpi
->stride
[0]*2, (i
^!tff
));
/* Planes 1 and 2 of planar images, using the chroma dimensions. */
430 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
431 qpel(dmpi
->planes
[1],
432 mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1],
433 mpi
->chroma_width
, mpi
->chroma_height
/2,
434 dmpi
->stride
[1], mpi
->stride
[1]*2, (i
^!tff
));
435 qpel(dmpi
->planes
[2],
436 mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2],
437 mpi
->chroma_width
, mpi
->chroma_height
/2,
438 dmpi
->stride
[2], mpi
->stride
[2]*2, (i
^!tff
));
440 ret
|= vf_next_put_image(vf
, dmpi
, pts
);
/* Record that the next call should produce output index 1. */
445 vf
->priv
->buffered_i
= 1;
/*
 * Format query hook. The visible lines test whether the filter is in mode 1
 * and otherwise forward the query to vf_next_query_format().
 *
 * NOTE(review): the statements selected by the mode == 1 test are not
 * visible in this excerpt, so which formats that mode accepts is not
 * documented here.
 */
449 static int query_format(struct vf_instance
*vf
, unsigned int fmt
)
451 /* FIXME - figure out which formats exactly work */
454 if (vf
->priv
->mode
== 1)
/* Delegate the decision to the next filter in the chain. */
459 return vf_next_query_format(vf
, fmt
);
/*
 * Configuration hook: forwards the configuration to vf_next_config(),
 * passing either half the input height or the full input height depending
 * on vf->priv->mode. All other arguments are passed through unchanged.
 *
 * NOTE(review): the case labels are not visible in this excerpt, so which
 * mode values select which of the two calls is not documented here.
 */
464 static int config(struct vf_instance
*vf
,
465 int width
, int height
, int d_width
, int d_height
,
466 unsigned int flags
, unsigned int outfmt
)
468 switch (vf
->priv
->mode
) {
/* Half-height output. */
473 return vf_next_config(vf
,width
,height
/2,d_width
,d_height
,flags
,outfmt
);
/* Full-height output. */
475 return vf_next_config(vf
,width
,height
,d_width
,d_height
,flags
,outfmt
);
480 static void uninit(struct vf_instance
*vf
)
/*
 * Filter constructor: installs the put_image and query_format callbacks,
 * allocates the zero-initialised private state, parses the optional
 * "mode:parity" argument string and selects the quarter-pel filter
 * implementations according to the detected CPU capabilities.
 *
 * vf   - filter instance to initialise
 * args - optional argument string; may be NULL
 *
 * NOTE(review): this excerpt omits several original lines (the declaration
 * of p, further assignments, preprocessor guards and the return statement).
 */
485 static int vf_open(vf_instance_t
*vf
, char *args
)
489 vf
->put_image
= put_image
;
490 vf
->query_format
= query_format
;
492 vf
->default_reqs
= VFCAP_ACCEPT_STRIDE
;
/* NOTE(review): the calloc() result is dereferenced just below without a
 * NULL check in the visible lines. */
493 vf
->priv
= p
= calloc(1, sizeof(struct vf_priv_s
));
/* Negative parity is the default; continue_buffered_image() then takes the
 * field order from the image flags. */
495 vf
->priv
->parity
= -1;
/* Either or both fields may be left at their defaults if args is short or
 * malformed; the sscanf() return value is not inspected. */
496 if (args
) sscanf(args
, "%d:%d", &vf
->priv
->mode
, &vf
->priv
->parity
);
/* Start from the portable C filter, then override per CPU capability.
 * Later assignments win, so for qpel_li the visible order of preference is
 * 3DNow!, then MMX2, then MMX. */
498 qpel_4tap
= qpel_4tap_C
;
500 if(gCpuCaps
.hasMMX
) qpel_li
= qpel_li_MMX
;
/* qpel_4tap_MMX only exists when HAVE_EBX_AVAILABLE is set. */
501 #if HAVE_EBX_AVAILABLE
502 if(gCpuCaps
.hasMMX
) qpel_4tap
= qpel_4tap_MMX
;
506 if(gCpuCaps
.hasMMX2
) qpel_li
= qpel_li_MMX2
;
509 if(gCpuCaps
.has3DNow
) qpel_li
= qpel_li_3DNOW
;
514 const vf_info_t vf_info_tfields
= {
515 "temporal field separation",