9 #include "img_format.h"
13 #include "libvo/fastmemcpy.h"
/* Pointer to the frame currently being processed.  put_image() stores its
 * incoming image through vf->priv->buffered_mpi and
 * continue_buffered_image() reads it back, so this is presumably a member of
 * the filter's private state struct -- the enclosing declaration is not
 * visible here, TODO confirm. */
19 mp_image_t
*buffered_mpi
;
/*
 * deint() - vertical filter applied sample by sample from src into dest.
 *
 * dest, ds : output samples; ds is passed the destination plane stride by
 *            the callers in this file.
 * src, ss  : input samples; ss is the distance between vertically adjacent
 *            samples (the code reads src[x-ss] and src[x+ss]).
 * w, h     : callers pass the plane width and height.
 * field    : callers pass (i ^ !tff).
 *
 * Only the per-sample statement is visible in this excerpt; the loop that
 * drives x and the use of ds, w, h and field are on lines not shown.
 */
23 static void deint(unsigned char *dest
, int ds
, unsigned char *src
, int ss
, int w
, int h
, int field
)
/* A sample is filtered only when it is a strict vertical extremum, i.e.
 * strictly greater than both the sample above and the sample below, or
 * strictly smaller than both. */
35 if (((src
[x
-ss
] < src
[x
]) && (src
[x
+ss
] < src
[x
])) ||
36 ((src
[x
-ss
] > src
[x
]) && (src
[x
+ss
] > src
[x
]))) {
37 //dest[x] = (src[x+ss] + src[x-ss])>>1;
/* Replacement value: weighted mean of the six neighbours on the lines above
 * and below -- the two direct neighbours count twice, the four diagonal ones
 * once, and the sum of weights (8) is divided out by the >>3.
 * NOTE(review): src[x-ss-1] and src[x+ss+1] reach one sample to the left and
 * right of x; whether that stays inside the plane at x == 0 and x == w-1
 * depends on loop bounds that are not visible here -- confirm. */
38 dest
[x
] = ((src
[x
+ss
]<<1) + (src
[x
-ss
]<<1)
39 + src
[x
+ss
+1] + src
[x
-ss
+1]
40 + src
[x
+ss
-1] + src
[x
-ss
-1])>>3;
/* Anything that is not an extremum is copied through unchanged. */
42 else dest
[x
] = src
[x
];
/*
 * qpel_li_3DNOW() - linear blend of each line with a neighbouring line,
 * using the 3DNow! byte-average instruction.
 *
 * d, s   : destination and source sample pointers.
 * w      : bytes per line (the asm loop runs w>>3 times on 8 bytes each, the
 *          scalar loop handles the remaining w&7 bytes).
 * h      : callers pass the number of lines of one field.
 * ds, ss : callers pass the destination stride and twice the source stride.
 * up     : callers pass (i ^ !tff); when it is zero a plain copy of one line
 *          is made near the end (see below).
 *
 * The locals i, j, ssd, crap1 and crap2 as well as the loop structure are
 * declared on lines not shown in this excerpt.
 */
50 static void qpel_li_3DNOW(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* Per 8 bytes: mm0 = s[0..7], mm1 = s[ssd..ssd+7] (REG_a holds ssd). */
63 "movq (%%"REG_S
"), %%mm0 \n\t"
64 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
/* pavgusb is a rounded unsigned byte average; averaging against mm0 twice
 * weights the current line roughly 3:1 against the neighbouring line. */
65 "pavgusb %%mm0, %%mm1 \n\t"
66 "add $8, %%"REG_S
" \n\t"
67 "pavgusb %%mm0, %%mm1 \n\t"
68 "movq %%mm1, (%%"REG_D
") \n\t"
69 "add $8, %%"REG_D
" \n\t"
/* Operands: the advanced source/destination registers are written to the
 * throw-away outputs crap1/crap2; inputs are the 8-byte block count, the two
 * pointers and the line offset ssd. */
72 : "=S"(crap1
), "=D"(crap2
)
73 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes: (3*current + neighbour) / 4, truncated.
 * NOTE(review): this truncates while pavgusb rounds, so the tail can differ
 * from the vector path by one level. */
75 for (j
=w
-(w
&7); j
<w
; j
++)
76 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, one line is copied verbatim from s to d. */
80 if (!up
) fast_memcpy(d
, s
, w
);
/* Leave MMX state so that later floating-point code works. */
81 __asm__
volatile("emms \n\t" : : : "memory");
/*
 * qpel_li_MMX2() - same linear blend as the other qpel_li_* variants, using
 * the MMX2 pavgb byte-average instruction.
 *
 * d, s   : destination and source sample pointers.
 * w      : bytes per line (w>>3 vector iterations plus a scalar tail of
 *          w&7 bytes).
 * h      : callers pass the number of lines of one field.
 * ds, ss : callers pass the destination stride and twice the source stride.
 * up     : callers pass (i ^ !tff); when zero, one line is copied verbatim
 *          near the end.
 *
 * Locals (i, j, ssd, crap1, crap2) and the surrounding loops are on lines
 * not shown in this excerpt.
 */
86 static void qpel_li_MMX2(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* mm7 is cleared here; none of the visible instructions read it. */
98 "pxor %%mm7, %%mm7 \n\t"
/* Per 8 bytes: mm0 = current line, mm1 = line at offset ssd (REG_a). */
100 "movq (%%"REG_S
"), %%mm0 \n\t"
101 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
/* Two rounded byte averages against mm0 give roughly a 3:1 weighting of the
 * current line against the neighbouring line. */
102 "pavgb %%mm0, %%mm1 \n\t"
103 "add $8, %%"REG_S
" \n\t"
104 "pavgb %%mm0, %%mm1 \n\t"
105 "movq %%mm1, (%%"REG_D
") \n\t"
106 "add $8, %%"REG_D
" \n\t"
/* Outputs are throw-away copies of the advanced pointers; inputs are the
 * 8-byte block count, both pointers and the line offset ssd. */
109 : "=S"(crap1
), "=D"(crap2
)
110 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes: (3*current + neighbour) / 4, truncated
 * (the vector path rounds, so results may differ by one level). */
112 for (j
=w
-(w
&7); j
<w
; j
++)
113 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, one line is copied verbatim from s to d. */
117 if (!up
) fast_memcpy(d
, s
, w
);
/* Leave MMX state before returning to ordinary code. */
118 __asm__
volatile("emms \n\t" : : : "memory");
/*
 * qpel_li_MMX() - linear blend (3*current + neighbour) / 4 of each line with
 * a neighbouring line, using plain MMX word arithmetic.
 *
 * d, s   : destination and source sample pointers.
 * w      : bytes per line (w>>3 vector iterations plus a scalar tail of
 *          w&7 bytes).
 * h      : number of lines; the visible loop runs h-1 times.
 * ds, ss : callers pass the destination stride and twice the source stride.
 * up     : callers pass (i ^ !tff); when zero, one line is copied verbatim
 *          near the end.
 *
 * Locals (i, j, ssd, crap1, crap2) and parts of the control flow are on
 * lines not shown in this excerpt.
 */
123 static void qpel_li_MMX(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* One line is copied verbatim before the blend loop (the condition guarding
 * this call is not visible here). */
129 fast_memcpy(d
, s
, w
);
133 for (i
=h
-1; i
; i
--) {
/* mm7 = 0, used below to widen bytes to 16-bit words. */
135 "pxor %%mm7, %%mm7 \n\t"
/* mm0/mm1 = current line, mm2/mm3 = line at offset ssd (REG_a); each is
 * loaded twice so the low and high four bytes can be unpacked separately. */
137 "movq (%%"REG_S
"), %%mm0 \n\t"
138 "movq (%%"REG_S
"), %%mm1 \n\t"
139 "movq (%%"REG_S
",%%"REG_a
"), %%mm2 \n\t"
140 "movq (%%"REG_S
",%%"REG_a
"), %%mm3 \n\t"
141 "add $8, %%"REG_S
" \n\t"
142 "punpcklbw %%mm7, %%mm0 \n\t"
143 "punpckhbw %%mm7, %%mm1 \n\t"
144 "punpcklbw %%mm7, %%mm2 \n\t"
145 "punpckhbw %%mm7, %%mm3 \n\t"
/* neighbour + 3 * current, as 16-bit words (max 4*255, no overflow). */
146 "paddw %%mm0, %%mm2 \n\t"
147 "paddw %%mm1, %%mm3 \n\t"
148 "paddw %%mm0, %%mm2 \n\t"
149 "paddw %%mm1, %%mm3 \n\t"
150 "paddw %%mm0, %%mm2 \n\t"
151 "paddw %%mm1, %%mm3 \n\t"
/* Divide by 4; every word is now in 0..255. */
152 "psrlw $2, %%mm2 \n\t"
153 "psrlw $2, %%mm3 \n\t"
/* Pack back to bytes with UNSIGNED saturation.  The signed pack (packsswb)
 * saturates to -128..127 and would clip every sample >= 128 down to 127;
 * packuswb keeps the full 0..255 range, matching the scalar tail below and
 * the pack used by qpel_4tap_MMX. */
154 "packuswb %%mm3, %%mm2 \n\t"
155 "movq %%mm2, (%%"REG_D
") \n\t"
156 "add $8, %%"REG_D
" \n\t"
/* Outputs are throw-away copies of the advanced pointers; inputs are the
 * 8-byte block count, both pointers and the line offset ssd. */
159 : "=S"(crap1
), "=D"(crap2
)
160 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes, same formula as the vector path. */
162 for (j
=w
-(w
&7); j
<w
; j
++)
163 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, one line is copied verbatim from s to d. */
167 if (!up
) fast_memcpy(d
, s
, w
);
/* Leave MMX state before returning to ordinary code. */
168 __asm__
volatile("emms \n\t" : : : "memory");
/*
 * qpel_4tap_MMX() - four-tap vertical interpolation in MMX.  Compiled only
 * when HAVE_EBX_AVAILABLE is set, because the asm needs the "b" register as
 * an additional operand.
 *
 * d, s   : destination and source sample pointers.
 * w      : bytes per line (w>>3 vector iterations plus a scalar tail of
 *          w&7 bytes).
 * h      : number of lines; the visible main loop runs h-3 times, the
 *          remaining lines use the two-tap formula or a plain copy.
 * ds, ss : callers pass the destination stride and twice the source stride.
 * up     : callers pass (i ^ !tff); when zero, one line is copied verbatim
 *          near the end.
 *
 * Locals (i, j, ssd, crap1, crap2) and parts of the control flow are on
 * lines not shown in this excerpt.
 */
171 #if HAVE_EBX_AVAILABLE
172 static void qpel_4tap_MMX(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* Filter coefficients, each replicated four times so that one movq loads a
 * full register of identical 16-bit multipliers.  Further entries may follow
 * on lines not shown. */
175 static const short filter
[] = {
176 29, 29, 29, 29, 110, 110, 110, 110,
177 9, 9, 9, 9, 3, 3, 3, 3,
/* One line is copied verbatim (guarding condition not visible). */
182 fast_memcpy(d
, s
, w
);
/* Edge line: two-tap blend (3*current + neighbour) / 4. */
186 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
188 for (i
=h
-3; i
; i
--) {
/* mm0 = 0 for byte->word unpacking; mm4..mm7 = coefficients 29, 110, 9, 3
 * (REG_d points at filter[], 8 bytes = four shorts per register). */
190 "pxor %%mm0, %%mm0 \n\t"
191 "movq (%%"REG_d
"), %%mm4 \n\t"
192 "movq 8(%%"REG_d
"), %%mm5 \n\t"
193 "movq 16(%%"REG_d
"), %%mm6 \n\t"
194 "movq 24(%%"REG_d
"), %%mm7 \n\t"
/* Low four bytes.  With REG_a = ssd and REG_b = -ssd this computes
 *   (29*s[x+ssd] + 110*s[x] - 9*s[x-ssd] - 3*s[x+2*ssd]) >> 7
 * using unsigned saturating add/subtract on 16-bit words. */
197 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
198 "movq (%%"REG_S
"), %%mm2 \n\t"
199 "movq (%%"REG_S
",%%"REG_b
"), %%mm3 \n\t"
200 "punpcklbw %%mm0, %%mm1 \n\t"
201 "punpcklbw %%mm0, %%mm2 \n\t"
202 "pmullw %%mm4, %%mm1 \n\t"
203 "punpcklbw %%mm0, %%mm3 \n\t"
204 "pmullw %%mm5, %%mm2 \n\t"
205 "paddusw %%mm2, %%mm1 \n\t"
206 "pmullw %%mm6, %%mm3 \n\t"
207 "movq (%%"REG_S
",%%"REG_a
",2), %%mm2 \n\t"
208 "psubusw %%mm3, %%mm1 \n\t"
209 "punpcklbw %%mm0, %%mm2 \n\t"
210 "pmullw %%mm7, %%mm2 \n\t"
211 "psubusw %%mm2, %%mm1 \n\t"
212 "psrlw $7, %%mm1 \n\t"
/* High four bytes: same computation, accumulated in mm2. */
214 "movq (%%"REG_S
",%%"REG_a
"), %%mm2 \n\t"
215 "movq (%%"REG_S
"), %%mm3 \n\t"
216 "punpckhbw %%mm0, %%mm2 \n\t"
217 "punpckhbw %%mm0, %%mm3 \n\t"
218 "pmullw %%mm4, %%mm2 \n\t"
219 "pmullw %%mm5, %%mm3 \n\t"
220 "paddusw %%mm3, %%mm2 \n\t"
221 "movq (%%"REG_S
",%%"REG_b
"), %%mm3 \n\t"
222 "punpckhbw %%mm0, %%mm3 \n\t"
223 "pmullw %%mm6, %%mm3 \n\t"
224 "psubusw %%mm3, %%mm2 \n\t"
225 "movq (%%"REG_S
",%%"REG_a
",2), %%mm3 \n\t"
226 "punpckhbw %%mm0, %%mm3 \n\t"
227 "add $8, %%"REG_S
" \n\t"
228 "pmullw %%mm7, %%mm3 \n\t"
229 "psubusw %%mm3, %%mm2 \n\t"
230 "psrlw $7, %%mm2 \n\t"
/* Pack both halves to bytes with unsigned saturation and store. */
232 "packuswb %%mm2, %%mm1 \n\t"
233 "movq %%mm1, (%%"REG_D
") \n\t"
234 "add $8, %%"REG_D
" \n\t"
/* Outputs are throw-away copies of the advanced pointers; inputs are the
 * 8-byte block count, both pointers, +ssd, -ssd and the coefficient table. */
237 : "=S"(crap1
), "=D"(crap2
)
238 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
), "b"((long)-ssd
), "d"(filter
)
/* Scalar tail for the last w&7 bytes.
 * NOTE(review): the tail multiplies the centre tap by 111 while the vector
 * path uses 110 from filter[]; the C implementation in this file also uses
 * 111 -- confirm which is intended.
 * NOTE(review): unlike qpel_4tap_C, the result is neither rounded (+64) nor
 * clamped before being stored into an unsigned char, so an out-of-range
 * value would wrap. */
240 for (j
=w
-(w
&7); j
<w
; j
++)
241 d
[j
] = (-9*s
[j
-ssd
] + 111*s
[j
] + 29*s
[j
+ssd
] - 3*s
[j
+ssd
+ssd
])>>7;
/* Edge line: two-tap blend (3*current + neighbour) / 4. */
246 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, one line is copied verbatim from s to d. */
248 if (!up
) fast_memcpy(d
, s
, w
);
/* Leave MMX state before returning to ordinary code. */
249 __asm__
volatile("emms \n\t" : : : "memory");
251 #endif /* HAVE_EBX_AVAILABLE */
/*
 * Saturate a to the range of an 8-bit sample.
 *
 * Returns 0 for negative input, 255 for input above 255, and a itself
 * otherwise.  Written with plain comparisons rather than shift tricks: the
 * shift form left-shifts signed values (undefined for negative or
 * overflowing operands), only works for a < 512, and yields -1 instead of
 * 255 for 256..511.  Callers that store the result in an unsigned char see
 * the same bytes as before for every input the shift form handled.
 */
static inline int clamp(int a)
{
	if (a < 0)
		return 0;
	if (a > 255)
		return 255;
	return a;
}
/*
 * qpel_li_C() - portable C version of the two-tap linear blend.
 *
 * d, s   : destination and source sample pointers.
 * w      : bytes per line.
 * h      : number of lines; the visible loop runs h-1 times.
 * ds, ss : callers pass the destination stride and twice the source stride.
 * up     : callers pass (i ^ !tff); when zero, one line is copied verbatim
 *          near the end.
 *
 * Locals (i, j, ssd), the inner loop over j and the pointer stepping are on
 * lines not shown in this excerpt.
 */
261 static void qpel_li_C(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* One line is copied verbatim (guarding condition not visible). */
266 fast_memcpy(d
, s
, w
);
270 for (i
=h
-1; i
; i
--) {
/* (3*current + neighbour at offset ssd) / 4, truncated. */
272 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
/* When up is zero, one line is copied verbatim from s to d. */
276 if (!up
) fast_memcpy(d
, s
, w
);
/*
 * qpel_4tap_C() - portable C version of the four-tap vertical interpolation.
 *
 * d, s   : destination and source sample pointers.
 * w      : bytes per line.
 * h      : number of lines; the visible main loop runs h-3 times, the other
 *          lines use the two-tap formula or a plain copy.
 * ds, ss : callers pass the destination stride and twice the source stride.
 * up     : callers pass (i ^ !tff); when zero, one line is copied verbatim
 *          near the end.
 *
 * Locals (i, j, ssd), the inner loops over j and the pointer stepping are on
 * lines not shown in this excerpt.
 */
279 static void qpel_4tap_C(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* One line is copied verbatim (guarding condition not visible). */
284 fast_memcpy(d
, s
, w
);
/* Edge line: two-tap blend (3*current + neighbour) / 4 with rounding (+2). */
288 d
[j
] = (s
[j
+ssd
] + 3*s
[j
] + 2)>>2;
290 for (i
=h
-3; i
; i
--) {
/* Four taps -9, 111, 29, -3 (sum 128) over the lines at -ssd, 0, +ssd and
 * +2*ssd; +64 rounds before the >>7, and clamp() keeps the result in the
 * 8-bit range since the negative taps can over- or undershoot. */
292 d
[j
] = clamp((-9*s
[j
-ssd
] + 111*s
[j
] + 29*s
[j
+ssd
] - 3*s
[j
+ssd
+ssd
] + 64)>>7);
/* Edge line: two-tap blend with rounding. */
296 d
[j
] = (s
[j
+ssd
] + 3*s
[j
] + 2)>>2;
/* When up is zero, one line is copied verbatim from s to d. */
298 if (!up
) fast_memcpy(d
, s
, w
);
/* Implementation selectors for the two interpolation kernels.  Both share the
 * signature (d, s, w, h, ds, ss, up) of the qpel_li_* / qpel_4tap_* functions
 * in this file; open() assigns them, picking MMX / MMX2 / 3DNow! variants
 * according to the gCpuCaps flags. */
301 static void (*qpel_li
)(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
);
302 static void (*qpel_4tap
)(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
);
/* Forward declaration: put_image() calls this before its definition. */
304 static int continue_buffered_image(struct vf_instance_s
*);
/* Defined in another translation unit; its use is not visible in this
 * excerpt. */
305 extern int correct_pts
;
307 static int put_image(struct vf_instance_s
* vf
, mp_image_t
*mpi
, double pts
)
309 vf
->priv
->buffered_mpi
= mpi
;
310 vf
->priv
->buffered_pts
= pts
;
311 vf
->priv
->buffered_i
= 0;
312 return continue_buffered_image(vf
);
/*
 * continue_buffered_image() - emit output for the frame stored by
 * put_image().
 *
 * Reads the buffered frame, timestamp and field index from vf->priv, builds
 * an output image for field index i in one of three ways (see the branches
 * below), and passes it on with vf_next_put_image().  The return values of
 * those calls are OR-ed into ret.
 *
 * Case labels, several locals (ret, tff, bpp, dmpi) and the closing parts of
 * the control flow are on lines not shown in this excerpt, so which mode
 * value selects which branch cannot be stated from here.
 */
315 static int continue_buffered_image(struct vf_instance_s
*vf
)
/* Restore the state saved by put_image(). */
317 int i
=vf
->priv
->buffered_i
;
318 double pts
= vf
->priv
->buffered_pts
;
319 mp_image_t
*mpi
= vf
->priv
->buffered_mpi
;
/* Interpolation kernel chosen for this call; same signature as the
 * file-scope qpel_li / qpel_4tap pointers. */
322 void (*qpel
)(unsigned char *, unsigned char *, int, int, int, int, int);
/* Registers this function with vf_queue_frame() (the guarding condition is
 * not visible here). */
327 vf_queue_frame(vf
, continue_buffered_image
);
/* Timestamp of field i: a fixed 0.02 is added per field index; the original
 * author already flags this as not right. */
328 pts
+= i
* .02; // XXX not right
/* Packed formats: bytes per pixel from bits per pixel. */
330 if (!(mpi
->flags
& MP_IMGFLAG_PLANAR
)) bpp
= mpi
->bpp
/8;
/* Field order: a negative parity means "take it from the frame" when the
 * frame carries MP_IMGFIELD_ORDERED; otherwise it is derived from the low
 * bit of parity, inverted. */
331 if (vf
->priv
->parity
< 0) {
332 if (mpi
->fields
& MP_IMGFIELD_ORDERED
)
333 tff
= mpi
->fields
& MP_IMGFIELD_TOP_FIRST
;
337 else tff
= (vf
->priv
->parity
&1)^1;
339 switch (vf
->priv
->mode
) {
344 // TODO: add 3tap filter
352 switch (vf
->priv
->mode
) {
/* Branch A: export a half-height image whose planes point straight into the
 * source at line (i ^ !tff) with doubled strides, i.e. one field of the
 * source without copying.
 * NOTE(review): the vf_get_image() result is dereferenced without a NULL
 * check in the visible lines -- confirm it cannot fail here. */
355 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
356 MP_IMGTYPE_EXPORT
, MP_IMGFLAG_ACCEPT_STRIDE
,
357 mpi
->width
, mpi
->height
/2);
358 dmpi
->planes
[0] = mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0];
359 dmpi
->stride
[0] = 2*mpi
->stride
[0];
360 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
361 dmpi
->planes
[1] = mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1];
362 dmpi
->planes
[2] = mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2];
363 dmpi
->stride
[1] = 2*mpi
->stride
[1];
364 dmpi
->stride
[2] = 2*mpi
->stride
[2];
366 ret
|= vf_next_put_image(vf
, dmpi
, pts
);
370 if (!i
) vf_extra_flip(vf
);
/* Branch B: full-height temporary image.  The lines of the selected field
 * are copied to the same lines of the output (every second line, starting at
 * line (i ^ !tff)), then deint() is run on each plane. */
375 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
376 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
377 mpi
->width
, mpi
->height
);
378 my_memcpy_pic(dmpi
->planes
[0] + (i
^!tff
)*dmpi
->stride
[0],
379 mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0],
380 mpi
->w
*bpp
, mpi
->h
/2, dmpi
->stride
[0]*2, mpi
->stride
[0]*2);
381 deint(dmpi
->planes
[0], dmpi
->stride
[0], mpi
->planes
[0], mpi
->stride
[0], mpi
->w
, mpi
->h
, (i
^!tff
));
382 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
383 my_memcpy_pic(dmpi
->planes
[1] + (i
^!tff
)*dmpi
->stride
[1],
384 mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1],
385 mpi
->chroma_width
, mpi
->chroma_height
/2,
386 dmpi
->stride
[1]*2, mpi
->stride
[1]*2);
387 my_memcpy_pic(dmpi
->planes
[2] + (i
^!tff
)*dmpi
->stride
[2],
388 mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2],
389 mpi
->chroma_width
, mpi
->chroma_height
/2,
390 dmpi
->stride
[2]*2, mpi
->stride
[2]*2);
391 deint(dmpi
->planes
[1], dmpi
->stride
[1], mpi
->planes
[1], mpi
->stride
[1],
392 mpi
->chroma_width
, mpi
->chroma_height
, (i
^!tff
));
393 deint(dmpi
->planes
[2], dmpi
->stride
[2], mpi
->planes
[2], mpi
->stride
[2],
394 mpi
->chroma_width
, mpi
->chroma_height
, (i
^!tff
));
396 ret
|= vf_next_put_image(vf
, dmpi
, pts
);
400 if (!i
) vf_extra_flip(vf
);
/* Branch C: half-height temporary image filled by the selected qpel kernel.
 * The source pointer starts at line (i ^ !tff) and uses a doubled stride, so
 * the kernel only sees the lines of one field. */
407 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
408 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
409 mpi
->width
, mpi
->height
/2);
410 qpel(dmpi
->planes
[0], mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0],
411 mpi
->w
*bpp
, mpi
->h
/2, dmpi
->stride
[0], mpi
->stride
[0]*2, (i
^!tff
));
412 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
413 qpel(dmpi
->planes
[1],
414 mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1],
415 mpi
->chroma_width
, mpi
->chroma_height
/2,
416 dmpi
->stride
[1], mpi
->stride
[1]*2, (i
^!tff
));
417 qpel(dmpi
->planes
[2],
418 mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2],
419 mpi
->chroma_width
, mpi
->chroma_height
/2,
420 dmpi
->stride
[2], mpi
->stride
[2]*2, (i
^!tff
));
422 ret
|= vf_next_put_image(vf
, dmpi
, pts
);
426 if (!i
) vf_extra_flip(vf
);
/* Remember that the next call should handle field index 1. */
430 vf
->priv
->buffered_i
= 1;
/*
 * query_format() - format negotiation hook.
 *
 * In the visible lines the query is forwarded to the next filter with
 * vf_next_query_format(); any filtering on fmt happens on lines not shown in
 * this excerpt.  Note that open() has the assignment of this hook commented
 * out.
 */
435 static int query_format(struct vf_instance_s
* vf
, unsigned int fmt
)
437 /* FIXME - figure out which other formats work */
442 return vf_next_query_format(vf
, fmt
);
/*
 * config() - propagate the stream configuration to the next filter.
 *
 * Depending on vf->priv->mode the next filter is configured either with half
 * the input height (modes whose output is a single field) or with the full
 * height; all other parameters are passed through unchanged.  The case
 * labels deciding which mode takes which path are not visible in this
 * excerpt.
 */
448 static int config(struct vf_instance_s
* vf
,
449 int width
, int height
, int d_width
, int d_height
,
450 unsigned int flags
, unsigned int outfmt
)
452 switch (vf
->priv
->mode
) {
/* Half-height output. */
457 return vf_next_config(vf
,width
,height
/2,d_width
,d_height
,flags
,outfmt
);
/* Full-height output. */
459 return vf_next_config(vf
,width
,height
,d_width
,d_height
,flags
,outfmt
);
/* uninit() - filter teardown hook.  Only the signature is visible in this
 * excerpt; presumably it releases the state allocated in open() -- TODO
 * confirm against the full file. */
464 static void uninit(struct vf_instance_s
* vf
)
/*
 * open() - filter constructor.
 *
 * Installs the put_image hook, allocates zero-initialised private state,
 * parses the optional "mode:parity" argument string and selects the
 * interpolation kernels according to the CPU capability flags in gCpuCaps.
 * Further hook assignments, the local p and the return statement are on
 * lines not shown in this excerpt.
 */
469 static int open(vf_instance_t
*vf
, char* args
)
473 vf
->put_image
= put_image
;
474 //vf->query_format = query_format;
476 vf
->default_reqs
= VFCAP_ACCEPT_STRIDE
;
/* NOTE(review): the calloc() result is dereferenced two lines below without
 * a visible NULL check -- confirm whether a check exists on a hidden line. */
477 vf
->priv
= p
= calloc(1, sizeof(struct vf_priv_s
));
/* parity defaults to -1, which continue_buffered_image() treats as "take the
 * field order from the frame". */
479 vf
->priv
->parity
= -1;
/* Optional arguments "mode:parity"; fields that are absent keep their
 * defaults.  NOTE(review): the sscanf() return value is ignored, so
 * malformed input is silently accepted. */
480 if (args
) sscanf(args
, "%d:%d", &vf
->priv
->mode
, &vf
->priv
->parity
);
/* Kernel selection: start from the C four-tap version, then let each
 * capability test below override the previous choice, so the last matching
 * line wins. */
482 qpel_4tap
= qpel_4tap_C
;
484 if(gCpuCaps
.hasMMX
) qpel_li
= qpel_li_MMX
;
485 #if HAVE_EBX_AVAILABLE
486 if(gCpuCaps
.hasMMX
) qpel_4tap
= qpel_4tap_MMX
;
490 if(gCpuCaps
.hasMMX2
) qpel_li
= qpel_li_MMX2
;
493 if(gCpuCaps
.has3DNow
) qpel_li
= qpel_li_3DNOW
;
498 const vf_info_t vf_info_tfields
= {
499 "temporal field separation",