9 #include "img_format.h"
13 #include "libvo/fastmemcpy.h"
/*
 * my_memcpy_pic: row-by-row picture copy helper.
 *
 * The visible code iterates i over [0, height) and, on each pass, calls
 * memcpy() to copy bytesPerLine bytes from src to dst.
 *
 * NOTE(review): only fragments of this function are visible here (the
 * embedded original line numbers jump 20 -> 25 -> 27). Presumably dst and
 * src are advanced by dstStride / srcStride after every row and some
 * pointer is returned -- confirm against the complete file.
 */
20 static inline void *my_memcpy_pic(void * dst
, void * src
, int bytesPerLine
, int height
, int dstStride
, int srcStride
)
25 for(i
=0; i
<height
; i
++)
27 memcpy(dst
, src
, bytesPerLine
);
/*
 * deint: per-sample deinterlacing blend.
 *
 * For the sample at src[x], the visible code compares it against the two
 * samples at offsets -ss and +ss (ss is used as the distance to the
 * neighbouring line in src):
 *   - if src[x] is strictly greater than both, or strictly less than both,
 *     dest[x] is replaced by a weighted average of the six neighbours on
 *     those two lines;
 *   - otherwise dest[x] is a plain copy of src[x].
 *
 * NOTE(review): the loops that define x, and any use of ds, w, h and
 * field, are in lines not visible in this view -- confirm their roles
 * against the complete file.
 */
35 static void deint(unsigned char *dest
, int ds
, unsigned char *src
, int ss
, int w
, int h
, int field
)
/* True when src[x] is a strict local extremum relative to the samples
 * one line above (x-ss) and one line below (x+ss). */
47 if (((src
[x
-ss
] < src
[x
]) && (src
[x
+ss
] < src
[x
])) ||
48 ((src
[x
-ss
] > src
[x
]) && (src
[x
+ss
] > src
[x
]))) {
49 //dest[x] = (src[x+ss] + src[x-ss])>>1;
/* Weights: 2 for each vertical neighbour, 1 for each of the four
 * diagonal neighbours; total weight 8, hence the >>3. */
50 dest
[x
] = ((src
[x
+ss
]<<1) + (src
[x
-ss
]<<1)
51 + src
[x
+ss
+1] + src
[x
-ss
+1]
52 + src
[x
+ss
-1] + src
[x
-ss
-1])>>3;
/* Not an extremum: keep the source sample unchanged. */
54 else dest
[x
] = src
[x
];
/*
 * qpel_li_3DNOW: linear quarter-pel blend using the 3DNow! pavgusb
 * instruction.
 *
 * Visible behaviour:
 *   - the asm block processes 8 bytes per step, with the count register
 *     initialised to w>>3, the source pointer in REG_S, the destination
 *     pointer in REG_D and the line offset ssd in REG_a;
 *   - a scalar loop then handles the remaining w&7 bytes with
 *     (s[j+ssd] + 3*s[j]) >> 2;
 *   - at the end, if up is zero, w bytes are copied from s to d with
 *     memcpy, and emms is executed to leave MMX state.
 *
 * NOTE(review): the definitions of ssd, crap1, crap2, the row loop and
 * the asm loop label/branch are in lines not visible in this view.
 */
62 static void qpel_li_3DNOW(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
/* mm0 = 8 bytes of the current line, mm1 = 8 bytes at offset REG_a.
 * Averaging mm1 with mm0 twice weights the current line 3:1. */
75 "movq (%%"REG_S
"), %%mm0 \n\t"
76 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
77 "pavgusb %%mm0, %%mm1 \n\t"
78 "add $8, %%"REG_S
" \n\t"
79 "pavgusb %%mm0, %%mm1 \n\t"
80 "movq %%mm1, (%%"REG_D
") \n\t"
81 "add $8, %%"REG_D
" \n\t"
/* Outputs only receive the advanced pointer registers (scratch). */
84 : "=S"(crap1
), "=D"(crap2
)
85 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes that do not fill an 8-byte step. */
87 for (j
=w
-(w
&7); j
<w
; j
++)
88 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
92 if (!up
) memcpy(d
, s
, w
);
93 asm volatile("emms \n\t" : : : "memory");
/*
 * qpel_li_MMX2: linear quarter-pel blend using the MMX2 pavgb
 * instruction.
 *
 * Visible behaviour:
 *   - an outer loop runs h-1 times (i counts down from h-1 to 1);
 *   - the asm block processes 8 bytes per step, with the count register
 *     initialised to w>>3, source in REG_S, destination in REG_D and the
 *     line offset ssd in REG_a;
 *   - a scalar loop handles the remaining w&7 bytes with
 *     (s[j+ssd] + 3*s[j]) >> 2;
 *   - at the end, if up is zero, w bytes are copied from s to d with
 *     memcpy, and emms is executed.
 *
 * NOTE(review): the definitions of ssd, crap1, crap2, the per-row pointer
 * updates and the asm loop label/branch are in lines not visible here.
 */
98 static void qpel_li_MMX2(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
108 for (i
=h
-1; i
; i
--) {
/* mm7 is zeroed here; none of the visible instructions below read it. */
110 "pxor %%mm7, %%mm7 \n\t"
/* mm0 = current line, mm1 = line at offset REG_a; two successive
 * averages with mm0 weight the current line 3:1. */
112 "movq (%%"REG_S
"), %%mm0 \n\t"
113 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
114 "pavgb %%mm0, %%mm1 \n\t"
115 "add $8, %%"REG_S
" \n\t"
116 "pavgb %%mm0, %%mm1 \n\t"
117 "movq %%mm1, (%%"REG_D
") \n\t"
118 "add $8, %%"REG_D
" \n\t"
121 : "=S"(crap1
), "=D"(crap2
)
122 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes. */
124 for (j
=w
-(w
&7); j
<w
; j
++)
125 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
129 if (!up
) memcpy(d
, s
, w
);
130 asm volatile("emms \n\t" : : : "memory");
/*
 * qpel_li_MMX: linear quarter-pel blend using only baseline MMX
 * instructions.
 *
 * Visible behaviour:
 *   - an outer loop runs h-1 times (i counts down from h-1 to 1);
 *   - the asm block handles 8 bytes per step (count register = w>>3,
 *     source in REG_S, destination in REG_D, line offset ssd in REG_a):
 *     both lines are widened to 16-bit words, the word sums
 *     3*current + other are formed, shifted right by 2 and packed back
 *     to bytes;
 *   - a scalar loop handles the remaining w&7 bytes with
 *     (s[j+ssd] + 3*s[j]) >> 2;
 *   - at the end, if up is zero, w bytes are copied from s to d with
 *     memcpy, and emms is executed.
 *
 * Fix: the 16-bit results lie in 0..255 and are now packed with packuswb
 * (unsigned saturation). The previous packsswb saturates to the signed
 * byte range, so every result above 127 was stored as 127, disagreeing
 * with the scalar tail below.
 *
 * NOTE(review): the definitions of ssd, crap1, crap2, the per-row pointer
 * updates and the asm loop label/branch are in lines not visible here.
 */
135 static void qpel_li_MMX(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
145 for (i
=h
-1; i
; i
--) {
/* mm7 = 0, used as the high byte when widening bytes to words. */
147 "pxor %%mm7, %%mm7 \n\t"
/* mm0/mm1 = current line (low/high halves), mm2/mm3 = line at REG_a. */
149 "movq (%%"REG_S
"), %%mm0 \n\t"
150 "movq (%%"REG_S
"), %%mm1 \n\t"
151 "movq (%%"REG_S
",%%"REG_a
"), %%mm2 \n\t"
152 "movq (%%"REG_S
",%%"REG_a
"), %%mm3 \n\t"
153 "add $8, %%"REG_S
" \n\t"
154 "punpcklbw %%mm7, %%mm0 \n\t"
155 "punpckhbw %%mm7, %%mm1 \n\t"
156 "punpcklbw %%mm7, %%mm2 \n\t"
157 "punpckhbw %%mm7, %%mm3 \n\t"
/* Add the current line three times: mm2/mm3 = other + 3*current. */
158 "paddw %%mm0, %%mm2 \n\t"
159 "paddw %%mm1, %%mm3 \n\t"
160 "paddw %%mm0, %%mm2 \n\t"
161 "paddw %%mm1, %%mm3 \n\t"
162 "paddw %%mm0, %%mm2 \n\t"
163 "paddw %%mm1, %%mm3 \n\t"
164 "psrlw $2, %%mm2 \n\t"
165 "psrlw $2, %%mm3 \n\t"
/* Unsigned saturation: words in 0..255 must map to bytes 0..255. */
166 "packuswb %%mm3, %%mm2 \n\t"
167 "movq %%mm2, (%%"REG_D
") \n\t"
168 "add $8, %%"REG_D
" \n\t"
171 : "=S"(crap1
), "=D"(crap2
)
172 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
)
/* Scalar tail for the last w&7 bytes. */
174 for (j
=w
-(w
&7); j
<w
; j
++)
175 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
179 if (!up
) memcpy(d
, s
, w
);
180 asm volatile("emms \n\t" : : : "memory");
/*
 * qpel_4tap_MMX: four-tap vertical quarter-pel filter using MMX.
 *
 * Visible behaviour:
 *   - a static table `filter` supplies four groups of four 16-bit
 *     coefficients (29, 110, 9, 3 in the visible rows);
 *   - a linear blend (s[j+ssd] + 3*s[j]) >> 2 appears before and after
 *     the main loop (original lines 197 and 257);
 *   - the main loop runs h-3 times; its asm block handles 8 bytes per
 *     step (count register = w>>3, source in REG_S, destination in REG_D,
 *     +ssd in REG_a, -ssd in REG_b, table address in REG_d);
 *   - a scalar loop handles the remaining w&7 bytes;
 *   - at the end, if up is zero, w bytes are copied from s to d with
 *     memcpy, and emms is executed.
 *
 * NOTE(review): the vector path weights the centre sample by 110 while
 * the scalar tail at original line 252 uses 111 -- confirm whether the
 * difference is intentional.
 * NOTE(review): the scalar tail stores the >>7 result directly into an
 * unsigned char with no clamping, whereas the vector path uses saturating
 * arithmetic and packuswb -- confirm whether out-of-range results can
 * occur for the tail bytes.
 * NOTE(review): the rest of the filter table, the loops that define j for
 * lines 197/257, the definitions of ssd, crap1, crap2 and the asm loop
 * label/branch are in lines not visible in this view.
 */
183 static void qpel_4tap_MMX(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
186 static const short filter
[] = {
187 29, 29, 29, 29, 110, 110, 110, 110,
188 9, 9, 9, 9, 3, 3, 3, 3,
197 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
199 for (i
=h
-3; i
; i
--) {
/* mm0 = 0 for byte->word widening; mm4..mm7 = the four coefficient
 * groups read from the table at byte offsets 0, 8, 16 and 24. */
201 "pxor %%mm0, %%mm0 \n\t"
202 "movq (%%"REG_d
"), %%mm4 \n\t"
203 "movq 8(%%"REG_d
"), %%mm5 \n\t"
204 "movq 16(%%"REG_d
"), %%mm6 \n\t"
205 "movq 24(%%"REG_d
"), %%mm7 \n\t"
/* Low four bytes: mm1 = mm4*[S+a] + mm5*[S] - mm6*[S+b] - mm7*[S+2a],
 * using unsigned saturating add/subtract, then >>7. */
208 "movq (%%"REG_S
",%%"REG_a
"), %%mm1 \n\t"
209 "movq (%%"REG_S
"), %%mm2 \n\t"
210 "movq (%%"REG_S
",%%"REG_b
"), %%mm3 \n\t"
211 "punpcklbw %%mm0, %%mm1 \n\t"
212 "punpcklbw %%mm0, %%mm2 \n\t"
213 "pmullw %%mm4, %%mm1 \n\t"
214 "punpcklbw %%mm0, %%mm3 \n\t"
215 "pmullw %%mm5, %%mm2 \n\t"
216 "paddusw %%mm2, %%mm1 \n\t"
217 "pmullw %%mm6, %%mm3 \n\t"
218 "movq (%%"REG_S
",%%"REG_a
",2), %%mm2 \n\t"
219 "psubusw %%mm3, %%mm1 \n\t"
220 "punpcklbw %%mm0, %%mm2 \n\t"
221 "pmullw %%mm7, %%mm2 \n\t"
222 "psubusw %%mm2, %%mm1 \n\t"
223 "psrlw $7, %%mm1 \n\t"
/* High four bytes: same computation, accumulated in mm2. */
225 "movq (%%"REG_S
",%%"REG_a
"), %%mm2 \n\t"
226 "movq (%%"REG_S
"), %%mm3 \n\t"
227 "punpckhbw %%mm0, %%mm2 \n\t"
228 "punpckhbw %%mm0, %%mm3 \n\t"
229 "pmullw %%mm4, %%mm2 \n\t"
230 "pmullw %%mm5, %%mm3 \n\t"
231 "paddusw %%mm3, %%mm2 \n\t"
232 "movq (%%"REG_S
",%%"REG_b
"), %%mm3 \n\t"
233 "punpckhbw %%mm0, %%mm3 \n\t"
234 "pmullw %%mm6, %%mm3 \n\t"
235 "psubusw %%mm3, %%mm2 \n\t"
236 "movq (%%"REG_S
",%%"REG_a
",2), %%mm3 \n\t"
237 "punpckhbw %%mm0, %%mm3 \n\t"
238 "add $8, %%"REG_S
" \n\t"
239 "pmullw %%mm7, %%mm3 \n\t"
240 "psubusw %%mm3, %%mm2 \n\t"
241 "psrlw $7, %%mm2 \n\t"
/* Pack both halves back to 8 bytes with unsigned saturation and store. */
243 "packuswb %%mm2, %%mm1 \n\t"
244 "movq %%mm1, (%%"REG_D
") \n\t"
245 "add $8, %%"REG_D
" \n\t"
248 : "=S"(crap1
), "=D"(crap2
)
249 : "c"(w
>>3), "S"(s
), "D"(d
), "a"((long)ssd
), "b"((long)-ssd
), "d"(filter
)
/* Scalar tail for the last w&7 bytes (no rounding term, no clamp). */
251 for (j
=w
-(w
&7); j
<w
; j
++)
252 d
[j
] = (-9*s
[j
-ssd
] + 111*s
[j
] + 29*s
[j
+ssd
] - 3*s
[j
+ssd
+ssd
])>>7;
257 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
259 if (!up
) memcpy(d
, s
, w
);
260 asm volatile("emms \n\t" : : : "memory");
/**
 * Clamp a filter result to the 8-bit sample range.
 *
 * This replaces the former branch-free bit trick
 *     (~(a>>31)) & (a | ((a<<23)>>31))
 * which left-shifted a signed int into the sign bit (undefined behaviour
 * in C) and relied on arithmetic right shift of negative values
 * (implementation-defined). That trick was only meaningful for a < 512,
 * and for 256..511 it typically produced -1, which equals 255 only after
 * truncation to unsigned char. The explicit comparisons below give the
 * same stored byte for that range and are well defined for every int.
 *
 * @param a  value to clamp
 * @return   0 if a < 0, 255 if a > 255, otherwise a
 */
static inline int clamp(int a)
{
	if (a < 0)
		return 0;
	if (a > 255)
		return 255;
	return a;
}
/*
 * qpel_li_C: portable C version of the linear quarter-pel blend.
 *
 * Visible behaviour:
 *   - an outer loop runs h-1 times (i counts down from h-1 to 1);
 *   - each output byte is (s[j+ssd] + 3*s[j]) >> 2, i.e. the current
 *     line weighted 3:1 against the line at offset ssd, truncated;
 *   - at the end, if up is zero, w bytes are copied from s to d with
 *     memcpy.
 *
 * NOTE(review): the definition of ssd, the loop over j and the per-row
 * pointer updates are in lines not visible in this view.
 */
271 static void qpel_li_C(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
280 for (i
=h
-1; i
; i
--) {
282 d
[j
] = (s
[j
+ssd
] + 3*s
[j
])>>2;
286 if (!up
) memcpy(d
, s
, w
);
/*
 * qpel_4tap_C: portable C version of the four-tap vertical quarter-pel
 * filter.
 *
 * Visible behaviour:
 *   - a rounded linear blend (s[j+ssd] + 3*s[j] + 2) >> 2 appears before
 *     and after the main loop (original lines 298 and 306);
 *   - the main loop runs h-3 times; each output byte is
 *     clamp((-9*s[j-ssd] + 111*s[j] + 29*s[j+ssd] - 3*s[j+2*ssd] + 64) >> 7)
 *     -- the coefficients sum to 128, and +64 rounds before the >>7;
 *   - at the end, if up is zero, w bytes are copied from s to d with
 *     memcpy.
 *
 * NOTE(review): the definition of ssd, the loops over j and the per-row
 * pointer updates are in lines not visible in this view.
 */
289 static void qpel_4tap_C(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
)
298 d
[j
] = (s
[j
+ssd
] + 3*s
[j
] + 2)>>2;
300 for (i
=h
-3; i
; i
--) {
302 d
[j
] = clamp((-9*s
[j
-ssd
] + 111*s
[j
] + 29*s
[j
+ssd
] - 3*s
[j
+ssd
+ssd
] + 64)>>7);
306 d
[j
] = (s
[j
+ssd
] + 3*s
[j
] + 2)>>2;
308 if (!up
) memcpy(d
, s
, w
);
/*
 * File-scope function pointers selecting the active implementation of
 * the linear (qpel_li) and four-tap (qpel_4tap) quarter-pel routines.
 * Both share the signature (d, s, w, h, ds, ss, up) used by the
 * qpel_li_* and qpel_4tap_* functions in this file.
 */
311 static void (*qpel_li
)(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
);
312 static void (*qpel_4tap
)(unsigned char *d
, unsigned char *s
, int w
, int h
, int ds
, int ss
, int up
);
/*
 * put_image: split the incoming frame mpi into its two fields and pass
 * each field to the next filter as its own image.
 *
 * Visible behaviour:
 *   - bpp is set to mpi->bpp/8 when the image is not planar;
 *   - tff is derived either from the image's field flags or from
 *     vf->priv->parity;
 *   - vf->priv->mode is switched on twice; the second switch contains
 *     three two-pass loops (i = 0, 1), each of which obtains an output
 *     image, fills it for field (i ^ !tff), hands it to
 *     vf_next_put_image() with MP_NOPTS_VALUE, and issues
 *     VFCTRL_FLIP_PAGE after the first pass only;
 *   - the results of vf_next_put_image() are OR-ed into ret.
 *
 * NOTE(review): the case labels, local declarations, the assignments to
 * the local qpel pointer and the return statement are in lines not
 * visible in this view, so which mode value selects which loop cannot be
 * stated from here.
 * NOTE(review): no check of the vf_get_image() result is visible before
 * dmpi is dereferenced -- confirm against the complete file.
 */
314 static int put_image(struct vf_instance_s
* vf
, mp_image_t
*mpi
, double pts
)
/* Local pointer to the quarter-pel routine used by the last loop below. */
319 void (*qpel
)(unsigned char *, unsigned char *, int, int, int, int, int);
/* Packed formats: convert bits per pixel to bytes per pixel. */
323 if (!(mpi
->flags
& MP_IMGFLAG_PLANAR
)) bpp
= mpi
->bpp
/8;
/* Negative parity: take the field order from the image flags when the
 * image reports that its fields are ordered. */
324 if (vf
->priv
->parity
< 0) {
325 if (mpi
->fields
& MP_IMGFIELD_ORDERED
)
326 tff
= mpi
->fields
& MP_IMGFIELD_TOP_FIRST
;
/* Field order from the parity value: even parity gives tff = 1, odd
 * gives tff = 0. NOTE(review): the `if` this `else` pairs with is in
 * lines not shown (original 327-329). */
330 else tff
= (vf
->priv
->parity
&1)^1;
332 switch (vf
->priv
->mode
) {
337 // TODO: add 3tap filter
345 switch (vf
->priv
->mode
) {
/* Loop 1: export each field without copying -- a half-height EXPORT
 * image whose plane pointers start at line (i ^ !tff) of the source and
 * whose strides are doubled so that every other line is skipped. */
347 for (i
=0; i
<2; i
++) {
348 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
349 MP_IMGTYPE_EXPORT
, MP_IMGFLAG_ACCEPT_STRIDE
,
350 mpi
->width
, mpi
->height
/2);
351 dmpi
->planes
[0] = mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0];
352 dmpi
->stride
[0] = 2*mpi
->stride
[0];
353 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
354 dmpi
->planes
[1] = mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1];
355 dmpi
->planes
[2] = mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2];
356 dmpi
->stride
[1] = 2*mpi
->stride
[1];
357 dmpi
->stride
[2] = 2*mpi
->stride
[2];
359 ret
|= vf_next_put_image(vf
, dmpi
, MP_NOPTS_VALUE
);
360 if (!i
) vf_next_control(vf
, VFCTRL_FLIP_PAGE
, NULL
);
/* Loop 2: full-height TEMP image. The lines of field (i ^ !tff) are
 * copied with my_memcpy_pic (h/2 rows, both strides doubled), then
 * deint() is called on the plane; chroma planes get the same treatment
 * for planar formats. */
364 for (i
=0; i
<2; i
++) {
365 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
366 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
367 mpi
->width
, mpi
->height
);
368 my_memcpy_pic(dmpi
->planes
[0] + (i
^!tff
)*dmpi
->stride
[0],
369 mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0],
370 mpi
->w
*bpp
, mpi
->h
/2, dmpi
->stride
[0]*2, mpi
->stride
[0]*2);
371 deint(dmpi
->planes
[0], dmpi
->stride
[0], mpi
->planes
[0], mpi
->stride
[0], mpi
->w
, mpi
->h
, (i
^!tff
));
372 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
373 my_memcpy_pic(dmpi
->planes
[1] + (i
^!tff
)*dmpi
->stride
[1],
374 mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1],
375 mpi
->chroma_width
, mpi
->chroma_height
/2,
376 dmpi
->stride
[1]*2, mpi
->stride
[1]*2);
377 my_memcpy_pic(dmpi
->planes
[2] + (i
^!tff
)*dmpi
->stride
[2],
378 mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2],
379 mpi
->chroma_width
, mpi
->chroma_height
/2,
380 dmpi
->stride
[2]*2, mpi
->stride
[2]*2);
381 deint(dmpi
->planes
[1], dmpi
->stride
[1], mpi
->planes
[1], mpi
->stride
[1],
382 mpi
->chroma_width
, mpi
->chroma_height
, (i
^!tff
));
383 deint(dmpi
->planes
[2], dmpi
->stride
[2], mpi
->planes
[2], mpi
->stride
[2],
384 mpi
->chroma_width
, mpi
->chroma_height
, (i
^!tff
));
386 ret
|= vf_next_put_image(vf
, dmpi
, MP_NOPTS_VALUE
);
387 if (!i
) vf_next_control(vf
, VFCTRL_FLIP_PAGE
, NULL
);
/* Loop 3: half-height TEMP image filled by the selected qpel routine.
 * The source starts at line (i ^ !tff) with a doubled stride, and the
 * same (i ^ !tff) value is passed as the routine's `up` argument. */
393 for (i
=0; i
<2; i
++) {
394 dmpi
= vf_get_image(vf
->next
, mpi
->imgfmt
,
395 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
396 mpi
->width
, mpi
->height
/2);
397 qpel(dmpi
->planes
[0], mpi
->planes
[0] + (i
^!tff
)*mpi
->stride
[0],
398 mpi
->w
*bpp
, mpi
->h
/2, dmpi
->stride
[0], mpi
->stride
[0]*2, (i
^!tff
));
399 if (mpi
->flags
& MP_IMGFLAG_PLANAR
) {
400 qpel(dmpi
->planes
[1],
401 mpi
->planes
[1] + (i
^!tff
)*mpi
->stride
[1],
402 mpi
->chroma_width
, mpi
->chroma_height
/2,
403 dmpi
->stride
[1], mpi
->stride
[1]*2, (i
^!tff
));
404 qpel(dmpi
->planes
[2],
405 mpi
->planes
[2] + (i
^!tff
)*mpi
->stride
[2],
406 mpi
->chroma_width
, mpi
->chroma_height
/2,
407 dmpi
->stride
[2], mpi
->stride
[2]*2, (i
^!tff
));
409 ret
|= vf_next_put_image(vf
, dmpi
, MP_NOPTS_VALUE
);
410 if (!i
) vf_next_control(vf
, VFCTRL_FLIP_PAGE
, NULL
);
/*
 * query_format: format negotiation hook. The visible path forwards the
 * query for fmt to the next filter via vf_next_query_format().
 *
 * NOTE(review): the lines between the FIXME and the return (original
 * 420-423) are not visible in this view; presumably they restrict which
 * formats reach the forwarding call -- confirm against the complete file.
 */
417 static int query_format(struct vf_instance_s
* vf
, unsigned int fmt
)
419 /* FIXME - figure out which other formats work */
424 return vf_next_query_format(vf
, fmt
);
/*
 * config: propagate the stream configuration to the next filter.
 *
 * The visible code switches on vf->priv->mode and has two exits: one
 * forwards the configuration with the height halved (height/2), the
 * other forwards it unchanged. All other arguments are passed through
 * as received in both cases.
 *
 * NOTE(review): the case labels are in lines not visible in this view,
 * so which modes take which exit cannot be stated from here.
 */
429 static int config(struct vf_instance_s
* vf
,
430 int width
, int height
, int d_width
, int d_height
,
431 unsigned int flags
, unsigned int outfmt
)
433 switch (vf
->priv
->mode
) {
/* Half-height output. */
438 return vf_next_config(vf
,width
,height
/2,d_width
,d_height
,flags
,outfmt
);
/* Full-height output. */
440 return vf_next_config(vf
,width
,height
,d_width
,d_height
,flags
,outfmt
);
/*
 * uninit: teardown hook for a filter instance.
 *
 * NOTE(review): the body is not visible in this view; presumably it
 * releases vf->priv -- confirm against the complete file.
 */
445 static void uninit(struct vf_instance_s
* vf
)
/*
 * open: initialise a filter instance.
 *
 * Visible behaviour:
 *   - installs put_image as the instance's image callback and sets
 *     default_reqs to VFCAP_ACCEPT_STRIDE;
 *   - allocates a zeroed struct vf_priv_s and stores it in vf->priv;
 *   - sets parity to -1 by default, then, if args is non-NULL, parses
 *     "mode:parity" from it with sscanf;
 *   - selects the quarter-pel implementations from the CPU capability
 *     flags in gCpuCaps. In the visible order, later assignments
 *     overwrite earlier ones, so qpel_li ends up as the 3DNow! version
 *     if available, else MMX2, else MMX.
 *
 * NOTE(review): vf->priv is dereferenced at original line 460 with no
 * visible check that calloc() succeeded -- confirm against the complete
 * file.
 * NOTE(review): the sscanf() result is not checked in the visible code,
 * and no range validation of mode is visible.
 * NOTE(review): the other callback assignments, the default for qpel_li,
 * any preprocessor guards around the CPU-specific assignments and the
 * return statement are in lines not visible in this view.
 */
450 static int open(vf_instance_t
*vf
, char* args
)
454 vf
->put_image
= put_image
;
455 //vf->query_format = query_format;
457 vf
->default_reqs
= VFCAP_ACCEPT_STRIDE
;
458 vf
->priv
= p
= calloc(1, sizeof(struct vf_priv_s
));
/* -1 makes put_image take the field order from the image flags. */
460 vf
->priv
->parity
= -1;
461 if (args
) sscanf(args
, "%d:%d", &vf
->priv
->mode
, &vf
->priv
->parity
);
/* Portable default, overridden below when SIMD support is detected. */
463 qpel_4tap
= qpel_4tap_C
;
465 if(gCpuCaps
.hasMMX
) qpel_li
= qpel_li_MMX
;
466 if(gCpuCaps
.hasMMX
) qpel_4tap
= qpel_4tap_MMX
;
469 if(gCpuCaps
.hasMMX2
) qpel_li
= qpel_li_MMX2
;
472 if(gCpuCaps
.has3DNow
) qpel_li
= qpel_li_3DNOW
;
477 vf_info_t vf_info_tfields
= {
478 "temporal field separation",