vo_mga: switch to newer screen dimension handling API
[mplayer.git] / libmpcodecs / vf_filmdint.c
blobc8da011d81ba890f5150e65d5b08e846da8c366b
1 /*
2 * This file is part of MPlayer.
4 * MPlayer is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * MPlayer is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/time.h>
24 #include "config.h"
25 #include "mp_msg.h"
26 #include "cpudetect.h"
27 #include "options.h"
29 #include "img_format.h"
30 #include "mp_image.h"
31 #include "vf.h"
32 #include "cmmx.h"
34 #include "libvo/fastmemcpy.h"
36 #define NUM_STORED 4
/*
 * Field/frame classification kept in the filter state (prev_type in
 * struct vf_priv_s).
 * NOTE(review): judging by the names only, the values give the position of a
 * field inside a 3-field or 2-field pulldown group, plus an "interlaced"
 * case -- the code that assigns them is outside this view, confirm there.
 */
38 enum pu_field_type_t {
39 PU_1ST_OF_3,
40 PU_2ND_OF_3,
41 PU_3RD_OF_3,
42 PU_1ST_OF_2,
43 PU_2ND_OF_2,
44 PU_INTERLACED
/*
 * Four 16-bit difference sums for one block, as accumulated by the
 * get_metrics_*() functions and by BLOCK_METRICS_TEMPLATE():
 *   even  - even lines of the new frame against the same lines of the old frame
 *   odd   - odd lines of the new frame against the same lines of the old frame
 *   noise - interpolated even lines against interpolated odd lines, both
 *           taken from the new frame
 *   temp  - interpolated even lines of the new frame against interpolated
 *           odd lines of the old frame
 * The member order is significant: the MMX code stores a whole 64-bit
 * register into this struct (even in the lowest word, temp in the highest).
 */
47 struct metrics {
48 /* This struct maps to a packed word 64-bit MMX register */
49 unsigned short int even;
50 unsigned short int odd;
51 unsigned short int noise;
52 unsigned short int temp;
53 } __attribute__ ((aligned (8)));
/*
 * Per-frame statistics gathered over all blocks (see get_block_stats()).
 */
55 struct frame_stats {
/*
 * Block counters, one per metric:
 *   tiny / low / high - blocks whose metric exceeds 1x / 2x / 4x the
 *                       corresponding p->thres value (only counted for blocks
 *                       that were not classified as interlaced)
 *   bigger            - blocks whose metric is at least roughly 1.5x its
 *                       counterpart (even vs. odd, noise vs. temp)
 *   twox              - same comparison with a factor of 2
 *   max               - largest value seen for each metric
 */
56 struct metrics tiny, low, high, bigger, twox, max;
/* Sums of the block metrics that reached p->sad_thres; diff_fields()
 * afterwards scales them by 16 / num_blocks. */
57 struct { unsigned int even, odd, noise, temp; } sad;
/* Number of blocks classified as interlaced using the 2x (high) and the
 * 1x (low) noise/temp thresholds. */
58 unsigned short interlaced_high;
59 unsigned short interlaced_low;
/* Number of blocks that went into these statistics. */
60 unsigned short num_blocks;
/*
 * Private state of one filter instance.  Comments below describe only what
 * the code visible in this part of the file does with each member; members
 * without a comment are not used in the visible part.
 */
63 struct vf_priv_s {
/* inframes: its low bit selects an entry of stats[] in find_breaks(); it is
 * also printed in verbose mode.  NOTE(review): presumably frame counters --
 * they are incremented outside this view. */
64 unsigned long inframes;
65 unsigned long outframes;
66 enum pu_field_type_t prev_type;
/* swapped is the starting field parity in copy_merge_fields() and shifts the
 * planes up by one line in diff_planes(); chroma_swapped is XORed into the
 * parity before the chroma planes are copied. */
67 unsigned swapped, chroma_swapped;
/* Options set by parse_args(): luma_only=, verbose=, fast=.
 * fast > 3 selects block_metrics_faster_c(), fast > 1 block_metrics_fast_c(). */
68 unsigned luma_only;
69 unsigned verbose;
70 unsigned fast;
/* w, h: size of the processed area (crop= option); cw, ch: the same shifted
 * by the chroma shifts; stride, chroma_stride: line pitch of the internal
 * buffers; nplanes: 1, or 3 for planar formats (all set in init()). */
71 unsigned long w, h, cw, ch, stride, chroma_stride, nplanes;
/* Options sad_thres= and dint_thres=. */
72 unsigned long sad_thres;
73 unsigned long dint_thres;
/* Raw pointer returned by malloc() in init(). */
74 unsigned char *memory_allocated;
/* planes[0..NUM_STORED-1]: base pointers of the stored pictures;
 * planes[NUM_STORED..2*NUM_STORED-1]: the same pictures offset by the crop
 * origin (see init()). */
75 unsigned char *planes[2*NUM_STORED][4];
/* Plane set currently treated as the "old" picture; get_image() moves its
 * contents aside before the decoder may overwrite it. */
76 unsigned char **old_planes;
/* Rotating counters used by get_image() to pick a buffer. */
77 unsigned long static_idx;
78 unsigned long temp_idx;
/* Crop origin in luma (crop= option) and chroma (derived in init()) units. */
79 unsigned long crop_x, crop_y, crop_cx, crop_cy;
80 unsigned long export_count, merge_count;
81 unsigned long num_breaks;
/* Number of times get_image() had to copy the old picture. */
82 unsigned long num_copies;
/* in_inc / out_dec come from the io= option; init() multiplies out_dec by 4
 * and reduces both by their gcd.  iosync is reset to 0 in init(). */
83 long in_inc, out_dec, iosync;
84 long num_fields;
85 long prev_fields;
86 long notout;
/* mmx2= option: 1 selects the MMX2 routines, 2 the 3DNow! block metrics,
 * anything else the C code (see diff_planes() and dint_copy_plane()). */
87 long mmx2;
88 unsigned small_bytes[2];
89 unsigned mmx_temp[2];
/* Statistics of the two most recent frames, indexed by frame parity. */
90 struct frame_stats stats[2];
/* Per-metric thresholds; diff_thres= sets .even, comb_thres= sets .noise. */
91 struct metrics thres;
/* Character printed after the frame number in verbose mode. */
92 char chflag;
93 double diff_time, merge_time, decode_time, vo_time, filter_time;
/*
 * Canned frame_stats initialisers: every metrics group (and the sad group)
 * is { 2000, 2000, 0, 2000 }, the two interlaced counters are 0 and
 * num_blocks is 9999.  PPZ and PPR currently expand to the same values.
 * Neither constant is referenced in the part of the file visible here.
 */
96 #define PPZ { 2000, 2000, 0, 2000 }
97 #define PPR { 2000, 2000, 0, 2000 }
98 static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999};
99 static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999};
/*
 * Classic two-argument minimum / maximum, defined only if the headers did
 * not already provide them.  Both macros evaluate their arguments twice, so
 * do not pass expressions with side effects.
 */
101 #ifndef MIN
102 #define MIN(a,b) (((a)<(b))?(a):(b))
103 #endif
104 #ifndef MAX
105 #define MAX(a,b) (((a)>(b))?(a):(b))
106 #endif
/*
 * Inline-assembly building blocks (AT&T operand order: source, destination).
 * Each macro expands to a string fragment for use inside an __asm__ statement.
 */

/* T = |X - Y| per unsigned byte.  Y is clobbered (left holding Y -sat X). */
108 #define PDIFFUB(X,Y,T) "movq " #X "," #T "\n\t" \
109 "psubusb " #Y "," #T "\n\t" \
110 "psubusb " #X "," #Y "\n\t" \
111 "paddusb " #Y "," #T "\n\t"

/* Y = |X - Y| per unsigned byte.  T is clobbered (left holding X -sat Y). */
113 #define PDIFFUBT(X,Y,T) "movq " #X "," #T "\n\t" \
114 "psubusb " #Y "," #T "\n\t" \
115 "psubusb " #X "," #Y "\n\t" \
116 "paddusb " #T "," #Y "\n\t"

/* Horizontal sum of the eight bytes of X, left in the lowest word of X with
 * the upper words cleared.  Z must hold zero; T is a scratch register. */
118 #define PSUMBW(X,T,Z) "movq " #X "," #T "\n\t" \
119 "punpcklbw " #Z "," #X "\n\t" \
120 "punpckhbw " #Z "," #T "\n\t" \
121 "paddw " #T "," #X "\n\t" \
122 "movq " #X "," #T "\n\t" \
123 "psllq $32, " #T "\n\t" \
124 "paddw " #T "," #X "\n\t" \
125 "movq " #X "," #T "\n\t" \
126 "psllq $16, " #T "\n\t" \
127 "paddw " #T "," #X "\n\t" \
128 "psrlq $48, " #X "\n\t"

/* Y = sum of absolute byte differences of X and Y (plain-MMX emulation of
 * the psadbw instruction).  T is scratch, Z must hold zero. */
130 #define PSADBW(X,Y,T,Z) PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z)

/* Y = max(X, Y) per unsigned byte / per unsigned word, built from a
 * saturating subtract followed by a saturating add. */
132 #define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t"
133 #define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t"
/* Y = min(X, Y) per unsigned byte.  T is scratch. */
134 #define PMINUBT(X,Y,T) "movq " #Y "," #T "\n\t" \
135 "psubusb " #X "," #T "\n\t" \
136 "psubusb " #T "," #Y "\n\t"
/* Byte average using the 3DNow! pavgusb instruction (Y = avg(X, Y)). */
137 #define PAVGB(X,Y) "pavgusb " #X "," #Y "\n\t"
/*
 * Portable C implementation of the block metrics: adds the even / odd /
 * noise / temp difference sums of one sizeof(cmmx_t)-byte wide column to *m.
 *
 * a, b    same position in the old (a) and the new (b) frame
 * as, bs  line pitch of a and b
 * lines   number of line pairs to process; the do/while runs at least once,
 *         so it must be >= 1
 * m       accumulator, updated in place (not cleared here)
 *
 * The pixel arithmetic helpers (p31avgb, pdiffub, pminub, psumbw, psadbw)
 * are not defined in this file.
 * NOTE(review): presumably they come from cmmx.h and p31avgb is a 3:1
 * weighted byte average -- confirm there.
 */
139 static inline void
140 get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines,
141 struct metrics *m)
/* Start one line above the block: each iteration reads lines 0..4 relative
 * to the adjusted pointers. */
143 a -= as;
144 b -= bs;
145 do {
/* po = previous odd line, e = even line, o = odd line, ne = next even line,
 * no = next odd line; the old_ variants are read from the old frame. */
146 cmmx_t old_po = *(cmmx_t*)(a );
147 cmmx_t po = *(cmmx_t*)(b );
148 cmmx_t e = *(cmmx_t*)(b + bs);
149 cmmx_t old_o = *(cmmx_t*)(a + 2*as);
150 cmmx_t o = *(cmmx_t*)(b + 2*bs);
151 cmmx_t ne = *(cmmx_t*)(b + 3*bs);
152 cmmx_t old_no = *(cmmx_t*)(a + 4*as);
153 cmmx_t no = *(cmmx_t*)(b + 4*bs);
/* Odd lines interpolated towards the neighbouring odd line above (qup) and
 * below (qdown), for the old and the new frame. */
155 cmmx_t qup_old_odd = p31avgb(old_o, old_po);
156 cmmx_t qup_odd = p31avgb( o, po);
157 cmmx_t qdown_old_odd = p31avgb(old_o, old_no);
158 cmmx_t qdown_odd = p31avgb( o, no);
/* The two even lines interpolated towards each other. */
160 cmmx_t qup_even = p31avgb(ne, e);
161 cmmx_t qdown_even = p31avgb(e, ne);
/* temp_*: new even lines against OLD odd lines; noise_*: new even lines
 * against NEW odd lines. */
163 cmmx_t temp_up_diff = pdiffub(qdown_even, qup_old_odd);
164 cmmx_t noise_up_diff = pdiffub(qdown_even, qup_odd);
165 cmmx_t temp_down_diff = pdiffub(qup_even, qdown_old_odd);
166 cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd);
168 cmmx_t odd_diff = pdiffub(o, old_o);
169 m->odd += psumbw(odd_diff);
/* a+as is the even line of the old frame. */
170 m->even += psadbw(e, *(cmmx_t*)(a+as));
/* Both temp and noise are limited to the smaller of the up/down estimate
 * and the plain odd-line difference. */
172 temp_up_diff = pminub(temp_up_diff, temp_down_diff);
173 temp_up_diff = pminub(temp_up_diff, odd_diff);
174 m->temp += psumbw(temp_up_diff);
175 noise_up_diff = pminub(noise_up_diff, odd_diff);
176 noise_up_diff = pminub(noise_up_diff, noise_down_diff);
178 m->noise += psumbw(noise_up_diff);
/* Advance by one line pair. */
179 a += 2*as;
180 b += 2*bs;
181 } while (--lines);
/*
 * Reduced-precision variant of get_metrics_c().  Every input word is shifted
 * right by one and masked with ~SIGN_BITS before use, the "_s" helper
 * variants operate on those halved values, and every sum is shifted left by
 * one again before it is added to *m.
 *
 * Parameters are the same as for get_metrics_c(); lines must be >= 1.
 * NOTE(review): SIGN_BITS and the *_s helpers are not defined in this file;
 * presumably SIGN_BITS is the top bit of every byte -- confirm in cmmx.h.
 */
184 static inline void
185 get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
186 int lines, struct metrics *m)
/* Start one line above the block, as in get_metrics_c(). */
188 a -= as;
189 b -= bs;
190 do {
191 cmmx_t old_po = (*(cmmx_t*)(a ) >> 1) & ~SIGN_BITS;
192 cmmx_t po = (*(cmmx_t*)(b ) >> 1) & ~SIGN_BITS;
193 cmmx_t old_e = (*(cmmx_t*)(a + as) >> 1) & ~SIGN_BITS;
194 cmmx_t e = (*(cmmx_t*)(b + bs) >> 1) & ~SIGN_BITS;
195 cmmx_t old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS;
196 cmmx_t o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS;
197 cmmx_t ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS;
198 cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS;
199 cmmx_t no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS;
201 cmmx_t qup_old_odd = p31avgb_s(old_o, old_po);
202 cmmx_t qup_odd = p31avgb_s( o, po);
203 cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no);
204 cmmx_t qdown_odd = p31avgb_s( o, no);
206 cmmx_t qup_even = p31avgb_s(ne, e);
207 cmmx_t qdown_even = p31avgb_s(e, ne);
209 cmmx_t temp_up_diff = pdiffub_s(qdown_even, qup_old_odd);
210 cmmx_t noise_up_diff = pdiffub_s(qdown_even, qup_odd);
211 cmmx_t temp_down_diff = pdiffub_s(qup_even, qdown_old_odd);
212 cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd);
214 cmmx_t odd_diff = pdiffub_s(o, old_o);
/* "<< 1" undoes the halving of the inputs. */
215 m->odd += psumbw_s(odd_diff) << 1;
216 m->even += psadbw_s(e, old_e) << 1;
218 temp_up_diff = pminub_s(temp_up_diff, temp_down_diff);
219 temp_up_diff = pminub_s(temp_up_diff, odd_diff);
220 m->temp += psumbw_s(temp_up_diff) << 1;
221 noise_up_diff = pminub_s(noise_up_diff, odd_diff);
222 noise_up_diff = pminub_s(noise_up_diff, noise_down_diff);
224 m->noise += psumbw_s(noise_up_diff) << 1;
225 a += 2*as;
226 b += 2*bs;
227 } while (--lines);
/*
 * Cheapest C variant of the block metrics.  Like get_metrics_fast_c() it
 * works on halved inputs, but it only evaluates the "up" interpolations
 * (even line towards the next even line, odd line towards the previous odd
 * line) and therefore never reads the fifth line of the window.
 *
 * Parameters are the same as for get_metrics_c(); lines must be >= 1.
 */
230 static inline void
231 get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
232 int lines, struct metrics *m)
/* Start one line above the block. */
234 a -= as;
235 b -= bs;
236 do {
237 cmmx_t old_po = (*(cmmx_t*)(a )>>1) & ~SIGN_BITS;
238 cmmx_t po = (*(cmmx_t*)(b )>>1) & ~SIGN_BITS;
239 cmmx_t old_e = (*(cmmx_t*)(a + as)>>1) & ~SIGN_BITS;
240 cmmx_t e = (*(cmmx_t*)(b + bs)>>1) & ~SIGN_BITS;
241 cmmx_t old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS;
242 cmmx_t o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS;
243 cmmx_t ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS;
245 cmmx_t down_even = p31avgb_s(e, ne);
246 cmmx_t up_odd = p31avgb_s(o, po);
247 cmmx_t up_old_odd = p31avgb_s(old_o, old_po);
249 cmmx_t odd_diff = pdiffub_s(o, old_o);
250 cmmx_t temp_diff = pdiffub_s(down_even, up_old_odd);
251 cmmx_t noise_diff = pdiffub_s(down_even, up_odd);
/* "<< 1" undoes the halving of the inputs. */
253 m->even += psadbw_s(e, old_e) << 1;
254 m->odd += psumbw_s(odd_diff) << 1;
/* temp and noise are limited to the plain odd-line difference. */
256 temp_diff = pminub_s(temp_diff, odd_diff);
257 noise_diff = pminub_s(noise_diff, odd_diff);
259 m->noise += psumbw_s(noise_diff) << 1;
260 m->temp += psumbw_s(temp_diff) << 1;
261 a += 2*as;
262 b += 2*bs;
263 } while (--lines);
/*
 * Fold the metrics *m of one block into the frame statistics *s, using the
 * thresholds p->thres and p->sad_thres.
 *
 * A block that looks interlaced (noise and temp both above their threshold
 * and neither clearly bigger than the other) only bumps the interlaced
 * counters; every other block updates the tiny/low/high counters and the
 * sad sums.  num_blocks, max, bigger and twox are updated for every block.
 */
267 static inline void
268 get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s)
/* two_x = metric + max(metric, threshold): twice the metric, but never less
 * than metric + threshold. */
270 unsigned two_e = m->even + MAX(m->even , p->thres.even );
271 unsigned two_o = m->odd + MAX(m->odd , p->thres.odd );
272 unsigned two_n = m->noise + MAX(m->noise, p->thres.noise);
273 unsigned two_t = m->temp + MAX(m->temp , p->thres.temp );
/* x_big: the metric is at least the rounded mean of its counterpart and the
 * counterpart's two_x value, i.e. roughly 1.5 times the counterpart. */
275 unsigned e_big = m->even >= (m->odd + two_o + 1)/2;
276 unsigned o_big = m->odd >= (m->even + two_e + 1)/2;
277 unsigned n_big = m->noise >= (m->temp + two_t + 1)/2;
278 unsigned t_big = m->temp >= (m->noise + two_n + 1)/2;
/* x2x: the metric is at least the counterpart's two_x value. */
280 unsigned e2x = m->even >= two_o;
281 unsigned o2x = m->odd >= two_e;
282 unsigned n2x = m->noise >= two_t;
283 unsigned t2x = m->temp >= two_n;
/* ntiny / nlow / high: metric above 1x / 2x / 4x its threshold. */
285 unsigned ntiny_e = m->even > p->thres.even ;
286 unsigned ntiny_o = m->odd > p->thres.odd ;
287 unsigned ntiny_n = m->noise > p->thres.noise;
288 unsigned ntiny_t = m->temp > p->thres.temp ;
290 unsigned nlow_e = m->even > 2*p->thres.even ;
291 unsigned nlow_o = m->odd > 2*p->thres.odd ;
292 unsigned nlow_n = m->noise > 2*p->thres.noise;
293 unsigned nlow_t = m->temp > 2*p->thres.temp ;
295 unsigned high_e = m->even > 4*p->thres.even ;
296 unsigned high_o = m->odd > 4*p->thres.odd ;
297 unsigned high_n = m->noise > 4*p->thres.noise;
298 unsigned high_t = m->temp > 4*p->thres.temp ;
/* Interlaced: noise and temp are similar in size and both above the 1x
 * (low_il) or 2x (high_il) threshold. */
300 unsigned low_il = !n_big && !t_big && ntiny_n && ntiny_t;
301 unsigned high_il = !n_big && !t_big && nlow_n && nlow_t;
303 if (low_il | high_il) {
304 s->interlaced_low += low_il;
305 s->interlaced_high += high_il;
306 } else {
307 s->tiny.even += ntiny_e;
308 s->tiny.odd += ntiny_o;
309 s->tiny.noise += ntiny_n;
310 s->tiny.temp += ntiny_t;
312 s->low .even += nlow_e ;
313 s->low .odd += nlow_o ;
314 s->low .noise += nlow_n ;
315 s->low .temp += nlow_t ;
317 s->high.even += high_e ;
318 s->high.odd += high_o ;
319 s->high.noise += high_n ;
320 s->high.temp += high_t ;
/* Only metrics that reach sad_thres contribute to the sums. */
322 if (m->even >= p->sad_thres) s->sad.even += m->even ;
323 if (m->odd >= p->sad_thres) s->sad.odd += m->odd ;
324 if (m->noise >= p->sad_thres) s->sad.noise += m->noise;
325 if (m->temp >= p->sad_thres) s->sad.temp += m->temp ;
/* The remaining updates apply to every block. */
327 s->num_blocks++;
328 s->max.even = MAX(s->max.even , m->even );
329 s->max.odd = MAX(s->max.odd , m->odd );
330 s->max.noise = MAX(s->max.noise, m->noise);
331 s->max.temp = MAX(s->max.temp , m->temp );
333 s->bigger.even += e_big ;
334 s->bigger.odd += o_big ;
335 s->bigger.noise += n_big ;
336 s->bigger.temp += t_big ;
338 s->twox.even += e2x ;
339 s->twox.odd += o2x ;
340 s->twox.noise += n2x ;
341 s->twox.temp += t2x ;
/*
 * Compute the metrics of one 8-byte wide block with get_metrics_c(), feed
 * them to get_block_stats() and return them.
 *
 * a, b    block position in the old and the new frame
 * as, bs  line pitch of a and b
 * lines   number of line pairs in the block
 * p, s    thresholds and the frame statistics to update
 */
345 static inline struct metrics
346 block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs,
347 int lines, struct vf_priv_s *p, struct frame_stats *s)
349 struct metrics tm;
350 tm.even = tm.odd = tm.noise = tm.temp = 0;
351 get_metrics_c(a, b, as, bs, lines, &tm);
/* When cmmx_t is narrower than 8 bytes a second pass covers bytes 4..7 so
 * that the block is 8 bytes wide either way. */
352 if (sizeof(cmmx_t) < 8)
353 get_metrics_c(a+4, b+4, as, bs, lines, &tm);
354 get_block_stats(&tm, p, s);
355 return tm;
/*
 * Same as block_metrics_c() but based on get_metrics_fast_c(): computes the
 * metrics of one 8-byte wide block, updates *s through get_block_stats()
 * and returns the metrics.
 */
358 static inline struct metrics
359 block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
360 int lines, struct vf_priv_s *p, struct frame_stats *s)
362 struct metrics tm;
363 tm.even = tm.odd = tm.noise = tm.temp = 0;
364 get_metrics_fast_c(a, b, as, bs, lines, &tm);
/* Second pass for bytes 4..7 when cmmx_t is narrower than 8 bytes. */
365 if (sizeof(cmmx_t) < 8)
366 get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm);
367 get_block_stats(&tm, p, s);
368 return tm;
/*
 * Same as block_metrics_c() but based on get_metrics_faster_c(): computes
 * the metrics of one 8-byte wide block, updates *s through
 * get_block_stats() and returns the metrics.
 */
371 static inline struct metrics
372 block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
373 int lines, struct vf_priv_s *p, struct frame_stats *s)
375 struct metrics tm;
376 tm.even = tm.odd = tm.noise = tm.temp = 0;
377 get_metrics_faster_c(a, b, as, bs, lines, &tm);
/* Second pass for bytes 4..7 when cmmx_t is narrower than 8 bytes. */
378 if (sizeof(cmmx_t) < 8)
379 get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm);
380 get_block_stats(&tm, p, s);
381 return tm;
/* True when X and Y agree in all four members (even, odd, temp, noise).
 * Works for any struct type that has those members; arguments are
 * evaluated several times. */
384 #define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise)
/*
 * Shared body of the assembly block-metrics functions.  It is expanded
 * twice: once with the plain-MMX/3DNow! definitions of PSADBW, PSUMBW,
 * PMINUBT and PAVGB, and once after those macros have been redefined for
 * MMX2.
 *
 * The expansion expects these names in the enclosing scope:
 *   a, b    block pointers (old / new frame); both are moved one line up
 *           first and then advanced by two lines per iteration
 *   as, bs  line pitches
 *   lines   number of line pairs; decremented to 0 (must be >= 1)
 *   ones    64-bit constant with every byte set to 1
 *
 * On exit mm7 holds the four metrics as packed words in struct metrics
 * order (even, odd, noise, temp) and mm6 holds zero.  The MMX state is left
 * active, so the caller has to execute emms.
 */
386 #define BLOCK_METRICS_TEMPLATE() \
387 __asm__ volatile("pxor %mm7, %mm7\n\t" /* The result is collected in mm7 */ \
388 "pxor %mm6, %mm6\n\t" /* Temp to stay at 0 */ \
389 ); \
390 a -= as; \
391 b -= bs; \
392 do { \
393 __asm__ volatile( \
394 "movq (%0,%2), %%mm0\n\t" \
395 "movq (%1,%3), %%mm1\n\t" /* mm1 = even */ \
396 PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \
397 "paddusw %%mm0, %%mm7\n\t" /* even diff */ \
398 "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */ \
399 "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */ \
400 "movq (%0), %%mm3\n\t" \
401 "psubusb %4, %%mm3\n\t" \
402 PAVGB(%%mm0, %%mm3) \
403 PAVGB(%%mm0, %%mm3) /* mm3 = qup old odd */ \
404 "movq %%mm0, %%mm5\n\t" \
405 PSADBW(%%mm2, %%mm0, %%mm4, %%mm6) \
406 "psllq $16, %%mm0\n\t" \
407 "paddusw %%mm0, %%mm7\n\t" \
408 "movq (%1), %%mm4\n\t" \
409 "lea (%0,%2,2), %0\n\t" \
410 "lea (%1,%3,2), %1\n\t" \
411 "psubusb %4, %%mm4\n\t" \
412 PAVGB(%%mm2, %%mm4) \
413 PAVGB(%%mm2, %%mm4) /* mm4 = qup odd */ \
414 PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */ \
415 "movq (%1,%3), %%mm5\n\t" \
416 "psubusb %4, %%mm5\n\t" \
417 PAVGB(%%mm1, %%mm5) \
418 PAVGB(%%mm5, %%mm1) /* mm1 = qdown even */ \
419 PAVGB((%1,%3), %%mm5) /* mm5 = qup next even */ \
420 PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */ \
421 PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */ \
422 PMINUBT(%%mm2, %%mm3, %%mm0) /* limit temp to odd diff */ \
423 PMINUBT(%%mm2, %%mm4, %%mm0) /* limit noise to odd diff */ \
424 "movq (%1,%3,2), %%mm2\n\t" \
425 "psubusb %4, %%mm2\n\t" \
426 PAVGB((%1), %%mm2) \
427 PAVGB((%1), %%mm2) /* mm2 = qdown odd */ \
428 "movq (%0,%2,2), %%mm1\n\t" \
429 "psubusb %4, %%mm1\n\t" \
430 PAVGB((%0), %%mm1) \
431 PAVGB((%0), %%mm1) /* mm1 = qdown old odd */ \
432 PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */ \
433 PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */ \
434 PMINUBT(%%mm4, %%mm2, %%mm0) /* current */ \
435 PMINUBT(%%mm3, %%mm1, %%mm0) /* old */ \
436 PSUMBW(%%mm2, %%mm0, %%mm6) \
437 PSUMBW(%%mm1, %%mm0, %%mm6) \
438 "psllq $32, %%mm2\n\t" \
439 "psllq $48, %%mm1\n\t" \
440 "paddusw %%mm2, %%mm7\n\t" \
441 "paddusw %%mm1, %%mm7\n\t" \
442 : "=r" (a), "=r" (b) \
443 : "r"((x86_reg)as), "r"((x86_reg)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
444 ); \
445 } while (--lines);
447 static inline struct metrics
448 block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
449 int lines, struct vf_priv_s *p, struct frame_stats *s)
451 struct metrics tm;
452 #if !HAVE_AMD3DNOW
453 mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");
454 #else
455 static const unsigned long long ones = 0x0101010101010101ull;
457 BLOCK_METRICS_TEMPLATE();
458 __asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
459 get_block_stats(&tm, p, s);
460 #endif
461 return tm;
/*
 * From here on the helper macros map directly to MMX2 (integer SSE)
 * instructions instead of the plain-MMX emulations / 3DNow! pavgusb used
 * above.  The extra T / Z parameters are kept so that
 * BLOCK_METRICS_TEMPLATE() expands unchanged; they are ignored by PSADBW
 * and PMINUBT.  PSUMBW still needs Z to hold zero.  PDIFFUB, PDIFFUBT and
 * PMAXUW keep their earlier definitions.
 */
464 #undef PSUMBW
465 #undef PSADBW
466 #undef PMAXUB
467 #undef PMINUBT
468 #undef PAVGB
470 #define PSUMBW(X,T,Z) "psadbw " #Z "," #X "\n\t"
471 #define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t"
472 #define PMAXUB(X,Y) "pmaxub " #X "," #Y "\n\t"
473 #define PMINUBT(X,Y,T) "pminub " #X "," #Y "\n\t"
474 #define PAVGB(X,Y) "pavgb " #X "," #Y "\n\t"
/*
 * MMX2 version of block_metrics_c(): computes the metrics of one 8-byte wide
 * block with BLOCK_METRICS_TEMPLATE() and updates the frame statistics *s
 * with inline assembly instead of calling get_block_stats().
 *
 * a, b    block position in the old and the new frame
 * as, bs  line pitch of a and b
 * lines   number of line pairs in the block (must be >= 1)
 * p       supplies the thresholds (p->thres, p->sad_thres)
 * s       frame statistics to update
 *
 * Returns the block metrics.
 *
 * The asm statements below pass values to each other in MMX registers
 * (mm7 = metrics, mm6 = zero, later mm1/mm4/mm5), so they must stay in this
 * order with no MMX/x87 code in between.
 *
 * NOTE(review): when HAVE_MMX is false, tm is returned uninitialised.
 * NOTE(review): the guard tests HAVE_MMX although the code uses pshufw,
 * pavgw, pminub and prefetcht0 -- confirm that this is the intended macro.
 * NOTE(review): several s->... operands are declared "=m" (write-only) but
 * are read before they are stored ("movq %1, %%mm4", "paddw %0, %%mm4",
 * "paddd %0, %%mm0"); "+m" looks like what is meant -- check against the GCC
 * extended-asm documentation.
 */
476 static inline struct metrics
477 block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
478 int lines, struct vf_priv_s *p, struct frame_stats *s)
480 struct metrics tm;
481 #if !HAVE_MMX
482 mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n");
483 #else
/* Referenced by name from BLOCK_METRICS_TEMPLATE(). */
484 static const unsigned long long ones = 0x0101010101010101ull;
485 x86_reg interlaced;
/* Prefetch distance of 10..17 lines, derived from bits 3..5 of the address. */
486 x86_reg prefetch_line = (((long)a>>3) & 7) + 10;
487 #ifdef DEBUG
/* Copy of the statistics for the cross-check against the C code below. */
488 struct frame_stats ts = *s;
489 #endif
490 __asm__ volatile("prefetcht0 (%0,%2)\n\t"
491 "prefetcht0 (%1,%3)\n\t" :
492 : "r" (a), "r" (b),
493 "r" (prefetch_line * as), "r" (prefetch_line * bs));
/* Leaves the metrics in mm7 and zero in mm6. */
495 BLOCK_METRICS_TEMPLATE();
497 s->num_blocks++;
/* Updates s->bigger and s->twox, prepares the threshold masks in mm4
 * (<= t), mm5 (<= 2t) and mm1 (<= 4t) for the next statement, and returns
 * the interlaced flags in `interlaced` (low and high flag, 16 bits apart). */
498 __asm__ volatile(
499 "movq %3, %%mm0\n\t"
500 "movq %%mm7, %%mm1\n\t"
501 "psubusw %%mm0, %%mm1\n\t"
502 "movq %%mm1, %%mm2\n\t"
503 "paddusw %%mm0, %%mm2\n\t"
504 "paddusw %%mm7, %%mm2\n\t"
505 "pshufw $0xb1, %%mm2, %%mm3\n\t"
506 "pavgw %%mm7, %%mm2\n\t"
507 "pshufw $0xb1, %%mm2, %%mm2\n\t"
508 "psubusw %%mm7, %%mm2\n\t"
509 "pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */
510 "psubusw %%mm7, %%mm3\n\t"
511 "pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */
512 "movq %1, %%mm4\n\t"
513 "movq %2, %%mm5\n\t"
514 "psubw %%mm2, %%mm4\n\t"
515 "psubw %%mm3, %%mm5\n\t"
516 "movq %%mm4, %1\n\t"
517 "movq %%mm5, %2\n\t"
518 "pxor %%mm4, %%mm4\n\t"
519 "pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */
520 "psubusw %%mm0, %%mm1\n\t"
521 "pxor %%mm5, %%mm5\n\t"
522 "pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */
523 "psubusw %%mm0, %%mm1\n\t"
524 "psubusw %%mm0, %%mm1\n\t"
525 "pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */
526 "pshufw $0xb1, %%mm2, %%mm0\n\t"
527 "por %%mm2, %%mm0\n\t" /* 1 if not close */
528 "punpckhdq %%mm0, %%mm0\n\t"
529 "movq %%mm4, %%mm2\n\t" /* tttt */
530 "punpckhdq %%mm5, %%mm2\n\t" /* ttll */
531 "por %%mm2, %%mm0\n\t"
532 "pcmpeqd %%mm6, %%mm0\n\t" /* close && big */
533 "psrlq $16, %%mm0\n\t"
534 "psrlw $15, %%mm0\n\t"
535 "movd %%mm0, %0\n\t"
536 : "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox)
537 : "m" (p->thres)
540 if (interlaced) {
541 s->interlaced_high += interlaced >> 16;
542 s->interlaced_low += interlaced;
543 } else {
/* The masks are -1 where the metric is at or below the threshold, so adding
 * 1 (subtracting mm0 = -1) yields 1 exactly where the threshold is
 * exceeded; the result is added to the tiny / low / high counters. */
544 __asm__ volatile(
545 "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */
546 "psubw %%mm0, %%mm4\n\t"
547 "psubw %%mm0, %%mm5\n\t"
548 "psubw %%mm0, %%mm1\n\t"
549 "paddw %0, %%mm4\n\t"
550 "paddw %1, %%mm5\n\t"
551 "paddw %2, %%mm1\n\t"
552 "movq %%mm4, %0\n\t"
553 "movq %%mm5, %1\n\t"
554 "movq %%mm1, %2\n\t"
555 : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)
/* Adds the metrics that are not below sad_thres to the 32-bit sad sums
 * (even/odd through operand 0, noise/temp through operand 1). */
558 __asm__ volatile(
559 "pshufw $0, %2, %%mm0\n\t"
560 "psubusw %%mm7, %%mm0\n\t"
561 "pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */
562 "pand %%mm7, %%mm0\n\t"
563 "movq %%mm0, %%mm1\n\t"
564 "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */
565 "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */
566 "paddd %0, %%mm0\n\t"
567 "paddd %1, %%mm1\n\t"
568 "movq %%mm0, %0\n\t"
569 "movq %%mm1, %1\n\t"
570 : "=m" (s->sad.even), "=m" (s->sad.noise)
571 : "m" (p->sad_thres)
/* Stores the metrics into tm, updates s->max with the per-word maximum and
 * leaves MMX mode. */
575 __asm__ volatile(
576 "movq %%mm7, (%1)\n\t"
577 PMAXUW((%0), %%mm7)
578 "movq %%mm7, (%0)\n\t"
579 "emms"
580 : : "r" (&s->max), "r" (&tm), "X" (s->max)
581 : "memory"
583 #ifdef DEBUG
/* Cross-check against the C implementation using the statistics copy. */
584 if (1) {
585 struct metrics cm;
/* The template moved a and b one line up and then advanced them by two
 * lines per iteration; with the 4 line pairs assumed here this undoes the
 * net movement of 7 lines. */
586 a -= 7*as;
587 b -= 7*bs;
588 cm = block_metrics_c(a, b, as, bs, 4, p, &ts);
589 if (!MEQ(tm, cm))
590 mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n");
/* NOTE(review): s has already been dereferenced above, so this test cannot
 * protect against a NULL s. */
591 if (s) {
592 # define CHECK(X) if (!MEQ(s->X, ts.X)) \
593 mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n");
594 CHECK(tiny);
595 CHECK(low);
596 CHECK(high);
597 CHECK(sad);
598 CHECK(max);
601 #endif
602 #endif
603 return tm;
/*
 * MMX2 version of dint_copy_line(): produces two output lines, 8 bytes at a
 * time.
 *
 * dst      destination; the line at dst receives a copy of the line at a,
 *          the line at dst+ds receives the chosen / interpolated pixels
 * a        source line; the line at a+2*ss is the second line used for the
 *          average
 * bos,cos  byte offsets from a to the two candidate lines
 * ds, ss   destination and source line pitch
 * w        width in bytes; (w+7)/8 groups are processed, so up to 7 bytes
 *          past w may be read and written, and w must be >= 1
 * t        threshold, replicated into every byte of mm7
 *
 * Returns the number of bytes for which the average of the two a-lines was
 * written instead of one of the candidates (sum of the per-byte counters in
 * mm6).  In the !HAVE_MMX build it only reports an internal error and
 * returns 0.
 */
606 static inline int
607 dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
608 long cos, int ds, int ss, int w, int t)
610 #if !HAVE_MMX
611 mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n");
612 return 0;
613 #else
614 unsigned long len = (w+7) >> 3;
615 int ret;
/* mm6 and mm7 stay live across the following asm statements. */
616 __asm__ volatile (
617 "pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */
618 "movd %0, %%mm7 \n\t"
619 "punpcklbw %%mm7, %%mm7 \n\t"
620 "punpcklwd %%mm7, %%mm7 \n\t"
621 "punpckldq %%mm7, %%mm7 \n\t" /* mm7 = threshold */
622 : /* no output */
623 : "rm" (t)
625 do {
/* Operands: %0 = a, %1 = bos, %2 = dst, %3 = ss, %4 = ds, %5 = cos.
 * NOTE(review): the statement stores through dst but declares neither an
 * output operand nor a "memory" clobber -- confirm this is safe. */
626 __asm__ volatile (
627 "movq (%0), %%mm0\n\t"
628 "movq (%0,%3,2), %%mm1\n\t"
629 "movq %%mm0, (%2)\n\t"
630 "pmaxub %%mm1, %%mm0\n\t"
631 "pavgb (%0), %%mm1\n\t"
632 "psubusb %%mm1, %%mm0\n\t"
633 "paddusb %%mm7, %%mm0\n\t" /* mm0 = max-avg+thr */
634 "movq (%0,%1), %%mm2\n\t"
635 "movq (%0,%5), %%mm3\n\t"
636 "movq %%mm2, %%mm4\n\t"
637 PDIFFUBT(%%mm1, %%mm2, %%mm5)
638 PDIFFUBT(%%mm1, %%mm3, %%mm5)
639 "pminub %%mm2, %%mm3\n\t"
640 "pcmpeqb %%mm3, %%mm2\n\t" /* b = min */
641 "pand %%mm2, %%mm4\n\t"
642 "pandn (%0,%5), %%mm2\n\t"
643 "por %%mm4, %%mm2\n\t"
644 "pminub %%mm0, %%mm3\n\t"
645 "pcmpeqb %%mm0, %%mm3\n\t" /* set to 1s if >= threshold */
646 "psubb %%mm3, %%mm6\n\t" /* count pixels above thr. */
647 "pand %%mm3, %%mm1 \n\t"
648 "pandn %%mm2, %%mm3 \n\t"
649 "por %%mm3, %%mm1 \n\t" /* avg if >= threshold */
650 "movq %%mm1, (%2,%4) \n\t"
651 : /* no output */
652 : "r" (a), "r" ((x86_reg)bos), "r" ((x86_reg)dst), "r" ((x86_reg)ss), "r" ((x86_reg)ds), "r" ((x86_reg)cos)
654 a += 8;
655 dst += 8;
656 } while (--len);
/* Sum the eight byte counters of mm6 and leave MMX mode. */
658 __asm__ volatile ("pxor %%mm7, %%mm7 \n\t"
659 "psadbw %%mm6, %%mm7 \n\t"
660 "movd %%mm7, %0 \n\t"
661 "emms \n\t"
662 : "=r" (ret)
664 return ret;
665 #endif
/*
 * Portable C version of the line-pair de-interlacer; works on
 * sizeof(cmmx_t) bytes at a time.
 *
 * dst      destination; the line at dst receives a copy of the line at a,
 *          the line at dst+ds receives the chosen / interpolated pixels
 * a        source line (e); the line at a+2*ss (ne) is averaged with it
 * bos,cos  byte offsets from a to the two candidate lines (o and oo)
 * ds, ss   destination and source line pitch
 * w        width in bytes, rounded up to whole cmmx_t words (must be >= 1)
 * t        threshold; the comment on max_diff below requires t < 128
 *
 * For every byte the candidate (o or oo) closer to the average of e and ne
 * is taken, unless its distance from the average reaches
 * max(e, ne) - avg + t, in which case the average itself is written.
 * Returns the number of bytes for which the average was written.
 *
 * NOTE(review): the byte-wise helpers (pmaxub, pavgb, pdiffub, pcmpgtub,
 * psumbw) and ONE_BYTES are not defined in this file; the description
 * above assumes they do what their names say -- confirm in cmmx.h.
 */
668 static inline int
669 dint_copy_line(unsigned char *dst, unsigned char *a, long bos,
670 long cos, int ds, int ss, int w, int t)
672 unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t);
673 cmmx_t dint_count = 0;
674 cmmx_t thr;
/* Replicate the threshold into every byte of thr. */
675 t |= t << 8;
676 thr = t | (t << 16);
677 if (sizeof(cmmx_t) > 4)
678 thr |= thr << (sizeof(cmmx_t)*4);
679 do {
680 cmmx_t e = *(cmmx_t*)a;
681 cmmx_t ne = *(cmmx_t*)(a+2*ss);
682 cmmx_t o = *(cmmx_t*)(a+bos);
683 cmmx_t oo = *(cmmx_t*)(a+cos);
684 cmmx_t maxe = pmaxub(e, ne);
685 cmmx_t avge = pavgb(e, ne);
686 cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */
687 cmmx_t diffo = pdiffub(avge, o);
688 cmmx_t diffoo = pdiffub(avge, oo);
689 cmmx_t diffcmp = pcmpgtub(diffo, diffoo);
/* Branch-free select: bo / diffbo take the oo values where diffcmp is set
 * and the o values elsewhere. */
690 cmmx_t bo = ((oo ^ o) & diffcmp) ^ o;
691 cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo;
692 cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo);
693 cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo;
/* One count per byte in which the average was selected. */
694 dint_count += above_thr & ONE_BYTES;
695 *(cmmx_t*)(dst) = e;
696 *(cmmx_t*)(dst+ds) = bo_or_avg;
697 a += sizeof(cmmx_t);
698 dst += sizeof(cmmx_t);
699 } while (--len);
700 return psumbw(dint_count);
/*
 * Copy one plane into d, optionally de-interlacing it.
 *
 * d          destination plane, line pitch ds
 * a, b, c    source planes sharing the line pitch ss; b and c are only
 *            addressed relative to a (as a + bos and a + cos)
 * w, h       width in bytes and height in lines of the area to produce
 * threshold  values >= 128 select plain line copying; smaller values are
 *            passed on to dint_copy_line() / dint_copy_line_mmx2()
 * field      non-zero: the first output line is taken from b and the line
 *            pairs start one line further down
 * mmx2       1 selects dint_copy_line_mmx2(), anything else the C version
 *
 * Returns the sum of the values returned by the per-line-pair helpers
 * (0 when threshold >= 128).
 */
static int
dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b,
                unsigned char *c, unsigned long w, unsigned long h,
                unsigned long ds, unsigned long ss, unsigned long threshold,
                long field, long mmx2)
{
    unsigned long ret = 0;
    long bos = b - a;
    long cos = c - a;

    /* h is unsigned: without this guard the "h--" below would wrap around
     * and the trailing copies would write lines that do not exist. */
    if (h == 0)
        return 0;

    if (field) {
        fast_memcpy(d, b, w);
        h--;
        d += ds;
        a += ss;
    }
    bos += ss;
    cos += ss;

    /* Only reachable for a one-line plane with field set: that single line
     * has just been written, so nothing is left to copy. */
    if (h == 0)
        return 0;

    /* Two output lines per iteration; the last one or two lines are always
     * plain copies and are handled after the loop. */
    while (h > 2) {
        if (threshold >= 128) {
            fast_memcpy(d, a, w);
            fast_memcpy(d+ds, a+bos, w);
        } else if (mmx2 == 1) {
            ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);
        } else
            ret += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold);
        h -= 2;
        d += 2*ds;
        a += 2*ss;
    }
    fast_memcpy(d, a, w);
    if (h == 2)
        fast_memcpy(d+ds, a+bos, w);
    return ret;
}
/*
 * Produce the output picture dmpi from the plane sets `old` and `new`.
 *
 * p     filter state (geometry, field order, dint_thres, luma_only, ...)
 * dmpi  destination image
 * old, new  arrays of plane pointers of the two stored pictures
 * show  selects what is output; the code treats it as follows:
 *   - show >= 12 or (show & 3) == 0: show is shifted right by two and the
 *     two pictures change roles (the third source becomes the original
 *     `new`, and `new` becomes the original `old`);
 *   - show <= 2 after that: lines are taken from one picture and
 *     de-interlaced with threshold p->dint_thres, the field parity being
 *     flipped when show is odd;
 *   - show == 3: both fields come from `new`, plain copy;
 *   - anything else: lines of `old` and `new` are combined with the field
 *     parity flipped, plain copy (the default threshold 256 disables
 *     de-interlacing in dint_copy_plane()).
 *
 * For planar images the two chroma planes are copied the same way, with the
 * parity additionally XORed with p->chroma_swapped and either a reduced
 * threshold or, with luma_only set, all three sources taken from `new`.
 * Only the luma result is counted in the "Deinterlaced ... pixels" message.
 */
738 static void
739 copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi,
740 unsigned char **old, unsigned char **new, unsigned long show)
/* 256 >= 128, i.e. plain copying unless lowered below. */
742 unsigned long threshold = 256;
743 unsigned long field = p->swapped;
744 unsigned long dint_pixels = 0;
745 unsigned char **other = old;
746 if (show >= 12 || !(show & 3))
747 show >>= 2, other = new, new = old;
748 if (show <= 2) { /* Single field: de-interlace */
749 threshold = p->dint_thres;
750 field ^= show & 1;
751 old = new;
752 } else if (show == 3)
753 old = new;
754 else
755 field ^= 1;
756 dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0],
757 other[0], p->w, p->h, dmpi->stride[0],
758 p->stride, threshold, field, p->mmx2);
759 if (dmpi->flags & MP_IMGFLAG_PLANAR) {
760 if (p->luma_only)
761 old = new, other = new;
762 else
763 threshold = threshold/2 + 1;
764 field ^= p->chroma_swapped;
/* The return values of the chroma copies are deliberately not added to
 * dint_pixels here. */
765 dint_copy_plane(dmpi->planes[1], old[1], new[1],
766 other[1], p->cw, p->ch, dmpi->stride[1],
767 p->chroma_stride, threshold, field, p->mmx2);
768 dint_copy_plane(dmpi->planes[2], old[2], new[2],
769 other[2], p->cw, p->ch, dmpi->stride[2],
770 p->chroma_stride, threshold, field, p->mmx2);
772 if (dint_pixels > 0 && p->verbose)
773 mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels);
776 static void diff_planes(struct vf_priv_s *p, struct frame_stats *s,
777 unsigned char *of, unsigned char *nf,
778 int w, int h, int os, int ns, int swapped)
780 int i, y;
781 int align = -(long)nf & 7;
782 of += align;
783 nf += align;
784 w -= align;
785 if (swapped)
786 of -= os, nf -= ns;
787 i = (h*3 >> 7) & ~1;
788 of += i*os + 8;
789 nf += i*ns + 8;
790 h -= i;
791 w -= 16;
793 memset(s, 0, sizeof(*s));
795 for (y = (h-8) >> 3; y; y--) {
796 if (p->mmx2 == 1) {
797 for (i = 0; i < w; i += 8)
798 block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);
799 } else if (p->mmx2 == 2) {
800 for (i = 0; i < w; i += 8)
801 block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);
802 } else if (p->fast > 3) {
803 for (i = 0; i < w; i += 8)
804 block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);
805 } else if (p->fast > 1) {
806 for (i = 0; i < w; i += 8)
807 block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s);
808 } else {
809 for (i = 0; i < w; i += 8)
810 block_metrics_c(of+i, nf+i, os, ns, 4, p, s);
812 of += 8*os;
813 nf += 8*ns;
/* Expands to the four members of X as a comma-separated argument list, in
 * the order even, odd, noise, temp (used for the verbose printout). */
817 #define METRICS(X) (X).even, (X).odd, (X).noise, (X).temp
819 static void diff_fields(struct vf_priv_s *p, struct frame_stats *s,
820 unsigned char **old, unsigned char **new)
822 diff_planes(p, s, old[0], new[0], p->w, p->h,
823 p->stride, p->stride, p->swapped);
824 s->sad.even = (s->sad.even * 16ul) / s->num_blocks;
825 s->sad.odd = (s->sad.odd * 16ul) / s->num_blocks;
826 s->sad.noise = (s->sad.noise * 16ul) / s->num_blocks;
827 s->sad.temp = (s->sad.temp * 16ul) / s->num_blocks;
828 if (p->verbose)
829 mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d - %d, "
830 "t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, "
831 "2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n",
832 p->inframes, p->chflag, METRICS(s->max), s->num_blocks,
833 METRICS(s->tiny), METRICS(s->low), METRICS(s->high),
834 METRICS(s->bigger), METRICS(s->twox), METRICS(s->sad),
835 s->interlaced_low, s->interlaced_high,
836 p->iosync / (double) p->in_inc);
839 static const char *parse_args(struct vf_priv_s *p, const char *args)
841 args--;
842 while (args && *++args &&
843 (sscanf(args, "io=%lu:%lu", &p->out_dec, &p->in_inc) == 2 ||
844 sscanf(args, "diff_thres=%hu", &p->thres.even ) == 1 ||
845 sscanf(args, "comb_thres=%hu", &p->thres.noise) == 1 ||
846 sscanf(args, "sad_thres=%lu", &p->sad_thres ) == 1 ||
847 sscanf(args, "dint_thres=%lu", &p->dint_thres ) == 1 ||
848 sscanf(args, "fast=%u", &p->fast ) == 1 ||
849 sscanf(args, "mmx2=%lu", &p->mmx2 ) == 1 ||
850 sscanf(args, "luma_only=%u", &p->luma_only ) == 1 ||
851 sscanf(args, "verbose=%u", &p->verbose ) == 1 ||
852 sscanf(args, "crop=%lu:%lu:%lu:%lu", &p->w,
853 &p->h, &p->crop_x, &p->crop_y) == 4))
854 args = strchr(args, '/');
855 return args;
/*
 * Greatest common divisor of x and y (Euclid's algorithm).
 * gcd(n, 0) and gcd(0, n) are n; gcd(0, 0) is 0.
 */
static unsigned long gcd(unsigned long x, unsigned long y)
{
    unsigned long small = x > y ? y : x;
    unsigned long large = x > y ? x : y;

    while (small != 0) {
        unsigned long rest = large % small;
        large = small;
        small = rest;
    }
    return large;
}
872 static void init(struct vf_priv_s *p, mp_image_t *mpi)
874 unsigned long i;
875 unsigned long plane_size, chroma_plane_size;
876 unsigned char *plane;
877 unsigned long cos, los;
878 p->crop_cx = p->crop_x >> mpi->chroma_x_shift;
879 p->crop_cy = p->crop_y >> mpi->chroma_y_shift;
880 if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) {
881 p->stride = (mpi->w + 15) & ~15;
882 p->chroma_stride = p->stride >> mpi->chroma_x_shift;
883 } else {
884 p->stride = mpi->width;
885 p->chroma_stride = mpi->chroma_width;
887 p->cw = p->w >> mpi->chroma_x_shift;
888 p->ch = p->h >> mpi->chroma_y_shift;
889 p->nplanes = 1;
890 p->static_idx = 0;
891 p->temp_idx = 0;
892 p->old_planes = p->planes[0];
893 plane_size = mpi->h * p->stride;
894 chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ?
895 mpi->chroma_height * p->chroma_stride : 0;
896 p->memory_allocated =
897 malloc(NUM_STORED * (plane_size+2*chroma_plane_size) +
898 8*p->chroma_stride + 4096);
899 /* align to page boundary */
900 plane = p->memory_allocated + (-(long)p->memory_allocated & 4095);
901 memset(plane, 0, NUM_STORED * plane_size);
902 los = p->crop_x + p->crop_y * p->stride;
903 cos = p->crop_cx + p->crop_cy * p->chroma_stride;
904 for (i = 0; i != NUM_STORED; i++, plane += plane_size) {
905 p->planes[i][0] = plane;
906 p->planes[NUM_STORED + i][0] = plane + los;
908 if (mpi->flags & MP_IMGFLAG_PLANAR) {
909 p->nplanes = 3;
910 memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size);
911 for (i = 0; i != NUM_STORED; i++) {
912 p->planes[i][1] = plane;
913 p->planes[NUM_STORED + i][1] = plane + cos;
914 plane += chroma_plane_size;
915 p->planes[i][2] = plane;
916 p->planes[NUM_STORED + i][2] = plane + cos;
917 plane += chroma_plane_size;
920 p->out_dec <<= 2;
921 i = gcd(p->in_inc, p->out_dec);
922 p->in_inc /= i;
923 p->out_dec /= i;
924 p->iosync = 0;
925 p->num_fields = 3;
/* Current wall-clock time in seconds (with microsecond fraction), as
 * reported by gettimeofday(). */
static inline double get_time(void)
{
    struct timeval now;
    double seconds;

    gettimeofday(&now, 0);
    seconds = now.tv_sec;
    seconds += now.tv_usec * 1e-6;
    return seconds;
}
935 static void get_image(struct vf_instance *vf, mp_image_t *mpi)
937 struct vf_priv_s *p = vf->priv;
938 static unsigned char **planes, planes_idx;
940 if (mpi->type == MP_IMGTYPE_STATIC) return;
942 if (!p->planes[0][0]) init(p, mpi);
944 if (mpi->type == MP_IMGTYPE_TEMP ||
945 (mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE)))
946 planes_idx = NUM_STORED/2 + (++p->temp_idx % (NUM_STORED/2));
947 else
948 planes_idx = ++p->static_idx % (NUM_STORED/2);
949 planes = p->planes[planes_idx];
950 mpi->priv = p->planes[NUM_STORED + planes_idx];
951 if (mpi->priv == p->old_planes) {
952 unsigned char **old_planes =
953 p->planes[NUM_STORED + 2 + (++p->temp_idx & 1)];
954 my_memcpy_pic(old_planes[0], p->old_planes[0],
955 p->w, p->h, p->stride, p->stride);
956 if (mpi->flags & MP_IMGFLAG_PLANAR) {
957 my_memcpy_pic(old_planes[1], p->old_planes[1],
958 p->cw, p->ch, p->chroma_stride, p->chroma_stride);
959 my_memcpy_pic(old_planes[2], p->old_planes[2],
960 p->cw, p->ch, p->chroma_stride, p->chroma_stride);
962 p->old_planes = old_planes;
963 p->num_copies++;
965 mpi->planes[0] = planes[0];
966 mpi->stride[0] = p->stride;
967 if (mpi->flags & MP_IMGFLAG_PLANAR) {
968 mpi->planes[1] = planes[1];
969 mpi->planes[2] = planes[2];
970 mpi->stride[1] = mpi->stride[2] = p->chroma_stride;
972 mpi->width = p->stride;
974 mpi->flags |= MP_IMGFLAG_DIRECT;
975 mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK;
/*
 * Compare x and y with a tolerance derived from their magnitude.
 *
 * The tolerance is (x + y + err) >> e.  The result is
 *    2 / -2  when x exceeds / falls short of y by more than the tolerance,
 *    1 / -1  when the difference is only larger than half the tolerance,
 *    0       otherwise.
 */
static inline long
cmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e)
{
    const long delta = x - y;
    const long full = ((x + y + err) >> e);
    const long half = full >> 1;
    long score = 0;

    if (delta > full)
        score++;
    if (delta < -full)
        score--;
    if (delta > half)
        score++;
    if (delta < -half)
        score--;
    return score;
}
988 static unsigned long
989 find_breaks(struct vf_priv_s *p, struct frame_stats *s)
991 struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
992 long notfilm = 5*p->in_inc - p->out_dec;
993 unsigned long n = s->num_blocks >> 8;
994 unsigned long sad_comb_cmp = cmpe(s->sad.temp, s->sad.noise, 512, 1);
995 unsigned long ret = 8;
997 if (cmpe(s->sad.temp, s->sad.even, 512, 1) > 0)
998 mp_msg(MSGT_VFILTER, MSGL_WARN,
999 "@@@@@@@@ Bottom-first field??? @@@@@@@@\n");
1000 if (s->sad.temp > 1000 && s->sad.noise > 1000)
1001 return 3;
1002 if (s->interlaced_high >= 2*n && s->sad.temp > 256 && s->sad.noise > 256)
1003 return 3;
1004 if (s->high.noise > s->num_blocks/4 && s->sad.noise > 10000 &&
1005 s->sad.noise > 2*s->sad.even && s->sad.noise > 2*ps->sad.odd) {
1006 // Mid-frame scene change
1007 if (s->tiny.temp + s->interlaced_low < n ||
1008 s->low.temp + s->interlaced_high < n/4 ||
1009 s->high.temp + s->interlaced_high < n/8 ||
1010 s->sad.temp < 160)
1011 return 1;
1012 return 3;
1014 if (s->high.temp > s->num_blocks/4 && s->sad.temp > 10000 &&
1015 s->sad.temp > 2*ps->sad.odd && s->sad.temp > 2*ps->sad.even) {
1016 // Start frame scene change
1017 if (s->tiny.noise + s->interlaced_low < n ||
1018 s->low.noise + s->interlaced_high < n/4 ||
1019 s->high.noise + s->interlaced_high < n/8 ||
1020 s->sad.noise < 160)
1021 return 2;
1022 return 3;
1024 if (sad_comb_cmp == 2)
1025 return 2;
1026 if (sad_comb_cmp == -2)
1027 return 1;
1029 if (s->tiny.odd > 3*MAX(n,s->tiny.even) + s->interlaced_low)
1030 return 1;
1031 if (s->tiny.even > 3*MAX(n,s->tiny.odd)+s->interlaced_low &&
1032 (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
1033 return 4;
1035 if (s->sad.noise < 64 && s->sad.temp < 64 &&
1036 s->low.noise <= n/2 && s->high.noise <= n/4 &&
1037 s->low.temp <= n/2 && s->high.temp <= n/4)
1038 goto still;
1040 if (s->tiny.temp > 3*MAX(n,s->tiny.noise) + s->interlaced_low)
1041 return 2;
1042 if (s->tiny.noise > 3*MAX(n,s->tiny.temp) + s->interlaced_low)
1043 return 1;
1045 if (s->low.odd > 3*MAX(n/4,s->low.even) + s->interlaced_high)
1046 return 1;
1047 if (s->low.even > 3*MAX(n/4,s->low.odd)+s->interlaced_high &&
1048 s->sad.even > 2*s->sad.odd &&
1049 (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
1050 return 4;
1052 if (s->low.temp > 3*MAX(n/4,s->low.noise) + s->interlaced_high)
1053 return 2;
1054 if (s->low.noise > 3*MAX(n/4,s->low.temp) + s->interlaced_high)
1055 return 1;
1057 if (sad_comb_cmp == 1 && s->sad.noise < 64)
1058 return 2;
1059 if (sad_comb_cmp == -1 && s->sad.temp < 64)
1060 return 1;
1062 if (s->tiny.odd <= n || (s->tiny.noise <= n/2 && s->tiny.temp <= n/2)) {
1063 if (s->interlaced_low <= n) {
1064 if (p->num_fields == 1)
1065 goto still;
1066 if (s->tiny.even <= n || ps->tiny.noise <= n/2)
1067 /* Still frame */
1068 goto still;
1069 if (s->bigger.even >= 2*MAX(n,s->bigger.odd) + s->interlaced_low)
1070 return 4;
1071 if (s->low.even >= 2*n + s->interlaced_low)
1072 return 4;
1073 goto still;
1076 if (s->low.odd <= n/4) {
1077 if (s->interlaced_high <= n/4) {
1078 if (p->num_fields == 1)
1079 goto still;
1080 if (s->low.even <= n/4)
1081 /* Still frame */
1082 goto still;
1083 if (s->bigger.even >= 2*MAX(n/4,s->bigger.odd)+s->interlaced_high)
1084 return 4;
1085 if (s->low.even >= n/2 + s->interlaced_high)
1086 return 4;
1087 goto still;
1090 if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_low)
1091 return 2;
1092 if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_low)
1093 return 1;
1094 if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_high)
1095 return 2;
1096 if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_high)
1097 return 1;
1098 if (s->twox.temp > 2*MAX(n,s->twox.noise) + s->interlaced_high)
1099 return 2;
1100 if (s->twox.noise > 2*MAX(n,s->twox.temp) + s->interlaced_high)
1101 return 1;
1102 if (s->bigger.even > 2*MAX(n,s->bigger.odd) + s->interlaced_low &&
1103 s->bigger.temp < n && s->bigger.noise < n)
1104 return 4;
1105 if (s->interlaced_low > MIN(2*n, s->tiny.odd))
1106 return 3;
1107 ret = 8 + (1 << (s->sad.temp > s->sad.noise));
1108 still:
1109 if (p->num_fields == 1 && p->prev_fields == 3 && notfilm >= 0 &&
1110 (s->tiny.temp <= s->tiny.noise || s->sad.temp < s->sad.noise+16))
1111 return 1;
1112 if (p->notout < p->num_fields && p->iosync > 2*p->in_inc && notfilm < 0)
1113 notfilm = 0;
1114 if (p->num_fields < 2 ||
1115 (p->num_fields == 2 && p->prev_fields == 2 && notfilm < 0))
1116 return ret;
1117 if (!notfilm && (p->prev_fields&~1) == 2) {
1118 if (p->prev_fields + p->num_fields == 5) {
1119 if (s->tiny.noise <= s->tiny.temp ||
1120 s->low.noise == 0 || s->low.noise < s->low.temp ||
1121 s->sad.noise < s->sad.temp+16)
1122 return 2;
1124 if (p->prev_fields + p->num_fields == 4) {
1125 if (s->tiny.temp <= s->tiny.noise ||
1126 s->low.temp == 0 || s->low.temp < s->low.noise ||
1127 s->sad.temp < s->sad.noise+16)
1128 return 1;
1131 if (p->num_fields > 2 &&
1132 ps->sad.noise > s->sad.noise && ps->sad.noise > s->sad.temp)
1133 return 4;
1134 return 2 >> (s->sad.noise > s->sad.temp);
/*
 * Map a small integer to one printable character for the verbose log:
 * 0 becomes a blank, values above 9 continue with 'a', 'b', ...; anything
 * else is offset from '0'.  X is evaluated more than once.
 */
#define ITOC(X) ((X) == 0 ? ' ' : ((X) > 9 ? (X) + ('a' - 10) : (X) + '0'))
1139 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
1141 mp_image_t *dmpi;
1142 struct vf_priv_s *p = vf->priv;
1143 unsigned char **planes, **old_planes;
1144 struct frame_stats *s = &p->stats[p->inframes & 1];
1145 struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
1146 int swapped = 0;
1147 const int flags = mpi->fields;
1148 int breaks, prev;
1149 int show_fields = 0;
1150 int dropped_fields = 0;
1151 double start_time, diff_time;
1152 char prev_chflag = p->chflag;
1153 int keep_rate;
1155 if (!p->planes[0][0]) init(p, mpi);
1157 old_planes = p->old_planes;
1159 if ((mpi->flags & MP_IMGFLAG_DIRECT) && mpi->priv) {
1160 planes = mpi->priv;
1161 mpi->priv = 0;
1162 } else {
1163 planes = p->planes[2 + (++p->temp_idx & 1)];
1164 my_memcpy_pic(planes[0],
1165 mpi->planes[0] + p->crop_x + p->crop_y * mpi->stride[0],
1166 p->w, p->h, p->stride, mpi->stride[0]);
1167 if (mpi->flags & MP_IMGFLAG_PLANAR) {
1168 my_memcpy_pic(planes[1],
1169 mpi->planes[1] + p->crop_cx + p->crop_cy * mpi->stride[1],
1170 p->cw, p->ch, p->chroma_stride, mpi->stride[1]);
1171 my_memcpy_pic(planes[2],
1172 mpi->planes[2] + p->crop_cx + p->crop_cy * mpi->stride[2],
1173 p->cw, p->ch, p->chroma_stride, mpi->stride[2]);
1174 p->num_copies++;
1178 p->old_planes = planes;
1179 p->chflag = ';';
1180 if (flags & MP_IMGFIELD_ORDERED) {
1181 swapped = !(flags & MP_IMGFIELD_TOP_FIRST);
1182 p->chflag = (flags & MP_IMGFIELD_REPEAT_FIRST ? '|' :
1183 flags & MP_IMGFIELD_TOP_FIRST ? ':' : '.');
1185 p->swapped = swapped;
1187 start_time = get_time();
1188 if (p->chflag == '|') {
1189 *s = ppzs;
1190 p->iosync += p->in_inc;
1191 } else if ((p->fast & 1) && prev_chflag == '|')
1192 *s = pprs;
1193 else
1194 diff_fields(p, s, old_planes, planes);
1195 diff_time = get_time();
1196 p->diff_time += diff_time - start_time;
1197 breaks = p->inframes ? find_breaks(p, s) : 2;
1198 p->inframes++;
1199 keep_rate = 4*p->in_inc == p->out_dec;
1201 switch (breaks) {
1202 case 0:
1203 case 8:
1204 case 9:
1205 case 10:
1206 if (!keep_rate && p->notout < p->num_fields && p->iosync < 2*p->in_inc)
1207 break;
1208 if (p->notout < p->num_fields)
1209 dropped_fields = -2;
1210 case 4:
1211 if (keep_rate || p->iosync >= -2*p->in_inc)
1212 show_fields = (4<<p->num_fields)-1;
1213 break;
1214 case 3:
1215 if (keep_rate)
1216 show_fields = 2;
1217 else if (p->iosync > 0) {
1218 if (p->notout >= p->num_fields && p->iosync > 2*p->in_inc) {
1219 show_fields = 4; /* prev odd only */
1220 if (p->num_fields > 1)
1221 show_fields |= 8; /* + prev even */
1222 } else {
1223 show_fields = 2; /* even only */
1224 if (p->notout >= p->num_fields)
1225 dropped_fields += p->num_fields;
1228 break;
1229 case 2:
1230 if (p->iosync <= -3*p->in_inc) {
1231 if (p->notout >= p->num_fields)
1232 dropped_fields = p->num_fields;
1233 break;
1235 if (p->num_fields == 1) {
1236 int prevbreak = ps->sad.noise >= 128;
1237 if (p->iosync < 4*p->in_inc) {
1238 show_fields = 3;
1239 dropped_fields = prevbreak;
1240 } else {
1241 show_fields = 4 | (!prevbreak << 3);
1242 if (p->notout < 1 + p->prev_fields)
1243 dropped_fields = -!prevbreak;
1245 break;
1247 default:
1248 if (keep_rate)
1249 show_fields = 3 << (breaks & 1);
1250 else if (p->notout >= p->num_fields &&
1251 p->iosync >= (breaks == 1 ? -p->in_inc :
1252 p->in_inc << (p->num_fields == 1))) {
1253 show_fields = (1 << (2 + p->num_fields)) - (1<<breaks);
1254 } else {
1255 if (p->notout >= p->num_fields)
1256 dropped_fields += p->num_fields + 2 - breaks;
1257 if (breaks == 1) {
1258 if (p->iosync >= 4*p->in_inc)
1259 show_fields = 6;
1260 } else if (p->iosync > -3*p->in_inc)
1261 show_fields = 3; /* odd+even */
1263 break;
1266 show_fields &= 15;
1267 prev = p->prev_fields;
1268 if (breaks < 8) {
1269 if (p->num_fields == 1)
1270 breaks &= ~4;
1271 if (breaks)
1272 p->num_breaks++;
1273 if (breaks == 3)
1274 p->prev_fields = p->num_fields = 1;
1275 else if (breaks) {
1276 p->prev_fields = p->num_fields + (breaks==1) - (breaks==4);
1277 p->num_fields = breaks - (breaks == 4) + (p->chflag == '|');
1278 } else
1279 p->num_fields += 2;
1280 } else
1281 p->num_fields += 2;
1283 p->iosync += 4 * p->in_inc;
1284 if (p->chflag == '|')
1285 p->iosync += p->in_inc;
1287 if (show_fields) {
1288 p->iosync -= p->out_dec;
1289 p->notout = !(show_fields & 1) + !(show_fields & 3);
1290 if (((show_fields & 3) == 3 &&
1291 (s->low.noise + s->interlaced_low < (s->num_blocks>>8) ||
1292 s->sad.noise < 160)) ||
1293 ((show_fields & 12) == 12 &&
1294 (ps->low.noise + ps->interlaced_low < (s->num_blocks>>8) ||
1295 ps->sad.noise < 160))) {
1296 p->export_count++;
1297 dmpi = vf_get_image(vf->next, mpi->imgfmt, MP_IMGTYPE_EXPORT,
1298 MP_IMGFLAG_PRESERVE|MP_IMGFLAG_READABLE,
1299 p->w, p->h);
1300 if ((show_fields & 3) != 3) planes = old_planes;
1301 dmpi->planes[0] = planes[0];
1302 dmpi->stride[0] = p->stride;
1303 dmpi->width = mpi->width;
1304 if (mpi->flags & MP_IMGFLAG_PLANAR) {
1305 dmpi->planes[1] = planes[1];
1306 dmpi->planes[2] = planes[2];
1307 dmpi->stride[1] = p->chroma_stride;
1308 dmpi->stride[2] = p->chroma_stride;
1310 } else {
1311 p->merge_count++;
1312 dmpi = vf_get_image(vf->next, mpi->imgfmt,
1313 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
1314 p->w, p->h);
1315 copy_merge_fields(p, dmpi, old_planes, planes, show_fields);
1317 p->outframes++;
1318 } else
1319 p->notout += 2;
1321 if (p->verbose)
1322 mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu %lu: %x %c %c %lu%s%s%c%s\n",
1323 p->inframes, p->outframes,
1324 breaks, breaks<8 && breaks>0 ? (int) p->prev_fields+'0' : ' ',
1325 ITOC(show_fields),
1326 p->num_breaks, 5*p->in_inc == p->out_dec && breaks<8 &&
1327 breaks>0 && ((prev&~1)!=2 || prev+p->prev_fields!=5) ?
1328 " ######## bad telecine ########" : "",
1329 dropped_fields ? " ======== dropped ":"", ITOC(dropped_fields),
1330 !show_fields || (show_fields & (show_fields-1)) ?
1331 "" : " @@@@@@@@@@@@@@@@@");
1333 p->merge_time += get_time() - diff_time;
1334 return show_fields ? vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE) : 0;
1337 static int query_format(struct vf_instance *vf, unsigned int fmt)
1339 /* FIXME - support more formats */
1340 switch (fmt) {
1341 case IMGFMT_YV12:
1342 case IMGFMT_IYUV:
1343 case IMGFMT_I420:
1344 case IMGFMT_411P:
1345 case IMGFMT_422P:
1346 case IMGFMT_444P:
1347 return vf_next_query_format(vf, fmt);
1349 return 0;
1352 static int config(struct vf_instance *vf,
1353 int width, int height, int d_width, int d_height,
1354 unsigned int flags, unsigned int outfmt)
1356 struct MPOpts *opts = vf->opts;
1357 unsigned long cxm = 0;
1358 unsigned long cym = 0;
1359 struct vf_priv_s *p = vf->priv;
1360 // rounding:
1361 if(!IMGFMT_IS_RGB(outfmt) && !IMGFMT_IS_BGR(outfmt)){
1362 switch(outfmt){
1363 case IMGFMT_444P:
1364 case IMGFMT_Y800:
1365 case IMGFMT_Y8:
1366 break;
1367 case IMGFMT_YVU9:
1368 case IMGFMT_IF09:
1369 cym = 3;
1370 case IMGFMT_411P:
1371 cxm = 3;
1372 break;
1373 case IMGFMT_YV12:
1374 case IMGFMT_I420:
1375 case IMGFMT_IYUV:
1376 cym = 1;
1377 default:
1378 cxm = 1;
1381 p->chroma_swapped = !!(p->crop_y & (cym+1));
1382 if (p->w) p->w += p->crop_x & cxm;
1383 if (p->h) p->h += p->crop_y & cym;
1384 p->crop_x &= ~cxm;
1385 p->crop_y &= ~cym;
1386 if (!p->w || p->w > width ) p->w = width;
1387 if (!p->h || p->h > height) p->h = height;
1388 if (p->crop_x + p->w > width ) p->crop_x = 0;
1389 if (p->crop_y + p->h > height) p->crop_y = 0;
1391 if(!opts->screen_size_x && !opts->screen_size_y){
1392 d_width = d_width * p->w/width;
1393 d_height = d_height * p->h/height;
1395 return vf_next_config(vf, p->w, p->h, d_width, d_height, flags, outfmt);
1398 static void uninit(struct vf_instance *vf)
1400 struct vf_priv_s *p = vf->priv;
1401 mp_msg(MSGT_VFILTER, MSGL_INFO, "diff_time: %.3f, merge_time: %.3f, "
1402 "export: %lu, merge: %lu, copy: %lu\n", p->diff_time, p->merge_time,
1403 p->export_count, p->merge_count, p->num_copies);
1404 free(p->memory_allocated);
1405 free(p);
1408 static int vf_open(vf_instance_t *vf, char *args)
1410 struct vf_priv_s *p;
1411 vf->get_image = get_image;
1412 vf->put_image = put_image;
1413 vf->config = config;
1414 vf->query_format = query_format;
1415 vf->uninit = uninit;
1416 vf->default_reqs = VFCAP_ACCEPT_STRIDE;
1417 vf->priv = p = calloc(1, sizeof(struct vf_priv_s));
1418 p->out_dec = 5;
1419 p->in_inc = 4;
1420 p->thres.noise = 128;
1421 p->thres.even = 128;
1422 p->sad_thres = 64;
1423 p->dint_thres = 4;
1424 p->luma_only = 0;
1425 p->fast = 3;
1426 p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0;
1427 if (args) {
1428 const char *args_remain = parse_args(p, args);
1429 if (args_remain) {
1430 mp_msg(MSGT_VFILTER, MSGL_FATAL,
1431 "filmdint: unknown suboption: %s\n", args_remain);
1432 return 0;
1434 if (p->out_dec < p->in_inc) {
1435 mp_msg(MSGT_VFILTER, MSGL_FATAL,
1436 "filmdint: increasing the frame rate is not supported\n");
1437 return 0;
1440 if (p->mmx2 > 2)
1441 p->mmx2 = 0;
1442 #if !HAVE_MMX
1443 p->mmx2 = 0;
1444 #endif
1445 #if !HAVE_AMD3DNOW
1446 p->mmx2 &= 1;
1447 #endif
1448 p->thres.odd = p->thres.even;
1449 p->thres.temp = p->thres.noise;
1450 p->diff_time = 0;
1451 p->merge_time = 0;
1452 return 1;
1455 const vf_info_t vf_info_filmdint = {
1456 "Advanced inverse telecine filer",
1457 "filmdint",
1458 "Zoltan Hidvegi",
1460 vf_open,
1461 NULL