Fix vf_tcdump's compilation
[mplayer/kovensky.git] / libmpcodecs / vf_filmdint.c
blob754308c714ea74c314a625cc220055c7cecd3808
1 /*
2 * This file is part of MPlayer.
4 * MPlayer is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * MPlayer is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/time.h>
24 #include "config.h"
25 #include "mp_msg.h"
26 #include "cpudetect.h"
27 #include "options.h"
29 #include "img_format.h"
30 #include "mp_image.h"
31 #include "vf.h"
32 #include "cmmx.h"
34 #include "libvo/fastmemcpy.h"
/* Number of plane sets per group; vf_priv_s.planes holds 2*NUM_STORED of them. */
#define NUM_STORED 4

/*
 * Classification of a field, stored in vf_priv_s.prev_type.
 * NOTE(review): the names suggest the position of a field inside a 3-field or
 * 2-field pulldown group, or a genuinely interlaced field -- confirm against
 * the code that assigns prev_type (not visible here).
 */
enum pu_field_type_t {
    PU_1ST_OF_3,
    PU_2ND_OF_3,
    PU_3RD_OF_3,
    PU_1ST_OF_2,
    PU_2ND_OF_2,
    PU_INTERLACED
};
/*
 * Four 16-bit difference sums for one block, as accumulated by the
 * get_metrics_*() / block_metrics_*() functions:
 *   even  - old frame vs. new frame, even rows
 *   odd   - old frame vs. new frame, odd rows
 *   noise - new even rows vs. interpolated new odd rows
 *   temp  - new even rows vs. interpolated old odd rows
 * Member order and the 8-byte alignment must not change: the MMX code moves
 * the whole struct to and from a single 64-bit register.
 */
struct metrics {
    /* This struct maps to a packed word 64-bit MMX register */
    unsigned short int even;
    unsigned short int odd;
    unsigned short int noise;
    unsigned short int temp;
} __attribute__ ((aligned (8)));
/*
 * Statistics collected over all blocks of one frame comparison
 * (filled in by get_block_stats() or its MMX2 equivalent).
 */
struct frame_stats {
    /*
     * Per-metric block counters:
     *   tiny/low/high - blocks whose metric exceeds 1x / 2x / 4x the threshold
     *   bigger/twox   - blocks whose metric is roughly 1.5x / 2x its counterpart
     *   max           - largest metric value seen in any block
     */
    struct metrics tiny, low, high, bigger, twox, max;
    /* Sum of block metrics that reached sad_thres; diff_fields() rescales it. */
    struct { unsigned int even, odd, noise, temp; } sad;
    unsigned short interlaced_high; /* blocks flagged interlaced at the 2x threshold */
    unsigned short interlaced_low;  /* blocks flagged interlaced at the 1x threshold */
    unsigned short num_blocks;      /* number of blocks examined */
};
/* Private per-instance state of the filter. */
struct vf_priv_s {
    unsigned long inframes;   /* printed by diff_fields(); (inframes-1)&1 indexes stats[] */
    unsigned long outframes;
    enum pu_field_type_t prev_type;
    unsigned swapped, chroma_swapped;   /* field parity toggles for luma / chroma */
    unsigned luma_only;       /* option luma_only= */
    unsigned verbose;         /* option verbose= */
    unsigned fast;            /* option fast=; >1 and >3 select the cheaper C metrics */
    /* w,h: processed picture size (option crop=); cw,ch and the strides are derived in init() */
    unsigned long w, h, cw, ch, stride, chroma_stride, nplanes;
    unsigned long sad_thres;  /* option sad_thres= */
    unsigned long dint_thres; /* option dint_thres=; deinterlacing threshold */
    unsigned char *memory_allocated;    /* malloc()ed backing store for all planes */
    /* [0..NUM_STORED-1]: buffer starts; [NUM_STORED..]: same buffers plus the crop offset */
    unsigned char *planes[2*NUM_STORED][4];
    unsigned char **old_planes;         /* plane set currently treated as the previous frame */
    unsigned long static_idx;           /* rotating buffer counters used by get_image() */
    unsigned long temp_idx;
    unsigned long crop_x, crop_y, crop_cx, crop_cy; /* crop offsets (luma, chroma) */
    unsigned long export_count, merge_count;
    unsigned long num_breaks;
    unsigned long num_copies;           /* incremented when get_image() must copy the old frame */
    long in_inc, out_dec, iosync;       /* option io=; reduced by their gcd in init() */
    long num_fields;
    long prev_fields;
    long notout;
    long mmx2;                /* 1: MMX2 code paths, 2: 3DNow! metrics, otherwise plain C */
    unsigned small_bytes[2];
    unsigned mmx_temp[2];
    struct frame_stats stats[2];
    struct metrics thres;     /* per-metric thresholds (options diff_thres=, comb_thres=) */
    char chflag;              /* single character printed after the frame number */
    double diff_time, merge_time, decode_time, vo_time, filter_time;
    double prev_pts, prev_out_pts, drop_pts, undrop_pts, prev_diff;
    int vfr_mode, drop_countdown;       /* vfr_mode: option vfr= */
};
98 #define PPZ { 2000, 2000, 0, 2000 }
99 #define PPR { 2000, 2000, 0, 2000 }
100 static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999};
101 static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999};
/*
 * Two-argument minimum / maximum.  Arguments may be evaluated twice, so do
 * not pass expressions with side effects.
 */
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
/*
 * Building blocks for the inline assembly below.  Each macro expands to a
 * string of AT&T-syntax instructions ("op src, dst"); X, Y, T and Z are MMX
 * registers or memory operands.
 */

/* T = |X - Y| per unsigned byte; Y is clobbered, X is preserved. */
#define PDIFFUB(X,Y,T) "movq "    #X "," #T "\n\t" \
                       "psubusb " #Y "," #T "\n\t" \
                       "psubusb " #X "," #Y "\n\t" \
                       "paddusb " #Y "," #T "\n\t"

/* Y = |X - Y| per unsigned byte; T is clobbered, X is preserved. */
#define PDIFFUBT(X,Y,T) "movq "    #X "," #T "\n\t" \
                        "psubusb " #Y "," #T "\n\t" \
                        "psubusb " #X "," #Y "\n\t" \
                        "paddusb " #T "," #Y "\n\t"

/*
 * X = sum of the eight unsigned bytes of X (left in the low word);
 * Z must hold zero, T is clobbered.
 */
#define PSUMBW(X,T,Z)   "movq " #X "," #T "\n\t" \
                        "punpcklbw " #Z "," #X "\n\t" \
                        "punpckhbw " #Z "," #T "\n\t" \
                        "paddw " #T "," #X "\n\t" \
                        "movq " #X "," #T "\n\t" \
                        "psllq $32, " #T "\n\t" \
                        "paddw " #T "," #X "\n\t" \
                        "movq " #X "," #T "\n\t" \
                        "psllq $16, " #T "\n\t" \
                        "paddw " #T "," #X "\n\t" \
                        "psrlq $48, " #X "\n\t"

/* Y = sum of absolute byte differences of X and Y; Z must hold zero. */
#define PSADBW(X,Y,T,Z) PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z)

/* Y = max(X, Y) per unsigned byte / per unsigned word. */
#define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t"
#define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t"
/* Y = min(X, Y) per unsigned byte; T is clobbered. */
#define PMINUBT(X,Y,T)  "movq " #Y "," #T "\n\t" \
                        "psubusb " #X "," #T "\n\t" \
                        "psubusb " #T "," #Y "\n\t"
/* Y = byte-wise average of X and Y, using the 3DNow! pavgusb instruction. */
#define PAVGB(X,Y)      "pavgusb " #X "," #Y "\n\t"
141 static inline void
142 get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines,
143 struct metrics *m)
145 a -= as;
146 b -= bs;
147 do {
148 cmmx_t old_po = *(cmmx_t*)(a );
149 cmmx_t po = *(cmmx_t*)(b );
150 cmmx_t e = *(cmmx_t*)(b + bs);
151 cmmx_t old_o = *(cmmx_t*)(a + 2*as);
152 cmmx_t o = *(cmmx_t*)(b + 2*bs);
153 cmmx_t ne = *(cmmx_t*)(b + 3*bs);
154 cmmx_t old_no = *(cmmx_t*)(a + 4*as);
155 cmmx_t no = *(cmmx_t*)(b + 4*bs);
157 cmmx_t qup_old_odd = p31avgb(old_o, old_po);
158 cmmx_t qup_odd = p31avgb( o, po);
159 cmmx_t qdown_old_odd = p31avgb(old_o, old_no);
160 cmmx_t qdown_odd = p31avgb( o, no);
162 cmmx_t qup_even = p31avgb(ne, e);
163 cmmx_t qdown_even = p31avgb(e, ne);
165 cmmx_t temp_up_diff = pdiffub(qdown_even, qup_old_odd);
166 cmmx_t noise_up_diff = pdiffub(qdown_even, qup_odd);
167 cmmx_t temp_down_diff = pdiffub(qup_even, qdown_old_odd);
168 cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd);
170 cmmx_t odd_diff = pdiffub(o, old_o);
171 m->odd += psumbw(odd_diff);
172 m->even += psadbw(e, *(cmmx_t*)(a+as));
174 temp_up_diff = pminub(temp_up_diff, temp_down_diff);
175 temp_up_diff = pminub(temp_up_diff, odd_diff);
176 m->temp += psumbw(temp_up_diff);
177 noise_up_diff = pminub(noise_up_diff, odd_diff);
178 noise_up_diff = pminub(noise_up_diff, noise_down_diff);
180 m->noise += psumbw(noise_up_diff);
181 a += 2*as;
182 b += 2*bs;
183 } while (--lines);
186 static inline void
187 get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
188 int lines, struct metrics *m)
190 a -= as;
191 b -= bs;
192 do {
193 cmmx_t old_po = (*(cmmx_t*)(a ) >> 1) & ~SIGN_BITS;
194 cmmx_t po = (*(cmmx_t*)(b ) >> 1) & ~SIGN_BITS;
195 cmmx_t old_e = (*(cmmx_t*)(a + as) >> 1) & ~SIGN_BITS;
196 cmmx_t e = (*(cmmx_t*)(b + bs) >> 1) & ~SIGN_BITS;
197 cmmx_t old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS;
198 cmmx_t o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS;
199 cmmx_t ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS;
200 cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS;
201 cmmx_t no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS;
203 cmmx_t qup_old_odd = p31avgb_s(old_o, old_po);
204 cmmx_t qup_odd = p31avgb_s( o, po);
205 cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no);
206 cmmx_t qdown_odd = p31avgb_s( o, no);
208 cmmx_t qup_even = p31avgb_s(ne, e);
209 cmmx_t qdown_even = p31avgb_s(e, ne);
211 cmmx_t temp_up_diff = pdiffub_s(qdown_even, qup_old_odd);
212 cmmx_t noise_up_diff = pdiffub_s(qdown_even, qup_odd);
213 cmmx_t temp_down_diff = pdiffub_s(qup_even, qdown_old_odd);
214 cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd);
216 cmmx_t odd_diff = pdiffub_s(o, old_o);
217 m->odd += psumbw_s(odd_diff) << 1;
218 m->even += psadbw_s(e, old_e) << 1;
220 temp_up_diff = pminub_s(temp_up_diff, temp_down_diff);
221 temp_up_diff = pminub_s(temp_up_diff, odd_diff);
222 m->temp += psumbw_s(temp_up_diff) << 1;
223 noise_up_diff = pminub_s(noise_up_diff, odd_diff);
224 noise_up_diff = pminub_s(noise_up_diff, noise_down_diff);
226 m->noise += psumbw_s(noise_up_diff) << 1;
227 a += 2*as;
228 b += 2*bs;
229 } while (--lines);
232 static inline void
233 get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
234 int lines, struct metrics *m)
236 a -= as;
237 b -= bs;
238 do {
239 cmmx_t old_po = (*(cmmx_t*)(a )>>1) & ~SIGN_BITS;
240 cmmx_t po = (*(cmmx_t*)(b )>>1) & ~SIGN_BITS;
241 cmmx_t old_e = (*(cmmx_t*)(a + as)>>1) & ~SIGN_BITS;
242 cmmx_t e = (*(cmmx_t*)(b + bs)>>1) & ~SIGN_BITS;
243 cmmx_t old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS;
244 cmmx_t o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS;
245 cmmx_t ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS;
247 cmmx_t down_even = p31avgb_s(e, ne);
248 cmmx_t up_odd = p31avgb_s(o, po);
249 cmmx_t up_old_odd = p31avgb_s(old_o, old_po);
251 cmmx_t odd_diff = pdiffub_s(o, old_o);
252 cmmx_t temp_diff = pdiffub_s(down_even, up_old_odd);
253 cmmx_t noise_diff = pdiffub_s(down_even, up_odd);
255 m->even += psadbw_s(e, old_e) << 1;
256 m->odd += psumbw_s(odd_diff) << 1;
258 temp_diff = pminub_s(temp_diff, odd_diff);
259 noise_diff = pminub_s(noise_diff, odd_diff);
261 m->noise += psumbw_s(noise_diff) << 1;
262 m->temp += psumbw_s(temp_diff) << 1;
263 a += 2*as;
264 b += 2*bs;
265 } while (--lines);
269 static inline void
270 get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s)
272 unsigned two_e = m->even + MAX(m->even , p->thres.even );
273 unsigned two_o = m->odd + MAX(m->odd , p->thres.odd );
274 unsigned two_n = m->noise + MAX(m->noise, p->thres.noise);
275 unsigned two_t = m->temp + MAX(m->temp , p->thres.temp );
277 unsigned e_big = m->even >= (m->odd + two_o + 1)/2;
278 unsigned o_big = m->odd >= (m->even + two_e + 1)/2;
279 unsigned n_big = m->noise >= (m->temp + two_t + 1)/2;
280 unsigned t_big = m->temp >= (m->noise + two_n + 1)/2;
282 unsigned e2x = m->even >= two_o;
283 unsigned o2x = m->odd >= two_e;
284 unsigned n2x = m->noise >= two_t;
285 unsigned t2x = m->temp >= two_n;
287 unsigned ntiny_e = m->even > p->thres.even ;
288 unsigned ntiny_o = m->odd > p->thres.odd ;
289 unsigned ntiny_n = m->noise > p->thres.noise;
290 unsigned ntiny_t = m->temp > p->thres.temp ;
292 unsigned nlow_e = m->even > 2*p->thres.even ;
293 unsigned nlow_o = m->odd > 2*p->thres.odd ;
294 unsigned nlow_n = m->noise > 2*p->thres.noise;
295 unsigned nlow_t = m->temp > 2*p->thres.temp ;
297 unsigned high_e = m->even > 4*p->thres.even ;
298 unsigned high_o = m->odd > 4*p->thres.odd ;
299 unsigned high_n = m->noise > 4*p->thres.noise;
300 unsigned high_t = m->temp > 4*p->thres.temp ;
302 unsigned low_il = !n_big && !t_big && ntiny_n && ntiny_t;
303 unsigned high_il = !n_big && !t_big && nlow_n && nlow_t;
305 if (low_il | high_il) {
306 s->interlaced_low += low_il;
307 s->interlaced_high += high_il;
308 } else {
309 s->tiny.even += ntiny_e;
310 s->tiny.odd += ntiny_o;
311 s->tiny.noise += ntiny_n;
312 s->tiny.temp += ntiny_t;
314 s->low .even += nlow_e ;
315 s->low .odd += nlow_o ;
316 s->low .noise += nlow_n ;
317 s->low .temp += nlow_t ;
319 s->high.even += high_e ;
320 s->high.odd += high_o ;
321 s->high.noise += high_n ;
322 s->high.temp += high_t ;
324 if (m->even >= p->sad_thres) s->sad.even += m->even ;
325 if (m->odd >= p->sad_thres) s->sad.odd += m->odd ;
326 if (m->noise >= p->sad_thres) s->sad.noise += m->noise;
327 if (m->temp >= p->sad_thres) s->sad.temp += m->temp ;
329 s->num_blocks++;
330 s->max.even = MAX(s->max.even , m->even );
331 s->max.odd = MAX(s->max.odd , m->odd );
332 s->max.noise = MAX(s->max.noise, m->noise);
333 s->max.temp = MAX(s->max.temp , m->temp );
335 s->bigger.even += e_big ;
336 s->bigger.odd += o_big ;
337 s->bigger.noise += n_big ;
338 s->bigger.temp += t_big ;
340 s->twox.even += e2x ;
341 s->twox.odd += o2x ;
342 s->twox.noise += n2x ;
343 s->twox.temp += t2x ;
347 static inline struct metrics
348 block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs,
349 int lines, struct vf_priv_s *p, struct frame_stats *s)
351 struct metrics tm;
352 tm.even = tm.odd = tm.noise = tm.temp = 0;
353 get_metrics_c(a, b, as, bs, lines, &tm);
354 if (sizeof(cmmx_t) < 8)
355 get_metrics_c(a+4, b+4, as, bs, lines, &tm);
356 get_block_stats(&tm, p, s);
357 return tm;
360 static inline struct metrics
361 block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
362 int lines, struct vf_priv_s *p, struct frame_stats *s)
364 struct metrics tm;
365 tm.even = tm.odd = tm.noise = tm.temp = 0;
366 get_metrics_fast_c(a, b, as, bs, lines, &tm);
367 if (sizeof(cmmx_t) < 8)
368 get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm);
369 get_block_stats(&tm, p, s);
370 return tm;
373 static inline struct metrics
374 block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
375 int lines, struct vf_priv_s *p, struct frame_stats *s)
377 struct metrics tm;
378 tm.even = tm.odd = tm.noise = tm.temp = 0;
379 get_metrics_faster_c(a, b, as, bs, lines, &tm);
380 if (sizeof(cmmx_t) < 8)
381 get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm);
382 get_block_stats(&tm, p, s);
383 return tm;
/* True when two metric quadruples agree in all four members. */
#define MEQ(X,Y) ((X).even  == (Y).even  && \
                  (X).odd   == (Y).odd   && \
                  (X).temp  == (Y).temp  && \
                  (X).noise == (Y).noise)
/*
 * Shared assembly body of block_metrics_3dnow() and block_metrics_mmx2().
 *
 * It expands in a scope that provides a, b (frame pointers), as, bs
 * (strides), lines (row-pair count, >= 1) and ones (a 64-bit constant used as
 * a memory operand).  a and b are advanced by two rows per iteration.
 * The instruction set used depends on the PSADBW/PSUMBW/PMINUBT/PAVGB
 * definitions in effect at the point of expansion.
 *
 * On exit mm7 holds the four 16-bit sums in struct metrics order
 * (even, odd, noise, temp) and mm6 is zero.  No emms is issued here; the
 * caller must do that after reading mm7.
 */
#define BLOCK_METRICS_TEMPLATE() \
    __asm__ volatile("pxor %mm7, %mm7\n\t"   /* The result is collected in mm7 */ \
                     "pxor %mm6, %mm6\n\t"   /* Temp to stay at 0 */ \
        ); \
    a -= as; \
    b -= bs; \
    do { \
        __asm__ volatile( \
            "movq (%0,%2), %%mm0\n\t" \
            "movq (%1,%3), %%mm1\n\t"   /* mm1 = even */ \
            PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \
            "paddusw %%mm0, %%mm7\n\t"  /* even diff */ \
            "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */ \
            "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */ \
            "movq (%0), %%mm3\n\t" \
            "psubusb %4, %%mm3\n\t" \
            PAVGB(%%mm0, %%mm3) \
            PAVGB(%%mm0, %%mm3)    /* mm3 = qup old odd */ \
            "movq %%mm0, %%mm5\n\t" \
            PSADBW(%%mm2, %%mm0, %%mm4, %%mm6) \
            "psllq $16, %%mm0\n\t" \
            "paddusw %%mm0, %%mm7\n\t" \
            "movq (%1), %%mm4\n\t" \
            "lea (%0,%2,2), %0\n\t" \
            "lea (%1,%3,2), %1\n\t" \
            "psubusb %4, %%mm4\n\t" \
            PAVGB(%%mm2, %%mm4) \
            PAVGB(%%mm2, %%mm4)    /* mm4 = qup odd */ \
            PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */ \
            "movq (%1,%3), %%mm5\n\t" \
            "psubusb %4, %%mm5\n\t" \
            PAVGB(%%mm1, %%mm5) \
            PAVGB(%%mm5, %%mm1)    /* mm1 = qdown even */ \
            PAVGB((%1,%3), %%mm5)  /* mm5 = qup next even */ \
            PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */ \
            PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */ \
            PMINUBT(%%mm2, %%mm3, %%mm0)  /* limit temp to odd diff */ \
            PMINUBT(%%mm2, %%mm4, %%mm0)  /* limit noise to odd diff */ \
            "movq (%1,%3,2), %%mm2\n\t" \
            "psubusb %4, %%mm2\n\t" \
            PAVGB((%1), %%mm2) \
            PAVGB((%1), %%mm2)    /* mm2 = qdown odd */ \
            "movq (%0,%2,2), %%mm1\n\t" \
            "psubusb %4, %%mm1\n\t" \
            PAVGB((%0), %%mm1) \
            PAVGB((%0), %%mm1)    /* mm1 = qdown old odd */ \
            PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */ \
            PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */ \
            PMINUBT(%%mm4, %%mm2, %%mm0)  /* current */ \
            PMINUBT(%%mm3, %%mm1, %%mm0)  /* old */ \
            PSUMBW(%%mm2, %%mm0, %%mm6) \
            PSUMBW(%%mm1, %%mm0, %%mm6) \
            "psllq $32, %%mm2\n\t" \
            "psllq $48, %%mm1\n\t" \
            "paddusw %%mm2, %%mm7\n\t" \
            "paddusw %%mm1, %%mm7\n\t" \
            : "=r" (a), "=r" (b) \
            : "r"((x86_reg)as), "r"((x86_reg)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
            ); \
    } while (--lines);
449 static inline struct metrics
450 block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
451 int lines, struct vf_priv_s *p, struct frame_stats *s)
453 struct metrics tm;
454 #if !HAVE_AMD3DNOW
455 mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");
456 #else
457 static const unsigned long long ones = 0x0101010101010101ull;
459 BLOCK_METRICS_TEMPLATE();
460 __asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
461 get_block_stats(&tm, p, s);
462 #endif
463 return tm;
/*
 * From here on the helper macros map to single MMX2 (integer SSE)
 * instructions, so BLOCK_METRICS_TEMPLATE() expands to MMX2 code below.
 * The unused T and Z arguments are kept so that call sites need not change.
 */
#undef PSUMBW
#undef PSADBW
#undef PMAXUB
#undef PMINUBT
#undef PAVGB

/* X = sum of the bytes of X (Z must hold zero). */
#define PSUMBW(X,T,Z)   "psadbw " #Z "," #X "\n\t"
/* Y = sum of absolute byte differences of X and Y. */
#define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t"
/* Y = byte-wise unsigned maximum / minimum / average of X and Y. */
#define PMAXUB(X,Y)     "pmaxub " #X "," #Y "\n\t"
#define PMINUBT(X,Y,T)  "pminub " #X "," #Y "\n\t"
#define PAVGB(X,Y)      "pavgb "  #X "," #Y "\n\t"
478 static inline struct metrics
479 block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
480 int lines, struct vf_priv_s *p, struct frame_stats *s)
482 struct metrics tm;
483 #if !HAVE_MMX
484 mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n");
485 #else
486 static const unsigned long long ones = 0x0101010101010101ull;
487 x86_reg interlaced;
488 x86_reg prefetch_line = (((long)a>>3) & 7) + 10;
489 #ifdef DEBUG
490 struct frame_stats ts = *s;
491 #endif
492 __asm__ volatile("prefetcht0 (%0,%2)\n\t"
493 "prefetcht0 (%1,%3)\n\t" :
494 : "r" (a), "r" (b),
495 "r" (prefetch_line * as), "r" (prefetch_line * bs));
497 BLOCK_METRICS_TEMPLATE();
499 s->num_blocks++;
500 __asm__ volatile(
501 "movq %3, %%mm0\n\t"
502 "movq %%mm7, %%mm1\n\t"
503 "psubusw %%mm0, %%mm1\n\t"
504 "movq %%mm1, %%mm2\n\t"
505 "paddusw %%mm0, %%mm2\n\t"
506 "paddusw %%mm7, %%mm2\n\t"
507 "pshufw $0xb1, %%mm2, %%mm3\n\t"
508 "pavgw %%mm7, %%mm2\n\t"
509 "pshufw $0xb1, %%mm2, %%mm2\n\t"
510 "psubusw %%mm7, %%mm2\n\t"
511 "pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */
512 "psubusw %%mm7, %%mm3\n\t"
513 "pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */
514 "movq %1, %%mm4\n\t"
515 "movq %2, %%mm5\n\t"
516 "psubw %%mm2, %%mm4\n\t"
517 "psubw %%mm3, %%mm5\n\t"
518 "movq %%mm4, %1\n\t"
519 "movq %%mm5, %2\n\t"
520 "pxor %%mm4, %%mm4\n\t"
521 "pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */
522 "psubusw %%mm0, %%mm1\n\t"
523 "pxor %%mm5, %%mm5\n\t"
524 "pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */
525 "psubusw %%mm0, %%mm1\n\t"
526 "psubusw %%mm0, %%mm1\n\t"
527 "pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */
528 "pshufw $0xb1, %%mm2, %%mm0\n\t"
529 "por %%mm2, %%mm0\n\t" /* 1 if not close */
530 "punpckhdq %%mm0, %%mm0\n\t"
531 "movq %%mm4, %%mm2\n\t" /* tttt */
532 "punpckhdq %%mm5, %%mm2\n\t" /* ttll */
533 "por %%mm2, %%mm0\n\t"
534 "pcmpeqd %%mm6, %%mm0\n\t" /* close && big */
535 "psrlq $16, %%mm0\n\t"
536 "psrlw $15, %%mm0\n\t"
537 "movd %%mm0, %0\n\t"
538 : "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox)
539 : "m" (p->thres)
542 if (interlaced) {
543 s->interlaced_high += interlaced >> 16;
544 s->interlaced_low += interlaced;
545 } else {
546 __asm__ volatile(
547 "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */
548 "psubw %%mm0, %%mm4\n\t"
549 "psubw %%mm0, %%mm5\n\t"
550 "psubw %%mm0, %%mm1\n\t"
551 "paddw %0, %%mm4\n\t"
552 "paddw %1, %%mm5\n\t"
553 "paddw %2, %%mm1\n\t"
554 "movq %%mm4, %0\n\t"
555 "movq %%mm5, %1\n\t"
556 "movq %%mm1, %2\n\t"
557 : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)
560 __asm__ volatile(
561 "pshufw $0, %2, %%mm0\n\t"
562 "psubusw %%mm7, %%mm0\n\t"
563 "pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */
564 "pand %%mm7, %%mm0\n\t"
565 "movq %%mm0, %%mm1\n\t"
566 "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */
567 "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */
568 "paddd %0, %%mm0\n\t"
569 "paddd %1, %%mm1\n\t"
570 "movq %%mm0, %0\n\t"
571 "movq %%mm1, %1\n\t"
572 : "=m" (s->sad.even), "=m" (s->sad.noise)
573 : "m" (p->sad_thres)
577 __asm__ volatile(
578 "movq %%mm7, (%1)\n\t"
579 PMAXUW((%0), %%mm7)
580 "movq %%mm7, (%0)\n\t"
581 "emms"
582 : : "r" (&s->max), "r" (&tm), "X" (s->max)
583 : "memory"
585 #ifdef DEBUG
586 if (1) {
587 struct metrics cm;
588 a -= 7*as;
589 b -= 7*bs;
590 cm = block_metrics_c(a, b, as, bs, 4, p, &ts);
591 if (!MEQ(tm, cm))
592 mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n");
593 if (s) {
594 # define CHECK(X) if (!MEQ(s->X, ts.X)) \
595 mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n");
596 CHECK(tiny);
597 CHECK(low);
598 CHECK(high);
599 CHECK(sad);
600 CHECK(max);
603 #endif
604 #endif
605 return tm;
608 static inline int
609 dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
610 long cos, int ds, int ss, int w, int t)
612 #if !HAVE_MMX
613 mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n");
614 return 0;
615 #else
616 unsigned long len = (w+7) >> 3;
617 int ret;
618 __asm__ volatile (
619 "pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */
620 "movd %0, %%mm7 \n\t"
621 "punpcklbw %%mm7, %%mm7 \n\t"
622 "punpcklwd %%mm7, %%mm7 \n\t"
623 "punpckldq %%mm7, %%mm7 \n\t" /* mm7 = threshold */
624 : /* no output */
625 : "rm" (t)
627 do {
628 __asm__ volatile (
629 "movq (%0), %%mm0\n\t"
630 "movq (%0,%3,2), %%mm1\n\t"
631 "movq %%mm0, (%2)\n\t"
632 "pmaxub %%mm1, %%mm0\n\t"
633 "pavgb (%0), %%mm1\n\t"
634 "psubusb %%mm1, %%mm0\n\t"
635 "paddusb %%mm7, %%mm0\n\t" /* mm0 = max-avg+thr */
636 "movq (%0,%1), %%mm2\n\t"
637 "movq (%0,%5), %%mm3\n\t"
638 "movq %%mm2, %%mm4\n\t"
639 PDIFFUBT(%%mm1, %%mm2, %%mm5)
640 PDIFFUBT(%%mm1, %%mm3, %%mm5)
641 "pminub %%mm2, %%mm3\n\t"
642 "pcmpeqb %%mm3, %%mm2\n\t" /* b = min */
643 "pand %%mm2, %%mm4\n\t"
644 "pandn (%0,%5), %%mm2\n\t"
645 "por %%mm4, %%mm2\n\t"
646 "pminub %%mm0, %%mm3\n\t"
647 "pcmpeqb %%mm0, %%mm3\n\t" /* set to 1s if >= threshold */
648 "psubb %%mm3, %%mm6\n\t" /* count pixels above thr. */
649 "pand %%mm3, %%mm1 \n\t"
650 "pandn %%mm2, %%mm3 \n\t"
651 "por %%mm3, %%mm1 \n\t" /* avg if >= threshold */
652 "movq %%mm1, (%2,%4) \n\t"
653 : /* no output */
654 : "r" (a), "r" ((x86_reg)bos), "r" ((x86_reg)dst), "r" ((x86_reg)ss), "r" ((x86_reg)ds), "r" ((x86_reg)cos)
656 a += 8;
657 dst += 8;
658 } while (--len);
660 __asm__ volatile ("pxor %%mm7, %%mm7 \n\t"
661 "psadbw %%mm6, %%mm7 \n\t"
662 "movd %%mm7, %0 \n\t"
663 "emms \n\t"
664 : "=r" (ret)
666 return ret;
667 #endif
670 static inline int
671 dint_copy_line(unsigned char *dst, unsigned char *a, long bos,
672 long cos, int ds, int ss, int w, int t)
674 unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t);
675 cmmx_t dint_count = 0;
676 cmmx_t thr;
677 t |= t << 8;
678 thr = t | (t << 16);
679 if (sizeof(cmmx_t) > 4)
680 thr |= thr << (sizeof(cmmx_t)*4);
681 do {
682 cmmx_t e = *(cmmx_t*)a;
683 cmmx_t ne = *(cmmx_t*)(a+2*ss);
684 cmmx_t o = *(cmmx_t*)(a+bos);
685 cmmx_t oo = *(cmmx_t*)(a+cos);
686 cmmx_t maxe = pmaxub(e, ne);
687 cmmx_t avge = pavgb(e, ne);
688 cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */
689 cmmx_t diffo = pdiffub(avge, o);
690 cmmx_t diffoo = pdiffub(avge, oo);
691 cmmx_t diffcmp = pcmpgtub(diffo, diffoo);
692 cmmx_t bo = ((oo ^ o) & diffcmp) ^ o;
693 cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo;
694 cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo);
695 cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo;
696 dint_count += above_thr & ONE_BYTES;
697 *(cmmx_t*)(dst) = e;
698 *(cmmx_t*)(dst+ds) = bo_or_avg;
699 a += sizeof(cmmx_t);
700 dst += sizeof(cmmx_t);
701 } while (--len);
702 return psumbw(dint_count);
/*
 * Build one output plane from up to three source planes.
 *
 * d/ds: destination and its stride.  a, b, c: source planes sharing the
 * stride ss; even output rows come from a, odd rows from b or -- when the
 * line deinterlacer decides so -- from c or an interpolation.
 * w, h: plane size in bytes / rows.  threshold >= 128 disables the
 * deinterlacer and simply weaves a and b.  field != 0 shifts the row parity
 * by emitting the first row from b.  mmx2 == 1 selects the MMX2 line routine.
 *
 * Returns the number of interpolated bytes reported by the line routines.
 *
 * A plane without rows is now left untouched, and a one-row plane with
 * field set stops after that row.  Previously h underflowed and rows outside
 * the plane were copied in those cases.
 */
static int
dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b,
                unsigned char *c, unsigned long w, unsigned long h,
                unsigned long ds, unsigned long ss, unsigned long threshold,
                long field, long mmx2)
{
    unsigned long ret = 0;
    long bos = b - a;
    long cos = c - a;

    if (h == 0)
        return 0;

    if (field) {
        fast_memcpy(d, b, w);
        h--;
        d += ds;
        a += ss;
        if (h == 0)
            return 0;
    }
    /* From here on bos/cos address the row below the current row of a. */
    bos += ss;
    cos += ss;
    while (h > 2) {
        if (threshold >= 128) {
            fast_memcpy(d, a, w);
            fast_memcpy(d+ds, a+bos, w);
        } else if (mmx2 == 1) {
            ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);
        } else
            ret += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold);
        h -= 2;
        d += 2*ds;
        a += 2*ss;
    }
    /* One or two rows are left; they are always copied unfiltered. */
    fast_memcpy(d, a, w);
    if (h == 2)
        fast_memcpy(d+ds, a+bos, w);
    return ret;
}
740 static void
741 copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi,
742 unsigned char **old, unsigned char **new, unsigned long show)
744 unsigned long threshold = 256;
745 unsigned long field = p->swapped;
746 unsigned long dint_pixels = 0;
747 unsigned char **other = old;
748 if (show >= 12 || !(show & 3))
749 show >>= 2, other = new, new = old;
750 if (show <= 2) { /* Single field: de-interlace */
751 threshold = p->dint_thres;
752 field ^= show & 1;
753 old = new;
754 } else if (show == 3)
755 old = new;
756 else
757 field ^= 1;
758 dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0],
759 other[0], p->w, p->h, dmpi->stride[0],
760 p->stride, threshold, field, p->mmx2);
761 if (dmpi->flags & MP_IMGFLAG_PLANAR) {
762 if (p->luma_only)
763 old = new, other = new;
764 else
765 threshold = threshold/2 + 1;
766 field ^= p->chroma_swapped;
767 dint_copy_plane(dmpi->planes[1], old[1], new[1],
768 other[1], p->cw, p->ch, dmpi->stride[1],
769 p->chroma_stride, threshold, field, p->mmx2);
770 dint_copy_plane(dmpi->planes[2], old[2], new[2],
771 other[2], p->cw, p->ch, dmpi->stride[2],
772 p->chroma_stride, threshold, field, p->mmx2);
774 if (dint_pixels > 0 && p->verbose)
775 mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels);
778 static void diff_planes(struct vf_priv_s *p, struct frame_stats *s,
779 unsigned char *of, unsigned char *nf,
780 int w, int h, int os, int ns, int swapped)
782 int i, y;
783 int align = -(long)nf & 7;
784 of += align;
785 nf += align;
786 w -= align;
787 if (swapped)
788 of -= os, nf -= ns;
789 i = (h*3 >> 7) & ~1;
790 of += i*os + 8;
791 nf += i*ns + 8;
792 h -= i;
793 w -= 16;
795 memset(s, 0, sizeof(*s));
797 for (y = (h-8) >> 3; y; y--) {
798 if (p->mmx2 == 1) {
799 for (i = 0; i < w; i += 8)
800 block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);
801 } else if (p->mmx2 == 2) {
802 for (i = 0; i < w; i += 8)
803 block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);
804 } else if (p->fast > 3) {
805 for (i = 0; i < w; i += 8)
806 block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);
807 } else if (p->fast > 1) {
808 for (i = 0; i < w; i += 8)
809 block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s);
810 } else {
811 for (i = 0; i < w; i += 8)
812 block_metrics_c(of+i, nf+i, os, ns, 4, p, s);
814 of += 8*os;
815 nf += 8*ns;
819 #define METRICS(X) (X).even, (X).odd, (X).noise, (X).temp
821 static void diff_fields(struct vf_priv_s *p, struct frame_stats *s,
822 unsigned char **old, unsigned char **new)
824 diff_planes(p, s, old[0], new[0], p->w, p->h,
825 p->stride, p->stride, p->swapped);
826 s->sad.even = (s->sad.even * 16ul) / s->num_blocks;
827 s->sad.odd = (s->sad.odd * 16ul) / s->num_blocks;
828 s->sad.noise = (s->sad.noise * 16ul) / s->num_blocks;
829 s->sad.temp = (s->sad.temp * 16ul) / s->num_blocks;
830 if (p->verbose)
831 mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d - %d, "
832 "t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, "
833 "2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n",
834 p->inframes, p->chflag, METRICS(s->max), s->num_blocks,
835 METRICS(s->tiny), METRICS(s->low), METRICS(s->high),
836 METRICS(s->bigger), METRICS(s->twox), METRICS(s->sad),
837 s->interlaced_low, s->interlaced_high,
838 p->iosync / (double) p->in_inc);
841 static const char *parse_args(struct vf_priv_s *p, const char *args)
843 args--;
844 while (args && *++args &&
845 (sscanf(args, "io=%lu:%lu", &p->out_dec, &p->in_inc) == 2 ||
846 sscanf(args, "diff_thres=%hu", &p->thres.even ) == 1 ||
847 sscanf(args, "comb_thres=%hu", &p->thres.noise) == 1 ||
848 sscanf(args, "sad_thres=%lu", &p->sad_thres ) == 1 ||
849 sscanf(args, "dint_thres=%lu", &p->dint_thres ) == 1 ||
850 sscanf(args, "fast=%u", &p->fast ) == 1 ||
851 sscanf(args, "mmx2=%lu", &p->mmx2 ) == 1 ||
852 sscanf(args, "luma_only=%u", &p->luma_only ) == 1 ||
853 sscanf(args, "verbose=%u", &p->verbose ) == 1 ||
854 sscanf(args, "vfr=%u", &p->vfr_mode ) == 1 ||
855 sscanf(args, "crop=%lu:%lu:%lu:%lu", &p->w,
856 &p->h, &p->crop_x, &p->crop_y) == 4))
857 args = strchr(args, '/');
858 return args;
/*
 * Greatest common divisor by Euclid's algorithm.
 * gcd(0, n) and gcd(n, 0) yield n; gcd(0, 0) yields 0.
 */
static unsigned long gcd(unsigned long x, unsigned long y)
{
    unsigned long small = x < y ? x : y;
    unsigned long large = x < y ? y : x;

    while (small != 0) {
        unsigned long rest = large % small;
        large = small;
        small = rest;
    }
    return large;
}
875 static void init(struct vf_priv_s *p, mp_image_t *mpi)
877 unsigned long i;
878 unsigned long plane_size, chroma_plane_size;
879 unsigned char *plane;
880 unsigned long cos, los;
881 p->crop_cx = p->crop_x >> mpi->chroma_x_shift;
882 p->crop_cy = p->crop_y >> mpi->chroma_y_shift;
883 if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) {
884 p->stride = (mpi->w + 15) & ~15;
885 p->chroma_stride = p->stride >> mpi->chroma_x_shift;
886 } else {
887 p->stride = mpi->width;
888 p->chroma_stride = mpi->chroma_width;
890 p->cw = p->w >> mpi->chroma_x_shift;
891 p->ch = p->h >> mpi->chroma_y_shift;
892 p->nplanes = 1;
893 p->static_idx = 0;
894 p->temp_idx = 0;
895 p->old_planes = p->planes[0];
896 plane_size = mpi->h * p->stride;
897 chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ?
898 mpi->chroma_height * p->chroma_stride : 0;
899 p->memory_allocated =
900 malloc(NUM_STORED * (plane_size+2*chroma_plane_size) +
901 8*p->chroma_stride + 4096);
902 /* align to page boundary */
903 plane = p->memory_allocated + (-(long)p->memory_allocated & 4095);
904 memset(plane, 0, NUM_STORED * plane_size);
905 los = p->crop_x + p->crop_y * p->stride;
906 cos = p->crop_cx + p->crop_cy * p->chroma_stride;
907 for (i = 0; i != NUM_STORED; i++, plane += plane_size) {
908 p->planes[i][0] = plane;
909 p->planes[NUM_STORED + i][0] = plane + los;
911 if (mpi->flags & MP_IMGFLAG_PLANAR) {
912 p->nplanes = 3;
913 memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size);
914 for (i = 0; i != NUM_STORED; i++) {
915 p->planes[i][1] = plane;
916 p->planes[NUM_STORED + i][1] = plane + cos;
917 plane += chroma_plane_size;
918 p->planes[i][2] = plane;
919 p->planes[NUM_STORED + i][2] = plane + cos;
920 plane += chroma_plane_size;
923 p->out_dec <<= 2;
924 i = gcd(p->in_inc, p->out_dec);
925 p->in_inc /= i;
926 p->out_dec /= i;
927 p->iosync = 0;
928 p->num_fields = 3;
/* Current wall-clock time from gettimeofday(), in seconds as a double. */
static inline double get_time(void)
{
    struct timeval now;

    gettimeofday(&now, 0);
    return now.tv_sec + now.tv_usec * 1e-6;
}
938 static void get_image(struct vf_instance* vf, mp_image_t *mpi)
940 struct vf_priv_s *p = vf->priv;
941 static unsigned char **planes, planes_idx;
943 if (mpi->type == MP_IMGTYPE_STATIC) return;
945 if (!p->planes[0][0]) init(p, mpi);
947 if (mpi->type == MP_IMGTYPE_TEMP ||
948 (mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE)))
949 planes_idx = NUM_STORED/2 + (++p->temp_idx % (NUM_STORED/2));
950 else
951 planes_idx = ++p->static_idx % (NUM_STORED/2);
952 planes = p->planes[planes_idx];
953 mpi->priv = p->planes[NUM_STORED + planes_idx];
954 if (mpi->priv == p->old_planes) {
955 unsigned char **old_planes =
956 p->planes[NUM_STORED + 2 + (++p->temp_idx & 1)];
957 my_memcpy_pic(old_planes[0], p->old_planes[0],
958 p->w, p->h, p->stride, p->stride);
959 if (mpi->flags & MP_IMGFLAG_PLANAR) {
960 my_memcpy_pic(old_planes[1], p->old_planes[1],
961 p->cw, p->ch, p->chroma_stride, p->chroma_stride);
962 my_memcpy_pic(old_planes[2], p->old_planes[2],
963 p->cw, p->ch, p->chroma_stride, p->chroma_stride);
965 p->old_planes = old_planes;
966 p->num_copies++;
968 mpi->planes[0] = planes[0];
969 mpi->stride[0] = p->stride;
970 if (mpi->flags & MP_IMGFLAG_PLANAR) {
971 mpi->planes[1] = planes[1];
972 mpi->planes[2] = planes[2];
973 mpi->stride[1] = mpi->stride[2] = p->chroma_stride;
975 mpi->width = p->stride;
977 mpi->flags |= MP_IMGFLAG_DIRECT;
978 mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK;
981 static inline long
982 cmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e)
984 long diff = x-y;
985 long unit = ((x+y+err) >> e);
986 long ret = (diff > unit) - (diff < -unit);
987 unit >>= 1;
988 return ret + (diff > unit) - (diff < -unit);
991 static unsigned long
992 find_breaks(struct vf_priv_s *p, struct frame_stats *s)
994 struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
995 long notfilm = 5*p->in_inc - p->out_dec;
996 unsigned long n = s->num_blocks >> 8;
997 unsigned long sad_comb_cmp = cmpe(s->sad.temp, s->sad.noise, 512, 1);
998 unsigned long ret = 8;
1000 if (cmpe(s->sad.temp, s->sad.even, 512, 1) > 0)
1001 mp_msg(MSGT_VFILTER, MSGL_WARN,
1002 "@@@@@@@@ Bottom-first field??? @@@@@@@@\n");
1003 if (s->sad.temp > 1000 && s->sad.noise > 1000)
1004 return 3;
1005 if (s->interlaced_high >= 2*n && s->sad.temp > 256 && s->sad.noise > 256)
1006 return 3;
1007 if (s->high.noise > s->num_blocks/4 && s->sad.noise > 10000 &&
1008 s->sad.noise > 2*s->sad.even && s->sad.noise > 2*ps->sad.odd) {
1009 // Mid-frame scene change
1010 if (s->tiny.temp + s->interlaced_low < n ||
1011 s->low.temp + s->interlaced_high < n/4 ||
1012 s->high.temp + s->interlaced_high < n/8 ||
1013 s->sad.temp < 160)
1014 return 1;
1015 return 3;
1017 if (s->high.temp > s->num_blocks/4 && s->sad.temp > 10000 &&
1018 s->sad.temp > 2*ps->sad.odd && s->sad.temp > 2*ps->sad.even) {
1019 // Start frame scene change
1020 if (s->tiny.noise + s->interlaced_low < n ||
1021 s->low.noise + s->interlaced_high < n/4 ||
1022 s->high.noise + s->interlaced_high < n/8 ||
1023 s->sad.noise < 160)
1024 return 2;
1025 return 3;
1027 if (sad_comb_cmp == 2)
1028 return 2;
1029 if (sad_comb_cmp == -2)
1030 return 1;
1032 if (s->tiny.odd > 3*MAX(n,s->tiny.even) + s->interlaced_low)
1033 return 1;
1034 if (s->tiny.even > 3*MAX(n,s->tiny.odd)+s->interlaced_low &&
1035 (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
1036 return 4;
1038 if (s->sad.noise < 64 && s->sad.temp < 64 &&
1039 s->low.noise <= n/2 && s->high.noise <= n/4 &&
1040 s->low.temp <= n/2 && s->high.temp <= n/4)
1041 goto still;
1043 if (s->tiny.temp > 3*MAX(n,s->tiny.noise) + s->interlaced_low)
1044 return 2;
1045 if (s->tiny.noise > 3*MAX(n,s->tiny.temp) + s->interlaced_low)
1046 return 1;
1048 if (s->low.odd > 3*MAX(n/4,s->low.even) + s->interlaced_high)
1049 return 1;
1050 if (s->low.even > 3*MAX(n/4,s->low.odd)+s->interlaced_high &&
1051 s->sad.even > 2*s->sad.odd &&
1052 (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
1053 return 4;
1055 if (s->low.temp > 3*MAX(n/4,s->low.noise) + s->interlaced_high)
1056 return 2;
1057 if (s->low.noise > 3*MAX(n/4,s->low.temp) + s->interlaced_high)
1058 return 1;
1060 if (sad_comb_cmp == 1 && s->sad.noise < 64)
1061 return 2;
1062 if (sad_comb_cmp == -1 && s->sad.temp < 64)
1063 return 1;
1065 if (s->tiny.odd <= n || (s->tiny.noise <= n/2 && s->tiny.temp <= n/2)) {
1066 if (s->interlaced_low <= n) {
1067 if (p->num_fields == 1)
1068 goto still;
1069 if (s->tiny.even <= n || ps->tiny.noise <= n/2)
1070 /* Still frame */
1071 goto still;
1072 if (s->bigger.even >= 2*MAX(n,s->bigger.odd) + s->interlaced_low)
1073 return 4;
1074 if (s->low.even >= 2*n + s->interlaced_low)
1075 return 4;
1076 goto still;
1079 if (s->low.odd <= n/4) {
1080 if (s->interlaced_high <= n/4) {
1081 if (p->num_fields == 1)
1082 goto still;
1083 if (s->low.even <= n/4)
1084 /* Still frame */
1085 goto still;
1086 if (s->bigger.even >= 2*MAX(n/4,s->bigger.odd)+s->interlaced_high)
1087 return 4;
1088 if (s->low.even >= n/2 + s->interlaced_high)
1089 return 4;
1090 goto still;
1093 if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_low)
1094 return 2;
1095 if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_low)
1096 return 1;
1097 if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_high)
1098 return 2;
1099 if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_high)
1100 return 1;
1101 if (s->twox.temp > 2*MAX(n,s->twox.noise) + s->interlaced_high)
1102 return 2;
1103 if (s->twox.noise > 2*MAX(n,s->twox.temp) + s->interlaced_high)
1104 return 1;
1105 if (s->bigger.even > 2*MAX(n,s->bigger.odd) + s->interlaced_low &&
1106 s->bigger.temp < n && s->bigger.noise < n)
1107 return 4;
1108 if (s->interlaced_low > MIN(2*n, s->tiny.odd))
1109 return 3;
1110 ret = 8 + (1 << (s->sad.temp > s->sad.noise));
1111 still:
1112 if (p->num_fields == 1 && p->prev_fields == 3 && notfilm >= 0 &&
1113 (s->tiny.temp <= s->tiny.noise || s->sad.temp < s->sad.noise+16))
1114 return 1;
1115 if (p->notout < p->num_fields && p->iosync > 2*p->in_inc && notfilm < 0)
1116 notfilm = 0;
1117 if (p->num_fields < 2 ||
1118 (p->num_fields == 2 && p->prev_fields == 2 && notfilm < 0))
1119 return ret;
1120 if (!notfilm && (p->prev_fields&~1) == 2) {
1121 if (p->prev_fields + p->num_fields == 5) {
1122 if (s->tiny.noise <= s->tiny.temp ||
1123 s->low.noise == 0 || s->low.noise < s->low.temp ||
1124 s->sad.noise < s->sad.temp+16)
1125 return 2;
1127 if (p->prev_fields + p->num_fields == 4) {
1128 if (s->tiny.temp <= s->tiny.noise ||
1129 s->low.temp == 0 || s->low.temp < s->low.noise ||
1130 s->sad.temp < s->sad.noise+16)
1131 return 1;
1134 if (p->num_fields > 2 &&
1135 ps->sad.noise > s->sad.noise && ps->sad.noise > s->sad.temp)
1136 return 4;
1137 return 2 >> (s->sad.noise > s->sad.temp);
/*
 * Map a small integer to one character for the verbose log line:
 * 0 becomes a blank, 1..9 become '1'..'9', and values above 9 continue
 * as 'a', 'b', ...  The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define ITOC(X) ((X) ? (X) + ((X) > 9 ? 'a' - 10 : '0') : ' ')
1142 static int put_image(struct vf_instance* vf, mp_image_t *mpi, double pts)
1144 mp_image_t *dmpi;
1145 struct vf_priv_s *p = vf->priv;
1146 unsigned char **planes, **old_planes;
1147 struct frame_stats *s = &p->stats[p->inframes & 1];
1148 struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
1149 int swapped = 0;
1150 const int flags = mpi->fields;
1151 int breaks, prev;
1152 int show_fields = 0;
1153 int dropped_fields = 0;
1154 double start_time, diff_time;
1155 char prev_chflag = p->chflag;
1156 int keep_rate;
1157 double diff, fps, my_pts;
1159 if (!p->planes[0][0]) init(p, mpi);
1161 diff = pts - p->prev_pts;
1162 if (diff > 0)
1163 p->prev_diff = diff;
1164 p->prev_pts = pts;
1165 fps = (diff > 0 && p->prev_pts != MP_NOPTS_VALUE && pts != MP_NOPTS_VALUE) ? 1/diff : -1;
1167 old_planes = p->old_planes;
1169 if ((mpi->flags & MP_IMGFLAG_DIRECT) && mpi->priv) {
1170 planes = mpi->priv;
1171 mpi->priv = 0;
1172 } else {
1173 planes = p->planes[2 + (++p->temp_idx & 1)];
1174 my_memcpy_pic(planes[0],
1175 mpi->planes[0] + p->crop_x + p->crop_y * mpi->stride[0],
1176 p->w, p->h, p->stride, mpi->stride[0]);
1177 if (mpi->flags & MP_IMGFLAG_PLANAR) {
1178 my_memcpy_pic(planes[1],
1179 mpi->planes[1] + p->crop_cx + p->crop_cy * mpi->stride[1],
1180 p->cw, p->ch, p->chroma_stride, mpi->stride[1]);
1181 my_memcpy_pic(planes[2],
1182 mpi->planes[2] + p->crop_cx + p->crop_cy * mpi->stride[2],
1183 p->cw, p->ch, p->chroma_stride, mpi->stride[2]);
1184 p->num_copies++;
1188 p->old_planes = planes;
1189 p->chflag = ';';
1190 if (flags & MP_IMGFIELD_ORDERED) {
1191 swapped = !(flags & MP_IMGFIELD_TOP_FIRST);
1192 p->chflag = (flags & MP_IMGFIELD_REPEAT_FIRST ? '|' :
1193 flags & MP_IMGFIELD_TOP_FIRST ? ':' : '.');
1195 p->swapped = swapped;
1197 start_time = get_time();
1198 if (p->chflag == '|') {
1199 *s = ppzs;
1200 p->iosync += p->in_inc;
1201 } else if ((p->fast & 1) && prev_chflag == '|')
1202 *s = pprs;
1203 else
1204 diff_fields(p, s, old_planes, planes);
1205 diff_time = get_time();
1206 p->diff_time += diff_time - start_time;
1207 breaks = p->inframes ? find_breaks(p, s) : 2;
1208 p->inframes++;
1209 keep_rate = 4*p->in_inc == p->out_dec;
1211 if (0 && p->vfr_mode && fps > 31.0)
1212 breaks = 2;
1213 if (p->vfr_mode && fps < 0.0 || fps > 31.0 || breaks > 1) {
1214 p->notout = 0;
1215 p->iosync = p->in_inc;
1217 switch (breaks) {
1218 case 0:
1219 case 8:
1220 case 9:
1221 case 10:
1222 if (!keep_rate && p->notout < p->num_fields && p->iosync < 2*p->in_inc)
1223 break;
1224 if (p->notout < p->num_fields)
1225 dropped_fields = -2;
1226 case 4:
1227 if (keep_rate || p->iosync >= -2*p->in_inc)
1228 show_fields = (4<<p->num_fields)-1;
1229 break;
1230 case 3:
1231 if (keep_rate)
1232 show_fields = 2;
1233 else if (p->iosync > 0) {
1234 if (p->notout >= p->num_fields && p->iosync > 2*p->in_inc) {
1235 show_fields = 4; /* prev odd only */
1236 if (p->num_fields > 1)
1237 show_fields |= 8; /* + prev even */
1238 } else {
1239 show_fields = 2; /* even only */
1240 if (p->notout >= p->num_fields)
1241 dropped_fields += p->num_fields;
1244 break;
1245 case 2:
1246 if (p->iosync <= -3*p->in_inc) {
1247 if (p->notout >= p->num_fields)
1248 dropped_fields = p->num_fields;
1249 break;
1251 if (p->num_fields == 1) {
1252 int prevbreak = ps->sad.noise >= 128;
1253 if (p->iosync < 4*p->in_inc) {
1254 show_fields = 3;
1255 dropped_fields = prevbreak;
1256 } else {
1257 show_fields = 4 | (!prevbreak << 3);
1258 if (p->notout < 1 + p->prev_fields)
1259 dropped_fields = -!prevbreak;
1261 break;
1263 default:
1264 if (keep_rate)
1265 show_fields = 3 << (breaks & 1);
1266 else if (p->notout >= p->num_fields &&
1267 p->iosync >= (breaks == 1 ? -p->in_inc :
1268 p->in_inc << (p->num_fields == 1))) {
1269 show_fields = (1 << (2 + p->num_fields)) - (1<<breaks);
1270 } else {
1271 if (p->notout >= p->num_fields)
1272 dropped_fields += p->num_fields + 2 - breaks;
1273 if (breaks == 1) {
1274 if (p->iosync >= 4*p->in_inc)
1275 show_fields = 6;
1276 } else if (p->iosync > -3*p->in_inc)
1277 show_fields = 3; /* odd+even */
1279 break;
1282 show_fields &= 15;
1283 prev = p->prev_fields;
1284 if (breaks < 8) {
1285 if (p->num_fields == 1)
1286 breaks &= ~4;
1287 if (breaks)
1288 p->num_breaks++;
1289 if (breaks == 3)
1290 p->prev_fields = p->num_fields = 1;
1291 else if (breaks) {
1292 p->prev_fields = p->num_fields + (breaks==1) - (breaks==4);
1293 p->num_fields = breaks - (breaks == 4) + (p->chflag == '|');
1294 } else
1295 p->num_fields += 2;
1296 } else
1297 p->num_fields += 2;
1299 p->iosync += 4 * p->in_inc;
1300 if (p->chflag == '|')
1301 p->iosync += p->in_inc;
1303 if (show_fields) {
1304 p->iosync -= p->out_dec;
1305 p->notout = !(show_fields & 1) + !(show_fields & 3);
1306 if (((show_fields & 3) == 3 &&
1307 (s->low.noise + s->interlaced_low < (s->num_blocks>>8) ||
1308 s->sad.noise < 160)) ||
1309 ((show_fields & 12) == 12 &&
1310 (ps->low.noise + ps->interlaced_low < (s->num_blocks>>8) ||
1311 ps->sad.noise < 160))) {
1312 p->export_count++;
1313 dmpi = vf_get_image(vf->next, mpi->imgfmt, MP_IMGTYPE_EXPORT,
1314 MP_IMGFLAG_PRESERVE|MP_IMGFLAG_READABLE,
1315 p->w, p->h);
1316 if ((show_fields & 3) != 3) planes = old_planes;
1317 dmpi->planes[0] = planes[0];
1318 dmpi->stride[0] = p->stride;
1319 dmpi->width = mpi->width;
1320 if (mpi->flags & MP_IMGFLAG_PLANAR) {
1321 dmpi->planes[1] = planes[1];
1322 dmpi->planes[2] = planes[2];
1323 dmpi->stride[1] = p->chroma_stride;
1324 dmpi->stride[2] = p->chroma_stride;
1326 } else {
1327 p->merge_count++;
1328 dmpi = vf_get_image(vf->next, mpi->imgfmt,
1329 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
1330 p->w, p->h);
1331 copy_merge_fields(p, dmpi, old_planes, planes, show_fields);
1333 p->outframes++;
1334 } else
1335 p->notout += 2;
1337 if (p->verbose)
1338 mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu %lu: %x %c %c %lu%s%s%c%s\n",
1339 p->inframes, p->outframes,
1340 breaks, breaks<8 && breaks>0 ? (int) p->prev_fields+'0' : ' ',
1341 ITOC(show_fields),
1342 p->num_breaks, 5*p->in_inc == p->out_dec && breaks<8 &&
1343 breaks>0 && ((prev&~1)!=2 || prev+p->prev_fields!=5) ?
1344 " ######## bad telecine ########" : "",
1345 dropped_fields ? " ======== dropped ":"", ITOC(dropped_fields),
1346 !show_fields || (show_fields & (show_fields-1)) ?
1347 "" : " @@@@@@@@@@@@@@@@@");
1349 p->merge_time += get_time() - diff_time;
1350 if (show_fields) {
1351 if (! p->vfr_mode)
1352 return vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE);
1353 else {
1354 if (fps < 29.0 || fps > 31.0)
1355 p->drop_countdown = 0;
1356 switch(p->drop_countdown) {
1357 case 0:
1358 my_pts = pts;
1359 break;
1360 case 4:
1361 p->undrop_pts = pts;
1362 default:
1363 my_pts = p->drop_pts + (pts - vf->priv->undrop_pts) * 1.25;
1364 vf->priv->drop_countdown--;
1366 if (my_pts < p->prev_out_pts) {
1367 my_pts = p->prev_out_pts + p->prev_diff / 2;
1369 p->prev_out_pts = my_pts;
1370 return vf_next_put_image(vf, dmpi, my_pts);
1373 } else {
1374 p->drop_pts = pts;
1375 p->drop_countdown = 4;
1376 return 0;
1380 static int query_format(struct vf_instance* vf, unsigned int fmt)
1382 /* FIXME - support more formats */
1383 switch (fmt) {
1384 case IMGFMT_YV12:
1385 case IMGFMT_IYUV:
1386 case IMGFMT_I420:
1387 case IMGFMT_411P:
1388 case IMGFMT_422P:
1389 case IMGFMT_444P:
1390 return vf_next_query_format(vf, fmt);
1392 return 0;
1395 static int config(struct vf_instance* vf,
1396 int width, int height, int d_width, int d_height,
1397 unsigned int flags, unsigned int outfmt)
1399 struct MPOpts *opts = vf->opts;
1400 unsigned long cxm = 0;
1401 unsigned long cym = 0;
1402 struct vf_priv_s *p = vf->priv;
1403 // rounding:
1404 if(!IMGFMT_IS_RGB(outfmt) && !IMGFMT_IS_BGR(outfmt)){
1405 switch(outfmt){
1406 case IMGFMT_444P:
1407 case IMGFMT_Y800:
1408 case IMGFMT_Y8:
1409 break;
1410 case IMGFMT_YVU9:
1411 case IMGFMT_IF09:
1412 cym = 3;
1413 case IMGFMT_411P:
1414 cxm = 3;
1415 break;
1416 case IMGFMT_YV12:
1417 case IMGFMT_I420:
1418 case IMGFMT_IYUV:
1419 cym = 1;
1420 default:
1421 cxm = 1;
1424 p->chroma_swapped = !!(p->crop_y & (cym+1));
1425 if (p->w) p->w += p->crop_x & cxm;
1426 if (p->h) p->h += p->crop_y & cym;
1427 p->crop_x &= ~cxm;
1428 p->crop_y &= ~cym;
1429 if (!p->w || p->w > width ) p->w = width;
1430 if (!p->h || p->h > height) p->h = height;
1431 if (p->crop_x + p->w > width ) p->crop_x = 0;
1432 if (p->crop_y + p->h > height) p->crop_y = 0;
1434 if(!opts->screen_size_x && !opts->screen_size_y){
1435 d_width = d_width * p->w/width;
1436 d_height = d_height * p->h/height;
1438 return vf_next_config(vf, p->w, p->h, d_width, d_height, flags, outfmt);
1441 static void uninit(struct vf_instance* vf)
1443 struct vf_priv_s *p = vf->priv;
1444 mp_msg(MSGT_VFILTER, MSGL_INFO, "diff_time: %.3f, merge_time: %.3f, "
1445 "export: %lu, merge: %lu, copy: %lu\n", p->diff_time, p->merge_time,
1446 p->export_count, p->merge_count, p->num_copies);
1447 free(p->memory_allocated);
1448 free(p);
1451 static int vf_open(vf_instance_t *vf, char *args)
1453 struct vf_priv_s *p;
1454 vf->get_image = get_image;
1455 vf->put_image = put_image;
1456 vf->config = config;
1457 vf->query_format = query_format;
1458 vf->uninit = uninit;
1459 vf->default_reqs = VFCAP_ACCEPT_STRIDE;
1460 vf->priv = p = calloc(1, sizeof(struct vf_priv_s));
1461 p->out_dec = 5;
1462 p->in_inc = 4;
1463 p->thres.noise = 128;
1464 p->thres.even = 128;
1465 p->sad_thres = 64;
1466 p->dint_thres = 4;
1467 p->luma_only = 0;
1468 p->fast = 3;
1469 p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0;
1470 p->prev_diff = 0;
1471 p->prev_pts = 0;
1472 p->prev_out_pts = 0;
1473 p->prev_diff = 1.0/80;
1474 p->drop_countdown = 0;
1475 p->vfr_mode = 0;
1476 if (args) {
1477 const char *args_remain = parse_args(p, args);
1478 if (args_remain) {
1479 mp_msg(MSGT_VFILTER, MSGL_FATAL,
1480 "filmdint: unknown suboption: %s\n", args_remain);
1481 return 0;
1483 if (p->out_dec < p->in_inc) {
1484 mp_msg(MSGT_VFILTER, MSGL_FATAL,
1485 "filmdint: increasing the frame rate is not supported\n");
1486 return 0;
1489 if (p->mmx2 > 2)
1490 p->mmx2 = 0;
1491 #if !HAVE_MMX
1492 p->mmx2 = 0;
1493 #endif
1494 #if !HAVE_AMD3DNOW
1495 p->mmx2 &= 1;
1496 #endif
1497 p->thres.odd = p->thres.even;
1498 p->thres.temp = p->thres.noise;
1499 p->diff_time = 0;
1500 p->merge_time = 0;
1501 return 1;
1504 const vf_info_t vf_info_filmdint = {
1505 "Advanced inverse telecine filer",
1506 "filmdint",
1507 "Zoltan Hidvegi",
1509 vf_open,
1510 NULL