/*
 * MMX implementation of a sum of absolute byte differences between a and b.
 *
 * a is bound to %esi, b to %edi and s to %eax (see the input constraints at
 * the end of the statement). The visible pass loads 8 bytes from each
 * pointer, steps both pointers by s, and forms |a-b| per byte as the pair of
 * unsigned saturating differences (a-b) and (b-a), one of which is always
 * zero. The bytes are widened to 16-bit words against the zeroed %mm7 and
 * accumulated in %mm4. The tail folds the four word lanes of %mm4 into one
 * 32-bit total in %eax.
 *
 * NOTE(review): presumably the load/accumulate section runs once per row
 * under a loop counter and the total in %eax is the return value -- confirm
 * against the complete asm statement.
 */
13 static int diff_y_mmx(unsigned char *a
, unsigned char *b
, int s
)
/* %mm4 = running word accumulator, %mm7 = constant zero used for unpacking */
18 "pxor %%mm4, %%mm4 \n\t"
19 "pxor %%mm7, %%mm7 \n\t"
24 "movq (%%esi), %%mm0 \n\t"
25 "movq (%%esi), %%mm2 \n\t"
26 "addl %%eax, %%esi \n\t"
27 "movq (%%edi), %%mm1 \n\t"
28 "addl %%eax, %%edi \n\t"
/* %mm2 = sat(a-b), %mm1 = sat(b-a) */
29 "psubusb %%mm1, %%mm2 \n\t"
30 "psubusb %%mm0, %%mm1 \n\t"
31 "movq %%mm2, %%mm0 \n\t"
32 "movq %%mm1, %%mm3 \n\t"
33 "punpcklbw %%mm7, %%mm0 \n\t"
34 "punpcklbw %%mm7, %%mm1 \n\t"
35 "punpckhbw %%mm7, %%mm2 \n\t"
36 "punpckhbw %%mm7, %%mm3 \n\t"
37 "paddw %%mm0, %%mm4 \n\t"
38 "paddw %%mm1, %%mm4 \n\t"
39 "paddw %%mm2, %%mm4 \n\t"
40 "paddw %%mm3, %%mm4 \n\t"
/* horizontal add: 4 words -> 2 dwords -> single dword in %eax */
45 "movq %%mm4, %%mm3 \n\t"
46 "punpcklwd %%mm7, %%mm4 \n\t"
47 "punpckhwd %%mm7, %%mm3 \n\t"
48 "paddd %%mm4, %%mm3 \n\t"
49 "movd %%mm3, %%eax \n\t"
50 "psrlq $32, %%mm3 \n\t"
51 "movd %%mm3, %%edx \n\t"
52 "addl %%edx, %%eax \n\t"
55 : "S" (a
), "D" (b
), "a" (s
)
/*
 * MMX implementation of the linear-interpolation comb metric (same formula
 * as licomb_y): for 8 adjacent bytes it accumulates
 *     |2*a[j] - b[j-s] - b[j]|  +  |2*b[j] - a[j] - a[j+s]|
 * in 16-bit lanes of %mm6.
 *
 * a is bound to %esi, b to %edi and s to %eax. %edi is moved back by s
 * up front, so (%edi) addresses b[-s] and (%edi,%eax) addresses b[0].
 * Each absolute value is built from two unsigned saturating word
 * subtractions (x-y and y-x), one of which is always zero. The low and high
 * 4 bytes of every 8-byte load are processed in separate sections because
 * the bytes have to be widened to words first.
 *
 * NOTE(review): presumably the four sections run once per row under a loop
 * counter and the total in %eax is the return value -- confirm against the
 * complete asm statement.
 */
61 static int licomb_y_mmx(unsigned char *a
, unsigned char *b
, int s
)
/* %mm6 = running word accumulator, %mm7 = constant zero used for unpacking */
66 "pxor %%mm6, %%mm6 \n\t"
67 "pxor %%mm7, %%mm7 \n\t"
68 "subl %%eax, %%edi \n\t"
/* section 1: |2*a - (b[-s] + b)|, low 4 bytes */
73 "movq (%%esi), %%mm0 \n\t"
74 "movq (%%edi), %%mm1 \n\t"
75 "punpcklbw %%mm7, %%mm0 \n\t"
76 "movq (%%edi,%%eax), %%mm2 \n\t"
77 "punpcklbw %%mm7, %%mm1 \n\t"
78 "punpcklbw %%mm7, %%mm2 \n\t"
79 "paddw %%mm0, %%mm0 \n\t"
80 "paddw %%mm2, %%mm1 \n\t"
81 "movq %%mm0, %%mm2 \n\t"
82 "psubusw %%mm1, %%mm0 \n\t"
83 "psubusw %%mm2, %%mm1 \n\t"
84 "paddw %%mm0, %%mm6 \n\t"
85 "paddw %%mm1, %%mm6 \n\t"
/* section 2: same expression, high 4 bytes */
87 "movq (%%esi), %%mm0 \n\t"
88 "movq (%%edi), %%mm1 \n\t"
89 "punpckhbw %%mm7, %%mm0 \n\t"
90 "movq (%%edi,%%eax), %%mm2 \n\t"
91 "punpckhbw %%mm7, %%mm1 \n\t"
92 "punpckhbw %%mm7, %%mm2 \n\t"
93 "paddw %%mm0, %%mm0 \n\t"
94 "paddw %%mm2, %%mm1 \n\t"
95 "movq %%mm0, %%mm2 \n\t"
96 "psubusw %%mm1, %%mm0 \n\t"
97 "psubusw %%mm2, %%mm1 \n\t"
98 "paddw %%mm0, %%mm6 \n\t"
99 "paddw %%mm1, %%mm6 \n\t"
/* section 3: |2*b - (a + a[s])|, low 4 bytes */
101 "movq (%%edi,%%eax), %%mm0 \n\t"
102 "movq (%%esi), %%mm1 \n\t"
103 "punpcklbw %%mm7, %%mm0 \n\t"
104 "movq (%%esi,%%eax), %%mm2 \n\t"
105 "punpcklbw %%mm7, %%mm1 \n\t"
106 "punpcklbw %%mm7, %%mm2 \n\t"
107 "paddw %%mm0, %%mm0 \n\t"
108 "paddw %%mm2, %%mm1 \n\t"
109 "movq %%mm0, %%mm2 \n\t"
110 "psubusw %%mm1, %%mm0 \n\t"
111 "psubusw %%mm2, %%mm1 \n\t"
112 "paddw %%mm0, %%mm6 \n\t"
113 "paddw %%mm1, %%mm6 \n\t"
/* section 4: same expression, high 4 bytes */
115 "movq (%%edi,%%eax), %%mm0 \n\t"
116 "movq (%%esi), %%mm1 \n\t"
117 "punpckhbw %%mm7, %%mm0 \n\t"
118 "movq (%%esi,%%eax), %%mm2 \n\t"
119 "punpckhbw %%mm7, %%mm1 \n\t"
120 "punpckhbw %%mm7, %%mm2 \n\t"
121 "paddw %%mm0, %%mm0 \n\t"
122 "paddw %%mm2, %%mm1 \n\t"
123 "movq %%mm0, %%mm2 \n\t"
124 "psubusw %%mm1, %%mm0 \n\t"
125 "psubusw %%mm2, %%mm1 \n\t"
126 "paddw %%mm0, %%mm6 \n\t"
127 "paddw %%mm1, %%mm6 \n\t"
/* step both pointers to the next row */
129 "addl %%eax, %%esi \n\t"
130 "addl %%eax, %%edi \n\t"
/* horizontal add: 4 words -> 2 dwords -> single dword in %eax */
134 "movq %%mm6, %%mm5 \n\t"
135 "punpcklwd %%mm7, %%mm6 \n\t"
136 "punpckhwd %%mm7, %%mm5 \n\t"
137 "paddd %%mm6, %%mm5 \n\t"
138 "movd %%mm5, %%eax \n\t"
139 "psrlq $32, %%mm5 \n\t"
140 "movd %%mm5, %%edx \n\t"
141 "addl %%edx, %%eax \n\t"
145 : "S" (a
), "D" (b
), "a" (s
)
/*
 * Branch-free absolute value: with m = (a)>>31, computes ((a)^m) - m.
 * NOTE(review): this only yields |a| if (a) is a 32-bit int and >> on a
 * negative value is an arithmetic shift (implementation-defined in C), so
 * that m is all-ones for negative input and zero otherwise -- confirm for
 * the target compilers. The argument is evaluated three times, so it must
 * not have side effects.
 */
153 #define ABS(a) (((a)^((a)>>31))-((a)>>31))
/*
 * Plain C difference metric: adds |a[j] - b[j]| for the 8 bytes j = 0..7 of
 * a row into diff.
 * NOTE(review): s is presumably the row stride used to step a and b between
 * rows, and diff the return value -- confirm against the complete body.
 */
155 static int diff_y(unsigned char *a
, unsigned char *b
, int s
)
159 for (j
=0; j
<8; j
++) diff
+= ABS(a
[j
]-b
[j
]);
/*
 * Plain C linear-interpolation comb metric. For each byte j it adds
 *     |2*a[j] - b[j-s] - b[j]|  +  |2*b[j] - a[j] - a[j+s]|
 * to diff, i.e. how far each sample is from the average of its two vertical
 * neighbours taken from the other pointer.
 * Reads b[j-s] and a[j+s], so one row before b and one row after a must be
 * addressable.
 * NOTE(review): the loops over j and over rows, and the return of diff, are
 * presumably the same shape as in diff_y -- confirm.
 */
165 static int licomb_y(unsigned char *a
, unsigned char *b
, int s
)
170 diff
+= ABS((a
[j
]<<1) - b
[j
-s
] - b
[j
])
171 + ABS((b
[j
]<<1) - a
[j
] - a
[j
+s
]);
/*
 * Alternative comb metric: for each byte j adds
 *     |a[j] - 3*b[j-s] + 3*a[j+s] - b[j]|
 * to diff. Reads b[j-s] and a[j+s], so one row before b and one row after a
 * must be addressable.
 * NOTE(review): the only visible reference to this function is a
 * commented-out assignment in pullup_init_context, so it may be unused.
 */
177 static int qpcomb_y(unsigned char *a
, unsigned char *b
, int s
)
182 diff
+= ABS(a
[j
] - 3*b
[j
-s
] + 3*a
[j
+s
] - b
[j
]);
/*
 * Debug cross-check: evaluates the comb metric with both the C version
 * (licomb_y) and the MMX version (licomb_y_mmx) on the same inputs and
 * prints both values to stdout when they disagree.
 * NOTE(review): which of the two results is returned is not shown here.
 */
189 static int licomb_y_test(unsigned char *a
, unsigned char *b
, int s
)
191 int c
= licomb_y(a
,b
,s
);
192 int m
= licomb_y_mmx(a
,b
,s
);
193 if (c
!= m
) printf("%d != %d\n", c
, m
);
/*
 * Lazily allocates the plane storage of buffer b.
 *
 * Does nothing if b->planes is already set. Otherwise allocates an array of
 * c->nplanes plane pointers and, for every plane i, a block of
 * c->h[i] * c->stride[i] bytes filled with the byte value c->background[i].
 *
 * NOTE(review): neither the calloc nor the malloc result is checked before
 * it is written through; on allocation failure the memset (or the
 * planes[i] store) dereferences a null pointer.
 */
206 static void alloc_buffer(struct pullup_context
*c
, struct pullup_buffer
*b
)
209 if (b
->planes
) return;
210 b
->planes
= calloc(c
->nplanes
, sizeof(unsigned char *));
211 for (i
= 0; i
< c
->nplanes
; i
++) {
212 b
->planes
[i
] = malloc(c
->h
[i
]*c
->stride
[i
]);
213 /* Deal with idiotic 128=0 for chroma: */
214 memset(b
->planes
[i
], c
->background
[i
], c
->h
[i
]*c
->stride
[i
]);
/*
 * Takes a lock on one or both fields of buffer b.
 *
 * The tests parse as (parity+1) & 1 and (parity+1) & 2, because + binds
 * tighter than &:
 *   parity 0 -> lock[0]++
 *   parity 1 -> lock[1]++
 *   parity 2 -> both lock[0]++ and lock[1]++
 *
 * NOTE(review): callers use the return value as the locked buffer, so this
 * presumably returns b -- confirm. No null check on b is visible here, yet
 * pullup_get_frame can store 0 in an ofields[] slot that it later passes to
 * this function -- confirm that b == 0 is handled.
 */
218 struct pullup_buffer
*pullup_lock_buffer(struct pullup_buffer
*b
, int parity
)
221 if (parity
+1 & 1) b
->lock
[0]++;
222 if (parity
+1 & 2) b
->lock
[1]++;
/*
 * Drops a lock previously taken with pullup_lock_buffer, using the same
 * parity encoding: 0 -> lock[0]--, 1 -> lock[1]--, 2 -> both. The tests
 * parse as (parity+1) & 1 and (parity+1) & 2.
 *
 * NOTE(review): the counters are decremented without a visible check that
 * they are positive or that b is non-null -- confirm against the full body.
 */
226 void pullup_release_buffer(struct pullup_buffer
*b
, int parity
)
229 if (parity
+1 & 1) b
->lock
[0]--;
230 if (parity
+1 & 2) b
->lock
[1]--;
/*
 * Finds a buffer whose requested field(s) are unlocked, makes sure its
 * planes are allocated, locks it for `parity` and returns it.
 *
 * parity uses the lock encoding of pullup_lock_buffer (0 or 1 = one field,
 * 2 = both fields). Candidates are tried in this order:
 *   1. for a single field only: the buffer of c->last, if that field has the
 *      opposite parity and the requested field of its buffer is unlocked;
 *   2. the first buffer in c->buffers with both fields unlocked;
 *   3. for a single field only: the first buffer whose requested field is
 *      unlocked.
 * A request for both fields (parity == 2) yields 0 when step 2 finds
 * nothing.
 *
 * NOTE(review): the result when step 3 also finds nothing is not shown here.
 */
233 struct pullup_buffer
*pullup_get_buffer(struct pullup_context
*c
, int parity
)
237 /* Try first to get the sister buffer for the previous field */
238 if (parity
< 2 && c
->last
&& parity
!= c
->last
->parity
239 && !c
->last
->buffer
->lock
[parity
]) {
240 alloc_buffer(c
, c
->last
->buffer
);
241 return pullup_lock_buffer(c
->last
->buffer
, parity
);
244 /* Prefer a buffer with both fields open */
245 for (i
= 0; i
< c
->nbuffers
; i
++) {
246 if (c
->buffers
[i
].lock
[0]) continue;
247 if (c
->buffers
[i
].lock
[1]) continue;
248 alloc_buffer(c
, &c
->buffers
[i
]);
249 return pullup_lock_buffer(&c
->buffers
[i
], parity
);
252 if (parity
== 2) return 0;
254 /* Search for any half-free buffer */
255 for (i
= 0; i
< c
->nbuffers
; i
++) {
256 if (parity
+1 & 1 && c
->buffers
[i
].lock
[0]) continue;
257 if (parity
+1 & 2 && c
->buffers
[i
].lock
[1]) continue;
258 alloc_buffer(c
, &c
->buffers
[i
]);
259 return pullup_lock_buffer(&c
->buffers
[i
], parity
);
/*
 * Evaluates a block metric between two fields and stores one int per block
 * in dest.
 *
 *   c      - context supplying the metric plane, strides, metric_w/metric_h,
 *            metric_offset and metric_len
 *   fa, pa - first field and the row offset (in rows of the metric plane)
 *            at which its data starts inside fa->buffer
 *   fb, pb - second field and its row offset, likewise
 *   func   - metric kernel called as func(a, b, field_stride)
 *   dest   - output array; metric_w * metric_h values are written, row by
 *            row
 *
 * Nothing is written if either field has no buffer. If both arguments name
 * the same rows of the same buffer, dest is filled with c->metric_len zero
 * ints instead of running the kernel.
 *
 * Blocks are visited with a horizontal step of bpp[mp] bytes and a vertical
 * step of 8 plane rows (stride << 3); the kernel receives the field stride,
 * which is twice the plane stride.
 *
 * NOTE(review): the duplicate-field shortcut presumably returns right after
 * the memset -- confirm.
 */
270 static void compute_metric(struct pullup_context
*c
,
271 struct pullup_field
*fa
, int pa
,
272 struct pullup_field
*fb
, int pb
,
273 int (*func
)(unsigned char *, unsigned char *, int), int *dest
)
275 unsigned char *a
, *b
;
277 int mp
= c
->metric_plane
;
278 int xstep
= c
->bpp
[mp
];
279 int ystep
= c
->stride
[mp
]<<3;
280 int s
= c
->stride
[mp
]<<1; /* field stride */
281 int w
= c
->metric_w
*xstep
;
283 if (!fa
->buffer
|| !fb
->buffer
) return;
285 /* Shortcut for duplicate fields (e.g. from RFF flag) */
286 if (fa
->buffer
== fb
->buffer
&& pa
== pb
) {
287 memset(dest
, 0, c
->metric_len
* sizeof(int));
291 a
= fa
->buffer
->planes
[mp
] + pa
* c
->stride
[mp
] + c
->metric_offset
;
292 b
= fb
->buffer
->planes
[mp
] + pb
* c
->stride
[mp
] + c
->metric_offset
;
294 for (y
= c
->metric_h
; y
; y
--) {
295 for (x
= 0; x
< w
; x
+= xstep
) {
296 *dest
++ = func(a
+ x
, b
+ x
, s
);
298 a
+= ystep
; b
+= ystep
;
/*
 * Allocates the per-field metric arrays of f: diffs and comb each get
 * c->metric_len zero-initialised ints.
 * NOTE(review): the calloc results are not checked here.
 */
306 static void alloc_metrics(struct pullup_context
*c
, struct pullup_field
*f
)
308 f
->diffs
= calloc(c
->metric_len
, sizeof(int));
309 f
->comb
= calloc(c
->metric_len
, sizeof(int));
310 /* add more metrics here as needed */
/*
 * Builds the initial list of pullup_field nodes: one zero-initialised head
 * node, then one further zero-initialised node appended through f->next on
 * each pass of the loop while len counts down to 0.
 * NOTE(review): the prev links, closing the list into a ring, metric
 * allocation and the return value (presumably head) are not shown here --
 * confirm against the complete body.
 */
313 static struct pullup_field
*make_field_queue(struct pullup_context
*c
, int len
)
315 struct pullup_field
*head
, *f
;
316 f
= head
= calloc(1, sizeof(struct pullup_field
));
318 for (; len
> 0; len
--) {
319 f
->next
= calloc(1, sizeof(struct pullup_field
));
/*
 * Grows the field list when it is full: if the node after c->head is
 * c->first (the next write would land on the oldest queued field), a new
 * zero-initialised pullup_field is allocated.
 * NOTE(review): presumably the new node is linked in between head and first
 * -- confirm against the complete body.
 */
329 static void check_field_queue(struct pullup_context
*c
)
331 if (c
->head
->next
== c
->first
) {
332 struct pullup_field
*f
= calloc(1, sizeof(struct pullup_field
));
/*
 * Queues one field of buffer b (parity 0 or 1) for analysis.
 *
 * Steps visible here:
 *   - grow the field list if it is full;
 *   - silently drop the field if the previously submitted field (c->last)
 *     has the same parity;
 *   - lock the field in b and store the buffer in the new field entry f;
 *   - compute the difference metric against f->prev->prev, i.e. the previous
 *     field of the same parity, into f->diffs;
 *   - compute the comb metric between f and f->prev into f->comb, ordered so
 *     that the parity-0 field is always the first argument;
 *   - set c->first if the queue was empty, and advance c->head.
 *
 * NOTE(review): f is presumably c->head, and c->last is presumably updated
 * to f -- neither assignment is shown here.
 */
341 void pullup_submit_field(struct pullup_context
*c
, struct pullup_buffer
*b
, int parity
)
343 struct pullup_field
*f
;
345 /* Grow the circular list if needed */
346 check_field_queue(c
);
348 /* Cannot have two fields of same parity in a row; drop the new one */
349 if (c
->last
&& c
->last
->parity
== parity
) return;
353 f
->buffer
= pullup_lock_buffer(b
, parity
);
358 compute_metric(c
, f
, parity
, f
->prev
->prev
, parity
, c
->diff
, f
->diffs
);
359 compute_metric(c
, parity
?f
->prev
:f
, 0, parity
?f
:f
->prev
, 1, c
->comb
, f
->comb
);
361 /* Advance the circular list */
362 if (!c
->first
) c
->first
= c
->head
;
364 c
->head
= c
->head
->next
;
/*
 * Discards all queued fields: walks from c->first up to (not including)
 * c->head, releasing the lock each field holds on its buffer, then marks
 * the queue empty by clearing c->first and c->last.
 * NOTE(review): f->buffer is passed to pullup_release_buffer as-is; whether
 * it can be 0 here (pullup_get_frame clears it on fields it consumes) and
 * whether the field's buffer pointer is reset inside the loop is not shown.
 */
367 void pullup_flush_fields(struct pullup_context
*c
)
369 struct pullup_field
*f
;
371 for (f
= c
->first
; f
&& f
!= c
->head
; f
= f
->next
) {
372 pullup_release_buffer(f
->buffer
, f
->parity
);
375 c
->first
= c
->last
= 0;
/* Bits of pullup_field.flags: set once compute_breaks / compute_affinity has run for a field. */
385 #define F_HAVE_BREAKS 1
386 #define F_HAVE_AFFINITY 2
/* Bit of pullup_field.breaks; used alongside BREAK_LEFT, whose definition is not shown here. */
390 #define BREAK_RIGHT 2
/*
 * Counts the links followed from begin until end is reached.
 * Yields 0 if either pointer is null.
 * NOTE(review): count's initial value and the final return are not shown
 * here, so whether end itself is included in the result cannot be told from
 * this excerpt. The loop only terminates if end is reachable from begin
 * through next.
 */
395 static int queue_length(struct pullup_field
*begin
, struct pullup_field
*end
)
398 struct pullup_field
*f
;
400 if (!begin
|| !end
) return 0;
401 for (f
= begin
; f
!= end
; f
= f
->next
) count
++;
/*
 * Scans at most max consecutive field boundaries starting at f, looking for
 * the first one marked as a break: either the left field has BREAK_RIGHT
 * set or the field after it has BREAK_LEFT set.
 * NOTE(review): the value returned on a hit (presumably a 1-based position)
 * and on a miss, and the advance of f, are not shown here -- confirm.
 */
405 static int find_first_break(struct pullup_field
*f
, int max
)
408 for (i
= 0; i
< max
; i
++) {
409 if (f
->breaks
& BREAK_RIGHT
|| f
->next
->breaks
& BREAK_LEFT
)
/*
 * Marks scene/sequence breaks around the four consecutive fields
 * f0, f1 = f0->next, f2, f3. Runs at most once per f0 (guarded by
 * F_HAVE_BREAKS in f0->flags).
 *
 * Buffer-identity shortcuts:
 *   - f0 and f2 share a buffer but f1 and f3 do not -> BREAK_RIGHT on f2;
 *   - f1 and f3 share a buffer but f0 and f2 do not -> BREAK_LEFT on f1.
 *
 * Otherwise compares the difference metrics of f2 and f3 element by
 * element: max_l is the largest amount by which f2->diffs exceeds
 * f3->diffs, max_r the largest amount the other way round. If
 * max_l + max_r < 128 nothing is marked. Beyond that, a side that is more
 * than 4 times the other sets BREAK_LEFT on f1 (max_l dominant) or
 * BREAK_RIGHT on f2 (max_r dominant).
 *
 * NOTE(review): the two shortcut branches presumably return after setting
 * their flag -- not shown here.
 */
416 static void compute_breaks(struct pullup_context
*c
, struct pullup_field
*f0
)
419 struct pullup_field
*f1
= f0
->next
;
420 struct pullup_field
*f2
= f1
->next
;
421 struct pullup_field
*f3
= f2
->next
;
422 int l
, max_l
=0, max_r
=0;
423 //struct pullup_field *ff;
424 //for (i=0, ff=c->first; ff != f0; i++, ff=ff->next);
426 if (f0
->flags
& F_HAVE_BREAKS
) return;
427 //printf("\n%d: ", i);
428 f0
->flags
|= F_HAVE_BREAKS
;
430 /* Special case when fields are 100% identical */
431 if (f0
->buffer
== f2
->buffer
&& f1
->buffer
!= f3
->buffer
) {
432 f2
->breaks
|= BREAK_RIGHT
;
435 if (f0
->buffer
!= f2
->buffer
&& f1
->buffer
== f3
->buffer
) {
436 f1
->breaks
|= BREAK_LEFT
;
440 for (i
= 0; i
< c
->metric_len
; i
++) {
441 l
= f2
->diffs
[i
] - f3
->diffs
[i
];
442 if (l
> max_l
) max_l
= l
;
443 if (-l
> max_r
) max_r
= -l
;
445 /* Don't get tripped up when differences are mostly quant error */
446 //printf("%d %d\n", max_l, max_r);
447 if (max_l
+ max_r
< 128) return;
448 if (max_l
> 4*max_r
) f1
->breaks
|= BREAK_LEFT
;
449 if (max_r
> 4*max_l
) f2
->breaks
|= BREAK_RIGHT
;
/*
 * Decides which neighbour field f pairs with, from the comb metrics.
 * Runs at most once per field (guarded by F_HAVE_AFFINITY in f->flags).
 *
 * max_l is the largest amount by which f->comb exceeds f->next->comb over
 * all metric blocks, max_r the largest amount the other way round.
 * If max_l + max_r < 64 the affinity is left unchanged. Otherwise:
 *   max_r > 2*max_l -> f->affinity = -1
 *   max_l > 2*max_r -> f->affinity =  1
 * and it is left unchanged when neither side dominates.
 */
452 static void compute_affinity(struct pullup_context
*c
, struct pullup_field
*f
)
455 int max_l
=0, max_r
=0, l
;
456 if (f
->flags
& F_HAVE_AFFINITY
) return;
457 f
->flags
|= F_HAVE_AFFINITY
;
458 for (i
= 0; i
< c
->metric_len
; i
++) {
459 l
= f
->comb
[i
] - f
->next
->comb
[i
];
460 if (l
> max_l
) max_l
= l
;
461 if (-l
> max_r
) max_r
= -l
;
463 if (max_l
+ max_r
< 64) return;
464 if (max_r
> 2*max_l
) f
->affinity
= -1;
465 else if (max_l
> 2*max_r
) f
->affinity
= 1;
/*
 * Analysis pass over the queued fields: with n = queue_length(first, last),
 * runs n-1 iterations starting at c->first, calling compute_affinity on
 * each and compute_breaks only while i < n-3 (compute_breaks reads three
 * fields ahead of its argument).
 * NOTE(review): f is presumably advanced to f->next on every iteration --
 * not shown here.
 */
468 static void foo(struct pullup_context
*c
)
470 struct pullup_field
*f
= c
->first
;
471 int i
, n
= queue_length(f
, c
->last
);
472 for (i
= 0; i
< n
-1; i
++) {
473 if (i
< n
-3) compute_breaks(c
, f
);
474 compute_affinity(c
, f
);
/*
 * Decides how many queued fields (starting at c->first) make up the next
 * output frame, from the affinity and break marks of the first few fields.
 * Yields 0 when fewer than 6 fields are queued (per queue_length); the other
 * visible results are 1, 2 or 3.
 *
 * NOTE(review): f1..f3 are initialised by dereferencing c->first before the
 * queue-length guard runs, so calling this with c->first == 0 (the state
 * pullup_flush_fields leaves behind) dereferences a null pointer.
 * NOTE(review): several conditions and the switch on l mentioned in the
 * comments are only partly shown here; the mapping from l to the result
 * must be read from the complete body.
 */
479 static int decide_frame_length(struct pullup_context
*c
)
481 struct pullup_field
*f0
= c
->first
;
482 struct pullup_field
*f1
= f0
->next
;
483 struct pullup_field
*f2
= f1
->next
;
484 struct pullup_field
*f3
= f2
->next
;
487 if (queue_length(c
->first
, c
->last
) < 6) return 0;
490 if (f0
->affinity
== -1) return 1;
492 l
= find_first_break(f0
, 3);
493 if (l
== 1 && c
->strict_breaks
< 0) l
= 0;
497 if (c
->strict_breaks
> 0 && f0
->affinity
== 1 && f1
->affinity
== -1)
501 /* FIXME: strictly speaking, f0->prev is no longer valid... :) */
503 && (f0
->prev
->breaks
& BREAK_RIGHT
) && (f2
->breaks
& BREAK_LEFT
)
504 && (f0
->affinity
!= 1 || f1
->affinity
!= -1) )
506 if (f1
->affinity
== 1) return 1;
509 if (f2
->affinity
== 1) return 2;
512 /* 9 possibilities covered before switch */
513 if (f1
->affinity
== 1) return 1; /* covers 6 */
514 else if (f1
->affinity
== -1) return 2; /* covers 6 */
515 else if (f2
->affinity
== -1) { /* covers 2 */
516 if (f0
->affinity
== 1) return 3;
519 else return 2; /* the remaining 6 */
/*
 * Debug dump to stdout of affinity and break marks for 6 fields starting
 * at f.
 *
 * Affinity line: each field prints as <l><index><r>, where a '+' appears on
 * the left for affinity -1, on the right for affinity +1, and both sides
 * are '.' for affinity 0 (aff_l/aff_r are indexed with 1 + affinity).
 * Breaks line: each field prints as <l><index><r>, with '|' on the side
 * whose BREAK_LEFT / BREAK_RIGHT bit is set and '.' otherwise.
 *
 * NOTE(review): f is presumably advanced inside each loop and reset to f0
 * between them -- not shown here.
 */
524 static void print_aff_and_breaks(struct pullup_context
*c
, struct pullup_field
*f
)
528 struct pullup_field
*f0
= f
;
529 const char aff_l
[] = "+..", aff_r
[] = "..+";
530 printf("\naffinity: ");
531 for (i
= 0; i
< 6; i
++) {
532 printf("%c%d%c", aff_l
[1+f
->affinity
], i
, aff_r
[1+f
->affinity
]);
536 printf("\nbreaks: ");
537 for (i
=0; i
<6; i
++) {
538 printf("%c%d%c", f
->breaks
& BREAK_LEFT
? '|' : '.', i
, f
->breaks
& BREAK_RIGHT
? '|' : '.');
/*
 * Pulls the next output frame out of the field queue.
 *
 * Uses the single frame object c->frame; yields 0 while that frame is still
 * locked. Otherwise the first n queued fields (n from decide_frame_length)
 * are moved into fr->ifields[]: each field's buffer pointer is taken over
 * without a release/relock and c->first advances past it. fr->ofields[0] and
 * fr->ofields[1] are then chosen from those input fields (three alternative
 * assignments are visible below, one of which leaves the opposite-parity
 * slot 0), each output field is locked, and if both output fields live in
 * the same buffer that buffer becomes fr->buffer and is locked for both
 * fields.
 *
 * NOTE(review): n and aff are initialised by dereferencing c->first before
 * any check, so an empty queue (c->first == 0) dereferences a null pointer
 * here.
 * NOTE(review): the conditions selecting between the three ofields
 * assignments (presumably on n) are not shown; if the variant that stores 0
 * reaches the pullup_lock_buffer calls, that function must accept a null
 * buffer -- confirm.
 */
548 struct pullup_frame
*pullup_get_frame(struct pullup_context
*c
)
551 struct pullup_frame
*fr
= c
->frame
;
552 int n
= decide_frame_length(c
);
553 int aff
= c
->first
->next
->affinity
;
556 if (fr
->lock
) return 0;
559 print_aff_and_breaks(c
, c
->first
);
560 printf("duration: %d \n", n
);
565 fr
->parity
= c
->first
->parity
;
567 for (i
= 0; i
< n
; i
++) {
568 /* We cheat and steal the buffer without release+relock */
569 fr
->ifields
[i
] = c
->first
->buffer
;
570 c
->first
->buffer
= 0;
571 c
->first
= c
->first
->next
;
575 fr
->ofields
[fr
->parity
] = fr
->ifields
[0];
576 fr
->ofields
[fr
->parity
^1] = 0;
578 fr
->ofields
[fr
->parity
] = fr
->ifields
[0];
579 fr
->ofields
[fr
->parity
^1] = fr
->ifields
[1];
582 aff
= (fr
->ifields
[0] == fr
->ifields
[1]) ? -1 : 1;
583 /* else if (c->verbose) printf("forced aff: %d \n", aff); */
584 fr
->ofields
[fr
->parity
] = fr
->ifields
[1+aff
];
585 fr
->ofields
[fr
->parity
^1] = fr
->ifields
[1];
587 pullup_lock_buffer(fr
->ofields
[0], 0);
588 pullup_lock_buffer(fr
->ofields
[1], 1);
590 if (fr
->ofields
[0] == fr
->ofields
[1]) {
591 fr
->buffer
= fr
->ofields
[0];
592 pullup_lock_buffer(fr
->buffer
, 2);
/*
 * Copies one field (every second row) of every plane from src to dest.
 *
 * For each plane i the copy starts at row `parity` (0 or 1), moves
 * c->stride[i] bytes per row and skips two rows between copies, for
 * c->h[i] >> 1 rows in total. Rows of the other parity in dest are left
 * untouched.
 */
598 static void copy_field(struct pullup_context
*c
, struct pullup_buffer
*dest
,
599 struct pullup_buffer
*src
, int parity
)
602 unsigned char *d
, *s
;
603 for (i
= 0; i
< c
->nplanes
; i
++) {
604 s
= src
->planes
[i
] + parity
*c
->stride
[i
];
605 d
= dest
->planes
[i
] + parity
*c
->stride
[i
];
606 for (j
= c
->h
[i
]>>1; j
; j
--) {
607 memcpy(d
, s
, c
->stride
[i
]);
608 s
+= c
->stride
[i
]<<1;
609 d
+= c
->stride
[i
]<<1;
/*
 * Makes sure frame fr has a single buffer (fr->buffer) holding both of its
 * output fields.
 *
 * Does nothing if fr->buffer is already set or if the frame is shorter than
 * 2 fields. Otherwise it first tries to reuse one of the two output-field
 * buffers in place: for i = 0, 1, if the other field (i^1) of
 * fr->ofields[i] is not locked, that buffer becomes fr->buffer, is locked
 * for both fields, and the missing field is copied into it from
 * fr->ofields[i^1]. If neither can be reused, a fresh buffer is requested
 * with pullup_get_buffer(c, 2) and both fields are copied into it.
 *
 * NOTE(review): the in-place branch presumably returns after the copy --
 * not shown here. pullup_get_buffer can yield 0 for parity 2; no check is
 * visible before the result is handed to copy_field.
 */
614 void pullup_pack_frame(struct pullup_context
*c
, struct pullup_frame
*fr
)
617 int par
= fr
->parity
;
618 if (fr
->buffer
) return;
619 if (fr
->length
< 2) return; /* FIXME: deal with this */
620 for (i
= 0; i
< 2; i
++)
622 if (fr
->ofields
[i
]->lock
[i
^1]) continue;
623 fr
->buffer
= fr
->ofields
[i
];
624 pullup_lock_buffer(fr
->buffer
, 2);
625 copy_field(c
, fr
->buffer
, fr
->ofields
[i
^1], i
^1);
628 fr
->buffer
= pullup_get_buffer(c
, 2);
629 copy_field(c
, fr
->buffer
, fr
->ofields
[0], 0);
630 copy_field(c
, fr
->buffer
, fr
->ofields
[1], 1);
/*
 * Releases every buffer lock held by frame fr:
 *   - each input field i, with parity alternating from fr->parity
 *     (fr->parity ^ (i & 1));
 *   - output field 0 (parity 0) and output field 1 (parity 1);
 *   - the packed buffer, for both fields, if one is set.
 * NOTE(review): resetting fr->lock / fr->buffer afterwards is presumably
 * done in lines not shown here -- confirm.
 */
633 void pullup_release_frame(struct pullup_frame
*fr
)
636 for (i
= 0; i
< fr
->length
; i
++)
637 pullup_release_buffer(fr
->ifields
[i
], fr
->parity
^ (i
&1));
638 pullup_release_buffer(fr
->ofields
[0], 0);
639 pullup_release_buffer(fr
->ofields
[1], 1);
640 if (fr
->buffer
) pullup_release_buffer(fr
->buffer
, 2);
/*
 * Allocates a zero-initialised pullup_context.
 * NOTE(review): presumably returns c (0 on allocation failure) -- the
 * return statement is not shown here.
 */
649 struct pullup_context
*pullup_alloc_context()
651 struct pullup_context
*c
;
653 c
= calloc(1, sizeof(struct pullup_context
));
/*
 * Allocates the per-plane parameter arrays of c (bpp, w, h, stride,
 * background), each with c->nplanes zero-initialised ints. c->nplanes must
 * therefore be set before this is called.
 * NOTE(review): the calloc results are not checked here.
 */
658 void pullup_preinit_context(struct pullup_context
*c
)
660 c
->bpp
= calloc(c
->nplanes
, sizeof(int));
661 c
->w
= calloc(c
->nplanes
, sizeof(int));
662 c
->h
= calloc(c
->nplanes
, sizeof(int));
663 c
->stride
= calloc(c
->nplanes
, sizeof(int));
664 c
->background
= calloc(c
->nplanes
, sizeof(int));
/*
 * Finishes setting up c once the per-plane parameters have been filled in.
 *
 *   - raises c->nbuffers to at least 10 and allocates the zeroed buffer
 *     table;
 *   - derives the metric grid from the metric plane mp: the junk margins are
 *     removed and the remainder divided by 8 in each direction. Because +
 *     binds tighter than <<, the margins parse as
 *     (junk_left + junk_right) << 3 and (junk_top + junk_bottom) << 1;
 *   - metric_offset is the byte offset of the first metric block
 *     (junk_left pixels in, junk_top*2 rows down) and metric_len the number
 *     of blocks;
 *   - creates the field queue with make_field_queue(c, 8) and the single
 *     frame object with room for 3 input fields;
 *   - selects the metric kernels; when PULLUP_CPU_MMX is set in c->cpu the
 *     MMX versions are used.
 *
 * NOTE(review): c->frame is dereferenced immediately after its calloc
 * without a null check. The format switch (YUY2, RGB32, ...) is only partly
 * shown here, so the default kernels per format must be read from the
 * complete body.
 */
667 void pullup_init_context(struct pullup_context
*c
)
669 int mp
= c
->metric_plane
;
670 if (c
->nbuffers
< 10) c
->nbuffers
= 10;
671 c
->buffers
= calloc(c
->nbuffers
, sizeof (struct pullup_buffer
));
673 c
->metric_w
= (c
->w
[mp
] - (c
->junk_left
+ c
->junk_right
<< 3)) >> 3;
674 c
->metric_h
= (c
->h
[mp
] - (c
->junk_top
+ c
->junk_bottom
<< 1)) >> 3;
675 c
->metric_offset
= c
->junk_left
*c
->bpp
[mp
] + (c
->junk_top
<<1)*c
->stride
[mp
];
676 c
->metric_len
= c
->metric_w
* c
->metric_h
;
678 c
->head
= make_field_queue(c
, 8);
680 c
->frame
= calloc(1, sizeof (struct pullup_frame
));
681 c
->frame
->ifields
= calloc(3, sizeof (struct pullup_buffer
*));
689 if (c
->cpu
& PULLUP_CPU_MMX
) {
690 c
->diff
= diff_y_mmx
;
691 c
->comb
= licomb_y_mmx
;
695 /* c->comb = qpcomb_y; */
698 case PULLUP_FMT_YUY2
:
701 case PULLUP_FMT_RGB32
:
702 c
->diff
= diff_rgb32
;
708 void pullup_free_context(struct pullup_context
*c
)
710 struct pullup_field
*f
;
718 } while (f
!= c
->head
);