small compilation fix
[mplayer/glamo.git] / libmpcodecs / pullup.c
blob3627084cb53498aed8dc196b21bbad38980d343c
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "pullup.h"
7 #include "config.h"
11 #ifdef ARCH_X86
12 #ifdef HAVE_MMX
13 static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
15 int ret;
16 asm volatile (
17 "movl $4, %%ecx \n\t"
18 "pxor %%mm4, %%mm4 \n\t"
19 "pxor %%mm7, %%mm7 \n\t"
21 ".balign 16 \n\t"
22 "1: \n\t"
24 "movq (%%esi), %%mm0 \n\t"
25 "movq (%%esi), %%mm2 \n\t"
26 "addl %%eax, %%esi \n\t"
27 "movq (%%edi), %%mm1 \n\t"
28 "addl %%eax, %%edi \n\t"
29 "psubusb %%mm1, %%mm2 \n\t"
30 "psubusb %%mm0, %%mm1 \n\t"
31 "movq %%mm2, %%mm0 \n\t"
32 "movq %%mm1, %%mm3 \n\t"
33 "punpcklbw %%mm7, %%mm0 \n\t"
34 "punpcklbw %%mm7, %%mm1 \n\t"
35 "punpckhbw %%mm7, %%mm2 \n\t"
36 "punpckhbw %%mm7, %%mm3 \n\t"
37 "paddw %%mm0, %%mm4 \n\t"
38 "paddw %%mm1, %%mm4 \n\t"
39 "paddw %%mm2, %%mm4 \n\t"
40 "paddw %%mm3, %%mm4 \n\t"
42 "decl %%ecx \n\t"
43 "jnz 1b \n\t"
45 "movq %%mm4, %%mm3 \n\t"
46 "punpcklwd %%mm7, %%mm4 \n\t"
47 "punpckhwd %%mm7, %%mm3 \n\t"
48 "paddd %%mm4, %%mm3 \n\t"
49 "movd %%mm3, %%eax \n\t"
50 "psrlq $32, %%mm3 \n\t"
51 "movd %%mm3, %%edx \n\t"
52 "addl %%edx, %%eax \n\t"
53 "emms \n\t"
54 : "=a" (ret)
55 : "S" (a), "D" (b), "a" (s)
56 : "%edx"
58 return ret;
61 static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
63 int ret;
64 asm volatile (
65 "movl $4, %%ecx \n\t"
66 "pxor %%mm6, %%mm6 \n\t"
67 "pxor %%mm7, %%mm7 \n\t"
68 "subl %%eax, %%edi \n\t"
70 ".balign 16 \n\t"
71 "2: \n\t"
73 "movq (%%esi), %%mm0 \n\t"
74 "movq (%%edi), %%mm1 \n\t"
75 "punpcklbw %%mm7, %%mm0 \n\t"
76 "movq (%%edi,%%eax), %%mm2 \n\t"
77 "punpcklbw %%mm7, %%mm1 \n\t"
78 "punpcklbw %%mm7, %%mm2 \n\t"
79 "paddw %%mm0, %%mm0 \n\t"
80 "paddw %%mm2, %%mm1 \n\t"
81 "movq %%mm0, %%mm2 \n\t"
82 "psubusw %%mm1, %%mm0 \n\t"
83 "psubusw %%mm2, %%mm1 \n\t"
84 "paddw %%mm0, %%mm6 \n\t"
85 "paddw %%mm1, %%mm6 \n\t"
87 "movq (%%esi), %%mm0 \n\t"
88 "movq (%%edi), %%mm1 \n\t"
89 "punpckhbw %%mm7, %%mm0 \n\t"
90 "movq (%%edi,%%eax), %%mm2 \n\t"
91 "punpckhbw %%mm7, %%mm1 \n\t"
92 "punpckhbw %%mm7, %%mm2 \n\t"
93 "paddw %%mm0, %%mm0 \n\t"
94 "paddw %%mm2, %%mm1 \n\t"
95 "movq %%mm0, %%mm2 \n\t"
96 "psubusw %%mm1, %%mm0 \n\t"
97 "psubusw %%mm2, %%mm1 \n\t"
98 "paddw %%mm0, %%mm6 \n\t"
99 "paddw %%mm1, %%mm6 \n\t"
101 "movq (%%edi,%%eax), %%mm0 \n\t"
102 "movq (%%esi), %%mm1 \n\t"
103 "punpcklbw %%mm7, %%mm0 \n\t"
104 "movq (%%esi,%%eax), %%mm2 \n\t"
105 "punpcklbw %%mm7, %%mm1 \n\t"
106 "punpcklbw %%mm7, %%mm2 \n\t"
107 "paddw %%mm0, %%mm0 \n\t"
108 "paddw %%mm2, %%mm1 \n\t"
109 "movq %%mm0, %%mm2 \n\t"
110 "psubusw %%mm1, %%mm0 \n\t"
111 "psubusw %%mm2, %%mm1 \n\t"
112 "paddw %%mm0, %%mm6 \n\t"
113 "paddw %%mm1, %%mm6 \n\t"
115 "movq (%%edi,%%eax), %%mm0 \n\t"
116 "movq (%%esi), %%mm1 \n\t"
117 "punpckhbw %%mm7, %%mm0 \n\t"
118 "movq (%%esi,%%eax), %%mm2 \n\t"
119 "punpckhbw %%mm7, %%mm1 \n\t"
120 "punpckhbw %%mm7, %%mm2 \n\t"
121 "paddw %%mm0, %%mm0 \n\t"
122 "paddw %%mm2, %%mm1 \n\t"
123 "movq %%mm0, %%mm2 \n\t"
124 "psubusw %%mm1, %%mm0 \n\t"
125 "psubusw %%mm2, %%mm1 \n\t"
126 "paddw %%mm0, %%mm6 \n\t"
127 "paddw %%mm1, %%mm6 \n\t"
129 "addl %%eax, %%esi \n\t"
130 "addl %%eax, %%edi \n\t"
131 "decl %%ecx \n\t"
132 "jnz 2b \n\t"
134 "movq %%mm6, %%mm5 \n\t"
135 "punpcklwd %%mm7, %%mm6 \n\t"
136 "punpckhwd %%mm7, %%mm5 \n\t"
137 "paddd %%mm6, %%mm5 \n\t"
138 "movd %%mm5, %%eax \n\t"
139 "psrlq $32, %%mm5 \n\t"
140 "movd %%mm5, %%edx \n\t"
141 "addl %%edx, %%eax \n\t"
143 "emms \n\t"
144 : "=a" (ret)
145 : "S" (a), "D" (b), "a" (s)
146 : "%edx"
148 return ret;
150 #endif
151 #endif
153 #define ABS(a) (((a)^((a)>>31))-((a)>>31))
/*
 * Sum of absolute differences between two blocks of 8 bytes x 4 rows.
 *
 * a, b  start of the two blocks
 * s     distance in bytes between consecutive rows
 * Returns the accumulated absolute difference.
 *
 * abs() is used instead of a shift-based trick so the result does not depend
 * on how the compiler right-shifts negative values or on the width of int.
 */
static int diff_y(unsigned char *a, unsigned char *b, int s)
{
    int row, col, diff = 0;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++)
            diff += abs(a[col] - b[col]);
        a += s;
        b += s;
    }
    return diff;
}
/*
 * Comb metric over a block of 8 bytes x 4 rows: for every sample it adds
 * |2*a[j] - b[j-s] - b[j]| + |2*b[j] - a[j] - a[j+s]|.
 *
 * a, b  start of the two blocks; the row before b (b[j-s]) and the row after
 *       the last row of a (a[j+s]) are read as well, so both must be valid
 * s     distance in bytes between consecutive rows
 * Returns the accumulated metric.
 *
 * abs() is used instead of a shift-based trick so the result does not depend
 * on how the compiler right-shifts negative values or on the width of int.
 */
static int licomb_y(unsigned char *a, unsigned char *b, int s)
{
    int row, col, diff = 0;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++) {
            diff += abs((a[col] << 1) - b[col - s] - b[col]);
            diff += abs((b[col] << 1) - a[col] - a[col + s]);
        }
        a += s;
        b += s;
    }
    return diff;
}
/*
 * Alternative comb metric over a block of 8 bytes x 4 rows: for every sample
 * it adds |a[j] - 3*b[j-s] + 3*a[j+s] - b[j]|.
 *
 * a, b  start of the two blocks; the row before b and the row after the last
 *       row of a are read as well, so both must be valid
 * s     distance in bytes between consecutive rows
 * Returns the accumulated metric.
 *
 * abs() is used instead of a shift-based trick so the result does not depend
 * on how the compiler right-shifts negative values or on the width of int.
 */
static int qpcomb_y(unsigned char *a, unsigned char *b, int s)
{
    int row, col, diff = 0;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++)
            diff += abs(a[col] - 3 * b[col - s] + 3 * a[col + s] - b[col]);
        a += s;
        b += s;
    }
    return diff;
}
#if 0
/*
 * Debug helper (compiled out): runs both the C and the MMX comb metric on
 * the same input, reports a mismatch on stdout and returns the MMX result.
 */
static int licomb_y_test(unsigned char *a, unsigned char *b, int s)
{
    const int c_result = licomb_y(a, b, s);
    const int mmx_result = licomb_y_mmx(a, b, s);

    if (c_result != mmx_result) {
        printf("%d != %d\n", c_result, mmx_result);
    }
    return mmx_result;
}
#endif
206 static void alloc_buffer(struct pullup_context *c, struct pullup_buffer *b)
208 int i;
209 if (b->planes) return;
210 b->planes = calloc(c->nplanes, sizeof(unsigned char *));
211 for (i = 0; i < c->nplanes; i++) {
212 b->planes[i] = malloc(c->h[i]*c->stride[i]);
213 /* Deal with idiotic 128=0 for chroma: */
214 memset(b->planes[i], c->background[i], c->h[i]*c->stride[i]);
218 struct pullup_buffer *pullup_lock_buffer(struct pullup_buffer *b, int parity)
220 if (!b) return 0;
221 if (parity+1 & 1) b->lock[0]++;
222 if (parity+1 & 2) b->lock[1]++;
223 return b;
226 void pullup_release_buffer(struct pullup_buffer *b, int parity)
228 if (!b) return;
229 if (parity+1 & 1) b->lock[0]--;
230 if (parity+1 & 2) b->lock[1]--;
233 struct pullup_buffer *pullup_get_buffer(struct pullup_context *c, int parity)
235 int i;
237 /* Try first to get the sister buffer for the previous field */
238 if (parity < 2 && c->last && parity != c->last->parity
239 && !c->last->buffer->lock[parity]) {
240 alloc_buffer(c, c->last->buffer);
241 return pullup_lock_buffer(c->last->buffer, parity);
244 /* Prefer a buffer with both fields open */
245 for (i = 0; i < c->nbuffers; i++) {
246 if (c->buffers[i].lock[0]) continue;
247 if (c->buffers[i].lock[1]) continue;
248 alloc_buffer(c, &c->buffers[i]);
249 return pullup_lock_buffer(&c->buffers[i], parity);
252 if (parity == 2) return 0;
254 /* Search for any half-free buffer */
255 for (i = 0; i < c->nbuffers; i++) {
256 if (parity+1 & 1 && c->buffers[i].lock[0]) continue;
257 if (parity+1 & 2 && c->buffers[i].lock[1]) continue;
258 alloc_buffer(c, &c->buffers[i]);
259 return pullup_lock_buffer(&c->buffers[i], parity);
262 return 0;
270 static void compute_metric(struct pullup_context *c,
271 struct pullup_field *fa, int pa,
272 struct pullup_field *fb, int pb,
273 int (*func)(unsigned char *, unsigned char *, int), int *dest)
275 unsigned char *a, *b;
276 int x, y;
277 int mp = c->metric_plane;
278 int xstep = c->bpp[mp];
279 int ystep = c->stride[mp]<<3;
280 int s = c->stride[mp]<<1; /* field stride */
281 int w = c->metric_w*xstep;
283 if (!fa->buffer || !fb->buffer) return;
285 /* Shortcut for duplicate fields (e.g. from RFF flag) */
286 if (fa->buffer == fb->buffer && pa == pb) {
287 memset(dest, 0, c->metric_len * sizeof(int));
288 return;
291 a = fa->buffer->planes[mp] + pa * c->stride[mp] + c->metric_offset;
292 b = fb->buffer->planes[mp] + pb * c->stride[mp] + c->metric_offset;
294 for (y = c->metric_h; y; y--) {
295 for (x = 0; x < w; x += xstep) {
296 *dest++ = func(a + x, b + x, s);
298 a += ystep; b += ystep;
306 static void alloc_metrics(struct pullup_context *c, struct pullup_field *f)
308 f->diffs = calloc(c->metric_len, sizeof(int));
309 f->comb = calloc(c->metric_len, sizeof(int));
310 /* add more metrics here as needed */
313 static struct pullup_field *make_field_queue(struct pullup_context *c, int len)
315 struct pullup_field *head, *f;
316 f = head = calloc(1, sizeof(struct pullup_field));
317 alloc_metrics(c, f);
318 for (; len > 0; len--) {
319 f->next = calloc(1, sizeof(struct pullup_field));
320 f->next->prev = f;
321 f = f->next;
322 alloc_metrics(c, f);
324 f->next = head;
325 head->prev = f;
326 return head;
329 static void check_field_queue(struct pullup_context *c)
331 if (c->head->next == c->first) {
332 struct pullup_field *f = calloc(1, sizeof(struct pullup_field));
333 alloc_metrics(c, f);
334 f->prev = c->head;
335 f->next = c->first;
336 c->head->next = f;
337 c->first->prev = f;
341 void pullup_submit_field(struct pullup_context *c, struct pullup_buffer *b, int parity)
343 struct pullup_field *f;
345 /* Grow the circular list if needed */
346 check_field_queue(c);
348 /* Cannot have two fields of same parity in a row; drop the new one */
349 if (c->last && c->last->parity == parity) return;
351 f = c->head;
352 f->parity = parity;
353 f->buffer = pullup_lock_buffer(b, parity);
354 f->flags = 0;
355 f->breaks = 0;
356 f->affinity = 0;
358 compute_metric(c, f, parity, f->prev->prev, parity, c->diff, f->diffs);
359 compute_metric(c, parity?f->prev:f, 0, parity?f:f->prev, 1, c->comb, f->comb);
361 /* Advance the circular list */
362 if (!c->first) c->first = c->head;
363 c->last = c->head;
364 c->head = c->head->next;
367 void pullup_flush_fields(struct pullup_context *c)
369 struct pullup_field *f;
371 for (f = c->first; f && f != c->head; f = f->next) {
372 pullup_release_buffer(f->buffer, f->parity);
373 f->buffer = 0;
375 c->first = c->last = 0;
385 #define F_HAVE_BREAKS 1
386 #define F_HAVE_AFFINITY 2
389 #define BREAK_LEFT 1
390 #define BREAK_RIGHT 2
395 static int queue_length(struct pullup_field *begin, struct pullup_field *end)
397 int count = 1;
398 struct pullup_field *f;
400 if (!begin || !end) return 0;
401 for (f = begin; f != end; f = f->next) count++;
402 return count;
405 static int find_first_break(struct pullup_field *f, int max)
407 int i;
408 for (i = 0; i < max; i++) {
409 if (f->breaks & BREAK_RIGHT || f->next->breaks & BREAK_LEFT)
410 return i+1;
411 f = f->next;
413 return 0;
416 static void compute_breaks(struct pullup_context *c, struct pullup_field *f0)
418 int i;
419 struct pullup_field *f1 = f0->next;
420 struct pullup_field *f2 = f1->next;
421 struct pullup_field *f3 = f2->next;
422 int l, max_l=0, max_r=0;
423 //struct pullup_field *ff;
424 //for (i=0, ff=c->first; ff != f0; i++, ff=ff->next);
426 if (f0->flags & F_HAVE_BREAKS) return;
427 //printf("\n%d: ", i);
428 f0->flags |= F_HAVE_BREAKS;
430 /* Special case when fields are 100% identical */
431 if (f0->buffer == f2->buffer && f1->buffer != f3->buffer) {
432 f2->breaks |= BREAK_RIGHT;
433 return;
435 if (f0->buffer != f2->buffer && f1->buffer == f3->buffer) {
436 f1->breaks |= BREAK_LEFT;
437 return;
440 for (i = 0; i < c->metric_len; i++) {
441 l = f2->diffs[i] - f3->diffs[i];
442 if (l > max_l) max_l = l;
443 if (-l > max_r) max_r = -l;
445 /* Don't get tripped up when differences are mostly quant error */
446 //printf("%d %d\n", max_l, max_r);
447 if (max_l + max_r < 128) return;
448 if (max_l > 4*max_r) f1->breaks |= BREAK_LEFT;
449 if (max_r > 4*max_l) f2->breaks |= BREAK_RIGHT;
452 static void compute_affinity(struct pullup_context *c, struct pullup_field *f)
454 int i;
455 int max_l=0, max_r=0, l;
456 if (f->flags & F_HAVE_AFFINITY) return;
457 f->flags |= F_HAVE_AFFINITY;
458 for (i = 0; i < c->metric_len; i++) {
459 l = f->comb[i] - f->next->comb[i];
460 if (l > max_l) max_l = l;
461 if (-l > max_r) max_r = -l;
463 if (max_l + max_r < 64) return;
464 if (max_r > 2*max_l) f->affinity = -1;
465 else if (max_l > 2*max_r) f->affinity = 1;
468 static void foo(struct pullup_context *c)
470 struct pullup_field *f = c->first;
471 int i, n = queue_length(f, c->last);
472 for (i = 0; i < n-1; i++) {
473 if (i < n-3) compute_breaks(c, f);
474 compute_affinity(c, f);
475 f = f->next;
479 static int decide_frame_length(struct pullup_context *c)
481 struct pullup_field *f0 = c->first;
482 struct pullup_field *f1 = f0->next;
483 struct pullup_field *f2 = f1->next;
484 struct pullup_field *f3 = f2->next;
485 int l;
487 if (queue_length(c->first, c->last) < 6) return 0;
488 foo(c);
490 if (f0->affinity == -1) return 1;
492 l = find_first_break(f0, 3);
493 if (l == 1 && c->strict_breaks < 0) l = 0;
495 switch (l) {
496 case 1:
497 if (c->strict_breaks > 0 && f0->affinity == 1 && f1->affinity == -1)
498 return 2;
499 else return 1;
500 case 2:
501 /* FIXME: strictly speaking, f0->prev is no longer valid... :) */
502 if (c->strict_pairs
503 && (f0->prev->breaks & BREAK_RIGHT) && (f2->breaks & BREAK_LEFT)
504 && (f0->affinity != 1 || f1->affinity != -1) )
505 return 1;
506 if (f1->affinity == 1) return 1;
507 else return 2;
508 case 3:
509 if (f2->affinity == 1) return 2;
510 else return 3;
511 default:
512 /* 9 possibilities covered before switch */
513 if (f1->affinity == 1) return 1; /* covers 6 */
514 else if (f1->affinity == -1) return 2; /* covers 6 */
515 else if (f2->affinity == -1) { /* covers 2 */
516 if (f0->affinity == 1) return 3;
517 else return 1;
519 else return 2; /* the remaining 6 */
524 static void print_aff_and_breaks(struct pullup_context *c, struct pullup_field *f)
526 int i;
527 int max_l, max_r, l;
528 struct pullup_field *f0 = f;
529 const char aff_l[] = "+..", aff_r[] = "..+";
530 printf("\naffinity: ");
531 for (i = 0; i < 6; i++) {
532 printf("%c%d%c", aff_l[1+f->affinity], i, aff_r[1+f->affinity]);
533 f = f->next;
535 f = f0;
536 printf("\nbreaks: ");
537 for (i=0; i<6; i++) {
538 printf("%c%d%c", f->breaks & BREAK_LEFT ? '|' : '.', i, f->breaks & BREAK_RIGHT ? '|' : '.');
539 f = f->next;
541 printf("\n");
548 struct pullup_frame *pullup_get_frame(struct pullup_context *c)
550 int i;
551 struct pullup_frame *fr = c->frame;
552 int n = decide_frame_length(c);
553 int aff = c->first->next->affinity;
555 if (!n) return 0;
556 if (fr->lock) return 0;
558 if (c->verbose) {
559 print_aff_and_breaks(c, c->first);
560 printf("duration: %d \n", n);
563 fr->lock++;
564 fr->length = n;
565 fr->parity = c->first->parity;
566 fr->buffer = 0;
567 for (i = 0; i < n; i++) {
568 /* We cheat and steal the buffer without release+relock */
569 fr->ifields[i] = c->first->buffer;
570 c->first->buffer = 0;
571 c->first = c->first->next;
574 if (n == 1) {
575 fr->ofields[fr->parity] = fr->ifields[0];
576 fr->ofields[fr->parity^1] = 0;
577 } else if (n == 2) {
578 fr->ofields[fr->parity] = fr->ifields[0];
579 fr->ofields[fr->parity^1] = fr->ifields[1];
580 } else if (n == 3) {
581 if (aff == 0)
582 aff = (fr->ifields[0] == fr->ifields[1]) ? -1 : 1;
583 /* else if (c->verbose) printf("forced aff: %d \n", aff); */
584 fr->ofields[fr->parity] = fr->ifields[1+aff];
585 fr->ofields[fr->parity^1] = fr->ifields[1];
587 pullup_lock_buffer(fr->ofields[0], 0);
588 pullup_lock_buffer(fr->ofields[1], 1);
590 if (fr->ofields[0] == fr->ofields[1]) {
591 fr->buffer = fr->ofields[0];
592 pullup_lock_buffer(fr->buffer, 2);
593 return fr;
595 return fr;
598 static void copy_field(struct pullup_context *c, struct pullup_buffer *dest,
599 struct pullup_buffer *src, int parity)
601 int i, j;
602 unsigned char *d, *s;
603 for (i = 0; i < c->nplanes; i++) {
604 s = src->planes[i] + parity*c->stride[i];
605 d = dest->planes[i] + parity*c->stride[i];
606 for (j = c->h[i]>>1; j; j--) {
607 memcpy(d, s, c->stride[i]);
608 s += c->stride[i]<<1;
609 d += c->stride[i]<<1;
614 void pullup_pack_frame(struct pullup_context *c, struct pullup_frame *fr)
616 int i;
617 int par = fr->parity;
618 if (fr->buffer) return;
619 if (fr->length < 2) return; /* FIXME: deal with this */
620 for (i = 0; i < 2; i++)
622 if (fr->ofields[i]->lock[i^1]) continue;
623 fr->buffer = fr->ofields[i];
624 pullup_lock_buffer(fr->buffer, 2);
625 copy_field(c, fr->buffer, fr->ofields[i^1], i^1);
626 return;
628 fr->buffer = pullup_get_buffer(c, 2);
629 copy_field(c, fr->buffer, fr->ofields[0], 0);
630 copy_field(c, fr->buffer, fr->ofields[1], 1);
633 void pullup_release_frame(struct pullup_frame *fr)
635 int i;
636 for (i = 0; i < fr->length; i++)
637 pullup_release_buffer(fr->ifields[i], fr->parity ^ (i&1));
638 pullup_release_buffer(fr->ofields[0], 0);
639 pullup_release_buffer(fr->ofields[1], 1);
640 if (fr->buffer) pullup_release_buffer(fr->buffer, 2);
641 fr->lock--;
649 struct pullup_context *pullup_alloc_context()
651 struct pullup_context *c;
653 c = calloc(1, sizeof(struct pullup_context));
655 return c;
658 void pullup_preinit_context(struct pullup_context *c)
660 c->bpp = calloc(c->nplanes, sizeof(int));
661 c->w = calloc(c->nplanes, sizeof(int));
662 c->h = calloc(c->nplanes, sizeof(int));
663 c->stride = calloc(c->nplanes, sizeof(int));
664 c->background = calloc(c->nplanes, sizeof(int));
667 void pullup_init_context(struct pullup_context *c)
669 int mp = c->metric_plane;
670 if (c->nbuffers < 10) c->nbuffers = 10;
671 c->buffers = calloc(c->nbuffers, sizeof (struct pullup_buffer));
673 c->metric_w = (c->w[mp] - (c->junk_left + c->junk_right << 3)) >> 3;
674 c->metric_h = (c->h[mp] - (c->junk_top + c->junk_bottom << 1)) >> 3;
675 c->metric_offset = c->junk_left*c->bpp[mp] + (c->junk_top<<1)*c->stride[mp];
676 c->metric_len = c->metric_w * c->metric_h;
678 c->head = make_field_queue(c, 8);
680 c->frame = calloc(1, sizeof (struct pullup_frame));
681 c->frame->ifields = calloc(3, sizeof (struct pullup_buffer *));
683 switch(c->format) {
684 case PULLUP_FMT_Y:
685 c->diff = diff_y;
686 c->comb = licomb_y;
687 #ifdef ARCH_X86
688 #ifdef HAVE_MMX
689 if (c->cpu & PULLUP_CPU_MMX) {
690 c->diff = diff_y_mmx;
691 c->comb = licomb_y_mmx;
693 #endif
694 #endif
695 /* c->comb = qpcomb_y; */
696 break;
697 #if 0
698 case PULLUP_FMT_YUY2:
699 c->diff = diff_yuy2;
700 break;
701 case PULLUP_FMT_RGB32:
702 c->diff = diff_rgb32;
703 break;
704 #endif
708 void pullup_free_context(struct pullup_context *c)
710 struct pullup_field *f;
711 free(c->buffers);
712 f = c->head;
713 do {
714 free(f->diffs);
715 free(f->comb);
716 f = f->next;
717 free(f->prev);
718 } while (f != c->head);
719 free(c->frame);
720 free(c);