r864: Merge 2.1:
[cinelerra_cv/ct.git] / mpeg2enc / predict.c
bloba1b10417d7d52e41044a311e80c26d5a8910218b
1 /* predict.c, motion compensated prediction */
3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
5 /*
6 * Disclaimer of Warranty
8 * These software programs are available to the user without any license fee or
9 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
10 * any and all warranties, whether express, implied, or statuary, including any
11 * implied warranties or merchantability or of fitness for a particular
12 * purpose. In no event shall the copyright-holder be liable for any
13 * incidental, punitive, or consequential damages of any kind whatsoever
14 * arising from the use of these programs.
16 * This disclaimer of warranty extends to the user of these programs and user's
17 * customers, employees, agents, transferees, successors, and assigns.
19 * The MPEG Software Simulation Group does not represent or warrant that the
20 * programs furnished hereunder are free of infringement of any third-party
21 * patents.
23 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
24 * are subject to royalty fees to patent holders. Many of these patents are
25 * general enough such that they are unavoidable regardless of implementation
26 * design.
30 #include "config.h"
31 #include <stdio.h>
32 #include "global.h"
33 #include "cpu_accel.h"
34 #include "simd.h"
37 /* private prototypes */
38 static void predict_mb (
39 pict_data_s *picture,
40 uint8_t *oldref[], uint8_t *newref[], uint8_t *cur[],
41 int lx, int bx, int by, mbinfo_s *mbi, int secondfield);
43 static void pred (
44 pict_data_s *picture,
45 uint8_t *src[], int sfield,
46 uint8_t *dst[], int dfield,
47 int lx, int w, int h, int x, int y, int dx, int dy, int addflag);
49 static void pred_comp (
50 pict_data_s *picture,
51 uint8_t *src, uint8_t *dst,
52 int lx, int w, int h, int x, int y, int dx, int dy, int addflag);
53 #ifdef X86_CPU
54 static void pred_comp_mmxe(
55 pict_data_s *picture,
56 uint8_t *src, uint8_t *dst,
57 int lx, int w, int h, int x, int y, int dx, int dy, int addflag);
58 static void pred_comp_mmx(
59 pict_data_s *picture,
60 uint8_t *src, uint8_t *dst,
61 int lx, int w, int h, int x, int y, int dx, int dy, int addflag);
62 #endif
63 static void calc_DMV
64 ( pict_data_s *picture,int DMV[][2],
65 int *dmvector, int mvx, int mvy);
67 static void clearblock (pict_data_s *picture,
68 uint8_t *cur[], int i0, int j0);
71 Initialise prediction - currently purely selection of which
72 versions of the various low level computation routines to use
76 static void (*ppred_comp)(
77 pict_data_s *picture,
78 uint8_t *src, uint8_t *dst,
79 int lx, int w, int h, int x, int y, int dx, int dy, int addflag);
81 void init_predict_hv()
83 int cpucap = cpu_accel();
85 if( cpucap == 0 ) /* No MMX/SSE etc support available */
87 ppred_comp = pred_comp;
90 #ifdef X86_CPU
91 else if(cpucap & ACCEL_X86_MMXEXT ) /* AMD MMX or SSE... */
93 if(verbose) fprintf( stderr, "SETTING EXTENDED MMX for PREDICTION!\n");
94 ppred_comp = pred_comp_mmxe;
96 else if(cpucap & ACCEL_X86_MMX ) /* Original MMX... */
98 if(verbose) fprintf( stderr, "SETTING MMX for PREDICTION!\n");
99 ppred_comp = pred_comp_mmx;
101 #endif
102 else
104 ppred_comp = pred_comp;
108 /* form prediction for a complete picture (frontend for predict_mb)
110 * reff: reference frame for forward prediction
111 * refb: reference frame for backward prediction
112 * cur: destination (current) frame
113 * secondfield: predict second field of a frame
114 * mbi: macroblock info
116 * Notes:
117 * - cf. predict_mb
120 void predict(pict_data_s *picture,
121 uint8_t *reff[],
122 uint8_t *refb[],
123 uint8_t *cur[3],
124 int secondfield)
126 int i, j, k;
127 mbinfo_s *mbi = picture->mbinfo;
128 k = 0;
130 /* loop through all macroblocks of the picture */
131 for (j=0; j<height2; j+=16)
132 for (i=0; i<width; i+=16)
134 predict_mb(picture,reff,refb,cur,width,i,j,
135 &mbi[k], secondfield );
136 k++;
140 /* form prediction for one macroblock
142 * oldref: reference frame for forward prediction
143 * newref: reference frame for backward prediction
144 * cur: destination (current) frame
145 * lx: frame width (identical to global var `width')
146 * bx,by: picture (field or frame) coordinates of macroblock to be predicted
147 * pict_type: I, P or B
148 * pict_struct: FRAME_PICTURE, TOP_FIELD, BOTTOM_FIELD
149 * mb_type: MB_FORWARD, MB_BACKWARD, MB_INTRA
150 * motion_type: MC_FRAME, MC_FIELD, MC_16X8, MC_DMV
151 * secondfield: predict second field of a frame
152 * PMV[2][2][2]: motion vectors (in half pel picture coordinates)
153 * mv_field_sel[2][2]: motion vertical field selects (for field predictions)
154 * dmvector: differential motion vectors (for dual prime)
156 * Notes:
157 * - when predicting a P type picture which is the second field of
158 * a frame, the same parity reference field is in oldref, while the
159 * opposite parity reference field is assumed to be in newref!
160 * - intra macroblocks are modelled to have a constant prediction of 128
161 * for all pels; this results in a DC DCT coefficient symmetric to 0
162 * - vectors for field prediction in frame pictures are in half pel frame
163 * coordinates (vertical component is twice the field value and always
164 * even)
166 * already covers dual prime (not yet used)
169 static void predict_mb (
170 pict_data_s *picture,
171 uint8_t *oldref[], uint8_t *newref[], uint8_t *cur[],
172 int lx, int bx, int by, mbinfo_s *mbi, int secondfield
175 int addflag, currentfield;
176 uint8_t **predframe;
177 int DMV[2][2];
179 if (mbi->mb_type&MB_INTRA)
181 clearblock(picture,cur,bx,by);
182 return;
185 addflag = 0; /* first prediction is stored, second is added and averaged */
187 if ((mbi->mb_type & MB_FORWARD) || (picture->pict_type==P_TYPE))
189 /* forward prediction, including zero MV in P pictures */
191 if (picture->pict_struct==FRAME_PICTURE)
193 /* frame picture */
195 if ((mbi->motion_type==MC_FRAME) || !(mbi->mb_type & MB_FORWARD))
197 /* frame-based prediction in frame picture */
198 pred(picture,
199 oldref,0,cur,0,
200 lx,16,16,bx,by,mbi->MV[0][0][0],mbi->MV[0][0][1],0);
202 else if (mbi->motion_type==MC_FIELD)
204 /* field-based prediction in frame picture
206 * note scaling of the vertical coordinates (by, mbi->MV[][0][1])
207 * from frame to field!
210 /* top field prediction */
211 pred(picture,oldref,mbi->mv_field_sel[0][0],cur,0,
212 lx<<1,16,8,bx,by>>1,mbi->MV[0][0][0],mbi->MV[0][0][1]>>1,0);
214 /* bottom field prediction */
215 pred(picture,oldref,mbi->mv_field_sel[1][0],cur,1,
216 lx<<1,16,8,bx,by>>1,mbi->MV[1][0][0],mbi->MV[1][0][1]>>1,0);
218 else if (mbi->motion_type==MC_DMV)
220 /* dual prime prediction */
222 /* calculate derived motion vectors */
223 calc_DMV(picture,DMV,mbi->dmvector,mbi->MV[0][0][0],mbi->MV[0][0][1]>>1);
225 /* predict top field from top field */
226 pred(picture,oldref,0,cur,0,
227 lx<<1,16,8,bx,by>>1,mbi->MV[0][0][0],mbi->MV[0][0][1]>>1,0);
229 /* predict bottom field from bottom field */
230 pred(picture,oldref,1,cur,1,
231 lx<<1,16,8,bx,by>>1,mbi->MV[0][0][0],mbi->MV[0][0][1]>>1,0);
233 /* predict and add to top field from bottom field */
234 pred(picture,oldref,1,cur,0,
235 lx<<1,16,8,bx,by>>1,DMV[0][0],DMV[0][1],1);
237 /* predict and add to bottom field from top field */
238 pred(picture,oldref,0,cur,1,
239 lx<<1,16,8,bx,by>>1,DMV[1][0],DMV[1][1],1);
241 else
243 /* invalid mbi->motion_type in frame picture */
244 fprintf(stderr,"invalid motion_type\n");
247 else /* TOP_FIELD or BOTTOM_FIELD */
249 /* field picture */
251 currentfield = (picture->pict_struct==BOTTOM_FIELD);
253 /* determine which frame to use for prediction */
254 if ((picture->pict_type==P_TYPE) && secondfield
255 && (currentfield!=mbi->mv_field_sel[0][0]))
256 predframe = newref; /* same frame */
257 else
258 predframe = oldref; /* previous frame */
260 if ((mbi->motion_type==MC_FIELD) || !(mbi->mb_type & MB_FORWARD))
262 /* field-based prediction in field picture */
263 pred(picture,predframe,mbi->mv_field_sel[0][0],cur,currentfield,
264 lx<<1,16,16,bx,by,mbi->MV[0][0][0],mbi->MV[0][0][1],0);
266 else if (mbi->motion_type==MC_16X8)
268 /* 16 x 8 motion compensation in field picture */
270 /* upper half */
271 pred(picture,predframe,mbi->mv_field_sel[0][0],cur,currentfield,
272 lx<<1,16,8,bx,by,mbi->MV[0][0][0],mbi->MV[0][0][1],0);
274 /* determine which frame to use for lower half prediction */
275 if ((picture->pict_type==P_TYPE) && secondfield
276 && (currentfield!=mbi->mv_field_sel[1][0]))
277 predframe = newref; /* same frame */
278 else
279 predframe = oldref; /* previous frame */
281 /* lower half */
282 pred(picture,predframe,mbi->mv_field_sel[1][0],cur,currentfield,
283 lx<<1,16,8,bx,by+8,mbi->MV[1][0][0],mbi->MV[1][0][1],0);
285 else if (mbi->motion_type==MC_DMV)
287 /* dual prime prediction */
289 /* determine which frame to use for prediction */
290 if (secondfield)
291 predframe = newref; /* same frame */
292 else
293 predframe = oldref; /* previous frame */
295 /* calculate derived motion vectors */
296 calc_DMV(picture,DMV,mbi->dmvector,mbi->MV[0][0][0],mbi->MV[0][0][1]);
298 /* predict from field of same parity */
299 pred(picture,oldref,currentfield,cur,currentfield,
300 lx<<1,16,16,bx,by,mbi->MV[0][0][0],mbi->MV[0][0][1],0);
302 /* predict from field of opposite parity */
303 pred(picture,predframe,!currentfield,cur,currentfield,
304 lx<<1,16,16,bx,by,DMV[0][0],DMV[0][1],1);
306 else
308 /* invalid motion_type in field picture */
309 fprintf(stderr,"invalid motion_type\n");
312 addflag = 1; /* next prediction (if any) will be averaged with this one */
315 if (mbi->mb_type & MB_BACKWARD)
317 /* backward prediction */
319 if (picture->pict_struct==FRAME_PICTURE)
321 /* frame picture */
323 if (mbi->motion_type==MC_FRAME)
325 /* frame-based prediction in frame picture */
326 pred(picture,newref,0,cur,0,
327 lx,16,16,bx,by,mbi->MV[0][1][0],mbi->MV[0][1][1],addflag);
329 else
331 /* field-based prediction in frame picture
333 * note scaling of the vertical coordinates (by, mbi->MV[][1][1])
334 * from frame to field!
337 /* top field prediction */
338 pred(picture,newref,mbi->mv_field_sel[0][1],cur,0,
339 lx<<1,16,8,bx,by>>1,mbi->MV[0][1][0],mbi->MV[0][1][1]>>1,addflag);
341 /* bottom field prediction */
342 pred(picture,newref,mbi->mv_field_sel[1][1],cur,1,
343 lx<<1,16,8,bx,by>>1,mbi->MV[1][1][0],mbi->MV[1][1][1]>>1,addflag);
346 else /* TOP_FIELD or BOTTOM_FIELD */
348 /* field picture */
350 currentfield = (picture->pict_struct==BOTTOM_FIELD);
352 if (mbi->motion_type==MC_FIELD)
354 /* field-based prediction in field picture */
355 pred(picture,newref,mbi->mv_field_sel[0][1],cur,currentfield,
356 lx<<1,16,16,bx,by,mbi->MV[0][1][0],mbi->MV[0][1][1],addflag);
358 else if (mbi->motion_type==MC_16X8)
360 /* 16 x 8 motion compensation in field picture */
362 /* upper half */
363 pred(picture,newref,mbi->mv_field_sel[0][1],cur,currentfield,
364 lx<<1,16,8,bx,by,mbi->MV[0][1][0],mbi->MV[0][1][1],addflag);
366 /* lower half */
367 pred(picture,newref,mbi->mv_field_sel[1][1],cur,currentfield,
368 lx<<1,16,8,bx,by+8,mbi->MV[1][1][0],mbi->MV[1][1][1],addflag);
370 else
372 /* invalid motion_type in field picture */
373 fprintf(stderr,"invalid motion_type\n");
379 /* predict a rectangular block (all three components)
381 * src: source frame (Y,U,V)
382 * sfield: source field select (0: frame or top field, 1: bottom field)
383 * dst: destination frame (Y,U,V)
384 * dfield: destination field select (0: frame or top field, 1: bottom field)
386 * the following values are in luminance picture (frame or field) dimensions
387 * lx: distance of vertically adjacent pels (selects frame or field pred.)
388 * w,h: width and height of block (only 16x16 or 16x8 are used)
389 * x,y: coordinates of destination block
390 * dx,dy: half pel motion vector
391 * addflag: store or add (= average) prediction
394 static void pred (
395 pict_data_s *picture,
396 uint8_t *src[], int sfield,
397 uint8_t *dst[], int dfield,
398 int lx, int w, int h, int x, int y, int dx, int dy, int addflag
401 int cc;
403 for (cc=0; cc<3; cc++)
405 if (cc==1)
407 /* scale for color components */
408 if (chroma_format==CHROMA420)
410 /* vertical */
411 h >>= 1; y >>= 1; dy /= 2;
413 if (chroma_format!=CHROMA444)
415 /* horizontal */
416 w >>= 1; x >>= 1; dx /= 2;
417 lx >>= 1;
420 (*ppred_comp)( picture,
421 src[cc]+(sfield?lx>>1:0),dst[cc]+(dfield?lx>>1:0),
422 lx,w,h,x,y,dx,dy,addflag);
426 /* low level prediction routine
428 * src: prediction source
429 * dst: prediction destination
430 * lx: line width (for both src and dst)
431 * x,y: destination coordinates
432 * dx,dy: half pel motion vector
433 * w,h: size of prediction block
434 * addflag: store or add prediction
436 * There are also SIMD versions of this routine...
439 static void pred_comp(
440 pict_data_s *picture,
441 uint8_t *src,
442 uint8_t *dst,
443 int lx,
444 int w, int h,
445 int x, int y,
446 int dx, int dy,
447 int addflag)
449 int xint, xh, yint, yh;
450 int i, j;
451 uint8_t *s, *d;
453 /* half pel scaling */
454 xint = dx>>1; /* integer part */
455 xh = dx & 1; /* half pel flag */
456 yint = dy>>1;
457 yh = dy & 1;
459 /* origins */
460 s = src + lx*(y+yint) + (x+xint); /* motion vector */
461 d = dst + lx*y + x;
463 if (!xh && !yh)
464 if (addflag)
465 for (j=0; j<h; j++)
467 for (i=0; i<w; i++)
468 d[i] = (unsigned int)(d[i]+s[i]+1)>>1;
469 s+= lx;
470 d+= lx;
472 else
473 for (j=0; j<h; j++)
475 for (i=0; i<w; i++)
476 d[i] = s[i];
477 s+= lx;
478 d+= lx;
480 else if (!xh && yh)
481 if (addflag)
482 for (j=0; j<h; j++)
484 for (i=0; i<w; i++)
485 d[i] = (d[i] + ((unsigned int)(s[i]+s[i+lx]+1)>>1)+1)>>1;
486 s+= lx;
487 d+= lx;
489 else
490 for (j=0; j<h; j++)
492 for (i=0; i<w; i++)
493 d[i] = (unsigned int)(s[i]+s[i+lx]+1)>>1;
494 s+= lx;
495 d+= lx;
497 else if (xh && !yh)
498 if (addflag)
499 for (j=0; j<h; j++)
501 for (i=0; i<w; i++)
502 d[i] = (d[i] + ((unsigned int)(s[i]+s[i+1]+1)>>1)+1)>>1;
503 s+= lx;
504 d+= lx;
506 else
507 for (j=0; j<h; j++)
509 for (i=0; i<w; i++)
510 d[i] = (unsigned int)(s[i]+s[i+1]+1)>>1;
511 s+= lx;
512 d+= lx;
514 else /* if (xh && yh) */
515 if (addflag)
516 for (j=0; j<h; j++)
518 for (i=0; i<w; i++)
519 d[i] = (d[i] + ((unsigned int)(s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2)+1)>>1;
520 s+= lx;
521 d+= lx;
523 else
524 for (j=0; j<h; j++)
526 for (i=0; i<w; i++)
527 d[i] = (unsigned int)(s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2;
528 s+= lx;
529 d+= lx;
533 #ifdef X86_CPU
534 static void pred_comp_mmxe(
535 pict_data_s *picture,
536 uint8_t *src,
537 uint8_t *dst,
538 int lx,
539 int w, int h,
540 int x, int y,
541 int dx, int dy,
542 int addflag)
544 int xint, xh, yint, yh;
545 uint8_t *s, *d;
547 /* half pel scaling */
548 xint = dx>>1; /* integer part */
549 xh = dx & 1; /* half pel flag */
550 yint = dy>>1;
551 yh = dy & 1;
553 /* origins */
554 s = src + lx*(y+yint) + (x+xint); /* motion vector */
555 d = dst + lx*y + x;
557 if( xh )
559 if( yh )
560 predcomp_11_mmxe(s,d,lx,w,h,addflag);
561 else /* !yh */
562 predcomp_10_mmxe(s,d,lx,w,h,addflag);
564 else /* !xh */
566 if( yh )
567 predcomp_01_mmxe(s,d,lx,w,h,addflag);
568 else /* !yh */
569 predcomp_00_mmxe(s,d,lx,w,h,addflag);
574 static void pred_comp_mmx(
575 pict_data_s *picture,
576 uint8_t *src,
577 uint8_t *dst,
578 int lx,
579 int w, int h,
580 int x, int y,
581 int dx, int dy,
582 int addflag)
584 int xint, xh, yint, yh;
585 uint8_t *s, *d;
587 /* half pel scaling */
588 xint = dx>>1; /* integer part */
589 xh = dx & 1; /* half pel flag */
590 yint = dy>>1;
591 yh = dy & 1;
593 /* origins */
594 s = src + lx*(y+yint) + (x+xint); /* motion vector */
595 d = dst + lx*y + x;
597 if( xh )
599 if( yh )
600 predcomp_11_mmx(s,d,lx,w,h,addflag);
601 else /* !yh */
602 predcomp_10_mmx(s,d,lx,w,h,addflag);
604 else /* !xh */
606 if( yh )
607 predcomp_01_mmx(s,d,lx,w,h,addflag);
608 else /* !yh */
609 predcomp_00_mmx(s,d,lx,w,h,addflag);
613 #endif
615 /* calculate derived motion vectors (DMV) for dual prime prediction
616 * dmvector[2]: differential motion vectors (-1,0,+1)
617 * mvx,mvy: motion vector (for same parity)
619 * DMV[2][2]: derived motion vectors (for opposite parity)
621 * uses global variables pict_struct and topfirst
623 * Notes:
624 * - all vectors are in field coordinates (even for frame pictures)
627 static void calc_DMV(
628 pict_data_s *picture,int DMV[][2],
629 int *dmvector, int mvx, int mvy
632 if (picture->pict_struct==FRAME_PICTURE)
634 if (picture->topfirst)
636 /* vector for prediction of top field from bottom field */
637 DMV[0][0] = ((mvx +(mvx>0))>>1) + dmvector[0];
638 DMV[0][1] = ((mvy +(mvy>0))>>1) + dmvector[1] - 1;
640 /* vector for prediction of bottom field from top field */
641 DMV[1][0] = ((3*mvx+(mvx>0))>>1) + dmvector[0];
642 DMV[1][1] = ((3*mvy+(mvy>0))>>1) + dmvector[1] + 1;
644 else
646 /* vector for prediction of top field from bottom field */
647 DMV[0][0] = ((3*mvx+(mvx>0))>>1) + dmvector[0];
648 DMV[0][1] = ((3*mvy+(mvy>0))>>1) + dmvector[1] - 1;
650 /* vector for prediction of bottom field from top field */
651 DMV[1][0] = ((mvx +(mvx>0))>>1) + dmvector[0];
652 DMV[1][1] = ((mvy +(mvy>0))>>1) + dmvector[1] + 1;
655 else
657 /* vector for prediction from field of opposite 'parity' */
658 DMV[0][0] = ((mvx+(mvx>0))>>1) + dmvector[0];
659 DMV[0][1] = ((mvy+(mvy>0))>>1) + dmvector[1];
661 /* correct for vertical field shift */
662 if (picture->pict_struct==TOP_FIELD)
663 DMV[0][1]--;
664 else
665 DMV[0][1]++;
669 static void clearblock(
670 pict_data_s *picture,
671 uint8_t *cur[], int i0, int j0
674 int i, j, w, h;
675 uint8_t *p;
677 p = cur[0] + ((picture->pict_struct==BOTTOM_FIELD) ? width : 0) + i0 + width2*j0;
679 for (j=0; j<16; j++)
681 for (i=0; i<16; i++)
682 p[i] = 128;
683 p+= width2;
686 w = h = 16;
688 if (chroma_format!=CHROMA444)
690 i0>>=1; w>>=1;
693 if (chroma_format==CHROMA420)
695 j0>>=1; h>>=1;
698 p = cur[1] + ((picture->pict_struct==BOTTOM_FIELD) ? chrom_width : 0) + i0
699 + chrom_width2*j0;
701 for (j=0; j<h; j++)
703 for (i=0; i<w; i++)
704 p[i] = 128;
705 p+= chrom_width2;
708 p = cur[2] + ((picture->pict_struct==BOTTOM_FIELD) ? chrom_width : 0) + i0
709 + chrom_width2*j0;
711 for (j=0; j<h; j++)
713 for (i=0; i<w; i++)
714 p[i] = 128;
715 p+= chrom_width2;