1 /*****************************************************************************
2 * predict.c: h264 encoder
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
8 * Jason Garrett-Glaser <darkshikari@gmail.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 *****************************************************************************/
25 /* predict4x4 are inspired from ffmpeg h264 decoder */
#include "common/common.h"

#ifdef HAVE_MMX
#   include "x86/predict.h"
#endif

#ifdef ARCH_PPC
#   include "ppc/predict.h"
#endif
37 /****************************************************************************
38 * 16x16 prediction for intra luma block
39 ****************************************************************************/
/* Fill the whole 16x16 block with the (replicated) DC value v,
 * 4 bytes at a time.  Expects `i` and `src` in the caller's scope;
 * leaves `src` advanced past the block. */
#define PREDICT_16x16_DC(v) \
    for( i = 0; i < 16; i++ )\
    {\
        uint32_t *p = (uint32_t*)src;\
        *p++ = v;\
        *p++ = v;\
        *p++ = v;\
        *p++ = v;\
        src += FDEC_STRIDE;\
    }
52 static void predict_16x16_dc( uint8_t *src
)
57 for( i
= 0; i
< 16; i
++ )
59 dc
+= src
[-1 + i
* FDEC_STRIDE
];
60 dc
+= src
[i
- FDEC_STRIDE
];
62 dc
= (( dc
+ 16 ) >> 5) * 0x01010101;
66 static void predict_16x16_dc_left( uint8_t *src
)
71 for( i
= 0; i
< 16; i
++ )
73 dc
+= src
[-1 + i
* FDEC_STRIDE
];
75 dc
= (( dc
+ 8 ) >> 4) * 0x01010101;
79 static void predict_16x16_dc_top( uint8_t *src
)
84 for( i
= 0; i
< 16; i
++ )
86 dc
+= src
[i
- FDEC_STRIDE
];
88 dc
= (( dc
+ 8 ) >> 4) * 0x01010101;
/* 16x16 DC prediction with no neighbours available: fill with 128. */
static void predict_16x16_dc_128( uint8_t *src )
{
    int i;
    PREDICT_16x16_DC(0x80808080);
}
97 static void predict_16x16_h( uint8_t *src
)
101 for( i
= 0; i
< 16; i
++ )
103 const uint32_t v
= 0x01010101 * src
[-1];
104 uint32_t *p
= (uint32_t*)src
;
115 static void predict_16x16_v( uint8_t *src
)
117 uint32_t v0
= *(uint32_t*)&src
[ 0-FDEC_STRIDE
];
118 uint32_t v1
= *(uint32_t*)&src
[ 4-FDEC_STRIDE
];
119 uint32_t v2
= *(uint32_t*)&src
[ 8-FDEC_STRIDE
];
120 uint32_t v3
= *(uint32_t*)&src
[12-FDEC_STRIDE
];
123 for( i
= 0; i
< 16; i
++ )
125 uint32_t *p
= (uint32_t*)src
;
133 static void predict_16x16_p( uint8_t *src
)
141 /* calculate H and V */
142 for( i
= 0; i
<= 7; i
++ )
144 H
+= ( i
+ 1 ) * ( src
[ 8 + i
- FDEC_STRIDE
] - src
[6 -i
-FDEC_STRIDE
] );
145 V
+= ( i
+ 1 ) * ( src
[-1 + (8+i
)*FDEC_STRIDE
] - src
[-1 + (6-i
)*FDEC_STRIDE
] );
148 a
= 16 * ( src
[-1 + 15*FDEC_STRIDE
] + src
[15 - FDEC_STRIDE
] );
149 b
= ( 5 * H
+ 32 ) >> 6;
150 c
= ( 5 * V
+ 32 ) >> 6;
152 i00
= a
- b
* 7 - c
* 7 + 16;
154 for( y
= 0; y
< 16; y
++ )
157 for( x
= 0; x
< 16; x
++ )
159 src
[x
] = x264_clip_uint8( pix
>>5 );
168 /****************************************************************************
169 * 8x8 prediction for intra chroma block
170 ****************************************************************************/
172 static void predict_8x8c_dc_128( uint8_t *src
)
176 for( y
= 0; y
< 8; y
++ )
178 uint32_t *p
= (uint32_t*)src
;
184 static void predict_8x8c_dc_left( uint8_t *src
)
187 uint32_t dc0
= 0, dc1
= 0;
189 for( y
= 0; y
< 4; y
++ )
191 dc0
+= src
[y
* FDEC_STRIDE
- 1];
192 dc1
+= src
[(y
+4) * FDEC_STRIDE
- 1];
194 dc0
= (( dc0
+ 2 ) >> 2)*0x01010101;
195 dc1
= (( dc1
+ 2 ) >> 2)*0x01010101;
197 for( y
= 0; y
< 4; y
++ )
199 uint32_t *p
= (uint32_t*)src
;
204 for( y
= 0; y
< 4; y
++ )
206 uint32_t *p
= (uint32_t*)src
;
213 static void predict_8x8c_dc_top( uint8_t *src
)
216 uint32_t dc0
= 0, dc1
= 0;
218 for( x
= 0; x
< 4; x
++ )
220 dc0
+= src
[x
- FDEC_STRIDE
];
221 dc1
+= src
[x
+ 4 - FDEC_STRIDE
];
223 dc0
= (( dc0
+ 2 ) >> 2)*0x01010101;
224 dc1
= (( dc1
+ 2 ) >> 2)*0x01010101;
226 for( y
= 0; y
< 8; y
++ )
228 uint32_t *p
= (uint32_t*)src
;
234 static void predict_8x8c_dc( uint8_t *src
)
237 int s0
= 0, s1
= 0, s2
= 0, s3
= 0;
238 uint32_t dc0
, dc1
, dc2
, dc3
;
246 for( i
= 0; i
< 4; i
++ )
248 s0
+= src
[i
- FDEC_STRIDE
];
249 s1
+= src
[i
+ 4 - FDEC_STRIDE
];
250 s2
+= src
[-1 + i
* FDEC_STRIDE
];
251 s3
+= src
[-1 + (i
+4)*FDEC_STRIDE
];
257 dc0
= (( s0
+ s2
+ 4 ) >> 3)*0x01010101;
258 dc1
= (( s1
+ 2 ) >> 2)*0x01010101;
259 dc2
= (( s3
+ 2 ) >> 2)*0x01010101;
260 dc3
= (( s1
+ s3
+ 4 ) >> 3)*0x01010101;
262 for( y
= 0; y
< 4; y
++ )
264 uint32_t *p
= (uint32_t*)src
;
270 for( y
= 0; y
< 4; y
++ )
272 uint32_t *p
= (uint32_t*)src
;
278 static void predict_8x8c_h( uint8_t *src
)
282 for( i
= 0; i
< 8; i
++ )
284 uint32_t v
= 0x01010101 * src
[-1];
285 uint32_t *p
= (uint32_t*)src
;
291 static void predict_8x8c_v( uint8_t *src
)
293 uint32_t v0
= *(uint32_t*)&src
[0-FDEC_STRIDE
];
294 uint32_t v1
= *(uint32_t*)&src
[4-FDEC_STRIDE
];
297 for( i
= 0; i
< 8; i
++ )
299 uint32_t *p
= (uint32_t*)src
;
305 static void predict_8x8c_p( uint8_t *src
)
314 for( i
= 0; i
< 4; i
++ )
316 H
+= ( i
+ 1 ) * ( src
[4+i
- FDEC_STRIDE
] - src
[2 - i
-FDEC_STRIDE
] );
317 V
+= ( i
+ 1 ) * ( src
[-1 +(i
+4)*FDEC_STRIDE
] - src
[-1+(2-i
)*FDEC_STRIDE
] );
320 a
= 16 * ( src
[-1+7*FDEC_STRIDE
] + src
[7 - FDEC_STRIDE
] );
321 b
= ( 17 * H
+ 16 ) >> 5;
322 c
= ( 17 * V
+ 16 ) >> 5;
323 i00
= a
-3*b
-3*c
+ 16;
325 for( y
= 0; y
< 8; y
++ )
328 for( x
= 0; x
< 8; x
++ )
330 src
[x
] = x264_clip_uint8( pix
>>5 );
338 /****************************************************************************
339 * 4x4 prediction for intra luma block
340 ****************************************************************************/
/* Pixel accessors relative to the current block's top-left corner;
 * negative coordinates address the neighbouring (already decoded) edge. */
#define SRC(x,y) src[(x)+(y)*FDEC_STRIDE]
#define SRC32(x,y) *(uint32_t*)&SRC(x,y)

/* Fill a 4x4 block with the replicated DC value v, one row per store. */
#define PREDICT_4x4_DC(v)\
    SRC32(0,0) = SRC32(0,1) = SRC32(0,2) = SRC32(0,3) = v;
/* 4x4 DC prediction with no neighbours available: fill with 128. */
static void predict_4x4_dc_128( uint8_t *src )
{
    PREDICT_4x4_DC(0x80808080);
}
/* 4x4 DC prediction from the 4 left neighbours only. */
static void predict_4x4_dc_left( uint8_t *src )
{
    uint32_t dc = ((SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + 2) >> 2) * 0x01010101;
    PREDICT_4x4_DC(dc);
}
/* 4x4 DC prediction from the 4 top neighbours only. */
static void predict_4x4_dc_top( uint8_t *src )
{
    uint32_t dc = ((SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 2) >> 2) * 0x01010101;
    PREDICT_4x4_DC(dc);
}
/* 4x4 DC prediction: mean of the 4 left + 4 top neighbours. */
static void predict_4x4_dc( uint8_t *src )
{
    uint32_t dc = ((SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) +
                    SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 4) >> 3) * 0x01010101;
    PREDICT_4x4_DC(dc);
}
368 static void predict_4x4_h( uint8_t *src
)
370 SRC32(0,0) = SRC(-1,0) * 0x01010101;
371 SRC32(0,1) = SRC(-1,1) * 0x01010101;
372 SRC32(0,2) = SRC(-1,2) * 0x01010101;
373 SRC32(0,3) = SRC(-1,3) * 0x01010101;
/* 4x4 vertical prediction: copy the top-neighbour row into every row. */
static void predict_4x4_v( uint8_t *src )
{
    PREDICT_4x4_DC(SRC32(0,-1));
}
/* Load neighbour pixels into named locals (l0..l3 left, t0..t7 top /
 * top-right).  The last one in each set is marked UNUSED because not
 * every prediction mode reads it. */
#define PREDICT_4x4_LOAD_LEFT\
    const int l0 = SRC(-1,0);\
    const int l1 = SRC(-1,1);\
    const int l2 = SRC(-1,2);\
    UNUSED const int l3 = SRC(-1,3);

#define PREDICT_4x4_LOAD_TOP\
    const int t0 = SRC(0,-1);\
    const int t1 = SRC(1,-1);\
    const int t2 = SRC(2,-1);\
    UNUSED const int t3 = SRC(3,-1);

#define PREDICT_4x4_LOAD_TOP_RIGHT\
    const int t4 = SRC(4,-1);\
    const int t5 = SRC(5,-1);\
    const int t6 = SRC(6,-1);\
    UNUSED const int t7 = SRC(7,-1);

/* 2-tap and 3-tap lowpass filters with rounding (H.264 intra filters). */
#define F1(a,b)   (((a)+(b)+1)>>1)
#define F2(a,b,c) (((a)+2*(b)+(c)+2)>>2)
401 static void predict_4x4_ddl( uint8_t *src
)
404 PREDICT_4x4_LOAD_TOP_RIGHT
405 SRC(0,0)= F2(t0
,t1
,t2
);
406 SRC(1,0)=SRC(0,1)= F2(t1
,t2
,t3
);
407 SRC(2,0)=SRC(1,1)=SRC(0,2)= F2(t2
,t3
,t4
);
408 SRC(3,0)=SRC(2,1)=SRC(1,2)=SRC(0,3)= F2(t3
,t4
,t5
);
409 SRC(3,1)=SRC(2,2)=SRC(1,3)= F2(t4
,t5
,t6
);
410 SRC(3,2)=SRC(2,3)= F2(t5
,t6
,t7
);
411 SRC(3,3)= F2(t6
,t7
,t7
);
413 static void predict_4x4_ddr( uint8_t *src
)
415 const int lt
= SRC(-1,-1);
416 PREDICT_4x4_LOAD_LEFT
418 SRC(3,0)= F2(t3
,t2
,t1
);
419 SRC(2,0)=SRC(3,1)= F2(t2
,t1
,t0
);
420 SRC(1,0)=SRC(2,1)=SRC(3,2)= F2(t1
,t0
,lt
);
421 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)= F2(t0
,lt
,l0
);
422 SRC(0,1)=SRC(1,2)=SRC(2,3)= F2(lt
,l0
,l1
);
423 SRC(0,2)=SRC(1,3)= F2(l0
,l1
,l2
);
424 SRC(0,3)= F2(l1
,l2
,l3
);
427 static void predict_4x4_vr( uint8_t *src
)
429 const int lt
= SRC(-1,-1);
430 PREDICT_4x4_LOAD_LEFT
432 SRC(0,3)= F2(l2
,l1
,l0
);
433 SRC(0,2)= F2(l1
,l0
,lt
);
434 SRC(0,1)=SRC(1,3)= F2(l0
,lt
,t0
);
435 SRC(0,0)=SRC(1,2)= F1(lt
,t0
);
436 SRC(1,1)=SRC(2,3)= F2(lt
,t0
,t1
);
437 SRC(1,0)=SRC(2,2)= F1(t0
,t1
);
438 SRC(2,1)=SRC(3,3)= F2(t0
,t1
,t2
);
439 SRC(2,0)=SRC(3,2)= F1(t1
,t2
);
440 SRC(3,1)= F2(t1
,t2
,t3
);
444 static void predict_4x4_hd( uint8_t *src
)
446 const int lt
= SRC(-1,-1);
447 PREDICT_4x4_LOAD_LEFT
450 SRC(1,3)= F2(l1
,l2
,l3
);
451 SRC(0,2)=SRC(2,3)= F1(l1
,l2
);
452 SRC(1,2)=SRC(3,3)= F2(l0
,l1
,l2
);
453 SRC(0,1)=SRC(2,2)= F1(l0
,l1
);
454 SRC(1,1)=SRC(3,2)= F2(lt
,l0
,l1
);
455 SRC(0,0)=SRC(2,1)= F1(lt
,l0
);
456 SRC(1,0)=SRC(3,1)= F2(t0
,lt
,l0
);
457 SRC(2,0)= F2(t1
,t0
,lt
);
458 SRC(3,0)= F2(t2
,t1
,t0
);
461 static void predict_4x4_vl( uint8_t *src
)
464 PREDICT_4x4_LOAD_TOP_RIGHT
466 SRC(0,1)= F2(t0
,t1
,t2
);
467 SRC(1,0)=SRC(0,2)= F1(t1
,t2
);
468 SRC(1,1)=SRC(0,3)= F2(t1
,t2
,t3
);
469 SRC(2,0)=SRC(1,2)= F1(t2
,t3
);
470 SRC(2,1)=SRC(1,3)= F2(t2
,t3
,t4
);
471 SRC(3,0)=SRC(2,2)= F1(t3
,t4
);
472 SRC(3,1)=SRC(2,3)= F2(t3
,t4
,t5
);
474 SRC(3,3)= F2(t4
,t5
,t6
);
477 static void predict_4x4_hu( uint8_t *src
)
479 PREDICT_4x4_LOAD_LEFT
481 SRC(1,0)= F2(l0
,l1
,l2
);
482 SRC(2,0)=SRC(0,1)= F1(l1
,l2
);
483 SRC(3,0)=SRC(1,1)= F2(l1
,l2
,l3
);
484 SRC(2,1)=SRC(0,2)= F1(l2
,l3
);
485 SRC(3,1)=SRC(1,2)= F2(l2
,l3
,l3
);
486 SRC(3,2)=SRC(1,3)=SRC(0,3)=
487 SRC(2,2)=SRC(2,3)=SRC(3,3)= l3
;
490 /****************************************************************************
491 * 8x8 prediction for intra luma block
492 ****************************************************************************/
/* Lowpass-filter one left (PL) or top (PT) neighbour sample into the
 * edge[] cache used by the 8x8 intra predictors. */
#define PL(y) \
    edge[14-y] = F2(SRC(-1,y-1), SRC(-1,y), SRC(-1,y+1));
#define PT(x) \
    edge[16+x] = F2(SRC(x-1,-1), SRC(x,-1), SRC(x+1,-1));
499 void x264_predict_8x8_filter( uint8_t *src
, uint8_t edge
[33], int i_neighbor
, int i_filters
)
501 /* edge[7..14] = l7..l0
503 * edge[16..31] = t0 .. t15
506 int have_lt
= i_neighbor
& MB_TOPLEFT
;
507 if( i_filters
& MB_LEFT
)
509 edge
[15] = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2;
510 edge
[14] = ((have_lt
? SRC(-1,-1) : SRC(-1,0))
511 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2;
512 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6)
513 edge
[7] = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2;
516 if( i_filters
& MB_TOP
)
518 int have_tr
= i_neighbor
& MB_TOPRIGHT
;
519 edge
[16] = ((have_lt
? SRC(-1,-1) : SRC(0,-1))
520 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2;
521 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6)
522 edge
[23] = ((have_tr
? SRC(8,-1) : SRC(7,-1))
523 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2;
525 if( i_filters
& MB_TOPRIGHT
)
529 PT(8) PT(9) PT(10) PT(11) PT(12) PT(13) PT(14)
531 edge
[32] = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2;
535 *(uint64_t*)(edge
+24) = SRC(7,-1) * 0x0101010101010101ULL
;
536 edge
[32] = SRC(7,-1);
/* Redefine PL/PT to read the cached edge[] samples into named locals. */
#undef PL
#undef PT
#define PL(y) \
    UNUSED const int l##y = edge[14-y];
#define PT(x) \
    UNUSED const int t##x = edge[16+x];
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = edge[15];
#define PREDICT_8x8_LOAD_LEFT \
    PL(0) PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) PL(7)
#define PREDICT_8x8_LOAD_TOP \
    PT(0) PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) PT(7)
#define PREDICT_8x8_LOAD_TOPRIGHT \
    PT(8) PT(9) PT(10) PT(11) PT(12) PT(13) PT(14) PT(15)

/* Fill the whole 8x8 block with the replicated DC value v. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((uint32_t*)src)[0] = \
        ((uint32_t*)src)[1] = v; \
        src += FDEC_STRIDE; \
    }
/* SIMD is much faster than C for all of these except HU and HD. */
static void predict_8x8_dc_128( uint8_t *src, uint8_t edge[33] )
{
    PREDICT_8x8_DC(0x80808080);
}
571 static void predict_8x8_dc_left( uint8_t *src
, uint8_t edge
[33] )
573 PREDICT_8x8_LOAD_LEFT
574 const uint32_t dc
= ((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
+4) >> 3) * 0x01010101;
577 static void predict_8x8_dc_top( uint8_t *src
, uint8_t edge
[33] )
580 const uint32_t dc
= ((t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+4) >> 3) * 0x01010101;
583 static void predict_8x8_dc( uint8_t *src
, uint8_t edge
[33] )
585 PREDICT_8x8_LOAD_LEFT
587 const uint32_t dc
= ((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
588 +t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+8) >> 4) * 0x01010101;
591 static void predict_8x8_h( uint8_t *src
, uint8_t edge
[33] )
593 PREDICT_8x8_LOAD_LEFT
594 #define ROW(y) ((uint32_t*)(src+y*FDEC_STRIDE))[0] =\
595 ((uint32_t*)(src+y*FDEC_STRIDE))[1] = 0x01010101U * l##y
596 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
599 static void predict_8x8_v( uint8_t *src
, uint8_t edge
[33] )
601 const uint64_t top
= *(uint64_t*)(edge
+16);
603 for( y
= 0; y
< 8; y
++ )
604 *(uint64_t*)(src
+y
*FDEC_STRIDE
) = top
;
606 static void predict_8x8_ddl( uint8_t *src
, uint8_t edge
[33] )
609 PREDICT_8x8_LOAD_TOPRIGHT
610 SRC(0,0)= F2(t0
,t1
,t2
);
611 SRC(0,1)=SRC(1,0)= F2(t1
,t2
,t3
);
612 SRC(0,2)=SRC(1,1)=SRC(2,0)= F2(t2
,t3
,t4
);
613 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= F2(t3
,t4
,t5
);
614 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= F2(t4
,t5
,t6
);
615 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= F2(t5
,t6
,t7
);
616 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= F2(t6
,t7
,t8
);
617 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= F2(t7
,t8
,t9
);
618 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= F2(t8
,t9
,t10
);
619 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= F2(t9
,t10
,t11
);
620 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= F2(t10
,t11
,t12
);
621 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= F2(t11
,t12
,t13
);
622 SRC(5,7)=SRC(6,6)=SRC(7,5)= F2(t12
,t13
,t14
);
623 SRC(6,7)=SRC(7,6)= F2(t13
,t14
,t15
);
624 SRC(7,7)= F2(t14
,t15
,t15
);
626 static void predict_8x8_ddr( uint8_t *src
, uint8_t edge
[33] )
629 PREDICT_8x8_LOAD_LEFT
630 PREDICT_8x8_LOAD_TOPLEFT
631 SRC(0,7)= F2(l7
,l6
,l5
);
632 SRC(0,6)=SRC(1,7)= F2(l6
,l5
,l4
);
633 SRC(0,5)=SRC(1,6)=SRC(2,7)= F2(l5
,l4
,l3
);
634 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= F2(l4
,l3
,l2
);
635 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= F2(l3
,l2
,l1
);
636 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= F2(l2
,l1
,l0
);
637 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= F2(l1
,l0
,lt
);
638 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= F2(l0
,lt
,t0
);
639 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= F2(lt
,t0
,t1
);
640 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= F2(t0
,t1
,t2
);
641 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= F2(t1
,t2
,t3
);
642 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= F2(t2
,t3
,t4
);
643 SRC(5,0)=SRC(6,1)=SRC(7,2)= F2(t3
,t4
,t5
);
644 SRC(6,0)=SRC(7,1)= F2(t4
,t5
,t6
);
645 SRC(7,0)= F2(t5
,t6
,t7
);
648 static void predict_8x8_vr( uint8_t *src
, uint8_t edge
[33] )
651 PREDICT_8x8_LOAD_LEFT
652 PREDICT_8x8_LOAD_TOPLEFT
653 SRC(0,6)= F2(l5
,l4
,l3
);
654 SRC(0,7)= F2(l6
,l5
,l4
);
655 SRC(0,4)=SRC(1,6)= F2(l3
,l2
,l1
);
656 SRC(0,5)=SRC(1,7)= F2(l4
,l3
,l2
);
657 SRC(0,2)=SRC(1,4)=SRC(2,6)= F2(l1
,l0
,lt
);
658 SRC(0,3)=SRC(1,5)=SRC(2,7)= F2(l2
,l1
,l0
);
659 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= F2(l0
,lt
,t0
);
660 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= F1(lt
,t0
);
661 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= F2(lt
,t0
,t1
);
662 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= F1(t0
,t1
);
663 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= F2(t0
,t1
,t2
);
664 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= F1(t1
,t2
);
665 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= F2(t1
,t2
,t3
);
666 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= F1(t2
,t3
);
667 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= F2(t2
,t3
,t4
);
668 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= F1(t3
,t4
);
669 SRC(5,1)=SRC(6,3)=SRC(7,5)= F2(t3
,t4
,t5
);
670 SRC(5,0)=SRC(6,2)=SRC(7,4)= F1(t4
,t5
);
671 SRC(6,1)=SRC(7,3)= F2(t4
,t5
,t6
);
672 SRC(6,0)=SRC(7,2)= F1(t5
,t6
);
673 SRC(7,1)= F2(t5
,t6
,t7
);
676 static void predict_8x8_hd( uint8_t *src
, uint8_t edge
[33] )
679 PREDICT_8x8_LOAD_LEFT
680 PREDICT_8x8_LOAD_TOPLEFT
681 int p1
= pack8to16(F1(l6
,l7
), F2(l5
,l6
,l7
));
682 int p2
= pack8to16(F1(l5
,l6
), F2(l4
,l5
,l6
));
683 int p3
= pack8to16(F1(l4
,l5
), F2(l3
,l4
,l5
));
684 int p4
= pack8to16(F1(l3
,l4
), F2(l2
,l3
,l4
));
685 int p5
= pack8to16(F1(l2
,l3
), F2(l1
,l2
,l3
));
686 int p6
= pack8to16(F1(l1
,l2
), F2(l0
,l1
,l2
));
687 int p7
= pack8to16(F1(l0
,l1
), F2(lt
,l0
,l1
));
688 int p8
= pack8to16(F1(lt
,l0
), F2(l0
,lt
,t0
));
689 int p9
= pack8to16(F2(t1
,t0
,lt
), F2(t2
,t1
,t0
));
690 int p10
= pack8to16(F2(t3
,t2
,t1
), F2(t4
,t3
,t2
));
691 int p11
= pack8to16(F2(t5
,t4
,t3
), F2(t6
,t5
,t4
));
692 SRC32(0,7)= pack16to32(p1
,p2
);
693 SRC32(0,6)= pack16to32(p2
,p3
);
694 SRC32(4,7)=SRC32(0,5)= pack16to32(p3
,p4
);
695 SRC32(4,6)=SRC32(0,4)= pack16to32(p4
,p5
);
696 SRC32(4,5)=SRC32(0,3)= pack16to32(p5
,p6
);
697 SRC32(4,4)=SRC32(0,2)= pack16to32(p6
,p7
);
698 SRC32(4,3)=SRC32(0,1)= pack16to32(p7
,p8
);
699 SRC32(4,2)=SRC32(0,0)= pack16to32(p8
,p9
);
700 SRC32(4,1)= pack16to32(p9
,p10
);
701 SRC32(4,0)= pack16to32(p10
,p11
);
703 static void predict_8x8_vl( uint8_t *src
, uint8_t edge
[33] )
706 PREDICT_8x8_LOAD_TOPRIGHT
708 SRC(0,1)= F2(t0
,t1
,t2
);
709 SRC(0,2)=SRC(1,0)= F1(t1
,t2
);
710 SRC(0,3)=SRC(1,1)= F2(t1
,t2
,t3
);
711 SRC(0,4)=SRC(1,2)=SRC(2,0)= F1(t2
,t3
);
712 SRC(0,5)=SRC(1,3)=SRC(2,1)= F2(t2
,t3
,t4
);
713 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= F1(t3
,t4
);
714 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= F2(t3
,t4
,t5
);
715 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= F1(t4
,t5
);
716 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= F2(t4
,t5
,t6
);
717 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= F1(t5
,t6
);
718 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= F2(t5
,t6
,t7
);
719 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= F1(t6
,t7
);
720 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= F2(t6
,t7
,t8
);
721 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= F1(t7
,t8
);
722 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= F2(t7
,t8
,t9
);
723 SRC(5,6)=SRC(6,4)=SRC(7,2)= F1(t8
,t9
);
724 SRC(5,7)=SRC(6,5)=SRC(7,3)= F2(t8
,t9
,t10
);
725 SRC(6,6)=SRC(7,4)= F1(t9
,t10
);
726 SRC(6,7)=SRC(7,5)= F2(t9
,t10
,t11
);
727 SRC(7,6)= F1(t10
,t11
);
728 SRC(7,7)= F2(t10
,t11
,t12
);
730 static void predict_8x8_hu( uint8_t *src
, uint8_t edge
[33] )
732 PREDICT_8x8_LOAD_LEFT
733 int p1
= pack8to16(F1(l0
,l1
), F2(l0
,l1
,l2
));
734 int p2
= pack8to16(F1(l1
,l2
), F2(l1
,l2
,l3
));
735 int p3
= pack8to16(F1(l2
,l3
), F2(l2
,l3
,l4
));
736 int p4
= pack8to16(F1(l3
,l4
), F2(l3
,l4
,l5
));
737 int p5
= pack8to16(F1(l4
,l5
), F2(l4
,l5
,l6
));
738 int p6
= pack8to16(F1(l5
,l6
), F2(l5
,l6
,l7
));
739 int p7
= pack8to16(F1(l6
,l7
), F2(l6
,l7
,l7
));
740 int p8
= pack8to16(l7
,l7
);
741 SRC32(0,0)= pack16to32(p1
,p2
);
742 SRC32(0,1)= pack16to32(p2
,p3
);
743 SRC32(4,0)=SRC32(0,2)= pack16to32(p3
,p4
);
744 SRC32(4,1)=SRC32(0,3)= pack16to32(p4
,p5
);
745 SRC32(4,2)=SRC32(0,4)= pack16to32(p5
,p6
);
746 SRC32(4,3)=SRC32(0,5)= pack16to32(p6
,p7
);
747 SRC32(4,4)=SRC32(0,6)= pack16to32(p7
,p8
);
748 SRC32(4,5)=SRC32(4,6)= SRC32(0,7) = SRC32(4,7) = pack16to32(p8
,p8
);
751 /****************************************************************************
752 * Exported functions:
753 ****************************************************************************/
754 void x264_predict_16x16_init( int cpu
, x264_predict_t pf
[7] )
756 pf
[I_PRED_16x16_V
] = predict_16x16_v
;
757 pf
[I_PRED_16x16_H
] = predict_16x16_h
;
758 pf
[I_PRED_16x16_DC
] = predict_16x16_dc
;
759 pf
[I_PRED_16x16_P
] = predict_16x16_p
;
760 pf
[I_PRED_16x16_DC_LEFT
]= predict_16x16_dc_left
;
761 pf
[I_PRED_16x16_DC_TOP
]= predict_16x16_dc_top
;
762 pf
[I_PRED_16x16_DC_128
]= predict_16x16_dc_128
;
765 x264_predict_16x16_init_mmx( cpu
, pf
);
769 if( cpu
&X264_CPU_ALTIVEC
)
771 x264_predict_16x16_init_altivec( pf
);
776 void x264_predict_8x8c_init( int cpu
, x264_predict_t pf
[7] )
778 pf
[I_PRED_CHROMA_V
] = predict_8x8c_v
;
779 pf
[I_PRED_CHROMA_H
] = predict_8x8c_h
;
780 pf
[I_PRED_CHROMA_DC
] = predict_8x8c_dc
;
781 pf
[I_PRED_CHROMA_P
] = predict_8x8c_p
;
782 pf
[I_PRED_CHROMA_DC_LEFT
]= predict_8x8c_dc_left
;
783 pf
[I_PRED_CHROMA_DC_TOP
]= predict_8x8c_dc_top
;
784 pf
[I_PRED_CHROMA_DC_128
]= predict_8x8c_dc_128
;
787 x264_predict_8x8c_init_mmx( cpu
, pf
);
791 void x264_predict_8x8_init( int cpu
, x264_predict8x8_t pf
[12] )
793 pf
[I_PRED_8x8_V
] = predict_8x8_v
;
794 pf
[I_PRED_8x8_H
] = predict_8x8_h
;
795 pf
[I_PRED_8x8_DC
] = predict_8x8_dc
;
796 pf
[I_PRED_8x8_DDL
] = predict_8x8_ddl
;
797 pf
[I_PRED_8x8_DDR
] = predict_8x8_ddr
;
798 pf
[I_PRED_8x8_VR
] = predict_8x8_vr
;
799 pf
[I_PRED_8x8_HD
] = predict_8x8_hd
;
800 pf
[I_PRED_8x8_VL
] = predict_8x8_vl
;
801 pf
[I_PRED_8x8_HU
] = predict_8x8_hu
;
802 pf
[I_PRED_8x8_DC_LEFT
]= predict_8x8_dc_left
;
803 pf
[I_PRED_8x8_DC_TOP
] = predict_8x8_dc_top
;
804 pf
[I_PRED_8x8_DC_128
] = predict_8x8_dc_128
;
807 x264_predict_8x8_init_mmx( cpu
, pf
);
811 void x264_predict_4x4_init( int cpu
, x264_predict_t pf
[12] )
813 pf
[I_PRED_4x4_V
] = predict_4x4_v
;
814 pf
[I_PRED_4x4_H
] = predict_4x4_h
;
815 pf
[I_PRED_4x4_DC
] = predict_4x4_dc
;
816 pf
[I_PRED_4x4_DDL
] = predict_4x4_ddl
;
817 pf
[I_PRED_4x4_DDR
] = predict_4x4_ddr
;
818 pf
[I_PRED_4x4_VR
] = predict_4x4_vr
;
819 pf
[I_PRED_4x4_HD
] = predict_4x4_hd
;
820 pf
[I_PRED_4x4_VL
] = predict_4x4_vl
;
821 pf
[I_PRED_4x4_HU
] = predict_4x4_hu
;
822 pf
[I_PRED_4x4_DC_LEFT
]= predict_4x4_dc_left
;
823 pf
[I_PRED_4x4_DC_TOP
] = predict_4x4_dc_top
;
824 pf
[I_PRED_4x4_DC_128
] = predict_4x4_dc_128
;
827 x264_predict_4x4_init_mmx( cpu
, pf
);