/*
 * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/h264idct.c
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

30 static av_always_inline
void idct_internal(uint8_t *dst
, DCTELEM
*block
, int stride
, int block_stride
, int shift
, int add
){
32 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
34 block
[0] += 1<<(shift
-1);
37 const int z0
= block
[0 + block_stride
*i
] + block
[2 + block_stride
*i
];
38 const int z1
= block
[0 + block_stride
*i
] - block
[2 + block_stride
*i
];
39 const int z2
= (block
[1 + block_stride
*i
]>>1) - block
[3 + block_stride
*i
];
40 const int z3
= block
[1 + block_stride
*i
] + (block
[3 + block_stride
*i
]>>1);
42 block
[0 + block_stride
*i
]= z0
+ z3
;
43 block
[1 + block_stride
*i
]= z1
+ z2
;
44 block
[2 + block_stride
*i
]= z1
- z2
;
45 block
[3 + block_stride
*i
]= z0
- z3
;
49 const int z0
= block
[i
+ block_stride
*0] + block
[i
+ block_stride
*2];
50 const int z1
= block
[i
+ block_stride
*0] - block
[i
+ block_stride
*2];
51 const int z2
= (block
[i
+ block_stride
*1]>>1) - block
[i
+ block_stride
*3];
52 const int z3
= block
[i
+ block_stride
*1] + (block
[i
+ block_stride
*3]>>1);
54 dst
[i
+ 0*stride
]= cm
[ add
*dst
[i
+ 0*stride
] + ((z0
+ z3
) >> shift
) ];
55 dst
[i
+ 1*stride
]= cm
[ add
*dst
[i
+ 1*stride
] + ((z1
+ z2
) >> shift
) ];
56 dst
[i
+ 2*stride
]= cm
[ add
*dst
[i
+ 2*stride
] + ((z1
- z2
) >> shift
) ];
57 dst
[i
+ 3*stride
]= cm
[ add
*dst
[i
+ 3*stride
] + ((z0
- z3
) >> shift
) ];
61 void ff_h264_idct_add_c(uint8_t *dst
, DCTELEM
*block
, int stride
){
62 idct_internal(dst
, block
, stride
, 4, 6, 1);
65 void ff_h264_lowres_idct_add_c(uint8_t *dst
, int stride
, DCTELEM
*block
){
66 idct_internal(dst
, block
, stride
, 8, 3, 1);
69 void ff_h264_lowres_idct_put_c(uint8_t *dst
, int stride
, DCTELEM
*block
){
70 idct_internal(dst
, block
, stride
, 8, 3, 0);
73 void ff_h264_idct8_add_c(uint8_t *dst
, DCTELEM
*block
, int stride
){
75 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
79 for( i
= 0; i
< 8; i
++ )
81 const int a0
= block
[0+i
*8] + block
[4+i
*8];
82 const int a2
= block
[0+i
*8] - block
[4+i
*8];
83 const int a4
= (block
[2+i
*8]>>1) - block
[6+i
*8];
84 const int a6
= (block
[6+i
*8]>>1) + block
[2+i
*8];
86 const int b0
= a0
+ a6
;
87 const int b2
= a2
+ a4
;
88 const int b4
= a2
- a4
;
89 const int b6
= a0
- a6
;
91 const int a1
= -block
[3+i
*8] + block
[5+i
*8] - block
[7+i
*8] - (block
[7+i
*8]>>1);
92 const int a3
= block
[1+i
*8] + block
[7+i
*8] - block
[3+i
*8] - (block
[3+i
*8]>>1);
93 const int a5
= -block
[1+i
*8] + block
[7+i
*8] + block
[5+i
*8] + (block
[5+i
*8]>>1);
94 const int a7
= block
[3+i
*8] + block
[5+i
*8] + block
[1+i
*8] + (block
[1+i
*8]>>1);
96 const int b1
= (a7
>>2) + a1
;
97 const int b3
= a3
+ (a5
>>2);
98 const int b5
= (a3
>>2) - a5
;
99 const int b7
= a7
- (a1
>>2);
101 block
[0+i
*8] = b0
+ b7
;
102 block
[7+i
*8] = b0
- b7
;
103 block
[1+i
*8] = b2
+ b5
;
104 block
[6+i
*8] = b2
- b5
;
105 block
[2+i
*8] = b4
+ b3
;
106 block
[5+i
*8] = b4
- b3
;
107 block
[3+i
*8] = b6
+ b1
;
108 block
[4+i
*8] = b6
- b1
;
110 for( i
= 0; i
< 8; i
++ )
112 const int a0
= block
[i
+0*8] + block
[i
+4*8];
113 const int a2
= block
[i
+0*8] - block
[i
+4*8];
114 const int a4
= (block
[i
+2*8]>>1) - block
[i
+6*8];
115 const int a6
= (block
[i
+6*8]>>1) + block
[i
+2*8];
117 const int b0
= a0
+ a6
;
118 const int b2
= a2
+ a4
;
119 const int b4
= a2
- a4
;
120 const int b6
= a0
- a6
;
122 const int a1
= -block
[i
+3*8] + block
[i
+5*8] - block
[i
+7*8] - (block
[i
+7*8]>>1);
123 const int a3
= block
[i
+1*8] + block
[i
+7*8] - block
[i
+3*8] - (block
[i
+3*8]>>1);
124 const int a5
= -block
[i
+1*8] + block
[i
+7*8] + block
[i
+5*8] + (block
[i
+5*8]>>1);
125 const int a7
= block
[i
+3*8] + block
[i
+5*8] + block
[i
+1*8] + (block
[i
+1*8]>>1);
127 const int b1
= (a7
>>2) + a1
;
128 const int b3
= a3
+ (a5
>>2);
129 const int b5
= (a3
>>2) - a5
;
130 const int b7
= a7
- (a1
>>2);
132 dst
[i
+ 0*stride
] = cm
[ dst
[i
+ 0*stride
] + ((b0
+ b7
) >> 6) ];
133 dst
[i
+ 1*stride
] = cm
[ dst
[i
+ 1*stride
] + ((b2
+ b5
) >> 6) ];
134 dst
[i
+ 2*stride
] = cm
[ dst
[i
+ 2*stride
] + ((b4
+ b3
) >> 6) ];
135 dst
[i
+ 3*stride
] = cm
[ dst
[i
+ 3*stride
] + ((b6
+ b1
) >> 6) ];
136 dst
[i
+ 4*stride
] = cm
[ dst
[i
+ 4*stride
] + ((b6
- b1
) >> 6) ];
137 dst
[i
+ 5*stride
] = cm
[ dst
[i
+ 5*stride
] + ((b4
- b3
) >> 6) ];
138 dst
[i
+ 6*stride
] = cm
[ dst
[i
+ 6*stride
] + ((b2
- b5
) >> 6) ];
139 dst
[i
+ 7*stride
] = cm
[ dst
[i
+ 7*stride
] + ((b0
- b7
) >> 6) ];
143 // assumes all AC coefs are 0
144 void ff_h264_idct_dc_add_c(uint8_t *dst
, DCTELEM
*block
, int stride
){
146 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
147 int dc
= (block
[0] + 32) >> 6;
148 for( j
= 0; j
< 4; j
++ )
150 for( i
= 0; i
< 4; i
++ )
151 dst
[i
] = cm
[ dst
[i
] + dc
];
156 void ff_h264_idct8_dc_add_c(uint8_t *dst
, DCTELEM
*block
, int stride
){
158 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
159 int dc
= (block
[0] + 32) >> 6;
160 for( j
= 0; j
< 8; j
++ )
162 for( i
= 0; i
< 8; i
++ )
163 dst
[i
] = cm
[ dst
[i
] + dc
];
//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from h264 have been split
/* Maps a block index to its position in the nnzc[6*8] array (column + row*8).
 * Entries 0..15 are used by the 16-block loops in this file, entries 16..23
 * by ff_h264_idct_add8_c(). */
static const uint8_t scan8[16 + 2*4]={
 4+1*8, 5+1*8, 4+2*8, 5+2*8,
 6+1*8, 7+1*8, 6+2*8, 7+2*8,
 4+3*8, 5+3*8, 4+4*8, 5+4*8,
 6+3*8, 7+3*8, 6+4*8, 7+4*8,
 /* NOTE(review): the listing showed only the 16 entries above although the
  * array is declared with 16 + 2*4 elements. The eight entries below are
  * restored from the h264data.h table this one duplicates -- confirm
  * against upstream before relying on them. */
 1+1*8, 2+1*8,
 1+2*8, 2+2*8,
 1+4*8, 2+4*8,
 1+5*8, 2+5*8,
};

180 void ff_h264_idct_add16_c(uint8_t *dst
, const int *block_offset
, DCTELEM
*block
, int stride
, const uint8_t nnzc
[6*8]){
183 int nnz
= nnzc
[ scan8
[i
] ];
185 if(nnz
==1 && block
[i
*16]) ff_h264_idct_dc_add_c(dst
+ block_offset
[i
], block
+ i
*16, stride
);
186 else idct_internal (dst
+ block_offset
[i
], block
+ i
*16, stride
, 4, 6, 1);
191 void ff_h264_idct_add16intra_c(uint8_t *dst
, const int *block_offset
, DCTELEM
*block
, int stride
, const uint8_t nnzc
[6*8]){
194 if(nnzc
[ scan8
[i
] ]) idct_internal (dst
+ block_offset
[i
], block
+ i
*16, stride
, 4, 6, 1);
195 else if(block
[i
*16]) ff_h264_idct_dc_add_c(dst
+ block_offset
[i
], block
+ i
*16, stride
);
199 void ff_h264_idct8_add4_c(uint8_t *dst
, const int *block_offset
, DCTELEM
*block
, int stride
, const uint8_t nnzc
[6*8]){
201 for(i
=0; i
<16; i
+=4){
202 int nnz
= nnzc
[ scan8
[i
] ];
204 if(nnz
==1 && block
[i
*16]) ff_h264_idct8_dc_add_c(dst
+ block_offset
[i
], block
+ i
*16, stride
);
205 else ff_h264_idct8_add_c (dst
+ block_offset
[i
], block
+ i
*16, stride
);
210 void ff_h264_idct_add8_c(uint8_t **dest
, const int *block_offset
, DCTELEM
*block
, int stride
, const uint8_t nnzc
[6*8]){
212 for(i
=16; i
<16+8; i
++){
214 ff_h264_idct_add_c (dest
[(i
&4)>>2] + block_offset
[i
], block
+ i
*16, stride
);
216 ff_h264_idct_dc_add_c(dest
[(i
&4)>>2] + block_offset
[i
], block
+ i
*16, stride
);