/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
14 #include "vpx_dsp/inv_txfm.h"
16 void vpx_iwht4x4_16_add_c(const tran_low_t
*input
, uint8_t *dest
, int stride
) {
17 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
18 0.5 shifts per pixel. */
20 tran_low_t output
[16];
21 tran_high_t a1
, b1
, c1
, d1
, e1
;
22 const tran_low_t
*ip
= input
;
23 tran_low_t
*op
= output
;
25 for (i
= 0; i
< 4; i
++) {
26 a1
= ip
[0] >> UNIT_QUANT_SHIFT
;
27 c1
= ip
[1] >> UNIT_QUANT_SHIFT
;
28 d1
= ip
[2] >> UNIT_QUANT_SHIFT
;
29 b1
= ip
[3] >> UNIT_QUANT_SHIFT
;
37 op
[0] = WRAPLOW(a1
, 8);
38 op
[1] = WRAPLOW(b1
, 8);
39 op
[2] = WRAPLOW(c1
, 8);
40 op
[3] = WRAPLOW(d1
, 8);
46 for (i
= 0; i
< 4; i
++) {
58 dest
[stride
* 0] = clip_pixel_add(dest
[stride
* 0], a1
);
59 dest
[stride
* 1] = clip_pixel_add(dest
[stride
* 1], b1
);
60 dest
[stride
* 2] = clip_pixel_add(dest
[stride
* 2], c1
);
61 dest
[stride
* 3] = clip_pixel_add(dest
[stride
* 3], d1
);
68 void vpx_iwht4x4_1_add_c(const tran_low_t
*in
, uint8_t *dest
, int dest_stride
) {
72 const tran_low_t
*ip
= in
;
75 a1
= ip
[0] >> UNIT_QUANT_SHIFT
;
78 op
[0] = WRAPLOW(a1
, 8);
79 op
[1] = op
[2] = op
[3] = WRAPLOW(e1
, 8);
82 for (i
= 0; i
< 4; i
++) {
85 dest
[dest_stride
* 0] = clip_pixel_add(dest
[dest_stride
* 0], a1
);
86 dest
[dest_stride
* 1] = clip_pixel_add(dest
[dest_stride
* 1], e1
);
87 dest
[dest_stride
* 2] = clip_pixel_add(dest
[dest_stride
* 2], e1
);
88 dest
[dest_stride
* 3] = clip_pixel_add(dest
[dest_stride
* 3], e1
);
94 void idct4_c(const tran_low_t
*input
, tran_low_t
*output
) {
96 tran_high_t temp1
, temp2
;
98 temp1
= (input
[0] + input
[2]) * cospi_16_64
;
99 temp2
= (input
[0] - input
[2]) * cospi_16_64
;
100 step
[0] = WRAPLOW(dct_const_round_shift(temp1
), 8);
101 step
[1] = WRAPLOW(dct_const_round_shift(temp2
), 8);
102 temp1
= input
[1] * cospi_24_64
- input
[3] * cospi_8_64
;
103 temp2
= input
[1] * cospi_8_64
+ input
[3] * cospi_24_64
;
104 step
[2] = WRAPLOW(dct_const_round_shift(temp1
), 8);
105 step
[3] = WRAPLOW(dct_const_round_shift(temp2
), 8);
108 output
[0] = WRAPLOW(step
[0] + step
[3], 8);
109 output
[1] = WRAPLOW(step
[1] + step
[2], 8);
110 output
[2] = WRAPLOW(step
[1] - step
[2], 8);
111 output
[3] = WRAPLOW(step
[0] - step
[3], 8);
114 void vpx_idct4x4_16_add_c(const tran_low_t
*input
, uint8_t *dest
, int stride
) {
115 tran_low_t out
[4 * 4];
116 tran_low_t
*outptr
= out
;
118 tran_low_t temp_in
[4], temp_out
[4];
121 for (i
= 0; i
< 4; ++i
) {
122 idct4_c(input
, outptr
);
128 for (i
= 0; i
< 4; ++i
) {
129 for (j
= 0; j
< 4; ++j
)
130 temp_in
[j
] = out
[j
* 4 + i
];
131 idct4_c(temp_in
, temp_out
);
132 for (j
= 0; j
< 4; ++j
) {
133 dest
[j
* stride
+ i
] = clip_pixel_add(dest
[j
* stride
+ i
],
134 ROUND_POWER_OF_TWO(temp_out
[j
], 4));
139 void vpx_idct4x4_1_add_c(const tran_low_t
*input
, uint8_t *dest
,
143 tran_low_t out
= WRAPLOW(dct_const_round_shift(input
[0] * cospi_16_64
), 8);
144 out
= WRAPLOW(dct_const_round_shift(out
* cospi_16_64
), 8);
145 a1
= ROUND_POWER_OF_TWO(out
, 4);
147 for (i
= 0; i
< 4; i
++) {
148 dest
[0] = clip_pixel_add(dest
[0], a1
);
149 dest
[1] = clip_pixel_add(dest
[1], a1
);
150 dest
[2] = clip_pixel_add(dest
[2], a1
);
151 dest
[3] = clip_pixel_add(dest
[3], a1
);
156 void idct8_c(const tran_low_t
*input
, tran_low_t
*output
) {
157 tran_low_t step1
[8], step2
[8];
158 tran_high_t temp1
, temp2
;
164 temp1
= input
[1] * cospi_28_64
- input
[7] * cospi_4_64
;
165 temp2
= input
[1] * cospi_4_64
+ input
[7] * cospi_28_64
;
166 step1
[4] = WRAPLOW(dct_const_round_shift(temp1
), 8);
167 step1
[7] = WRAPLOW(dct_const_round_shift(temp2
), 8);
168 temp1
= input
[5] * cospi_12_64
- input
[3] * cospi_20_64
;
169 temp2
= input
[5] * cospi_20_64
+ input
[3] * cospi_12_64
;
170 step1
[5] = WRAPLOW(dct_const_round_shift(temp1
), 8);
171 step1
[6] = WRAPLOW(dct_const_round_shift(temp2
), 8);
174 temp1
= (step1
[0] + step1
[2]) * cospi_16_64
;
175 temp2
= (step1
[0] - step1
[2]) * cospi_16_64
;
176 step2
[0] = WRAPLOW(dct_const_round_shift(temp1
), 8);
177 step2
[1] = WRAPLOW(dct_const_round_shift(temp2
), 8);
178 temp1
= step1
[1] * cospi_24_64
- step1
[3] * cospi_8_64
;
179 temp2
= step1
[1] * cospi_8_64
+ step1
[3] * cospi_24_64
;
180 step2
[2] = WRAPLOW(dct_const_round_shift(temp1
), 8);
181 step2
[3] = WRAPLOW(dct_const_round_shift(temp2
), 8);
182 step2
[4] = WRAPLOW(step1
[4] + step1
[5], 8);
183 step2
[5] = WRAPLOW(step1
[4] - step1
[5], 8);
184 step2
[6] = WRAPLOW(-step1
[6] + step1
[7], 8);
185 step2
[7] = WRAPLOW(step1
[6] + step1
[7], 8);
188 step1
[0] = WRAPLOW(step2
[0] + step2
[3], 8);
189 step1
[1] = WRAPLOW(step2
[1] + step2
[2], 8);
190 step1
[2] = WRAPLOW(step2
[1] - step2
[2], 8);
191 step1
[3] = WRAPLOW(step2
[0] - step2
[3], 8);
193 temp1
= (step2
[6] - step2
[5]) * cospi_16_64
;
194 temp2
= (step2
[5] + step2
[6]) * cospi_16_64
;
195 step1
[5] = WRAPLOW(dct_const_round_shift(temp1
), 8);
196 step1
[6] = WRAPLOW(dct_const_round_shift(temp2
), 8);
200 output
[0] = WRAPLOW(step1
[0] + step1
[7], 8);
201 output
[1] = WRAPLOW(step1
[1] + step1
[6], 8);
202 output
[2] = WRAPLOW(step1
[2] + step1
[5], 8);
203 output
[3] = WRAPLOW(step1
[3] + step1
[4], 8);
204 output
[4] = WRAPLOW(step1
[3] - step1
[4], 8);
205 output
[5] = WRAPLOW(step1
[2] - step1
[5], 8);
206 output
[6] = WRAPLOW(step1
[1] - step1
[6], 8);
207 output
[7] = WRAPLOW(step1
[0] - step1
[7], 8);
210 void vpx_idct8x8_64_add_c(const tran_low_t
*input
, uint8_t *dest
, int stride
) {
211 tran_low_t out
[8 * 8];
212 tran_low_t
*outptr
= out
;
214 tran_low_t temp_in
[8], temp_out
[8];
216 // First transform rows
217 for (i
= 0; i
< 8; ++i
) {
218 idct8_c(input
, outptr
);
223 // Then transform columns
224 for (i
= 0; i
< 8; ++i
) {
225 for (j
= 0; j
< 8; ++j
)
226 temp_in
[j
] = out
[j
* 8 + i
];
227 idct8_c(temp_in
, temp_out
);
228 for (j
= 0; j
< 8; ++j
) {
229 dest
[j
* stride
+ i
] = clip_pixel_add(dest
[j
* stride
+ i
],
230 ROUND_POWER_OF_TWO(temp_out
[j
], 5));
235 void vpx_idct8x8_1_add_c(const tran_low_t
*input
, uint8_t *dest
, int stride
) {
238 tran_low_t out
= WRAPLOW(dct_const_round_shift(input
[0] * cospi_16_64
), 8);
239 out
= WRAPLOW(dct_const_round_shift(out
* cospi_16_64
), 8);
240 a1
= ROUND_POWER_OF_TWO(out
, 5);
241 for (j
= 0; j
< 8; ++j
) {
242 for (i
= 0; i
< 8; ++i
)
243 dest
[i
] = clip_pixel_add(dest
[i
], a1
);
248 void iadst4_c(const tran_low_t
*input
, tran_low_t
*output
) {
249 tran_high_t s0
, s1
, s2
, s3
, s4
, s5
, s6
, s7
;
251 tran_low_t x0
= input
[0];
252 tran_low_t x1
= input
[1];
253 tran_low_t x2
= input
[2];
254 tran_low_t x3
= input
[3];
256 if (!(x0
| x1
| x2
| x3
)) {
257 output
[0] = output
[1] = output
[2] = output
[3] = 0;
275 // 1-D transform scaling factor is sqrt(2).
276 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
277 // + 1b (addition) = 29b.
278 // Hence the output bit depth is 15b.
279 output
[0] = WRAPLOW(dct_const_round_shift(s0
+ s3
), 8);
280 output
[1] = WRAPLOW(dct_const_round_shift(s1
+ s3
), 8);
281 output
[2] = WRAPLOW(dct_const_round_shift(s2
), 8);
282 output
[3] = WRAPLOW(dct_const_round_shift(s0
+ s1
- s3
), 8);
285 void iadst8_c(const tran_low_t
*input
, tran_low_t
*output
) {
286 int s0
, s1
, s2
, s3
, s4
, s5
, s6
, s7
;
288 tran_high_t x0
= input
[7];
289 tran_high_t x1
= input
[0];
290 tran_high_t x2
= input
[5];
291 tran_high_t x3
= input
[2];
292 tran_high_t x4
= input
[3];
293 tran_high_t x5
= input
[4];
294 tran_high_t x6
= input
[1];
295 tran_high_t x7
= input
[6];
297 if (!(x0
| x1
| x2
| x3
| x4
| x5
| x6
| x7
)) {
298 output
[0] = output
[1] = output
[2] = output
[3] = output
[4]
299 = output
[5] = output
[6] = output
[7] = 0;
304 s0
= (int)(cospi_2_64
* x0
+ cospi_30_64
* x1
);
305 s1
= (int)(cospi_30_64
* x0
- cospi_2_64
* x1
);
306 s2
= (int)(cospi_10_64
* x2
+ cospi_22_64
* x3
);
307 s3
= (int)(cospi_22_64
* x2
- cospi_10_64
* x3
);
308 s4
= (int)(cospi_18_64
* x4
+ cospi_14_64
* x5
);
309 s5
= (int)(cospi_14_64
* x4
- cospi_18_64
* x5
);
310 s6
= (int)(cospi_26_64
* x6
+ cospi_6_64
* x7
);
311 s7
= (int)(cospi_6_64
* x6
- cospi_26_64
* x7
);
313 x0
= WRAPLOW(dct_const_round_shift(s0
+ s4
), 8);
314 x1
= WRAPLOW(dct_const_round_shift(s1
+ s5
), 8);
315 x2
= WRAPLOW(dct_const_round_shift(s2
+ s6
), 8);
316 x3
= WRAPLOW(dct_const_round_shift(s3
+ s7
), 8);
317 x4
= WRAPLOW(dct_const_round_shift(s0
- s4
), 8);
318 x5
= WRAPLOW(dct_const_round_shift(s1
- s5
), 8);
319 x6
= WRAPLOW(dct_const_round_shift(s2
- s6
), 8);
320 x7
= WRAPLOW(dct_const_round_shift(s3
- s7
), 8);
327 s4
= (int)(cospi_8_64
* x4
+ cospi_24_64
* x5
);
328 s5
= (int)(cospi_24_64
* x4
- cospi_8_64
* x5
);
329 s6
= (int)(-cospi_24_64
* x6
+ cospi_8_64
* x7
);
330 s7
= (int)(cospi_8_64
* x6
+ cospi_24_64
* x7
);
332 x0
= WRAPLOW(s0
+ s2
, 8);
333 x1
= WRAPLOW(s1
+ s3
, 8);
334 x2
= WRAPLOW(s0
- s2
, 8);
335 x3
= WRAPLOW(s1
- s3
, 8);
336 x4
= WRAPLOW(dct_const_round_shift(s4
+ s6
), 8);
337 x5
= WRAPLOW(dct_const_round_shift(s5
+ s7
), 8);
338 x6
= WRAPLOW(dct_const_round_shift(s4
- s6
), 8);
339 x7
= WRAPLOW(dct_const_round_shift(s5
- s7
), 8);
342 s2
= (int)(cospi_16_64
* (x2
+ x3
));
343 s3
= (int)(cospi_16_64
* (x2
- x3
));
344 s6
= (int)(cospi_16_64
* (x6
+ x7
));
345 s7
= (int)(cospi_16_64
* (x6
- x7
));
347 x2
= WRAPLOW(dct_const_round_shift(s2
), 8);
348 x3
= WRAPLOW(dct_const_round_shift(s3
), 8);
349 x6
= WRAPLOW(dct_const_round_shift(s6
), 8);
350 x7
= WRAPLOW(dct_const_round_shift(s7
), 8);
352 output
[0] = WRAPLOW(x0
, 8);
353 output
[1] = WRAPLOW(-x4
, 8);
354 output
[2] = WRAPLOW(x6
, 8);
355 output
[3] = WRAPLOW(-x2
, 8);
356 output
[4] = WRAPLOW(x3
, 8);
357 output
[5] = WRAPLOW(-x7
, 8);
358 output
[6] = WRAPLOW(x5
, 8);
359 output
[7] = WRAPLOW(-x1
, 8);
362 void vpx_idct8x8_12_add_c(const tran_low_t
*input
, uint8_t *dest
, int stride
) {
363 tran_low_t out
[8 * 8] = { 0 };
364 tran_low_t
*outptr
= out
;
366 tran_low_t temp_in
[8], temp_out
[8];
368 // First transform rows
369 // only first 4 row has non-zero coefs
370 for (i
= 0; i
< 4; ++i
) {
371 idct8_c(input
, outptr
);
376 // Then transform columns
377 for (i
= 0; i
< 8; ++i
) {
378 for (j
= 0; j
< 8; ++j
)
379 temp_in
[j
] = out
[j
* 8 + i
];
380 idct8_c(temp_in
, temp_out
);
381 for (j
= 0; j
< 8; ++j
) {
382 dest
[j
* stride
+ i
] = clip_pixel_add(dest
[j
* stride
+ i
],
383 ROUND_POWER_OF_TWO(temp_out
[j
], 5));
388 void idct16_c(const tran_low_t
*input
, tran_low_t
*output
) {
389 tran_low_t step1
[16], step2
[16];
390 tran_high_t temp1
, temp2
;
393 step1
[0] = input
[0/2];
394 step1
[1] = input
[16/2];
395 step1
[2] = input
[8/2];
396 step1
[3] = input
[24/2];
397 step1
[4] = input
[4/2];
398 step1
[5] = input
[20/2];
399 step1
[6] = input
[12/2];
400 step1
[7] = input
[28/2];
401 step1
[8] = input
[2/2];
402 step1
[9] = input
[18/2];
403 step1
[10] = input
[10/2];
404 step1
[11] = input
[26/2];
405 step1
[12] = input
[6/2];
406 step1
[13] = input
[22/2];
407 step1
[14] = input
[14/2];
408 step1
[15] = input
[30/2];
420 temp1
= step1
[8] * cospi_30_64
- step1
[15] * cospi_2_64
;
421 temp2
= step1
[8] * cospi_2_64
+ step1
[15] * cospi_30_64
;
422 step2
[8] = WRAPLOW(dct_const_round_shift(temp1
), 8);
423 step2
[15] = WRAPLOW(dct_const_round_shift(temp2
), 8);
425 temp1
= step1
[9] * cospi_14_64
- step1
[14] * cospi_18_64
;
426 temp2
= step1
[9] * cospi_18_64
+ step1
[14] * cospi_14_64
;
427 step2
[9] = WRAPLOW(dct_const_round_shift(temp1
), 8);
428 step2
[14] = WRAPLOW(dct_const_round_shift(temp2
), 8);
430 temp1
= step1
[10] * cospi_22_64
- step1
[13] * cospi_10_64
;
431 temp2
= step1
[10] * cospi_10_64
+ step1
[13] * cospi_22_64
;
432 step2
[10] = WRAPLOW(dct_const_round_shift(temp1
), 8);
433 step2
[13] = WRAPLOW(dct_const_round_shift(temp2
), 8);
435 temp1
= step1
[11] * cospi_6_64
- step1
[12] * cospi_26_64
;
436 temp2
= step1
[11] * cospi_26_64
+ step1
[12] * cospi_6_64
;
437 step2
[11] = WRAPLOW(dct_const_round_shift(temp1
), 8);
438 step2
[12] = WRAPLOW(dct_const_round_shift(temp2
), 8);
446 temp1
= step2
[4] * cospi_28_64
- step2
[7] * cospi_4_64
;
447 temp2
= step2
[4] * cospi_4_64
+ step2
[7] * cospi_28_64
;
448 step1
[4] = WRAPLOW(dct_const_round_shift(temp1
), 8);
449 step1
[7] = WRAPLOW(dct_const_round_shift(temp2
), 8);
450 temp1
= step2
[5] * cospi_12_64
- step2
[6] * cospi_20_64
;
451 temp2
= step2
[5] * cospi_20_64
+ step2
[6] * cospi_12_64
;
452 step1
[5] = WRAPLOW(dct_const_round_shift(temp1
), 8);
453 step1
[6] = WRAPLOW(dct_const_round_shift(temp2
), 8);
455 step1
[8] = WRAPLOW(step2
[8] + step2
[9], 8);
456 step1
[9] = WRAPLOW(step2
[8] - step2
[9], 8);
457 step1
[10] = WRAPLOW(-step2
[10] + step2
[11], 8);
458 step1
[11] = WRAPLOW(step2
[10] + step2
[11], 8);
459 step1
[12] = WRAPLOW(step2
[12] + step2
[13], 8);
460 step1
[13] = WRAPLOW(step2
[12] - step2
[13], 8);
461 step1
[14] = WRAPLOW(-step2
[14] + step2
[15], 8);
462 step1
[15] = WRAPLOW(step2
[14] + step2
[15], 8);
465 temp1
= (step1
[0] + step1
[1]) * cospi_16_64
;
466 temp2
= (step1
[0] - step1
[1]) * cospi_16_64
;
467 step2
[0] = WRAPLOW(dct_const_round_shift(temp1
), 8);
468 step2
[1] = WRAPLOW(dct_const_round_shift(temp2
), 8);
469 temp1
= step1
[2] * cospi_24_64
- step1
[3] * cospi_8_64
;
470 temp2
= step1
[2] * cospi_8_64
+ step1
[3] * cospi_24_64
;
471 step2
[2] = WRAPLOW(dct_const_round_shift(temp1
), 8);
472 step2
[3] = WRAPLOW(dct_const_round_shift(temp2
), 8);
473 step2
[4] = WRAPLOW(step1
[4] + step1
[5], 8);
474 step2
[5] = WRAPLOW(step1
[4] - step1
[5], 8);
475 step2
[6] = WRAPLOW(-step1
[6] + step1
[7], 8);
476 step2
[7] = WRAPLOW(step1
[6] + step1
[7], 8);
479 step2
[15] = step1
[15];
480 temp1
= -step1
[9] * cospi_8_64
+ step1
[14] * cospi_24_64
;
481 temp2
= step1
[9] * cospi_24_64
+ step1
[14] * cospi_8_64
;
482 step2
[9] = WRAPLOW(dct_const_round_shift(temp1
), 8);
483 step2
[14] = WRAPLOW(dct_const_round_shift(temp2
), 8);
484 temp1
= -step1
[10] * cospi_24_64
- step1
[13] * cospi_8_64
;
485 temp2
= -step1
[10] * cospi_8_64
+ step1
[13] * cospi_24_64
;
486 step2
[10] = WRAPLOW(dct_const_round_shift(temp1
), 8);
487 step2
[13] = WRAPLOW(dct_const_round_shift(temp2
), 8);
488 step2
[11] = step1
[11];
489 step2
[12] = step1
[12];
492 step1
[0] = WRAPLOW(step2
[0] + step2
[3], 8);
493 step1
[1] = WRAPLOW(step2
[1] + step2
[2], 8);
494 step1
[2] = WRAPLOW(step2
[1] - step2
[2], 8);
495 step1
[3] = WRAPLOW(step2
[0] - step2
[3], 8);
497 temp1
= (step2
[6] - step2
[5]) * cospi_16_64
;
498 temp2
= (step2
[5] + step2
[6]) * cospi_16_64
;
499 step1
[5] = WRAPLOW(dct_const_round_shift(temp1
), 8);
500 step1
[6] = WRAPLOW(dct_const_round_shift(temp2
), 8);
503 step1
[8] = WRAPLOW(step2
[8] + step2
[11], 8);
504 step1
[9] = WRAPLOW(step2
[9] + step2
[10], 8);
505 step1
[10] = WRAPLOW(step2
[9] - step2
[10], 8);
506 step1
[11] = WRAPLOW(step2
[8] - step2
[11], 8);
507 step1
[12] = WRAPLOW(-step2
[12] + step2
[15], 8);
508 step1
[13] = WRAPLOW(-step2
[13] + step2
[14], 8);
509 step1
[14] = WRAPLOW(step2
[13] + step2
[14], 8);
510 step1
[15] = WRAPLOW(step2
[12] + step2
[15], 8);
513 step2
[0] = WRAPLOW(step1
[0] + step1
[7], 8);
514 step2
[1] = WRAPLOW(step1
[1] + step1
[6], 8);
515 step2
[2] = WRAPLOW(step1
[2] + step1
[5], 8);
516 step2
[3] = WRAPLOW(step1
[3] + step1
[4], 8);
517 step2
[4] = WRAPLOW(step1
[3] - step1
[4], 8);
518 step2
[5] = WRAPLOW(step1
[2] - step1
[5], 8);
519 step2
[6] = WRAPLOW(step1
[1] - step1
[6], 8);
520 step2
[7] = WRAPLOW(step1
[0] - step1
[7], 8);
523 temp1
= (-step1
[10] + step1
[13]) * cospi_16_64
;
524 temp2
= (step1
[10] + step1
[13]) * cospi_16_64
;
525 step2
[10] = WRAPLOW(dct_const_round_shift(temp1
), 8);
526 step2
[13] = WRAPLOW(dct_const_round_shift(temp2
), 8);
527 temp1
= (-step1
[11] + step1
[12]) * cospi_16_64
;
528 temp2
= (step1
[11] + step1
[12]) * cospi_16_64
;
529 step2
[11] = WRAPLOW(dct_const_round_shift(temp1
), 8);
530 step2
[12] = WRAPLOW(dct_const_round_shift(temp2
), 8);
531 step2
[14] = step1
[14];
532 step2
[15] = step1
[15];
535 output
[0] = WRAPLOW(step2
[0] + step2
[15], 8);
536 output
[1] = WRAPLOW(step2
[1] + step2
[14], 8);
537 output
[2] = WRAPLOW(step2
[2] + step2
[13], 8);
538 output
[3] = WRAPLOW(step2
[3] + step2
[12], 8);
539 output
[4] = WRAPLOW(step2
[4] + step2
[11], 8);
540 output
[5] = WRAPLOW(step2
[5] + step2
[10], 8);
541 output
[6] = WRAPLOW(step2
[6] + step2
[9], 8);
542 output
[7] = WRAPLOW(step2
[7] + step2
[8], 8);
543 output
[8] = WRAPLOW(step2
[7] - step2
[8], 8);
544 output
[9] = WRAPLOW(step2
[6] - step2
[9], 8);
545 output
[10] = WRAPLOW(step2
[5] - step2
[10], 8);
546 output
[11] = WRAPLOW(step2
[4] - step2
[11], 8);
547 output
[12] = WRAPLOW(step2
[3] - step2
[12], 8);
548 output
[13] = WRAPLOW(step2
[2] - step2
[13], 8);
549 output
[14] = WRAPLOW(step2
[1] - step2
[14], 8);
550 output
[15] = WRAPLOW(step2
[0] - step2
[15], 8);
553 void vpx_idct16x16_256_add_c(const tran_low_t
*input
, uint8_t *dest
,
555 tran_low_t out
[16 * 16];
556 tran_low_t
*outptr
= out
;
558 tran_low_t temp_in
[16], temp_out
[16];
560 // First transform rows
561 for (i
= 0; i
< 16; ++i
) {
562 idct16_c(input
, outptr
);
567 // Then transform columns
568 for (i
= 0; i
< 16; ++i
) {
569 for (j
= 0; j
< 16; ++j
)
570 temp_in
[j
] = out
[j
* 16 + i
];
571 idct16_c(temp_in
, temp_out
);
572 for (j
= 0; j
< 16; ++j
) {
573 dest
[j
* stride
+ i
] = clip_pixel_add(dest
[j
* stride
+ i
],
574 ROUND_POWER_OF_TWO(temp_out
[j
], 6));
579 void iadst16_c(const tran_low_t
*input
, tran_low_t
*output
) {
580 tran_high_t s0
, s1
, s2
, s3
, s4
, s5
, s6
, s7
, s8
;
581 tran_high_t s9
, s10
, s11
, s12
, s13
, s14
, s15
;
583 tran_high_t x0
= input
[15];
584 tran_high_t x1
= input
[0];
585 tran_high_t x2
= input
[13];
586 tran_high_t x3
= input
[2];
587 tran_high_t x4
= input
[11];
588 tran_high_t x5
= input
[4];
589 tran_high_t x6
= input
[9];
590 tran_high_t x7
= input
[6];
591 tran_high_t x8
= input
[7];
592 tran_high_t x9
= input
[8];
593 tran_high_t x10
= input
[5];
594 tran_high_t x11
= input
[10];
595 tran_high_t x12
= input
[3];
596 tran_high_t x13
= input
[12];
597 tran_high_t x14
= input
[1];
598 tran_high_t x15
= input
[14];
600 if (!(x0
| x1
| x2
| x3
| x4
| x5
| x6
| x7
| x8
601 | x9
| x10
| x11
| x12
| x13
| x14
| x15
)) {
602 output
[0] = output
[1] = output
[2] = output
[3] = output
[4]
603 = output
[5] = output
[6] = output
[7] = output
[8]
604 = output
[9] = output
[10] = output
[11] = output
[12]
605 = output
[13] = output
[14] = output
[15] = 0;
610 s0
= x0
* cospi_1_64
+ x1
* cospi_31_64
;
611 s1
= x0
* cospi_31_64
- x1
* cospi_1_64
;
612 s2
= x2
* cospi_5_64
+ x3
* cospi_27_64
;
613 s3
= x2
* cospi_27_64
- x3
* cospi_5_64
;
614 s4
= x4
* cospi_9_64
+ x5
* cospi_23_64
;
615 s5
= x4
* cospi_23_64
- x5
* cospi_9_64
;
616 s6
= x6
* cospi_13_64
+ x7
* cospi_19_64
;
617 s7
= x6
* cospi_19_64
- x7
* cospi_13_64
;
618 s8
= x8
* cospi_17_64
+ x9
* cospi_15_64
;
619 s9
= x8
* cospi_15_64
- x9
* cospi_17_64
;
620 s10
= x10
* cospi_21_64
+ x11
* cospi_11_64
;
621 s11
= x10
* cospi_11_64
- x11
* cospi_21_64
;
622 s12
= x12
* cospi_25_64
+ x13
* cospi_7_64
;
623 s13
= x12
* cospi_7_64
- x13
* cospi_25_64
;
624 s14
= x14
* cospi_29_64
+ x15
* cospi_3_64
;
625 s15
= x14
* cospi_3_64
- x15
* cospi_29_64
;
627 x0
= WRAPLOW(dct_const_round_shift(s0
+ s8
), 8);
628 x1
= WRAPLOW(dct_const_round_shift(s1
+ s9
), 8);
629 x2
= WRAPLOW(dct_const_round_shift(s2
+ s10
), 8);
630 x3
= WRAPLOW(dct_const_round_shift(s3
+ s11
), 8);
631 x4
= WRAPLOW(dct_const_round_shift(s4
+ s12
), 8);
632 x5
= WRAPLOW(dct_const_round_shift(s5
+ s13
), 8);
633 x6
= WRAPLOW(dct_const_round_shift(s6
+ s14
), 8);
634 x7
= WRAPLOW(dct_const_round_shift(s7
+ s15
), 8);
635 x8
= WRAPLOW(dct_const_round_shift(s0
- s8
), 8);
636 x9
= WRAPLOW(dct_const_round_shift(s1
- s9
), 8);
637 x10
= WRAPLOW(dct_const_round_shift(s2
- s10
), 8);
638 x11
= WRAPLOW(dct_const_round_shift(s3
- s11
), 8);
639 x12
= WRAPLOW(dct_const_round_shift(s4
- s12
), 8);
640 x13
= WRAPLOW(dct_const_round_shift(s5
- s13
), 8);
641 x14
= WRAPLOW(dct_const_round_shift(s6
- s14
), 8);
642 x15
= WRAPLOW(dct_const_round_shift(s7
- s15
), 8);
653 s8
= x8
* cospi_4_64
+ x9
* cospi_28_64
;
654 s9
= x8
* cospi_28_64
- x9
* cospi_4_64
;
655 s10
= x10
* cospi_20_64
+ x11
* cospi_12_64
;
656 s11
= x10
* cospi_12_64
- x11
* cospi_20_64
;
657 s12
= - x12
* cospi_28_64
+ x13
* cospi_4_64
;
658 s13
= x12
* cospi_4_64
+ x13
* cospi_28_64
;
659 s14
= - x14
* cospi_12_64
+ x15
* cospi_20_64
;
660 s15
= x14
* cospi_20_64
+ x15
* cospi_12_64
;
662 x0
= WRAPLOW(s0
+ s4
, 8);
663 x1
= WRAPLOW(s1
+ s5
, 8);
664 x2
= WRAPLOW(s2
+ s6
, 8);
665 x3
= WRAPLOW(s3
+ s7
, 8);
666 x4
= WRAPLOW(s0
- s4
, 8);
667 x5
= WRAPLOW(s1
- s5
, 8);
668 x6
= WRAPLOW(s2
- s6
, 8);
669 x7
= WRAPLOW(s3
- s7
, 8);
670 x8
= WRAPLOW(dct_const_round_shift(s8
+ s12
), 8);
671 x9
= WRAPLOW(dct_const_round_shift(s9
+ s13
), 8);
672 x10
= WRAPLOW(dct_const_round_shift(s10
+ s14
), 8);
673 x11
= WRAPLOW(dct_const_round_shift(s11
+ s15
), 8);
674 x12
= WRAPLOW(dct_const_round_shift(s8
- s12
), 8);
675 x13
= WRAPLOW(dct_const_round_shift(s9
- s13
), 8);
676 x14
= WRAPLOW(dct_const_round_shift(s10
- s14
), 8);
677 x15
= WRAPLOW(dct_const_round_shift(s11
- s15
), 8);
684 s4
= x4
* cospi_8_64
+ x5
* cospi_24_64
;
685 s5
= x4
* cospi_24_64
- x5
* cospi_8_64
;
686 s6
= - x6
* cospi_24_64
+ x7
* cospi_8_64
;
687 s7
= x6
* cospi_8_64
+ x7
* cospi_24_64
;
692 s12
= x12
* cospi_8_64
+ x13
* cospi_24_64
;
693 s13
= x12
* cospi_24_64
- x13
* cospi_8_64
;
694 s14
= - x14
* cospi_24_64
+ x15
* cospi_8_64
;
695 s15
= x14
* cospi_8_64
+ x15
* cospi_24_64
;
697 x0
= WRAPLOW(check_range(s0
+ s2
), 8);
698 x1
= WRAPLOW(check_range(s1
+ s3
), 8);
699 x2
= WRAPLOW(check_range(s0
- s2
), 8);
700 x3
= WRAPLOW(check_range(s1
- s3
), 8);
701 x4
= WRAPLOW(dct_const_round_shift(s4
+ s6
), 8);
702 x5
= WRAPLOW(dct_const_round_shift(s5
+ s7
), 8);
703 x6
= WRAPLOW(dct_const_round_shift(s4
- s6
), 8);
704 x7
= WRAPLOW(dct_const_round_shift(s5
- s7
), 8);
705 x8
= WRAPLOW(check_range(s8
+ s10
), 8);
706 x9
= WRAPLOW(check_range(s9
+ s11
), 8);
707 x10
= WRAPLOW(check_range(s8
- s10
), 8);
708 x11
= WRAPLOW(check_range(s9
- s11
), 8);
709 x12
= WRAPLOW(dct_const_round_shift(s12
+ s14
), 8);
710 x13
= WRAPLOW(dct_const_round_shift(s13
+ s15
), 8);
711 x14
= WRAPLOW(dct_const_round_shift(s12
- s14
), 8);
712 x15
= WRAPLOW(dct_const_round_shift(s13
- s15
), 8);
715 s2
= (- cospi_16_64
) * (x2
+ x3
);
716 s3
= cospi_16_64
* (x2
- x3
);
717 s6
= cospi_16_64
* (x6
+ x7
);
718 s7
= cospi_16_64
* (- x6
+ x7
);
719 s10
= cospi_16_64
* (x10
+ x11
);
720 s11
= cospi_16_64
* (- x10
+ x11
);
721 s14
= (- cospi_16_64
) * (x14
+ x15
);
722 s15
= cospi_16_64
* (x14
- x15
);
724 x2
= WRAPLOW(dct_const_round_shift(s2
), 8);
725 x3
= WRAPLOW(dct_const_round_shift(s3
), 8);
726 x6
= WRAPLOW(dct_const_round_shift(s6
), 8);
727 x7
= WRAPLOW(dct_const_round_shift(s7
), 8);
728 x10
= WRAPLOW(dct_const_round_shift(s10
), 8);
729 x11
= WRAPLOW(dct_const_round_shift(s11
), 8);
730 x14
= WRAPLOW(dct_const_round_shift(s14
), 8);
731 x15
= WRAPLOW(dct_const_round_shift(s15
), 8);
733 output
[0] = WRAPLOW(x0
, 8);
734 output
[1] = WRAPLOW(-x8
, 8);
735 output
[2] = WRAPLOW(x12
, 8);
736 output
[3] = WRAPLOW(-x4
, 8);
737 output
[4] = WRAPLOW(x6
, 8);
738 output
[5] = WRAPLOW(x14
, 8);
739 output
[6] = WRAPLOW(x10
, 8);
740 output
[7] = WRAPLOW(x2
, 8);
741 output
[8] = WRAPLOW(x3
, 8);
742 output
[9] = WRAPLOW(x11
, 8);
743 output
[10] = WRAPLOW(x15
, 8);
744 output
[11] = WRAPLOW(x7
, 8);
745 output
[12] = WRAPLOW(x5
, 8);
746 output
[13] = WRAPLOW(-x13
, 8);
747 output
[14] = WRAPLOW(x9
, 8);
748 output
[15] = WRAPLOW(-x1
, 8);
751 void vpx_idct16x16_10_add_c(const tran_low_t
*input
, uint8_t *dest
,
753 tran_low_t out
[16 * 16] = { 0 };
754 tran_low_t
*outptr
= out
;
756 tran_low_t temp_in
[16], temp_out
[16];
758 // First transform rows. Since all non-zero dct coefficients are in
759 // upper-left 4x4 area, we only need to calculate first 4 rows here.
760 for (i
= 0; i
< 4; ++i
) {
761 idct16_c(input
, outptr
);
766 // Then transform columns
767 for (i
= 0; i
< 16; ++i
) {
768 for (j
= 0; j
< 16; ++j
)
769 temp_in
[j
] = out
[j
*16 + i
];
770 idct16_c(temp_in
, temp_out
);
771 for (j
= 0; j
< 16; ++j
) {
772 dest
[j
* stride
+ i
] = clip_pixel_add(dest
[j
* stride
+ i
],
773 ROUND_POWER_OF_TWO(temp_out
[j
], 6));
778 void vpx_idct16x16_1_add_c(const tran_low_t
*input
, uint8_t *dest
, int stride
) {
781 tran_low_t out
= WRAPLOW(dct_const_round_shift(input
[0] * cospi_16_64
), 8);
782 out
= WRAPLOW(dct_const_round_shift(out
* cospi_16_64
), 8);
783 a1
= ROUND_POWER_OF_TWO(out
, 6);
784 for (j
= 0; j
< 16; ++j
) {
785 for (i
= 0; i
< 16; ++i
)
786 dest
[i
] = clip_pixel_add(dest
[i
], a1
);
791 void idct32_c(const tran_low_t
*input
, tran_low_t
*output
) {
792 tran_low_t step1
[32], step2
[32];
793 tran_high_t temp1
, temp2
;
797 step1
[1] = input
[16];
799 step1
[3] = input
[24];
801 step1
[5] = input
[20];
802 step1
[6] = input
[12];
803 step1
[7] = input
[28];
805 step1
[9] = input
[18];
806 step1
[10] = input
[10];
807 step1
[11] = input
[26];
808 step1
[12] = input
[6];
809 step1
[13] = input
[22];
810 step1
[14] = input
[14];
811 step1
[15] = input
[30];
813 temp1
= input
[1] * cospi_31_64
- input
[31] * cospi_1_64
;
814 temp2
= input
[1] * cospi_1_64
+ input
[31] * cospi_31_64
;
815 step1
[16] = WRAPLOW(dct_const_round_shift(temp1
), 8);
816 step1
[31] = WRAPLOW(dct_const_round_shift(temp2
), 8);
818 temp1
= input
[17] * cospi_15_64
- input
[15] * cospi_17_64
;
819 temp2
= input
[17] * cospi_17_64
+ input
[15] * cospi_15_64
;
820 step1
[17] = WRAPLOW(dct_const_round_shift(temp1
), 8);
821 step1
[30] = WRAPLOW(dct_const_round_shift(temp2
), 8);
823 temp1
= input
[9] * cospi_23_64
- input
[23] * cospi_9_64
;
824 temp2
= input
[9] * cospi_9_64
+ input
[23] * cospi_23_64
;
825 step1
[18] = WRAPLOW(dct_const_round_shift(temp1
), 8);
826 step1
[29] = WRAPLOW(dct_const_round_shift(temp2
), 8);
828 temp1
= input
[25] * cospi_7_64
- input
[7] * cospi_25_64
;
829 temp2
= input
[25] * cospi_25_64
+ input
[7] * cospi_7_64
;
830 step1
[19] = WRAPLOW(dct_const_round_shift(temp1
), 8);
831 step1
[28] = WRAPLOW(dct_const_round_shift(temp2
), 8);
833 temp1
= input
[5] * cospi_27_64
- input
[27] * cospi_5_64
;
834 temp2
= input
[5] * cospi_5_64
+ input
[27] * cospi_27_64
;
835 step1
[20] = WRAPLOW(dct_const_round_shift(temp1
), 8);
836 step1
[27] = WRAPLOW(dct_const_round_shift(temp2
), 8);
838 temp1
= input
[21] * cospi_11_64
- input
[11] * cospi_21_64
;
839 temp2
= input
[21] * cospi_21_64
+ input
[11] * cospi_11_64
;
840 step1
[21] = WRAPLOW(dct_const_round_shift(temp1
), 8);
841 step1
[26] = WRAPLOW(dct_const_round_shift(temp2
), 8);
843 temp1
= input
[13] * cospi_19_64
- input
[19] * cospi_13_64
;
844 temp2
= input
[13] * cospi_13_64
+ input
[19] * cospi_19_64
;
845 step1
[22] = WRAPLOW(dct_const_round_shift(temp1
), 8);
846 step1
[25] = WRAPLOW(dct_const_round_shift(temp2
), 8);
848 temp1
= input
[29] * cospi_3_64
- input
[3] * cospi_29_64
;
849 temp2
= input
[29] * cospi_29_64
+ input
[3] * cospi_3_64
;
850 step1
[23] = WRAPLOW(dct_const_round_shift(temp1
), 8);
851 step1
[24] = WRAPLOW(dct_const_round_shift(temp2
), 8);
863 temp1
= step1
[8] * cospi_30_64
- step1
[15] * cospi_2_64
;
864 temp2
= step1
[8] * cospi_2_64
+ step1
[15] * cospi_30_64
;
865 step2
[8] = WRAPLOW(dct_const_round_shift(temp1
), 8);
866 step2
[15] = WRAPLOW(dct_const_round_shift(temp2
), 8);
868 temp1
= step1
[9] * cospi_14_64
- step1
[14] * cospi_18_64
;
869 temp2
= step1
[9] * cospi_18_64
+ step1
[14] * cospi_14_64
;
870 step2
[9] = WRAPLOW(dct_const_round_shift(temp1
), 8);
871 step2
[14] = WRAPLOW(dct_const_round_shift(temp2
), 8);
873 temp1
= step1
[10] * cospi_22_64
- step1
[13] * cospi_10_64
;
874 temp2
= step1
[10] * cospi_10_64
+ step1
[13] * cospi_22_64
;
875 step2
[10] = WRAPLOW(dct_const_round_shift(temp1
), 8);
876 step2
[13] = WRAPLOW(dct_const_round_shift(temp2
), 8);
878 temp1
= step1
[11] * cospi_6_64
- step1
[12] * cospi_26_64
;
879 temp2
= step1
[11] * cospi_26_64
+ step1
[12] * cospi_6_64
;
880 step2
[11] = WRAPLOW(dct_const_round_shift(temp1
), 8);
881 step2
[12] = WRAPLOW(dct_const_round_shift(temp2
), 8);
883 step2
[16] = WRAPLOW(step1
[16] + step1
[17], 8);
884 step2
[17] = WRAPLOW(step1
[16] - step1
[17], 8);
885 step2
[18] = WRAPLOW(-step1
[18] + step1
[19], 8);
886 step2
[19] = WRAPLOW(step1
[18] + step1
[19], 8);
887 step2
[20] = WRAPLOW(step1
[20] + step1
[21], 8);
888 step2
[21] = WRAPLOW(step1
[20] - step1
[21], 8);
889 step2
[22] = WRAPLOW(-step1
[22] + step1
[23], 8);
890 step2
[23] = WRAPLOW(step1
[22] + step1
[23], 8);
891 step2
[24] = WRAPLOW(step1
[24] + step1
[25], 8);
892 step2
[25] = WRAPLOW(step1
[24] - step1
[25], 8);
893 step2
[26] = WRAPLOW(-step1
[26] + step1
[27], 8);
894 step2
[27] = WRAPLOW(step1
[26] + step1
[27], 8);
895 step2
[28] = WRAPLOW(step1
[28] + step1
[29], 8);
896 step2
[29] = WRAPLOW(step1
[28] - step1
[29], 8);
897 step2
[30] = WRAPLOW(-step1
[30] + step1
[31], 8);
898 step2
[31] = WRAPLOW(step1
[30] + step1
[31], 8);
906 temp1
= step2
[4] * cospi_28_64
- step2
[7] * cospi_4_64
;
907 temp2
= step2
[4] * cospi_4_64
+ step2
[7] * cospi_28_64
;
908 step1
[4] = WRAPLOW(dct_const_round_shift(temp1
), 8);
909 step1
[7] = WRAPLOW(dct_const_round_shift(temp2
), 8);
910 temp1
= step2
[5] * cospi_12_64
- step2
[6] * cospi_20_64
;
911 temp2
= step2
[5] * cospi_20_64
+ step2
[6] * cospi_12_64
;
912 step1
[5] = WRAPLOW(dct_const_round_shift(temp1
), 8);
913 step1
[6] = WRAPLOW(dct_const_round_shift(temp2
), 8);
915 step1
[8] = WRAPLOW(step2
[8] + step2
[9], 8);
916 step1
[9] = WRAPLOW(step2
[8] - step2
[9], 8);
917 step1
[10] = WRAPLOW(-step2
[10] + step2
[11], 8);
918 step1
[11] = WRAPLOW(step2
[10] + step2
[11], 8);
919 step1
[12] = WRAPLOW(step2
[12] + step2
[13], 8);
920 step1
[13] = WRAPLOW(step2
[12] - step2
[13], 8);
921 step1
[14] = WRAPLOW(-step2
[14] + step2
[15], 8);
922 step1
[15] = WRAPLOW(step2
[14] + step2
[15], 8);
924 step1
[16] = step2
[16];
925 step1
[31] = step2
[31];
926 temp1
= -step2
[17] * cospi_4_64
+ step2
[30] * cospi_28_64
;
927 temp2
= step2
[17] * cospi_28_64
+ step2
[30] * cospi_4_64
;
928 step1
[17] = WRAPLOW(dct_const_round_shift(temp1
), 8);
929 step1
[30] = WRAPLOW(dct_const_round_shift(temp2
), 8);
930 temp1
= -step2
[18] * cospi_28_64
- step2
[29] * cospi_4_64
;
931 temp2
= -step2
[18] * cospi_4_64
+ step2
[29] * cospi_28_64
;
932 step1
[18] = WRAPLOW(dct_const_round_shift(temp1
), 8);
933 step1
[29] = WRAPLOW(dct_const_round_shift(temp2
), 8);
934 step1
[19] = step2
[19];
935 step1
[20] = step2
[20];
936 temp1
= -step2
[21] * cospi_20_64
+ step2
[26] * cospi_12_64
;
937 temp2
= step2
[21] * cospi_12_64
+ step2
[26] * cospi_20_64
;
938 step1
[21] = WRAPLOW(dct_const_round_shift(temp1
), 8);
939 step1
[26] = WRAPLOW(dct_const_round_shift(temp2
), 8);
940 temp1
= -step2
[22] * cospi_12_64
- step2
[25] * cospi_20_64
;
941 temp2
= -step2
[22] * cospi_20_64
+ step2
[25] * cospi_12_64
;
942 step1
[22] = WRAPLOW(dct_const_round_shift(temp1
), 8);
943 step1
[25] = WRAPLOW(dct_const_round_shift(temp2
), 8);
944 step1
[23] = step2
[23];
945 step1
[24] = step2
[24];
946 step1
[27] = step2
[27];
947 step1
[28] = step2
[28];
950 temp1
= (step1
[0] + step1
[1]) * cospi_16_64
;
951 temp2
= (step1
[0] - step1
[1]) * cospi_16_64
;
952 step2
[0] = WRAPLOW(dct_const_round_shift(temp1
), 8);
953 step2
[1] = WRAPLOW(dct_const_round_shift(temp2
), 8);
954 temp1
= step1
[2] * cospi_24_64
- step1
[3] * cospi_8_64
;
955 temp2
= step1
[2] * cospi_8_64
+ step1
[3] * cospi_24_64
;
956 step2
[2] = WRAPLOW(dct_const_round_shift(temp1
), 8);
957 step2
[3] = WRAPLOW(dct_const_round_shift(temp2
), 8);
958 step2
[4] = WRAPLOW(step1
[4] + step1
[5], 8);
959 step2
[5] = WRAPLOW(step1
[4] - step1
[5], 8);
960 step2
[6] = WRAPLOW(-step1
[6] + step1
[7], 8);
961 step2
[7] = WRAPLOW(step1
[6] + step1
[7], 8);
964 step2
[15] = step1
[15];
965 temp1
= -step1
[9] * cospi_8_64
+ step1
[14] * cospi_24_64
;
966 temp2
= step1
[9] * cospi_24_64
+ step1
[14] * cospi_8_64
;
967 step2
[9] = WRAPLOW(dct_const_round_shift(temp1
), 8);
968 step2
[14] = WRAPLOW(dct_const_round_shift(temp2
), 8);
969 temp1
= -step1
[10] * cospi_24_64
- step1
[13] * cospi_8_64
;
970 temp2
= -step1
[10] * cospi_8_64
+ step1
[13] * cospi_24_64
;
971 step2
[10] = WRAPLOW(dct_const_round_shift(temp1
), 8);
972 step2
[13] = WRAPLOW(dct_const_round_shift(temp2
), 8);
973 step2
[11] = step1
[11];
974 step2
[12] = step1
[12];
976 step2
[16] = WRAPLOW(step1
[16] + step1
[19], 8);
977 step2
[17] = WRAPLOW(step1
[17] + step1
[18], 8);
978 step2
[18] = WRAPLOW(step1
[17] - step1
[18], 8);
979 step2
[19] = WRAPLOW(step1
[16] - step1
[19], 8);
980 step2
[20] = WRAPLOW(-step1
[20] + step1
[23], 8);
981 step2
[21] = WRAPLOW(-step1
[21] + step1
[22], 8);
982 step2
[22] = WRAPLOW(step1
[21] + step1
[22], 8);
983 step2
[23] = WRAPLOW(step1
[20] + step1
[23], 8);
985 step2
[24] = WRAPLOW(step1
[24] + step1
[27], 8);
986 step2
[25] = WRAPLOW(step1
[25] + step1
[26], 8);
987 step2
[26] = WRAPLOW(step1
[25] - step1
[26], 8);
988 step2
[27] = WRAPLOW(step1
[24] - step1
[27], 8);
989 step2
[28] = WRAPLOW(-step1
[28] + step1
[31], 8);
990 step2
[29] = WRAPLOW(-step1
[29] + step1
[30], 8);
991 step2
[30] = WRAPLOW(step1
[29] + step1
[30], 8);
992 step2
[31] = WRAPLOW(step1
[28] + step1
[31], 8);
995 step1
[0] = WRAPLOW(step2
[0] + step2
[3], 8);
996 step1
[1] = WRAPLOW(step2
[1] + step2
[2], 8);
997 step1
[2] = WRAPLOW(step2
[1] - step2
[2], 8);
998 step1
[3] = WRAPLOW(step2
[0] - step2
[3], 8);
1000 temp1
= (step2
[6] - step2
[5]) * cospi_16_64
;
1001 temp2
= (step2
[5] + step2
[6]) * cospi_16_64
;
1002 step1
[5] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1003 step1
[6] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1004 step1
[7] = step2
[7];
1006 step1
[8] = WRAPLOW(step2
[8] + step2
[11], 8);
1007 step1
[9] = WRAPLOW(step2
[9] + step2
[10], 8);
1008 step1
[10] = WRAPLOW(step2
[9] - step2
[10], 8);
1009 step1
[11] = WRAPLOW(step2
[8] - step2
[11], 8);
1010 step1
[12] = WRAPLOW(-step2
[12] + step2
[15], 8);
1011 step1
[13] = WRAPLOW(-step2
[13] + step2
[14], 8);
1012 step1
[14] = WRAPLOW(step2
[13] + step2
[14], 8);
1013 step1
[15] = WRAPLOW(step2
[12] + step2
[15], 8);
1015 step1
[16] = step2
[16];
1016 step1
[17] = step2
[17];
1017 temp1
= -step2
[18] * cospi_8_64
+ step2
[29] * cospi_24_64
;
1018 temp2
= step2
[18] * cospi_24_64
+ step2
[29] * cospi_8_64
;
1019 step1
[18] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1020 step1
[29] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1021 temp1
= -step2
[19] * cospi_8_64
+ step2
[28] * cospi_24_64
;
1022 temp2
= step2
[19] * cospi_24_64
+ step2
[28] * cospi_8_64
;
1023 step1
[19] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1024 step1
[28] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1025 temp1
= -step2
[20] * cospi_24_64
- step2
[27] * cospi_8_64
;
1026 temp2
= -step2
[20] * cospi_8_64
+ step2
[27] * cospi_24_64
;
1027 step1
[20] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1028 step1
[27] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1029 temp1
= -step2
[21] * cospi_24_64
- step2
[26] * cospi_8_64
;
1030 temp2
= -step2
[21] * cospi_8_64
+ step2
[26] * cospi_24_64
;
1031 step1
[21] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1032 step1
[26] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1033 step1
[22] = step2
[22];
1034 step1
[23] = step2
[23];
1035 step1
[24] = step2
[24];
1036 step1
[25] = step2
[25];
1037 step1
[30] = step2
[30];
1038 step1
[31] = step2
[31];
1041 step2
[0] = WRAPLOW(step1
[0] + step1
[7], 8);
1042 step2
[1] = WRAPLOW(step1
[1] + step1
[6], 8);
1043 step2
[2] = WRAPLOW(step1
[2] + step1
[5], 8);
1044 step2
[3] = WRAPLOW(step1
[3] + step1
[4], 8);
1045 step2
[4] = WRAPLOW(step1
[3] - step1
[4], 8);
1046 step2
[5] = WRAPLOW(step1
[2] - step1
[5], 8);
1047 step2
[6] = WRAPLOW(step1
[1] - step1
[6], 8);
1048 step2
[7] = WRAPLOW(step1
[0] - step1
[7], 8);
1049 step2
[8] = step1
[8];
1050 step2
[9] = step1
[9];
1051 temp1
= (-step1
[10] + step1
[13]) * cospi_16_64
;
1052 temp2
= (step1
[10] + step1
[13]) * cospi_16_64
;
1053 step2
[10] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1054 step2
[13] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1055 temp1
= (-step1
[11] + step1
[12]) * cospi_16_64
;
1056 temp2
= (step1
[11] + step1
[12]) * cospi_16_64
;
1057 step2
[11] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1058 step2
[12] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1059 step2
[14] = step1
[14];
1060 step2
[15] = step1
[15];
1062 step2
[16] = WRAPLOW(step1
[16] + step1
[23], 8);
1063 step2
[17] = WRAPLOW(step1
[17] + step1
[22], 8);
1064 step2
[18] = WRAPLOW(step1
[18] + step1
[21], 8);
1065 step2
[19] = WRAPLOW(step1
[19] + step1
[20], 8);
1066 step2
[20] = WRAPLOW(step1
[19] - step1
[20], 8);
1067 step2
[21] = WRAPLOW(step1
[18] - step1
[21], 8);
1068 step2
[22] = WRAPLOW(step1
[17] - step1
[22], 8);
1069 step2
[23] = WRAPLOW(step1
[16] - step1
[23], 8);
1071 step2
[24] = WRAPLOW(-step1
[24] + step1
[31], 8);
1072 step2
[25] = WRAPLOW(-step1
[25] + step1
[30], 8);
1073 step2
[26] = WRAPLOW(-step1
[26] + step1
[29], 8);
1074 step2
[27] = WRAPLOW(-step1
[27] + step1
[28], 8);
1075 step2
[28] = WRAPLOW(step1
[27] + step1
[28], 8);
1076 step2
[29] = WRAPLOW(step1
[26] + step1
[29], 8);
1077 step2
[30] = WRAPLOW(step1
[25] + step1
[30], 8);
1078 step2
[31] = WRAPLOW(step1
[24] + step1
[31], 8);
1081 step1
[0] = WRAPLOW(step2
[0] + step2
[15], 8);
1082 step1
[1] = WRAPLOW(step2
[1] + step2
[14], 8);
1083 step1
[2] = WRAPLOW(step2
[2] + step2
[13], 8);
1084 step1
[3] = WRAPLOW(step2
[3] + step2
[12], 8);
1085 step1
[4] = WRAPLOW(step2
[4] + step2
[11], 8);
1086 step1
[5] = WRAPLOW(step2
[5] + step2
[10], 8);
1087 step1
[6] = WRAPLOW(step2
[6] + step2
[9], 8);
1088 step1
[7] = WRAPLOW(step2
[7] + step2
[8], 8);
1089 step1
[8] = WRAPLOW(step2
[7] - step2
[8], 8);
1090 step1
[9] = WRAPLOW(step2
[6] - step2
[9], 8);
1091 step1
[10] = WRAPLOW(step2
[5] - step2
[10], 8);
1092 step1
[11] = WRAPLOW(step2
[4] - step2
[11], 8);
1093 step1
[12] = WRAPLOW(step2
[3] - step2
[12], 8);
1094 step1
[13] = WRAPLOW(step2
[2] - step2
[13], 8);
1095 step1
[14] = WRAPLOW(step2
[1] - step2
[14], 8);
1096 step1
[15] = WRAPLOW(step2
[0] - step2
[15], 8);
1098 step1
[16] = step2
[16];
1099 step1
[17] = step2
[17];
1100 step1
[18] = step2
[18];
1101 step1
[19] = step2
[19];
1102 temp1
= (-step2
[20] + step2
[27]) * cospi_16_64
;
1103 temp2
= (step2
[20] + step2
[27]) * cospi_16_64
;
1104 step1
[20] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1105 step1
[27] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1106 temp1
= (-step2
[21] + step2
[26]) * cospi_16_64
;
1107 temp2
= (step2
[21] + step2
[26]) * cospi_16_64
;
1108 step1
[21] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1109 step1
[26] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1110 temp1
= (-step2
[22] + step2
[25]) * cospi_16_64
;
1111 temp2
= (step2
[22] + step2
[25]) * cospi_16_64
;
1112 step1
[22] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1113 step1
[25] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1114 temp1
= (-step2
[23] + step2
[24]) * cospi_16_64
;
1115 temp2
= (step2
[23] + step2
[24]) * cospi_16_64
;
1116 step1
[23] = WRAPLOW(dct_const_round_shift(temp1
), 8);
1117 step1
[24] = WRAPLOW(dct_const_round_shift(temp2
), 8);
1118 step1
[28] = step2
[28];
1119 step1
[29] = step2
[29];
1120 step1
[30] = step2
[30];
1121 step1
[31] = step2
[31];
1124 output
[0] = WRAPLOW(step1
[0] + step1
[31], 8);
1125 output
[1] = WRAPLOW(step1
[1] + step1
[30], 8);
1126 output
[2] = WRAPLOW(step1
[2] + step1
[29], 8);
1127 output
[3] = WRAPLOW(step1
[3] + step1
[28], 8);
1128 output
[4] = WRAPLOW(step1
[4] + step1
[27], 8);
1129 output
[5] = WRAPLOW(step1
[5] + step1
[26], 8);
1130 output
[6] = WRAPLOW(step1
[6] + step1
[25], 8);
1131 output
[7] = WRAPLOW(step1
[7] + step1
[24], 8);
1132 output
[8] = WRAPLOW(step1
[8] + step1
[23], 8);
1133 output
[9] = WRAPLOW(step1
[9] + step1
[22], 8);
1134 output
[10] = WRAPLOW(step1
[10] + step1
[21], 8);
1135 output
[11] = WRAPLOW(step1
[11] + step1
[20], 8);
1136 output
[12] = WRAPLOW(step1
[12] + step1
[19], 8);
1137 output
[13] = WRAPLOW(step1
[13] + step1
[18], 8);
1138 output
[14] = WRAPLOW(step1
[14] + step1
[17], 8);
1139 output
[15] = WRAPLOW(step1
[15] + step1
[16], 8);
1140 output
[16] = WRAPLOW(step1
[15] - step1
[16], 8);
1141 output
[17] = WRAPLOW(step1
[14] - step1
[17], 8);
1142 output
[18] = WRAPLOW(step1
[13] - step1
[18], 8);
1143 output
[19] = WRAPLOW(step1
[12] - step1
[19], 8);
1144 output
[20] = WRAPLOW(step1
[11] - step1
[20], 8);
1145 output
[21] = WRAPLOW(step1
[10] - step1
[21], 8);
1146 output
[22] = WRAPLOW(step1
[9] - step1
[22], 8);
1147 output
[23] = WRAPLOW(step1
[8] - step1
[23], 8);
1148 output
[24] = WRAPLOW(step1
[7] - step1
[24], 8);
1149 output
[25] = WRAPLOW(step1
[6] - step1
[25], 8);
1150 output
[26] = WRAPLOW(step1
[5] - step1
[26], 8);
1151 output
[27] = WRAPLOW(step1
[4] - step1
[27], 8);
1152 output
[28] = WRAPLOW(step1
[3] - step1
[28], 8);
1153 output
[29] = WRAPLOW(step1
[2] - step1
[29], 8);
1154 output
[30] = WRAPLOW(step1
[1] - step1
[30], 8);
1155 output
[31] = WRAPLOW(step1
[0] - step1
[31], 8);
1158 void vpx_idct32x32_1024_add_c(const tran_low_t
*input
, uint8_t *dest
,
1160 tran_low_t out
[32 * 32];
1161 tran_low_t
*outptr
= out
;
1163 tran_low_t temp_in
[32], temp_out
[32];
1166 for (i
= 0; i
< 32; ++i
) {
1167 int16_t zero_coeff
[16];
1168 for (j
= 0; j
< 16; ++j
)
1169 zero_coeff
[j
] = input
[2 * j
] | input
[2 * j
+ 1];
1170 for (j
= 0; j
< 8; ++j
)
1171 zero_coeff
[j
] = zero_coeff
[2 * j
] | zero_coeff
[2 * j
+ 1];
1172 for (j
= 0; j
< 4; ++j
)
1173 zero_coeff
[j
] = zero_coeff
[2 * j
] | zero_coeff
[2 * j
+ 1];
1174 for (j
= 0; j
< 2; ++j
)
1175 zero_coeff
[j
] = zero_coeff
[2 * j
] | zero_coeff
[2 * j
+ 1];
1177 if (zero_coeff
[0] | zero_coeff
[1])
1178 idct32_c(input
, outptr
);
1180 memset(outptr
, 0, sizeof(tran_low_t
) * 32);
1186 for (i
= 0; i
< 32; ++i
) {
1187 for (j
= 0; j
< 32; ++j
)
1188 temp_in
[j
] = out
[j
* 32 + i
];
1189 idct32_c(temp_in
, temp_out
);
1190 for (j
= 0; j
< 32; ++j
) {
1191 dest
[j
* stride
+ i
] = clip_pixel_add(dest
[j
* stride
+ i
],
1192 ROUND_POWER_OF_TWO(temp_out
[j
], 6));
1197 void vpx_idct32x32_34_add_c(const tran_low_t
*input
, uint8_t *dest
,
1199 tran_low_t out
[32 * 32] = {0};
1200 tran_low_t
*outptr
= out
;
1202 tran_low_t temp_in
[32], temp_out
[32];
1205 // only upper-left 8x8 has non-zero coeff
1206 for (i
= 0; i
< 8; ++i
) {
1207 idct32_c(input
, outptr
);
1213 for (i
= 0; i
< 32; ++i
) {
1214 for (j
= 0; j
< 32; ++j
)
1215 temp_in
[j
] = out
[j
* 32 + i
];
1216 idct32_c(temp_in
, temp_out
);
1217 for (j
= 0; j
< 32; ++j
) {
1218 dest
[j
* stride
+ i
] = clip_pixel_add(dest
[j
* stride
+ i
],
1219 ROUND_POWER_OF_TWO(temp_out
[j
], 6));
1224 void vpx_idct32x32_1_add_c(const tran_low_t
*input
, uint8_t *dest
, int stride
) {
1228 tran_low_t out
= WRAPLOW(dct_const_round_shift(input
[0] * cospi_16_64
), 8);
1229 out
= WRAPLOW(dct_const_round_shift(out
* cospi_16_64
), 8);
1230 a1
= ROUND_POWER_OF_TWO(out
, 6);
1232 for (j
= 0; j
< 32; ++j
) {
1233 for (i
= 0; i
< 32; ++i
)
1234 dest
[i
] = clip_pixel_add(dest
[i
], a1
);
1239 #if CONFIG_VP9_HIGHBITDEPTH
1240 void vpx_highbd_iwht4x4_16_add_c(const tran_low_t
*input
, uint8_t *dest8
,
1241 int stride
, int bd
) {
1242 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
1243 0.5 shifts per pixel. */
1245 tran_low_t output
[16];
1246 tran_high_t a1
, b1
, c1
, d1
, e1
;
1247 const tran_low_t
*ip
= input
;
1248 tran_low_t
*op
= output
;
1249 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1251 for (i
= 0; i
< 4; i
++) {
1252 a1
= ip
[0] >> UNIT_QUANT_SHIFT
;
1253 c1
= ip
[1] >> UNIT_QUANT_SHIFT
;
1254 d1
= ip
[2] >> UNIT_QUANT_SHIFT
;
1255 b1
= ip
[3] >> UNIT_QUANT_SHIFT
;
1258 e1
= (a1
- d1
) >> 1;
1263 op
[0] = WRAPLOW(a1
, bd
);
1264 op
[1] = WRAPLOW(b1
, bd
);
1265 op
[2] = WRAPLOW(c1
, bd
);
1266 op
[3] = WRAPLOW(d1
, bd
);
1272 for (i
= 0; i
< 4; i
++) {
1279 e1
= (a1
- d1
) >> 1;
1284 dest
[stride
* 0] = highbd_clip_pixel_add(dest
[stride
* 0], a1
, bd
);
1285 dest
[stride
* 1] = highbd_clip_pixel_add(dest
[stride
* 1], b1
, bd
);
1286 dest
[stride
* 2] = highbd_clip_pixel_add(dest
[stride
* 2], c1
, bd
);
1287 dest
[stride
* 3] = highbd_clip_pixel_add(dest
[stride
* 3], d1
, bd
);
1294 void vpx_highbd_iwht4x4_1_add_c(const tran_low_t
*in
, uint8_t *dest8
,
1295 int dest_stride
, int bd
) {
1299 const tran_low_t
*ip
= in
;
1300 tran_low_t
*op
= tmp
;
1301 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1304 a1
= ip
[0] >> UNIT_QUANT_SHIFT
;
1307 op
[0] = WRAPLOW(a1
, bd
);
1308 op
[1] = op
[2] = op
[3] = WRAPLOW(e1
, bd
);
1311 for (i
= 0; i
< 4; i
++) {
1314 dest
[dest_stride
* 0] = highbd_clip_pixel_add(
1315 dest
[dest_stride
* 0], a1
, bd
);
1316 dest
[dest_stride
* 1] = highbd_clip_pixel_add(
1317 dest
[dest_stride
* 1], e1
, bd
);
1318 dest
[dest_stride
* 2] = highbd_clip_pixel_add(
1319 dest
[dest_stride
* 2], e1
, bd
);
1320 dest
[dest_stride
* 3] = highbd_clip_pixel_add(
1321 dest
[dest_stride
* 3], e1
, bd
);
1327 void vpx_highbd_idct4_c(const tran_low_t
*input
, tran_low_t
*output
, int bd
) {
1329 tran_high_t temp1
, temp2
;
1332 temp1
= (input
[0] + input
[2]) * cospi_16_64
;
1333 temp2
= (input
[0] - input
[2]) * cospi_16_64
;
1334 step
[0] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1335 step
[1] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1336 temp1
= input
[1] * cospi_24_64
- input
[3] * cospi_8_64
;
1337 temp2
= input
[1] * cospi_8_64
+ input
[3] * cospi_24_64
;
1338 step
[2] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1339 step
[3] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1342 output
[0] = WRAPLOW(step
[0] + step
[3], bd
);
1343 output
[1] = WRAPLOW(step
[1] + step
[2], bd
);
1344 output
[2] = WRAPLOW(step
[1] - step
[2], bd
);
1345 output
[3] = WRAPLOW(step
[0] - step
[3], bd
);
1348 void vpx_highbd_idct4x4_16_add_c(const tran_low_t
*input
, uint8_t *dest8
,
1349 int stride
, int bd
) {
1350 tran_low_t out
[4 * 4];
1351 tran_low_t
*outptr
= out
;
1353 tran_low_t temp_in
[4], temp_out
[4];
1354 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1357 for (i
= 0; i
< 4; ++i
) {
1358 vpx_highbd_idct4_c(input
, outptr
, bd
);
1364 for (i
= 0; i
< 4; ++i
) {
1365 for (j
= 0; j
< 4; ++j
)
1366 temp_in
[j
] = out
[j
* 4 + i
];
1367 vpx_highbd_idct4_c(temp_in
, temp_out
, bd
);
1368 for (j
= 0; j
< 4; ++j
) {
1369 dest
[j
* stride
+ i
] = highbd_clip_pixel_add(
1370 dest
[j
* stride
+ i
], ROUND_POWER_OF_TWO(temp_out
[j
], 4), bd
);
1375 void vpx_highbd_idct4x4_1_add_c(const tran_low_t
*input
, uint8_t *dest8
,
1376 int dest_stride
, int bd
) {
1379 tran_low_t out
= WRAPLOW(
1380 highbd_dct_const_round_shift(input
[0] * cospi_16_64
, bd
), bd
);
1381 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1383 out
= WRAPLOW(highbd_dct_const_round_shift(out
* cospi_16_64
, bd
), bd
);
1384 a1
= ROUND_POWER_OF_TWO(out
, 4);
1386 for (i
= 0; i
< 4; i
++) {
1387 dest
[0] = highbd_clip_pixel_add(dest
[0], a1
, bd
);
1388 dest
[1] = highbd_clip_pixel_add(dest
[1], a1
, bd
);
1389 dest
[2] = highbd_clip_pixel_add(dest
[2], a1
, bd
);
1390 dest
[3] = highbd_clip_pixel_add(dest
[3], a1
, bd
);
1391 dest
+= dest_stride
;
1395 void vpx_highbd_idct8_c(const tran_low_t
*input
, tran_low_t
*output
, int bd
) {
1396 tran_low_t step1
[8], step2
[8];
1397 tran_high_t temp1
, temp2
;
1399 step1
[0] = input
[0];
1400 step1
[2] = input
[4];
1401 step1
[1] = input
[2];
1402 step1
[3] = input
[6];
1403 temp1
= input
[1] * cospi_28_64
- input
[7] * cospi_4_64
;
1404 temp2
= input
[1] * cospi_4_64
+ input
[7] * cospi_28_64
;
1405 step1
[4] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1406 step1
[7] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1407 temp1
= input
[5] * cospi_12_64
- input
[3] * cospi_20_64
;
1408 temp2
= input
[5] * cospi_20_64
+ input
[3] * cospi_12_64
;
1409 step1
[5] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1410 step1
[6] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1412 // stage 2 & stage 3 - even half
1413 vpx_highbd_idct4_c(step1
, step1
, bd
);
1415 // stage 2 - odd half
1416 step2
[4] = WRAPLOW(step1
[4] + step1
[5], bd
);
1417 step2
[5] = WRAPLOW(step1
[4] - step1
[5], bd
);
1418 step2
[6] = WRAPLOW(-step1
[6] + step1
[7], bd
);
1419 step2
[7] = WRAPLOW(step1
[6] + step1
[7], bd
);
1421 // stage 3 - odd half
1422 step1
[4] = step2
[4];
1423 temp1
= (step2
[6] - step2
[5]) * cospi_16_64
;
1424 temp2
= (step2
[5] + step2
[6]) * cospi_16_64
;
1425 step1
[5] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1426 step1
[6] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1427 step1
[7] = step2
[7];
1430 output
[0] = WRAPLOW(step1
[0] + step1
[7], bd
);
1431 output
[1] = WRAPLOW(step1
[1] + step1
[6], bd
);
1432 output
[2] = WRAPLOW(step1
[2] + step1
[5], bd
);
1433 output
[3] = WRAPLOW(step1
[3] + step1
[4], bd
);
1434 output
[4] = WRAPLOW(step1
[3] - step1
[4], bd
);
1435 output
[5] = WRAPLOW(step1
[2] - step1
[5], bd
);
1436 output
[6] = WRAPLOW(step1
[1] - step1
[6], bd
);
1437 output
[7] = WRAPLOW(step1
[0] - step1
[7], bd
);
1440 void vpx_highbd_idct8x8_64_add_c(const tran_low_t
*input
, uint8_t *dest8
,
1441 int stride
, int bd
) {
1442 tran_low_t out
[8 * 8];
1443 tran_low_t
*outptr
= out
;
1445 tran_low_t temp_in
[8], temp_out
[8];
1446 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1448 // First transform rows.
1449 for (i
= 0; i
< 8; ++i
) {
1450 vpx_highbd_idct8_c(input
, outptr
, bd
);
1455 // Then transform columns.
1456 for (i
= 0; i
< 8; ++i
) {
1457 for (j
= 0; j
< 8; ++j
)
1458 temp_in
[j
] = out
[j
* 8 + i
];
1459 vpx_highbd_idct8_c(temp_in
, temp_out
, bd
);
1460 for (j
= 0; j
< 8; ++j
) {
1461 dest
[j
* stride
+ i
] = highbd_clip_pixel_add(
1462 dest
[j
* stride
+ i
], ROUND_POWER_OF_TWO(temp_out
[j
], 5), bd
);
1467 void vpx_highbd_idct8x8_1_add_c(const tran_low_t
*input
, uint8_t *dest8
,
1468 int stride
, int bd
) {
1471 tran_low_t out
= WRAPLOW(
1472 highbd_dct_const_round_shift(input
[0] * cospi_16_64
, bd
), bd
);
1473 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1474 out
= WRAPLOW(highbd_dct_const_round_shift(out
* cospi_16_64
, bd
), bd
);
1475 a1
= ROUND_POWER_OF_TWO(out
, 5);
1476 for (j
= 0; j
< 8; ++j
) {
1477 for (i
= 0; i
< 8; ++i
)
1478 dest
[i
] = highbd_clip_pixel_add(dest
[i
], a1
, bd
);
1483 void vpx_highbd_iadst4_c(const tran_low_t
*input
, tran_low_t
*output
, int bd
) {
1484 tran_high_t s0
, s1
, s2
, s3
, s4
, s5
, s6
, s7
;
1486 tran_low_t x0
= input
[0];
1487 tran_low_t x1
= input
[1];
1488 tran_low_t x2
= input
[2];
1489 tran_low_t x3
= input
[3];
1492 if (!(x0
| x1
| x2
| x3
)) {
1493 memset(output
, 0, 4 * sizeof(*output
));
1497 s0
= sinpi_1_9
* x0
;
1498 s1
= sinpi_2_9
* x0
;
1499 s2
= sinpi_3_9
* x1
;
1500 s3
= sinpi_4_9
* x2
;
1501 s4
= sinpi_1_9
* x2
;
1502 s5
= sinpi_2_9
* x3
;
1503 s6
= sinpi_4_9
* x3
;
1504 s7
= (tran_high_t
)(x0
- x2
+ x3
);
1509 s2
= sinpi_3_9
* s7
;
1511 // 1-D transform scaling factor is sqrt(2).
1512 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
1513 // + 1b (addition) = 29b.
1514 // Hence the output bit depth is 15b.
1515 output
[0] = WRAPLOW(highbd_dct_const_round_shift(s0
+ s3
, bd
), bd
);
1516 output
[1] = WRAPLOW(highbd_dct_const_round_shift(s1
+ s3
, bd
), bd
);
1517 output
[2] = WRAPLOW(highbd_dct_const_round_shift(s2
, bd
), bd
);
1518 output
[3] = WRAPLOW(highbd_dct_const_round_shift(s0
+ s1
- s3
, bd
), bd
);
1521 void vpx_highbd_iadst8_c(const tran_low_t
*input
, tran_low_t
*output
, int bd
) {
1522 tran_high_t s0
, s1
, s2
, s3
, s4
, s5
, s6
, s7
;
1524 tran_low_t x0
= input
[7];
1525 tran_low_t x1
= input
[0];
1526 tran_low_t x2
= input
[5];
1527 tran_low_t x3
= input
[2];
1528 tran_low_t x4
= input
[3];
1529 tran_low_t x5
= input
[4];
1530 tran_low_t x6
= input
[1];
1531 tran_low_t x7
= input
[6];
1534 if (!(x0
| x1
| x2
| x3
| x4
| x5
| x6
| x7
)) {
1535 memset(output
, 0, 8 * sizeof(*output
));
1540 s0
= cospi_2_64
* x0
+ cospi_30_64
* x1
;
1541 s1
= cospi_30_64
* x0
- cospi_2_64
* x1
;
1542 s2
= cospi_10_64
* x2
+ cospi_22_64
* x3
;
1543 s3
= cospi_22_64
* x2
- cospi_10_64
* x3
;
1544 s4
= cospi_18_64
* x4
+ cospi_14_64
* x5
;
1545 s5
= cospi_14_64
* x4
- cospi_18_64
* x5
;
1546 s6
= cospi_26_64
* x6
+ cospi_6_64
* x7
;
1547 s7
= cospi_6_64
* x6
- cospi_26_64
* x7
;
1549 x0
= WRAPLOW(highbd_dct_const_round_shift(s0
+ s4
, bd
), bd
);
1550 x1
= WRAPLOW(highbd_dct_const_round_shift(s1
+ s5
, bd
), bd
);
1551 x2
= WRAPLOW(highbd_dct_const_round_shift(s2
+ s6
, bd
), bd
);
1552 x3
= WRAPLOW(highbd_dct_const_round_shift(s3
+ s7
, bd
), bd
);
1553 x4
= WRAPLOW(highbd_dct_const_round_shift(s0
- s4
, bd
), bd
);
1554 x5
= WRAPLOW(highbd_dct_const_round_shift(s1
- s5
, bd
), bd
);
1555 x6
= WRAPLOW(highbd_dct_const_round_shift(s2
- s6
, bd
), bd
);
1556 x7
= WRAPLOW(highbd_dct_const_round_shift(s3
- s7
, bd
), bd
);
1563 s4
= cospi_8_64
* x4
+ cospi_24_64
* x5
;
1564 s5
= cospi_24_64
* x4
- cospi_8_64
* x5
;
1565 s6
= -cospi_24_64
* x6
+ cospi_8_64
* x7
;
1566 s7
= cospi_8_64
* x6
+ cospi_24_64
* x7
;
1568 x0
= WRAPLOW(s0
+ s2
, bd
);
1569 x1
= WRAPLOW(s1
+ s3
, bd
);
1570 x2
= WRAPLOW(s0
- s2
, bd
);
1571 x3
= WRAPLOW(s1
- s3
, bd
);
1572 x4
= WRAPLOW(highbd_dct_const_round_shift(s4
+ s6
, bd
), bd
);
1573 x5
= WRAPLOW(highbd_dct_const_round_shift(s5
+ s7
, bd
), bd
);
1574 x6
= WRAPLOW(highbd_dct_const_round_shift(s4
- s6
, bd
), bd
);
1575 x7
= WRAPLOW(highbd_dct_const_round_shift(s5
- s7
, bd
), bd
);
1578 s2
= cospi_16_64
* (x2
+ x3
);
1579 s3
= cospi_16_64
* (x2
- x3
);
1580 s6
= cospi_16_64
* (x6
+ x7
);
1581 s7
= cospi_16_64
* (x6
- x7
);
1583 x2
= WRAPLOW(highbd_dct_const_round_shift(s2
, bd
), bd
);
1584 x3
= WRAPLOW(highbd_dct_const_round_shift(s3
, bd
), bd
);
1585 x6
= WRAPLOW(highbd_dct_const_round_shift(s6
, bd
), bd
);
1586 x7
= WRAPLOW(highbd_dct_const_round_shift(s7
, bd
), bd
);
1588 output
[0] = WRAPLOW(x0
, bd
);
1589 output
[1] = WRAPLOW(-x4
, bd
);
1590 output
[2] = WRAPLOW(x6
, bd
);
1591 output
[3] = WRAPLOW(-x2
, bd
);
1592 output
[4] = WRAPLOW(x3
, bd
);
1593 output
[5] = WRAPLOW(-x7
, bd
);
1594 output
[6] = WRAPLOW(x5
, bd
);
1595 output
[7] = WRAPLOW(-x1
, bd
);
1598 void vpx_highbd_idct8x8_10_add_c(const tran_low_t
*input
, uint8_t *dest8
,
1599 int stride
, int bd
) {
1600 tran_low_t out
[8 * 8] = { 0 };
1601 tran_low_t
*outptr
= out
;
1603 tran_low_t temp_in
[8], temp_out
[8];
1604 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1606 // First transform rows.
1607 // Only first 4 row has non-zero coefs.
1608 for (i
= 0; i
< 4; ++i
) {
1609 vpx_highbd_idct8_c(input
, outptr
, bd
);
1613 // Then transform columns.
1614 for (i
= 0; i
< 8; ++i
) {
1615 for (j
= 0; j
< 8; ++j
)
1616 temp_in
[j
] = out
[j
* 8 + i
];
1617 vpx_highbd_idct8_c(temp_in
, temp_out
, bd
);
1618 for (j
= 0; j
< 8; ++j
) {
1619 dest
[j
* stride
+ i
] = highbd_clip_pixel_add(
1620 dest
[j
* stride
+ i
], ROUND_POWER_OF_TWO(temp_out
[j
], 5), bd
);
1625 void vpx_highbd_idct16_c(const tran_low_t
*input
, tran_low_t
*output
, int bd
) {
1626 tran_low_t step1
[16], step2
[16];
1627 tran_high_t temp1
, temp2
;
1631 step1
[0] = input
[0/2];
1632 step1
[1] = input
[16/2];
1633 step1
[2] = input
[8/2];
1634 step1
[3] = input
[24/2];
1635 step1
[4] = input
[4/2];
1636 step1
[5] = input
[20/2];
1637 step1
[6] = input
[12/2];
1638 step1
[7] = input
[28/2];
1639 step1
[8] = input
[2/2];
1640 step1
[9] = input
[18/2];
1641 step1
[10] = input
[10/2];
1642 step1
[11] = input
[26/2];
1643 step1
[12] = input
[6/2];
1644 step1
[13] = input
[22/2];
1645 step1
[14] = input
[14/2];
1646 step1
[15] = input
[30/2];
1649 step2
[0] = step1
[0];
1650 step2
[1] = step1
[1];
1651 step2
[2] = step1
[2];
1652 step2
[3] = step1
[3];
1653 step2
[4] = step1
[4];
1654 step2
[5] = step1
[5];
1655 step2
[6] = step1
[6];
1656 step2
[7] = step1
[7];
1658 temp1
= step1
[8] * cospi_30_64
- step1
[15] * cospi_2_64
;
1659 temp2
= step1
[8] * cospi_2_64
+ step1
[15] * cospi_30_64
;
1660 step2
[8] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1661 step2
[15] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1663 temp1
= step1
[9] * cospi_14_64
- step1
[14] * cospi_18_64
;
1664 temp2
= step1
[9] * cospi_18_64
+ step1
[14] * cospi_14_64
;
1665 step2
[9] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1666 step2
[14] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1668 temp1
= step1
[10] * cospi_22_64
- step1
[13] * cospi_10_64
;
1669 temp2
= step1
[10] * cospi_10_64
+ step1
[13] * cospi_22_64
;
1670 step2
[10] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1671 step2
[13] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1673 temp1
= step1
[11] * cospi_6_64
- step1
[12] * cospi_26_64
;
1674 temp2
= step1
[11] * cospi_26_64
+ step1
[12] * cospi_6_64
;
1675 step2
[11] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1676 step2
[12] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1679 step1
[0] = step2
[0];
1680 step1
[1] = step2
[1];
1681 step1
[2] = step2
[2];
1682 step1
[3] = step2
[3];
1684 temp1
= step2
[4] * cospi_28_64
- step2
[7] * cospi_4_64
;
1685 temp2
= step2
[4] * cospi_4_64
+ step2
[7] * cospi_28_64
;
1686 step1
[4] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1687 step1
[7] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1688 temp1
= step2
[5] * cospi_12_64
- step2
[6] * cospi_20_64
;
1689 temp2
= step2
[5] * cospi_20_64
+ step2
[6] * cospi_12_64
;
1690 step1
[5] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1691 step1
[6] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1693 step1
[8] = WRAPLOW(step2
[8] + step2
[9], bd
);
1694 step1
[9] = WRAPLOW(step2
[8] - step2
[9], bd
);
1695 step1
[10] = WRAPLOW(-step2
[10] + step2
[11], bd
);
1696 step1
[11] = WRAPLOW(step2
[10] + step2
[11], bd
);
1697 step1
[12] = WRAPLOW(step2
[12] + step2
[13], bd
);
1698 step1
[13] = WRAPLOW(step2
[12] - step2
[13], bd
);
1699 step1
[14] = WRAPLOW(-step2
[14] + step2
[15], bd
);
1700 step1
[15] = WRAPLOW(step2
[14] + step2
[15], bd
);
1703 temp1
= (step1
[0] + step1
[1]) * cospi_16_64
;
1704 temp2
= (step1
[0] - step1
[1]) * cospi_16_64
;
1705 step2
[0] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1706 step2
[1] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1707 temp1
= step1
[2] * cospi_24_64
- step1
[3] * cospi_8_64
;
1708 temp2
= step1
[2] * cospi_8_64
+ step1
[3] * cospi_24_64
;
1709 step2
[2] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1710 step2
[3] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1711 step2
[4] = WRAPLOW(step1
[4] + step1
[5], bd
);
1712 step2
[5] = WRAPLOW(step1
[4] - step1
[5], bd
);
1713 step2
[6] = WRAPLOW(-step1
[6] + step1
[7], bd
);
1714 step2
[7] = WRAPLOW(step1
[6] + step1
[7], bd
);
1716 step2
[8] = step1
[8];
1717 step2
[15] = step1
[15];
1718 temp1
= -step1
[9] * cospi_8_64
+ step1
[14] * cospi_24_64
;
1719 temp2
= step1
[9] * cospi_24_64
+ step1
[14] * cospi_8_64
;
1720 step2
[9] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1721 step2
[14] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1722 temp1
= -step1
[10] * cospi_24_64
- step1
[13] * cospi_8_64
;
1723 temp2
= -step1
[10] * cospi_8_64
+ step1
[13] * cospi_24_64
;
1724 step2
[10] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1725 step2
[13] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1726 step2
[11] = step1
[11];
1727 step2
[12] = step1
[12];
1730 step1
[0] = WRAPLOW(step2
[0] + step2
[3], bd
);
1731 step1
[1] = WRAPLOW(step2
[1] + step2
[2], bd
);
1732 step1
[2] = WRAPLOW(step2
[1] - step2
[2], bd
);
1733 step1
[3] = WRAPLOW(step2
[0] - step2
[3], bd
);
1734 step1
[4] = step2
[4];
1735 temp1
= (step2
[6] - step2
[5]) * cospi_16_64
;
1736 temp2
= (step2
[5] + step2
[6]) * cospi_16_64
;
1737 step1
[5] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1738 step1
[6] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1739 step1
[7] = step2
[7];
1741 step1
[8] = WRAPLOW(step2
[8] + step2
[11], bd
);
1742 step1
[9] = WRAPLOW(step2
[9] + step2
[10], bd
);
1743 step1
[10] = WRAPLOW(step2
[9] - step2
[10], bd
);
1744 step1
[11] = WRAPLOW(step2
[8] - step2
[11], bd
);
1745 step1
[12] = WRAPLOW(-step2
[12] + step2
[15], bd
);
1746 step1
[13] = WRAPLOW(-step2
[13] + step2
[14], bd
);
1747 step1
[14] = WRAPLOW(step2
[13] + step2
[14], bd
);
1748 step1
[15] = WRAPLOW(step2
[12] + step2
[15], bd
);
1751 step2
[0] = WRAPLOW(step1
[0] + step1
[7], bd
);
1752 step2
[1] = WRAPLOW(step1
[1] + step1
[6], bd
);
1753 step2
[2] = WRAPLOW(step1
[2] + step1
[5], bd
);
1754 step2
[3] = WRAPLOW(step1
[3] + step1
[4], bd
);
1755 step2
[4] = WRAPLOW(step1
[3] - step1
[4], bd
);
1756 step2
[5] = WRAPLOW(step1
[2] - step1
[5], bd
);
1757 step2
[6] = WRAPLOW(step1
[1] - step1
[6], bd
);
1758 step2
[7] = WRAPLOW(step1
[0] - step1
[7], bd
);
1759 step2
[8] = step1
[8];
1760 step2
[9] = step1
[9];
1761 temp1
= (-step1
[10] + step1
[13]) * cospi_16_64
;
1762 temp2
= (step1
[10] + step1
[13]) * cospi_16_64
;
1763 step2
[10] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1764 step2
[13] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1765 temp1
= (-step1
[11] + step1
[12]) * cospi_16_64
;
1766 temp2
= (step1
[11] + step1
[12]) * cospi_16_64
;
1767 step2
[11] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
1768 step2
[12] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
1769 step2
[14] = step1
[14];
1770 step2
[15] = step1
[15];
1773 output
[0] = WRAPLOW(step2
[0] + step2
[15], bd
);
1774 output
[1] = WRAPLOW(step2
[1] + step2
[14], bd
);
1775 output
[2] = WRAPLOW(step2
[2] + step2
[13], bd
);
1776 output
[3] = WRAPLOW(step2
[3] + step2
[12], bd
);
1777 output
[4] = WRAPLOW(step2
[4] + step2
[11], bd
);
1778 output
[5] = WRAPLOW(step2
[5] + step2
[10], bd
);
1779 output
[6] = WRAPLOW(step2
[6] + step2
[9], bd
);
1780 output
[7] = WRAPLOW(step2
[7] + step2
[8], bd
);
1781 output
[8] = WRAPLOW(step2
[7] - step2
[8], bd
);
1782 output
[9] = WRAPLOW(step2
[6] - step2
[9], bd
);
1783 output
[10] = WRAPLOW(step2
[5] - step2
[10], bd
);
1784 output
[11] = WRAPLOW(step2
[4] - step2
[11], bd
);
1785 output
[12] = WRAPLOW(step2
[3] - step2
[12], bd
);
1786 output
[13] = WRAPLOW(step2
[2] - step2
[13], bd
);
1787 output
[14] = WRAPLOW(step2
[1] - step2
[14], bd
);
1788 output
[15] = WRAPLOW(step2
[0] - step2
[15], bd
);
1791 void vpx_highbd_idct16x16_256_add_c(const tran_low_t
*input
, uint8_t *dest8
,
1792 int stride
, int bd
) {
1793 tran_low_t out
[16 * 16];
1794 tran_low_t
*outptr
= out
;
1796 tran_low_t temp_in
[16], temp_out
[16];
1797 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1799 // First transform rows.
1800 for (i
= 0; i
< 16; ++i
) {
1801 vpx_highbd_idct16_c(input
, outptr
, bd
);
1806 // Then transform columns.
1807 for (i
= 0; i
< 16; ++i
) {
1808 for (j
= 0; j
< 16; ++j
)
1809 temp_in
[j
] = out
[j
* 16 + i
];
1810 vpx_highbd_idct16_c(temp_in
, temp_out
, bd
);
1811 for (j
= 0; j
< 16; ++j
) {
1812 dest
[j
* stride
+ i
] = highbd_clip_pixel_add(
1813 dest
[j
* stride
+ i
], ROUND_POWER_OF_TWO(temp_out
[j
], 6), bd
);
1818 void vpx_highbd_iadst16_c(const tran_low_t
*input
, tran_low_t
*output
, int bd
) {
1819 tran_high_t s0
, s1
, s2
, s3
, s4
, s5
, s6
, s7
, s8
;
1820 tran_high_t s9
, s10
, s11
, s12
, s13
, s14
, s15
;
1822 tran_low_t x0
= input
[15];
1823 tran_low_t x1
= input
[0];
1824 tran_low_t x2
= input
[13];
1825 tran_low_t x3
= input
[2];
1826 tran_low_t x4
= input
[11];
1827 tran_low_t x5
= input
[4];
1828 tran_low_t x6
= input
[9];
1829 tran_low_t x7
= input
[6];
1830 tran_low_t x8
= input
[7];
1831 tran_low_t x9
= input
[8];
1832 tran_low_t x10
= input
[5];
1833 tran_low_t x11
= input
[10];
1834 tran_low_t x12
= input
[3];
1835 tran_low_t x13
= input
[12];
1836 tran_low_t x14
= input
[1];
1837 tran_low_t x15
= input
[14];
1840 if (!(x0
| x1
| x2
| x3
| x4
| x5
| x6
| x7
| x8
1841 | x9
| x10
| x11
| x12
| x13
| x14
| x15
)) {
1842 memset(output
, 0, 16 * sizeof(*output
));
1847 s0
= x0
* cospi_1_64
+ x1
* cospi_31_64
;
1848 s1
= x0
* cospi_31_64
- x1
* cospi_1_64
;
1849 s2
= x2
* cospi_5_64
+ x3
* cospi_27_64
;
1850 s3
= x2
* cospi_27_64
- x3
* cospi_5_64
;
1851 s4
= x4
* cospi_9_64
+ x5
* cospi_23_64
;
1852 s5
= x4
* cospi_23_64
- x5
* cospi_9_64
;
1853 s6
= x6
* cospi_13_64
+ x7
* cospi_19_64
;
1854 s7
= x6
* cospi_19_64
- x7
* cospi_13_64
;
1855 s8
= x8
* cospi_17_64
+ x9
* cospi_15_64
;
1856 s9
= x8
* cospi_15_64
- x9
* cospi_17_64
;
1857 s10
= x10
* cospi_21_64
+ x11
* cospi_11_64
;
1858 s11
= x10
* cospi_11_64
- x11
* cospi_21_64
;
1859 s12
= x12
* cospi_25_64
+ x13
* cospi_7_64
;
1860 s13
= x12
* cospi_7_64
- x13
* cospi_25_64
;
1861 s14
= x14
* cospi_29_64
+ x15
* cospi_3_64
;
1862 s15
= x14
* cospi_3_64
- x15
* cospi_29_64
;
1864 x0
= WRAPLOW(highbd_dct_const_round_shift(s0
+ s8
, bd
), bd
);
1865 x1
= WRAPLOW(highbd_dct_const_round_shift(s1
+ s9
, bd
), bd
);
1866 x2
= WRAPLOW(highbd_dct_const_round_shift(s2
+ s10
, bd
), bd
);
1867 x3
= WRAPLOW(highbd_dct_const_round_shift(s3
+ s11
, bd
), bd
);
1868 x4
= WRAPLOW(highbd_dct_const_round_shift(s4
+ s12
, bd
), bd
);
1869 x5
= WRAPLOW(highbd_dct_const_round_shift(s5
+ s13
, bd
), bd
);
1870 x6
= WRAPLOW(highbd_dct_const_round_shift(s6
+ s14
, bd
), bd
);
1871 x7
= WRAPLOW(highbd_dct_const_round_shift(s7
+ s15
, bd
), bd
);
1872 x8
= WRAPLOW(highbd_dct_const_round_shift(s0
- s8
, bd
), bd
);
1873 x9
= WRAPLOW(highbd_dct_const_round_shift(s1
- s9
, bd
), bd
);
1874 x10
= WRAPLOW(highbd_dct_const_round_shift(s2
- s10
, bd
), bd
);
1875 x11
= WRAPLOW(highbd_dct_const_round_shift(s3
- s11
, bd
), bd
);
1876 x12
= WRAPLOW(highbd_dct_const_round_shift(s4
- s12
, bd
), bd
);
1877 x13
= WRAPLOW(highbd_dct_const_round_shift(s5
- s13
, bd
), bd
);
1878 x14
= WRAPLOW(highbd_dct_const_round_shift(s6
- s14
, bd
), bd
);
1879 x15
= WRAPLOW(highbd_dct_const_round_shift(s7
- s15
, bd
), bd
);
1890 s8
= x8
* cospi_4_64
+ x9
* cospi_28_64
;
1891 s9
= x8
* cospi_28_64
- x9
* cospi_4_64
;
1892 s10
= x10
* cospi_20_64
+ x11
* cospi_12_64
;
1893 s11
= x10
* cospi_12_64
- x11
* cospi_20_64
;
1894 s12
= -x12
* cospi_28_64
+ x13
* cospi_4_64
;
1895 s13
= x12
* cospi_4_64
+ x13
* cospi_28_64
;
1896 s14
= -x14
* cospi_12_64
+ x15
* cospi_20_64
;
1897 s15
= x14
* cospi_20_64
+ x15
* cospi_12_64
;
1899 x0
= WRAPLOW(s0
+ s4
, bd
);
1900 x1
= WRAPLOW(s1
+ s5
, bd
);
1901 x2
= WRAPLOW(s2
+ s6
, bd
);
1902 x3
= WRAPLOW(s3
+ s7
, bd
);
1903 x4
= WRAPLOW(s0
- s4
, bd
);
1904 x5
= WRAPLOW(s1
- s5
, bd
);
1905 x6
= WRAPLOW(s2
- s6
, bd
);
1906 x7
= WRAPLOW(s3
- s7
, bd
);
1907 x8
= WRAPLOW(highbd_dct_const_round_shift(s8
+ s12
, bd
), bd
);
1908 x9
= WRAPLOW(highbd_dct_const_round_shift(s9
+ s13
, bd
), bd
);
1909 x10
= WRAPLOW(highbd_dct_const_round_shift(s10
+ s14
, bd
), bd
);
1910 x11
= WRAPLOW(highbd_dct_const_round_shift(s11
+ s15
, bd
), bd
);
1911 x12
= WRAPLOW(highbd_dct_const_round_shift(s8
- s12
, bd
), bd
);
1912 x13
= WRAPLOW(highbd_dct_const_round_shift(s9
- s13
, bd
), bd
);
1913 x14
= WRAPLOW(highbd_dct_const_round_shift(s10
- s14
, bd
), bd
);
1914 x15
= WRAPLOW(highbd_dct_const_round_shift(s11
- s15
, bd
), bd
);
1921 s4
= x4
* cospi_8_64
+ x5
* cospi_24_64
;
1922 s5
= x4
* cospi_24_64
- x5
* cospi_8_64
;
1923 s6
= -x6
* cospi_24_64
+ x7
* cospi_8_64
;
1924 s7
= x6
* cospi_8_64
+ x7
* cospi_24_64
;
1929 s12
= x12
* cospi_8_64
+ x13
* cospi_24_64
;
1930 s13
= x12
* cospi_24_64
- x13
* cospi_8_64
;
1931 s14
= -x14
* cospi_24_64
+ x15
* cospi_8_64
;
1932 s15
= x14
* cospi_8_64
+ x15
* cospi_24_64
;
1934 x0
= WRAPLOW(s0
+ s2
, bd
);
1935 x1
= WRAPLOW(s1
+ s3
, bd
);
1936 x2
= WRAPLOW(s0
- s2
, bd
);
1937 x3
= WRAPLOW(s1
- s3
, bd
);
1938 x4
= WRAPLOW(highbd_dct_const_round_shift(s4
+ s6
, bd
), bd
);
1939 x5
= WRAPLOW(highbd_dct_const_round_shift(s5
+ s7
, bd
), bd
);
1940 x6
= WRAPLOW(highbd_dct_const_round_shift(s4
- s6
, bd
), bd
);
1941 x7
= WRAPLOW(highbd_dct_const_round_shift(s5
- s7
, bd
), bd
);
1942 x8
= WRAPLOW(s8
+ s10
, bd
);
1943 x9
= WRAPLOW(s9
+ s11
, bd
);
1944 x10
= WRAPLOW(s8
- s10
, bd
);
1945 x11
= WRAPLOW(s9
- s11
, bd
);
1946 x12
= WRAPLOW(highbd_dct_const_round_shift(s12
+ s14
, bd
), bd
);
1947 x13
= WRAPLOW(highbd_dct_const_round_shift(s13
+ s15
, bd
), bd
);
1948 x14
= WRAPLOW(highbd_dct_const_round_shift(s12
- s14
, bd
), bd
);
1949 x15
= WRAPLOW(highbd_dct_const_round_shift(s13
- s15
, bd
), bd
);
1952 s2
= (- cospi_16_64
) * (x2
+ x3
);
1953 s3
= cospi_16_64
* (x2
- x3
);
1954 s6
= cospi_16_64
* (x6
+ x7
);
1955 s7
= cospi_16_64
* (-x6
+ x7
);
1956 s10
= cospi_16_64
* (x10
+ x11
);
1957 s11
= cospi_16_64
* (-x10
+ x11
);
1958 s14
= (- cospi_16_64
) * (x14
+ x15
);
1959 s15
= cospi_16_64
* (x14
- x15
);
1961 x2
= WRAPLOW(highbd_dct_const_round_shift(s2
, bd
), bd
);
1962 x3
= WRAPLOW(highbd_dct_const_round_shift(s3
, bd
), bd
);
1963 x6
= WRAPLOW(highbd_dct_const_round_shift(s6
, bd
), bd
);
1964 x7
= WRAPLOW(highbd_dct_const_round_shift(s7
, bd
), bd
);
1965 x10
= WRAPLOW(highbd_dct_const_round_shift(s10
, bd
), bd
);
1966 x11
= WRAPLOW(highbd_dct_const_round_shift(s11
, bd
), bd
);
1967 x14
= WRAPLOW(highbd_dct_const_round_shift(s14
, bd
), bd
);
1968 x15
= WRAPLOW(highbd_dct_const_round_shift(s15
, bd
), bd
);
1970 output
[0] = WRAPLOW(x0
, bd
);
1971 output
[1] = WRAPLOW(-x8
, bd
);
1972 output
[2] = WRAPLOW(x12
, bd
);
1973 output
[3] = WRAPLOW(-x4
, bd
);
1974 output
[4] = WRAPLOW(x6
, bd
);
1975 output
[5] = WRAPLOW(x14
, bd
);
1976 output
[6] = WRAPLOW(x10
, bd
);
1977 output
[7] = WRAPLOW(x2
, bd
);
1978 output
[8] = WRAPLOW(x3
, bd
);
1979 output
[9] = WRAPLOW(x11
, bd
);
1980 output
[10] = WRAPLOW(x15
, bd
);
1981 output
[11] = WRAPLOW(x7
, bd
);
1982 output
[12] = WRAPLOW(x5
, bd
);
1983 output
[13] = WRAPLOW(-x13
, bd
);
1984 output
[14] = WRAPLOW(x9
, bd
);
1985 output
[15] = WRAPLOW(-x1
, bd
);
1988 void vpx_highbd_idct16x16_10_add_c(const tran_low_t
*input
, uint8_t *dest8
,
1989 int stride
, int bd
) {
1990 tran_low_t out
[16 * 16] = { 0 };
1991 tran_low_t
*outptr
= out
;
1993 tran_low_t temp_in
[16], temp_out
[16];
1994 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
1996 // First transform rows. Since all non-zero dct coefficients are in
1997 // upper-left 4x4 area, we only need to calculate first 4 rows here.
1998 for (i
= 0; i
< 4; ++i
) {
1999 vpx_highbd_idct16_c(input
, outptr
, bd
);
2004 // Then transform columns.
2005 for (i
= 0; i
< 16; ++i
) {
2006 for (j
= 0; j
< 16; ++j
)
2007 temp_in
[j
] = out
[j
*16 + i
];
2008 vpx_highbd_idct16_c(temp_in
, temp_out
, bd
);
2009 for (j
= 0; j
< 16; ++j
) {
2010 dest
[j
* stride
+ i
] = highbd_clip_pixel_add(
2011 dest
[j
* stride
+ i
], ROUND_POWER_OF_TWO(temp_out
[j
], 6), bd
);
2016 void vpx_highbd_idct16x16_1_add_c(const tran_low_t
*input
, uint8_t *dest8
,
2017 int stride
, int bd
) {
2020 tran_low_t out
= WRAPLOW(
2021 highbd_dct_const_round_shift(input
[0] * cospi_16_64
, bd
), bd
);
2022 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
2024 out
= WRAPLOW(highbd_dct_const_round_shift(out
* cospi_16_64
, bd
), bd
);
2025 a1
= ROUND_POWER_OF_TWO(out
, 6);
2026 for (j
= 0; j
< 16; ++j
) {
2027 for (i
= 0; i
< 16; ++i
)
2028 dest
[i
] = highbd_clip_pixel_add(dest
[i
], a1
, bd
);
2033 static void highbd_idct32_c(const tran_low_t
*input
,
2034 tran_low_t
*output
, int bd
) {
2035 tran_low_t step1
[32], step2
[32];
2036 tran_high_t temp1
, temp2
;
2040 step1
[0] = input
[0];
2041 step1
[1] = input
[16];
2042 step1
[2] = input
[8];
2043 step1
[3] = input
[24];
2044 step1
[4] = input
[4];
2045 step1
[5] = input
[20];
2046 step1
[6] = input
[12];
2047 step1
[7] = input
[28];
2048 step1
[8] = input
[2];
2049 step1
[9] = input
[18];
2050 step1
[10] = input
[10];
2051 step1
[11] = input
[26];
2052 step1
[12] = input
[6];
2053 step1
[13] = input
[22];
2054 step1
[14] = input
[14];
2055 step1
[15] = input
[30];
2057 temp1
= input
[1] * cospi_31_64
- input
[31] * cospi_1_64
;
2058 temp2
= input
[1] * cospi_1_64
+ input
[31] * cospi_31_64
;
2059 step1
[16] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2060 step1
[31] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2062 temp1
= input
[17] * cospi_15_64
- input
[15] * cospi_17_64
;
2063 temp2
= input
[17] * cospi_17_64
+ input
[15] * cospi_15_64
;
2064 step1
[17] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2065 step1
[30] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2067 temp1
= input
[9] * cospi_23_64
- input
[23] * cospi_9_64
;
2068 temp2
= input
[9] * cospi_9_64
+ input
[23] * cospi_23_64
;
2069 step1
[18] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2070 step1
[29] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2072 temp1
= input
[25] * cospi_7_64
- input
[7] * cospi_25_64
;
2073 temp2
= input
[25] * cospi_25_64
+ input
[7] * cospi_7_64
;
2074 step1
[19] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2075 step1
[28] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2077 temp1
= input
[5] * cospi_27_64
- input
[27] * cospi_5_64
;
2078 temp2
= input
[5] * cospi_5_64
+ input
[27] * cospi_27_64
;
2079 step1
[20] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2080 step1
[27] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2082 temp1
= input
[21] * cospi_11_64
- input
[11] * cospi_21_64
;
2083 temp2
= input
[21] * cospi_21_64
+ input
[11] * cospi_11_64
;
2084 step1
[21] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2085 step1
[26] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2087 temp1
= input
[13] * cospi_19_64
- input
[19] * cospi_13_64
;
2088 temp2
= input
[13] * cospi_13_64
+ input
[19] * cospi_19_64
;
2089 step1
[22] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2090 step1
[25] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2092 temp1
= input
[29] * cospi_3_64
- input
[3] * cospi_29_64
;
2093 temp2
= input
[29] * cospi_29_64
+ input
[3] * cospi_3_64
;
2094 step1
[23] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2095 step1
[24] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2098 step2
[0] = step1
[0];
2099 step2
[1] = step1
[1];
2100 step2
[2] = step1
[2];
2101 step2
[3] = step1
[3];
2102 step2
[4] = step1
[4];
2103 step2
[5] = step1
[5];
2104 step2
[6] = step1
[6];
2105 step2
[7] = step1
[7];
2107 temp1
= step1
[8] * cospi_30_64
- step1
[15] * cospi_2_64
;
2108 temp2
= step1
[8] * cospi_2_64
+ step1
[15] * cospi_30_64
;
2109 step2
[8] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2110 step2
[15] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2112 temp1
= step1
[9] * cospi_14_64
- step1
[14] * cospi_18_64
;
2113 temp2
= step1
[9] * cospi_18_64
+ step1
[14] * cospi_14_64
;
2114 step2
[9] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2115 step2
[14] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2117 temp1
= step1
[10] * cospi_22_64
- step1
[13] * cospi_10_64
;
2118 temp2
= step1
[10] * cospi_10_64
+ step1
[13] * cospi_22_64
;
2119 step2
[10] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2120 step2
[13] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2122 temp1
= step1
[11] * cospi_6_64
- step1
[12] * cospi_26_64
;
2123 temp2
= step1
[11] * cospi_26_64
+ step1
[12] * cospi_6_64
;
2124 step2
[11] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2125 step2
[12] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2127 step2
[16] = WRAPLOW(step1
[16] + step1
[17], bd
);
2128 step2
[17] = WRAPLOW(step1
[16] - step1
[17], bd
);
2129 step2
[18] = WRAPLOW(-step1
[18] + step1
[19], bd
);
2130 step2
[19] = WRAPLOW(step1
[18] + step1
[19], bd
);
2131 step2
[20] = WRAPLOW(step1
[20] + step1
[21], bd
);
2132 step2
[21] = WRAPLOW(step1
[20] - step1
[21], bd
);
2133 step2
[22] = WRAPLOW(-step1
[22] + step1
[23], bd
);
2134 step2
[23] = WRAPLOW(step1
[22] + step1
[23], bd
);
2135 step2
[24] = WRAPLOW(step1
[24] + step1
[25], bd
);
2136 step2
[25] = WRAPLOW(step1
[24] - step1
[25], bd
);
2137 step2
[26] = WRAPLOW(-step1
[26] + step1
[27], bd
);
2138 step2
[27] = WRAPLOW(step1
[26] + step1
[27], bd
);
2139 step2
[28] = WRAPLOW(step1
[28] + step1
[29], bd
);
2140 step2
[29] = WRAPLOW(step1
[28] - step1
[29], bd
);
2141 step2
[30] = WRAPLOW(-step1
[30] + step1
[31], bd
);
2142 step2
[31] = WRAPLOW(step1
[30] + step1
[31], bd
);
2145 step1
[0] = step2
[0];
2146 step1
[1] = step2
[1];
2147 step1
[2] = step2
[2];
2148 step1
[3] = step2
[3];
2150 temp1
= step2
[4] * cospi_28_64
- step2
[7] * cospi_4_64
;
2151 temp2
= step2
[4] * cospi_4_64
+ step2
[7] * cospi_28_64
;
2152 step1
[4] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2153 step1
[7] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2154 temp1
= step2
[5] * cospi_12_64
- step2
[6] * cospi_20_64
;
2155 temp2
= step2
[5] * cospi_20_64
+ step2
[6] * cospi_12_64
;
2156 step1
[5] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2157 step1
[6] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2159 step1
[8] = WRAPLOW(step2
[8] + step2
[9], bd
);
2160 step1
[9] = WRAPLOW(step2
[8] - step2
[9], bd
);
2161 step1
[10] = WRAPLOW(-step2
[10] + step2
[11], bd
);
2162 step1
[11] = WRAPLOW(step2
[10] + step2
[11], bd
);
2163 step1
[12] = WRAPLOW(step2
[12] + step2
[13], bd
);
2164 step1
[13] = WRAPLOW(step2
[12] - step2
[13], bd
);
2165 step1
[14] = WRAPLOW(-step2
[14] + step2
[15], bd
);
2166 step1
[15] = WRAPLOW(step2
[14] + step2
[15], bd
);
2168 step1
[16] = step2
[16];
2169 step1
[31] = step2
[31];
2170 temp1
= -step2
[17] * cospi_4_64
+ step2
[30] * cospi_28_64
;
2171 temp2
= step2
[17] * cospi_28_64
+ step2
[30] * cospi_4_64
;
2172 step1
[17] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2173 step1
[30] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2174 temp1
= -step2
[18] * cospi_28_64
- step2
[29] * cospi_4_64
;
2175 temp2
= -step2
[18] * cospi_4_64
+ step2
[29] * cospi_28_64
;
2176 step1
[18] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2177 step1
[29] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2178 step1
[19] = step2
[19];
2179 step1
[20] = step2
[20];
2180 temp1
= -step2
[21] * cospi_20_64
+ step2
[26] * cospi_12_64
;
2181 temp2
= step2
[21] * cospi_12_64
+ step2
[26] * cospi_20_64
;
2182 step1
[21] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2183 step1
[26] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2184 temp1
= -step2
[22] * cospi_12_64
- step2
[25] * cospi_20_64
;
2185 temp2
= -step2
[22] * cospi_20_64
+ step2
[25] * cospi_12_64
;
2186 step1
[22] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2187 step1
[25] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2188 step1
[23] = step2
[23];
2189 step1
[24] = step2
[24];
2190 step1
[27] = step2
[27];
2191 step1
[28] = step2
[28];
2194 temp1
= (step1
[0] + step1
[1]) * cospi_16_64
;
2195 temp2
= (step1
[0] - step1
[1]) * cospi_16_64
;
2196 step2
[0] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2197 step2
[1] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2198 temp1
= step1
[2] * cospi_24_64
- step1
[3] * cospi_8_64
;
2199 temp2
= step1
[2] * cospi_8_64
+ step1
[3] * cospi_24_64
;
2200 step2
[2] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2201 step2
[3] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2202 step2
[4] = WRAPLOW(step1
[4] + step1
[5], bd
);
2203 step2
[5] = WRAPLOW(step1
[4] - step1
[5], bd
);
2204 step2
[6] = WRAPLOW(-step1
[6] + step1
[7], bd
);
2205 step2
[7] = WRAPLOW(step1
[6] + step1
[7], bd
);
2207 step2
[8] = step1
[8];
2208 step2
[15] = step1
[15];
2209 temp1
= -step1
[9] * cospi_8_64
+ step1
[14] * cospi_24_64
;
2210 temp2
= step1
[9] * cospi_24_64
+ step1
[14] * cospi_8_64
;
2211 step2
[9] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2212 step2
[14] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2213 temp1
= -step1
[10] * cospi_24_64
- step1
[13] * cospi_8_64
;
2214 temp2
= -step1
[10] * cospi_8_64
+ step1
[13] * cospi_24_64
;
2215 step2
[10] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2216 step2
[13] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2217 step2
[11] = step1
[11];
2218 step2
[12] = step1
[12];
2220 step2
[16] = WRAPLOW(step1
[16] + step1
[19], bd
);
2221 step2
[17] = WRAPLOW(step1
[17] + step1
[18], bd
);
2222 step2
[18] = WRAPLOW(step1
[17] - step1
[18], bd
);
2223 step2
[19] = WRAPLOW(step1
[16] - step1
[19], bd
);
2224 step2
[20] = WRAPLOW(-step1
[20] + step1
[23], bd
);
2225 step2
[21] = WRAPLOW(-step1
[21] + step1
[22], bd
);
2226 step2
[22] = WRAPLOW(step1
[21] + step1
[22], bd
);
2227 step2
[23] = WRAPLOW(step1
[20] + step1
[23], bd
);
2229 step2
[24] = WRAPLOW(step1
[24] + step1
[27], bd
);
2230 step2
[25] = WRAPLOW(step1
[25] + step1
[26], bd
);
2231 step2
[26] = WRAPLOW(step1
[25] - step1
[26], bd
);
2232 step2
[27] = WRAPLOW(step1
[24] - step1
[27], bd
);
2233 step2
[28] = WRAPLOW(-step1
[28] + step1
[31], bd
);
2234 step2
[29] = WRAPLOW(-step1
[29] + step1
[30], bd
);
2235 step2
[30] = WRAPLOW(step1
[29] + step1
[30], bd
);
2236 step2
[31] = WRAPLOW(step1
[28] + step1
[31], bd
);
2239 step1
[0] = WRAPLOW(step2
[0] + step2
[3], bd
);
2240 step1
[1] = WRAPLOW(step2
[1] + step2
[2], bd
);
2241 step1
[2] = WRAPLOW(step2
[1] - step2
[2], bd
);
2242 step1
[3] = WRAPLOW(step2
[0] - step2
[3], bd
);
2243 step1
[4] = step2
[4];
2244 temp1
= (step2
[6] - step2
[5]) * cospi_16_64
;
2245 temp2
= (step2
[5] + step2
[6]) * cospi_16_64
;
2246 step1
[5] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2247 step1
[6] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2248 step1
[7] = step2
[7];
2250 step1
[8] = WRAPLOW(step2
[8] + step2
[11], bd
);
2251 step1
[9] = WRAPLOW(step2
[9] + step2
[10], bd
);
2252 step1
[10] = WRAPLOW(step2
[9] - step2
[10], bd
);
2253 step1
[11] = WRAPLOW(step2
[8] - step2
[11], bd
);
2254 step1
[12] = WRAPLOW(-step2
[12] + step2
[15], bd
);
2255 step1
[13] = WRAPLOW(-step2
[13] + step2
[14], bd
);
2256 step1
[14] = WRAPLOW(step2
[13] + step2
[14], bd
);
2257 step1
[15] = WRAPLOW(step2
[12] + step2
[15], bd
);
2259 step1
[16] = step2
[16];
2260 step1
[17] = step2
[17];
2261 temp1
= -step2
[18] * cospi_8_64
+ step2
[29] * cospi_24_64
;
2262 temp2
= step2
[18] * cospi_24_64
+ step2
[29] * cospi_8_64
;
2263 step1
[18] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2264 step1
[29] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2265 temp1
= -step2
[19] * cospi_8_64
+ step2
[28] * cospi_24_64
;
2266 temp2
= step2
[19] * cospi_24_64
+ step2
[28] * cospi_8_64
;
2267 step1
[19] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2268 step1
[28] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2269 temp1
= -step2
[20] * cospi_24_64
- step2
[27] * cospi_8_64
;
2270 temp2
= -step2
[20] * cospi_8_64
+ step2
[27] * cospi_24_64
;
2271 step1
[20] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2272 step1
[27] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2273 temp1
= -step2
[21] * cospi_24_64
- step2
[26] * cospi_8_64
;
2274 temp2
= -step2
[21] * cospi_8_64
+ step2
[26] * cospi_24_64
;
2275 step1
[21] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2276 step1
[26] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2277 step1
[22] = step2
[22];
2278 step1
[23] = step2
[23];
2279 step1
[24] = step2
[24];
2280 step1
[25] = step2
[25];
2281 step1
[30] = step2
[30];
2282 step1
[31] = step2
[31];
2285 step2
[0] = WRAPLOW(step1
[0] + step1
[7], bd
);
2286 step2
[1] = WRAPLOW(step1
[1] + step1
[6], bd
);
2287 step2
[2] = WRAPLOW(step1
[2] + step1
[5], bd
);
2288 step2
[3] = WRAPLOW(step1
[3] + step1
[4], bd
);
2289 step2
[4] = WRAPLOW(step1
[3] - step1
[4], bd
);
2290 step2
[5] = WRAPLOW(step1
[2] - step1
[5], bd
);
2291 step2
[6] = WRAPLOW(step1
[1] - step1
[6], bd
);
2292 step2
[7] = WRAPLOW(step1
[0] - step1
[7], bd
);
2293 step2
[8] = step1
[8];
2294 step2
[9] = step1
[9];
2295 temp1
= (-step1
[10] + step1
[13]) * cospi_16_64
;
2296 temp2
= (step1
[10] + step1
[13]) * cospi_16_64
;
2297 step2
[10] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2298 step2
[13] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2299 temp1
= (-step1
[11] + step1
[12]) * cospi_16_64
;
2300 temp2
= (step1
[11] + step1
[12]) * cospi_16_64
;
2301 step2
[11] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2302 step2
[12] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2303 step2
[14] = step1
[14];
2304 step2
[15] = step1
[15];
2306 step2
[16] = WRAPLOW(step1
[16] + step1
[23], bd
);
2307 step2
[17] = WRAPLOW(step1
[17] + step1
[22], bd
);
2308 step2
[18] = WRAPLOW(step1
[18] + step1
[21], bd
);
2309 step2
[19] = WRAPLOW(step1
[19] + step1
[20], bd
);
2310 step2
[20] = WRAPLOW(step1
[19] - step1
[20], bd
);
2311 step2
[21] = WRAPLOW(step1
[18] - step1
[21], bd
);
2312 step2
[22] = WRAPLOW(step1
[17] - step1
[22], bd
);
2313 step2
[23] = WRAPLOW(step1
[16] - step1
[23], bd
);
2315 step2
[24] = WRAPLOW(-step1
[24] + step1
[31], bd
);
2316 step2
[25] = WRAPLOW(-step1
[25] + step1
[30], bd
);
2317 step2
[26] = WRAPLOW(-step1
[26] + step1
[29], bd
);
2318 step2
[27] = WRAPLOW(-step1
[27] + step1
[28], bd
);
2319 step2
[28] = WRAPLOW(step1
[27] + step1
[28], bd
);
2320 step2
[29] = WRAPLOW(step1
[26] + step1
[29], bd
);
2321 step2
[30] = WRAPLOW(step1
[25] + step1
[30], bd
);
2322 step2
[31] = WRAPLOW(step1
[24] + step1
[31], bd
);
2325 step1
[0] = WRAPLOW(step2
[0] + step2
[15], bd
);
2326 step1
[1] = WRAPLOW(step2
[1] + step2
[14], bd
);
2327 step1
[2] = WRAPLOW(step2
[2] + step2
[13], bd
);
2328 step1
[3] = WRAPLOW(step2
[3] + step2
[12], bd
);
2329 step1
[4] = WRAPLOW(step2
[4] + step2
[11], bd
);
2330 step1
[5] = WRAPLOW(step2
[5] + step2
[10], bd
);
2331 step1
[6] = WRAPLOW(step2
[6] + step2
[9], bd
);
2332 step1
[7] = WRAPLOW(step2
[7] + step2
[8], bd
);
2333 step1
[8] = WRAPLOW(step2
[7] - step2
[8], bd
);
2334 step1
[9] = WRAPLOW(step2
[6] - step2
[9], bd
);
2335 step1
[10] = WRAPLOW(step2
[5] - step2
[10], bd
);
2336 step1
[11] = WRAPLOW(step2
[4] - step2
[11], bd
);
2337 step1
[12] = WRAPLOW(step2
[3] - step2
[12], bd
);
2338 step1
[13] = WRAPLOW(step2
[2] - step2
[13], bd
);
2339 step1
[14] = WRAPLOW(step2
[1] - step2
[14], bd
);
2340 step1
[15] = WRAPLOW(step2
[0] - step2
[15], bd
);
2342 step1
[16] = step2
[16];
2343 step1
[17] = step2
[17];
2344 step1
[18] = step2
[18];
2345 step1
[19] = step2
[19];
2346 temp1
= (-step2
[20] + step2
[27]) * cospi_16_64
;
2347 temp2
= (step2
[20] + step2
[27]) * cospi_16_64
;
2348 step1
[20] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2349 step1
[27] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2350 temp1
= (-step2
[21] + step2
[26]) * cospi_16_64
;
2351 temp2
= (step2
[21] + step2
[26]) * cospi_16_64
;
2352 step1
[21] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2353 step1
[26] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2354 temp1
= (-step2
[22] + step2
[25]) * cospi_16_64
;
2355 temp2
= (step2
[22] + step2
[25]) * cospi_16_64
;
2356 step1
[22] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2357 step1
[25] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2358 temp1
= (-step2
[23] + step2
[24]) * cospi_16_64
;
2359 temp2
= (step2
[23] + step2
[24]) * cospi_16_64
;
2360 step1
[23] = WRAPLOW(highbd_dct_const_round_shift(temp1
, bd
), bd
);
2361 step1
[24] = WRAPLOW(highbd_dct_const_round_shift(temp2
, bd
), bd
);
2362 step1
[28] = step2
[28];
2363 step1
[29] = step2
[29];
2364 step1
[30] = step2
[30];
2365 step1
[31] = step2
[31];
2368 output
[0] = WRAPLOW(step1
[0] + step1
[31], bd
);
2369 output
[1] = WRAPLOW(step1
[1] + step1
[30], bd
);
2370 output
[2] = WRAPLOW(step1
[2] + step1
[29], bd
);
2371 output
[3] = WRAPLOW(step1
[3] + step1
[28], bd
);
2372 output
[4] = WRAPLOW(step1
[4] + step1
[27], bd
);
2373 output
[5] = WRAPLOW(step1
[5] + step1
[26], bd
);
2374 output
[6] = WRAPLOW(step1
[6] + step1
[25], bd
);
2375 output
[7] = WRAPLOW(step1
[7] + step1
[24], bd
);
2376 output
[8] = WRAPLOW(step1
[8] + step1
[23], bd
);
2377 output
[9] = WRAPLOW(step1
[9] + step1
[22], bd
);
2378 output
[10] = WRAPLOW(step1
[10] + step1
[21], bd
);
2379 output
[11] = WRAPLOW(step1
[11] + step1
[20], bd
);
2380 output
[12] = WRAPLOW(step1
[12] + step1
[19], bd
);
2381 output
[13] = WRAPLOW(step1
[13] + step1
[18], bd
);
2382 output
[14] = WRAPLOW(step1
[14] + step1
[17], bd
);
2383 output
[15] = WRAPLOW(step1
[15] + step1
[16], bd
);
2384 output
[16] = WRAPLOW(step1
[15] - step1
[16], bd
);
2385 output
[17] = WRAPLOW(step1
[14] - step1
[17], bd
);
2386 output
[18] = WRAPLOW(step1
[13] - step1
[18], bd
);
2387 output
[19] = WRAPLOW(step1
[12] - step1
[19], bd
);
2388 output
[20] = WRAPLOW(step1
[11] - step1
[20], bd
);
2389 output
[21] = WRAPLOW(step1
[10] - step1
[21], bd
);
2390 output
[22] = WRAPLOW(step1
[9] - step1
[22], bd
);
2391 output
[23] = WRAPLOW(step1
[8] - step1
[23], bd
);
2392 output
[24] = WRAPLOW(step1
[7] - step1
[24], bd
);
2393 output
[25] = WRAPLOW(step1
[6] - step1
[25], bd
);
2394 output
[26] = WRAPLOW(step1
[5] - step1
[26], bd
);
2395 output
[27] = WRAPLOW(step1
[4] - step1
[27], bd
);
2396 output
[28] = WRAPLOW(step1
[3] - step1
[28], bd
);
2397 output
[29] = WRAPLOW(step1
[2] - step1
[29], bd
);
2398 output
[30] = WRAPLOW(step1
[1] - step1
[30], bd
);
2399 output
[31] = WRAPLOW(step1
[0] - step1
[31], bd
);
2402 void vpx_highbd_idct32x32_1024_add_c(const tran_low_t
*input
, uint8_t *dest8
,
2403 int stride
, int bd
) {
2404 tran_low_t out
[32 * 32];
2405 tran_low_t
*outptr
= out
;
2407 tran_low_t temp_in
[32], temp_out
[32];
2408 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
2411 for (i
= 0; i
< 32; ++i
) {
2412 tran_low_t zero_coeff
[16];
2413 for (j
= 0; j
< 16; ++j
)
2414 zero_coeff
[j
] = input
[2 * j
] | input
[2 * j
+ 1];
2415 for (j
= 0; j
< 8; ++j
)
2416 zero_coeff
[j
] = zero_coeff
[2 * j
] | zero_coeff
[2 * j
+ 1];
2417 for (j
= 0; j
< 4; ++j
)
2418 zero_coeff
[j
] = zero_coeff
[2 * j
] | zero_coeff
[2 * j
+ 1];
2419 for (j
= 0; j
< 2; ++j
)
2420 zero_coeff
[j
] = zero_coeff
[2 * j
] | zero_coeff
[2 * j
+ 1];
2422 if (zero_coeff
[0] | zero_coeff
[1])
2423 highbd_idct32_c(input
, outptr
, bd
);
2425 memset(outptr
, 0, sizeof(tran_low_t
) * 32);
2431 for (i
= 0; i
< 32; ++i
) {
2432 for (j
= 0; j
< 32; ++j
)
2433 temp_in
[j
] = out
[j
* 32 + i
];
2434 highbd_idct32_c(temp_in
, temp_out
, bd
);
2435 for (j
= 0; j
< 32; ++j
) {
2436 dest
[j
* stride
+ i
] = highbd_clip_pixel_add(
2437 dest
[j
* stride
+ i
], ROUND_POWER_OF_TWO(temp_out
[j
], 6), bd
);
2442 void vpx_highbd_idct32x32_34_add_c(const tran_low_t
*input
, uint8_t *dest8
,
2443 int stride
, int bd
) {
2444 tran_low_t out
[32 * 32] = {0};
2445 tran_low_t
*outptr
= out
;
2447 tran_low_t temp_in
[32], temp_out
[32];
2448 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
2451 // Only upper-left 8x8 has non-zero coeff.
2452 for (i
= 0; i
< 8; ++i
) {
2453 highbd_idct32_c(input
, outptr
, bd
);
2458 for (i
= 0; i
< 32; ++i
) {
2459 for (j
= 0; j
< 32; ++j
)
2460 temp_in
[j
] = out
[j
* 32 + i
];
2461 highbd_idct32_c(temp_in
, temp_out
, bd
);
2462 for (j
= 0; j
< 32; ++j
) {
2463 dest
[j
* stride
+ i
] = highbd_clip_pixel_add(
2464 dest
[j
* stride
+ i
], ROUND_POWER_OF_TWO(temp_out
[j
], 6), bd
);
2469 void vpx_highbd_idct32x32_1_add_c(const tran_low_t
*input
, uint8_t *dest8
,
2470 int stride
, int bd
) {
2473 uint16_t *dest
= CONVERT_TO_SHORTPTR(dest8
);
2475 tran_low_t out
= WRAPLOW(
2476 highbd_dct_const_round_shift(input
[0] * cospi_16_64
, bd
), bd
);
2477 out
= WRAPLOW(highbd_dct_const_round_shift(out
* cospi_16_64
, bd
), bd
);
2478 a1
= ROUND_POWER_OF_TWO(out
, 6);
2480 for (j
= 0; j
< 32; ++j
) {
2481 for (i
= 0; i
< 32; ++i
)
2482 dest
[i
] = highbd_clip_pixel_add(dest
[i
], a1
, bd
);
2486 #endif // CONFIG_VP9_HIGHBITDEPTH