2 * Floating point AAN DCT
3 * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
5 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
6 * Copyright (c) 2003 Roman Shaposhnik
8 * Permission to use, copy, modify, and/or distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 * Floating point AAN DCT
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "libavutil/internal.h"
31 #include "libavutil/libm.h"
35 //numbers generated by simple c code (not as accurate as they could be)
38 printf("#define B%d %1.20llf\n", i, (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2)));
41 #define B0 1.00000000000000000000
42 #define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
43 #define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
44 #define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
45 #define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
46 #define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
47 #define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
48 #define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1
51 #define A1 0.70710678118654752438 // cos(pi*4/16)
52 #define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
53 #define A5 0.38268343236508977170 // cos(pi*6/16)
54 #define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
56 static const FLOAT postscale
[64]={
57 B0
*B0
, B0
*B1
, B0
*B2
, B0
*B3
, B0
*B4
, B0
*B5
, B0
*B6
, B0
*B7
,
58 B1
*B0
, B1
*B1
, B1
*B2
, B1
*B3
, B1
*B4
, B1
*B5
, B1
*B6
, B1
*B7
,
59 B2
*B0
, B2
*B1
, B2
*B2
, B2
*B3
, B2
*B4
, B2
*B5
, B2
*B6
, B2
*B7
,
60 B3
*B0
, B3
*B1
, B3
*B2
, B3
*B3
, B3
*B4
, B3
*B5
, B3
*B6
, B3
*B7
,
61 B4
*B0
, B4
*B1
, B4
*B2
, B4
*B3
, B4
*B4
, B4
*B5
, B4
*B6
, B4
*B7
,
62 B5
*B0
, B5
*B1
, B5
*B2
, B5
*B3
, B5
*B4
, B5
*B5
, B5
*B6
, B5
*B7
,
63 B6
*B0
, B6
*B1
, B6
*B2
, B6
*B3
, B6
*B4
, B6
*B5
, B6
*B6
, B6
*B7
,
64 B7
*B0
, B7
*B1
, B7
*B2
, B7
*B3
, B7
*B4
, B7
*B5
, B7
*B6
, B7
*B7
,
67 static av_always_inline
void row_fdct(FLOAT temp
[64], int16_t *data
)
69 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
70 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
71 FLOAT z2
, z4
, z11
, z13
;
75 for (i
=0; i
<8*8; i
+=8) {
76 tmp0
= data
[0 + i
] + data
[7 + i
];
77 tmp7
= data
[0 + i
] - data
[7 + i
];
78 tmp1
= data
[1 + i
] + data
[6 + i
];
79 tmp6
= data
[1 + i
] - data
[6 + i
];
80 tmp2
= data
[2 + i
] + data
[5 + i
];
81 tmp5
= data
[2 + i
] - data
[5 + i
];
82 tmp3
= data
[3 + i
] + data
[4 + i
];
83 tmp4
= data
[3 + i
] - data
[4 + i
];
90 temp
[0 + i
]= tmp10
+ tmp11
;
91 temp
[4 + i
]= tmp10
- tmp11
;
95 temp
[2 + i
]= tmp13
+ tmp12
;
96 temp
[6 + i
]= tmp13
- tmp12
;
103 z5
= (tmp4
- tmp6
) * A5
;
107 z2
= tmp4
*(A2
+A5
) - tmp6
*A5
;
108 z4
= tmp6
*(A4
-A5
) + tmp4
*A5
;
115 temp
[5 + i
]= z13
+ z2
;
116 temp
[3 + i
]= z13
- z2
;
117 temp
[1 + i
]= z11
+ z4
;
118 temp
[7 + i
]= z11
- z4
;
122 void ff_faandct(int16_t *data
)
124 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
125 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
126 FLOAT z2
, z4
, z11
, z13
;
133 row_fdct(temp
, data
);
135 for (i
=0; i
<8; i
++) {
136 tmp0
= temp
[8*0 + i
] + temp
[8*7 + i
];
137 tmp7
= temp
[8*0 + i
] - temp
[8*7 + i
];
138 tmp1
= temp
[8*1 + i
] + temp
[8*6 + i
];
139 tmp6
= temp
[8*1 + i
] - temp
[8*6 + i
];
140 tmp2
= temp
[8*2 + i
] + temp
[8*5 + i
];
141 tmp5
= temp
[8*2 + i
] - temp
[8*5 + i
];
142 tmp3
= temp
[8*3 + i
] + temp
[8*4 + i
];
143 tmp4
= temp
[8*3 + i
] - temp
[8*4 + i
];
150 data
[8*0 + i
]= lrintf(postscale
[8*0 + i
] * (tmp10
+ tmp11
));
151 data
[8*4 + i
]= lrintf(postscale
[8*4 + i
] * (tmp10
- tmp11
));
155 data
[8*2 + i
]= lrintf(postscale
[8*2 + i
] * (tmp13
+ tmp12
));
156 data
[8*6 + i
]= lrintf(postscale
[8*6 + i
] * (tmp13
- tmp12
));
163 z5
= (tmp4
- tmp6
) * A5
;
167 z2
= tmp4
*(A2
+A5
) - tmp6
*A5
;
168 z4
= tmp6
*(A4
-A5
) + tmp4
*A5
;
175 data
[8*5 + i
]= lrintf(postscale
[8*5 + i
] * (z13
+ z2
));
176 data
[8*3 + i
]= lrintf(postscale
[8*3 + i
] * (z13
- z2
));
177 data
[8*1 + i
]= lrintf(postscale
[8*1 + i
] * (z11
+ z4
));
178 data
[8*7 + i
]= lrintf(postscale
[8*7 + i
] * (z11
- z4
));
182 void ff_faandct248(int16_t *data
)
184 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
185 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
191 row_fdct(temp
, data
);
193 for (i
=0; i
<8; i
++) {
194 tmp0
= temp
[8*0 + i
] + temp
[8*1 + i
];
195 tmp1
= temp
[8*2 + i
] + temp
[8*3 + i
];
196 tmp2
= temp
[8*4 + i
] + temp
[8*5 + i
];
197 tmp3
= temp
[8*6 + i
] + temp
[8*7 + i
];
198 tmp4
= temp
[8*0 + i
] - temp
[8*1 + i
];
199 tmp5
= temp
[8*2 + i
] - temp
[8*3 + i
];
200 tmp6
= temp
[8*4 + i
] - temp
[8*5 + i
];
201 tmp7
= temp
[8*6 + i
] - temp
[8*7 + i
];
208 data
[8*0 + i
] = lrintf(postscale
[8*0 + i
] * (tmp10
+ tmp11
));
209 data
[8*4 + i
] = lrintf(postscale
[8*4 + i
] * (tmp10
- tmp11
));
213 data
[8*2 + i
] = lrintf(postscale
[8*2 + i
] * (tmp13
+ tmp12
));
214 data
[8*6 + i
] = lrintf(postscale
[8*6 + i
] * (tmp13
- tmp12
));
221 data
[8*1 + i
] = lrintf(postscale
[8*0 + i
] * (tmp10
+ tmp11
));
222 data
[8*5 + i
] = lrintf(postscale
[8*4 + i
] * (tmp10
- tmp11
));
226 data
[8*3 + i
] = lrintf(postscale
[8*2 + i
] * (tmp13
+ tmp12
));
227 data
[8*7 + i
] = lrintf(postscale
[8*6 + i
] * (tmp13
- tmp12
));