egra: agg will respect clip rect now
[iv.d.git] / dopus / orig / imdct15.d
blobe5221a1230bb125f494b006a941f5fd5a3868ab9
1 /*
2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 module imdct15 /*is aliced*/;
19 import iv.alice;
21 /**
22 * @file
23 * Celt non-power of 2 iMDCT
25 import avmem;
26 import avfft;
27 import opus;
30 struct IMDCT15Context {
31 int fft_n;
32 int len2;
33 int len4;
35 FFTComplex* tmp;
37 FFTComplex* twiddle_exptab;
39 FFTComplex*[6] exptab;
41 /**
42 * Calculate the middle half of the iMDCT
44 void function (IMDCT15Context* s, float* dst, const(float)* src, ptrdiff_t src_stride, float scale) imdct_half;
48 /**
49 * Init an iMDCT of the length 2 * 15 * (2^N)
51 int ff_imdct15_init(IMDCT15Context **s, int N);
53 /**
54 * Free an iMDCT.
56 void ff_imdct15_uninit(IMDCT15Context **s);
58 void ff_imdct15_init_aarch64(IMDCT15Context *s);
62 // minimal iMDCT size to make SIMD opts easier
63 enum CELT_MIN_IMDCT_SIZE = 120;
65 // complex c = a * b
66 enum CMUL3(string cre, string cim, string are, string aim, string bre, string bim) =
67 ""~cre~" = "~are~" * "~bre~" - "~aim~" * "~bim~";\n"~
68 ""~cim~" = "~are~" * "~bim~" + "~aim~" * "~bre~";\n";
70 enum CMUL(string c, string a, string b) = CMUL3!("("~c~").re", "("~c~").im", "("~a~").re", "("~a~").im", "("~b~").re", "("~b~").im");
72 // complex c = a * b
73 // d = a * conjugate(b)
74 enum CMUL2(string c, string d, string a, string b) =
75 "{\n"~
76 "float are = ("~a~").re;\n"~
77 "float aim = ("~a~").im;\n"~
78 "float bre = ("~b~").re;\n"~
79 "float bim = ("~b~").im;\n"~
80 "float rr = are * bre;\n"~
81 "float ri = are * bim;\n"~
82 "float ir = aim * bre;\n"~
83 "float ii = aim * bim;\n"~
84 "("~c~").re = rr - ii;\n"~
85 "("~c~").im = ri + ir;\n"~
86 "("~d~").re = rr + ii;\n"~
87 "("~d~").im = -ri + ir;\n"~
88 "}\n";
90 /*av_cold*/ void ff_imdct15_uninit (IMDCT15Context** ps) {
91 IMDCT15Context* s = *ps;
92 if (s is null) return;
93 for (int i = 0; i < /*FF_ARRAY_ELEMS*/cast(int)s.exptab.length; ++i) av_freep(&s.exptab[i]);
94 av_freep(&s.twiddle_exptab);
95 av_freep(&s.tmp);
96 av_freep(ps);
99 //static void imdct15_half (IMDCT15Context* s, float* dst, const(float)* src, ptrdiff_t stride, float scale);
101 /*av_cold*/ int ff_imdct15_init (IMDCT15Context** ps, int N) {
102 import std.math : cos, sin, PI;
104 IMDCT15Context* s;
105 int len2 = 15*(1<<N);
106 int len = 2*len2;
107 int i, j;
109 if (len2 > CELT_MAX_FRAME_SIZE || len2 < CELT_MIN_IMDCT_SIZE) return AVERROR(EINVAL);
111 s = av_mallocz!IMDCT15Context();
112 if (!s) return AVERROR(ENOMEM);
114 s.fft_n = N - 1;
115 s.len4 = len2 / 2;
116 s.len2 = len2;
118 s.tmp = av_malloc_array!(typeof(*s.tmp))(len);
119 if (!s.tmp) goto fail;
121 s.twiddle_exptab = av_malloc_array!(typeof(*s.twiddle_exptab))(s.len4);
122 if (!s.twiddle_exptab) goto fail;
124 for (i = 0; i < s.len4; i++) {
125 s.twiddle_exptab[i].re = cos(2 * PI * (i + 0.125 + s.len4) / len);
126 s.twiddle_exptab[i].im = sin(2 * PI * (i + 0.125 + s.len4) / len);
129 for (i = 0; i < /*FF_ARRAY_ELEMS*/cast(int)s.exptab.length; i++) {
130 int NN = 15 * (1 << i);
131 s.exptab[i] = av_malloc!(typeof(*s.exptab[i]))(FFMAX(NN, 19));
132 if (!s.exptab[i]) goto fail;
133 for (j = 0; j < NN; j++) {
134 s.exptab[i][j].re = cos(2 * PI * j / NN);
135 s.exptab[i][j].im = sin(2 * PI * j / NN);
139 // wrap around to simplify fft15
140 for (j = 15; j < 19; j++) s.exptab[0][j] = s.exptab[0][j - 15];
142 s.imdct_half = &imdct15_half;
144 //if (ARCH_AARCH64) ff_imdct15_init_aarch64(s);
146 *ps = s;
148 return 0;
150 fail:
151 ff_imdct15_uninit(&s);
152 return AVERROR(ENOMEM);
156 private void fft5(FFTComplex* out_, const(FFTComplex)* in_, ptrdiff_t stride) {
157 // [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5)
158 static immutable FFTComplex[2] fact = [ { 0.30901699437494745, 0.95105651629515353 },
159 { -0.80901699437494734, 0.58778525229247325 } ];
161 FFTComplex[4][4] z;
163 mixin(CMUL2!("z[0][0]", "z[0][3]", "in_[1 * stride]", "fact[0]"));
164 mixin(CMUL2!("z[0][1]", "z[0][2]", "in_[1 * stride]", "fact[1]"));
165 mixin(CMUL2!("z[1][0]", "z[1][3]", "in_[2 * stride]", "fact[0]"));
166 mixin(CMUL2!("z[1][1]", "z[1][2]", "in_[2 * stride]", "fact[1]"));
167 mixin(CMUL2!("z[2][0]", "z[2][3]", "in_[3 * stride]", "fact[0]"));
168 mixin(CMUL2!("z[2][1]", "z[2][2]", "in_[3 * stride]", "fact[1]"));
169 mixin(CMUL2!("z[3][0]", "z[3][3]", "in_[4 * stride]", "fact[0]"));
170 mixin(CMUL2!("z[3][1]", "z[3][2]", "in_[4 * stride]", "fact[1]"));
172 out_[0].re = in_[0].re + in_[stride].re + in_[2 * stride].re + in_[3 * stride].re + in_[4 * stride].re;
173 out_[0].im = in_[0].im + in_[stride].im + in_[2 * stride].im + in_[3 * stride].im + in_[4 * stride].im;
175 out_[1].re = in_[0].re + z[0][0].re + z[1][1].re + z[2][2].re + z[3][3].re;
176 out_[1].im = in_[0].im + z[0][0].im + z[1][1].im + z[2][2].im + z[3][3].im;
178 out_[2].re = in_[0].re + z[0][1].re + z[1][3].re + z[2][0].re + z[3][2].re;
179 out_[2].im = in_[0].im + z[0][1].im + z[1][3].im + z[2][0].im + z[3][2].im;
181 out_[3].re = in_[0].re + z[0][2].re + z[1][0].re + z[2][3].re + z[3][1].re;
182 out_[3].im = in_[0].im + z[0][2].im + z[1][0].im + z[2][3].im + z[3][1].im;
184 out_[4].re = in_[0].re + z[0][3].re + z[1][2].re + z[2][1].re + z[3][0].re;
185 out_[4].im = in_[0].im + z[0][3].im + z[1][2].im + z[2][1].im + z[3][0].im;
188 private void fft15 (IMDCT15Context* s, FFTComplex* out_, const(FFTComplex)* in_, ptrdiff_t stride) {
189 const(FFTComplex)* exptab = s.exptab[0];
190 FFTComplex[5] tmp;
191 FFTComplex[5] tmp1;
192 FFTComplex[5] tmp2;
193 int k;
195 fft5(tmp.ptr, in_, stride * 3);
196 fft5(tmp1.ptr, in_ + stride, stride * 3);
197 fft5(tmp2.ptr, in_ + 2 * stride, stride * 3);
199 for (k = 0; k < 5; k++) {
200 FFTComplex t1, t2;
202 mixin(CMUL!("t1", "tmp1[k]", "exptab[k]"));
203 mixin(CMUL!("t2", "tmp2[k]", "exptab[2 * k]"));
204 out_[k].re = tmp[k].re + t1.re + t2.re;
205 out_[k].im = tmp[k].im + t1.im + t2.im;
207 mixin(CMUL!("t1", "tmp1[k]", "exptab[k + 5]"));
208 mixin(CMUL!("t2", "tmp2[k]", "exptab[2 * (k + 5)]"));
209 out_[k + 5].re = tmp[k].re + t1.re + t2.re;
210 out_[k + 5].im = tmp[k].im + t1.im + t2.im;
212 mixin(CMUL!("t1", "tmp1[k]", "exptab[k + 10]"));
213 mixin(CMUL!("t2", "tmp2[k]", "exptab[2 * k + 5]"));
214 out_[k + 10].re = tmp[k].re + t1.re + t2.re;
215 out_[k + 10].im = tmp[k].im + t1.im + t2.im;
220 * FFT of the length 15 * (2^N)
222 private void fft_calc (IMDCT15Context* s, FFTComplex* out_, const(FFTComplex)* in_, int N, ptrdiff_t stride) {
223 if (N) {
224 const(FFTComplex)* exptab = s.exptab[N];
225 const int len2 = 15 * (1 << (N - 1));
226 int k;
228 fft_calc(s, out_, in_, N - 1, stride * 2);
229 fft_calc(s, out_ + len2, in_ + stride, N - 1, stride * 2);
231 for (k = 0; k < len2; k++) {
232 FFTComplex t;
234 mixin(CMUL!("t", "out_[len2 + k]", "exptab[k]"));
236 out_[len2 + k].re = out_[k].re - t.re;
237 out_[len2 + k].im = out_[k].im - t.im;
239 out_[k].re += t.re;
240 out_[k].im += t.im;
242 } else {
243 fft15(s, out_, in_, stride);
247 private void imdct15_half (IMDCT15Context* s, float* dst, const(float)* src, ptrdiff_t stride, float scale) {
248 FFTComplex *z = cast(FFTComplex *)dst;
249 const int len8 = s.len4 / 2;
250 const(float)* in1 = src;
251 const(float)* in2 = src + (s.len2 - 1) * stride;
252 int i;
254 for (i = 0; i < s.len4; i++) {
255 FFTComplex tmp = { *in2, *in1 };
256 mixin(CMUL!("s.tmp[i]", "tmp", "s.twiddle_exptab[i]"));
257 in1 += 2 * stride;
258 in2 -= 2 * stride;
261 fft_calc(s, z, s.tmp, s.fft_n, 1);
263 for (i = 0; i < len8; i++) {
264 float r0, i0, r1, i1;
266 mixin(CMUL3!("r0", "i1", "z[len8 - i - 1].im", "z[len8 - i - 1].re", "s.twiddle_exptab[len8 - i - 1].im", "s.twiddle_exptab[len8 - i - 1].re"));
267 mixin(CMUL3!("r1", "i0", "z[len8 + i].im", "z[len8 + i].re", "s.twiddle_exptab[len8 + i].im", "s.twiddle_exptab[len8 + i].re"));
268 z[len8 - i - 1].re = scale * r0;
269 z[len8 - i - 1].im = scale * i0;
270 z[len8 + i].re = scale * r1;
271 z[len8 + i].im = scale * i1;