2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 module imdct15
/*is aliced*/;
23 * Celt non-power of 2 iMDCT
30 struct IMDCT15Context
{
37 FFTComplex
* twiddle_exptab
;
39 FFTComplex
*[6] exptab
;
42 * Calculate the middle half of the iMDCT
44 void function (IMDCT15Context
* s
, float* dst
, const(float)* src
, ptrdiff_t src_stride
, float scale
) imdct_half
;
49 * Init an iMDCT of the length 2 * 15 * (2^N)
51 int ff_imdct15_init(IMDCT15Context **s, int N);
56 void ff_imdct15_uninit(IMDCT15Context **s);
58 void ff_imdct15_init_aarch64(IMDCT15Context *s);
62 // minimal iMDCT size to make SIMD opts easier
63 enum CELT_MIN_IMDCT_SIZE
= 120;
66 enum CMUL3(string cre
, string cim
, string are
, string aim
, string bre
, string bim
) =
67 ""~cre
~" = "~are
~" * "~bre
~" - "~aim
~" * "~bim
~";\n"~
68 ""~cim
~" = "~are
~" * "~bim
~" + "~aim
~" * "~bre
~";\n";
70 enum CMUL(string c
, string a
, string b
) = CMUL3
!("("~c
~").re", "("~c
~").im", "("~a
~").re", "("~a
~").im", "("~b
~").re", "("~b
~").im");
73 // d = a * conjugate(b)
74 enum CMUL2(string c
, string d
, string a
, string b
) =
76 "float are = ("~a
~").re;\n"~
77 "float aim = ("~a
~").im;\n"~
78 "float bre = ("~b
~").re;\n"~
79 "float bim = ("~b
~").im;\n"~
80 "float rr = are * bre;\n"~
81 "float ri = are * bim;\n"~
82 "float ir = aim * bre;\n"~
83 "float ii = aim * bim;\n"~
84 "("~c
~").re = rr - ii;\n"~
85 "("~c
~").im = ri + ir;\n"~
86 "("~d
~").re = rr + ii;\n"~
87 "("~d
~").im = -ri + ir;\n"~
90 /*av_cold*/ void ff_imdct15_uninit (IMDCT15Context
** ps
) {
91 IMDCT15Context
* s
= *ps
;
92 if (s
is null) return;
93 for (int i
= 0; i
< /*FF_ARRAY_ELEMS*/cast(int)s
.exptab
.length
; ++i
) av_freep(&s
.exptab
[i
]);
94 av_freep(&s
.twiddle_exptab
);
99 //static void imdct15_half (IMDCT15Context* s, float* dst, const(float)* src, ptrdiff_t stride, float scale);
101 /*av_cold*/ int ff_imdct15_init (IMDCT15Context
** ps
, int N
) {
102 import std
.math
: cos
, sin
, PI
;
105 int len2
= 15*(1<<N
);
109 if (len2
> CELT_MAX_FRAME_SIZE || len2
< CELT_MIN_IMDCT_SIZE
) return AVERROR(EINVAL
);
111 s
= av_mallocz
!IMDCT15Context();
112 if (!s
) return AVERROR(ENOMEM
);
118 s
.tmp
= av_malloc_array
!(typeof(*s
.tmp
))(len
);
119 if (!s
.tmp
) goto fail
;
121 s
.twiddle_exptab
= av_malloc_array
!(typeof(*s
.twiddle_exptab
))(s
.len4
);
122 if (!s
.twiddle_exptab
) goto fail
;
124 for (i
= 0; i
< s
.len4
; i
++) {
125 s
.twiddle_exptab
[i
].re
= cos(2 * PI
* (i
+ 0.125 + s
.len4
) / len
);
126 s
.twiddle_exptab
[i
].im
= sin(2 * PI
* (i
+ 0.125 + s
.len4
) / len
);
129 for (i
= 0; i
< /*FF_ARRAY_ELEMS*/cast(int)s
.exptab
.length
; i
++) {
130 int NN
= 15 * (1 << i
);
131 s
.exptab
[i
] = av_malloc
!(typeof(*s
.exptab
[i
]))(FFMAX(NN
, 19));
132 if (!s
.exptab
[i
]) goto fail
;
133 for (j
= 0; j
< NN
; j
++) {
134 s
.exptab
[i
][j
].re
= cos(2 * PI
* j
/ NN
);
135 s
.exptab
[i
][j
].im
= sin(2 * PI
* j
/ NN
);
139 // wrap around to simplify fft15
140 for (j
= 15; j
< 19; j
++) s
.exptab
[0][j
] = s
.exptab
[0][j
- 15];
142 s
.imdct_half
= &imdct15_half
;
144 //if (ARCH_AARCH64) ff_imdct15_init_aarch64(s);
151 ff_imdct15_uninit(&s
);
152 return AVERROR(ENOMEM
);
156 private void fft5(FFTComplex
* out_
, const(FFTComplex
)* in_
, ptrdiff_t stride
) {
157 // [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5)
158 static immutable FFTComplex
[2] fact
= [ { 0.30901699437494745, 0.95105651629515353 },
159 { -0.80901699437494734, 0.58778525229247325 } ];
163 mixin(CMUL2
!("z[0][0]", "z[0][3]", "in_[1 * stride]", "fact[0]"));
164 mixin(CMUL2
!("z[0][1]", "z[0][2]", "in_[1 * stride]", "fact[1]"));
165 mixin(CMUL2
!("z[1][0]", "z[1][3]", "in_[2 * stride]", "fact[0]"));
166 mixin(CMUL2
!("z[1][1]", "z[1][2]", "in_[2 * stride]", "fact[1]"));
167 mixin(CMUL2
!("z[2][0]", "z[2][3]", "in_[3 * stride]", "fact[0]"));
168 mixin(CMUL2
!("z[2][1]", "z[2][2]", "in_[3 * stride]", "fact[1]"));
169 mixin(CMUL2
!("z[3][0]", "z[3][3]", "in_[4 * stride]", "fact[0]"));
170 mixin(CMUL2
!("z[3][1]", "z[3][2]", "in_[4 * stride]", "fact[1]"));
172 out_
[0].re
= in_
[0].re
+ in_
[stride
].re
+ in_
[2 * stride
].re
+ in_
[3 * stride
].re
+ in_
[4 * stride
].re
;
173 out_
[0].im
= in_
[0].im
+ in_
[stride
].im
+ in_
[2 * stride
].im
+ in_
[3 * stride
].im
+ in_
[4 * stride
].im
;
175 out_
[1].re
= in_
[0].re
+ z
[0][0].re
+ z
[1][1].re
+ z
[2][2].re
+ z
[3][3].re
;
176 out_
[1].im
= in_
[0].im
+ z
[0][0].im
+ z
[1][1].im
+ z
[2][2].im
+ z
[3][3].im
;
178 out_
[2].re
= in_
[0].re
+ z
[0][1].re
+ z
[1][3].re
+ z
[2][0].re
+ z
[3][2].re
;
179 out_
[2].im
= in_
[0].im
+ z
[0][1].im
+ z
[1][3].im
+ z
[2][0].im
+ z
[3][2].im
;
181 out_
[3].re
= in_
[0].re
+ z
[0][2].re
+ z
[1][0].re
+ z
[2][3].re
+ z
[3][1].re
;
182 out_
[3].im
= in_
[0].im
+ z
[0][2].im
+ z
[1][0].im
+ z
[2][3].im
+ z
[3][1].im
;
184 out_
[4].re
= in_
[0].re
+ z
[0][3].re
+ z
[1][2].re
+ z
[2][1].re
+ z
[3][0].re
;
185 out_
[4].im
= in_
[0].im
+ z
[0][3].im
+ z
[1][2].im
+ z
[2][1].im
+ z
[3][0].im
;
188 private void fft15 (IMDCT15Context
* s
, FFTComplex
* out_
, const(FFTComplex
)* in_
, ptrdiff_t stride
) {
189 const(FFTComplex
)* exptab
= s
.exptab
[0];
195 fft5(tmp
.ptr
, in_
, stride
* 3);
196 fft5(tmp1
.ptr
, in_
+ stride
, stride
* 3);
197 fft5(tmp2
.ptr
, in_
+ 2 * stride
, stride
* 3);
199 for (k
= 0; k
< 5; k
++) {
202 mixin(CMUL
!("t1", "tmp1[k]", "exptab[k]"));
203 mixin(CMUL
!("t2", "tmp2[k]", "exptab[2 * k]"));
204 out_
[k
].re
= tmp
[k
].re
+ t1
.re
+ t2
.re
;
205 out_
[k
].im
= tmp
[k
].im
+ t1
.im
+ t2
.im
;
207 mixin(CMUL
!("t1", "tmp1[k]", "exptab[k + 5]"));
208 mixin(CMUL
!("t2", "tmp2[k]", "exptab[2 * (k + 5)]"));
209 out_
[k
+ 5].re
= tmp
[k
].re
+ t1
.re
+ t2
.re
;
210 out_
[k
+ 5].im
= tmp
[k
].im
+ t1
.im
+ t2
.im
;
212 mixin(CMUL
!("t1", "tmp1[k]", "exptab[k + 10]"));
213 mixin(CMUL
!("t2", "tmp2[k]", "exptab[2 * k + 5]"));
214 out_
[k
+ 10].re
= tmp
[k
].re
+ t1
.re
+ t2
.re
;
215 out_
[k
+ 10].im
= tmp
[k
].im
+ t1
.im
+ t2
.im
;
220 * FFT of the length 15 * (2^N)
222 private void fft_calc (IMDCT15Context
* s
, FFTComplex
* out_
, const(FFTComplex
)* in_
, int N
, ptrdiff_t stride
) {
224 const(FFTComplex
)* exptab
= s
.exptab
[N
];
225 const int len2
= 15 * (1 << (N
- 1));
228 fft_calc(s
, out_
, in_
, N
- 1, stride
* 2);
229 fft_calc(s
, out_
+ len2
, in_
+ stride
, N
- 1, stride
* 2);
231 for (k
= 0; k
< len2
; k
++) {
234 mixin(CMUL
!("t", "out_[len2 + k]", "exptab[k]"));
236 out_
[len2
+ k
].re
= out_
[k
].re
- t
.re
;
237 out_
[len2
+ k
].im
= out_
[k
].im
- t
.im
;
243 fft15(s
, out_
, in_
, stride
);
247 private void imdct15_half (IMDCT15Context
* s
, float* dst
, const(float)* src
, ptrdiff_t stride
, float scale
) {
248 FFTComplex
*z
= cast(FFTComplex
*)dst
;
249 const int len8
= s
.len4
/ 2;
250 const(float)* in1
= src
;
251 const(float)* in2
= src
+ (s
.len2
- 1) * stride
;
254 for (i
= 0; i
< s
.len4
; i
++) {
255 FFTComplex tmp
= { *in2
, *in1
};
256 mixin(CMUL
!("s.tmp[i]", "tmp", "s.twiddle_exptab[i]"));
261 fft_calc(s
, z
, s
.tmp
, s
.fft_n
, 1);
263 for (i
= 0; i
< len8
; i
++) {
264 float r0
, i0
, r1
, i1
;
266 mixin(CMUL3
!("r0", "i1", "z[len8 - i - 1].im", "z[len8 - i - 1].re", "s.twiddle_exptab[len8 - i - 1].im", "s.twiddle_exptab[len8 - i - 1].re"));
267 mixin(CMUL3
!("r1", "i0", "z[len8 + i].im", "z[len8 + i].re", "s.twiddle_exptab[len8 + i].im", "s.twiddle_exptab[len8 + i].re"));
268 z
[len8
- i
- 1].re
= scale
* r0
;
269 z
[len8
- i
- 1].im
= scale
* i0
;
270 z
[len8
+ i
].re
= scale
* r1
;
271 z
[len8
+ i
].im
= scale
* i1
;