Fix yellow.
[kugel-rb.git] / apps / codecs / libspeex / kiss_fft.c
blob5b699a362fde497aa1538e358aba1e92bdb8510f
1 /*
2 Copyright (c) 2003-2004, Mark Borgerding
3 Copyright (c) 2005-2007, Jean-Marc Valin
5 All rights reserved.
7 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
11 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
13 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17 #ifdef HAVE_CONFIG_H
18 #include "config-speex.h"
19 #endif
21 #include "_kiss_fft_guts.h"
22 #include "arch.h"
23 #include "os_support.h"
25 /* The guts header contains all the multiplication and addition macros that are defined for
26 fixed or floating point complex numbers. It also delares the kf_ internal functions.
29 static void kf_bfly2(
30 kiss_fft_cpx * Fout,
31 const size_t fstride,
32 const kiss_fft_cfg st,
33 int m,
34 int N,
35 int mm
38 kiss_fft_cpx * Fout2;
39 kiss_fft_cpx * tw1;
40 kiss_fft_cpx t;
41 if (!st->inverse) {
42 int i,j;
43 kiss_fft_cpx * Fout_beg = Fout;
44 for (i=0;i<N;i++)
46 Fout = Fout_beg + i*mm;
47 Fout2 = Fout + m;
48 tw1 = st->twiddles;
49 for(j=0;j<m;j++)
51 /* Almost the same as the code path below, except that we divide the input by two
52 (while keeping the best accuracy possible) */
53 spx_word32_t tr, ti;
54 tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
55 ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
56 tw1 += fstride;
57 Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
58 Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
59 Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
60 Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
61 ++Fout2;
62 ++Fout;
65 } else {
66 int i,j;
67 kiss_fft_cpx * Fout_beg = Fout;
68 for (i=0;i<N;i++)
70 Fout = Fout_beg + i*mm;
71 Fout2 = Fout + m;
72 tw1 = st->twiddles;
73 for(j=0;j<m;j++)
75 C_MUL (t, *Fout2 , *tw1);
76 tw1 += fstride;
77 C_SUB( *Fout2 , *Fout , t );
78 C_ADDTO( *Fout , t );
79 ++Fout2;
80 ++Fout;
86 static void kf_bfly4(
87 kiss_fft_cpx * Fout,
88 const size_t fstride,
89 const kiss_fft_cfg st,
90 int m,
91 int N,
92 int mm
95 kiss_fft_cpx *tw1,*tw2,*tw3;
96 kiss_fft_cpx scratch[6];
97 const size_t m2=2*m;
98 const size_t m3=3*m;
99 int i, j;
101 if (st->inverse)
103 kiss_fft_cpx * Fout_beg = Fout;
104 for (i=0;i<N;i++)
106 Fout = Fout_beg + i*mm;
107 tw3 = tw2 = tw1 = st->twiddles;
108 for (j=0;j<m;j++)
110 C_MUL(scratch[0],Fout[m] , *tw1 );
111 C_MUL(scratch[1],Fout[m2] , *tw2 );
112 C_MUL(scratch[2],Fout[m3] , *tw3 );
114 C_SUB( scratch[5] , *Fout, scratch[1] );
115 C_ADDTO(*Fout, scratch[1]);
116 C_ADD( scratch[3] , scratch[0] , scratch[2] );
117 C_SUB( scratch[4] , scratch[0] , scratch[2] );
118 C_SUB( Fout[m2], *Fout, scratch[3] );
119 tw1 += fstride;
120 tw2 += fstride*2;
121 tw3 += fstride*3;
122 C_ADDTO( *Fout , scratch[3] );
124 Fout[m].r = scratch[5].r - scratch[4].i;
125 Fout[m].i = scratch[5].i + scratch[4].r;
126 Fout[m3].r = scratch[5].r + scratch[4].i;
127 Fout[m3].i = scratch[5].i - scratch[4].r;
128 ++Fout;
131 } else
133 kiss_fft_cpx * Fout_beg = Fout;
134 for (i=0;i<N;i++)
136 Fout = Fout_beg + i*mm;
137 tw3 = tw2 = tw1 = st->twiddles;
138 for (j=0;j<m;j++)
140 C_MUL4(scratch[0],Fout[m] , *tw1 );
141 C_MUL4(scratch[1],Fout[m2] , *tw2 );
142 C_MUL4(scratch[2],Fout[m3] , *tw3 );
144 Fout->r = PSHR16(Fout->r, 2);
145 Fout->i = PSHR16(Fout->i, 2);
146 C_SUB( scratch[5] , *Fout, scratch[1] );
147 C_ADDTO(*Fout, scratch[1]);
148 C_ADD( scratch[3] , scratch[0] , scratch[2] );
149 C_SUB( scratch[4] , scratch[0] , scratch[2] );
150 Fout[m2].r = PSHR16(Fout[m2].r, 2);
151 Fout[m2].i = PSHR16(Fout[m2].i, 2);
152 C_SUB( Fout[m2], *Fout, scratch[3] );
153 tw1 += fstride;
154 tw2 += fstride*2;
155 tw3 += fstride*3;
156 C_ADDTO( *Fout , scratch[3] );
158 Fout[m].r = scratch[5].r + scratch[4].i;
159 Fout[m].i = scratch[5].i - scratch[4].r;
160 Fout[m3].r = scratch[5].r - scratch[4].i;
161 Fout[m3].i = scratch[5].i + scratch[4].r;
162 ++Fout;
168 static void kf_bfly3(
169 kiss_fft_cpx * Fout,
170 const size_t fstride,
171 const kiss_fft_cfg st,
172 size_t m
175 size_t k=m;
176 const size_t m2 = 2*m;
177 kiss_fft_cpx *tw1,*tw2;
178 kiss_fft_cpx scratch[5];
179 kiss_fft_cpx epi3;
180 epi3 = st->twiddles[fstride*m];
182 tw1=tw2=st->twiddles;
185 if (!st->inverse) {
186 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
189 C_MUL(scratch[1],Fout[m] , *tw1);
190 C_MUL(scratch[2],Fout[m2] , *tw2);
192 C_ADD(scratch[3],scratch[1],scratch[2]);
193 C_SUB(scratch[0],scratch[1],scratch[2]);
194 tw1 += fstride;
195 tw2 += fstride*2;
197 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
198 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
200 C_MULBYSCALAR( scratch[0] , epi3.i );
202 C_ADDTO(*Fout,scratch[3]);
204 Fout[m2].r = Fout[m].r + scratch[0].i;
205 Fout[m2].i = Fout[m].i - scratch[0].r;
207 Fout[m].r -= scratch[0].i;
208 Fout[m].i += scratch[0].r;
210 ++Fout;
211 }while(--k);
214 static void kf_bfly5(
215 kiss_fft_cpx * Fout,
216 const size_t fstride,
217 const kiss_fft_cfg st,
218 int m
221 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
222 int u;
223 kiss_fft_cpx scratch[13];
224 kiss_fft_cpx * twiddles = st->twiddles;
225 kiss_fft_cpx *tw;
226 kiss_fft_cpx ya,yb;
227 ya = twiddles[fstride*m];
228 yb = twiddles[fstride*2*m];
230 Fout0=Fout;
231 Fout1=Fout0+m;
232 Fout2=Fout0+2*m;
233 Fout3=Fout0+3*m;
234 Fout4=Fout0+4*m;
236 tw=st->twiddles;
237 for ( u=0; u<m; ++u ) {
238 if (!st->inverse) {
239 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
241 scratch[0] = *Fout0;
243 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
244 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
245 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
246 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
248 C_ADD( scratch[7],scratch[1],scratch[4]);
249 C_SUB( scratch[10],scratch[1],scratch[4]);
250 C_ADD( scratch[8],scratch[2],scratch[3]);
251 C_SUB( scratch[9],scratch[2],scratch[3]);
253 Fout0->r += scratch[7].r + scratch[8].r;
254 Fout0->i += scratch[7].i + scratch[8].i;
256 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
257 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
259 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
260 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
262 C_SUB(*Fout1,scratch[5],scratch[6]);
263 C_ADD(*Fout4,scratch[5],scratch[6]);
265 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
266 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
267 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
268 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
270 C_ADD(*Fout2,scratch[11],scratch[12]);
271 C_SUB(*Fout3,scratch[11],scratch[12]);
273 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
277 /* perform the butterfly for one stage of a mixed radix FFT */
278 static void kf_bfly_generic(
279 kiss_fft_cpx * Fout,
280 const size_t fstride,
281 const kiss_fft_cfg st,
282 int m,
283 int p
286 int u,k,q1,q;
287 kiss_fft_cpx * twiddles = st->twiddles;
288 kiss_fft_cpx t;
289 kiss_fft_cpx scratchbuf[17];
290 int Norig = st->nfft;
292 /*CHECKBUF(scratchbuf,nscratchbuf,p);*/
293 if (p>17)
294 speex_fatal("KissFFT: max radix supported is 17");
296 for ( u=0; u<m; ++u ) {
297 k=u;
298 for ( q1=0 ; q1<p ; ++q1 ) {
299 scratchbuf[q1] = Fout[ k ];
300 if (!st->inverse) {
301 C_FIXDIV(scratchbuf[q1],p);
303 k += m;
306 k=u;
307 for ( q1=0 ; q1<p ; ++q1 ) {
308 int twidx=0;
309 Fout[ k ] = scratchbuf[0];
310 for (q=1;q<p;++q ) {
311 twidx += fstride * k;
312 if (twidx>=Norig) twidx-=Norig;
313 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
314 C_ADDTO( Fout[ k ] ,t);
316 k += m;
321 static
322 void kf_shuffle(
323 kiss_fft_cpx * Fout,
324 const kiss_fft_cpx * f,
325 const size_t fstride,
326 int in_stride,
327 int * factors,
328 const kiss_fft_cfg st
331 const int p=*factors++; /* the radix */
332 const int m=*factors++; /* stage's fft length/p */
334 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
335 if (m==1)
337 int j;
338 for (j=0;j<p;j++)
340 Fout[j] = *f;
341 f += fstride*in_stride;
343 } else {
344 int j;
345 for (j=0;j<p;j++)
347 kf_shuffle( Fout , f, fstride*p, in_stride, factors,st);
348 f += fstride*in_stride;
349 Fout += m;
354 static
355 void kf_work(
356 kiss_fft_cpx * Fout,
357 const kiss_fft_cpx * f,
358 const size_t fstride,
359 int in_stride,
360 int * factors,
361 const kiss_fft_cfg st,
362 int N,
363 int s2,
364 int m2
367 int i;
368 kiss_fft_cpx * Fout_beg=Fout;
369 const int p=*factors++; /* the radix */
370 const int m=*factors++; /* stage's fft length/p */
371 #if 0
372 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
373 if (m==1)
375 /* int j;
376 for (j=0;j<p;j++)
378 Fout[j] = *f;
379 f += fstride*in_stride;
381 } else {
382 int j;
383 for (j=0;j<p;j++)
385 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
386 f += fstride*in_stride;
387 Fout += m;
391 Fout=Fout_beg;
393 switch (p) {
394 case 2: kf_bfly2(Fout,fstride,st,m); break;
395 case 3: kf_bfly3(Fout,fstride,st,m); break;
396 case 4: kf_bfly4(Fout,fstride,st,m); break;
397 case 5: kf_bfly5(Fout,fstride,st,m); break;
398 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
400 #else
401 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
402 if (m==1)
404 /*for (i=0;i<N;i++)
406 int j;
407 Fout = Fout_beg+i*m2;
408 const kiss_fft_cpx * f2 = f+i*s2;
409 for (j=0;j<p;j++)
411 *Fout++ = *f2;
412 f2 += fstride*in_stride;
415 }else{
416 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
422 switch (p) {
423 case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
424 case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break;
425 case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
426 case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break;
427 default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break;
429 #endif
432 /* facbuf is populated by p1,m1,p2,m2, ...
433 where
434 p[i] * m[i] = m[i-1]
435 m0 = n */
436 static
437 void kf_factor(int n,int * facbuf)
439 int p=4;
441 /*factor out powers of 4, powers of 2, then any remaining primes */
442 do {
443 while (n % p) {
444 switch (p) {
445 case 4: p = 2; break;
446 case 2: p = 3; break;
447 default: p += 2; break;
449 if (p>32000 || (spx_int32_t)p*(spx_int32_t)p > n)
450 p = n; /* no more factors, skip to end */
452 n /= p;
453 *facbuf++ = p;
454 *facbuf++ = n;
455 } while (n > 1);
459 * User-callable function to allocate all necessary storage space for the fft.
461 * The return value is a contiguous block of memory, allocated with malloc. As such,
462 * It can be freed with free(), rather than a kiss_fft-specific function.
463 * */
464 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
466 kiss_fft_cfg st=NULL;
467 size_t memneeded = sizeof(struct kiss_fft_state)
468 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
470 if ( lenmem==NULL ) {
471 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
472 }else{
473 if (mem != NULL && *lenmem >= memneeded)
474 st = (kiss_fft_cfg)mem;
475 *lenmem = memneeded;
477 if (st) {
478 int i;
479 st->nfft=nfft;
480 st->inverse = inverse_fft;
481 #ifdef FIXED_POINT
482 for (i=0;i<nfft;++i) {
483 spx_word32_t phase = i;
484 if (!st->inverse)
485 phase = -phase;
486 kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
488 #else
489 for (i=0;i<nfft;++i) {
490 const double pi=3.14159265358979323846264338327;
491 double phase = ( -2*pi /nfft ) * i;
492 if (st->inverse)
493 phase *= -1;
494 kf_cexp(st->twiddles+i, phase );
496 #endif
497 kf_factor(nfft,st->factors);
499 return st;
505 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
507 if (fin == fout)
509 speex_fatal("In-place FFT not supported");
510 /*CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
511 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
512 SPEEX_MOVE(fout,tmpbuf,st->nfft);*/
513 } else {
514 kf_shuffle( fout, fin, 1,in_stride, st->factors,st);
515 kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
519 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
521 kiss_fft_stride(cfg,fin,fout,1);