Remove floor0 stuff from IRAM. Results in a tiny speed decrease for files encoded...
[kugel-rb.git] / apps / codecs / libtremor / floor0.c
blobcceb113a6b149520173361423dc0b36e980e8681
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: floor backend 0 implementation
16 ********************************************************************/
18 #include "config-tremor.h"
19 #include <string.h>
20 #include <math.h>
21 #include "ogg.h"
22 #include "ivorbiscodec.h"
23 #include "codec_internal.h"
24 #include "registry.h"
25 #include "codebook.h"
26 #include "misc.h"
27 #include "os.h"
29 #define LSP_FRACBITS 14
31 typedef struct {
32 long n;
33 int ln;
34 int m;
35 int *linearmap;
37 vorbis_info_floor0 *vi;
38 ogg_int32_t *lsp_look;
40 } vorbis_look_floor0;
42 /*************** LSP decode ********************/
44 #include "lsp_lookup.h"
46 /* interpolated 1./sqrt(p) where .5 <= a < 1. (.100000... to .111111...) in
47 16.16 format
48 returns in m.8 format */
50 static const long ADJUST_SQRT2[2] ={8192,5792};
51 static inline ogg_int32_t vorbis_invsqlook_i(long a,long e){
52 long i=(a&0x7fff)>>(INVSQ_LOOKUP_I_SHIFT-1);
53 long d=a&INVSQ_LOOKUP_I_MASK; /* 0.10 */
54 long val=INVSQ_LOOKUP_I[i]- /* 1.16 */
55 ((INVSQ_LOOKUP_IDel[i]*d)>>INVSQ_LOOKUP_I_SHIFT); /* result 1.16 */
56 val*=ADJUST_SQRT2[e&1];
57 e=(e>>1)+21;
58 return(val>>e);
61 /* interpolated lookup based fromdB function, domain -140dB to 0dB only */
62 /* a is in n.12 format */
63 static inline ogg_int32_t vorbis_fromdBlook_i(long a){
64 int i=(-a)>>(12-FROMdB2_SHIFT);
65 if(i<0) return 0x7fffffff;
66 if(i>=(FROMdB_LOOKUP_SZ<<FROMdB_SHIFT))return 0;
68 return FROMdB_LOOKUP[i>>FROMdB_SHIFT] * FROMdB2_LOOKUP[i&FROMdB2_MASK];
71 /* interpolated lookup based cos function, domain 0 to PI only */
72 /* a is in 0.16 format, where 0==0, 2^^16-1==PI, return 0.14 */
73 static inline ogg_int32_t vorbis_coslook_i(long a){
74 int i=a>>COS_LOOKUP_I_SHIFT;
75 int d=a&COS_LOOKUP_I_MASK;
76 return COS_LOOKUP_I[i]- ((d*(COS_LOOKUP_I[i]-COS_LOOKUP_I[i+1]))>>
77 COS_LOOKUP_I_SHIFT);
80 /* interpolated lookup based cos function */
81 /* a is in 0.16 format, where 0==0, 2^^16==PI, return .LSP_FRACBITS */
82 static inline ogg_int32_t vorbis_coslook2_i(long a){
83 a=a&0x1ffff;
85 if(a>0x10000)a=0x20000-a;
87 int i=a>>COS_LOOKUP_I_SHIFT;
88 int d=a&COS_LOOKUP_I_MASK;
89 a=((COS_LOOKUP_I[i]<<COS_LOOKUP_I_SHIFT)-
90 d*(COS_LOOKUP_I[i]-COS_LOOKUP_I[i+1]))>>
91 (COS_LOOKUP_I_SHIFT-LSP_FRACBITS+14);
94 return(a);
97 static const int barklook[28] ={
98 0,100,200,301, 405,516,635,766,
99 912,1077,1263,1476, 1720,2003,2333,2721,
100 3184,3742,4428,5285, 6376,7791,9662,12181,
101 15624,20397,27087,36554
104 /* used in init only; interpolate the long way */
105 static inline ogg_int32_t toBARK(int n){
106 int i;
107 for(i=0;i<27;i++)
108 if(n>=barklook[i] && n<barklook[i+1])break;
110 if(i==27){
111 return 27<<15;
112 }else{
113 int gap=barklook[i+1]-barklook[i];
114 int del=n-barklook[i];
116 return((i<<15)+((del<<15)/gap));
120 static const unsigned char MLOOP_1[64] ={
121 0,10,11,11, 12,12,12,12, 13,13,13,13, 13,13,13,13,
122 14,14,14,14, 14,14,14,14, 14,14,14,14, 14,14,14,14,
123 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
124 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
127 static const unsigned char MLOOP_2[64] ={
128 0,4,5,5, 6,6,6,6, 7,7,7,7, 7,7,7,7,
129 8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8,
130 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9,
131 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9,
134 static const unsigned char MLOOP_3[8] ={0,1,2,2,3,3,3,3};
136 static void vorbis_lsp_to_curve(ogg_int32_t *curve,int *map,int n,int ln,
137 ogg_int32_t *lsp,int m,
138 ogg_int32_t amp,
139 ogg_int32_t ampoffset,
140 ogg_int32_t *icos){
142 (void)ln;
143 /* 0 <= m < 256 */
145 /* set up for using all int later */
146 int i;
147 int ampoffseti=ampoffset*4096;
148 int ampi=amp;
149 ogg_int32_t *ilsp=(ogg_int32_t *)alloca(m*sizeof(*ilsp));
150 /* lsp is in 8.24, range 0 to PI; coslook wants it in .16 0 to 1*/
151 for(i=0;i<m;i++){
152 #ifndef _LOW_ACCURACY_
153 ogg_int32_t val=MULT32(lsp[i],0x517cc2);
154 #else
155 ogg_int32_t val=((lsp[i]>>10)*0x517d)>>14;
156 #endif
158 /* safeguard against a malicious stream */
159 if(val<0 || (val>>COS_LOOKUP_I_SHIFT)>=COS_LOOKUP_I_SZ){
160 memset(curve,0,sizeof(*curve)*n);
161 return;
164 ilsp[i]=vorbis_coslook_i(val);
167 i=0;
168 while(i<n){
169 int j,k=map[i];
170 ogg_uint32_t pi=46341; /* 2**-.5 in 0.16 */
171 ogg_uint32_t qi=46341;
172 ogg_int32_t qexp=0,shift;
173 ogg_int32_t wi=icos[k];
175 #ifdef _V_LSP_MATH_ASM
176 (void)shift; /* kill warnings */
177 (void)j;
178 lsp_loop_asm(&qi,&pi,&qexp,ilsp,wi,m);
180 pi=((pi*pi)>>16);
181 qi=((qi*qi)>>16);
183 if(m&1){
184 qexp= qexp*2-28*((m+1)>>1)+m;
185 pi*=(1<<14)-((wi*wi)>>14);
186 qi+=pi>>14;
187 }else{
188 qexp= qexp*2-13*m;
190 pi*=(1<<14)-wi;
191 qi*=(1<<14)+wi;
193 qi=(qi+pi)>>14;
196 if(qi&0xffff0000){ /* checks for 1.xxxxxxxxxxxxxxxx */
197 qi>>=1; qexp++;
198 }else
199 lsp_norm_asm(&qi,&qexp);
201 #else
203 qi*=labs(ilsp[0]-wi);
204 pi*=labs(ilsp[1]-wi);
206 for(j=3;j<m;j+=2){
207 if(!(shift=MLOOP_1[(pi|qi)>>25]))
208 if(!(shift=MLOOP_2[(pi|qi)>>19]))
209 shift=MLOOP_3[(pi|qi)>>16];
210 qi=(qi>>shift)*labs(ilsp[j-1]-wi);
211 pi=(pi>>shift)*labs(ilsp[j]-wi);
212 qexp+=shift;
214 if(!(shift=MLOOP_1[(pi|qi)>>25]))
215 if(!(shift=MLOOP_2[(pi|qi)>>19]))
216 shift=MLOOP_3[(pi|qi)>>16];
218 /* pi,qi normalized collectively, both tracked using qexp */
220 if(m&1){
221 /* odd order filter; slightly assymetric */
222 /* the last coefficient */
223 qi=(qi>>shift)*labs(ilsp[j-1]-wi);
224 pi=(pi>>shift)<<14;
225 qexp+=shift;
227 if(!(shift=MLOOP_1[(pi|qi)>>25]))
228 if(!(shift=MLOOP_2[(pi|qi)>>19]))
229 shift=MLOOP_3[(pi|qi)>>16];
231 pi>>=shift;
232 qi>>=shift;
233 qexp+=shift-14*((m+1)>>1);
235 pi=((pi*pi)>>16);
236 qi=((qi*qi)>>16);
237 qexp=qexp*2+m;
239 pi*=(1<<14)-((wi*wi)>>14);
240 qi+=pi>>14;
242 }else{
243 /* even order filter; still symmetric */
245 /* p*=p(1-w), q*=q(1+w), let normalization drift because it isn't
246 worth tracking step by step */
248 pi>>=shift;
249 qi>>=shift;
250 qexp+=shift-7*m;
252 pi=((pi*pi)>>16);
253 qi=((qi*qi)>>16);
254 qexp=qexp*2+m;
256 pi*=(1<<14)-wi;
257 qi*=(1<<14)+wi;
258 qi=(qi+pi)>>14;
263 /* we've let the normalization drift because it wasn't important;
264 however, for the lookup, things must be normalized again. We
265 need at most one right shift or a number of left shifts */
267 if(qi&0xffff0000){ /* checks for 1.xxxxxxxxxxxxxxxx */
268 qi>>=1; qexp++;
269 }else
270 while(qi && !(qi&0x8000)){ /* checks for 0.0xxxxxxxxxxxxxxx or less*/
271 qi<<=1; qexp--;
274 #endif
276 amp=vorbis_fromdBlook_i(ampi* /* n.4 */
277 vorbis_invsqlook_i(qi,qexp)-
278 /* m.8, m+n<=8 */
279 ampoffseti); /* 8.12[0] */
281 #ifdef _LOW_ACCURACY_
282 amp>>=9;
283 #endif
284 curve[i]= MULT31_SHIFT15(curve[i],amp);
285 while(map[++i]==k) curve[i]= MULT31_SHIFT15(curve[i],amp);
289 /*************** vorbis decode glue ************/
291 static void floor0_free_info(vorbis_info_floor *i){
292 vorbis_info_floor0 *info=(vorbis_info_floor0 *)i;
293 if(info){
294 memset(info,0,sizeof(*info));
295 _ogg_free(info);
299 static void floor0_free_look(vorbis_look_floor *i){
300 vorbis_look_floor0 *look=(vorbis_look_floor0 *)i;
301 if(look){
303 if(look->linearmap)_ogg_free(look->linearmap);
304 if(look->lsp_look)_ogg_free(look->lsp_look);
305 memset(look,0,sizeof(*look));
306 _ogg_free(look);
310 static vorbis_info_floor *floor0_unpack (vorbis_info *vi,oggpack_buffer *opb){
311 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
312 int j;
314 vorbis_info_floor0 *info=(vorbis_info_floor0 *)_ogg_malloc(sizeof(*info));
315 info->order=oggpack_read(opb,8);
316 info->rate=oggpack_read(opb,16);
317 info->barkmap=oggpack_read(opb,16);
318 info->ampbits=oggpack_read(opb,6);
319 info->ampdB=oggpack_read(opb,8);
320 info->numbooks=oggpack_read(opb,4)+1;
322 if(info->order<1)goto err_out;
323 if(info->rate<1)goto err_out;
324 if(info->barkmap<1)goto err_out;
325 if(info->numbooks<1)goto err_out;
327 for(j=0;j<info->numbooks;j++){
328 info->books[j]=oggpack_read(opb,8);
329 if(info->books[j]<0 || info->books[j]>=ci->books)goto err_out;
331 return(info);
333 err_out:
334 floor0_free_info(info);
335 return(NULL);
338 /* initialize Bark scale and normalization lookups. We could do this
339 with static tables, but Vorbis allows a number of possible
340 combinations, so it's best to do it computationally.
342 The below is authoritative in terms of defining scale mapping.
343 Note that the scale depends on the sampling rate as well as the
344 linear block and mapping sizes */
346 static vorbis_look_floor *floor0_look (vorbis_dsp_state *vd,vorbis_info_mode *mi,
347 vorbis_info_floor *i){
348 int j;
349 // ogg_int32_t scale;
350 vorbis_info *vi=vd->vi;
351 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
352 vorbis_info_floor0 *info=(vorbis_info_floor0 *)i;
353 vorbis_look_floor0 *look=(vorbis_look_floor0 *)_ogg_calloc(1,sizeof(*look));
354 look->m=info->order;
355 look->n=ci->blocksizes[mi->blockflag]/2;
356 look->ln=info->barkmap;
357 look->vi=info;
359 /* the mapping from a linear scale to a smaller bark scale is
360 straightforward. We do *not* make sure that the linear mapping
361 does not skip bark-scale bins; the decoder simply skips them and
362 the encoder may do what it wishes in filling them. They're
363 necessary in some mapping combinations to keep the scale spacing
364 accurate */
365 look->linearmap=(int *)_ogg_malloc((look->n+1)*sizeof(*look->linearmap));
366 for(j=0;j<look->n;j++){
368 int val=(look->ln*
369 ((toBARK(info->rate/2*j/look->n)<<11)/toBARK(info->rate/2)))>>11;
371 if(val>=look->ln)val=look->ln-1; /* guard against the approximation */
372 look->linearmap[j]=val;
374 look->linearmap[j]=-1;
376 look->lsp_look=(ogg_int32_t *)_ogg_malloc(look->ln*sizeof(*look->lsp_look));
377 for(j=0;j<look->ln;j++)
378 look->lsp_look[j]=vorbis_coslook2_i(0x10000*j/look->ln);
380 return look;
383 static void *floor0_inverse1(vorbis_block *vb,vorbis_look_floor *i){
384 vorbis_look_floor0 *look=(vorbis_look_floor0 *)i;
385 vorbis_info_floor0 *info=look->vi;
386 int j,k;
388 int ampraw=oggpack_read(&vb->opb,info->ampbits);
389 if(ampraw>0){ /* also handles the -1 out of data case */
390 long maxval=(1<<info->ampbits)-1;
391 int amp=((ampraw*info->ampdB)<<4)/maxval;
392 int booknum=oggpack_read(&vb->opb,_ilog(info->numbooks));
394 if(booknum!=-1 && booknum<info->numbooks){ /* be paranoid */
395 codec_setup_info *ci=(codec_setup_info *)vb->vd->vi->codec_setup;
396 codebook *b=ci->fullbooks+info->books[booknum];
397 ogg_int32_t last=0;
398 ogg_int32_t *lsp=(ogg_int32_t *)_vorbis_block_alloc(vb,sizeof(*lsp)*(look->m+1));
400 for(j=0;j<look->m;j+=b->dim)
401 if(vorbis_book_decodev_set(b,lsp+j,&vb->opb,b->dim,-24)==-1)goto eop;
402 for(j=0;j<look->m;){
403 for(k=0;k<b->dim;k++,j++)lsp[j]+=last;
404 last=lsp[j-1];
407 lsp[look->m]=amp;
408 return(lsp);
411 eop:
412 return(NULL);
415 static int floor0_inverse2(vorbis_block *vb,vorbis_look_floor *i,
416 void *memo,ogg_int32_t *out){
417 vorbis_look_floor0 *look=(vorbis_look_floor0 *)i;
418 vorbis_info_floor0 *info=look->vi;
419 (void)vb;
421 if(memo){
422 ogg_int32_t *lsp=(ogg_int32_t *)memo;
423 ogg_int32_t amp=lsp[look->m];
425 /* take the coefficients back to a spectral envelope curve */
426 vorbis_lsp_to_curve(out,look->linearmap,look->n,look->ln,
427 lsp,look->m,amp,info->ampdB,look->lsp_look);
428 return(1);
430 memset(out,0,sizeof(*out)*look->n);
431 return(0);
434 /* export hooks */
435 const vorbis_func_floor floor0_exportbundle ICONST_ATTR ={
436 &floor0_unpack,&floor0_look,&floor0_free_info,
437 &floor0_free_look,&floor0_inverse1,&floor0_inverse2