Improved bitrev with approach suggested by Jens Arnold, gives 0.5%-1% speedup for...
[kugel-rb.git] / apps / codecs / libtremor / block.c
blobfe736c8defa779a3faa1551d7fe68f0b32235b03
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: PCM data vector blocking, windowing and dis/reassembly
16 ********************************************************************/
18 #include "config-tremor.h"
19 #include <stdio.h>
20 #include <string.h>
21 #include "ogg.h"
22 #include "ivorbiscodec.h"
23 #include "codec_internal.h"
25 #include "window.h"
26 #include "registry.h"
27 #include "misc.h"
/* Number of bits needed to represent (v-1); 0 for v==0 and v==1.
   Used to size bit fields that must hold v distinct values. */
static int ilog(unsigned int v){
  int bits;

  if(v==0)return 0;

  for(bits=0,v-=1;v!=0;v>>=1)
    bits++;

  return bits;
}
39 static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR;
40 static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR;
41 static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR;
43 /* pcm accumulator examples (not exhaustive):
45 <-------------- lW ---------------->
46 <--------------- W ---------------->
47 : .....|..... _______________ |
48 : .''' | '''_--- | |\ |
49 :.....''' |_____--- '''......| | \_______|
50 :.................|__________________|_______|__|______|
51 |<------ Sl ------>| > Sr < |endW
52 |beginSl |endSl | |endSr
53 |beginW |endlW |beginSr
56 |< lW >|
57 <--------------- W ---------------->
58 | | .. ______________ |
59 | | ' `/ | ---_ |
60 |___.'___/`. | ---_____|
61 |_______|__|_______|_________________|
62 | >|Sl|< |<------ Sr ----->|endW
63 | | |endSl |beginSr |endSr
64 |beginW | |endlW
65 mult[0] |beginSl mult[n]
67 <-------------- lW ----------------->
68 |<--W-->|
69 : .............. ___ | |
70 : .''' |`/ \ | |
71 :.....''' |/`....\|...|
72 :.........................|___|___|___|
73 |Sl |Sr |endW
74 | | |endSr
75 | |beginSr
76 | |endSl
77 |beginSl
78 |beginW
81 /* block abstraction setup *********************************************/
/* Granularity, in bytes, that _vorbis_block_alloc() rounds every request
   up to.  The mask arithmetic there requires a power of two.  May be
   predefined by the build to override the default. */
#if !defined(WORD_ALIGN)
#  define WORD_ALIGN 8
#endif
87 int vorbis_block_init(vorbis_dsp_state *v, vorbis_block *vb){
88 memset(vb,0,sizeof(*vb));
89 vb->vd=v;
90 vb->localalloc=0;
91 vb->localstore=NULL;
93 return(0);
96 void *_vorbis_block_alloc(vorbis_block *vb,long bytes){
97 bytes=(bytes+(WORD_ALIGN-1)) & ~(WORD_ALIGN-1);
98 if(bytes+vb->localtop>vb->localalloc){
99 /* can't just _ogg_realloc... there are outstanding pointers */
100 if(vb->localstore){
101 struct alloc_chain *link=(struct alloc_chain *)_ogg_malloc(sizeof(*link));
102 vb->totaluse+=vb->localtop;
103 link->next=vb->reap;
104 link->ptr=vb->localstore;
105 vb->reap=link;
107 /* highly conservative */
108 vb->localalloc=bytes;
109 vb->localstore=_ogg_malloc(vb->localalloc);
110 vb->localtop=0;
113 void *ret=(void *)(((char *)vb->localstore)+vb->localtop);
114 vb->localtop+=bytes;
115 return ret;
119 /* reap the chain, pull the ripcord */
120 void _vorbis_block_ripcord(vorbis_block *vb){
121 /* reap the chain */
122 struct alloc_chain *reap=vb->reap;
123 while(reap){
124 struct alloc_chain *next=reap->next;
125 _ogg_free(reap->ptr);
126 memset(reap,0,sizeof(*reap));
127 _ogg_free(reap);
128 reap=next;
130 /* consolidate storage */
131 if(vb->totaluse){
132 vb->localstore=_ogg_realloc(vb->localstore,vb->totaluse+vb->localalloc);
133 vb->localalloc+=vb->totaluse;
134 vb->totaluse=0;
137 /* pull the ripcord */
138 vb->localtop=0;
139 vb->reap=NULL;
142 int vorbis_block_clear(vorbis_block *vb){
143 _vorbis_block_ripcord(vb);
144 if(vb->localstore)_ogg_free(vb->localstore);
146 memset(vb,0,sizeof(*vb));
147 return(0);
150 static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
151 int i;
152 long b_size[2];
153 LOOKUP_TNC *iramposw;
155 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
156 private_state *b=NULL;
158 memset(v,0,sizeof(*v));
159 v->reset_pcmb=true;
160 b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b)));
162 v->vi=vi;
163 b->modebits=ilog(ci->modes);
165 /* allocate IRAM buffer for the PCM data generated by synthesis */
166 iram_malloc_init();
167 v->first_pcm=(ogg_int32_t *)iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
168 /* when can't allocate IRAM buffer, allocate normal RAM buffer */
169 if(v->first_pcm == NULL){
170 v->first_pcm=(ogg_int32_t *)_ogg_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
173 v->centerW=0;
175 /* Vorbis I uses only window type 0 */
176 b_size[0]=ci->blocksizes[0]/2;
177 b_size[1]=ci->blocksizes[1]/2;
178 b->window[0]=_vorbis_window(0,b_size[0]);
179 b->window[1]=_vorbis_window(0,b_size[1]);
181 /* allocate IRAM buffer for window tables too, if sufficient iram available */
182 /* give preference to the larger window over the smaller window
183 (on the assumption that both windows are equally likely used) */
184 for(i=1; i>=0; i--){
185 iramposw=(LOOKUP_TNC *)iram_malloc(b_size[i]*sizeof(LOOKUP_TNC));
186 if(iramposw!=NULL) {
187 memcpy(iramposw, b->window[i], b_size[i]*sizeof(LOOKUP_TNC));
188 b->window[i]=iramposw;
192 /* finish the codebooks */
193 if(!ci->fullbooks){
194 ci->fullbooks=(codebook *)_ogg_calloc(ci->books,sizeof(*ci->fullbooks));
195 for(i=0;i<ci->books;i++){
196 vorbis_book_init_decode(ci->fullbooks+i,ci->book_param[i]);
197 /* decode codebooks are now standalone after init */
198 vorbis_staticbook_destroy(ci->book_param[i]);
199 ci->book_param[i]=NULL;
203 /* if we can get away with it, put a double buffer into IRAM too, so that
204 overlap-add runs iram-to-iram and we avoid needing to memcpy */
205 v->pcm_storage=ci->blocksizes[1];
206 v->pcm=_pcmp;
207 v->pcmret=_pcmret;
208 v->pcmb=_pcmbp;
210 _pcmp[0]=NULL;
211 _pcmp[1]=NULL;
212 _pcmbp[0]=NULL;
213 _pcmbp[1]=NULL;
214 if(NULL != (v->iram_double_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t))))
216 /* one-time initialisation at codec start or on switch from
217 blocksizes greater than IRAM_PCM_END to sizes that fit */
218 for(i=0;i<vi->channels;i++)
219 v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
221 else
223 /* one-time initialisation at codec start or on switch from
224 blocksizes that fit in IRAM_PCM_END to those that don't */
225 for(i=0;i<vi->channels;i++)
226 v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
229 /* all 1 (large block) or 0 (small block) */
230 /* explicitly set for the sake of clarity */
231 v->lW=0; /* previous window size */
232 v->W=0; /* current window size */
234 /* initialize all the mapping/backend lookups */
235 b->mode=(vorbis_look_mapping **)_ogg_calloc(ci->modes,sizeof(*b->mode));
236 for(i=0;i<ci->modes;i++){
237 int mapnum=ci->mode_param[i]->mapping;
238 int maptype=ci->map_type[mapnum];
239 b->mode[i]=_mapping_P[maptype]->look(v,ci->mode_param[i],
240 ci->map_param[mapnum]);
242 return(0);
245 int vorbis_synthesis_restart(vorbis_dsp_state *v){
246 vorbis_info *vi=v->vi;
247 codec_setup_info *ci;
248 int i;
250 if(!v->backend_state)return -1;
251 if(!vi)return -1;
252 ci=vi->codec_setup;
253 if(!ci)return -1;
255 v->centerW=0;
256 v->pcm_current=0;
258 v->pcm_returned=-1;
259 v->granulepos=-1;
260 v->sequence=-1;
261 ((private_state *)(v->backend_state))->sample_count=-1;
263 /* indicate to synthesis code that buffer pointers no longer valid
264 (if we're using double pcm buffer) and will need to reset them */
265 v->reset_pcmb = true;
266 /* also reset our copy of the double buffer pointers if we have one */
267 if(v->iram_double_pcm)
268 for(i=0;i<vi->channels;i++)
269 v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
271 return(0);
274 int vorbis_synthesis_init(vorbis_dsp_state *v,vorbis_info *vi){
275 _vds_init(v,vi);
276 vorbis_synthesis_restart(v);
278 return(0);
281 void vorbis_dsp_clear(vorbis_dsp_state *v){
282 int i;
283 if(v){
284 vorbis_info *vi=v->vi;
285 codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL);
286 private_state *b=(private_state *)v->backend_state;
288 if(NULL == v->iram_double_pcm && vi != NULL)
290 /* pcm buffer came from oggmalloc rather than iram */
291 for(i=0;i<vi->channels;i++)
292 if(v->pcm[i])_ogg_free(v->pcm[i]);
295 /* free mode lookups; these are actually vorbis_look_mapping structs */
296 if(ci){
297 for(i=0;i<ci->modes;i++){
298 int mapnum=ci->mode_param[i]->mapping;
299 int maptype=ci->map_type[mapnum];
300 if(b && b->mode)_mapping_P[maptype]->free_look(b->mode[i]);
304 if(b){
305 if(b->mode)_ogg_free(b->mode);
306 _ogg_free(b);
309 memset(v,0,sizeof(*v));
313 /* Unlike in analysis, the window is only partially applied for each
314 block. The time domain envelope is not yet handled at the point of
315 calling (as it relies on the previous block). */
317 int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb)
318 ICODE_ATTR;
319 int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
320 vorbis_info *vi=v->vi;
321 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
322 private_state *b=v->backend_state;
323 int j;
324 bool iram_pcm_doublebuffer = (NULL != v->iram_double_pcm);
326 if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL);
328 v->lW=v->W;
329 v->W=vb->W;
330 v->nW=-1;
332 if((v->sequence==-1)||
333 (v->sequence+1 != vb->sequence)){
334 v->granulepos=-1; /* out of sequence; lose count */
335 b->sample_count=-1;
338 v->sequence=vb->sequence;
339 int n=ci->blocksizes[v->W]/2;
340 int ln=ci->blocksizes[v->lW]/2;
342 if(LIKELY(vb->pcm)){ /* no pcm to process if vorbis_synthesis_trackonly
343 was called on block */
344 int prevCenter;
345 int n0=ci->blocksizes[0]/2;
346 int n1=ci->blocksizes[1]/2;
348 if(iram_pcm_doublebuffer)
350 prevCenter = ln;
352 else
354 prevCenter = v->centerW;
355 v->centerW = n1 - v->centerW;
358 /* overlap/add PCM */
359 /* nb nothing to overlap with on first block so don't bother */
360 if(LIKELY(v->pcm_returned!=-1))
362 for(j=0;j<vi->channels;j++)
364 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
365 ogg_int32_t *p=vb->pcm[j];
367 /* the overlap/add section */
368 if(v->lW == v->W)
370 /* large/large or small/small */
371 vect_add_right_left(pcm,p,n);
372 v->pcmb[j]=pcm;
374 else if (!v->W)
376 /* large/small */
377 vect_add_right_left(pcm + (n1-n0)/2, p, n0);
378 v->pcmb[j]=pcm;
380 else
382 /* small/large */
383 p += (n1-n0)/2;
384 vect_add_left_right(p,pcm,n0);
385 v->pcmb[j]=p;
390 /* the copy section */
391 if(iram_pcm_doublebuffer)
393 /* just flip the pointers over as we have a double buffer in iram */
394 ogg_int32_t *p;
395 p=v->pcm[0];
396 v->pcm[0]=vb->pcm[0];
397 vb->pcm[0] = p;
398 p=v->pcm[1];
399 v->pcm[1]=vb->pcm[1];
400 vb->pcm[1] = p;
402 else
404 for(j=0;j<vi->channels;j++)
406 /* at best only vb->pcm is in iram, and that's where we do the
407 synthesis, so we copy out the right-hand subframe of last
408 synthesis into (noniram) local buffer so we can still do
409 synth in iram */
410 vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n);
414 /* deal with initial packet state; we do this using the explicit
415 pcm_returned==-1 flag otherwise we're sensitive to first block
416 being short or long */
418 if(v->pcm_returned==-1){
419 v->pcm_returned=0;
420 v->pcm_current=0;
421 }else{
422 v->pcm_returned=0;
423 v->pcm_current=(n+ln)/2;
428 /* track the frame number... This is for convenience, but also
429 making sure our last packet doesn't end with added padding. If
430 the last packet is partial, the number of samples we'll have to
431 return will be past the vb->granulepos.
433 This is not foolproof! It will be confused if we begin
434 decoding at the last page after a seek or hole. In that case,
435 we don't have a starting point to judge where the last frame
436 is. For this reason, vorbisfile will always try to make sure
437 it reads the last two marked pages in proper sequence */
439 if(b->sample_count==-1){
440 b->sample_count=0;
441 }else{
442 b->sample_count+=(n+ln)/2;
445 if(v->granulepos==-1){
446 if(vb->granulepos!=-1){ /* only set if we have a position to set to */
448 v->granulepos=vb->granulepos;
450 /* is this a short page? */
451 if(b->sample_count>v->granulepos){
452 /* corner case; if this is both the first and last audio page,
453 then spec says the end is cut, not beginning */
454 if(vb->eofflag){
455 /* trim the end */
456 /* no preceeding granulepos; assume we started at zero (we'd
457 have to in a short single-page stream) */
458 /* granulepos could be -1 due to a seek, but that would result
459 in a long coun`t, not short count */
461 v->pcm_current-=(b->sample_count-v->granulepos);
462 }else{
463 /* trim the beginning */
464 v->pcm_returned+=(b->sample_count-v->granulepos);
465 if(v->pcm_returned>v->pcm_current)
466 v->pcm_returned=v->pcm_current;
472 }else{
473 v->granulepos+=(n+ln)/2;
474 if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){
476 if(v->granulepos>vb->granulepos){
477 long extra=v->granulepos-vb->granulepos;
479 if(extra)
480 if(vb->eofflag){
481 /* partial last frame. Strip the extra samples off */
482 v->pcm_current-=extra;
483 } /* else {Shouldn't happen *unless* the bitstream is out of
484 spec. Either way, believe the bitstream } */
485 } /* else {Shouldn't happen *unless* the bitstream is out of
486 spec. Either way, believe the bitstream } */
487 v->granulepos=vb->granulepos;
491 /* Update, cleanup */
493 if(vb->eofflag)v->eofflag=1;
494 return(0);
497 /* pcm==NULL indicates we just want the pending samples, no more */
498 int vorbis_synthesis_pcmout(vorbis_dsp_state *v,ogg_int32_t ***pcm) ICODE_ATTR;
499 int vorbis_synthesis_pcmout(vorbis_dsp_state *v,ogg_int32_t ***pcm){
500 vorbis_info *vi=v->vi;
501 if(v->pcm_returned>-1 && v->pcm_returned<v->pcm_current){
502 if(pcm){
503 int i;
504 for(i=0;i<vi->channels;i++)
505 v->pcmret[i]=v->pcmb[i]+v->pcm_returned;
506 *pcm=v->pcmret;
508 return(v->pcm_current-v->pcm_returned);
510 return(0);
513 int vorbis_synthesis_read(vorbis_dsp_state *v,int bytes){
514 if(bytes && v->pcm_returned+bytes>v->pcm_current)return(OV_EINVAL);
515 v->pcm_returned+=bytes;
516 return(0);