libtremor: tweak a hot function for codebook decoding, mostly moving pointer lookups...
[maemo-rb.git] / apps / codecs / libtremor / codebook.c
blobe75dddb29c665cb99be9a512a371ac1474c49c26
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: basic codebook pack/unpack/code/decode operations
16 ********************************************************************/
18 #include "config-tremor.h"
19 #include <string.h>
20 #include <math.h>
21 #include "ogg.h"
22 #include "ivorbiscodec.h"
23 #include "codebook.h"
24 #include "misc.h"
25 #include "os.h"
27 /* unpacks a codebook from the packet buffer into the codebook struct,
28 readies the codebook auxiliary structures for decode *************/
29 int vorbis_staticbook_unpack(oggpack_buffer *opb,static_codebook *s){
30 long i,j;
31 memset(s,0,sizeof(*s));
33 /* make sure alignment is correct */
34 if(oggpack_read(opb,24)!=0x564342)goto _eofout;
36 /* first the basic parameters */
37 s->dim=oggpack_read(opb,16);
38 s->entries=oggpack_read(opb,24);
39 if(s->entries==-1)goto _eofout;
41 /* codeword ordering.... length ordered or unordered? */
42 switch((int)oggpack_read(opb,1)){
43 case 0:
44 /* unordered */
45 s->lengthlist=(long *)_ogg_malloc(sizeof(*s->lengthlist)*s->entries);
47 /* allocated but unused entries? */
48 if(oggpack_read(opb,1)){
49 /* yes, unused entries */
51 for(i=0;i<s->entries;i++){
52 if(oggpack_read(opb,1)){
53 long num=oggpack_read(opb,5);
54 if(num==-1)goto _eofout;
55 s->lengthlist[i]=num+1;
56 }else
57 s->lengthlist[i]=0;
59 }else{
60 /* all entries used; no tagging */
61 for(i=0;i<s->entries;i++){
62 long num=oggpack_read(opb,5);
63 if(num==-1)goto _eofout;
64 s->lengthlist[i]=num+1;
68 break;
69 case 1:
70 /* ordered */
72 long length=oggpack_read(opb,5)+1;
73 s->lengthlist=(long *)_ogg_malloc(sizeof(*s->lengthlist)*s->entries);
75 for(i=0;i<s->entries;){
76 long num=oggpack_read(opb,_ilog(s->entries-i));
77 if(num==-1)goto _eofout;
78 for(j=0;j<num && i<s->entries;j++,i++)
79 s->lengthlist[i]=length;
80 length++;
83 break;
84 default:
85 /* EOF */
86 return(-1);
89 /* Do we have a mapping to unpack? */
90 switch((s->maptype=oggpack_read(opb,4))){
91 case 0:
92 /* no mapping */
93 break;
94 case 1: case 2:
95 /* implicitly populated value mapping */
96 /* explicitly populated value mapping */
98 s->q_min=oggpack_read(opb,32);
99 s->q_delta=oggpack_read(opb,32);
100 s->q_quant=oggpack_read(opb,4)+1;
101 s->q_sequencep=oggpack_read(opb,1);
104 int quantvals=0;
105 switch(s->maptype){
106 case 1:
107 quantvals=_book_maptype1_quantvals(s);
108 break;
109 case 2:
110 quantvals=s->entries*s->dim;
111 break;
114 /* quantized values */
115 s->quantlist=(long *)_ogg_malloc(sizeof(*s->quantlist)*quantvals);
116 for(i=0;i<quantvals;i++)
117 s->quantlist[i]=oggpack_read(opb,s->q_quant);
119 if(quantvals&&s->quantlist[quantvals-1]==-1)goto _eofout;
121 break;
122 default:
123 goto _errout;
126 /* all set */
127 return(0);
129 _errout:
130 _eofout:
131 vorbis_staticbook_clear(s);
132 return(-1);
135 /* the 'eliminate the decode tree' optimization actually requires the
136 codewords to be MSb first, not LSb. This is an annoying inelegancy
137 (and one of the first places where carefully thought out design
138 turned out to be wrong; Vorbis II and future Ogg codecs should go
139 to an MSb bitpacker), but not actually the huge hit it appears to
140 be. The first-stage decode table catches most words so that
141 bitreverse is not in the main execution path. */
143 static inline ogg_uint32_t bitreverse(register ogg_uint32_t x)
145 unsigned tmp, ret;
146 #ifdef _ARM_ASSEM_
147 #if ARM_ARCH >= 6
148 unsigned mask = 0x0f0f0f0f;
149 #else
150 unsigned mask = 0x00ff00ff;
151 #endif
152 asm (
153 #if ARM_ARCH >= 6
154 "rev %[r], %[x] \n" /* swap halfwords and bytes */
155 "and %[t], %[m], %[r] \n" /* Sequence is one instruction */
156 "eor %[r], %[t], %[r] \n" /* longer than on <= ARMv5, but */
157 "mov %[t], %[t], lsl #4 \n" /* interlock free */
158 "orr %[r], %[t], %[r], lsr #4\n" /* nibbles swapped */
159 "eor %[m], %[m], %[m], lsl #2\n" /* mask = 0x33333333 */
160 "and %[t], %[m], %[r] \n"
161 "eor %[r], %[t], %[r] \n"
162 "mov %[t], %[t], lsl #2 \n"
163 "orr %[r], %[t], %[r], lsr #2\n" /* dibits swapped */
164 "eor %[m], %[m], %[m], lsl #1\n" /* mask = 0x55555555 */
165 "and %[t], %[m], %[r] \n"
166 "eor %[r], %[t], %[r] \n"
167 "mov %[t], %[t], lsl #1 \n"
168 "orr %[r], %[t], %[r], lsr #1\n" /* bits swapped */
169 #else /* ARM_ARCH <= 5 */
170 "mov %[r], %[x], ror #16 \n" /* swap halfwords */
171 "and %[t], %[m], %[r], lsr #8\n"
172 "eor %[r], %[r], %[t], lsl #8\n"
173 "orr %[r], %[t], %[r], lsl #8\n" /* bytes swapped */
174 "eor %[m], %[m], %[m], lsl #4\n" /* mask = 0x0f0f0f0f */
175 "and %[t], %[m], %[r], lsr #4\n"
176 "eor %[r], %[r], %[t], lsl #4\n"
177 "orr %[r], %[t], %[r], lsl #4\n" /* nibbles swapped */
178 "eor %[m], %[m], %[m], lsl #2\n" /* mask = 0x33333333 */
179 "and %[t], %[m], %[r], lsr #2\n"
180 "eor %[r], %[r], %[t], lsl #2\n"
181 "orr %[r], %[t], %[r], lsl #2\n" /* dibits swapped */
182 "eor %[m], %[m], %[m], lsl #1\n" /* mask = 0x55555555 */
183 "and %[t], %[m], %[r], lsr #1\n"
184 "eor %[r], %[r], %[t], lsl #1\n"
185 "orr %[r], %[t], %[r], lsl #1\n" /* bits swapped */
186 #endif /* ARM_ARCH */
187 : /* outputs */
188 [m]"+r"(mask),
189 [r]"=r"(ret),
190 [t]"=r"(tmp)
191 : /* inputs */
192 [x]"r"(x)
194 #else /* !_ARM_ASSEM_ */
196 #ifdef CPU_COLDFIRE
197 ret = x;
198 asm ("swap %[r]" : [r] "+r" (ret)); /* swap halfwords */
199 #else
200 ret = (x>>16) | (x<<16);
201 #endif
202 tmp = ret & 0x00ff00ff;
203 ret ^= tmp;
204 ret = (ret >> 8) | (tmp << 8); /* bytes swapped */
205 tmp = ret & 0x0f0f0f0f;
206 ret ^= tmp;
207 ret = (ret >> 4) | (tmp << 4); /* 4-bit units swapped */
208 tmp = ret & 0x33333333;
209 ret ^= tmp;
210 ret = (ret >> 2) | (tmp << 2); /* 2-bit units swapped */
211 tmp = ret & 0x55555555;
212 ret ^= tmp;
213 ret = (ret >> 1) | (tmp << 1); /* done */
214 #endif /* !_ARM_ASSEM_ */
215 return ret;
218 static inline long bisect_codelist(long lo, long hi, ogg_uint32_t cache,
219 const ogg_uint32_t *codelist)
221 ogg_uint32_t testword=bitreverse(cache);
222 long p;
223 while(LIKELY(p = (hi-lo) >> 1) > 0){
224 if(codelist[lo+p] > testword)
225 hi -= p;
226 else
227 lo += p;
229 return lo;
232 STIN long decode_packed_entry_number(codebook *book,
233 oggpack_buffer *b){
234 int read=book->dec_maxlength;
235 long lo,hi;
236 long lok = oggpack_look(b,book->dec_firsttablen);
238 if (LIKELY(lok >= 0)) {
239 ogg_int32_t entry = book->dec_firsttable[lok];
240 if(UNLIKELY(entry < 0)){
241 lo=(entry>>15)&0x7fff;
242 hi=book->used_entries-(entry&0x7fff);
243 }else{
244 oggpack_adv(b, book->dec_codelengths[entry-1]);
245 return(entry-1);
247 }else{
248 lo=0;
249 hi=book->used_entries;
252 lok = oggpack_look(b, read);
254 while(lok<0 && read>1)
255 lok = oggpack_look(b, --read);
257 if(lok<0){
258 oggpack_adv(b,1); /* force eop */
259 return -1;
262 /* bisect search for the codeword in the ordered list */
264 lo = bisect_codelist(lo, hi, lok, book->codelist);
266 if(book->dec_codelengths[lo]<=read){
267 oggpack_adv(b, book->dec_codelengths[lo]);
268 return(lo);
272 oggpack_adv(b, read+1);
273 return(-1);
276 static long decode_packed_block(codebook *book, oggpack_buffer *b,
277 long *buf, int n){
278 long *bufptr = buf;
279 long *bufend = buf + n;
281 while (bufptr<bufend) {
282 if (b->headend > 8) {
283 ogg_uint32_t *ptr;
284 unsigned long bit, bitend;
285 unsigned long adr;
286 ogg_uint32_t cache = 0;
287 int cachesize = 0;
288 const unsigned int cachemask = (1<<book->dec_firsttablen)-1;
289 const int book_dec_maxlength = book->dec_maxlength;
290 const ogg_uint32_t *book_dec_firsttable = book->dec_firsttable;
291 const long book_used_entries = book->used_entries;
292 const ogg_uint32_t *book_codelist = book->codelist;
293 const char *book_dec_codelengths = book->dec_codelengths;
295 adr = (unsigned long)b->headptr;
296 bit = (adr&3)*8+b->headbit;
297 ptr = (ogg_uint32_t*)(adr&~3);
298 bitend = ((adr&3)+b->headend)*8;
299 while (bufptr<bufend){
300 if (UNLIKELY(cachesize<book_dec_maxlength)) {
301 if (bit-cachesize+32>=bitend)
302 break;
303 bit-=cachesize;
304 cache = letoh32(ptr[bit>>5]);
305 if (bit&31) {
306 cache >>= (bit&31);
307 cache |= letoh32(ptr[(bit>>5)+1]) << (32-(bit&31));
309 cachesize=32;
310 bit+=32;
313 ogg_int32_t entry = book_dec_firsttable[cache&cachemask];
314 if(UNLIKELY(entry < 0)){
315 const long lo = (entry>>15)&0x7fff, hi = book_used_entries-(entry&0x7fff);
316 entry = bisect_codelist(lo, hi, cache, book_codelist);
317 }else
318 entry--;
320 *bufptr++ = entry;
321 int l = book_dec_codelengths[entry];
322 cachesize -= l;
323 cache >>= l;
326 adr=(unsigned long)b->headptr;
327 bit-=(adr&3)*8+cachesize;
328 b->headend-=(bit/8);
329 b->headptr+=bit/8;
330 b->headbit=bit%8;
331 } else {
332 long r = decode_packed_entry_number(book, b);
333 if (r == -1) return bufptr-buf;
334 *bufptr++ = r;
337 return n;
341 /* Decode side is specced and easier, because we don't need to find
342 matches using different criteria; we simply read and map. There are
343 two things we need to do 'depending':
345 We may need to support interleave. We don't really, but it's
346 convenient to do it here rather than rebuild the vector later.
348 Cascades may be additive or multiplicitive; this is not inherent in
349 the codebook, but set in the code using the codebook. Like
350 interleaving, it's easiest to do it here.
351 addmul==0 -> declarative (set the value)
352 addmul==1 -> additive
353 addmul==2 -> multiplicitive */
355 /* returns the [original, not compacted] entry number or -1 on eof *********/
356 long vorbis_book_decode(codebook *book, oggpack_buffer *b){
357 if(book->used_entries>0){
358 long packed_entry=decode_packed_entry_number(book,b);
359 if(packed_entry>=0)
360 return(book->dec_index[packed_entry]);
363 /* if there's no dec_index, the codebook unpacking isn't collapsed */
364 return(-1);
367 /* returns 0 on OK or -1 on eof *************************************/
368 long vorbis_book_decodevs_add(codebook *book,ogg_int32_t *a,
369 oggpack_buffer *b,int n,int point){
370 if(book->used_entries>0){
371 int step=n/book->dim;
372 long *entry = (long *)alloca(sizeof(*entry)*step);
373 ogg_int32_t **t = (ogg_int32_t **)alloca(sizeof(*t)*step);
374 int i,j,o;
375 int shift=point-book->binarypoint;
377 if(shift>=0){
378 for (i = 0; i < step; i++) {
379 entry[i]=decode_packed_entry_number(book,b);
380 if(entry[i]==-1)return(-1);
381 t[i] = book->valuelist+entry[i]*book->dim;
383 for(i=0,o=0;i<book->dim;i++,o+=step)
384 for (j=0;j<step;j++)
385 a[o+j]+=t[j][i]>>shift;
386 }else{
387 for (i = 0; i < step; i++) {
388 entry[i]=decode_packed_entry_number(book,b);
389 if(entry[i]==-1)return(-1);
390 t[i] = book->valuelist+entry[i]*book->dim;
392 for(i=0,o=0;i<book->dim;i++,o+=step)
393 for (j=0;j<step;j++)
394 a[o+j]+=t[j][i]<<-shift;
397 return(0);
400 long vorbis_book_decodev_add(codebook *book,ogg_int32_t *a,
401 oggpack_buffer *b,int n,int point){
402 if(book->used_entries>0){
403 int i,j,entry;
404 ogg_int32_t *t;
405 int shift=point-book->binarypoint;
407 if(shift>=0){
408 for(i=0;i<n;){
409 entry = decode_packed_entry_number(book,b);
410 if(entry==-1)return(-1);
411 t = book->valuelist+entry*book->dim;
412 for (j=0;j<book->dim;)
413 a[i++]+=t[j++]>>shift;
415 }else{
416 shift = -shift;
417 for(i=0;i<n;){
418 entry = decode_packed_entry_number(book,b);
419 if(entry==-1)return(-1);
420 t = book->valuelist+entry*book->dim;
421 for (j=0;j<book->dim;)
422 a[i++]+=t[j++]<<shift;
426 return(0);
429 long vorbis_book_decodev_set(codebook *book,ogg_int32_t *a,
430 oggpack_buffer *b,int n,int point){
431 if(book->used_entries>0){
432 int i,j,entry;
433 ogg_int32_t *t;
434 int shift=point-book->binarypoint;
436 if(shift>=0){
438 for(i=0;i<n;){
439 entry = decode_packed_entry_number(book,b);
440 if(entry==-1)return(-1);
441 t = book->valuelist+entry*book->dim;
442 for (j=0;j<book->dim;){
443 a[i++]=t[j++]>>shift;
446 }else{
447 shift = -shift;
448 for(i=0;i<n;){
449 entry = decode_packed_entry_number(book,b);
450 if(entry==-1)return(-1);
451 t = book->valuelist+entry*book->dim;
452 for (j=0;j<book->dim;){
453 a[i++]=t[j++]<<shift;
457 }else{
459 int i,j;
460 for(i=0;i<n;){
461 for (j=0;j<book->dim;){
462 a[i++]=0;
466 return(0);
469 static long vorbis_book_decodevv_add_2ch_even(codebook *book,ogg_int32_t **a,
470 long offset,oggpack_buffer *b,
471 unsigned int n,int point){
472 long k,chunk,read;
473 int shift=point-book->binarypoint;
474 long entries[32];
475 ogg_int32_t *p0 = &(a[0][offset]);
476 ogg_int32_t *p1 = &(a[1][offset]);
477 const unsigned long dim = book->dim;
478 const ogg_int32_t * const vlist = book->valuelist;
480 if(shift>=0){
481 while(n>0){
482 chunk=32;
483 if (16*dim>n)
484 chunk=(n*2-1)/dim + 1;
485 read = decode_packed_block(book,b,entries,chunk);
486 for(k=0;k<read;k++){
487 const ogg_int32_t *t = vlist+entries[k]*dim;
488 const ogg_int32_t *u = t+dim;
490 *p0++ += *t++>>shift;
491 *p1++ += *t++>>shift;
492 }while(t<u);
494 if (read<chunk)return-1;
495 n -= read*dim/2;
497 }else{
498 shift = -shift;
499 while(n>0){
500 chunk=32;
501 if (16*dim>n)
502 chunk=(n*2-1)/dim + 1;
503 read = decode_packed_block(book,b,entries,chunk);
504 for(k=0;k<read;k++){
505 const ogg_int32_t *t = vlist+entries[k]*dim;
506 const ogg_int32_t *u = t+dim;
508 *p0++ += *t++<<shift;
509 *p1++ += *t++<<shift;
510 }while(t<u);
512 if (read<chunk)return-1;
513 n -= read*dim/2;
516 return(0);
519 long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a,
520 long offset,int ch,
521 oggpack_buffer *b,int n,int point){
522 if(LIKELY(book->used_entries>0)){
523 long i,j,k,chunk,read;
524 int chptr=0;
525 int shift=point-book->binarypoint;
526 long entries[32];
528 if (!(book->dim&1) && ch==2)
529 return vorbis_book_decodevv_add_2ch_even(book,a,offset,b,n,point);
531 if(shift>=0){
533 for(i=offset;i<offset+n;){
534 chunk=32;
535 if (chunk*book->dim>(offset+n-i)*ch)
536 chunk=((offset+n-i)*ch+book->dim-1)/book->dim;
537 read = decode_packed_block(book,b,entries,chunk);
538 for(k=0;k<read;k++){
539 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
540 for (j=0;j<book->dim;j++){
541 a[chptr++][i]+=t[j]>>shift;
542 if(chptr==ch){
543 chptr=0;
544 i++;
548 if (read<chunk)return-1;
550 }else{
551 shift = -shift;
552 for(i=offset;i<offset+n;){
553 chunk=32;
554 if (chunk*book->dim>(offset+n-i)*ch)
555 chunk=((offset+n-i)*ch+book->dim-1)/book->dim;
556 read = decode_packed_block(book,b,entries,chunk);
557 for(k=0;k<read;k++){
558 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
559 for (j=0;j<book->dim;j++){
560 a[chptr++][i]+=t[j]<<shift;
561 if(chptr==ch){
562 chptr=0;
563 i++;
567 if (read<chunk)return-1;
571 return(0);