Simplify special case function, speedup of about 0.2MHz on both coldfire and pp decod...
[kugel-rb.git] / apps / codecs / libtremor / codebook.c
blobc938f2f73d1b8d9b47e0dbdee3300e033b9a473b
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: basic codebook pack/unpack/code/decode operations
16 ********************************************************************/
18 #include "config-tremor.h"
19 #include <string.h>
20 #include <math.h>
21 #include "ogg.h"
22 #include "ivorbiscodec.h"
23 #include "codebook.h"
24 #include "misc.h"
25 #include "os.h"
27 /* unpacks a codebook from the packet buffer into the codebook struct,
28 readies the codebook auxiliary structures for decode *************/
29 int vorbis_staticbook_unpack(oggpack_buffer *opb,static_codebook *s){
30 long i,j;
31 memset(s,0,sizeof(*s));
33 /* make sure alignment is correct */
34 if(oggpack_read(opb,24)!=0x564342)goto _eofout;
36 /* first the basic parameters */
37 s->dim=oggpack_read(opb,16);
38 s->entries=oggpack_read(opb,24);
39 if(s->entries==-1)goto _eofout;
41 /* codeword ordering.... length ordered or unordered? */
42 switch((int)oggpack_read(opb,1)){
43 case 0:
44 /* unordered */
45 s->lengthlist=(long *)_ogg_malloc(sizeof(*s->lengthlist)*s->entries);
47 /* allocated but unused entries? */
48 if(oggpack_read(opb,1)){
49 /* yes, unused entries */
51 for(i=0;i<s->entries;i++){
52 if(oggpack_read(opb,1)){
53 long num=oggpack_read(opb,5);
54 if(num==-1)goto _eofout;
55 s->lengthlist[i]=num+1;
56 }else
57 s->lengthlist[i]=0;
59 }else{
60 /* all entries used; no tagging */
61 for(i=0;i<s->entries;i++){
62 long num=oggpack_read(opb,5);
63 if(num==-1)goto _eofout;
64 s->lengthlist[i]=num+1;
68 break;
69 case 1:
70 /* ordered */
72 long length=oggpack_read(opb,5)+1;
73 s->lengthlist=(long *)_ogg_malloc(sizeof(*s->lengthlist)*s->entries);
75 for(i=0;i<s->entries;){
76 long num=oggpack_read(opb,_ilog(s->entries-i));
77 if(num==-1)goto _eofout;
78 for(j=0;j<num && i<s->entries;j++,i++)
79 s->lengthlist[i]=length;
80 length++;
83 break;
84 default:
85 /* EOF */
86 return(-1);
89 /* Do we have a mapping to unpack? */
90 switch((s->maptype=oggpack_read(opb,4))){
91 case 0:
92 /* no mapping */
93 break;
94 case 1: case 2:
95 /* implicitly populated value mapping */
96 /* explicitly populated value mapping */
98 s->q_min=oggpack_read(opb,32);
99 s->q_delta=oggpack_read(opb,32);
100 s->q_quant=oggpack_read(opb,4)+1;
101 s->q_sequencep=oggpack_read(opb,1);
104 int quantvals=0;
105 switch(s->maptype){
106 case 1:
107 quantvals=_book_maptype1_quantvals(s);
108 break;
109 case 2:
110 quantvals=s->entries*s->dim;
111 break;
114 /* quantized values */
115 s->quantlist=(long *)_ogg_malloc(sizeof(*s->quantlist)*quantvals);
116 for(i=0;i<quantvals;i++)
117 s->quantlist[i]=oggpack_read(opb,s->q_quant);
119 if(quantvals&&s->quantlist[quantvals-1]==-1)goto _eofout;
121 break;
122 default:
123 goto _errout;
126 /* all set */
127 return(0);
129 _errout:
130 _eofout:
131 vorbis_staticbook_clear(s);
132 return(-1);
135 /* the 'eliminate the decode tree' optimization actually requires the
136 codewords to be MSb first, not LSb. This is an annoying inelegancy
137 (and one of the first places where carefully thought out design
138 turned out to be wrong; Vorbis II and future Ogg codecs should go
139 to an MSb bitpacker), but not actually the huge hit it appears to
140 be. The first-stage decode table catches most words so that
141 bitreverse is not in the main execution path. */
143 static inline ogg_uint32_t bitreverse(register ogg_uint32_t x)
145 unsigned tmp, ret;
146 #ifdef _ARM_ASSEM_
147 #if ARM_ARCH >= 6
148 unsigned mask = 0x0f0f0f0f;
149 #else
150 unsigned mask = 0x00ff00ff;
151 #endif
152 asm (
153 #if ARM_ARCH >= 6
154 "rev %[r], %[x] \n" /* swap halfwords and bytes */
155 "and %[t], %[m], %[r] \n" /* Sequence is one instruction */
156 "eor %[r], %[t], %[r] \n" /* longer than on <= ARMv5, but */
157 "mov %[t], %[t], lsl #4 \n" /* interlock free */
158 "orr %[r], %[t], %[r], lsr #4\n" /* nibbles swapped */
159 "eor %[m], %[m], %[m], lsl #2\n" /* mask = 0x33333333 */
160 "and %[t], %[m], %[r] \n"
161 "eor %[r], %[t], %[r] \n"
162 "mov %[t], %[t], lsl #2 \n"
163 "orr %[r], %[t], %[r], lsr #2\n" /* dibits swapped */
164 "eor %[m], %[m], %[m], lsl #1\n" /* mask = 0x55555555 */
165 "and %[t], %[m], %[r] \n"
166 "eor %[r], %[t], %[r] \n"
167 "mov %[t], %[t], lsl #1 \n"
168 "orr %[r], %[t], %[r], lsr #1\n" /* bits swapped */
169 #else /* ARM_ARCH <= 5 */
170 "mov %[r], %[x], ror #16 \n" /* swap halfwords */
171 "and %[t], %[m], %[r], lsr #8\n"
172 "eor %[r], %[r], %[t], lsl #8\n"
173 "orr %[r], %[t], %[r], lsl #8\n" /* bytes swapped */
174 "eor %[m], %[m], %[m], lsl #4\n" /* mask = 0x0f0f0f0f */
175 "and %[t], %[m], %[r], lsr #4\n"
176 "eor %[r], %[r], %[t], lsl #4\n"
177 "orr %[r], %[t], %[r], lsl #4\n" /* nibbles swapped */
178 "eor %[m], %[m], %[m], lsl #2\n" /* mask = 0x33333333 */
179 "and %[t], %[m], %[r], lsr #2\n"
180 "eor %[r], %[r], %[t], lsl #2\n"
181 "orr %[r], %[t], %[r], lsl #2\n" /* dibits swapped */
182 "eor %[m], %[m], %[m], lsl #1\n" /* mask = 0x55555555 */
183 "and %[t], %[m], %[r], lsr #1\n"
184 "eor %[r], %[r], %[t], lsl #1\n"
185 "orr %[r], %[t], %[r], lsl #1\n" /* bits swapped */
186 #endif /* ARM_ARCH */
187 : /* outputs */
188 [m]"+r"(mask),
189 [r]"=r"(ret),
190 [t]"=r"(tmp)
191 : /* inputs */
192 [x]"r"(x)
194 #else /* !_ARM_ASSEM_ */
196 #ifdef CPU_COLDFIRE
197 ret = x;
198 asm ("swap %[r]" : [r] "+r" (ret)); /* swap halfwords */
199 #else
200 ret = (x>>16) | (x<<16);
201 #endif
202 tmp = ret & 0x00ff00ff;
203 ret ^= tmp;
204 ret = (ret >> 8) | (tmp << 8); /* bytes swapped */
205 tmp = ret & 0x0f0f0f0f;
206 ret ^= tmp;
207 ret = (ret >> 4) | (tmp << 4); /* 4-bit units swapped */
208 tmp = ret & 0x33333333;
209 ret ^= tmp;
210 ret = (ret >> 2) | (tmp << 2); /* 2-bit units swapped */
211 tmp = ret & 0x55555555;
212 ret ^= tmp;
213 ret = (ret >> 1) | (tmp << 1); /* done */
214 #endif /* !_ARM_ASSEM_ */
215 return ret;
218 STIN long decode_packed_entry_number(codebook *book,
219 oggpack_buffer *b){
220 int read=book->dec_maxlength;
221 long lo,hi;
222 long lok = oggpack_look(b,book->dec_firsttablen);
224 if (LIKELY(lok >= 0)) {
225 long entry = book->dec_firsttable[lok];
226 if(UNLIKELY(entry&0x80000000UL)){
227 lo=(entry>>15)&0x7fff;
228 hi=book->used_entries-(entry&0x7fff);
229 }else{
230 oggpack_adv(b, book->dec_codelengths[entry-1]);
231 return(entry-1);
233 }else{
234 lo=0;
235 hi=book->used_entries;
238 lok = oggpack_look(b, read);
240 while(lok<0 && read>1)
241 lok = oggpack_look(b, --read);
243 if(lok<0){
244 oggpack_adv(b,1); /* force eop */
245 return -1;
248 /* bisect search for the codeword in the ordered list */
250 ogg_uint32_t testword=bitreverse((ogg_uint32_t)lok);
252 while(hi-lo>1){
253 long p=(hi-lo)>>1;
254 long test=book->codelist[lo+p]>testword;
255 lo+=p&(test-1);
256 hi-=p&(-test);
259 if(book->dec_codelengths[lo]<=read){
260 oggpack_adv(b, book->dec_codelengths[lo]);
261 return(lo);
265 oggpack_adv(b, read+1);
266 return(-1);
269 static long decode_packed_block(codebook *book, oggpack_buffer *b,
270 long *buf, int n){
271 long *bufptr = buf;
272 long *bufend = buf + n;
274 while (bufptr<bufend) {
275 if (b->headend > 8) {
276 ogg_uint32_t *ptr;
277 unsigned long bit, bitend;
278 unsigned long adr;
279 ogg_uint32_t cache = 0;
280 int cachesize = 0;
282 adr = (unsigned long)b->headptr;
283 bit = (adr&3)*8+b->headbit;
284 ptr = (ogg_uint32_t *)(adr&~3);
285 bitend = ((adr&3)+b->headend)*8;
286 while (bufptr<bufend){
287 long entry, lo, hi;
288 if (UNLIKELY(cachesize<book->dec_maxlength)) {
289 if (bit-cachesize+32>=bitend)
290 break;
291 bit-=cachesize;
292 cache=letoh32(ptr[bit>>5]) >> (bit&31);
293 if (bit&31)
294 cache|=letoh32(ptr[(bit>>5)+1]) << (32-(bit&31));
295 cachesize=32;
296 bit+=32;
299 entry=book->dec_firsttable[cache&((1<<book->dec_firsttablen)-1)];
300 if(UNLIKELY(entry&0x80000000UL)){
301 lo=(entry>>15)&0x7fff;
302 hi=book->used_entries-(entry&0x7fff);
303 ogg_uint32_t testword=bitreverse((ogg_uint32_t)cache);
305 while(LIKELY(hi-lo>1)){
306 long p=(hi-lo)>>1;
307 if (book->codelist[lo+p]>testword)
308 hi-=p;
309 else
310 lo+=p;
312 entry=lo;
313 }else
314 entry--;
316 *bufptr++=entry;
317 int l=book->dec_codelengths[entry];
318 cachesize-=l;
319 cache>>=l;
322 adr=(unsigned long)b->headptr;
323 bit-=(adr&3)*8+cachesize;
324 b->headend-=(bit/8);
325 b->headptr+=bit/8;
326 b->headbit=bit%8;
327 } else {
328 long r = decode_packed_entry_number(book, b);
329 if (r == -1) return bufptr-buf;
330 *bufptr++ = r;
333 return n;
337 /* Decode side is specced and easier, because we don't need to find
338 matches using different criteria; we simply read and map. There are
339 two things we need to do 'depending':
341 We may need to support interleave. We don't really, but it's
342 convenient to do it here rather than rebuild the vector later.
344 Cascades may be additive or multiplicitive; this is not inherent in
345 the codebook, but set in the code using the codebook. Like
346 interleaving, it's easiest to do it here.
347 addmul==0 -> declarative (set the value)
348 addmul==1 -> additive
349 addmul==2 -> multiplicitive */
351 /* returns the [original, not compacted] entry number or -1 on eof *********/
352 long vorbis_book_decode(codebook *book, oggpack_buffer *b){
353 if(book->used_entries>0){
354 long packed_entry=decode_packed_entry_number(book,b);
355 if(packed_entry>=0)
356 return(book->dec_index[packed_entry]);
359 /* if there's no dec_index, the codebook unpacking isn't collapsed */
360 return(-1);
363 /* returns 0 on OK or -1 on eof *************************************/
364 long vorbis_book_decodevs_add(codebook *book,ogg_int32_t *a,
365 oggpack_buffer *b,int n,int point){
366 if(book->used_entries>0){
367 int step=n/book->dim;
368 long *entry = (long *)alloca(sizeof(*entry)*step);
369 ogg_int32_t **t = (ogg_int32_t **)alloca(sizeof(*t)*step);
370 int i,j,o;
371 int shift=point-book->binarypoint;
373 if(shift>=0){
374 for (i = 0; i < step; i++) {
375 entry[i]=decode_packed_entry_number(book,b);
376 if(entry[i]==-1)return(-1);
377 t[i] = book->valuelist+entry[i]*book->dim;
379 for(i=0,o=0;i<book->dim;i++,o+=step)
380 for (j=0;j<step;j++)
381 a[o+j]+=t[j][i]>>shift;
382 }else{
383 for (i = 0; i < step; i++) {
384 entry[i]=decode_packed_entry_number(book,b);
385 if(entry[i]==-1)return(-1);
386 t[i] = book->valuelist+entry[i]*book->dim;
388 for(i=0,o=0;i<book->dim;i++,o+=step)
389 for (j=0;j<step;j++)
390 a[o+j]+=t[j][i]<<-shift;
393 return(0);
396 long vorbis_book_decodev_add(codebook *book,ogg_int32_t *a,
397 oggpack_buffer *b,int n,int point){
398 if(book->used_entries>0){
399 int i,j,entry;
400 ogg_int32_t *t;
401 int shift=point-book->binarypoint;
403 if(shift>=0){
404 for(i=0;i<n;){
405 entry = decode_packed_entry_number(book,b);
406 if(entry==-1)return(-1);
407 t = book->valuelist+entry*book->dim;
408 for (j=0;j<book->dim;)
409 a[i++]+=t[j++]>>shift;
411 }else{
412 shift = -shift;
413 for(i=0;i<n;){
414 entry = decode_packed_entry_number(book,b);
415 if(entry==-1)return(-1);
416 t = book->valuelist+entry*book->dim;
417 for (j=0;j<book->dim;)
418 a[i++]+=t[j++]<<shift;
422 return(0);
425 long vorbis_book_decodev_set(codebook *book,ogg_int32_t *a,
426 oggpack_buffer *b,int n,int point){
427 if(book->used_entries>0){
428 int i,j,entry;
429 ogg_int32_t *t;
430 int shift=point-book->binarypoint;
432 if(shift>=0){
434 for(i=0;i<n;){
435 entry = decode_packed_entry_number(book,b);
436 if(entry==-1)return(-1);
437 t = book->valuelist+entry*book->dim;
438 for (j=0;j<book->dim;){
439 a[i++]=t[j++]>>shift;
442 }else{
443 shift = -shift;
444 for(i=0;i<n;){
445 entry = decode_packed_entry_number(book,b);
446 if(entry==-1)return(-1);
447 t = book->valuelist+entry*book->dim;
448 for (j=0;j<book->dim;){
449 a[i++]=t[j++]<<shift;
453 }else{
455 int i,j;
456 for(i=0;i<n;){
457 for (j=0;j<book->dim;){
458 a[i++]=0;
462 return(0);
465 static long vorbis_book_decodevv_add_2ch_even(codebook *book,ogg_int32_t **a,
466 long offset,oggpack_buffer *b,
467 unsigned int n,int point){
468 long k,chunk,read;
469 int shift=point-book->binarypoint;
470 long entries[32];
471 ogg_int32_t *p0 = &(a[0][offset]);
472 ogg_int32_t *p1 = &(a[1][offset]);
473 const unsigned long dim = book->dim;
474 const ogg_int32_t * const vlist = book->valuelist;
476 if(shift>=0){
477 while(n>0){
478 chunk=32;
479 if (16*dim>n)
480 chunk=(n*2-1)/dim + 1;
481 read = decode_packed_block(book,b,entries,chunk);
482 for(k=0;k<read;k++){
483 const ogg_int32_t *t = vlist+entries[k]*dim;
484 const ogg_int32_t *u = t+dim;
486 *p0++ += *t++>>shift;
487 *p1++ += *t++>>shift;
488 }while(t<u);
490 if (read<chunk)return-1;
491 n -= read*dim/2;
493 }else{
494 shift = -shift;
495 while(n>0){
496 chunk=32;
497 if (16*dim>n)
498 chunk=(n*2-1)/dim + 1;
499 read = decode_packed_block(book,b,entries,chunk);
500 for(k=0;k<read;k++){
501 const ogg_int32_t *t = vlist+entries[k]*dim;
502 const ogg_int32_t *u = t+dim;
504 *p0++ += *t++<<shift;
505 *p1++ += *t++<<shift;
506 }while(t<u);
508 if (read<chunk)return-1;
509 n -= read*dim/2;
512 return(0);
515 long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a,
516 long offset,int ch,
517 oggpack_buffer *b,int n,int point){
518 if(LIKELY(book->used_entries>0)){
519 long i,j,k,chunk,read;
520 int chptr=0;
521 int shift=point-book->binarypoint;
522 long entries[32];
524 if (!(book->dim&1) && ch==2)
525 return vorbis_book_decodevv_add_2ch_even(book,a,offset,b,n,point);
527 if(shift>=0){
529 for(i=offset;i<offset+n;){
530 chunk=32;
531 if (chunk*book->dim>(offset+n-i)*ch)
532 chunk=((offset+n-i)*ch+book->dim-1)/book->dim;
533 read = decode_packed_block(book,b,entries,chunk);
534 for(k=0;k<read;k++){
535 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
536 for (j=0;j<book->dim;j++){
537 a[chptr++][i]+=t[j]>>shift;
538 if(chptr==ch){
539 chptr=0;
540 i++;
544 if (read<chunk)return-1;
546 }else{
547 shift = -shift;
548 for(i=offset;i<offset+n;){
549 chunk=32;
550 if (chunk*book->dim>(offset+n-i)*ch)
551 chunk=((offset+n-i)*ch+book->dim-1)/book->dim;
552 read = decode_packed_block(book,b,entries,chunk);
553 for(k=0;k<read;k++){
554 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
555 for (j=0;j<book->dim;j++){
556 a[chptr++][i]+=t[j]<<shift;
557 if(chptr==ch){
558 chptr=0;
559 i++;
563 if (read<chunk)return-1;
567 return(0);