Improved bitrev with approach suggested by Jens Arnold, gives 0.5%-1% speedup for...
[kugel-rb.git] / apps / codecs / libtremor / codebook.c
bloba4fc9ee6c0a3c5260cd6c66449359559a0ebcff8
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: basic codebook pack/unpack/code/decode operations
16 ********************************************************************/
18 #include "config-tremor.h"
19 #include <string.h>
20 #include <math.h>
21 #include "ogg.h"
22 #include "ivorbiscodec.h"
23 #include "codebook.h"
24 #include "misc.h"
25 #include "os.h"
27 /* unpacks a codebook from the packet buffer into the codebook struct,
28 readies the codebook auxiliary structures for decode *************/
29 int vorbis_staticbook_unpack(oggpack_buffer *opb,static_codebook *s){
30 long i,j;
31 memset(s,0,sizeof(*s));
33 /* make sure alignment is correct */
34 if(oggpack_read(opb,24)!=0x564342)goto _eofout;
36 /* first the basic parameters */
37 s->dim=oggpack_read(opb,16);
38 s->entries=oggpack_read(opb,24);
39 if(s->entries==-1)goto _eofout;
41 /* codeword ordering.... length ordered or unordered? */
42 switch((int)oggpack_read(opb,1)){
43 case 0:
44 /* unordered */
45 s->lengthlist=(long *)_ogg_malloc(sizeof(*s->lengthlist)*s->entries);
47 /* allocated but unused entries? */
48 if(oggpack_read(opb,1)){
49 /* yes, unused entries */
51 for(i=0;i<s->entries;i++){
52 if(oggpack_read(opb,1)){
53 long num=oggpack_read(opb,5);
54 if(num==-1)goto _eofout;
55 s->lengthlist[i]=num+1;
56 }else
57 s->lengthlist[i]=0;
59 }else{
60 /* all entries used; no tagging */
61 for(i=0;i<s->entries;i++){
62 long num=oggpack_read(opb,5);
63 if(num==-1)goto _eofout;
64 s->lengthlist[i]=num+1;
68 break;
69 case 1:
70 /* ordered */
72 long length=oggpack_read(opb,5)+1;
73 s->lengthlist=(long *)_ogg_malloc(sizeof(*s->lengthlist)*s->entries);
75 for(i=0;i<s->entries;){
76 long num=oggpack_read(opb,_ilog(s->entries-i));
77 if(num==-1)goto _eofout;
78 for(j=0;j<num && i<s->entries;j++,i++)
79 s->lengthlist[i]=length;
80 length++;
83 break;
84 default:
85 /* EOF */
86 return(-1);
89 /* Do we have a mapping to unpack? */
90 switch((s->maptype=oggpack_read(opb,4))){
91 case 0:
92 /* no mapping */
93 break;
94 case 1: case 2:
95 /* implicitly populated value mapping */
96 /* explicitly populated value mapping */
98 s->q_min=oggpack_read(opb,32);
99 s->q_delta=oggpack_read(opb,32);
100 s->q_quant=oggpack_read(opb,4)+1;
101 s->q_sequencep=oggpack_read(opb,1);
104 int quantvals=0;
105 switch(s->maptype){
106 case 1:
107 quantvals=_book_maptype1_quantvals(s);
108 break;
109 case 2:
110 quantvals=s->entries*s->dim;
111 break;
114 /* quantized values */
115 s->quantlist=(long *)_ogg_malloc(sizeof(*s->quantlist)*quantvals);
116 for(i=0;i<quantvals;i++)
117 s->quantlist[i]=oggpack_read(opb,s->q_quant);
119 if(quantvals&&s->quantlist[quantvals-1]==-1)goto _eofout;
121 break;
122 default:
123 goto _errout;
126 /* all set */
127 return(0);
129 _errout:
130 _eofout:
131 vorbis_staticbook_clear(s);
132 return(-1);
135 /* the 'eliminate the decode tree' optimization actually requires the
136 codewords to be MSb first, not LSb. This is an annoying inelegancy
137 (and one of the first places where carefully thought out design
138 turned out to be wrong; Vorbis II and future Ogg codecs should go
139 to an MSb bitpacker), but not actually the huge hit it appears to
140 be. The first-stage decode table catches most words so that
141 bitreverse is not in the main execution path. */
143 static inline ogg_uint32_t bitreverse(register ogg_uint32_t x)
145 unsigned int mask;
146 #if defined(CPU_ARM) && ARM_ARCH >= 6
147 asm ("rev %[x], %[x]" : [x] "+r" (x)); /* swap bytes */
148 #else
149 #if defined(CPU_COLDFIRE)
150 asm ("swap %[x]" : [x] "+r" (x)); /* swap halfwords */
151 #else
152 x = (x>>16) | (x<<16);
153 #endif
154 mask = x&0x00ff00ff;
155 x ^= mask;
156 x = (x >> 8) | (mask << 8); /* bytes swapped */
157 #endif
158 mask = x&0x0f0f0f0f;
159 x ^= mask;
160 x = (x >> 4) | (mask << 4); /* 4-bit units swapped */
161 mask = x&0x33333333;
162 x ^= mask;
163 x = (x >> 2) | (mask << 2); /* 2-bit units swapped */
164 mask = x&0x55555555;
165 x ^= mask;
166 x = (x >> 1) | (mask << 1); /* done */
167 return x;
170 STIN long decode_packed_entry_number(codebook *book,
171 oggpack_buffer *b){
172 int read=book->dec_maxlength;
173 long lo,hi;
174 long lok = oggpack_look(b,book->dec_firsttablen);
176 if (LIKELY(lok >= 0)) {
177 long entry = book->dec_firsttable[lok];
178 if(UNLIKELY(entry&0x80000000UL)){
179 lo=(entry>>15)&0x7fff;
180 hi=book->used_entries-(entry&0x7fff);
181 }else{
182 oggpack_adv(b, book->dec_codelengths[entry-1]);
183 return(entry-1);
185 }else{
186 lo=0;
187 hi=book->used_entries;
190 lok = oggpack_look(b, read);
192 while(lok<0 && read>1)
193 lok = oggpack_look(b, --read);
195 if(lok<0){
196 oggpack_adv(b,1); /* force eop */
197 return -1;
200 /* bisect search for the codeword in the ordered list */
202 ogg_uint32_t testword=bitreverse((ogg_uint32_t)lok);
204 while(hi-lo>1){
205 long p=(hi-lo)>>1;
206 long test=book->codelist[lo+p]>testword;
207 lo+=p&(test-1);
208 hi-=p&(-test);
211 if(book->dec_codelengths[lo]<=read){
212 oggpack_adv(b, book->dec_codelengths[lo]);
213 return(lo);
217 oggpack_adv(b, read+1);
218 return(-1);
221 static long decode_packed_block(codebook *book, oggpack_buffer *b,
222 long *buf, int n){
223 long *bufptr = buf;
224 long *bufend = buf + n;
226 while (bufptr<bufend) {
227 if (b->headend > 8) {
228 ogg_uint32_t *ptr;
229 unsigned long bit, bitend;
230 unsigned long adr;
231 ogg_uint32_t cache = 0;
232 int cachesize = 0;
234 adr = (unsigned long)b->headptr;
235 bit = (adr&3)*8+b->headbit;
236 ptr = (ogg_uint32_t *)(adr&~3);
237 bitend = ((adr&3)+b->headend)*8;
238 while (bufptr<bufend){
239 long entry, lo, hi;
240 if (UNLIKELY(cachesize<book->dec_maxlength)) {
241 if (bit-cachesize+32>=bitend)
242 break;
243 bit-=cachesize;
244 cache=letoh32(ptr[bit>>5]) >> (bit&31);
245 if (bit&31)
246 cache|=letoh32(ptr[(bit>>5)+1]) << (32-(bit&31));
247 cachesize=32;
248 bit+=32;
251 entry=book->dec_firsttable[cache&((1<<book->dec_firsttablen)-1)];
252 if(UNLIKELY(entry&0x80000000UL)){
253 lo=(entry>>15)&0x7fff;
254 hi=book->used_entries-(entry&0x7fff);
256 ogg_uint32_t testword=bitreverse((ogg_uint32_t)cache);
258 while(LIKELY(hi-lo>1)){
259 long p=(hi-lo)>>1;
260 if (book->codelist[lo+p]>testword)
261 hi-=p;
262 else
263 lo+=p;
265 entry=lo;
267 }else
268 entry--;
270 *bufptr++=entry;
272 int l=book->dec_codelengths[entry];
273 cachesize-=l;
274 cache>>=l;
278 adr=(unsigned long)b->headptr;
279 bit-=(adr&3)*8+cachesize;
280 b->headend-=(bit/8);
281 b->headptr+=bit/8;
282 b->headbit=bit%8;
283 } else {
284 long r = decode_packed_entry_number(book, b);
285 if (r == -1) return bufptr-buf;
286 *bufptr++ = r;
289 return n;
293 /* Decode side is specced and easier, because we don't need to find
294 matches using different criteria; we simply read and map. There are
295 two things we need to do 'depending':
297 We may need to support interleave. We don't really, but it's
298 convenient to do it here rather than rebuild the vector later.
300 Cascades may be additive or multiplicitive; this is not inherent in
301 the codebook, but set in the code using the codebook. Like
302 interleaving, it's easiest to do it here.
303 addmul==0 -> declarative (set the value)
304 addmul==1 -> additive
305 addmul==2 -> multiplicitive */
307 /* returns the [original, not compacted] entry number or -1 on eof *********/
308 long vorbis_book_decode(codebook *book, oggpack_buffer *b){
309 if(book->used_entries>0){
310 long packed_entry=decode_packed_entry_number(book,b);
311 if(packed_entry>=0)
312 return(book->dec_index[packed_entry]);
315 /* if there's no dec_index, the codebook unpacking isn't collapsed */
316 return(-1);
319 /* returns 0 on OK or -1 on eof *************************************/
320 long vorbis_book_decodevs_add(codebook *book,ogg_int32_t *a,
321 oggpack_buffer *b,int n,int point){
322 if(book->used_entries>0){
323 int step=n/book->dim;
324 long *entry = (long *)alloca(sizeof(*entry)*step);
325 ogg_int32_t **t = (ogg_int32_t **)alloca(sizeof(*t)*step);
326 int i,j,o;
327 int shift=point-book->binarypoint;
329 if(shift>=0){
330 for (i = 0; i < step; i++) {
331 entry[i]=decode_packed_entry_number(book,b);
332 if(entry[i]==-1)return(-1);
333 t[i] = book->valuelist+entry[i]*book->dim;
335 for(i=0,o=0;i<book->dim;i++,o+=step)
336 for (j=0;j<step;j++)
337 a[o+j]+=t[j][i]>>shift;
338 }else{
339 for (i = 0; i < step; i++) {
340 entry[i]=decode_packed_entry_number(book,b);
341 if(entry[i]==-1)return(-1);
342 t[i] = book->valuelist+entry[i]*book->dim;
344 for(i=0,o=0;i<book->dim;i++,o+=step)
345 for (j=0;j<step;j++)
346 a[o+j]+=t[j][i]<<-shift;
349 return(0);
352 long vorbis_book_decodev_add(codebook *book,ogg_int32_t *a,
353 oggpack_buffer *b,int n,int point){
354 if(book->used_entries>0){
355 int i,j,entry;
356 ogg_int32_t *t;
357 int shift=point-book->binarypoint;
359 if(shift>=0){
360 for(i=0;i<n;){
361 entry = decode_packed_entry_number(book,b);
362 if(entry==-1)return(-1);
363 t = book->valuelist+entry*book->dim;
364 for (j=0;j<book->dim;)
365 a[i++]+=t[j++]>>shift;
367 }else{
368 shift = -shift;
369 for(i=0;i<n;){
370 entry = decode_packed_entry_number(book,b);
371 if(entry==-1)return(-1);
372 t = book->valuelist+entry*book->dim;
373 for (j=0;j<book->dim;)
374 a[i++]+=t[j++]<<shift;
378 return(0);
381 long vorbis_book_decodev_set(codebook *book,ogg_int32_t *a,
382 oggpack_buffer *b,int n,int point){
383 if(book->used_entries>0){
384 int i,j,entry;
385 ogg_int32_t *t;
386 int shift=point-book->binarypoint;
388 if(shift>=0){
390 for(i=0;i<n;){
391 entry = decode_packed_entry_number(book,b);
392 if(entry==-1)return(-1);
393 t = book->valuelist+entry*book->dim;
394 for (j=0;j<book->dim;){
395 a[i++]=t[j++]>>shift;
398 }else{
399 shift = -shift;
400 for(i=0;i<n;){
401 entry = decode_packed_entry_number(book,b);
402 if(entry==-1)return(-1);
403 t = book->valuelist+entry*book->dim;
404 for (j=0;j<book->dim;){
405 a[i++]=t[j++]<<shift;
409 }else{
411 int i,j;
412 for(i=0;i<n;){
413 for (j=0;j<book->dim;){
414 a[i++]=0;
418 return(0);
421 static long vorbis_book_decodevv_add_2ch_even(codebook *book,ogg_int32_t **a,
422 long offset,oggpack_buffer *b,
423 int n,int point){
424 long i,k,chunk,read;
425 int shift=point-book->binarypoint;
426 long entries[32];
427 ogg_int32_t *p0 = &(a[0][offset]);
428 ogg_int32_t *p1 = &(a[1][offset]);
430 if(shift>=0){
432 for(i=0;i<n;){
433 chunk=32;
434 if (chunk*book->dim>(n-i)*2)
435 chunk=((n-i)*2+book->dim-1)/book->dim;
436 read = decode_packed_block(book,b,entries,chunk);
437 for(k=0;k<read;k++){
438 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
439 const ogg_int32_t *u = t+book->dim;
441 *p0++ += *t++>>shift;
442 *p1++ += *t++>>shift;
443 }while(t<u);
445 if (read<chunk)return-1;
446 i += read*book->dim/2;
448 }else{
449 shift = -shift;
450 for(i=0;i<n;){
451 chunk=32;
452 if (chunk*book->dim>(n-i)*2)
453 chunk=((n-i)*2+book->dim-1)/book->dim;
454 read = decode_packed_block(book,b,entries,chunk);
455 for(k=0;k<read;k++){
456 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
457 const ogg_int32_t *u = t+book->dim;
459 *p0++ += *t++<<shift;
460 *p1++ += *t++<<shift;
461 }while(t<u);
463 if (read<chunk)return-1;
464 i += read*book->dim/2;
467 return(0);
470 long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a,
471 long offset,int ch,
472 oggpack_buffer *b,int n,int point){
473 if(LIKELY(book->used_entries>0)){
474 long i,j,k,chunk,read;
475 int chptr=0;
476 int shift=point-book->binarypoint;
477 long entries[32];
479 if (!(book->dim&1) && ch==2)
480 return vorbis_book_decodevv_add_2ch_even(book,a,offset,b,n,point);
482 if(shift>=0){
484 for(i=offset;i<offset+n;){
485 chunk=32;
486 if (chunk*book->dim>(offset+n-i)*ch)
487 chunk=((offset+n-i)*ch+book->dim-1)/book->dim;
488 read = decode_packed_block(book,b,entries,chunk);
489 for(k=0;k<read;k++){
490 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
491 for (j=0;j<book->dim;j++){
492 a[chptr++][i]+=t[j]>>shift;
493 if(chptr==ch){
494 chptr=0;
495 i++;
499 if (read<chunk)return-1;
501 }else{
502 shift = -shift;
503 for(i=offset;i<offset+n;){
504 chunk=32;
505 if (chunk*book->dim>(offset+n-i)*ch)
506 chunk=((offset+n-i)*ch+book->dim-1)/book->dim;
507 read = decode_packed_block(book,b,entries,chunk);
508 for(k=0;k<read;k++){
509 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
510 for (j=0;j<book->dim;j++){
511 a[chptr++][i]+=t[j]<<shift;
512 if(chptr==ch){
513 chptr=0;
514 i++;
518 if (read<chunk)return-1;
522 return(0);