Branch optimisation in both C (giving hints to gcc - verified using -fprofile-arcs...
[kugel-rb.git] / apps / codecs / demac / libdemac / entropy.c
blobe8561122a708583499b401d2912418ebb3bdddda
1 /*
3 libdemac - A Monkey's Audio decoder
5 $Id$
7 Copyright (C) Dave Chapman 2007
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
25 #include <inttypes.h>
26 #include <string.h>
28 #include "parser.h"
29 #include "entropy.h"
30 #include "demac_config.h"
32 #define MODEL_ELEMENTS 64
35 The following counts arrays for use with the range decoder are
36 hard-coded in the Monkey's Audio decoder.
39 static const int counts_3970[65] ICONST_ATTR =
41 0,14824,28224,39348,47855,53994,58171,60926,
42 62682,63786,64463,64878,65126,65276,65365,65419,
43 65450,65469,65480,65487,65491,65493,65494,65495,
44 65496,65497,65498,65499,65500,65501,65502,65503,
45 65504,65505,65506,65507,65508,65509,65510,65511,
46 65512,65513,65514,65515,65516,65517,65518,65519,
47 65520,65521,65522,65523,65524,65525,65526,65527,
48 65528,65529,65530,65531,65532,65533,65534,65535,
49 65536
52 /* counts_diff_3970[i] = counts_3970[i+1] - counts_3970[i] */
53 static const int counts_diff_3970[64] ICONST_ATTR =
55 14824,13400,11124,8507,6139,4177,2755,1756,
56 1104,677,415,248,150,89,54,31,
57 19,11,7,4,2,1,1,1,
58 1,1,1,1,1,1,1,1,
59 1,1,1,1,1,1,1,1,
60 1,1,1,1,1,1,1,1,
61 1,1,1,1,1,1,1,1,
62 1,1,1,1,1,1,1,1
65 static const int counts_3980[65] ICONST_ATTR =
67 0,19578,36160,48417,56323,60899,63265,64435,
68 64971,65232,65351,65416,65447,65466,65476,65482,
69 65485,65488,65490,65491,65492,65493,65494,65495,
70 65496,65497,65498,65499,65500,65501,65502,65503,
71 65504,65505,65506,65507,65508,65509,65510,65511,
72 65512,65513,65514,65515,65516,65517,65518,65519,
73 65520,65521,65522,65523,65524,65525,65526,65527,
74 65528,65529,65530,65531,65532,65533,65534,65535,
75 65536
78 /* counts_diff_3980[i] = counts_3980[i+1] - counts_3980[i] */
80 static const int counts_diff_3980[64] ICONST_ATTR =
82 19578,16582,12257,7906,4576,2366,1170,536,
83 261,119,65,31,19,10,6,3,
84 3,2,1,1,1,1,1,1,
85 1,1,1,1,1,1,1,1,
86 1,1,1,1,1,1,1,1,
87 1,1,1,1,1,1,1,1,
88 1,1,1,1,1,1,1,1,
89 1,1,1,1,1,1,1,1
94 Range decoder adapted from rangecod.c included in:
96 http://www.compressconsult.com/rangecoder/rngcod13.zip
98 rangecod.c range encoding
100 (c) Michael Schindler
101 1997, 1998, 1999, 2000
102 http://www.compressconsult.com/
103 michael@compressconsult.com
105 This program is free software; you can redistribute it and/or modify
106 it under the terms of the GNU General Public License as published by
107 the Free Software Foundation; either version 2 of the License, or
108 (at your option) any later version.
111 The encoding functions were removed, and functions turned into "static
112 inline" functions. Some minor cosmetic changes were made (e.g. turning
113 pre-processor symbols into upper-case, removing the rc parameter from
114 each function (and the RNGC macro)).
118 /* BITSTREAM READING FUNCTIONS */
120 /* We deal with the input data one byte at a time - to ensure
121 functionality on CPUs of any endianness regardless of any requirements
122 for aligned reads.
125 static unsigned char* bytebuffer IBSS_ATTR;
126 static int bytebufferoffset IBSS_ATTR;
128 static inline void skip_byte(void)
130 bytebufferoffset--;
131 bytebuffer += bytebufferoffset & 4;
132 bytebufferoffset &= 3;
135 static inline int read_byte(void)
137 int ch = bytebuffer[bytebufferoffset];
139 skip_byte();
141 return ch;
144 /* RANGE DECODING FUNCTIONS */
146 /* SIZE OF RANGE ENCODING CODE VALUES. */
148 #define CODE_BITS 32
149 #define TOP_VALUE ((unsigned int)1 << (CODE_BITS-1))
150 #define SHIFT_BITS (CODE_BITS - 9)
151 #define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
152 #define BOTTOM_VALUE (TOP_VALUE >> 8)
154 struct rangecoder_t
156 uint32_t low; /* low end of interval */
157 uint32_t range; /* length of interval */
158 uint32_t help; /* bytes_to_follow resp. intermediate value */
159 unsigned int buffer; /* buffer for input/output */
162 static struct rangecoder_t rc IBSS_ATTR;
164 /* Start the decoder */
165 static inline void range_start_decoding(void)
167 rc.buffer = read_byte();
168 rc.low = rc.buffer >> (8 - EXTRA_BITS);
169 rc.range = (uint32_t) 1 << EXTRA_BITS;
172 static inline void range_dec_normalize(void)
174 while (rc.range <= BOTTOM_VALUE)
176 rc.buffer = (rc.buffer << 8) | read_byte();
177 rc.low = (rc.low << 8) | ((rc.buffer >> 1) & 0xff);
178 rc.range <<= 8;
182 /* Calculate culmulative frequency for next symbol. Does NO update!*/
183 /* tot_f is the total frequency */
184 /* or: totf is (code_value)1<<shift */
185 /* returns the culmulative frequency */
186 static inline int range_decode_culfreq(int tot_f)
188 range_dec_normalize();
189 rc.help = UDIV32(rc.range, tot_f);
190 return UDIV32(rc.low, rc.help);
193 static inline int range_decode_culshift(int shift)
195 range_dec_normalize();
196 rc.help = rc.range >> shift;
197 return UDIV32(rc.low, rc.help);
201 /* Update decoding state */
202 /* sy_f is the interval length (frequency of the symbol) */
203 /* lt_f is the lower end (frequency sum of < symbols) */
204 static inline void range_decode_update(int sy_f, int lt_f)
206 rc.low -= rc.help * lt_f;
207 rc.range = rc.help * sy_f;
211 /* Decode a byte/short without modelling */
212 static inline unsigned char decode_byte(void)
213 { int tmp = range_decode_culshift(8);
214 range_decode_update( 1,tmp);
215 return tmp;
218 static inline int short range_decode_short(void)
219 { int tmp = range_decode_culshift(16);
220 range_decode_update( 1,tmp);
221 return tmp;
224 /* Decode n bits (n <= 16) without modelling - based on range_decode_short */
225 static inline int range_decode_bits(int n)
226 { int tmp = range_decode_culshift(n);
227 range_decode_update( 1,tmp);
228 return tmp;
232 /* Finish decoding */
233 static inline void range_done_decoding(void)
234 { range_dec_normalize(); /* normalize to use up all bytes */
238 range_get_symbol_* functions based on main decoding loop in simple_d.c from
239 http://www.compressconsult.com/rangecoder/rngcod13.zip
240 (c) Michael Schindler
243 static inline int range_get_symbol_3980(void)
245 int symbol, cf;
247 cf = range_decode_culshift(16);
249 /* figure out the symbol inefficiently; a binary search would be much better */
250 for (symbol = 0; counts_3980[symbol+1] <= cf; symbol++);
252 range_decode_update(counts_diff_3980[symbol],counts_3980[symbol]);
254 return symbol;
257 static inline int range_get_symbol_3970(void)
259 int symbol, cf;
261 cf = range_decode_culshift(16);
263 /* figure out the symbol inefficiently; a binary search would be much better */
264 for (symbol = 0; counts_3970[symbol+1] <= cf; symbol++);
266 range_decode_update(counts_diff_3970[symbol],counts_3970[symbol]);
268 return symbol;
271 /* MAIN DECODING FUNCTIONS */
273 struct rice_t
275 uint32_t k;
276 uint32_t ksum;
279 static struct rice_t riceX IBSS_ATTR;
280 static struct rice_t riceY IBSS_ATTR;
282 static inline void update_rice(struct rice_t* rice, int x)
284 rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);
286 if (UNLIKELY(rice->k == 0)) {
287 rice->k = 1;
288 } else {
289 uint32_t lim = 1 << (rice->k + 4);
290 if (UNLIKELY(rice->ksum < lim)) {
291 rice->k--;
292 } else if (UNLIKELY(rice->ksum >= 2 * lim)) {
293 rice->k++;
298 static inline int entropy_decode3980(struct rice_t* rice)
300 int base, x, pivot, overflow;
302 pivot = rice->ksum >> 5;
303 if (UNLIKELY(pivot == 0))
304 pivot=1;
306 overflow = range_get_symbol_3980();
308 if (UNLIKELY(overflow == (MODEL_ELEMENTS-1))) {
309 overflow = range_decode_short() << 16;
310 overflow |= range_decode_short();
313 if (pivot >= 0x10000) {
314 /* Codepath for 24-bit streams */
315 int nbits, lo_bits, base_hi, base_lo;
317 /* Count the number of bits in pivot */
318 nbits = 17; /* We know there must be at least 17 bits */
319 while ((pivot >> nbits) > 0) { nbits++; }
321 /* base_lo is the low (nbits-16) bits of base
322 base_hi is the high 16 bits of base
324 lo_bits = (nbits - 16);
326 base_hi = range_decode_culfreq((pivot >> lo_bits) + 1);
327 range_decode_update(1, base_hi);
329 base_lo = range_decode_culshift(lo_bits);
330 range_decode_update(1, base_lo);
332 base = (base_hi << lo_bits) + base_lo;
333 } else {
334 /* Codepath for 16-bit streams */
335 base = range_decode_culfreq(pivot);
336 range_decode_update(1, base);
339 x = base + (overflow * pivot);
340 update_rice(rice, x);
342 /* Convert to signed */
343 if (x & 1)
344 return (x >> 1) + 1;
345 else
346 return -(x >> 1);
350 static inline int entropy_decode3970(struct rice_t* rice)
352 int x, tmpk;
354 int overflow = range_get_symbol_3970();
356 if (UNLIKELY(overflow == (MODEL_ELEMENTS - 1))) {
357 tmpk = range_decode_bits(5);
358 overflow = 0;
359 } else {
360 tmpk = (rice->k < 1) ? 0 : rice->k - 1;
363 if (tmpk <= 16) {
364 x = range_decode_bits(tmpk);
365 } else {
366 x = range_decode_short();
367 x |= (range_decode_bits(tmpk - 16) << 16);
369 x += (overflow << tmpk);
371 update_rice(rice, x);
373 /* Convert to signed */
374 if (x & 1)
375 return (x >> 1) + 1;
376 else
377 return -(x >> 1);
380 void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
381 unsigned char* inbuffer, int* firstbyte,
382 int* bytesconsumed)
384 bytebuffer = inbuffer;
385 bytebufferoffset = *firstbyte;
387 /* Read the CRC */
388 ape_ctx->CRC = read_byte();
389 ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
390 ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
391 ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
393 /* Read the frame flags if they exist */
394 ape_ctx->frameflags = 0;
395 if ((ape_ctx->fileversion > 3820) && (ape_ctx->CRC & 0x80000000)) {
396 ape_ctx->CRC &= ~0x80000000;
398 ape_ctx->frameflags = read_byte();
399 ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
400 ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
401 ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
403 /* Keep a count of the blocks decoded in this frame */
404 ape_ctx->blocksdecoded = 0;
406 /* Initialise the rice structs */
407 riceX.k = 10;
408 riceX.ksum = (1 << riceX.k) * 16;
409 riceY.k = 10;
410 riceY.ksum = (1 << riceY.k) * 16;
412 /* The first 8 bits of input are ignored. */
413 skip_byte();
415 range_start_decoding();
417 /* Return the new state of the buffer */
418 *bytesconsumed = (intptr_t)bytebuffer - (intptr_t)inbuffer;
419 *firstbyte = bytebufferoffset;
422 int ICODE_ATTR_DEMAC entropy_decode(struct ape_ctx_t* ape_ctx,
423 unsigned char* inbuffer, int* firstbyte,
424 int* bytesconsumed,
425 int32_t* decoded0, int32_t* decoded1,
426 int blockstodecode)
428 bytebuffer = inbuffer;
429 bytebufferoffset = *firstbyte;
431 ape_ctx->blocksdecoded += blockstodecode;
433 if (ape_ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) {
434 /* We are pure silence, just memset the output buffer. */
435 memset(decoded0, 0, blockstodecode * sizeof(int32_t));
436 memset(decoded1, 0, blockstodecode * sizeof(int32_t));
437 } else {
438 if (ape_ctx->fileversion > 3970) {
439 while (LIKELY(blockstodecode--)) {
440 *(decoded0++) = entropy_decode3980(&riceY);
441 if (decoded1 != NULL)
442 *(decoded1++) = entropy_decode3980(&riceX);
444 } else {
445 while (LIKELY(blockstodecode--)) {
446 *(decoded0++) = entropy_decode3970(&riceY);
447 if (decoded1 != NULL)
448 *(decoded1++) = entropy_decode3970(&riceX);
453 if (ape_ctx->blocksdecoded == ape_ctx->currentframeblocks)
455 range_done_decoding();
458 /* Return the new state of the buffer */
459 *bytesconsumed = bytebuffer - inbuffer;
460 *firstbyte = bytebufferoffset;
462 return(0);