Improved bitrev with approach suggested by Jens Arnold, gives 0.5%-1% speedup for...
[kugel-rb.git] / apps / codecs / lib / codeclib_misc.h
blob015a15ece3fed68234aafd7dc4e6d9e299048cf3
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: miscellaneous math and prototypes
16 ********************************************************************/
18 //#include "config-tremor.h"
20 #ifndef _V_RANDOM_H_
21 #define _V_RANDOM_H_
22 //#include "ivorbiscodec.h"
23 //#include "os_types.h"
25 //#include "asm_arm.h"
26 //#include "asm_mcf5249.h"
29 /* Some prototypes that were not defined elsewhere */
30 //void *_vorbis_block_alloc(vorbis_block *vb,long bytes);
31 //void _vorbis_block_ripcord(vorbis_block *vb);
32 //extern int _ilog(unsigned int v);
34 #ifndef _V_WIDE_MATH
35 #define _V_WIDE_MATH
37 #ifndef ROCKBOX
38 #include <inttypes.h>
39 #endif /* ROCKBOX */
41 #ifndef _LOW_ACCURACY_
42 /* 64 bit multiply */
43 /* #include <sys/types.h> */
/*
 * Overlay of a 64-bit value with its two 32-bit halves.  The member order
 * follows the target byte order so that `halves.hi` always names the most
 * significant word of `whole` and `halves.lo` the least significant one.
 * No definition is emitted unless one of the ROCKBOX_*_ENDIAN macros
 * evaluates to 1.
 */
#if ROCKBOX_LITTLE_ENDIAN == 1
union magic {
  struct {
    int32_t lo;
    int32_t hi;
  } halves;
  int64_t whole;
};
#elif ROCKBOX_BIG_ENDIAN == 1
union magic {
  struct {
    int32_t hi;
    int32_t lo;
  } halves;
  int64_t whole;
};
#endif
/*
 * Returns the upper 32 bits of the signed 64-bit product x*y.
 *
 * The high word is extracted with a shift rather than through
 * `union magic`, so the function does not depend on the endianness
 * configuration macros.  Right-shifting a negative int64_t is
 * implementation-defined; this relies on the arithmetic shift that GCC
 * and compatible compilers provide.
 */
static inline int32_t MULT32(int32_t x, int32_t y) {
  const int64_t product = (int64_t)x * y;
  return (int32_t)(product >> 32);
}
/*
 * Returns the upper 32 bits of the 64-bit product x*y, shifted left by
 * one bit (the least significant result bit is therefore always 0).
 *
 * The doubling is done on an unsigned value: left-shifting a negative
 * signed integer is undefined behaviour in C, whereas the unsigned shift
 * simply wraps and yields the same bit pattern.
 */
static inline int32_t MULT31(int32_t x, int32_t y) {
  const int64_t product = (int64_t)x * y;
  const uint32_t high = (uint32_t)((uint64_t)product >> 32);
  return (int32_t)(high << 1);
}
/*
 * Returns bits 15..46 of the signed 64-bit product x*y, i.e. the low
 * 32 bits of (x*y) >> 15.
 *
 * Both halves of the product are handled as unsigned words so that
 * neither shift operates on a negative signed value (left-shifting one
 * is undefined behaviour), and no endian-specific union is required.
 */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
  const uint64_t product = (uint64_t)((int64_t)x * y);
  const uint32_t lo = (uint32_t)product;
  const uint32_t hi = (uint32_t)(product >> 32);
  return (int32_t)((lo >> 15) | (hi << 17));
}
78 #else
79 /* 32 bit multiply, more portable but less accurate */
/*
 * Note: Precision is biased towards the first argument; therefore ordering
 * is important.  Shift values were chosen for the best sound quality after
 * many listening tests.
 */

/*
 * For MULT32 and MULT31: The second argument is always a lookup table
 * value already preshifted from 31 to 8 bits.  We therefore take the
 * opportunity to save on text space and use unsigned char for those
 * tables in this case.
 */
/* Low-accuracy MULT32: drops the 9 low bits of x, then multiplies by y. */
static inline int32_t MULT32(int32_t x, int32_t y) {
  const int32_t reduced = x >> 9;
  return reduced * y; /* y preshifted >>23 */
}
/* Low-accuracy MULT31: drops the 8 low bits of x, then multiplies by y. */
static inline int32_t MULT31(int32_t x, int32_t y) {
  const int32_t reduced = x >> 8;
  return reduced * y; /* y preshifted >>23 */
}
/* Low-accuracy MULT31_SHIFT15: drops the 6 low bits of x, then multiplies by y. */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
  const int32_t reduced = x >> 6;
  return reduced * y; /* y preshifted >>9 */
}
105 #endif
/*
 * The XPROD functions are meant to optimize the cross products found all
 * over the place in mdct.c by forcing memory operation ordering to avoid
 * unnecessary register reloads as soon as memory is being written to.
 * However, this is only beneficial on CPUs with a sane number of general
 * purpose registers, which excludes the Intel x86.  On Intel, better let
 * the compiler actually reload registers directly from original memory by
 * using macros.
 */
/* XPROD32 is a macro (not a function) so that the results are written
   straight into the l-values _x and _y without going through pointers:

     _x = MULT32(_a,_t) + MULT32(_b,_v)
     _y = MULT32(_b,_t) - MULT32(_a,_v)

   All four inputs are copied into locals before anything is stored, so
   each argument is evaluated exactly once and the result stays correct
   when an output l-value is also one of the inputs, e.g.
   XPROD32(a, b, t, v, a, b). */
#define XPROD32(_a, _b, _t, _v, _x, _y)                               \
  { const int32_t xprod32_a_ = (_a);                                  \
    const int32_t xprod32_b_ = (_b);                                  \
    const int32_t xprod32_t_ = (_t);                                  \
    const int32_t xprod32_v_ = (_v);                                  \
    (_x)=MULT32(xprod32_a_,xprod32_t_)+MULT32(xprod32_b_,xprod32_v_); \
    (_y)=MULT32(xprod32_b_,xprod32_t_)-MULT32(xprod32_a_,xprod32_v_); }
124 #ifdef __i386__
/* Macro form of XPROD31; _x and _y are pointers to the results:

     *_x = MULT31(_a,_t) + MULT31(_b,_v)
     *_y = MULT31(_b,_t) - MULT31(_a,_v)

   The inputs are captured in locals first, so every argument is
   evaluated once and the macro gives the same result as the by-value
   inline function used on other targets even when _x or _y points at
   the object that _a or _b was read from. */
#define XPROD31(_a, _b, _t, _v, _x, _y)                                 \
  { const int32_t xprod31_a_ = (_a);                                    \
    const int32_t xprod31_b_ = (_b);                                    \
    const int32_t xprod31_t_ = (_t);                                    \
    const int32_t xprod31_v_ = (_v);                                    \
    *(_x)=MULT31(xprod31_a_,xprod31_t_)+MULT31(xprod31_b_,xprod31_v_);  \
    *(_y)=MULT31(xprod31_b_,xprod31_t_)-MULT31(xprod31_a_,xprod31_v_); }
/* Macro form of XNPROD31; _x and _y are pointers to the results:

     *_x = MULT31(_a,_t) - MULT31(_b,_v)
     *_y = MULT31(_b,_t) + MULT31(_a,_v)

   The inputs are captured in locals first, so every argument is
   evaluated once and the macro gives the same result as the by-value
   inline function used on other targets even when _x or _y points at
   the object that _a or _b was read from. */
#define XNPROD31(_a, _b, _t, _v, _x, _y)                                  \
  { const int32_t xnprod31_a_ = (_a);                                     \
    const int32_t xnprod31_b_ = (_b);                                     \
    const int32_t xnprod31_t_ = (_t);                                     \
    const int32_t xnprod31_v_ = (_v);                                     \
    *(_x)=MULT31(xnprod31_a_,xnprod31_t_)-MULT31(xnprod31_b_,xnprod31_v_); \
    *(_y)=MULT31(xnprod31_b_,xnprod31_t_)+MULT31(xnprod31_a_,xnprod31_v_); }
133 #else
/*
 * Cross product on by-value inputs:
 *   *x = MULT31(a,t) + MULT31(b,v)
 *   *y = MULT31(b,t) - MULT31(a,v)
 * *x is stored before *y.
 */
static inline void XPROD31(int32_t a, int32_t b,
                           int32_t t, int32_t v,
                           int32_t *x, int32_t *y)
{
  const int32_t at = MULT31(a, t);
  const int32_t bv = MULT31(b, v);
  const int32_t bt = MULT31(b, t);
  const int32_t av = MULT31(a, v);

  *x = at + bv;
  *y = bt - av;
}
/*
 * Cross product with the signs swapped relative to XPROD31:
 *   *x = MULT31(a,t) - MULT31(b,v)
 *   *y = MULT31(b,t) + MULT31(a,v)
 * *x is stored before *y.
 */
static inline void XNPROD31(int32_t a, int32_t b,
                            int32_t t, int32_t v,
                            int32_t *x, int32_t *y)
{
  const int32_t at = MULT31(a, t);
  const int32_t bv = MULT31(b, v);
  const int32_t bt = MULT31(b, t);
  const int32_t av = MULT31(a, v);

  *x = at - bv;
  *y = bt + av;
}
150 #endif
152 #ifndef _V_VECT_OPS
153 #define _V_VECT_OPS
/*
 * Element-wise accumulate: x[i] += y[i] for the first n elements.
 * y is only read, hence const-qualified.  Does nothing when n <= 0.
 */
static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
  for (; n > 0; n--) {
    *x++ += *y++;
  }
}
/*
 * Copies the first n elements of y into x, front to back.
 * y is only read, hence const-qualified.  Does nothing when n <= 0.
 */
static inline
void vect_copy(int32_t *x, const int32_t *y, int n)
{
  for (; n > 0; n--) {
    *x++ = *y++;
  }
}
/*
 * Scales data[0..n-1] in place: data[i] = MULT31(data[i], window[i]).
 * The window is walked forwards and only read, hence const-qualified.
 * Does nothing when n <= 0.
 */
static inline
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
  for (; n > 0; n--) {
    *data = MULT31(*data, *window);
    data++;
    window++;
  }
}
/*
 * Scales data[0..n-1] in place: data[i] = MULT31(data[i], window[-i]).
 * The window is walked backwards starting at the element `window` points
 * to, and is only read, hence const-qualified.  Does nothing when n <= 0.
 */
static inline
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
  for (; n > 0; n--) {
    *data = MULT31(*data, *window);
    data++;
    window--;
  }
}
194 #endif
196 #endif
198 #ifndef _V_CLIP_MATH
199 #define _V_CLIP_MATH
/*
 * Clamps x to the signed 16-bit range [-32768, 32767] without branching.
 *
 * Each mask is all ones when x lies beyond the corresponding limit and
 * zero otherwise; the masked excess is then subtracted.  The arithmetic
 * is carried out on unsigned values because the excess terms are computed
 * unconditionally and would overflow a signed int (undefined behaviour)
 * for inputs near INT32_MIN or INT32_MAX; unsigned arithmetic wraps and
 * the mask discards the unused term.
 */
static inline int32_t CLIP_TO_15(int32_t x) {
  const uint32_t ux = (uint32_t)x;
  const uint32_t over = (uint32_t)(x <= 32767) - 1u;   /* ~0 if x > 32767 */
  const uint32_t under = (uint32_t)(x >= -32768) - 1u; /* ~0 if x < -32768 */
  uint32_t ret = ux;

  ret -= over & (ux - 32767u);
  ret -= under & (ux + 32768u);
  return (int32_t)ret;
}
208 #endif
/*
 * Multiplies two mantissa/exponent pairs (a,ap) and (b,bp).
 * Returns the product mantissa and stores its exponent in *p.
 * If either mantissa is zero the result is 0 and *p is left untouched.
 */
static inline int32_t VFLOAT_MULT(int32_t a,int32_t ap,
                                  int32_t b,int32_t bp,
                                  int32_t *p){
  if(!a || !b)
    return 0;

#ifndef _LOW_ACCURACY_
  /* MULT32 keeps the top 32 bits of the product, hence the +32. */
  *p=ap+bp+32;
  return MULT32(a,b);
#else
  /* The operands are pre-shifted by 15 and 16 bits, 31 in total. */
  *p=ap+bp+31;
  return (a>>15)*(b>>16);
#endif
}
/*static inline int32_t VFLOAT_MULTI(int32_t a,int32_t ap,
                                      int32_t i,
                                      int32_t *p){

  int ip=_ilog(abs(i))-31;
  return VFLOAT_MULT(a,ap,i<<-ip,ip,p);
}*/
/*
 * Helper for VFLOAT_ADD: returns m / 2^shift rounded by adding half of the
 * divisor before shifting, or 0 when shift is 32 or more.  Expects
 * shift >= 1.  The sum is formed in 64 bits so that adding the rounding
 * term cannot overflow int32_t.
 */
static inline int32_t vfloat_shift_round_(int32_t m, int shift){
  if(shift>=32)
    return 0;
  return (int32_t)(((int64_t)m+((int64_t)1<<(shift-1)))>>shift);
}

/*
 * Adds two mantissa/exponent pairs (a,ap) and (b,bp).
 * Returns the sum's mantissa and stores its exponent in *p.
 *
 * A zero mantissa makes the other operand the result unchanged.
 * Otherwise both operands are brought to the larger exponent plus one
 * (which leaves a bit of headroom for the carry), added, and the sum is
 * shifted back up by one bit when its top two bits are equal.
 */
static inline int32_t VFLOAT_ADD(int32_t a,int32_t ap,
                                 int32_t b,int32_t bp,
                                 int32_t *p){

  if(!a){
    *p=bp;
    return b;
  }
  if(!b){
    *p=ap;
    return a;
  }

  /* The operand with the smaller exponent loses its low bits here. */
  if(ap>bp){
    *p=ap+1;
    b=vfloat_shift_round_(b,ap-bp+1);
    a>>=1;
  }else{
    *p=bp+1;
    a=vfloat_shift_round_(a,bp-ap+1);
    b>>=1;
  }

  a+=b;

  /* Top two bits equal: one spare bit is available.  The shift is done
     on the unsigned representation because left-shifting a negative
     signed value is undefined behaviour. */
  if(((uint32_t)a&0xc0000000u)==0xc0000000u ||
     ((uint32_t)a&0xc0000000u)==0){
    a=(int32_t)((uint32_t)a<<1);
    (*p)--;
  }
  return(a);
}
275 #endif