Use FFALIGN and FFMAX3
[mplayer/glamo.git] / tremor / misc.h
blobcb9b66a67849b0d63b033e76b46cd55f86276809
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: miscellaneous math and prototypes
16 ********************************************************************/
18 #ifndef _V_RANDOM_H_
19 #define _V_RANDOM_H_
20 #include "ivorbiscodec.h"
21 #include "os_types.h"
23 #include "asm_arm.h"
25 #ifndef _V_WIDE_MATH
26 #define _V_WIDE_MATH
28 #ifndef _LOW_ACCURACY_
29 /* 64 bit multiply */
31 #include <sys/types.h>
32 #include "config.h"
34 #ifndef WORDS_BIGENDIAN
35 union magic {
36 struct {
37 ogg_int32_t lo;
38 ogg_int32_t hi;
39 } halves;
40 ogg_int64_t whole;
42 #else
43 union magic {
44 struct {
45 ogg_int32_t hi;
46 ogg_int32_t lo;
47 } halves;
48 ogg_int64_t whole;
50 #endif
52 static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
53 union magic magic;
54 magic.whole = (ogg_int64_t)x * y;
55 return magic.halves.hi;
58 static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
59 return MULT32(x,y)<<1;
62 static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
63 union magic magic;
64 magic.whole = (ogg_int64_t)x * y;
65 return ((ogg_uint32_t)(magic.halves.lo)>>15) | ((magic.halves.hi)<<17);
68 #else
69 /* 32 bit multiply, more portable but less accurate */
72 * Note: Precision is biased towards the first argument therefore ordering
73 * is important. Shift values were chosen for the best sound quality after
74 * many listening tests.
78 * For MULT32 and MULT31: The second argument is always a lookup table
79 * value already preshifted from 31 to 8 bits. We therefore take the
80 * opportunity to save on text space and use unsigned char for those
81 * tables in this case.
84 static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
85 return (x >> 9) * y; /* y preshifted >>23 */
88 static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
89 return (x >> 8) * y; /* y preshifted >>23 */
92 static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
93 return (x >> 6) * y; /* y preshifted >>9 */
96 #endif
99 * This should be used as a memory barrier, forcing all cached values in
100 * registers to be written back to memory. Might or might not be beneficial
101 * depending on the architecture and compiler.
/* Defined empty here: every use of MB() expands to nothing. */
103 #define MB()
106 * The XPROD functions are meant to optimize the cross products found all
107 * over the place in mdct.c by forcing memory operation ordering to avoid
108 * unnecessary register reloads as soon as memory is being written to.
109 * However this is only beneficial on CPUs with a sane number of general
110 * purpose registers, which excludes the Intel x86. On Intel, better let the
111 * compiler actually reload registers directly from original memory by using
112 * macros.
115 #ifdef __i386__
/*
 * Macro form of XPROD32:
 *   *_x = MULT32(_a,_t) + MULT32(_b,_v)
 *   *_y = MULT32(_b,_t) - MULT32(_a,_v)
 *
 * _a, _b, _t and _v are each expanded twice, so they must be free of side
 * effects.  The body is wrapped in do { } while (0) so that the macro
 * followed by ';' is exactly one statement and can be used in an
 * unbraced if/else just like the function variant.
 */
#define XPROD32(_a, _b, _t, _v, _x, _y)          \
  do {                                           \
    *(_x) = MULT32(_a, _t) + MULT32(_b, _v);     \
    *(_y) = MULT32(_b, _t) - MULT32(_a, _v);     \
  } while (0)
/*
 * Macro form of XPROD31:
 *   *_x = MULT31(_a,_t) + MULT31(_b,_v)
 *   *_y = MULT31(_b,_t) - MULT31(_a,_v)
 *
 * _a, _b, _t and _v are each expanded twice, so they must be free of side
 * effects.  The body is wrapped in do { } while (0) so that the macro
 * followed by ';' is exactly one statement and can be used in an
 * unbraced if/else just like the function variant.
 */
#define XPROD31(_a, _b, _t, _v, _x, _y)          \
  do {                                           \
    *(_x) = MULT31(_a, _t) + MULT31(_b, _v);     \
    *(_y) = MULT31(_b, _t) - MULT31(_a, _v);     \
  } while (0)
/*
 * Macro form of XNPROD31:
 *   *_x = MULT31(_a,_t) - MULT31(_b,_v)
 *   *_y = MULT31(_b,_t) + MULT31(_a,_v)
 *
 * _a, _b, _t and _v are each expanded twice, so they must be free of side
 * effects.  The body is wrapped in do { } while (0) so that the macro
 * followed by ';' is exactly one statement and can be used in an
 * unbraced if/else just like the function variant.
 */
#define XNPROD31(_a, _b, _t, _v, _x, _y)         \
  do {                                           \
    *(_x) = MULT31(_a, _t) - MULT31(_b, _v);     \
    *(_y) = MULT31(_b, _t) + MULT31(_a, _v);     \
  } while (0)
127 #else
129 static inline void XPROD32(ogg_int32_t a, ogg_int32_t b,
130 ogg_int32_t t, ogg_int32_t v,
131 ogg_int32_t *x, ogg_int32_t *y)
133 *x = MULT32(a, t) + MULT32(b, v);
134 *y = MULT32(b, t) - MULT32(a, v);
137 static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
138 ogg_int32_t t, ogg_int32_t v,
139 ogg_int32_t *x, ogg_int32_t *y)
141 *x = MULT31(a, t) + MULT31(b, v);
142 *y = MULT31(b, t) - MULT31(a, v);
145 static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
146 ogg_int32_t t, ogg_int32_t v,
147 ogg_int32_t *x, ogg_int32_t *y)
149 *x = MULT31(a, t) - MULT31(b, v);
150 *y = MULT31(b, t) + MULT31(a, v);
153 #endif
155 #endif
157 #ifndef _V_CLIP_MATH
158 #define _V_CLIP_MATH
160 static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
161 int ret=x;
162 ret-= ((x<=32767)-1)&(x-32767);
163 ret-= ((x>=-32768)-1)&(x+32768);
164 return(ret);
167 #endif
169 static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
170 ogg_int32_t b,ogg_int32_t bp,
171 ogg_int32_t *p){
172 if(a && b){
173 #ifndef _LOW_ACCURACY_
174 *p=ap+bp+32;
175 return MULT32(a,b);
176 #else
177 *p=ap+bp+31;
178 return (a>>15)*(b>>16);
179 #endif
180 }else
181 return 0;
184 static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
185 ogg_int32_t i,
186 ogg_int32_t *p){
188 int ip=_ilog(abs(i))-31;
189 return VFLOAT_MULT(a,ap,i<<-ip,ip,p);
192 static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
193 ogg_int32_t b,ogg_int32_t bp,
194 ogg_int32_t *p){
196 if(!a){
197 *p=bp;
198 return b;
199 }else if(!b){
200 *p=ap;
201 return a;
204 /* yes, this can leak a bit. */
205 if(ap>bp){
206 int shift=ap-bp+1;
207 *p=ap+1;
208 a>>=1;
209 if(shift<32){
210 b=(b+(1<<(shift-1)))>>shift;
211 }else{
212 b=0;
214 }else{
215 int shift=bp-ap+1;
216 *p=bp+1;
217 b>>=1;
218 if(shift<32){
219 a=(a+(1<<(shift-1)))>>shift;
220 }else{
221 a=0;
225 a+=b;
226 if((a&0xc0000000)==0xc0000000 ||
227 (a&0xc0000000)==0){
228 a<<=1;
229 (*p)--;
231 return(a);
234 #endif