Same fix as r45172 for classes/iconimage:
[AROS-Contrib.git] / MultiMedia / mad / fixed.h
blob 00ade6240fc373547629ffcdc82b83b953c8dc85
/*
 * mad - MPEG audio decoder
 * Copyright (C) 2000-2001 Robert Leslie
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * $Id$
 */

# ifndef LIBMAD_FIXED_H
# define LIBMAD_FIXED_H
# if SIZEOF_INT >= 4
typedef signed int mad_fixed_t;

typedef signed int mad_fixed64hi_t;
typedef unsigned int mad_fixed64lo_t;
# else
typedef signed long mad_fixed_t;

typedef signed long mad_fixed64hi_t;
typedef unsigned long mad_fixed64lo_t;
# endif
/*
 * Fixed-point format: 0xABBBBBBB
 * A == whole part      (sign + 3 bits)
 * B == fractional part (28 bits)
 *
 * Values are signed two's complement, so the effective range is:
 * 0x80000000 to 0x7fffffff
 *       -8.0 to +7.9999999962747097015380859375
 *
 * The smallest representable value is:
 * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9)
 *
 * 28 bits of fractional accuracy represent about
 * 8.6 digits of decimal accuracy.
 *
 * Fixed-point numbers can be added or subtracted as normal
 * integers, but multiplication requires shifting the 64-bit result
 * from 56 fractional bits back to 28 (and rounding.)
 *
 * Changing the definition of MAD_F_FRACBITS is only partially
 * supported, and must be done with care.
 */
# define MAD_F_FRACBITS    28

# if MAD_F_FRACBITS == 28
# define MAD_F(x)          ((mad_fixed_t) (x##L))
# else
# if MAD_F_FRACBITS < 28
# warning "MAD_F_FRACBITS < 28"
# define MAD_F(x)          ((mad_fixed_t)  \
                            (((x##L) +  \
                              (1L << (28 - MAD_F_FRACBITS - 1))) >>  \
                             (28 - MAD_F_FRACBITS)))
# elif MAD_F_FRACBITS > 28
# error "MAD_F_FRACBITS > 28 not currently supported"
# define MAD_F(x)          ((mad_fixed_t)  \
                            ((x##L) << (MAD_F_FRACBITS - 28)))
# endif
# endif

# define MAD_F_MIN         ((mad_fixed_t) -0x80000000L)
# define MAD_F_MAX         ((mad_fixed_t) +0x7fffffffL)

# define MAD_F_ONE         MAD_F(0x10000000)
# define mad_f_tofixed(x)  ((mad_fixed_t)  \
                            ((x) * (double) (1L << MAD_F_FRACBITS) + 0.5))
# define mad_f_todouble(x) ((double)  \
                            ((x) / (double) (1L << MAD_F_FRACBITS)))

# define mad_f_intpart(x)  ((x) >> MAD_F_FRACBITS)
# define mad_f_fracpart(x) ((x) & ((1L << MAD_F_FRACBITS) - 1))
                           /* (x should be positive) */

# define mad_f_fromint(x)  ((x) << MAD_F_FRACBITS)

# define mad_f_add(x, y)   ((x) + (y))
# define mad_f_sub(x, y)   ((x) - (y))
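
/*
 * Usage sketch (illustrative only): how the conversion and arithmetic
 * macros above fit together.  The values shown assume MAD_F_FRACBITS == 28
 * as defined above.
 *
 *   mad_fixed_t one  = MAD_F_ONE;              // 0x10000000 == 1.0
 *   mad_fixed_t half = mad_f_tofixed(0.5);     // 0x08000000
 *   mad_fixed_t sum  = mad_f_add(one, half);   // 0x18000000 == 1.5
 *
 *   mad_f_intpart(sum);                        // 1
 *   mad_f_todouble(mad_f_fracpart(sum));       // 0.5
 *   mad_f_fromint(3);                          // 0x30000000 == 3.0
 */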
# if defined(FPM_64BIT)

/*
 * This version should be the most accurate if 64-bit (long long) types are
 * supported by the compiler, although it may not be the most efficient.
 */
# if defined(OPT_ACCURACY)
# define mad_f_mul(x, y)  \
    ((mad_fixed_t)  \
     ((((signed long long) (x) * (y)) +  \
       (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS))
# else
# define mad_f_mul(x, y)  \
    ((mad_fixed_t) (((signed long long) (x) * (y)) >> MAD_F_SCALEBITS))
# endif

# define MAD_F_SCALEBITS   MAD_F_FRACBITS
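
/*
 * Worked example (illustrative), assuming MAD_F_SCALEBITS == 28: the
 * product of two fixed-point values is a 64-bit quantity with 56
 * fractional bits, which mad_f_mul() shifts back down to 28 (adding
 * 1L << (MAD_F_SCALEBITS - 1) first when OPT_ACCURACY is defined, to
 * round rather than truncate).
 *
 *   x = MAD_F(0x08000000);                          // 0.5
 *   y = MAD_F(0x06000000);                          // 0.375
 *   (signed long long) x * y == 0x0030000000000000  // 0.1875, 56 frac bits
 *   mad_f_mul(x, y)          == 0x03000000          // 0.1875, 28 frac bits
 */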
/* --- Intel --------------------------------------------------------------- */

# elif defined(FPM_INTEL)

/*
 * This Intel version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
# define MAD_F_MLX(hi, lo, x, y)  \
    asm ("imull %3"  \
         : "=a" (lo), "=d" (hi)  \
         : "%a" (x), "rm" (y)  \
         : "cc")

# if defined(OPT_ACCURACY)
/*
 * This gives best accuracy but is not very fast.
 */
# define MAD_F_MLA(hi, lo, x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       MAD_F_MLX(__hi, __lo, (x), (y));  \
       asm ("addl %2,%0\n\t"  \
            "adcl %3,%1"  \
            : "=rm" (lo), "=rm" (hi)  \
            : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi)  \
            : "cc");  \
    })
# endif  /* OPT_ACCURACY */

# if defined(OPT_ACCURACY)
/*
 * Surprisingly, this is faster than SHRD followed by ADC.
 */
# define mad_f_scale64(hi, lo)  \
    ({ mad_fixed64hi_t __hi_;  \
       mad_fixed64lo_t __lo_;  \
       mad_fixed_t __result;  \
       asm ("addl %4,%2\n\t"  \
            "adcl %5,%3"  \
            : "=rm" (__lo_), "=rm" (__hi_)  \
            : "0" (lo), "1" (hi),  \
              "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0)  \
            : "cc");  \
       asm ("shrdl %3,%2,%1"  \
            : "=rm" (__result)  \
            : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)  \
            : "cc");  \
       __result;  \
    })
# else
# define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("shrdl %3,%2,%1"  \
            : "=rm" (__result)  \
            : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)  \
            : "cc");  \
       __result;  \
    })
# endif  /* OPT_ACCURACY */

# define MAD_F_SCALEBITS   MAD_F_FRACBITS
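
/*
 * Portable sketch (illustrative) of the contract the inline assembly above
 * implements: MAD_F_MLX() leaves the full 64-bit signed product in the
 * hi/lo pair, and mad_f_scale64() narrows it back to 28 fractional bits.
 * Roughly equivalent C, assuming a 64-bit long long:
 *
 *   signed long long __p = (signed long long) x * y;
 *   hi = (mad_fixed64hi_t) (__p >> 32);
 *   lo = (mad_fixed64lo_t) __p;
 *   result = (mad_fixed_t) (__p >> MAD_F_SCALEBITS);   // mad_f_scale64()
 *
 * With OPT_ACCURACY, 1L << (MAD_F_SCALEBITS - 1) is added to the 64-bit
 * product before the shift, so the result is rounded to nearest.
 */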
/* --- ARM ----------------------------------------------------------------- */

# elif defined(FPM_ARM)

/*
 * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
 * least significant bit is properly rounded at no CPU cycle cost!
 */
# if 1
/*
 * There's a bug somewhere, possibly in the compiler, that sometimes makes
 * this necessary instead of the default implementation via MAD_F_MLX and
 * mad_f_scale64. It may be related to the use (or lack) of
 * -finline-functions and/or -fstrength-reduce.
 *
 * This is also apparently faster than MAD_F_MLX/mad_f_scale64.
 */
# define mad_f_mul(x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       mad_fixed_t __result;  \
       asm ("smull %0, %1, %3, %4\n\t"  \
            "movs %0, %0, lsr %5\n\t"  \
            "adc %2, %0, %1, lsl %6"  \
            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
            : "%r" (x), "r" (y),  \
              "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
            : "cc");  \
       __result;  \
    })
# endif

# define MAD_F_MLX(hi, lo, x, y)  \
    asm ("smull %0, %1, %2, %3"  \
         : "=&r" (lo), "=&r" (hi)  \
         : "%r" (x), "r" (y))

# define MAD_F_MLA(hi, lo, x, y)  \
    asm ("smlal %0, %1, %2, %3"  \
         : "+r" (lo), "+r" (hi)  \
         : "%r" (x), "r" (y))

# define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("movs %0, %1, lsr %3\n\t"  \
            "adc %0, %0, %2, lsl %4"  \
            : "=r" (__result)  \
            : "r" (lo), "r" (hi),  \
              "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
            : "cc");  \
       __result;  \
    })

# define MAD_F_SCALEBITS   MAD_F_FRACBITS
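
/*
 * What the "movs ... adc" pair above achieves, sketched in portable C
 * (illustrative): MOVS shifts the low word right and leaves the last bit
 * shifted out in the carry flag; ADC then adds in the high bits plus that
 * carry, so the result is rounded to nearest at no extra cost:
 *
 *   carry  = (lo >> (MAD_F_SCALEBITS - 1)) & 1;
 *   result = (lo >> MAD_F_SCALEBITS)
 *          + (hi << (32 - MAD_F_SCALEBITS))
 *          + carry;
 */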
/* --- MIPS ---------------------------------------------------------------- */

# elif defined(FPM_MIPS)

/*
 * This MIPS version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
# define MAD_F_MLX(hi, lo, x, y)  \
    asm ("mult %2,%3"  \
         : "=l" (lo), "=h" (hi)  \
         : "%r" (x), "r" (y))

# if defined(HAVE_MADD_ASM)
# define MAD_F_MLA(hi, lo, x, y)  \
    asm ("madd %2,%3"  \
         : "+l" (lo), "+h" (hi)  \
         : "%r" (x), "r" (y))
# elif defined(HAVE_MADD16_ASM)
/*
 * This loses significant accuracy due to the 16-bit integer limit in the
 * multiply/accumulate instruction.
 */
# define MAD_F_ML0(hi, lo, x, y)  \
    asm ("mult %2,%3"  \
         : "=l" (lo), "=h" (hi)  \
         : "%r" ((x) >> 12), "r" ((y) >> 16))
# define MAD_F_MLA(hi, lo, x, y)  \
    asm ("madd16 %2,%3"  \
         : "+l" (lo), "+h" (hi)  \
         : "%r" ((x) >> 12), "r" ((y) >> 16))
# define MAD_F_MLZ(hi, lo)   ((mad_fixed_t) (lo))
# endif

# if defined(OPT_SPEED)
# define mad_f_scale64(hi, lo)  \
    ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
# define MAD_F_SCALEBITS   MAD_F_FRACBITS
# endif
/* --- SPARC --------------------------------------------------------------- */

# elif defined(FPM_SPARC)

/*
 * This SPARC V8 version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
# define MAD_F_MLX(hi, lo, x, y)  \
    asm ("smul %2, %3, %0\n\t"  \
         "rd %%y, %1"  \
         : "=r" (lo), "=r" (hi)  \
         : "%r" (x), "rI" (y))
/* --- PowerPC ------------------------------------------------------------- */

# elif defined(FPM_PPC)

/*
 * This PowerPC version is tuned for the 4xx embedded processors. It is
 * effectively a tuned version of FPM_64BIT. It is a little faster and just
 * as accurate. The disposition of the least significant bit depends on
 * OPT_ACCURACY via mad_f_scale64().
 */
# define MAD_F_MLX(hi, lo, x, y)  \
    asm ("mulhw %1, %2, %3\n\t"  \
         "mullw %0, %2, %3"  \
         : "=&r" (lo), "=&r" (hi)  \
         : "%r" (x), "r" (y))

# define MAD_F_MLA(hi, lo, x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       MAD_F_MLX(__hi, __lo, (x), (y));  \
       asm ("addc %0, %2, %3\n\t"  \
            "adde %1, %4, %5"  \
            : "=r" (lo), "=r" (hi)  \
            : "%r" (__lo), "0" (lo), "%r" (__hi), "1" (hi));  \
    })

# if defined(OPT_ACCURACY)
/*
 * This is accurate and ~2 - 2.5 times slower than the unrounded version.
 *
 * The __volatile__ improves the generated code by another 5% (fewer spills
 * to memory); eventually they should be removed.
 */
# define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       mad_fixed64hi_t __hi_;  \
       mad_fixed64lo_t __lo_;  \
       asm __volatile__ ("addc %0, %2, %4\n\t"  \
                         "addze %1, %3"  \
                         : "=r" (__lo_), "=r" (__hi_)  \
                         : "r" (lo), "r" (hi), "r" (1 << (MAD_F_SCALEBITS - 1)));  \
       asm __volatile__ ("rlwinm %0, %2,32-%3,0,%3-1\n\t"  \
                         "rlwimi %0, %1,32-%3,%3,31"  \
                         : "=&r" (__result)  \
                         : "r" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS));  \
       __result;  \
    })
# else
# define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("rlwinm %0, %2,32-%3,0,%3-1\n\t"  \
            "rlwimi %0, %1,32-%3,%3,31"  \
            : "=r" (__result)  \
            : "r" (lo), "r" (hi), "I" (MAD_F_SCALEBITS));  \
       __result;  \
    })
# endif  /* OPT_ACCURACY */

# define MAD_F_SCALEBITS   MAD_F_FRACBITS
/* --- Default ------------------------------------------------------------- */

# elif defined(FPM_DEFAULT)

/*
 * This version is the most portable but it loses significant accuracy.
 * Furthermore, accuracy is biased against the second argument, so care
 * should be taken when ordering operands.
 *
 * The scale factors are constant as this is not used with SSO.
 *
 * Pre-rounding is required to stay within the limits of compliance.
 */
# define mad_f_mul(x, y)   ((((x) + (1L << 11)) >> 12) *  \
                            (((y) + (1L << 15)) >> 16))
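
/*
 * Worked example of the accuracy trade-off (illustrative): the first
 * operand keeps 16 fractional bits after pre-rounding, the second only 12,
 * so small values suffer more in the second position.
 *
 *   x = MAD_F(0x10000000);        // 1.0
 *   y = MAD_F(0x00002000);        // ~3.05e-5
 *   mad_f_mul(x, y) == 0          // y is rounded away entirely
 *   mad_f_mul(y, x) == 0x2000     // exact; operand order matters
 */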
/* ------------------------------------------------------------------------- */

# else
# error "no FPM selected"
# endif

/* default implementations */

# if !defined(mad_f_mul)
# define mad_f_mul(x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       MAD_F_MLX(__hi, __lo, (x), (y));  \
       mad_f_scale64(__hi, __lo);  \
    })
# endif
# if !defined(MAD_F_MLA)
# define MAD_F_ML0(hi, lo, x, y)   ((lo)  = mad_f_mul((x), (y)))
# define MAD_F_MLA(hi, lo, x, y)   ((lo) += mad_f_mul((x), (y)))
# define MAD_F_MLZ(hi, lo)         ((void) (hi), (mad_fixed_t) (lo))
# endif

# if !defined(MAD_F_ML0)
# define MAD_F_ML0(hi, lo, x, y)   MAD_F_MLX((hi), (lo), (x), (y))
# endif

# if !defined(MAD_F_MLZ)
# define MAD_F_MLZ(hi, lo)         mad_f_scale64((hi), (lo))
# endif
# if !defined(mad_f_scale64)
# if defined(OPT_ACCURACY)
# define mad_f_scale64(hi, lo)  \
    ((((mad_fixed_t)  \
       (((hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
        ((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
# else
# define mad_f_scale64(hi, lo)  \
    ((mad_fixed_t)  \
     (((hi) << (32 - MAD_F_SCALEBITS)) |  \
      ((lo) >> MAD_F_SCALEBITS)))
# endif
# define MAD_F_SCALEBITS   MAD_F_FRACBITS
# endif
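
/*
 * Worked example (illustrative) for the portable mad_f_scale64() above,
 * with MAD_F_SCALEBITS == 28: the 28-bit shift is assembled from the hi/lo
 * halves of the 64-bit product.
 *
 *   hi = 0x00300000, lo = 0x00000000        // product 0x0030000000000000
 *   (hi << 4) | (lo >> 28) == 0x03000000    // 0.1875, as with FPM_64BIT
 *
 * With OPT_ACCURACY the value is first shifted by only 27 bits, then
 * incremented and halved, which rounds the result to nearest.
 */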
/* miscellaneous C routines */

mad_fixed_t mad_f_abs(mad_fixed_t);

# endif