Update.
[glibc.git] / sysdeps / powerpc / q_qtos.c
blobcd2715f8e97965505724fff271d66ca360a0dd1d
1 /* 128-bit floating point to 32-bit floating point.
2 Copyright (C) 1997 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #include <quad_float.h>
/* float __q_qtos(const unsigned long long a[2]);
   Convert the 128-bit floating point value 'a' to float.  Round as per
   current rounding flags.

   'a' is read as two 64-bit words: a[0] carries the sign bit, the 15-bit
   biased exponent and the upper 48 mantissa bits; a[1] carries the
   lower 64 mantissa bits.

   Input       Rounding  Output
   +/-0        *         +/-0
   +/-Inf      *         +/-Inf
   +/-NaN      *         +/-NaN (with mantissa truncated)
   +/-SNaN     *         +/-NaN (with mantissa truncated, MSB of mantissa <- 1)
                         && raise VXSNAN
                         [Note: just truncating the mantissa may not give you
                         a SNaN!]
   |a|>=2^128  Nearest   +/-Inf && raise overflow && raise inexact
   |a|>=2^128  Truncate  +/-(2^128-2^104) && raise overflow && raise inexact
   a>=2^128    +Inf      +Inf && raise overflow && raise inexact
   a<=-2^128   +Inf      -(2^128-2^104) && raise overflow && raise inexact
   a>=2^128    -Inf      +(2^128-2^104) && raise overflow && raise inexact
   a<=-2^128   -Inf      -Inf && raise overflow && raise inexact

   We also need to raise 'inexact' if the result will be inexact, which
   depends on the current rounding mode.

   To avoid having to deal with all that, we convert to a 'double'
   that will round correctly (but is not itself rounded correctly),
   and convert that to a float, relying on the double-to-float
   conversion to do the rounding and raise the flags listed above.  */

float
__q_qtos(const unsigned long long a[2])
{
  enum
  {
    QTOS_QUAD_EXP_MASK = 0x7fff,	/* All-ones 15-bit exponent.  */
    QTOS_QUAD_BIAS = 16383,
    QTOS_DOUBLE_EXP_MASK = 0x7ff,	/* All-ones 11-bit exponent.  */
    QTOS_DOUBLE_BIAS = 1023
  };
  const unsigned long long hi = a[0];
  const unsigned long long sign = hi & 0x8000000000000000ULL;
  const unsigned int qexp = (unsigned int) (hi >> 48) & QTOS_QUAD_EXP_MASK;
  unsigned long long frac;
  unsigned long long dexp;
  union {
    double d;
    unsigned long long ull;
  } u;

  /* The upper 48 mantissa bits fit in the 52-bit mantissa of a double
     with four bits to spare.  */
  frac = (hi & 0x0000ffffffffffffULL) << 4;
  /* Set the low bit in the mantissa if any of the bits we are dropping
     were 1.  This ensures correct rounding, and also distinguishes
     0 and Inf from denormalised numbers and SNaN (respectively).  */
  frac |= a[1] != 0;

  if (qexp == QTOS_QUAD_EXP_MASK)
    /* Inf or NaN stays Inf or NaN.  */
    dexp = QTOS_DOUBLE_EXP_MASK;
  else if (qexp == 0)
    /* Zero stays zero; a 128-bit denormal becomes a nonzero double
       denormal, which is far below the range of float.  */
    dexp = 0;
  else if (qexp > QTOS_QUAD_BIAS + QTOS_DOUBLE_BIAS)
    /* Too large even for a double.  Any finite double with this
       exponent is already beyond the range of float, so clamping
       keeps the value finite and out of range.  */
    dexp = QTOS_DOUBLE_EXP_MASK - 1;
  else if (qexp < QTOS_QUAD_BIAS - QTOS_DOUBLE_BIAS + 1)
    /* Too small for a normal double.  The smallest normal double is
       already below the smallest float denormal, so clamping keeps
       the value nonzero and out of range.  */
    dexp = 1;
  else
    /* Representable exponent: just change the bias.  */
    dexp = qexp - (QTOS_QUAD_BIAS - QTOS_DOUBLE_BIAS);

  u.ull = sign | dexp << 52 | frac;
  return (float)u.d;
}