dlls/ntdll/math.c

   1 /*
   2  * Math functions
   3  *
   4  * Copyright 2021 Alexandre Julliard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  *
  20  *
  21  * For functions copied from musl libc (http://musl.libc.org/):
  22  * ====================================================
  23  * Copyright 2005-2020 Rich Felker, et al.
  24  *
  25  * Permission is hereby granted, free of charge, to any person obtaining
  26  * a copy of this software and associated documentation files (the
  27  * "Software"), to deal in the Software without restriction, including
  28  * without limitation the rights to use, copy, modify, merge, publish,
  29  * distribute, sublicense, and/or sell copies of the Software, and to
  30  * permit persons to whom the Software is furnished to do so, subject to
  31  * the following conditions:
  32  *
  33  * The above copyright notice and this permission notice shall be
  34  * included in all copies or substantial portions of the Software.
  35  * ====================================================
  36  */
  37
  38 #include <math.h>
  39 #include <float.h>
  40
  41 #include "ntstatus.h"
  42 #define WIN32_NO_STATUS
  43 #include "ntdll_misc.h"
  44
  45 double math_error( int type, const char *name, double arg1, double arg2, double retval )
  46 {
  47     return retval;
  48 }
  49
  50 /* Copied from musl: src/internal/libm.h */
  51 static inline double fp_barrier(double x)
  52 {
  53     volatile double y = x;
  54     return y;
  55 }
  56
  57
  58 /* Based on musl implementation: src/math/round.c */
  59 static double __round(double x)
  60 {
  61     ULONGLONG llx = *(ULONGLONG*)&x, tmp;
  62     int e = (llx >> 52 & 0x7ff) - 0x3ff;
  63
  64     if (e >= 52)
  65         return x;
  66     if (e < -1)
  67         return 0 * x;
  68     else if (e == -1)
  69         return signbit(x) ? -1 : 1;
  70
  71     tmp = 0x000fffffffffffffULL >> e;
  72     if (!(llx & tmp))
  73         return x;
  74     llx += 0x0008000000000000ULL >> e;
  75     llx &= ~tmp;
  76     return *(double*)&llx;
  77 }
  78
/* Copied from musl: src/math/exp_data.c */
/* Lookup table read by pow_exp(). Entries are stored in pairs and addressed
 * with idx = 2 * (k % 128): exp_T[idx] is reinterpreted as a double and used
 * as the "tail" term, and exp_T[idx + 1] is the integer to which the shifted
 * exponent is added to build the bits of the scale factor. According to the
 * comments in pow_exp(), each pair satisfies 2^(k/N) ~= scale * (1 + tail). */
static const UINT64 exp_T[] = {
    0x0ULL, 0x3ff0000000000000ULL,
    0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL,
    0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL,
    0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL,
    0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL,
    0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL,
    0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL,
    0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL,
    0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL,
    0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL,
    0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL,
    0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL,
    0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL,
    0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL,
    0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL,
    0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL,
    0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL,
    0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL,
    0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL,
    0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL,
    0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL,
    0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL,
    0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL,
    0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL,
    0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL,
    0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL,
    0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL,
    0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL,
    0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL,
    0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL,
    0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL,
    0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL,
    0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL,
    0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL,
    0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL,
    0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL,
    0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL,
    0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL,
    0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL,
    0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL,
    0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL,
    0xbc9312607a28698aULL, 0x3feeda4504ac801cULL,
    0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL,
    0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL,
    0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL,
    0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL,
    0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL,
    0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL,
    0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL,
    0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL,
    0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL,
    0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL,
    0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL,
    0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL,
    0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL,
    0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL,
    0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL,
    0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL,
    0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL,
    0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL,
    0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL,
    0xbc845378892be9aeULL, 0x3feea34634ccc320ULL,
    0xbc93cedd78565858ULL, 0x3feea23882552225ULL,
    0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL,
    0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL,
    0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL,
    0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL,
    0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL,
    0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL,
    0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL,
    0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL,
    0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL,
    0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL,
    0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL,
    0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL,
    0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL,
    0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL,
    0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL,
    0xbc94d450d872576eULL, 0x3feea8d99b4492edULL,
    0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL,
    0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL,
    0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL,
    0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL,
    0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL,
    0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL,
    0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL,
    0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL,
    0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL,
    0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL,
    0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL,
    0xbc87c50422622263ULL, 0x3feecc667b5de565ULL,
    0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL,
    0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL,
    0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL,
    0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL,
    0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL,
    0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL,
    0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL,
    0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL,
    0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL,
    0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL,
    0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL,
    0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL,
    0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL,
    0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL,
    0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL,
    0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL,
    0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL,
    0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL,
    0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL,
    0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL,
    0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL,
    0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL,
    0xbc82919e2040220fULL, 0x3fef60e316c98398ULL,
    0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL,
    0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL,
    0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL,
    0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL,
    0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL,
    0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL,
    0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL,
    0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL,
    0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL,
    0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL,
    0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL,
    0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL,
    0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL,
    0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL
};
 210
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
   additional 15 bits precision. IX is the bit representation of x, but
   normalized in the subnormal range using the sign bit for the exponent.

   ix:   bit pattern of x as described above.
   tail: output; receives the low-order part of the result (hi - y + lo).
   Returns y, the high-order part of log(x).

   NOTE(review): the double<->integer reinterpretation below goes through
   casted pointers; this presumably relies on the build disabling
   strict-aliasing optimisations - confirm. */
static double pow_log(UINT64 ix, double *tail)
{
    /* One row per subinterval: invc is used as 1/c, logc and logctail are
       used as the high and low parts of log(c) (see the formulas below). */
    static const struct {
        double invc, logc, logctail;
    } T[] = {
        {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48},
        {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46},
        {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45},
        {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49},
        {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47},
        {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46},
        {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50},
        {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45},
        {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45},
        {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46},
        {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46},
        {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46},
        {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45},
        {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47},
        {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45},
        {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46},
        {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46},
        {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45},
        {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48},
        {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46},
        {0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45},
        {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45},
        {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47},
        {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45},
        {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46},
        {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46},
        {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47},
        {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45},
        {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45},
        {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45},
        {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49},
        {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45},
        {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46},
        {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45},
        {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45},
        {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45},
        {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45},
        {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45},
        {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47},
        {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51},
        {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45},
        {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45},
        {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46},
        {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45},
        {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46},
        {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47},
        {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47},
        {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45},
        {0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47},
        {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45},
        {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48},
        {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45},
        {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51},
        {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51},
        {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46},
        {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48},
        {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45},
        {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45},
        {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45},
        {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45},
        {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47},
        {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45},
        {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45},
        {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46},
        {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46},
        {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47},
        {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45},
        {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45},
        {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45},
        {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46},
        {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47},
    };
    /* Polynomial coefficients used for the log1p(r) approximation below. */
    static const double A[] = {
        -0x1p-1,
        0x1.555555555556p-2 * -2,
        -0x1.0000000000006p-2 * -2,
        0x1.999999959554ep-3 * 4,
        -0x1.555555529a47ap-3 * 4,
        0x1.2495b9b4845e9p-3 * -8,
        -0x1.0002b8b263fc3p-3 * -8
    };
    /* High and low parts of ln(2), multiplied by k below. */
    static const double ln2hi = 0x1.62e42fefa3800p-1,
        ln2lo = 0x1.ef35793c76730p-45;

    double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
    double zhi, zlo, rhi, rlo, ar, ar2, ar3, lo3, lo4, arhi, arhi2;
    UINT64 iz, tmp;
    int k, i;

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3fe6955500000000ULL;
    i = (tmp >> (52 - 7)) % (1 << 7); /* 7-bit index into T (128 rows) */
    k = (INT64)tmp >> 52; /* arithmetic shift */
    iz = ix - (tmp & 0xfffULL << 52); /* take k out of the exponent field */
    z = *(double*)&iz;
    kd = k;

    /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
    invc = T[i].invc;
    logc = T[i].logc;
    logctail = T[i].logctail;

    /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
    /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
    iz = (iz + (1ULL << 31)) & (-1ULL << 32); /* round to the upper 32 bits */
    zhi = *(double*)&iz;
    zlo = z - zhi;
    rhi = zhi * invc - 1.0;
    rlo = zlo * invc;
    r = rhi + rlo;

    /* k*Ln2 + log(c) + r. */
    t1 = kd * ln2hi + logc;
    t2 = t1 + r;
    lo1 = kd * ln2lo + logctail;
    lo2 = t1 - t2 + r;

    /* Evaluation is optimized assuming superscalar pipelined execution. */
    ar = A[0] * r; /* A[0] = -0.5. */
    ar2 = r * ar;
    ar3 = r * ar2;
    /* k*Ln2 + log(c) + r + A[0]*r*r. */
    arhi = A[0] * rhi;
    arhi2 = rhi * arhi;
    hi = t2 + arhi2;
    lo3 = rlo * (ar + arhi);
    lo4 = t2 - hi + arhi2;
    /* p = log1p(r) - r - A[0]*r*r. */
    p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
    /* Collect every low-order term, then split the sum into result and tail. */
    lo = lo1 + lo2 + lo3 + lo4 + p;
    y = hi + lo;
    *tail = hi - y + lo;
    return y;
}
 413
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
   The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.

   argx and argy are accepted but never read in this implementation.
   The function reads the exp_T table and calls __round(), fabs() and
   fp_barrier().

   NOTE(review): the double<->integer reinterpretation below goes through
   casted pointers; this presumably relies on the build disabling
   strict-aliasing optimisations - confirm. */
static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias)
{
    /* Polynomial coefficients for the exp(r) approximation below. */
    static const double C[] = {
        0x1.ffffffffffdbdp-2,
        0x1.555555555543cp-3,
        0x1.55555cf172b91p-5,
        0x1.1111167a4d017p-7
    };
    static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
        negln2hiN = -0x1.62e42fefa0000p-8,
        negln2loN = -0x1.cf79abc9e3b3ap-47;

    UINT32 abstop;
    UINT64 ki, idx, top, sbits;
    double kd, z, r, r2, scale, tail, tmp;

    /* Biased exponent of x with the sign bit masked off. */
    abstop = (*(UINT64*)&x >> 52) & 0x7ff;
    /* Unsigned wrap-around makes this one comparison catch both
       abstop < 0x3c9 and abstop >= 0x408. */
    if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
        if (abstop - 0x3c9 >= 0x80000000) {
            /* Avoid spurious underflow for tiny x. */
            /* Note: 0 is common input. */
            double one = 1.0 + x;
            return sign_bias ? -one : one;
        }
        if (abstop >= 0x409) {
            /* Note: inf and nan are already handled. */
            if (*(UINT64*)&x >> 63)
                return (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN;
            return (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX;
        }
        /* Large x is special cased below. */
        abstop = 0;
    }

    /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
    /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
    z = invln2N * x;
    kd = __round(z);
    ki = (INT64)kd;
    r = x + kd * negln2hiN + kd * negln2loN;
    /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
    r += xtail;
    /* 2^(k/N) ~= scale * (1 + tail). */
    idx = 2 * (ki % (1 << 7)); /* exp_T holds (tail, scale) pairs */
    top = (ki + sign_bias) << (52 - 7);
    tail = *(double*)&exp_T[idx];
    /* This is only a valid scale when -1023*N < k < 1024*N. */
    sbits = exp_T[idx + 1] + top;
    /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
    /* Evaluation is optimized assuming superscalar pipelined execution. */
    r2 = r * r;
    /* Without fma the worst case error is 0.25/N ulp larger. */
    /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
    tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
    if (abstop == 0) {
        /* Handle cases that may overflow or underflow when computing the result that
           is scale*(1+TMP) without intermediate rounding. The bit representation of
           scale is in SBITS, however it has a computed exponent that may have
           overflowed into the sign bit so that needs to be adjusted before using it as
           a double. (int32_t)KI is the k used in the argument reduction and exponent
           adjustment of scale, positive k here means the result may overflow and
           negative k means the result may underflow. */
        /* Note: this scale shadows the function-level scale declared above. */
        double scale, y;

        if ((ki & 0x80000000) == 0) {
            /* k > 0, the exponent of scale might have overflowed by <= 460. */
            sbits -= 1009ull << 52;
            scale = *(double*)&sbits;
            y = 0x1p1009 * (scale + scale * tmp);
            return y;
        }
        /* k < 0, need special care in the subnormal range. */
        sbits += 1022ull << 52;
        /* Note: sbits is signed scale. */
        scale = *(double*)&sbits;
        y = scale + scale * tmp;
        if (fabs(y) < 1.0) {
            /* Round y to the right precision before scaling it into the subnormal
               range to avoid double rounding that can cause 0.5+E/2 ulp error where
               E is the worst-case ulp error outside the subnormal range. So this
               is only useful if the goal is better than 1 ulp worst-case error. */
            double hi, lo, one = 1.0;
            if (y < 0.0)
                one = -1.0;
            lo = scale - y + scale * tmp;
            hi = one + y;
            lo = one - hi + y + lo;
            y = hi + lo - one;
            /* Fix the sign of 0. */
            if (y == 0.0) {
                sbits &= 0x8000000000000000ULL;
                y = *(double*)&sbits;
            }
            /* The underflow exception needs to be signaled explicitly. */
            fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
            y = 0x1p-1022 * y;
            return y;
        }
        y = 0x1p-1022 * y;
        return y;
    }
    scale = *(double*)&sbits;
    /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
       is no spurious underflow here even without fma. */
    return scale + scale * tmp;
}
 522
 523 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
 524    the bit representation of a non-zero finite floating-point value. */
 525 static inline int pow_checkint(UINT64 iy)
 526 {
 527     int e = iy >> 52 & 0x7ff;
 528     if (e < 0x3ff)
 529         return 0;
 530     if (e > 0x3ff + 52)
 531         return 2;
 532     if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
 533         return 0;
 534     if (iy & (1ULL << (0x3ff + 52 - e)))
 535         return 1;
 536     return 2;
 537 }
 538
 539 /* Copied from musl: src/math/__fpclassify.c */
 540 static short _dclass(double x)
 541 {
 542     union { double f; UINT64 i; } u = { x };
 543     int e = u.i >> 52 & 0x7ff;
 544
 545     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
 546     if (e == 0x7ff) return (u.i << 12) ? FP_NAN : FP_INFINITE;
 547     return FP_NORMAL;
 548 }
 549
 550 static BOOL sqrt_validate( double *x, BOOL update_sw )
 551 {
 552     short c = _dclass(*x);
 553
 554     if (c == FP_ZERO) return FALSE;
 555     if (c == FP_NAN)
 556     {
 557         /* set signaling bit */
 558         *(ULONGLONG*)x |= 0x8000000000000ULL;
 559         return FALSE;
 560     }
 561     if (signbit(*x))
 562     {
 563         *x = -NAN;
 564         return FALSE;
 565     }
 566     if (c == FP_INFINITE) return FALSE;
 567     return TRUE;
 568 }
 569
 570
 571 /*********************************************************************
 572  *                  abs   (NTDLL.@)
 573  */
 574 int CDECL abs( int i )
 575 {
 576     return i >= 0 ? i : -i;
 577 }
 578
 579 /*********************************************************************
 580  *                  ceil   (NTDLL.@)
 581  *
 582  * Based on musl: src/math/ceilf.c
 583  */
 584 double CDECL ceil( double x )
 585 {
 586     union {double f; UINT64 i;} u = {x};
 587     int e = (u.i >> 52 & 0x7ff) - 0x3ff;
 588     UINT64 m;
 589
 590     if (e >= 52)
 591         return x;
 592     if (e >= 0) {
 593         m = 0x000fffffffffffffULL >> e;
 594         if ((u.i & m) == 0)
 595             return x;
 596         if (u.i >> 63 == 0)
 597             u.i += m;
 598         u.i &= ~m;
 599     } else {
 600         if (u.i >> 63)
 601             return -0.0;
 602         else if (u.i << 1)
 603             return 1.0;
 604     }
 605     return u.f;
 606 }
 607
 608 /*********************************************************************
 609  *                  fabs   (NTDLL.@)
 610  *
 611  * Copied from musl: src/math/fabsf.c
 612  */
 613 double CDECL fabs( double x )
 614 {
 615     union { double f; UINT64 i; } u = { x };
 616     u.i &= ~0ull >> 1;
 617     return u.f;
 618 }
 619
 620 /*********************************************************************
 621  *                  floor   (NTDLL.@)
 622  *
 623  * Based on musl: src/math/floorf.c
 624  */
 625 double CDECL floor( double x )
 626 {
 627     union {double f; UINT64 i;} u = {x};
 628     int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff;
 629     UINT64 m;
 630
 631     if (e >= 52)
 632         return x;
 633     if (e >= 0) {
 634         m = 0x000fffffffffffffULL >> e;
 635         if ((u.i & m) == 0)
 636             return x;
 637         if (u.i >> 63)
 638             u.i += m;
 639         u.i &= ~m;
 640     } else {
 641         if (u.i >> 63 == 0)
 642             return 0;
 643         else if (u.i << 1)
 644             return -1;
 645     }
 646     return u.f;
 647 }
 648
 649 /*********************************************************************
 650  *                  pow   (NTDLL.@)
 651  *
 652  * Copied from musl: src/math/pow.c
 653  */
 654 double CDECL pow( double x, double y )
 655 {
 656     UINT32 sign_bias = 0;
 657     UINT64 ix, iy;
 658     UINT32 topx, topy;
 659     double lo, hi, ehi, elo, yhi, ylo, lhi, llo;
 660
 661     ix = *(UINT64*)&x;
 662     iy = *(UINT64*)&y;
 663     topx = ix >> 52;
 664     topy = iy >> 52;
 665     if (topx - 0x001 >= 0x7ff - 0x001 ||
 666             (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
 667         /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
 668            and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
 669         /* Special cases: (x < 0x1p-126 or inf or nan) or
 670            (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
 671         if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
 672             if (2 * iy == 0)
 673                 return 1.0;
 674             if (ix == 0x3ff0000000000000ULL)
 675                 return 1.0;
 676             if (2 * ix > 2 * 0x7ff0000000000000ULL ||
 677                     2 * iy > 2 * 0x7ff0000000000000ULL)
 678                 return x + y;
 679             if (2 * ix == 2 * 0x3ff0000000000000ULL)
 680                 return 1.0;
 681             if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63))
 682                 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
 683             return y * y;
 684         }
 685         if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
 686             double x2 = x * x;
 687             if (ix >> 63 && pow_checkint(iy) == 1)
 688                 x2 = -x2;
 689             if (iy & 0x8000000000000000ULL && x2 == 0.0)
 690                 return 1 / x2;
 691             /* Without the barrier some versions of clang hoist the 1/x2 and
 692                thus division by zero exception can be signaled spuriously. */
 693             return iy >> 63 ? fp_barrier(1 / x2) : x2;
 694         }
 695         /* Here x and y are non-zero finite. */
 696         if (ix >> 63) {
 697             /* Finite x < 0. */
 698             int yint = pow_checkint(iy);
 699             if (yint == 0)
 700                 return 0 / (x - x);
 701             if (yint == 1)
 702                 sign_bias = 0x800 << 7;
 703             ix &= 0x7fffffffffffffff;
 704             topx &= 0x7ff;
 705         }
 706         if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
 707             /* Note: sign_bias == 0 here because y is not odd. */
 708             if (ix == 0x3ff0000000000000ULL)
 709                 return 1.0;
 710             if ((topy & 0x7ff) < 0x3be) {
 711                 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
 712                 return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y;
 713             }
 714             if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800))
 715                 return fp_barrier(DBL_MAX) * DBL_MAX;
 716             return fp_barrier(DBL_MIN) * DBL_MIN;
 717         }
 718         if (topx == 0) {
 719             /* Normalize subnormal x so exponent becomes negative. */
 720             x *= 0x1p52;
 721             ix = *(UINT64*)&x;
 722             ix &= 0x7fffffffffffffff;
 723             ix -= 52ULL << 52;
 724         }
 725     }
 726
 727     hi = pow_log(ix, &lo);
 728     iy &= -1ULL << 27;
 729     yhi = *(double*)&iy;
 730     ylo = y - yhi;
 731     *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27;
 732     llo = fp_barrier(hi - lhi + lo);
 733     ehi = yhi * lhi;
 734     elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
 735     return pow_exp(x, y, ehi, elo, sign_bias);
 736 }
 737
 738 /*********************************************************************
 739  *                  sqrt   (NTDLL.@)
 740  *
 741  * Copied from musl: src/math/sqrt.c
 742  */
 743 double CDECL sqrt( double x )
 744 {
 745     static const double tiny = 1.0e-300;
 746
 747     double z;
 748     int sign = 0x80000000;
 749     int ix0,s0,q,m,t,i;
 750     unsigned int r,t1,s1,ix1,q1;
 751     ULONGLONG ix;
 752
 753     if (!sqrt_validate(&x, TRUE))
 754         return x;
 755
 756     ix = *(ULONGLONG*)&x;
 757     ix0 = ix >> 32;
 758     ix1 = ix;
 759
 760     /* normalize x */
 761     m = ix0 >> 20;
 762     if (m == 0) {  /* subnormal x */
 763         while (ix0 == 0) {
 764             m -= 21;
 765             ix0 |= (ix1 >> 11);
 766             ix1 <<= 21;
 767         }
 768         for (i=0; (ix0 & 0x00100000) == 0; i++)
 769             ix0 <<= 1;
 770         m -= i - 1;
 771         ix0 |= ix1 >> (32 - i);
 772         ix1 <<= i;
 773     }
 774     m -= 1023;    /* unbias exponent */
 775     ix0 = (ix0 & 0x000fffff) | 0x00100000;
 776     if (m & 1) {  /* odd m, double x to make it even */
 777         ix0 += ix0 + ((ix1 & sign) >> 31);
 778         ix1 += ix1;
 779     }
 780     m >>= 1;      /* m = [m/2] */
 781
 782     /* generate sqrt(x) bit by bit */
 783     ix0 += ix0 + ((ix1 & sign) >> 31);
 784     ix1 += ix1;
 785     q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
 786     r = 0x00200000;        /* r = moving bit from right to left */
 787
 788     while (r != 0) {
 789         t = s0 + r;
 790         if (t <= ix0) {
 791             s0   = t + r;
 792             ix0 -= t;
 793             q   += r;
 794         }
 795         ix0 += ix0 + ((ix1 & sign) >> 31);
 796         ix1 += ix1;
 797         r >>= 1;
 798     }
 799
 800     r = sign;
 801     while (r != 0) {
 802         t1 = s1 + r;
 803         t  = s0;
 804         if (t < ix0 || (t == ix0 && t1 <= ix1)) {
 805             s1 = t1 + r;
 806             if ((t1&sign) == sign && (s1 & sign) == 0)
 807                 s0++;
 808             ix0 -= t;
 809             if (ix1 < t1)
 810                 ix0--;
 811             ix1 -= t1;
 812             q1 += r;
 813         }
 814         ix0 += ix0 + ((ix1 & sign) >> 31);
 815         ix1 += ix1;
 816         r >>= 1;
 817     }
 818
 819     /* use floating add to find out rounding direction */
 820     if ((ix0 | ix1) != 0) {
 821         z = 1.0 - tiny; /* raise inexact flag */
 822         if (z >= 1.0) {
 823             z = 1.0 + tiny;
 824             if (q1 == (unsigned int)0xffffffff) {
 825                 q1 = 0;
 826                 q++;
 827             } else if (z > 1.0) {
 828                 if (q1 == (unsigned int)0xfffffffe)
 829                     q++;
 830                 q1 += 2;
 831             } else
 832                 q1 += q1 & 1;
 833         }
 834     }
 835     ix0 = (q >> 1) + 0x3fe00000;
 836     ix1 = q1 >> 1;
 837     if (q & 1)
 838         ix1 |= sign;
 839     ix = ix0 + ((unsigned int)m << 20);
 840     ix <<= 32;
 841     ix |= ix1;
 842     return *(double*)&ix;
 843 }
 844
 845 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
 846
 847 #define FPU_DOUBLE(var) double var; \
 848     __asm__ __volatile__( "fstpl %0;fwait" : "=m" (var) : )
 849 #define FPU_DOUBLES(var1,var2) double var1,var2; \
 850     __asm__ __volatile__( "fstpl %0;fwait" : "=m" (var2) : ); \
 851     __asm__ __volatile__( "fstpl %0;fwait" : "=m" (var1) : )
 852
 853 /*********************************************************************
 854  *              _CIcos (NTDLL.@)
 855  */
 856 double CDECL _CIcos(void)
 857 {
 858     FPU_DOUBLE(x);
 859     return cos(x);
 860 }
 861
 862 /*********************************************************************
 863  *              _CIlog (NTDLL.@)
 864  */
 865 double CDECL _CIlog(void)
 866 {
 867     FPU_DOUBLE(x);
 868     return log(x);
 869 }
 870
 871 /*********************************************************************
 872  *              _CIpow (NTDLL.@)
 873  */
 874 double CDECL _CIpow(void)
 875 {
 876     FPU_DOUBLES(x,y);
 877     return pow(x,y);
 878 }
 879
 880 /*********************************************************************
 881  *              _CIsin (NTDLL.@)
 882  */
 883 double CDECL _CIsin(void)
 884 {
 885     FPU_DOUBLE(x);
 886     return sin(x);
 887 }
 888
 889 /*********************************************************************
 890  *              _CIsqrt (NTDLL.@)
 891  */
 892 double CDECL _CIsqrt(void)
 893 {
 894     FPU_DOUBLE(x);
 895     return sqrt(x);
 896 }
 897
 898 /*********************************************************************
 899  *                  _ftol   (NTDLL.@)
 900  */
 901 LONGLONG CDECL _ftol(void)
 902 {
 903     FPU_DOUBLE(x);
 904     return (LONGLONG)x;
 905 }
 906
 907 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */