mono/metadata/decimal-ms.c

   1 /**
   2  * \file
   3  * Copyright (c) Microsoft. All rights reserved.
   4  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
   5  *
   6  * Copyright 2015 Xamarin Inc
   7  *
   8  * File: decimal.c
   9  *
  10  * Ported from C++ to C and adjusted to Mono runtime
  11  *
  12  * Pending:
  13  *   DoToCurrency (they look like new methods we do not have)
  14  */
  15 #ifndef DISABLE_DECIMAL
  16 #include "config.h"
  17 #include <stdint.h>
  18 #include <glib.h>
  19 #include <mono/utils/mono-compiler.h>
  20 #include <mono/metadata/exception.h>
  21 #include <mono/metadata/object-internals.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <math.h>
  26 #ifdef HAVE_MEMORY_H
  27 #include <memory.h>
  28 #endif
  29 #ifdef _MSC_VER
  30 #include <intrin.h>
  31 #endif
  32 #include "decimal-ms.h"
  33 #include "number-ms.h"
  34
  35 #define min(a, b) (((a) < (b)) ? (a) : (b))
  36
  37 typedef enum {
  38         MONO_DECIMAL_OK,
  39         MONO_DECIMAL_OVERFLOW,
  40         MONO_DECIMAL_INVALID_ARGUMENT,
  41         MONO_DECIMAL_DIVBYZERO,
  42         MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
  43 } MonoDecimalStatus;
  44
  45 #ifndef FC_GC_POLL
  46 #   define FC_GC_POLL()
  47 #endif
  48
  49 static const uint32_t ten_to_nine    = 1000000000U;
  50 static const uint32_t ten_to_ten_div_4 = 2500000000U;
  51 #define POWER10_MAX     9
  52 #define DECIMAL_NEG ((uint8_t)0x80)
  53 #define DECMAX 28
  54 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
  55 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
  56 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
  57 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
  58 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
  59 #define DECIMAL_HI32(dec)        ((dec).Hi32)
  60 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
  61 # define DECIMAL_LO64_GET(dec)   (((uint64_t)((dec).v.v.Mid32) << 32) | (dec).v.v.Lo32)
  62 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.v.Lo32 = (value); (dec).v.v.Mid32 = ((value) >> 32); }
  63 #else
  64 # define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
  65 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
  66 #endif
  67
  68 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
  69 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
  70     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
  71
  72 #define DEC_SCALE_MAX   28
  73 #define POWER10_MAX     9
  74
  75 #define OVFL_MAX_9_HI   4
  76 #define OVFL_MAX_9_MID  1266874889
  77 #define OVFL_MAX_9_LO   3047500985u
  78
  79 #define OVFL_MAX_5_HI   42949
  80 #define OVFL_MAX_5_MID  2890341191
  81
  82 #define OVFL_MAX_1_HI   429496729
  83
  84 typedef union {
  85         uint64_t int64;
  86         struct {
  87 #if BYTE_ORDER == G_BIG_ENDIAN
  88         uint32_t Hi;
  89         uint32_t Lo;
  90 #else
  91         uint32_t Lo;
  92         uint32_t Hi;
  93 #endif
  94     } u;
  95 } SPLIT64;
  96
  97 static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
  98
  99 static const MonoDouble_double ds2to64 = { .s = { .sign = 0, .exp = MONO_DOUBLE_BIAS + 65, .mantHi = 0, .mantLo = 0 } };
 100
 101 //
 102 // Data tables
 103 //
 104
 105 static const uint32_t power10 [POWER10_MAX+1] = {
 106         1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
 107 };
 108
 109
 110 static const double double_power10[] = {
 111         1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
 112         1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
 113         1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
 114         1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
 115         1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
 116         1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
 117         1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
 118         1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
 119         1e80 };
 120
 121 static const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
 122                                 {100000000000ULL},         // 1E11
 123                                 {1000000000000ULL},        // 1E12
 124                                 {10000000000000ULL},       // 1E13
 125                                 {100000000000000ULL} };    // 1E14
 126
 127 static const uint64_t long_power10[] = {
 128         1,
 129         10ULL,
 130         100ULL,
 131         1000ULL,
 132         10000ULL,
 133         100000ULL,
 134         1000000ULL,
 135         10000000ULL,
 136         100000000ULL,
 137         1000000000ULL,
 138         10000000000ULL,
 139         100000000000ULL,
 140         1000000000000ULL,
 141         10000000000000ULL,
 142         100000000000000ULL,
 143         1000000000000000ULL,
 144         10000000000000000ULL,
 145         100000000000000000ULL,
 146         1000000000000000000ULL,
 147         10000000000000000000ULL};
 148
 149 typedef struct  {
 150         uint32_t Hi, Mid, Lo;
 151 } DECOVFL;
 152
 153 static const DECOVFL power_overflow[] = {
 154 // This is a table of the largest values that can be in the upper two
 155 // ULONGs of a 96-bit number that will not overflow when multiplied
 156 // by a given power.  For the upper word, this is a table of
 157 // 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
 158 // remaining fraction part * 2^32.  2^32 = 4294967296.
 159 //
 160     { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
 161     { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
 162     { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
 163     { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
 164     { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
 165     { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
 166     { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
 167     { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
 168     { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
 169 };
 170
 171 #define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
 172 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
 173 /* hacky endian swap where losing the other end is OK, since we truncate to 32bit */
 174 #define Div64by32(num, den) ((uint32_t) (((uint64_t)(num) / (uint32_t)(den)) >> 32) )
 175 #define Mod64by32(num, den) ((uint32_t) (((uint64_t)(num) % (uint32_t)(den)) >> 32) )
 176 #else
 177 #define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
 178 #define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
 179 #endif
 180
 181 static double
 182 fnDblPower10(int ix)
 183 {
 184     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
 185     g_assert(ix >= 0);
 186     if (ix < maxIx)
 187         return double_power10[ix];
 188     return pow(10.0, ix);
 189 } // double fnDblPower10()
 190
 191
 192 static inline int64_t
 193 DivMod32by32(int32_t num, int32_t den)
 194 {
 195     SPLIT64  sdl;
 196
 197     sdl.u.Lo = num / den;
 198     sdl.u.Hi = num % den;
 199     return sdl.int64;
 200 }
 201
 202 static inline int64_t
 203 DivMod64by32(int64_t num, int32_t den)
 204 {
 205     SPLIT64  sdl;
 206
 207     sdl.u.Lo = Div64by32(num, den);
 208     sdl.u.Hi = Mod64by32(num, den);
 209     return sdl.int64;
 210 }
 211
 212 static uint64_t
 213 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
 214 {
 215         SPLIT64  tmp1;
 216         SPLIT64  tmp2;
 217         SPLIT64  tmp3;
 218
 219         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
 220         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
 221         tmp1.u.Hi += tmp2.u.Lo;
 222         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 223                 tmp2.u.Hi++;
 224         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
 225         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
 226         tmp1.u.Hi += tmp2.u.Lo;
 227         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 228                 tmp2.u.Hi++;
 229         tmp3.int64 += (uint64_t)tmp2.u.Hi;
 230
 231         *hi = tmp3.int64;
 232         return tmp1.int64;
 233 }
 234
 235 /**
 236 * FullDiv64By32:
 237 *
 238 * Entry:
 239 *   pdlNum  - Pointer to 64-bit dividend
 240 *   ulDen   - 32-bit divisor
 241 *
 242 * Purpose:
 243 *   Do full divide, yielding 64-bit result and 32-bit remainder.
 244 *
 245 * Exit:
 246 *   Quotient overwrites dividend.
 247 *   Returns remainder.
 248 *
 249 * Exceptions:
 250 *   None.
 251 */
 252 // Was: FullDiv64By32
 253 static uint32_t
 254 FullDiv64By32 (uint64_t *num, uint32_t den)
 255 {
 256         SPLIT64  tmp;
 257         SPLIT64  res;
 258
 259         tmp.int64 = *num;
 260         res.u.Hi = 0;
 261
 262         if (tmp.u.Hi >= den) {
 263                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
 264                 //
 265                 res.u.Lo = tmp.u.Hi;
 266                 res.int64 = DivMod64by32(res.int64, den);
 267                 tmp.u.Hi = res.u.Hi;
 268                 res.u.Hi = res.u.Lo;
 269         }
 270
 271         tmp.int64 = DivMod64by32(tmp.int64, den);
 272         res.u.Lo = tmp.u.Lo;
 273         *num = res.int64;
 274         return tmp.u.Hi;
 275 }
 276
 277 /***
 278  * SearchScale
 279  *
 280  * Entry:
 281  *   res_hi - Top uint32_t of quotient
 282  *   res_mid - Middle uint32_t of quotient
 283  *   res_lo - Bottom uint32_t of quotient
 284  *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
 285  *
 286  * Purpose:
 287  *   Determine the max power of 10, <= 9, that the quotient can be scaled
 288  *   up by and still fit in 96 bits.
 289  *
 290  * Exit:
 291  *   Returns power of 10 to scale by, -1 if overflow error.
 292  *
 293  ***********************************************************************/
 294
 295 static int
 296 SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
 297 {
 298         int   cur_scale;
 299
 300         // Quick check to stop us from trying to scale any more.
 301         //
 302         if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
 303                 cur_scale = 0;
 304                 goto HaveScale;
 305         }
 306
 307         if (scale > DEC_SCALE_MAX - 9) {
 308                 // We can't scale by 10^9 without exceeding the max scale factor.
 309                 // See if we can scale to the max.  If not, we'll fall into
 310                 // standard search for scale factor.
 311                 //
 312                 cur_scale = DEC_SCALE_MAX - scale;
 313                 if (res_hi < power_overflow[cur_scale - 1].Hi)
 314                         goto HaveScale;
 315
 316                 if (res_hi == power_overflow[cur_scale - 1].Hi) {
 317                 UpperEq:
 318                         if (res_mid > power_overflow[cur_scale - 1].Mid ||
 319                             (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
 320                                 cur_scale--;
 321                         }
 322                         goto HaveScale;
 323                 }
 324         } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
 325                 return 9;
 326
 327         // Search for a power to scale by < 9.  Do a binary search
 328         // on power_overflow[].
 329         //
 330         cur_scale = 5;
 331         if (res_hi < OVFL_MAX_5_HI)
 332                 cur_scale = 7;
 333         else if (res_hi > OVFL_MAX_5_HI)
 334                 cur_scale = 3;
 335         else
 336                 goto UpperEq;
 337
 338         // cur_scale is 3 or 7.
 339         //
 340         if (res_hi < power_overflow[cur_scale - 1].Hi)
 341                 cur_scale++;
 342         else if (res_hi > power_overflow[cur_scale - 1].Hi)
 343                 cur_scale--;
 344         else
 345                 goto UpperEq;
 346
 347         // cur_scale is 2, 4, 6, or 8.
 348         //
 349         // In all cases, we already found we could not use the power one larger.
 350         // So if we can use this power, it is the biggest, and we're done.  If
 351         // we can't use this power, the one below it is correct for all cases
 352         // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
 353         //
 354         if (res_hi > power_overflow[cur_scale - 1].Hi)
 355                 cur_scale--;
 356
 357         if (res_hi == power_overflow[cur_scale - 1].Hi)
 358                 goto UpperEq;
 359
 360 HaveScale:
 361         // cur_scale = largest power of 10 we can scale by without overflow,
 362         // cur_scale < 9.  See if this is enough to make scale factor
 363         // positive if it isn't already.
 364         //
 365         if (cur_scale + scale < 0)
 366                 cur_scale = -1;
 367
 368         return cur_scale;
 369 }
 370
 371
 372 /**
 373 * Div96By32
 374 *
 375 * Entry:
 376 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
 377 *   ulDen   - 32-bit divisor.
 378 *
 379 * Purpose:
 380 *   Do full divide, yielding 96-bit result and 32-bit remainder.
 381 *
 382 * Exit:
 383 *   Quotient overwrites dividend.
 384 *   Returns remainder.
 385 *
 386 * Exceptions:
 387 *   None.
 388 *
 389 */
 390 static uint32_t
 391 Div96By32(uint32_t *num, uint32_t den)
 392 {
 393         SPLIT64  tmp;
 394
 395         tmp.u.Hi = 0;
 396
 397         if (num[2] != 0)
 398                 goto Div3Word;
 399
 400         if (num[1] >= den)
 401                 goto Div2Word;
 402
 403         tmp.u.Hi = num[1];
 404         num[1] = 0;
 405         goto Div1Word;
 406
 407 Div3Word:
 408         tmp.u.Lo = num[2];
 409         tmp.int64 = DivMod64by32(tmp.int64, den);
 410         num[2] = tmp.u.Lo;
 411 Div2Word:
 412         tmp.u.Lo = num[1];
 413         tmp.int64 = DivMod64by32(tmp.int64, den);
 414         num[1] = tmp.u.Lo;
 415 Div1Word:
 416         tmp.u.Lo = num[0];
 417         tmp.int64 = DivMod64by32(tmp.int64, den);
 418         num[0] = tmp.u.Lo;
 419         return tmp.u.Hi;
 420 }
 421
 422 /***
 423  * DecFixInt
 424  *
 425  * Entry:
 426  *   pdecRes - Pointer to Decimal result location
 427  *   operand  - Pointer to Decimal operand
 428  *
 429  * Purpose:
 430  *   Chop the value to integer.  Return remainder so Int() function
 431  *   can round down if non-zero.
 432  *
 433  * Exit:
 434  *   Returns remainder.
 435  *
 436  * Exceptions:
 437  *   None.
 438  *
 439  ***********************************************************************/
 440
 441 static uint32_t
 442 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
 443 {
 444         uint32_t   num[3];
 445         uint32_t   rem;
 446         uint32_t   pwr;
 447         int     scale;
 448
 449         if (operand->u.u.scale > 0) {
 450                 num[0] = operand->v.v.Lo32;
 451                 num[1] = operand->v.v.Mid32;
 452                 num[2] = operand->Hi32;
 453                 scale = operand->u.u.scale;
 454                 result->u.u.sign = operand->u.u.sign;
 455                 rem = 0;
 456
 457                 do {
 458                         if (scale > POWER10_MAX)
 459                                 pwr = ten_to_nine;
 460                         else
 461                                 pwr = power10[scale];
 462
 463                         rem |= Div96By32(num, pwr);
 464                         scale -= 9;
 465                 }while (scale > 0);
 466
 467                 result->v.v.Lo32 = num[0];
 468                 result->v.v.Mid32 = num[1];
 469                 result->Hi32 = num[2];
 470                 result->u.u.scale = 0;
 471
 472                 return rem;
 473         }
 474
 475         COPYDEC(*result, *operand);
 476         // Odd, the Microsoft code does not set result->reserved to zero on this case
 477         return 0;
 478 }
 479
 480 /**
 481  * ScaleResult:
 482  *
 483  * Entry:
 484  *   res - Array of uint32_ts with value, least-significant first.
 485  *   hi_res  - Index of last non-zero value in res.
 486  *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
 487  *
 488  * Purpose:
 489  *   See if we need to scale the result to fit it in 96 bits.
 490  *   Perform needed scaling.  Adjust scale factor accordingly.
 491  *
 492  * Exit:
 493  *   res updated in place, always 3 uint32_ts.
 494  *   New scale factor returned, -1 if overflow error.
 495  *
 496  */
 497 static int
 498 ScaleResult(uint32_t *res, int hi_res, int scale)
 499 {
 500         int     new_scale;
 501         int     cur;
 502         uint32_t   pwr;
 503         uint32_t   tmp;
 504         uint32_t   sticky;
 505         SPLIT64 sdlTmp;
 506
 507         // See if we need to scale the result.  The combined scale must
 508         // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
 509         //
 510         // Start by figuring a lower bound on the scaling needed to make
 511         // the upper 96 bits zero.  hi_res is the index into res[]
 512         // of the highest non-zero uint32_t.
 513         //
 514         new_scale =   hi_res * 32 - 64 - 1;
 515         if (new_scale > 0) {
 516
 517                 // Find the MSB.
 518                 //
 519                 tmp = res[hi_res];
 520                 if (!(tmp & 0xFFFF0000)) {
 521                         new_scale -= 16;
 522                         tmp <<= 16;
 523                 }
 524                 if (!(tmp & 0xFF000000)) {
 525                         new_scale -= 8;
 526                         tmp <<= 8;
 527                 }
 528                 if (!(tmp & 0xF0000000)) {
 529                         new_scale -= 4;
 530                         tmp <<= 4;
 531                 }
 532                 if (!(tmp & 0xC0000000)) {
 533                         new_scale -= 2;
 534                         tmp <<= 2;
 535                 }
 536                 if (!(tmp & 0x80000000)) {
 537                         new_scale--;
 538                         tmp <<= 1;
 539                 }
 540
 541                 // Multiply bit position by log10(2) to figure it's power of 10.
 542                 // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
 543                 // with a multiply saves a 96-byte lookup table.  The power returned
 544                 // is <= the power of the number, so we must add one power of 10
 545                 // to make it's integer part zero after dividing by 256.
 546                 //
 547                 // Note: the result of this multiplication by an approximation of
 548                 // log10(2) have been exhaustively checked to verify it gives the
 549                 // correct result.  (There were only 95 to check...)
 550                 //
 551                 new_scale = ((new_scale * 77) >> 8) + 1;
 552
 553                 // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
 554                 // This reduces the scale factor of the result.  If it exceeds the
 555                 // current scale of the result, we'll overflow.
 556                 //
 557                 if (new_scale > scale)
 558                         return -1;
 559         }
 560         else
 561                 new_scale = 0;
 562
 563         // Make sure we scale by enough to bring the current scale factor
 564         // into valid range.
 565         //
 566         if (new_scale < scale - DEC_SCALE_MAX)
 567                 new_scale = scale - DEC_SCALE_MAX;
 568
 569         if (new_scale != 0) {
 570                 // Scale by the power of 10 given by new_scale.  Note that this is
 571                 // NOT guaranteed to bring the number within 96 bits -- it could
 572                 // be 1 power of 10 short.
 573                 //
 574                 scale -= new_scale;
 575                 sticky = 0;
 576                 sdlTmp.u.Hi = 0; // initialize remainder
 577
 578                 for (;;) {
 579
 580                         sticky |= sdlTmp.u.Hi; // record remainder as sticky bit
 581
 582                         if (new_scale > POWER10_MAX)
 583                                 pwr = ten_to_nine;
 584                         else
 585                                 pwr = power10[new_scale];
 586
 587                         // Compute first quotient.
 588                         // DivMod64by32 returns quotient in Lo, remainder in Hi.
 589                         //
 590                         sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
 591                         res[hi_res] = sdlTmp.u.Lo;
 592                         cur = hi_res - 1;
 593
 594                         if (cur >= 0) {
 595                                 // If first quotient was 0, update hi_res.
 596                                 //
 597                                 if (sdlTmp.u.Lo == 0)
 598                                         hi_res--;
 599
 600                                 // Compute subsequent quotients.
 601                                 //
 602                                 do {
 603                                         sdlTmp.u.Lo = res[cur];
 604                                         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
 605                                         res[cur] = sdlTmp.u.Lo;
 606                                         cur--;
 607                                 } while (cur >= 0);
 608
 609                         }
 610
 611                         new_scale -= POWER10_MAX;
 612                         if (new_scale > 0)
 613                                 continue; // scale some more
 614
 615                         // If we scaled enough, hi_res would be 2 or less.  If not,
 616                         // divide by 10 more.
 617                         //
 618                         if (hi_res > 2) {
 619                                 new_scale = 1;
 620                                 scale--;
 621                                 continue; // scale by 10
 622                         }
 623
 624                         // Round final result.  See if remainder >= 1/2 of divisor.
 625                         // If remainder == 1/2 divisor, round up if odd or sticky bit set.
 626                         //
 627                         pwr >>= 1;  // power of 10 always even
 628                         if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
 629                                                     ((res[0] & 1) | sticky)) ) {
 630                                 cur = -1;
 631                                 while (++res[++cur] == 0);
 632
 633                                 if (cur > 2) {
 634                                         // The rounding caused us to carry beyond 96 bits.
 635                                         // Scale by 10 more.
 636                                         //
 637                                         hi_res = cur;
 638                                         sticky = 0;  // no sticky bit
 639                                         sdlTmp.u.Hi = 0; // or remainder
 640                                         new_scale = 1;
 641                                         scale--;
 642                                         continue; // scale by 10
 643                                 }
 644                         }
 645
 646                         // We may have scaled it more than we planned.  Make sure the scale
 647                         // factor hasn't gone negative, indicating overflow.
 648                         //
 649                         if (scale < 0)
 650                                 return -1;
 651
 652                         return scale;
 653                 } // for(;;)
 654         }
 655         return scale;
 656 }
 657
 658 // Decimal multiply
 659 // Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
 660 static MonoDecimalStatus
 661 mono_decimal_multiply_result(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
 662 {
 663         SPLIT64 tmp;
 664         SPLIT64 tmp2;
 665         SPLIT64 tmp3;
 666         int     scale;
 667         int     hi_prod;
 668         uint32_t   pwr;
 669         uint32_t   rem_lo;
 670         uint32_t   rem_hi;
 671         uint32_t   prod[6];
 672
 673         scale = left->u.u.scale + right->u.u.scale;
 674
 675         if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
 676                 // Upper 64 bits are zero.
 677                 //
 678                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 679                 if (scale > DEC_SCALE_MAX)
 680                 {
 681                         // Result scale is too big.  Divide result by power of 10 to reduce it.
 682                         // If the amount to divide by is > 19 the result is guaranteed
 683                         // less than 1/2.  [max value in 64 bits = 1.84E19]
 684                         //
 685                         scale -= DEC_SCALE_MAX;
 686                         if (scale > 19) {
 687                         ReturnZero:
 688                                 DECIMAL_SETZERO(*result);
 689                                 return MONO_DECIMAL_OK;
 690                         }
 691
 692                         if (scale > POWER10_MAX) {
 693                                 // Divide by 1E10 first, to get the power down to a 32-bit quantity.
 694                                 // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
 695                                 // then multiply the next divisor by 4 (which will be a max of 4E9).
 696                                 //
 697                                 rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
 698                                 pwr = power10[scale - 10] << 2;
 699                         } else {
 700                                 pwr = power10[scale];
 701                                 rem_lo = 0;
 702                         }
 703
 704                         // Power to divide by fits in 32 bits.
 705                         //
 706                         rem_hi = FullDiv64By32(&tmp.int64, pwr);
 707
 708                         // Round result.  See if remainder >= 1/2 of divisor.
 709                         // Divisor is a power of 10, so it is always even.
 710                         //
 711                         pwr >>= 1;
 712                         if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
 713                                 tmp.int64++;
 714
 715                         scale = DEC_SCALE_MAX;
 716                 }
 717                 DECIMAL_LO32(*result) = tmp.u.Lo;
 718                 DECIMAL_MID32(*result) = tmp.u.Hi;
 719                 DECIMAL_HI32(*result) = 0;
 720         } else {
 721                 // At least one operand has bits set in the upper 64 bits.
 722                 //
 723                 // Compute and accumulate the 9 partial products into a
 724                 // 192-bit (24-byte) result.
 725                 //
 726                 //                [l-h][l-m][l-l]   left high, middle, low
 727                 //             x  [r-h][r-m][r-l]   right high, middle, low
 728                 // ------------------------------
 729                 //
 730                 //                     [0-h][0-l]   l-l * r-l
 731                 //                [1ah][1al]        l-l * r-m
 732                 //                [1bh][1bl]        l-m * r-l
 733                 //           [2ah][2al]             l-m * r-m
 734                 //           [2bh][2bl]             l-l * r-h
 735                 //           [2ch][2cl]             l-h * r-l
 736                 //      [3ah][3al]                  l-m * r-h
 737                 //      [3bh][3bl]                  l-h * r-m
 738                 // [4-h][4-l]                       l-h * r-h
 739                 // ------------------------------
 740                 // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
 741                 //
 742                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 743                 prod[0] = tmp.u.Lo;
 744
 745                 tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;
 746
 747                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
 748                 tmp.int64 += tmp2.int64; // this could generate carry
 749                 prod[1] = tmp.u.Lo;
 750                 if (tmp.int64 < tmp2.int64) // detect carry
 751                         tmp2.u.Hi = 1;
 752                 else
 753                         tmp2.u.Hi = 0;
 754                 tmp2.u.Lo = tmp.u.Hi;
 755
 756                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;
 757
 758                 if (left->Hi32 | right->Hi32) {
 759                         // Highest 32 bits is non-zero.  Calculate 5 more partial products.
 760                         //
 761                         tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
 762                         tmp.int64 += tmp2.int64; // this could generate carry
 763                         if (tmp.int64 < tmp2.int64) // detect carry
 764                                 tmp3.u.Hi = 1;
 765                         else
 766                                 tmp3.u.Hi = 0;
 767
 768                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
 769                         tmp.int64 += tmp2.int64; // this could generate carry
 770                         prod[2] = tmp.u.Lo;
 771                         if (tmp.int64 < tmp2.int64) // detect carry
 772                                 tmp3.u.Hi++;
 773                         tmp3.u.Lo = tmp.u.Hi;
 774
 775                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
 776                         tmp.int64 += tmp3.int64; // this could generate carry
 777                         if (tmp.int64 < tmp3.int64) // detect carry
 778                                 tmp3.u.Hi = 1;
 779                         else
 780                                 tmp3.u.Hi = 0;
 781
 782                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
 783                         tmp.int64 += tmp2.int64; // this could generate carry
 784                         prod[3] = tmp.u.Lo;
 785                         if (tmp.int64 < tmp2.int64) // detect carry
 786                                 tmp3.u.Hi++;
 787                         tmp3.u.Lo = tmp.u.Hi;
 788
 789                         tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
 790                         prod[4] = tmp.u.Lo;
 791                         prod[5] = tmp.u.Hi;
 792
 793                         hi_prod = 5;
 794                 }
 795                 else {
 796                         prod[2] = tmp.u.Lo;
 797                         prod[3] = tmp.u.Hi;
 798                         hi_prod = 3;
 799                 }
 800
 801                 // Check for leading zero uint32_ts on the product
 802                 //
 803                 while (prod[hi_prod] == 0) {
 804                         hi_prod--;
 805                         if (hi_prod < 0)
 806                                 goto ReturnZero;
 807                 }
 808
 809                 scale = ScaleResult(prod, hi_prod, scale);
 810                 if (scale == -1)
 811                         return MONO_DECIMAL_OVERFLOW;
 812
 813                 result->v.v.Lo32 = prod[0];
 814                 result->v.v.Mid32 = prod[1];
 815                 result->Hi32 = prod[2];
 816         }
 817
 818         result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
 819         result->u.u.scale = (char)scale;
 820         return MONO_DECIMAL_OK;
 821 }
 822
 823 // Addition and subtraction
 824 static MonoDecimalStatus
 825 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
 826 {
 827         uint32_t     num[6];
 828         uint32_t     pwr;
 829         int       scale;
 830         int       hi_prod;
 831         int       cur;
 832         SPLIT64   tmp;
 833         MonoDecimal decRes;
 834         MonoDecimal decTmp;
 835         MonoDecimal *pdecTmp;
 836
 837         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
 838
 839         if (right->u.u.scale == left->u.u.scale) {
 840                 // Scale factors are equal, no alignment necessary.
 841                 //
 842                 decRes.u.signscale = left->u.signscale;
 843
 844         AlignedAdd:
 845                 if (sign) {
 846                         // Signs differ - subtract
 847                         //
 848                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
 849                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
 850
 851                         // Propagate carry
 852                         //
 853                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
 854                                 decRes.Hi32--;
 855                                 if (decRes.Hi32 >= left->Hi32)
 856                                         goto SignFlip;
 857                         } else if (decRes.Hi32 > left->Hi32) {
 858                                 // Got negative result.  Flip its sign.
 859                                 //
 860                         SignFlip:
 861                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
 862                                 decRes.Hi32 = ~decRes.Hi32;
 863                                 if (DECIMAL_LO64_GET(decRes) == 0)
 864                                         decRes.Hi32++;
 865                                 decRes.u.u.sign ^= DECIMAL_NEG;
 866                         }
 867
 868                 } else {
 869                         // Signs are the same - add
 870                         //
 871                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
 872                         decRes.Hi32 = left->Hi32 + right->Hi32;
 873
 874                         // Propagate carry
 875                         //
 876                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
 877                                 decRes.Hi32++;
 878                                 if (decRes.Hi32 <= left->Hi32)
 879                                         goto AlignedScale;
 880                         } else if (decRes.Hi32 < left->Hi32) {
 881                         AlignedScale:
 882                                 // The addition carried above 96 bits.  Divide the result by 10,
 883                                 // dropping the scale factor.
 884                                 //
 885                                 if (decRes.u.u.scale == 0)
 886                                         return MONO_DECIMAL_OVERFLOW;
 887                                 decRes.u.u.scale--;
 888
 889                                 tmp.u.Lo = decRes.Hi32;
 890                                 tmp.u.Hi = 1;
 891                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 892                                 decRes.Hi32 = tmp.u.Lo;
 893
 894                                 tmp.u.Lo = decRes.v.v.Mid32;
 895                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 896                                 decRes.v.v.Mid32 = tmp.u.Lo;
 897
 898                                 tmp.u.Lo = decRes.v.v.Lo32;
 899                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 900                                 decRes.v.v.Lo32 = tmp.u.Lo;
 901
 902                                 // See if we need to round up.
 903                                 //
 904                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
 905                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
 906                                                 if (DECIMAL_LO64_GET(decRes) == 0)
 907                                                         decRes.Hi32++;
 908                                 }
 909                         }
 910                 }
 911         }
 912         else {
 913                 // Scale factors are not equal.  Assume that a larger scale
 914                 // factor (more decimal places) is likely to mean that number
 915                 // is smaller.  Start by guessing that the right operand has
 916                 // the larger scale factor.  The result will have the larger
 917                 // scale factor.
 918                 //
 919                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
 920                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
 921                 scale = decRes.u.u.scale - left->u.u.scale;
 922
 923                 if (scale < 0) {
 924                         // Guessed scale factor wrong. Swap operands.
 925                         //
 926                         scale = -scale;
 927                         decRes.u.u.scale = left->u.u.scale;
 928                         decRes.u.u.sign ^= sign;
 929                         pdecTmp = right;
 930                         right = left;
 931                         left = pdecTmp;
 932                 }
 933
 934                 // *left will need to be multiplied by 10^scale so
 935                 // it will have the same scale as *right.  We could be
 936                 // extending it to up to 192 bits of precision.
 937                 //
 938                 if (scale <= POWER10_MAX) {
 939                         // Scaling won't make it larger than 4 uint32_ts
 940                         //
 941                         pwr = power10[scale];
 942                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
 943                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
 944                         tmp.int64 += decTmp.v.v.Mid32;
 945                         decTmp.v.v.Mid32 = tmp.u.Lo;
 946                         decTmp.Hi32 = tmp.u.Hi;
 947                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
 948                         tmp.int64 += decTmp.Hi32;
 949                         if (tmp.u.Hi == 0) {
 950                                 // Result fits in 96 bits.  Use standard aligned add.
 951                                 //
 952                                 decTmp.Hi32 = tmp.u.Lo;
 953                                 left = &decTmp;
 954                                 goto AlignedAdd;
 955                         }
 956                         num[0] = decTmp.v.v.Lo32;
 957                         num[1] = decTmp.v.v.Mid32;
 958                         num[2] = tmp.u.Lo;
 959                         num[3] = tmp.u.Hi;
 960                         hi_prod = 3;
 961                 }
 962                 else {
 963                         // Have to scale by a bunch.  Move the number to a buffer
 964                         // where it has room to grow as it's scaled.
 965                         //
 966                         num[0] = left->v.v.Lo32;
 967                         num[1] = left->v.v.Mid32;
 968                         num[2] = left->Hi32;
 969                         hi_prod = 2;
 970
 971                         // Scan for zeros in the upper words.
 972                         //
 973                         if (num[2] == 0) {
 974                                 hi_prod = 1;
 975                                 if (num[1] == 0) {
 976                                         hi_prod = 0;
 977                                         if (num[0] == 0) {
 978                                                 // Left arg is zero, return right.
 979                                                 //
 980                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
 981                                                 decRes.Hi32 = right->Hi32;
 982                                                 decRes.u.u.sign ^= sign;
 983                                                 goto RetDec;
 984                                         }
 985                                 }
 986                         }
 987
 988                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
 989                         // with index of highest non-zero uint32_t.
 990                         //
 991                         for (; scale > 0; scale -= POWER10_MAX) {
 992                                 if (scale > POWER10_MAX)
 993                                         pwr = ten_to_nine;
 994                                 else
 995                                         pwr = power10[scale];
 996
 997                                 tmp.u.Hi = 0;
 998                                 for (cur = 0; cur <= hi_prod; cur++) {
 999                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
1000                                         num[cur] = tmp.u.Lo;
1001                                 }
1002
1003                                 if (tmp.u.Hi != 0)
1004                                         // We're extending the result by another uint32_t.
1005                                         num[++hi_prod] = tmp.u.Hi;
1006                         }
1007                 }
1008
1009                 // Scaling complete, do the add.  Could be subtract if signs differ.
1010                 //
1011                 tmp.u.Lo = num[0];
1012                 tmp.u.Hi = num[1];
1013
1014                 if (sign) {
1015                         // Signs differ, subtract.
1016                         //
1017                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1018                         decRes.Hi32 = num[2] - right->Hi32;
1019
1020                         // Propagate carry
1021                         //
1022                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1023                                 decRes.Hi32--;
1024                                 if (decRes.Hi32 >= num[2])
1025                                         goto LongSub;
1026                         }
1027                         else if (decRes.Hi32 > num[2]) {
1028                         LongSub:
1029                                 // If num has more than 96 bits of precision, then we need to
1030                                 // carry the subtraction into the higher bits.  If it doesn't,
1031                                 // then we subtracted in the wrong order and have to flip the
1032                                 // sign of the result.
1033                                 //
1034                                 if (hi_prod <= 2)
1035                                         goto SignFlip;
1036
1037                                 cur = 3;
1038                                 while(num[cur++]-- == 0);
1039                                 if (num[hi_prod] == 0)
1040                                         hi_prod--;
1041                         }
1042                 }
1043                 else {
1044                         // Signs the same, add.
1045                         //
1046                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1047                         decRes.Hi32 = num[2] + right->Hi32;
1048
1049                         // Propagate carry
1050                         //
1051                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1052                                 decRes.Hi32++;
1053                                 if (decRes.Hi32 <= num[2])
1054                                         goto LongAdd;
1055                         }
1056                         else if (decRes.Hi32 < num[2]) {
1057                         LongAdd:
1058                                 // Had a carry above 96 bits.
1059                                 //
1060                                 cur = 3;
1061                                 do {
1062                                         if (hi_prod < cur) {
1063                                                 num[cur] = 1;
1064                                                 hi_prod = cur;
1065                                                 break;
1066                                         }
1067                                 }while (++num[cur++] == 0);
1068                         }
1069                 }
1070
1071                 if (hi_prod > 2) {
1072                         num[0] = decRes.v.v.Lo32;
1073                         num[1] = decRes.v.v.Mid32;
1074                         num[2] = decRes.Hi32;
1075                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1076                         if (decRes.u.u.scale == (uint8_t) -1)
1077                                 return MONO_DECIMAL_OVERFLOW;
1078
1079                         decRes.v.v.Lo32 = num[0];
1080                         decRes.v.v.Mid32 = num[1];
1081                         decRes.Hi32 = num[2];
1082                 }
1083         }
1084
1085 RetDec:
1086         COPYDEC(*result, decRes);
1087         // Odd, the Microsoft code does not set result->reserved to zero on this case
1088         return MONO_DECIMAL_OK;
1089 }
1090
1091 // Decimal addition
1092 static MonoDecimalStatus G_GNUC_UNUSED
1093 mono_decimal_add(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1094 {
1095     return DecAddSub (left, right, result, 0);
1096 }
1097
1098 // Decimal subtraction
1099 static MonoDecimalStatus G_GNUC_UNUSED
1100 mono_decimal_sub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1101 {
1102     return DecAddSub (left, right, result, DECIMAL_NEG);
1103 }
1104
1105 /**
1106  * IncreaseScale:
1107  *
1108  * Entry:
1109  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1110  *   pwr   - Scale factor to multiply by
1111  *
1112  * Purpose:
1113  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1114  *   the input.  The last 32 bits of the product are the return value.
1115  *
1116  * Exit:
1117  *   Returns highest 32 bits of product.
1118  *
1119  * Exceptions:
1120  *   None.
1121  *
1122  */
1123 static uint32_t
1124 IncreaseScale(uint32_t *num, uint32_t pwr)
1125 {
1126         SPLIT64   sdlTmp;
1127
1128         sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1129         num[0] = sdlTmp.u.Lo;
1130         sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1131         num[1] = sdlTmp.u.Lo;
1132         sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1133         num[2] = sdlTmp.u.Lo;
1134         return sdlTmp.u.Hi;
1135 }
1136
1137 /**
1138  * Div96By64:
1139  *
1140  * Entry:
1141  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1142  *   sdlDen  - 64-bit divisor.
1143  *
1144  * Purpose:
1145  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1146  *   Divisor must be larger than upper 64 bits of dividend.
1147  *
1148  * Exit:
1149  *   Remainder overwrites lower 64-bits of dividend.
1150  *   Returns quotient.
1151  *
1152  * Exceptions:
1153  *   None.
1154  *
1155  */
1156 static uint32_t
1157 Div96By64(uint32_t *num, SPLIT64 den)
1158 {
1159         SPLIT64 quo;
1160         SPLIT64 sdlNum;
1161         SPLIT64 prod;
1162
1163         sdlNum.u.Lo = num[0];
1164
1165         if (num[2] >= den.u.Hi) {
1166                 // Divide would overflow.  Assume a quotient of 2^32, and set
1167                 // up remainder accordingly.  Then jump to loop which reduces
1168                 // the quotient.
1169                 //
1170                 sdlNum.u.Hi = num[1] - den.u.Lo;
1171                 quo.u.Lo = 0;
1172                 goto NegRem;
1173         }
1174
1175         // Hardware divide won't overflow
1176         //
1177         if (num[2] == 0 && num[1] < den.u.Hi)
1178                 // Result is zero.  Entire dividend is remainder.
1179                 //
1180                 return 0;
1181
1182         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1183         //
1184         quo.u.Lo = num[1];
1185         quo.u.Hi = num[2];
1186         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1187         sdlNum.u.Hi = quo.u.Hi; // remainder
1188
1189         // Compute full remainder, rem = dividend - (quo * divisor).
1190         //
1191         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1192         sdlNum.int64 -= prod.int64;
1193
1194         if (sdlNum.int64 > ~prod.int64) {
1195         NegRem:
1196                 // Remainder went negative.  Add divisor back in until it's positive,
1197                 // a max of 2 times.
1198                 //
1199                 do {
1200                         quo.u.Lo--;
1201                         sdlNum.int64 += den.int64;
1202                 }while (sdlNum.int64 >= den.int64);
1203         }
1204
1205         num[0] = sdlNum.u.Lo;
1206         num[1] = sdlNum.u.Hi;
1207         return quo.u.Lo;
1208 }
1209
1210 /***
1211 * Div128By96
1212 *
1213 * Entry:
1214 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1215 *   den - Pointer to 96-bit divisor.
1216 *
1217 * Purpose:
1218 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1219 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1220 *   assured in the initial call because the divisor is normalized
1221 *   and the dividend can't be.  In subsequent calls, the remainder
1222 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1223 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1224 *
1225 * Exit:
1226 *   Remainder overwrites lower 96-bits of dividend.
1227 *   Returns quotient.
1228 *
1229 * Exceptions:
1230 *   None.
1231 *
1232 ***********************************************************************/
1233
1234 static uint32_t
1235 Div128By96(uint32_t *num, uint32_t *den)
1236 {
1237         SPLIT64 sdlQuo;
1238         SPLIT64 sdlNum;
1239         SPLIT64 sdlProd1;
1240         SPLIT64 sdlProd2;
1241
1242         sdlNum.u.Lo = num[0];
1243         sdlNum.u.Hi = num[1];
1244
1245         if (num[3] == 0 && num[2] < den[2]){
1246                 // Result is zero.  Entire dividend is remainder.
1247                 //
1248                 return 0;
1249         }
1250
1251         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1252         //
1253         sdlQuo.u.Lo = num[2];
1254         sdlQuo.u.Hi = num[3];
1255         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1256
1257         // Compute full remainder, rem = dividend - (quo * divisor).
1258         //
1259         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1260         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1261         sdlProd2.int64 += sdlProd1.u.Hi;
1262         sdlProd1.u.Hi = sdlProd2.u.Lo;
1263
1264         sdlNum.int64 -= sdlProd1.int64;
1265         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1266
1267         // Propagate carries
1268         //
1269         if (sdlNum.int64 > ~sdlProd1.int64) {
1270                 num[2]--;
1271                 if (num[2] >= ~sdlProd2.u.Hi)
1272                         goto NegRem;
1273         } else if (num[2] > ~sdlProd2.u.Hi) {
1274         NegRem:
1275                 // Remainder went negative.  Add divisor back in until it's positive,
1276                 // a max of 2 times.
1277                 //
1278                 sdlProd1.u.Lo = den[0];
1279                 sdlProd1.u.Hi = den[1];
1280
1281                 for (;;) {
1282                         sdlQuo.u.Lo--;
1283                         sdlNum.int64 += sdlProd1.int64;
1284                         num[2] += den[2];
1285
1286                         if (sdlNum.int64 < sdlProd1.int64) {
1287                                 // Detected carry. Check for carry out of top
1288                                 // before adding it in.
1289                                 //
1290                                 if (num[2]++ < den[2])
1291                                         break;
1292                         }
1293                         if (num[2] < den[2])
1294                                 break; // detected carry
1295                 }
1296         }
1297
1298         num[0] = sdlNum.u.Lo;
1299         num[1] = sdlNum.u.Hi;
1300         return sdlQuo.u.Lo;
1301 }
1302
1303 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1304 // Returns FALSE if there is an overflow
1305 static gboolean
1306 Add32To96(uint32_t *num, uint32_t value)
1307 {
1308         num[0] += value;
1309         if (num[0] < value) {
1310                 if (++num[1] == 0) {
1311                         if (++num[2] == 0) {
1312                                 return FALSE;
1313                         }
1314                 }
1315         }
1316         return TRUE;
1317 }
1318
1319 static void
1320 OverflowUnscale (uint32_t *quo, gboolean remainder)
1321 {
1322         SPLIT64  sdlTmp;
1323
1324         // We have overflown, so load the high bit with a one.
1325         sdlTmp.u.Hi = 1u;
1326         sdlTmp.u.Lo = quo[2];
1327         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1328         quo[2] = sdlTmp.u.Lo;
1329         sdlTmp.u.Lo = quo[1];
1330         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1331         quo[1] = sdlTmp.u.Lo;
1332         sdlTmp.u.Lo = quo[0];
1333         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1334         quo[0] = sdlTmp.u.Lo;
1335         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1336         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1337                 Add32To96(quo, 1u);
1338         }
1339 }
1340
1341 // mono_decimal_divide - Decimal divide
1342 static MonoDecimalStatus G_GNUC_UNUSED
1343 mono_decimal_divide_result(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1344 {
1345         uint32_t   quo[3];
1346         uint32_t   quoSave[3];
1347         uint32_t   rem[4];
1348         uint32_t   divisor[3];
1349         uint32_t   pwr;
1350         uint32_t   utmp;
1351         uint32_t   utmp1;
1352         SPLIT64 sdlTmp;
1353         SPLIT64 sdlDivisor;
1354         int     scale;
1355         int     cur_scale;
1356
1357         scale = left->u.u.scale - right->u.u.scale;
1358         divisor[0] = right->v.v.Lo32;
1359         divisor[1] = right->v.v.Mid32;
1360         divisor[2] = right->Hi32;
1361
1362         if (divisor[1] == 0 && divisor[2] == 0) {
1363                 // Divisor is only 32 bits.  Easy divide.
1364                 //
1365                 if (divisor[0] == 0)
1366                         return MONO_DECIMAL_DIVBYZERO;
1367
1368                 quo[0] = left->v.v.Lo32;
1369                 quo[1] = left->v.v.Mid32;
1370                 quo[2] = left->Hi32;
1371                 rem[0] = Div96By32(quo, divisor[0]);
1372
1373                 for (;;) {
1374                         if (rem[0] == 0) {
1375                                 if (scale < 0) {
1376                                         cur_scale = min(9, -scale);
1377                                         goto HaveScale;
1378                                 }
1379                                 break;
1380                         }
1381
1382                         // We have computed a quotient based on the natural scale
1383                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
1384                         // remainder, so now we should increase the scale if possible to
1385                         // include more quotient bits.
1386                         //
1387                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
1388                         // computing more quotient bits as long as the remainder stays
1389                         // non-zero.  If scaling by that much would cause overflow, we'll
1390                         // drop out of the loop and scale by as much as we can.
1391                         //
1392                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
1393                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
1394                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1395                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
1396                         // is the largest value in quo[1] (when quo[2] == 4) that is
1397                         // assured not to overflow.
1398                         //
1399                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1400                         if (cur_scale == 0) {
1401                                 // No more scaling to be done, but remainder is non-zero.
1402                                 // Round quotient.
1403                                 //
1404                                 utmp = rem[0] << 1;
1405                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1406                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1407                                 RoundUp:
1408                                         if (++quo[0] == 0)
1409                                                 if (++quo[1] == 0)
1410                                                         quo[2]++;
1411                                 }
1412                                 break;
1413                         }
1414
1415                         if (cur_scale == -1)
1416                                 return MONO_DECIMAL_OVERFLOW;
1417
1418                 HaveScale:
1419                         pwr = power10[cur_scale];
1420                         scale += cur_scale;
1421
1422                         if (IncreaseScale(quo, pwr) != 0)
1423                                 return MONO_DECIMAL_OVERFLOW;
1424
1425                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1426                         rem[0] = sdlTmp.u.Hi;
1427
1428                         quo[0] += sdlTmp.u.Lo;
1429                         if (quo[0] < sdlTmp.u.Lo) {
1430                                 if (++quo[1] == 0)
1431                                         quo[2]++;
1432                         }
1433                 } // for (;;)
1434         }
1435         else {
1436                 // Divisor has bits set in the upper 64 bits.
1437                 //
1438                 // Divisor must be fully normalized (shifted so bit 31 of the most
1439                 // significant uint32_t is 1).  Locate the MSB so we know how much to
1440                 // normalize by.  The dividend will be shifted by the same amount so
1441                 // the quotient is not changed.
1442                 //
1443                 if (divisor[2] == 0)
1444                         utmp = divisor[1];
1445                 else
1446                         utmp = divisor[2];
1447
1448                 cur_scale = 0;
1449                 if (!(utmp & 0xFFFF0000)) {
1450                         cur_scale += 16;
1451                         utmp <<= 16;
1452                 }
1453                 if (!(utmp & 0xFF000000)) {
1454                         cur_scale += 8;
1455                         utmp <<= 8;
1456                 }
1457                 if (!(utmp & 0xF0000000)) {
1458                         cur_scale += 4;
1459                         utmp <<= 4;
1460                 }
1461                 if (!(utmp & 0xC0000000)) {
1462                         cur_scale += 2;
1463                         utmp <<= 2;
1464                 }
1465                 if (!(utmp & 0x80000000)) {
1466                         cur_scale++;
1467                         utmp <<= 1;
1468                 }
1469
1470                 // Shift both dividend and divisor left by cur_scale.
1471                 //
1472                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1473                 rem[0] = sdlTmp.u.Lo;
1474                 rem[1] = sdlTmp.u.Hi;
1475                 sdlTmp.u.Lo = left->v.v.Mid32;
1476                 sdlTmp.u.Hi = left->Hi32;
1477                 sdlTmp.int64 <<= cur_scale;
1478                 rem[2] = sdlTmp.u.Hi;
1479                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1480
1481                 sdlDivisor.u.Lo = divisor[0];
1482                 sdlDivisor.u.Hi = divisor[1];
1483                 sdlDivisor.int64 <<= cur_scale;
1484
1485                 if (divisor[2] == 0) {
1486                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1487                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1488                         //
1489                         sdlTmp.u.Lo = rem[2];
1490                         sdlTmp.u.Hi = rem[3];
1491
1492                         quo[2] = 0;
1493                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1494                         quo[0] = Div96By64(rem, sdlDivisor);
1495
1496                         for (;;) {
1497                                 if ((rem[0] | rem[1]) == 0) {
1498                                         if (scale < 0) {
1499                                                 cur_scale = min(9, -scale);
1500                                                 goto HaveScale64;
1501                                         }
1502                                         break;
1503                                 }
1504
1505                                 // Remainder is non-zero.  Scale up quotient and remainder by
1506                                 // powers of 10 so we can compute more significant bits.
1507                                 //
1508                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1509                                 if (cur_scale == 0) {
1510                                         // No more scaling to be done, but remainder is non-zero.
1511                                         // Round quotient.
1512                                         //
1513                                         sdlTmp.u.Lo = rem[0];
1514                                         sdlTmp.u.Hi = rem[1];
1515                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1516                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1517                                                 goto RoundUp;
1518                                         break;
1519                                 }
1520
1521                                 if (cur_scale == -1)
1522                                         return MONO_DECIMAL_OVERFLOW;
1523
1524                         HaveScale64:
1525                                 pwr = power10[cur_scale];
1526                                 scale += cur_scale;
1527
1528                                 if (IncreaseScale(quo, pwr) != 0)
1529                                         return MONO_DECIMAL_OVERFLOW;
1530
1531                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1532                                 IncreaseScale(rem, pwr);
1533                                 utmp = Div96By64(rem, sdlDivisor);
1534                                 quo[0] += utmp;
1535                                 if (quo[0] < utmp)
1536                                         if (++quo[1] == 0)
1537                                                 quo[2]++;
1538
1539                         } // for (;;)
1540                 }
1541                 else {
1542                         // Have a 96-bit divisor in divisor[].
1543                         //
1544                         // Start by finishing the shift left by cur_scale.
1545                         //
1546                         sdlTmp.u.Lo = divisor[1];
1547                         sdlTmp.u.Hi = divisor[2];
1548                         sdlTmp.int64 <<= cur_scale;
1549                         divisor[0] = sdlDivisor.u.Lo;
1550                         divisor[1] = sdlDivisor.u.Hi;
1551                         divisor[2] = sdlTmp.u.Hi;
1552
1553                         // The remainder (currently 96 bits spread over 4 uint32_ts)
1554                         // will be < divisor.
1555                         //
1556                         quo[2] = 0;
1557                         quo[1] = 0;
1558                         quo[0] = Div128By96(rem, divisor);
1559
1560                         for (;;) {
1561                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1562                                         if (scale < 0) {
1563                                                 cur_scale = min(9, -scale);
1564                                                 goto HaveScale96;
1565                                         }
1566                                         break;
1567                                 }
1568
1569                                 // Remainder is non-zero.  Scale up quotient and remainder by
1570                                 // powers of 10 so we can compute more significant bits.
1571                                 //
1572                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1573                                 if (cur_scale == 0) {
1574                                         // No more scaling to be done, but remainder is non-zero.
1575                                         // Round quotient.
1576                                         //
1577                                         if (rem[2] >= 0x80000000)
1578                                                 goto RoundUp;
1579
1580                                         utmp = rem[0] > 0x80000000;
1581                                         utmp1 = rem[1] > 0x80000000;
1582                                         rem[0] <<= 1;
1583                                         rem[1] = (rem[1] << 1) + utmp;
1584                                         rem[2] = (rem[2] << 1) + utmp1;
1585
1586                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1587                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1588                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1589                                               (quo[0] & 1))))
1590                                                 goto RoundUp;
1591                                         break;
1592                                 }
1593
1594                                 if (cur_scale == -1)
1595                                         return MONO_DECIMAL_OVERFLOW;
1596
1597                         HaveScale96:
1598                                 pwr = power10[cur_scale];
1599                                 scale += cur_scale;
1600
1601                                 if (IncreaseScale(quo, pwr) != 0)
1602                                         return MONO_DECIMAL_OVERFLOW;
1603
1604                                 rem[3] = IncreaseScale(rem, pwr);
1605                                 utmp = Div128By96(rem, divisor);
1606                                 quo[0] += utmp;
1607                                 if (quo[0] < utmp)
1608                                         if (++quo[1] == 0)
1609                                                 quo[2]++;
1610
1611                         } // for (;;)
1612                 }
1613         }
1614
1615         // No more remainder.  Try extracting any extra powers of 10 we may have
1616         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1617         // If a division by one of these powers returns a zero remainder, then
1618         // we keep the quotient.  If the remainder is not zero, then we restore
1619         // the previous value.
1620         //
1621         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1622         // we can extract.  We use this as a quick test on whether to try a
1623         // given power.
1624         //
1625         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1626                 quoSave[0] = quo[0];
1627                 quoSave[1] = quo[1];
1628                 quoSave[2] = quo[2];
1629
1630                 if (Div96By32(quoSave, 100000000) == 0) {
1631                         quo[0] = quoSave[0];
1632                         quo[1] = quoSave[1];
1633                         quo[2] = quoSave[2];
1634                         scale -= 8;
1635                 }
1636                 else
1637                         break;
1638         }
1639
1640         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1641                 quoSave[0] = quo[0];
1642                 quoSave[1] = quo[1];
1643                 quoSave[2] = quo[2];
1644
1645                 if (Div96By32(quoSave, 10000) == 0) {
1646                         quo[0] = quoSave[0];
1647                         quo[1] = quoSave[1];
1648                         quo[2] = quoSave[2];
1649                         scale -= 4;
1650                 }
1651         }
1652
1653         if ((quo[0] & 3) == 0 && scale >= 2) {
1654                 quoSave[0] = quo[0];
1655                 quoSave[1] = quo[1];
1656                 quoSave[2] = quo[2];
1657
1658                 if (Div96By32(quoSave, 100) == 0) {
1659                         quo[0] = quoSave[0];
1660                         quo[1] = quoSave[1];
1661                         quo[2] = quoSave[2];
1662                         scale -= 2;
1663                 }
1664         }
1665
1666         if ((quo[0] & 1) == 0 && scale >= 1) {
1667                 quoSave[0] = quo[0];
1668                 quoSave[1] = quo[1];
1669                 quoSave[2] = quo[2];
1670
1671                 if (Div96By32(quoSave, 10) == 0) {
1672                         quo[0] = quoSave[0];
1673                         quo[1] = quoSave[1];
1674                         quo[2] = quoSave[2];
1675                         scale -= 1;
1676                 }
1677         }
1678
1679         result->Hi32 = quo[2];
1680         result->v.v.Mid32 = quo[1];
1681         result->v.v.Lo32 = quo[0];
1682         result->u.u.scale = scale;
1683         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1684         return MONO_DECIMAL_OK;
1685 }
1686
1687 // mono_decimal_absolute - Decimal Absolute Value
1688 static void G_GNUC_UNUSED
1689 mono_decimal_absolute (MonoDecimal *pdecOprd, MonoDecimal *result)
1690 {
1691         COPYDEC(*result, *pdecOprd);
1692         result->u.u.sign &= ~DECIMAL_NEG;
1693         // Microsoft does not set reserved here
1694 }
1695
1696 // mono_decimal_fix - Decimal Fix (chop to integer)
1697 static void
1698 mono_decimal_fix (MonoDecimal *pdecOprd, MonoDecimal *result)
1699 {
1700         DecFixInt(result, pdecOprd);
1701 }
1702
1703 // mono_decimal_round_to_int - Decimal Int (round down to integer)
1704 static void
1705 mono_decimal_round_to_int (MonoDecimal *pdecOprd, MonoDecimal *result)
1706 {
1707         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1708                 // We have chopped off a non-zero amount from a negative value.  Since
1709                 // we round toward -infinity, we must increase the integer result by
1710                 // 1 to make it more negative.  This will never overflow because
1711                 // in order to have a remainder, we must have had a non-zero scale factor.
1712                 // Our scale factor is back to zero now.
1713                 //
1714                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1715                 if (DECIMAL_LO64_GET(*result) == 0)
1716                         result->Hi32++;
1717         }
1718 }
1719
1720 // mono_decimal_negate - Decimal Negate
1721 static void G_GNUC_UNUSED
1722 mono_decimal_negate (MonoDecimal *pdecOprd, MonoDecimal *result)
1723 {
1724         COPYDEC(*result, *pdecOprd);
1725         // Microsoft does not set result->reserved to zero on this case.
1726         result->u.u.sign ^= DECIMAL_NEG;
1727 }
1728
1729 //
1730 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1731 //
1732 static MonoDecimalStatus
1733 mono_decimal_round_result(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1734 {
1735         uint32_t num[3];
1736         uint32_t rem;
1737         uint32_t sticky;
1738         uint32_t pwr;
1739         int scale;
1740
1741         if (cDecimals < 0)
1742                 return MONO_DECIMAL_INVALID_ARGUMENT;
1743
1744         scale = input->u.u.scale - cDecimals;
1745         if (scale > 0) {
1746                 num[0] = input->v.v.Lo32;
1747                 num[1] = input->v.v.Mid32;
1748                 num[2] = input->Hi32;
1749                 result->u.u.sign = input->u.u.sign;
1750                 rem = sticky = 0;
1751
1752                 do {
1753                         sticky |= rem;
1754                         if (scale > POWER10_MAX)
1755                                 pwr = ten_to_nine;
1756                         else
1757                                 pwr = power10[scale];
1758
1759                         rem = Div96By32(num, pwr);
1760                         scale -= 9;
1761                 }while (scale > 0);
1762
1763                 // Now round.  rem has last remainder, sticky has sticky bits.
1764                 // To do IEEE rounding, we add LSB of result to sticky bits so
1765                 // either causes round up if remainder * 2 == last divisor.
1766                 //
1767                 sticky |= num[0] & 1;
1768                 rem = (rem << 1) + (sticky != 0);
1769                 if (pwr < rem &&
1770                     ++num[0] == 0 &&
1771                     ++num[1] == 0
1772                         )
1773                         ++num[2];
1774
1775                 result->v.v.Lo32 = num[0];
1776                 result->v.v.Mid32 = num[1];
1777                 result->Hi32 = num[2];
1778                 result->u.u.scale = cDecimals;
1779                 return MONO_DECIMAL_OK;
1780         }
1781
1782         COPYDEC(*result, *input);
1783         // Odd, the Microsoft source does not set the result->reserved to zero here.
1784         return MONO_DECIMAL_OK;
1785 }
1786
1787 //
1788 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1789 static MonoDecimalStatus
1790 mono_decimal_from_float (float input_f, MonoDecimal* result)
1791 {
1792         int         exp;    // number of bits to left of binary point
1793         int         power;
1794         uint32_t       mant;
1795         double      dbl;
1796         SPLIT64     sdlLo;
1797         SPLIT64     sdlHi;
1798         int         lmax, cur;  // temps used during scale reduction
1799         MonoSingle_float input = { .f = input_f };
1800
1801         // The most we can scale by is 10^28, which is just slightly more
1802         // than 2^93.  So a float with an exponent of -94 could just
1803         // barely reach 0.5, but smaller exponents will always round to zero.
1804         //
1805         if ((exp = input.s.exp - MONO_SINGLE_BIAS) < -94 ) {
1806                 DECIMAL_SETZERO(*result);
1807                 return MONO_DECIMAL_OK;
1808         }
1809
1810         if (exp > 96)
1811                 return MONO_DECIMAL_OVERFLOW;
1812
1813         // Round the input to a 7-digit integer.  The R4 format has
1814         // only 7 digits of precision, and we want to keep garbage digits
1815         // out of the Decimal were making.
1816         //
1817         // Calculate max power of 10 input value could have by multiplying
1818         // the exponent by log10(2).  Using scaled integer multiplcation,
1819         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1820         //
1821         dbl = fabs(input.f);
1822         power = 6 - ((exp * 19728) >> 16);
1823
1824         if (power >= 0) {
1825                 // We have less than 7 digits, scale input up.
1826                 //
1827                 if (power > DECMAX)
1828                         power = DECMAX;
1829
1830                 dbl = dbl * double_power10[power];
1831         } else {
1832                 if (power != -1 || dbl >= 1E7)
1833                         dbl = dbl / fnDblPower10(-power);
1834                 else
1835                         power = 0; // didn't scale it
1836         }
1837
1838         g_assert (dbl < 1E7);
1839         if (dbl < 1E6 && power < DECMAX) {
1840                 dbl *= 10;
1841                 power++;
1842                 g_assert(dbl >= 1E6);
1843         }
1844
1845         // Round to integer
1846         //
1847         mant = (int32_t)dbl;
1848         dbl -= (double)mant;  // difference between input & integer
1849         if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1850                 mant++;
1851
1852         if (mant == 0) {
1853                 DECIMAL_SETZERO(*result);
1854                 return MONO_DECIMAL_OK;
1855         }
1856
1857         if (power < 0) {
1858                 // Add -power factors of 10, -power <= (29 - 7) = 22.
1859                 //
1860                 power = -power;
1861                 if (power < 10) {
1862                         sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1863
1864                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1865                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1866                         DECIMAL_HI32(*result) = 0;
1867                 } else {
1868                         // Have a big power of 10.
1869                         //
1870                         if (power > 18) {
1871                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1872                                 sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1873
1874                                 if (sdlHi.u.Hi != 0)
1875                                         return MONO_DECIMAL_OVERFLOW;
1876                         }
1877                         else {
1878                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1879                                 sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1880                                 sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1881                                 sdlHi.int64 += sdlLo.u.Hi;
1882                                 sdlLo.u.Hi = sdlHi.u.Lo;
1883                                 sdlHi.u.Lo = sdlHi.u.Hi;
1884                         }
1885                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1886                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1887                         DECIMAL_HI32(*result) = sdlHi.u.Lo;
1888                 }
1889                 DECIMAL_SCALE(*result) = 0;
1890         } else {
1891                 // Factor out powers of 10 to reduce the scale, if possible.
1892                 // The maximum number we could factor out would be 6.  This
1893                 // comes from the fact we have a 7-digit number, and the
1894                 // MSD must be non-zero -- but the lower 6 digits could be
1895                 // zero.  Note also the scale factor is never negative, so
1896                 // we can't scale by any more than the power we used to
1897                 // get the integer.
1898                 //
1899                 // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1900                 //
1901                 lmax = min(power, 6);
1902
1903                 // lmax is the largest power of 10 to try, lmax <= 6.
1904                 // We'll try powers 4, 2, and 1 unless they're too big.
1905                 //
1906                 for (cur = 4; cur > 0; cur >>= 1)
1907                 {
1908                         if (cur > lmax)
1909                                 continue;
1910
1911                         sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1912
1913                         if (sdlLo.u.Hi == 0) {
1914                                 mant = sdlLo.u.Lo;
1915                                 power -= cur;
1916                                 lmax -= cur;
1917                         }
1918                 }
1919                 DECIMAL_LO32(*result) = mant;
1920                 DECIMAL_MID32(*result) = 0;
1921                 DECIMAL_HI32(*result) = 0;
1922                 DECIMAL_SCALE(*result) = power;
1923         }
1924
1925         DECIMAL_SIGN(*result) = (char)input.s.sign << 7;
1926         return MONO_DECIMAL_OK;
1927 }
1928
1929 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1930 static MonoDecimalStatus
1931 mono_decimal_from_double (double input_d, MonoDecimal *result)
1932 {
1933         int         exp;    // number of bits to left of binary point
1934         int         power;  // power-of-10 scale factor
1935         SPLIT64     sdlMant;
1936         SPLIT64     sdlLo;
1937         double      dbl;
1938         int         lmax, cur;  // temps used during scale reduction
1939         uint32_t       pwr_cur;
1940         uint32_t       quo;
1941         MonoDouble_double input = { .d = input_d };
1942
1943         // The most we can scale by is 10^28, which is just slightly more
1944         // than 2^93.  So a float with an exponent of -94 could just
1945         // barely reach 0.5, but smaller exponents will always round to zero.
1946         //
1947         if ((exp = input.s.exp - MONO_DOUBLE_BIAS) < -94) {
1948                 DECIMAL_SETZERO(*result);
1949                 return MONO_DECIMAL_OK;
1950         }
1951
1952         if (exp > 96)
1953                 return MONO_DECIMAL_OVERFLOW;
1954
1955         // Round the input to a 15-digit integer.  The R8 format has
1956         // only 15 digits of precision, and we want to keep garbage digits
1957         // out of the Decimal were making.
1958         //
1959         // Calculate max power of 10 input value could have by multiplying
1960         // the exponent by log10(2).  Using scaled integer multiplcation,
1961         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1962         //
1963         dbl = fabs(input.d);
1964         power = 14 - ((exp * 19728) >> 16);
1965
1966         if (power >= 0) {
1967                 // We have less than 15 digits, scale input up.
1968                 //
1969                 if (power > DECMAX)
1970                         power = DECMAX;
1971
1972                 dbl = dbl * double_power10[power];
1973         } else {
1974                 if (power != -1 || dbl >= 1E15)
1975                         dbl = dbl / fnDblPower10(-power);
1976                 else
1977                         power = 0; // didn't scale it
1978         }
1979
1980         g_assert (dbl < 1E15);
1981         if (dbl < 1E14 && power < DECMAX) {
1982                 dbl *= 10;
1983                 power++;
1984                 g_assert(dbl >= 1E14);
1985         }
1986
1987         // Round to int64
1988         //
1989         sdlMant.int64 = (int64_t)dbl;
1990         dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
1991         if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
1992                 sdlMant.int64++;
1993
1994         if (sdlMant.int64 == 0) {
1995                 DECIMAL_SETZERO(*result);
1996                 return MONO_DECIMAL_OK;
1997         }
1998
1999         if (power < 0) {
2000                 // Add -power factors of 10, -power <= (29 - 15) = 14.
2001                 //
2002                 power = -power;
2003                 if (power < 10) {
2004                         sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
2005                         sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2006                         sdlMant.int64 += sdlLo.u.Hi;
2007                         sdlLo.u.Hi = sdlMant.u.Lo;
2008                         sdlMant.u.Lo = sdlMant.u.Hi;
2009                 }
2010                 else {
2011                         // Have a big power of 10.
2012                         //
2013                         g_assert(power <= 14);
2014                         sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2015
2016                         if (sdlMant.u.Hi != 0)
2017                                 return MONO_DECIMAL_OVERFLOW;
2018                 }
2019                 DECIMAL_LO32(*result) = sdlLo.u.Lo;
2020                 DECIMAL_MID32(*result) = sdlLo.u.Hi;
2021                 DECIMAL_HI32(*result) = sdlMant.u.Lo;
2022                 DECIMAL_SCALE(*result) = 0;
2023         }
2024         else {
2025                 // Factor out powers of 10 to reduce the scale, if possible.
2026                 // The maximum number we could factor out would be 14.  This
2027                 // comes from the fact we have a 15-digit number, and the
2028                 // MSD must be non-zero -- but the lower 14 digits could be
2029                 // zero.  Note also the scale factor is never negative, so
2030                 // we can't scale by any more than the power we used to
2031                 // get the integer.
2032                 //
2033                 // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2034                 //
2035                 lmax = min(power, 14);
2036
2037                 // lmax is the largest power of 10 to try, lmax <= 14.
2038                 // We'll try powers 8, 4, 2, and 1 unless they're too big.
2039                 //
2040                 for (cur = 8; cur > 0; cur >>= 1)
2041                 {
2042                         if (cur > lmax)
2043                                 continue;
2044
2045                         pwr_cur = (uint32_t)long_power10[cur];
2046
2047                         if (sdlMant.u.Hi >= pwr_cur) {
2048                                 // Overflow if we try to divide in one step.
2049                                 //
2050                                 sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2051                                 quo = sdlLo.u.Lo;
2052                                 sdlLo.u.Lo = sdlMant.u.Lo;
2053                                 sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2054                         }
2055                         else {
2056                                 quo = 0;
2057                                 sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2058                         }
2059
2060                         if (sdlLo.u.Hi == 0) {
2061                                 sdlMant.u.Hi = quo;
2062                                 sdlMant.u.Lo = sdlLo.u.Lo;
2063                                 power -= cur;
2064                                 lmax -= cur;
2065                         }
2066                 }
2067
2068                 DECIMAL_HI32(*result) = 0;
2069                 DECIMAL_SCALE(*result) = power;
2070                 DECIMAL_LO32(*result) = sdlMant.u.Lo;
2071                 DECIMAL_MID32(*result) = sdlMant.u.Hi;
2072         }
2073
2074         DECIMAL_SIGN(*result) = (char)input.s.sign << 7;
2075         return MONO_DECIMAL_OK;
2076 }
2077
2078 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2079 static MonoDecimalStatus
2080 mono_decimal_to_double_result(MonoDecimal *input, double *result)
2081 {
2082         SPLIT64  tmp;
2083         double   dbl;
2084
2085         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2086                 return MONO_DECIMAL_INVALID_ARGUMENT;
2087
2088         tmp.u.Lo = DECIMAL_LO32(*input);
2089         tmp.u.Hi = DECIMAL_MID32(*input);
2090
2091         if ((int32_t)DECIMAL_MID32(*input) < 0)
2092                 dbl = (ds2to64.d + (double)(int64_t)tmp.int64 +
2093                        (double)DECIMAL_HI32(*input) * ds2to64.d) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2094         else
2095                 dbl = ((double)(int64_t)tmp.int64 +
2096                        (double)DECIMAL_HI32(*input) * ds2to64.d) / fnDblPower10(DECIMAL_SCALE(*input));
2097
2098         if (DECIMAL_SIGN(*input))
2099                 dbl = -dbl;
2100
2101         *result = dbl;
2102         return MONO_DECIMAL_OK;
2103 }
2104
2105 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2106 static MonoDecimalStatus
2107 mono_decimal_to_float_result(MonoDecimal *input, float *result)
2108 {
2109         double   dbl;
2110
2111         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2112                 return MONO_DECIMAL_INVALID_ARGUMENT;
2113
2114         // Can't overflow; no errors possible.
2115         //
2116         mono_decimal_to_double_result(input, &dbl);
2117         *result = (float)dbl;
2118         return MONO_DECIMAL_OK;
2119 }
2120
2121 static void
2122 DecShiftLeft(MonoDecimal* value)
2123 {
2124         unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2125     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2126     g_assert(value != NULL);
2127
2128     DECIMAL_LO32(*value) <<= 1;
2129     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2130     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2131 }
2132
2133 static int
2134 D32AddCarry(uint32_t* value, uint32_t i)
2135 {
2136     uint32_t v = *value;
2137     uint32_t sum = v + i;
2138     *value = sum;
2139     return sum < v || sum < i? 1: 0;
2140 }
2141
2142 static void
2143 DecAdd(MonoDecimal *value, MonoDecimal* d)
2144 {
2145         g_assert(value != NULL && d != NULL);
2146
2147         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2148                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2149                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2150                 }
2151         }
2152         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2153                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2154         }
2155         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2156 }
2157
2158 static void
2159 DecMul10(MonoDecimal* value)
2160 {
2161         MonoDecimal d = *value;
2162         g_assert (value != NULL);
2163
2164         DecShiftLeft(value);
2165         DecShiftLeft(value);
2166         DecAdd(value, &d);
2167         DecShiftLeft(value);
2168 }
2169
2170 static void
2171 DecAddInt32(MonoDecimal* value, unsigned int i)
2172 {
2173         g_assert(value != NULL);
2174
2175         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2176                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2177                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2178                 }
2179         }
2180 }
2181
2182 MonoDecimalCompareResult
2183 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2184 {
2185         uint32_t   left_sign;
2186         uint32_t   right_sign;
2187         MonoDecimal result;
2188
2189         result.Hi32 = 0;        // Just to shut up the compiler
2190
2191         // First check signs and whether either are zero.  If both are
2192         // non-zero and of the same sign, just use subtraction to compare.
2193         //
2194         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2195         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2196         if (left_sign != 0)
2197                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2198
2199         if (right_sign != 0)
2200                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2201
2202         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2203         // operand is +, 0, or -.
2204         //
2205         if (left_sign == right_sign) {
2206                 if (left_sign == 0)    // both are zero
2207                         return MONO_DECIMAL_CMP_EQ; // return equal
2208
2209                 DecAddSub(left, right, &result, DECIMAL_NEG);
2210                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2211                         return MONO_DECIMAL_CMP_EQ;
2212                 if (result.u.u.sign & DECIMAL_NEG)
2213                         return MONO_DECIMAL_CMP_LT;
2214                 return MONO_DECIMAL_CMP_GT;
2215         }
2216
2217         //
2218         // Signs are different.  Use signed byte comparison
2219         //
2220         if ((signed char)left_sign > (signed char)right_sign)
2221                 return MONO_DECIMAL_CMP_GT;
2222         return MONO_DECIMAL_CMP_LT;
2223 }
2224
2225 void
2226 mono_decimal_init_single (MonoDecimal *_this, float value)
2227 {
2228         if (mono_decimal_from_float (value, _this) == MONO_DECIMAL_OVERFLOW) {
2229                 mono_set_pending_exception (mono_get_exception_overflow ());
2230                 return;
2231         }
2232         _this->reserved = 0;
2233 }
2234
2235 void
2236 mono_decimal_init_double (MonoDecimal *_this, double value)
2237 {
2238         if (mono_decimal_from_double (value, _this) == MONO_DECIMAL_OVERFLOW) {
2239                 mono_set_pending_exception (mono_get_exception_overflow ());
2240                 return;
2241         }
2242         _this->reserved = 0;
2243 }
2244
2245 void
2246 mono_decimal_floor (MonoDecimal *d)
2247 {
2248         MonoDecimal decRes;
2249
2250         mono_decimal_round_to_int(d, &decRes);
2251
2252         // copy decRes into d
2253         COPYDEC(*d, decRes);
2254         d->reserved = 0;
2255         FC_GC_POLL ();
2256 }
2257
2258 int32_t
2259 mono_decimal_get_hash_code (MonoDecimal *d)
2260 {
2261         double dbl;
2262
2263         if (mono_decimal_to_double_result(d, &dbl) != MONO_DECIMAL_OK)
2264                 return 0;
2265
2266         if (dbl == 0.0) {
2267                 // Ensure 0 and -0 have the same hash code
2268                 return 0;
2269         }
2270         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2271         //
2272         // For example these two numerically equal decimals with different internal representations produce
2273         // slightly different results when converted to double:
2274         //
2275         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2276         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2277         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 });
2278         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2279         //
2280         return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2281
2282 }
2283
2284 void
2285 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2286 {
2287         MonoDecimal decRes;
2288
2289         MonoDecimalStatus status = mono_decimal_multiply_result(d1, d2, &decRes);
2290         if (status != MONO_DECIMAL_OK) {
2291                 mono_set_pending_exception (mono_get_exception_overflow ());
2292                 return;
2293         }
2294
2295         COPYDEC(*d1, decRes);
2296         d1->reserved = 0;
2297
2298         FC_GC_POLL ();
2299 }
2300
2301 void
2302 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2303 {
2304         MonoDecimal decRes;
2305
2306         // GC is only triggered for throwing, no need to protect result
2307         if (decimals < 0 || decimals > 28) {
2308                 mono_set_pending_exception (mono_get_exception_argument_out_of_range ("d"));
2309                 return;
2310         }
2311
2312         mono_decimal_round_result(d, decimals, &decRes);
2313
2314         // copy decRes into d
2315         COPYDEC(*d, decRes);
2316         d->reserved = 0;
2317
2318         FC_GC_POLL();
2319 }
2320
2321 void
2322 mono_decimal_tocurrency (MonoDecimal *decimal)
2323 {
2324         // TODO
2325 }
2326
2327 double
2328 mono_decimal_to_double (MonoDecimal d)
2329 {
2330         double result = 0.0;
2331         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2332         mono_decimal_to_double_result(&d, &result);
2333         return result;
2334 }
2335
2336 int32_t
2337 mono_decimal_to_int32 (MonoDecimal d)
2338 {
2339         MonoDecimal result;
2340
2341         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2342         mono_decimal_round_result(&d, 0, &result);
2343
2344         if (DECIMAL_SCALE(result) != 0) {
2345                 d = result;
2346                 mono_decimal_fix (&d, &result);
2347         }
2348
2349         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2350                 int32_t i = DECIMAL_LO32(result);
2351                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2352                         if (i >= 0)
2353                                 return i;
2354                 } else {
2355                         i = -i;
2356                         if (i <= 0)
2357                                 return i;
2358                 }
2359         }
2360
2361         mono_set_pending_exception (mono_get_exception_overflow ());
2362         return 0;
2363 }
2364
2365 float
2366 mono_decimal_to_float (MonoDecimal d)
2367 {
2368         float result = 0.0f;
2369         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2370         mono_decimal_to_float_result(&d, &result);
2371         return result;
2372 }
2373
2374 void
2375 mono_decimal_truncate (MonoDecimal *d)
2376 {
2377         MonoDecimal decRes;
2378
2379         mono_decimal_fix(d, &decRes);
2380
2381         // copy decRes into d
2382         COPYDEC(*d, decRes);
2383         d->reserved = 0;
2384         FC_GC_POLL();
2385 }
2386
2387 void
2388 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2389 {
2390         MonoDecimal result, decTmp;
2391         MonoDecimal *pdecTmp, *leftOriginal;
2392         uint32_t    num[6], pwr;
2393         int         scale, hi_prod, cur;
2394         SPLIT64     sdlTmp;
2395
2396         g_assert(sign == 0 || sign == DECIMAL_NEG);
2397
2398         leftOriginal = left;
2399
2400         sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2401
2402         if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2403                 // Scale factors are equal, no alignment necessary.
2404                 //
2405                 DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2406
2407         AlignedAdd:
2408                 if (sign) {
2409                         // Signs differ - subtract
2410                         //
2411                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2412                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2413
2414                         // Propagate carry
2415                         //
2416                         if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2417                                 DECIMAL_HI32(result)--;
2418                                 if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2419                                         goto SignFlip;
2420                         } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2421                                 // Got negative result.  Flip its sign.
2422                                 //
2423                         SignFlip:
2424                                 DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2425                                 DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2426                                 if (DECIMAL_LO64_GET(result) == 0)
2427                                         DECIMAL_HI32(result)++;
2428                                 DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2429                         }
2430
2431                 } else {
2432                         // Signs are the same - add
2433                         //
2434                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2435                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2436
2437                         // Propagate carry
2438                         //
2439                         if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2440                                 DECIMAL_HI32(result)++;
2441                                 if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2442                                         goto AlignedScale;
2443                         } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2444                         AlignedScale:
2445                                 // The addition carried above 96 bits.  Divide the result by 10,
2446                                 // dropping the scale factor.
2447                                 //
2448                                 if (DECIMAL_SCALE(result) == 0) {
2449                                         mono_set_pending_exception (mono_get_exception_overflow ());
2450                                         return;
2451                                 }
2452                                 DECIMAL_SCALE(result)--;
2453
2454                                 sdlTmp.u.Lo = DECIMAL_HI32(result);
2455                                 sdlTmp.u.Hi = 1;
2456                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2457                                 DECIMAL_HI32(result) = sdlTmp.u.Lo;
2458
2459                                 sdlTmp.u.Lo = DECIMAL_MID32(result);
2460                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2461                                 DECIMAL_MID32(result) = sdlTmp.u.Lo;
2462
2463                                 sdlTmp.u.Lo = DECIMAL_LO32(result);
2464                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2465                                 DECIMAL_LO32(result) = sdlTmp.u.Lo;
2466
2467                                 // See if we need to round up.
2468                                 //
2469                                 if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2470                                         DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2471                                         if (DECIMAL_LO64_GET(result) == 0)
2472                                                 DECIMAL_HI32(result)++;
2473                                 }
2474                         }
2475                 }
2476         } else {
2477                 // Scale factors are not equal.  Assume that a larger scale
2478                 // factor (more decimal places) is likely to mean that number
2479                 // is smaller.  Start by guessing that the right operand has
2480                 // the larger scale factor.  The result will have the larger
2481                 // scale factor.
2482                 //
2483                 DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2484                 DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2485                 scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2486
2487                 if (scale < 0) {
2488                         // Guessed scale factor wrong. Swap operands.
2489                         //
2490                         scale = -scale;
2491                         DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2492                         DECIMAL_SIGN(result) ^= sign;
2493                         pdecTmp = right;
2494                         right = left;
2495                         left = pdecTmp;
2496                 }
2497
2498                 // *left will need to be multiplied by 10^scale so
2499                 // it will have the same scale as *right.  We could be
2500                 // extending it to up to 192 bits of precision.
2501                 //
2502                 if (scale <= POWER10_MAX) {
2503                         // Scaling won't make it larger than 4 uint32_ts
2504                         //
2505                         pwr = power10[scale];
2506                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2507                         sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2508                         sdlTmp.int64 += DECIMAL_MID32(decTmp);
2509                         DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2510                         DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2511                         sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2512                         sdlTmp.int64 += DECIMAL_HI32(decTmp);
2513                         if (sdlTmp.u.Hi == 0) {
2514                                 // Result fits in 96 bits.  Use standard aligned add.
2515                                 //
2516                                 DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2517                                 left = &decTmp;
2518                                 goto AlignedAdd;
2519                         }
2520                         num[0] = DECIMAL_LO32(decTmp);
2521                         num[1] = DECIMAL_MID32(decTmp);
2522                         num[2] = sdlTmp.u.Lo;
2523                         num[3] = sdlTmp.u.Hi;
2524                         hi_prod = 3;
2525                 } else {
2526                         // Have to scale by a bunch.  Move the number to a buffer
2527                         // where it has room to grow as it's scaled.
2528                         //
2529                         num[0] = DECIMAL_LO32(*left);
2530                         num[1] = DECIMAL_MID32(*left);
2531                         num[2] = DECIMAL_HI32(*left);
2532                         hi_prod = 2;
2533
2534                         // Scan for zeros in the upper words.
2535                         //
2536                         if (num[2] == 0) {
2537                                 hi_prod = 1;
2538                                 if (num[1] == 0) {
2539                                         hi_prod = 0;
2540                                         if (num[0] == 0) {
2541                                                 // Left arg is zero, return right.
2542                                                 //
2543                                                 DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2544                                                 DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2545                                                 DECIMAL_SIGN(result) ^= sign;
2546                                                 goto RetDec;
2547                                         }
2548                                 }
2549                         }
2550
2551                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2552                         // with index of highest non-zero uint32_t.
2553                         //
2554                         for (; scale > 0; scale -= POWER10_MAX) {
2555                                 if (scale > POWER10_MAX)
2556                                         pwr = ten_to_nine;
2557                                 else
2558                                         pwr = power10[scale];
2559
2560                                 sdlTmp.u.Hi = 0;
2561                                 for (cur = 0; cur <= hi_prod; cur++) {
2562                                         sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2563                                         num[cur] = sdlTmp.u.Lo;
2564                                 }
2565
2566                                 if (sdlTmp.u.Hi != 0)
2567                                         // We're extending the result by another uint32_t.
2568                                         num[++hi_prod] = sdlTmp.u.Hi;
2569                         }
2570                 }
2571
2572                 // Scaling complete, do the add.  Could be subtract if signs differ.
2573                 //
2574                 sdlTmp.u.Lo = num[0];
2575                 sdlTmp.u.Hi = num[1];
2576
2577                 if (sign) {
2578                         // Signs differ, subtract.
2579                         //
2580                         DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2581                         DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2582
2583                         // Propagate carry
2584                         //
2585                         if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2586                                 DECIMAL_HI32(result)--;
2587                                 if (DECIMAL_HI32(result) >= num[2])
2588                                         goto LongSub;
2589                         } else if (DECIMAL_HI32(result) > num[2]) {
2590                         LongSub:
2591                                 // If num has more than 96 bits of precision, then we need to
2592                                 // carry the subtraction into the higher bits.  If it doesn't,
2593                                 // then we subtracted in the wrong order and have to flip the
2594                                 // sign of the result.
2595                                 //
2596                                 if (hi_prod <= 2)
2597                                         goto SignFlip;
2598
2599                                 cur = 3;
2600                                 while(num[cur++]-- == 0);
2601                                 if (num[hi_prod] == 0)
2602                                         hi_prod--;
2603                         }
2604                 } else {
2605                         // Signs the same, add.
2606                         //
2607                         DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2608                         DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2609
2610                         // Propagate carry
2611                         //
2612                         if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2613                                 DECIMAL_HI32(result)++;
2614                                 if (DECIMAL_HI32(result) <= num[2])
2615                                         goto LongAdd;
2616                         } else if (DECIMAL_HI32(result) < num[2]) {
2617                         LongAdd:
2618                                 // Had a carry above 96 bits.
2619                                 //
2620                                 cur = 3;
2621                                 do {
2622                                         if (hi_prod < cur) {
2623                                                 num[cur] = 1;
2624                                                 hi_prod = cur;
2625                                                 break;
2626                                         }
2627                                 }while (++num[cur++] == 0);
2628                         }
2629                 }
2630
2631                 if (hi_prod > 2) {
2632                         num[0] = DECIMAL_LO32(result);
2633                         num[1] = DECIMAL_MID32(result);
2634                         num[2] = DECIMAL_HI32(result);
2635                         DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2636                         if (DECIMAL_SCALE(result) == (uint8_t)-1) {
2637                                 mono_set_pending_exception (mono_get_exception_overflow ());
2638                                 return;
2639                         }
2640
2641                         DECIMAL_LO32(result) = num[0];
2642                         DECIMAL_MID32(result) = num[1];
2643                         DECIMAL_HI32(result) = num[2];
2644                 }
2645         }
2646
2647 RetDec:
2648         left = leftOriginal;
2649         COPYDEC(*left, result);
2650         left->reserved = 0;
2651 }
2652
2653 void
2654 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2655 {
2656         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2657         uint32_t pwr, tmp, tmp1;
2658         SPLIT64  sdlTmp, sdlDivisor;
2659         int      scale, cur_scale;
2660         gboolean unscale;
2661
2662         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2663         unscale = FALSE;
2664         divisor[0] = DECIMAL_LO32(*right);
2665         divisor[1] = DECIMAL_MID32(*right);
2666         divisor[2] = DECIMAL_HI32(*right);
2667
2668         if (divisor[1] == 0 && divisor[2] == 0) {
2669                 // Divisor is only 32 bits.  Easy divide.
2670                 //
2671                 if (divisor[0] == 0) {
2672                         mono_set_pending_exception (mono_get_exception_divide_by_zero ());
2673                         return;
2674                 }
2675
2676                 quo[0] = DECIMAL_LO32(*left);
2677                 quo[1] = DECIMAL_MID32(*left);
2678                 quo[2] = DECIMAL_HI32(*left);
2679                 rem[0] = Div96By32(quo, divisor[0]);
2680
2681                 for (;;) {
2682                         if (rem[0] == 0) {
2683                                 if (scale < 0) {
2684                                         cur_scale = min(9, -scale);
2685                                         goto HaveScale;
2686                                 }
2687                                 break;
2688                         }
2689                         // We need to unscale if and only if we have a non-zero remainder
2690                         unscale = TRUE;
2691
2692                         // We have computed a quotient based on the natural scale
2693                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
2694                         // remainder, so now we should increase the scale if possible to
2695                         // include more quotient bits.
2696                         //
2697                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
2698                         // computing more quotient bits as long as the remainder stays
2699                         // non-zero.  If scaling by that much would cause overflow, we'll
2700                         // drop out of the loop and scale by as much as we can.
2701                         //
2702                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
2703                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
2704                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
2705                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
2706                         // is the largest value in quo[1] (when quo[2] == 4) that is
2707                         // assured not to overflow.
2708                         //
2709                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2710                         if (cur_scale == 0) {
2711                                 // No more scaling to be done, but remainder is non-zero.
2712                                 // Round quotient.
2713                                 //
2714                                 tmp = rem[0] << 1;
2715                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2716                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2717                                 RoundUp:
2718                                         if (!Add32To96(quo, 1)) {
2719                                                 if (scale == 0) {
2720                                                         mono_set_pending_exception (mono_get_exception_overflow ());
2721                                                         return;
2722                                                 }
2723                                                 scale--;
2724                                                 OverflowUnscale(quo, TRUE);
2725                                                 break;
2726                                         }
2727                                 }
2728                                 break;
2729                         }
2730
2731                         if (cur_scale < 0) {
2732                                 mono_set_pending_exception (mono_get_exception_overflow ());
2733                                 return;
2734                         }
2735
2736                 HaveScale:
2737                         pwr = power10[cur_scale];
2738                         scale += cur_scale;
2739
2740                         if (IncreaseScale(quo, pwr) != 0) {
2741                                 mono_set_pending_exception (mono_get_exception_overflow ());
2742                                 return;
2743                         }
2744
2745                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2746                         rem[0] = sdlTmp.u.Hi;
2747
2748                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2749                                 if (scale == 0) {
2750                                         mono_set_pending_exception (mono_get_exception_overflow ());
2751                                         return;
2752                                 }
2753                                 scale--;
2754                                 OverflowUnscale(quo, (rem[0] != 0));
2755                                 break;
2756                         }
2757                 } // for (;;)
2758         } else {
2759                 // Divisor has bits set in the upper 64 bits.
2760                 //
2761                 // Divisor must be fully normalized (shifted so bit 31 of the most
2762                 // significant uint32_t is 1).  Locate the MSB so we know how much to
2763                 // normalize by.  The dividend will be shifted by the same amount so
2764                 // the quotient is not changed.
2765                 //
2766                 if (divisor[2] == 0)
2767                         tmp = divisor[1];
2768                 else
2769                         tmp = divisor[2];
2770
2771                 cur_scale = 0;
2772                 if (!(tmp & 0xFFFF0000)) {
2773                         cur_scale += 16;
2774                         tmp <<= 16;
2775                 }
2776                 if (!(tmp & 0xFF000000)) {
2777                         cur_scale += 8;
2778                         tmp <<= 8;
2779                 }
2780                 if (!(tmp & 0xF0000000)) {
2781                         cur_scale += 4;
2782                         tmp <<= 4;
2783                 }
2784                 if (!(tmp & 0xC0000000)) {
2785                         cur_scale += 2;
2786                         tmp <<= 2;
2787                 }
2788                 if (!(tmp & 0x80000000)) {
2789                         cur_scale++;
2790                         tmp <<= 1;
2791                 }
2792
2793                 // Shift both dividend and divisor left by cur_scale.
2794                 //
2795                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2796                 rem[0] = sdlTmp.u.Lo;
2797                 rem[1] = sdlTmp.u.Hi;
2798                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2799                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2800                 sdlTmp.int64 <<= cur_scale;
2801                 rem[2] = sdlTmp.u.Hi;
2802                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2803
2804                 sdlDivisor.u.Lo = divisor[0];
2805                 sdlDivisor.u.Hi = divisor[1];
2806                 sdlDivisor.int64 <<= cur_scale;
2807
2808                 if (divisor[2] == 0) {
2809                         // Have a 64-bit divisor in sdlDivisor.  The remainder
2810                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2811                         //
2812                         sdlTmp.u.Lo = rem[2];
2813                         sdlTmp.u.Hi = rem[3];
2814
2815                         quo[2] = 0;
2816                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2817                         quo[0] = Div96By64(rem, sdlDivisor);
2818
2819                         for (;;) {
2820                                 if ((rem[0] | rem[1]) == 0) {
2821                                         if (scale < 0) {
2822                                                 cur_scale = min(9, -scale);
2823                                                 goto HaveScale64;
2824                                         }
2825                                         break;
2826                                 }
2827
2828                                 // We need to unscale if and only if we have a non-zero remainder
2829                                 unscale = TRUE;
2830
2831                                 // Remainder is non-zero.  Scale up quotient and remainder by
2832                                 // powers of 10 so we can compute more significant bits.
2833                                 //
2834                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2835                                 if (cur_scale == 0) {
2836                                         // No more scaling to be done, but remainder is non-zero.
2837                                         // Round quotient.
2838                                         //
2839                                         sdlTmp.u.Lo = rem[0];
2840                                         sdlTmp.u.Hi = rem[1];
2841                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2842                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2843                                                 goto RoundUp;
2844                                         break;
2845                                 }
2846
2847                                 if (cur_scale < 0) {
2848                                         mono_set_pending_exception (mono_get_exception_overflow ());
2849                                         return;
2850                                 }
2851
2852                         HaveScale64:
2853                                 pwr = power10[cur_scale];
2854                                 scale += cur_scale;
2855
2856                                 if (IncreaseScale(quo, pwr) != 0) {
2857                                         mono_set_pending_exception (mono_get_exception_overflow ());
2858                                         return;
2859                                 }
2860
2861                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2862                                 IncreaseScale(rem, pwr);
2863                                 tmp = Div96By64(rem, sdlDivisor);
2864                                 if (!Add32To96(quo, tmp)) {
2865                                         if (scale == 0) {
2866                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2867                                                 return;
2868                                         }
2869                                         scale--;
2870                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2871                                         break;
2872                                 }
2873
2874                         } // for (;;)
2875                 } else {
2876                         // Have a 96-bit divisor in divisor[].
2877                         //
2878                         // Start by finishing the shift left by cur_scale.
2879                         //
2880                         sdlTmp.u.Lo = divisor[1];
2881                         sdlTmp.u.Hi = divisor[2];
2882                         sdlTmp.int64 <<= cur_scale;
2883                         divisor[0] = sdlDivisor.u.Lo;
2884                         divisor[1] = sdlDivisor.u.Hi;
2885                         divisor[2] = sdlTmp.u.Hi;
2886
2887                         // The remainder (currently 96 bits spread over 4 uint32_ts)
2888                         // will be < divisor.
2889                         //
2890                         quo[2] = 0;
2891                         quo[1] = 0;
2892                         quo[0] = Div128By96(rem, divisor);
2893
2894                         for (;;) {
2895                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2896                                         if (scale < 0) {
2897                                                 cur_scale = min(9, -scale);
2898                                                 goto HaveScale96;
2899                                         }
2900                                         break;
2901                                 }
2902
2903                                 // We need to unscale if and only if we have a non-zero remainder
2904                                 unscale = TRUE;
2905
2906                                 // Remainder is non-zero.  Scale up quotient and remainder by
2907                                 // powers of 10 so we can compute more significant bits.
2908                                 //
2909                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2910                                 if (cur_scale == 0) {
2911                                         // No more scaling to be done, but remainder is non-zero.
2912                                         // Round quotient.
2913                                         //
2914                                         if (rem[2] >= 0x80000000)
2915                                                 goto RoundUp;
2916
2917                                         tmp = rem[0] > 0x80000000;
2918                                         tmp1 = rem[1] > 0x80000000;
2919                                         rem[0] <<= 1;
2920                                         rem[1] = (rem[1] << 1) + tmp;
2921                                         rem[2] = (rem[2] << 1) + tmp1;
2922
2923                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2924                                                 goto RoundUp;
2925                                         break;
2926                                 }
2927
2928                                 if (cur_scale < 0) {
2929                                         mono_set_pending_exception (mono_get_exception_overflow ());
2930                                         return;
2931                                 }
2932
2933                         HaveScale96:
2934                                 pwr = power10[cur_scale];
2935                                 scale += cur_scale;
2936
2937                                 if (IncreaseScale(quo, pwr) != 0) {
2938                                         mono_set_pending_exception (mono_get_exception_overflow ());
2939                                         return;
2940                                 }
2941
2942                                 rem[3] = IncreaseScale(rem, pwr);
2943                                 tmp = Div128By96(rem, divisor);
2944                                 if (!Add32To96(quo, tmp)) {
2945                                         if (scale == 0) {
2946                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2947                                                 return;
2948                                         }
2949
2950                                         scale--;
2951                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2952                                         break;
2953                                 }
2954
2955                         } // for (;;)
2956                 }
2957         }
2958
2959         // We need to unscale if and only if we have a non-zero remainder
2960         if (unscale) {
2961                 // Try extracting any extra powers of 10 we may have
2962                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2963                 // If a division by one of these powers returns a zero remainder, then
2964                 // we keep the quotient.  If the remainder is not zero, then we restore
2965                 // the previous value.
2966                 //
2967                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2968                 // we can extract.  We use this as a quick test on whether to try a
2969                 // given power.
2970                 //
2971                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
2972                         quo_save[0] = quo[0];
2973                         quo_save[1] = quo[1];
2974                         quo_save[2] = quo[2];
2975
2976                         if (Div96By32(quo_save, 100000000) == 0) {
2977                                 quo[0] = quo_save[0];
2978                                 quo[1] = quo_save[1];
2979                                 quo[2] = quo_save[2];
2980                                 scale -= 8;
2981                         } else
2982                                 break;
2983                 }
2984
2985                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
2986                         quo_save[0] = quo[0];
2987                         quo_save[1] = quo[1];
2988                         quo_save[2] = quo[2];
2989
2990                         if (Div96By32(quo_save, 10000) == 0) {
2991                                 quo[0] = quo_save[0];
2992                                 quo[1] = quo_save[1];
2993                                 quo[2] = quo_save[2];
2994                                 scale -= 4;
2995                         }
2996                 }
2997
2998                 if ((quo[0] & 3) == 0 && scale >= 2) {
2999                         quo_save[0] = quo[0];
3000                         quo_save[1] = quo[1];
3001                         quo_save[2] = quo[2];
3002
3003                         if (Div96By32(quo_save, 100) == 0) {
3004                                 quo[0] = quo_save[0];
3005                                 quo[1] = quo_save[1];
3006                                 quo[2] = quo_save[2];
3007                                 scale -= 2;
3008                         }
3009                 }
3010
3011                 if ((quo[0] & 1) == 0 && scale >= 1) {
3012                         quo_save[0] = quo[0];
3013                         quo_save[1] = quo[1];
3014                         quo_save[2] = quo[2];
3015
3016                         if (Div96By32(quo_save, 10) == 0) {
3017                                 quo[0] = quo_save[0];
3018                                 quo[1] = quo_save[1];
3019                                 quo[2] = quo_save[2];
3020                                 scale -= 1;
3021                         }
3022                 }
3023         }
3024
3025         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3026         DECIMAL_HI32(*left) = quo[2];
3027         DECIMAL_MID32(*left) = quo[1];
3028         DECIMAL_LO32(*left) = quo[0];
3029         DECIMAL_SCALE(*left) = (uint8_t)scale;
3030         left->reserved = 0;
3031
3032 }
3033
3034 #define DECIMAL_PRECISION 29
3035
3036 int
3037 mono_decimal_from_number (void *from, MonoDecimal *target)
3038 {
3039         MonoNumber *number = (MonoNumber *) from;
3040         uint16_t* p = number->digits;
3041         MonoDecimal d;
3042         int e = number->scale;
3043         g_assert(number != NULL);
3044         g_assert(target != NULL);
3045
3046         d.reserved = 0;
3047         DECIMAL_SIGNSCALE(d) = 0;
3048         DECIMAL_HI32(d) = 0;
3049         DECIMAL_LO32(d) = 0;
3050         DECIMAL_MID32(d) = 0;
3051         g_assert(p != NULL);
3052         if (!*p) {
3053                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3054                 // the scale to 0 if the scale was previously positive
3055                 if (e > 0) {
3056                         e = 0;
3057                 }
3058         } else {
3059                 if (e > DECIMAL_PRECISION) return 0;
3060                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3061                         DecMul10(&d);
3062                         if (*p)
3063                                 DecAddInt32(&d, *p++ - '0');
3064                         e--;
3065                 }
3066                 if (*p++ >= '5') {
3067                         gboolean round = TRUE;
3068                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3069                                 // For digits > 5 we will be roundinp up anyway.
3070                                 int count = 20; // Look at the next 20 digits to check to round
3071                                 while (*p == '0' && count != 0) {
3072                                         p++;
3073                                         count--;
3074                                 }
3075                                 if (*p == '\0' || count == 0)
3076                                         round = FALSE;// Do nothing
3077                         }
3078
3079                         if (round) {
3080                                 DecAddInt32(&d, 1);
3081                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3082                                         DECIMAL_HI32(d) = 0x19999999;
3083                                         DECIMAL_MID32(d) = 0x99999999;
3084                                         DECIMAL_LO32(d) = 0x9999999A;
3085                                         e++;
3086                                 }
3087                         }
3088                 }
3089         }
3090         if (e > 0)
3091                 return 0;
3092         if (e <= -DECIMAL_PRECISION) {
3093                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3094                 // This should only happen for actual zeros or very small numbers that round to zero.
3095                 DECIMAL_SIGNSCALE(d) = 0;
3096                 DECIMAL_HI32(d) = 0;
3097                 DECIMAL_LO32(d) = 0;
3098                 DECIMAL_MID32(d) = 0;
3099                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3100         } else {
3101                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3102         }
3103
3104         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3105         *target = d;
3106         return 1;
3107 }
3108
3109
3110 #endif