gcc/config/arm/ieee754-df.S

   1 /* ieee754-df.S double-precision floating point support for ARM
   2
   3    Copyright (C) 2003, 2004  Free Software Foundation, Inc.
   4    Contributed by Nicolas Pitre (nico@cam.org)
   5
   6    This file is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by the
   8    Free Software Foundation; either version 2, or (at your option) any
   9    later version.
  10
  11    In addition to the permissions in the GNU General Public License, the
  12    Free Software Foundation gives you unlimited permission to link the
  13    compiled version of this file into combinations with other programs,
  14    and to distribute those combinations without any restriction coming
  15    from the use of this file.  (The General Public License restrictions
  16    do apply in other respects; for example, they cover modification of
  17    the file, and distribution when not linked into a combine
  18    executable.)
  19
  20    This file is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; see the file COPYING.  If not, write to
  27    the Free Software Foundation, 59 Temple Place - Suite 330,
  28    Boston, MA 02111-1307, USA.  */
  29
  30 /*
  31  * Notes:
  32  *
  33  * The goal of this code is to be as fast as possible.  This is
  34  * not meant to be easy to understand for the casual reader.
  35  * For slightly simpler code please see the single precision version
  36  * of this file.
  37  *
  38  * Only the default rounding mode is intended for best performances.
  39  * Exceptions aren't supported yet, but that can be added quite easily
  40  * if necessary without impacting performances.
  41  */
  42
  43
  44 @ For FPA, float words are always big-endian.
  45 @ For VFP, floats words follow the memory system mode.
  46 #if defined(__VFP_FP__) && !defined(__ARMEB__)
  47 #define xl r0
  48 #define xh r1
  49 #define yl r2
  50 #define yh r3
  51 #else
  52 #define xh r0
  53 #define xl r1
  54 #define yh r2
  55 #define yl r3
  56 #endif
  57
  58
  59 #ifdef L_negdf2
  60
  61 ARM_FUNC_START negdf2
  62         @ flip sign bit
  63         eor     xh, xh, #0x80000000
  64         RET
  65
  66         FUNC_END negdf2
  67
  68 #endif
  69
  70 #ifdef L_addsubdf3
  71
  72 ARM_FUNC_START subdf3
  73         @ flip sign bit of second arg
  74         eor     yh, yh, #0x80000000
  75 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
  76         b       1f                      @ Skip Thumb-code prologue
  77 #endif
  78
  79 ARM_FUNC_START adddf3
  80
  81 1:      @ Compare both args, return zero if equal but the sign.
  82         teq     xl, yl
  83         eoreq   ip, xh, yh
  84         teqeq   ip, #0x80000000
  85         beq     LSYM(Lad_z)
  86
  87         @ If first arg is 0 or -0, return second arg.
  88         @ If second arg is 0 or -0, return first arg.
  89         orrs    ip, xl, xh, lsl #1
  90         moveq   xl, yl
  91         moveq   xh, yh
  92         orrnes  ip, yl, yh, lsl #1
  93         RETc(eq)
  94
  95         stmfd   sp!, {r4, r5, lr}
  96
  97         @ Mask out exponents.
  98         mov     ip, #0x7f000000
  99         orr     ip, ip, #0x00f00000
 100         and     r4, xh, ip
 101         and     r5, yh, ip
 102
 103         @ If either of them is 0x7ff, result will be INF or NAN
 104         teq     r4, ip
 105         teqne   r5, ip
 106         beq     LSYM(Lad_i)
 107
 108         @ Compute exponent difference.  Make largest exponent in r4,
 109         @ corresponding arg in xh-xl, and positive exponent difference in r5.
 110         subs    r5, r5, r4
 111         rsblt   r5, r5, #0
 112         ble     1f
 113         add     r4, r4, r5
 114         eor     yl, xl, yl
 115         eor     yh, xh, yh
 116         eor     xl, yl, xl
 117         eor     xh, yh, xh
 118         eor     yl, xl, yl
 119         eor     yh, xh, yh
 120 1:
 121
 122         @ If exponent difference is too large, return largest argument
 123         @ already in xh-xl.  We need up to 54 bit to handle proper rounding
 124         @ of 0x1p54 - 1.1.
 125         cmp     r5, #(54 << 20)
 126         RETLDM  "r4, r5" hi
 127
 128         @ Convert mantissa to signed integer.
 129         tst     xh, #0x80000000
 130         bic     xh, xh, ip, lsl #1
 131         orr     xh, xh, #0x00100000
 132         beq     1f
 133         rsbs    xl, xl, #0
 134         rsc     xh, xh, #0
 135 1:
 136         tst     yh, #0x80000000
 137         bic     yh, yh, ip, lsl #1
 138         orr     yh, yh, #0x00100000
 139         beq     1f
 140         rsbs    yl, yl, #0
 141         rsc     yh, yh, #0
 142 1:
 143         @ If exponent == difference, one or both args were denormalized.
 144         @ Since this is not common case, rescale them off line.
 145         teq     r4, r5
 146         beq     LSYM(Lad_d)
 147 LSYM(Lad_x):
 148         @ Scale down second arg with exponent difference.
 149         @ Apply shift one bit left to first arg and the rest to second arg
 150         @ to simplify things later, but only if exponent does not become 0.
 151         mov     ip, #0
 152         movs    r5, r5, lsr #20
 153         beq     3f
 154         teq     r4, #(1 << 20)
 155         beq     1f
 156         movs    xl, xl, lsl #1
 157         adc     xh, ip, xh, lsl #1
 158         sub     r4, r4, #(1 << 20)
 159         subs    r5, r5, #1
 160         beq     3f
 161
 162         @ Shift yh-yl right per r5, keep leftover bits into ip.
 163 1:      rsbs    lr, r5, #32
 164         blt     2f
 165         mov     ip, yl, lsl lr
 166         mov     yl, yl, lsr r5
 167         orr     yl, yl, yh, lsl lr
 168         mov     yh, yh, asr r5
 169         b       3f
 170 2:      sub     r5, r5, #32
 171         add     lr, lr, #32
 172         cmp     yl, #1
 173         adc     ip, ip, yh, lsl lr
 174         mov     yl, yh, asr r5
 175         mov     yh, yh, asr #32
 176 3:
 177         @ the actual addition
 178         adds    xl, xl, yl
 179         adc     xh, xh, yh
 180
 181         @ We now have a result in xh-xl-ip.
 182         @ Keep absolute value in xh-xl-ip, sign in r5.
 183         ands    r5, xh, #0x80000000
 184         bpl     LSYM(Lad_p)
 185         rsbs    ip, ip, #0
 186         rscs    xl, xl, #0
 187         rsc     xh, xh, #0
 188
 189         @ Determine how to normalize the result.
 190 LSYM(Lad_p):
 191         cmp     xh, #0x00100000
 192         bcc     LSYM(Lad_l)
 193         cmp     xh, #0x00200000
 194         bcc     LSYM(Lad_r0)
 195         cmp     xh, #0x00400000
 196         bcc     LSYM(Lad_r1)
 197
 198         @ Result needs to be shifted right.
 199         movs    xh, xh, lsr #1
 200         movs    xl, xl, rrx
 201         movs    ip, ip, rrx
 202         orrcs   ip, ip, #1
 203         add     r4, r4, #(1 << 20)
 204 LSYM(Lad_r1):
 205         movs    xh, xh, lsr #1
 206         movs    xl, xl, rrx
 207         movs    ip, ip, rrx
 208         orrcs   ip, ip, #1
 209         add     r4, r4, #(1 << 20)
 210
 211         @ Our result is now properly aligned into xh-xl, remaining bits in ip.
 212         @ Round with MSB of ip. If halfway between two numbers, round towards
 213         @ LSB of xl = 0.
 214 LSYM(Lad_r0):
 215         adds    xl, xl, ip, lsr #31
 216         adc     xh, xh, #0
 217         teq     ip, #0x80000000
 218         biceq   xl, xl, #1
 219
 220         @ One extreme rounding case may add a new MSB.  Adjust exponent.
 221         @ That MSB will be cleared when exponent is merged below.
 222         tst     xh, #0x00200000
 223         addne   r4, r4, #(1 << 20)
 224
 225         @ Make sure we did not bust our exponent.
 226         adds    ip, r4, #(1 << 20)
 227         bmi     LSYM(Lad_o)
 228
 229         @ Pack final result together.
 230 LSYM(Lad_e):
 231         bic     xh, xh, #0x00300000
 232         orr     xh, xh, r4
 233         orr     xh, xh, r5
 234         RETLDM  "r4, r5"
 235
 236 LSYM(Lad_l):
 237         @ Result must be shifted left and exponent adjusted.
 238         @ No rounding necessary since ip will always be 0.
 239 #if __ARM_ARCH__ < 5
 240
 241         teq     xh, #0
 242         movne   r3, #-11
 243         moveq   r3, #21
 244         moveq   xh, xl
 245         moveq   xl, #0
 246         mov     r2, xh
 247         movs    ip, xh, lsr #16
 248         moveq   r2, r2, lsl #16
 249         addeq   r3, r3, #16
 250         tst     r2, #0xff000000
 251         moveq   r2, r2, lsl #8
 252         addeq   r3, r3, #8
 253         tst     r2, #0xf0000000
 254         moveq   r2, r2, lsl #4
 255         addeq   r3, r3, #4
 256         tst     r2, #0xc0000000
 257         moveq   r2, r2, lsl #2
 258         addeq   r3, r3, #2
 259         tst     r2, #0x80000000
 260         addeq   r3, r3, #1
 261
 262 #else
 263
 264         teq     xh, #0
 265         moveq   xh, xl
 266         moveq   xl, #0
 267         clz     r3, xh
 268         addeq   r3, r3, #32
 269         sub     r3, r3, #11
 270
 271 #endif
 272
 273         @ determine how to shift the value.
 274         subs    r2, r3, #32
 275         bge     2f
 276         adds    r2, r2, #12
 277         ble     1f
 278
 279         @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
 280         @ since a register switch happened above.
 281         add     ip, r2, #20
 282         rsb     r2, r2, #12
 283         mov     xl, xh, lsl ip
 284         mov     xh, xh, lsr r2
 285         b       3f
 286
 287         @ actually shift value left 1 to 20 bits, which might also represent
 288         @ 32 to 52 bits if counting the register switch that happened earlier.
 289 1:      add     r2, r2, #20
 290 2:      rsble   ip, r2, #32
 291         mov     xh, xh, lsl r2
 292         orrle   xh, xh, xl, lsr ip
 293         movle   xl, xl, lsl r2
 294
 295         @ adjust exponent accordingly.
 296 3:      subs    r4, r4, r3, lsl #20
 297         bgt     LSYM(Lad_e)
 298
 299         @ Exponent too small, denormalize result.
 300         @ Find out proper shift value.
 301         mvn     r4, r4, asr #20
 302         subs    r4, r4, #30
 303         bge     2f
 304         adds    r4, r4, #12
 305         bgt     1f
 306
 307         @ shift result right of 1 to 20 bits, sign is in r5.
 308         add     r4, r4, #20
 309         rsb     r2, r4, #32
 310         mov     xl, xl, lsr r4
 311         orr     xl, xl, xh, lsl r2
 312         orr     xh, r5, xh, lsr r4
 313         RETLDM  "r4, r5"
 314
 315         @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
 316         @ a register switch from xh to xl.
 317 1:      rsb     r4, r4, #12
 318         rsb     r2, r4, #32
 319         mov     xl, xl, lsr r2
 320         orr     xl, xl, xh, lsl r4
 321         mov     xh, r5
 322         RETLDM  "r4, r5"
 323
 324         @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
 325         @ from xh to xl.
 326 2:      mov     xl, xh, lsr r4
 327         mov     xh, r5
 328         RETLDM  "r4, r5"
 329
 330         @ Adjust exponents for denormalized arguments.
 331 LSYM(Lad_d):
 332         teq     r4, #0
 333         eoreq   xh, xh, #0x00100000
 334         addeq   r4, r4, #(1 << 20)
 335         eor     yh, yh, #0x00100000
 336         subne   r5, r5, #(1 << 20)
 337         b       LSYM(Lad_x)
 338
 339         @ Result is x - x = 0, unless x = INF or NAN.
 340 LSYM(Lad_z):
 341         sub     ip, ip, #0x00100000     @ ip becomes 0x7ff00000
 342         and     r2, xh, ip
 343         teq     r2, ip
 344         orreq   xh, ip, #0x00080000
 345         movne   xh, #0
 346         mov     xl, #0
 347         RET
 348
 349         @ Overflow: return INF.
 350 LSYM(Lad_o):
 351         orr     xh, r5, #0x7f000000
 352         orr     xh, xh, #0x00f00000
 353         mov     xl, #0
 354         RETLDM  "r4, r5"
 355
 356         @ At least one of x or y is INF/NAN.
 357         @   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
 358         @   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
 359         @   if either is NAN: return NAN
 360         @   if opposite sign: return NAN
 361         @   return xh-xl (which is INF or -INF)
 362 LSYM(Lad_i):
 363         teq     r4, ip
 364         movne   xh, yh
 365         movne   xl, yl
 366         teqeq   r5, ip
 367         RETLDM  "r4, r5" ne
 368
 369         orrs    r4, xl, xh, lsl #12
 370         orreqs  r4, yl, yh, lsl #12
 371         teqeq   xh, yh
 372         orrne   xh, r5, #0x00080000
 373         movne   xl, #0
 374         RETLDM  "r4, r5"
 375
 376         FUNC_END subdf3
 377         FUNC_END adddf3
 378
 379 ARM_FUNC_START floatunsidf
 380         teq     r0, #0
 381         moveq   r1, #0
 382         RETc(eq)
 383         stmfd   sp!, {r4, r5, lr}
 384         mov     r4, #(0x400 << 20)      @ initial exponent
 385         add     r4, r4, #((52-1) << 20)
 386         mov     r5, #0                  @ sign bit is 0
 387         mov     xl, r0
 388         mov     xh, #0
 389         b       LSYM(Lad_l)
 390
 391         FUNC_END floatunsidf
 392
 393 ARM_FUNC_START floatsidf
 394         teq     r0, #0
 395         moveq   r1, #0
 396         RETc(eq)
 397         stmfd   sp!, {r4, r5, lr}
 398         mov     r4, #(0x400 << 20)      @ initial exponent
 399         add     r4, r4, #((52-1) << 20)
 400         ands    r5, r0, #0x80000000     @ sign bit in r5
 401         rsbmi   r0, r0, #0              @ absolute value
 402         mov     xl, r0
 403         mov     xh, #0
 404         b       LSYM(Lad_l)
 405
 406         FUNC_END floatsidf
 407
 408 ARM_FUNC_START extendsfdf2
 409         movs    r2, r0, lsl #1
 410         beq     1f                      @ value is 0.0 or -0.0
 411         mov     xh, r2, asr #3          @ stretch exponent
 412         mov     xh, xh, rrx             @ retrieve sign bit
 413         mov     xl, r2, lsl #28         @ retrieve remaining bits
 414         ands    r2, r2, #0xff000000     @ isolate exponent
 415         beq     2f                      @ exponent was 0 but not mantissa
 416         teq     r2, #0xff000000         @ check if INF or NAN
 417         eorne   xh, xh, #0x38000000     @ fixup exponent otherwise.
 418         RET
 419
 420 1:      mov     xh, r0
 421         mov     xl, #0
 422         RET
 423
 424 2:      @ value was denormalized.  We can normalize it now.
 425         stmfd   sp!, {r4, r5, lr}
 426         mov     r4, #(0x380 << 20)      @ setup corresponding exponent
 427         add     r4, r4, #(1 << 20)
 428         and     r5, xh, #0x80000000     @ move sign bit in r5
 429         bic     xh, xh, #0x80000000
 430         b       LSYM(Lad_l)
 431
 432         FUNC_END extendsfdf2
 433
 434 #endif /* L_addsubdf3 */
 435
 436 #ifdef L_muldivdf3
 437
 438 ARM_FUNC_START muldf3
 439
 440         stmfd   sp!, {r4, r5, r6, lr}
 441
 442         @ Mask out exponents.
 443         mov     ip, #0x7f000000
 444         orr     ip, ip, #0x00f00000
 445         and     r4, xh, ip
 446         and     r5, yh, ip
 447
 448         @ Trap any INF/NAN.
 449         teq     r4, ip
 450         teqne   r5, ip
 451         beq     LSYM(Lml_s)
 452
 453         @ Trap any multiplication by 0.
 454         orrs    r6, xl, xh, lsl #1
 455         orrnes  r6, yl, yh, lsl #1
 456         beq     LSYM(Lml_z)
 457
 458         @ Shift exponents right one bit to make room for overflow bit.
 459         @ If either of them is 0, scale denormalized arguments off line.
 460         @ Then add both exponents together.
 461         movs    r4, r4, lsr #1
 462         teqne   r5, #0
 463         beq     LSYM(Lml_d)
 464 LSYM(Lml_x):
 465         add     r4, r4, r5, asr #1
 466
 467         @ Preserve final sign in r4 along with exponent for now.
 468         teq     xh, yh
 469         orrmi   r4, r4, #0x8000
 470
 471         @ Convert mantissa to unsigned integer.
 472         bic     xh, xh, ip, lsl #1
 473         bic     yh, yh, ip, lsl #1
 474         orr     xh, xh, #0x00100000
 475         orr     yh, yh, #0x00100000
 476
 477 #if __ARM_ARCH__ < 4
 478
 479         @ Well, no way to make it shorter without the umull instruction.
 480         @ We must perform that 53 x 53 bit multiplication by hand.
 481         stmfd   sp!, {r7, r8, r9, sl, fp}
 482         mov     r7, xl, lsr #16
 483         mov     r8, yl, lsr #16
 484         mov     r9, xh, lsr #16
 485         mov     sl, yh, lsr #16
 486         bic     xl, xl, r7, lsl #16
 487         bic     yl, yl, r8, lsl #16
 488         bic     xh, xh, r9, lsl #16
 489         bic     yh, yh, sl, lsl #16
 490         mul     ip, xl, yl
 491         mul     fp, xl, r8
 492         mov     lr, #0
 493         adds    ip, ip, fp, lsl #16
 494         adc     lr, lr, fp, lsr #16
 495         mul     fp, r7, yl
 496         adds    ip, ip, fp, lsl #16
 497         adc     lr, lr, fp, lsr #16
 498         mul     fp, xl, sl
 499         mov     r5, #0
 500         adds    lr, lr, fp, lsl #16
 501         adc     r5, r5, fp, lsr #16
 502         mul     fp, r7, yh
 503         adds    lr, lr, fp, lsl #16
 504         adc     r5, r5, fp, lsr #16
 505         mul     fp, xh, r8
 506         adds    lr, lr, fp, lsl #16
 507         adc     r5, r5, fp, lsr #16
 508         mul     fp, r9, yl
 509         adds    lr, lr, fp, lsl #16
 510         adc     r5, r5, fp, lsr #16
 511         mul     fp, xh, sl
 512         mul     r6, r9, sl
 513         adds    r5, r5, fp, lsl #16
 514         adc     r6, r6, fp, lsr #16
 515         mul     fp, r9, yh
 516         adds    r5, r5, fp, lsl #16
 517         adc     r6, r6, fp, lsr #16
 518         mul     fp, xl, yh
 519         adds    lr, lr, fp
 520         mul     fp, r7, sl
 521         adcs    r5, r5, fp
 522         mul     fp, xh, yl
 523         adc     r6, r6, #0
 524         adds    lr, lr, fp
 525         mul     fp, r9, r8
 526         adcs    r5, r5, fp
 527         mul     fp, r7, r8
 528         adc     r6, r6, #0
 529         adds    lr, lr, fp
 530         mul     fp, xh, yh
 531         adcs    r5, r5, fp
 532         adc     r6, r6, #0
 533         ldmfd   sp!, {r7, r8, r9, sl, fp}
 534
 535 #else
 536
 537         @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
 538         umull   ip, lr, xl, yl
 539         mov     r5, #0
 540         umlal   lr, r5, xl, yh
 541         umlal   lr, r5, xh, yl
 542         mov     r6, #0
 543         umlal   r5, r6, xh, yh
 544
 545 #endif
 546
 547         @ The LSBs in ip are only significant for the final rounding.
 548         @ Fold them into one bit of lr.
 549         teq     ip, #0
 550         orrne   lr, lr, #1
 551
 552         @ Put final sign in xh.
 553         mov     xh, r4, lsl #16
 554         bic     r4, r4, #0x8000
 555
 556         @ Adjust result if one extra MSB appeared (one of four times).
 557         tst     r6, #(1 << 9)
 558         beq     1f
 559         add     r4, r4, #(1 << 19)
 560         movs    r6, r6, lsr #1
 561         movs    r5, r5, rrx
 562         movs    lr, lr, rrx
 563         orrcs   lr, lr, #1
 564 1:
 565         @ Scale back to 53 bits.
 566         @ xh contains sign bit already.
 567         orr     xh, xh, r6, lsl #12
 568         orr     xh, xh, r5, lsr #20
 569         mov     xl, r5, lsl #12
 570         orr     xl, xl, lr, lsr #20
 571
 572         @ Apply exponent bias, check range for underflow.
 573         sub     r4, r4, #0x00f80000
 574         subs    r4, r4, #0x1f000000
 575         ble     LSYM(Lml_u)
 576
 577         @ Round the result.
 578         movs    lr, lr, lsl #12
 579         bpl     1f
 580         adds    xl, xl, #1
 581         adc     xh, xh, #0
 582         teq     lr, #0x80000000
 583         biceq   xl, xl, #1
 584
 585         @ Rounding may have produced an extra MSB here.
 586         @ The extra bit is cleared before merging the exponent below.
 587         tst     xh, #0x00200000
 588         addne   r4, r4, #(1 << 19)
 589 1:
 590         @ Check exponent for overflow.
 591         adds    ip, r4, #(1 << 19)
 592         tst     ip, #(1 << 30)
 593         bne     LSYM(Lml_o)
 594
 595         @ Add final exponent.
 596         bic     xh, xh, #0x00300000
 597         orr     xh, xh, r4, lsl #1
 598         RETLDM  "r4, r5, r6"
 599
 600         @ Result is 0, but determine sign anyway.
 601 LSYM(Lml_z):
 602         eor     xh, xh, yh
 603 LSYM(Ldv_z):
 604         bic     xh, xh, #0x7fffffff
 605         mov     xl, #0
 606         RETLDM  "r4, r5, r6"
 607
 608         @ Check if denormalized result is possible, otherwise return signed 0.
 609 LSYM(Lml_u):
 610         cmn     r4, #(53 << 19)
 611         movle   xl, #0
 612         bicle   xh, xh, #0x7fffffff
 613         RETLDM  "r4, r5, r6" le
 614
 615         @ Find out proper shift value.
 616 LSYM(Lml_r):
 617         mvn     r4, r4, asr #19
 618         subs    r4, r4, #30
 619         bge     2f
 620         adds    r4, r4, #12
 621         bgt     1f
 622
 623         @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
 624         add     r4, r4, #20
 625         rsb     r5, r4, #32
 626         mov     r3, xl, lsl r5
 627         mov     xl, xl, lsr r4
 628         orr     xl, xl, xh, lsl r5
 629         movs    xh, xh, lsl #1
 630         mov     xh, xh, lsr r4
 631         mov     xh, xh, rrx
 632         adds    xl, xl, r3, lsr #31
 633         adc     xh, xh, #0
 634         teq     lr, #0
 635         teqeq   r3, #0x80000000
 636         biceq   xl, xl, #1
 637         RETLDM  "r4, r5, r6"
 638
 639         @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
 640         @ a register switch from xh to xl. Then round.
 641 1:      rsb     r4, r4, #12
 642         rsb     r5, r4, #32
 643         mov     r3, xl, lsl r4
 644         mov     xl, xl, lsr r5
 645         orr     xl, xl, xh, lsl r4
 646         bic     xh, xh, #0x7fffffff
 647         adds    xl, xl, r3, lsr #31
 648         adc     xh, xh, #0
 649         teq     lr, #0
 650         teqeq   r3, #0x80000000
 651         biceq   xl, xl, #1
 652         RETLDM  "r4, r5, r6"
 653
 654         @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
 655         @ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
 656 2:      rsb     r5, r4, #32
 657         mov     r6, xl, lsl r5
 658         mov     r3, xl, lsr r4
 659         orr     r3, r3, xh, lsl r5
 660         mov     xl, xh, lsr r4
 661         bic     xh, xh, #0x7fffffff
 662         bic     xl, xl, xh, lsr r4
 663         add     xl, xl, r3, lsr #31
 664         orrs    r6, r6, lr
 665         teqeq   r3, #0x80000000
 666         biceq   xl, xl, #1
 667         RETLDM  "r4, r5, r6"
 668
 669         @ One or both arguments are denormalized.
 670         @ Scale them leftwards and preserve sign bit.
 671 LSYM(Lml_d):
 672         mov     lr, #0
 673         teq     r4, #0
 674         bne     2f
 675         and     r6, xh, #0x80000000
 676 1:      movs    xl, xl, lsl #1
 677         adc     xh, lr, xh, lsl #1
 678         tst     xh, #0x00100000
 679         subeq   r4, r4, #(1 << 19)
 680         beq     1b
 681         orr     xh, xh, r6
 682         teq     r5, #0
 683         bne     LSYM(Lml_x)
 684 2:      and     r6, yh, #0x80000000
 685 3:      movs    yl, yl, lsl #1
 686         adc     yh, lr, yh, lsl #1
 687         tst     yh, #0x00100000
 688         subeq   r5, r5, #(1 << 20)
 689         beq     3b
 690         orr     yh, yh, r6
 691         b       LSYM(Lml_x)
 692
 693         @ One or both args are INF or NAN.
 694 LSYM(Lml_s):
 695         orrs    r6, xl, xh, lsl #1
 696         orrnes  r6, yl, yh, lsl #1
 697         beq     LSYM(Lml_n)             @ 0 * INF or INF * 0 -> NAN
 698         teq     r4, ip
 699         bne     1f
 700         orrs    r6, xl, xh, lsl #12
 701         bne     LSYM(Lml_n)             @ NAN * <anything> -> NAN
 702 1:      teq     r5, ip
 703         bne     LSYM(Lml_i)
 704         orrs    r6, yl, yh, lsl #12
 705         bne     LSYM(Lml_n)             @ <anything> * NAN -> NAN
 706
 707         @ Result is INF, but we need to determine its sign.
 708 LSYM(Lml_i):
 709         eor     xh, xh, yh
 710
 711         @ Overflow: return INF (sign already in xh).
 712 LSYM(Lml_o):
 713         and     xh, xh, #0x80000000
 714         orr     xh, xh, #0x7f000000
 715         orr     xh, xh, #0x00f00000
 716         mov     xl, #0
 717         RETLDM  "r4, r5, r6"
 718
 719         @ Return NAN.
 720 LSYM(Lml_n):
 721         mov     xh, #0x7f000000
 722         orr     xh, xh, #0x00f80000
 723         RETLDM  "r4, r5, r6"
 724
 725         FUNC_END muldf3
 726
 727 ARM_FUNC_START divdf3
 728
 729         stmfd   sp!, {r4, r5, r6, lr}
 730
 731         @ Mask out exponents.
 732         mov     ip, #0x7f000000
 733         orr     ip, ip, #0x00f00000
 734         and     r4, xh, ip
 735         and     r5, yh, ip
 736
 737         @ Trap any INF/NAN or zeroes.
 738         teq     r4, ip
 739         teqne   r5, ip
 740         orrnes  r6, xl, xh, lsl #1
 741         orrnes  r6, yl, yh, lsl #1
 742         beq     LSYM(Ldv_s)
 743
 744         @ Shift exponents right one bit to make room for overflow bit.
 745         @ If either of them is 0, scale denormalized arguments off line.
 746         @ Then substract divisor exponent from dividend''s.
 747         movs    r4, r4, lsr #1
 748         teqne   r5, #0
 749         beq     LSYM(Ldv_d)
 750 LSYM(Ldv_x):
 751         sub     r4, r4, r5, asr #1
 752
 753         @ Preserve final sign into lr.
 754         eor     lr, xh, yh
 755
 756         @ Convert mantissa to unsigned integer.
 757         @ Dividend -> r5-r6, divisor -> yh-yl.
 758         mov     r5, #0x10000000
 759         mov     yh, yh, lsl #12
 760         orr     yh, r5, yh, lsr #4
 761         orr     yh, yh, yl, lsr #24
 762         movs    yl, yl, lsl #8
 763         mov     xh, xh, lsl #12
 764         teqeq   yh, r5
 765         beq     LSYM(Ldv_1)
 766         orr     r5, r5, xh, lsr #4
 767         orr     r5, r5, xl, lsr #24
 768         mov     r6, xl, lsl #8
 769
 770         @ Initialize xh with final sign bit.
 771         and     xh, lr, #0x80000000
 772
 773         @ Ensure result will land to known bit position.
 774         cmp     r5, yh
 775         cmpeq   r6, yl
 776         bcs     1f
 777         sub     r4, r4, #(1 << 19)
 778         movs    yh, yh, lsr #1
 779         mov     yl, yl, rrx
 780 1:
 781         @ Apply exponent bias, check range for over/underflow.
 782         add     r4, r4, #0x1f000000
 783         add     r4, r4, #0x00f80000
 784         cmn     r4, #(53 << 19)
 785         ble     LSYM(Ldv_z)
 786         cmp     r4, ip, lsr #1
 787         bge     LSYM(Lml_o)
 788
 789         @ Perform first substraction to align result to a nibble.
 790         subs    r6, r6, yl
 791         sbc     r5, r5, yh
 792         movs    yh, yh, lsr #1
 793         mov     yl, yl, rrx
 794         mov     xl, #0x00100000
 795         mov     ip, #0x00080000
 796
 797         @ The actual division loop.
 798 1:      subs    lr, r6, yl
 799         sbcs    lr, r5, yh
 800         subcs   r6, r6, yl
 801         movcs   r5, lr
 802         orrcs   xl, xl, ip
 803         movs    yh, yh, lsr #1
 804         mov     yl, yl, rrx
 805         subs    lr, r6, yl
 806         sbcs    lr, r5, yh
 807         subcs   r6, r6, yl
 808         movcs   r5, lr
 809         orrcs   xl, xl, ip, lsr #1
 810         movs    yh, yh, lsr #1
 811         mov     yl, yl, rrx
 812         subs    lr, r6, yl
 813         sbcs    lr, r5, yh
 814         subcs   r6, r6, yl
 815         movcs   r5, lr
 816         orrcs   xl, xl, ip, lsr #2
 817         movs    yh, yh, lsr #1
 818         mov     yl, yl, rrx
 819         subs    lr, r6, yl
 820         sbcs    lr, r5, yh
 821         subcs   r6, r6, yl
 822         movcs   r5, lr
 823         orrcs   xl, xl, ip, lsr #3
 824
 825         orrs    lr, r5, r6
 826         beq     2f
 827         mov     r5, r5, lsl #4
 828         orr     r5, r5, r6, lsr #28
 829         mov     r6, r6, lsl #4
 830         mov     yh, yh, lsl #3
 831         orr     yh, yh, yl, lsr #29
 832         mov     yl, yl, lsl #3
 833         movs    ip, ip, lsr #4
 834         bne     1b
 835
 836         @ We are done with a word of the result.
 837         @ Loop again for the low word if this pass was for the high word.
 838         tst     xh, #0x00100000
 839         bne     3f
 840         orr     xh, xh, xl
 841         mov     xl, #0
 842         mov     ip, #0x80000000
 843         b       1b
 844 2:
 845         @ Be sure result starts in the high word.
 846         tst     xh, #0x00100000
 847         orreq   xh, xh, xl
 848         moveq   xl, #0
 849 3:
 850         @ Check if denormalized result is needed.
 851         cmp     r4, #0
 852         ble     LSYM(Ldv_u)
 853
 854         @ Apply proper rounding.
 855         subs    ip, r5, yh
 856         subeqs  ip, r6, yl
 857         adcs    xl, xl, #0
 858         adc     xh, xh, #0
 859         teq     ip, #0
 860         biceq   xl, xl, #1
 861
 862         @ Add exponent to result.
 863         bic     xh, xh, #0x00100000
 864         orr     xh, xh, r4, lsl #1
 865         RETLDM  "r4, r5, r6"
 866
 867         @ Division by 0x1p*: shortcut a lot of code.
 868 LSYM(Ldv_1):
 869         and     lr, lr, #0x80000000
 870         orr     xh, lr, xh, lsr #12
 871         add     r4, r4, #0x1f000000
 872         add     r4, r4, #0x00f80000
 873         cmp     r4, ip, lsr #1
 874         bge     LSYM(Lml_o)
 875         cmp     r4, #0
 876         orrgt   xh, xh, r4, lsl #1
 877         RETLDM  "r4, r5, r6" gt
 878
 879         cmn     r4, #(53 << 19)
 880         ble     LSYM(Ldv_z)
 881         orr     xh, xh, #0x00100000
 882         mov     lr, #0
 883         b       LSYM(Lml_r)
 884
 885         @ Result must be denormalized: put remainder in lr for
 886         @ rounding considerations.
 887 LSYM(Ldv_u):
 888         orr     lr, r5, r6
 889         b       LSYM(Lml_r)
 890
 891         @ One or both arguments are denormalized.
 892         @ Scale them leftwards and preserve sign bit.
 893 LSYM(Ldv_d):
 894         mov     lr, #0
 895         teq     r4, #0
 896         bne     2f
 897         and     r6, xh, #0x80000000
 898 1:      movs    xl, xl, lsl #1
 899         adc     xh, lr, xh, lsl #1
 900         tst     xh, #0x00100000
 901         subeq   r4, r4, #(1 << 19)
 902         beq     1b
 903         orr     xh, xh, r6
 904         teq     r5, #0
 905         bne     LSYM(Ldv_x)
 906 2:      and     r6, yh, #0x80000000
 907 3:      movs    yl, yl, lsl #1
 908         adc     yh, lr, yh, lsl #1
 909         tst     yh, #0x00100000
 910         subeq   r5, r5, #(1 << 20)
 911         beq     3b
 912         orr     yh, yh, r6
 913         b       LSYM(Ldv_x)
 914
 915         @ One or both arguments is either INF, NAN or zero.
 916 LSYM(Ldv_s):
 917         teq     r4, ip
 918         teqeq   r5, ip
 919         beq     LSYM(Lml_n)             @ INF/NAN / INF/NAN -> NAN
 920         teq     r4, ip
 921         bne     1f
 922         orrs    r4, xl, xh, lsl #12
 923         bne     LSYM(Lml_n)             @ NAN / <anything> -> NAN
 924         b       LSYM(Lml_i)             @ INF / <anything> -> INF
 925 1:      teq     r5, ip
 926         bne     2f
 927         orrs    r5, yl, yh, lsl #12
 928         bne     LSYM(Lml_n)             @ <anything> / NAN -> NAN
 929         b       LSYM(Lml_z)             @ <anything> / INF -> 0
 930 2:      @ One or both arguments are 0.
 931         orrs    r4, xl, xh, lsl #1
 932         bne     LSYM(Lml_i)             @ <non_zero> / 0 -> INF
 933         orrs    r5, yl, yh, lsl #1
 934         bne     LSYM(Lml_z)             @ 0 / <non_zero> -> 0
 935         b       LSYM(Lml_n)             @ 0 / 0 -> NAN
 936
 937         FUNC_END divdf3
 938
 939 #endif /* L_muldivdf3 */
 940
 941 #ifdef L_cmpdf2
 942
 943 ARM_FUNC_START gtdf2
 944 ARM_FUNC_ALIAS gedf2 gtdf2
 945         mov     ip, #-1
 946         b       1f
 947
 948 ARM_FUNC_START ltdf2
 949 ARM_FUNC_ALIAS ledf2 ltdf2
 950         mov     ip, #1
 951         b       1f
 952
 953 ARM_FUNC_START cmpdf2
 954 ARM_FUNC_ALIAS nedf2 cmpdf2
 955 ARM_FUNC_ALIAS eqdf2 cmpdf2
 956         mov     ip, #1                  @ how should we specify unordered here?
 957
 958 1:      stmfd   sp!, {r4, r5, lr}
 959
 960         @ Trap any INF/NAN first.
 961         mov     lr, #0x7f000000
 962         orr     lr, lr, #0x00f00000
 963         and     r4, xh, lr
 964         and     r5, yh, lr
 965         teq     r4, lr
 966         teqne   r5, lr
 967         beq     3f
 968
 969         @ Test for equality.
 970         @ Note that 0.0 is equal to -0.0.
 971 2:      orrs    ip, xl, xh, lsl #1      @ if x == 0.0 or -0.0
 972         orreqs  ip, yl, yh, lsl #1      @ and y == 0.0 or -0.0
 973         teqne   xh, yh                  @ or xh == yh
 974         teqeq   xl, yl                  @ and xl == yl
 975         moveq   r0, #0                  @ then equal.
 976         RETLDM  "r4, r5" eq
 977
 978         @ Check for sign difference.
 979         teq     xh, yh
 980         movmi   r0, xh, asr #31
 981         orrmi   r0, r0, #1
 982         RETLDM  "r4, r5" mi
 983
 984         @ Compare exponents.
 985         cmp     r4, r5
 986
 987         @ Compare mantissa if exponents are equal.
 988         moveq   xh, xh, lsl #12
 989         cmpeq   xh, yh, lsl #12
 990         cmpeq   xl, yl
 991         movcs   r0, yh, asr #31
 992         mvncc   r0, yh, asr #31
 993         orr     r0, r0, #1
 994         RETLDM  "r4, r5"
 995
 996         @ Look for a NAN.
 997 3:      teq     r4, lr
 998         bne     4f
 999         orrs    xl, xl, xh, lsl #12
1000         bne     5f                      @ x is NAN
1001 4:      teq     r5, lr
1002         bne     2b
1003         orrs    yl, yl, yh, lsl #12
1004         beq     2b                      @ y is not NAN
1005 5:      mov     r0, ip                  @ return unordered code from ip
1006         RETLDM  "r4, r5"
1007
1008         FUNC_END gedf2
1009         FUNC_END gtdf2
1010         FUNC_END ledf2
1011         FUNC_END ltdf2
1012         FUNC_END nedf2
1013         FUNC_END eqdf2
1014         FUNC_END cmpdf2
1015
1016 #endif /* L_cmpdf2 */
1017
1018 #ifdef L_unorddf2
1019
1020 ARM_FUNC_START unorddf2
1021         str     lr, [sp, #-4]!
1022         mov     ip, #0x7f000000
1023         orr     ip, ip, #0x00f00000
1024         and     lr, xh, ip
1025         teq     lr, ip
1026         bne     1f
1027         orrs    xl, xl, xh, lsl #12
1028         bne     3f                      @ x is NAN
1029 1:      and     lr, yh, ip
1030         teq     lr, ip
1031         bne     2f
1032         orrs    yl, yl, yh, lsl #12
1033         bne     3f                      @ y is NAN
1034 2:      mov     r0, #0                  @ arguments are ordered.
1035         RETLDM
1036
1037 3:      mov     r0, #1                  @ arguments are unordered.
1038         RETLDM
1039
1040         FUNC_END unorddf2
1041
1042 #endif /* L_unorddf2 */
1043
1044 #ifdef L_fixdfsi
1045
1046 ARM_FUNC_START fixdfsi
1047         orrs    ip, xl, xh, lsl #1
1048         beq     1f                      @ value is 0.
1049
1050         mov     r3, r3, rrx             @ preserve C flag (the actual sign)
1051
1052         @ check exponent range.
1053         mov     ip, #0x7f000000
1054         orr     ip, ip, #0x00f00000
1055         and     r2, xh, ip
1056         teq     r2, ip
1057         beq     2f                      @ value is INF or NAN
1058         bic     ip, ip, #0x40000000
1059         cmp     r2, ip
1060         bcc     1f                      @ value is too small
1061         add     ip, ip, #(31 << 20)
1062         cmp     r2, ip
1063         bcs     3f                      @ value is too large
1064
1065         rsb     r2, r2, ip
1066         mov     ip, xh, lsl #11
1067         orr     ip, ip, #0x80000000
1068         orr     ip, ip, xl, lsr #21
1069         mov     r2, r2, lsr #20
1070         tst     r3, #0x80000000         @ the sign bit
1071         mov     r0, ip, lsr r2
1072         rsbne   r0, r0, #0
1073         RET
1074
1075 1:      mov     r0, #0
1076         RET
1077
1078 2:      orrs    xl, xl, xh, lsl #12
1079         bne     4f                      @ r0 is NAN.
1080 3:      ands    r0, r3, #0x80000000     @ the sign bit
1081         moveq   r0, #0x7fffffff         @ maximum signed positive si
1082         RET
1083
1084 4:      mov     r0, #0                  @ How should we convert NAN?
1085         RET
1086
1087         FUNC_END fixdfsi
1088
1089 #endif /* L_fixdfsi */
1090
1091 #ifdef L_fixunsdfsi
1092
1093 ARM_FUNC_START fixunsdfsi
1094         orrs    ip, xl, xh, lsl #1
1095         movcss  r0, #0                  @ value is negative
1096         RETc(eq)                        @ or 0 (xl, xh overlap r0)
1097
1098         @ check exponent range.
1099         mov     ip, #0x7f000000
1100         orr     ip, ip, #0x00f00000
1101         and     r2, xh, ip
1102         teq     r2, ip
1103         beq     2f                      @ value is INF or NAN
1104         bic     ip, ip, #0x40000000
1105         cmp     r2, ip
1106         bcc     1f                      @ value is too small
1107         add     ip, ip, #(31 << 20)
1108         cmp     r2, ip
1109         bhi     3f                      @ value is too large
1110
1111         rsb     r2, r2, ip
1112         mov     ip, xh, lsl #11
1113         orr     ip, ip, #0x80000000
1114         orr     ip, ip, xl, lsr #21
1115         mov     r2, r2, lsr #20
1116         mov     r0, ip, lsr r2
1117         RET
1118
1119 1:      mov     r0, #0
1120         RET
1121
1122 2:      orrs    xl, xl, xh, lsl #12
1123         bne     4f                      @ value is NAN.
1124 3:      mov     r0, #0xffffffff         @ maximum unsigned si
1125         RET
1126
1127 4:      mov     r0, #0                  @ How should we convert NAN?
1128         RET
1129
1130         FUNC_END fixunsdfsi
1131
1132 #endif /* L_fixunsdfsi */
1133
1134 #ifdef L_truncdfsf2
1135
1136 ARM_FUNC_START truncdfsf2
1137         orrs    r2, xl, xh, lsl #1
1138         moveq   r0, r2, rrx
1139         RETc(eq)                        @ value is 0.0 or -0.0
1140
1141         @ check exponent range.
1142         mov     ip, #0x7f000000
1143         orr     ip, ip, #0x00f00000
1144         and     r2, ip, xh
1145         teq     r2, ip
1146         beq     2f                      @ value is INF or NAN
1147         bic     xh, xh, ip
1148         cmp     r2, #(0x380 << 20)
1149         bls     4f                      @ value is too small
1150
1151         @ shift and round mantissa
1152 1:      movs    r3, xl, lsr #29
1153         adc     r3, r3, xh, lsl #3
1154
1155         @ if halfway between two numbers, round towards LSB = 0.
1156         mov     xl, xl, lsl #3
1157         teq     xl, #0x80000000
1158         biceq   r3, r3, #1
1159
1160         @ rounding might have created an extra MSB.  If so adjust exponent.
1161         tst     r3, #0x00800000
1162         addne   r2, r2, #(1 << 20)
1163         bicne   r3, r3, #0x00800000
1164
1165         @ check exponent for overflow
1166         mov     ip, #(0x400 << 20)
1167         orr     ip, ip, #(0x07f << 20)
1168         cmp     r2, ip
1169         bcs     3f                      @ overflow
1170
1171         @ adjust exponent, merge with sign bit and mantissa.
1172         movs    xh, xh, lsl #1
1173         mov     r2, r2, lsl #4
1174         orr     r0, r3, r2, rrx
1175         eor     r0, r0, #0x40000000
1176         RET
1177
1178 2:      @ chech for NAN
1179         orrs    xl, xl, xh, lsl #12
1180         movne   r0, #0x7f000000
1181         orrne   r0, r0, #0x00c00000
1182         RETc(ne)                        @ return NAN
1183
1184 3:      @ return INF with sign
1185         and     r0, xh, #0x80000000
1186         orr     r0, r0, #0x7f000000
1187         orr     r0, r0, #0x00800000
1188         RET
1189
1190 4:      @ check if denormalized value is possible
1191         subs    r2, r2, #((0x380 - 24) << 20)
1192         andle   r0, xh, #0x80000000     @ too small, return signed 0.
1193         RETc(le)
1194
1195         @ denormalize value so we can resume with the code above afterwards.
1196         orr     xh, xh, #0x00100000
1197         mov     r2, r2, lsr #20
1198         rsb     r2, r2, #25
1199         cmp     r2, #20
1200         bgt     6f
1201
1202         rsb     ip, r2, #32
1203         mov     r3, xl, lsl ip
1204         mov     xl, xl, lsr r2
1205         orr     xl, xl, xh, lsl ip
1206         movs    xh, xh, lsl #1
1207         mov     xh, xh, lsr r2
1208         mov     xh, xh, rrx
1209 5:      teq     r3, #0                  @ fold r3 bits into the LSB
1210         orrne   xl, xl, #1              @ for rounding considerations.
1211         mov     r2, #(0x380 << 20)      @ equivalent to the 0 float exponent
1212         b       1b
1213
1214 6:      rsb     r2, r2, #(12 + 20)
1215         rsb     ip, r2, #32
1216         mov     r3, xl, lsl r2
1217         mov     xl, xl, lsr ip
1218         orr     xl, xl, xh, lsl r2
1219         and     xh, xh, #0x80000000
1220         b       5b
1221
1222         FUNC_END truncdfsf2
1223
1224 #endif /* L_truncdfsf2 */