gcc/config/arm/ieee754-sf.S

   1 /* ieee754-sf.S single-precision floating point support for ARM
   2
   3    Copyright (C) 2003, 2004, 2005, 2007  Free Software Foundation, Inc.
   4    Contributed by Nicolas Pitre (nico@cam.org)
   5
   6    This file is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by the
   8    Free Software Foundation; either version 2, or (at your option) any
   9    later version.
  10
  11    In addition to the permissions in the GNU General Public License, the
  12    Free Software Foundation gives you unlimited permission to link the
  13    compiled version of this file into combinations with other programs,
  14    and to distribute those combinations without any restriction coming
  15    from the use of this file.  (The General Public License restrictions
  16    do apply in other respects; for example, they cover modification of
  17    the file, and distribution when not linked into a combine
  18    executable.)
  19
  20    This file is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; see the file COPYING.  If not, write to
  27    the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  28    Boston, MA 02110-1301, USA.  */
  29
  30 /*
  31  * Notes:
  32  *
  33  * The goal of this code is to be as fast as possible.  This is
  34  * not meant to be easy to understand for the casual reader.
  35  *
  36  * Only the default rounding mode is intended for best performances.
  37  * Exceptions aren't supported yet, but that can be added quite easily
  38  * if necessary without impacting performances.
  39  */
  40
  41 #ifdef L_negsf2
  42
  43 ARM_FUNC_START negsf2
  44 ARM_FUNC_ALIAS aeabi_fneg negsf2
  45
  46         eor     r0, r0, #0x80000000     @ flip sign bit
  47         RET
  48
  49         FUNC_END aeabi_fneg
  50         FUNC_END negsf2
  51
  52 #endif
  53
  54 #ifdef L_addsubsf3
  55
  56 ARM_FUNC_START aeabi_frsub
  57
  58         eor     r0, r0, #0x80000000     @ flip sign bit of first arg
  59         b       1f
  60
  61 ARM_FUNC_START subsf3
  62 ARM_FUNC_ALIAS aeabi_fsub subsf3
  63
  64         eor     r1, r1, #0x80000000     @ flip sign bit of second arg
  65 #if defined(__INTERWORKING_STUBS__)
  66         b       1f                      @ Skip Thumb-code prologue
  67 #endif
  68
  69 ARM_FUNC_START addsf3
  70 ARM_FUNC_ALIAS aeabi_fadd addsf3
  71
  72 1:      @ Look for zeroes, equal values, INF, or NAN.
  73         movs    r2, r0, lsl #1
  74         do_it   ne, ttt
  75         COND(mov,s,ne)  r3, r1, lsl #1
  76         teqne   r2, r3
  77         COND(mvn,s,ne)  ip, r2, asr #24
  78         COND(mvn,s,ne)  ip, r3, asr #24
  79         beq     LSYM(Lad_s)
  80
  81         @ Compute exponent difference.  Make largest exponent in r2,
  82         @ corresponding arg in r0, and positive exponent difference in r3.
  83         mov     r2, r2, lsr #24
  84         rsbs    r3, r2, r3, lsr #24
  85         do_it   gt, ttt
  86         addgt   r2, r2, r3
  87         eorgt   r1, r0, r1
  88         eorgt   r0, r1, r0
  89         eorgt   r1, r0, r1
  90         do_it   lt
  91         rsblt   r3, r3, #0
  92
  93         @ If exponent difference is too large, return largest argument
  94         @ already in r0.  We need up to 25 bit to handle proper rounding
  95         @ of 0x1p25 - 1.1.
  96         cmp     r3, #25
  97         do_it   hi
  98         RETc(hi)
  99
 100         @ Convert mantissa to signed integer.
 101         tst     r0, #0x80000000
 102         orr     r0, r0, #0x00800000
 103         bic     r0, r0, #0xff000000
 104         do_it   ne
 105         rsbne   r0, r0, #0
 106         tst     r1, #0x80000000
 107         orr     r1, r1, #0x00800000
 108         bic     r1, r1, #0xff000000
 109         do_it   ne
 110         rsbne   r1, r1, #0
 111
 112         @ If exponent == difference, one or both args were denormalized.
 113         @ Since this is not common case, rescale them off line.
 114         teq     r2, r3
 115         beq     LSYM(Lad_d)
 116 LSYM(Lad_x):
 117
 118         @ Compensate for the exponent overlapping the mantissa MSB added later
 119         sub     r2, r2, #1
 120
 121         @ Shift and add second arg to first arg in r0.
 122         @ Keep leftover bits into r1.
 123         shiftop adds r0 r0 r1 asr r3 ip
 124         rsb     r3, r3, #32
 125         shift1  lsl, r1, r1, r3
 126
 127         @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
 128         and     r3, r0, #0x80000000
 129         bpl     LSYM(Lad_p)
 130 #if defined(__thumb2__)
 131         negs    r1, r1
 132         sbc     r0, r0, r0, lsl #1
 133 #else
 134         rsbs    r1, r1, #0
 135         rsc     r0, r0, #0
 136 #endif
 137
 138         @ Determine how to normalize the result.
 139 LSYM(Lad_p):
 140         cmp     r0, #0x00800000
 141         bcc     LSYM(Lad_a)
 142         cmp     r0, #0x01000000
 143         bcc     LSYM(Lad_e)
 144
 145         @ Result needs to be shifted right.
 146         movs    r0, r0, lsr #1
 147         mov     r1, r1, rrx
 148         add     r2, r2, #1
 149
 150         @ Make sure we did not bust our exponent.
 151         cmp     r2, #254
 152         bhs     LSYM(Lad_o)
 153
 154         @ Our result is now properly aligned into r0, remaining bits in r1.
 155         @ Pack final result together.
 156         @ Round with MSB of r1. If halfway between two numbers, round towards
 157         @ LSB of r0 = 0.
 158 LSYM(Lad_e):
 159         cmp     r1, #0x80000000
 160         adc     r0, r0, r2, lsl #23
 161         do_it   eq
 162         biceq   r0, r0, #1
 163         orr     r0, r0, r3
 164         RET
 165
 166         @ Result must be shifted left and exponent adjusted.
 167 LSYM(Lad_a):
 168         movs    r1, r1, lsl #1
 169         adc     r0, r0, r0
 170         tst     r0, #0x00800000
 171         sub     r2, r2, #1
 172         bne     LSYM(Lad_e)
 173
 174         @ No rounding necessary since r1 will always be 0 at this point.
 175 LSYM(Lad_l):
 176
 177 #if __ARM_ARCH__ < 5
 178
 179         movs    ip, r0, lsr #12
 180         moveq   r0, r0, lsl #12
 181         subeq   r2, r2, #12
 182         tst     r0, #0x00ff0000
 183         moveq   r0, r0, lsl #8
 184         subeq   r2, r2, #8
 185         tst     r0, #0x00f00000
 186         moveq   r0, r0, lsl #4
 187         subeq   r2, r2, #4
 188         tst     r0, #0x00c00000
 189         moveq   r0, r0, lsl #2
 190         subeq   r2, r2, #2
 191         cmp     r0, #0x00800000
 192         movcc   r0, r0, lsl #1
 193         sbcs    r2, r2, #0
 194
 195 #else
 196
 197         clz     ip, r0
 198         sub     ip, ip, #8
 199         subs    r2, r2, ip
 200         shift1  lsl, r0, r0, ip
 201
 202 #endif
 203
 204         @ Final result with sign
 205         @ If exponent negative, denormalize result.
 206         do_it   ge, et
 207         addge   r0, r0, r2, lsl #23
 208         rsblt   r2, r2, #0
 209         orrge   r0, r0, r3
 210 #if defined(__thumb2__)
 211         do_it   lt, t
 212         lsrlt   r0, r0, r2
 213         orrlt   r0, r3, r0
 214 #else
 215         orrlt   r0, r3, r0, lsr r2
 216 #endif
 217         RET
 218
 219         @ Fixup and adjust bit position for denormalized arguments.
 220         @ Note that r2 must not remain equal to 0.
 221 LSYM(Lad_d):
 222         teq     r2, #0
 223         eor     r1, r1, #0x00800000
 224         do_it   eq, te
 225         eoreq   r0, r0, #0x00800000
 226         addeq   r2, r2, #1
 227         subne   r3, r3, #1
 228         b       LSYM(Lad_x)
 229
 230 LSYM(Lad_s):
 231         mov     r3, r1, lsl #1
 232
 233         mvns    ip, r2, asr #24
 234         do_it   ne
 235         COND(mvn,s,ne)  ip, r3, asr #24
 236         beq     LSYM(Lad_i)
 237
 238         teq     r2, r3
 239         beq     1f
 240
 241         @ Result is x + 0.0 = x or 0.0 + y = y.
 242         teq     r2, #0
 243         do_it   eq
 244         moveq   r0, r1
 245         RET
 246
 247 1:      teq     r0, r1
 248
 249         @ Result is x - x = 0.
 250         do_it   ne, t
 251         movne   r0, #0
 252         RETc(ne)
 253
 254         @ Result is x + x = 2x.
 255         tst     r2, #0xff000000
 256         bne     2f
 257         movs    r0, r0, lsl #1
 258         do_it   cs
 259         orrcs   r0, r0, #0x80000000
 260         RET
 261 2:      adds    r2, r2, #(2 << 24)
 262         do_it   cc, t
 263         addcc   r0, r0, #(1 << 23)
 264         RETc(cc)
 265         and     r3, r0, #0x80000000
 266
 267         @ Overflow: return INF.
 268 LSYM(Lad_o):
 269         orr     r0, r3, #0x7f000000
 270         orr     r0, r0, #0x00800000
 271         RET
 272
 273         @ At least one of r0/r1 is INF/NAN.
 274         @   if r0 != INF/NAN: return r1 (which is INF/NAN)
 275         @   if r1 != INF/NAN: return r0 (which is INF/NAN)
 276         @   if r0 or r1 is NAN: return NAN
 277         @   if opposite sign: return NAN
 278         @   otherwise return r0 (which is INF or -INF)
 279 LSYM(Lad_i):
 280         mvns    r2, r2, asr #24
 281         do_it   ne, et
 282         movne   r0, r1
 283         COND(mvn,s,eq)  r3, r3, asr #24
 284         movne   r1, r0
 285         movs    r2, r0, lsl #9
 286         do_it   eq, te
 287         COND(mov,s,eq)  r3, r1, lsl #9
 288         teqeq   r0, r1
 289         orrne   r0, r0, #0x00400000     @ quiet NAN
 290         RET
 291
 292         FUNC_END aeabi_frsub
 293         FUNC_END aeabi_fadd
 294         FUNC_END addsf3
 295         FUNC_END aeabi_fsub
 296         FUNC_END subsf3
 297
 298 ARM_FUNC_START floatunsisf
 299 ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
 300
 301         mov     r3, #0
 302         b       1f
 303
 304 ARM_FUNC_START floatsisf
 305 ARM_FUNC_ALIAS aeabi_i2f floatsisf
 306
 307         ands    r3, r0, #0x80000000
 308         do_it   mi
 309         rsbmi   r0, r0, #0
 310
 311 1:      movs    ip, r0
 312         do_it   eq
 313         RETc(eq)
 314
 315         @ Add initial exponent to sign
 316         orr     r3, r3, #((127 + 23) << 23)
 317
 318         .ifnc   ah, r0
 319         mov     ah, r0
 320         .endif
 321         mov     al, #0
 322         b       2f
 323
 324         FUNC_END aeabi_i2f
 325         FUNC_END floatsisf
 326         FUNC_END aeabi_ui2f
 327         FUNC_END floatunsisf
 328
 329 ARM_FUNC_START floatundisf
 330 ARM_FUNC_ALIAS aeabi_ul2f floatundisf
 331
 332         orrs    r2, r0, r1
 333 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
 334         do_itt  eq
 335         mvfeqs  f0, #0.0
 336 #else
 337         do_it   eq
 338 #endif
 339         RETc(eq)
 340
 341         mov     r3, #0
 342         b       1f
 343
 344 ARM_FUNC_START floatdisf
 345 ARM_FUNC_ALIAS aeabi_l2f floatdisf
 346
 347         orrs    r2, r0, r1
 348 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
 349         do_it   eq, t
 350         mvfeqs  f0, #0.0
 351 #else
 352         do_it   eq
 353 #endif
 354         RETc(eq)
 355
 356         ands    r3, ah, #0x80000000     @ sign bit in r3
 357         bpl     1f
 358 #if defined(__thumb2__)
 359         negs    al, al
 360         sbc     ah, ah, ah, lsl #1
 361 #else
 362         rsbs    al, al, #0
 363         rsc     ah, ah, #0
 364 #endif
 365 1:
 366 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
 367         @ For hard FPA code we want to return via the tail below so that
 368         @ we can return the result in f0 as well as in r0 for backwards
 369         @ compatibility.
 370         str     lr, [sp, #-8]!
 371         adr     lr, LSYM(f0_ret)
 372 #endif
 373
 374         movs    ip, ah
 375         do_it   eq, tt
 376         moveq   ip, al
 377         moveq   ah, al
 378         moveq   al, #0
 379
 380         @ Add initial exponent to sign
 381         orr     r3, r3, #((127 + 23 + 32) << 23)
 382         do_it   eq
 383         subeq   r3, r3, #(32 << 23)
 384 2:      sub     r3, r3, #(1 << 23)
 385
 386 #if __ARM_ARCH__ < 5
 387
 388         mov     r2, #23
 389         cmp     ip, #(1 << 16)
 390         do_it   hs, t
 391         movhs   ip, ip, lsr #16
 392         subhs   r2, r2, #16
 393         cmp     ip, #(1 << 8)
 394         do_it   hs, t
 395         movhs   ip, ip, lsr #8
 396         subhs   r2, r2, #8
 397         cmp     ip, #(1 << 4)
 398         do_it   hs, t
 399         movhs   ip, ip, lsr #4
 400         subhs   r2, r2, #4
 401         cmp     ip, #(1 << 2)
 402         do_it   hs, e
 403         subhs   r2, r2, #2
 404         sublo   r2, r2, ip, lsr #1
 405         subs    r2, r2, ip, lsr #3
 406
 407 #else
 408
 409         clz     r2, ip
 410         subs    r2, r2, #8
 411
 412 #endif
 413
 414         sub     r3, r3, r2, lsl #23
 415         blt     3f
 416
 417         shiftop add r3 r3 ah lsl r2 ip
 418         shift1  lsl, ip, al, r2
 419         rsb     r2, r2, #32
 420         cmp     ip, #0x80000000
 421         shiftop adc r0 r3 al lsr r2 r2
 422         do_it   eq
 423         biceq   r0, r0, #1
 424         RET
 425
 426 3:      add     r2, r2, #32
 427         shift1  lsl, ip, ah, r2
 428         rsb     r2, r2, #32
 429         orrs    al, al, ip, lsl #1
 430         shiftop adc r0 r3 ah lsr r2 r2
 431         do_it   eq
 432         biceq   r0, r0, ip, lsr #31
 433         RET
 434
 435 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
 436
 437 LSYM(f0_ret):
 438         str     r0, [sp, #-4]!
 439         ldfs    f0, [sp], #4
 440         RETLDM
 441
 442 #endif
 443
 444         FUNC_END floatdisf
 445         FUNC_END aeabi_l2f
 446         FUNC_END floatundisf
 447         FUNC_END aeabi_ul2f
 448
 449 #endif /* L_addsubsf3 */
 450
 451 #ifdef L_muldivsf3
 452
 453 ARM_FUNC_START mulsf3
 454 ARM_FUNC_ALIAS aeabi_fmul mulsf3
 455
 456         @ Mask out exponents, trap any zero/denormal/INF/NAN.
 457         mov     ip, #0xff
 458         ands    r2, ip, r0, lsr #23
 459         do_it   ne, tt
 460         COND(and,s,ne)  r3, ip, r1, lsr #23
 461         teqne   r2, ip
 462         teqne   r3, ip
 463         beq     LSYM(Lml_s)
 464 LSYM(Lml_x):
 465
 466         @ Add exponents together
 467         add     r2, r2, r3
 468
 469         @ Determine final sign.
 470         eor     ip, r0, r1
 471
 472         @ Convert mantissa to unsigned integer.
 473         @ If power of two, branch to a separate path.
 474         @ Make up for final alignment.
 475         movs    r0, r0, lsl #9
 476         do_it   ne
 477         COND(mov,s,ne)  r1, r1, lsl #9
 478         beq     LSYM(Lml_1)
 479         mov     r3, #0x08000000
 480         orr     r0, r3, r0, lsr #5
 481         orr     r1, r3, r1, lsr #5
 482
 483 #if __ARM_ARCH__ < 4
 484
 485         @ Put sign bit in r3, which will be restored into r0 later.
 486         and     r3, ip, #0x80000000
 487
 488         @ Well, no way to make it shorter without the umull instruction.
 489         do_push {r3, r4, r5}
 490         mov     r4, r0, lsr #16
 491         mov     r5, r1, lsr #16
 492         bic     r0, r0, r4, lsl #16
 493         bic     r1, r1, r5, lsl #16
 494         mul     ip, r4, r5
 495         mul     r3, r0, r1
 496         mul     r0, r5, r0
 497         mla     r0, r4, r1, r0
 498         adds    r3, r3, r0, lsl #16
 499         adc     r1, ip, r0, lsr #16
 500         do_pop  {r0, r4, r5}
 501
 502 #else
 503
 504         @ The actual multiplication.
 505         umull   r3, r1, r0, r1
 506
 507         @ Put final sign in r0.
 508         and     r0, ip, #0x80000000
 509
 510 #endif
 511
 512         @ Adjust result upon the MSB position.
 513         cmp     r1, #(1 << 23)
 514         do_it   cc, tt
 515         movcc   r1, r1, lsl #1
 516         orrcc   r1, r1, r3, lsr #31
 517         movcc   r3, r3, lsl #1
 518
 519         @ Add sign to result.
 520         orr     r0, r0, r1
 521
 522         @ Apply exponent bias, check for under/overflow.
 523         sbc     r2, r2, #127
 524         cmp     r2, #(254 - 1)
 525         bhi     LSYM(Lml_u)
 526
 527         @ Round the result, merge final exponent.
 528         cmp     r3, #0x80000000
 529         adc     r0, r0, r2, lsl #23
 530         do_it   eq
 531         biceq   r0, r0, #1
 532         RET
 533
 534         @ Multiplication by 0x1p*: let''s shortcut a lot of code.
 535 LSYM(Lml_1):
 536         teq     r0, #0
 537         and     ip, ip, #0x80000000
 538         do_it   eq
 539         moveq   r1, r1, lsl #9
 540         orr     r0, ip, r0, lsr #9
 541         orr     r0, r0, r1, lsr #9
 542         subs    r2, r2, #127
 543         do_it   gt, tt
 544         COND(rsb,s,gt)  r3, r2, #255
 545         orrgt   r0, r0, r2, lsl #23
 546         RETc(gt)
 547
 548         @ Under/overflow: fix things up for the code below.
 549         orr     r0, r0, #0x00800000
 550         mov     r3, #0
 551         subs    r2, r2, #1
 552
 553 LSYM(Lml_u):
 554         @ Overflow?
 555         bgt     LSYM(Lml_o)
 556
 557         @ Check if denormalized result is possible, otherwise return signed 0.
 558         cmn     r2, #(24 + 1)
 559         do_it   le, t
 560         bicle   r0, r0, #0x7fffffff
 561         RETc(le)
 562
 563         @ Shift value right, round, etc.
 564         rsb     r2, r2, #0
 565         movs    r1, r0, lsl #1
 566         shift1  lsr, r1, r1, r2
 567         rsb     r2, r2, #32
 568         shift1  lsl, ip, r0, r2
 569         movs    r0, r1, rrx
 570         adc     r0, r0, #0
 571         orrs    r3, r3, ip, lsl #1
 572         do_it   eq
 573         biceq   r0, r0, ip, lsr #31
 574         RET
 575
 576         @ One or both arguments are denormalized.
 577         @ Scale them leftwards and preserve sign bit.
 578 LSYM(Lml_d):
 579         teq     r2, #0
 580         and     ip, r0, #0x80000000
 581 1:      do_it   eq, tt
 582         moveq   r0, r0, lsl #1
 583         tsteq   r0, #0x00800000
 584         subeq   r2, r2, #1
 585         beq     1b
 586         orr     r0, r0, ip
 587         teq     r3, #0
 588         and     ip, r1, #0x80000000
 589 2:      do_it   eq, tt
 590         moveq   r1, r1, lsl #1
 591         tsteq   r1, #0x00800000
 592         subeq   r3, r3, #1
 593         beq     2b
 594         orr     r1, r1, ip
 595         b       LSYM(Lml_x)
 596
 597 LSYM(Lml_s):
 598         @ Isolate the INF and NAN cases away
 599         and     r3, ip, r1, lsr #23
 600         teq     r2, ip
 601         do_it   ne
 602         teqne   r3, ip
 603         beq     1f
 604
 605         @ Here, one or more arguments are either denormalized or zero.
 606         bics    ip, r0, #0x80000000
 607         do_it   ne
 608         COND(bic,s,ne)  ip, r1, #0x80000000
 609         bne     LSYM(Lml_d)
 610
 611         @ Result is 0, but determine sign anyway.
 612 LSYM(Lml_z):
 613         eor     r0, r0, r1
 614         bic     r0, r0, #0x7fffffff
 615         RET
 616
 617 1:      @ One or both args are INF or NAN.
 618         teq     r0, #0x0
 619         do_it   ne, ett
 620         teqne   r0, #0x80000000
 621         moveq   r0, r1
 622         teqne   r1, #0x0
 623         teqne   r1, #0x80000000
 624         beq     LSYM(Lml_n)             @ 0 * INF or INF * 0 -> NAN
 625         teq     r2, ip
 626         bne     1f
 627         movs    r2, r0, lsl #9
 628         bne     LSYM(Lml_n)             @ NAN * <anything> -> NAN
 629 1:      teq     r3, ip
 630         bne     LSYM(Lml_i)
 631         movs    r3, r1, lsl #9
 632         do_it   ne
 633         movne   r0, r1
 634         bne     LSYM(Lml_n)             @ <anything> * NAN -> NAN
 635
 636         @ Result is INF, but we need to determine its sign.
 637 LSYM(Lml_i):
 638         eor     r0, r0, r1
 639
 640         @ Overflow: return INF (sign already in r0).
 641 LSYM(Lml_o):
 642         and     r0, r0, #0x80000000
 643         orr     r0, r0, #0x7f000000
 644         orr     r0, r0, #0x00800000
 645         RET
 646
 647         @ Return a quiet NAN.
 648 LSYM(Lml_n):
 649         orr     r0, r0, #0x7f000000
 650         orr     r0, r0, #0x00c00000
 651         RET
 652
 653         FUNC_END aeabi_fmul
 654         FUNC_END mulsf3
 655
 656 ARM_FUNC_START divsf3
 657 ARM_FUNC_ALIAS aeabi_fdiv divsf3
 658
 659         @ Mask out exponents, trap any zero/denormal/INF/NAN.
 660         mov     ip, #0xff
 661         ands    r2, ip, r0, lsr #23
 662         do_it   ne, tt
 663         COND(and,s,ne)  r3, ip, r1, lsr #23
 664         teqne   r2, ip
 665         teqne   r3, ip
 666         beq     LSYM(Ldv_s)
 667 LSYM(Ldv_x):
 668
 669         @ Substract divisor exponent from dividend''s
 670         sub     r2, r2, r3
 671
 672         @ Preserve final sign into ip.
 673         eor     ip, r0, r1
 674
 675         @ Convert mantissa to unsigned integer.
 676         @ Dividend -> r3, divisor -> r1.
 677         movs    r1, r1, lsl #9
 678         mov     r0, r0, lsl #9
 679         beq     LSYM(Ldv_1)
 680         mov     r3, #0x10000000
 681         orr     r1, r3, r1, lsr #4
 682         orr     r3, r3, r0, lsr #4
 683
 684         @ Initialize r0 (result) with final sign bit.
 685         and     r0, ip, #0x80000000
 686
 687         @ Ensure result will land to known bit position.
 688         @ Apply exponent bias accordingly.
 689         cmp     r3, r1
 690         do_it   cc
 691         movcc   r3, r3, lsl #1
 692         adc     r2, r2, #(127 - 2)
 693
 694         @ The actual division loop.
 695         mov     ip, #0x00800000
 696 1:      cmp     r3, r1
 697         do_it   cs, t
 698         subcs   r3, r3, r1
 699         orrcs   r0, r0, ip
 700         cmp     r3, r1, lsr #1
 701         do_it   cs, t
 702         subcs   r3, r3, r1, lsr #1
 703         orrcs   r0, r0, ip, lsr #1
 704         cmp     r3, r1, lsr #2
 705         do_it   cs, t
 706         subcs   r3, r3, r1, lsr #2
 707         orrcs   r0, r0, ip, lsr #2
 708         cmp     r3, r1, lsr #3
 709         do_it   cs, t
 710         subcs   r3, r3, r1, lsr #3
 711         orrcs   r0, r0, ip, lsr #3
 712         movs    r3, r3, lsl #4
 713         do_it   ne
 714         COND(mov,s,ne)  ip, ip, lsr #4
 715         bne     1b
 716
 717         @ Check exponent for under/overflow.
 718         cmp     r2, #(254 - 1)
 719         bhi     LSYM(Lml_u)
 720
 721         @ Round the result, merge final exponent.
 722         cmp     r3, r1
 723         adc     r0, r0, r2, lsl #23
 724         do_it   eq
 725         biceq   r0, r0, #1
 726         RET
 727
 728         @ Division by 0x1p*: let''s shortcut a lot of code.
 729 LSYM(Ldv_1):
 730         and     ip, ip, #0x80000000
 731         orr     r0, ip, r0, lsr #9
 732         adds    r2, r2, #127
 733         do_it   gt, tt
 734         COND(rsb,s,gt)  r3, r2, #255
 735         orrgt   r0, r0, r2, lsl #23
 736         RETc(gt)
 737
 738         orr     r0, r0, #0x00800000
 739         mov     r3, #0
 740         subs    r2, r2, #1
 741         b       LSYM(Lml_u)
 742
 743         @ One or both arguments are denormalized.
 744         @ Scale them leftwards and preserve sign bit.
 745 LSYM(Ldv_d):
 746         teq     r2, #0
 747         and     ip, r0, #0x80000000
 748 1:      do_it   eq, tt
 749         moveq   r0, r0, lsl #1
 750         tsteq   r0, #0x00800000
 751         subeq   r2, r2, #1
 752         beq     1b
 753         orr     r0, r0, ip
 754         teq     r3, #0
 755         and     ip, r1, #0x80000000
 756 2:      do_it   eq, tt
 757         moveq   r1, r1, lsl #1
 758         tsteq   r1, #0x00800000
 759         subeq   r3, r3, #1
 760         beq     2b
 761         orr     r1, r1, ip
 762         b       LSYM(Ldv_x)
 763
 764         @ One or both arguments are either INF, NAN, zero or denormalized.
 765 LSYM(Ldv_s):
 766         and     r3, ip, r1, lsr #23
 767         teq     r2, ip
 768         bne     1f
 769         movs    r2, r0, lsl #9
 770         bne     LSYM(Lml_n)             @ NAN / <anything> -> NAN
 771         teq     r3, ip
 772         bne     LSYM(Lml_i)             @ INF / <anything> -> INF
 773         mov     r0, r1
 774         b       LSYM(Lml_n)             @ INF / (INF or NAN) -> NAN
 775 1:      teq     r3, ip
 776         bne     2f
 777         movs    r3, r1, lsl #9
 778         beq     LSYM(Lml_z)             @ <anything> / INF -> 0
 779         mov     r0, r1
 780         b       LSYM(Lml_n)             @ <anything> / NAN -> NAN
 781 2:      @ If both are nonzero, we need to normalize and resume above.
 782         bics    ip, r0, #0x80000000
 783         do_it   ne
 784         COND(bic,s,ne)  ip, r1, #0x80000000
 785         bne     LSYM(Ldv_d)
 786         @ One or both arguments are zero.
 787         bics    r2, r0, #0x80000000
 788         bne     LSYM(Lml_i)             @ <non_zero> / 0 -> INF
 789         bics    r3, r1, #0x80000000
 790         bne     LSYM(Lml_z)             @ 0 / <non_zero> -> 0
 791         b       LSYM(Lml_n)             @ 0 / 0 -> NAN
 792
 793         FUNC_END aeabi_fdiv
 794         FUNC_END divsf3
 795
 796 #endif /* L_muldivsf3 */
 797
 798 #ifdef L_cmpsf2
 799
 800         @ The return value in r0 is
 801         @
 802         @   0  if the operands are equal
 803         @   1  if the first operand is greater than the second, or
 804         @      the operands are unordered and the operation is
 805         @      CMP, LT, LE, NE, or EQ.
 806         @   -1 if the first operand is less than the second, or
 807         @      the operands are unordered and the operation is GT
 808         @      or GE.
 809         @
 810         @ The Z flag will be set iff the operands are equal.
 811         @
 812         @ The following registers are clobbered by this function:
 813         @   ip, r0, r1, r2, r3
 814
 815 ARM_FUNC_START gtsf2
 816 ARM_FUNC_ALIAS gesf2 gtsf2
 817         mov     ip, #-1
 818         b       1f
 819
 820 ARM_FUNC_START ltsf2
 821 ARM_FUNC_ALIAS lesf2 ltsf2
 822         mov     ip, #1
 823         b       1f
 824
 825 ARM_FUNC_START cmpsf2
 826 ARM_FUNC_ALIAS nesf2 cmpsf2
 827 ARM_FUNC_ALIAS eqsf2 cmpsf2
 828         mov     ip, #1                  @ how should we specify unordered here?
 829
 830 1:      str     ip, [sp, #-4]
 831
 832         @ Trap any INF/NAN first.
 833         mov     r2, r0, lsl #1
 834         mov     r3, r1, lsl #1
 835         mvns    ip, r2, asr #24
 836         do_it   ne
 837         COND(mvn,s,ne)  ip, r3, asr #24
 838         beq     3f
 839
 840         @ Compare values.
 841         @ Note that 0.0 is equal to -0.0.
 842 2:      orrs    ip, r2, r3, lsr #1      @ test if both are 0, clear C flag
 843         do_it   ne
 844         teqne   r0, r1                  @ if not 0 compare sign
 845         do_it   pl
 846         COND(sub,s,pl)  r0, r2, r3              @ if same sign compare values, set r0
 847
 848         @ Result:
 849         do_it   hi
 850         movhi   r0, r1, asr #31
 851         do_it   lo
 852         mvnlo   r0, r1, asr #31
 853         do_it   ne
 854         orrne   r0, r0, #1
 855         RET
 856
 857         @ Look for a NAN.
 858 3:      mvns    ip, r2, asr #24
 859         bne     4f
 860         movs    ip, r0, lsl #9
 861         bne     5f                      @ r0 is NAN
 862 4:      mvns    ip, r3, asr #24
 863         bne     2b
 864         movs    ip, r1, lsl #9
 865         beq     2b                      @ r1 is not NAN
 866 5:      ldr     r0, [sp, #-4]           @ return unordered code.
 867         RET
 868
 869         FUNC_END gesf2
 870         FUNC_END gtsf2
 871         FUNC_END lesf2
 872         FUNC_END ltsf2
 873         FUNC_END nesf2
 874         FUNC_END eqsf2
 875         FUNC_END cmpsf2
 876
 877 ARM_FUNC_START aeabi_cfrcmple
 878
 879         mov     ip, r0
 880         mov     r0, r1
 881         mov     r1, ip
 882         b       6f
 883
 884 ARM_FUNC_START aeabi_cfcmpeq
 885 ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
 886
 887         @ The status-returning routines are required to preserve all
 888         @ registers except ip, lr, and cpsr.
 889 6:      do_push {r0, r1, r2, r3, lr}
 890         ARM_CALL cmpsf2
 891         @ Set the Z flag correctly, and the C flag unconditionally.
 892         cmp     r0, #0
 893         @ Clear the C flag if the return value was -1, indicating
 894         @ that the first operand was smaller than the second.
 895         do_it   mi
 896         cmnmi   r0, #0
 897         RETLDM  "r0, r1, r2, r3"
 898
 899         FUNC_END aeabi_cfcmple
 900         FUNC_END aeabi_cfcmpeq
 901         FUNC_END aeabi_cfrcmple
 902
 903 ARM_FUNC_START  aeabi_fcmpeq
 904
 905         str     lr, [sp, #-8]!
 906         ARM_CALL aeabi_cfcmple
 907         do_it   eq, e
 908         moveq   r0, #1  @ Equal to.
 909         movne   r0, #0  @ Less than, greater than, or unordered.
 910         RETLDM
 911
 912         FUNC_END aeabi_fcmpeq
 913
 914 ARM_FUNC_START  aeabi_fcmplt
 915
 916         str     lr, [sp, #-8]!
 917         ARM_CALL aeabi_cfcmple
 918         do_it   cc, e
 919         movcc   r0, #1  @ Less than.
 920         movcs   r0, #0  @ Equal to, greater than, or unordered.
 921         RETLDM
 922
 923         FUNC_END aeabi_fcmplt
 924
 925 ARM_FUNC_START  aeabi_fcmple
 926
 927         str     lr, [sp, #-8]!
 928         ARM_CALL aeabi_cfcmple
 929         do_it   ls, e
 930         movls   r0, #1  @ Less than or equal to.
 931         movhi   r0, #0  @ Greater than or unordered.
 932         RETLDM
 933
 934         FUNC_END aeabi_fcmple
 935
 936 ARM_FUNC_START  aeabi_fcmpge
 937
 938         str     lr, [sp, #-8]!
 939         ARM_CALL aeabi_cfrcmple
 940         do_it   ls, e
 941         movls   r0, #1  @ Operand 2 is less than or equal to operand 1.
 942         movhi   r0, #0  @ Operand 2 greater than operand 1, or unordered.
 943         RETLDM
 944
 945         FUNC_END aeabi_fcmpge
 946
 947 ARM_FUNC_START  aeabi_fcmpgt
 948
 949         str     lr, [sp, #-8]!
 950         ARM_CALL aeabi_cfrcmple
 951         do_it   cc, e
 952         movcc   r0, #1  @ Operand 2 is less than operand 1.
 953         movcs   r0, #0  @ Operand 2 is greater than or equal to operand 1,
 954                         @ or they are unordered.
 955         RETLDM
 956
 957         FUNC_END aeabi_fcmpgt
 958
 959 #endif /* L_cmpsf2 */
 960
 961 #ifdef L_unordsf2
 962
 963 ARM_FUNC_START unordsf2
 964 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
 965
 966         mov     r2, r0, lsl #1
 967         mov     r3, r1, lsl #1
 968         mvns    ip, r2, asr #24
 969         bne     1f
 970         movs    ip, r0, lsl #9
 971         bne     3f                      @ r0 is NAN
 972 1:      mvns    ip, r3, asr #24
 973         bne     2f
 974         movs    ip, r1, lsl #9
 975         bne     3f                      @ r1 is NAN
 976 2:      mov     r0, #0                  @ arguments are ordered.
 977         RET
 978 3:      mov     r0, #1                  @ arguments are unordered.
 979         RET
 980
 981         FUNC_END aeabi_fcmpun
 982         FUNC_END unordsf2
 983
 984 #endif /* L_unordsf2 */
 985
 986 #ifdef L_fixsfsi
 987
 988 ARM_FUNC_START fixsfsi
 989 ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
 990
 991         @ check exponent range.
 992         mov     r2, r0, lsl #1
 993         cmp     r2, #(127 << 24)
 994         bcc     1f                      @ value is too small
 995         mov     r3, #(127 + 31)
 996         subs    r2, r3, r2, lsr #24
 997         bls     2f                      @ value is too large
 998
 999         @ scale value
1000         mov     r3, r0, lsl #8
1001         orr     r3, r3, #0x80000000
1002         tst     r0, #0x80000000         @ the sign bit
1003         shift1  lsr, r0, r3, r2
1004         do_it   ne
1005         rsbne   r0, r0, #0
1006         RET
1007
1008 1:      mov     r0, #0
1009         RET
1010
1011 2:      cmp     r2, #(127 + 31 - 0xff)
1012         bne     3f
1013         movs    r2, r0, lsl #9
1014         bne     4f                      @ r0 is NAN.
1015 3:      ands    r0, r0, #0x80000000     @ the sign bit
1016         do_it   eq
1017         moveq   r0, #0x7fffffff         @ the maximum signed positive si
1018         RET
1019
1020 4:      mov     r0, #0                  @ What should we convert NAN to?
1021         RET
1022
1023         FUNC_END aeabi_f2iz
1024         FUNC_END fixsfsi
1025
1026 #endif /* L_fixsfsi */
1027
1028 #ifdef L_fixunssfsi
1029
1030 ARM_FUNC_START fixunssfsi
1031 ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
1032
1033         @ check exponent range.
1034         movs    r2, r0, lsl #1
1035         bcs     1f                      @ value is negative
1036         cmp     r2, #(127 << 24)
1037         bcc     1f                      @ value is too small
1038         mov     r3, #(127 + 31)
1039         subs    r2, r3, r2, lsr #24
1040         bmi     2f                      @ value is too large
1041
1042         @ scale the value
1043         mov     r3, r0, lsl #8
1044         orr     r3, r3, #0x80000000
1045         shift1  lsr, r0, r3, r2
1046         RET
1047
1048 1:      mov     r0, #0
1049         RET
1050
1051 2:      cmp     r2, #(127 + 31 - 0xff)
1052         bne     3f
1053         movs    r2, r0, lsl #9
1054         bne     4f                      @ r0 is NAN.
1055 3:      mov     r0, #0xffffffff         @ maximum unsigned si
1056         RET
1057
1058 4:      mov     r0, #0                  @ What should we convert NAN to?
1059         RET
1060
1061         FUNC_END aeabi_f2uiz
1062         FUNC_END fixunssfsi
1063
1064 #endif /* L_fixunssfsi */