gcc/config/arm/ieee754-sf.S

   1 /* ieee754-sf.S single-precision floating point support for ARM
   2
   3    Copyright (C) 2003  Free Software Foundation, Inc.
   4    Contributed by Nicolas Pitre (nico@cam.org)
   5
   6    This file is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by the
   8    Free Software Foundation; either version 2, or (at your option) any
   9    later version.
  10
  11    In addition to the permissions in the GNU General Public License, the
  12    Free Software Foundation gives you unlimited permission to link the
  13    compiled version of this file into combinations with other programs,
  14    and to distribute those combinations without any restriction coming
  15    from the use of this file.  (The General Public License restrictions
  16    do apply in other respects; for example, they cover modification of
  17    the file, and distribution when not linked into a combine
  18    executable.)
  19
  20    This file is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; see the file COPYING.  If not, write to
  27    the Free Software Foundation, 59 Temple Place - Suite 330,
  28    Boston, MA 02111-1307, USA.  */
  29
  30 /*
  31  * Notes:
  32  *
  33  * The goal of this code is to be as fast as possible.  This is
  34  * not meant to be easy to understand for the casual reader.
  35  *
  36  * Only the default rounding mode is intended for best performances.
  37  * Exceptions aren't supported yet, but that can be added quite easily
  38  * if necessary without impacting performances.
  39  */
  40
  41 @ This selects the minimum architecture level required.
  42 #undef __ARM_ARCH__
  43 #define __ARM_ARCH__ 3
  44
  45 #if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
  46         || defined(__ARM_ARCH_4T__)
  47 #undef __ARM_ARCH__
  48 /* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
  49    long multiply instructions.  That includes v3M.  */
  50 #define __ARM_ARCH__ 4
  51 #endif
  52
  53 #if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
  54         || defined(__ARM_ARCH_5TE__)
  55 #undef __ARM_ARCH__
  56 #define __ARM_ARCH__ 5
  57 #endif
  58
  59 #if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
  60 #undef RET
  61 #undef RETc
  62 #define RET     bx      lr
  63 #define RETc(x) bx##x   lr
  64 #if (__ARM_ARCH__ == 4) && (defined(__thumb__) || defined(__THUMB_INTERWORK__))
  65 #define __FP_INTERWORKING__
  66 #endif
  67 #endif
  68
  69 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
  70 .macro  ARM_FUNC_START name
  71         FUNC_START \name
  72         bx      pc
  73         nop
  74         .arm
  75 .endm
  76 #else
  77 .macro  ARM_FUNC_START name
  78         FUNC_START \name
  79 .endm
  80 #endif
  81
  82 ARM_FUNC_START negsf2
  83         eor     r0, r0, #0x80000000     @ flip sign bit
  84         RET
  85
  86 ARM_FUNC_START subsf3
  87         eor     r1, r1, #0x80000000     @ flip sign bit of second arg
  88 #if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
  89         b       1f                      @ Skip Thumb-code prologue
  90 #endif
  91
  92 ARM_FUNC_START addsf3
  93
  94 1:      @ Compare both args, return zero if equal but the sign.
  95         eor     r2, r0, r1
  96         teq     r2, #0x80000000
  97         beq     LSYM(Lad_z)
  98
  99         @ If first arg is 0 or -0, return second arg.
 100         @ If second arg is 0 or -0, return first arg.
 101         bics    r2, r0, #0x80000000
 102         moveq   r0, r1
 103         bicnes  r2, r1, #0x80000000
 104         RETc(eq)
 105
 106         @ Mask out exponents.
 107         mov     ip, #0xff000000
 108         and     r2, r0, ip, lsr #1
 109         and     r3, r1, ip, lsr #1
 110
 111         @ If either of them is 255, result will be INF or NAN
 112         teq     r2, ip, lsr #1
 113         teqne   r3, ip, lsr #1
 114         beq     LSYM(Lad_i)
 115
 116         @ Compute exponent difference.  Make largest exponent in r2,
 117         @ corresponding arg in r0, and positive exponent difference in r3.
 118         subs    r3, r3, r2
 119         addgt   r2, r2, r3
 120         eorgt   r1, r0, r1
 121         eorgt   r0, r1, r0
 122         eorgt   r1, r0, r1
 123         rsblt   r3, r3, #0
 124
 125         @ If exponent difference is too large, return largest argument
 126         @ already in r0.  We need up to 25 bit to handle proper rounding
 127         @ of 0x1p25 - 1.1.
 128         cmp     r3, #(25 << 23)
 129         RETc(hi)
 130
 131         @ Convert mantissa to signed integer.
 132         tst     r0, #0x80000000
 133         orr     r0, r0, #0x00800000
 134         bic     r0, r0, #0xff000000
 135         rsbne   r0, r0, #0
 136         tst     r1, #0x80000000
 137         orr     r1, r1, #0x00800000
 138         bic     r1, r1, #0xff000000
 139         rsbne   r1, r1, #0
 140
 141         @ If exponent == difference, one or both args were denormalized.
 142         @ Since this is not common case, rescale them off line.
 143         teq     r2, r3
 144         beq     LSYM(Lad_d)
 145 LSYM(Lad_x):
 146
 147         @ Scale down second arg with exponent difference.
 148         @ Apply shift one bit left to first arg and the rest to second arg
 149         @ to simplify things later, but only if exponent does not become 0.
 150         movs    r3, r3, lsr #23
 151         teqne   r2, #(1 << 23)
 152         movne   r0, r0, lsl #1
 153         subne   r2, r2, #(1 << 23)
 154         subne   r3, r3, #1
 155
 156         @ Shift second arg into ip, keep leftover bits into r1.
 157         mov     ip, r1, asr r3
 158         rsb     r3, r3, #32
 159         mov     r1, r1, lsl r3
 160
 161         add     r0, r0, ip              @ the actual addition
 162
 163         @ We now have a 64 bit result in r0-r1.
 164         @ Keep absolute value in r0-r1, sign in r3.
 165         ands    r3, r0, #0x80000000
 166         bpl     LSYM(Lad_p)
 167         rsbs    r1, r1, #0
 168         rsc     r0, r0, #0
 169
 170         @ Determine how to normalize the result.
 171 LSYM(Lad_p):
 172         cmp     r0, #0x00800000
 173         bcc     LSYM(Lad_l)
 174         cmp     r0, #0x01000000
 175         bcc     LSYM(Lad_r0)
 176         cmp     r0, #0x02000000
 177         bcc     LSYM(Lad_r1)
 178
 179         @ Result needs to be shifted right.
 180         movs    r0, r0, lsr #1
 181         mov     r1, r1, rrx
 182         add     r2, r2, #(1 << 23)
 183 LSYM(Lad_r1):
 184         movs    r0, r0, lsr #1
 185         mov     r1, r1, rrx
 186         add     r2, r2, #(1 << 23)
 187
 188         @ Our result is now properly aligned into r0, remaining bits in r1.
 189         @ Round with MSB of r1. If halfway between two numbers, round towards
 190         @ LSB of r0 = 0.
 191 LSYM(Lad_r0):
 192         add     r0, r0, r1, lsr #31
 193         teq     r1, #0x80000000
 194         biceq   r0, r0, #1
 195
 196         @ Rounding may have added a new MSB.  Adjust exponent.
 197         @ That MSB will be cleared when exponent is merged below.
 198         tst     r0, #0x01000000
 199         addne   r2, r2, #(1 << 23)
 200
 201         @ Make sure we did not bust our exponent.
 202         cmp     r2, #(254 << 23)
 203         bhi     LSYM(Lad_o)
 204
 205         @ Pack final result together.
 206 LSYM(Lad_e):
 207         bic     r0, r0, #0x01800000
 208         orr     r0, r0, r2
 209         orr     r0, r0, r3
 210         RET
 211
 212         @ Result must be shifted left.
 213         @ No rounding necessary since r1 will always be 0.
 214 LSYM(Lad_l):
 215
 216 #if __ARM_ARCH__ < 5
 217
 218         movs    ip, r0, lsr #12
 219         moveq   r0, r0, lsl #12
 220         subeq   r2, r2, #(12 << 23)
 221         tst     r0, #0x00ff0000
 222         moveq   r0, r0, lsl #8
 223         subeq   r2, r2, #(8 << 23)
 224         tst     r0, #0x00f00000
 225         moveq   r0, r0, lsl #4
 226         subeq   r2, r2, #(4 << 23)
 227         tst     r0, #0x00c00000
 228         moveq   r0, r0, lsl #2
 229         subeq   r2, r2, #(2 << 23)
 230         tst     r0, #0x00800000
 231         moveq   r0, r0, lsl #1
 232         subeq   r2, r2, #(1 << 23)
 233         cmp     r2, #0
 234         bgt     LSYM(Lad_e)
 235
 236 #else
 237
 238         clz     ip, r0
 239         sub     ip, ip, #8
 240         mov     r0, r0, lsl ip
 241         subs    r2, r2, ip, lsl #23
 242         bgt     LSYM(Lad_e)
 243
 244 #endif
 245
 246         @ Exponent too small, denormalize result.
 247         mvn     r2, r2, asr #23
 248         add     r2, r2, #2
 249         orr     r0, r3, r0, lsr r2
 250         RET
 251
 252         @ Fixup and adjust bit position for denormalized arguments.
 253         @ Note that r2 must not remain equal to 0.
 254 LSYM(Lad_d):
 255         teq     r2, #0
 256         eoreq   r0, r0, #0x00800000
 257         addeq   r2, r2, #(1 << 23)
 258         eor     r1, r1, #0x00800000
 259         subne   r3, r3, #(1 << 23)
 260         b       LSYM(Lad_x)
 261
 262         @ Result is x - x = 0, unless x is INF or NAN.
 263 LSYM(Lad_z):
 264         mov     ip, #0xff000000
 265         and     r2, r0, ip, lsr #1
 266         teq     r2, ip, lsr #1
 267         moveq   r0, ip, asr #2
 268         movne   r0, #0
 269         RET
 270
 271         @ Overflow: return INF.
 272 LSYM(Lad_o):
 273         orr     r0, r3, #0x7f000000
 274         orr     r0, r0, #0x00800000
 275         RET
 276
 277         @ At least one of r0/r1 is INF/NAN.
 278         @   if r0 != INF/NAN: return r1 (which is INF/NAN)
 279         @   if r1 != INF/NAN: return r0 (which is INF/NAN)
 280         @   if r0 or r1 is NAN: return NAN
 281         @   if opposite sign: return NAN
 282         @   return r0 (which is INF or -INF)
 283 LSYM(Lad_i):
 284         teq     r2, ip, lsr #1
 285         movne   r0, r1
 286         teqeq   r3, ip, lsr #1
 287         RETc(ne)
 288         movs    r2, r0, lsl #9
 289         moveqs  r2, r1, lsl #9
 290         teqeq   r0, r1
 291         orrne   r0, r3, #0x00400000     @ NAN
 292         RET
 293
 294
 295 ARM_FUNC_START floatunsisf
 296         mov     r3, #0
 297         b       1f
 298
 299 ARM_FUNC_START floatsisf
 300         ands    r3, r0, #0x80000000
 301         rsbmi   r0, r0, #0
 302
 303 1:      teq     r0, #0
 304         RETc(eq)
 305
 306         mov     r1, #0
 307         mov     r2, #((127 + 23) << 23)
 308         tst     r0, #0xfc000000
 309         beq     LSYM(Lad_p)
 310
 311         @ We need to scale the value a little before branching to code above.
 312         tst     r0, #0xf0000000
 313         movne   r1, r0, lsl #28
 314         movne   r0, r0, lsr #4
 315         addne   r2, r2, #(4 << 23)
 316         tst     r0, #0x0c000000
 317         beq     LSYM(Lad_p)
 318         mov     r1, r1, lsr #2
 319         orr     r1, r1, r0, lsl #30
 320         mov     r0, r0, lsr #2
 321         add     r2, r2, #(2 << 23)
 322         b       LSYM(Lad_p)
 323
 324
 325 ARM_FUNC_START mulsf3
 326
 327         @ Mask out exponents.
 328         mov     ip, #0xff000000
 329         and     r2, r0, ip, lsr #1
 330         and     r3, r1, ip, lsr #1
 331
 332         @ Trap any INF/NAN.
 333         teq     r2, ip, lsr #1
 334         teqne   r3, ip, lsr #1
 335         beq     LSYM(Lml_s)
 336
 337         @ Trap any multiplication by 0.
 338         bics    ip, r0, #0x80000000
 339         bicnes  ip, r1, #0x80000000
 340         beq     LSYM(Lml_z)
 341
 342         @ Shift exponents right one bit to make room for overflow bit.
 343         @ If either of them is 0, scale denormalized arguments off line.
 344         @ Then add both exponents together.
 345         movs    r2, r2, lsr #1
 346         teqne   r3, #0
 347         beq     LSYM(Lml_d)
 348 LSYM(Lml_x):
 349         add     r2, r2, r3, asr #1
 350
 351         @ Preserve final sign in r2 along with exponent for now.
 352         teq     r0, r1
 353         orrmi   r2, r2, #0x8000
 354
 355         @ Convert mantissa to unsigned integer.
 356         bic     r0, r0, #0xff000000
 357         bic     r1, r1, #0xff000000
 358         orr     r0, r0, #0x00800000
 359         orr     r1, r1, #0x00800000
 360
 361 #if __ARM_ARCH__ < 4
 362
 363         @ Well, no way to make it shorter without the umull instruction.
 364         @ We must perform that 24 x 24 -> 48 bit multiplication by hand.
 365         stmfd   sp!, {r4, r5}
 366         mov     r4, r0, lsr #16
 367         mov     r5, r1, lsr #16
 368         bic     r0, r0, #0x00ff0000
 369         bic     r1, r1, #0x00ff0000
 370         mul     ip, r4, r5
 371         mul     r3, r0, r1
 372         mul     r0, r5, r0
 373         mla     r0, r4, r1, r0
 374         adds    r3, r3, r0, lsl #16
 375         adc     ip, ip, r0, lsr #16
 376         ldmfd   sp!, {r4, r5}
 377
 378 #else
 379
 380         umull   r3, ip, r0, r1          @ The actual multiplication.
 381
 382 #endif
 383
 384         @ Put final sign in r0.
 385         mov     r0, r2, lsl #16
 386         bic     r2, r2, #0x8000
 387
 388         @ Adjust result if one extra MSB appeared.
 389         @ The LSB may be lost but this never changes the result in this case.
 390         tst     ip, #(1 << 15)
 391         addne   r2, r2, #(1 << 22)
 392         movnes  ip, ip, lsr #1
 393         movne   r3, r3, rrx
 394
 395         @ Apply exponent bias, check range for underflow.
 396         subs    r2, r2, #(127 << 22)
 397         ble     LSYM(Lml_u)
 398
 399         @ Scale back to 24 bits with rounding.
 400         @ r0 contains sign bit already.
 401         orrs    r0, r0, r3, lsr #23
 402         adc     r0, r0, ip, lsl #9
 403
 404         @ If halfway between two numbers, rounding should be towards LSB = 0.
 405         mov     r3, r3, lsl #9
 406         teq     r3, #0x80000000
 407         biceq   r0, r0, #1
 408
 409         @ Note: rounding may have produced an extra MSB here.
 410         @ The extra bit is cleared before merging the exponent below.
 411         tst     r0, #0x01000000
 412         addne   r2, r2, #(1 << 22)
 413
 414         @ Check for exponent overflow
 415         cmp     r2, #(255 << 22)
 416         bge     LSYM(Lml_o)
 417
 418         @ Add final exponent.
 419         bic     r0, r0, #0x01800000
 420         orr     r0, r0, r2, lsl #1
 421         RET
 422
 423         @ Result is 0, but determine sign anyway.
 424 LSYM(Lml_z):    eor     r0, r0, r1
 425         bic     r0, r0, #0x7fffffff
 426         RET
 427
 428         @ Check if denormalized result is possible, otherwise return signed 0.
 429 LSYM(Lml_u):
 430         cmn     r2, #(24 << 22)
 431         RETc(le)
 432
 433         @ Find out proper shift value.
 434         mvn     r1, r2, asr #22
 435         subs    r1, r1, #7
 436         bgt     LSYM(Lml_ur)
 437
 438         @ Shift value left, round, etc.
 439         add     r1, r1, #32
 440         orrs    r0, r0, r3, lsr r1
 441         rsb     r1, r1, #32
 442         adc     r0, r0, ip, lsl r1
 443         mov     ip, r3, lsl r1
 444         teq     ip, #0x80000000
 445         biceq   r0, r0, #1
 446         RET
 447
 448         @ Shift value right, round, etc.
 449         @ Note: r1 must not be 0 otherwise carry does not get set.
 450 LSYM(Lml_ur):
 451         orrs    r0, r0, ip, lsr r1
 452         adc     r0, r0, #0
 453         rsb     r1, r1, #32
 454         mov     ip, ip, lsl r1
 455         teq     r3, #0
 456         teqeq   ip, #0x80000000
 457         biceq   r0, r0, #1
 458         RET
 459
 460         @ One or both arguments are denormalized.
 461         @ Scale them leftwards and preserve sign bit.
 462 LSYM(Lml_d):
 463         teq     r2, #0
 464         and     ip, r0, #0x80000000
 465 1:      moveq   r0, r0, lsl #1
 466         tsteq   r0, #0x00800000
 467         subeq   r2, r2, #(1 << 22)
 468         beq     1b
 469         orr     r0, r0, ip
 470         teq     r3, #0
 471         and     ip, r1, #0x80000000
 472 2:      moveq   r1, r1, lsl #1
 473         tsteq   r1, #0x00800000
 474         subeq   r3, r3, #(1 << 23)
 475         beq     2b
 476         orr     r1, r1, ip
 477         b       LSYM(Lml_x)
 478
 479         @ One or both args are INF or NAN.
 480 LSYM(Lml_s):
 481         teq     r0, #0x0
 482         teqne   r1, #0x0
 483         teqne   r0, #0x80000000
 484         teqne   r1, #0x80000000
 485         beq     LSYM(Lml_n)             @ 0 * INF or INF * 0 -> NAN
 486         teq     r2, ip, lsr #1
 487         bne     1f
 488         movs    r2, r0, lsl #9
 489         bne     LSYM(Lml_n)             @ NAN * <anything> -> NAN
 490 1:      teq     r3, ip, lsr #1
 491         bne     LSYM(Lml_i)
 492         movs    r3, r1, lsl #9
 493         bne     LSYM(Lml_n)             @ <anything> * NAN -> NAN
 494
 495         @ Result is INF, but we need to determine its sign.
 496 LSYM(Lml_i):
 497         eor     r0, r0, r1
 498
 499         @ Overflow: return INF (sign already in r0).
 500 LSYM(Lml_o):
 501         and     r0, r0, #0x80000000
 502         orr     r0, r0, #0x7f000000
 503         orr     r0, r0, #0x00800000
 504         RET
 505
 506         @ Return NAN.
 507 LSYM(Lml_n):
 508         mov     r0, #0x7f000000
 509         orr     r0, r0, #0x00c00000
 510         RET
 511
 512
 513 ARM_FUNC_START divsf3
 514
 515         @ Mask out exponents.
 516         mov     ip, #0xff000000
 517         and     r2, r0, ip, lsr #1
 518         and     r3, r1, ip, lsr #1
 519
 520         @ Trap any INF/NAN or zeroes.
 521         teq     r2, ip, lsr #1
 522         teqne   r3, ip, lsr #1
 523         bicnes  ip, r0, #0x80000000
 524         bicnes  ip, r1, #0x80000000
 525         beq     LSYM(Ldv_s)
 526
 527         @ Shift exponents right one bit to make room for overflow bit.
 528         @ If either of them is 0, scale denormalized arguments off line.
 529         @ Then substract divisor exponent from dividend''s.
 530         movs    r2, r2, lsr #1
 531         teqne   r3, #0
 532         beq     LSYM(Ldv_d)
 533 LSYM(Ldv_x):
 534         sub     r2, r2, r3, asr #1
 535
 536         @ Preserve final sign into ip.
 537         eor     ip, r0, r1
 538
 539         @ Convert mantissa to unsigned integer.
 540         @ Dividend -> r3, divisor -> r1.
 541         mov     r3, #0x10000000
 542         movs    r1, r1, lsl #9
 543         mov     r0, r0, lsl #9
 544         beq     LSYM(Ldv_1)
 545         orr     r1, r3, r1, lsr #4
 546         orr     r3, r3, r0, lsr #4
 547
 548         @ Initialize r0 (result) with final sign bit.
 549         and     r0, ip, #0x80000000
 550
 551         @ Ensure result will land to known bit position.
 552         cmp     r3, r1
 553         subcc   r2, r2, #(1 << 22)
 554         movcc   r3, r3, lsl #1
 555
 556         @ Apply exponent bias, check range for over/underflow.
 557         add     r2, r2, #(127 << 22)
 558         cmn     r2, #(24 << 22)
 559         RETc(le)
 560         cmp     r2, #(255 << 22)
 561         bge     LSYM(Lml_o)
 562
 563         @ The actual division loop.
 564         mov     ip, #0x00800000
 565 1:      cmp     r3, r1
 566         subcs   r3, r3, r1
 567         orrcs   r0, r0, ip
 568         cmp     r3, r1, lsr #1
 569         subcs   r3, r3, r1, lsr #1
 570         orrcs   r0, r0, ip, lsr #1
 571         cmp     r3, r1, lsr #2
 572         subcs   r3, r3, r1, lsr #2
 573         orrcs   r0, r0, ip, lsr #2
 574         cmp     r3, r1, lsr #3
 575         subcs   r3, r3, r1, lsr #3
 576         orrcs   r0, r0, ip, lsr #3
 577         movs    r3, r3, lsl #4
 578         movnes  ip, ip, lsr #4
 579         bne     1b
 580
 581         @ Check if denormalized result is needed.
 582         cmp     r2, #0
 583         ble     LSYM(Ldv_u)
 584
 585         @ Apply proper rounding.
 586         cmp     r3, r1
 587         addcs   r0, r0, #1
 588         biceq   r0, r0, #1
 589
 590         @ Add exponent to result.
 591         bic     r0, r0, #0x00800000
 592         orr     r0, r0, r2, lsl #1
 593         RET
 594
 595         @ Division by 0x1p*: let''s shortcut a lot of code.
 596 LSYM(Ldv_1):
 597         and     ip, ip, #0x80000000
 598         orr     r0, ip, r0, lsr #9
 599         add     r2, r2, #(127 << 22)
 600         cmp     r2, #(255 << 22)
 601         bge     LSYM(Lml_o)
 602         cmp     r2, #0
 603         orrgt   r0, r0, r2, lsl #1
 604         RETc(gt)
 605         cmn     r2, #(24 << 22)
 606         movle   r0, ip
 607         RETc(le)
 608         orr     r0, r0, #0x00800000
 609         mov     r3, #0
 610
 611         @ Result must be denormalized: prepare parameters to use code above.
 612         @ r3 already contains remainder for rounding considerations.
 613 LSYM(Ldv_u):
 614         bic     ip, r0, #0x80000000
 615         and     r0, r0, #0x80000000
 616         mvn     r1, r2, asr #22
 617         add     r1, r1, #2
 618         b       LSYM(Lml_ur)
 619
 620         @ One or both arguments are denormalized.
 621         @ Scale them leftwards and preserve sign bit.
 622 LSYM(Ldv_d):
 623         teq     r2, #0
 624         and     ip, r0, #0x80000000
 625 1:      moveq   r0, r0, lsl #1
 626         tsteq   r0, #0x00800000
 627         subeq   r2, r2, #(1 << 22)
 628         beq     1b
 629         orr     r0, r0, ip
 630         teq     r3, #0
 631         and     ip, r1, #0x80000000
 632 2:      moveq   r1, r1, lsl #1
 633         tsteq   r1, #0x00800000
 634         subeq   r3, r3, #(1 << 23)
 635         beq     2b
 636         orr     r1, r1, ip
 637         b       LSYM(Ldv_x)
 638
 639         @ One or both arguments is either INF, NAN or zero.
 640 LSYM(Ldv_s):
 641         mov     ip, #0xff000000
 642         teq     r2, ip, lsr #1
 643         teqeq   r3, ip, lsr #1
 644         beq     LSYM(Lml_n)             @ INF/NAN / INF/NAN -> NAN
 645         teq     r2, ip, lsr #1
 646         bne     1f
 647         movs    r2, r0, lsl #9
 648         bne     LSYM(Lml_n)             @ NAN / <anything> -> NAN
 649         b       LSYM(Lml_i)             @ INF / <anything> -> INF
 650 1:      teq     r3, ip, lsr #1
 651         bne     2f
 652         movs    r3, r1, lsl #9
 653         bne     LSYM(Lml_n)             @ <anything> / NAN -> NAN
 654         b       LSYM(Lml_z)             @ <anything> / INF -> 0
 655 2:      @ One or both arguments are 0.
 656         bics    r2, r0, #0x80000000
 657         bne     LSYM(Lml_i)             @ <non_zero> / 0 -> INF
 658         bics    r3, r1, #0x80000000
 659         bne     LSYM(Lml_z)             @ 0 / <non_zero> -> 0
 660         b       LSYM(Lml_n)             @ 0 / 0 -> NAN
 661
 662
 663 FUNC_START gesf2
 664 ARM_FUNC_START gtsf2
 665         mov     r3, #-1
 666         b       1f
 667
 668 FUNC_START lesf2
 669 ARM_FUNC_START ltsf2
 670         mov     r3, #1
 671         b       1f
 672
 673 FUNC_START nesf2
 674 FUNC_START eqsf2
 675 ARM_FUNC_START cmpsf2
 676         mov     r3, #1                  @ how should we specify unordered here?
 677
 678 1:      @ Trap any INF/NAN first.
 679         mov     ip, #0xff000000
 680         and     r2, r1, ip, lsr #1
 681         teq     r2, ip, lsr #1
 682         and     r2, r0, ip, lsr #1
 683         teqne   r2, ip, lsr #1
 684         beq     3f
 685
 686         @ Test for equality.
 687         @ Note that 0.0 is equal to -0.0.
 688 2:      orr     r3, r0, r1
 689         bics    r3, r3, #0x80000000     @ either 0.0 or -0.0
 690         teqne   r0, r1                  @ or both the same
 691         moveq   r0, #0
 692         RETc(eq)
 693
 694         @ Check for sign difference.  The N flag is set if it is the case.
 695         @ If so, return sign of r0.
 696         movmi   r0, r0, asr #31
 697         orrmi   r0, r0, #1
 698         RETc(mi)
 699
 700         @ Compare exponents.
 701         and     r3, r1, ip, lsr #1
 702         cmp     r2, r3
 703
 704         @ Compare mantissa if exponents are equal
 705         moveq   r0, r0, lsl #9
 706         cmpeq   r0, r1, lsl #9
 707         movcs   r0, r1, asr #31
 708         mvncc   r0, r1, asr #31
 709         orr     r0, r0, #1
 710         RET
 711
 712         @ Look for a NAN.
 713 3:      and     r2, r1, ip, lsr #1
 714         teq     r2, ip, lsr #1
 715         bne     4f
 716         movs    r2, r1, lsl #9
 717         bne     5f                      @ r1 is NAN
 718 4:      and     r2, r0, ip, lsr #1
 719         teq     r2, ip, lsr #1
 720         bne     2b
 721         movs    ip, r0, lsl #9
 722         beq     2b                      @ r0 is not NAN
 723 5:      mov     r0, r3                  @ return unordered code from r3.
 724         RET
 725
 726
 727 ARM_FUNC_START unordsf2
 728         mov     ip, #0xff000000
 729         and     r2, r1, ip, lsr #1
 730         teq     r2, ip, lsr #1
 731         bne     1f
 732         movs    r2, r1, lsl #9
 733         bne     3f                      @ r1 is NAN
 734 1:      and     r2, r0, ip, lsr #1
 735         teq     r2, ip, lsr #1
 736         bne     2f
 737         movs    r2, r0, lsl #9
 738         bne     3f                      @ r0 is NAN
 739 2:      mov     r0, #0                  @ arguments are ordered.
 740         RET
 741 3:      mov     r0, #1                  @ arguments are unordered.
 742         RET
 743
 744
 745 ARM_FUNC_START fixsfsi
 746         movs    r0, r0, lsl #1
 747         RETc(eq)                        @ value is 0.
 748         @ preserve C flag (the actual sign)
 749 #ifdef __APCS_26__
 750         mov     r1, pc
 751 #else
 752         mrs     r1, cpsr
 753 #endif
 754
 755         @ check exponent range.
 756         and     r2, r0, #0xff000000
 757         cmp     r2, #(127 << 24)
 758         movcc   r0, #0                  @ value is too small
 759         RETc(cc)
 760         cmp     r2, #((127 + 31) << 24)
 761         bcs     1f                      @ value is too large
 762
 763         mov     r0, r0, lsl #7
 764         orr     r0, r0, #0x80000000
 765         mov     r2, r2, lsr #24
 766         rsb     r2, r2, #(127 + 31)
 767         mov     r0, r0, lsr r2
 768         tst     r1, #0x20000000         @ the sign bit
 769         rsbne   r0, r0, #0
 770         RET
 771
 772 1:      teq     r2, #0xff000000
 773         bne     2f
 774         movs    r0, r0, lsl #8
 775         bne     3f                      @ r0 is NAN.
 776 2:      tst     r1, #0x20000000         @ the sign bit
 777         moveq   r0, #0x7fffffff         @ the maximum signed positive si
 778         movne   r0, #0x80000000         @ the maximum signed negative si
 779         RET
 780
 781 3:      mov     r0, #0                  @ What should we convert NAN to?
 782         RET
 783
 784
 785 ARM_FUNC_START fixunssfsi
 786         movs    r0, r0, lsl #1
 787         RETc(eq)                        @ value is 0.
 788         movcs   r0, #0
 789         RETc(cs)                        @ value is negative.
 790
 791         @ check exponent range.
 792         and     r2, r0, #0xff000000
 793         cmp     r2, #(127 << 24)
 794         movcc   r0, #0                  @ value is too small
 795         RETc(cc)
 796         cmp     r2, #((127 + 32) << 24)
 797         bcs     1f                      @ value is too large
 798
 799         mov     r0, r0, lsl #7
 800         orr     r0, r0, #0x80000000
 801         mov     r2, r2, lsr #24
 802         rsb     r2, r2, #(127 + 31)
 803         mov     r0, r0, lsr r2
 804         RET
 805
 806 1:      teq     r2, #0xff000000
 807         bne     2f
 808         movs    r0, r0, lsl #8
 809         bne     3b                      @ r0 is NAN.
 810 2:      mov     r0, #0xffffffff         @ maximum unsigned si
 811         RET
 812
 813