@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif

/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
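/* For illustration: on a target whose __USER_LABEL_PREFIX__ is `_',
   SYM (udivsi3) pastes to _udivsi3; with an empty prefix it is plain
   udivsi3.  The same prefixing is applied to every global label below.  */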
#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
        || defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
        || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
        || defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)
# define __ARM_ARCH__ 6
#endif

#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
        || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
# define __ARM_ARCH__ 7
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET     bx lr
# define RETc(x) bx##x lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET     mov pc, lr
# define RETc(x) mov##x pc, lr

#endif
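/* Illustration of the macros above: on v4T and later cores `RETc(eq)'
   assembles to `bxeq lr', while on older cores it becomes `moveq pc, lr'.  */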
.macro cfi_pop advance, reg, cfa_offset
#ifdef __ELF__
        .pushsection .debug_frame
        .byte 0x4              /* DW_CFA_advance_loc4 */
        .4byte \advance
        .byte (0xc0 | \reg)    /* DW_CFA_restore */
        .byte 0xe              /* DW_CFA_def_cfa_offset */
        .uleb128 \cfa_offset
        .popsection
#endif
.endm
.macro cfi_push advance, reg, offset, cfa_offset
#ifdef __ELF__
        .pushsection .debug_frame
        .byte 0x4              /* DW_CFA_advance_loc4 */
        .4byte \advance
        .byte (0x80 | \reg)    /* DW_CFA_offset */
        .uleb128 (\offset / -4)
        .byte 0xe              /* DW_CFA_def_cfa_offset */
        .uleb128 \cfa_offset
        .popsection
#endif
.endm
.macro cfi_start start_label, end_label
#ifdef __ELF__
        .pushsection .debug_frame
LSYM(Lstart_frame):
        .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
        .4byte 0xffffffff      @ CIE Identifier Tag
        .byte 0x1              @ CIE Version
        .ascii "\0"            @ CIE Augmentation
        .uleb128 0x1           @ CIE Code Alignment Factor
        .sleb128 -4            @ CIE Data Alignment Factor
        .byte 0xe              @ CIE RA Column
        .byte 0xc              @ DW_CFA_def_cfa
        .uleb128 0xd
        .uleb128 0x0

        .align 2
LSYM(Lend_cie):
        .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
LSYM(Lstart_fde):
        .4byte LSYM(Lstart_frame)      @ FDE CIE offset
        .4byte \start_label            @ FDE initial location
        .4byte \end_label-\start_label @ FDE address range
        .popsection
#endif
.endm
.macro cfi_end end_label
#ifdef __ELF__
        .pushsection .debug_frame
        .align 2
LSYM(Lend_fde):
        .popsection
\end_label:
#endif
.endm
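/* Sketch of how these macros cooperate (see the div0 paths below):
   cfi_start opens a CIE/FDE pair for a region, cfi_push records that LR
   was saved on the stack (e.g. after the `str lr, [sp, #-8]!' in
   ARM_LDIV0), cfi_pop marks LR as restored again inside RETLDM, and
   cfi_end closes the FDE.  */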
/* Don't pass dirn, it's there just to get token pasting right.  */

.macro RETLDM regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
        .ifc "\regs",""
        ldr\cond lr, [sp], #8
        .else
# if defined(__thumb2__)
        pop\cond {\regs, lr}
# else
        ldm\cond\dirn sp!, {\regs, lr}
# endif
        .endif
        .ifnc "\unwind", ""
        /* Mark LR as restored.  */
97:     cfi_pop 97b - \unwind, 0xe, 0x0
        .endif
        bx\cond lr
#else
        /* Caller is responsible for providing IT instruction.  */
        .ifc "\regs",""
        ldr\cond pc, [sp], #8
        .else
# if defined(__thumb2__)
        pop\cond {\regs, pc}
# else
        ldm\cond\dirn sp!, {\regs, pc}
# endif
        .endif
#endif
.endm
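/* For example, `RETLDM "r4, r5"' pops {r4, r5, lr} and returns with
   `bx lr' when __INTERWORKING__ is defined, and pops straight into
   {r4, r5, pc} otherwise; with an empty register list it reloads LR
   (or PC) from [sp], #8.  */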
/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
.macro do_it cond, suffix=""
        it\suffix \cond
.endm
.macro shift1 op, arg0, arg1, arg2
        \op \arg0, \arg1, \arg2
.endm
#define do_push push
#define do_pop pop
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
        \shiftop \tmp, \src2, \shiftreg
        \name \dest, \src1, \tmp
.endm
#else
.macro do_it cond, suffix=""
.endm
.macro shift1 op, arg0, arg1, arg2
        mov \arg0, \arg1, \op \arg2
.endm
#define do_push stmfd sp!,
#define do_pop ldmfd sp!,
#define COND(op1, op2, cond) op1 ## cond ## op2
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
        \name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif
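/* Example expansion of shiftop: `shiftop orr, r0, r0, r1, lsl, r2, ip'
   becomes the single instruction `orr r0, r0, r1, lsl r2' in ARM mode,
   but `lsl ip, r1, r2' followed by `orr r0, r0, ip' under Thumb-2.  */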
.macro ARM_LDIV0 name
        str lr, [sp, #-8]!
98:     cfi_push 98b - __\name, 0xe, -0x8, 0x8
        bl SYM (__div0) __PLT__
        mov r0, #0     @ About as wrong as it could be.
        RETLDM unwind=98b
.endm


.macro THUMB_LDIV0 name
        push { r1, lr }
98:     cfi_push 98b - __\name, 0xe, -0x4, 0x8
        bl SYM (__div0)
        mov r0, #0     @ About as wrong as it could be.
#if defined (__INTERWORKING__)
        pop { r1, r2 }
        bx r2
#else
        pop { r1, pc }
#endif
.endm

.macro FUNC_END name
        SIZE (__\name)
.endm

.macro DIV_FUNC_END name
        cfi_start __\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
        THUMB_LDIV0 \name
#else
        ARM_LDIV0 \name
#endif
        cfi_end LSYM(Lend_div0)
        FUNC_END \name
.endm
.macro THUMB_FUNC_START name
        .globl SYM (\name)
        TYPE (\name)
        .thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

.macro FUNC_START name
        .text
        .globl SYM (__\name)
        TYPE (__\name)
        .align 0
        THUMB_CODE
        THUMB_FUNC
        THUMB_SYNTAX
SYM (__\name):
.endm
/* Special function that will always be coded in ARM assembly, even when
   compiling in Thumb-only mode.  */
#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name
        FUNC_START \name
        .syntax unified
.endm
#define EQUIV .thumb_set
.macro ARM_CALL name
        bl __\name
.endm
#elif defined(__INTERWORKING_STUBS__)

.macro ARM_FUNC_START name
        FUNC_START \name
        bx pc
        nop
        .arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro ARM_CALL name
        bl _L__\name
.endm
#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

.macro ARM_FUNC_START name
        .text
        .globl SYM (__\name)
        TYPE (__\name)
        .align 0
        .arm
SYM (__\name):
.endm
#define EQUIV .set
.macro ARM_CALL name
        bl __\name
.endm

#endif

.macro FUNC_ALIAS new old
        .globl SYM (__\new)
#if defined (__thumb__)
        .thumb_set SYM (__\new), SYM (__\old)
#else
        .set SYM (__\new), SYM (__\old)
#endif
.endm
.macro ARM_FUNC_ALIAS new old
        .globl SYM (__\new)
        EQUIV SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
        .set SYM (_L__\new), SYM (_L__\old)
#endif
.endm
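/* For example, `FUNC_ALIAS aeabi_uidiv udivsi3' below makes __aeabi_uidiv
   a second global name for __udivsi3; ARM_FUNC_ALIAS additionally aliases
   the _L__ entry point when interworking stubs are in use.  */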
#ifdef __thumb__
/* Register aliases.  */

work            .req    r4      @ XXXX is this safe ?
dividend        .req    r0
divisor         .req    r1
overdone        .req    r2
result          .req    r2
curbit          .req    r3
#endif
#if 0
ip              .req    r12
sp              .req    r13
lr              .req    r14
pc              .req    r15
#endif
/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines.  */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

        clz \curbit, \dividend
        clz \result, \divisor
        sub \curbit, \result, \curbit
        rsbs \curbit, \curbit, #31
        addne \curbit, \curbit, \curbit, lsl #1
        mov \result, #0
        addne pc, pc, \curbit, lsl #2
        nop
        .set shift, 32
        .rept 32
        .set shift, shift - 1
        cmp \dividend, \divisor, lsl #shift
        adc \result, \result, \result
        subcs \dividend, \dividend, \divisor, lsl #shift
        .endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

        clz \curbit, \divisor
        clz \result, \dividend
        sub \result, \curbit, \result
        mov \curbit, #1
        mov \divisor, \divisor, lsl \result
        mov \curbit, \curbit, lsl \result
        mov \result, #0

#else /* __ARM_ARCH__ < 5 */

        @ Initially shift the divisor left 3 bits if possible,
        @ set curbit accordingly.  This allows for curbit to be located
        @ at the left end of each 4-bit nibble in the division loop
        @ to save one loop iteration in most cases.
        tst \divisor, #0xe0000000
        moveq \divisor, \divisor, lsl #3
        moveq \curbit, #8
        movne \curbit, #1

        @ Unless the divisor is very big, shift it up in multiples of
        @ four bits, since this is the amount of unwinding in the main
        @ division loop.  Continue shifting until the divisor is
        @ larger than the dividend.
1:      cmp \divisor, #0x10000000
        cmplo \divisor, \dividend
        movlo \divisor, \divisor, lsl #4
        movlo \curbit, \curbit, lsl #4
        blo 1b
        @ For a very big divisor, we must shift it one bit at a time, or
        @ we will be in danger of overflowing.
1:      cmp \divisor, #0x80000000
        cmplo \divisor, \dividend
        movlo \divisor, \divisor, lsl #1
        movlo \curbit, \curbit, lsl #1
        blo 1b

        mov \result, #0

#endif /* __ARM_ARCH__ < 5 */

        @ Division loop
1:      cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor
        orrhs \result, \result, \curbit
        cmp \dividend, \divisor, lsr #1
        subhs \dividend, \dividend, \divisor, lsr #1
        orrhs \result, \result, \curbit, lsr #1
        cmp \dividend, \divisor, lsr #2
        subhs \dividend, \dividend, \divisor, lsr #2
        orrhs \result, \result, \curbit, lsr #2
        cmp \dividend, \divisor, lsr #3
        subhs \dividend, \dividend, \divisor, lsr #3
        orrhs \result, \result, \curbit, lsr #3
        cmp \dividend, #0              @ Early termination?
        movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
        movne \divisor, \divisor, lsr #4
        bne 1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
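/* Rough C model of the shift-and-subtract division that ARM_DIV_BODY
   performs (illustration only, never assembled): the divisor is first
   aligned under the dividend, then one quotient bit is produced per
   position.  The 4-way unrolled loop above is exactly this, four bits
   at a time.

       unsigned quot = 0, bit = 1;
       while (divisor < dividend && !(divisor & 0x80000000))
         { divisor <<= 1; bit <<= 1; }
       while (bit)
         {
           if (dividend >= divisor)
             { dividend -= divisor; quot |= bit; }
           divisor >>= 1; bit >>= 1;
         }

   On exit quot is the quotient and dividend holds the remainder.  */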
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

        clz \order, \divisor
        rsb \order, \order, #31

#else

        cmp \divisor, #(1 << 16)
        movhs \divisor, \divisor, lsr #16
        movhs \order, #16
        movlo \order, #0

        cmp \divisor, #(1 << 8)
        movhs \divisor, \divisor, lsr #8
        addhs \order, \order, #8

        cmp \divisor, #(1 << 4)
        movhs \divisor, \divisor, lsr #4
        addhs \order, \order, #4

        cmp \divisor, #(1 << 2)
        addhi \order, \order, #3
        addls \order, \order, \divisor, lsr #1

#endif

.endm
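/* ARM_DIV2_ORDER leaves in \order the bit position of the power-of-two
   divisor, i.e. floor(log2(divisor)); for example a divisor of 0x800
   yields 11, and the power-of-two paths below then simply return
   dividend >> \order.  */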
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

        clz \order, \divisor
        clz \spare, \dividend
        sub \order, \order, \spare
        rsbs \order, \order, #31
        addne pc, pc, \order, lsl #3
        nop
        .set shift, 32
        .rept 32
        .set shift, shift - 1
        cmp \dividend, \divisor, lsl #shift
        subcs \dividend, \dividend, \divisor, lsl #shift
        .endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

        clz \order, \divisor
        clz \spare, \dividend
        sub \order, \order, \spare
        mov \divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

        mov \order, #0

        @ Unless the divisor is very big, shift it up in multiples of
        @ four bits, since this is the amount of unwinding in the main
        @ division loop.  Continue shifting until the divisor is
        @ larger than the dividend.
1:      cmp \divisor, #0x10000000
        cmplo \divisor, \dividend
        movlo \divisor, \divisor, lsl #4
        addlo \order, \order, #4
        blo 1b
        @ For a very big divisor, we must shift it one bit at a time, or
        @ we will be in danger of overflowing.
1:      cmp \divisor, #0x80000000
        cmplo \divisor, \dividend
        movlo \divisor, \divisor, lsl #1
        addlo \order, \order, #1
        blo 1b

#endif /* __ARM_ARCH__ < 5 */
        @ Perform all needed subtractions to keep only the remainder.
        @ Do comparisons in batches of 4 first.
        subs \order, \order, #3        @ yes, 3 is intended here
        blt 2f

1:      cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor
        cmp \dividend, \divisor, lsr #1
        subhs \dividend, \dividend, \divisor, lsr #1
        cmp \dividend, \divisor, lsr #2
        subhs \dividend, \dividend, \divisor, lsr #2
        cmp \dividend, \divisor, lsr #3
        subhs \dividend, \dividend, \divisor, lsr #3
        cmp \dividend, #1
        mov \divisor, \divisor, lsr #4
        subges \order, \order, #4
        bge 1b

        tst \order, #3
        teqne \dividend, #0
        beq 5f
        @ Either 1, 2 or 3 comparison/subtractions are left.
2:      cmn \order, #2
        blt 4f
        beq 3f
        cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor
        mov \divisor, \divisor, lsr #1
3:      cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor
        mov \divisor, \divisor, lsr #1
4:      cmp \dividend, \divisor
        subhs \dividend, \dividend, \divisor

5:
#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
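/* Worked example for ARM_MOD_BODY (illustrative): for 23 mod 5 the
   divisor is first scaled up to 20 (order 2); the compare/subtract steps
   then remove 20, leaving 3, neither 10 nor 5 fits any more, and
   \dividend ends up holding the remainder 3.  */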
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
        @ Load the constant 0x10000000 into our work register.
        mov work, #1
        lsl work, #28
LSYM(Loop1):
        @ Unless the divisor is very big, shift it up in multiples of
        @ four bits, since this is the amount of unwinding in the main
        @ division loop.  Continue shifting until the divisor is
        @ larger than the dividend.
        cmp divisor, work
        bhs LSYM(Lbignum)
        cmp divisor, dividend
        bhs LSYM(Lbignum)
        lsl divisor, #4
        lsl curbit, #4
        b LSYM(Loop1)
LSYM(Lbignum):
        @ Set work to 0x80000000
        lsl work, #3
LSYM(Loop2):
        @ For a very big divisor, we must shift it one bit at a time, or
        @ we will be in danger of overflowing.
        cmp divisor, work
        bhs LSYM(Loop3)
        cmp divisor, dividend
        bhs LSYM(Loop3)
        lsl divisor, #1
        lsl curbit, #1
        b LSYM(Loop2)
LSYM(Loop3):
        @ Test for possible subtractions ...
        .if \modulo
        @ ... On the final pass, this may subtract too much from the dividend,
        @ so keep track of which subtractions are done so that we can fix
        @ them up afterwards.
        mov overdone, #0
        cmp dividend, divisor
        blo LSYM(Lover1)
        sub dividend, dividend, divisor
LSYM(Lover1):
        lsr work, divisor, #1
        cmp dividend, work
        blo LSYM(Lover2)
        sub dividend, dividend, work
        mov ip, curbit
        mov work, #1
        ror curbit, work
        orr overdone, curbit
        mov curbit, ip
LSYM(Lover2):
        lsr work, divisor, #2
        cmp dividend, work
        blo LSYM(Lover3)
        sub dividend, dividend, work
        mov ip, curbit
        mov work, #2
        ror curbit, work
        orr overdone, curbit
        mov curbit, ip
LSYM(Lover3):
        lsr work, divisor, #3
        cmp dividend, work
        blo LSYM(Lover4)
        sub dividend, dividend, work
        mov ip, curbit
        mov work, #3
        ror curbit, work
        orr overdone, curbit
        mov curbit, ip
LSYM(Lover4):
        mov ip, curbit
        .else
        @ ... and note which bits are done in the result.  On the final pass,
        @ this may subtract too much from the dividend, but the result will be ok,
        @ since the "bit" will have been shifted out at the bottom.
        cmp dividend, divisor
        blo LSYM(Lover1)
        sub dividend, dividend, divisor
        orr result, result, curbit
LSYM(Lover1):
        lsr work, divisor, #1
        cmp dividend, work
        blo LSYM(Lover2)
        sub dividend, dividend, work
        lsr work, curbit, #1
        orr result, work
LSYM(Lover2):
        lsr work, divisor, #2
        cmp dividend, work
        blo LSYM(Lover3)
        sub dividend, dividend, work
        lsr work, curbit, #2
        orr result, work
LSYM(Lover3):
        lsr work, divisor, #3
        cmp dividend, work
        blo LSYM(Lover4)
        sub dividend, dividend, work
        lsr work, curbit, #3
        orr result, work
LSYM(Lover4):
        .endif

        cmp dividend, #0       @ Early termination?
        beq LSYM(Lover5)
        lsr curbit, #4         @ No, any more bits to do?
        beq LSYM(Lover5)
        lsr divisor, #4
        b LSYM(Loop3)
LSYM(Lover5):
        .if \modulo
        @ Any subtractions that we should not have done will be recorded in
        @ the top three bits of "overdone".  Exactly which ones were not
        @ needed is determined by the position of the bit, stored in ip.
        mov work, #0xe
        lsl work, #28
        and overdone, work
        beq LSYM(Lgot_result)

        @ If we terminated early, because dividend became zero, then the
        @ bit in ip will not be in the bottom nibble, and we should not
        @ perform the additions below.  We must test for this though
        @ (rather than relying upon the TSTs to prevent the additions) since
        @ the bit in ip could be in the top two bits which might then match
        @ with one of the smaller RORs.
        mov curbit, ip
        mov work, #0x7
        tst curbit, work
        beq LSYM(Lgot_result)

        mov curbit, ip
        mov work, #3
        ror curbit, work
        tst overdone, curbit
        beq LSYM(Lover6)
        lsr work, divisor, #3
        add dividend, work
LSYM(Lover6):
        mov curbit, ip
        mov work, #2
        ror curbit, work
        tst overdone, curbit
        beq LSYM(Lover7)
        lsr work, divisor, #2
        add dividend, work
LSYM(Lover7):
        mov curbit, ip
        mov work, #1
        ror curbit, work
        tst overdone, curbit
        beq LSYM(Lgot_result)
        lsr work, divisor, #1
        add dividend, work
        .endif
LSYM(Lgot_result):
.endm
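/* Why the 0xe0000000 mask in the fix-up above works: a rotated copy of
   curbit can only land in bits 29..31 of "overdone" when curbit itself is
   already down in the bottom bits, i.e. on the final pass, so the mask
   isolates exactly those last-pass subtractions that may have removed too
   much and need to be added back.  */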
/* ------------------------------------------------------------------------ */
/* Start of the Real Functions  */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

        FUNC_START udivsi3
        FUNC_ALIAS aeabi_uidiv udivsi3

#ifdef __thumb__

        cmp divisor, #0
        beq LSYM(Ldiv0)
        mov curbit, #1
        mov result, #0

        push { work }
        cmp dividend, divisor
        blo LSYM(Lgot_result)

        THUMB_DIV_MOD_BODY 0

        mov r0, result
        pop { work }
        RET

#else /* ARM version.  */

        subs r2, r1, #1
        RETc(eq)
        bcc LSYM(Ldiv0)
        cmp r0, r1
        bls 11f
        tst r1, r2
        beq 12f

        ARM_DIV_BODY r0, r1, r2, r3

        mov r0, r2
        RET

11:     moveq r0, #1
        movne r0, #0
        RET

12:     ARM_DIV2_ORDER r1, r2

        mov r0, r0, lsr r2
        RET

#endif /* ARM version */

        DIV_FUNC_END udivsi3

FUNC_START aeabi_uidivmod
#ifdef __thumb__
        push {r0, r1, lr}
        bl SYM(__udivsi3)
        POP {r1, r2, r3}
        mul r2, r0
        sub r1, r1, r2
        bx r3
#else
        stmfd sp!, { r0, r1, lr }
        bl SYM(__udivsi3)
        ldmfd sp!, { r1, r2, lr }
        mul r3, r2, r0
        sub r1, r1, r3
        RET
#endif
        FUNC_END aeabi_uidivmod
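/* __aeabi_uidivmod returns the quotient in r0 and the remainder in r1;
   for example a call with r0 = 23 and r1 = 5 comes back with r0 = 4 and
   r1 = 23 - 4 * 5 = 3.  */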
#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

        FUNC_START umodsi3

#ifdef __thumb__

        cmp divisor, #0
        beq LSYM(Ldiv0)
        mov curbit, #1
        cmp dividend, divisor
        bhs LSYM(Lover10)
        RET

LSYM(Lover10):
        push { work }

        THUMB_DIV_MOD_BODY 1

        pop { work }
        RET

#else /* ARM version.  */

        subs r2, r1, #1        @ compare divisor with 1
        bcc LSYM(Ldiv0)
        cmpne r0, r1           @ compare dividend with divisor
        moveq r0, #0
        tsthi r1, r2           @ see if divisor is power of 2
        andeq r0, r0, r2
        RETc(ls)

        ARM_MOD_BODY r0, r1, r2, r3

        RET

#endif /* ARM version.  */

        DIV_FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

        FUNC_START divsi3
        FUNC_ALIAS aeabi_idiv divsi3

#ifdef __thumb__
        cmp divisor, #0
        beq LSYM(Ldiv0)

        push { work }
        mov work, dividend
        eor work, divisor      @ Save the sign of the result.
        mov ip, work
        mov curbit, #1
        mov result, #0
        cmp divisor, #0
        bpl LSYM(Lover10)
        neg divisor, divisor   @ Loops below use unsigned.
LSYM(Lover10):
        cmp dividend, #0
        bpl LSYM(Lover11)
        neg dividend, dividend
LSYM(Lover11):
        cmp dividend, divisor
        blo LSYM(Lgot_result)

        THUMB_DIV_MOD_BODY 0

        mov r0, result
        mov work, ip
        cmp work, #0
        bpl LSYM(Lover12)
        neg r0, r0
LSYM(Lover12):
        pop { work }
        RET

#else /* ARM version.  */

        cmp r1, #0
        eor ip, r0, r1         @ save the sign of the result.
        beq LSYM(Ldiv0)
        rsbmi r1, r1, #0       @ loops below use unsigned.
        subs r2, r1, #1        @ division by 1 or -1 ?
        beq 10f
        movs r3, r0
        rsbmi r3, r0, #0       @ positive dividend value
        cmp r3, r1
        bls 11f
        tst r1, r2             @ divisor is power of 2 ?
        beq 12f

        ARM_DIV_BODY r3, r1, r0, r2

        cmp ip, #0
        rsbmi r0, r0, #0
        RET

10:     teq ip, r0             @ same sign ?
        rsbmi r0, r0, #0
        RET

11:     movlo r0, #0
        moveq r0, ip, asr #31
        orreq r0, r0, #1
        RET

12:     ARM_DIV2_ORDER r1, r2

        cmp ip, #0
        mov r0, r3, lsr r2
        rsbmi r0, r0, #0
        RET

#endif /* ARM version */

        DIV_FUNC_END divsi3

FUNC_START aeabi_idivmod
#ifdef __thumb__
        push {r0, r1, lr}
        bl SYM(__divsi3)
        POP {r1, r2, r3}
        mul r2, r0
        sub r1, r1, r2
        bx r3
#else
        stmfd sp!, { r0, r1, lr }
        bl SYM(__divsi3)
        ldmfd sp!, { r1, r2, lr }
        mul r3, r2, r0
        sub r1, r1, r3
        RET
#endif
        FUNC_END aeabi_idivmod
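/* __aeabi_idivmod likewise returns the quotient in r0 and the remainder
   in r1, with the quotient truncated towards zero; e.g. dividing -7 by 3
   gives r0 = -2 and r1 = -7 - (-2 * 3) = -1.  */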
#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

        FUNC_START modsi3

#ifdef __thumb__

        mov curbit, #1
        cmp divisor, #0
        beq LSYM(Ldiv0)
        bpl LSYM(Lover10)
        neg divisor, divisor   @ Loops below use unsigned.
LSYM(Lover10):
        push { work }
        @ Need to save the sign of the dividend, but unfortunately we need
        @ the work register later on.  Must do this after saving the original
        @ value of the work register, because we will pop this value off first.
        push { dividend }
        cmp dividend, #0
        bpl LSYM(Lover11)
        neg dividend, dividend
LSYM(Lover11):
        cmp dividend, divisor
        blo LSYM(Lgot_result)

        THUMB_DIV_MOD_BODY 1

        pop { work }
        cmp work, #0
        bpl LSYM(Lover12)
        neg dividend, dividend
LSYM(Lover12):
        pop { work }
        RET

#else /* ARM version.  */

        cmp r1, #0
        beq LSYM(Ldiv0)
        rsbmi r1, r1, #0       @ loops below use unsigned.
        movs ip, r0            @ preserve sign of dividend
        rsbmi r0, r0, #0       @ if negative make positive
        subs r2, r1, #1        @ compare divisor with 1
        cmpne r0, r1           @ compare dividend with divisor
        moveq r0, #0
        tsthi r1, r2           @ see if divisor is power of 2
        andeq r0, r0, r2
        bls 10f

        ARM_MOD_BODY r0, r1, r2, r3

10:     cmp ip, #0
        rsbmi r0, r0, #0
        RET

#endif /* ARM version */

        DIV_FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

        FUNC_START div0
        FUNC_ALIAS aeabi_idiv0 div0
        FUNC_ALIAS aeabi_ldiv0 div0

        RET

        FUNC_END aeabi_ldiv0
        FUNC_END aeabi_idiv0
        FUNC_END div0
#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE 8

        .code 32
        FUNC_START div0

        stmfd sp!, {r1, lr}
        mov r0, #SIGFPE
        bl SYM(raise) __PLT__
        RETLDM r1

        FUNC_END div0

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
        shft xxx, Reg
   is in fact done as
        shft xxx, (Reg & 255)
   so for Reg values in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
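/* For example, a 64-bit logical right shift by 40 in the ARM code below
   computes r3 = 40 - 32 = 8, so the low word becomes ah >> 8, while
   `mov ah, ah, lsr r2' uses 40 & 255 = 40 and clears the high word,
   exactly the behaviour described above.  */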
#ifdef __ARMEB__
#define al r1
#define ah r0
#else
#define al r0
#define ah r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__
#ifdef L_lshrdi3

        FUNC_START lshrdi3
        FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
        lsr al, r2
        mov r3, ah
        lsr ah, r2
        mov ip, r3
        sub r2, #32
        lsr r3, r2
        orr al, r3
        neg r2, r2
        mov r3, ip
        lsl r3, r2
        orr al, r3
        RET
#else
        subs r3, r2, #32
        rsb ip, r2, #32
        movmi al, al, lsr r2
        movpl al, ah, lsr r3
        orrmi al, al, ah, lsl ip
        mov ah, ah, lsr r2
        RET
#endif
        FUNC_END aeabi_llsr
        FUNC_END lshrdi3

#endif
#ifdef L_ashrdi3

        FUNC_START ashrdi3
        FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
        lsr al, r2
        mov r3, ah
        asr ah, r2
        sub r2, #32
        @ If r2 is negative at this point the following step would OR
        @ the sign bit into all of AL.  That's not what we want...
        bmi 1f
        mov ip, r3
        asr r3, r2
        orr al, r3
        mov r3, ip
1:
        neg r2, r2
        lsl r3, r2
        orr al, r3
        RET
#else
        subs r3, r2, #32
        rsb ip, r2, #32
        movmi al, al, lsr r2
        movpl al, ah, asr r3
        orrmi al, al, ah, lsl ip
        mov ah, ah, asr r2
        RET
#endif

        FUNC_END aeabi_lasr
        FUNC_END ashrdi3

#endif
#ifdef L_ashldi3

        FUNC_START ashldi3
        FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
        lsl ah, r2
        mov r3, al
        lsl al, r2
        mov ip, r3
        sub r2, #32
        lsl r3, r2
        orr ah, r3
        neg r2, r2
        mov r3, ip
        lsr r3, r2
        orr ah, r3
        RET
#else
        subs r3, r2, #32
        rsb ip, r2, #32
        movmi ah, ah, lsl r2
        movpl ah, al, lsl r3
        orrmi ah, ah, al, lsr ip
        mov al, al, lsl r2
        RET
#endif
        FUNC_END aeabi_llsl
        FUNC_END ashldi3

#endif

#endif /* __symbian__ */
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
        || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
        || __ARM_ARCH__ >= 6
#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
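/* A typical Thumb-mode call through one of these veneers might look like
   this (illustrative only; the exact symbol depends on
   __USER_LABEL_PREFIX__):

        ldr     r3, =some_arm_function
        bl      _call_via_r3

   The BL sets LR (with the Thumb bit), and the veneer's `bx r3' enters
   the callee in whichever instruction set bit 0 of r3 selects.  */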
        .text
        .align 0
        .force_thumb

.macro call_via register
        THUMB_FUNC_START _call_via_\register

        bx \register
        nop

        SIZE (_call_via_\register)
.endm
        call_via r0
        call_via r1
        call_via r2
        call_via r3
        call_via r4
        call_via r5
        call_via r6
        call_via r7
        call_via r8
        call_via r9
        call_via sl
        call_via fp
        call_via ip
        call_via sp
        call_via lr

#endif /* L_call_via_rX */

/* Don't bother with the old interworking routines for Thumb-2.  */
/* ??? Maybe only omit these on v7m.  */
#ifndef __thumb2__

#if defined L_interwork_call_via_rX
/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */
        .text
        .align 0

        .code 32
        .globl _arm_return
LSYM(Lstart_arm_return):
        cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
        cfi_push 0, 0xe, -0x8, 0x8
        nop    @ This nop is for the benefit of debuggers, so that
               @ backtraces will use the correct unwind information.
_arm_return:
        RETLDM unwind=LSYM(Lstart_arm_return)
        cfi_end LSYM(Lend_arm_return)

        .globl _arm_return_r7
_arm_return_r7:
        ldr lr, [r7, #-4]
        bx lr

        .globl _arm_return_r11
_arm_return_r11:
        ldr lr, [r11, #-4]
        bx lr
.macro interwork_with_frame frame, register, name, return
        .code 16

        THUMB_FUNC_START \name

        bx pc
        nop

        .code 32
        tst \register, #1
        streq lr, [\frame, #-4]
        adreq lr, _arm_return_\frame
        bx \register

        SIZE (\name)
.endm
.macro interwork register
        .code 16

        THUMB_FUNC_START _interwork_call_via_\register

        bx pc
        nop

        .code 32
        .globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
        tst \register, #1
        streq lr, [sp, #-8]!
        adreq lr, _arm_return
        bx \register

        SIZE (_interwork_call_via_\register)

        interwork_with_frame r7,\register,_interwork_r7_call_via_\register
        interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm
        interwork r0
        interwork r1
        interwork r2
        interwork r3
        interwork r4
        interwork r5
        interwork r6
        interwork r7
        interwork r8
        interwork r9
        interwork sl
        interwork fp
        interwork ip
        interwork sp

        /* The LR case has to be handled a little differently...  */
        .code 16

        THUMB_FUNC_START _interwork_call_via_lr
        bx pc
        nop

        .code 32
        .globl .Lchange_lr
.Lchange_lr:
        tst lr, #1
        stmeqdb r13!, {lr, pc}
        mov ip, lr
        adreq lr, _arm_return
        bx ip

        SIZE (_interwork_call_via_lr)
#endif /* L_interwork_call_via_rX */
#endif /* !__thumb2__ */
#endif /* Arch supports thumb.  */

#ifndef __symbian__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#endif /* __symbian__ */