1 @ libgcc routines for ARM cpu.
2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
4 /* Copyright (C) 1995-2013 Free Software Foundation, Inc.
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
/* NOTE(review): this chunk is a sampled listing — the leading number on each
   line is the original file's line number, and the gaps in that numbering
   mean several #if/#endif pairs and macro bodies here are split across lines
   that are not visible in this view.  Code bytes left untouched.  */
25 /* An executable stack is *not* required for these functions. */
26 #if defined(__ELF__) && defined(__linux__)
27 .section .note.GNU-stack,"",%progbits
29 #endif /* __ELF__ and __linux__ */
32 /* Some attributes that are common to all routines in this file. */
33 /* Tag_ABI_align_needed: This code does not require 8-byte
34 alignment from the caller. */
35 /* .eabi_attribute 24, 0 -- default setting. */
36 /* Tag_ABI_align_preserved: This code preserves 8-byte
37 alignment in any callee. */
39 #endif /* __ARM_EABI__ */
40 /* ------------------------------------------------------------------------ */
42 /* We need to know what prefix to add to function names. */
44 #ifndef __USER_LABEL_PREFIX__
45 #error __USER_LABEL_PREFIX__ not defined
48 /* ANSI concatenation macros. */
50 #define CONCAT1(a, b) CONCAT2(a, b)
51 #define CONCAT2(a, b) a ## b
53 /* Use the right prefix for global labels. */
55 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
/* __PLT__ expands to "(PLT)" on ELF shared builds (definition not visible
   here); the two cases below force it empty where PLT calls are unusable.  */
59 #define __PLT__ /* Not supported in Thumb assembler (for now). */
60 #elif defined __vxworks && !defined __PIC__
61 #define __PLT__ /* Not supported by the kernel loader. */
65 #define TYPE(x) .type SYM(x),function
66 #define SIZE(x) .size SYM(x), . - SYM(x)
75 /* Function end macros. Variants for interworking. */
/* Collapse the family of __ARM_ARCH_*__ predefines into a single numeric
   __ARM_ARCH__ macro used for feature tests throughout this file.  */
77 #if defined(__ARM_ARCH_2__)
78 # define __ARM_ARCH__ 2
81 #if defined(__ARM_ARCH_3__)
82 # define __ARM_ARCH__ 3
85 #if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
86 || defined(__ARM_ARCH_4T__)
87 /* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
88 long multiply instructions. That includes v3M. */
89 # define __ARM_ARCH__ 4
92 #if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
93 || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
94 || defined(__ARM_ARCH_5TEJ__)
95 # define __ARM_ARCH__ 5
98 #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
99 || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
100 || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
101 || defined(__ARM_ARCH_6M__)
102 # define __ARM_ARCH__ 6
105 #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
106 || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
107 || defined(__ARM_ARCH_7EM__)
108 # define __ARM_ARCH__ 7
111 #if defined(__ARM_ARCH_8A__)
112 # define __ARM_ARCH__ 8
116 #error Unable to determine architecture.
119 /* There are times when we might prefer Thumb1 code even if ARM code is
120 permitted, for example, the code might be smaller, or there might be
121 interworking problems with switching to ARM state if interworking is
123 #if (defined(__thumb__) \
124 && !defined(__thumb2__) \
125 && (!defined(__THUMB_INTERWORK__) \
126 || defined (__OPTIMIZE_SIZE__) \
127 || defined(__ARM_ARCH_6M__)))
128 # define __prefer_thumb__
131 /* How to return from a function call depends on the architecture variant. */
/* v4T and later: return via bx lr so the caller's instruction set (ARM or
   Thumb) is restored correctly.  */
133 #if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
136 # define RETc(x) bx##x lr
138 /* Special precautions for interworking on armv4t. */
139 # if (__ARM_ARCH__ == 4)
141 /* Always use bx, not ldr pc. */
142 # if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
143 # define __INTERWORKING__
144 # endif /* __THUMB__ || __THUMB_INTERWORK__ */
146 /* Include thumb stub before arm mode code. */
147 # if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
148 # define __INTERWORKING_STUBS__
149 # endif /* __thumb__ && !__THUMB_INTERWORK__ */
151 #endif /* __ARM_ARCH == 4 */
/* Presumably the #else arm (pre-v4T, no bx available) — confirm against the
   full file; returns with a plain mov to pc, which cannot switch state.  */
155 # define RET mov pc, lr
156 # define RETc(x) mov##x pc, lr
/* Hand-rolled DWARF .debug_frame records for these routines; each macro
   emits raw CFA opcodes into the .debug_frame section.  The advance-count
   and .popsection lines are among the lines missing from this view.  */
160 .macro cfi_pop advance, reg, cfa_offset
162 .pushsection .debug_frame
163 .byte 0x4 /* DW_CFA_advance_loc4 */
165 .byte (0xc0 | \reg) /* DW_CFA_restore */
166 .byte 0xe /* DW_CFA_def_cfa_offset */
171 .macro cfi_push advance, reg, offset, cfa_offset
173 .pushsection .debug_frame
174 .byte 0x4 /* DW_CFA_advance_loc4 */
176 .byte (0x80 | \reg) /* DW_CFA_offset */
177 .uleb128 (\offset / -4)
178 .byte 0xe /* DW_CFA_def_cfa_offset */
/* cfi_start emits a complete CIE followed by the opening of an FDE covering
   start_label..end_label.  Data alignment factor -4: offsets count words
   pushed downward on the stack.  RA column 0xe = lr (r14).  */
183 .macro cfi_start start_label, end_label
185 .pushsection .debug_frame
187 .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
189 .4byte 0xffffffff @ CIE Identifier Tag
190 .byte 0x1 @ CIE Version
191 .ascii "\0" @ CIE Augmentation
192 .uleb128 0x1 @ CIE Code Alignment Factor
193 .sleb128 -4 @ CIE Data Alignment Factor
194 .byte 0xe @ CIE RA Column
195 .byte 0xc @ DW_CFA_def_cfa
201 .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
203 .4byte LSYM(Lstart_frame) @ FDE CIE offset
204 .4byte \start_label @ FDE initial location
205 .4byte \end_label-\start_label @ FDE address range
209 .macro cfi_end end_label
211 .pushsection .debug_frame
/* RETLDM: pop registers and return, with variants for interworking
   (restore lr then bx), Thumb-2, and plain ARM (pop directly into pc).  */
219 /* Don't pass dirn, it's there just to get token pasting right. */
221 .macro RETLDM regs=, cond=, unwind=, dirn=ia
222 #if defined (__INTERWORKING__)
224 ldr\cond lr, [sp], #8
226 # if defined(__thumb2__)
229 ldm\cond\dirn sp!, {\regs, lr}
233 /* Mark LR as restored. */
234 97: cfi_pop 97b - \unwind, 0xe, 0x0
238 /* Caller is responsible for providing IT instruction. */
240 ldr\cond pc, [sp], #8
242 # if defined(__thumb2__)
245 ldm\cond\dirn sp!, {\regs, pc}
251 /* The Unified assembly syntax allows the same code to be assembled for both
252 ARM and Thumb-2. However this is only supported by recent gas, so define
253 a set of macros to allow ARM code on older assemblers. */
254 #if defined(__thumb2__)
/* Thumb-2 arm: do_it emits a real IT instruction; shift1 is a flexible
   three-operand shift; shiftop needs a scratch register because Thumb-2
   has no shifted-register form for every ALU op.  */
255 .macro do_it cond, suffix=""
258 .macro shift1 op, arg0, arg1, arg2
259 \op \arg0, \arg1, \arg2
263 #define COND(op1, op2, cond) op1 ## op2 ## cond
264 /* Perform an arithmetic operation with a variable shift operand. This
265 requires two instructions and a scratch register on Thumb-2. */
266 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
267 \shiftop \tmp, \src2, \shiftreg
268 \name \dest, \src1, \tmp
/* ARM (divided-syntax) arm: do_it is a no-op, shifts fold into the mov /
   ALU operand2; note COND pastes the condition BEFORE the flag suffix
   (pre-UAL order, e.g. "movnes"), the opposite of the Thumb-2 variant.  */
271 .macro do_it cond, suffix=""
273 .macro shift1 op, arg0, arg1, arg2
274 mov \arg0, \arg1, \op \arg2
276 #define do_push stmfd sp!,
277 #define do_pop ldmfd sp!,
278 #define COND(op1, op2, cond) op1 ## cond ## op2
279 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
280 \name \dest, \src1, \src2, \shiftop \shiftreg
/* Division-by-zero tails.  The EABI variants set the saturated result
   mandated by the runtime ABI (0xffffffff for unsigned, INT_MAX/INT_MIN by
   sign for signed — the condition flags were set by the caller) and then
   tail-call __aeabi_idiv0.  The non-EABI variants call __div0 and return 0.
   The #if/#else structure separating the variants is in lines missing from
   this view.  */
285 .macro ARM_LDIV0 name signed
287 .ifc \signed, unsigned
288 movne r0, #0xffffffff
290 movgt r0, #0x7fffffff
291 movlt r0, #0x80000000
293 b SYM (__aeabi_idiv0) __PLT__
296 .macro ARM_LDIV0 name signed
298 98: cfi_push 98b - __\name, 0xe, -0x8, 0x8
299 bl SYM (__div0) __PLT__
300 mov r0, #0 @ About as wrong as it could be.
307 .macro THUMB_LDIV0 name signed
308 #if defined(__ARM_ARCH_6M__)
/* v6-M has no mvn/shift-with-immediate ALU forms usable here in one step,
   so the saturated constants are synthesized from r0 in multiple steps.  */
309 .ifc \signed, unsigned
313 mvn r0, r0 @ 0xffffffff
321 lsr r0, r0, #1 @ 0x7fffffff
324 lsl r0, r0, #24 @ 0x80000000
332 @ We know we are not on armv4t, so pop pc is safe.
336 .word __aeabi_idiv0 - 4b
337 #elif defined(__thumb2__)
339 .ifc \signed, unsigned
346 movgt r0, #0x7fffffff
348 movlt r0, #0x80000000
350 b.w SYM(__aeabi_idiv0) __PLT__
357 .ifc \signed, unsigned
358 movne r0, #0xffffffff
360 movgt r0, #0x7fffffff
361 movlt r0, #0x80000000
363 b SYM(__aeabi_idiv0) __PLT__
368 .macro THUMB_LDIV0 name signed
370 98: cfi_push 98b - __\name, 0xe, -0x4, 0x8
372 mov r0, #0 @ About as wrong as it could be.
373 #if defined (__INTERWORKING__)
/* DIV_FUNC_END: close a division routine, emitting the shared divide-by-zero
   label (Ldiv0) with the appropriate Thumb or ARM tail and its CFI.  */
386 .macro DIV_FUNC_END name signed
387 cfi_start __\name, LSYM(Lend_div0)
390 THUMB_LDIV0 \name \signed
392 ARM_LDIV0 \name \signed
394 cfi_end LSYM(Lend_div0)
/* Function entry/alias plumbing.  THUMB_FUNC_START / FUNC_START /
   ARM_FUNC_START declare a global SYM(__name), set type/mode directives,
   and (with __INTERWORKING_STUBS__) emit a Thumb shim before the ARM body.
   Most macro bodies are in lines missing from this view.  */
398 .macro THUMB_FUNC_START name
405 /* Function start macros. Variants for ARM and Thumb. */
408 #define THUMB_FUNC .thumb_func
409 #define THUMB_CODE .force_thumb
410 # if defined(__thumb2__)
411 #define THUMB_SYNTAX .syntax divided
421 .macro FUNC_START name
432 /* Special function that will always be coded in ARM assembly, even if
433 in Thumb-only compilation. */
435 #if defined(__thumb2__)
437 /* For Thumb-2 we build everything in thumb mode. */
438 .macro ARM_FUNC_START name
442 #define EQUIV .thumb_set
447 #elif defined(__INTERWORKING_STUBS__)
449 .macro ARM_FUNC_START name
454 /* A hook to tell gdb that we've switched to ARM mode. Also used to call
455 directly from other local arm routines. */
458 #define EQUIV .thumb_set
459 /* Branch directly to a function declared with ARM_FUNC_START.
460 Must be called in arm mode. */
465 #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
467 #ifdef __ARM_ARCH_6M__
468 #define EQUIV .thumb_set
470 .macro ARM_FUNC_START name
/* FUNC_ALIAS / ARM_FUNC_ALIAS: make __new a symbol alias of __old
   (thumb_set preserves the Thumb bit on Thumb targets); the _L__ aliases
   cover the local interworking-stub entry points.  */
486 .macro FUNC_ALIAS new old
488 #if defined (__thumb__)
489 .thumb_set SYM (__\new), SYM (__\old)
491 .set SYM (__\new), SYM (__\old)
495 #ifndef __ARM_ARCH_6M__
496 .macro ARM_FUNC_ALIAS new old
498 EQUIV SYM (__\new), SYM (__\old)
499 #if defined(__INTERWORKING_STUBS__)
500 .set SYM (_L__\new), SYM (_L__\old)
524 /* Register aliases. */
526 work .req r4 @ XXXX is this safe ?
/* Core unsigned 32-bit division.  Fast path (v5+, not size-optimized):
   clz computes the quotient bit count, then a computed branch (add pc, pc)
   jumps into an unrolled compare/subtract ladder, accumulating quotient
   bits with adc.  Slow path: shift the divisor up to the dividend, then a
   4-way unrolled shift-and-subtract loop.  */
543 .macro ARM_DIV_BODY dividend, divisor, result, curbit
545 #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
547 #if defined (__thumb2__)
548 clz \curbit, \dividend
549 clz \result, \divisor
550 sub \curbit, \result, \curbit
551 rsb \curbit, \curbit, #31
553 add \curbit, \result, \curbit, lsl #4
/* .rept-unrolled step (the .rept/.endr lines are missing from this view):
   one compare/accumulate/subtract per quotient bit position.  */
560 .set shift, shift - 1
561 cmp.w \dividend, \divisor, lsl #shift
563 adc.w \result, \result, \result
565 subcs.w \dividend, \dividend, \divisor, lsl #shift
568 clz \curbit, \dividend
569 clz \result, \divisor
570 sub \curbit, \result, \curbit
571 rsbs \curbit, \curbit, #31
572 addne \curbit, \curbit, \curbit, lsl #1
/* ARM-mode computed goto: each unrolled step is 3 instructions (12 bytes),
   hence curbit*3 above scaled by 4 here.  */
574 addne pc, pc, \curbit, lsl #2
578 .set shift, shift - 1
579 cmp \dividend, \divisor, lsl #shift
580 adc \result, \result, \result
581 subcs \dividend, \dividend, \divisor, lsl #shift
585 #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
586 #if __ARM_ARCH__ >= 5
588 clz \curbit, \divisor
589 clz \result, \dividend
590 sub \result, \curbit, \result
592 mov \divisor, \divisor, lsl \result
593 mov \curbit, \curbit, lsl \result
596 #else /* __ARM_ARCH__ < 5 */
598 @ Initially shift the divisor left 3 bits if possible,
599 @ set curbit accordingly. This allows for curbit to be located
600 @ at the left end of each 4-bit nibbles in the division loop
601 @ to save one loop in most cases.
602 tst \divisor, #0xe0000000
603 moveq \divisor, \divisor, lsl #3
607 @ Unless the divisor is very big, shift it up in multiples of
608 @ four bits, since this is the amount of unwinding in the main
609 @ division loop. Continue shifting until the divisor is
610 @ larger than the dividend.
611 1: cmp \divisor, #0x10000000
612 cmplo \divisor, \dividend
613 movlo \divisor, \divisor, lsl #4
614 movlo \curbit, \curbit, lsl #4
617 @ For very big divisors, we must shift it a bit at a time, or
618 @ we will be in danger of overflowing.
619 1: cmp \divisor, #0x80000000
620 cmplo \divisor, \dividend
621 movlo \divisor, \divisor, lsl #1
622 movlo \curbit, \curbit, lsl #1
627 #endif /* __ARM_ARCH__ < 5 */
@ Main shift-and-subtract loop, unrolled 4x; curbit marks the quotient bit
@ being tested and is shifted right together with the divisor.
630 1: cmp \dividend, \divisor
632 subhs \dividend, \dividend, \divisor
633 orrhs \result, \result, \curbit
634 cmp \dividend, \divisor, lsr #1
636 subhs \dividend, \dividend, \divisor, lsr #1
637 orrhs \result, \result, \curbit, lsr #1
638 cmp \dividend, \divisor, lsr #2
640 subhs \dividend, \dividend, \divisor, lsr #2
641 orrhs \result, \result, \curbit, lsr #2
642 cmp \dividend, \divisor, lsr #3
644 subhs \dividend, \dividend, \divisor, lsr #3
645 orrhs \result, \result, \curbit, lsr #3
646 cmp \dividend, #0 @ Early termination?
648 movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
649 movne \divisor, \divisor, lsr #4
652 #endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
655 /* ------------------------------------------------------------------------ */
/* Compute \order = log2(\divisor) for a power-of-two divisor: clz-based on
   v5+ (clz line missing from this view), binary-search on earlier cores.
   The pre-v5 path destroys \divisor while narrowing it.  */
656 .macro ARM_DIV2_ORDER divisor, order
658 #if __ARM_ARCH__ >= 5
661 rsb \order, \order, #31
665 cmp \divisor, #(1 << 16)
666 movhs \divisor, \divisor, lsr #16
670 cmp \divisor, #(1 << 8)
671 movhs \divisor, \divisor, lsr #8
672 addhs \order, \order, #8
674 cmp \divisor, #(1 << 4)
675 movhs \divisor, \divisor, lsr #4
676 addhs \order, \order, #4
678 cmp \divisor, #(1 << 2)
679 addhi \order, \order, #3
680 addls \order, \order, \divisor, lsr #1
/* Unsigned 32-bit modulo: same shape as ARM_DIV_BODY but only the dividend
   (remainder) is kept — no quotient bits are accumulated.  Fast path uses
   clz + computed branch into an unrolled 2-instruction ladder (hence
   lsl #3 below); slow path shifts the divisor up then subtracts down.  */
686 .macro ARM_MOD_BODY dividend, divisor, order, spare
688 #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
691 clz \spare, \dividend
692 sub \order, \order, \spare
693 rsbs \order, \order, #31
694 addne pc, pc, \order, lsl #3
698 .set shift, shift - 1
699 cmp \dividend, \divisor, lsl #shift
700 subcs \dividend, \dividend, \divisor, lsl #shift
703 #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
704 #if __ARM_ARCH__ >= 5
707 clz \spare, \dividend
708 sub \order, \order, \spare
709 mov \divisor, \divisor, lsl \order
711 #else /* __ARM_ARCH__ < 5 */
715 @ Unless the divisor is very big, shift it up in multiples of
716 @ four bits, since this is the amount of unwinding in the main
717 @ division loop. Continue shifting until the divisor is
718 @ larger than the dividend.
719 1: cmp \divisor, #0x10000000
720 cmplo \divisor, \dividend
721 movlo \divisor, \divisor, lsl #4
722 addlo \order, \order, #4
725 @ For very big divisors, we must shift it a bit at a time, or
726 @ we will be in danger of overflowing.
727 1: cmp \divisor, #0x80000000
728 cmplo \divisor, \dividend
729 movlo \divisor, \divisor, lsl #1
730 addlo \order, \order, #1
733 #endif /* __ARM_ARCH__ < 5 */
735 @ Perform all needed subtractions to keep only the remainder.
736 @ Do comparisons in batch of 4 first.
737 subs \order, \order, #3 @ yes, 3 is intended here
740 1: cmp \dividend, \divisor
741 subhs \dividend, \dividend, \divisor
742 cmp \dividend, \divisor, lsr #1
743 subhs \dividend, \dividend, \divisor, lsr #1
744 cmp \dividend, \divisor, lsr #2
745 subhs \dividend, \dividend, \divisor, lsr #2
746 cmp \dividend, \divisor, lsr #3
747 subhs \dividend, \dividend, \divisor, lsr #3
749 mov \divisor, \divisor, lsr #4
750 subges \order, \order, #4
757 @ Either 1, 2 or 3 comparisons/subtractions are left.
761 cmp \dividend, \divisor
762 subhs \dividend, \dividend, \divisor
763 mov \divisor, \divisor, lsr #1
764 3: cmp \dividend, \divisor
765 subhs \dividend, \dividend, \divisor
766 mov \divisor, \divisor, lsr #1
767 4: cmp \dividend, \divisor
768 subhs \dividend, \dividend, \divisor
771 #endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
774 /* ------------------------------------------------------------------------ */
/* Thumb-1 shift-and-subtract division core.  \modulo selects whether the
   remainder (1) or quotient (0) path assembles.  Uses the fixed register
   aliases (dividend/divisor/result/curbit/work) declared earlier; many
   branch and shift lines sit in gaps of this sampled view.  The modulo
   path may over-subtract on the last unrolled pass and records which
   subtractions happened in "overdone" so they can be undone below.  */
775 .macro THUMB_DIV_MOD_BODY modulo
776 @ Load the constant 0x10000000 into our work register.
780 @ Unless the divisor is very big, shift it up in multiples of
781 @ four bits, since this is the amount of unwinding in the main
782 @ division loop. Continue shifting until the divisor is
783 @ larger than the dividend.
786 cmp divisor, dividend
792 @ Set work to 0x80000000
795 @ For very big divisors, we must shift it a bit at a time, or
796 @ we will be in danger of overflowing.
799 cmp divisor, dividend
805 @ Test for possible subtractions ...
807 @ ... On the final pass, this may subtract too much from the dividend,
808 @ so keep track of which subtractions are done, we can fix them up
811 cmp dividend, divisor
813 sub dividend, dividend, divisor
815 lsr work, divisor, #1
818 sub dividend, dividend, work
825 lsr work, divisor, #2
828 sub dividend, dividend, work
835 lsr work, divisor, #3
838 sub dividend, dividend, work
847 @ ... and note which bits are done in the result. On the final pass,
848 @ this may subtract too much from the dividend, but the result will be ok,
849 @ since the "bit" will have been shifted out at the bottom.
850 cmp dividend, divisor
852 sub dividend, dividend, divisor
853 orr result, result, curbit
855 lsr work, divisor, #1
858 sub dividend, dividend, work
862 lsr work, divisor, #2
865 sub dividend, dividend, work
869 lsr work, divisor, #3
872 sub dividend, dividend, work
878 cmp dividend, #0 @ Early termination?
880 lsr curbit, #4 @ No, any more bits to do?
886 @ Any subtractions that we should not have done will be recorded in
887 @ the top three bits of "overdone". Exactly which were not needed
888 @ are governed by the position of the bit, stored in ip.
892 beq LSYM(Lgot_result)
894 @ If we terminated early, because dividend became zero, then the
895 @ bit in ip will not be in the bottom nibble, and we should not
896 @ perform the additions below. We must test for this though
897 @ (rather relying upon the TSTs to prevent the additions) since
898 @ the bit in ip could be in the top two bits which might then match
899 @ with one of the smaller RORs.
903 beq LSYM(Lgot_result)
910 lsr work, divisor, #3
918 lsr work, divisor, #2
925 beq LSYM(Lgot_result)
926 lsr work, divisor, #1
936 #if defined(__prefer_thumb__)
939 FUNC_ALIAS aeabi_uidiv udivsi3
943 LSYM(udivsi3_skip_div0_test):
948 cmp dividend, divisor
949 blo LSYM(Lgot_result)
957 #elif defined(__ARM_ARCH_EXT_IDIV__)
959 ARM_FUNC_START udivsi3
960 ARM_FUNC_ALIAS aeabi_uidiv udivsi3
968 #else /* ARM version/Thumb-2. */
970 ARM_FUNC_START udivsi3
971 ARM_FUNC_ALIAS aeabi_uidiv udivsi3
973 /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
974 check for division-by-zero a second time. */
975 LSYM(udivsi3_skip_div0_test):
985 ARM_DIV_BODY r0, r1, r2, r3
995 12: ARM_DIV2_ORDER r1, r2
1000 #endif /* ARM version */
1002 DIV_FUNC_END udivsi3 unsigned
1004 #if defined(__prefer_thumb__)
1005 FUNC_START aeabi_uidivmod
1009 bl LSYM(udivsi3_skip_div0_test)
1014 #elif defined(__ARM_ARCH_EXT_IDIV__)
1015 ARM_FUNC_START aeabi_uidivmod
1023 ARM_FUNC_START aeabi_uidivmod
1026 stmfd sp!, { r0, r1, lr }
1027 bl LSYM(udivsi3_skip_div0_test)
1028 ldmfd sp!, { r1, r2, lr }
1033 FUNC_END aeabi_uidivmod
1035 #endif /* L_udivsi3 */
1036 /* ------------------------------------------------------------------------ */
/* __umodsi3: unsigned 32-bit modulo, r0 = r0 % r1.  Fast-paths divisors of
   0/1 and powers of two (tsthi checks divisor & (divisor-1)) before falling
   into ARM_MOD_BODY.  */
1039 #ifdef __ARM_ARCH_EXT_IDIV__
1041 ARM_FUNC_START umodsi3
1049 #elif defined(__thumb__)
1056 cmp dividend, divisor
1063 THUMB_DIV_MOD_BODY 1
1068 #else /* ARM version. */
1072 subs r2, r1, #1 @ compare divisor with 1
1074 cmpne r0, r1 @ compare dividend with divisor
1076 tsthi r1, r2 @ see if divisor is power of 2
1080 ARM_MOD_BODY r0, r1, r2, r3
1084 #endif /* ARM version. */
1086 DIV_FUNC_END umodsi3 unsigned
1088 #endif /* L_umodsi3 */
1089 /* ------------------------------------------------------------------------ */
/* __divsi3 / __aeabi_idiv: signed 32-bit divide, r0 = r0 / r1.  The sign of
   the result is the XOR of the operand signs (saved in work/ip); operands
   are negated to unsigned, divided, then the quotient is re-signed.  */
1092 #if defined(__prefer_thumb__)
1095 FUNC_ALIAS aeabi_idiv divsi3
1099 LSYM(divsi3_skip_div0_test):
1102 eor work, divisor @ Save the sign of the result.
1108 neg divisor, divisor @ Loops below use unsigned.
1112 neg dividend, dividend
1114 cmp dividend, divisor
1115 blo LSYM(Lgot_result)
1117 THUMB_DIV_MOD_BODY 0
1128 #elif defined(__ARM_ARCH_EXT_IDIV__)
1130 ARM_FUNC_START divsi3
1131 ARM_FUNC_ALIAS aeabi_idiv divsi3
1138 #else /* ARM/Thumb-2 version. */
1140 ARM_FUNC_START divsi3
1141 ARM_FUNC_ALIAS aeabi_idiv divsi3
1145 LSYM(divsi3_skip_div0_test):
1146 eor ip, r0, r1 @ save the sign of the result.
1148 rsbmi r1, r1, #0 @ loops below use unsigned.
1149 subs r2, r1, #1 @ division by 1 or -1 ?
1153 rsbmi r3, r0, #0 @ positive dividend value
1156 tst r1, r2 @ divisor is power of 2 ?
1159 ARM_DIV_BODY r3, r1, r0, r2
1166 10: teq ip, r0 @ same sign ?
1174 moveq r0, ip, asr #31
1178 12: ARM_DIV2_ORDER r1, r2
1186 #endif /* ARM version */
1188 DIV_FUNC_END divsi3 signed
/* __aeabi_idivmod: signed quotient in r0, remainder in r1 (remainder takes
   the sign of the dividend per the EABI).  */
1190 #if defined(__prefer_thumb__)
1191 FUNC_START aeabi_idivmod
1195 bl LSYM(divsi3_skip_div0_test)
1200 #elif defined(__ARM_ARCH_EXT_IDIV__)
1201 ARM_FUNC_START aeabi_idivmod
1209 ARM_FUNC_START aeabi_idivmod
1212 stmfd sp!, { r0, r1, lr }
1213 bl LSYM(divsi3_skip_div0_test)
1214 ldmfd sp!, { r1, r2, lr }
1219 FUNC_END aeabi_idivmod
1221 #endif /* L_divsi3 */
1222 /* ------------------------------------------------------------------------ */
/* __modsi3: signed 32-bit modulo, r0 = r0 % r1.  Result sign follows the
   dividend; both operands are made positive for the unsigned cores, and the
   dividend's original sign (ip in the ARM path) re-signs the remainder.  */
1225 #if defined(__ARM_ARCH_EXT_IDIV__)
1227 ARM_FUNC_START modsi3
1236 #elif defined(__thumb__)
1244 neg divisor, divisor @ Loops below use unsigned.
1247 @ Need to save the sign of the dividend, unfortunately, we need
1248 @ work later on. Must do this after saving the original value of
1249 @ the work register, because we will pop this value off first.
1253 neg dividend, dividend
1255 cmp dividend, divisor
1256 blo LSYM(Lgot_result)
1258 THUMB_DIV_MOD_BODY 1
1263 neg dividend, dividend
1268 #else /* ARM version. */
1274 rsbmi r1, r1, #0 @ loops below use unsigned.
1275 movs ip, r0 @ preserve sign of dividend
1276 rsbmi r0, r0, #0 @ if negative make positive
1277 subs r2, r1, #1 @ compare divisor with 1
1278 cmpne r0, r1 @ compare dividend with divisor
1280 tsthi r1, r2 @ see if divisor is power of 2
1284 ARM_MOD_BODY r0, r1, r2, r3
1290 #endif /* ARM version */
1292 DIV_FUNC_END modsi3 signed
1294 #endif /* L_modsi3 */
1295 /* ------------------------------------------------------------------------ */
/* Default __aeabi_idiv0/__aeabi_ldiv0: trivial stubs (the return between
   the labels sits in a missing line).  */
1301 FUNC_START aeabi_idiv0
1302 FUNC_START aeabi_ldiv0
1304 FUNC_END aeabi_ldiv0
1305 FUNC_END aeabi_idiv0
1312 #endif /* L_divmodsi_tools */
1313 /* ------------------------------------------------------------------------ */
1315 @ GNU/Linux division-by-zero handler. Used in place of L_dvmd_tls
1317 /* Constant taken from <asm/signal.h>. */
/* Linux variant: raise(SIGFPE) on divide-by-zero instead of returning.  */
1323 ARM_FUNC_START aeabi_idiv0
1324 ARM_FUNC_START aeabi_ldiv0
1331 bl SYM(raise) __PLT__
1335 FUNC_END aeabi_ldiv0
1336 FUNC_END aeabi_idiv0
1341 #endif /* L_dvmd_lnx */
1342 #ifdef L_clear_cache
1343 #if defined __ARM_EABI__ && defined __linux__
1344 @ EABI GNU/Linux call to cacheflush syscall.
1345 ARM_FUNC_START clear_cache
1347 #if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
1358 FUNC_END clear_cache
1360 #error "This is only for ARM EABI GNU/Linux"
1362 #endif /* L_clear_cache */
1363 /* ------------------------------------------------------------------------ */
1364 /* Dword shift operations. */
1365 /* All the following Dword shift variants rely on the fact that
1368 shft xxx, (Reg & 255)
1369 so for Reg value in (32...63) and (-1...-31) we will get zero (in the
1370 case of logical shifts) or the sign (for asr). */
/* Register aliases al/ah (low/high word of the 64-bit value) and the
   r3 = shift-32 / ip = 32-shift setup are defined in lines missing from
   this view.  Each routine handles shift >= 32 (mi/pl split on r2-32)
   without branches.  */
1380 /* Prevent __aeabi double-word shifts from being produced on SymbianOS. */
1386 FUNC_ALIAS aeabi_llsr lshrdi3
1404 movmi al, al, lsr r2
1405 movpl al, ah, lsr r3
1406 orrmi al, al, ah, lsl ip
1418 FUNC_ALIAS aeabi_lasr ashrdi3
1425 @ If r2 is negative at this point the following step would OR
1426 @ the sign bit into all of AL. That's not what we want...
1440 movmi al, al, lsr r2
1441 movpl al, ah, asr r3
1442 orrmi al, al, ah, lsl ip
1455 FUNC_ALIAS aeabi_llsl ashldi3
1473 movmi ah, ah, lsl r2
1474 movpl ah, al, lsl r3
1475 orrmi ah, ah, al, lsr ip
1484 #endif /* __symbian__ */
/* Count-leading/trailing-zero helpers.  HAVE_ARM_CLZ marks cores with the
   clz instruction; otherwise a binary search plus a 16-entry nibble lookup
   table is used (v6-M gets its own Thumb-1 variant).  Many instruction
   lines are in gaps of this sampled view.  */
1486 #if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
1487 || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
1488 || defined(__ARM_ARCH_5TEJ__)
1489 #define HAVE_ARM_CLZ 1
1493 #if defined(__ARM_ARCH_6M__)
1498 cmp r0, r3 /* 0x10000 */
1503 cmp r0, r3 /* #0x100 */
1508 cmp r0, r3 /* #0x10 */
1518 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1521 ARM_FUNC_START clzsi2
1522 # if defined(HAVE_ARM_CLZ)
1529 movcs r0, r0, lsr #16
1533 movcs r0, r0, lsr #8
1537 movcs r0, r0, lsr #4
1545 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1546 # endif /* !HAVE_ARM_CLZ */
1549 #endif /* L_clzsi2 */
1552 #if !defined(HAVE_ARM_CLZ)
1554 # if defined(__ARM_ARCH_6M__)
1558 ARM_FUNC_START clzdi2
1579 # if defined(__ARM_ARCH_6M__)
1586 #else /* HAVE_ARM_CLZ */
1588 ARM_FUNC_START clzdi2
1598 #endif /* L_clzdi2 */
/* ctzsi2: count trailing zeros; same structure, different lookup table
   (values 27..31 because the result is folded as 31 - position).  */
1601 #if defined(__ARM_ARCH_6M__)
1608 cmp r0, r3 /* 0x10000 */
1613 cmp r0, r3 /* #0x100 */
1618 cmp r0, r3 /* #0x10 */
1628 .byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1631 ARM_FUNC_START ctzsi2
1634 # if defined(HAVE_ARM_CLZ)
1642 movcs r0, r0, lsr #16
1646 movcs r0, r0, lsr #8
1650 movcs r0, r0, lsr #4
1658 .byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1659 # endif /* !HAVE_ARM_CLZ */
1662 #endif /* L_clzsi2 */
1664 /* ------------------------------------------------------------------------ */
1665 /* These next two sections are here despite the fact that they contain Thumb
1666 assembler because their presence allows interworked code to be linked even
1667 when the GCC library is this one. */
1669 /* Do not build the interworking functions when the target architecture does
1670 not support Thumb instructions. (This can be a multilib option). */
1671 #if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1672 || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1673 || __ARM_ARCH__ >= 6
1675 #if defined L_call_via_rX
1677 /* These labels & instructions are used by the Arm/Thumb interworking code.
1678 The address of function to be called is loaded into a register and then
1679 one of these labels is called via a BL instruction. This puts the
1680 return address into the link register with the bottom bit set, and the
1681 code here switches to the correct mode before executing the function. */
1687 .macro call_via register
1688 THUMB_FUNC_START _call_via_\register
1693 SIZE (_call_via_\register)
1712 #endif /* L_call_via_rX */
1714 /* Don't bother with the old interworking routines for Thumb-2. */
1715 /* ??? Maybe only omit these on "m" variants. */
1716 #if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
1718 #if defined L_interwork_call_via_rX
1720 /* These labels & instructions are used by the Arm/Thumb interworking code,
1721 when the target address is in an unknown instruction set. The address
1722 of function to be called is loaded into a register and then one of these
1723 labels is called via a BL instruction. This puts the return address
1724 into the link register with the bottom bit set, and the code here
1725 switches to the correct mode before executing the function. Unfortunately
1726 the target code cannot be relied upon to return via a BX instruction, so
1727 instead we have to store the return address on the stack and allow the
1728 called function to return here instead. Upon return we recover the real
1729 return address and use a BX to get back to Thumb mode.
1731 There are three variations of this code. The first,
1732 _interwork_call_via_rN(), will push the return address onto the
1733 stack and pop it in _arm_return(). It should only be used if all
1734 arguments are passed in registers.
1736 The second, _interwork_r7_call_via_rN(), instead stores the return
1737 address at [r7, #-4]. It is the caller's responsibility to ensure
1738 that this address is valid and contains no useful data.
1740 The third, _interwork_r11_call_via_rN(), works in the same way but
1741 uses r11 instead of r7. It is useful if the caller does not really
1742 need a frame pointer. */
1749 LSYM(Lstart_arm_return):
1750 cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
1751 cfi_push 0, 0xe, -0x8, 0x8
1752 nop @ This nop is for the benefit of debuggers, so that
1753 @ backtraces will use the correct unwind information.
1755 RETLDM unwind=LSYM(Lstart_arm_return)
1756 cfi_end LSYM(Lend_arm_return)
1758 .globl _arm_return_r7
1763 .globl _arm_return_r11
/* interwork_with_frame / interwork: emit the Thumb entry stub for one
   register; the streq/adreq pairs run only when the target address has the
   Thumb bit clear (i.e. the callee is ARM code), planting _arm_return as
   the callee's return address.  */
1768 .macro interwork_with_frame frame, register, name, return
1771 THUMB_FUNC_START \name
1778 streq lr, [\frame, #-4]
1779 adreq lr, _arm_return_\frame
1785 .macro interwork register
1788 THUMB_FUNC_START _interwork_call_via_\register
1794 .globl LSYM(Lchange_\register)
1795 LSYM(Lchange_\register):
1797 streq lr, [sp, #-8]!
1798 adreq lr, _arm_return
1801 SIZE (_interwork_call_via_\register)
1803 interwork_with_frame r7,\register,_interwork_r7_call_via_\register
1804 interwork_with_frame r11,\register,_interwork_r11_call_via_\register
1822 /* The LR case has to be handled a little differently... */
1825 THUMB_FUNC_START _interwork_call_via_lr
1834 stmeqdb r13!, {lr, pc}
1836 adreq lr, _arm_return
1839 SIZE (_interwork_call_via_lr)
1841 #endif /* L_interwork_call_via_rX */
1842 #endif /* !__thumb2__ */
1844 /* Functions to support compact pic switch tables in thumb1 state.
1845 All these routines take an index into the table in r0. The
1846 table is at LR & ~1 (but this must be rounded up in the case
1847 of 32-bit entries). They are only permitted to clobber r12
1848 and r14 and r0 must be preserved on exit. */
1849 #ifdef L_thumb1_case_sqi
1855 THUMB_FUNC_START __gnu_thumb1_case_sqi
1865 SIZE (__gnu_thumb1_case_sqi)
1868 #ifdef L_thumb1_case_uqi
1874 THUMB_FUNC_START __gnu_thumb1_case_uqi
1884 SIZE (__gnu_thumb1_case_uqi)
1887 #ifdef L_thumb1_case_shi
1893 THUMB_FUNC_START __gnu_thumb1_case_shi
1904 SIZE (__gnu_thumb1_case_shi)
1907 #ifdef L_thumb1_case_uhi
1913 THUMB_FUNC_START __gnu_thumb1_case_uhi
1924 SIZE (__gnu_thumb1_case_uhi)
1927 #ifdef L_thumb1_case_si
1933 THUMB_FUNC_START __gnu_thumb1_case_si
1936 adds.n r1, r1, #2 /* Align to word. */
1944 mov pc, lr /* We know we were called from thumb code. */
1945 SIZE (__gnu_thumb1_case_si)
1948 #endif /* Arch supports thumb. */
/* Pull in the soft-float bodies: full IEEE754 double/single routines on
   most targets, the reduced v6-M (Thumb-1 only) variants otherwise.  */
1951 #ifndef __ARM_ARCH_6M__
1952 #include "ieee754-df.S"
1953 #include "ieee754-sf.S"
1955 #else /* __ARM_ARCH_6M__ */
1956 #include "bpabi-v6m.S"
1957 #endif /* __ARM_ARCH_6M__ */
1958 #endif /* !__symbian__ */