libgcc/config/arm/lib1funcs.S

   1 @ libgcc routines for ARM cpu.
   2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
   3
   4 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008,
   5    2009, 2010 Free Software Foundation, Inc.
   6
   7 This file is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 This file is distributed in the hope that it will be useful, but
  13 WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 General Public License for more details.
  16
  17 Under Section 7 of GPL version 3, you are granted additional
  18 permissions described in the GCC Runtime Library Exception, version
  19 3.1, as published by the Free Software Foundation.
  20
  21 You should have received a copy of the GNU General Public License and
  22 a copy of the GCC Runtime Library Exception along with this program;
  23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24 <http://www.gnu.org/licenses/>.  */
  25
   26 /* An executable stack is *not* required for these functions.  */
   27 #if defined(__ELF__) && defined(__linux__)
     /* Emit an empty .note.GNU-stack section, then switch back to the
        section that was current before it (.previous).  */
   28 .section .note.GNU-stack,"",%progbits
   29 .previous
   30 #endif  /* __ELF__ and __linux__ */
   31
   32 #ifdef __ARM_EABI__
   33 /* Some attributes that are common to all routines in this file.  */
   34         /* Tag_ABI_align_needed: This code does not require 8-byte
   35            alignment from the caller.  */
   36         /* .eabi_attribute 24, 0  -- default setting.  */
   37         /* Tag_ABI_align_preserved: This code preserves 8-byte
   38            alignment in any callee.  */
     /* Attribute 25 is the only one emitted explicitly; attribute 24 is
        left at its default, as noted above.  */
   39         .eabi_attribute 25, 1
   40 #endif /* __ARM_EABI__ */
  41 /* ------------------------------------------------------------------------ */
  42
   43 /* We need to know what prefix to add to function names.  */
   44
   45 #ifndef __USER_LABEL_PREFIX__
   46 #error  __USER_LABEL_PREFIX__ not defined
   47 #endif
   48
   49 /* ANSI concatenation macros.  */
     /* CONCAT1 forwards to CONCAT2 so that its arguments are
        macro-expanded before ## pastes them together.  */
   50
   51 #define CONCAT1(a, b) CONCAT2(a, b)
   52 #define CONCAT2(a, b) a ## b
   53
   54 /* Use the right prefix for global labels.  */
   55
   56 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
   57
     /* Object-format helpers:
        __PLT__  suffix written after the target of branches to other
                 routines; "(PLT)" on ELF except for Thumb and non-PIC
                 VxWorks, otherwise empty.
        TYPE(x)  marks SYM(x) as a function symbol (ELF only).
        SIZE(x)  sets the size of SYM(x) to the distance from its label
                 to the current location (ELF only).
        LSYM(x)  spells a file-local label: ".x" on ELF, plain "x"
                 otherwise.  */
   58 #ifdef __ELF__
   59 #ifdef __thumb__
   60 #define __PLT__  /* Not supported in Thumb assembler (for now).  */
   61 #elif defined __vxworks && !defined __PIC__
   62 #define __PLT__ /* Not supported by the kernel loader.  */
   63 #else
   64 #define __PLT__ (PLT)
   65 #endif
   66 #define TYPE(x) .type SYM(x),function
   67 #define SIZE(x) .size SYM(x), . - SYM(x)
   68 #define LSYM(x) .x
   69 #else
   70 #define __PLT__
   71 #define TYPE(x)
   72 #define SIZE(x)
   73 #define LSYM(x) x
   74 #endif
  75
   76 /* Function end macros.  Variants for interworking.  */
     /* First derive a single numeric __ARM_ARCH__ (2 to 7) from the
        per-architecture __ARM_ARCH_xx__ macros, so that the rest of the
        file can test the architecture level with ordinary comparisons.
        The build stops with an error if none of the known macros is
        defined.  */
   77
   78 #if defined(__ARM_ARCH_2__)
   79 # define __ARM_ARCH__ 2
   80 #endif
   81
   82 #if defined(__ARM_ARCH_3__)
   83 # define __ARM_ARCH__ 3
   84 #endif
   85
   86 #if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
   87         || defined(__ARM_ARCH_4T__)
   88 /* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   89    long multiply instructions.  That includes v3M.  */
   90 # define __ARM_ARCH__ 4
   91 #endif
   92
   93 #if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
   94         || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
   95         || defined(__ARM_ARCH_5TEJ__)
   96 # define __ARM_ARCH__ 5
   97 #endif
   98
     /* Note that ARMv6-M is counted as architecture 6 here; code that
        must treat it specially tests __ARM_ARCH_6M__ directly.  */
   99 #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
  100         || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
  101         || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
  102         || defined(__ARM_ARCH_6M__)
  103 # define __ARM_ARCH__ 6
  104 #endif
  105
  106 #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
  107         || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
  108         || defined(__ARM_ARCH_7EM__)
  109 # define __ARM_ARCH__ 7
  110 #endif
  111
  112 #ifndef __ARM_ARCH__
  113 #error Unable to determine architecture.
  114 #endif
 115
  116 /* There are times when we might prefer Thumb1 code even if ARM code is
  117    permitted, for example, the code might be smaller, or there might be
  118    interworking problems with switching to ARM state if interworking is
  119    disabled.  */
     /* __prefer_thumb__ is defined when compiling for Thumb but not
        Thumb-2, and at least one of these holds: interworking is
        disabled, we are optimizing for size, or the target is ARMv6-M.
        The function definitions below test it to select their Thumb-1
        implementation.  */
  120 #if (defined(__thumb__)                 \
  121      && !defined(__thumb2__)            \
  122      && (!defined(__THUMB_INTERWORK__)  \
  123          || defined (__OPTIMIZE_SIZE__) \
  124          || defined(__ARM_ARCH_6M__)))
  125 # define __prefer_thumb__
  126 #endif
 127
  128 /* How to return from a function call depends on the architecture variant.  */
     /* RET is an unconditional return and RETc(x) a return executed under
        condition x.  ARMv4T and everything above architecture 4 return
        with "bx lr"; the remaining cores use "mov pc, lr".  */
  129
  130 #if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
  131
  132 # define RET            bx      lr
  133 # define RETc(x)        bx##x   lr
  134
  135 /* Special precautions for interworking on armv4t.  */
  136 # if (__ARM_ARCH__ == 4)
  137
  138 /* Always use bx, not ldr pc.  */
     /* __INTERWORKING__ is tested by RETLDM and by the non-EABI
        THUMB_LDIV0 to choose a bx-based return sequence.  */
  139 #  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
  140 #    define __INTERWORKING__
  141 #   endif /* __thumb__ || __THUMB_INTERWORK__ */
  142
  143 /* Include thumb stub before arm mode code.  */
     /* __INTERWORKING_STUBS__ selects the ARM_FUNC_START variant that
        begins each routine with a Thumb "bx pc" stub.  */
  144 #  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
  145 #   define __INTERWORKING_STUBS__
  146 #  endif /* __thumb__ && !__THUMB_INTERWORK__ */
  147
  148 #endif /* __ARM_ARCH__ == 4 */
  149
  150 #else
  151
  152 # define RET            mov     pc, lr
  153 # define RETc(x)        mov##x  pc, lr
  154
  155 #endif
 156
  157 .macro  cfi_pop         advance, reg, cfa_offset
     /* Append call-frame instructions to .debug_frame describing a
        register restore.  On non-ELF targets the macro expands to nothing.
          \advance     code distance emitted as the DW_CFA_advance_loc4
                       operand.
          \reg         register number (0-63) being restored; it is OR-ed
                       into the DW_CFA_restore opcode byte.
          \cfa_offset  new CFA offset once the restore has happened.  */
  158 #ifdef __ELF__
  159         .pushsection    .debug_frame
  160         .byte   0x4             /* DW_CFA_advance_loc4 */
  161         .4byte  \advance
  162         .byte   (0xc0 | \reg)   /* DW_CFA_restore */
  163         .byte   0xe             /* DW_CFA_def_cfa_offset */
  164         .uleb128 \cfa_offset
  165         .popsection
  166 #endif
  167 .endm
  168 .macro  cfi_push        advance, reg, offset, cfa_offset
     /* Append call-frame instructions to .debug_frame describing a
        register save.  On non-ELF targets the macro expands to nothing.
          \advance     code distance emitted as the DW_CFA_advance_loc4
                       operand.
          \reg         register number (0-63) being saved; it is OR-ed
                       into the DW_CFA_offset opcode byte.
          \offset      byte offset of the save slot from the CFA.  It is
                       divided by -4, the data alignment factor declared
                       in the CIE emitted by cfi_start, so callers pass a
                       negative multiple of 4.
          \cfa_offset  new CFA offset once the save has happened.  */
  169 #ifdef __ELF__
  170         .pushsection    .debug_frame
  171         .byte   0x4             /* DW_CFA_advance_loc4 */
  172         .4byte  \advance
  173         .byte   (0x80 | \reg)   /* DW_CFA_offset */
  174         .uleb128 (\offset / -4)
  175         .byte   0xe             /* DW_CFA_def_cfa_offset */
  176         .uleb128 \cfa_offset
  177         .popsection
  178 #endif
  179 .endm
  180 .macro cfi_start        start_label, end_label
     /* Emit a CIE into .debug_frame followed by the header of one FDE
        covering the code from \start_label to \end_label.  The FDE is
        left open: cfi_push/cfi_pop append instructions to it and cfi_end
        supplies the Lend_fde label used in the length field.  The labels
        defined here have fixed names, so expanding this macro more than
        once in one assembly would redefine them.  On non-ELF targets the
        macro expands to nothing.  */
  181 #ifdef __ELF__
  182         .pushsection    .debug_frame
  183 LSYM(Lstart_frame):
  184         .4byte  LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
  185 LSYM(Lstart_cie):
  186         .4byte  0xffffffff      @ CIE Identifier Tag
  187         .byte   0x1     @ CIE Version
  188         .ascii  "\0"    @ CIE Augmentation
  189         .uleb128 0x1    @ CIE Code Alignment Factor
  190         .sleb128 -4     @ CIE Data Alignment Factor
  191         .byte   0xe     @ CIE RA Column
     /* Initial rule: CFA = register 13 (0xd) + 0.  */
  192         .byte   0xc     @ DW_CFA_def_cfa
  193         .uleb128 0xd
  194         .uleb128 0x0
  195
  196         .align 2
  197 LSYM(Lend_cie):
  198         .4byte  LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
  199 LSYM(Lstart_fde):
  200         .4byte  LSYM(Lstart_frame)      @ FDE CIE offset
  201         .4byte  \start_label    @ FDE initial location
  202         .4byte  \end_label-\start_label @ FDE address range
  203         .popsection
  204 #endif
  205 .endm
  206 .macro cfi_end  end_label
     /* Close the FDE opened by cfi_start: pad .debug_frame to a 4-byte
        boundary and define Lend_fde there, then define \end_label at the
        current location of the section that was active when the macro
        was invoked.  On non-ELF targets the macro expands to nothing, so
        \end_label is not defined there either.  */
  207 #ifdef __ELF__
  208         .pushsection    .debug_frame
  209         .align  2
  210 LSYM(Lend_fde):
  211         .popsection
  212 \end_label:
  213 #endif
  214 .endm
 215
  216 /* Don't pass dirn, it's there just to get token pasting right.  */
     /* RETLDM: restore registers from the stack and return.
          regs    optional register list to pop in addition to the return
                  address.  When empty, a single word is loaded and sp is
                  advanced by 8 bytes, matching the 8-byte
                  "str lr, [sp, #-8]!" used by the non-EABI ARM_LDIV0.
          cond    optional condition code applied to every instruction.
          unwind  optional label; when given and __INTERWORKING__ is
                  defined, a cfi_pop record marking LR as restored is
                  emitted with an advance measured from that label.  It
                  is ignored in the non-interworking variant.
        With __INTERWORKING__ the return address is loaded into lr and
        the return is a bx; otherwise it is loaded straight into pc.  */
  217
  218 .macro  RETLDM  regs=, cond=, unwind=, dirn=ia
  219 #if defined (__INTERWORKING__)
  220         .ifc "\regs",""
  221         ldr\cond        lr, [sp], #8
  222         .else
  223 # if defined(__thumb2__)
  224         pop\cond        {\regs, lr}
  225 # else
  226         ldm\cond\dirn   sp!, {\regs, lr}
  227 # endif
  228         .endif
  229         .ifnc "\unwind", ""
  230         /* Mark LR as restored.  */
  231 97:     cfi_pop 97b - \unwind, 0xe, 0x0
  232         .endif
  233         bx\cond lr
  234 #else
  235         /* Caller is responsible for providing IT instruction.  */
  236         .ifc "\regs",""
  237         ldr\cond        pc, [sp], #8
  238         .else
  239 # if defined(__thumb2__)
  240         pop\cond        {\regs, pc}
  241 # else
  242         ldm\cond\dirn   sp!, {\regs, pc}
  243 # endif
  244         .endif
  245 #endif
  246 .endm
 247
  248 /* The Unified assembly syntax allows the same code to be assembled for both
  249    ARM and Thumb-2.  However this is only supported by recent gas, so define
  250    a set of macros to allow ARM code on older assemblers.  */
     /* Helpers defined for both modes:
          do_it cond, suffix   IT instruction on Thumb-2, nothing on ARM.
          shift1 op, a0, a1, a2
                               a0 = a1 shifted by a2 using shift "op";
                               a shift instruction on Thumb-2, a mov with
                               a shifted operand on ARM.
          do_push / do_pop     push/pop on Thumb-2, stmfd/ldmfd sp! on ARM.
          COND(op1, op2, cond) paste a mnemonic from its base, a suffix
                               and a condition: the condition goes last
                               on Thumb-2 and before the suffix on ARM.
          shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
                               dest = src1 <name> (src2 <shiftop> shiftreg).
                               One instruction on ARM, where tmp is
                               unused; two on Thumb-2, where tmp is
                               overwritten.  */
  251 #if defined(__thumb2__)
  252 .macro do_it cond, suffix=""
  253         it\suffix       \cond
  254 .endm
  255 .macro shift1 op, arg0, arg1, arg2
  256         \op     \arg0, \arg1, \arg2
  257 .endm
  258 #define do_push push
  259 #define do_pop  pop
  260 #define COND(op1, op2, cond) op1 ## op2 ## cond
  261 /* Perform an arithmetic operation with a variable shift operand.  This
  262    requires two instructions and a scratch register on Thumb-2.  */
  263 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
  264         \shiftop \tmp, \src2, \shiftreg
  265         \name \dest, \src1, \tmp
  266 .endm
  267 #else
  268 .macro do_it cond, suffix=""
  269 .endm
  270 .macro shift1 op, arg0, arg1, arg2
  271         mov     \arg0, \arg1, \op \arg2
  272 .endm
  273 #define do_push stmfd sp!,
  274 #define do_pop  ldmfd sp!,
  275 #define COND(op1, op2, cond) op1 ## cond ## op2
  276 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
  277         \name \dest, \src1, \src2, \shiftop \shiftreg
  278 .endm
  279 #endif
 280
  281 #ifdef __ARM_EABI__
     /* ARM_LDIV0 name signed: ARM-state division-by-zero tail, expanded
        at the Ldiv0 label by DIV_FUNC_END.

        EABI variant: replace the value in r0 by a saturated one and
        tail-branch to __aeabi_idiv0.  For "unsigned" a non-zero r0
        becomes 0xffffffff; otherwise a positive r0 becomes 0x7fffffff
        and a negative r0 becomes 0x80000000.  A zero r0 is left alone.
        \name is not used by this variant.  */
  282 .macro ARM_LDIV0 name signed
  283         cmp     r0, #0
  284         .ifc    \signed, unsigned
  285         movne   r0, #0xffffffff
  286         .else
  287         movgt   r0, #0x7fffffff
  288         movlt   r0, #0x80000000
  289         .endif
  290         b       SYM (__aeabi_idiv0) __PLT__
  291 .endm
  292 #else
     /* Non-EABI variant: save lr in an 8-byte stack slot, call __div0,
        then return 0 in r0 via RETLDM.  \name is used to compute the
        unwind advance from the start of the function; \signed is
        unused.  */
  293 .macro ARM_LDIV0 name signed
  294         str     lr, [sp, #-8]!
  295 98:     cfi_push 98b - __\name, 0xe, -0x8, 0x8
  296         bl      SYM (__div0) __PLT__
  297         mov     r0, #0                  @ About as wrong as it could be.
  298         RETLDM  unwind=98b
  299 .endm
  300 #endif
 301
 302
  303 #ifdef __ARM_EABI__
     /* THUMB_LDIV0 name signed: Thumb-state division-by-zero tail,
        expanded at the Ldiv0 label by DIV_FUNC_END.

        EABI variants: saturate r0 exactly as ARM_LDIV0 does (unsigned:
        non-zero becomes 0xffffffff; signed: positive becomes 0x7fffffff,
        negative becomes 0x80000000, zero is kept) and transfer control
        to __aeabi_idiv0.  Three encodings are provided: ARMv6-M,
        Thumb-2, and Thumb-1 on a core that also has ARM state.  */
  304 .macro THUMB_LDIV0 name signed
  305 #if defined(__ARM_ARCH_6M__)
  306         .ifc \signed, unsigned
  307         cmp     r0, #0
  308         beq     1f
  309         mov     r0, #0
  310         mvn     r0, r0          @ 0xffffffff
  311 1:
  312         .else
  313         cmp     r0, #0
  314         beq     2f
  315         blt     3f
  316         mov     r0, #0
  317         mvn     r0, r0
  318         lsr     r0, r0, #1      @ 0x7fffffff
  319         b       2f
  320 3:      mov     r0, #0x80
  321         lsl     r0, r0, #24     @ 0x80000000
  322 2:
  323         .endif
     /* Jump to __aeabi_idiv0 without losing any register: push
        r0-r2, overwrite the slot that held r2 with the target address
        (PC-relative offset from the literal at 4: plus the address of
        that literal), then pop r0, r1 and pc.  */
  324         push    {r0, r1, r2}
  325         ldr     r0, 4f
  326         adr     r1, 4f
  327         add     r0, r1
  328         str     r0, [sp, #8]
  329         @ We know we are not on armv4t, so pop pc is safe.
  330         pop     {r0, r1, pc}
  331         .align  2
  332 4:
  333         .word   __aeabi_idiv0 - 4b
  334 #elif defined(__thumb2__)
  335         .syntax unified
  336         .ifc \signed, unsigned
  337         cbz     r0, 1f
  338         mov     r0, #0xffffffff
  339 1:
  340         .else
  341         cmp     r0, #0
  342         do_it   gt
  343         movgt   r0, #0x7fffffff
  344         do_it   lt
  345         movlt   r0, #0x80000000
  346         .endif
  347         b.w     SYM(__aeabi_idiv0) __PLT__
  348 #else
     /* Switch to ARM state with an aligned "bx pc", run the same
        sequence as the EABI ARM_LDIV0, and leave the assembler in
        Thumb mode afterwards.  */
  349         .align  2
  350         bx      pc
  351         nop
  352         .arm
  353         cmp     r0, #0
  354         .ifc    \signed, unsigned
  355         movne   r0, #0xffffffff
  356         .else
  357         movgt   r0, #0x7fffffff
  358         movlt   r0, #0x80000000
  359         .endif
  360         b       SYM(__aeabi_idiv0) __PLT__
  361         .thumb
  362 #endif
  363 .endm
  364 #else
     /* Non-EABI variant: push r1 and lr, call __div0, then return 0 in
        r0.  With __INTERWORKING__ the return address is popped into r2
        and the return is a bx; otherwise it is popped straight into pc.
        \name is used to compute the unwind advance from the start of the
        function; \signed is unused.  */
  365 .macro THUMB_LDIV0 name signed
  366         push    { r1, lr }
  367 98:     cfi_push 98b - __\name, 0xe, -0x4, 0x8
  368         bl      SYM (__div0)
  369         mov     r0, #0                  @ About as wrong as it could be.
  370 #if defined (__INTERWORKING__)
  371         pop     { r1, r2 }
  372         bx      r2
  373 #else
  374         pop     { r1, pc }
  375 #endif
  376 .endm
  377 #endif
 378
  379 .macro FUNC_END name
     /* Record the size of function __\name, measured from its label to
        this point.  SIZE expands to nothing on non-ELF targets.  */
  380         SIZE (__\name)
  381 .endm
 382
  383 .macro DIV_FUNC_END name signed
     /* Finish division routine __\name: open the frame description
        covering the function up to Lend_div0, define the Ldiv0 label
        that the routine branches to for a zero divisor, expand the
        Thumb or ARM division-by-zero tail there, close the frame
        description and set the symbol size.
          \name    function name without the leading "__".
          \signed  forwarded to the LDIV0 macro; it compares this
                   argument against the word "unsigned".  */
  384         cfi_start       __\name, LSYM(Lend_div0)
  385 LSYM(Ldiv0):
  386 #ifdef __thumb__
  387         THUMB_LDIV0 \name \signed
  388 #else
  389         ARM_LDIV0 \name \signed
  390 #endif
  391         cfi_end LSYM(Lend_div0)
  392         FUNC_END \name
  393 .endm
 394
  395 .macro THUMB_FUNC_START name
     /* Begin global Thumb function \name.  Unlike FUNC_START, the name
        is used as given (no "__" is prepended) and no section or
        alignment directive is emitted.  */
  396         .globl  SYM (\name)
  397         TYPE    (\name)
  398         .thumb_func
  399 SYM (\name):
  400 .endm
 401
  402 /* Function start macros.  Variants for ARM and Thumb.  */
     /* THUMB_FUNC, THUMB_CODE and THUMB_SYNTAX are the directives that
        FUNC_START places before a function label.  When compiling for
        Thumb they mark the label as a Thumb function and force Thumb
        encoding, and on Thumb-2 they also select divided syntax; when
        compiling for ARM all three are empty.  */
  403
  404 #ifdef __thumb__
  405 #define THUMB_FUNC .thumb_func
  406 #define THUMB_CODE .force_thumb
  407 # if defined(__thumb2__)
  408 #define THUMB_SYNTAX .syntax divided
  409 # else
  410 #define THUMB_SYNTAX
  411 # endif
  412 #else
  413 #define THUMB_FUNC
  414 #define THUMB_CODE
  415 #define THUMB_SYNTAX
  416 #endif
 417
  418 .macro FUNC_START name
     /* Begin global function __\name in .text, in the instruction set
        the file is being compiled for.  THUMB_CODE, THUMB_FUNC and
        THUMB_SYNTAX expand to nothing when compiling for ARM.  */
  419         .text
  420         .globl SYM (__\name)
  421         TYPE (__\name)
  422         .align 0
  423         THUMB_CODE
  424         THUMB_FUNC
  425         THUMB_SYNTAX
  426 SYM (__\name):
  427 .endm
 428
  429 /* Special function that will always be coded in ARM assembly, even if
  430    in Thumb-only compilation.  */
     /* Three configurations define the following names:
          ARM_FUNC_START name  begin global function __\name.
          ARM_CALL name        "bl" to a function started with
                               ARM_FUNC_START.
          EQUIV                directive used by ARM_FUNC_ALIAS to alias
                               one symbol to another.
        For ARMv6-M (outside the Thumb-2 and interworking-stub cases)
        only EQUIV is defined.  */
  431
  432 #if defined(__thumb2__)
  433
  434 /* For Thumb-2 we build everything in thumb mode.  */
  435 .macro ARM_FUNC_START name
  436        FUNC_START \name
  437        .syntax unified
  438 .endm
  439 #define EQUIV .thumb_set
  440 .macro  ARM_CALL name
  441         bl      __\name
  442 .endm
  443
  444 #elif defined(__INTERWORKING_STUBS__)
  445
     /* The function entry is a Thumb stub ("bx pc" plus a nop) that
        switches to ARM state; the ARM body starts at _L__\name.  */
  446 .macro  ARM_FUNC_START name
  447         FUNC_START \name
  448         bx      pc
  449         nop
  450         .arm
  451 /* A hook to tell gdb that we've switched to ARM mode.  Also used to call
  452    directly from other local arm routines.  */
  453 _L__\name:
  454 .endm
  455 #define EQUIV .thumb_set
  456 /* Branch directly to a function declared with ARM_FUNC_START.
  457    Must be called in arm mode.  */
  458 .macro  ARM_CALL name
  459         bl      _L__\name
  460 .endm
  461
  462 #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
  463
  464 #ifdef __ARM_ARCH_6M__
  465 #define EQUIV .thumb_set
  466 #else
     /* Plain ARM-state function: same layout as FUNC_START but with
        .arm in place of the Thumb directives.  */
  467 .macro  ARM_FUNC_START name
  468         .text
  469         .globl SYM (__\name)
  470         TYPE (__\name)
  471         .align 0
  472         .arm
  473 SYM (__\name):
  474 .endm
  475 #define EQUIV .set
  476 .macro  ARM_CALL name
  477         bl      __\name
  478 .endm
  479 #endif
  480
  481 #endif
 482
  483 .macro  FUNC_ALIAS new old
     /* Export __\new as a global alias of __\old.  When compiling for
        Thumb the alias is made with .thumb_set, otherwise with .set.  */
  484         .globl  SYM (__\new)
  485 #if defined (__thumb__)
  486         .thumb_set      SYM (__\new), SYM (__\old)
  487 #else
  488         .set    SYM (__\new), SYM (__\old)
  489 #endif
  490 .endm
 491
  492 #ifndef __ARM_ARCH_6M__
     /* ARM_FUNC_ALIAS new old: export __\new as a global alias of
        __\old, a function started with ARM_FUNC_START, using the EQUIV
        directive chosen for the current configuration.  With
        interworking stubs the ARM-state entry label _L__\new is aliased
        as well, so that ARM_CALL works on the new name.  Not defined for
        ARMv6-M.  */
  493 .macro  ARM_FUNC_ALIAS new old
  494         .globl  SYM (__\new)
  495         EQUIV   SYM (__\new), SYM (__\old)
  496 #if defined(__INTERWORKING_STUBS__)
  497         .set    SYM (_L__\new), SYM (_L__\old)
  498 #endif
  499 .endm
  500 #endif
 501
  502 #ifdef __ARMEB__
     /* Endian-dependent names for the register pairs r0/r1 (xx) and
        r2/r3 (yy).  NOTE(review): the h/l suffixes presumably stand for
        the high and low words of 64-bit operands; no user of these names
        is visible in this part of the file, so confirm against the
        callers.  On big-endian targets (__ARMEB__) the h name is the
        lower-numbered register of each pair, on little-endian targets
        the higher-numbered one.  */
  503 #define xxh r0
  504 #define xxl r1
  505 #define yyh r2
  506 #define yyl r3
  507 #else
  508 #define xxh r1
  509 #define xxl r0
  510 #define yyh r3
  511 #define yyl r2
  512 #endif
 513
  514 #ifdef __ARM_EABI__
     /* WEAK name: declare __\name as a weak symbol.  Only defined for
        EABI targets.  */
  515 .macro  WEAK name
  516         .weak SYM (__\name)
  517 .endm
  518 #endif
 519
  520 #ifdef __thumb__
  521 /* Register aliases.  */
     /* These names are used by THUMB_DIV_MOD_BODY and by the Thumb
        division routines.  overdone and result deliberately share r2:
        the modulo form of THUMB_DIV_MOD_BODY uses the register as
        overdone, the division form as result.  work is r4; the Thumb
        udivsi3 in this file pushes it before expanding
        THUMB_DIV_MOD_BODY and pops it afterwards.  */
  522
  523 work            .req    r4      @ XXXX is this safe ?
  524 dividend        .req    r0
  525 divisor         .req    r1
  526 overdone        .req    r2
  527 result          .req    r2
  528 curbit          .req    r3
  529 #endif
     /* The following aliases are disabled.  */
  530 #if 0
  531 ip              .req    r12
  532 sp              .req    r13
  533 lr              .req    r14
  534 pc              .req    r15
  535 #endif
 536
 537 /* ------------------------------------------------------------------------ */
 538 /*              Bodies of the division and modulo routines.                 */
 539 /* ------------------------------------------------------------------------ */
 540 .macro ARM_DIV_BODY dividend, divisor, result, curbit
 541
 542 #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
 543
 544 #if defined (__thumb2__)
 545         clz     \curbit, \dividend
 546         clz     \result, \divisor
 547         sub     \curbit, \result, \curbit
 548         rsb     \curbit, \curbit, #31
 549         adr     \result, 1f
 550         add     \curbit, \result, \curbit, lsl #4
 551         mov     \result, #0
 552         mov     pc, \curbit
 553 .p2align 3
 554 1:
 555         .set    shift, 32
 556         .rept   32
 557         .set    shift, shift - 1
 558         cmp.w   \dividend, \divisor, lsl #shift
 559         nop.n
 560         adc.w   \result, \result, \result
 561         it      cs
 562         subcs.w \dividend, \dividend, \divisor, lsl #shift
 563         .endr
 564 #else
 565         clz     \curbit, \dividend
 566         clz     \result, \divisor
 567         sub     \curbit, \result, \curbit
 568         rsbs    \curbit, \curbit, #31
 569         addne   \curbit, \curbit, \curbit, lsl #1
 570         mov     \result, #0
 571         addne   pc, pc, \curbit, lsl #2
 572         nop
 573         .set    shift, 32
 574         .rept   32
 575         .set    shift, shift - 1
 576         cmp     \dividend, \divisor, lsl #shift
 577         adc     \result, \result, \result
 578         subcs   \dividend, \dividend, \divisor, lsl #shift
 579         .endr
 580 #endif
 581
 582 #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
 583 #if __ARM_ARCH__ >= 5
 584
 585         clz     \curbit, \divisor
 586         clz     \result, \dividend
 587         sub     \result, \curbit, \result
 588         mov     \curbit, #1
 589         mov     \divisor, \divisor, lsl \result
 590         mov     \curbit, \curbit, lsl \result
 591         mov     \result, #0
 592
 593 #else /* __ARM_ARCH__ < 5 */
 594
 595         @ Initially shift the divisor left 3 bits if possible,
 596         @ set curbit accordingly.  This allows for curbit to be located
 597         @ at the left end of each 4-bit nibbles in the division loop
 598         @ to save one loop in most cases.
 599         tst     \divisor, #0xe0000000
 600         moveq   \divisor, \divisor, lsl #3
 601         moveq   \curbit, #8
 602         movne   \curbit, #1
 603
 604         @ Unless the divisor is very big, shift it up in multiples of
 605         @ four bits, since this is the amount of unwinding in the main
 606         @ division loop.  Continue shifting until the divisor is
 607         @ larger than the dividend.
 608 1:      cmp     \divisor, #0x10000000
 609         cmplo   \divisor, \dividend
 610         movlo   \divisor, \divisor, lsl #4
 611         movlo   \curbit, \curbit, lsl #4
 612         blo     1b
 613
 614         @ For very big divisors, we must shift it a bit at a time, or
 615         @ we will be in danger of overflowing.
 616 1:      cmp     \divisor, #0x80000000
 617         cmplo   \divisor, \dividend
 618         movlo   \divisor, \divisor, lsl #1
 619         movlo   \curbit, \curbit, lsl #1
 620         blo     1b
 621
 622         mov     \result, #0
 623
 624 #endif /* __ARM_ARCH__ < 5 */
 625
 626         @ Division loop
 627 1:      cmp     \dividend, \divisor
 628         do_it   hs, t
 629         subhs   \dividend, \dividend, \divisor
 630         orrhs   \result,   \result,   \curbit
 631         cmp     \dividend, \divisor,  lsr #1
 632         do_it   hs, t
 633         subhs   \dividend, \dividend, \divisor, lsr #1
 634         orrhs   \result,   \result,   \curbit,  lsr #1
 635         cmp     \dividend, \divisor,  lsr #2
 636         do_it   hs, t
 637         subhs   \dividend, \dividend, \divisor, lsr #2
 638         orrhs   \result,   \result,   \curbit,  lsr #2
 639         cmp     \dividend, \divisor,  lsr #3
 640         do_it   hs, t
 641         subhs   \dividend, \dividend, \divisor, lsr #3
 642         orrhs   \result,   \result,   \curbit,  lsr #3
 643         cmp     \dividend, #0                   @ Early termination?
 644         do_it   ne, t
 645         movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
 646         movne   \divisor,  \divisor, lsr #4
 647         bne     1b
 648
 649 #endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
 650
 651 .endm
 652 /* ------------------------------------------------------------------------ */
  653 .macro ARM_DIV2_ORDER divisor, order
     /* Set \order to the bit index of a power-of-two \divisor, i.e. the
        right-shift amount that divides by it.  udivsi3 expands this only
        after checking that the divisor has a single bit set.
        On architecture 5 and later \divisor is preserved; the older
        variant shifts it right while searching.  */
  654
  655 #if __ARM_ARCH__ >= 5
  656
  657         clz     \order, \divisor
  658         rsb     \order, \order, #31
  659
  660 #else
  661
     /* Binary search: test the upper 16, 8 and 4 bits in turn, moving
        the divisor down and accumulating the shift count.  */
  662         cmp     \divisor, #(1 << 16)
  663         movhs   \divisor, \divisor, lsr #16
  664         movhs   \order, #16
  665         movlo   \order, #0
  666
  667         cmp     \divisor, #(1 << 8)
  668         movhs   \divisor, \divisor, lsr #8
  669         addhs   \order, \order, #8
  670
  671         cmp     \divisor, #(1 << 4)
  672         movhs   \divisor, \divisor, lsr #4
  673         addhs   \order, \order, #4
  674
     /* The divisor is now 1, 2, 4 or 8.  8 adds 3; for the others
        divisor >> 1 (0, 1 or 2) is exactly the remaining order.  This
        last step relies on the single-set-bit precondition.  */
  675         cmp     \divisor, #(1 << 2)
  676         addhi   \order, \order, #3
  677         addls   \order, \order, \divisor, lsr #1
  678
  679 #endif
  680
  681 .endm
 682 /* ------------------------------------------------------------------------ */
  683 .macro ARM_MOD_BODY dividend, divisor, order, spare
     /* Unsigned 32-bit remainder core for ARM state: repeatedly
        subtract shifted copies of the divisor from the dividend.
          \dividend  register holding the dividend; the reduced value is
                     left in it.
          \divisor   register holding the divisor; the looping variants
                     shift it in place.
          \order     scratch register (step count / shift amount).
          \spare     scratch register; only written by the clz-based
                     variants.
        NOTE(review): no caller is visible in this part of the file, so
        the preconditions on the operands (for example a non-zero divisor
        and dividend >= divisor) should be confirmed there.  */
  684
  685 #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
  686
     /* Unrolled variant: skip (31 - difference of leading-zero counts)
        steps.  Each step is two 4-byte instructions, hence "lsl #3"; pc
        reads as the address of the add plus 8, which the nop accounts
        for.  */
  687         clz     \order, \divisor
  688         clz     \spare, \dividend
  689         sub     \order, \order, \spare
  690         rsbs    \order, \order, #31
  691         addne   pc, pc, \order, lsl #3
  692         nop
  693         .set    shift, 32
  694         .rept   32
  695         .set    shift, shift - 1
  696         cmp     \dividend, \divisor, lsl #shift
  697         subcs   \dividend, \dividend, \divisor, lsl #shift
  698         .endr
  699
  700 #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
  701 #if __ARM_ARCH__ >= 5
  702
     /* Align the top bit of the divisor with the top bit of the
        dividend; \order is the number of bit positions shifted.  */
  703         clz     \order, \divisor
  704         clz     \spare, \dividend
  705         sub     \order, \order, \spare
  706         mov     \divisor, \divisor, lsl \order
  707
  708 #else /* __ARM_ARCH__ < 5 */
  709
  710         mov     \order, #0
  711
  712         @ Unless the divisor is very big, shift it up in multiples of
  713         @ four bits, since this is the amount of unwinding in the main
  714         @ division loop.  Continue shifting until the divisor is
  715         @ larger than the dividend.
  716 1:      cmp     \divisor, #0x10000000
  717         cmplo   \divisor, \dividend
  718         movlo   \divisor, \divisor, lsl #4
  719         addlo   \order, \order, #4
  720         blo     1b
  721
  722         @ For very big divisors, we must shift it a bit at a time, or
  723         @ we will be in danger of overflowing.
  724 1:      cmp     \divisor, #0x80000000
  725         cmplo   \divisor, \dividend
  726         movlo   \divisor, \divisor, lsl #1
  727         addlo   \order, \order, #1
  728         blo     1b
  729
  730 #endif /* __ARM_ARCH__ < 5 */
  731
  732         @ Perform all needed subtractions to keep only the remainder.
  733         @ Do comparisons in batch of 4 first.
  734         subs    \order, \order, #3              @ yes, 3 is intended here
  735         blt     2f
  736
  737 1:      cmp     \dividend, \divisor
  738         subhs   \dividend, \dividend, \divisor
  739         cmp     \dividend, \divisor,  lsr #1
  740         subhs   \dividend, \dividend, \divisor, lsr #1
  741         cmp     \dividend, \divisor,  lsr #2
  742         subhs   \dividend, \dividend, \divisor, lsr #2
  743         cmp     \dividend, \divisor,  lsr #3
  744         subhs   \dividend, \dividend, \divisor, lsr #3
     /* Continue with the next batch of four only while the dividend
        is still non-zero and at least four positions remain.  */
  745         cmp     \dividend, #1
  746         mov     \divisor, \divisor, lsr #4
  747         subges  \order, \order, #4
  748         bge     1b
  749
     /* Finished if no positions are left over or the dividend has
        already reached zero.  */
  750         tst     \order, #3
  751         teqne   \dividend, #0
  752         beq     5f
  753
  754         @ Either 1, 2 or 3 comparison/subtractions are left.
  755 2:      cmn     \order, #2
  756         blt     4f
  757         beq     3f
  758         cmp     \dividend, \divisor
  759         subhs   \dividend, \dividend, \divisor
  760         mov     \divisor,  \divisor,  lsr #1
  761 3:      cmp     \dividend, \divisor
  762         subhs   \dividend, \dividend, \divisor
  763         mov     \divisor,  \divisor,  lsr #1
  764 4:      cmp     \dividend, \divisor
  765         subhs   \dividend, \dividend, \divisor
  766 5:
  767
  768 #endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
  769
  770 .endm
 771 /* ------------------------------------------------------------------------ */
  772 .macro THUMB_DIV_MOD_BODY modulo
     /* Thumb-1 unsigned division/remainder core.  It operates on the
        fixed register aliases dividend, divisor, curbit, result/overdone
        and work, and the modulo form also uses ip as a temporary.
          \modulo  assemble-time flag: 0 builds the division form, which
                   accumulates the quotient in result; non-zero builds
                   the remainder form, which leaves the remainder in
                   dividend and uses the same register as overdone.
        The Thumb udivsi3 in this file sets curbit to 1 and result to 0
        and pushes work before expanding this macro.
        The macro defines named labels (Loop1 ... Lgot_result), so
        expanding it more than once in one assembly would redefine them.
        Lgot_result is the exit label and is also a branch target for the
        caller.  */
  773         @ Load the constant 0x10000000 into our work register.
  774         mov     work, #1
  775         lsl     work, #28
  776 LSYM(Loop1):
  777         @ Unless the divisor is very big, shift it up in multiples of
  778         @ four bits, since this is the amount of unwinding in the main
  779         @ division loop.  Continue shifting until the divisor is
  780         @ larger than the dividend.
  781         cmp     divisor, work
  782         bhs     LSYM(Lbignum)
  783         cmp     divisor, dividend
  784         bhs     LSYM(Lbignum)
  785         lsl     divisor, #4
  786         lsl     curbit,  #4
  787         b       LSYM(Loop1)
  788 LSYM(Lbignum):
  789         @ Set work to 0x80000000
  790         lsl     work, #3
  791 LSYM(Loop2):
  792         @ For very big divisors, we must shift it a bit at a time, or
  793         @ we will be in danger of overflowing.
  794         cmp     divisor, work
  795         bhs     LSYM(Loop3)
  796         cmp     divisor, dividend
  797         bhs     LSYM(Loop3)
  798         lsl     divisor, #1
  799         lsl     curbit,  #1
  800         b       LSYM(Loop2)
  801 LSYM(Loop3):
  802         @ Test for possible subtractions ...
  803   .if \modulo
  804         @ ... On the final pass, this may subtract too much from the dividend,
  805         @ so keep track of which subtractions are done, we can fix them up
  806         @ afterwards.
  807         mov     overdone, #0
  808         cmp     dividend, divisor
  809         blo     LSYM(Lover1)
  810         sub     dividend, dividend, divisor
  811 LSYM(Lover1):
  812         lsr     work, divisor, #1
  813         cmp     dividend, work
  814         blo     LSYM(Lover2)
  815         sub     dividend, dividend, work
     /* Record this subtraction as curbit rotated right by one.  A
        rotate rather than a shift is used so that, when curbit is in the
        bottom bits, the mark wraps round into the top bits of overdone
        instead of being lost.  ip preserves curbit across the rotate.  */
  816         mov     ip, curbit
  817         mov     work, #1
  818         ror     curbit, work
  819         orr     overdone, curbit
  820         mov     curbit, ip
  821 LSYM(Lover2):
  822         lsr     work, divisor, #2
  823         cmp     dividend, work
  824         blo     LSYM(Lover3)
  825         sub     dividend, dividend, work
  826         mov     ip, curbit
  827         mov     work, #2
  828         ror     curbit, work
  829         orr     overdone, curbit
  830         mov     curbit, ip
  831 LSYM(Lover3):
  832         lsr     work, divisor, #3
  833         cmp     dividend, work
  834         blo     LSYM(Lover4)
  835         sub     dividend, dividend, work
  836         mov     ip, curbit
  837         mov     work, #3
  838         ror     curbit, work
  839         orr     overdone, curbit
  840         mov     curbit, ip
  841 LSYM(Lover4):
     /* Remember curbit for the fix-up code after the loop.  */
  842         mov     ip, curbit
  843   .else
  844         @ ... and note which bits are done in the result.  On the final pass,
  845         @ this may subtract too much from the dividend, but the result will be ok,
  846         @ since the "bit" will have been shifted out at the bottom.
  847         cmp     dividend, divisor
  848         blo     LSYM(Lover1)
  849         sub     dividend, dividend, divisor
  850         orr     result, result, curbit
  851 LSYM(Lover1):
  852         lsr     work, divisor, #1
  853         cmp     dividend, work
  854         blo     LSYM(Lover2)
  855         sub     dividend, dividend, work
  856         lsr     work, curbit, #1
  857         orr     result, work
  858 LSYM(Lover2):
  859         lsr     work, divisor, #2
  860         cmp     dividend, work
  861         blo     LSYM(Lover3)
  862         sub     dividend, dividend, work
  863         lsr     work, curbit, #2
  864         orr     result, work
  865 LSYM(Lover3):
  866         lsr     work, divisor, #3
  867         cmp     dividend, work
  868         blo     LSYM(Lover4)
  869         sub     dividend, dividend, work
  870         lsr     work, curbit, #3
  871         orr     result, work
  872 LSYM(Lover4):
  873   .endif
  874
  875         cmp     dividend, #0                    @ Early termination?
  876         beq     LSYM(Lover5)
  877         lsr     curbit,  #4                     @ No, any more bits to do?
  878         beq     LSYM(Lover5)
  879         lsr     divisor, #4
  880         b       LSYM(Loop3)
  881 LSYM(Lover5):
  882   .if \modulo
  883         @ Any subtractions that we should not have done will be recorded in
  884         @ the top three bits of "overdone".  Exactly which were not needed
  885         @ are governed by the position of the bit, stored in ip.
  886         mov     work, #0xe
  887         lsl     work, #28
  888         and     overdone, work
  889         beq     LSYM(Lgot_result)
  890
  891         @ If we terminated early, because dividend became zero, then the
  892         @ bit in ip will not be in the bottom nibble, and we should not
  893         @ perform the additions below.  We must test for this though
  894         @ (rather relying upon the TSTs to prevent the additions) since
  895         @ the bit in ip could be in the top two bits which might then match
  896         @ with one of the smaller RORs.
  897         mov     curbit, ip
  898         mov     work, #0x7
  899         tst     curbit, work
  900         beq     LSYM(Lgot_result)
  901
     /* Undo, largest rotation first, each subtraction whose mark
        wrapped into the top bits: add the same shifted divisor back.  */
  902         mov     curbit, ip
  903         mov     work, #3
  904         ror     curbit, work
  905         tst     overdone, curbit
  906         beq     LSYM(Lover6)
  907         lsr     work, divisor, #3
  908         add     dividend, work
  909 LSYM(Lover6):
  910         mov     curbit, ip
  911         mov     work, #2
  912         ror     curbit, work
  913         tst     overdone, curbit
  914         beq     LSYM(Lover7)
  915         lsr     work, divisor, #2
  916         add     dividend, work
  917 LSYM(Lover7):
  918         mov     curbit, ip
  919         mov     work, #1
  920         ror     curbit, work
  921         tst     overdone, curbit
  922         beq     LSYM(Lgot_result)
  923         lsr     work, divisor, #1
  924         add     dividend, work
  925   .endif
  926 LSYM(Lgot_result):
  927 .endm
 928 /* ------------------------------------------------------------------------ */
 929 /*              Start of the Real Functions                                 */
 930 /* ------------------------------------------------------------------------ */
 931 #ifdef L_udivsi3
 932
  933 #if defined(__prefer_thumb__)
     /* __udivsi3, also exported as __aeabi_uidiv: unsigned 32-bit
        division.  The dividend arrives in r0 and the divisor in r1; the
        quotient is returned in r0.  A zero divisor branches to Ldiv0,
        the division-by-zero tail emitted by DIV_FUNC_END below.
        Three implementations follow: Thumb-1, hardware divide, and
        ARM/Thumb-2 software divide.  The label udivsi3_skip_div0_test is
        an entry point for local callers that have already tested the
        divisor.  */
  934
  935         FUNC_START udivsi3
  936         FUNC_ALIAS aeabi_uidiv udivsi3
  937
  938         cmp     divisor, #0
  939         beq     LSYM(Ldiv0)
  940 LSYM(udivsi3_skip_div0_test):
  941         mov     curbit, #1
  942         mov     result, #0
  943
     /* work is saved before the comparison so that both paths reach
        Lgot_result (the end of THUMB_DIV_MOD_BODY) with it on the
        stack.  When dividend < divisor the quotient is the 0 already
        in result.  */
  944         push    { work }
  945         cmp     dividend, divisor
  946         blo     LSYM(Lgot_result)
  947
  948         THUMB_DIV_MOD_BODY 0
  949
  950         mov     r0, result
  951         pop     { work }
  952         RET
  953
  954 #elif defined(__ARM_ARCH_EXT_IDIV__)
  955
  956         ARM_FUNC_START udivsi3
  957         ARM_FUNC_ALIAS aeabi_uidiv udivsi3
  958
  959         cmp     r1, #0
  960         beq     LSYM(Ldiv0)
  961
  962         udiv    r0, r0, r1
  963         RET
  964
  965 #else /* ARM version/Thumb-2.  */
  966
  967         ARM_FUNC_START udivsi3
  968         ARM_FUNC_ALIAS aeabi_uidiv udivsi3
  969
  970         /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
  971            check for division-by-zero a second time.  */
  972 LSYM(udivsi3_skip_div0_test):
     /* r2 = divisor - 1.  Equal means the divisor is 1: return the
        dividend unchanged.  Carry clear means the subtraction borrowed,
        i.e. the divisor is 0.  */
  973         subs    r2, r1, #1
  974         do_it   eq
  975         RETc(eq)
  976         bcc     LSYM(Ldiv0)
     /* dividend <= divisor: the quotient is 1 or 0 (label 11).  */
  977         cmp     r0, r1
  978         bls     11f
     /* divisor & (divisor - 1) == 0: power of two, divide by
        shifting (label 12).  */
  979         tst     r1, r2
  980         beq     12f
  981
  982         ARM_DIV_BODY r0, r1, r2, r3
  983
  984         mov     r0, r2
  985         RET
  986
     /* Flags still come from "cmp r0, r1" above: 1 if equal, else 0.  */
  987 11:     do_it   eq, e
  988         moveq   r0, #1
  989         movne   r0, #0
  990         RET
  991
  992 12:     ARM_DIV2_ORDER r1, r2
  993
  994         mov     r0, r0, lsr r2
  995         RET
  996
  997 #endif /* ARM version */
  998
  999         DIV_FUNC_END udivsi3 unsigned
1000
1001 #if defined(__prefer_thumb__)
1002 FUNC_START aeabi_uidivmod
1003         cmp     r1, #0
1004         beq     LSYM(Ldiv0)
1005         push    {r0, r1, lr}
1006         bl      LSYM(udivsi3_skip_div0_test)
1007         POP     {r1, r2, r3}
1008         mul     r2, r0
1009         sub     r1, r1, r2
1010         bx      r3
1011 #elif defined(__ARM_ARCH_EXT_IDIV__)
1012 ARM_FUNC_START aeabi_uidivmod
1013         cmp     r1, #0
1014         beq     LSYM(Ldiv0)
1015         mov     r2, r0
1016         udiv    r0, r0, r1
1017         mls     r1, r0, r1, r2
1018         RET
1019 #else
1020 ARM_FUNC_START aeabi_uidivmod
1021         cmp     r1, #0
1022         beq     LSYM(Ldiv0)
1023         stmfd   sp!, { r0, r1, lr }
1024         bl      LSYM(udivsi3_skip_div0_test)
1025         ldmfd   sp!, { r1, r2, lr }
1026         mul     r3, r2, r0
1027         sub     r1, r1, r3
1028         RET
1029 #endif
1030         FUNC_END aeabi_uidivmod
1031
1032 #endif /* L_udivsi3 */
1033 /* ------------------------------------------------------------------------ */
1034 #ifdef L_umodsi3
     /* umodsi3: unsigned 32-bit remainder; the dividend arrives in r0 (the
        dividend alias in the Thumb variant) and is replaced by the remainder.
        A zero divisor branches to Ldiv0 in every variant.  */
1035
1036 #ifdef __ARM_ARCH_EXT_IDIV__
1037
1038         ARM_FUNC_START umodsi3
1039
1040         cmp     r1, #0
1041         beq     LSYM(Ldiv0)
             @ r2 = quotient, then r0 = r0 - r1 * r2.
1042         udiv    r2, r0, r1
1043         mls     r0, r1, r2, r0
1044         RET
1045
1046 #elif defined(__thumb__)
1047
1048         FUNC_START umodsi3
1049
1050         cmp     divisor, #0
1051         beq     LSYM(Ldiv0)
1052         mov     curbit, #1
1053         cmp     dividend, divisor
1054         bhs     LSYM(Lover10)
             @ The dividend is below the divisor, so it already is the remainder.
1055         RET
1056
1057 LSYM(Lover10):
1058         push    { work }
1059
1060         THUMB_DIV_MOD_BODY 1
1061
1062         pop     { work }
1063         RET
1064
1065 #else  /* ARM version.  */
1066
1067         FUNC_START umodsi3
1068
             @ The conditional instructions below form one flag chain: each
             @ one either acts on, or deliberately keeps, the flags of the
             @ previous comparison.
1069         subs    r2, r1, #1                      @ compare divisor with 1
1070         bcc     LSYM(Ldiv0)
1071         cmpne   r0, r1                          @ compare dividend with divisor
1072         moveq   r0, #0
1073         tsthi   r1, r2                          @ see if divisor is power of 2
1074         andeq   r0, r0, r2
             @ Return now if the divisor is 1, the dividend does not exceed the
             @ divisor, or the divisor is a power of two; r0 holds the answer.
1075         RETc(ls)
1076
1077         ARM_MOD_BODY r0, r1, r2, r3
1078
1079         RET
1080
1081 #endif /* ARM version.  */
1082
1083         DIV_FUNC_END umodsi3 unsigned
1084
1085 #endif /* L_umodsi3 */
1086 /* ------------------------------------------------------------------------ */
1087 #ifdef L_divsi3
     /* divsi3 (alias aeabi_idiv): signed 32-bit division.
        The software variants record the sign of the quotient as the top bit
        of dividend ^ divisor in ip, divide the absolute values with the
        unsigned code, and negate the quotient when that bit is set.  With
        __ARM_ARCH_EXT_IDIV__ a single sdiv is used instead.  A zero divisor
        branches to Ldiv0 in every variant.  */
1088
1089 #if defined(__prefer_thumb__)
1090
1091         FUNC_START divsi3
1092         FUNC_ALIAS aeabi_idiv divsi3
1093
1094         cmp     divisor, #0
1095         beq     LSYM(Ldiv0)
             @ Entry used by aeabi_idivmod, which has already tested the
             @ divisor for zero.
1096 LSYM(divsi3_skip_div0_test):
1097         push    { work }
1098         mov     work, dividend
1099         eor     work, divisor           @ Save the sign of the result.
1100         mov     ip, work
1101         mov     curbit, #1
1102         mov     result, #0
1103         cmp     divisor, #0
1104         bpl     LSYM(Lover10)
1105         neg     divisor, divisor        @ Loops below use unsigned.
1106 LSYM(Lover10):
1107         cmp     dividend, #0
1108         bpl     LSYM(Lover11)
1109         neg     dividend, dividend
1110 LSYM(Lover11):
             @ If the dividend magnitude is below the divisor magnitude, jump
             @ to the end of THUMB_DIV_MOD_BODY with the quotient still 0.
1111         cmp     dividend, divisor
1112         blo     LSYM(Lgot_result)
1113
1114         THUMB_DIV_MOD_BODY 0
1115
             @ Negate the quotient if the sign bit saved in ip is set.
1116         mov     r0, result
1117         mov     work, ip
1118         cmp     work, #0
1119         bpl     LSYM(Lover12)
1120         neg     r0, r0
1121 LSYM(Lover12):
1122         pop     { work }
1123         RET
1124
1125 #elif defined(__ARM_ARCH_EXT_IDIV__)
1126
1127         ARM_FUNC_START divsi3
1128         ARM_FUNC_ALIAS aeabi_idiv divsi3
1129
1130         cmp     r1, #0
1131         beq     LSYM(Ldiv0)
1132         sdiv    r0, r0, r1
1133         RET
1134
1135 #else /* ARM/Thumb-2 version.  */
1136
1137         ARM_FUNC_START divsi3
1138         ARM_FUNC_ALIAS aeabi_idiv divsi3
1139
1140         cmp     r1, #0
1141         beq     LSYM(Ldiv0)
             @ Entry used by aeabi_idivmod.  The mi test below relies on the
             @ flags of a preceding cmp r1, #0 (done here or by that caller);
             @ the eor does not set flags.
1142 LSYM(divsi3_skip_div0_test):
1143         eor     ip, r0, r1                      @ save the sign of the result.
1144         do_it   mi
1145         rsbmi   r1, r1, #0                      @ loops below use unsigned.
1146         subs    r2, r1, #1                      @ division by 1 or -1 ?
1147         beq     10f
1148         movs    r3, r0
1149         do_it   mi
1150         rsbmi   r3, r0, #0                      @ positive dividend value
             @ Dividend magnitude <= divisor magnitude: quotient is 0 or +/-1.
1151         cmp     r3, r1
1152         bls     11f
1153         tst     r1, r2                          @ divisor is power of 2 ?
1154         beq     12f
1155
1156         ARM_DIV_BODY r3, r1, r0, r2
1157
             @ Apply the saved sign to the quotient in r0.
1158         cmp     ip, #0
1159         do_it   mi
1160         rsbmi   r0, r0, #0
1161         RET
1162
             @ Divisor was 1 or -1.  ip ^ r0 has the sign of the original
             @ divisor, so the dividend is negated only for a divisor of -1.
1163 10:     teq     ip, r0                          @ same sign ?
1164         do_it   mi
1165         rsbmi   r0, r0, #0
1166         RET
1167
             @ Flags are those of cmp r3, r1: lower gives 0; equal gives
             @ (ip asr 31) | 1, that is -1 or +1 according to the saved sign.
1168 11:     do_it   lo
1169         movlo   r0, #0
1170         do_it   eq,t
1171         moveq   r0, ip, asr #31
1172         orreq   r0, r0, #1
1173         RET
1174
             @ Power-of-two divisor: shift the dividend magnitude right, then
             @ apply the saved sign.  Review note: ARM_DIV2_ORDER presumably
             @ leaves log2 of the divisor in r2 - confirm.
1175 12:     ARM_DIV2_ORDER r1, r2
1176
1177         cmp     ip, #0
1178         mov     r0, r3, lsr r2
1179         do_it   mi
1180         rsbmi   r0, r0, #0
1181         RET
1182
1183 #endif /* ARM version */
1184
1185         DIV_FUNC_END divsi3 signed
1186
     /* aeabi_idivmod: signed division returning the quotient in r0 and the
        remainder in r1.  Without a hardware divide the operands are saved on
        the stack, the quotient comes from divsi3_skip_div0_test, and the
        remainder is rebuilt as dividend - divisor * quotient.  */
1187 #if defined(__prefer_thumb__)
1188 FUNC_START aeabi_idivmod
1189         cmp     r1, #0
1190         beq     LSYM(Ldiv0)
1191         push    {r0, r1, lr}
1192         bl      LSYM(divsi3_skip_div0_test)
             @ r1 = saved dividend, r2 = saved divisor, r3 = return address.
1193         POP     {r1, r2, r3}
             @ r2 = divisor * quotient, then r1 = dividend - r2.
1194         mul     r2, r0
1195         sub     r1, r1, r2
1196         bx      r3
1197 #elif defined(__ARM_ARCH_EXT_IDIV__)
1198 ARM_FUNC_START aeabi_idivmod
1199         cmp     r1, #0
1200         beq     LSYM(Ldiv0)
             @ Keep the dividend in r2; mls then forms r1 = r2 - r0 * r1.
1201         mov     r2, r0
1202         sdiv    r0, r0, r1
1203         mls     r1, r0, r1, r2
1204         RET
1205 #else
1206 ARM_FUNC_START aeabi_idivmod
1207         cmp     r1, #0
1208         beq     LSYM(Ldiv0)
1209         stmfd   sp!, { r0, r1, lr }
1210         bl      LSYM(divsi3_skip_div0_test)
             @ r1 = saved dividend, r2 = saved divisor, lr restored.
1211         ldmfd   sp!, { r1, r2, lr }
             @ r3 = divisor * quotient, then r1 = dividend - r3.
1212         mul     r3, r2, r0
1213         sub     r1, r1, r3
1214         RET
1215 #endif
1216         FUNC_END aeabi_idivmod
1217
1218 #endif /* L_divsi3 */
1219 /* ------------------------------------------------------------------------ */
1220 #ifdef L_modsi3
     /* modsi3: signed 32-bit remainder, returned in place of the dividend.
        The software variants take the absolute value of both operands, run
        the unsigned remainder code, and negate the result when the original
        dividend was negative.  With __ARM_ARCH_EXT_IDIV__ the remainder is
        formed as dividend - divisor * (dividend sdiv divisor).  A zero
        divisor branches to Ldiv0 in every variant.  */
1221
1222 #if defined(__ARM_ARCH_EXT_IDIV__)
1223
1224         ARM_FUNC_START modsi3
1225
1226         cmp     r1, #0
1227         beq     LSYM(Ldiv0)
1228
             @ r2 = quotient, then r0 = r0 - r1 * r2.
1229         sdiv    r2, r0, r1
1230         mls     r0, r1, r2, r0
1231         RET
1232
1233 #elif defined(__thumb__)
1234
1235         FUNC_START modsi3
1236
1237         mov     curbit, #1
1238         cmp     divisor, #0
1239         beq     LSYM(Ldiv0)
1240         bpl     LSYM(Lover10)
1241         neg     divisor, divisor                @ Loops below use unsigned.
1242 LSYM(Lover10):
1243         push    { work }
1244         @ Need to save the sign of the dividend, unfortunately, we need
1245         @ work later on.  Must do this after saving the original value of
1246         @ the work register, because we will pop this value off first.
1247         push    { dividend }
1248         cmp     dividend, #0
1249         bpl     LSYM(Lover11)
1250         neg     dividend, dividend
1251 LSYM(Lover11):
             @ If the dividend magnitude is below the divisor magnitude it
             @ already is the remainder; jump to the end of the macro body.
1252         cmp     dividend, divisor
1253         blo     LSYM(Lgot_result)
1254
1255         THUMB_DIV_MOD_BODY 1
1256
             @ First pop: the original signed dividend, used only for its sign.
1257         pop     { work }
1258         cmp     work, #0
1259         bpl     LSYM(Lover12)
1260         neg     dividend, dividend
1261 LSYM(Lover12):
             @ Second pop: restore the caller value of the work register.
1262         pop     { work }
1263         RET
1264
1265 #else /* ARM version.  */
1266
1267         FUNC_START modsi3
1268
1269         cmp     r1, #0
1270         beq     LSYM(Ldiv0)
1271         rsbmi   r1, r1, #0                      @ loops below use unsigned.
1272         movs    ip, r0                          @ preserve sign of dividend
1273         rsbmi   r0, r0, #0                      @ if negative make positive
             @ Same flag chain as the unsigned remainder: handles a divisor of
             @ 1, dividend <= divisor and power-of-two divisors without a loop.
1274         subs    r2, r1, #1                      @ compare divisor with 1
1275         cmpne   r0, r1                          @ compare dividend with divisor
1276         moveq   r0, #0
1277         tsthi   r1, r2                          @ see if divisor is power of 2
1278         andeq   r0, r0, r2
1279         bls     10f
1280
1281         ARM_MOD_BODY r0, r1, r2, r3
1282
             @ Give the remainder the sign of the original dividend kept in ip.
1283 10:     cmp     ip, #0
1284         rsbmi   r0, r0, #0
1285         RET
1286
1287 #endif /* ARM version */
1288
1289         DIV_FUNC_END modsi3 signed
1290
1291 #endif /* L_modsi3 */
1292 /* ------------------------------------------------------------------------ */
1293 #ifdef L_dvmd_tls
     /* Default division-by-zero handlers: they do nothing but return.
        For __ARM_EABI__ both aeabi_idiv0 and aeabi_ldiv0 are declared WEAK
        and are started back to back, so they share the one RET below.
        Otherwise a single div0 routine is provided.  */
1294
1295 #ifdef __ARM_EABI__
1296         WEAK aeabi_idiv0
1297         WEAK aeabi_ldiv0
1298         FUNC_START aeabi_idiv0
1299         FUNC_START aeabi_ldiv0
1300         RET
1301         FUNC_END aeabi_ldiv0
1302         FUNC_END aeabi_idiv0
1303 #else
1304         FUNC_START div0
1305         RET
1306         FUNC_END div0
1307 #endif
1308
1309 #endif /* L_dvmd_tls */
1310 /* ------------------------------------------------------------------------ */
1311 #ifdef L_dvmd_lnx
1312 @ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls
     /* The handler calls raise with SIGFPE as its argument and then returns
        to its caller.  For __ARM_EABI__ the WEAK symbols aeabi_idiv0 and
        aeabi_ldiv0 both label this code; otherwise it is named div0.  */
1313
1314 /* Constant taken from <asm/signal.h>.  */
1315 #define SIGFPE  8
1316
1317 #ifdef __ARM_EABI__
1318         WEAK aeabi_idiv0
1319         WEAK aeabi_ldiv0
1320         ARM_FUNC_START aeabi_idiv0
1321         ARM_FUNC_START aeabi_ldiv0
1322 #else
1323         ARM_FUNC_START div0
1324 #endif
1325
             @ Save lr across the call.  Review note: r1 is presumably pushed
             @ only to keep the stack 8-byte aligned; RETLDM is defined outside
             @ this excerpt and is expected to undo this push - confirm.
1326         do_push {r1, lr}
1327         mov     r0, #SIGFPE
1328         bl      SYM(raise) __PLT__
1329         RETLDM  r1
1330
1331 #ifdef __ARM_EABI__
1332         FUNC_END aeabi_ldiv0
1333         FUNC_END aeabi_idiv0
1334 #else
1335         FUNC_END div0
1336 #endif
1337
1338 #endif /* L_dvmd_lnx */
1339 #ifdef L_clear_cache
     /* clear_cache: issues swi 0 with r7 = 0xf0002 and r2 = 0, leaving the
        incoming r0 and r1 untouched as the first two arguments.  r7 is
        saved and restored around the call.  Building this section for
        anything other than ARM EABI GNU/Linux is a compile-time error.  */
1340 #if defined __ARM_EABI__ && defined __linux__
1341 @ EABI GNU/Linux call to cacheflush syscall.
1342         ARM_FUNC_START clear_cache
1343         do_push {r7}
     /* Both branches load the same value, 0xf0002, into r7; movw/movt is
        used where the architecture test says it is available.  */
1344 #if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
1345         movw    r7, #2
1346         movt    r7, #0xf
1347 #else
1348         mov     r7, #0xf0000
1349         add     r7, r7, #2
1350 #endif
             @ Review note: r2 is presumably the flags argument of the
             @ cacheflush call and r0/r1 the address range - confirm.
1351         mov     r2, #0
1352         swi     0
1353         do_pop  {r7}
1354         RET
1355         FUNC_END clear_cache
1356 #else
1357 #error "This is only for ARM EABI GNU/Linux"
1358 #endif
1359 #endif /* L_clear_cache */
1360 /* ------------------------------------------------------------------------ */
1361 /* Dword shift operations.  */
1362 /* All the following Dword shift variants rely on the fact that
1363         shft xxx, Reg
1364    is in fact done as
1365         shft xxx, (Reg & 255)
1366    so for Reg value in (32...63) and (-1...-31) we will get zero (in the
1367    case of logical shifts) or the sign (for asr).  */
1368
1369 #ifdef __ARMEB__
     /* Big-endian: the low word of the 64-bit operand (al) is r1 and the
        high word (ah) is r0.  */
1370 #define al      r1
1371 #define ah      r0
1372 #else
     /* Little-endian: the low word (al) is r0 and the high word (ah) is r1.  */
1373 #define al      r0
1374 #define ah      r1
1375 #endif
1376
1377 /* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
1378 #ifndef __symbian__
1379
1380 #ifdef L_lshrdi3
     /* lshrdi3 (alias aeabi_llsr): 64-bit logical shift right.
        In:  ah:al = value, r2 = shift count.
        Out: ah:al = value >> r2, zero filled.
        Both variants rely on the register-shift behaviour described in the
        comment above: a count of 32 or more, or a wrapped negative count,
        yields zero for a logical shift.  */
1381
1382         FUNC_START lshrdi3
1383         FUNC_ALIAS aeabi_llsr lshrdi3
1384
1385 #ifdef __thumb__
             @ al = (al >> n) | (ah >> (n - 32)) | (ah << (32 - n)), ah >>= n.
             @ For any given n at most one of the two ah terms is non-zero,
             @ except n == 32 where both equal ah.  r3 and ip hold copies of
             @ the original high word.
1386         lsr     al, r2
1387         mov     r3, ah
1388         lsr     ah, r2
1389         mov     ip, r3
1390         sub     r2, #32
1391         lsr     r3, r2
1392         orr     al, r3
1393         neg     r2, r2
1394         mov     r3, ip
1395         lsl     r3, r2
1396         orr     al, r3
1397         RET
1398 #else
             @ r3 = n - 32 (sets the flags), ip = 32 - n.
             @ mi (n < 32):  al = (al >> n) | (ah << (32 - n)).
             @ pl (n >= 32): al = ah >> (n - 32).
1399         subs    r3, r2, #32
1400         rsb     ip, r2, #32
1401         movmi   al, al, lsr r2
1402         movpl   al, ah, lsr r3
1403         orrmi   al, al, ah, lsl ip
1404         mov     ah, ah, lsr r2
1405         RET
1406 #endif
1407         FUNC_END aeabi_llsr
1408         FUNC_END lshrdi3
1409
1410 #endif
1411
1412 #ifdef L_ashrdi3
     /* ashrdi3 (alias aeabi_lasr): 64-bit arithmetic shift right.
        In:  ah:al = value, r2 = shift count.
        Out: ah:al = value >> r2, sign filled.  */
1413
1414         FUNC_START ashrdi3
1415         FUNC_ALIAS aeabi_lasr ashrdi3
1416
1417 #ifdef __thumb__
             @ The low word is shifted logically, the high word arithmetically.
             @ r3 keeps the original high word for the bits that cross over.
1418         lsr     al, r2
1419         mov     r3, ah
1420         asr     ah, r2
1421         sub     r2, #32
1422         @ If r2 is negative at this point the following step would OR
1423         @ the sign bit into all of AL.  That's not what we want...
1424         bmi     1f
             @ n >= 32: al |= original ah >> (n - 32), arithmetic.
1425         mov     ip, r3
1426         asr     r3, r2
1427         orr     al, r3
1428         mov     r3, ip
1429 1:
             @ al |= original ah << (32 - n); zero once the count is out of
             @ range for a logical shift.
1430         neg     r2, r2
1431         lsl     r3, r2
1432         orr     al, r3
1433         RET
1434 #else
             @ r3 = n - 32 (sets the flags), ip = 32 - n.
             @ mi (n < 32):  al = (al >> n) | (ah << (32 - n)).
             @ pl (n >= 32): al = ah >> (n - 32), arithmetic.
1435         subs    r3, r2, #32
1436         rsb     ip, r2, #32
1437         movmi   al, al, lsr r2
1438         movpl   al, ah, asr r3
1439         orrmi   al, al, ah, lsl ip
1440         mov     ah, ah, asr r2
1441         RET
1442 #endif
1443
1444         FUNC_END aeabi_lasr
1445         FUNC_END ashrdi3
1446
1447 #endif
1448
1449 #ifdef L_ashldi3
     /* ashldi3 (alias aeabi_llsl): 64-bit shift left.
        In:  ah:al = value, r2 = shift count.
        Out: ah:al = value << r2.
        Mirror image of the logical right shift: here the high word collects
        the bits that cross over from the low word.  */
1450
1451         FUNC_START ashldi3
1452         FUNC_ALIAS aeabi_llsl ashldi3
1453
1454 #ifdef __thumb__
             @ ah = (ah << n) | (al << (n - 32)) | (al >> (32 - n)), al <<= n.
             @ Out-of-range counts make the unwanted al term zero.  r3 and ip
             @ hold copies of the original low word.
1455         lsl     ah, r2
1456         mov     r3, al
1457         lsl     al, r2
1458         mov     ip, r3
1459         sub     r2, #32
1460         lsl     r3, r2
1461         orr     ah, r3
1462         neg     r2, r2
1463         mov     r3, ip
1464         lsr     r3, r2
1465         orr     ah, r3
1466         RET
1467 #else
             @ r3 = n - 32 (sets the flags), ip = 32 - n.
             @ mi (n < 32):  ah = (ah << n) | (al >> (32 - n)).
             @ pl (n >= 32): ah = al << (n - 32).
1468         subs    r3, r2, #32
1469         rsb     ip, r2, #32
1470         movmi   ah, ah, lsl r2
1471         movpl   ah, al, lsl r3
1472         orrmi   ah, ah, al, lsr ip
1473         mov     al, al, lsl r2
1474         RET
1475 #endif
1476         FUNC_END aeabi_llsl
1477         FUNC_END ashldi3
1478
1479 #endif
1480
1481 #endif /* !__symbian__ */
1482
1483 #if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
1484     || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
1485     || defined(__ARM_ARCH_5TEJ__)
     /* Set when the clz instruction may be used.  The clzsi2, clzdi2 and
        ctzsi2 sections below test it to choose between clz and a table
        driven search.  */
1486 #define HAVE_ARM_CLZ 1
1487 #endif
1488
1489 #ifdef L_clzsi2
     /* clzsi2: count leading zeros of the 32-bit value in r0; result in r0.
        Without the clz instruction the value is narrowed to its top non-zero
        nibble by three compare-and-shift steps (16, 8 and 4 bits).  r1 starts
        at 28 and loses the amount of every shift taken, so it ends up as the
        number of zero bits above that nibble.  The 16-entry byte table then
        adds the leading zeros inside the nibble (4 for 0, 3 for 1, 2 for 2-3,
        1 for 4-7, 0 for 8-15).  */
1490 #if defined(__ARM_ARCH_6M__)
1491 FUNC_START clzsi2
1492         mov     r1, #28
             @ r3 is the threshold: 0x10000, then 0x100, then 0x10.
1493         mov     r3, #1
1494         lsl     r3, r3, #16
1495         cmp     r0, r3 /* 0x10000 */
1496         bcc     2f
1497         lsr     r0, r0, #16
1498         sub     r1, r1, #16
1499 2:      lsr     r3, r3, #8
1500         cmp     r0, r3 /* #0x100 */
1501         bcc     2f
1502         lsr     r0, r0, #8
1503         sub     r1, r1, #8
1504 2:      lsr     r3, r3, #4
1505         cmp     r0, r3 /* #0x10 */
1506         bcc     2f
1507         lsr     r0, r0, #4
1508         sub     r1, r1, #4
             @ r0 is now below 16 and indexes the table at local label 1.
1509 2:      adr     r2, 1f
1510         ldrb    r0, [r2, r0]
1511         add     r0, r0, r1
1512         bx lr
1513 .align 2
1514 1:
1515 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1516         FUNC_END clzsi2
1517 #else
1518 ARM_FUNC_START clzsi2
1519 # if defined(HAVE_ARM_CLZ)
1520         clz     r0, r0
1521         RET
1522 # else
             @ Same search as the 6-M code above, using conditional execution
             @ instead of branches.
1523         mov     r1, #28
1524         cmp     r0, #0x10000
1525         do_it   cs, t
1526         movcs   r0, r0, lsr #16
1527         subcs   r1, r1, #16
1528         cmp     r0, #0x100
1529         do_it   cs, t
1530         movcs   r0, r0, lsr #8
1531         subcs   r1, r1, #8
1532         cmp     r0, #0x10
1533         do_it   cs, t
1534         movcs   r0, r0, lsr #4
1535         subcs   r1, r1, #4
1536         adr     r2, 1f
1537         ldrb    r0, [r2, r0]
1538         add     r0, r0, r1
1539         RET
1540 .align 2
1541 1:
1542 .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1543 # endif /* !HAVE_ARM_CLZ */
1544         FUNC_END clzsi2
1545 #endif
1546 #endif /* L_clzsi2 */
1547
1548 #ifdef L_clzdi2
     /* clzdi2: count leading zeros of a 64-bit value; result in r0.
        If the high word xxh is non-zero the answer is its leading-zero count,
        otherwise it is 32 plus the leading-zero count of the low word xxl.
        xxh and xxl are register aliases defined outside this excerpt.  */
1549 #if !defined(HAVE_ARM_CLZ)
1550
     /* No clz instruction: delegate each 32-bit count to __clzsi2.
        Review note: r4 is not used below, so it is presumably pushed only to
        keep the stack 8-byte aligned across the calls - confirm.  */
1551 # if defined(__ARM_ARCH_6M__)
1552 FUNC_START clzdi2
1553         push    {r4, lr}
1554 # else
1555 ARM_FUNC_START clzdi2
1556         do_push {r4, lr}
1557 # endif
1558         cmp     xxh, #0
1559         bne     1f
1560 # ifdef __ARMEB__
             @ High word is zero: count the low word, which must first be
             @ moved into r0, and add 32.
1561         mov     r0, xxl
1562         bl      __clzsi2
1563         add     r0, r0, #32
1564         b 2f
1565 1:
             @ High word is non-zero and is passed to __clzsi2 as r0 stands.
1566         bl      __clzsi2
1567 # else
             @ High word is zero: r0 is passed to __clzsi2 as it stands for
             @ the low-word count, then 32 is added.
1568         bl      __clzsi2
1569         add     r0, r0, #32
1570         b 2f
1571 1:
             @ High word is non-zero: count it.
1572         mov     r0, xxh
1573         bl      __clzsi2
1574 # endif
1575 2:
1576 # if defined(__ARM_ARCH_6M__)
1577         pop     {r4, pc}
1578 # else
1579         RETLDM  r4
1580 # endif
1581         FUNC_END clzdi2
1582
1583 #else /* HAVE_ARM_CLZ */
1584
1585 ARM_FUNC_START clzdi2
             @ eq (high word zero): r0 = clz of low word, plus 32.
             @ ne: r0 = clz of high word.
1586         cmp     xxh, #0
1587         do_it   eq, et
1588         clzeq   r0, xxl
1589         clzne   r0, xxh
1590         addeq   r0, r0, #32
1591         RET
1592         FUNC_END clzdi2
1593
1594 #endif
1595 #endif /* L_clzdi2 */
1596
1597 #ifdef L_ctzsi2
     /* ctzsi2: count trailing zeros of the 32-bit value in r0; result in r0.
        r0 & -r0 first isolates the lowest set bit.  With clz available the
        answer is 31 - clz of that single bit.  Otherwise the same
        compare-and-shift search as in the clzsi2 code narrows the value to a
        nibble, and the answer is table[nibble] - r1, where r1 is 28 minus the
        total amount shifted.  */
1598 #if defined(__ARM_ARCH_6M__)
1599 FUNC_START ctzsi2
             @ Isolate the lowest set bit.
1600         neg     r1, r0
1601         and     r0, r0, r1
1602         mov     r1, #28
             @ r3 is the threshold: 0x10000, then 0x100, then 0x10.
1603         mov     r3, #1
1604         lsl     r3, r3, #16
1605         cmp     r0, r3 /* 0x10000 */
1606         bcc     2f
1607         lsr     r0, r0, #16
1608         sub     r1, r1, #16
1609 2:      lsr     r3, r3, #8
1610         cmp     r0, r3 /* #0x100 */
1611         bcc     2f
1612         lsr     r0, r0, #8
1613         sub     r1, r1, #8
1614 2:      lsr     r3, r3, #4
1615         cmp     r0, r3 /* #0x10 */
1616         bcc     2f
1617         lsr     r0, r0, #4
1618         sub     r1, r1, #4
             @ r0 is now below 16 and indexes the table at local label 1.
1619 2:      adr     r2, 1f
1620         ldrb    r0, [r2, r0]
1621         sub     r0, r0, r1
1622         bx lr
1623 .align 2
1624 1:
1625 .byte   27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1626         FUNC_END ctzsi2
1627 #else
1628 ARM_FUNC_START ctzsi2
             @ Isolate the lowest set bit.
1629         rsb     r1, r0, #0
1630         and     r0, r0, r1
1631 # if defined(HAVE_ARM_CLZ)
1632         clz     r0, r0
1633         rsb     r0, r0, #31
1634         RET
1635 # else
1636         mov     r1, #28
1637         cmp     r0, #0x10000
1638         do_it   cs, t
1639         movcs   r0, r0, lsr #16
1640         subcs   r1, r1, #16
1641         cmp     r0, #0x100
1642         do_it   cs, t
1643         movcs   r0, r0, lsr #8
1644         subcs   r1, r1, #8
1645         cmp     r0, #0x10
1646         do_it   cs, t
1647         movcs   r0, r0, lsr #4
1648         subcs   r1, r1, #4
1649         adr     r2, 1f
1650         ldrb    r0, [r2, r0]
1651         sub     r0, r0, r1
1652         RET
1653 .align 2
1654 1:
1655 .byte   27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1656 # endif /* !HAVE_ARM_CLZ */
1657         FUNC_END ctzsi2
1658 #endif
1659 #endif /* L_ctzsi2 */
1660
1661 /* ------------------------------------------------------------------------ */
1662 /* These next two sections are here despite the fact that they contain Thumb
1663    assembler because their presence allows interworked code to be linked even
1664    when the GCC library is this one.  */
1665
1666 /* Do not build the interworking functions when the target architecture does
1667    not support Thumb instructions.  (This can be a multilib option).  */
1668 #if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1669       || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1670       || __ARM_ARCH__ >= 6
1671
1672 #if defined L_call_via_rX
1673
1674 /* These labels & instructions are used by the Arm/Thumb interworking code.
1675    The address of function to be called is loaded into a register and then
1676    one of these labels is called via a BL instruction.  This puts the
1677    return address into the link register with the bottom bit set, and the
1678    code here switches to the correct mode before executing the function.  */
1679
1680         .text
1681         .align 0
1682         .force_thumb
1683
     /* call_via REGISTER emits one Thumb routine named _call_via_REGISTER
        that consists of a bx through that register followed by a nop, and
        records its size.  The macro is expanded once per register below.  */
1684 .macro call_via register
1685         THUMB_FUNC_START _call_via_\register
1686
1687         bx      \register
1688         nop
1689
1690         SIZE    (_call_via_\register)
1691 .endm
1692
1693         call_via r0
1694         call_via r1
1695         call_via r2
1696         call_via r3
1697         call_via r4
1698         call_via r5
1699         call_via r6
1700         call_via r7
1701         call_via r8
1702         call_via r9
1703         call_via sl
1704         call_via fp
1705         call_via ip
1706         call_via sp
1707         call_via lr
1708
1709 #endif /* L_call_via_rX */
1710
1711 /* Don't bother with the old interworking routines for Thumb-2.  */
1712 /* ??? Maybe only omit these on "m" variants.  */
1713 #if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
1714
1715 #if defined L_interwork_call_via_rX
1716
1717 /* These labels & instructions are used by the Arm/Thumb interworking code,
1718    when the target address is in an unknown instruction set.  The address
1719    of function to be called is loaded into a register and then one of these
1720    labels is called via a BL instruction.  This puts the return address
1721    into the link register with the bottom bit set, and the code here
1722    switches to the correct mode before executing the function.  Unfortunately
1723    the target code cannot be relied upon to return via a BX instruction, so
1724    instead we have to store the return address on the stack and allow the
1725    called function to return here instead.  Upon return we recover the real
1726    return address and use a BX to get back to Thumb mode.
1727
1728    There are three variations of this code.  The first,
1729    _interwork_call_via_rN(), will push the return address onto the
1730    stack and pop it in _arm_return().  It should only be used if all
1731    arguments are passed in registers.
1732
1733    The second, _interwork_r7_call_via_rN(), instead stores the return
1734    address at [r7, #-4].  It is the caller's responsibility to ensure
1735    that this address is valid and contains no useful data.
1736
1737    The third, _interwork_r11_call_via_rN(), works in the same way but
1738    uses r11 instead of r7.  It is useful if the caller does not really
1739    need a frame pointer.  */
1740
1741         .text
1742         .align 0
1743
1744         .code   32
1745         .globl _arm_return
1746 LSYM(Lstart_arm_return):
1747         cfi_start       LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
1748         cfi_push        0, 0xe, -0x8, 0x8
1749         nop     @ This nop is for the benefit of debuggers, so that
1750                 @ backtraces will use the correct unwind information.
     /* _arm_return: where an ARM-state callee returns to after a call made
        through _interwork_call_via_rN.  Review note: RETLDM is defined
        outside this excerpt; it is expected to reload the return address
        that the entry stub stored on the stack and branch to it.  */
1751 _arm_return:
1752         RETLDM  unwind=LSYM(Lstart_arm_return)
1753         cfi_end LSYM(Lend_arm_return)
1754
     /* Return points for the frame variants: reload the return address that
        the entry stub stored at [r7, #-4] or [r11, #-4] and bx to it.  */
1755         .globl _arm_return_r7
1756 _arm_return_r7:
1757         ldr     lr, [r7, #-4]
1758         bx      lr
1759
1760         .globl _arm_return_r11
1761 _arm_return_r11:
1762         ldr     lr, [r11, #-4]
1763         bx      lr
1764
     /* interwork_with_frame FRAME, REGISTER, NAME: emits the Thumb entry
        point NAME.  It switches to ARM state with bx pc and tests bit 0 of
        REGISTER.  When the bit is clear, lr is stored at [FRAME, #-4] and
        replaced by the address of the matching _arm_return_FRAME routine.
        The stub then branches to REGISTER.  The fourth macro parameter,
        return, is not referenced in the body and the invocations below do
        not pass it.  */
1765 .macro interwork_with_frame frame, register, name, return
1766         .code   16
1767
1768         THUMB_FUNC_START \name
1769
1770         bx      pc
1771         nop
1772
1773         .code   32
1774         tst     \register, #1
1775         streq   lr, [\frame, #-4]
1776         adreq   lr, _arm_return_\frame
1777         bx      \register
1778
1779         SIZE    (\name)
1780 .endm
1781
     /* interwork REGISTER: emits _interwork_call_via_REGISTER plus its r7 and
        r11 frame variants.  The plain variant switches to ARM state and
        tests bit 0 of REGISTER.  When the bit is clear, lr is stored with
        an 8-byte pre-decrement of sp and replaced by the address of
        _arm_return.  The stub then branches to REGISTER.  */
1782 .macro interwork register
1783         .code   16
1784
1785         THUMB_FUNC_START _interwork_call_via_\register
1786
1787         bx      pc
1788         nop
1789
1790         .code   32
1791         .globl LSYM(Lchange_\register)
1792 LSYM(Lchange_\register):
1793         tst     \register, #1
1794         streq   lr, [sp, #-8]!
1795         adreq   lr, _arm_return
1796         bx      \register
1797
1798         SIZE    (_interwork_call_via_\register)
1799
1800         interwork_with_frame r7,\register,_interwork_r7_call_via_\register
1801         interwork_with_frame r11,\register,_interwork_r11_call_via_\register
1802 .endm
1803
1804         interwork r0
1805         interwork r1
1806         interwork r2
1807         interwork r3
1808         interwork r4
1809         interwork r5
1810         interwork r6
1811         interwork r7
1812         interwork r8
1813         interwork r9
1814         interwork sl
1815         interwork fp
1816         interwork ip
1817         interwork sp
1818
1819         /* The LR case has to be handled a little differently...  */
     /* Here lr holds the call target rather than the return address, so the
        target is copied to ip before lr is overwritten with the address of
        _arm_return.  When bit 0 of the target is clear, lr and pc are pushed
        together: an 8-byte frame, the same size as the [sp, #-8]! store used
        by the other entry points.  */
1820         .code 16
1821
1822         THUMB_FUNC_START _interwork_call_via_lr
1823
1824         bx      pc
1825         nop
1826
1827         .code 32
1828         .globl .Lchange_lr
1829 .Lchange_lr:
1830         tst     lr, #1
1831         stmeqdb r13!, {lr, pc}
1832         mov     ip, lr
1833         adreq   lr, _arm_return
1834         bx      ip
1835
1836         SIZE    (_interwork_call_via_lr)
1837
1838 #endif /* L_interwork_call_via_rX */
1839 #endif /* !__thumb2__ */
1840
1841 /* Functions to support compact pic switch tables in thumb1 state.
1842    All these routines take an index into the table in r0.  The
1843    table is at LR & ~1 (but this must be rounded up in the case
1844    of 32-bit entries).  They are only permitted to clobber r12
1845    and r14 and r0 must be preserved on exit.  */
1846 #ifdef L_thumb1_case_sqi
     /* __gnu_thumb1_case_sqi: switch dispatch through a table of signed
        bytes.  r0 is the index and lr, with bit 0 cleared, is the table
        address.  The selected entry is sign-extended, doubled and added to
        lr, and the routine returns with bx lr to that adjusted address.
        r1 is saved and restored; r0 is not modified.  */
1847
1848         .text
1849         .align 0
1850         .force_thumb
1851         .syntax unified
1852         THUMB_FUNC_START __gnu_thumb1_case_sqi
1853         push    {r1}
             @ r1 = lr with bit 0 cleared: the table base.
1854         mov     r1, lr
1855         lsrs    r1, r1, #1
1856         lsls    r1, r1, #1
             @ r1 = 2 * signed table entry.
1857         ldrsb   r1, [r1, r0]
1858         lsls    r1, r1, #1
1859         add     lr, lr, r1
1860         pop     {r1}
1861         bx      lr
1862         SIZE (__gnu_thumb1_case_sqi)
1863 #endif
1864
1865 #ifdef L_thumb1_case_uqi
     /* __gnu_thumb1_case_uqi: switch dispatch through a table of unsigned
        bytes.  r0 is the index and lr, with bit 0 cleared, is the table
        address.  The selected entry is zero-extended, doubled and added to
        lr, and the routine returns with bx lr to that adjusted address.
        r1 is saved and restored; r0 is not modified.  */
1866
1867         .text
1868         .align 0
1869         .force_thumb
1870         .syntax unified
1871         THUMB_FUNC_START __gnu_thumb1_case_uqi
1872         push    {r1}
             @ r1 = lr with bit 0 cleared: the table base.
1873         mov     r1, lr
1874         lsrs    r1, r1, #1
1875         lsls    r1, r1, #1
             @ r1 = 2 * unsigned table entry.
1876         ldrb    r1, [r1, r0]
1877         lsls    r1, r1, #1
1878         add     lr, lr, r1
1879         pop     {r1}
1880         bx      lr
1881         SIZE (__gnu_thumb1_case_uqi)
1882 #endif
1883
1884 #ifdef L_thumb1_case_shi
     /* __gnu_thumb1_case_shi: switch dispatch through a table of signed
        halfwords.  r0 is the index and lr, with bit 0 cleared, is the table
        address.  The selected entry is sign-extended, doubled and added to
        lr, and the routine returns with bx lr to that adjusted address.
        r0 is scaled internally, so both r0 and r1 are saved and restored.  */
1885
1886         .text
1887         .align 0
1888         .force_thumb
1889         .syntax unified
1890         THUMB_FUNC_START __gnu_thumb1_case_shi
1891         push    {r0, r1}
             @ r1 = lr with bit 0 cleared (table base); r0 = byte offset of
             @ the halfword entry.
1892         mov     r1, lr
1893         lsrs    r1, r1, #1
1894         lsls    r0, r0, #1
1895         lsls    r1, r1, #1
             @ r1 = 2 * signed table entry.
1896         ldrsh   r1, [r1, r0]
1897         lsls    r1, r1, #1
1898         add     lr, lr, r1
1899         pop     {r0, r1}
1900         bx      lr
1901         SIZE (__gnu_thumb1_case_shi)
1902 #endif
1903
1904 #ifdef L_thumb1_case_uhi
     /* __gnu_thumb1_case_uhi: switch dispatch through a table of unsigned
        halfwords.  r0 is the index and lr, with bit 0 cleared, is the table
        address.  The selected entry is zero-extended, doubled and added to
        lr, and the routine returns with bx lr to that adjusted address.
        r0 is scaled internally, so both r0 and r1 are saved and restored.  */
1905
1906         .text
1907         .align 0
1908         .force_thumb
1909         .syntax unified
1910         THUMB_FUNC_START __gnu_thumb1_case_uhi
1911         push    {r0, r1}
             @ r1 = lr with bit 0 cleared (table base); r0 = byte offset of
             @ the halfword entry.
1912         mov     r1, lr
1913         lsrs    r1, r1, #1
1914         lsls    r0, r0, #1
1915         lsls    r1, r1, #1
             @ r1 = 2 * unsigned table entry.
1916         ldrh    r1, [r1, r0]
1917         lsls    r1, r1, #1
1918         add     lr, lr, r1
1919         pop     {r0, r1}
1920         bx      lr
1921         SIZE (__gnu_thumb1_case_uhi)
1922 #endif
1923
1924 #ifdef L_thumb1_case_si
     /* __gnu_thumb1_case_si: switch dispatch through a table of 32-bit
        entries.  r0 is the index.  The table base is lr plus 2 with the low
        two bits cleared, which rounds the address after the call up to a
        word boundary.  The selected word is added to that base and the sum
        is written to pc.  r0 and r1 are saved and restored.  */
1925
1926         .text
1927         .align 0
1928         .force_thumb
1929         .syntax unified
1930         THUMB_FUNC_START __gnu_thumb1_case_si
1931         push    {r0, r1}
1932         mov     r1, lr
1933         adds.n  r1, r1, #2      /* Align to word.  */
1934         lsrs    r1, r1, #2
             @ r0 = byte offset of the word entry; r1 = aligned table base.
1935         lsls    r0, r0, #2
1936         lsls    r1, r1, #2
             @ Target = table base + table entry.
1937         ldr     r0, [r1, r0]
1938         adds    r0, r0, r1
1939         mov     lr, r0
1940         pop     {r0, r1}
1941         mov     pc, lr          /* We know we were called from thumb code.  */
1942         SIZE (__gnu_thumb1_case_si)
1943 #endif
1944
1945 #endif /* Arch supports thumb.  */
1946
1947 #ifndef __symbian__
1948 #ifndef __ARM_ARCH_6M__
1949 #include "ieee754-df.S"
1950 #include "ieee754-sf.S"
1951 #include "bpabi.S"
1952 #else /* __ARM_ARCH_6M__ */
1953 #include "bpabi-v6m.S"
1954 #endif /* __ARM_ARCH_6M__ */
1955 #endif /* !__symbian__ */