@ Extracted from the GCC source tree: gcc/config/arm/lib1thumb.asm
@ (web-page navigation text and blob hash removed)
1 @ libgcc1 routines for ARM cpu.
2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
4 /* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 In addition to the permissions in the GNU General Public License, the
12 Free Software Foundation gives you unlimited permission to link the
13 compiled version of this file with other programs, and to distribute
14 those programs without any restriction coming from the use of this
15 file. (The General Public License restrictions do apply in other
16 respects; for example, they cover modification of the file, and
17 distribution when not linked into another program.)
19 This file is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; see the file COPYING. If not, write to
26 the Free Software Foundation, 59 Temple Place - Suite 330,
27 Boston, MA 02111-1307, USA. */
29 /* As a special exception, if you link this library with other files,
30 some of which are compiled with GCC, to produce an executable,
31 this library does not by itself cause the resulting executable
32 to be covered by the GNU General Public License.
33 This exception does not however invalidate any other reasons why
34 the executable file might be covered by the GNU General Public License. */
36 .code 16
38 #ifndef __USER_LABEL_PREFIX__
39 #error __USER_LABEL_PREFIX__ not defined
40 #endif
42 #ifdef __elf__
43 #define __PLT__ (PLT)
44 #define TYPE(x) .type SYM(x),function
45 #define SIZE(x) .size SYM(x), . - SYM(x)
46 #else
47 #define __PLT__
48 #define TYPE(x)
49 #define SIZE(x)
50 #endif
52 #define RET mov pc, lr
54 /* ANSI concatenation macros. */
56 #define CONCAT1(a, b) CONCAT2(a, b)
57 #define CONCAT2(a, b) a ## b
59 /* Use the right prefix for global labels. */
61 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
63 work .req r4 @ XXXX is this safe ?
65 #ifdef L_udivsi3
67 dividend .req r0
68 divisor .req r1
69 result .req r2
70 curbit .req r3
71 ip .req r12
72 sp .req r13
73 lr .req r14
74 pc .req r15
76 .text
77 .globl SYM (__udivsi3)
78 TYPE (__udivsi3)
79 .align 0
80 .thumb_func
81 SYM (__udivsi3):
82 cmp divisor, #0
83 beq Ldiv0
84 mov curbit, #1
85 mov result, #0
87 push { work }
88 cmp dividend, divisor
89 bcc Lgot_result
91 @ Load the constant 0x10000000 into our work register
92 mov work, #1
93 lsl work, #28
94 Loop1:
95 @ Unless the divisor is very big, shift it up in multiples of
96 @ four bits, since this is the amount of unwinding in the main
97 @ division loop. Continue shifting until the divisor is
98 @ larger than the dividend.
99 cmp divisor, work
100 bcs Lbignum
101 cmp divisor, dividend
102 bcs Lbignum
103 lsl divisor, #4
104 lsl curbit, #4
105 b Loop1
107 Lbignum:
108 @ Set work to 0x80000000
109 lsl work, #3
110 Loop2:
111 @ For very big divisors, we must shift it a bit at a time, or
112 @ we will be in danger of overflowing.
113 cmp divisor, work
114 bcs Loop3
115 cmp divisor, dividend
116 bcs Loop3
117 lsl divisor, #1
118 lsl curbit, #1
119 b Loop2
121 Loop3:
122 @ Test for possible subtractions, and note which bits
123 @ are done in the result. On the final pass, this may subtract
124 @ too much from the dividend, but the result will be ok, since the
125 @ "bit" will have been shifted out at the bottom.
126 cmp dividend, divisor
127 bcc Over1
128 sub dividend, dividend, divisor
129 orr result, result, curbit
130 Over1:
131 lsr work, divisor, #1
132 cmp dividend, work
133 bcc Over2
134 sub dividend, dividend, work
135 lsr work, curbit, #1
136 orr result, work
137 Over2:
138 lsr work, divisor, #2
139 cmp dividend, work
140 bcc Over3
141 sub dividend, dividend, work
142 lsr work, curbit, #2
143 orr result, work
144 Over3:
145 lsr work, divisor, #3
146 cmp dividend, work
147 bcc Over4
148 sub dividend, dividend, work
149 lsr work, curbit, #3
150 orr result, work
151 Over4:
152 cmp dividend, #0 @ Early termination?
153 beq Lgot_result
154 lsr curbit, #4 @ No, any more bits to do?
155 beq Lgot_result
156 lsr divisor, #4
157 b Loop3
158 Lgot_result:
159 mov r0, result
160 pop { work }
163 Ldiv0:
164 push { lr }
165 bl SYM (__div0) __PLT__
166 mov r0, #0 @ about as wrong as it could be
167 pop { pc }
169 SIZE (__udivsi3)
171 #endif /* L_udivsi3 */
173 #ifdef L_umodsi3
175 dividend .req r0
176 divisor .req r1
177 overdone .req r2
178 curbit .req r3
179 ip .req r12
180 sp .req r13
181 lr .req r14
182 pc .req r15
184 .text
185 .globl SYM (__umodsi3)
186 TYPE (__umodsi3)
187 .align 0
188 .thumb_func
189 SYM (__umodsi3):
190 cmp divisor, #0
191 beq Ldiv0
192 mov curbit, #1
193 cmp dividend, divisor
194 bcs Over1
195 RET
197 Over1:
198 @ Load the constant 0x10000000 into our work register
199 push { work }
200 mov work, #1
201 lsl work, #28
202 Loop1:
203 @ Unless the divisor is very big, shift it up in multiples of
204 @ four bits, since this is the amount of unwinding in the main
205 @ division loop. Continue shifting until the divisor is
206 @ larger than the dividend.
207 cmp divisor, work
208 bcs Lbignum
209 cmp divisor, dividend
210 bcs Lbignum
211 lsl divisor, #4
212 lsl curbit, #4
213 b Loop1
215 Lbignum:
216 @ Set work to 0x80000000
217 lsl work, #3
218 Loop2:
219 @ For very big divisors, we must shift it a bit at a time, or
220 @ we will be in danger of overflowing.
221 cmp divisor, work
222 bcs Loop3
223 cmp divisor, dividend
224 bcs Loop3
225 lsl divisor, #1
226 lsl curbit, #1
227 b Loop2
229 Loop3:
230 @ Test for possible subtractions. On the final pass, this may
231 @ subtract too much from the dividend, so keep track of which
232 @ subtractions are done, we can fix them up afterwards...
233 mov overdone, #0
234 cmp dividend, divisor
235 bcc Over2
236 sub dividend, dividend, divisor
237 Over2:
238 lsr work, divisor, #1
239 cmp dividend, work
240 bcc Over3
241 sub dividend, dividend, work
242 mov ip, curbit
243 mov work, #1
244 ror curbit, work
245 orr overdone, curbit
246 mov curbit, ip
247 Over3:
248 lsr work, divisor, #2
249 cmp dividend, work
250 bcc Over4
251 sub dividend, dividend, work
252 mov ip, curbit
253 mov work, #2
254 ror curbit, work
255 orr overdone, curbit
256 mov curbit, ip
257 Over4:
258 lsr work, divisor, #3
259 cmp dividend, work
260 bcc Over5
261 sub dividend, dividend, work
262 mov ip, curbit
263 mov work, #3
264 ror curbit, work
265 orr overdone, curbit
266 mov curbit, ip
267 Over5:
268 mov ip, curbit
269 cmp dividend, #0 @ Early termination?
270 beq Over6
271 lsr curbit, #4 @ No, any more bits to do?
272 beq Over6
273 lsr divisor, #4
274 b Loop3
276 Over6:
277 @ Any subtractions that we should not have done will be recorded in
278 @ the top three bits of "overdone". Exactly which were not needed
279 @ are governed by the position of the bit, stored in ip.
280 @ If we terminated early, because dividend became zero,
281 @ then none of the below will match, since the bit in ip will not be
282 @ in the bottom nibble.
284 mov work, #0xe
285 lsl work, #28
286 and overdone, work
287 bne Over7
288 pop { work }
289 RET @ No fixups needed
290 Over7:
291 mov curbit, ip
292 mov work, #3
293 ror curbit, work
294 tst overdone, curbit
295 beq Over8
296 lsr work, divisor, #3
297 add dividend, dividend, work
298 Over8:
299 mov curbit, ip
300 mov work, #2
301 ror curbit, work
302 tst overdone, curbit
303 beq Over9
304 lsr work, divisor, #2
305 add dividend, dividend, work
306 Over9:
307 mov curbit, ip
308 mov work, #1
309 ror curbit, work
310 tst overdone, curbit
311 beq Over10
312 lsr work, divisor, #1
313 add dividend, dividend, work
314 Over10:
315 pop { work }
316 RET
318 Ldiv0:
319 push { lr }
320 bl SYM (__div0) __PLT__
321 mov r0, #0 @ about as wrong as it could be
322 pop { pc }
324 SIZE (__umodsi3)
326 #endif /* L_umodsi3 */
328 #ifdef L_divsi3
330 dividend .req r0
331 divisor .req r1
332 result .req r2
333 curbit .req r3
334 ip .req r12
335 sp .req r13
336 lr .req r14
337 pc .req r15
339 .text
340 .globl SYM (__divsi3)
341 TYPE (__divsi3)
342 .align 0
343 .thumb_func
344 SYM (__divsi3):
345 cmp divisor, #0
346 beq Ldiv0
348 push { work }
349 mov work, dividend
350 eor work, divisor @ Save the sign of the result.
351 mov ip, work
352 mov curbit, #1
353 mov result, #0
354 cmp divisor, #0
355 bpl Over1
356 neg divisor, divisor @ Loops below use unsigned.
357 Over1:
358 cmp dividend, #0
359 bpl Over2
360 neg dividend, dividend
361 Over2:
362 cmp dividend, divisor
363 bcc Lgot_result
365 mov work, #1
366 lsl work, #28
367 Loop1:
368 @ Unless the divisor is very big, shift it up in multiples of
369 @ four bits, since this is the amount of unwinding in the main
370 @ division loop. Continue shifting until the divisor is
371 @ larger than the dividend.
372 cmp divisor, work
373 Bcs Lbignum
374 cmp divisor, dividend
375 Bcs Lbignum
376 lsl divisor, #4
377 lsl curbit, #4
378 b Loop1
380 Lbignum:
381 @ For very big divisors, we must shift it a bit at a time, or
382 @ we will be in danger of overflowing.
383 lsl work, #3
384 Loop2:
385 cmp divisor, work
386 Bcs Loop3
387 cmp divisor, dividend
388 Bcs Loop3
389 lsl divisor, #1
390 lsl curbit, #1
391 b Loop2
393 Loop3:
394 @ Test for possible subtractions, and note which bits
395 @ are done in the result. On the final pass, this may subtract
396 @ too much from the dividend, but the result will be ok, since the
397 @ "bit" will have been shifted out at the bottom.
398 cmp dividend, divisor
399 Bcc Over3
400 sub dividend, dividend, divisor
401 orr result, result, curbit
402 Over3:
403 lsr work, divisor, #1
404 cmp dividend, work
405 Bcc Over4
406 sub dividend, dividend, work
407 lsr work, curbit, #1
408 orr result, work
409 Over4:
410 lsr work, divisor, #2
411 cmp dividend, work
412 Bcc Over5
413 sub dividend, dividend, work
414 lsr work, curbit, #2
415 orr result, result, work
416 Over5:
417 lsr work, divisor, #3
418 cmp dividend, work
419 Bcc Over6
420 sub dividend, dividend, work
421 lsr work, curbit, #3
422 orr result, result, work
423 Over6:
424 cmp dividend, #0 @ Early termination?
425 Beq Lgot_result
426 lsr curbit, #4 @ No, any more bits to do?
427 Beq Lgot_result
428 lsr divisor, #4
429 b Loop3
431 Lgot_result:
432 mov r0, result
433 mov work, ip
434 cmp work, #0
435 Bpl Over7
436 neg r0, r0
437 Over7:
438 pop { work }
439 RET
441 Ldiv0:
442 push { lr }
443 bl SYM (__div0) __PLT__
444 mov r0, #0 @ about as wrong as it could be
445 pop { pc }
447 SIZE (__divsi3)
449 #endif /* L_divsi3 */
451 #ifdef L_modsi3
453 dividend .req r0
454 divisor .req r1
455 overdone .req r2
456 curbit .req r3
457 ip .req r12
458 sp .req r13
459 lr .req r14
460 pc .req r15
462 .text
463 .globl SYM (__modsi3)
464 TYPE (__modsi3)
465 .align 0
466 .thumb_func
467 SYM (__modsi3):
468 mov curbit, #1
469 cmp divisor, #0
470 beq Ldiv0
471 Bpl Over1
472 neg divisor, divisor @ Loops below use unsigned.
473 Over1:
474 push { work }
475 @ Need to save the sign of the dividend, unfortunately, we need
476 @ ip later on. Must do this after saving the original value of
477 @ the work register, because we will pop this value off first.
478 push { dividend }
479 cmp dividend, #0
480 Bpl Over2
481 neg dividend, dividend
482 Over2:
483 cmp dividend, divisor
484 bcc Lgot_result
485 mov work, #1
486 lsl work, #28
487 Loop1:
488 @ Unless the divisor is very big, shift it up in multiples of
489 @ four bits, since this is the amount of unwinding in the main
490 @ division loop. Continue shifting until the divisor is
491 @ larger than the dividend.
492 cmp divisor, work
493 bcs Lbignum
494 cmp divisor, dividend
495 bcs Lbignum
496 lsl divisor, #4
497 lsl curbit, #4
498 b Loop1
500 Lbignum:
501 @ Set work to 0x80000000
502 lsl work, #3
503 Loop2:
504 @ For very big divisors, we must shift it a bit at a time, or
505 @ we will be in danger of overflowing.
506 cmp divisor, work
507 bcs Loop3
508 cmp divisor, dividend
509 bcs Loop3
510 lsl divisor, #1
511 lsl curbit, #1
512 b Loop2
514 Loop3:
515 @ Test for possible subtractions. On the final pass, this may
516 @ subtract too much from the dividend, so keep track of which
517 @ subtractions are done, we can fix them up afterwards...
518 mov overdone, #0
519 cmp dividend, divisor
520 bcc Over3
521 sub dividend, dividend, divisor
522 Over3:
523 lsr work, divisor, #1
524 cmp dividend, work
525 bcc Over4
526 sub dividend, dividend, work
527 mov ip, curbit
528 mov work, #1
529 ror curbit, work
530 orr overdone, curbit
531 mov curbit, ip
532 Over4:
533 lsr work, divisor, #2
534 cmp dividend, work
535 bcc Over5
536 sub dividend, dividend, work
537 mov ip, curbit
538 mov work, #2
539 ror curbit, work
540 orr overdone, curbit
541 mov curbit, ip
542 Over5:
543 lsr work, divisor, #3
544 cmp dividend, work
545 bcc Over6
546 sub dividend, dividend, work
547 mov ip, curbit
548 mov work, #3
549 ror curbit, work
550 orr overdone, curbit
551 mov curbit, ip
552 Over6:
553 mov ip, curbit
554 cmp dividend, #0 @ Early termination?
555 beq Over7
556 lsr curbit, #4 @ No, any more bits to do?
557 beq Over7
558 lsr divisor, #4
559 b Loop3
561 Over7:
562 @ Any subtractions that we should not have done will be recorded in
563 @ the top three bits of "overdone". Exactly which were not needed
564 @ are governed by the position of the bit, stored in ip.
565 @ If we terminated early, because dividend became zero,
566 @ then none of the below will match, since the bit in ip will not be
567 @ in the bottom nibble.
568 mov work, #0xe
569 lsl work, #28
570 and overdone, work
571 beq Lgot_result
573 mov curbit, ip
574 mov work, #3
575 ror curbit, work
576 tst overdone, curbit
577 beq Over8
578 lsr work, divisor, #3
579 add dividend, dividend, work
580 Over8:
581 mov curbit, ip
582 mov work, #2
583 ror curbit, work
584 tst overdone, curbit
585 beq Over9
586 lsr work, divisor, #2
587 add dividend, dividend, work
588 Over9:
589 mov curbit, ip
590 mov work, #1
591 ror curbit, work
592 tst overdone, curbit
593 beq Lgot_result
594 lsr work, divisor, #1
595 add dividend, dividend, work
596 Lgot_result:
597 pop { work }
598 cmp work, #0
599 bpl Over10
600 neg dividend, dividend
601 Over10:
602 pop { work }
603 RET
605 Ldiv0:
606 push { lr }
607 bl SYM (__div0) __PLT__
608 mov r0, #0 @ about as wrong as it could be
609 pop { pc }
611 SIZE (__modsi3)
613 #endif /* L_modsi3 */
615 #ifdef L_dvmd_tls
617 .globl SYM (__div0)
618 TYPE (__div0)
619 .align 0
620 .thumb_func
621 SYM (__div0):
622 RET
624 SIZE (__div0)
626 #endif /* L_divmodsi_tools */
629 #ifdef L_call_via_rX
631 /* These labels & instructions are used by the Arm/Thumb interworking code.
632 The address of function to be called is loaded into a register and then
633 one of these labels is called via a BL instruction. This puts the
634 return address into the link register with the bottom bit set, and the
635 code here switches to the correct mode before executing the function. */
637 .text
638 .align 0
640 .macro call_via register
641 .globl SYM (_call_via_\register)
642 TYPE (_call_via_\register)
643 .thumb_func
644 SYM (_call_via_\register):
645 bx \register
648 SIZE (_call_via_\register)
649 .endm
651 call_via r0
652 call_via r1
653 call_via r2
654 call_via r3
655 call_via r4
656 call_via r5
657 call_via r6
658 call_via r7
659 call_via r8
660 call_via r9
661 call_via sl
662 call_via fp
663 call_via ip
664 call_via sp
665 call_via lr
667 #endif /* L_call_via_rX */
669 #ifdef L_interwork_call_via_rX
671 /* These labels & instructions are used by the Arm/Thumb interworking code,
672 when the target address is in an unknown instruction set. The address
673 of function to be called is loaded into a register and then one of these
674 labels is called via a BL instruction. This puts the return address
675 into the link register with the bottom bit set, and the code here
676 switches to the correct mode before executing the function. Unfortunately
677 the target code cannot be relied upon to return via a BX instruction, so
678 instead we have to store the resturn address on the stack and allow the
679 called function to return here instead. Upon return we recover the real
680 return address and use a BX to get back to Thumb mode. */
682 .text
683 .align 0
685 .code 32
686 .globl _arm_return
687 _arm_return:
688 ldmia r13!, {r12}
689 bx r12
691 .macro interwork register
692 .code 16
694 .globl SYM (_interwork_call_via_\register)
695 TYPE (_interwork_call_via_\register)
696 .thumb_func
697 SYM (_interwork_call_via_\register):
698 bx pc
701 .code 32
702 .globl .Lchange_\register
703 .Lchange_\register:
704 tst \register, #1
705 stmeqdb r13!, {lr}
706 adreq lr, _arm_return
707 bx \register
709 SIZE (_interwork_call_via_\register)
710 .endm
712 interwork r0
713 interwork r1
714 interwork r2
715 interwork r3
716 interwork r4
717 interwork r5
718 interwork r6
719 interwork r7
720 interwork r8
721 interwork r9
722 interwork sl
723 interwork fp
724 interwork ip
725 interwork sp
727 /* The lr case has to be handled a little differently...*/
728 .code 16
729 .globl SYM (_interwork_call_via_lr)
730 TYPE (_interwork_call_via_lr)
731 .thumb_func
732 SYM (_interwork_call_via_lr):
733 bx pc
736 .code 32
737 .globl .Lchange_lr
738 .Lchange_lr:
739 tst lr, #1
740 stmeqdb r13!, {lr}
741 mov ip, lr
742 adreq lr, _arm_return
743 bx ip
745 SIZE (_interwork_call_via_lr)
747 #endif /* L_interwork_call_via_rX */