/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "xtensa-config.h"
/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */
	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm
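
/* In C terms, the macros above compute (a sketch for exposition only):
	do_abs:   dst = (src < 0) ? -src : src;
	do_addx2: dst = (as << 1) + at;
	do_addx4: dst = (as << 2) + at;
	do_addx8: dst = (as << 3) + at;
   On configurations with the ABS and ADDX options each macro is a single
   instruction; otherwise it expands to the short fallback shown above.  */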
/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */
	.macro leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry \reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm
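
/* A typical use (sketch): a leaf routine is written as
	leaf_entry sp, 16
	...
	leaf_return
   so the same source assembles to an entry/retw pair under the windowed
   ABI and to a plain ret (with no frame setup) under call0.  */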
#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	leaf_return
.LMUL16:
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */
	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
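
	/* The basic bit-at-a-time algorithm in C, for reference (sketch):
		unsigned r = 0;
		while (b != 0) {
		    if (b & 1)
			r += a;
		    a <<= 1;
		    b >>= 1;
		}
	   The code below retires four multiplier bits per iteration,
	   using addx2/addx4/addx8 to fold the shifts into the adds.  */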
	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5
#endif /* !MUL32 && !MUL16 && !MAC16 */

	leaf_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */
#ifdef L_umulsidi3
	.align	4
	.global	__umulsidi3
	.type	__umulsidi3,@function
__umulsidi3:
	leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */
	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */
#if XCHAL_HAVE_MUL32_HIGH
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */
#if XCHAL_HAVE_MUL16
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf
#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a
   workaround using underscores instead of periods when doing the
   concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO
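
/* For example (illustrative only), do_mul(a6, a2, l, a3, h) expands
   to "umul_aa_lh a2, a3" followed by reading the low half of the
   accumulator into a6.  */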
#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#endif
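
	/* Whichever do_mul is in effect, the 64-bit product is assembled
	   from four 16x16 partial products.  Writing x = (xh << 16) + xl
	   and y = (yh << 16) + yl:
		x * y = (xh*yh << 32) + ((xl*yh + xh*yl) << 16) + xl*yl
	   i.e. pp3, pp1 + pp2, and pp0 in the code below.  */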
	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  It uses a custom ABI: the inputs
	   are passed in a13 and a14, the result is returned in a12, and
	   a8 and a15 are clobbered.  */
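
	/* In other words, a caller does (sketch):
		set_arg_l(a13, ...)
		set_arg_h(a14, ...)
		call0	.Lmul_mulsi3
	   and picks up the 16x16 product of the selected halves in a12;
	   this is exactly what the do_mul macro above emits.  */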
	.align	4
.Lmul_mulsi3:
	movi	a12, 0
.Lmul_mult_loop:
	add	a15, a14, a12
	extui	a8, a13, 0, 1
	movnez	a12, a15, a8

	do_addx2 a15, a14, a12, a15
	extui	a8, a13, 1, 1
	movnez	a12, a15, a8

	do_addx4 a15, a14, a12, a15
	extui	a8, a13, 2, 1
	movnez	a12, a15, a8

	do_addx8 a15, a14, a12, a15
	extui	a8, a13, 3, 1
	movnez	a12, a15, a8

	srli	a13, a13, 4
	slli	a14, a14, 4
	bnez	a13, .Lmul_mult_loop
	ret
#endif /* !MUL16 && !MUL32 && !MAC16 */

	.size	__umulsidi3,.-__umulsidi3

#endif /* L_umulsidi3 */
/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */
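
/* Examples of the NSAU semantics implemented here, for reference:
   nsau(0x80000000) = 0, nsau(1) = 31, and nsau(0) = 32.  */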
	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 1f
	movi	\cnt, 16
	slli	\a, \a, 16
1:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 2f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
2:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
#ifdef L_clz
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
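
	/* Entry N of this table is the number of leading zero bits in
	   the 8-bit value N: index 0 maps to 8, index 1 to 7, indices
	   2-3 to 6, and so on down to 0 for 128-255.  */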
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data

#endif /* L_clz */
#ifdef L_clzsi2
	.align	4
	.global	__clzsi2
	.type	__clzsi2,@function
__clzsi2:
	leaf_entry sp, 16
	do_nsau	a2, a2, a3, a4
	leaf_return
	.size	__clzsi2,.-__clzsi2

#endif /* L_clzsi2 */
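
/* The next two routines reduce trailing-zero problems to do_nsau by
   isolating the lowest set bit with (x & -x); e.g. for x = 12
   (binary 1100), x & -x = 4 and ctz = 31 - nsau(4) = 2.  */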
#ifdef L_ctzsi2
	.align	4
	.global	__ctzsi2
	.type	__ctzsi2,@function
__ctzsi2:
	leaf_entry sp, 16
	neg	a3, a2		/* keep only the least-significant bit */
	and	a3, a3, a2
	do_nsau	a2, a3, a4, a5
	movi	a3, 31
	sub	a2, a3, a2
	leaf_return
	.size	__ctzsi2,.-__ctzsi2

#endif /* L_ctzsi2 */
#ifdef L_ffssi2
	.align	4
	.global	__ffssi2
	.type	__ffssi2,@function
__ffssi2:
	leaf_entry sp, 16
	neg	a3, a2		/* keep only the least-significant bit */
	and	a3, a3, a2
	do_nsau	a2, a3, a4, a5
	movi	a3, 32
	sub	a2, a3, a2
	leaf_return
	.size	__ffssi2,.-__ffssi2

#endif /* L_ffssi2 */
#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */
	/* test-subtract-and-shift loop; one quotient bit on each iteration */
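	/* This is standard restoring division; in C (sketch only):
		while (count-- > 0) {
		    if (dividend >= divisor) {
			dividend -= divisor;
			quotient += 1;
		    }
		    quotient <<= 1;
		    divisor >>= 1;
		}
		if (dividend >= divisor)
		    quotient += 1;
	   producing one quotient bit per iteration plus a final bit.  */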
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */
.Lreturn:
	leaf_return
.Lle_one:
	beqz	a3, .Lerror	/* if divisor == 1, return the dividend */
	leaf_return

.Lspecial:
	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	/* just return 0; could throw an exception */

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */
#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	leaf_entry sp, 16
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
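
	/* Signed division is reduced to unsigned division of the
	   magnitudes; the sign bit of the XOR above selects whether to
	   negate the result.  For example, -7 / 2 becomes 7 / 2 = 3,
	   negated to give -3.  */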
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */
	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */
	leaf_return
.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		/* if udivisor == 1, then return...  */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
	leaf_return

.Lerror:
	/* just return 0; could throw an exception */

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */
#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */
.Lreturn:
	leaf_return
.Lle_one:
	/* The divisor is either 0 or 1, so just return 0.
	   Someday we may want to throw an exception if the divisor is 0.  */
	movi	a2, 0
	leaf_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */
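
/* Unlike __udivsi3, the loop above never materializes quotient bits;
   only the running remainder is kept, so each iteration is just a
   compare, an optional subtract, and a shift of the divisor.  */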
#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	leaf_entry sp, 16
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial
	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		/* if (dividend < 0), return -udividend */
.Lpositive:
	leaf_return
.Lle_one:
	/* udivisor is either 0 or 1, so just return 0.
	   Someday we may want to throw an exception if udivisor is 0.  */
	movi	a2, 0
	leaf_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */
#include "ieee754-df.S"
#include "ieee754-sf.S"