/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
   Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
#include "xtensa-config.h"

/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */
	.macro	do_abs dst, src, tmp
	movgez	\tmp, \src, \src

	.macro	do_addx2 dst, as, at, tmp

	.macro	do_addx4 dst, as, at, tmp

	.macro	do_addx8 dst, as, at, tmp
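
/* For reference, a C sketch of the semantics these macros provide
   (illustrative only, not part of the build):

     uint32_t addx2 (uint32_t as, uint32_t at) { return (as << 1) + at; }
     uint32_t addx4 (uint32_t as, uint32_t at) { return (as << 2) + at; }
     uint32_t addx8 (uint32_t as, uint32_t at) { return (as << 3) + at; }
     int32_t  abs32 (int32_t src) { return src < 0 ? -src : src; }

   On configurations without the ABS and ADDX instructions, the macros
   expand to equivalent shift/add and neg/movgez sequences.  */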
/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */
	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__

	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	.type	__mulsi3, @function

#elif XCHAL_HAVE_MUL16
#elif XCHAL_HAVE_MAC16
#else /* !MUL32 && !MUL16 && !MAC16 */
	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
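
	/* A rough C equivalent of the unrolled scheme (a sketch with
	   illustrative names, not part of the build): each pass consumes
	   four bits of the smaller operand, folding in shifted copies of
	   the larger operand just as the addx2/addx4/addx8 steps do.

	     uint32_t mul_sketch (uint32_t a, uint32_t b)
	     {
	       uint32_t r = 0;
	       while (b != 0)
		 {
		   if (b & 1) r += a;
		   if (b & 2) r += a << 1;
		   if (b & 4) r += a << 2;
		   if (b & 8) r += a << 3;
		   a <<= 4;
		   b >>= 4;
		 }
	       return r;
	     }
	 */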
	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */

	/* Swap so the second argument is smaller.  */
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */
	do_addx2 a7, a4, a2, a7
	do_addx4 a7, a4, a2, a7
	do_addx8 a7, a4, a2, a7
	bgeui	a3, 16, .Lmult_main_loop

	do_addx2 a7, a4, a2, a7
	do_addx4 a7, a4, a2, a7
	do_addx8 a7, a4, a2, a7
	bgeui	a3, 16, .Lmult_main_loop
#endif /* !MUL32 && !MUL16 && !MAC16 */

	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */


#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1

	.type	__umulsidi3, @function

#if __XTENSA_CALL0_ABI__
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */

#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
	/* Get the high halves of the inputs into registers.  */

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */

#endif /* MUL16 || MUL32 */
#if XCHAL_HAVE_MUL16
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \

#else /* no multiply hardware */
#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */

	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */

	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
#endif /* !MUL32_HIGH */
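
/* The !MUL32_HIGH path assembles the 32 x 32 -> 64 bit product from
   four 16 x 16 -> 32 bit partial products.  A C sketch of the
   arithmetic (illustrative names only; the code above additionally
   tracks the carry-outs explicitly):

     uint64_t umulsidi3_sketch (uint32_t x, uint32_t y)
     {
       uint32_t xl = x & 0xffff, xh = x >> 16;
       uint32_t yl = y & 0xffff, yh = y >> 16;
       uint64_t pp0 = (uint64_t) xl * yl;
       uint64_t pp1 = (uint64_t) xl * yh;
       uint64_t pp2 = (uint64_t) xh * yl;
       uint64_t pp3 = (uint64_t) xh * yh;
       return pp0 + ((pp1 + pp2) << 16) + (pp3 << 32);
     }
*/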
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */

#if __XTENSA_CALL0_ABI__

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */

	.macro	mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */
/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */
	.macro	do_nsau cnt, val, tmp, a
	extui	\tmp, \a, 16, 16
	extui	\tmp, \a, 24, 8
	movi	\tmp, __nsau_data

#endif /* !XCHAL_HAVE_NSA */

	.type	__nsau_data, @object
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
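
/* A C sketch of the table-driven fallback (illustrative only; the
   nsau_data array stands in for __nsau_data above): the argument is
   narrowed to its most significant nonzero byte, which then indexes
   the table; each entry holds the number of leading zeros within a
   single byte, so the first entry is 8.

     int nsau_sketch (uint32_t val)
     {
       extern const unsigned char nsau_data[256];
       int cnt = 0;
       if (val >> 16) val >>= 16; else cnt += 16;
       if (val >> 8)  val >>= 8;  else cnt += 8;
       return cnt + nsau_data[val];
     }

   This returns 32 for a zero argument (24 plus the table entry 8).  */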
	.type	__clzsi2, @function
	do_nsau	a2, a2, a3, a4
	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */


	.type	__ctzsi2, @function
	do_nsau	a2, a3, a4, a5
	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */


	.type	__ffssi2, @function
	do_nsau	a2, a3, a4, a5
	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */
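
/* How the three functions above reduce to NSAU, as a C sketch
   (illustrative only; nsau_sketch is the helper sketched earlier,
   returning the number of leading zeros, 32 for zero).  Isolating
   the lowest set bit with x & -x lets the same leading-zero
   primitive answer trailing-zero queries:

     int clz_sketch (uint32_t x) { return nsau_sketch (x); }
     int ctz_sketch (uint32_t x) { return 31 - nsau_sketch (x & -x); }
     int ffs_sketch (uint32_t x) { return 32 - nsau_sketch (x & -x); }
*/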
	.type	__udivsi3, @function
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */
	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
	bltu	a6, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS
#endif /* !XCHAL_HAVE_LOOPS */
	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */

	beqz	a3, .Lerror	/* if divisor == 1, return the dividend */

	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */
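
/* A C sketch of the test-subtract-and-shift scheme (illustrative
   only, covering the main path where d >= 2 and n has strictly more
   significant bits than d; the d <= 1 and quotient-is-0-or-1 cases
   take the .Lle_one and .Lspecial paths).  nsau_sketch is the helper
   sketched earlier.

     uint32_t udivsi3_sketch (uint32_t n, uint32_t d)
     {
       int count = nsau_sketch (d) - nsau_sketch (n);
       uint32_t q = 0;
       d <<= count;
       for (int i = count; i >= 0; i--)
	 {
	   q <<= 1;
	   if (n >= d)
	     {
	       n -= d;
	       q |= 1;
	     }
	   d >>= 1;
	 }
       return q;
     }

   Each pass produces one quotient bit; the assembly peels the final
   trial subtraction out of the loop.  n is left holding the
   remainder, which is what __umodsi3 below returns instead.  */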
	.type	__divsi3, @function
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */
	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
	bltu	a6, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS
#endif /* !XCHAL_HAVE_LOOPS */
	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */

	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */

	neg	a2, a6		/* if udivisor == 1, then return...  */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */

	bltu	a6, a3, .Lreturn0	/* if dividend < divisor, return 0 */
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */
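
/* Sign handling in __divsi3, as a C sketch (illustrative only, using
   udivsi3_sketch from above; the quotient is negative exactly when
   the operands' signs differ):

     int32_t divsi3_sketch (int32_t a, int32_t b)
     {
       int32_t sign = a ^ b;
       uint32_t q = udivsi3_sketch (a < 0 ? -a : a, b < 0 ? -b : b);
       return sign < 0 ? -(int32_t) q : (int32_t) q;
     }
*/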
	.type	__umodsi3, @function
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	sll	a3, a3		/* divisor <<= count */
	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
	bltu	a2, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS
#endif /* !XCHAL_HAVE_LOOPS */
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */
	.type	__modsi3, @function
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	sll	a3, a3		/* udivisor <<= count */
	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
	bltu	a2, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS
#endif /* !XCHAL_HAVE_LOOPS */
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */

	neg	a2, a2		/* if (dividend < 0), return -udividend */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */
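
/* The modulo routines reuse the same test-subtract-and-shift loop but
   keep the running dividend instead of accumulating quotient bits; the
   sign of the signed remainder follows the dividend.  A C sketch
   (illustrative only, same preconditions as udivsi3_sketch):

     uint32_t umodsi3_sketch (uint32_t n, uint32_t d)
     {
       int count = nsau_sketch (d) - nsau_sketch (n);
       d <<= count;
       for (int i = count; i >= 0; i--)
	 {
	   if (n >= d)
	     n -= d;
	   d >>= 1;
	 }
       return n;
     }

     int32_t modsi3_sketch (int32_t a, int32_t b)
     {
       uint32_t r = umodsi3_sketch (a < 0 ? -a : a, b < 0 ? -b : b);
       return a < 0 ? -(int32_t) r : (int32_t) r;
     }
*/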
#endif /* __XTENSA_EB__ */

	.type	__ashldi3, @function
	bgei	a4, 32, .Llow_only
	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */

	.type	__ashrdi3, @function
	bgei	a4, 32, .Lhigh_only
	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */

	.type	__lshrdi3, @function
	bgei	a4, 32, .Lhigh_only1
	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */

#include "ieee754-df.S"
#include "ieee754-sf.S"