/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007
   Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "xtensa-config.h"
/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */
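/* For reference, a C sketch of the operations these macros emulate
   (the helper names here are illustrative, not part of this file):

   #include <stdint.h>

   // abs: absolute value of a signed 32-bit integer
   static int32_t emu_abs (int32_t src)
   {
     return src < 0 ? -src : src;
   }

   // addx2/addx4/addx8: shift the first operand left by 1/2/3 bits
   // and add the second operand
   static int32_t emu_addx (int32_t as, int32_t at, int shift)
   {
     return (as << shift) + at;
   }
*/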
	.macro	do_abs dst, src, tmp
	movgez	\tmp, \src, \src

	.macro	do_addx2 dst, as, at, tmp

	.macro	do_addx4 dst, as, at, tmp

	.macro	do_addx8 dst, as, at, tmp
/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */
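/* A rough sketch of the intended expansions (an assumption based on the
   two ABIs; the full macro bodies are not shown here):

   windowed ABI:                  call0 ABI:
     leaf_entry reg, size ->        leaf_entry reg, size ->
       entry  reg, size               (nothing)
     leaf_return ->                 leaf_return ->
       retw                           ret
*/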
	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__

#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__

	.type	__mulsi3, @function

#elif XCHAL_HAVE_MUL16

#elif XCHAL_HAVE_MAC16

#else /* !MUL32 && !MUL16 && !MAC16 */
	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
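	/* A C sketch of this shift-add scheme (the helper name is
	   illustrative, not part of this file):

	   #include <stdint.h>

	   static uint32_t mul_shift_add (uint32_t x, uint32_t y)
	   {
	     uint32_t result = 0;
	     // Consume four bits of y per step, adding x, 2*x, 4*x, and
	     // 8*x (the addx2/addx4/addx8 forms) instead of shifting x
	     // one bit at a time.
	     while (y != 0)
	       {
	         if (y & 1) result += x;
	         if (y & 2) result += x << 1;
	         if (y & 4) result += x << 2;
	         if (y & 8) result += x << 3;
	         x <<= 4;
	         y >>= 4;
	       }
	     return result;
	   }
	*/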
	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */

	/* Swap so the second argument is smaller.  */
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	do_addx2 a7, a4, a2, a7

	do_addx4 a7, a4, a2, a7

	do_addx8 a7, a4, a2, a7

	bgeui	a3, 16, .Lmult_main_loop

	do_addx2 a7, a4, a2, a7

	do_addx4 a7, a4, a2, a7

	do_addx8 a7, a4, a2, a7

	bgeui	a3, 16, .Lmult_main_loop

#endif /* !MUL32 && !MUL16 && !MAC16 */

	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1

	.type	__umulsidi3, @function
#if __XTENSA_CALL0_ABI__

	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */

#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

	/* Get the high halves of the inputs into registers.  */

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */

#endif /* MUL16 || MUL32 */

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \

#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */
	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */

	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */

	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
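	/* In C terms, the partial-product scheme used here looks roughly
	   like this (names are illustrative; the real code keeps
	   everything in registers):

	   #include <stdint.h>

	   static uint64_t umulsidi3_sketch (uint32_t x, uint32_t y)
	   {
	     uint32_t xl = x & 0xffff, xh = x >> 16;
	     uint32_t yl = y & 0xffff, yh = y >> 16;

	     uint32_t pp0 = xl * yl;   // low  * low
	     uint32_t pp1 = xl * yh;   // low  * high
	     uint32_t pp2 = xh * yl;   // high * low
	     uint32_t pp3 = xh * yh;   // high * high

	     uint64_t mid = (uint64_t) pp1 + pp2;   // may carry out
	     return ((uint64_t) pp3 << 32) + (mid << 16) + pp0;
	   }
	*/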
#endif /* !MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */

#if __XTENSA_CALL0_ABI__

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */

	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2

1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8

	/* The result will be written into a2, so save that argument in a4.  */

	mul_mulsi3_body a2, a4, a3, a5, a6

#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */
/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */
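/* In C, the fallback computes leading zeros roughly as follows, using a
   256-entry table of leading-zero counts for a single byte (the table
   name here is illustrative; __nsau_data below plays that role):

   #include <stdint.h>

   extern const unsigned char nsau_table[256];   // nsau_table[0] == 8

   static int nsau_sketch (uint32_t a)
   {
     int cnt = 0;
     if (a < 0x10000)   { cnt = 16;  a <<= 16; }  // high half is zero
     if (a < 0x1000000) { cnt += 8;  a <<= 8;  }  // next byte is zero
     return cnt + nsau_table[a >> 24];            // finish with a lookup
   }
*/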
	.macro	do_nsau cnt, val, tmp, a

	extui	\tmp, \a, 16, 16

	extui	\tmp, \a, 24, 8

	movi	\tmp, __nsau_data

#endif /* !XCHAL_HAVE_NSA */

	.type	__nsau_data, @object

	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
	.type	__clzsi2, @function

	do_nsau	a2, a2, a3, a4

	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */

	.type	__ctzsi2, @function

	do_nsau	a2, a3, a4, a5

	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */

	.type	__ffssi2, @function

	do_nsau	a2, a3, a4, a5

	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */
	.type	__udivsi3, @function

	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */

	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
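	/* A C sketch of this normalize-then-subtract scheme (the name is
	   illustrative; it assumes 2 <= divisor <= dividend, since the
	   other cases are handled by .Lle_one and .Lspecial):

	   #include <stdint.h>

	   static uint32_t udiv_sketch (uint32_t dividend, uint32_t divisor)
	   {
	     // Align the divisor's top bit with the dividend's top bit.
	     int count = __builtin_clz (divisor) - __builtin_clz (dividend);
	     divisor <<= count;

	     uint32_t quotient = 0;
	     for (int i = 0; i <= count; i++)   // one quotient bit per step
	       {
	         quotient <<= 1;
	         if (dividend >= divisor)
	           {
	             dividend -= divisor;
	             quotient |= 1;
	           }
	         divisor >>= 1;
	       }
	     return quotient;   // "dividend" now holds the remainder
	   }
	*/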
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */

	bltu	a6, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS

#endif /* !XCHAL_HAVE_LOOPS */

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */

	beqz	a3, .Lerror	/* if divisor == 1, return the dividend */

	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */
	.type	__divsi3, @function

	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */

	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */

	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */

	bltu	a6, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS

#endif /* !XCHAL_HAVE_LOOPS */

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */

	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */

	neg	a2, a6		/* if udivisor == 1, then return...  */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */

	bltu	a6, a3, .Lreturn0	/* if dividend < divisor, return 0 */

	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
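	/* The sign handling corresponds to this C sketch (names are
	   illustrative; udiv_sketch stands in for the unsigned sequence
	   above, and the INT_MIN corner case is glossed over):

	   #include <stdint.h>

	   extern uint32_t udiv_sketch (uint32_t, uint32_t);

	   static int32_t div_sketch (int32_t dividend, int32_t divisor)
	   {
	     int32_t sign = dividend ^ divisor;   // top bit set if signs differ
	     uint32_t uq =
	       udiv_sketch (dividend < 0 ? -(uint32_t) dividend : (uint32_t) dividend,
	                    divisor < 0 ? -(uint32_t) divisor : (uint32_t) divisor);
	     return sign < 0 ? -(int32_t) uq : (int32_t) uq;
	   }
	*/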
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */
	.type	__umodsi3, @function

	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */

	sll	a3, a3		/* divisor <<= count */

	/* test-subtract-and-shift loop */

	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */

	bltu	a2, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS

#endif /* !XCHAL_HAVE_LOOPS */

	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */
	.type	__modsi3, @function

	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */

	sll	a3, a3		/* udivisor <<= count */

	/* test-subtract-and-shift loop */

	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */

	bltu	a2, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS

#endif /* !XCHAL_HAVE_LOOPS */

	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */

	neg	a2, a2		/* if (dividend < 0), return -udividend */
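	/* In C terms, the remainder takes the sign of the dividend (names
	   are illustrative; umod_sketch stands in for the unsigned loop
	   above, and the INT_MIN corner case is glossed over):

	   #include <stdint.h>

	   extern uint32_t umod_sketch (uint32_t, uint32_t);

	   static int32_t mod_sketch (int32_t dividend, int32_t divisor)
	   {
	     uint32_t ur =
	       umod_sketch (dividend < 0 ? -(uint32_t) dividend : (uint32_t) dividend,
	                    divisor < 0 ? -(uint32_t) divisor : (uint32_t) divisor);
	     return dividend < 0 ? -(int32_t) ur : (int32_t) ur;
	   }
	*/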
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */

#endif /* __XTENSA_EB__ */
	.type	__ashldi3, @function

	bgei	a4, 32, .Llow_only

	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */
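/* All three double-word shifts split on whether the shift count reaches
   32.  A C sketch for the left shift (the name is illustrative;
   __ashrdi3 and __lshrdi3 below follow the same pattern with the roles
   of the two halves reversed):

   #include <stdint.h>

   static uint64_t ashldi3_sketch (uint64_t x, int n)   // 0 <= n < 64
   {
     uint32_t lo = (uint32_t) x, hi = (uint32_t) (x >> 32);
     if (n >= 32)
       {
         // Only the low word contributes; it lands in the high word.
         hi = lo << (n - 32);
         lo = 0;
       }
     else if (n != 0)
       {
         hi = (hi << n) | (lo >> (32 - n));
         lo <<= n;
       }
     return ((uint64_t) hi << 32) | lo;
   }
*/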
	.type	__ashrdi3, @function

	bgei	a4, 32, .Lhigh_only

	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */

	.type	__lshrdi3, @function

	bgei	a4, 32, .Lhigh_only1

	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */

#include "ieee754-df.S"
#include "ieee754-sf.S"