gomp-20050608-branch/gcc/config/xtensa/lib1funcs.asm

   1 /* Assembly functions for the Xtensa version of libgcc1.
   2    Copyright (C) 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
   3    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 In addition to the permissions in the GNU General Public License, the
  13 Free Software Foundation gives you unlimited permission to link the
  14 compiled version of this file into combinations with other programs,
  15 and to distribute those combinations without any restriction coming
  16 from the use of this file.  (The General Public License restrictions
  17 do apply in other respects; for example, they cover modification of
  18 the file, and distribution when not linked into a combine
  19 executable.)
  20
  21 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  22 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  23 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  24 for more details.
  25
  26 You should have received a copy of the GNU General Public License
  27 along with GCC; see the file COPYING.  If not, write to the Free
  28 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  29 02110-1301, USA.  */
  30
  31 #include "xtensa-config.h"
  32
  33 # Note: These functions use a minimum stack frame size of 32.  This is
  34 # necessary for Xtensa configurations that only support a fixed register
  35 # window size of 8, where even leaf functions (such as these) need to
  36 # allocate space for a 4-word "extra save area".
  37
  38 # Define macros for the ABS and ADDX* instructions to handle cases
  39 # where they are not included in the Xtensa processor configuration.
  40
  41         .macro  do_abs dst, src, tmp
  42 #if XCHAL_HAVE_ABS
  43         abs     \dst, \src
  44 #else
  45         neg     \tmp, \src
  46         movgez  \tmp, \src, \src
  47         mov     \dst, \tmp
  48 #endif
  49         .endm
  50
  51         .macro  do_addx2 dst, as, at, tmp
  52 #if XCHAL_HAVE_ADDX
  53         addx2   \dst, \as, \at
  54 #else
  55         slli    \tmp, \as, 1
  56         add     \dst, \tmp, \at
  57 #endif
  58         .endm
  59
  60         .macro  do_addx4 dst, as, at, tmp
  61 #if XCHAL_HAVE_ADDX
  62         addx4   \dst, \as, \at
  63 #else
  64         slli    \tmp, \as, 2
  65         add     \dst, \tmp, \at
  66 #endif
  67         .endm
  68
  69         .macro  do_addx8 dst, as, at, tmp
  70 #if XCHAL_HAVE_ADDX
  71         addx8   \dst, \as, \at
  72 #else
  73         slli    \tmp, \as, 3
  74         add     \dst, \tmp, \at
  75 #endif
  76         .endm
  77
  78 # Define macros for function entry and return, supporting either the
  79 # standard register windowed ABI or the non-windowed call0 ABI.  These
  80 # macros do not allocate any extra stack space, so they only work for
  81 # leaf functions that do not need to spill anything to the stack.
  82
  83         .macro abi_entry reg, size
  84 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
  85         entry \reg, \size
  86 #else
  87         /* do nothing */
  88 #endif
  89         .endm
  90
  91         .macro abi_return
  92 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
  93         retw
  94 #else
  95         ret
  96 #endif
  97         .endm
  98
  99
 100 #ifdef L_mulsi3
 101         .align  4
 102         .global __mulsi3
 103         .type   __mulsi3,@function
 104 __mulsi3:
 105         abi_entry sp, 32
 106
 107 #if XCHAL_HAVE_MUL16
 108         or      a4, a2, a3
 109         srai    a4, a4, 16
 110         bnez    a4, .LMUL16
 111         mul16u  a2, a2, a3
 112         abi_return
 113 .LMUL16:
 114         srai    a4, a2, 16
 115         srai    a5, a3, 16
 116         mul16u  a7, a4, a3
 117         mul16u  a6, a5, a2
 118         mul16u  a4, a2, a3
 119         add     a7, a7, a6
 120         slli    a7, a7, 16
 121         add     a2, a7, a4
 122
 123 #elif XCHAL_HAVE_MAC16
 124         mul.aa.hl a2, a3
 125         mula.aa.lh a2, a3
 126         rsr     a5, ACCLO
 127         umul.aa.ll a2, a3
 128         rsr     a4, ACCLO
 129         slli    a5, a5, 16
 130         add     a2, a4, a5
 131
 132 #else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
 133
 134         # Multiply one bit at a time, but unroll the loop 4x to better
 135         # exploit the addx instructions and avoid overhead.
 136         # Peel the first iteration to save a cycle on init.
 137
 138         # Avoid negative numbers.
 139         xor     a5, a2, a3  # top bit is 1 iff one of the inputs is negative
 140         do_abs  a3, a3, a6
 141         do_abs  a2, a2, a6
 142
 143         # Swap so the second argument is smaller.
 144         sub     a7, a2, a3
 145         mov     a4, a3
 146         movgez  a4, a2, a7  # a4 = max(a2, a3)
 147         movltz  a3, a2, a7  # a3 = min(a2, a3)
 148
 149         movi    a2, 0
 150         extui   a6, a3, 0, 1
 151         movnez  a2, a4, a6
 152
 153         do_addx2 a7, a4, a2, a7
 154         extui   a6, a3, 1, 1
 155         movnez  a2, a7, a6
 156
 157         do_addx4 a7, a4, a2, a7
 158         extui   a6, a3, 2, 1
 159         movnez  a2, a7, a6
 160
 161         do_addx8 a7, a4, a2, a7
 162         extui   a6, a3, 3, 1
 163         movnez  a2, a7, a6
 164
 165         bgeui   a3, 16, .Lmult_main_loop
 166         neg     a3, a2
 167         movltz  a2, a3, a5
 168         abi_return
 169
 170         .align  4
 171 .Lmult_main_loop:
 172         srli    a3, a3, 4
 173         slli    a4, a4, 4
 174
 175         add     a7, a4, a2
 176         extui   a6, a3, 0, 1
 177         movnez  a2, a7, a6
 178
 179         do_addx2 a7, a4, a2, a7
 180         extui   a6, a3, 1, 1
 181         movnez  a2, a7, a6
 182
 183         do_addx4 a7, a4, a2, a7
 184         extui   a6, a3, 2, 1
 185         movnez  a2, a7, a6
 186
 187         do_addx8 a7, a4, a2, a7
 188         extui   a6, a3, 3, 1
 189         movnez  a2, a7, a6
 190
 191         bgeui   a3, 16, .Lmult_main_loop
 192
 193         neg     a3, a2
 194         movltz  a2, a3, a5
 195
 196 #endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */
 197
 198         abi_return
 199         .size   __mulsi3,.-__mulsi3
 200
 201 #endif /* L_mulsi3 */
 202
 203
 204 # Define a macro for the NSAU (unsigned normalize shift amount)
 205 # instruction, which computes the number of leading zero bits,
 206 # to handle cases where it is not included in the Xtensa processor
 207 # configuration.
 208
 209         .macro  do_nsau cnt, val, tmp, a
 210 #if XCHAL_HAVE_NSA
 211         nsau    \cnt, \val
 212 #else
 213         mov     \a, \val
 214         movi    \cnt, 0
 215         extui   \tmp, \a, 16, 16
 216         bnez    \tmp, 0f
 217         movi    \cnt, 16
 218         slli    \a, \a, 16
 219 0:
 220         extui   \tmp, \a, 24, 8
 221         bnez    \tmp, 1f
 222         addi    \cnt, \cnt, 8
 223         slli    \a, \a, 8
 224 1:
 225         movi    \tmp, __nsau_data
 226         extui   \a, \a, 24, 8
 227         add     \tmp, \tmp, \a
 228         l8ui    \tmp, \tmp, 0
 229         add     \cnt, \cnt, \tmp
 230 #endif /* !XCHAL_HAVE_NSA */
 231         .endm
 232
 233 #ifdef L_nsau
 234         .section .rodata
 235         .align  4
 236         .global __nsau_data
 237         .type   __nsau_data,@object
 238 __nsau_data:
 239 #if !XCHAL_HAVE_NSA
 240         .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
 241         .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 242         .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 243         .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 244         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 245         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 246         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 247         .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 248         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 249         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 250         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 251         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 252         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 253         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 254         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 255         .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 256 #endif /* !XCHAL_HAVE_NSA */
 257         .size   __nsau_data,.-__nsau_data
 258         .hidden __nsau_data
 259 #endif /* L_nsau */
 260
 261
 262 #ifdef L_udivsi3
 263         .align  4
 264         .global __udivsi3
 265         .type   __udivsi3,@function
 266 __udivsi3:
 267         abi_entry sp, 32
 268         bltui   a3, 2, .Lle_one # check if the divisor <= 1
 269
 270         mov     a6, a2          # keep dividend in a6
 271         do_nsau a5, a6, a2, a7  # dividend_shift = nsau(dividend)
 272         do_nsau a4, a3, a2, a7  # divisor_shift = nsau(divisor)
 273         bgeu    a5, a4, .Lspecial
 274
 275         sub     a4, a4, a5      # count = divisor_shift - dividend_shift
 276         ssl     a4
 277         sll     a3, a3          # divisor <<= count
 278         movi    a2, 0           # quotient = 0
 279
 280         # test-subtract-and-shift loop; one quotient bit on each iteration
 281 #if XCHAL_HAVE_LOOPS
 282         loopnez a4, .Lloopend
 283 #endif /* XCHAL_HAVE_LOOPS */
 284 .Lloop:
 285         bltu    a6, a3, .Lzerobit
 286         sub     a6, a6, a3
 287         addi    a2, a2, 1
 288 .Lzerobit:
 289         slli    a2, a2, 1
 290         srli    a3, a3, 1
 291 #if !XCHAL_HAVE_LOOPS
 292         addi    a4, a4, -1
 293         bnez    a4, .Lloop
 294 #endif /* !XCHAL_HAVE_LOOPS */
 295 .Lloopend:
 296
 297         bltu    a6, a3, .Lreturn
 298         addi    a2, a2, 1       # increment quotient if dividend >= divisor
 299 .Lreturn:
 300         abi_return
 301
 302 .Lle_one:
 303         beqz    a3, .Lerror     # if divisor == 1, return the dividend
 304         abi_return
 305
 306 .Lspecial:
 307         # return dividend >= divisor
 308         bltu    a6, a3, .Lreturn0
 309         movi    a2, 1
 310         abi_return
 311
 312 .Lerror:
 313         # just return 0; could throw an exception
 314
 315 .Lreturn0:
 316         movi    a2, 0
 317         abi_return
 318         .size   __udivsi3,.-__udivsi3
 319
 320 #endif /* L_udivsi3 */
 321
 322
 323 #ifdef L_divsi3
 324         .align  4
 325         .global __divsi3
 326         .type   __divsi3,@function
 327 __divsi3:
 328         abi_entry sp, 32
 329         xor     a7, a2, a3      # sign = dividend ^ divisor
 330         do_abs  a6, a2, a4      # udividend = abs(dividend)
 331         do_abs  a3, a3, a4      # udivisor = abs(divisor)
 332         bltui   a3, 2, .Lle_one # check if udivisor <= 1
 333         do_nsau a5, a6, a2, a8  # udividend_shift = nsau(udividend)
 334         do_nsau a4, a3, a2, a8  # udivisor_shift = nsau(udivisor)
 335         bgeu    a5, a4, .Lspecial
 336
 337         sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
 338         ssl     a4
 339         sll     a3, a3          # udivisor <<= count
 340         movi    a2, 0           # quotient = 0
 341
 342         # test-subtract-and-shift loop; one quotient bit on each iteration
 343 #if XCHAL_HAVE_LOOPS
 344         loopnez a4, .Lloopend
 345 #endif /* XCHAL_HAVE_LOOPS */
 346 .Lloop:
 347         bltu    a6, a3, .Lzerobit
 348         sub     a6, a6, a3
 349         addi    a2, a2, 1
 350 .Lzerobit:
 351         slli    a2, a2, 1
 352         srli    a3, a3, 1
 353 #if !XCHAL_HAVE_LOOPS
 354         addi    a4, a4, -1
 355         bnez    a4, .Lloop
 356 #endif /* !XCHAL_HAVE_LOOPS */
 357 .Lloopend:
 358
 359         bltu    a6, a3, .Lreturn
 360         addi    a2, a2, 1       # increment quotient if udividend >= udivisor
 361 .Lreturn:
 362         neg     a5, a2
 363         movltz  a2, a5, a7      # return (sign < 0) ? -quotient : quotient
 364         abi_return
 365
 366 .Lle_one:
 367         beqz    a3, .Lerror
 368         neg     a2, a6          # if udivisor == 1, then return...
 369         movgez  a2, a6, a7      # (sign < 0) ? -udividend : udividend
 370         abi_return
 371
 372 .Lspecial:
 373         bltu    a6, a3, .Lreturn0 #  if dividend < divisor, return 0
 374         movi    a2, 1
 375         movi    a4, -1
 376         movltz  a2, a4, a7      # else return (sign < 0) ? -1 :  1
 377         abi_return
 378
 379 .Lerror:
 380         # just return 0; could throw an exception
 381
 382 .Lreturn0:
 383         movi    a2, 0
 384         abi_return
 385         .size   __divsi3,.-__divsi3
 386
 387 #endif /* L_divsi3 */
 388
 389
 390 #ifdef L_umodsi3
 391         .align  4
 392         .global __umodsi3
 393         .type   __umodsi3,@function
 394 __umodsi3:
 395         abi_entry sp, 32
 396         bltui   a3, 2, .Lle_one # check if the divisor is <= 1
 397
 398         do_nsau a5, a2, a6, a7  # dividend_shift = nsau(dividend)
 399         do_nsau a4, a3, a6, a7  # divisor_shift = nsau(divisor)
 400         bgeu    a5, a4, .Lspecial
 401
 402         sub     a4, a4, a5      # count = divisor_shift - dividend_shift
 403         ssl     a4
 404         sll     a3, a3          # divisor <<= count
 405
 406         # test-subtract-and-shift loop
 407 #if XCHAL_HAVE_LOOPS
 408         loopnez a4, .Lloopend
 409 #endif /* XCHAL_HAVE_LOOPS */
 410 .Lloop:
 411         bltu    a2, a3, .Lzerobit
 412         sub     a2, a2, a3
 413 .Lzerobit:
 414         srli    a3, a3, 1
 415 #if !XCHAL_HAVE_LOOPS
 416         addi    a4, a4, -1
 417         bnez    a4, .Lloop
 418 #endif /* !XCHAL_HAVE_LOOPS */
 419 .Lloopend:
 420
 421 .Lspecial:
 422         bltu    a2, a3, .Lreturn
 423         sub     a2, a2, a3      # subtract once more if dividend >= divisor
 424 .Lreturn:
 425         abi_return
 426
 427 .Lle_one:
 428         # the divisor is either 0 or 1, so just return 0.
 429         # someday we may want to throw an exception if the divisor is 0.
 430         movi    a2, 0
 431         abi_return
 432         .size   __umodsi3,.-__umodsi3
 433
 434 #endif /* L_umodsi3 */
 435
 436
 437 #ifdef L_modsi3
 438         .align  4
 439         .global __modsi3
 440         .type   __modsi3,@function
 441 __modsi3:
 442         abi_entry sp, 32
 443         mov     a7, a2          # save original (signed) dividend
 444         do_abs  a2, a2, a4      # udividend = abs(dividend)
 445         do_abs  a3, a3, a4      # udivisor = abs(divisor)
 446         bltui   a3, 2, .Lle_one # check if udivisor <= 1
 447         do_nsau a5, a2, a6, a8  # udividend_shift = nsau(udividend)
 448         do_nsau a4, a3, a6, a8  # udivisor_shift = nsau(udivisor)
 449         bgeu    a5, a4, .Lspecial
 450
 451         sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
 452         ssl     a4
 453         sll     a3, a3          # udivisor <<= count
 454
 455         # test-subtract-and-shift loop
 456 #if XCHAL_HAVE_LOOPS
 457         loopnez a4, .Lloopend
 458 #endif /* XCHAL_HAVE_LOOPS */
 459 .Lloop:
 460         bltu    a2, a3, .Lzerobit
 461         sub     a2, a2, a3
 462 .Lzerobit:
 463         srli    a3, a3, 1
 464 #if !XCHAL_HAVE_LOOPS
 465         addi    a4, a4, -1
 466         bnez    a4, .Lloop
 467 #endif /* !XCHAL_HAVE_LOOPS */
 468 .Lloopend:
 469
 470 .Lspecial:
 471         bltu    a2, a3, .Lreturn
 472         sub     a2, a2, a3      # subtract once more if udividend >= udivisor
 473 .Lreturn:
 474         bgez    a7, .Lpositive
 475         neg     a2, a2          # if (dividend < 0), return -udividend
 476 .Lpositive:
 477         abi_return
 478
 479 .Lle_one:
 480         # udivisor is either 0 or 1, so just return 0.
 481         # someday we may want to throw an exception if udivisor is 0.
 482         movi    a2, 0
 483         abi_return
 484         .size   __modsi3,.-__modsi3
 485
 486 #endif /* L_modsi3 */
 487
 488 #include "ieee754-df.S"
 489 #include "ieee754-sf.S"