/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001,2002 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "xtensa/xtensa-config.h"

#ifdef L_mulsi3
        .align  4
        .global __mulsi3
        .type   __mulsi3,@function
__mulsi3:
        entry   sp, 16

#if XCHAL_HAVE_MUL16
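        # When both operands are no wider than 16 bits (their high halves
        # are zero), a single MUL16U gives the exact 32-bit product.
        # Otherwise split each operand into high and low halves and combine
        # three 16x16 partial products; the low 32 bits of the result are
        #     (alo * blo) + ((ahi * blo + bhi * alo) << 16)
        # which is all that __mulsi3 needs to return.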
        or      a4, a2, a3
        srai    a4, a4, 16
        bnez    a4, .LMUL16
        mul16u  a2, a2, a3
        retw
.LMUL16:
        srai    a4, a2, 16
        srai    a5, a3, 16
        mul16u  a7, a4, a3
        mul16u  a6, a5, a2
        mul16u  a4, a2, a3
        add     a7, a7, a6
        slli    a7, a7, 16
        add     a2, a7, a4

#elif XCHAL_HAVE_MAC16
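        # This path builds the same low-32-bit result from 16x16 partial
        # products, but accumulates the two cross products in the MAC16
        # unit; ACCLO (special register 16) is read back with RSR after
        # the cross products and again after the low product.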
        mul.aa.hl a2, a3
        mula.aa.lh a2, a3
        rsr     a5, 16          # ACCLO
        umul.aa.ll a2, a3
        rsr     a4, 16          # ACCLO
        slli    a5, a5, 16
        add     a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

        # Multiply one bit at a time, but unroll the loop 4x to better
        # exploit the addx instructions.
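
        # For reference, the unrolled code below is equivalent to this
        # bit-at-a-time C model (an illustrative sketch only; the names
        # are made up and do not appear elsewhere in this file):
        #
        #     unsigned mul_model (unsigned a, unsigned b)
        #     {
        #       unsigned result = 0;
        #       while (b != 0)
        #         {
        #           if (b & 1)
        #             result += a;
        #           a <<= 1;
        #           b >>= 1;
        #         }
        #       return result;
        #     }
        #
        # The assembly works on absolute values and fixes the sign at the
        # end, and it handles 4 multiplier bits per iteration using the
        # ADDX2/ADDX4/ADDX8 instructions.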

        # Peel the first iteration to save a cycle on init

        # avoid negative numbers

        xor     a5, a2, a3      # top bit is 1 iff one of the inputs is negative
        abs     a3, a3
        abs     a2, a2

        # swap so that second argument is smaller
        sub     a7, a2, a3
        mov     a4, a3
        movgez  a4, a2, a7      # a4 = max(a2, a3)
        movltz  a3, a2, a7      # a3 = min(a2, a3)

        movi    a2, 0
        extui   a6, a3, 0, 1
        movnez  a2, a4, a6

        addx2   a7, a4, a2
        extui   a6, a3, 1, 1
        movnez  a2, a7, a6

        addx4   a7, a4, a2
        extui   a6, a3, 2, 1
        movnez  a2, a7, a6

        addx8   a7, a4, a2
        extui   a6, a3, 3, 1
        movnez  a2, a7, a6

        bgeui   a3, 16, .Lmult_main_loop
        neg     a3, a2
        movltz  a2, a3, a5
        retw

        .align  4
.Lmult_main_loop:
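        # Main loop: consume 4 multiplier bits per iteration.  The
        # multiplicand in a4 is shifted left by 4 and the remaining
        # multiplier bits in a3 are shifted right by 4 each time around.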
        srli    a3, a3, 4
        slli    a4, a4, 4

        add     a7, a4, a2
        extui   a6, a3, 0, 1
        movnez  a2, a7, a6

        addx2   a7, a4, a2
        extui   a6, a3, 1, 1
        movnez  a2, a7, a6

        addx4   a7, a4, a2
        extui   a6, a3, 2, 1
        movnez  a2, a7, a6

        addx8   a7, a4, a2
        extui   a6, a3, 3, 1
        movnez  a2, a7, a6

        bgeui   a3, 16, .Lmult_main_loop

        neg     a3, a2
        movltz  a2, a3, a5

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

        retw
.Lfe0:
        .size   __mulsi3,.Lfe0-__mulsi3

#endif /* L_mulsi3 */

        # Some Xtensa configurations include the NSAU (unsigned
        # normalize shift amount) instruction which computes the number
        # of leading zero bits.  For other configurations, the "nsau"
        # operation is implemented as a macro.
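
        # A C sketch of what the macro computes (illustrative only; the
        # function and variable names here are not used elsewhere):
        #
        #     unsigned nsau_model (unsigned val)
        #     {
        #       unsigned cnt = 0;
        #       if ((val >> 16) == 0)
        #         {
        #           cnt = 16;
        #           val <<= 16;
        #         }
        #       if ((val >> 24) == 0)
        #         {
        #           cnt += 8;
        #           val <<= 8;
        #         }
        #       return cnt + __nsau_data[val >> 24];
        #     }
        #
        # For a zero argument the result is 32 (16 + 8 + __nsau_data[0]),
        # matching the NSAU instruction.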

#if !XCHAL_HAVE_NSA
        .macro  nsau cnt, val, tmp, a
        mov     \a, \val
        movi    \cnt, 0
        extui   \tmp, \a, 16, 16
        bnez    \tmp, 0f
        movi    \cnt, 16
        slli    \a, \a, 16
0:
        extui   \tmp, \a, 24, 8
        bnez    \tmp, 1f
        addi    \cnt, \cnt, 8
        slli    \a, \a, 8
1:
        movi    \tmp, __nsau_data
        extui   \a, \a, 24, 8
        add     \tmp, \tmp, \a
        l8ui    \tmp, \tmp, 0
        add     \cnt, \cnt, \tmp
        .endm
#endif /* !XCHAL_HAVE_NSA */

#ifdef L_nsau
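        # __nsau_data[i] is the number of leading zero bits in the 8-bit
        # value i, with __nsau_data[0] defined as 8.  The table is only
        # emitted (and only needed) when the NSA option is not available.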
        .section .rodata
        .align  4
        .global __nsau_data
        .type   __nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
        .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
        .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
        .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
        .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
.Lfe1:
        .size   __nsau_data,.Lfe1-__nsau_data
        .hidden __nsau_data
#endif /* L_nsau */

#ifdef L_udivsi3
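        # The core of __udivsi3 is a shift-and-subtract loop that produces
        # one quotient bit per iteration.  As a rough C model (illustrative
        # only; __builtin_clz stands in for the nsau operation):
        #
        #     unsigned udiv_model (unsigned n, unsigned d)
        #     {
        #       unsigned q = 0;
        #       int count = __builtin_clz (d) - __builtin_clz (n);
        #       d <<= count;              /* align divisor with dividend */
        #       while (count-- > 0)
        #         {
        #           if (n >= d)
        #             {
        #               n -= d;           /* this quotient bit is 1 */
        #               q += 1;
        #             }
        #           q <<= 1;
        #           d >>= 1;
        #         }
        #       if (n >= d)               /* final quotient bit */
        #         q += 1;
        #       return q;
        #     }
        #
        # The divisor <= 1 case and the case where the dividend has at
        # least as many leading zeros as the divisor (which also covers a
        # zero dividend) are handled separately, as in the code below.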
        .align  4
        .global __udivsi3
        .type   __udivsi3,@function
__udivsi3:
        entry   sp, 16
        bltui   a3, 2, .Lle_one # check if the divisor <= 1

        mov     a6, a2          # keep dividend in a6
#if XCHAL_HAVE_NSA
        nsau    a5, a6          # dividend_shift = nsau(dividend)
        nsau    a4, a3          # divisor_shift = nsau(divisor)
#else /* !XCHAL_HAVE_NSA */
        nsau    a5, a6, a2, a7  # dividend_shift = nsau(dividend)
        nsau    a4, a3, a2, a7  # divisor_shift = nsau(divisor)
#endif /* !XCHAL_HAVE_NSA */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = divisor_shift - dividend_shift
        ssl     a4
        sll     a3, a3          # divisor <<= count
        movi    a2, 0           # quotient = 0

        # test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a6, a3, .Lzerobit
        sub     a6, a6, a3
        addi    a2, a2, 1
.Lzerobit:
        slli    a2, a2, 1
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a6, a3, .Lreturn
        addi    a2, a2, 1       # increment quotient if dividend >= divisor
.Lreturn:
        retw

.Lspecial:
        # return dividend >= divisor
        movi    a2, 0
        bltu    a6, a3, .Lreturn2
        movi    a2, 1
.Lreturn2:
        retw

.Lle_one:
        beqz    a3, .Lerror     # if divisor == 0, go to the error case
        retw                    # divisor == 1: return the dividend
.Lerror:
        movi    a2, 0           # just return 0; could throw an exception
        retw
.Lfe2:
        .size   __udivsi3,.Lfe2-__udivsi3

#endif /* L_udivsi3 */

#ifdef L_divsi3
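        # Signed division is done on the absolute values with the same
        # shift-and-subtract loop as __udivsi3, and the quotient is negated
        # at the end when the operand signs differ; in C terms the result
        # is roughly ((a ^ b) < 0) ? -(abs (a) / abs (b)) : abs (a) / abs (b),
        # i.e. the quotient truncates toward zero.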
        .align  4
        .global __divsi3
        .type   __divsi3,@function
__divsi3:
        entry   sp, 16
        xor     a7, a2, a3      # sign = dividend ^ divisor
        abs     a6, a2          # udividend = abs(dividend)
        abs     a3, a3          # udivisor = abs(divisor)
        bltui   a3, 2, .Lle_one # check if udivisor <= 1
#if XCHAL_HAVE_NSA
        nsau    a5, a6          # udividend_shift = nsau(udividend)
        nsau    a4, a3          # udivisor_shift = nsau(udivisor)
#else /* !XCHAL_HAVE_NSA */
        nsau    a5, a6, a2, a8  # udividend_shift = nsau(udividend)
        nsau    a4, a3, a2, a8  # udivisor_shift = nsau(udivisor)
#endif /* !XCHAL_HAVE_NSA */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
        ssl     a4
        sll     a3, a3          # udivisor <<= count
        movi    a2, 0           # quotient = 0

        # test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a6, a3, .Lzerobit
        sub     a6, a6, a3
        addi    a2, a2, 1
.Lzerobit:
        slli    a2, a2, 1
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a6, a3, .Lreturn
        addi    a2, a2, 1       # increment quotient if udividend >= udivisor
.Lreturn:
        neg     a5, a2
        movltz  a2, a5, a7      # return (sign < 0) ? -quotient : quotient
        retw

.Lspecial:
        movi    a2, 0
        bltu    a6, a3, .Lreturn2 # if dividend < divisor, return 0
        movi    a2, 1
        movi    a4, -1
        movltz  a2, a4, a7      # else return (sign < 0) ? -1 : 1
.Lreturn2:
        retw

.Lle_one:
        beqz    a3, .Lerror
        neg     a2, a6          # if udivisor == 1, then return...
        movgez  a2, a6, a7      # (sign < 0) ? -udividend : udividend
        retw
.Lerror:
        movi    a2, 0           # just return 0; could throw an exception
        retw
.Lfe3:
        .size   __divsi3,.Lfe3-__divsi3

#endif /* L_divsi3 */

#ifdef L_umodsi3
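        # Unsigned modulus uses the same test-subtract-and-shift loop as
        # __udivsi3, but it only updates the running remainder (kept in a2)
        # and never forms the quotient.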
        .align  4
        .global __umodsi3
        .type   __umodsi3,@function
__umodsi3:
        entry   sp, 16
        bltui   a3, 2, .Lle_one # check if the divisor is <= 1

#if XCHAL_HAVE_NSA
        nsau    a5, a2          # dividend_shift = nsau(dividend)
        nsau    a4, a3          # divisor_shift = nsau(divisor)
#else /* !XCHAL_HAVE_NSA */
        nsau    a5, a2, a6, a7  # dividend_shift = nsau(dividend)
        nsau    a4, a3, a6, a7  # divisor_shift = nsau(divisor)
#endif /* !XCHAL_HAVE_NSA */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = divisor_shift - dividend_shift
        ssl     a4
        sll     a3, a3          # divisor <<= count

        # test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a2, a3, .Lzerobit
        sub     a2, a2, a3
.Lzerobit:
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a2, a3, .Lreturn
        sub     a2, a2, a3      # subtract once more if dividend >= divisor
.Lreturn:
        retw

.Lspecial:
        bltu    a2, a3, .Lreturn2
        sub     a2, a2, a3      # subtract once if dividend >= divisor
.Lreturn2:
        retw

.Lle_one:
        # the divisor is either 0 or 1, so just return 0.
        # someday we may want to throw an exception if the divisor is 0.
        movi    a2, 0
        retw
.Lfe4:
        .size   __umodsi3,.Lfe4-__umodsi3

#endif /* L_umodsi3 */

#ifdef L_modsi3
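        # Signed modulus: the remainder is computed on the absolute values
        # and then given the sign of the dividend, matching C's rule that
        # (a / b) * b + (a % b) == a.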
        .align  4
        .global __modsi3
        .type   __modsi3,@function
__modsi3:
        entry   sp, 16
        mov     a7, a2          # save original (signed) dividend
        abs     a2, a2          # udividend = abs(dividend)
        abs     a3, a3          # udivisor = abs(divisor)
        bltui   a3, 2, .Lle_one # check if udivisor <= 1
#if XCHAL_HAVE_NSA
        nsau    a5, a2          # udividend_shift = nsau(udividend)
        nsau    a4, a3          # udivisor_shift = nsau(udivisor)
#else /* !XCHAL_HAVE_NSA */
        nsau    a5, a2, a6, a8  # udividend_shift = nsau(udividend)
        nsau    a4, a3, a6, a8  # udivisor_shift = nsau(udivisor)
#endif /* !XCHAL_HAVE_NSA */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
        ssl     a4
        sll     a3, a3          # udivisor <<= count

        # test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a2, a3, .Lzerobit
        sub     a2, a2, a3
.Lzerobit:
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a2, a3, .Lreturn
        sub     a2, a2, a3      # subtract once more if udividend >= udivisor
.Lreturn:
        bgez    a7, .Lpositive
        neg     a2, a2          # if (dividend < 0), negate the remainder
.Lpositive:
        retw

.Lspecial:
        bltu    a2, a3, .Lreturn2
        sub     a2, a2, a3      # subtract once if dividend >= divisor
.Lreturn2:
        bgez    a7, .Lpositive2
        neg     a2, a2          # if (dividend < 0), negate the remainder
.Lpositive2:
        retw

.Lle_one:
        # udivisor is either 0 or 1, so just return 0.
        # someday we may want to throw an exception if udivisor is 0.
        movi    a2, 0
        retw
.Lfe5:
        .size   __modsi3,.Lfe5-__modsi3

#endif /* L_modsi3 */