/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "xtensa-config.h"
/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */
	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm
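
/* In C terms, the macros above compute (a sketch for exposition only):
	do_abs:   dst = (src < 0) ? -src : src;
	do_addx2: dst = (as << 1) + at;
	do_addx4: dst = (as << 2) + at;
	do_addx8: dst = (as << 3) + at;
   On configurations with the ABS and ADDX options each macro is a single
   instruction; otherwise it expands to the short fallback shown above.  */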
/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */
	.macro leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry \reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm
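
/* A typical use (sketch): a leaf routine is written as
	leaf_entry sp, 16
	...
	leaf_return
   so the same source assembles to an entry/retw pair under the windowed
   ABI and to a plain ret (with no frame setup) under call0.  */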
#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	leaf_return
.LMUL16:
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */
	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
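
	/* The basic bit-at-a-time algorithm in C, for reference (sketch):
		unsigned r = 0;
		while (b != 0) {
		    if (b & 1)
			r += a;
		    a <<= 1;
		    b >>= 1;
		}
	   The code below retires four multiplier bits per iteration,
	   using addx2/addx4/addx8 to fold the shifts into the adds.  */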
	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5
#endif /* !MUL32 && !MUL16 && !MAC16 */

	leaf_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */
#ifdef L_umulsidi3
	.align	4
	.global	__umulsidi3
	.type	__umulsidi3,@function
__umulsidi3:
	leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */
	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */
#if XCHAL_HAVE_MUL32_HIGH
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */
#if XCHAL_HAVE_MUL16
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf
#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a
   workaround using underscores instead of periods when doing the
   concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO
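
/* For example (illustrative only), do_mul(a6, a2, l, a3, h) expands
   to "umul_aa_lh a2, a3" followed by reading the low half of the
   accumulator into a6.  */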
#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#endif
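
	/* Whichever do_mul is in effect, the 64-bit product is assembled
	   from four 16x16 partial products.  Writing x = (xh << 16) + xl
	   and y = (yh << 16) + yl:
		x * y = (xh*yh << 32) + ((xl*yh + xh*yl) << 16) + xl*yl
	   i.e. pp3, pp1 + pp2, and pp0 in the code below.  */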
	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  It uses a custom ABI: the inputs
	   are passed in a13 and a14, the result is returned in a12, and
	   a8 and a15 are clobbered.  */
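
	/* In other words, a caller does (sketch):
		set_arg_l(a13, ...)
		set_arg_h(a14, ...)
		call0	.Lmul_mulsi3
	   and picks up the 16x16 product of the selected halves in a12;
	   this is exactly what the do_mul macro above emits.  */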
	.align	4
.Lmul_mulsi3:
	movi	a12, 0
.Lmul_mult_loop:
	add	a15, a14, a12
	extui	a8, a13, 0, 1
	movnez	a12, a15, a8

	do_addx2 a15, a14, a12, a15
	extui	a8, a13, 1, 1
	movnez	a12, a15, a8

	do_addx4 a15, a14, a12, a15
	extui	a8, a13, 2, 1
	movnez	a12, a15, a8

	do_addx8 a15, a14, a12, a15
	extui	a8, a13, 3, 1
	movnez	a12, a15, a8

	srli	a13, a13, 4
	slli	a14, a14, 4
	bnez	a13, .Lmul_mult_loop
	ret
#endif /* !MUL16 && !MUL32 && !MAC16 */

	.size	__umulsidi3,.-__umulsidi3

#endif /* L_umulsidi3 */
/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */
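
/* Examples of the NSAU semantics implemented here, for reference:
   nsau(0x80000000) = 0, nsau(1) = 31, and nsau(0) = 32.  */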
	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 1f
	movi	\cnt, 16
	slli	\a, \a, 16
1:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 2f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
2:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
#ifdef L_clz
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
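
	/* Entry N of this table is the number of leading zero bits in
	   the 8-bit value N: index 0 maps to 8, index 1 to 7, indices
	   2-3 to 6, and so on down to 0 for 128-255.  */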
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data

#endif /* L_clz */
#ifdef L_clzsi2
	.align	4
	.global	__clzsi2
	.type	__clzsi2,@function
__clzsi2:
	leaf_entry sp, 16
	do_nsau	a2, a2, a3, a4
	leaf_return
	.size	__clzsi2,.-__clzsi2

#endif /* L_clzsi2 */
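
/* The next two routines reduce trailing-zero problems to do_nsau by
   isolating the lowest set bit with (x & -x); e.g. for x = 12
   (binary 1100), x & -x = 4 and ctz = 31 - nsau(4) = 2.  */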
#ifdef L_ctzsi2
	.align	4
	.global	__ctzsi2
	.type	__ctzsi2,@function
__ctzsi2:
	leaf_entry sp, 16
	neg	a3, a2		/* keep only the least-significant bit */
	and	a3, a3, a2
	do_nsau	a2, a3, a4, a5
	movi	a3, 31
	sub	a2, a3, a2
	leaf_return
	.size	__ctzsi2,.-__ctzsi2

#endif /* L_ctzsi2 */
#ifdef L_ffssi2
	.align	4
	.global	__ffssi2
	.type	__ffssi2,@function
__ffssi2:
	leaf_entry sp, 16
	neg	a3, a2		/* keep only the least-significant bit */
	and	a3, a3, a2
	do_nsau	a2, a3, a4, a5
	movi	a3, 32
	sub	a2, a3, a2
	leaf_return
	.size	__ffssi2,.-__ffssi2

#endif /* L_ffssi2 */
#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */
	/* test-subtract-and-shift loop; one quotient bit on each iteration */
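	/* This is standard restoring division; in C (sketch only):
		while (count-- > 0) {
		    if (dividend >= divisor) {
			dividend -= divisor;
			quotient += 1;
		    }
		    quotient <<= 1;
		    divisor >>= 1;
		}
		if (dividend >= divisor)
		    quotient += 1;
	   producing one quotient bit per iteration plus a final bit.  */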
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */
.Lreturn:
	leaf_return
.Lle_one:
	beqz	a3, .Lerror	/* if divisor == 1, return the dividend */
	leaf_return

.Lspecial:
	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	/* just return 0; could throw an exception */

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */
#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	leaf_entry sp, 16
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
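
	/* Signed division is reduced to unsigned division of the
	   magnitudes; the sign bit of the XOR above selects whether to
	   negate the result.  For example, -7 / 2 becomes 7 / 2 = 3,
	   negated to give -3.  */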
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */
	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */
	leaf_return
.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		/* if udivisor == 1, then return...  */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
	leaf_return

.Lerror:
	/* just return 0; could throw an exception */

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */
#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */
.Lreturn:
	leaf_return
.Lle_one:
	/* The divisor is either 0 or 1, so just return 0.
	   Someday we may want to throw an exception if the divisor is 0.  */
	movi	a2, 0
	leaf_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */
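
/* Unlike __udivsi3, the loop above never materializes quotient bits;
   only the running remainder is kept, so each iteration is just a
   compare, an optional subtract, and a shift of the divisor.  */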
#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	leaf_entry sp, 16
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial
	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		/* if (dividend < 0), return -udividend */
.Lpositive:
	leaf_return
.Lle_one:
	/* udivisor is either 0 or 1, so just return 0.
	   Someday we may want to throw an exception if udivisor is 0.  */
	movi	a2, 0
	leaf_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */
#include "ieee754-df.S"
#include "ieee754-sf.S"