/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
   Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
#include "xtensa-config.h"

/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */
	.macro	do_abs dst, src, tmp
	movgez	\tmp, \src, \src

	.macro	do_addx2 dst, as, at, tmp

	.macro	do_addx4 dst, as, at, tmp

	.macro	do_addx8 dst, as, at, tmp
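
/* For reference, a C sketch of the semantics these macros provide
   (illustrative only, not part of the build):

     uint32_t addx2 (uint32_t as, uint32_t at) { return (as << 1) + at; }
     uint32_t addx4 (uint32_t as, uint32_t at) { return (as << 2) + at; }
     uint32_t addx8 (uint32_t as, uint32_t at) { return (as << 3) + at; }
     int32_t  abs32 (int32_t src) { return src < 0 ? -src : src; }

   On configurations without the ABS and ADDX instructions, the macros
   expand to equivalent shift/add and neg/movgez sequences.  */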
/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */
	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__

	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	.type	__mulsi3, @function

#elif XCHAL_HAVE_MUL16
#elif XCHAL_HAVE_MAC16
#else /* !MUL32 && !MUL16 && !MAC16 */
	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
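
	/* A rough C equivalent of the unrolled scheme (a sketch with
	   illustrative names, not part of the build): each pass consumes
	   four bits of the smaller operand, folding in shifted copies of
	   the larger operand just as the addx2/addx4/addx8 steps do.

	     uint32_t mul_sketch (uint32_t a, uint32_t b)
	     {
	       uint32_t r = 0;
	       while (b != 0)
		 {
		   if (b & 1) r += a;
		   if (b & 2) r += a << 1;
		   if (b & 4) r += a << 2;
		   if (b & 8) r += a << 3;
		   a <<= 4;
		   b >>= 4;
		 }
	       return r;
	     }
	 */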
	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */

	/* Swap so the second argument is smaller.  */
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */
	do_addx2 a7, a4, a2, a7
	do_addx4 a7, a4, a2, a7
	do_addx8 a7, a4, a2, a7
	bgeui	a3, 16, .Lmult_main_loop

	do_addx2 a7, a4, a2, a7
	do_addx4 a7, a4, a2, a7
	do_addx8 a7, a4, a2, a7
	bgeui	a3, 16, .Lmult_main_loop
#endif /* !MUL32 && !MUL16 && !MAC16 */

	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */


#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1

	.type	__umulsidi3, @function

#if __XTENSA_CALL0_ABI__
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */

#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
	/* Get the high halves of the inputs into registers.  */

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */

#endif /* MUL16 || MUL32 */
#if XCHAL_HAVE_MUL16
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \

#else /* no multiply hardware */
#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */

	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */

	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
#endif /* !MUL32_HIGH */
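
/* The !MUL32_HIGH path assembles the 32 x 32 -> 64 bit product from
   four 16 x 16 -> 32 bit partial products.  A C sketch of the
   arithmetic (illustrative names only; the code above additionally
   tracks the carry-outs explicitly):

     uint64_t umulsidi3_sketch (uint32_t x, uint32_t y)
     {
       uint32_t xl = x & 0xffff, xh = x >> 16;
       uint32_t yl = y & 0xffff, yh = y >> 16;
       uint64_t pp0 = (uint64_t) xl * yl;
       uint64_t pp1 = (uint64_t) xl * yh;
       uint64_t pp2 = (uint64_t) xh * yl;
       uint64_t pp3 = (uint64_t) xh * yh;
       return pp0 + ((pp1 + pp2) << 16) + (pp3 << 32);
     }
*/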
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */

#if __XTENSA_CALL0_ABI__

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */

	.macro	mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */
/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */
	.macro	do_nsau cnt, val, tmp, a
	extui	\tmp, \a, 16, 16
	extui	\tmp, \a, 24, 8
	movi	\tmp, __nsau_data

#endif /* !XCHAL_HAVE_NSA */

	.type	__nsau_data, @object
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
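
/* A C sketch of the table-driven fallback (illustrative only; the
   nsau_data array stands in for __nsau_data above): the argument is
   narrowed to its most significant nonzero byte, which then indexes
   the table; each entry holds the number of leading zeros within a
   single byte, so the first entry is 8.

     int nsau_sketch (uint32_t val)
     {
       extern const unsigned char nsau_data[256];
       int cnt = 0;
       if (val >> 16) val >>= 16; else cnt += 16;
       if (val >> 8)  val >>= 8;  else cnt += 8;
       return cnt + nsau_data[val];
     }

   This returns 32 for a zero argument (24 plus the table entry 8).  */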
	.type	__clzsi2, @function
	do_nsau	a2, a2, a3, a4
	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */


	.type	__ctzsi2, @function
	do_nsau	a2, a3, a4, a5
	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */


	.type	__ffssi2, @function
	do_nsau	a2, a3, a4, a5
	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */
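
/* How the three functions above reduce to NSAU, as a C sketch
   (illustrative only; nsau_sketch is the helper sketched earlier,
   returning the number of leading zeros, 32 for zero).  Isolating
   the lowest set bit with x & -x lets the same leading-zero
   primitive answer trailing-zero queries:

     int clz_sketch (uint32_t x) { return nsau_sketch (x); }
     int ctz_sketch (uint32_t x) { return 31 - nsau_sketch (x & -x); }
     int ffs_sketch (uint32_t x) { return 32 - nsau_sketch (x & -x); }
*/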
	.type	__udivsi3, @function
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */
	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
	bltu	a6, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS
#endif /* !XCHAL_HAVE_LOOPS */
	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */

	beqz	a3, .Lerror	/* if divisor == 1, return the dividend */

	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */
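
/* A C sketch of the test-subtract-and-shift scheme (illustrative
   only, covering the main path where d >= 2 and n has strictly more
   significant bits than d; the d <= 1 and quotient-is-0-or-1 cases
   take the .Lle_one and .Lspecial paths).  nsau_sketch is the helper
   sketched earlier.

     uint32_t udivsi3_sketch (uint32_t n, uint32_t d)
     {
       int count = nsau_sketch (d) - nsau_sketch (n);
       uint32_t q = 0;
       d <<= count;
       for (int i = count; i >= 0; i--)
	 {
	   q <<= 1;
	   if (n >= d)
	     {
	       n -= d;
	       q |= 1;
	     }
	   d >>= 1;
	 }
       return q;
     }

   Each pass produces one quotient bit; the assembly peels the final
   trial subtraction out of the loop.  n is left holding the
   remainder, which is what __umodsi3 below returns instead.  */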
	.type	__divsi3, @function
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */
	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
	bltu	a6, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS
#endif /* !XCHAL_HAVE_LOOPS */
	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */

	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */

	neg	a2, a6		/* if udivisor == 1, then return...  */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */

	bltu	a6, a3, .Lreturn0	/* if dividend < divisor, return 0 */
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */
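
/* Sign handling in __divsi3, as a C sketch (illustrative only, using
   udivsi3_sketch from above; the quotient is negative exactly when
   the operands' signs differ):

     int32_t divsi3_sketch (int32_t a, int32_t b)
     {
       int32_t sign = a ^ b;
       uint32_t q = udivsi3_sketch (a < 0 ? -a : a, b < 0 ? -b : b);
       return sign < 0 ? -(int32_t) q : (int32_t) q;
     }
*/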
	.type	__umodsi3, @function
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	sll	a3, a3		/* divisor <<= count */
	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
	bltu	a2, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS
#endif /* !XCHAL_HAVE_LOOPS */
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */
	.type	__modsi3, @function
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	sll	a3, a3		/* udivisor <<= count */
	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
	bltu	a2, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS
#endif /* !XCHAL_HAVE_LOOPS */
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */

	neg	a2, a2		/* if (dividend < 0), return -udividend */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */
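
/* The modulo routines reuse the same test-subtract-and-shift loop but
   keep the running dividend instead of accumulating quotient bits; the
   sign of the signed remainder follows the dividend.  A C sketch
   (illustrative only, same preconditions as udivsi3_sketch):

     uint32_t umodsi3_sketch (uint32_t n, uint32_t d)
     {
       int count = nsau_sketch (d) - nsau_sketch (n);
       d <<= count;
       for (int i = count; i >= 0; i--)
	 {
	   if (n >= d)
	     n -= d;
	   d >>= 1;
	 }
       return n;
     }

     int32_t modsi3_sketch (int32_t a, int32_t b)
     {
       uint32_t r = umodsi3_sketch (a < 0 ? -a : a, b < 0 ? -b : b);
       return a < 0 ? -(int32_t) r : (int32_t) r;
     }
*/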
#endif /* __XTENSA_EB__ */

	.type	__ashldi3, @function
	bgei	a4, 32, .Llow_only
	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */

	.type	__ashrdi3, @function
	bgei	a4, 32, .Lhigh_only
	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */

	.type	__lshrdi3, @function
	bgei	a4, 32, .Lhigh_only1
	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */

#include "ieee754-df.S"
#include "ieee754-sf.S"