/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007
   Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#include "xtensa-config.h"
/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */
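/* For reference, a C sketch of the operations these macros emulate
   (the helper names here are illustrative, not part of this file):

   #include <stdint.h>

   // abs: absolute value of a signed 32-bit integer
   static int32_t emu_abs (int32_t src)
   {
     return src < 0 ? -src : src;
   }

   // addx2/addx4/addx8: shift the first operand left by 1/2/3 bits
   // and add the second operand
   static int32_t emu_addx (int32_t as, int32_t at, int shift)
   {
     return (as << shift) + at;
   }
*/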
	.macro	do_abs dst, src, tmp
	movgez	\tmp, \src, \src

	.macro	do_addx2 dst, as, at, tmp

	.macro	do_addx4 dst, as, at, tmp

	.macro	do_addx8 dst, as, at, tmp
/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */
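/* A rough sketch of the intended expansions (an assumption based on the
   two ABIs; the full macro bodies are not shown here):

   windowed ABI:                  call0 ABI:
     leaf_entry reg, size ->        leaf_entry reg, size ->
       entry  reg, size               (nothing)
     leaf_return ->                 leaf_return ->
       retw                           ret
*/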
	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__

#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__

	.type	__mulsi3, @function

#elif XCHAL_HAVE_MUL16

#elif XCHAL_HAVE_MAC16

#else /* !MUL32 && !MUL16 && !MAC16 */
	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */
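	/* A C sketch of this shift-add scheme (the helper name is
	   illustrative, not part of this file):

	   #include <stdint.h>

	   static uint32_t mul_shift_add (uint32_t x, uint32_t y)
	   {
	     uint32_t result = 0;
	     // Consume four bits of y per step, adding x, 2*x, 4*x, and
	     // 8*x (the addx2/addx4/addx8 forms) instead of shifting x
	     // one bit at a time.
	     while (y != 0)
	       {
	         if (y & 1) result += x;
	         if (y & 2) result += x << 1;
	         if (y & 4) result += x << 2;
	         if (y & 8) result += x << 3;
	         x <<= 4;
	         y >>= 4;
	       }
	     return result;
	   }
	*/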
	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */

	/* Swap so the second argument is smaller.  */
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	do_addx2 a7, a4, a2, a7

	do_addx4 a7, a4, a2, a7

	do_addx8 a7, a4, a2, a7

	bgeui	a3, 16, .Lmult_main_loop

	do_addx2 a7, a4, a2, a7

	do_addx4 a7, a4, a2, a7

	do_addx8 a7, a4, a2, a7

	bgeui	a3, 16, .Lmult_main_loop

#endif /* !MUL32 && !MUL16 && !MAC16 */

	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1

	.type	__umulsidi3, @function
#if __XTENSA_CALL0_ABI__

	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */

#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

	/* Get the high halves of the inputs into registers.  */

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */

#endif /* MUL16 || MUL32 */

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \

#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */
	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */

	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */

	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
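	/* In C terms, the partial-product scheme used here looks roughly
	   like this (names are illustrative; the real code keeps
	   everything in registers):

	   #include <stdint.h>

	   static uint64_t umulsidi3_sketch (uint32_t x, uint32_t y)
	   {
	     uint32_t xl = x & 0xffff, xh = x >> 16;
	     uint32_t yl = y & 0xffff, yh = y >> 16;

	     uint32_t pp0 = xl * yl;   // low  * low
	     uint32_t pp1 = xl * yh;   // low  * high
	     uint32_t pp2 = xh * yl;   // high * low
	     uint32_t pp3 = xh * yh;   // high * high

	     uint64_t mid = (uint64_t) pp1 + pp2;   // may carry out
	     return ((uint64_t) pp3 << 32) + (mid << 16) + pp0;
	   }
	*/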
#endif /* !MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */

#if __XTENSA_CALL0_ABI__

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */

	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2

1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8

	/* The result will be written into a2, so save that argument in a4.  */

	mul_mulsi3_body a2, a4, a3, a5, a6

#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */
/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */
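/* In C, the fallback computes leading zeros roughly as follows, using a
   256-entry table of leading-zero counts for a single byte (the table
   name here is illustrative; __nsau_data below plays that role):

   #include <stdint.h>

   extern const unsigned char nsau_table[256];   // nsau_table[0] == 8

   static int nsau_sketch (uint32_t a)
   {
     int cnt = 0;
     if (a < 0x10000)   { cnt = 16;  a <<= 16; }  // high half is zero
     if (a < 0x1000000) { cnt += 8;  a <<= 8;  }  // next byte is zero
     return cnt + nsau_table[a >> 24];            // finish with a lookup
   }
*/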
	.macro	do_nsau cnt, val, tmp, a

	extui	\tmp, \a, 16, 16

	extui	\tmp, \a, 24, 8

	movi	\tmp, __nsau_data

#endif /* !XCHAL_HAVE_NSA */

	.type	__nsau_data, @object

	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
	.type	__clzsi2, @function

	do_nsau	a2, a2, a3, a4

	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */

	.type	__ctzsi2, @function

	do_nsau	a2, a3, a4, a5

	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */

	.type	__ffssi2, @function

	do_nsau	a2, a3, a4, a5

	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */
	.type	__udivsi3, @function

	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */

	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
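	/* A C sketch of this normalize-then-subtract scheme (the name is
	   illustrative; it assumes 2 <= divisor <= dividend, since the
	   other cases are handled by .Lle_one and .Lspecial):

	   #include <stdint.h>

	   static uint32_t udiv_sketch (uint32_t dividend, uint32_t divisor)
	   {
	     // Align the divisor's top bit with the dividend's top bit.
	     int count = __builtin_clz (divisor) - __builtin_clz (dividend);
	     divisor <<= count;

	     uint32_t quotient = 0;
	     for (int i = 0; i <= count; i++)   // one quotient bit per step
	       {
	         quotient <<= 1;
	         if (dividend >= divisor)
	           {
	             dividend -= divisor;
	             quotient |= 1;
	           }
	         divisor >>= 1;
	       }
	     return quotient;   // "dividend" now holds the remainder
	   }
	*/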
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */

	bltu	a6, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS

#endif /* !XCHAL_HAVE_LOOPS */

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */

	beqz	a3, .Lerror	/* if divisor == 1, return the dividend */

	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */
	.type	__divsi3, @function

	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */

	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */

	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */

	bltu	a6, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS

#endif /* !XCHAL_HAVE_LOOPS */

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */

	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */

	neg	a2, a6		/* if udivisor == 1, then return...  */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */

	bltu	a6, a3, .Lreturn0	/* if dividend < divisor, return 0 */

	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
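	/* The sign handling corresponds to this C sketch (names are
	   illustrative; udiv_sketch stands in for the unsigned sequence
	   above, and the INT_MIN corner case is glossed over):

	   #include <stdint.h>

	   extern uint32_t udiv_sketch (uint32_t, uint32_t);

	   static int32_t div_sketch (int32_t dividend, int32_t divisor)
	   {
	     int32_t sign = dividend ^ divisor;   // top bit set if signs differ
	     uint32_t uq =
	       udiv_sketch (dividend < 0 ? -(uint32_t) dividend : (uint32_t) dividend,
	                    divisor < 0 ? -(uint32_t) divisor : (uint32_t) divisor);
	     return sign < 0 ? -(int32_t) uq : (int32_t) uq;
	   }
	*/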
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */
	.type	__umodsi3, @function

	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */

	sll	a3, a3		/* divisor <<= count */

	/* test-subtract-and-shift loop */

	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */

	bltu	a2, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS

#endif /* !XCHAL_HAVE_LOOPS */

	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */
	.type	__modsi3, @function

	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */

	sll	a3, a3		/* udivisor <<= count */

	/* test-subtract-and-shift loop */

	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */

	bltu	a2, a3, .Lzerobit

#if !XCHAL_HAVE_LOOPS

#endif /* !XCHAL_HAVE_LOOPS */

	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */

	neg	a2, a2		/* if (dividend < 0), return -udividend */
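	/* In C terms, the remainder takes the sign of the dividend (names
	   are illustrative; umod_sketch stands in for the unsigned loop
	   above, and the INT_MIN corner case is glossed over):

	   #include <stdint.h>

	   extern uint32_t umod_sketch (uint32_t, uint32_t);

	   static int32_t mod_sketch (int32_t dividend, int32_t divisor)
	   {
	     uint32_t ur =
	       umod_sketch (dividend < 0 ? -(uint32_t) dividend : (uint32_t) dividend,
	                    divisor < 0 ? -(uint32_t) divisor : (uint32_t) divisor);
	     return dividend < 0 ? -(int32_t) ur : (int32_t) ur;
	   }
	*/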
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */

#endif /* XCHAL_HAVE_DIV32 */

	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */

#endif /* __XTENSA_EB__ */
	.type	__ashldi3, @function

	bgei	a4, 32, .Llow_only

	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */
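/* All three double-word shifts split on whether the shift count reaches
   32.  A C sketch for the left shift (the name is illustrative;
   __ashrdi3 and __lshrdi3 below follow the same pattern with the roles
   of the two halves reversed):

   #include <stdint.h>

   static uint64_t ashldi3_sketch (uint64_t x, int n)   // 0 <= n < 64
   {
     uint32_t lo = (uint32_t) x, hi = (uint32_t) (x >> 32);
     if (n >= 32)
       {
         // Only the low word contributes; it lands in the high word.
         hi = lo << (n - 32);
         lo = 0;
       }
     else if (n != 0)
       {
         hi = (hi << n) | (lo >> (32 - n));
         lo <<= n;
       }
     return ((uint64_t) hi << 32) | lo;
   }
*/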
	.type	__ashrdi3, @function

	bgei	a4, 32, .Lhigh_only

	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */

	.type	__lshrdi3, @function

	bgei	a4, 32, .Lhigh_only1

	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */

#include "ieee754-df.S"
#include "ieee754-sf.S"