1 /* Assembly functions for the Xtensa version of libgcc1.
2 Copyright (C) 2001-2015 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 #include "xtensa-config.h"
28 /* Define macros for the ABS and ADDX* instructions to handle cases
29 where they are not included in the Xtensa processor configuration. */
/* NOTE(review): this extract is incomplete — the embedded source line
   numbers jump (31 -> 36 -> 41 -> 50 -> 59), so the conditional bodies
   (XCHAL_HAVE_ABS / XCHAL_HAVE_ADDX fallbacks) and the .endm directives
   for each macro are missing here.  Do not edit from this view.  */
31 .macro do_abs dst, src, tmp
/* movgez: conditional move when \src >= 0 — part of the software-ABS
   fallback sequence (rest of the sequence not visible in this extract). */
36 movgez \tmp, \src, \src
/* do_addx2/4/8: emulate dst = (as << 1|2|3) + at when the ADDX*
   instructions are not configured; bodies not visible here.  */
41 .macro do_addx2 dst, as, at, tmp
50 .macro do_addx4 dst, as, at, tmp
59 .macro do_addx8 dst, as, at, tmp
68 /* Define macros for leaf function entry and return, supporting either the
69 standard register windowed ABI or the non-windowed call0 ABI. These
70 macros do not allocate any extra stack space, so they only work for
71 leaf functions that do not need to spill anything to the stack. */
/* NOTE(review): incomplete extract — only the macro header and the two
   #if guards survive; the entry/entry-less bodies, the matching #else /
   #endif, the .endm, and the companion leaf_return macro are missing
   (source numbering jumps 74 -> 82).  */
73 .macro leaf_entry reg, size
74 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
/* Presumably this second guard opens the windowed-ABI branch of
   leaf_return — TODO confirm against the full file.  */
82 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
/* __mulsi3: 32-bit signed multiply (a2 = a2 * a3), selecting an
   implementation by hardware multiplier availability.
   NOTE(review): incomplete extract — the .global/label/leaf_entry lines,
   the MUL32 branch, the MUL16 and MAC16 instruction sequences, the loop
   labels, and the leaf_return are all missing (numbering jumps
   93 -> 100 -> 116 -> 125 ...).  */
93 .type __mulsi3, @function
100 #elif XCHAL_HAVE_MUL16
116 #elif XCHAL_HAVE_MAC16
125 #else /* !MUL32 && !MUL16 && !MAC16 */
127 /* Multiply one bit at a time, but unroll the loop 4x to better
128 exploit the addx instructions and avoid overhead.
129 Peel the first iteration to save a cycle on init. */
131 /* Avoid negative numbers. */
/* a5 records the sign of the product so it can be reapplied at the end
   (negation code not visible in this extract).  */
132 xor a5, a2, a3 /* Top bit is 1 if one input is negative. */
136 /* Swap so the second argument is smaller. */
139 movgez a4, a2, a7 /* a4 = max (a2, a3) */
140 movltz a3, a2, a7 /* a3 = min (a2, a3) */
/* Unrolled shift-and-add core: each do_addxN conditionally accumulates
   a4 scaled by bit N of the multiplier.  The intervening extui/movnez
   lines and the .Lmult_main_loop label are missing from this extract.  */
146 do_addx2 a7, a4, a2, a7
150 do_addx4 a7, a4, a2, a7
154 do_addx8 a7, a4, a2, a7
158 bgeui a3, 16, .Lmult_main_loop
172 do_addx2 a7, a4, a2, a7
176 do_addx4 a7, a4, a2, a7
180 do_addx8 a7, a4, a2, a7
184 bgeui a3, 16, .Lmult_main_loop
189 #endif /* !MUL32 && !MUL16 && !MAC16 */
192 .size __mulsi3, . - __mulsi3
194 #endif /* L_mulsi3 */
/* __umulsidi3: 32x32 -> 64-bit unsigned multiply, built from four 16x16
   partial products (pp0..pp3) when no full 32x32-high multiplier exists.
   NOTE(review): incomplete extract — stack setup, register shuffling,
   the __XTENSA_EB__ word-order selection, many instruction lines, and
   the return sequence are missing (embedded numbering is gappy
   throughout).  Do not edit from this view.  */
199 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
200 #define XCHAL_NO_MUL 1
205 .type __umulsidi3, @function
207 #if __XTENSA_CALL0_ABI__
215 /* This is not really a leaf function; allocate enough stack space
216 to allow CALL12s to a helper function. */
228 #endif /* __XTENSA_EB__ */
230 /* This code is taken from the mulsf3 routine in ieee754-sf.S.
231 See more comments there. */
233 #if XCHAL_HAVE_MUL32_HIGH
238 #else /* ! MUL32_HIGH */
240 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
241 /* a0 and a8 will be clobbered by calling the multiply function
242 but a8 is not used here and need not be saved. */
246 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
251 /* Get the high halves of the inputs into registers. */
258 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
259 /* Clear the high halves of the inputs. This does not matter
260 for MUL16 because the high bits are ignored. */
264 #endif /* MUL16 || MUL32 */
/* do_mul(dst, xreg, xhalf, yreg, yhalf): one 16x16 partial product.
   Defined differently per multiplier hardware; the halves are selected
   by token-pasting 'l'/'h' suffixes or by extui extraction below.  */
269 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
270 mul16u dst, xreg ## xhalf, yreg ## yhalf
272 #elif XCHAL_HAVE_MUL32
274 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
275 mull dst, xreg ## xhalf, yreg ## yhalf
277 #elif XCHAL_HAVE_MAC16
279 /* The preprocessor insists on inserting a space when concatenating after
280 a period in the definition of do_mul below. These macros are a workaround
281 using underscores instead of periods when doing the concatenation. */
282 #define umul_aa_ll umul.aa.ll
283 #define umul_aa_lh umul.aa.lh
284 #define umul_aa_hl umul.aa.hl
285 #define umul_aa_hh umul.aa.hh
287 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
288 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
291 #else /* no multiply hardware */
/* No hardware multiply: marshal 16-bit halves into the custom-ABI
   argument registers and call the .Lmul_mulsi3 helper defined below.
   set_arg_h's shift/extract body is missing from this extract.  */
293 #define set_arg_l(dst, src) \
294 extui dst, src, 0, 16
295 #define set_arg_h(dst, src) \
298 #if __XTENSA_CALL0_ABI__
299 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
300 set_arg_ ## xhalf (a13, xreg); \
301 set_arg_ ## yhalf (a14, yreg); \
302 call0 .Lmul_mulsi3; \
305 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
306 set_arg_ ## xhalf (a14, xreg); \
307 set_arg_ ## yhalf (a15, yreg); \
308 call12 .Lmul_mulsi3; \
310 #endif /* __XTENSA_CALL0_ABI__ */
312 #endif /* no multiply hardware */
/* Combine the four partial products: pp1 + pp2 form the middle word
   (with carry in a9); pp0 is the low word; pp3 the high word.  The add
   and carry-handling instructions between these calls are missing.  */
314 /* Add pp1 and pp2 into a6 with carry-out in a9. */
315 do_mul(a6, a2, l, a3, h) /* pp 1 */
316 do_mul(a11, a2, h, a3, l) /* pp 2 */
322 /* Shift the high half of a9/a6 into position in a9. Note that
323 this value can be safely incremented without any carry-outs. */
327 /* Compute the low word into a6. */
328 do_mul(a11, a2, l, a3, l) /* pp 0 */
334 /* Compute the high word into wh. */
335 do_mul(wh, a2, h, a3, h) /* pp 3 */
339 #endif /* !MUL32_HIGH */
341 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
342 /* Restore the original return address. */
345 #if __XTENSA_CALL0_ABI__
356 /* For Xtensa processors with no multiply hardware, this simplified
357 version of _mulsi3 is used for multiplying 16-bit chunks of
358 the floating-point mantissas. When using CALL0, this function
359 uses a custom ABI: the inputs are passed in a13 and a14, the
360 result is returned in a12, and a8 and a15 are clobbered. */
/* mul_mulsi3_body: shift-and-add multiply loop over 4 bits per
   iteration of \src1, accumulating into \dst.  The loop setup, the
   srli/loop-back branch after each 4-bit group, and .endm are missing
   from this extract.  */
364 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
366 1: add \tmp1, \src2, \dst
367 extui \tmp2, \src1, 0, 1
368 movnez \dst, \tmp1, \tmp2
370 do_addx2 \tmp1, \src2, \dst, \tmp1
371 extui \tmp2, \src1, 1, 1
372 movnez \dst, \tmp1, \tmp2
374 do_addx4 \tmp1, \src2, \dst, \tmp1
375 extui \tmp2, \src1, 2, 1
376 movnez \dst, \tmp1, \tmp2
378 do_addx8 \tmp1, \src2, \dst, \tmp1
379 extui \tmp2, \src1, 3, 1
380 movnez \dst, \tmp1, \tmp2
386 #if __XTENSA_CALL0_ABI__
387 mul_mulsi3_body a12, a13, a14, a15, a8
389 /* The result will be written into a2, so save that argument in a4. */
391 mul_mulsi3_body a2, a4, a3, a5, a6
394 #endif /* XCHAL_NO_MUL */
396 .size __umulsidi3, . - __umulsidi3
398 #endif /* L_umulsidi3 */
401 /* Define a macro for the NSAU (unsigned normalize shift amount)
402 instruction, which computes the number of leading zero bits,
403 to handle cases where it is not included in the Xtensa processor
/* NOTE(review): incomplete extract — the end of this comment, the
   XCHAL_HAVE_NSA branch, most of the software fallback (movi/branch
   lines between the extui steps), the table lookup tail, and .endm are
   missing (numbering jumps 406 -> 412 -> 417 -> 422).  */
406 .macro do_nsau cnt, val, tmp, a
/* Software fallback: narrow down which byte holds the highest set bit
   (16-bit then 8-bit probes), then index __nsau_data for the count
   within that byte.  */
412 extui \tmp, \a, 16, 16
417 extui \tmp, \a, 24, 8
422 movi \tmp, __nsau_data
427 #endif /* !XCHAL_HAVE_NSA */
/* __nsau_data: 256-byte table mapping a byte value to the number of
   leading zero bits within that byte (8 for 0, down to 0 for >= 0x80).
   NOTE(review): the label line, section, and alignment directives are
   missing from this extract; the 16 rows of table data themselves
   appear complete (16 x 16 = 256 entries).  */
434 .type __nsau_data, @object
437 .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
438 .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
439 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
440 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
441 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
442 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
443 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
444 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
445 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
446 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
447 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
448 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
449 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
450 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
451 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
452 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
453 #endif /* !XCHAL_HAVE_NSA */
454 .size __nsau_data, . - __nsau_data
/* __clzsi2: count leading zeros of a2, result in a2 (via do_nsau).
   NOTE(review): incomplete extract — .global, label, leaf_entry, and
   leaf_return lines are missing (numbering jumps 462 -> 465 -> 467).  */
462 .type __clzsi2, @function
465 do_nsau a2, a2, a3, a4
467 .size __clzsi2, . - __clzsi2
469 #endif /* L_clzsi2 */
/* __ctzsi2: count trailing zeros of a2.  The isolate-lowest-set-bit
   step before this do_nsau (and the 32-minus adjustment after it) are
   missing from this extract — numbering jumps 475 -> 480 -> 484.  */
475 .type __ctzsi2, @function
480 do_nsau a2, a3, a4, a5
484 .size __ctzsi2, . - __ctzsi2
486 #endif /* L_ctzsi2 */
/* __ffssi2: find-first-set of a2 (1-based; presumably 0 for zero input
   — TODO confirm, the zero-handling lines are missing from this
   extract along with entry/return; numbering jumps 492 -> 497).  */
492 .type __ffssi2, @function
497 do_nsau a2, a3, a4, a5
501 .size __ffssi2, . - __ffssi2
503 #endif /* L_ffssi2 */
/* __udivsi3: unsigned 32-bit divide, a2 = a2 / a3, by shift-and-
   subtract when no hardware divide (XCHAL_HAVE_DIV32) exists.
   NOTE(review): incomplete extract — entry/label, the DIV32 branch,
   the loop-body shift instructions, .Lzerobit/.Lloopend/.Lreturn
   labels, and the return sequence are missing (gaps throughout the
   embedded numbering).  */
509 .type __udivsi3, @function
515 bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
517 mov a6, a2 /* keep dividend in a6 */
518 do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
519 do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
520 bgeu a5, a4, .Lspecial
522 sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
524 sll a3, a3 /* divisor <<= count */
525 movi a2, 0 /* quotient = 0 */
527 /* test-subtract-and-shift loop; one quotient bit on each iteration */
529 loopnez a4, .Lloopend
530 #endif /* XCHAL_HAVE_LOOPS */
532 bltu a6, a3, .Lzerobit
538 #if !XCHAL_HAVE_LOOPS
541 #endif /* !XCHAL_HAVE_LOOPS */
544 bltu a6, a3, .Lreturn
545 addi a2, a2, 1 /* increment quotient if dividend >= divisor */
550 beqz a3, .Lerror /* if divisor == 1, return the dividend */
554 /* return dividend >= divisor */
555 bltu a6, a3, .Lreturn0
560 /* Divide by zero: Use an illegal instruction to force an exception.
561 The subsequent "DIV0" string can be recognized by the exception
562 handler to identify the real cause of the exception. */
568 #endif /* XCHAL_HAVE_DIV32 */
570 .size __udivsi3, . - __udivsi3
572 #endif /* L_udivsi3 */
/* __divsi3: signed 32-bit divide, a2 = a2 / a3.  Computes on absolute
   values then reapplies the sign recorded in a7.
   NOTE(review): incomplete extract — entry/label, the DIV32 branch,
   loop-body shifts, the .Lzerobit/.Lloopend/.Lreturn labels, and the
   return sequence are missing (embedded numbering is gappy).  */
578 .type __divsi3, @function
584 xor a7, a2, a3 /* sign = dividend ^ divisor */
585 do_abs a6, a2, a4 /* udividend = abs (dividend) */
586 do_abs a3, a3, a4 /* udivisor = abs (divisor) */
587 bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
588 do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
589 do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
590 bgeu a5, a4, .Lspecial
592 sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
594 sll a3, a3 /* udivisor <<= count */
595 movi a2, 0 /* quotient = 0 */
597 /* test-subtract-and-shift loop; one quotient bit on each iteration */
599 loopnez a4, .Lloopend
600 #endif /* XCHAL_HAVE_LOOPS */
602 bltu a6, a3, .Lzerobit
608 #if !XCHAL_HAVE_LOOPS
611 #endif /* !XCHAL_HAVE_LOOPS */
614 bltu a6, a3, .Lreturn
615 addi a2, a2, 1 /* increment if udividend >= udivisor */
618 movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
623 neg a2, a6 /* if udivisor == 1, then return... */
624 movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
628 bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
631 movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
635 /* Divide by zero: Use an illegal instruction to force an exception.
636 The subsequent "DIV0" string can be recognized by the exception
637 handler to identify the real cause of the exception. */
643 #endif /* XCHAL_HAVE_DIV32 */
645 .size __divsi3, . - __divsi3
647 #endif /* L_divsi3 */
/* __umodsi3: unsigned 32-bit remainder, a2 = a2 % a3, via the same
   shift-and-subtract scheme as __udivsi3 but keeping the remainder.
   NOTE(review): incomplete extract — entry/label, DIV32 branch,
   loop-body shifts, several labels (.Lzerobit/.Lloopend/.Lle_one
   handling), and the return sequence are missing.  */
653 .type __umodsi3, @function
659 bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
661 do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
662 do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
663 bgeu a5, a4, .Lspecial
665 sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
667 sll a3, a3 /* divisor <<= count */
669 /* test-subtract-and-shift loop */
671 loopnez a4, .Lloopend
672 #endif /* XCHAL_HAVE_LOOPS */
674 bltu a2, a3, .Lzerobit
678 #if !XCHAL_HAVE_LOOPS
681 #endif /* !XCHAL_HAVE_LOOPS */
685 bltu a2, a3, .Lreturn
686 sub a2, a2, a3 /* subtract once more if dividend >= divisor */
693 /* Divide by zero: Use an illegal instruction to force an exception.
694 The subsequent "DIV0" string can be recognized by the exception
695 handler to identify the real cause of the exception. */
701 #endif /* XCHAL_HAVE_DIV32 */
703 .size __umodsi3, . - __umodsi3
705 #endif /* L_umodsi3 */
/* __modsi3: signed 32-bit remainder, a2 = a2 % a3.  Computes the
   unsigned remainder of the absolute values, then negates the result
   when the original dividend (saved in a7) was negative.
   NOTE(review): incomplete extract — entry/label, DIV32 branch,
   loop-body shifts, label definitions, and the return sequence are
   missing (embedded numbering is gappy).  */
711 .type __modsi3, @function
717 mov a7, a2 /* save original (signed) dividend */
718 do_abs a2, a2, a4 /* udividend = abs (dividend) */
719 do_abs a3, a3, a4 /* udivisor = abs (divisor) */
720 bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
721 do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
722 do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
723 bgeu a5, a4, .Lspecial
725 sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
727 sll a3, a3 /* udivisor <<= count */
729 /* test-subtract-and-shift loop */
731 loopnez a4, .Lloopend
732 #endif /* XCHAL_HAVE_LOOPS */
734 bltu a2, a3, .Lzerobit
738 #if !XCHAL_HAVE_LOOPS
741 #endif /* !XCHAL_HAVE_LOOPS */
745 bltu a2, a3, .Lreturn
746 sub a2, a2, a3 /* subtract again if udividend >= udivisor */
749 neg a2, a2 /* if (dividend < 0), return -udividend */
756 /* Divide by zero: Use an illegal instruction to force an exception.
757 The subsequent "DIV0" string can be recognized by the exception
758 handler to identify the real cause of the exception. */
764 #endif /* XCHAL_HAVE_DIV32 */
766 .size __modsi3, . - __modsi3
768 #endif /* L_modsi3 */
/* 64-bit shift helpers.  The uh/ul register-pair assignment for
   big/little endian (__XTENSA_EB__) precedes this #endif but is
   missing from this extract.  */
777 #endif /* __XTENSA_EB__ */
/* __ashldi3: 64-bit left shift.  Shifts of 32 or more move the low
   word into the high word; the instruction sequences for both cases
   are missing here (numbering jumps 787 -> 796).  */
783 .type __ashldi3, @function
787 bgei a4, 32, .Llow_only
796 .size __ashldi3, . - __ashldi3
798 #endif /* L_ashldi3 */
/* __ashrdi3: 64-bit arithmetic right shift.  Shifts >= 32 use only
   the high word (sign-extended); both instruction sequences are
   missing from this extract (numbering jumps 808 -> 817).  */
804 .type __ashrdi3, @function
808 bgei a4, 32, .Lhigh_only
817 .size __ashrdi3, . - __ashrdi3
819 #endif /* L_ashrdi3 */
/* __lshrdi3: 64-bit logical right shift.  Shifts >= 32 use only the
   high word (zero-filled); both instruction sequences are missing
   from this extract (numbering jumps 829 -> 838).  */
825 .type __lshrdi3, @function
829 bgei a4, 32, .Lhigh_only1
838 .size __lshrdi3, . - __lshrdi3
840 #endif /* L_lshrdi3 */
843 #include "ieee754-df.S"
844 #include "ieee754-sf.S"