1 /* -*- Mode: Asm -*- */
3 ;; Free Software Foundation, Inc.
4 ;; Contributed by Sean D'Epagnier (sean@depagnier.com)
5 ;; Georg-Johann Lay (avr@gjlay.de)
7 ;; This file is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by the
9 ;; Free Software Foundation; either version 3, or (at your option) any later version.
12 ;; In addition to the permissions in the GNU General Public License, the
13 ;; Free Software Foundation gives you unlimited permission to link the
14 ;; compiled version of this file into combinations with other programs,
15 ;; and to distribute those combinations without any restriction coming
16 ;; from the use of this file. (The General Public License restrictions
17 ;; do apply in other respects; for example, they cover modification of
18 ;; the file, and distribution when not linked into a combine executable.)
21 ;; This file is distributed in the hope that it will be useful, but
22 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 ;; General Public License for more details.
26 ;; You should have received a copy of the GNU General Public License
27 ;; along with this program; see the file COPYING. If not, write to
28 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
29 ;; Boston, MA 02110-1301, USA.
31 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 ;; Fixed point library routines for AVR
33 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
35 .section .text.libgcc.fixed, "ax", @progbits
37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38 ;; Conversions to float
39 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41 #if defined (L_fractqqsf)
43 ;; Move in place for SA -> SF conversion
52 #endif /* L_fractqqsf */
54 #if defined (L_fractuqqsf)
56 ;; Move in place for USA -> SF conversion
64 #endif /* L_fractuqqsf */
66 #if defined (L_fracthqsf)
68 ;; Move in place for SA -> SF conversion
77 #endif /* L_fracthqsf */
79 #if defined (L_fractuhqsf)
81 ;; Move in place for USA -> SF conversion
88 #endif /* L_fractuhqsf */
90 #if defined (L_fracthasf)
92 ;; Move in place for SA -> SF conversion
101 #endif /* L_fracthasf */
103 #if defined (L_fractuhasf)
105 ;; Move in place for USA -> SF conversion
113 #endif /* L_fractuhasf */
116 #if defined (L_fractsqsf)
119 ;; Divide non-zero results by 2^31 to move the
120 ;; decimal point into place
123 subi r24, exp_lo (31)
124 sbci r25, exp_hi (31)
127 #endif /* L_fractsqsf */
129 #if defined (L_fractusqsf)
132 ;; Divide non-zero results by 2^32 to move the
133 ;; decimal point into place
134 cpse r25, __zero_reg__
135 subi r25, exp_hi (32)
138 #endif /* L_fractusqsf */
140 #if defined (L_fractsasf)
143 ;; Divide non-zero results by 2^16 to move the
144 ;; decimal point into place
145 cpse r25, __zero_reg__
146 subi r25, exp_hi (16)
149 #endif /* L_fractsasf */
151 #if defined (L_fractusasf)
154 ;; Divide non-zero results by 2^16 to move the
155 ;; decimal point into place
156 cpse r25, __zero_reg__
157 subi r25, exp_hi (16)
160 #endif /* L_fractusasf */
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
163 ;; Conversions from float
164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
166 #if defined (L_fractsfqq)
168 ;; Multiply with 2^{24+7} to get a QQ result in r25
169 subi r24, exp_lo (-31)
170 sbci r25, exp_hi (-31)
175 #endif /* L_fractsfqq */
177 #if defined (L_fractsfuqq)
179 ;; Multiply with 2^{24+8} to get a UQQ result in r25
180 subi r25, exp_hi (-32)
185 #endif /* L_fractsfuqq */
187 #if defined (L_fractsfha)
189 ;; Multiply with 2^24 to get a HA result in r25:r24
190 subi r25, exp_hi (-24)
193 #endif /* L_fractsfha */
195 #if defined (L_fractsfuha)
197 ;; Multiply with 2^24 to get a UHA result in r25:r24
198 subi r25, exp_hi (-24)
201 #endif /* L_fractsfuha */
203 #if defined (L_fractsfhq)
208 ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
209 ;; resp. with 2^31 to get a SQ result in r25:r22
210 subi r24, exp_lo (-31)
211 sbci r25, exp_hi (-31)
214 #endif /* L_fractsfhq */
216 #if defined (L_fractsfuhq)
221 ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
222 ;; resp. with 2^32 to get a USQ result in r25:r22
223 subi r25, exp_hi (-32)
226 #endif /* L_fractsfuhq */
228 #if defined (L_fractsfsa)
230 ;; Multiply with 2^16 to get a SA result in r25:r22
231 subi r25, exp_hi (-16)
234 #endif /* L_fractsfsa */
236 #if defined (L_fractsfusa)
238 ;; Multiply with 2^16 to get a USA result in r25:r22
239 subi r25, exp_hi (-16)
242 #endif /* L_fractsfusa */
245 ;; For multiplication the functions here are called directly from
246 ;; avr-fixed.md instead of using the standard libcall mechanisms.
247 ;; This can make better code because GCC knows exactly which
248 ;; of the call-used registers (not all of them) are clobbered.
250 /*******************************************************
251 Fractional Multiplication 8 x 8 without MUL
252 *******************************************************/
254 #if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
256 ;;; Clobbers: __tmp_reg__, R22, R24, R25
260 ;; TR 18037 requires that (-1) * (-1) does not overflow
261 ;; The only input that can produce -1 is (-1)^2.
267 #endif /* L_mulqq3 && ! HAVE_MUL */
269 /*******************************************************
270 Fractional Multiply .16 x .16 with and without MUL
271 *******************************************************/
273 #if defined (L_mulhq3)
274 ;;; Same code with and without MUL, but the interfaces differ:
275 ;;; no MUL: (R25:R24) = (R23:R22) * (R25:R24)
276 ;;; Clobbers: ABI, called by optabs
277 ;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
278 ;;; Clobbers: __tmp_reg__, R22, R23
279 ;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
282 ;; Shift result into place
291 1: ;; Overflow. TR 18037 requires (-1)^2 not to overflow
292 ldi r24, lo8 (0x7fff)
293 ldi r25, hi8 (0x7fff)
296 #endif /* defined (L_mulhq3) */
298 #if defined (L_muluhq3)
299 ;;; Same code with and without MUL, but the interfaces differ:
300 ;;; no MUL: (R25:R24) *= (R23:R22)
301 ;;; Clobbers: ABI, called by optabs
302 ;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
303 ;;; Clobbers: __tmp_reg__, R22, R23
304 ;;; Rounding: -0.5 LSB < error <= 0.5 LSB
312 #endif /* L_muluhq3 */
315 /*******************************************************
316 Fixed Multiply 8.8 x 8.8 with and without MUL
317 *******************************************************/
319 #if defined (L_mulha3)
320 ;;; Same code with and without MUL, but the interfaces differ:
321 ;;; no MUL: (R25:R24) = (R23:R22) * (R25:R24)
322 ;;; Clobbers: ABI, called by optabs
323 ;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
324 ;;; Clobbers: __tmp_reg__, R22, R23
325 ;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
330 #endif /* L_mulha3 */
332 #if defined (L_muluha3)
333 ;;; Same code with and without MUL, but the interfaces differ:
334 ;;; no MUL: (R25:R24) *= (R23:R22)
335 ;;; Clobbers: ABI, called by optabs
336 ;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
337 ;;; Clobbers: __tmp_reg__, R22, R23
338 ;;; Rounding: -0.5 LSB < error <= 0.5 LSB
343 #endif /* L_muluha3 */
345 #if defined (L_muluha3_round)
346 DEFUN __muluha3_round
347 ;; Shift result into place
355 #endif /* L_muluha3_round */
358 /*******************************************************
359 Fixed Multiplication 16.16 x 16.16
360 *******************************************************/
362 #if defined (__AVR_HAVE_MUL__)
382 #if defined (L_mulusa3)
383 ;;; (C3:C0) = (A3:A0) * (B3:B0)
384 ;;; Clobbers: __tmp_reg__
385 ;;; Rounding: -0.5 LSB < error <= 0.5 LSB
;;; Unsigned 16.16 x 16.16 multiply assembled from 8 x 8 MULs; every MUL
;;; leaves its 16-bit product in r1:r0.  A product Ai * Bj has byte
;;; weight i + j and result byte C0 sits at weight 2, so product bytes
;;; of weight 0 and 1 lie below the result.
;;; The $ character separates several statements on one source line.
;;; NOTE(review): A0..A3, B0..B3 and C0..C3 are register aliases defined
;;; outside this block.
387 ;; Some of the MUL instructions have LSBs outside the result.
388 ;; Don't ignore these LSBs in order to tame rounding error.
389 ;; Use C2/C3 for these LSBs.
;; Weight 0: both product bytes are below the result; movw parks
;; r1:r0 in C3:C2.
393 mul A0, B0 $ movw C2, r0
;; Weight 1: the low product byte accumulates in the guard byte C3,
;; the high product byte in C0; rol shifts the carry of the last adc
;; into C1.
395 mul A1, B0 $ add C3, r0 $ adc C0, r1
396 mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1
402 ;; The following MULs don't have LSBs outside the result.
403 ;; C2/C3 is the high part.
;; Weight 2: products are summed into C1:C0.  sbc C2,C2 turns the carry
;; into 0 or 0xff and every sbci C2,0 subtracts one further carry, so
;; C2 ends up holding the number of carries of this group, negated.
405 mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2
406 mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
407 mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
;; Weight 3: same scheme one byte higher; products go to C2:C1 and the
;; negated carry count is collected in C3.
410 mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3
411 mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
412 mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
413 mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
;; Weight 4: products go to C3:C2; a carry out of C3 is not recorded.
416 mul A1, B3 $ add C2, r0 $ adc C3, r1
417 mul A2, B2 $ add C2, r0 $ adc C3, r1
418 mul A3, B1 $ add C2, r0 $ adc C3, r1
;; Weight 5: only the low product byte is added, into C3; r1 is unused.
420 mul A2, B3 $ add C3, r0
421 mul A3, B2 $ add C3, r0
426 #endif /* L_mulusa3 */
428 #if defined (L_mulsa3)
429 ;;; (C3:C0) = (A3:A0) * (B3:B0)
430 ;;; Clobbers: __tmp_reg__
431 ;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
444 #endif /* L_mulsa3 */
459 #else /* __AVR_HAVE_MUL__ */
488 #if defined (L_mulsa3)
489 ;;; (R25:R22) *= (R21:R18)
490 ;;; Clobbers: ABI, called by optabs
491 ;;; Rounding: -1 LSB <= error <= 1 LSB
497 ;; A survived in 31:30:27:26
508 #endif /* L_mulsa3 */
510 #if defined (L_mulusa3)
511 ;;; (R25:R22) *= (R21:R18)
512 ;;; Clobbers: ABI, called by optabs and __mulsua
513 ;;; Rounding: -1 LSB <= error <= 1 LSB
514 ;;; Does not clobber T and A[] survives in 26, 27, 30, 31
526 ;; Loop the integral part
528 1: ;; CC += A * 2^n; n >= 0
529 add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
532 lsl A0 $ rol A1 $ rol A2 $ rol A3
535 ;; Carry = n-th bit of B; n >= 0
542 ;; Loop the fractional part
543 ;; B2/B3 is 0 now, use as guard bits for rounding
544 ;; Restore multiplicand
549 4: ;; CC += A:Guard * 2^n; n < 0
550 add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
553 lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2
556 ;; Carry = n-th bit of B; n < 0
563 ;; Move result into place and round
578 #endif /* L_mulusa3 */
601 #endif /* __AVR_HAVE_MUL__ */
603 /*******************************************************
604 Fractional Division 8 / 8
605 *******************************************************/
607 #define r_divd r25 /* dividend */
608 #define r_quo r24 /* quotient */
609 #define r_div r22 /* divisor */
611 #if defined (L_divqq3)
620 breq __divqq3_minus1 ; if equal return -1
623 sbrc r0, 7 ; negate result if needed
630 #endif /* defined (L_divqq3) */
632 #if defined (L_udivuqq3)
;; Unsigned fractional 8 / 8 division by shift and subtract, using the
;; register aliases defined above: quotient r_quo (r24), dividend
;; r_divd (r25), divisor r_div (r22).
;; Quotient bits are collected inverted; the final com puts them right.
634 clr r_quo ; clear quotient
;; __zero_reg__ doubles as the loop counter: a single set bit that is
;; shifted left once per iteration (see the lsl __zero_reg__ below).
635 inc __zero_reg__ ; init loop counter, used per shift
637 lsl r_divd ; shift dividend
;; Both branches target 0f: either the shift carried out a 1, or the
;; compare found dividend >= divisor.
;; NOTE(review): 0f is taken to be the subtract path below -- confirm.
638 brcs 0f ; dividend overflow
639 cp r_divd,r_div ; compare dividend & divisor
640 brcc 0f ; dividend >= divisor
;; Falling through means cp left carry set (dividend < divisor), so this
;; rol records a 1 where the true quotient bit is 0.
641 rol r_quo ; shift quotient (with CARRY)
;; Subtract path: lsl records a 0 where the true quotient bit is 1.
644 sub r_divd,r_div ; restore dividend
645 lsl r_quo ; shift quotient (without CARRY)
647 lsl __zero_reg__ ; shift loop-counter bit
649 com r_quo ; complement result
650 ; because C flag was complemented in loop
653 #endif /* defined (L_udivuqq3) */
660 /*******************************************************
661 Fractional Division 16 / 16
662 *******************************************************/
/* Register numbers used by the 16 / 16 division routines below.  */
663 #define r_divdL 26 /* dividend Low */
664 #define r_divdH 27 /* dividend High */
665 #define r_quoL 24 /* quotient Low */
666 #define r_quoH 25 /* quotient High */
667 #define r_divL 22 /* divisor Low */
668 #define r_divH 23 /* divisor High */
671 #if defined (L_divhq3)
685 breq __divhq3_minus1 ; if equal return -1
690 ;; negate result if needed
699 #endif /* defined (L_divhq3) */
701 #if defined (L_udivuhq3)
;; Unsigned fractional 16 / 16 division by shift and subtract.
;; __udivuha3_common is the part shared with the 8.8 division: the
;; L_udivuha3 block jumps here with XJMP after its own lsl r_quoH has
;; placed a bit in the carry flag.
;; Quotient bits are collected inverted; the two com instructions at
;; the end put them right.
;; sub is used instead of clr because it clears the carry flag as well
;; as the register.
703 sub r_quoH,r_quoH ; clear quotient and carry
707 DEFUN __udivuha3_common
;; clr and ldi do not modify the carry flag, so the carry set up before
;; this entry point is still intact at the first rol below.
708 clr r_quoL ; clear quotient
709 ldi r_cnt,16 ; init loop counter
711 rol r_divdL ; shift dividend (with CARRY)
;; Both branches target __udivuhq3_ep: either the shift carried out a 1,
;; or the compare found dividend >= divisor.
713 brcs __udivuhq3_ep ; dividend overflow
714 cp r_divdL,r_divL ; compare dividend & divisor
716 brcc __udivuhq3_ep ; dividend >= divisor
;; Falling through means carry is set (dividend < divisor), so this rol
;; records a 1 where the true quotient bit is 0.
717 rol r_quoL ; shift quotient (with CARRY)
;; Subtract path: lsl records a 0 where the true quotient bit is 1.
720 sub r_divdL,r_divL ; restore dividend
722 lsl r_quoL ; shift quotient (without CARRY)
724 rol r_quoH ; shift quotient
;; dec leaves the carry flag untouched.
725 dec r_cnt ; decrement loop counter
727 com r_quoL ; complement result
728 com r_quoH ; because C flag was complemented in loop
730 ENDF __udivuha3_common
731 #endif /* defined (L_udivuhq3) */
733 /*******************************************************
734 Fixed Division 8.8 / 8.8
735 *******************************************************/
736 #if defined (L_divha3)
749 sbrs r0, 7 ; negate result if needed
754 #endif /* defined (L_divha3) */
756 #if defined (L_udivuha3)
761 lsl r_quoH ; shift quotient into carry
762 XJMP __udivuha3_common ; same as fractional after rearrange
764 #endif /* defined (L_udivuha3) */
774 /*******************************************************
775 Fixed Division 16.16 / 16.16
776 *******************************************************/
778 #define r_arg1L 24 /* arg1 gets passed already in place */
782 #define r_divdL 26 /* dividend Low */
785 #define r_divdHH 31 /* dividend High */
786 #define r_quoL 22 /* quotient Low */
789 #define r_quoHH 25 /* quotient High */
790 #define r_divL 18 /* divisor Low */
793 #define r_divHH 21 /* divisor High */
794 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
796 #if defined (L_divsa3)
809 sbrs r0, 7 ; negate result if needed
814 #endif /* defined (L_divsa3) */
816 #if defined (L_udivusa3)
;; Unsigned 16.16 / 16.16 division by shift and subtract, following the
;; same scheme as the 16-bit routine: quotient bits are collected
;; inverted and the com instructions at the end put them right.
;; r_cnt is __zero_reg__ here (see its #define above), which is why the
;; counter has to be back at 0 once the loop is done.
;; NOTE(review): the constant 32 is loaded into r_divdHL, not r_cnt;
;; presumably it is copied to r_cnt before the loop -- confirm.
818 ldi r_divdHL, 32 ; init loop counter
822 wmov r_quoL, r_divdHL
823 lsl r_quoHL ; shift quotient into carry
826 rol r_divdL ; shift dividend (with CARRY)
;; Both branches target __udivusa3_ep: either the shift carried out a 1,
;; or the compare found dividend >= divisor.
830 brcs __udivusa3_ep ; dividend overflow
831 cp r_divdL,r_divL ; compare dividend & divisor
835 brcc __udivusa3_ep ; dividend >= divisor
;; Falling through means carry is set (dividend < divisor), so this rol
;; records a 1 where the true quotient bit is 0.
836 rol r_quoL ; shift quotient (with CARRY)
;; Subtract path: lsl records a 0 where the true quotient bit is 1.
839 sub r_divdL,r_divL ; restore dividend
843 lsl r_quoL ; shift quotient (without CARRY)
845 rol r_quoH ; shift quotient
;; dec leaves the carry flag untouched.
848 dec r_cnt ; decrement loop counter
850 com r_quoL ; complement result
851 com r_quoH ; because C flag was complemented in loop
856 #endif /* defined (L_udivusa3) */
877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
878 ;; Saturation, 2 Bytes
879 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
881 ;; First Argument and Return Register
885 #if defined (L_ssneg_2)
892 #endif /* L_ssneg_2 */
894 #if defined (L_ssabs_2)
900 #endif /* L_ssabs_2 */
907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
908 ;; Saturation, 4 Bytes
909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
911 ;; First Argument and Return Register
917 #if defined (L_ssneg_4)
927 #endif /* L_ssneg_4 */
929 #if defined (L_ssabs_4)
935 #endif /* L_ssabs_4 */
944 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
945 ;; Saturation, 8 Bytes
946 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
948 ;; First Argument and Return Register
958 #if defined (L_clr_8)
963 ;; Clear Carry and all Bytes
965 ;; Clear Carry and set Z
969 ;; Propagate Carry to all Bytes, Carry unaltered
980 #if defined (L_ssneg_8)
994 #endif /* L_ssneg_8 */
996 #if defined (L_ssabs_8)
1006 #endif /* L_ssabs_8 */
1018 #if defined (L_usadd_8)
1030 #endif /* L_usadd_8 */
1032 #if defined (L_ussub_8)
1044 #endif /* L_ussub_8 */
1046 #if defined (L_ssadd_8)
1052 ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
1060 #endif /* L_ssadd_8 */
1062 #if defined (L_sssub_8)
1070 ;; A = (B < 0) ? INT64_MAX : INT64_MIN
1077 #endif /* L_sssub_8 */