/* -*- Mode: Asm -*- */
;; Copyright (C) 2012-2014 Free Software Foundation, Inc.
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
;; Georg-Johann Lay (avr@gjlay.de)
;; This file is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation; either version 3, or (at your option) any
;; later version.
;; In addition to the permissions in the GNU General Public License, the
;; Free Software Foundation gives you unlimited permission to link the
;; compiled version of this file into combinations with other programs,
;; and to distribute those combinations without any restriction coming
;; from the use of this file. (The General Public License restrictions
;; do apply in other respects; for example, they cover modification of
;; the file, and distribution when not linked into a combine
;; executable.)
;; This file is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING. If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixed point library routines for AVR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.section .text.libgcc.fixed, "ax", @progbits
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions to float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (L_fractqqsf)
;; Move in place for SA -> SF conversion
#endif /* L_fractqqsf */
#if defined (L_fractuqqsf)
;; Move in place for USA -> SF conversion
#endif /* L_fractuqqsf */
#if defined (L_fracthqsf)
;; Move in place for SA -> SF conversion
#endif /* L_fracthqsf */
#if defined (L_fractuhqsf)
;; Move in place for USA -> SF conversion
#endif /* L_fractuhqsf */
#if defined (L_fracthasf)
;; Move in place for SA -> SF conversion
#endif /* L_fracthasf */
#if defined (L_fractuhasf)
;; Move in place for USA -> SF conversion
#endif /* L_fractuhasf */
#if defined (L_fractsqsf)
;; Divide non-zero results by 2^31 to move the
;; decimal point into place
subi r24, exp_lo (31)
sbci r25, exp_hi (31)
#endif /* L_fractsqsf */
#if defined (L_fractusqsf)
;; Divide non-zero results by 2^32 to move the
;; decimal point into place
cpse r25, __zero_reg__
subi r25, exp_hi (32)
#endif /* L_fractusqsf */
#if defined (L_fractsasf)
;; Divide non-zero results by 2^15 to move the
;; decimal point into place
subi r24, exp_lo (15)
sbci r25, exp_hi (15)
#endif /* L_fractsasf */
#if defined (L_fractusasf)
;; Divide non-zero results by 2^16 to move the
;; decimal point into place
cpse r25, __zero_reg__
subi r25, exp_hi (16)
#endif /* L_fractusasf */
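;; Illustrative C sketch (not part of this library; the helper names and
;; <stdint.h> types are assumptions): the conversions above turn the raw
;; fixed-point bits into a float and then scale by 2^-FBIT.  The assembly
;; does the scaling by adjusting the binary32 exponent (the exp_lo/exp_hi
;; subtractions above) instead of issuing a float division.
;;
;;      #include <stdint.h>
;;
;;      /* SA is s16.15: 32-bit container, 15 fractional bits.  */
;;      static float sa_to_float (int32_t raw)
;;      {
;;          return (float) raw / 32768.0f;          /* divide by 2^15 */
;;      }
;;
;;      /* USQ is .32: 32-bit container, 32 fractional bits.  */
;;      static float usq_to_float (uint32_t raw)
;;      {
;;          return (float) raw / 4294967296.0f;     /* divide by 2^32 */
;;      }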
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions from float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (L_fractsfqq)
;; Multiply with 2^{24+7} to get a QQ result in r25
subi r24, exp_lo (-31)
sbci r25, exp_hi (-31)
#endif /* L_fractsfqq */
#if defined (L_fractsfuqq)
;; Multiply with 2^{24+8} to get a UQQ result in r25
subi r25, exp_hi (-32)
#endif /* L_fractsfuqq */
#if defined (L_fractsfha)
;; Multiply with 2^{16+7} to get a HA result in r25:r24
subi r24, exp_lo (-23)
sbci r25, exp_hi (-23)
#endif /* L_fractsfha */
#if defined (L_fractsfuha)
;; Multiply with 2^24 to get a UHA result in r25:r24
subi r25, exp_hi (-24)
#endif /* L_fractsfuha */
#if defined (L_fractsfhq)
;; Multiply with 2^{16+15} to get a HQ result in r25:r24
;; resp. with 2^31 to get a SQ result in r25:r22
subi r24, exp_lo (-31)
sbci r25, exp_hi (-31)
#endif /* L_fractsfhq */
#if defined (L_fractsfuhq)
;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
;; resp. with 2^32 to get a USQ result in r25:r22
subi r25, exp_hi (-32)
#endif /* L_fractsfuhq */
#if defined (L_fractsfsa)
;; Multiply with 2^15 to get a SA result in r25:r22
subi r24, exp_lo (-15)
sbci r25, exp_hi (-15)
#endif /* L_fractsfsa */
#if defined (L_fractsfusa)
;; Multiply with 2^16 to get a USA result in r25:r22
subi r25, exp_hi (-16)
#endif /* L_fractsfusa */
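;; Illustrative C sketch (not part of this library; the helper names are
;; hypothetical): the conversions above scale by 2^FBIT and then truncate
;; to an integer.  The assembly adds FBIT to the binary32 exponent (the
;; exp_lo/exp_hi subtractions with negative arguments) and reuses the
;; float-to-integer conversion, picking the result out of the appropriate
;; registers; out-of-range inputs are not handled in this sketch.
;;
;;      #include <stdint.h>
;;
;;      static int32_t float_to_sa (float f)        /* SA is s16.15 */
;;      {
;;          return (int32_t) (f * 32768.0f);        /* scale by 2^15 */
;;      }
;;
;;      static int8_t float_to_qq (float f)         /* QQ is s.7 */
;;      {
;;          return (int8_t) (f * 128.0f);           /* scale by 2^7 */
;;      }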
;; For multiplication the functions here are called directly from
;; avr-fixed.md instead of using the standard libcall mechanisms.
;; This can make better code because GCC knows exactly which
;; of the call-used registers (not all of them) are clobbered.
/*******************************************************
Fractional Multiplication 8 x 8 without MUL
*******************************************************/
#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; Clobbers: __tmp_reg__, R22, R24, R25
;; TR 18037 requires that (-1) * (-1) does not overflow
;; The only input that can produce -1 is (-1)^2.
#endif /* L_mulqq3 && ! HAVE_MUL */
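;; Illustrative C sketch of the operation above (not part of this library;
;; the helper name is hypothetical).  QQ is the signed 8-bit fract type with
;; 7 fractional bits; per TR 18037 the single overflowing case (-1) * (-1)
;; must not wrap, so it is clamped to the largest QQ value.
;;
;;      #include <stdint.h>
;;
;;      static int8_t mulqq (int8_t a, int8_t b)
;;      {
;;          if (a == -128 && b == -128)         /* (-1) * (-1) */
;;              return 0x7f;                    /* 1 - 2^-7, no overflow */
;;          int16_t p = (int16_t) a * b;        /* 14 fractional bits */
;;          return (int8_t) (p >> 7);           /* back to 7 fractional bits */
;;      }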
/*******************************************************
Fractional Multiply .16 x .16 with and without MUL
*******************************************************/
#if defined (L_mulhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;; Clobbers: ABI, called by optabs
;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
;;; Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
;; Shift result into place
1: ;; Overflow. TR 18037 requires (-1)^2 not to overflow
ldi r24, lo8 (0x7fff)
ldi r25, hi8 (0x7fff)
#endif /* defined (L_mulhq3) */
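;; Illustrative C sketch of the s.15 multiply above (not part of this
;; library; the helper name and the exact rounding step are assumptions
;; consistent with the stated error bound).  HQ is the signed 16-bit fract
;; type with 15 fractional bits; (-1) * (-1) saturates to 0x7fff as above.
;;
;;      #include <stdint.h>
;;
;;      static int16_t mulhq (int16_t a, int16_t b)
;;      {
;;          if (a == INT16_MIN && b == INT16_MIN)
;;              return 0x7fff;                  /* (-1)^2 must not overflow */
;;          int32_t p = (int32_t) a * b;        /* 30 fractional bits */
;;          return (int16_t) ((p + 16384L) >> 15);  /* round, keep 15 bits */
;;      }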
#if defined (L_muluhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;; Clobbers: ABI, called by optabs
;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
;;; Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
#endif /* L_muluhq3 */
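;; Illustrative C sketch of the unsigned .16 multiply above (not part of
;; this library; the helper name is hypothetical).  UHQ has 16 fractional
;; bits; adding half an LSB before truncating gives the error bound stated
;; above.
;;
;;      #include <stdint.h>
;;
;;      static uint16_t muluhq (uint16_t a, uint16_t b)
;;      {
;;          uint32_t p = (uint32_t) a * b;          /* 32 fractional bits */
;;          return (uint16_t) ((p + 0x8000u) >> 16);
;;      }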
/*******************************************************
Fixed Multiply 8.8 x 8.8 with and without MUL
*******************************************************/
#if defined (L_mulha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
;;; Clobbers: ABI, called by optabs
;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
;;; Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
#endif /* L_mulha3 */
#if defined (L_muluha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;; Clobbers: ABI, called by optabs
;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
;;; Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
#endif /* L_muluha3 */
#if defined (L_muluha3_round)
DEFUN __muluha3_round
;; Shift result into place
#endif /* L_muluha3_round */
/*******************************************************
Fixed Multiplication 16.16 x 16.16
*******************************************************/
;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__
#if defined (__AVR_HAVE_MUL__)
#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
;; Some of the MUL instructions have LSBs outside the result.
;; Don't ignore these LSBs in order to tame rounding error.
;; Use C2/C3 for these LSBs.
mul A0, B0 $ movw C2, r0
mul A1, B0 $ add C3, r0 $ adc C0, r1
mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1
;; Round if T = 1. Store guarding bits outside the result for rounding
;; and left-shift by the signed version (function below).
;; The following MULs don't have LSBs outside the result.
;; C2/C3 is the high part.
mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2
mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3
mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
mul A1, B3 $ add C2, r0 $ adc C3, r1
mul A2, B2 $ add C2, r0 $ adc C3, r1
mul A3, B1 $ add C2, r0 $ adc C3, r1
mul A2, B3 $ add C3, r0
mul A3, B2 $ add C3, r0
;; Guard bits used in the signed version below.
#endif /* L_mulusa3 */
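;; Illustrative C sketch of __mulusa3_round on a MUL device (not part of
;; this library; the helper name is hypothetical, and the guard-byte detail
;; is simplified to a full 64-bit product).  USA is 16.16, so the 32 x 32
;; bit product has 32 fractional bits and the low 16 are guard bits; with
;; T = 1 half an LSB is added before they are dropped.
;;
;;      #include <stdint.h>
;;
;;      static uint32_t mulusa_round (uint32_t a, uint32_t b, int t)
;;      {
;;          uint64_t p = (uint64_t) a * b;      /* 32 fractional bits */
;;          if (t)
;;              p += 1ull << 15;                /* + 0.5 LSB of the result */
;;          return (uint32_t) (p >> 16);        /* drop the guard bits */
;;      }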
#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
XCALL __mulusa3_round
;; A posteriori sign extension of the operands
;; Shift 1 bit left to adjust for 15 fractional bits
#endif /* L_mulsa3 */
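;; Illustrative C sketch of the signed 16.16-style multiply above (not part
;; of this library; the helper name is hypothetical and rounding is
;; omitted).  The code forms the unsigned product first and then applies the
;; "a posteriori" sign correction: if an operand was negative, the unsigned
;; product is too large by the other operand times 2^32.  The final shift
;; accounts for SA having 15 rather than 16 fractional bits.
;;
;;      #include <stdint.h>
;;
;;      static int32_t mulsa (int32_t a, int32_t b)     /* SA is s16.15 */
;;      {
;;          uint64_t p = (uint64_t) (uint32_t) a * (uint32_t) b;
;;          if (a < 0)  p -= (uint64_t) (uint32_t) b << 32;
;;          if (b < 0)  p -= (uint64_t) (uint32_t) a << 32;
;;          return (int32_t) (uint32_t) (p >> 15);      /* 30 -> 15 frac bits */
;;      }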
#else /* __AVR_HAVE_MUL__ */
#if defined (L_mulsa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
XCALL __mulusa3_round
;; A1, A0 survived in R27:R26
;; sign-extend A. A3 survived in R31
;; Shift 1 bit left to adjust for 15 fractional bits
#endif /* L_mulsa3 */
#if defined (L_mulusa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
;; Loop the integral part
1: ;; CC += A * 2^n; n >= 0
add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
lsl A0 $ rol A1 $ rol A2 $ rol A3
;; Carry = n-th bit of B; n >= 0
;; Loop the fractional part
;; B2/B3 is 0 now, use as guard bits for rounding
;; Restore multiplicand
4: ;; CC += A:Guard * 2^n; n < 0
add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2
;; Carry = n-th bit of B; n < 0
;; Save guard bits and set carry for rounding
;; Move result into place
#endif /* L_mulusa3 */
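;; Illustrative C sketch of the shift-and-add loop used when there is no MUL
;; instruction (not part of this library; the helper name is hypothetical
;; and the guard-bit rounding is left out).  For every set bit of B the
;; (shifted) multiplicand is accumulated; the real code runs one loop for
;; the integral bits of B and one for the fractional bits.
;;
;;      #include <stdint.h>
;;
;;      static uint32_t mulusa_shift_add (uint32_t a, uint32_t b)
;;      {
;;          uint64_t acc = 0;
;;          for (int n = 0; n < 32; n++)
;;              if (b & (1ul << n))
;;                  acc += (uint64_t) a << n;
;;          return (uint32_t) (acc >> 16);      /* 16.16 result, truncated */
;;      }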
#endif /* __AVR_HAVE_MUL__ */
/***********************************************************
Fixed unsigned saturated Multiplication 8.8 x 8.8
***********************************************************/
#define SS __tmp_reg__
#if defined (L_usmuluha3)
#ifdef __AVR_HAVE_MUL__
#endif /* HAVE MUL */
;; Round, target is in C1..C2
;; Move result into place
#endif /* L_usmuluha3 */
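;; Illustrative C sketch of the unsigned saturated 8.8 multiply (not part of
;; this library; the helper name is hypothetical).  The product is rounded
;; back to 8 fractional bits and clamped to the maximum UHA value on
;; overflow.
;;
;;      #include <stdint.h>
;;
;;      static uint16_t usmuluha (uint16_t a, uint16_t b)
;;      {
;;          uint32_t p = (uint32_t) a * b;      /* 16 fractional bits */
;;          uint32_t r = (p + 0x80u) >> 8;      /* round to 8 bits */
;;          return r > 0xffffu ? 0xffffu : (uint16_t) r;
;;      }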
/***********************************************************
Fixed signed saturated Multiplication s8.7 x s8.7
***********************************************************/
#if defined (L_ssmulha3)
#ifdef __AVR_HAVE_MUL__
#endif /* HAVE MUL */
;; Adjust decimal point
;; The 9 MSBs must be the same
;; Move result into place
;; C3 >= 0 --> 0x7fff
;; Load min / max value:
;; SS = -1 --> 0x8000
#endif /* L_ssmulha3 */
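;; Illustrative C sketch of the signed saturated s8.7 multiply (not part of
;; this library; the helper name is hypothetical).  The range check below is
;; what the "9 MSBs must be the same" test above implements: after rescaling,
;; the result must still fit in 16 bits, otherwise 0x7fff or 0x8000 is
;; returned depending on the sign.
;;
;;      #include <stdint.h>
;;
;;      static int16_t ssmulha (int16_t a, int16_t b)
;;      {
;;          int32_t p = (int32_t) a * b;        /* 14 fractional bits */
;;          int32_t r = (p + 0x40) >> 7;        /* round to 7 bits */
;;          if (r > INT16_MAX) return INT16_MAX;    /* 0x7fff */
;;          if (r < INT16_MIN) return INT16_MIN;    /* 0x8000 */
;;          return (int16_t) r;
;;      }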
/***********************************************************
Fixed unsigned saturated Multiplication 16.16 x 16.16
***********************************************************/
#define SS __tmp_reg__
#if defined (L_usmulusa3)
;; R22[4] = R22[4] *{usat} R18[4]
;; Ordinary ABI function
;; Round, target is in C2..C5
;; Move result into place
#endif /* L_usmulusa3 */
/***********************************************************
Fixed signed saturated Multiplication s16.15 x s16.15
***********************************************************/
#if defined (L_ssmulsa3)
;; R22[4] = R22[4] *{ssat} R18[4]
;; Ordinary ABI function
;; Adjust decimal point
;; The 17 MSBs must be the same
;; Move result into place
;; C7 < 0 --> 0x80000000
;; C7 >= 0 --> 0x7fffffff
;; Load min / max value:
;; SS = -1 --> 0x80000000
;; SS = 0 --> 0x7fffffff
#endif /* L_ssmulsa3 */
/*******************************************************
Fractional Division 8 / 8
*******************************************************/
#define r_divd r25 /* dividend */
#define r_quo r24 /* quotient */
#define r_div r22 /* divisor */
#define r_sign __tmp_reg__
#if defined (L_divqq3)
sbrc r_sign, 7 ; negate result if needed
#endif /* L_divqq3 */
#if defined (L_udivuqq3)
;; Result is out of [0, 1) ==> Return 1 - eps.
#endif /* L_udivuqq3 */
#if defined (L_divqq_helper)
clr r_quo ; clear quotient
inc __zero_reg__ ; init loop counter, used per shift
lsl r_divd ; shift dividend
brcs 0f ; dividend overflow
cp r_divd,r_div ; compare dividend & divisor
brcc 0f ; dividend >= divisor
rol r_quo ; shift quotient (with CARRY)
sub r_divd,r_div ; restore dividend
lsl r_quo ; shift quotient (without CARRY)
lsl __zero_reg__ ; shift loop-counter bit
com r_quo ; complement result
; because C flag was complemented in loop
#endif /* L_divqq_helper */
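;; Illustrative C sketch of the unsigned fractional division (not part of
;; this library; the helper name is hypothetical).  Keeping the binary point
;; in place means computing (a * 2^8) / b, which is what the restoring
;; division loop above produces one bit at a time; results >= 1 saturate to
;; 1 - eps as noted for __udivuqq3.
;;
;;      #include <stdint.h>
;;
;;      static uint8_t udivuqq (uint8_t a, uint8_t b)   /* UQQ is .8 */
;;      {
;;          if (a >= b)
;;              return 0xff;                            /* 1 - 2^-8 */
;;          return (uint8_t) (((unsigned) a << 8) / b);
;;      }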
/*******************************************************
Fractional Division 16 / 16
*******************************************************/
#define r_divdL 26 /* dividend Low */
#define r_divdH 27 /* dividend High */
#define r_quoL 24 /* quotient Low */
#define r_quoH 25 /* quotient High */
#define r_divL 22 /* divisor Low */
#define r_divH 23 /* divisor High */
#if defined (L_divhq3)
breq __divhq3_minus1 ; if equal return -1
;; negate result if needed
#endif /* defined (L_divhq3) */
#if defined (L_udivuhq3)
sub r_quoH,r_quoH ; clear quotient and carry
DEFUN __udivuha3_common
clr r_quoL ; clear quotient
ldi r_cnt,16 ; init loop counter
rol r_divdL ; shift dividend (with CARRY)
brcs __udivuhq3_ep ; dividend overflow
cp r_divdL,r_divL ; compare dividend & divisor
brcc __udivuhq3_ep ; dividend >= divisor
rol r_quoL ; shift quotient (with CARRY)
rjmp __udivuhq3_cont
sub r_divdL,r_divL ; restore dividend
lsl r_quoL ; shift quotient (without CARRY)
rol r_quoH ; shift quotient
dec r_cnt ; decrement loop counter
brne __udivuhq3_loop
com r_quoL ; complement result
com r_quoH ; because C flag was complemented in loop
ENDF __udivuha3_common
#endif /* defined (L_udivuhq3) */
/*******************************************************
Fixed Division 8.8 / 8.8
*******************************************************/
#if defined (L_divha3)
lsr r_quoH ; adjust to 7 fractional bits
sbrs r0, 7 ; negate result if needed
#endif /* defined (L_divha3) */
#if defined (L_udivuha3)
mov r_divdL, r_divdH
lsl r_quoH ; shift quotient into carry
XJMP __udivuha3_common ; same as fractional after rearrange
#endif /* defined (L_udivuha3) */
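;; Illustrative C sketch of the unsigned 8.8 division (not part of this
;; library; the helper name is hypothetical and overflow is not saturated).
;; Keeping 8 fractional bits in the quotient means dividing (a * 2^8) by b
;; on the raw bits; the code above merely rearranges the registers so the
;; fractional division loop can be reused.
;;
;;      #include <stdint.h>
;;
;;      static uint16_t udivuha (uint16_t a, uint16_t b)    /* UHA is 8.8 */
;;      {
;;          return (uint16_t) (((uint32_t) a << 8) / b);    /* truncating */
;;      }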
/*******************************************************
Fixed Division 16.16 / 16.16
*******************************************************/
#define r_arg1L 24 /* arg1 gets passed already in place */
#define r_divdL 26 /* dividend Low */
#define r_divdHH 31 /* dividend High */
#define r_quoL 22 /* quotient Low */
#define r_quoHH 25 /* quotient High */
#define r_divL 18 /* divisor Low */
#define r_divHH 21 /* divisor High */
#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
#if defined (L_divsa3)
lsr r_quoHH ; adjust to 15 fractional bits
sbrs r0, 7 ; negate result if needed
#endif /* defined (L_divsa3) */
#if defined (L_udivusa3)
ldi r_divdHL, 32 ; init loop counter
wmov r_quoL, r_divdHL
lsl r_quoHL ; shift quotient into carry
rol r_divdL ; shift dividend (with CARRY)
brcs __udivusa3_ep ; dividend overflow
cp r_divdL,r_divL ; compare dividend & divisor
cpc r_divdHL,r_divHL
cpc r_divdHH,r_divHH
brcc __udivusa3_ep ; dividend >= divisor
rol r_quoL ; shift quotient (with CARRY)
rjmp __udivusa3_cont
sub r_divdL,r_divL ; restore dividend
sbc r_divdHL,r_divHL
sbc r_divdHH,r_divHH
lsl r_quoL ; shift quotient (without CARRY)
rol r_quoH ; shift quotient
dec r_cnt ; decrement loop counter
brne __udivusa3_loop
com r_quoL ; complement result
com r_quoH ; because C flag was complemented in loop
#endif /* defined (L_udivusa3) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 1 Byte
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; First Argument and Return Register
#if defined (L_ssabs_1)
#endif /* L_ssabs_1 */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; First Argument and Return Register
#if defined (L_ssneg_2)
#endif /* L_ssneg_2 */
#if defined (L_ssabs_2)
#endif /* L_ssabs_2 */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; First Argument and Return Register
#if defined (L_ssneg_4)
#endif /* L_ssneg_4 */
#if defined (L_ssabs_4)
#endif /* L_ssabs_4 */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; First Argument and Return Register
#if defined (L_clr_8)
;; Clear Carry and all Bytes
;; Clear Carry and set Z
;; Propagate Carry to all Bytes, Carry unaltered
#endif /* L_clr_8 */
#if defined (L_ssneg_8)
#endif /* L_ssneg_8 */
#if defined (L_ssabs_8)
#endif /* L_ssabs_8 */
#if defined (L_usadd_8)
0: ;; A[] = 0xffffffff
#endif /* L_usadd_8 */
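;; Illustrative C sketch of the unsigned saturated 64-bit addition (not part
;; of this library; the helper name is hypothetical): a carry out of the top
;; byte means the true sum does not fit, so all bytes are set as above.
;;
;;      #include <stdint.h>
;;
;;      static uint64_t usadd64 (uint64_t a, uint64_t b)
;;      {
;;          uint64_t s = a + b;
;;          return s < a ? UINT64_MAX : s;      /* carry ==> saturate */
;;      }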
#if defined (L_ussub_8)
#endif /* L_ussub_8 */
#if defined (L_ssadd_8)
;; A = (B >= 0) ? INT64_MAX : INT64_MIN
#endif /* L_ssadd_8 */
#if defined (L_sssub_8)
;; A = (B < 0) ? INT64_MAX : INT64_MIN
#endif /* L_sssub_8 */
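;; Illustrative C sketch of the signed saturated 64-bit addition (not part
;; of this library; the helper name is hypothetical).  Overflow can only
;; happen when both operands have the same sign, and then the saturation
;; value is chosen by the sign of B, exactly as noted above; subtraction
;; works the same way with B's sign inverted.
;;
;;      #include <stdint.h>
;;
;;      static int64_t ssadd64 (int64_t a, int64_t b)
;;      {
;;          uint64_t s = (uint64_t) a + (uint64_t) b;
;;          if (((a ^ b) >= 0) && ((a ^ (int64_t) s) < 0))  /* overflow */
;;              return b >= 0 ? INT64_MAX : INT64_MIN;
;;          return (int64_t) s;
;;      }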
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; R25 = 1 << (R24 & 7)
;; CC = 1 << (AA & 7)
#endif /* L_mask1 */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The rounding point. Any bits smaller than
;; 2^{-RP} will be cleared.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
subi RP, __QQ_FBIT__ - 1
;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
;; Add-Saturate 2^{-RP-1}
0: ;; Mask out bits beyond RP
9: mov C1, __tmp_reg__
#endif /* L_roundqq3 */
;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
subi RP, __UQQ_FBIT__ - 1
;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
;; Add-Saturate 2^{-RP-1}
0: ;; Mask out bits beyond RP
9: mov C1, __tmp_reg__
#endif /* L_rounduqq3 */
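;; Illustrative C sketch of the 1-byte rounding helpers (not part of this
;; library; the helper name is hypothetical and RP is assumed to be in
;; 1..7).  As in the code above, half of the last kept bit is added with
;; saturation and everything below the rounding point 2^-RP is cleared.
;;
;;      #include <stdint.h>
;;
;;      static uint8_t round_uqq (uint8_t x, unsigned rp)   /* UQQ: FBIT = 8 */
;;      {
;;          uint8_t half = (uint8_t) (1u << (8 - 1 - rp));  /* 2^(-rp-1) */
;;          uint8_t mask = (uint8_t) (0xffu << (8 - rp));   /* keep >= 2^-rp */
;;          unsigned s = (unsigned) x + half;
;;          if (s > 0xffu)
;;              s = 0xffu;                                  /* add-saturate */
;;          return (uint8_t) (s & mask);
;;      }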
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; [ R25:R24 = 1 << (R24 & 15)
;; R23:R22 += 1 << (R24 & 15) ]
;; SREG is set according to the addition
;; R25 = 1 << (R24 & 7)
;; Swap C0 and C1 if RP.3 was set
;; Finally, add the power-of-two: A[] += C[]
#endif /* L_addmask_2 */
;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
subi RP, __HQ_FBIT__ - __HA_FBIT__
subi RP, __HA_FBIT__ - 1
;; [ R25:R24 = 1 << (FBIT-1 - RP)
;; R23:R22 += 1 << (FBIT-1 - RP) ]
XJMP __round_s2_const
#endif /* L_round_s2 */
;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
subi RP, __UHQ_FBIT__ - __UHA_FBIT__
subi RP, __UHA_FBIT__ - 1
;; [ R25:R24 = 1 << (FBIT-1 - RP)
;; R23:R22 += 1 << (FBIT-1 - RP) ]
XJMP __round_u2_const
#endif /* L_round_u2 */
#ifdef L_round_2_const
;; Helpers for 2 byte wide rounding
DEFUN __round_s2_const
;; FALLTHRU (Barrier)
ENDF __round_s2_const
DEFUN __round_u2_const
;; Saturation is performed now.
;; Currently, we have C[] = 2^{-RP-1}
;; Clear the bits beyond the rounding point.
ENDF __round_u2_const
#endif /* L_round_2_const */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; [ R25:R22 = 1 << (R24 & 31)
;; R21:R18 += 1 << (R24 & 31) ]
;; SREG is set according to the addition
;; R25 = 1 << (R24 & 7)
;; Swap C2 with C3 if RP.3 is not set
;; Swap C3:C2 with C1:C0 if RP.4 is not set
and C0, C2 $ eor C2, C0
and C1, C3 $ eor C3, C1
;; Finally, add the power-of-two: A[] += C[]
#endif /* L_addmask_4 */
;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
subi RP, __SQ_FBIT__ - __SA_FBIT__
subi RP, __SA_FBIT__ - 1
;; [ R25:R22 = 1 << (FBIT-1 - RP)
;; R21:R18 += 1 << (FBIT-1 - RP) ]
XJMP __round_s4_const
#endif /* L_round_s4 */
;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
subi RP, __USQ_FBIT__ - __USA_FBIT__
subi RP, __USA_FBIT__ - 1
;; [ R25:R22 = 1 << (FBIT-1 - RP)
;; R21:R18 += 1 << (FBIT-1 - RP) ]
XJMP __round_u4_const
#endif /* L_round_u4 */
#ifdef L_round_4_const
;; Helpers for 4 byte wide rounding
DEFUN __round_s4_const
;; FALLTHRU (Barrier)
ENDF __round_s4_const
DEFUN __round_u4_const
;; Saturation is performed now.
;; Currently, we have C[] = 2^{-RP-1}
;; Clear the bits beyond the rounding point.
ENDF __round_u4_const
#endif /* L_round_4_const */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; R25:R18 = round (R25:R18, R16)
ldi FBITm1, __DQ_FBIT__ - 1
#endif /* L_rounddq3 */
;; R25:R18 = round (R25:R18, R16)
ldi FBITm1, __UDQ_FBIT__ - 1
#endif /* L_roundudq3 */
;; R25:R18 = round (R25:R18, R16)
ldi FBITm1, __DA_FBIT__ - 1
#endif /* L_roundda3 */
;; R25:R18 = round (R25:R18, R16)
ldi FBITm1, __UDA_FBIT__ - 1
#endif /* L_rounduda3 */
;; R25:R18 = round (R25:R18, R16)
ldi FBITm1, __TA_FBIT__ - 1
#endif /* L_roundta3 */
;; R25:R18 = round (R25:R18, R16)
ldi FBITm1, __UTA_FBIT__ - 1
#endif /* L_rounduta3 */
;; Compute log2 of addend from rounding point
;; Move input to work register A[]
;; C[] = 1 << (FBIT-1 - RP)
;; Signed overflow: A[] = 0x7f...
;; Unsigned overflow: A[] = 0xff...
;; Clear the bits beyond the rounding point.
#endif /* L_round_x8 */
;; Supply implementations / symbols for the bit-banging functions
;; __builtin_avr_bitsfx and __builtin_avr_fxbits