1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Denis Chertykov <chertykov@gmail.com>
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 #define __zero_reg__ r1
26 #define __tmp_reg__ r0
30 #define __RAMPZ__ 0x3B
33 /* Most of the functions here are called directly from avr.md
34 patterns, instead of using the standard libcall mechanisms.
35 This can make better code because GCC knows exactly which
36 of the call-used registers (not all of them) are clobbered. */
38 /* FIXME: At present, there is no SORT directive in the linker
39 script so that we must not assume that different modules
40 in the same input section like .libgcc.text.mul will be
41 located close together. Therefore, we cannot use
42 RCALL/RJMP to call a function like __udivmodhi4 from
43 __divmodhi4 and have to use lengthy XCALL/XJMP even
44 though they are in the same input section and all same
45 input sections together are small enough to reach every
46 location with a RCALL/RJMP instruction. */
48 .macro mov_l r_dest, r_src
49 #if defined (__AVR_HAVE_MOVW__)
56 .macro mov_h r_dest, r_src
57 #if defined (__AVR_HAVE_MOVW__)
64 .macro wmov r_dest, r_src
65 #if defined (__AVR_HAVE_MOVW__)
69 mov \r_dest+1, \r_src+1
73 #if defined (__AVR_HAVE_JMP_CALL__)
93 .section .text.libgcc.mul, "ax", @progbits
95 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
96 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
97 #if !defined (__AVR_HAVE_MUL__)
98 /*******************************************************
99 Multiplication 8 x 8 without MUL
100 *******************************************************/
101 #if defined (L_mulqi3)
103 #define r_arg2 r22 /* multiplicand */
104 #define r_arg1 r24 /* multiplier */
105 #define r_res __tmp_reg__ /* result */
108 clr r_res ; clear result
112 add r_arg2,r_arg2 ; shift multiplicand
113 breq __mulqi3_exit ; while multiplicand != 0
115 brne __mulqi3_loop ; exit if multiplier = 0
117 mov r_arg1,r_res ; result to return register
125 #endif /* defined (L_mulqi3) */
127 #if defined (L_mulqihi3)
137 #endif /* defined (L_mulqihi3) */
139 #if defined (L_umulqihi3)
145 #endif /* defined (L_umulqihi3) */
147 /*******************************************************
148 Multiplication 16 x 16 without MUL
149 *******************************************************/
150 #if defined (L_mulhi3)
151 #define r_arg1L r24 /* multiplier Low */
152 #define r_arg1H r25 /* multiplier High */
153 #define r_arg2L r22 /* multiplicand Low */
154 #define r_arg2H r23 /* multiplicand High */
155 #define r_resL __tmp_reg__ /* result Low */
156 #define r_resH r21 /* result High */
159 clr r_resH ; clear result
160 clr r_resL ; clear result
164 add r_resL,r_arg2L ; result + multiplicand
167 add r_arg2L,r_arg2L ; shift multiplicand
170 cp r_arg2L,__zero_reg__
171 cpc r_arg2H,__zero_reg__
172 breq __mulhi3_exit ; while multiplicand != 0
174 lsr r_arg1H ; gets LSB of multiplier
177 brne __mulhi3_loop ; exit if multiplier = 0
179 mov r_arg1H,r_resH ; result to return register
191 #endif /* defined (L_mulhi3) */
193 /*******************************************************
194 Widening Multiplication 32 = 16 x 16 without MUL
195 *******************************************************/
197 #if defined (L_mulhisi3)
199 ;;; FIXME: This is dead code (no one calls it)
212 #endif /* defined (L_mulhisi3) */
214 #if defined (L_umulhisi3)
216 ;;; FIXME: This is dead code (no one calls it)
225 #endif /* defined (L_umulhisi3) */
227 #if defined (L_mulsi3)
228 /*******************************************************
229 Multiplication 32 x 32 without MUL
230 *******************************************************/
231 #define r_arg1L r22 /* multiplier Low */
234 #define r_arg1HH r25 /* multiplier High */
236 #define r_arg2L r18 /* multiplicand Low */
239 #define r_arg2HH r21 /* multiplicand High */
241 #define r_resL r26 /* result Low */
244 #define r_resHH r31 /* result High */
247 clr r_resHH ; clear result
248 clr r_resHL ; clear result
249 clr r_resH ; clear result
250 clr r_resL ; clear result
254 add r_resL,r_arg2L ; result + multiplicand
259 add r_arg2L,r_arg2L ; shift multiplicand
261 adc r_arg2HL,r_arg2HL
262 adc r_arg2HH,r_arg2HH
264 lsr r_arg1HH ; gets LSB of multiplier
271 brne __mulsi3_loop ; exit if multiplier = 0
273 mov_h r_arg1HH,r_resHH ; result to return register
274 mov_l r_arg1HL,r_resHL
295 #endif /* defined (L_mulsi3) */
297 #endif /* !defined (__AVR_HAVE_MUL__) */
298 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
301 #if defined (__AVR_HAVE_MUL__)
316 /*******************************************************
317 Widening Multiplication 32 = 16 x 16
318 *******************************************************/
320 #if defined (L_mulhisi3)
321 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
322 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
323 ;;; Clobbers: __tmp_reg__
332 XJMP __usmulhisi3_tail
334 #endif /* L_mulhisi3 */
336 #if defined (L_usmulhisi3)
337 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
338 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
339 ;;; Clobbers: __tmp_reg__
345 DEFUN __usmulhisi3_tail
352 ENDF __usmulhisi3_tail
353 #endif /* L_usmulhisi3 */
355 #if defined (L_umulhisi3)
356 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
357 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
358 ;;; Clobbers: __tmp_reg__
373 #endif /* L_umulhisi3 */
375 /*******************************************************
376 Widening Multiplication 32 = 16 x 32
377 *******************************************************/
379 #if defined (L_mulshisi3)
380 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
381 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
382 ;;; Clobbers: __tmp_reg__
384 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
385 ;; Some cores have problem skipping 2-word instruction
390 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
395 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
396 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
397 ;;; Clobbers: __tmp_reg__
400 ;; One-extend R27:R26 (A1:A0)
405 #endif /* L_mulshisi3 */
407 #if defined (L_muluhisi3)
408 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
409 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
410 ;;; Clobbers: __tmp_reg__
423 #endif /* L_muluhisi3 */
425 /*******************************************************
426 Multiplication 32 x 32
427 *******************************************************/
429 #if defined (L_mulsi3)
430 ;;; R25:R22 = R25:R22 * R21:R18
431 ;;; (C3:C0) = C3:C0 * B3:B0
432 ;;; Clobbers: R26, R27, __tmp_reg__
440 ;; A1:A0 now contains the high word of A
451 #endif /* L_mulsi3 */
466 #endif /* __AVR_HAVE_MUL__ */
468 /*******************************************************
469 Multiplication 24 x 24
470 *******************************************************/
472 #if defined (L_mulpsi3)
474 ;; A[0..2]: In: Multiplicand; Out: Product
479 ;; B[0..2]: In: Multiplier
484 #if defined (__AVR_HAVE_MUL__)
486 ;; C[0..2]: Expand Result
491 ;; R24:R22 *= R20:R18
492 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
501 mul AA2, B0 $ add C2, r0
502 mul AA0, B2 $ add C2, r0
514 #else /* !HAVE_MUL */
516 ;; C[0..2]: Expand Result
521 ;; R24:R22 *= R20:R18
522 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
530 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
531 LSR B2 $ ror B1 $ ror B0
533 ;; If the N-th Bit of B[] was set...
536 ;; ...then add A[] * 2^N to the Result C[]
537 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
539 1: ;; Multiply A[] by 2
540 LSL A0 $ rol A1 $ rol A2
542 ;; Loop until B[] is 0
543 subi B0,0 $ sbci B1,0 $ sbci B2,0
546 ;; Copy C[] to the return Register A[]
558 #endif /* HAVE_MUL */
568 #endif /* L_mulpsi3 */
570 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
572 ;; A[0..2]: In: Multiplicand
577 ;; BB: In: Multiplier
585 ;; C[] = A[] * sign_extend (BB)
613 #endif /* L_mulsqipsi3 && HAVE_MUL */
615 /*******************************************************
616 Multiplication 64 x 64
617 *******************************************************/
619 #if defined (L_muldi3)
623 ;; A[0..7]: In: Multiplicand
634 ;; B[0..7]: In: Multiplier
644 #if defined (__AVR_HAVE_MUL__)
646 ;; Define C[] for convenience
647 ;; Notice that parts of C[] overlap A[] respective B[]
658 ;; R25:R18 *= R17:R10
659 ;; Ordinary ABI-Function
667 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
670 mul A7,B0 $ $ mov C7,r0
671 mul A0,B7 $ $ add C7,r0
672 mul A6,B1 $ $ add C7,r0
673 mul A6,B0 $ mov C6,r0 $ add C7,r1
674 mul B6,A1 $ $ add C7,r0
675 mul B6,A0 $ add C6,r0 $ adc C7,r1
678 mul A2,B4 $ add C6,r0 $ adc C7,r1
679 mul A3,B4 $ $ add C7,r0
680 mul A2,B5 $ $ add C7,r0
697 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
707 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
717 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
721 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
756 #else /* !HAVE_MUL */
770 ;; R25:R18 *= R17:R10
771 ;; Ordinary ABI-Function
787 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
788 ;; where N = 64 - Loop.
789 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
790 ;; B[] will have its initial Value again.
791 LSR B7 $ ror B6 $ ror B5 $ ror B4
792 ror B3 $ ror B2 $ ror B1 $ ror B0
794 ;; If the N-th Bit of B[] was set then...
796 ;; ...finish Rotation...
799 ;; ...and add A[] * 2^N to the Result C[]
800 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
801 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
803 1: ;; Multiply A[] by 2
804 LSL A0 $ rol A1 $ rol A2 $ rol A3
805 rol A4 $ rol A5 $ rol A6 $ rol A7
810 ;; We expanded the Result in C[]
811 ;; Copy Result to the Return Register A[]
835 #endif /* HAVE_MUL */
855 #endif /* L_muldi3 */
857 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
860 .section .text.libgcc.div, "ax", @progbits
862 /*******************************************************
863 Division 8 / 8 => (result + remainder)
864 *******************************************************/
865 #define r_rem r25 /* remainder */
866 #define r_arg1 r24 /* dividend, quotient */
867 #define r_arg2 r22 /* divisor */
868 #define r_cnt r23 /* loop count */
870 #if defined (L_udivmodqi4)
872 sub r_rem,r_rem ; clear remainder and carry
873 ldi r_cnt,9 ; init loop counter
874 rjmp __udivmodqi4_ep ; jump to entry point
876 rol r_rem ; shift dividend into remainder
877 cp r_rem,r_arg2 ; compare remainder & divisor
878 brcs __udivmodqi4_ep ; remainder < divisor: quotient bit stays 0
879 sub r_rem,r_arg2 ; remainder -= divisor
881 rol r_arg1 ; shift dividend (with CARRY)
882 dec r_cnt ; decrement loop counter
883 brne __udivmodqi4_loop
884 com r_arg1 ; complement result
885 ; because C flag was complemented in loop
888 #endif /* defined (L_udivmodqi4) */
890 #if defined (L_divmodqi4)
892 bst r_arg1,7 ; store sign of dividend
893 mov __tmp_reg__,r_arg1
894 eor __tmp_reg__,r_arg2; r0.7 is sign of result
896 neg r_arg1 ; dividend negative : negate
898 neg r_arg2 ; divisor negative : negate
899 XCALL __udivmodqi4 ; do the unsigned div/mod
901 neg r_rem ; correct remainder sign
904 neg r_arg1 ; correct result sign
908 #endif /* defined (L_divmodqi4) */
916 /*******************************************************
917 Division 16 / 16 => (result + remainder)
918 *******************************************************/
919 #define r_remL r26 /* remainder Low */
920 #define r_remH r27 /* remainder High */
922 /* return: remainder */
923 #define r_arg1L r24 /* dividend Low */
924 #define r_arg1H r25 /* dividend High */
926 /* return: quotient */
927 #define r_arg2L r22 /* divisor Low */
928 #define r_arg2H r23 /* divisor High */
930 #define r_cnt r21 /* loop count */
932 #if defined (L_udivmodhi4)
935 sub r_remH,r_remH ; clear remainder and carry
936 ldi r_cnt,17 ; init loop counter
937 rjmp __udivmodhi4_ep ; jump to entry point
939 rol r_remL ; shift dividend into remainder
941 cp r_remL,r_arg2L ; compare remainder & divisor
943 brcs __udivmodhi4_ep ; remainder < divisor
944 sub r_remL,r_arg2L ; restore remainder
947 rol r_arg1L ; shift dividend (with CARRY)
949 dec r_cnt ; decrement loop counter
950 brne __udivmodhi4_loop
953 ; div/mod results to return registers, as for the div() function
954 mov_l r_arg2L, r_arg1L ; quotient
955 mov_h r_arg2H, r_arg1H
956 mov_l r_arg1L, r_remL ; remainder
957 mov_h r_arg1H, r_remH
960 #endif /* defined (L_udivmodhi4) */
962 #if defined (L_divmodhi4)
966 bst r_arg1H,7 ; store sign of dividend
967 mov __tmp_reg__,r_arg2H
969 com __tmp_reg__ ; r0.7 is sign of result
970 rcall __divmodhi4_neg1 ; dividend negative: negate
973 rcall __divmodhi4_neg2 ; divisor negative: negate
974 XCALL __udivmodhi4 ; do the unsigned div/mod
976 rcall __divmodhi4_neg2 ; correct remainder sign
977 brtc __divmodhi4_exit
979 ;; correct dividend/remainder sign
985 ;; correct divisor/result sign
992 #endif /* defined (L_divmodhi4) */
1005 /*******************************************************
1006 Division 24 / 24 => (result + remainder)
1007 *******************************************************/
1009 ;; A[0..2]: In: Dividend; Out: Quotient
1014 ;; B[0..2]: In: Divisor; Out: Remainder
1019 ;; C[0..2]: Expand remainder
1020 #define C0 __zero_reg__
1027 #if defined (L_udivmodpsi4)
1028 ;; R24:R22 = R24:R22 udiv R20:R18
1029 ;; R20:R18 = R24:R22 umod R20:R18
1030 ;; Clobbers: R21, R25, R26
1035 ; Clear remainder and carry. C0 is already 0
1038 ; jump to entry point
1039 rjmp __udivmodpsi4_start
1041 ; shift dividend into remainder
1045 ; compare remainder & divisor
1049 brcs __udivmodpsi4_start ; remainder < divisor: quotient bit stays 0
1050 sub C0, B0 ; remainder -= divisor
1053 __udivmodpsi4_start:
1054 ; shift dividend (with CARRY)
1058 ; decrement loop counter
1060 brne __udivmodpsi4_loop
1064 ; div/mod results to return registers
1069 clr __zero_reg__ ; C0
1072 #endif /* defined (L_udivmodpsi4) */
1074 #if defined (L_divmodpsi4)
1075 ;; R24:R22 = R24:R22 div R20:R18
1076 ;; R20:R18 = R24:R22 mod R20:R18
1077 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1080 ; R0.7 will contain the sign of the result:
1081 ; R0.7 = A.sign ^ B.sign
1083 ; T-flag = sign of dividend
1087 ; Adjust dividend's sign
1088 rcall __divmodpsi4_negA
1090 ; Adjust divisor's sign
1092 rcall __divmodpsi4_negB
1094 ; Do the unsigned div/mod
1097 ; Adjust quotient's sign
1099 rcall __divmodpsi4_negA
1101 ; Adjust remainder's sign
1102 brtc __divmodpsi4_end
1105 ; Correct divisor/remainder sign
1113 ; Correct dividend/quotient sign
1124 #endif /* defined (L_divmodpsi4) */
1140 /*******************************************************
1141 Division 32 / 32 => (result + remainder)
1142 *******************************************************/
1143 #define r_remHH r31 /* remainder High */
1146 #define r_remL r26 /* remainder Low */
1148 /* return: remainder */
1149 #define r_arg1HH r25 /* dividend High */
1150 #define r_arg1HL r24
1152 #define r_arg1L r22 /* dividend Low */
1154 /* return: quotient */
1155 #define r_arg2HH r21 /* divisor High */
1156 #define r_arg2HL r20
1158 #define r_arg2L r18 /* divisor Low */
1160 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1162 #if defined (L_udivmodsi4)
1164 ldi r_remL, 33 ; init loop counter
1167 sub r_remH,r_remH ; clear remainder and carry
1168 mov_l r_remHL, r_remL
1169 mov_h r_remHH, r_remH
1170 rjmp __udivmodsi4_ep ; jump to entry point
1172 rol r_remL ; shift dividend into remainder
1176 cp r_remL,r_arg2L ; compare remainder & divisor
1178 cpc r_remHL,r_arg2HL
1179 cpc r_remHH,r_arg2HH
1180 brcs __udivmodsi4_ep ; remainder < divisor: quotient bit stays 0
1181 sub r_remL,r_arg2L ; remainder -= divisor
1183 sbc r_remHL,r_arg2HL
1184 sbc r_remHH,r_arg2HH
1186 rol r_arg1L ; shift dividend (with CARRY)
1190 dec r_cnt ; decrement loop counter
1191 brne __udivmodsi4_loop
1192 ; __zero_reg__ now restored (r_cnt == 0)
1197 ; div/mod results to return registers, as for the ldiv() function
1198 mov_l r_arg2L, r_arg1L ; quotient
1199 mov_h r_arg2H, r_arg1H
1200 mov_l r_arg2HL, r_arg1HL
1201 mov_h r_arg2HH, r_arg1HH
1202 mov_l r_arg1L, r_remL ; remainder
1203 mov_h r_arg1H, r_remH
1204 mov_l r_arg1HL, r_remHL
1205 mov_h r_arg1HH, r_remHH
1208 #endif /* defined (L_udivmodsi4) */
1210 #if defined (L_divmodsi4)
1212 mov __tmp_reg__,r_arg2HH
1213 bst r_arg1HH,7 ; store sign of dividend
1215 com __tmp_reg__ ; r0.7 is sign of result
1216 rcall __divmodsi4_neg1 ; dividend negative: negate
1219 rcall __divmodsi4_neg2 ; divisor negative: negate
1220 XCALL __udivmodsi4 ; do the unsigned div/mod
1221 sbrc __tmp_reg__, 7 ; correct quotient sign
1222 rcall __divmodsi4_neg2
1223 brtc __divmodsi4_exit ; correct remainder sign
1225 ;; correct dividend/remainder sign
1235 ;; correct divisor/quotient sign
1246 #endif /* defined (L_divmodsi4) */
1249 /*******************************************************
1252 *******************************************************/
1254 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1255 ;; at least 16k of Program Memory. For smaller Devices, depend
1258 #if defined (__AVR_HAVE_JMP_CALL__)
1259 # define SPEED_DIV 8
1260 #elif defined (__AVR_HAVE_MOVW__)
1261 # define SPEED_DIV 16
1263 # define SPEED_DIV 0
1266 ;; A[0..7]: In: Dividend;
1267 ;; Out: Quotient (T = 0)
1268 ;; Out: Remainder (T = 1)
1278 ;; B[0..7]: In: Divisor; Out: Clobber
1288 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1298 ;; Holds Signs during Division Routine
1299 #define SS __tmp_reg__
1301 ;; Bit-Counter in Division Routine
1302 #define R_cnt __zero_reg__
1304 ;; Scratch Register for Negation
1307 #if defined (L_udivdi3)
1309 ;; R25:R18 = R24:R18 umod R17:R10
1310 ;; Ordinary ABI-Function
1314 rjmp __udivdi3_umoddi3
1317 ;; R25:R18 = R24:R18 udiv R17:R10
1318 ;; Ordinary ABI-Function
1324 DEFUN __udivdi3_umoddi3
1335 ENDF __udivdi3_umoddi3
1336 #endif /* L_udivdi3 */
1338 #if defined (L_udivmod64)
1340 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1341 ;; No Registers saved/restored; the Callers will take Care.
1342 ;; Preserves B[] and T-flag
1343 ;; T = 0: Compute Quotient in A[]
1344 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1348 ;; Clear Remainder (C6, C7 will follow)
1355 #if SPEED_DIV == 0 || SPEED_DIV == 16
1356 ;; Initialize Loop-Counter
1359 #endif /* SPEED_DIV */
1366 1: ;; Compare shifted Dividend against Divisor
1367 ;; If -- even after Shifting -- it is smaller...
1368 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1369 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1372 ;; ...then we can subtract it. Thus, it is legal to shift left
1373 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1374 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1375 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1376 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1382 ;; Shifted 64 Bits: A7 has traveled to C7
1384 ;; Divisor is greater than Dividend. We have:
1387 ;; Thus, we can return immediately
1390 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1393 ;; Push of A7 is not needed because C7 is still 0
1397 #elif SPEED_DIV == 16
1399 ;; Compare shifted Dividend against Divisor
1407 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1408 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1409 wmov C2,A6 $ wmov C0,A4
1410 wmov A6,A2 $ wmov A4,A0
1411 wmov A2,C6 $ wmov A0,C4
1413 ;; Set Bit Counter to 32
1417 #error SPEED_DIV = ?
1418 #endif /* SPEED_DIV */
1420 ;; The very Division + Remainder Routine
1422 3: ;; Left-shift Dividend...
1423 lsl A0 $ rol A1 $ rol A2 $ rol A3
1424 rol A4 $ rol A5 $ rol A6 $ rol A7
1426 ;; ...into Remainder
1427 rol C0 $ rol C1 $ rol C2 $ rol C3
1428 rol C4 $ rol C5 $ rol C6 $ rol C7
1430 ;; Compare Remainder and Divisor
1431 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1432 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1436 ;; Divisor fits into Remainder: Subtract it from Remainder...
1437 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1438 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1440 ;; ...and set according Bit in the upcoming Quotient
1441 ;; The Bit will travel to its final Position
1444 4: ;; This Bit is done
1447 ;; __zero_reg__ is 0 again
1449 ;; T = 0: We are fine with the Quotient in A[]
1450 ;; T = 1: Copy Remainder to A[]
1456 ;; Move the Sign of the Result to SS.7
1462 #endif /* L_udivmod64 */
1465 #if defined (L_divdi3)
1467 ;; R25:R18 = R24:R18 mod R17:R10
1468 ;; Ordinary ABI-Function
1472 rjmp __divdi3_moddi3
1475 ;; R25:R18 = R24:R18 div R17:R10
1476 ;; Ordinary ABI-Function
1482 DEFUN __divdi3_moddi3
1487 ;; Both Signs are 0: the following Complexity is not needed
1488 XJMP __udivdi3_umoddi3
1489 #endif /* SPEED_DIV */
1492 ;; Save 12 Registers: Y, 17...8
1493 ;; No Frame needed (X = 0)
1496 ldi r30, lo8(gs(1f))
1497 ldi r31, hi8(gs(1f))
1498 XJMP __prologue_saves__ + ((18 - 12) * 2)
1500 1: ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1501 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1504 ;; Adjust Dividend's Sign as needed
1506 ;; Compiling for Speed we know that at least one Sign must be < 0
1507 ;; Thus, if A[] >= 0 then we know B[] < 0
1511 #endif /* SPEED_DIV */
1515 ;; Adjust Divisor's Sign and SS.7 as needed
1522 com B4 $ com B5 $ com B6 $ com B7
1523 $ com B1 $ com B2 $ com B3
1525 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
1526 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
1528 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1531 ;; Adjust Result's Sign
1532 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1537 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1540 4: ;; Epilogue: Restore the Z = 12 Registers and return
1542 #if defined (__AVR_HAVE_8BIT_SP__)
1543 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1544 ;; so these lines are dead code. To make it work, devices without
1545 ;; SP_H must get their own multilib(s).
1549 #endif /* #SP = 8/16 */
1551 XJMP __epilogue_restores__ + ((18 - 12) * 2)
1553 ENDF __divdi3_moddi3
1559 #endif /* L_divdi3 */
1561 #if defined (L_negdi2)
1564 com A4 $ com A5 $ com A6 $ com A7
1565 $ com A1 $ com A2 $ com A3
1567 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
1568 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
1572 #endif /* L_negdi2 */
1602 .section .text.libgcc.prologue, "ax", @progbits
1604 /**********************************
1605 * This is a prologue subroutine
1606 **********************************/
1607 #if defined (L_prologue)
1609 ;; This function does not clobber T-flag; 64-bit division relies on it
1610 DEFUN __prologue_saves__
1629 #if defined (__AVR_HAVE_8BIT_SP__)
1630 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1631 ;; so these lines are dead code. To make it work, devices without
1632 ;; SP_H must get their own multilib(s).
1642 in __tmp_reg__,__SREG__
1645 out __SREG__,__tmp_reg__
1647 #endif /* #SP = 8/16 */
1649 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1655 ENDF __prologue_saves__
1656 #endif /* defined (L_prologue) */
1659 * This is an epilogue subroutine
1661 #if defined (L_epilogue)
1663 DEFUN __epilogue_restores__
1681 #if defined (__AVR_HAVE_8BIT_SP__)
1682 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1683 ;; so these lines are dead code. To make it work, devices without
1684 ;; SP_H must get their own multilib(s).
1692 adc r29,__zero_reg__
1693 in __tmp_reg__,__SREG__
1696 out __SREG__,__tmp_reg__
1700 #endif /* #SP = 8/16 */
1702 ENDF __epilogue_restores__
1703 #endif /* defined (L_epilogue) */
1706 .section .fini9,"ax",@progbits
1712 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
1714 .section .fini0,"ax",@progbits
1718 #endif /* defined (L_exit) */
1726 #endif /* defined (L_cleanup) */
1729 .section .text.libgcc, "ax", @progbits
1732 DEFUN __tablejump2__
1739 #if defined (__AVR_HAVE_LPMX__)
1742 mov r30, __tmp_reg__
1743 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1749 #else /* !HAVE_LPMX */
1755 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1756 in __tmp_reg__, __EIND__
1760 #endif /* !HAVE_LPMX */
1762 #endif /* defined (L_tablejump) */
1765 .section .init4,"ax",@progbits
1766 DEFUN __do_copy_data
1767 #if defined(__AVR_HAVE_ELPMX__)
1768 ldi r17, hi8(__data_end)
1769 ldi r26, lo8(__data_start)
1770 ldi r27, hi8(__data_start)
1771 ldi r30, lo8(__data_load_start)
1772 ldi r31, hi8(__data_load_start)
1773 ldi r16, hh8(__data_load_start)
1775 rjmp .L__do_copy_data_start
1776 .L__do_copy_data_loop:
1779 .L__do_copy_data_start:
1780 cpi r26, lo8(__data_end)
1782 brne .L__do_copy_data_loop
1783 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
1784 ldi r17, hi8(__data_end)
1785 ldi r26, lo8(__data_start)
1786 ldi r27, hi8(__data_start)
1787 ldi r30, lo8(__data_load_start)
1788 ldi r31, hi8(__data_load_start)
1789 ldi r16, hh8(__data_load_start - 0x10000)
1790 .L__do_copy_data_carry:
1793 rjmp .L__do_copy_data_start
1794 .L__do_copy_data_loop:
1798 brcs .L__do_copy_data_carry
1799 .L__do_copy_data_start:
1800 cpi r26, lo8(__data_end)
1802 brne .L__do_copy_data_loop
1803 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
1804 ldi r17, hi8(__data_end)
1805 ldi r26, lo8(__data_start)
1806 ldi r27, hi8(__data_start)
1807 ldi r30, lo8(__data_load_start)
1808 ldi r31, hi8(__data_load_start)
1809 rjmp .L__do_copy_data_start
1810 .L__do_copy_data_loop:
1811 #if defined (__AVR_HAVE_LPMX__)
1818 .L__do_copy_data_start:
1819 cpi r26, lo8(__data_end)
1821 brne .L__do_copy_data_loop
1822 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
1824 #endif /* L_copy_data */
1826 /* __do_clear_bss is only necessary if there is anything in .bss section. */
1829 .section .init4,"ax",@progbits
1830 DEFUN __do_clear_bss
1831 ldi r17, hi8(__bss_end)
1832 ldi r26, lo8(__bss_start)
1833 ldi r27, hi8(__bss_start)
1834 rjmp .do_clear_bss_start
1837 .do_clear_bss_start:
1838 cpi r26, lo8(__bss_end)
1840 brne .do_clear_bss_loop
1842 #endif /* L_clear_bss */
1844 /* __do_global_ctors and __do_global_dtors are only necessary
1845 if there are any constructors/destructors. */
1848 .section .init6,"ax",@progbits
1849 DEFUN __do_global_ctors
1850 #if defined(__AVR_HAVE_RAMPZ__)
1851 ldi r17, hi8(__ctors_start)
1852 ldi r28, lo8(__ctors_end)
1853 ldi r29, hi8(__ctors_end)
1854 ldi r16, hh8(__ctors_end)
1855 rjmp .L__do_global_ctors_start
1856 .L__do_global_ctors_loop:
1858 sbc r16, __zero_reg__
1862 XCALL __tablejump_elpm__
1863 .L__do_global_ctors_start:
1864 cpi r28, lo8(__ctors_start)
1866 ldi r24, hh8(__ctors_start)
1868 brne .L__do_global_ctors_loop
1870 ldi r17, hi8(__ctors_start)
1871 ldi r28, lo8(__ctors_end)
1872 ldi r29, hi8(__ctors_end)
1873 rjmp .L__do_global_ctors_start
1874 .L__do_global_ctors_loop:
1879 .L__do_global_ctors_start:
1880 cpi r28, lo8(__ctors_start)
1882 brne .L__do_global_ctors_loop
1883 #endif /* defined(__AVR_HAVE_RAMPZ__) */
1884 ENDF __do_global_ctors
1885 #endif /* L_ctors */
1888 .section .fini6,"ax",@progbits
1889 DEFUN __do_global_dtors
1890 #if defined(__AVR_HAVE_RAMPZ__)
1891 ldi r17, hi8(__dtors_end)
1892 ldi r28, lo8(__dtors_start)
1893 ldi r29, hi8(__dtors_start)
1894 ldi r16, hh8(__dtors_start)
1895 rjmp .L__do_global_dtors_start
1896 .L__do_global_dtors_loop:
1898 sbc r16, __zero_reg__
1902 XCALL __tablejump_elpm__
1903 .L__do_global_dtors_start:
1904 cpi r28, lo8(__dtors_end)
1906 ldi r24, hh8(__dtors_end)
1908 brne .L__do_global_dtors_loop
1910 ldi r17, hi8(__dtors_end)
1911 ldi r28, lo8(__dtors_start)
1912 ldi r29, hi8(__dtors_start)
1913 rjmp .L__do_global_dtors_start
1914 .L__do_global_dtors_loop:
1919 .L__do_global_dtors_start:
1920 cpi r28, lo8(__dtors_end)
1922 brne .L__do_global_dtors_loop
1923 #endif /* defined(__AVR_HAVE_RAMPZ__) */
1924 ENDF __do_global_dtors
1925 #endif /* L_dtors */
1927 .section .text.libgcc, "ax", @progbits
1929 #ifdef L_tablejump_elpm
1930 DEFUN __tablejump_elpm__
1931 #if defined (__AVR_HAVE_ELPM__)
1932 #if defined (__AVR_HAVE_LPMX__)
1933 elpm __tmp_reg__, Z+
1935 mov r30, __tmp_reg__
1936 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1948 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1949 in __tmp_reg__, __EIND__
1954 #endif /* defined (__AVR_HAVE_ELPM__) */
1955 ENDF __tablejump_elpm__
1956 #endif /* defined (L_tablejump_elpm) */
1958 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1959 ;; Loading n bytes from Flash; n = 3,4
1960 ;; R22... = Flash[Z]
1961 ;; Clobbers: __tmp_reg__
1963 #if (defined (L_load_3) \
1964 || defined (L_load_4)) \
1965 && !defined (__AVR_HAVE_LPMX__)
1973 .macro .load dest, n
1976 .if \dest != D0+\n-1
1983 #if defined (L_load_3)
1990 #endif /* L_load_3 */
1992 #if defined (L_load_4)
2000 #endif /* L_load_4 */
2002 #endif /* L_load_3 || L_load_4 */
2004 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2005 ;; Loading n bytes from Flash; n = 2,3,4
2006 ;; R22... = Flash[R21:Z]
2007 ;; Clobbers: __tmp_reg__, R21, R30, R31
2009 #if (defined (L_xload_2) \
2010 || defined (L_xload_3) \
2011 || defined (L_xload_4)) \
2012 && defined (__AVR_HAVE_ELPM__) \
2013 && !defined (__AVR_HAVE_ELPMX__)
2015 #if !defined (__AVR_HAVE_RAMPZ__)
2017 #endif /* have RAMPZ */
2025 ;; Register containing bits 16+ of the address
2029 .macro .xload dest, n
2032 .if \dest != D0+\n-1
2034 adc HHI8, __zero_reg__
2039 #if defined (L_xload_2)
2046 #endif /* L_xload_2 */
2048 #if defined (L_xload_3)
2056 #endif /* L_xload_3 */
2058 #if defined (L_xload_4)
2067 #endif /* L_xload_4 */
2069 #endif /* L_xload_{2|3|4} && ELPM */
2072 .section .text.libgcc.builtins, "ax", @progbits
2074 /**********************************
2075 * Find first set Bit (ffs)
2076 **********************************/
2078 #if defined (L_ffssi2)
2079 ;; find first set bit
2080 ;; r25:r24 = ffs32 (r25:r22)
2081 ;; clobbers: r22, r26
2099 #endif /* defined (L_ffssi2) */
2101 #if defined (L_ffshi2)
2102 ;; find first set bit
2103 ;; r25:r24 = ffs16 (r25:r24)
2107 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2108 ;; Some cores have problem skipping 2-word instruction
2112 cpse r24, __zero_reg__
2113 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2114 1: XJMP __loop_ffsqi2
2120 #endif /* defined (L_ffshi2) */
2122 #if defined (L_loop_ffsqi2)
2123 ;; Helper for ffshi2, ffssi2
2124 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2135 #endif /* defined (L_loop_ffsqi2) */
2138 /**********************************
2139 * Count trailing Zeros (ctz)
2140 **********************************/
2142 #if defined (L_ctzsi2)
2143 ;; count trailing zeros
2144 ;; r25:r24 = ctz32 (r25:r22)
2145 ;; clobbers: r26, r22
2147 ;; Note that ctz(0) is undefined for GCC
2153 #endif /* defined (L_ctzsi2) */
2155 #if defined (L_ctzhi2)
2156 ;; count trailing zeros
2157 ;; r25:r24 = ctz16 (r25:r24)
2160 ;; Note that ctz(0) is undefined for GCC
2166 #endif /* defined (L_ctzhi2) */
2169 /**********************************
2170 * Count leading Zeros (clz)
2171 **********************************/
2173 #if defined (L_clzdi2)
2174 ;; count leading zeros
2175 ;; r25:r24 = clz64 (r25:r18)
2176 ;; clobbers: r22, r23, r26
2189 #endif /* defined (L_clzdi2) */
2191 #if defined (L_clzsi2)
2192 ;; count leading zeros
2193 ;; r25:r24 = clz32 (r25:r22)
2205 #endif /* defined (L_clzsi2) */
2207 #if defined (L_clzhi2)
2208 ;; count leading zeros
2209 ;; r25:r24 = clz16 (r25:r24)
2231 #endif /* defined (L_clzhi2) */
2234 /**********************************
2236 **********************************/
2238 #if defined (L_paritydi2)
2239 ;; r25:r24 = parity64 (r25:r18)
2240 ;; clobbers: __tmp_reg__
2248 #endif /* defined (L_paritydi2) */
2250 #if defined (L_paritysi2)
2251 ;; r25:r24 = parity32 (r25:r22)
2252 ;; clobbers: __tmp_reg__
2258 #endif /* defined (L_paritysi2) */
2260 #if defined (L_parityhi2)
2261 ;; r25:r24 = parity16 (r25:r24)
2262 ;; clobbers: __tmp_reg__
2268 ;; r25:r24 = parity8 (r24)
2269 ;; clobbers: __tmp_reg__
2271 ;; parity is in r24[0..7]
2272 mov __tmp_reg__, r24
2274 eor r24, __tmp_reg__
2275 ;; parity is in r24[0..3]
2279 ;; parity is in r24[0,3]
2282 ;; parity is in r24[0]
2287 #endif /* defined (L_parityhi2) */
2290 /**********************************
2292 **********************************/
2294 #if defined (L_popcounthi2)
2296 ;; r25:r24 = popcount16 (r25:r24)
2297 ;; clobbers: __tmp_reg__
2307 DEFUN __popcounthi2_tail
2309 add r24, __tmp_reg__
2311 ENDF __popcounthi2_tail
2312 #endif /* defined (L_popcounthi2) */
2314 #if defined (L_popcountsi2)
2316 ;; r25:r24 = popcount32 (r25:r22)
2317 ;; clobbers: __tmp_reg__
2324 XJMP __popcounthi2_tail
2326 #endif /* defined (L_popcountsi2) */
2328 #if defined (L_popcountdi2)
2330 ;; r25:r24 = popcount64 (r25:r18)
2331 ;; clobbers: r22, r23, __tmp_reg__
2340 XJMP __popcounthi2_tail
2342 #endif /* defined (L_popcountdi2) */
2344 #if defined (L_popcountqi2)
2346 ;; r24 = popcount8 (r24)
2347 ;; clobbers: __tmp_reg__
2349 mov __tmp_reg__, r24
2353 adc r24, __zero_reg__
2355 adc r24, __zero_reg__
2357 adc r24, __zero_reg__
2359 adc r24, __zero_reg__
2361 adc r24, __zero_reg__
2363 adc r24, __tmp_reg__
2366 #endif /* defined (L_popcountqi2) */
2369 /**********************************
2371 **********************************/
2373 ;; swap two registers with different register number
2380 #if defined (L_bswapsi2)
2382 ;; r25:r22 = bswap32 (r25:r22)
2388 #endif /* defined (L_bswapsi2) */
2390 #if defined (L_bswapdi2)
2392 ;; r25:r18 = bswap64 (r25:r18)
2400 #endif /* defined (L_bswapdi2) */
2403 /**********************************
2405 **********************************/
2407 #if defined (L_ashrdi3)
2408 ;; Arithmetic shift right
2409 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
2427 #endif /* defined (L_ashrdi3) */
2429 #if defined (L_lshrdi3)
2430 ;; Logic shift right
2431 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
2449 #endif /* defined (L_lshrdi3) */
2451 #if defined (L_ashldi3)
2453 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
2471 #endif /* defined (L_ashldi3) */
2474 .section .text.libgcc.fmul, "ax", @progbits
2476 /***********************************************************/
2477 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
2478 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
2479 /***********************************************************/
2485 #define A0 __tmp_reg__
2488 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
2489 ;;; Clobbers: r24, r25, __tmp_reg__
2491 ;; A0.7 = negate result?
2499 #endif /* L_fmuls */
2502 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
2503 ;;; Clobbers: r24, r25, __tmp_reg__
2505 ;; A0.7 = negate result?
2510 ;; Helper for __fmuls and __fmulsu
2515 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2516 ;; Some cores have problem skipping 2-word instruction
2521 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2524 ;; C = -C iff A0.7 = 1
2530 #endif /* L_fmulsu */
2534 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
2535 ;;; Clobbers: r24, r25, __tmp_reg__
2542 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.