1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Denis Chertykov <chertykov@gmail.com>
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any later version.
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 #define __zero_reg__ r1
26 #define __tmp_reg__ r0
28 #if defined (__AVR_HAVE_SPH__)
32 #define __RAMPZ__ 0x3B
35 /* Most of the functions here are called directly from avr.md
36 patterns, instead of using the standard libcall mechanisms.
37 This can make better code because GCC knows exactly which
38 of the call-used registers (not all of them) are clobbered. */
40 /* FIXME: At present, there is no SORT directive in the linker
41 script so that we must not assume that different modules
42 in the same input section like .libgcc.text.mul will be
43 located close together. Therefore, we cannot use
44 RCALL/RJMP to call a function like __udivmodhi4 from
45 __divmodhi4 and have to use lengthy XCALL/XJMP even
46 though they are in the same input section and all same
47 input sections together are small enough to reach every
48 location with a RCALL/RJMP instruction. */
50 .macro mov_l r_dest, r_src
51 #if defined (__AVR_HAVE_MOVW__)
58 .macro mov_h r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
66 .macro wmov r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
71 mov \r_dest+1, \r_src+1
75 #if defined (__AVR_HAVE_JMP_CALL__)
94 ;; Negate a 2-byte value held in consecutive registers
101 ;; Negate a 4-byte value held in consecutive registers
113 adc \reg, __zero_reg__
114 adc \reg+1, __zero_reg__
115 adc \reg+2, __zero_reg__
116 adc \reg+3, __zero_reg__
120 #define exp_lo(N) hlo8 ((N) << 23)
121 #define exp_hi(N) hhi8 ((N) << 23)
124 .section .text.libgcc.mul, "ax", @progbits
126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
127 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
128 #if !defined (__AVR_HAVE_MUL__)
129 /*******************************************************
130 Multiplication 8 x 8 without MUL
131 *******************************************************/
132 #if defined (L_mulqi3)
;; NOTE(review): shift-and-add 8 x 8 -> 8 bit multiply (no MUL instruction).
;; Several instructions/labels of this routine are not visible in this
;; chunk -- verify against the full source before further edits.
134 #define r_arg2 r22 /* multiplicand */
135 #define r_arg1 r24 /* multiplier */
136 #define r_res __tmp_reg__ /* result */
139 clr r_res ; clear result accumulator
143 add r_arg2,r_arg2 ; multiplicand <<= 1 (sets Z when it becomes 0)
144 breq __mulqi3_exit ; multiplicand exhausted: nothing more to add
146 brne __mulqi3_loop ; continue while multiplier still has 1-bits
148 mov r_arg1,r_res ; result to return register
156 #endif /* defined (L_mulqi3) */
159 /*******************************************************
160 Widening Multiplication 16 = 8 x 8 without MUL
161 Multiplication 16 x 16 without MUL
162 *******************************************************/
169 ;; Output overlaps input, thus expand result in CC0/1
172 #define CC0 __tmp_reg__
175 #if defined (L_umulqihi3)
176 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
177 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
178 ;;; Clobbers: __tmp_reg__, R21..R23
184 #endif /* L_umulqihi3 */
186 #if defined (L_mulqihi3)
187 ;;; R25:R24 = (signed int) R22 * (signed int) R24
188 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
189 ;;; Clobbers: __tmp_reg__, R20..R23
195 ;; The multiplication runs twice as fast if A1 is zero, thus:
198 #ifdef __AVR_HAVE_JMP_CALL__
199 ;; Store B0 * sign of A
204 #else /* have no CALL */
205 ;; Skip sign-extension of A if A >= 0
206 ;; Same size as with the first alternative but avoids errata skip
207 ;; and is faster if A >= 0
213 #endif /* HAVE_JMP_CALL */
214 ;; 1-extend A after the multiplication
218 #endif /* L_mulqihi3 */
220 #if defined (L_mulhi3)
221 ;;; R25:R24 = R23:R22 * R25:R24
222 ;;; (C1:C0) = (A1:A0) * (B1:B0)
223 ;;; Clobbers: __tmp_reg__, R21..R23
231 ;; Bit n of A is 1 --> C += B << n
238 ;; If B == 0 we are ready
242 ;; Carry = n-th bit of A
245 ;; If bit n of A is set, then go add B * 2^n to C
248 ;; Carry = 0 --> The ROR above acts like CP A0, 0
249 ;; Thus, it is sufficient to CPC the high part to test A against 0
251 ;; Only proceed if A != 0
254 ;; Move Result into place
259 #endif /* L_mulhi3 */
292 /*******************************************************
293 Widening Multiplication 32 = 16 x 16 without MUL
294 *******************************************************/
296 #if defined (L_umulhisi3)
306 #endif /* L_umulhisi3 */
308 #if defined (L_mulhisi3)
315 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
322 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
323 ;; Zero-extend A and __mulsi3 will run at least twice as fast
324 ;; compared to a sign-extended A.
329 ;; If A < 0 then perform the B * 0xffff.... before the
330 ;; very multiplication by initializing the high part of the
331 ;; result CC with -B.
336 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
338 #endif /* L_mulhisi3 */
341 /*******************************************************
342 Multiplication 32 x 32 without MUL
343 *******************************************************/
345 #if defined (L_mulsi3)
353 DEFUN __mulsi3_helper
358 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
360 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
363 lsl B0 $ rol B1 $ rol B2 $ rol B3
365 3: ;; A >>= 1: Carry = n-th bit of A
366 lsr A3 $ ror A2 $ ror A1 $ ror A0
369 ;; Only continue if A != 0
375 ;; All bits of A are consumed: Copy result to return register C
380 #endif /* L_mulsi3 */
399 #endif /* !defined (__AVR_HAVE_MUL__) */
400 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
403 #if defined (__AVR_HAVE_MUL__)
418 /*******************************************************
419 Widening Multiplication 32 = 16 x 16 with MUL
420 *******************************************************/
422 #if defined (L_mulhisi3)
423 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
424 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
425 ;;; Clobbers: __tmp_reg__
434 XJMP __usmulhisi3_tail
436 #endif /* L_mulhisi3 */
438 #if defined (L_usmulhisi3)
439 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
440 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
441 ;;; Clobbers: __tmp_reg__
447 DEFUN __usmulhisi3_tail
454 ENDF __usmulhisi3_tail
455 #endif /* L_usmulhisi3 */
457 #if defined (L_umulhisi3)
458 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
459 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
460 ;;; Clobbers: __tmp_reg__
467 #ifdef __AVR_HAVE_JMP_CALL__
468 ;; This function is used by many other routines, often multiple times.
469 ;; Therefore, if the flash size is not too limited, avoid the RCALL
470 ;; and invest 6 Bytes to speed things up.
485 #endif /* L_umulhisi3 */
487 /*******************************************************
488 Widening Multiplication 32 = 16 x 32 with MUL
489 *******************************************************/
491 #if defined (L_mulshisi3)
492 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
493 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
494 ;;; Clobbers: __tmp_reg__
496 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
497 ;; Some cores have problems skipping a 2-word instruction
502 #endif /* __AVR_HAVE_JMP_CALL__ */
507 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
508 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
509 ;;; Clobbers: __tmp_reg__
512 ;; One-extend R27:R26 (A1:A0)
517 #endif /* L_mulshisi3 */
519 #if defined (L_muluhisi3)
520 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
521 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
522 ;;; Clobbers: __tmp_reg__
535 #endif /* L_muluhisi3 */
537 /*******************************************************
538 Multiplication 32 x 32 with MUL
539 *******************************************************/
541 #if defined (L_mulsi3)
542 ;;; R25:R22 = R25:R22 * R21:R18
543 ;;; (C3:C0) = C3:C0 * B3:B0
544 ;;; Clobbers: R26, R27, __tmp_reg__
552 ;; A1:A0 now contains the high word of A
563 #endif /* L_mulsi3 */
578 #endif /* __AVR_HAVE_MUL__ */
580 /*******************************************************
581 Multiplication 24 x 24 with MUL
582 *******************************************************/
584 #if defined (L_mulpsi3)
586 ;; A[0..2]: In: Multiplicand; Out: Product
591 ;; B[0..2]: In: Multiplier
596 #if defined (__AVR_HAVE_MUL__)
598 ;; C[0..2]: Expand Result
603 ;; R24:R22 *= R20:R18
604 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
613 mul AA2, B0 $ add C2, r0
614 mul AA0, B2 $ add C2, r0
626 #else /* !HAVE_MUL */
628 ;; C[0..2]: Expand Result
633 ;; R24:R22 *= R20:R18
634 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
642 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
643 LSR B2 $ ror B1 $ ror B0
645 ;; If the N-th Bit of B[] was set...
648 ;; ...then add A[] * 2^N to the Result C[]
649 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
651 1: ;; Multiply A[] by 2
652 LSL A0 $ rol A1 $ rol A2
654 ;; Loop until B[] is 0
655 subi B0,0 $ sbci B1,0 $ sbci B2,0
658 ;; Copy C[] to the return Register A[]
670 #endif /* HAVE_MUL */
680 #endif /* L_mulpsi3 */
682 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
684 ;; A[0..2]: In: Multiplicand
689 ;; BB: In: Multiplier
697 ;; C[] = A[] * sign_extend (BB)
725 #endif /* L_mulsqipsi3 && HAVE_MUL */
727 /*******************************************************
728 Multiplication 64 x 64
729 *******************************************************/
731 #if defined (L_muldi3)
735 ;; A[0..7]: In: Multiplicand
746 ;; B[0..7]: In: Multiplier
756 #if defined (__AVR_HAVE_MUL__)
758 ;; Define C[] for convenience
759 ;; Notice that parts of C[] overlap A[] respective B[]
770 ;; R25:R18 *= R17:R10
771 ;; Ordinary ABI-Function
779 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
782 mul A7,B0 $ $ mov C7,r0
783 mul A0,B7 $ $ add C7,r0
784 mul A6,B1 $ $ add C7,r0
785 mul A6,B0 $ mov C6,r0 $ add C7,r1
786 mul B6,A1 $ $ add C7,r0
787 mul B6,A0 $ add C6,r0 $ adc C7,r1
790 mul A2,B4 $ add C6,r0 $ adc C7,r1
791 mul A3,B4 $ $ add C7,r0
792 mul A2,B5 $ $ add C7,r0
809 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
819 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
829 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
833 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
868 #else /* !HAVE_MUL */
882 ;; R25:R18 *= R17:R10
883 ;; Ordinary ABI-Function
899 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
900 ;; where N = 64 - Loop.
901 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
902 ;; B[] will have its initial Value again.
903 LSR B7 $ ror B6 $ ror B5 $ ror B4
904 ror B3 $ ror B2 $ ror B1 $ ror B0
906 ;; If the N-th Bit of B[] was set then...
908 ;; ...finish Rotation...
911 ;; ...and add A[] * 2^N to the Result C[]
912 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
913 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
915 1: ;; Multiply A[] by 2
916 LSL A0 $ rol A1 $ rol A2 $ rol A3
917 rol A4 $ rol A5 $ rol A6 $ rol A7
922 ;; We expanded the Result in C[]
923 ;; Copy Result to the Return Register A[]
947 #endif /* HAVE_MUL */
967 #endif /* L_muldi3 */
969 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
972 .section .text.libgcc.div, "ax", @progbits
974 /*******************************************************
975 Division 8 / 8 => (result + remainder)
976 *******************************************************/
977 #define r_rem r25 /* remainder */
978 #define r_arg1 r24 /* dividend, quotient */
979 #define r_arg2 r22 /* divisor */
980 #define r_cnt r23 /* loop count */
982 #if defined (L_udivmodqi4)
;; NOTE(review): restoring shift/subtract 8-bit unsigned div/mod
;; (quotient in r_arg1, remainder in r_rem). Labels and a few
;; instructions are not visible in this chunk.
984 sub r_rem,r_rem ; clear remainder and carry
985 ldi r_cnt,9 ; init loop counter (8 bits + 1)
986 rjmp __udivmodqi4_ep ; jump to entry point
988 rol r_rem ; shift dividend bit into remainder
989 cp r_rem,r_arg2 ; compare remainder & divisor
990 brcs __udivmodqi4_ep ; skip subtract if remainder < divisor (carry set)
991 sub r_rem,r_arg2 ; remainder -= divisor
993 rol r_arg1 ; shift dividend; CARRY becomes inverted quotient bit
994 dec r_cnt ; decrement loop counter
995 brne __udivmodqi4_loop
996 com r_arg1 ; complement result
997 ; because C flag was complemented in loop
1000 #endif /* defined (L_udivmodqi4) */
1002 #if defined (L_divmodqi4)
1004 bst r_arg1,7 ; store sign of dividend
1005 mov __tmp_reg__,r_arg1
1006 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1008 neg r_arg1 ; dividend negative : negate
1010 neg r_arg2 ; divisor negative : negate
1011 XCALL __udivmodqi4 ; do the unsigned div/mod
1013 neg r_rem ; correct remainder sign
1016 neg r_arg1 ; correct result sign
1020 #endif /* defined (L_divmodqi4) */
1028 /*******************************************************
1029 Division 16 / 16 => (result + remainder)
1030 *******************************************************/
1031 #define r_remL r26 /* remainder Low */
1032 #define r_remH r27 /* remainder High */
1034 /* return: remainder */
1035 #define r_arg1L r24 /* dividend Low */
1036 #define r_arg1H r25 /* dividend High */
1038 /* return: quotient */
1039 #define r_arg2L r22 /* divisor Low */
1040 #define r_arg2H r23 /* divisor High */
1042 #define r_cnt r21 /* loop count */
1044 #if defined (L_udivmodhi4)
1047 sub r_remH,r_remH ; clear remainder and carry
1048 ldi r_cnt,17 ; init loop counter
1049 rjmp __udivmodhi4_ep ; jump to entry point
1051 rol r_remL ; shift dividend into remainder
1053 cp r_remL,r_arg2L ; compare remainder & divisor
1055 brcs __udivmodhi4_ep ; remainder < divisor
1056 sub r_remL,r_arg2L ; restore remainder
1059 rol r_arg1L ; shift dividend (with CARRY)
1061 dec r_cnt ; decrement loop counter
1062 brne __udivmodhi4_loop
1065 ; div/mod results to return registers, as for the div() function
1066 mov_l r_arg2L, r_arg1L ; quotient
1067 mov_h r_arg2H, r_arg1H
1068 mov_l r_arg1L, r_remL ; remainder
1069 mov_h r_arg1H, r_remH
1072 #endif /* defined (L_udivmodhi4) */
1074 #if defined (L_divmodhi4)
1078 bst r_arg1H,7 ; store sign of dividend
1079 mov __tmp_reg__,r_arg2H
1081 com __tmp_reg__ ; r0.7 is sign of result
1082 rcall __divmodhi4_neg1 ; dividend negative: negate
1085 rcall __divmodhi4_neg2 ; divisor negative: negate
1086 XCALL __udivmodhi4 ; do the unsigned div/mod
1088 rcall __divmodhi4_neg2 ; correct remainder sign
1089 brtc __divmodhi4_exit
1091 ;; correct dividend/remainder sign
1097 ;; correct divisor/result sign
1104 #endif /* defined (L_divmodhi4) */
1117 /*******************************************************
1118 Division 24 / 24 => (result + remainder)
1119 *******************************************************/
1121 ;; A[0..2]: In: Dividend; Out: Quotient
1126 ;; B[0..2]: In: Divisor; Out: Remainder
1131 ;; C[0..2]: Expand remainder
1132 #define C0 __zero_reg__
1139 #if defined (L_udivmodpsi4)
1140 ;; R24:R22 = R24:R22 udiv R20:R18
1141 ;; R20:R18 = R24:R22 umod R20:R18
1142 ;; Clobbers: R21, R25, R26
;; NOTE(review): 24-bit restoring shift/subtract div/mod; several
;; instructions are not visible in this chunk -- the comments below
;; describe only the visible lines.
1147 ; Clear remainder and carry. C0 is already 0
1150 ; jump to entry point
1151 rjmp __udivmodpsi4_start
1153 ; shift dividend into remainder
1157 ; compare remainder & divisor
1161 brcs __udivmodpsi4_start ; skip subtract if remainder < divisor
1162 sub C0, B0 ; remainder -= divisor (low byte visible here)
1165 __udivmodpsi4_start:
1166 ; shift dividend (with CARRY)
1170 ; decrement loop counter
1172 brne __udivmodpsi4_loop
1176 ; div/mod results to return registers
1181 clr __zero_reg__ ; restore __zero_reg__ (it was aliased as C0 above)
1184 #endif /* defined (L_udivmodpsi4) */
1186 #if defined (L_divmodpsi4)
1187 ;; R24:R22 = R24:R22 div R20:R18
1188 ;; R20:R18 = R24:R22 mod R20:R18
1189 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1192 ; R0.7 will contain the sign of the result:
1193 ; R0.7 = A.sign ^ B.sign
1195 ; T-flag = sign of dividend
1199 ; Adjust dividend's sign
1200 rcall __divmodpsi4_negA
1202 ; Adjust divisor's sign
1204 rcall __divmodpsi4_negB
1206 ; Do the unsigned div/mod
1209 ; Adjust quotient's sign
1211 rcall __divmodpsi4_negA
1213 ; Adjust remainder's sign
1214 brtc __divmodpsi4_end
1217 ; Correct divisor/remainder sign
1225 ; Correct dividend/quotient sign
1236 #endif /* defined (L_divmodpsi4) */
1252 /*******************************************************
1253 Division 32 / 32 => (result + remainder)
1254 *******************************************************/
1255 #define r_remHH r31 /* remainder High */
1258 #define r_remL r26 /* remainder Low */
1260 /* return: remainder */
1261 #define r_arg1HH r25 /* dividend High */
1262 #define r_arg1HL r24
1264 #define r_arg1L r22 /* dividend Low */
1266 /* return: quotient */
1267 #define r_arg2HH r21 /* divisor High */
1268 #define r_arg2HL r20
1270 #define r_arg2L r18 /* divisor Low */
1272 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1274 #if defined (L_udivmodsi4)
;; NOTE(review): 32-bit restoring shift/subtract unsigned div/mod;
;; some instructions are not visible in this chunk.
1276 ldi r_remL, 33 ; init loop counter (32 bits + 1)
1279 sub r_remH,r_remH ; clear remainder and carry
1280 mov_l r_remHL, r_remL
1281 mov_h r_remHH, r_remH
1282 rjmp __udivmodsi4_ep ; jump to entry point
1284 rol r_remL ; shift dividend bit into remainder
1288 cp r_remL,r_arg2L ; compare remainder & divisor (32-bit)
1290 cpc r_remHL,r_arg2HL
1291 cpc r_remHH,r_arg2HH
1292 brcs __udivmodsi4_ep ; skip subtract if remainder < divisor
1293 sub r_remL,r_arg2L ; remainder -= divisor (32-bit)
1295 sbc r_remHL,r_arg2HL
1296 sbc r_remHH,r_arg2HH
1298 rol r_arg1L ; shift dividend (with CARRY)
1302 dec r_cnt ; decrement loop counter
1303 brne __udivmodsi4_loop
1304 ; __zero_reg__ now restored (r_cnt == 0)
1309 ; div/mod results to return registers, as for the ldiv() function
1310 mov_l r_arg2L, r_arg1L ; quotient
1311 mov_h r_arg2H, r_arg1H
1312 mov_l r_arg2HL, r_arg1HL
1313 mov_h r_arg2HH, r_arg1HH
1314 mov_l r_arg1L, r_remL ; remainder
1315 mov_h r_arg1H, r_remH
1316 mov_l r_arg1HL, r_remHL
1317 mov_h r_arg1HH, r_remHH
1320 #endif /* defined (L_udivmodsi4) */
1322 #if defined (L_divmodsi4)
1324 mov __tmp_reg__,r_arg2HH
1325 bst r_arg1HH,7 ; store sign of dividend
1327 com __tmp_reg__ ; r0.7 is sign of result
1328 rcall __divmodsi4_neg1 ; dividend negative: negate
1331 rcall __divmodsi4_neg2 ; divisor negative: negate
1332 XCALL __udivmodsi4 ; do the unsigned div/mod
1333 sbrc __tmp_reg__, 7 ; correct quotient sign
1334 rcall __divmodsi4_neg2
1335 brtc __divmodsi4_exit ; correct remainder sign
1337 ;; correct dividend/remainder sign
1347 ;; correct divisor/quotient sign
1358 #endif /* defined (L_divmodsi4) */
1374 /*******************************************************
1377 *******************************************************/
1379 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1380 ;; at least 16k of Program Memory. For smaller Devices, depend
1381 ;; on MOVW and SP Size. There is a Connexion between SP Size and
1382 ;; Flash Size so that SP Size can be used to test for Flash Size.
1384 #if defined (__AVR_HAVE_JMP_CALL__)
1385 # define SPEED_DIV 8
1386 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1387 # define SPEED_DIV 16
1389 # define SPEED_DIV 0
1392 ;; A[0..7]: In: Dividend;
1393 ;; Out: Quotient (T = 0)
1394 ;; Out: Remainder (T = 1)
1404 ;; B[0..7]: In: Divisor; Out: Clobber
1414 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1424 ;; Holds Signs during Division Routine
1425 #define SS __tmp_reg__
1427 ;; Bit-Counter in Division Routine
1428 #define R_cnt __zero_reg__
1430 ;; Scratch Register for Negation
1433 #if defined (L_udivdi3)
1435 ;; R25:R18 = R24:R18 umod R17:R10
1436 ;; Ordinary ABI-Function
1440 rjmp __udivdi3_umoddi3
1443 ;; R25:R18 = R24:R18 udiv R17:R10
1444 ;; Ordinary ABI-Function
1450 DEFUN __udivdi3_umoddi3
1461 ENDF __udivdi3_umoddi3
1462 #endif /* L_udivdi3 */
1464 #if defined (L_udivmod64)
1466 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1467 ;; No Registers saved/restored; the Callers will take Care.
1468 ;; Preserves B[] and T-flag
1469 ;; T = 0: Compute Quotient in A[]
1470 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1474 ;; Clear Remainder (C6, C7 will follow)
1481 #if SPEED_DIV == 0 || SPEED_DIV == 16
1482 ;; Initialize Loop-Counter
1485 #endif /* SPEED_DIV */
1492 1: ;; Compare shifted Dividend against Divisor
1493 ;; If -- even after Shifting -- it is smaller...
1494 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1495 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1498 ;; ...then we can subtract it. Thus, it is legal to shift left
1499 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1500 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1501 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1502 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1508 ;; Shifted 64 Bits: A7 has traveled to C7
1510 ;; Divisor is greater than Dividend. We have:
1513 ;; Thus, we can return immediately
1516 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1519 ;; Push of A7 is not needed because C7 is still 0
1523 #elif SPEED_DIV == 16
1525 ;; Compare shifted Dividend against Divisor
1533 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1534 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1535 wmov C2,A6 $ wmov C0,A4
1536 wmov A6,A2 $ wmov A4,A0
1537 wmov A2,C6 $ wmov A0,C4
1539 ;; Set Bit Counter to 32
1543 #error SPEED_DIV = ?
1544 #endif /* SPEED_DIV */
1546 ;; The very Division + Remainder Routine
1548 3: ;; Left-shift Dividend...
1549 lsl A0 $ rol A1 $ rol A2 $ rol A3
1550 rol A4 $ rol A5 $ rol A6 $ rol A7
1552 ;; ...into Remainder
1553 rol C0 $ rol C1 $ rol C2 $ rol C3
1554 rol C4 $ rol C5 $ rol C6 $ rol C7
1556 ;; Compare Remainder and Divisor
1557 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1558 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1562 ;; Divisor fits into Remainder: Subtract it from Remainder...
1563 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1564 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1566 ;; ...and set according Bit in the upcoming Quotient
1567 ;; The Bit will travel to its final Position
1570 4: ;; This Bit is done
1573 ;; __zero_reg__ is 0 again
1575 ;; T = 0: We are fine with the Quotient in A[]
1576 ;; T = 1: Copy Remainder to A[]
1582 ;; Move the Sign of the Result to SS.7
1588 #endif /* L_udivmod64 */
1591 #if defined (L_divdi3)
1593 ;; R25:R18 = R24:R18 mod R17:R10
1594 ;; Ordinary ABI-Function
1598 rjmp __divdi3_moddi3
1601 ;; R25:R18 = R24:R18 div R17:R10
1602 ;; Ordinary ABI-Function
1608 DEFUN __divdi3_moddi3
1613 ;; Both Signs are 0: the following Complexity is not needed
1614 XJMP __udivdi3_umoddi3
1615 #endif /* SPEED_DIV */
1618 ;; Save 12 Registers: Y, 17...8
1619 ;; No Frame needed (X = 0)
1622 ldi r30, lo8(gs(1f))
1623 ldi r31, hi8(gs(1f))
1624 XJMP __prologue_saves__ + ((18 - 12) * 2)
1626 1: ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1627 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1630 ;; Adjust Dividend's Sign as needed
1632 ;; Compiling for Speed we know that at least one Sign must be < 0
1633 ;; Thus, if A[] >= 0 then we know B[] < 0
1637 #endif /* SPEED_DIV */
1641 ;; Adjust Divisor's Sign and SS.7 as needed
1648 com B4 $ com B5 $ com B6 $ com B7
1649 $ com B1 $ com B2 $ com B3
1651 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
1652 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
1654 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1657 ;; Adjust Result's Sign
1658 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1663 #endif /* __AVR_HAVE_JMP_CALL__ */
1666 4: ;; Epilogue: Restore the Z = 12 Registers and return
1668 #if defined (__AVR_HAVE_SPH__)
1672 #endif /* #SP = 8/16 */
1674 XJMP __epilogue_restores__ + ((18 - 12) * 2)
1676 ENDF __divdi3_moddi3
1682 #endif /* L_divdi3 */
1684 .section .text.libgcc, "ax", @progbits
1686 #define TT __tmp_reg__
1688 #if defined (L_adddi3)
1690 ;; (plus:DI (reg:DI 18)
1693 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
1694 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
1697 #endif /* L_adddi3 */
1699 #if defined (L_adddi3_s8)
1701 ;; (plus:DI (reg:DI 18)
1702 ;; (sign_extend:SI (reg:QI 26))))
1707 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
1708 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
1711 #endif /* L_adddi3_s8 */
1713 #if defined (L_subdi3)
1715 ;; (minus:DI (reg:DI 18)
1718 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
1719 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
1722 #endif /* L_subdi3 */
1724 #if defined (L_cmpdi2)
1726 ;; (compare (reg:DI 18)
1729 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
1730 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
1733 #endif /* L_cmpdi2 */
1735 #if defined (L_cmpdi2_s8)
1737 ;; (compare (reg:DI 18)
1738 ;; (sign_extend:SI (reg:QI 26))))
1743 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
1744 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
1747 #endif /* L_cmpdi2_s8 */
1749 #if defined (L_negdi2)
1752 com A4 $ com A5 $ com A6 $ com A7
1753 $ com A1 $ com A2 $ com A3
1755 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
1756 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
1760 #endif /* L_negdi2 */
1792 .section .text.libgcc.prologue, "ax", @progbits
1794 /**********************************
1795 * This is a prologue subroutine
1796 **********************************/
1797 #if defined (L_prologue)
1799 ;; This function does not clobber T-flag; 64-bit division relies on it
1800 DEFUN __prologue_saves__
1819 #if !defined (__AVR_HAVE_SPH__)
1824 #elif defined (__AVR_XMEGA__)
1836 in __tmp_reg__,__SREG__
1839 out __SREG__,__tmp_reg__
1841 #endif /* #SP = 8/16 */
1843 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1849 ENDF __prologue_saves__
1850 #endif /* defined (L_prologue) */
1853 * This is an epilogue subroutine
1855 #if defined (L_epilogue)
1857 DEFUN __epilogue_restores__
1875 #if !defined (__AVR_HAVE_SPH__)
1880 #elif defined (__AVR_XMEGA__)
1883 adc r29,__zero_reg__
1890 adc r29,__zero_reg__
1891 in __tmp_reg__,__SREG__
1894 out __SREG__,__tmp_reg__
1898 #endif /* #SP = 8/16 */
1900 ENDF __epilogue_restores__
1901 #endif /* defined (L_epilogue) */
1904 .section .fini9,"ax",@progbits
1910 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
1912 .section .fini0,"ax",@progbits
1916 #endif /* defined (L_exit) */
1924 #endif /* defined (L_cleanup) */
1927 .section .text.libgcc, "ax", @progbits
1930 DEFUN __tablejump2__
1937 #if defined (__AVR_HAVE_LPMX__)
1940 mov r30, __tmp_reg__
1941 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1947 #else /* !HAVE_LPMX */
1953 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1954 in __tmp_reg__, __EIND__
1958 #endif /* !HAVE_LPMX */
1960 #endif /* defined (L_tablejump) */
1963 .section .init4,"ax",@progbits
1964 DEFUN __do_copy_data
1965 #if defined(__AVR_HAVE_ELPMX__)
1966 ldi r17, hi8(__data_end)
1967 ldi r26, lo8(__data_start)
1968 ldi r27, hi8(__data_start)
1969 ldi r30, lo8(__data_load_start)
1970 ldi r31, hi8(__data_load_start)
1971 ldi r16, hh8(__data_load_start)
1973 rjmp .L__do_copy_data_start
1974 .L__do_copy_data_loop:
1977 .L__do_copy_data_start:
1978 cpi r26, lo8(__data_end)
1980 brne .L__do_copy_data_loop
1981 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
1982 ldi r17, hi8(__data_end)
1983 ldi r26, lo8(__data_start)
1984 ldi r27, hi8(__data_start)
1985 ldi r30, lo8(__data_load_start)
1986 ldi r31, hi8(__data_load_start)
1987 ldi r16, hh8(__data_load_start - 0x10000)
1988 .L__do_copy_data_carry:
1991 rjmp .L__do_copy_data_start
1992 .L__do_copy_data_loop:
1996 brcs .L__do_copy_data_carry
1997 .L__do_copy_data_start:
1998 cpi r26, lo8(__data_end)
2000 brne .L__do_copy_data_loop
2001 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2002 ldi r17, hi8(__data_end)
2003 ldi r26, lo8(__data_start)
2004 ldi r27, hi8(__data_start)
2005 ldi r30, lo8(__data_load_start)
2006 ldi r31, hi8(__data_load_start)
2007 rjmp .L__do_copy_data_start
2008 .L__do_copy_data_loop:
2009 #if defined (__AVR_HAVE_LPMX__)
2016 .L__do_copy_data_start:
2017 cpi r26, lo8(__data_end)
2019 brne .L__do_copy_data_loop
2020 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2021 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2022 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2023 out __RAMPZ__, __zero_reg__
2024 #endif /* ELPM && RAMPD */
2026 #endif /* L_copy_data */
2028 /* __do_clear_bss is only necessary if there is anything in .bss section. */
2031 .section .init4,"ax",@progbits
2032 DEFUN __do_clear_bss
2033 ldi r17, hi8(__bss_end)
2034 ldi r26, lo8(__bss_start)
2035 ldi r27, hi8(__bss_start)
2036 rjmp .do_clear_bss_start
2039 .do_clear_bss_start:
2040 cpi r26, lo8(__bss_end)
2042 brne .do_clear_bss_loop
2044 #endif /* L_clear_bss */
2046 /* __do_global_ctors and __do_global_dtors are only necessary
2047 if there are any constructors/destructors. */
2050 .section .init6,"ax",@progbits
2051 DEFUN __do_global_ctors
2052 #if defined(__AVR_HAVE_ELPM__)
2053 ldi r17, hi8(__ctors_start)
2054 ldi r28, lo8(__ctors_end)
2055 ldi r29, hi8(__ctors_end)
2056 ldi r16, hh8(__ctors_end)
2057 rjmp .L__do_global_ctors_start
2058 .L__do_global_ctors_loop:
2060 sbc r16, __zero_reg__
2064 XCALL __tablejump_elpm__
2065 .L__do_global_ctors_start:
2066 cpi r28, lo8(__ctors_start)
2068 ldi r24, hh8(__ctors_start)
2070 brne .L__do_global_ctors_loop
2072 ldi r17, hi8(__ctors_start)
2073 ldi r28, lo8(__ctors_end)
2074 ldi r29, hi8(__ctors_end)
2075 rjmp .L__do_global_ctors_start
2076 .L__do_global_ctors_loop:
2081 .L__do_global_ctors_start:
2082 cpi r28, lo8(__ctors_start)
2084 brne .L__do_global_ctors_loop
2085 #endif /* defined(__AVR_HAVE_ELPM__) */
2086 ENDF __do_global_ctors
2087 #endif /* L_ctors */
2090 .section .fini6,"ax",@progbits
2091 DEFUN __do_global_dtors
2092 #if defined(__AVR_HAVE_ELPM__)
2093 ldi r17, hi8(__dtors_end)
2094 ldi r28, lo8(__dtors_start)
2095 ldi r29, hi8(__dtors_start)
2096 ldi r16, hh8(__dtors_start)
2097 rjmp .L__do_global_dtors_start
2098 .L__do_global_dtors_loop:
2100 sbc r16, __zero_reg__
2104 XCALL __tablejump_elpm__
2105 .L__do_global_dtors_start:
2106 cpi r28, lo8(__dtors_end)
2108 ldi r24, hh8(__dtors_end)
2110 brne .L__do_global_dtors_loop
2112 ldi r17, hi8(__dtors_end)
2113 ldi r28, lo8(__dtors_start)
2114 ldi r29, hi8(__dtors_start)
2115 rjmp .L__do_global_dtors_start
2116 .L__do_global_dtors_loop:
2121 .L__do_global_dtors_start:
2122 cpi r28, lo8(__dtors_end)
2124 brne .L__do_global_dtors_loop
2125 #endif /* defined(__AVR_HAVE_ELPM__) */
2126 ENDF __do_global_dtors
2127 #endif /* L_dtors */
2129 .section .text.libgcc, "ax", @progbits
2131 #ifdef L_tablejump_elpm
2132 DEFUN __tablejump_elpm__
2133 #if defined (__AVR_HAVE_ELPMX__)
2134 elpm __tmp_reg__, Z+
2136 mov r30, __tmp_reg__
2137 #if defined (__AVR_HAVE_RAMPD__)
2138 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2139 out __RAMPZ__, __zero_reg__
2141 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2147 #elif defined (__AVR_HAVE_ELPM__)
2153 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2154 in __tmp_reg__, __EIND__
2159 ENDF __tablejump_elpm__
2160 #endif /* defined (L_tablejump_elpm) */
2162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2163 ;; Loading n bytes from Flash; n = 3,4
2164 ;; R22... = Flash[Z]
2165 ;; Clobbers: __tmp_reg__
2167 #if (defined (L_load_3) \
2168 || defined (L_load_4)) \
2169 && !defined (__AVR_HAVE_LPMX__)
2177 .macro .load dest, n
2180 .if \dest != D0+\n-1
2187 #if defined (L_load_3)
2194 #endif /* L_load_3 */
2196 #if defined (L_load_4)
2204 #endif /* L_load_4 */
2206 #endif /* L_load_3 || L_load_3 */
2208 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2209 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2210 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2211 ;; Clobbers: __tmp_reg__, R21, R30, R31
2213 #if (defined (L_xload_1) \
2214 || defined (L_xload_2) \
2215 || defined (L_xload_3) \
2216 || defined (L_xload_4))
2224 ;; Register containing bits 16+ of the address
2228 .macro .xload dest, n
2229 #if defined (__AVR_HAVE_ELPMX__)
2231 #elif defined (__AVR_HAVE_ELPM__)
2234 .if \dest != D0+\n-1
2236 adc HHI8, __zero_reg__
2239 #elif defined (__AVR_HAVE_LPMX__)
2244 .if \dest != D0+\n-1
2248 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2249 .if \dest == D0+\n-1
2250 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2251 out __RAMPZ__, __zero_reg__
2256 #if defined (L_xload_1)
2258 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2267 #if defined (__AVR_HAVE_ELPM__)
2269 #endif /* __AVR_HAVE_ELPM__ */
2274 #endif /* LPMx && ! ELPM */
2276 #endif /* L_xload_1 */
2278 #if defined (L_xload_2)
2282 #if defined (__AVR_HAVE_ELPM__)
2284 #endif /* __AVR_HAVE_ELPM__ */
2292 #endif /* L_xload_2 */
2294 #if defined (L_xload_3)
2298 #if defined (__AVR_HAVE_ELPM__)
2300 #endif /* __AVR_HAVE_ELPM__ */
2310 #endif /* L_xload_3 */
2312 #if defined (L_xload_4)
2316 #if defined (__AVR_HAVE_ELPM__)
2318 #endif /* __AVR_HAVE_ELPM__ */
2330 #endif /* L_xload_4 */
2332 #endif /* L_xload_{1|2|3|4} */
2334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2335 ;; memcopy from Address Space __pgmx to RAM
2336 ;; R23:Z = Source Address
2337 ;; X = Destination Address
2338 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2340 #if defined (L_movmemx)
2346 ;; #Bytes to copy fits in 8 Bits (1..255)
2347 ;; Zero-extend Loop Counter
2360 #if defined (__AVR_HAVE_ELPM__)
2364 0: ;; Load 1 Byte from Flash...
2366 #if defined (__AVR_HAVE_ELPMX__)
2368 #elif defined (__AVR_HAVE_ELPM__)
2371 adc HHI8, __zero_reg__
2373 #elif defined (__AVR_HAVE_LPMX__)
2380 ;; ...and store that Byte to RAM Destination
2384 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2385 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2386 out __RAMPZ__, __zero_reg__
2387 #endif /* ELPM && RAMPD */
2392 1: ;; Read 1 Byte from RAM...
2394 ;; and store that Byte to RAM Destination
2404 #endif /* L_movmemx */
2407 .section .text.libgcc.builtins, "ax", @progbits
2409 /**********************************
2410 * Find first set Bit (ffs)
2411 **********************************/
2413 #if defined (L_ffssi2)
2414 ;; find first set bit
2415 ;; r25:r24 = ffs32 (r25:r22)
2416 ;; clobbers: r22, r26
2434 #endif /* defined (L_ffssi2) */
2436 #if defined (L_ffshi2)
2437 ;; find first set bit
2438 ;; r25:r24 = ffs16 (r25:r24)
2442 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2443 ;; Some cores have problem skipping 2-word instruction
2447 cpse r24, __zero_reg__
2448 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2449 1: XJMP __loop_ffsqi2
2455 #endif /* defined (L_ffshi2) */
2457 #if defined (L_loop_ffsqi2)
2458 ;; Helper for ffshi2, ffssi2
2459 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2470 #endif /* defined (L_loop_ffsqi2) */
2473 /**********************************
2474 * Count trailing Zeros (ctz)
2475 **********************************/
2477 #if defined (L_ctzsi2)
2478 ;; count trailing zeros
2479 ;; r25:r24 = ctz32 (r25:r22)
2480 ;; clobbers: r26, r22
2482 ;; Note that ctz(0) is undefined for GCC
2488 #endif /* defined (L_ctzsi2) */
2490 #if defined (L_ctzhi2)
2491 ;; count trailing zeros
2492 ;; r25:r24 = ctz16 (r25:r24)
2495 ;; Note that ctz(0) is undefined for GCC
2501 #endif /* defined (L_ctzhi2) */
2504 /**********************************
2505 * Count leading Zeros (clz)
2506 **********************************/
2508 #if defined (L_clzdi2)
2509 ;; count leading zeros
2510 ;; r25:r24 = clz64 (r25:r18)
2511 ;; clobbers: r22, r23, r26
2524 #endif /* defined (L_clzdi2) */
2526 #if defined (L_clzsi2)
2527 ;; count leading zeros
2528 ;; r25:r24 = clz32 (r25:r22)
2540 #endif /* defined (L_clzsi2) */
2542 #if defined (L_clzhi2)
2543 ;; count leading zeros
2544 ;; r25:r24 = clz16 (r25:r24)
2566 #endif /* defined (L_clzhi2) */
2569 /**********************************
2571 **********************************/
2573 #if defined (L_paritydi2)
2574 ;; r25:r24 = parity64 (r25:r18)
2575 ;; clobbers: __tmp_reg__
2583 #endif /* defined (L_paritydi2) */
2585 #if defined (L_paritysi2)
2586 ;; r25:r24 = parity32 (r25:r22)
2587 ;; clobbers: __tmp_reg__
2593 #endif /* defined (L_paritysi2) */
2595 #if defined (L_parityhi2)
2596 ;; r25:r24 = parity16 (r25:r24)
2597 ;; clobbers: __tmp_reg__
2603 ;; r25:r24 = parity8 (r24)
2604 ;; clobbers: __tmp_reg__
2606 ;; parity is in r24[0..7]
2607 mov __tmp_reg__, r24
2609 eor r24, __tmp_reg__
2610 ;; parity is in r24[0..3]
2614 ;; parity is in r24[0,3]
2617 ;; parity is in r24[0]
2622 #endif /* defined (L_parityhi2) */
2625 /**********************************
2627 **********************************/
2629 #if defined (L_popcounthi2)
2631 ;; r25:r24 = popcount16 (r25:r24)
2632 ;; clobbers: __tmp_reg__
;; Shared tail for the popcount routines: __popcountsi2 and
;; __popcountdi2 XJMP here after folding their upper bytes.
;; NOTE(review): lines 2643 and 2645 are elided in this view; only the
;; visible instruction is commented.
2642 DEFUN __popcounthi2_tail
;; accumulate a partial byte count kept in __tmp_reg__ into r24
2644 add r24, __tmp_reg__
2646 ENDF __popcounthi2_tail
2647 #endif /* defined (L_popcounthi2) */
2649 #if defined (L_popcountsi2)
2651 ;; r25:r24 = popcount32 (r25:r22)
2652 ;; clobbers: __tmp_reg__
2659 XJMP __popcounthi2_tail
2661 #endif /* defined (L_popcountsi2) */
2663 #if defined (L_popcountdi2)
2665 ;; r25:r24 = popcount64 (r25:r18)
2666 ;; clobbers: r22, r23, __tmp_reg__
2675 XJMP __popcounthi2_tail
2677 #endif /* defined (L_popcountdi2) */
2679 #if defined (L_popcountqi2)
2681 ;; r24 = popcount8 (r24)
2682 ;; clobbers: __tmp_reg__
2684 mov __tmp_reg__, r24
2688 adc r24, __zero_reg__
2690 adc r24, __zero_reg__
2692 adc r24, __zero_reg__
2694 adc r24, __zero_reg__
2696 adc r24, __zero_reg__
2698 adc r24, __tmp_reg__
2701 #endif /* defined (L_popcountqi2) */
2704 /**********************************
2706 **********************************/
2708 ;; swap two registers with different register number
2715 #if defined (L_bswapsi2)
2717 ;; r25:r22 = bswap32 (r25:r22)
2723 #endif /* defined (L_bswapsi2) */
2725 #if defined (L_bswapdi2)
2727 ;; r25:r18 = bswap64 (r25:r18)
2735 #endif /* defined (L_bswapdi2) */
2738 /**********************************
2740 **********************************/
2742 #if defined (L_ashrdi3)
2743 ;; Arithmetic shift right
2744 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
2762 #endif /* defined (L_ashrdi3) */
2764 #if defined (L_lshrdi3)
2765 ;; Logic shift right
2766 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
2784 #endif /* defined (L_lshrdi3) */
2786 #if defined (L_ashldi3)
2788 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
2806 #endif /* defined (L_ashldi3) */
2808 #if defined (L_rotldi3)
2810 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
2823 adc r18, __zero_reg__
2829 #endif /* defined (L_rotldi3) */
2832 .section .text.libgcc.fmul, "ax", @progbits
2834 /***********************************************************/
2835 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
2836 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
2837 /***********************************************************/
2843 #define A0 __tmp_reg__
2846 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
2847 ;;; Clobbers: r24, r25, __tmp_reg__
2849 ;; A0.7 = negate result?
2857 #endif /* L_fmuls */
2860 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
2861 ;;; Clobbers: r24, r25, __tmp_reg__
2863 ;; A0.7 = negate result?
2868 ;; Helper for __fmuls and __fmulsu
2873 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2874 ;; Some cores have problem skipping 2-word instruction
2879 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2882 ;; C = -C iff A0.7 = 1
2886 #endif /* L_fmulsu */
2890 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
2891 ;;; Clobbers: r24, r25, __tmp_reg__
2898 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
2919 #include "lib1funcs-fixed.S"