1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Denis Chertykov <chertykov@gmail.com>
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 #define __zero_reg__ r1
26 #define __tmp_reg__ r0
28 #if defined (__AVR_HAVE_SPH__)
32 #define __RAMPZ__ 0x3B
35 /* Most of the functions here are called directly from avr.md
36 patterns, instead of using the standard libcall mechanisms.
37 This can make better code because GCC knows exactly which
38 of the call-used registers (not all of them) are clobbered. */
40 /* FIXME: At present, there is no SORT directive in the linker
41 script so that we must not assume that different modules
42 in the same input section like .libgcc.text.mul will be
43 located close together. Therefore, we cannot use
44 RCALL/RJMP to call a function like __udivmodhi4 from
45 __divmodhi4 and have to use lengthy XCALL/XJMP even
46 though they are in the same input section and all same
47 input sections together are small enough to reach every
48 location with a RCALL/RJMP instruction. */
50 .macro mov_l r_dest, r_src
51 #if defined (__AVR_HAVE_MOVW__)
58 .macro mov_h r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
66 .macro wmov r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
71 mov \r_dest+1, \r_src+1
75 #if defined (__AVR_HAVE_JMP_CALL__)
102 ;; Negate a 2-byte value held in consecutive registers
109 ;; Negate a 4-byte value held in consecutive registers
110 ;; Sets the V flag for signed overflow tests if REG >= 16
122 adc \reg, __zero_reg__
123 adc \reg+1, __zero_reg__
124 adc \reg+2, __zero_reg__
125 adc \reg+3, __zero_reg__
129 #define exp_lo(N) hlo8 ((N) << 23)
130 #define exp_hi(N) hhi8 ((N) << 23)
133 .section .text.libgcc.mul, "ax", @progbits
135 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
136 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
137 #if !defined (__AVR_HAVE_MUL__)
138 /*******************************************************
139 Multiplication 8 x 8 without MUL
140 *******************************************************/
141 #if defined (L_mulqi3)
143 #define r_arg2 r22 /* multiplicand */
144 #define r_arg1 r24 /* multiplier */
145 #define r_res __tmp_reg__ /* result */
148 clr r_res ; clear result
152 add r_arg2,r_arg2 ; shift multiplicand
153 breq __mulqi3_exit ; while multiplicand != 0
155 brne __mulqi3_loop ; exit if multiplier = 0
157 mov r_arg1,r_res ; result to return register
165 #endif /* defined (L_mulqi3) */
168 /*******************************************************
169 Widening Multiplication 16 = 8 x 8 without MUL
170 Multiplication 16 x 16 without MUL
171 *******************************************************/
178 ;; Output overlaps input, thus expand result in CC0/1
181 #define CC0 __tmp_reg__
184 #if defined (L_umulqihi3)
185 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
186 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
187 ;;; Clobbers: __tmp_reg__, R21..R23
193 #endif /* L_umulqihi3 */
195 #if defined (L_mulqihi3)
196 ;;; R25:R24 = (signed int) R22 * (signed int) R24
197 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
198 ;;; Clobbers: __tmp_reg__, R20..R23
204 ;; The multiplication runs twice as fast if A1 is zero, thus:
207 #ifdef __AVR_HAVE_JMP_CALL__
208 ;; Store B0 * sign of A
213 #else /* have no CALL */
214 ;; Skip sign-extension of A if A >= 0
215 ;; Same size as with the first alternative but avoids errata skip
216 ;; and is faster if A >= 0
222 #endif /* HAVE_JMP_CALL */
223 ;; 1-extend A after the multiplication
227 #endif /* L_mulqihi3 */
229 #if defined (L_mulhi3)
230 ;;; R25:R24 = R23:R22 * R25:R24
231 ;;; (C1:C0) = (A1:A0) * (B1:B0)
232 ;;; Clobbers: __tmp_reg__, R21..R23
240 ;; Bit n of A is 1 --> C += B << n
247 ;; If B == 0 we are ready
251 ;; Carry = n-th bit of A
254 ;; If bit n of A is set, then go add B * 2^n to C
257 ;; Carry = 0 --> The ROR above acts like CP A0, 0
258 ;; Thus, it is sufficient to CPC the high part to test A against 0
260 ;; Only proceed if A != 0
263 ;; Move Result into place
268 #endif /* L_mulhi3 */
301 /*******************************************************
302 Widening Multiplication 32 = 16 x 16 without MUL
303 *******************************************************/
305 #if defined (L_umulhisi3)
315 #endif /* L_umulhisi3 */
317 #if defined (L_mulhisi3)
324 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
331 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
332 ;; Zero-extend A and __mulsi3 will run at least twice as fast
333 ;; compared to a sign-extended A.
338 ;; If A < 0 then perform the B * 0xffff.... before the
339 ;; very multiplication by initializing the high part of the
340 ;; result CC with -B.
345 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
347 #endif /* L_mulhisi3 */
350 /*******************************************************
351 Multiplication 32 x 32 without MUL
352 *******************************************************/
354 #if defined (L_mulsi3)
362 DEFUN __mulsi3_helper
367 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
369 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
372 lsl B0 $ rol B1 $ rol B2 $ rol B3
374 3: ;; A >>= 1: Carry = n-th bit of A
375 lsr A3 $ ror A2 $ ror A1 $ ror A0
378 ;; Only continue if A != 0
384 ;; All bits of A are consumed: Copy result to return register C
389 #endif /* L_mulsi3 */
408 #endif /* !defined (__AVR_HAVE_MUL__) */
409 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
411 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
412 #if defined (__AVR_HAVE_MUL__)
427 /*******************************************************
428 Widening Multiplication 32 = 16 x 16 with MUL
429 *******************************************************/
431 #if defined (L_mulhisi3)
432 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
433 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
434 ;;; Clobbers: __tmp_reg__
443 XJMP __usmulhisi3_tail
445 #endif /* L_mulhisi3 */
447 #if defined (L_usmulhisi3)
448 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
449 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
450 ;;; Clobbers: __tmp_reg__
456 DEFUN __usmulhisi3_tail
463 ENDF __usmulhisi3_tail
464 #endif /* L_usmulhisi3 */
466 #if defined (L_umulhisi3)
467 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
468 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
469 ;;; Clobbers: __tmp_reg__
476 #ifdef __AVR_HAVE_JMP_CALL__
477 ;; This function is used by many other routines, often multiple times.
478 ;; Therefore, if the flash size is not too limited, avoid the RCALL
479 ;; and invest 6 Bytes to speed things up.
494 #endif /* L_umulhisi3 */
496 /*******************************************************
497 Widening Multiplication 32 = 16 x 32 with MUL
498 *******************************************************/
500 #if defined (L_mulshisi3)
501 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
502 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
503 ;;; Clobbers: __tmp_reg__
505 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
506 ;; Some cores have problem skipping 2-word instruction
511 #endif /* __AVR_HAVE_JMP_CALL__ */
516 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
517 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
518 ;;; Clobbers: __tmp_reg__
521 ;; One-extend R27:R26 (A1:A0)
526 #endif /* L_mulshisi3 */
528 #if defined (L_muluhisi3)
529 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
530 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
531 ;;; Clobbers: __tmp_reg__
544 #endif /* L_muluhisi3 */
546 /*******************************************************
547 Multiplication 32 x 32 with MUL
548 *******************************************************/
550 #if defined (L_mulsi3)
551 ;;; R25:R22 = R25:R22 * R21:R18
552 ;;; (C3:C0) = C3:C0 * B3:B0
553 ;;; Clobbers: R26, R27, __tmp_reg__
561 ;; A1:A0 now contains the high word of A
572 #endif /* L_mulsi3 */
587 #endif /* __AVR_HAVE_MUL__ */
589 /*******************************************************
590 Multiplication 24 x 24 with MUL
591 *******************************************************/
593 #if defined (L_mulpsi3)
595 ;; A[0..2]: In: Multiplicand; Out: Product
600 ;; B[0..2]: In: Multiplier
605 #if defined (__AVR_HAVE_MUL__)
607 ;; C[0..2]: Expand Result
612 ;; R24:R22 *= R20:R18
613 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
622 mul AA2, B0 $ add C2, r0
623 mul AA0, B2 $ add C2, r0
635 #else /* !HAVE_MUL */
637 ;; C[0..2]: Expand Result
642 ;; R24:R22 *= R20:R18
643 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
651 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
652 LSR B2 $ ror B1 $ ror B0
654 ;; If the N-th Bit of B[] was set...
657 ;; ...then add A[] * 2^N to the Result C[]
658 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
660 1: ;; Multiply A[] by 2
661 LSL A0 $ rol A1 $ rol A2
663 ;; Loop until B[] is 0
664 subi B0,0 $ sbci B1,0 $ sbci B2,0
667 ;; Copy C[] to the return Register A[]
679 #endif /* HAVE_MUL */
689 #endif /* L_mulpsi3 */
691 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
693 ;; A[0..2]: In: Multiplicand
698 ;; BB: In: Multiplier
706 ;; C[] = A[] * sign_extend (BB)
734 #endif /* L_mulsqipsi3 && HAVE_MUL */
736 /*******************************************************
737 Multiplication 64 x 64
738 *******************************************************/
740 #if defined (L_muldi3)
744 ;; A[0..7]: In: Multiplicand
755 ;; B[0..7]: In: Multiplier
765 #if defined (__AVR_HAVE_MUL__)
767 ;; Define C[] for convenience
768 ;; Notice that parts of C[] overlap A[] respective B[]
779 ;; R25:R18 *= R17:R10
780 ;; Ordinary ABI-Function
788 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
791 mul A7,B0 $ $ mov C7,r0
792 mul A0,B7 $ $ add C7,r0
793 mul A6,B1 $ $ add C7,r0
794 mul A6,B0 $ mov C6,r0 $ add C7,r1
795 mul B6,A1 $ $ add C7,r0
796 mul B6,A0 $ add C6,r0 $ adc C7,r1
799 mul A2,B4 $ add C6,r0 $ adc C7,r1
800 mul A3,B4 $ $ add C7,r0
801 mul A2,B5 $ $ add C7,r0
818 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
828 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
838 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
842 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
877 #else /* !HAVE_MUL */
891 ;; R25:R18 *= R17:R10
892 ;; Ordinary ABI-Function
908 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
909 ;; where N = 64 - Loop.
910 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
911 ;; B[] will have its initial Value again.
912 LSR B7 $ ror B6 $ ror B5 $ ror B4
913 ror B3 $ ror B2 $ ror B1 $ ror B0
915 ;; If the N-th Bit of B[] was set then...
917 ;; ...finish Rotation...
920 ;; ...and add A[] * 2^N to the Result C[]
921 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
922 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
924 1: ;; Multiply A[] by 2
925 LSL A0 $ rol A1 $ rol A2 $ rol A3
926 rol A4 $ rol A5 $ rol A6 $ rol A7
931 ;; We expanded the Result in C[]
932 ;; Copy Result to the Return Register A[]
956 #endif /* HAVE_MUL */
976 #endif /* L_muldi3 */
978 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
981 .section .text.libgcc.div, "ax", @progbits
983 /*******************************************************
984 Division 8 / 8 => (result + remainder)
985 *******************************************************/
986 #define r_rem r25 /* remainder */
987 #define r_arg1 r24 /* dividend, quotient */
988 #define r_arg2 r22 /* divisor */
989 #define r_cnt r23 /* loop count */
991 #if defined (L_udivmodqi4)
993 sub r_rem,r_rem ; clear remainder and carry
994 ldi r_cnt,9 ; init loop counter
995 rjmp __udivmodqi4_ep ; jump to entry point
997 rol r_rem ; shift dividend into remainder
998 cp r_rem,r_arg2 ; compare remainder & divisor
999 brcs __udivmodqi4_ep ; remainder <= divisor
1000 sub r_rem,r_arg2 ; restore remainder
1002 rol r_arg1 ; shift dividend (with CARRY)
1003 dec r_cnt ; decrement loop counter
1004 brne __udivmodqi4_loop
1005 com r_arg1 ; complement result
1006 ; because C flag was complemented in loop
1009 #endif /* defined (L_udivmodqi4) */
1011 #if defined (L_divmodqi4)
1013 bst r_arg1,7 ; store sign of dividend
1014 mov __tmp_reg__,r_arg1
1015 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1017 neg r_arg1 ; dividend negative : negate
1019 neg r_arg2 ; divisor negative : negate
1020 XCALL __udivmodqi4 ; do the unsigned div/mod
1022 neg r_rem ; correct remainder sign
1025 neg r_arg1 ; correct result sign
1029 #endif /* defined (L_divmodqi4) */
1037 /*******************************************************
1038 Division 16 / 16 => (result + remainder)
1039 *******************************************************/
1040 #define r_remL r26 /* remainder Low */
1041 #define r_remH r27 /* remainder High */
1043 /* return: remainder */
1044 #define r_arg1L r24 /* dividend Low */
1045 #define r_arg1H r25 /* dividend High */
1047 /* return: quotient */
1048 #define r_arg2L r22 /* divisor Low */
1049 #define r_arg2H r23 /* divisor High */
1051 #define r_cnt r21 /* loop count */
1053 #if defined (L_udivmodhi4)
1056 sub r_remH,r_remH ; clear remainder and carry
1057 ldi r_cnt,17 ; init loop counter
1058 rjmp __udivmodhi4_ep ; jump to entry point
1060 rol r_remL ; shift dividend into remainder
1062 cp r_remL,r_arg2L ; compare remainder & divisor
1064 brcs __udivmodhi4_ep ; remainder < divisor
1065 sub r_remL,r_arg2L ; restore remainder
1068 rol r_arg1L ; shift dividend (with CARRY)
1070 dec r_cnt ; decrement loop counter
1071 brne __udivmodhi4_loop
1074 ; div/mod results to return registers, as for the div() function
1075 mov_l r_arg2L, r_arg1L ; quotient
1076 mov_h r_arg2H, r_arg1H
1077 mov_l r_arg1L, r_remL ; remainder
1078 mov_h r_arg1H, r_remH
1081 #endif /* defined (L_udivmodhi4) */
1083 #if defined (L_divmodhi4)
1087 bst r_arg1H,7 ; store sign of dividend
1088 mov __tmp_reg__,r_arg2H
1090 com __tmp_reg__ ; r0.7 is sign of result
1091 rcall __divmodhi4_neg1 ; dividend negative: negate
1094 rcall __divmodhi4_neg2 ; divisor negative: negate
1095 XCALL __udivmodhi4 ; do the unsigned div/mod
1097 rcall __divmodhi4_neg2 ; correct remainder sign
1098 brtc __divmodhi4_exit
1100 ;; correct dividend/remainder sign
1106 ;; correct divisor/result sign
1113 #endif /* defined (L_divmodhi4) */
1126 /*******************************************************
1127 Division 24 / 24 => (result + remainder)
1128 *******************************************************/
1130 ;; A[0..2]: In: Dividend; Out: Quotient
1135 ;; B[0..2]: In: Divisor; Out: Remainder
1140 ;; C[0..2]: Expand remainder
1141 #define C0 __zero_reg__
1148 #if defined (L_udivmodpsi4)
1149 ;; R24:R22 = R24:R22 udiv R20:R18
1150 ;; R20:R18 = R24:R22 umod R20:R18
1151 ;; Clobbers: R21, R25, R26
1156 ; Clear remainder and carry. C0 is already 0
1159 ; jump to entry point
1160 rjmp __udivmodpsi4_start
1162 ; shift dividend into remainder
1166 ; compare remainder & divisor
1170 brcs __udivmodpsi4_start ; remainder <= divisor
1171 sub C0, B0 ; restore remainder
1174 __udivmodpsi4_start:
1175 ; shift dividend (with CARRY)
1179 ; decrement loop counter
1181 brne __udivmodpsi4_loop
1185 ; div/mod results to return registers
1190 clr __zero_reg__ ; C0
1193 #endif /* defined (L_udivmodpsi4) */
1195 #if defined (L_divmodpsi4)
1196 ;; R24:R22 = R24:R22 div R20:R18
1197 ;; R20:R18 = R24:R22 mod R20:R18
1198 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1201 ; R0.7 will contain the sign of the result:
1202 ; R0.7 = A.sign ^ B.sign
1204 ; T-flag = sign of dividend
1208 ; Adjust dividend's sign
1209 rcall __divmodpsi4_negA
1211 ; Adjust divisor's sign
1213 rcall __divmodpsi4_negB
1215 ; Do the unsigned div/mod
1218 ; Adjust quotient's sign
1220 rcall __divmodpsi4_negA
1222 ; Adjust remainder's sign
1223 brtc __divmodpsi4_end
1226 ; Correct divisor/remainder sign
1234 ; Correct dividend/quotient sign
1245 #endif /* defined (L_divmodpsi4) */
1261 /*******************************************************
1262 Division 32 / 32 => (result + remainder)
1263 *******************************************************/
1264 #define r_remHH r31 /* remainder High */
1267 #define r_remL r26 /* remainder Low */
1269 /* return: remainder */
1270 #define r_arg1HH r25 /* dividend High */
1271 #define r_arg1HL r24
1273 #define r_arg1L r22 /* dividend Low */
1275 /* return: quotient */
1276 #define r_arg2HH r21 /* divisor High */
1277 #define r_arg2HL r20
1279 #define r_arg2L r18 /* divisor Low */
1281 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1283 #if defined (L_udivmodsi4)
1285 ldi r_remL, 33 ; init loop counter
1288 sub r_remH,r_remH ; clear remainder and carry
1289 mov_l r_remHL, r_remL
1290 mov_h r_remHH, r_remH
1291 rjmp __udivmodsi4_ep ; jump to entry point
1293 rol r_remL ; shift dividend into remainder
1297 cp r_remL,r_arg2L ; compare remainder & divisor
1299 cpc r_remHL,r_arg2HL
1300 cpc r_remHH,r_arg2HH
1301 brcs __udivmodsi4_ep ; remainder <= divisor
1302 sub r_remL,r_arg2L ; restore remainder
1304 sbc r_remHL,r_arg2HL
1305 sbc r_remHH,r_arg2HH
1307 rol r_arg1L ; shift dividend (with CARRY)
1311 dec r_cnt ; decrement loop counter
1312 brne __udivmodsi4_loop
1313 ; __zero_reg__ now restored (r_cnt == 0)
1318 ; div/mod results to return registers, as for the ldiv() function
1319 mov_l r_arg2L, r_arg1L ; quotient
1320 mov_h r_arg2H, r_arg1H
1321 mov_l r_arg2HL, r_arg1HL
1322 mov_h r_arg2HH, r_arg1HH
1323 mov_l r_arg1L, r_remL ; remainder
1324 mov_h r_arg1H, r_remH
1325 mov_l r_arg1HL, r_remHL
1326 mov_h r_arg1HH, r_remHH
1329 #endif /* defined (L_udivmodsi4) */
1331 #if defined (L_divmodsi4)
1333 mov __tmp_reg__,r_arg2HH
1334 bst r_arg1HH,7 ; store sign of dividend
1336 com __tmp_reg__ ; r0.7 is sign of result
1337 XCALL __negsi2 ; dividend negative: negate
1340 rcall __divmodsi4_neg2 ; divisor negative: negate
1341 XCALL __udivmodsi4 ; do the unsigned div/mod
1342 sbrc __tmp_reg__, 7 ; correct quotient sign
1343 rcall __divmodsi4_neg2
1344 brtc __divmodsi4_exit ; correct remainder sign
1347 ;; correct divisor/quotient sign
1358 #endif /* defined (L_divmodsi4) */
1360 #if defined (L_negsi2)
1362 ;; (neg:SI (reg:SI 22)))
1363 ;; Sets the V flag for signed overflow tests
1368 #endif /* L_negsi2 */
1384 /*******************************************************
1387 *******************************************************/
1389 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1390 ;; at least 16k of Program Memory. For smaller Devices, depend
1391 ;; on MOVW and SP Size. There is a Connection between SP Size and
1392 ;; Flash Size so that SP Size can be used to test for Flash Size.
1394 #if defined (__AVR_HAVE_JMP_CALL__)
1395 # define SPEED_DIV 8
1396 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1397 # define SPEED_DIV 16
1399 # define SPEED_DIV 0
1402 ;; A[0..7]: In: Dividend;
1403 ;; Out: Quotient (T = 0)
1404 ;; Out: Remainder (T = 1)
1414 ;; B[0..7]: In: Divisor; Out: Clobber
1424 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1434 ;; Holds Signs during Division Routine
1435 #define SS __tmp_reg__
1437 ;; Bit-Counter in Division Routine
1438 #define R_cnt __zero_reg__
1440 ;; Scratch Register for Negation
1443 #if defined (L_udivdi3)
1445 ;; R25:R18 = R24:R18 umod R17:R10
1446 ;; Ordinary ABI-Function
1450 rjmp __udivdi3_umoddi3
1453 ;; R25:R18 = R24:R18 udiv R17:R10
1454 ;; Ordinary ABI-Function
1460 DEFUN __udivdi3_umoddi3
1471 ENDF __udivdi3_umoddi3
1472 #endif /* L_udivdi3 */
1474 #if defined (L_udivmod64)
1476 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1477 ;; No Registers saved/restored; the Callers will take Care.
1478 ;; Preserves B[] and T-flag
1479 ;; T = 0: Compute Quotient in A[]
1480 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1484 ;; Clear Remainder (C6, C7 will follow)
1491 #if SPEED_DIV == 0 || SPEED_DIV == 16
1492 ;; Initialize Loop-Counter
1495 #endif /* SPEED_DIV */
1502 1: ;; Compare shifted Dividend against Divisor
1503 ;; If -- even after Shifting -- it is smaller...
1504 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1505 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1508 ;; ...then we can subtract it. Thus, it is legal to shift left
1509 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1510 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1511 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1512 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1518 ;; Shifted 64 Bits: A7 has traveled to C7
1520 ;; Divisor is greater than Dividend. We have:
1523 ;; Thus, we can return immediately
1526 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1529 ;; Push of A7 is not needed because C7 is still 0
1533 #elif SPEED_DIV == 16
1535 ;; Compare shifted Dividend against Divisor
1543 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1544 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1545 wmov C2,A6 $ wmov C0,A4
1546 wmov A6,A2 $ wmov A4,A0
1547 wmov A2,C6 $ wmov A0,C4
1549 ;; Set Bit Counter to 32
1553 #error SPEED_DIV = ?
1554 #endif /* SPEED_DIV */
1556 ;; The very Division + Remainder Routine
1558 3: ;; Left-shift Dividend...
1559 lsl A0 $ rol A1 $ rol A2 $ rol A3
1560 rol A4 $ rol A5 $ rol A6 $ rol A7
1562 ;; ...into Remainder
1563 rol C0 $ rol C1 $ rol C2 $ rol C3
1564 rol C4 $ rol C5 $ rol C6 $ rol C7
1566 ;; Compare Remainder and Divisor
1567 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1568 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1572 ;; Divisor fits into Remainder: Subtract it from Remainder...
1573 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1574 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1576 ;; ...and set according Bit in the upcoming Quotient
1577 ;; The Bit will travel to its final Position
1580 4: ;; This Bit is done
1583 ;; __zero_reg__ is 0 again
1585 ;; T = 0: We are fine with the Quotient in A[]
1586 ;; T = 1: Copy Remainder to A[]
1592 ;; Move the Sign of the Result to SS.7
1598 #endif /* L_udivmod64 */
1601 #if defined (L_divdi3)
1603 ;; R25:R18 = R24:R18 mod R17:R10
1604 ;; Ordinary ABI-Function
1608 rjmp __divdi3_moddi3
1611 ;; R25:R18 = R24:R18 div R17:R10
1612 ;; Ordinary ABI-Function
1618 DEFUN __divdi3_moddi3
1623 ;; Both Signs are 0: the following Complexity is not needed
1624 XJMP __udivdi3_umoddi3
1625 #endif /* SPEED_DIV */
1628 ;; Save 12 Registers: Y, 17...8
1629 ;; No Frame needed (X = 0)
1632 ldi r30, lo8(gs(1f))
1633 ldi r31, hi8(gs(1f))
1634 XJMP __prologue_saves__ + ((18 - 12) * 2)
1636 1: ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1637 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1640 ;; Adjust Dividend's Sign as needed
1642 ;; Compiling for Speed we know that at least one Sign must be < 0
1643 ;; Thus, if A[] >= 0 then we know B[] < 0
1647 #endif /* SPEED_DIV */
1651 ;; Adjust Divisor's Sign and SS.7 as needed
1658 com B4 $ com B5 $ com B6 $ com B7
1659 $ com B1 $ com B2 $ com B3
1661 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
1662 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
1664 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1667 ;; Adjust Result's Sign
1668 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1673 #endif /* __AVR_HAVE_JMP_CALL__ */
1676 4: ;; Epilogue: Restore the Z = 12 Registers and return
1678 #if defined (__AVR_HAVE_SPH__)
1682 #endif /* #SP = 8/16 */
1684 XJMP __epilogue_restores__ + ((18 - 12) * 2)
1686 ENDF __divdi3_moddi3
1692 #endif /* L_divdi3 */
1694 .section .text.libgcc, "ax", @progbits
1696 #define TT __tmp_reg__
1698 #if defined (L_adddi3)
1700 ;; (plus:DI (reg:DI 18)
1702 ;; Sets the V flag for signed overflow tests
1703 ;; Sets the C flag for unsigned overflow tests
1705 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
1706 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
1709 #endif /* L_adddi3 */
1711 #if defined (L_adddi3_s8)
1713 ;; (plus:DI (reg:DI 18)
1714 ;; (sign_extend:SI (reg:QI 26))))
1715 ;; Sets the V flag for signed overflow tests
1716 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
1721 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
1722 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
1725 #endif /* L_adddi3_s8 */
1727 #if defined (L_subdi3)
1729 ;; (minus:DI (reg:DI 18)
1731 ;; Sets the V flag for signed overflow tests
1732 ;; Sets the C flag for unsigned overflow tests
1734 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
1735 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
1738 #endif /* L_subdi3 */
1740 #if defined (L_cmpdi2)
1742 ;; (compare (reg:DI 18)
1745 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
1746 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
1749 #endif /* L_cmpdi2 */
1751 #if defined (L_cmpdi2_s8)
1753 ;; (compare (reg:DI 18)
1754 ;; (sign_extend:SI (reg:QI 26))))
1759 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
1760 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
1763 #endif /* L_cmpdi2_s8 */
1765 #if defined (L_negdi2)
1767 ;; (neg:DI (reg:DI 18)))
1768 ;; Sets the V flag for signed overflow tests
1771 com A4 $ com A5 $ com A6 $ com A7
1772 $ com A1 $ com A2 $ com A3
1774 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
1775 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
1779 #endif /* L_negdi2 */
1811 .section .text.libgcc.prologue, "ax", @progbits
1813 /**********************************
1814 * This is a prologue subroutine
1815 **********************************/
1816 #if defined (L_prologue)
1818 ;; This function does not clobber T-flag; 64-bit division relies on it
1819 DEFUN __prologue_saves__
1838 #if !defined (__AVR_HAVE_SPH__)
1843 #elif defined (__AVR_XMEGA__)
1855 in __tmp_reg__,__SREG__
1858 out __SREG__,__tmp_reg__
1860 #endif /* #SP = 8/16 */
1862 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1868 ENDF __prologue_saves__
1869 #endif /* defined (L_prologue) */
1872 * This is an epilogue subroutine
1874 #if defined (L_epilogue)
1876 DEFUN __epilogue_restores__
1894 #if !defined (__AVR_HAVE_SPH__)
1899 #elif defined (__AVR_XMEGA__)
1902 adc r29,__zero_reg__
1909 adc r29,__zero_reg__
1910 in __tmp_reg__,__SREG__
1913 out __SREG__,__tmp_reg__
1917 #endif /* #SP = 8/16 */
1919 ENDF __epilogue_restores__
1920 #endif /* defined (L_epilogue) */
1923 .section .fini9,"ax",@progbits
1929 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
1931 .section .fini0,"ax",@progbits
1935 #endif /* defined (L_exit) */
1943 #endif /* defined (L_cleanup) */
1946 .section .text.libgcc, "ax", @progbits
1949 DEFUN __tablejump2__
1956 #if defined (__AVR_HAVE_LPMX__)
1959 mov r30, __tmp_reg__
1960 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1966 #else /* !HAVE_LPMX */
1972 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1973 in __tmp_reg__, __EIND__
1977 #endif /* !HAVE_LPMX */
1979 #endif /* defined (L_tablejump) */
1982 .section .init4,"ax",@progbits
1983 DEFUN __do_copy_data
1984 #if defined(__AVR_HAVE_ELPMX__)
1985 ldi r17, hi8(__data_end)
1986 ldi r26, lo8(__data_start)
1987 ldi r27, hi8(__data_start)
1988 ldi r30, lo8(__data_load_start)
1989 ldi r31, hi8(__data_load_start)
1990 ldi r16, hh8(__data_load_start)
1992 rjmp .L__do_copy_data_start
1993 .L__do_copy_data_loop:
1996 .L__do_copy_data_start:
1997 cpi r26, lo8(__data_end)
1999 brne .L__do_copy_data_loop
2000 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2001 ldi r17, hi8(__data_end)
2002 ldi r26, lo8(__data_start)
2003 ldi r27, hi8(__data_start)
2004 ldi r30, lo8(__data_load_start)
2005 ldi r31, hi8(__data_load_start)
2006 ldi r16, hh8(__data_load_start - 0x10000)
2007 .L__do_copy_data_carry:
2010 rjmp .L__do_copy_data_start
2011 .L__do_copy_data_loop:
2015 brcs .L__do_copy_data_carry
2016 .L__do_copy_data_start:
2017 cpi r26, lo8(__data_end)
2019 brne .L__do_copy_data_loop
2020 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2021 ldi r17, hi8(__data_end)
2022 ldi r26, lo8(__data_start)
2023 ldi r27, hi8(__data_start)
2024 ldi r30, lo8(__data_load_start)
2025 ldi r31, hi8(__data_load_start)
2026 rjmp .L__do_copy_data_start
2027 .L__do_copy_data_loop:
2028 #if defined (__AVR_HAVE_LPMX__)
2035 .L__do_copy_data_start:
2036 cpi r26, lo8(__data_end)
2038 brne .L__do_copy_data_loop
2039 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2040 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2041 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2042 out __RAMPZ__, __zero_reg__
2043 #endif /* ELPM && RAMPD */
2045 #endif /* L_copy_data */
2047 /* __do_clear_bss is only necessary if there is anything in .bss section. */
2050 .section .init4,"ax",@progbits
2051 DEFUN __do_clear_bss
2052 ldi r17, hi8(__bss_end)
2053 ldi r26, lo8(__bss_start)
2054 ldi r27, hi8(__bss_start)
2055 rjmp .do_clear_bss_start
2058 .do_clear_bss_start:
2059 cpi r26, lo8(__bss_end)
2061 brne .do_clear_bss_loop
2063 #endif /* L_clear_bss */
2065 /* __do_global_ctors and __do_global_dtors are only necessary
2066 if there are any constructors/destructors. */
2069 .section .init6,"ax",@progbits
2070 DEFUN __do_global_ctors
2071 #if defined(__AVR_HAVE_ELPM__)
2072 ldi r17, hi8(__ctors_start)
2073 ldi r28, lo8(__ctors_end)
2074 ldi r29, hi8(__ctors_end)
2075 ldi r16, hh8(__ctors_end)
2076 rjmp .L__do_global_ctors_start
2077 .L__do_global_ctors_loop:
2079 sbc r16, __zero_reg__
2083 XCALL __tablejump_elpm__
2084 .L__do_global_ctors_start:
2085 cpi r28, lo8(__ctors_start)
2087 ldi r24, hh8(__ctors_start)
2089 brne .L__do_global_ctors_loop
2091 ldi r17, hi8(__ctors_start)
2092 ldi r28, lo8(__ctors_end)
2093 ldi r29, hi8(__ctors_end)
2094 rjmp .L__do_global_ctors_start
2095 .L__do_global_ctors_loop:
2100 .L__do_global_ctors_start:
2101 cpi r28, lo8(__ctors_start)
2103 brne .L__do_global_ctors_loop
2104 #endif /* defined(__AVR_HAVE_ELPM__) */
2105 ENDF __do_global_ctors
2106 #endif /* L_ctors */
2109 .section .fini6,"ax",@progbits
2110 DEFUN __do_global_dtors
2111 #if defined(__AVR_HAVE_ELPM__)
2112 ldi r17, hi8(__dtors_end)
2113 ldi r28, lo8(__dtors_start)
2114 ldi r29, hi8(__dtors_start)
2115 ldi r16, hh8(__dtors_start)
2116 rjmp .L__do_global_dtors_start
2117 .L__do_global_dtors_loop:
2119 sbc r16, __zero_reg__
2123 XCALL __tablejump_elpm__
2124 .L__do_global_dtors_start:
2125 cpi r28, lo8(__dtors_end)
2127 ldi r24, hh8(__dtors_end)
2129 brne .L__do_global_dtors_loop
2131 ldi r17, hi8(__dtors_end)
2132 ldi r28, lo8(__dtors_start)
2133 ldi r29, hi8(__dtors_start)
2134 rjmp .L__do_global_dtors_start
2135 .L__do_global_dtors_loop:
2140 .L__do_global_dtors_start:
2141 cpi r28, lo8(__dtors_end)
2143 brne .L__do_global_dtors_loop
2144 #endif /* defined(__AVR_HAVE_ELPM__) */
2145 ENDF __do_global_dtors
2146 #endif /* L_dtors */
2148 .section .text.libgcc, "ax", @progbits
2150 #ifdef L_tablejump_elpm
2151 DEFUN __tablejump_elpm__
2152 #if defined (__AVR_HAVE_ELPMX__)
2153 elpm __tmp_reg__, Z+
2155 mov r30, __tmp_reg__
2156 #if defined (__AVR_HAVE_RAMPD__)
2157 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2158 out __RAMPZ__, __zero_reg__
2160 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2166 #elif defined (__AVR_HAVE_ELPM__)
2172 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2173 in __tmp_reg__, __EIND__
2178 ENDF __tablejump_elpm__
2179 #endif /* defined (L_tablejump_elpm) */
2181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2182 ;; Loading n bytes from Flash; n = 3,4
2183 ;; R22... = Flash[Z]
2184 ;; Clobbers: __tmp_reg__
2186 #if (defined (L_load_3) \
2187 || defined (L_load_4)) \
2188 && !defined (__AVR_HAVE_LPMX__)
2196 .macro .load dest, n
2199 .if \dest != D0+\n-1
2206 #if defined (L_load_3)
2213 #endif /* L_load_3 */
2215 #if defined (L_load_4)
2223 #endif /* L_load_4 */
2225 #endif /* L_load_3 || L_load_4 */
2227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2228 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2229 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2230 ;; Clobbers: __tmp_reg__, R21, R30, R31
2232 #if (defined (L_xload_1) \
2233 || defined (L_xload_2) \
2234 || defined (L_xload_3) \
2235 || defined (L_xload_4))
2243 ;; Register containing bits 16+ of the address
2247 .macro .xload dest, n
2248 #if defined (__AVR_HAVE_ELPMX__)
2250 #elif defined (__AVR_HAVE_ELPM__)
2253 .if \dest != D0+\n-1
2255 adc HHI8, __zero_reg__
2258 #elif defined (__AVR_HAVE_LPMX__)
2263 .if \dest != D0+\n-1
2267 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2268 .if \dest == D0+\n-1
2269 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2270 out __RAMPZ__, __zero_reg__
2275 #if defined (L_xload_1)
2277 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2286 #if defined (__AVR_HAVE_ELPM__)
2288 #endif /* __AVR_HAVE_ELPM__ */
2293 #endif /* LPMx && ! ELPM */
2295 #endif /* L_xload_1 */
2297 #if defined (L_xload_2)
2301 #if defined (__AVR_HAVE_ELPM__)
2303 #endif /* __AVR_HAVE_ELPM__ */
2311 #endif /* L_xload_2 */
2313 #if defined (L_xload_3)
2317 #if defined (__AVR_HAVE_ELPM__)
2319 #endif /* __AVR_HAVE_ELPM__ */
2329 #endif /* L_xload_3 */
2331 #if defined (L_xload_4)
2335 #if defined (__AVR_HAVE_ELPM__)
2337 #endif /* __AVR_HAVE_ELPM__ */
2349 #endif /* L_xload_4 */
2351 #endif /* L_xload_{1|2|3|4} */
2353 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2354 ;; memcopy from Address Space __pgmx to RAM
2355 ;; R23:Z = Source Address
2356 ;; X = Destination Address
2357 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2359 #if defined (L_movmemx)
2365 ;; #Bytes to copy fit in 8 Bits (1..255)
2366 ;; Zero-extend Loop Counter
2379 #if defined (__AVR_HAVE_ELPM__)
2383 0: ;; Load 1 Byte from Flash...
2385 #if defined (__AVR_HAVE_ELPMX__)
2387 #elif defined (__AVR_HAVE_ELPM__)
2390 adc HHI8, __zero_reg__
2392 #elif defined (__AVR_HAVE_LPMX__)
2399 ;; ...and store that Byte to RAM Destination
2403 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2404 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2405 out __RAMPZ__, __zero_reg__
2406 #endif /* ELPM && RAMPD */
2411 1: ;; Read 1 Byte from RAM...
2413 ;; and store that Byte to RAM Destination
2423 #endif /* L_movmemx */
2426 .section .text.libgcc.builtins, "ax", @progbits
2428 /**********************************
2429 * Find first set Bit (ffs)
2430 **********************************/
2432 #if defined (L_ffssi2)
2433 ;; find first set bit
2434 ;; r25:r24 = ffs32 (r25:r22)
2435 ;; clobbers: r22, r26
2453 #endif /* defined (L_ffssi2) */
2455 #if defined (L_ffshi2)
2456 ;; find first set bit
2457 ;; r25:r24 = ffs16 (r25:r24)
2461 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2462 ;; Some cores have problem skipping 2-word instruction
2466 cpse r24, __zero_reg__
2467 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2468 1: XJMP __loop_ffsqi2
2474 #endif /* defined (L_ffshi2) */
2476 #if defined (L_loop_ffsqi2)
2477 ;; Helper for ffshi2, ffssi2
2478 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2489 #endif /* defined (L_loop_ffsqi2) */
2492 /**********************************
2493 * Count trailing Zeros (ctz)
2494 **********************************/
2496 #if defined (L_ctzsi2)
2497 ;; count trailing zeros
2498 ;; r25:r24 = ctz32 (r25:r22)
2499 ;; clobbers: r26, r22
2501 ;; Note that ctz(0) is undefined for GCC
2507 #endif /* defined (L_ctzsi2) */
2509 #if defined (L_ctzhi2)
2510 ;; count trailing zeros
2511 ;; r25:r24 = ctz16 (r25:r24)
2514 ;; Note that ctz(0) is undefined for GCC
2520 #endif /* defined (L_ctzhi2) */
2523 /**********************************
2524 * Count leading Zeros (clz)
2525 **********************************/
2527 #if defined (L_clzdi2)
2528 ;; count leading zeros
2529 ;; r25:r24 = clz64 (r25:r18)
2530 ;; clobbers: r22, r23, r26
2543 #endif /* defined (L_clzdi2) */
2545 #if defined (L_clzsi2)
2546 ;; count leading zeros
2547 ;; r25:r24 = clz32 (r25:r22)
2559 #endif /* defined (L_clzsi2) */
2561 #if defined (L_clzhi2)
2562 ;; count leading zeros
2563 ;; r25:r24 = clz16 (r25:r24)
2585 #endif /* defined (L_clzhi2) */
2588 /**********************************
2590 **********************************/
2592 #if defined (L_paritydi2)
2593 ;; r25:r24 = parity64 (r25:r18)
2594 ;; clobbers: __tmp_reg__
2602 #endif /* defined (L_paritydi2) */
2604 #if defined (L_paritysi2)
2605 ;; r25:r24 = parity32 (r25:r22)
2606 ;; clobbers: __tmp_reg__
2612 #endif /* defined (L_paritysi2) */
2614 #if defined (L_parityhi2)
2615 ;; r25:r24 = parity16 (r25:r24)
2616 ;; clobbers: __tmp_reg__
2622 ;; r25:r24 = parity8 (r24)
2623 ;; clobbers: __tmp_reg__
2625 ;; parity is in r24[0..7]
2626 mov __tmp_reg__, r24
2628 eor r24, __tmp_reg__
2629 ;; parity is in r24[0..3]
2633 ;; parity is in r24[0,3]
2636 ;; parity is in r24[0]
2641 #endif /* defined (L_parityhi2) */
2644 /**********************************
2646 **********************************/
2648 #if defined (L_popcounthi2)
2650 ;; r25:r24 = popcount16 (r25:r24)
2651 ;; clobbers: __tmp_reg__
2661 DEFUN __popcounthi2_tail
2663 add r24, __tmp_reg__
2665 ENDF __popcounthi2_tail
2666 #endif /* defined (L_popcounthi2) */
2668 #if defined (L_popcountsi2)
2670 ;; r25:r24 = popcount32 (r25:r22)
2671 ;; clobbers: __tmp_reg__
2678 XJMP __popcounthi2_tail
2680 #endif /* defined (L_popcountsi2) */
2682 #if defined (L_popcountdi2)
2684 ;; r25:r24 = popcount64 (r25:r18)
2685 ;; clobbers: r22, r23, __tmp_reg__
2694 XJMP __popcounthi2_tail
2696 #endif /* defined (L_popcountdi2) */
2698 #if defined (L_popcountqi2)
2700 ;; r24 = popcount8 (r24)
2701 ;; clobbers: __tmp_reg__
2703 mov __tmp_reg__, r24
2707 adc r24, __zero_reg__
2709 adc r24, __zero_reg__
2711 adc r24, __zero_reg__
2713 adc r24, __zero_reg__
2715 adc r24, __zero_reg__
2717 adc r24, __tmp_reg__
2720 #endif /* defined (L_popcountqi2) */
2723 /**********************************
2725 **********************************/
2727 ;; swap two registers with different register number
2734 #if defined (L_bswapsi2)
2736 ;; r25:r22 = bswap32 (r25:r22)
2742 #endif /* defined (L_bswapsi2) */
2744 #if defined (L_bswapdi2)
2746 ;; r25:r18 = bswap64 (r25:r18)
2754 #endif /* defined (L_bswapdi2) */
2757 /**********************************
2759 **********************************/
2761 #if defined (L_ashrdi3)
2762 ;; Arithmetic shift right
2763 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
2781 #endif /* defined (L_ashrdi3) */
2783 #if defined (L_lshrdi3)
2784 ;; Logic shift right
2785 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
2803 #endif /* defined (L_lshrdi3) */
2805 #if defined (L_ashldi3)
2807 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
2825 #endif /* defined (L_ashldi3) */
2827 #if defined (L_rotldi3)
2829 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
2842 adc r18, __zero_reg__
2848 #endif /* defined (L_rotldi3) */
2851 .section .text.libgcc.fmul, "ax", @progbits
2853 /***********************************************************/
2854 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
2855 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
2856 /***********************************************************/
2862 #define A0 __tmp_reg__
2865 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
2866 ;;; Clobbers: r24, r25, __tmp_reg__
2868 ;; A0.7 = negate result?
2876 #endif /* L_fmuls */
2879 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
2880 ;;; Clobbers: r24, r25, __tmp_reg__
2882 ;; A0.7 = negate result?
2887 ;; Helper for __fmuls and __fmulsu
2892 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2893 ;; Some cores have problem skipping 2-word instruction
2898 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2901 ;; C = -C iff A0.7 = 1
2905 #endif /* L_fmulsu */
2909 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
2910 ;;; Clobbers: r24, r25, __tmp_reg__
2917 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.
2938 #include "lib1funcs-fixed.S"