1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2017 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov@gmail.com>
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #if defined (__AVR_TINY__)
25 #define __zero_reg__ r17
26 #define __tmp_reg__ r16
28 #define __zero_reg__ r1
29 #define __tmp_reg__ r0
32 #if defined (__AVR_HAVE_SPH__)
36 #define __RAMPZ__ 0x3B
39 /* Most of the functions here are called directly from avr.md
40 patterns, instead of using the standard libcall mechanisms.
41 This can make better code because GCC knows exactly which
42 of the call-used registers (not all of them) are clobbered. */
44 /* FIXME: At present, there is no SORT directive in the linker
45 script, so we must not assume that different modules
46 in the same input section like .libgcc.text.mul will be
47 located close together. Therefore, we cannot use
48 RCALL/RJMP to call a function like __udivmodhi4 from
49 __divmodhi4 and have to use lengthy XCALL/XJMP even
50 though they are in the same input section and all same
51 input sections together are small enough to reach every
52 location with a RCALL/RJMP instruction. */
54 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55 #error device not supported
58 .macro mov_l r_dest, r_src
59 #if defined (__AVR_HAVE_MOVW__)
66 .macro mov_h r_dest, r_src
67 #if defined (__AVR_HAVE_MOVW__)
74 .macro wmov r_dest, r_src
75 #if defined (__AVR_HAVE_MOVW__)
79 mov \r_dest+1, \r_src+1
83 #if defined (__AVR_HAVE_JMP_CALL__)
91 #if defined (__AVR_HAVE_EIJMP_EICALL__)
101 .macro do_prologue_saves n_pushed n_frame=0
102 ldi r26, lo8(\n_frame)
103 ldi r27, hi8(\n_frame)
104 ldi r30, lo8(gs(.L_prologue_saves.\@))
105 ldi r31, hi8(gs(.L_prologue_saves.\@))
106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107 .L_prologue_saves.\@:
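;; Note: __prologue_saves__ (defined below) is a run of 18 one-word
;; PUSH instructions for R2..R17, R28, R29 followed by the frame
;; setup, so the XJMP above enters it 18 - n_pushed words past its
;; start and thereby saves exactly n_pushed registers.
;; __epilogue_restores__ is entered the same way.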
112 .macro do_epilogue_restores n_pushed n_frame=0
114 #ifdef __AVR_HAVE_SPH__
117 subi r28, lo8(-\n_frame)
118 sbci r29, hi8(-\n_frame)
125 subi r28, lo8(-\n_frame)
127 #endif /* HAVE SPH */
129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
132 ;; Support function entry and exit for convenience
134 .macro wsubi r_arg1, i_arg2
135 #if defined (__AVR_TINY__)
136 subi \r_arg1, lo8(\i_arg2)
137 sbci \r_arg1+1, hi8(\i_arg2)
139 sbiw \r_arg1, \i_arg2
143 .macro waddi r_arg1, i_arg2
144 #if defined (__AVR_TINY__)
145 subi \r_arg1, lo8(-\i_arg2)
146 sbci \r_arg1+1, hi8(-\i_arg2)
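;; The reduced AVR_TINY core has no ADIW/SBIW and no "add immediate"
;; instruction, so the addition is done by subtracting the negated
;; constant, e.g. waddi r24, 5 becomes subi r24, lo8(-5); sbci r25, hi8(-5).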
148 adiw \r_arg1, \i_arg2
171 ;; Skip next instruction, typically a jump target
172 #if defined(__AVR_TINY__)
173 #define skip cpse 0,0
175 #define skip cpse 16,16
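;; CPSE with identical source and destination registers compares equal
;; and hence always skips; this makes "skip" an unconditional skip of
;; the next instruction that leaves SREG untouched.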
178 ;; Negate a 2-byte value held in consecutive registers
185 ;; Negate a 4-byte value held in consecutive registers
186 ;; Sets the V flag for signed overflow tests if REG >= 16
198 adc \reg, __zero_reg__
199 adc \reg+1, __zero_reg__
200 adc \reg+2, __zero_reg__
201 adc \reg+3, __zero_reg__
205 #define exp_lo(N) hlo8 ((N) << 23)
206 #define exp_hi(N) hhi8 ((N) << 23)
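;; exp_lo(N) and exp_hi(N) are bytes 2 and 3 of N << 23, i.e. the two
;; high bytes of an IEEE single precision number whose biased exponent
;; field is N (the exponent starts at bit 23).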
209 .section .text.libgcc.mul, "ax", @progbits
211 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
212 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
213 #if !defined (__AVR_HAVE_MUL__)
214 /*******************************************************
215 Multiplication 8 x 8 without MUL
216 *******************************************************/
217 #if defined (L_mulqi3)
219 #define r_arg2 r22 /* multiplicand */
220 #define r_arg1 r24 /* multiplier */
221 #define r_res __tmp_reg__ /* result */
224 clr r_res ; clear result
228 add r_arg2,r_arg2 ; shift multiplicand
229 breq __mulqi3_exit ; done if multiplicand == 0
231 brne __mulqi3_loop ; loop while multiplier != 0
233 mov r_arg1,r_res ; result to return register
241 #endif /* defined (L_mulqi3) */
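/* The loop above is the classic shift-and-add method; an illustrative
   C model (not the code path used by the build):

       unsigned char mulqi3 (unsigned char a, unsigned char b)
       {
           unsigned char res = 0;
           while (a && b)
             {
               if (a & 1)
                 res += b;   // multiplier bit set: add multiplicand
               b <<= 1;      // shift multiplicand
               a >>= 1;      // consume one multiplier bit
             }
           return res;
       }
*/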
244 /*******************************************************
245 Widening Multiplication 16 = 8 x 8 without MUL
246 Multiplication 16 x 16 without MUL
247 *******************************************************/
254 ;; Output overlaps input, thus expand result in CC0/1
257 #define CC0 __tmp_reg__
260 #if defined (L_umulqihi3)
261 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
262 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
263 ;;; Clobbers: __tmp_reg__, R21..R23
269 #endif /* L_umulqihi3 */
271 #if defined (L_mulqihi3)
272 ;;; R25:R24 = (signed int) R22 * (signed int) R24
273 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
274 ;;; Clobbers: __tmp_reg__, R20..R23
280 ;; The multiplication runs twice as fast if A1 is zero, thus:
283 #ifdef __AVR_HAVE_JMP_CALL__
284 ;; Store B0 * sign of A
289 #else /* have no CALL */
290 ;; Skip sign-extension of A if A >= 0
291 ;; Same size as with the first alternative but avoids errata skip
292 ;; and is faster if A >= 0
298 #endif /* HAVE_JMP_CALL */
299 ;; One-extend A after the multiplication
303 #endif /* L_mulqihi3 */
305 #if defined (L_mulhi3)
306 ;;; R25:R24 = R23:R22 * R25:R24
307 ;;; (C1:C0) = (A1:A0) * (B1:B0)
308 ;;; Clobbers: __tmp_reg__, R21..R23
316 ;; Bit n of A is 1 --> C += B << n
323 ;; If B == 0 we are ready
327 ;; Carry = n-th bit of A
330 ;; If bit n of A is set, then go add B * 2^n to C
333 ;; Carry = 0 --> The ROR above acts like CP A0, 0
334 ;; Thus, it is sufficient to CPC the high part to test A against 0
336 ;; Only proceed if A != 0
339 ;; Move Result into place
344 #endif /* L_mulhi3 */
377 /*******************************************************
378 Widening Multiplication 32 = 16 x 16 without MUL
379 *******************************************************/
381 #if defined (L_umulhisi3)
391 #endif /* L_umulhisi3 */
393 #if defined (L_mulhisi3)
400 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
407 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
408 ;; Zero-extend A and __mulsi3 will run at least twice as fast
409 ;; compared to a sign-extended A.
414 ;; If A < 0 then account for the B * 0xffff.... part ahead of the
415 ;; actual multiplication by initializing the high part of the
416 ;; result CC with -B.
421 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
423 #endif /* L_mulhisi3 */
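;; The trick rests on the identity (mod 2^32)
;;     (signed) A == (unsigned) A - 0x10000   for A < 0,
;; hence A * B == (unsigned) A * B - (B << 16); preloading the high
;; word of the result with -B applies that correction for free.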
426 /*******************************************************
427 Multiplication 32 x 32 without MUL
428 *******************************************************/
430 #if defined (L_mulsi3)
432 #if defined (__AVR_TINY__)
433 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
435 subi r26, lo8(-3) ; Add 3 to point past return address
437 push B0 ; save callee saved regs
439 ld B0, X+ ; load from caller stack
450 DEFUN __mulsi3_helper
455 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
457 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
460 lsl B0 $ rol B1 $ rol B2 $ rol B3
462 3: ;; A >>= 1: Carry = n-th bit of A
463 lsr A3 $ ror A2 $ ror A1 $ ror A0
466 ;; Only continue if A != 0
472 ;; All bits of A are consumed: Copy result to return register C
475 #if defined (__AVR_TINY__)
476 pop B1 ; restore callee saved regs
478 #endif /* defined (__AVR_TINY__) */
482 #endif /* L_mulsi3 */
501 #endif /* !defined (__AVR_HAVE_MUL__) */
502 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
504 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505 #if defined (__AVR_HAVE_MUL__)
520 /*******************************************************
521 Widening Multiplication 32 = 16 x 16 with MUL
522 *******************************************************/
524 #if defined (L_mulhisi3)
525 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
526 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
527 ;;; Clobbers: __tmp_reg__
536 XJMP __usmulhisi3_tail
538 #endif /* L_mulhisi3 */
540 #if defined (L_usmulhisi3)
541 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
542 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
543 ;;; Clobbers: __tmp_reg__
549 DEFUN __usmulhisi3_tail
556 ENDF __usmulhisi3_tail
557 #endif /* L_usmulhisi3 */
559 #if defined (L_umulhisi3)
560 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
561 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
562 ;;; Clobbers: __tmp_reg__
569 #ifdef __AVR_HAVE_JMP_CALL__
570 ;; This function is used by many other routines, often multiple times.
571 ;; Therefore, if the flash size is not too limited, avoid the RCALL
572 ;; and invest 6 Bytes to speed things up.
587 #endif /* L_umulhisi3 */
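/* __umulhisi3 assembles the 32-bit product from four 8 x 8 hardware
   multiplications; an illustrative C model:

       unsigned long umulhisi3 (unsigned int a, unsigned int b)
       {
           unsigned char a0 = a, a1 = a >> 8;
           unsigned char b0 = b, b1 = b >> 8;
           return (unsigned long) a0 * b0
                  + (((unsigned long) a1 * b0
                      + (unsigned long) a0 * b1) << 8)
                  + ((unsigned long) a1 * b1 << 16);
       }
*/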
589 /*******************************************************
590 Widening Multiplication 32 = 16 x 32 with MUL
591 *******************************************************/
593 #if defined (L_mulshisi3)
594 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
595 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
596 ;;; Clobbers: __tmp_reg__
598 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
599 ;; Some cores have problems skipping a 2-word instruction
604 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
609 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
610 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
611 ;;; Clobbers: __tmp_reg__
614 ;; One-extend R27:R26 (A1:A0)
619 #endif /* L_mulshisi3 */
621 #if defined (L_muluhisi3)
622 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
623 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
624 ;;; Clobbers: __tmp_reg__
637 #endif /* L_muluhisi3 */
639 /*******************************************************
640 Multiplication 32 x 32 with MUL
641 *******************************************************/
643 #if defined (L_mulsi3)
644 ;;; R25:R22 = R25:R22 * R21:R18
645 ;;; (C3:C0) = C3:C0 * B3:B0
646 ;;; Clobbers: R26, R27, __tmp_reg__
654 ;; A1:A0 now contains the high word of A
665 #endif /* L_mulsi3 */
680 #endif /* __AVR_HAVE_MUL__ */
682 /*******************************************************
683 Multiplication 24 x 24 with MUL
684 *******************************************************/
686 #if defined (L_mulpsi3)
688 ;; A[0..2]: In: Multiplicand; Out: Product
693 ;; B[0..2]: In: Multiplier
698 #if defined (__AVR_HAVE_MUL__)
700 ;; C[0..2]: Expand Result
705 ;; R24:R22 *= R20:R18
706 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
715 mul AA2, B0 $ add C2, r0
716 mul AA0, B2 $ add C2, r0
728 #else /* !HAVE_MUL */
729 ;; C[0..2]: Expand Result
730 #if defined (__AVR_TINY__)
734 #endif /* defined (__AVR_TINY__) */
738 ;; R24:R22 *= R20:R18
739 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
742 #if defined (__AVR_TINY__)
745 subi r26, lo8(-3) ; Add 3 to point past return address
747 push B0 ; save callee saved regs
749 ld B0,X+ ; load from caller stack
752 #endif /* defined (__AVR_TINY__) */
758 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
759 LSR B2 $ ror B1 $ ror B0
761 ;; If the N-th Bit of B[] was set...
764 ;; ...then add A[] * 2^N to the Result C[]
765 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
767 1: ;; Multiply A[] by 2
768 LSL A0 $ rol A1 $ rol A2
770 ;; Loop until B[] is 0
771 subi B0,0 $ sbci B1,0 $ sbci B2,0
774 ;; Copy C[] to the return Register A[]
779 #if defined (__AVR_TINY__)
782 #endif /* (__AVR_TINY__) */
790 #endif /* HAVE_MUL */
800 #endif /* L_mulpsi3 */
802 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
804 ;; A[0..2]: In: Multiplicand
809 ;; BB: In: Multiplier
817 ;; C[] = A[] * sign_extend (BB)
845 #endif /* L_mulsqipsi3 && HAVE_MUL */
847 /*******************************************************
848 Multiplication 64 x 64
849 *******************************************************/
853 ;; A[0..7]: In: Multiplicand
864 ;; B[0..7]: In: Multiplier
875 #if defined (__AVR_HAVE_MUL__)
876 ;; Define C[] for convenience
877 ;; Notice that parts of C[] overlap A[] respective B[]
887 #if defined (L_muldi3)
890 ;; R25:R18 *= R17:R10
891 ;; Ordinary ABI-Function
899 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
902 mul A7,B0 $ $ mov C7,r0
903 mul A0,B7 $ $ add C7,r0
904 mul A6,B1 $ $ add C7,r0
905 mul A6,B0 $ mov C6,r0 $ add C7,r1
906 mul B6,A1 $ $ add C7,r0
907 mul B6,A0 $ add C6,r0 $ adc C7,r1
910 mul A2,B4 $ add C6,r0 $ adc C7,r1
911 mul A3,B4 $ $ add C7,r0
912 mul A2,B5 $ $ add C7,r0
929 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
939 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
949 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
953 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
967 #endif /* L_muldi3 */
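/* Only word products a_i * b_j with i + j <= 3 can reach the low
   64 bits; terms with i + j >= 4 are multiples of 2^64.  Illustrative
   C model of the computation above (assuming 16-bit short as on AVR):

       unsigned long long muldi3 (unsigned long long a,
                                  unsigned long long b)
       {
           unsigned long long c = 0;
           for (int i = 0; i < 4; i++)
               for (int j = 0; i + j < 4; j++)
                   c += ((unsigned long long) (unsigned short) (a >> (16 * i))
                         * (unsigned short) (b >> (16 * j)))
                        << (16 * (i + j));
           return c;
       }
*/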
969 #if defined (L_muldi3_6)
970 ;; A helper for some 64-bit multiplications with MUL available
982 #endif /* L_muldi3_6 */
993 #else /* !HAVE_MUL */
995 #if defined (L_muldi3)
1009 ;; R25:R18 *= R17:R10
1010 ;; Ordinary ABI-Function
1026 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1027 ;; where N = 64 - Loop.
1028 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1029 ;; B[] will have its initial Value again.
1030 LSR B7 $ ror B6 $ ror B5 $ ror B4
1031 ror B3 $ ror B2 $ ror B1 $ ror B0
1033 ;; If the N-th Bit of B[] was set then...
1035 ;; ...finish Rotation...
1038 ;; ...and add A[] * 2^N to the Result C[]
1039 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
1040 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
1042 1: ;; Multiply A[] by 2
1043 LSL A0 $ rol A1 $ rol A2 $ rol A3
1044 rol A4 $ rol A5 $ rol A6 $ rol A7
1049 ;; We expanded the Result in C[]
1050 ;; Copy Result to the Return Register A[]
1074 #endif /* L_muldi3 */
1075 #endif /* HAVE_MUL */
1076 #endif /* if not __AVR_TINY__ */
1096 /*******************************************************
1097 Widening Multiplication 64 = 32 x 32 with MUL
1098 *******************************************************/
1100 #if defined (__AVR_HAVE_MUL__)
1120 #if defined (L_umulsidi3)
1122 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1124 ;; R18[8] = R22[4] * R18[4]
1126 ;; Ordinary ABI Function, but additionally sets
1127 ;; X = R20[2] = B2[2]
1128 ;; Z = R22[2] = A0[2]
1134 DEFUN __umulsidi3_helper
1135 push 29 $ push 28 ; Y
1137 ;; Counting in Words, we have to perform 4 Multiplications
1141 push 23 $ push 22 ; C0
1145 push 27 $ push 26 ; A0
1146 push 19 $ push 18 ; B2
1148 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1149 ;; B2 C2 -- -- -- B0 A2
1153 ;; Sign-extend A. T holds the sign of A
1155 ;; Subtract B from the high part of the result
1160 0: wmov 18, 28 ;; B0
1164 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1165 ;; B0 C2 -- -- A2 C4 C6
1170 pop 26 $ pop 27 ;; B2
1171 pop 18 $ pop 19 ;; A0
1174 ;; Move result C into place and save A0 in Z
1181 pop 28 $ pop 29 ;; Y
1183 ENDF __umulsidi3_helper
1184 #endif /* L_umulsidi3 */
1187 #if defined (L_mulsidi3)
1189 ;; Signed widening 64 = 32 * 32 Multiplication
1191 ;; R18[8] = R22[4] * R18[4]
1192 ;; Ordinary ABI Function
1195 sbrs B3, 7 ; Enhanced core has no skip bug
1196 XJMP __umulsidi3_helper
1198 ;; B needs sign-extension
1201 XCALL __umulsidi3_helper
1211 #endif /* L_mulsidi3 */
1229 #endif /* HAVE_MUL */
1231 /**********************************************************
1232 Widening Multiplication 64 = 32 x 32 without MUL
1233 **********************************************************/
1234 #ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1235 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1266 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1268 ;; R18[8] = R22[4] * R18[4]
1269 ;; Ordinary ABI Function
1278 ;; Save 10 Registers: R10..R17, R28, R29
1279 do_prologue_saves 10
1282 ;; Move B into place...
1291 ;; Move A into place...
1301 do_epilogue_restores 10
1329 #endif /* L_mulsidi3 && !HAVE_MUL */
1330 #endif /* if not __AVR_TINY__ */
1331 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1334 .section .text.libgcc.div, "ax", @progbits
1336 /*******************************************************
1337 Division 8 / 8 => (result + remainder)
1338 *******************************************************/
1339 #define r_rem r25 /* remainder */
1340 #define r_arg1 r24 /* dividend, quotient */
1341 #define r_arg2 r22 /* divisor */
1342 #define r_cnt r23 /* loop count */
1344 #if defined (L_udivmodqi4)
1346 sub r_rem,r_rem ; clear remainder and carry
1347 ldi r_cnt,9 ; init loop counter
1348 rjmp __udivmodqi4_ep ; jump to entry point
1350 rol r_rem ; shift dividend into remainder
1351 cp r_rem,r_arg2 ; compare remainder & divisor
1352 brcs __udivmodqi4_ep ; remainder < divisor
1353 sub r_rem,r_arg2 ; subtract divisor from remainder
1355 rol r_arg1 ; shift dividend (with CARRY)
1356 dec r_cnt ; decrement loop counter
1357 brne __udivmodqi4_loop
1358 com r_arg1 ; complement result
1359 ; because C flag was complemented in loop
1362 #endif /* defined (L_udivmodqi4) */
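/* Behavioral C model of the loop above, assuming den != 0; the real
   code keeps the quotient bits in the dividend register and
   complements them at the end (the C flag is complemented in the loop):

       unsigned char udivmodqi4 (unsigned char num, unsigned char den,
                                 unsigned char *rem)
       {
           unsigned char quo = 0, r = 0;
           int i;
           for (i = 7; i >= 0; i--)
             {
               r = (r << 1) | ((num >> i) & 1); // shift dividend in
               quo <<= 1;
               if (r >= den)
                 {
                   r -= den;                    // subtract divisor
                   quo |= 1;                    // set quotient bit
                 }
             }
           *rem = r;
           return quo;
       }
*/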
1364 #if defined (L_divmodqi4)
1366 bst r_arg1,7 ; store sign of dividend
1367 mov __tmp_reg__,r_arg1
1368 eor __tmp_reg__,r_arg2 ; r0.7 is sign of result
1370 neg r_arg1 ; dividend negative : negate
1372 neg r_arg2 ; divisor negative : negate
1373 XCALL __udivmodqi4 ; do the unsigned div/mod
1375 neg r_rem ; correct remainder sign
1378 neg r_arg1 ; correct result sign
1382 #endif /* defined (L_divmodqi4) */
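;; The sign rules above match C's truncating division: the quotient
;; is negative iff the operand signs differ (recorded in r0.7), the
;; remainder takes the sign of the dividend (recorded in T), e.g.
;; -7 / 2 == -3 and -7 % 2 == -1.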
1390 /*******************************************************
1391 Division 16 / 16 => (result + remainder)
1392 *******************************************************/
1393 #define r_remL r26 /* remainder Low */
1394 #define r_remH r27 /* remainder High */
1396 /* return: remainder */
1397 #define r_arg1L r24 /* dividend Low */
1398 #define r_arg1H r25 /* dividend High */
1400 /* return: quotient */
1401 #define r_arg2L r22 /* divisor Low */
1402 #define r_arg2H r23 /* divisor High */
1404 #define r_cnt r21 /* loop count */
1406 #if defined (L_udivmodhi4)
1409 sub r_remH,r_remH ; clear remainder and carry
1410 ldi r_cnt,17 ; init loop counter
1411 rjmp __udivmodhi4_ep ; jump to entry point
1413 rol r_remL ; shift dividend into remainder
1415 cp r_remL,r_arg2L ; compare remainder & divisor
1417 brcs __udivmodhi4_ep ; remainder < divisor
1418 sub r_remL,r_arg2L ; subtract divisor from remainder
1421 rol r_arg1L ; shift dividend (with CARRY)
1423 dec r_cnt ; decrement loop counter
1424 brne __udivmodhi4_loop
1427 ; div/mod results to return registers, as for the div() function
1428 mov_l r_arg2L, r_arg1L ; quotient
1429 mov_h r_arg2H, r_arg1H
1430 mov_l r_arg1L, r_remL ; remainder
1431 mov_h r_arg1H, r_remH
1434 #endif /* defined (L_udivmodhi4) */
1436 #if defined (L_divmodhi4)
1440 bst r_arg1H,7 ; store sign of dividend
1441 mov __tmp_reg__,r_arg2H
1443 com __tmp_reg__ ; r0.7 is sign of result
1444 rcall __divmodhi4_neg1 ; dividend negative: negate
1447 rcall __divmodhi4_neg2 ; divisor negative: negate
1448 XCALL __udivmodhi4 ; do the unsigned div/mod
1450 rcall __divmodhi4_neg2 ; correct remainder sign
1451 brtc __divmodhi4_exit
1453 ;; correct dividend/remainder sign
1459 ;; correct divisor/result sign
1466 #endif /* defined (L_divmodhi4) */
1479 /*******************************************************
1480 Division 24 / 24 => (result + remainder)
1481 *******************************************************/
1483 ;; A[0..2]: In: Dividend; Out: Quotient
1488 ;; B[0..2]: In: Divisor; Out: Remainder
1493 ;; C[0..2]: Expand remainder
1494 #define C0 __zero_reg__
1501 #if defined (L_udivmodpsi4)
1502 ;; R24:R22 = R24:R22 udiv R20:R18
1503 ;; R20:R18 = R24:R22 umod R20:R18
1504 ;; Clobbers: R21, R25, R26
1509 ; Clear remainder and carry. C0 is already 0
1512 ; jump to entry point
1513 rjmp __udivmodpsi4_start
1515 ; shift dividend into remainder
1519 ; compare remainder & divisor
1523 brcs __udivmodpsi4_start ; remainder < divisor
1524 sub C0, B0 ; subtract divisor from remainder
1527 __udivmodpsi4_start:
1528 ; shift dividend (with CARRY)
1532 ; decrement loop counter
1534 brne __udivmodpsi4_loop
1538 ; div/mod results to return registers
1543 clr __zero_reg__ ; C0
1546 #endif /* defined (L_udivmodpsi4) */
1548 #if defined (L_divmodpsi4)
1549 ;; R24:R22 = R24:R22 div R20:R18
1550 ;; R20:R18 = R24:R22 mod R20:R18
1551 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1554 ; R0.7 will contain the sign of the result:
1555 ; R0.7 = A.sign ^ B.sign
1557 ; T-flag = sign of dividend
1561 ; Adjust dividend's sign
1562 rcall __divmodpsi4_negA
1564 ; Adjust divisor's sign
1566 rcall __divmodpsi4_negB
1568 ; Do the unsigned div/mod
1571 ; Adjust quotient's sign
1573 rcall __divmodpsi4_negA
1575 ; Adjust remainder's sign
1576 brtc __divmodpsi4_end
1579 ; Correct divisor/remainder sign
1587 ; Correct dividend/quotient sign
1598 #endif /* defined (L_divmodpsi4) */
1614 /*******************************************************
1615 Division 32 / 32 => (result + remainder)
1616 *******************************************************/
1617 #define r_remHH r31 /* remainder High */
1620 #define r_remL r26 /* remainder Low */
1622 /* return: remainder */
1623 #define r_arg1HH r25 /* dividend High */
1624 #define r_arg1HL r24
1626 #define r_arg1L r22 /* dividend Low */
1628 /* return: quotient */
1629 #define r_arg2HH r21 /* divisor High */
1630 #define r_arg2HL r20
1632 #define r_arg2L r18 /* divisor Low */
1634 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1636 #if defined (L_udivmodsi4)
1638 ldi r_remL, 33 ; init loop counter
1641 sub r_remH,r_remH ; clear remainder and carry
1642 mov_l r_remHL, r_remL
1643 mov_h r_remHH, r_remH
1644 rjmp __udivmodsi4_ep ; jump to entry point
1646 rol r_remL ; shift dividend into remainder
1650 cp r_remL,r_arg2L ; compare remainder & divisor
1652 cpc r_remHL,r_arg2HL
1653 cpc r_remHH,r_arg2HH
1654 brcs __udivmodsi4_ep ; remainder < divisor
1655 sub r_remL,r_arg2L ; subtract divisor from remainder
1657 sbc r_remHL,r_arg2HL
1658 sbc r_remHH,r_arg2HH
1660 rol r_arg1L ; shift dividend (with CARRY)
1664 dec r_cnt ; decrement loop counter
1665 brne __udivmodsi4_loop
1666 ; __zero_reg__ now restored (r_cnt == 0)
1671 ; div/mod results to return registers, as for the ldiv() function
1672 mov_l r_arg2L, r_arg1L ; quotient
1673 mov_h r_arg2H, r_arg1H
1674 mov_l r_arg2HL, r_arg1HL
1675 mov_h r_arg2HH, r_arg1HH
1676 mov_l r_arg1L, r_remL ; remainder
1677 mov_h r_arg1H, r_remH
1678 mov_l r_arg1HL, r_remHL
1679 mov_h r_arg1HH, r_remHH
1682 #endif /* defined (L_udivmodsi4) */
1684 #if defined (L_divmodsi4)
1686 mov __tmp_reg__,r_arg2HH
1687 bst r_arg1HH,7 ; store sign of dividend
1689 com __tmp_reg__ ; r0.7 is sign of result
1690 XCALL __negsi2 ; dividend negative: negate
1693 rcall __divmodsi4_neg2 ; divisor negative: negate
1694 XCALL __udivmodsi4 ; do the unsigned div/mod
1695 sbrc __tmp_reg__, 7 ; correct quotient sign
1696 rcall __divmodsi4_neg2
1697 brtc __divmodsi4_exit ; correct remainder sign
1700 ;; correct divisor/quotient sign
1711 #endif /* defined (L_divmodsi4) */
1713 #if defined (L_negsi2)
1715 ;; (neg:SI (reg:SI 22)))
1716 ;; Sets the V flag for signed overflow tests
1721 #endif /* L_negsi2 */
1737 /* *di routines use registers below R19 and won't work with tiny arch
1740 #if !defined (__AVR_TINY__)
1741 /*******************************************************
1744 *******************************************************/
1746 ;; Use the Speed-optimized Version on "big" Devices, i.e. Devices with
1747 ;; at least 16k of Program Memory. For smaller Devices, the Choice
1748 ;; depends on MOVW and SP Size. There is a Connection between SP Size
1749 ;; and Flash Size so that SP Size can be used to test for Flash Size.
1751 #if defined (__AVR_HAVE_JMP_CALL__)
1752 # define SPEED_DIV 8
1753 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1754 # define SPEED_DIV 16
1756 # define SPEED_DIV 0
1759 ;; A[0..7]: In: Dividend;
1760 ;; Out: Quotient (T = 0)
1761 ;; Out: Remainder (T = 1)
1771 ;; B[0..7]: In: Divisor; Out: Clobber
1781 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1791 ;; Holds Signs during Division Routine
1792 #define SS __tmp_reg__
1794 ;; Bit-Counter in Division Routine
1795 #define R_cnt __zero_reg__
1797 ;; Scratch Register for Negation
1800 #if defined (L_udivdi3)
1802 ;; R25:R18 = R24:R18 umod R17:R10
1803 ;; Ordinary ABI-Function
1807 rjmp __udivdi3_umoddi3
1810 ;; R25:R18 = R24:R18 udiv R17:R10
1811 ;; Ordinary ABI-Function
1817 DEFUN __udivdi3_umoddi3
1828 ENDF __udivdi3_umoddi3
1829 #endif /* L_udivdi3 */
1831 #if defined (L_udivmod64)
1833 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1834 ;; No Registers saved/restored; the Callers will take Care.
1835 ;; Preserves B[] and T-flag
1836 ;; T = 0: Compute Quotient in A[]
1837 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1841 ;; Clear Remainder (C6, C7 will follow)
1848 #if SPEED_DIV == 0 || SPEED_DIV == 16
1849 ;; Initialize Loop-Counter
1852 #endif /* SPEED_DIV */
1859 1: ;; Compare shifted Dividend against Divisor
1860 ;; If -- even after Shifting -- it is smaller...
1861 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1862 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1865 ;; ...then we can subtract it. Thus, it is legal to shift left
1866 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1867 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1868 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1869 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1875 ;; Shifted 64 Bits: A7 has traveled to C7
1877 ;; Divisor is greater than Dividend. We have:
1880 ;; Thus, we can return immediately
1883 2: ;; Initialize Bit-Counter with the Number of Bits still to be processed
1886 ;; Push of A7 is not needed because C7 is still 0
1890 #elif SPEED_DIV == 16
1892 ;; Compare shifted Dividend against Divisor
1900 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1901 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1902 wmov C2,A6 $ wmov C0,A4
1903 wmov A6,A2 $ wmov A4,A0
1904 wmov A2,C6 $ wmov A0,C4
1906 ;; Set Bit Counter to 32
1910 #error SPEED_DIV = ?
1911 #endif /* SPEED_DIV */
1913 ;; The actual Division + Remainder Routine
1915 3: ;; Left-shift Dividend...
1916 lsl A0 $ rol A1 $ rol A2 $ rol A3
1917 rol A4 $ rol A5 $ rol A6 $ rol A7
1919 ;; ...into Remainder
1920 rol C0 $ rol C1 $ rol C2 $ rol C3
1921 rol C4 $ rol C5 $ rol C6 $ rol C7
1923 ;; Compare Remainder and Divisor
1924 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1925 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1929 ;; Divisor fits into Remainder: Subtract it from Remainder...
1930 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1931 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1933 ;; ...and set the corresponding Bit in the upcoming Quotient
1934 ;; The Bit will travel to its final Position
1937 4: ;; This Bit is done
1940 ;; __zero_reg__ is 0 again
1942 ;; T = 0: We are fine with the Quotient in A[]
1943 ;; T = 1: Copy Remainder to A[]
1949 ;; Move the Sign of the Result to SS.7
1955 #endif /* L_udivmod64 */
1958 #if defined (L_divdi3)
1960 ;; R25:R18 = R24:R18 mod R17:R10
1961 ;; Ordinary ABI-Function
1965 rjmp __divdi3_moddi3
1968 ;; R25:R18 = R24:R18 div R17:R10
1969 ;; Ordinary ABI-Function
1975 DEFUN __divdi3_moddi3
1980 ;; Both Signs are 0: the following Complexity is not needed
1981 XJMP __udivdi3_umoddi3
1982 #endif /* SPEED_DIV */
1985 ;; Save 12 Registers: Y, 17...8
1987 do_prologue_saves 12
1989 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1990 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1993 ;; Adjust Dividend's Sign as needed
1995 ;; Compiling for Speed, we know that at least one Sign must be < 0
1996 ;; Thus, if A[] >= 0 then we know B[] < 0
2000 #endif /* SPEED_DIV */
2004 ;; Adjust Divisor's Sign and SS.7 as needed
2011 com B4 $ com B5 $ com B6 $ com B7
2012 $ com B1 $ com B2 $ com B3
2014 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
2015 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
2017 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
2020 ;; Adjust Result's Sign
2021 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2026 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2029 4: ;; Epilogue: Restore 12 Registers and return
2030 do_epilogue_restores 12
2032 ENDF __divdi3_moddi3
2034 #endif /* L_divdi3 */
2040 .section .text.libgcc, "ax", @progbits
2042 #define TT __tmp_reg__
2044 #if defined (L_adddi3)
2046 ;; (plus:DI (reg:DI 18)
2048 ;; Sets the V flag for signed overflow tests
2049 ;; Sets the C flag for unsigned overflow tests
2051 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
2052 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
2055 #endif /* L_adddi3 */
2057 #if defined (L_adddi3_s8)
2059 ;; (plus:DI (reg:DI 18)
2060 ;; (sign_extend:SI (reg:QI 26))))
2061 ;; Sets the V flag for signed overflow tests
2062 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2067 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2068 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2071 #endif /* L_adddi3_s8 */
2073 #if defined (L_subdi3)
2075 ;; (minus:DI (reg:DI 18)
2077 ;; Sets the V flag for signed overflow tests
2078 ;; Sets the C flag for unsigned overflow tests
2080 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2081 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2084 #endif /* L_subdi3 */
2086 #if defined (L_cmpdi2)
2088 ;; (compare (reg:DI 18)
2091 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2092 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2095 #endif /* L_cmpdi2 */
2097 #if defined (L_cmpdi2_s8)
2099 ;; (compare (reg:DI 18)
2100 ;; (sign_extend:SI (reg:QI 26))))
2105 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2106 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2109 #endif /* L_cmpdi2_s8 */
2111 #if defined (L_negdi2)
2113 ;; (neg:DI (reg:DI 18)))
2114 ;; Sets the V flag for signed overflow tests
2117 com A4 $ com A5 $ com A6 $ com A7
2118 $ com A1 $ com A2 $ com A3
2120 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2121 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2125 #endif /* L_negdi2 */
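;; Bytewise two's complement: -A == ~A + 1.  NEG the low byte, COM
;; the remaining bytes, then propagate the +1 with SBCI x,-1, which
;; computes x - (-1) - C == x + 1 - C; NEG sets C exactly when the
;; low byte was non-zero, i.e. when no increment must propagate.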
2156 #endif /* !defined (__AVR_TINY__) */
2159 .section .text.libgcc.prologue, "ax", @progbits
2161 /**********************************
2162 * This is a prologue subroutine
2163 **********************************/
2164 #if !defined (__AVR_TINY__)
2165 #if defined (L_prologue)
2167 ;; This function does not clobber T-flag; 64-bit division relies on it
2168 DEFUN __prologue_saves__
2187 #if !defined (__AVR_HAVE_SPH__)
2192 #elif defined (__AVR_XMEGA__)
2204 in __tmp_reg__,__SREG__
2207 out __SREG__,__tmp_reg__
2209 #endif /* #SP = 8/16 */
2213 ENDF __prologue_saves__
2214 #endif /* defined (L_prologue) */
2217 * This is an epilogue subroutine
2219 #if defined (L_epilogue)
2221 DEFUN __epilogue_restores__
2239 #if !defined (__AVR_HAVE_SPH__)
2244 #elif defined (__AVR_XMEGA__)
2247 adc r29,__zero_reg__
2254 adc r29,__zero_reg__
2255 in __tmp_reg__,__SREG__
2258 out __SREG__,__tmp_reg__
2262 #endif /* #SP = 8/16 */
2264 ENDF __epilogue_restores__
2265 #endif /* defined (L_epilogue) */
2266 #endif /* !defined (__AVR_TINY__) */
2269 .section .fini9,"ax",@progbits
2275 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2277 .section .fini0,"ax",@progbits
2281 #endif /* defined (L_exit) */
2289 #endif /* defined (L_cleanup) */
2292 .section .text.libgcc, "ax", @progbits
2295 DEFUN __tablejump2__
2298 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2299 ;; Word address of gs() jumptable entry in R24:Z
2302 #elif defined (__AVR_HAVE_ELPM__)
2303 ;; Word address of jumptable entry in Z
2306 out __RAMPZ__, __tmp_reg__
2309 ;; Read word address from jumptable and jump
2311 #if defined (__AVR_HAVE_ELPMX__)
2312 elpm __tmp_reg__, Z+
2314 mov r30, __tmp_reg__
2315 #ifdef __AVR_HAVE_RAMPD__
2316 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2317 out __RAMPZ__, __zero_reg__
2320 #elif defined (__AVR_HAVE_ELPM__)
2327 #elif defined (__AVR_HAVE_LPMX__)
2330 mov r30, __tmp_reg__
2332 #elif defined (__AVR_TINY__)
2333 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
2335 ld r31, Z ; Use ld instead of lpm to load Z
2336 mov r30, __tmp_reg__
2347 #endif /* L_tablejump2 */
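;; In short: Z holds the word address of a jumptable entry; it is
;; doubled to a byte address, the entry -- itself a word address --
;; is fetched from flash (with LD on AVR_TINY), and control transfers
;; to it with IJMP resp. EIJMP.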
2349 #if defined(__AVR_TINY__)
2351 .section .init4,"ax",@progbits
2352 .global __do_copy_data
2354 ldi r18, hi8(__data_end)
2355 ldi r26, lo8(__data_start)
2356 ldi r27, hi8(__data_start)
2357 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2358 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
2359 rjmp .L__do_copy_data_start
2360 .L__do_copy_data_loop:
2363 .L__do_copy_data_start:
2364 cpi r26, lo8(__data_end)
2366 brne .L__do_copy_data_loop
2370 .section .init4,"ax",@progbits
2371 DEFUN __do_copy_data
2372 #if defined(__AVR_HAVE_ELPMX__)
2373 ldi r17, hi8(__data_end)
2374 ldi r26, lo8(__data_start)
2375 ldi r27, hi8(__data_start)
2376 ldi r30, lo8(__data_load_start)
2377 ldi r31, hi8(__data_load_start)
2378 ldi r16, hh8(__data_load_start)
2380 rjmp .L__do_copy_data_start
2381 .L__do_copy_data_loop:
2384 .L__do_copy_data_start:
2385 cpi r26, lo8(__data_end)
2387 brne .L__do_copy_data_loop
2388 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2389 ldi r17, hi8(__data_end)
2390 ldi r26, lo8(__data_start)
2391 ldi r27, hi8(__data_start)
2392 ldi r30, lo8(__data_load_start)
2393 ldi r31, hi8(__data_load_start)
2394 ldi r16, hh8(__data_load_start - 0x10000)
2395 .L__do_copy_data_carry:
2398 rjmp .L__do_copy_data_start
2399 .L__do_copy_data_loop:
2403 brcs .L__do_copy_data_carry
2404 .L__do_copy_data_start:
2405 cpi r26, lo8(__data_end)
2407 brne .L__do_copy_data_loop
2408 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2409 ldi r17, hi8(__data_end)
2410 ldi r26, lo8(__data_start)
2411 ldi r27, hi8(__data_start)
2412 ldi r30, lo8(__data_load_start)
2413 ldi r31, hi8(__data_load_start)
2414 rjmp .L__do_copy_data_start
2415 .L__do_copy_data_loop:
2416 #if defined (__AVR_HAVE_LPMX__)
2423 .L__do_copy_data_start:
2424 cpi r26, lo8(__data_end)
2426 brne .L__do_copy_data_loop
2427 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2428 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2429 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2430 out __RAMPZ__, __zero_reg__
2431 #endif /* ELPM && RAMPD */
2433 #endif /* L_copy_data */
2434 #endif /* !defined (__AVR_TINY__) */
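/* Behavioral C model of the __do_copy_data variants above; the real
   code fetches with LD, LPM or ELPM depending on the device:

       extern char __data_start[], __data_end[];
       extern const char __data_load_start[];  // load address in flash

       void do_copy_data (void)
       {
           const char *src = __data_load_start;
           char *dst = __data_start;
           while (dst != __data_end)
               *dst++ = *src++;   // flash read on real hardware
       }

   __do_clear_bss below is the same loop storing zero instead. */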
2436 /* __do_clear_bss is only necessary if there is anything in the .bss section. */
2439 .section .init4,"ax",@progbits
2440 DEFUN __do_clear_bss
2441 ldi r18, hi8(__bss_end)
2442 ldi r26, lo8(__bss_start)
2443 ldi r27, hi8(__bss_start)
2444 rjmp .do_clear_bss_start
2447 .do_clear_bss_start:
2448 cpi r26, lo8(__bss_end)
2450 brne .do_clear_bss_loop
2452 #endif /* L_clear_bss */
2454 /* __do_global_ctors and __do_global_dtors are only necessary
2455 if there are any constructors/destructors. */
2457 #if defined(__AVR_TINY__)
2458 #define cdtors_tst_reg r18
2460 #define cdtors_tst_reg r17
2464 .section .init6,"ax",@progbits
2465 DEFUN __do_global_ctors
2466 ldi cdtors_tst_reg, pm_hi8(__ctors_start)
2467 ldi r28, pm_lo8(__ctors_end)
2468 ldi r29, pm_hi8(__ctors_end)
2469 #ifdef __AVR_HAVE_EIJMP_EICALL__
2470 ldi r16, pm_hh8(__ctors_end)
2471 #endif /* HAVE_EIJMP */
2472 rjmp .L__do_global_ctors_start
2473 .L__do_global_ctors_loop:
2475 #ifdef __AVR_HAVE_EIJMP_EICALL__
2476 sbc r16, __zero_reg__
2478 #endif /* HAVE_EIJMP */
2481 XCALL __tablejump2__
2482 .L__do_global_ctors_start:
2483 cpi r28, pm_lo8(__ctors_start)
2484 cpc r29, cdtors_tst_reg
2485 #ifdef __AVR_HAVE_EIJMP_EICALL__
2486 ldi r24, pm_hh8(__ctors_start)
2488 #endif /* HAVE_EIJMP */
2489 brne .L__do_global_ctors_loop
2490 ENDF __do_global_ctors
2491 #endif /* L_ctors */
2494 .section .fini6,"ax",@progbits
2495 DEFUN __do_global_dtors
2496 ldi cdtors_tst_reg, pm_hi8(__dtors_end)
2497 ldi r28, pm_lo8(__dtors_start)
2498 ldi r29, pm_hi8(__dtors_start)
2499 #ifdef __AVR_HAVE_EIJMP_EICALL__
2500 ldi r16, pm_hh8(__dtors_start)
2501 #endif /* HAVE_EIJMP */
2502 rjmp .L__do_global_dtors_start
2503 .L__do_global_dtors_loop:
2504 #ifdef __AVR_HAVE_EIJMP_EICALL__
2506 #endif /* HAVE_EIJMP */
2509 XCALL __tablejump2__
2511 #ifdef __AVR_HAVE_EIJMP_EICALL__
2512 adc r16, __zero_reg__
2513 #endif /* HAVE_EIJMP */
2514 .L__do_global_dtors_start:
2515 cpi r28, pm_lo8(__dtors_end)
2516 cpc r29, cdtors_tst_reg
2517 #ifdef __AVR_HAVE_EIJMP_EICALL__
2518 ldi r24, pm_hh8(__dtors_end)
2520 #endif /* HAVE_EIJMP */
2521 brne .L__do_global_dtors_loop
2522 ENDF __do_global_dtors
2523 #endif /* L_dtors */
2525 #undef cdtors_tst_reg
2527 .section .text.libgcc, "ax", @progbits
2529 #if !defined (__AVR_TINY__)
2530 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2531 ;; Loading n bytes from Flash; n = 3,4
2532 ;; R22... = Flash[Z]
2533 ;; Clobbers: __tmp_reg__
2535 #if (defined (L_load_3) \
2536 || defined (L_load_4)) \
2537 && !defined (__AVR_HAVE_LPMX__)
2545 .macro .load dest, n
2548 .if \dest != D0+\n-1
2555 #if defined (L_load_3)
2562 #endif /* L_load_3 */
2564 #if defined (L_load_4)
2572 #endif /* L_load_4 */
2574 #endif /* L_load_3 || L_load_4 */
2575 #endif /* !defined (__AVR_TINY__) */
2577 #if !defined (__AVR_TINY__)
2578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2579 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2580 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2581 ;; Clobbers: __tmp_reg__, R21, R30, R31
2583 #if (defined (L_xload_1) \
2584 || defined (L_xload_2) \
2585 || defined (L_xload_3) \
2586 || defined (L_xload_4))
2594 ;; Register containing bits 16+ of the address
2598 .macro .xload dest, n
2599 #if defined (__AVR_HAVE_ELPMX__)
2601 #elif defined (__AVR_HAVE_ELPM__)
2604 .if \dest != D0+\n-1
2606 adc HHI8, __zero_reg__
2609 #elif defined (__AVR_HAVE_LPMX__)
2614 .if \dest != D0+\n-1
2618 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2619 .if \dest == D0+\n-1
2620 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2621 out __RAMPZ__, __zero_reg__
2626 #if defined (L_xload_1)
2628 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2637 #if defined (__AVR_HAVE_ELPM__)
2639 #endif /* __AVR_HAVE_ELPM__ */
2644 #endif /* LPMx && ! ELPM */
2646 #endif /* L_xload_1 */
2648 #if defined (L_xload_2)
2652 #if defined (__AVR_HAVE_ELPM__)
2654 #endif /* __AVR_HAVE_ELPM__ */
2662 #endif /* L_xload_2 */
2664 #if defined (L_xload_3)
2668 #if defined (__AVR_HAVE_ELPM__)
2670 #endif /* __AVR_HAVE_ELPM__ */
2680 #endif /* L_xload_3 */
2682 #if defined (L_xload_4)
2686 #if defined (__AVR_HAVE_ELPM__)
2688 #endif /* __AVR_HAVE_ELPM__ */
2700 #endif /* L_xload_4 */
2702 #endif /* L_xload_{1|2|3|4} */
2703 #endif /* if !defined (__AVR_TINY__) */
2705 #if !defined (__AVR_TINY__)
2706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2707 ;; memcopy from Address Space __pgmx to RAM
2708 ;; R23:Z = Source Address
2709 ;; X = Destination Address
2710 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2712 #if defined (L_movmemx)
2718 ;; #Bytes to copy fit in 8 Bits (1..255)
2719 ;; Zero-extend Loop Counter
2732 #if defined (__AVR_HAVE_ELPM__)
2736 0: ;; Load 1 Byte from Flash...
2738 #if defined (__AVR_HAVE_ELPMX__)
2740 #elif defined (__AVR_HAVE_ELPM__)
2743 adc HHI8, __zero_reg__
2745 #elif defined (__AVR_HAVE_LPMX__)
2752 ;; ...and store that Byte to RAM Destination
2756 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2757 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2758 out __RAMPZ__, __zero_reg__
2759 #endif /* ELPM && RAMPD */
2764 1: ;; Read 1 Byte from RAM...
2766 ;; and store that Byte to RAM Destination
2776 #endif /* L_movmemx */
2777 #endif /* !defined (__AVR_TINY__) */
2780 .section .text.libgcc.builtins, "ax", @progbits
2782 /**********************************
2783 * Find first set Bit (ffs)
2784 **********************************/
2786 #if defined (L_ffssi2)
2787 ;; find first set bit
2788 ;; r25:r24 = ffs32 (r25:r22)
2789 ;; clobbers: r22, r26
2807 #endif /* defined (L_ffssi2) */
2809 #if defined (L_ffshi2)
2810 ;; find first set bit
2811 ;; r25:r24 = ffs16 (r25:r24)
2815 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2816 ;; Some cores have problems skipping a 2-word instruction
2820 cpse r24, __zero_reg__
2821 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2822 1: XJMP __loop_ffsqi2
2828 #endif /* defined (L_ffshi2) */
2830 #if defined (L_loop_ffsqi2)
2831 ;; Helper for ffshi2, ffssi2
2832 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2843 #endif /* defined (L_loop_ffsqi2) */
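/* ffs() semantics implemented by the routines above (illustrative
   C model): the result is 1 + the index of the least significant
   1-bit, or 0 if no bit is set:

       unsigned ffs16 (unsigned x)
       {
           unsigned n = 1;
           if (x == 0)
               return 0;
           while (!(x & 1))
             {
               x >>= 1;
               n++;
             }
           return n;
       }
*/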
2846 /**********************************
2847 * Count trailing Zeros (ctz)
2848 **********************************/
2850 #if defined (L_ctzsi2)
2851 ;; count trailing zeros
2852 ;; r25:r24 = ctz32 (r25:r22)
2853 ;; clobbers: r26, r22
2855 ;; Note that ctz(0) is undefined for GCC
2861 #endif /* defined (L_ctzsi2) */
2863 #if defined (L_ctzhi2)
2864 ;; count trailing zeros
2865 ;; r25:r24 = ctz16 (r25:r24)
2868 ;; Note that ctz(0) is undefined for GCC
2874 #endif /* defined (L_ctzhi2) */
2877 /**********************************
2878 * Count leading Zeros (clz)
2879 **********************************/
2881 #if defined (L_clzdi2)
2882 ;; count leading zeros
2883 ;; r25:r24 = clz64 (r25:r18)
2884 ;; clobbers: r22, r23, r26
2897 #endif /* defined (L_clzdi2) */
2899 #if defined (L_clzsi2)
2900 ;; count leading zeros
2901 ;; r25:r24 = clz32 (r25:r22)
2913 #endif /* defined (L_clzsi2) */
2915 #if defined (L_clzhi2)
2916 ;; count leading zeros
2917 ;; r25:r24 = clz16 (r25:r24)
2939 #endif /* defined (L_clzhi2) */
2942 /**********************************
2944 **********************************/
2946 #if defined (L_paritydi2)
2947 ;; r25:r24 = parity64 (r25:r18)
2948 ;; clobbers: __tmp_reg__
2956 #endif /* defined (L_paritydi2) */
2958 #if defined (L_paritysi2)
2959 ;; r25:r24 = parity32 (r25:r22)
2960 ;; clobbers: __tmp_reg__
2966 #endif /* defined (L_paritysi2) */
2968 #if defined (L_parityhi2)
2969 ;; r25:r24 = parity16 (r25:r24)
2970 ;; clobbers: __tmp_reg__
2976 ;; r25:r24 = parity8 (r24)
2977 ;; clobbers: __tmp_reg__
2979 ;; parity is in r24[0..7]
2980 mov __tmp_reg__, r24
2982 eor r24, __tmp_reg__
2983 ;; parity is in r24[0..3]
2987 ;; parity is in r24[0,3]
2990 ;; parity is in r24[0]
2995 #endif /* defined (L_parityhi2) */
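/* Parity by folding; the register-level steps above differ, but the
   idea is the one of this C model:

       unsigned char parity8 (unsigned char x)
       {
           x ^= x >> 4;   // parity now in bits 0..3
           x ^= x >> 2;   // parity now in bits 0..1
           x ^= x >> 1;   // parity now in bit 0
           return x & 1;
       }
*/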
2998 /**********************************
3000 **********************************/
3002 #if defined (L_popcounthi2)
3004 ;; r25:r24 = popcount16 (r25:r24)
3005 ;; clobbers: __tmp_reg__
3015 DEFUN __popcounthi2_tail
3017 add r24, __tmp_reg__
3019 ENDF __popcounthi2_tail
3020 #endif /* defined (L_popcounthi2) */
3022 #if defined (L_popcountsi2)
3024 ;; r25:r24 = popcount32 (r25:r22)
3025 ;; clobbers: __tmp_reg__
3032 XJMP __popcounthi2_tail
3034 #endif /* defined (L_popcountsi2) */
3036 #if defined (L_popcountdi2)
3038 ;; r25:r24 = popcount64 (r25:r18)
3039 ;; clobbers: r22, r23, __tmp_reg__
3048 XJMP __popcounthi2_tail
3050 #endif /* defined (L_popcountdi2) */
3052 #if defined (L_popcountqi2)
3054 ;; r24 = popcount8 (r24)
3055 ;; clobbers: __tmp_reg__
3057 mov __tmp_reg__, r24
3061 adc r24, __zero_reg__
3063 adc r24, __zero_reg__
3065 adc r24, __zero_reg__
3067 adc r24, __zero_reg__
3069 adc r24, __zero_reg__
3071 adc r24, __tmp_reg__
3074 #endif /* defined (L_popcountqi2) */
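/* Behavioral C model of popcount8; the code above sums the bits by
   shifting them into the carry and adding it back:

       unsigned char popcount8 (unsigned char x)
       {
           unsigned char n = 0;
           while (x)
             {
               n += x & 1;
               x >>= 1;
             }
           return n;
       }
*/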
3077 /**********************************
3079 **********************************/
3081 ;; swap two registers with different register numbers
3088 #if defined (L_bswapsi2)
3090 ;; r25:r22 = bswap32 (r25:r22)
3096 #endif /* defined (L_bswapsi2) */
3098 #if defined (L_bswapdi2)
3100 ;; r25:r18 = bswap64 (r25:r18)
3108 #endif /* defined (L_bswapdi2) */
3111 /**********************************
3113 **********************************/
3115 #if defined (L_ashrdi3)
3117 #define SS __zero_reg__
3119 ;; Arithmetic shift right
3120 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
3127 ;; Logical shift right
3128 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
3130 ;; Signs are in SS (zero_reg)
3131 mov __tmp_reg__, r16
3156 mov r16, __tmp_reg__
3162 #endif /* defined (L_ashrdi3) */
3164 #if defined (L_ashldi3)
3166 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3167 ;; This function does not clobber T.
3169 mov __tmp_reg__, r16
3192 mov r16, __tmp_reg__
3195 #endif /* defined (L_ashldi3) */
3197 #if defined (L_rotldi3)
3199 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
3205 mov __tmp_reg__, r25
3213 mov r18, __tmp_reg__
3223 adc r18, __zero_reg__
3229 #endif /* defined (L_rotldi3) */
3232 .section .text.libgcc.fmul, "ax", @progbits
3234 /***********************************************************/
3235 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3236 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3237 /***********************************************************/
3243 #define A0 __tmp_reg__
3246 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3247 ;;; Clobbers: r24, r25, __tmp_reg__
3249 ;; A0.7 = negate result?
3257 #endif /* L_fmuls */
3260 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3261 ;;; Clobbers: r24, r25, __tmp_reg__
3263 ;; A0.7 = negate result?
3268 ;; Helper for __fmuls and __fmulsu
3273 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3274 ;; Some cores have problems skipping a 2-word instruction
3279 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3282 ;; C = -C iff A0.7 = 1
3286 #endif /* L_fmulsu */
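/* The 1.7 x 1.7 -> 1.15 fractional product is the integer product
   shifted left by one bit.  Behavioral C model of FMULS (assuming
   16-bit short as on AVR):

       unsigned short fmuls (signed char a, signed char b)
       {
           long p = (long) a * b;                        // -16384 ... 16384
           return (unsigned short) ((unsigned long) p << 1); // 1.15, mod 2^16
       }
*/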
3290 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
3291 ;;; Clobbers: r24, r25, __tmp_reg__
3298 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
3319 #include "lib1funcs-fixed.S"