1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998-2014 Free Software Foundation, Inc.
3 Contributed by Denis Chertykov <chertykov@gmail.com>
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #define __zero_reg__ r1
25 #define __tmp_reg__ r0
27 #if defined (__AVR_HAVE_SPH__)
31 #define __RAMPZ__ 0x3B
34 /* Most of the functions here are called directly from avr.md
35 patterns, instead of using the standard libcall mechanisms.
36 This can make better code because GCC knows exactly which
37 of the call-used registers (not all of them) are clobbered. */
39 /* FIXME: At present, there is no SORT directive in the linker
40 script so that we must not assume that different modules
41 in the same input section like .libgcc.text.mul will be
42 located close together. Therefore, we cannot use
43 RCALL/RJMP to call a function like __udivmodhi4 from
44 __divmodhi4 and have to use lengthy XCALL/XJMP even
45 though they are in the same input section and all same
46 input sections together are small enough to reach every
47 location with a RCALL/RJMP instruction. */
49 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
50 #error device not supported
53 .macro mov_l r_dest, r_src
54 #if defined (__AVR_HAVE_MOVW__)
61 .macro mov_h r_dest, r_src
62 #if defined (__AVR_HAVE_MOVW__)
69 .macro wmov r_dest, r_src
70 #if defined (__AVR_HAVE_MOVW__)
74 mov \r_dest+1, \r_src+1
78 #if defined (__AVR_HAVE_JMP_CALL__)
86 #if defined (__AVR_HAVE_EIJMP_EICALL__)
96 .macro do_prologue_saves n_pushed n_frame=0
97 ldi r26, lo8(\n_frame)
98 ldi r27, hi8(\n_frame)
99 ldi r30, lo8(gs(.L_prologue_saves.\@))
100 ldi r31, hi8(gs(.L_prologue_saves.\@))
101 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
102 .L_prologue_saves.\@:
107 .macro do_epilogue_restores n_pushed n_frame=0
109 #ifdef __AVR_HAVE_SPH__
112 subi r28, lo8(-\n_frame)
113 sbci r29, hi8(-\n_frame)
120 subi r28, lo8(-\n_frame)
122 #endif /* HAVE SPH */
124 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
127 ;; Support function entry and exit for convenience
148 ;; Skip next instruction, typically a jump target
149 #define skip cpse 0,0
151 ;; Negate a 2-byte value held in consecutive registers
158 ;; Negate a 4-byte value held in consecutive registers
159 ;; Sets the V flag for signed overflow tests if REG >= 16
171 adc \reg, __zero_reg__
172 adc \reg+1, __zero_reg__
173 adc \reg+2, __zero_reg__
174 adc \reg+3, __zero_reg__
178 #define exp_lo(N) hlo8 ((N) << 23)
179 #define exp_hi(N) hhi8 ((N) << 23)
182 .section .text.libgcc.mul, "ax", @progbits
184 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
185 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
186 #if !defined (__AVR_HAVE_MUL__)
187 /*******************************************************
188 Multiplication 8 x 8 without MUL
189 *******************************************************/
190 #if defined (L_mulqi3)
192 #define r_arg2 r22 /* multiplicand */
193 #define r_arg1 r24 /* multiplier */
194 #define r_res __tmp_reg__ /* result */
197 clr r_res ; clear result
201 add r_arg2,r_arg2 ; shift multiplicand
202 breq __mulqi3_exit ; while multiplicand != 0
204 brne __mulqi3_loop ; exit if multiplier = 0
206 mov r_arg1,r_res ; result to return register
214 #endif /* defined (L_mulqi3) */
217 /*******************************************************
218 Widening Multiplication 16 = 8 x 8 without MUL
219 Multiplication 16 x 16 without MUL
220 *******************************************************/
227 ;; Output overlaps input, thus expand result in CC0/1
230 #define CC0 __tmp_reg__
233 #if defined (L_umulqihi3)
234 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
235 ;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
236 ;;; Clobbers: __tmp_reg__, R21..R23
242 #endif /* L_umulqihi3 */
244 #if defined (L_mulqihi3)
245 ;;; R25:R24 = (signed int) R22 * (signed int) R24
246 ;;; (C1:C0) = (signed int) A0 * (signed int) B0
247 ;;; Clobbers: __tmp_reg__, R20..R23
253 ;; The multiplication runs twice as fast if A1 is zero, thus:
256 #ifdef __AVR_HAVE_JMP_CALL__
257 ;; Store B0 * sign of A
262 #else /* have no CALL */
263 ;; Skip sign-extension of A if A >= 0
264 ;; Same size as with the first alternative but avoids errata skip
265 ;; and is faster if A >= 0
271 #endif /* HAVE_JMP_CALL */
272 ;; 1-extend A after the multiplication
276 #endif /* L_mulqihi3 */
278 #if defined (L_mulhi3)
279 ;;; R25:R24 = R23:R22 * R25:R24
280 ;;; (C1:C0) = (A1:A0) * (B1:B0)
281 ;;; Clobbers: __tmp_reg__, R21..R23
289 ;; Bit n of A is 1 --> C += B << n
296 ;; If B == 0 we are ready
300 ;; Carry = n-th bit of A
303 ;; If bit n of A is set, then go add B * 2^n to C
306 ;; Carry = 0 --> The ROR above acts like CP A0, 0
307 ;; Thus, it is sufficient to CPC the high part to test A against 0
309 ;; Only proceed if A != 0
312 ;; Move Result into place
317 #endif /* L_mulhi3 */
350 /*******************************************************
351 Widening Multiplication 32 = 16 x 16 without MUL
352 *******************************************************/
354 #if defined (L_umulhisi3)
364 #endif /* L_umulhisi3 */
366 #if defined (L_mulhisi3)
373 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
380 #else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
381 ;; Zero-extend A and __mulsi3 will run at least twice as fast
382 ;; compared to a sign-extended A.
387 ;; If A < 0 then perform the B * 0xffff.... before the
388 ;; very multiplication by initializing the high part of the
389 ;; result CC with -B.
394 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
396 #endif /* L_mulhisi3 */
399 /*******************************************************
400 Multiplication 32 x 32 without MUL
401 *******************************************************/
403 #if defined (L_mulsi3)
411 DEFUN __mulsi3_helper
416 1: ;; If bit n of A is set, then add B * 2^n to the result in CC
418 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
421 lsl B0 $ rol B1 $ rol B2 $ rol B3
423 3: ;; A >>= 1: Carry = n-th bit of A
424 lsr A3 $ ror A2 $ ror A1 $ ror A0
427 ;; Only continue if A != 0
433 ;; All bits of A are consumed: Copy result to return register C
438 #endif /* L_mulsi3 */
457 #endif /* !defined (__AVR_HAVE_MUL__) */
458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
460 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
461 #if defined (__AVR_HAVE_MUL__)
476 /*******************************************************
477 Widening Multiplication 32 = 16 x 16 with MUL
478 *******************************************************/
480 #if defined (L_mulhisi3)
481 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
482 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
483 ;;; Clobbers: __tmp_reg__
492 XJMP __usmulhisi3_tail
494 #endif /* L_mulhisi3 */
496 #if defined (L_usmulhisi3)
497 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
498 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
499 ;;; Clobbers: __tmp_reg__
505 DEFUN __usmulhisi3_tail
512 ENDF __usmulhisi3_tail
513 #endif /* L_usmulhisi3 */
515 #if defined (L_umulhisi3)
516 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
517 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
518 ;;; Clobbers: __tmp_reg__
525 #ifdef __AVR_HAVE_JMP_CALL__
526 ;; This function is used by many other routines, often multiple times.
527 ;; Therefore, if the flash size is not too limited, avoid the RCALL
528 ;; and invest 6 Bytes to speed things up.
543 #endif /* L_umulhisi3 */
545 /*******************************************************
546 Widening Multiplication 32 = 16 x 32 with MUL
547 *******************************************************/
549 #if defined (L_mulshisi3)
550 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
551 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
552 ;;; Clobbers: __tmp_reg__
554 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
555 ;; Some cores have problems skipping a 2-word instruction
560 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
565 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
566 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
567 ;;; Clobbers: __tmp_reg__
570 ;; One-extend R27:R26 (A1:A0)
575 #endif /* L_mulshisi3 */
577 #if defined (L_muluhisi3)
578 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
579 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
580 ;;; Clobbers: __tmp_reg__
593 #endif /* L_muluhisi3 */
595 /*******************************************************
596 Multiplication 32 x 32 with MUL
597 *******************************************************/
599 #if defined (L_mulsi3)
600 ;;; R25:R22 = R25:R22 * R21:R18
601 ;;; (C3:C0) = C3:C0 * B3:B0
602 ;;; Clobbers: R26, R27, __tmp_reg__
610 ;; A1:A0 now contains the high word of A
621 #endif /* L_mulsi3 */
636 #endif /* __AVR_HAVE_MUL__ */
638 /*******************************************************
639 Multiplication 24 x 24 with MUL
640 *******************************************************/
642 #if defined (L_mulpsi3)
644 ;; A[0..2]: In: Multiplicand; Out: Product
649 ;; B[0..2]: In: Multiplier
654 #if defined (__AVR_HAVE_MUL__)
656 ;; C[0..2]: Expand Result
661 ;; R24:R22 *= R20:R18
662 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
671 mul AA2, B0 $ add C2, r0
672 mul AA0, B2 $ add C2, r0
684 #else /* !HAVE_MUL */
686 ;; C[0..2]: Expand Result
691 ;; R24:R22 *= R20:R18
692 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
700 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
701 LSR B2 $ ror B1 $ ror B0
703 ;; If the N-th Bit of B[] was set...
706 ;; ...then add A[] * 2^N to the Result C[]
707 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
709 1: ;; Multiply A[] by 2
710 LSL A0 $ rol A1 $ rol A2
712 ;; Loop until B[] is 0
713 subi B0,0 $ sbci B1,0 $ sbci B2,0
716 ;; Copy C[] to the return Register A[]
728 #endif /* HAVE_MUL */
738 #endif /* L_mulpsi3 */
740 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
742 ;; A[0..2]: In: Multiplicand
747 ;; BB: In: Multiplier
755 ;; C[] = A[] * sign_extend (BB)
783 #endif /* L_mulsqipsi3 && HAVE_MUL */
785 /*******************************************************
786 Multiplication 64 x 64
787 *******************************************************/
791 ;; A[0..7]: In: Multiplicand
802 ;; B[0..7]: In: Multiplier
812 #if defined (__AVR_HAVE_MUL__)
814 ;; Define C[] for convenience
815 ;; Notice that parts of C[] overlap A[] respective B[]
825 #if defined (L_muldi3)
828 ;; R25:R18 *= R17:R10
829 ;; Ordinary ABI-Function
837 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
840 mul A7,B0 $ $ mov C7,r0
841 mul A0,B7 $ $ add C7,r0
842 mul A6,B1 $ $ add C7,r0
843 mul A6,B0 $ mov C6,r0 $ add C7,r1
844 mul B6,A1 $ $ add C7,r0
845 mul B6,A0 $ add C6,r0 $ adc C7,r1
848 mul A2,B4 $ add C6,r0 $ adc C7,r1
849 mul A3,B4 $ $ add C7,r0
850 mul A2,B5 $ $ add C7,r0
867 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
877 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
887 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
891 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
906 #endif /* L_muldi3 */
908 #if defined (L_muldi3_6)
909 ;; A helper for some 64-bit multiplications with MUL available
921 #endif /* L_muldi3_6 */
932 #else /* !HAVE_MUL */
934 #if defined (L_muldi3)
948 ;; R25:R18 *= R17:R10
949 ;; Ordinary ABI-Function
965 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
966 ;; where N = 64 - Loop.
967 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
968 ;; B[] will have its initial Value again.
969 LSR B7 $ ror B6 $ ror B5 $ ror B4
970 ror B3 $ ror B2 $ ror B1 $ ror B0
972 ;; If the N-th Bit of B[] was set then...
974 ;; ...finish Rotation...
977 ;; ...and add A[] * 2^N to the Result C[]
978 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
979 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
981 1: ;; Multiply A[] by 2
982 LSL A0 $ rol A1 $ rol A2 $ rol A3
983 rol A4 $ rol A5 $ rol A6 $ rol A7
988 ;; We expanded the Result in C[]
989 ;; Copy Result to the Return Register A[]
1013 #endif /* L_muldi3 */
1014 #endif /* HAVE_MUL */
1034 /*******************************************************
1035 Widening Multiplication 64 = 32 x 32 with MUL
1036 *******************************************************/
1038 #if defined (__AVR_HAVE_MUL__)
1058 #if defined (L_umulsidi3)
1060 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1062 ;; R18[8] = R22[4] * R18[4]
1064 ;; Ordinary ABI Function, but additionally sets
1065 ;; X = R20[2] = B2[2]
1066 ;; Z = R22[2] = A0[2]
1072 DEFUN __umulsidi3_helper
1073 push 29 $ push 28 ; Y
1075 ;; Counting in Words, we have to perform 4 Multiplications
1079 push 23 $ push 22 ; C0
1083 push 27 $ push 26 ; A0
1084 push 19 $ push 18 ; B2
1086 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1087 ;; B2 C2 -- -- -- B0 A2
1091 ;; Sign-extend A. T holds the sign of A
1093 ;; Subtract B from the high part of the result
1098 0: wmov 18, 28 ;; B0
1102 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
1103 ;; B0 C2 -- -- A2 C4 C6
1108 pop 26 $ pop 27 ;; B2
1109 pop 18 $ pop 19 ;; A0
1112 ;; Move result C into place and save A0 in Z
1119 pop 28 $ pop 29 ;; Y
1121 ENDF __umulsidi3_helper
1122 #endif /* L_umulsidi3 */
1125 #if defined (L_mulsidi3)
1127 ;; Signed widening 64 = 32 * 32 Multiplication
1129 ;; R18[8] = R22[4] * R18[4]
1130 ;; Ordinary ABI Function
1133 sbrs B3, 7 ; Enhanced core has no skip bug
1134 XJMP __umulsidi3_helper
1136 ;; B needs sign-extension
1139 XCALL __umulsidi3_helper
1149 #endif /* L_mulsidi3 */
1167 #endif /* HAVE_MUL */
1169 /**********************************************************
1170 Widening Multiplication 64 = 32 x 32 without MUL
1171 **********************************************************/
1173 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1204 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1206 ;; R18[8] = R22[4] * R18[4]
1207 ;; Ordinary ABI Function
1216 ;; Save 10 Registers: R10..R17, R28, R29
1217 do_prologue_saves 10
1220 ;; Move B into place...
1229 ;; Move A into place...
1239 do_epilogue_restores 10
1267 #endif /* L_mulsidi3 && !HAVE_MUL */
1269 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1272 .section .text.libgcc.div, "ax", @progbits
1274 /*******************************************************
1275 Division 8 / 8 => (result + remainder)
1276 *******************************************************/
1277 #define r_rem r25 /* remainder */
1278 #define r_arg1 r24 /* dividend, quotient */
1279 #define r_arg2 r22 /* divisor */
1280 #define r_cnt r23 /* loop count */
1282 #if defined (L_udivmodqi4)
1284 sub r_rem,r_rem ; clear remainder and carry
1285 ldi r_cnt,9 ; init loop counter
1286 rjmp __udivmodqi4_ep ; jump to entry point
1288 rol r_rem ; shift dividend into remainder
1289 cp r_rem,r_arg2 ; compare remainder & divisor
1290 brcs __udivmodqi4_ep ; remainder < divisor
1291 sub r_rem,r_arg2 ; restore remainder
1293 rol r_arg1 ; shift dividend (with CARRY)
1294 dec r_cnt ; decrement loop counter
1295 brne __udivmodqi4_loop
1296 com r_arg1 ; complement result
1297 ; because C flag was complemented in loop
1300 #endif /* defined (L_udivmodqi4) */
1302 #if defined (L_divmodqi4)
1304 bst r_arg1,7 ; store sign of dividend
1305 mov __tmp_reg__,r_arg1
1306 eor __tmp_reg__,r_arg2; r0.7 is sign of result
1308 neg r_arg1 ; dividend negative : negate
1310 neg r_arg2 ; divisor negative : negate
1311 XCALL __udivmodqi4 ; do the unsigned div/mod
1313 neg r_rem ; correct remainder sign
1316 neg r_arg1 ; correct result sign
1320 #endif /* defined (L_divmodqi4) */
1328 /*******************************************************
1329 Division 16 / 16 => (result + remainder)
1330 *******************************************************/
1331 #define r_remL r26 /* remainder Low */
1332 #define r_remH r27 /* remainder High */
1334 /* return: remainder */
1335 #define r_arg1L r24 /* dividend Low */
1336 #define r_arg1H r25 /* dividend High */
1338 /* return: quotient */
1339 #define r_arg2L r22 /* divisor Low */
1340 #define r_arg2H r23 /* divisor High */
1342 #define r_cnt r21 /* loop count */
1344 #if defined (L_udivmodhi4)
1347 sub r_remH,r_remH ; clear remainder and carry
1348 ldi r_cnt,17 ; init loop counter
1349 rjmp __udivmodhi4_ep ; jump to entry point
1351 rol r_remL ; shift dividend into remainder
1353 cp r_remL,r_arg2L ; compare remainder & divisor
1355 brcs __udivmodhi4_ep ; remainder < divisor
1356 sub r_remL,r_arg2L ; restore remainder
1359 rol r_arg1L ; shift dividend (with CARRY)
1361 dec r_cnt ; decrement loop counter
1362 brne __udivmodhi4_loop
1365 ; div/mod results to return registers, as for the div() function
1366 mov_l r_arg2L, r_arg1L ; quotient
1367 mov_h r_arg2H, r_arg1H
1368 mov_l r_arg1L, r_remL ; remainder
1369 mov_h r_arg1H, r_remH
1372 #endif /* defined (L_udivmodhi4) */
1374 #if defined (L_divmodhi4)
1378 bst r_arg1H,7 ; store sign of dividend
1379 mov __tmp_reg__,r_arg2H
1381 com __tmp_reg__ ; r0.7 is sign of result
1382 rcall __divmodhi4_neg1 ; dividend negative: negate
1385 rcall __divmodhi4_neg2 ; divisor negative: negate
1386 XCALL __udivmodhi4 ; do the unsigned div/mod
1388 rcall __divmodhi4_neg2 ; correct remainder sign
1389 brtc __divmodhi4_exit
1391 ;; correct dividend/remainder sign
1397 ;; correct divisor/result sign
1404 #endif /* defined (L_divmodhi4) */
1417 /*******************************************************
1418 Division 24 / 24 => (result + remainder)
1419 *******************************************************/
1421 ;; A[0..2]: In: Dividend; Out: Quotient
1426 ;; B[0..2]: In: Divisor; Out: Remainder
1431 ;; C[0..2]: Expand remainder
1432 #define C0 __zero_reg__
1439 #if defined (L_udivmodpsi4)
1440 ;; R24:R22 = R24:R22 udiv R20:R18
1441 ;; R20:R18 = R24:R22 umod R20:R18
1442 ;; Clobbers: R21, R25, R26
1447 ; Clear remainder and carry. C0 is already 0
1450 ; jump to entry point
1451 rjmp __udivmodpsi4_start
1453 ; shift dividend into remainder
1457 ; compare remainder & divisor
1461 brcs __udivmodpsi4_start ; remainder < divisor
1462 sub C0, B0 ; restore remainder
1465 __udivmodpsi4_start:
1466 ; shift dividend (with CARRY)
1470 ; decrement loop counter
1472 brne __udivmodpsi4_loop
1476 ; div/mod results to return registers
1481 clr __zero_reg__ ; C0
1484 #endif /* defined (L_udivmodpsi4) */
1486 #if defined (L_divmodpsi4)
1487 ;; R24:R22 = R24:R22 div R20:R18
1488 ;; R20:R18 = R24:R22 mod R20:R18
1489 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1492 ; R0.7 will contain the sign of the result:
1493 ; R0.7 = A.sign ^ B.sign
1495 ; T-flag = sign of dividend
1499 ; Adjust dividend's sign
1500 rcall __divmodpsi4_negA
1502 ; Adjust divisor's sign
1504 rcall __divmodpsi4_negB
1506 ; Do the unsigned div/mod
1509 ; Adjust quotient's sign
1511 rcall __divmodpsi4_negA
1513 ; Adjust remainder's sign
1514 brtc __divmodpsi4_end
1517 ; Correct divisor/remainder sign
1525 ; Correct dividend/quotient sign
1536 #endif /* defined (L_divmodpsi4) */
1552 /*******************************************************
1553 Division 32 / 32 => (result + remainder)
1554 *******************************************************/
1555 #define r_remHH r31 /* remainder High */
1558 #define r_remL r26 /* remainder Low */
1560 /* return: remainder */
1561 #define r_arg1HH r25 /* dividend High */
1562 #define r_arg1HL r24
1564 #define r_arg1L r22 /* dividend Low */
1566 /* return: quotient */
1567 #define r_arg2HH r21 /* divisor High */
1568 #define r_arg2HL r20
1570 #define r_arg2L r18 /* divisor Low */
1572 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1574 #if defined (L_udivmodsi4)
1576 ldi r_remL, 33 ; init loop counter
1579 sub r_remH,r_remH ; clear remainder and carry
1580 mov_l r_remHL, r_remL
1581 mov_h r_remHH, r_remH
1582 rjmp __udivmodsi4_ep ; jump to entry point
1584 rol r_remL ; shift dividend into remainder
1588 cp r_remL,r_arg2L ; compare remainder & divisor
1590 cpc r_remHL,r_arg2HL
1591 cpc r_remHH,r_arg2HH
1592 brcs __udivmodsi4_ep ; remainder < divisor
1593 sub r_remL,r_arg2L ; restore remainder
1595 sbc r_remHL,r_arg2HL
1596 sbc r_remHH,r_arg2HH
1598 rol r_arg1L ; shift dividend (with CARRY)
1602 dec r_cnt ; decrement loop counter
1603 brne __udivmodsi4_loop
1604 ; __zero_reg__ now restored (r_cnt == 0)
1609 ; div/mod results to return registers, as for the ldiv() function
1610 mov_l r_arg2L, r_arg1L ; quotient
1611 mov_h r_arg2H, r_arg1H
1612 mov_l r_arg2HL, r_arg1HL
1613 mov_h r_arg2HH, r_arg1HH
1614 mov_l r_arg1L, r_remL ; remainder
1615 mov_h r_arg1H, r_remH
1616 mov_l r_arg1HL, r_remHL
1617 mov_h r_arg1HH, r_remHH
1620 #endif /* defined (L_udivmodsi4) */
1622 #if defined (L_divmodsi4)
1624 mov __tmp_reg__,r_arg2HH
1625 bst r_arg1HH,7 ; store sign of dividend
1627 com __tmp_reg__ ; r0.7 is sign of result
1628 XCALL __negsi2 ; dividend negative: negate
1631 rcall __divmodsi4_neg2 ; divisor negative: negate
1632 XCALL __udivmodsi4 ; do the unsigned div/mod
1633 sbrc __tmp_reg__, 7 ; correct quotient sign
1634 rcall __divmodsi4_neg2
1635 brtc __divmodsi4_exit ; correct remainder sign
1638 ;; correct divisor/quotient sign
1649 #endif /* defined (L_divmodsi4) */
1651 #if defined (L_negsi2)
1653 ;; (neg:SI (reg:SI 22)))
1654 ;; Sets the V flag for signed overflow tests
1659 #endif /* L_negsi2 */
1675 /*******************************************************
1678 *******************************************************/
1680 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1681 ;; at least 16k of Program Memory. For smaller Devices, depend
1682 ;; on MOVW and SP Size. There is a Connection between SP Size and
1683 ;; Flash Size so that SP Size can be used to test for Flash Size.
1685 #if defined (__AVR_HAVE_JMP_CALL__)
1686 # define SPEED_DIV 8
1687 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1688 # define SPEED_DIV 16
1690 # define SPEED_DIV 0
1693 ;; A[0..7]: In: Dividend;
1694 ;; Out: Quotient (T = 0)
1695 ;; Out: Remainder (T = 1)
1705 ;; B[0..7]: In: Divisor; Out: Clobber
1715 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1725 ;; Holds Signs during Division Routine
1726 #define SS __tmp_reg__
1728 ;; Bit-Counter in Division Routine
1729 #define R_cnt __zero_reg__
1731 ;; Scratch Register for Negation
1734 #if defined (L_udivdi3)
1736 ;; R25:R18 = R24:R18 umod R17:R10
1737 ;; Ordinary ABI-Function
1741 rjmp __udivdi3_umoddi3
1744 ;; R25:R18 = R24:R18 udiv R17:R10
1745 ;; Ordinary ABI-Function
1751 DEFUN __udivdi3_umoddi3
1762 ENDF __udivdi3_umoddi3
1763 #endif /* L_udivdi3 */
1765 #if defined (L_udivmod64)
1767 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1768 ;; No Registers saved/restored; the Callers will take Care.
1769 ;; Preserves B[] and T-flag
1770 ;; T = 0: Compute Quotient in A[]
1771 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1775 ;; Clear Remainder (C6, C7 will follow)
1782 #if SPEED_DIV == 0 || SPEED_DIV == 16
1783 ;; Initialize Loop-Counter
1786 #endif /* SPEED_DIV */
1793 1: ;; Compare shifted Dividend against Divisor
1794 ;; If -- even after Shifting -- it is smaller...
1795 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1796 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1799 ;; ...then we can subtract it. Thus, it is legal to shift left
1800 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1801 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1802 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1803 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1809 ;; Shifted 64 Bits: A7 has traveled to C7
1811 ;; Divisor is greater than Dividend. We have:
1814 ;; Thus, we can return immediately
1817 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1820 ;; Push of A7 is not needed because C7 is still 0
1824 #elif SPEED_DIV == 16
1826 ;; Compare shifted Dividend against Divisor
1834 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1835 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1836 wmov C2,A6 $ wmov C0,A4
1837 wmov A6,A2 $ wmov A4,A0
1838 wmov A2,C6 $ wmov A0,C4
1840 ;; Set Bit Counter to 32
1844 #error SPEED_DIV = ?
1845 #endif /* SPEED_DIV */
1847 ;; The very Division + Remainder Routine
1849 3: ;; Left-shift Dividend...
1850 lsl A0 $ rol A1 $ rol A2 $ rol A3
1851 rol A4 $ rol A5 $ rol A6 $ rol A7
1853 ;; ...into Remainder
1854 rol C0 $ rol C1 $ rol C2 $ rol C3
1855 rol C4 $ rol C5 $ rol C6 $ rol C7
1857 ;; Compare Remainder and Divisor
1858 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1859 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1863 ;; Divisor fits into Remainder: Subtract it from Remainder...
1864 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1865 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1867 ;; ...and set according Bit in the upcoming Quotient
1868 ;; The Bit will travel to its final Position
1871 4: ;; This Bit is done
1874 ;; __zero_reg__ is 0 again
1876 ;; T = 0: We are fine with the Quotient in A[]
1877 ;; T = 1: Copy Remainder to A[]
1883 ;; Move the Sign of the Result to SS.7
1889 #endif /* L_udivmod64 */
1892 #if defined (L_divdi3)
1894 ;; R25:R18 = R24:R18 mod R17:R10
1895 ;; Ordinary ABI-Function
1899 rjmp __divdi3_moddi3
1902 ;; R25:R18 = R24:R18 div R17:R10
1903 ;; Ordinary ABI-Function
1909 DEFUN __divdi3_moddi3
1914 ;; Both Signs are 0: the following Complexity is not needed
1915 XJMP __udivdi3_umoddi3
1916 #endif /* SPEED_DIV */
1919 ;; Save 12 Registers: Y, 17...8
1921 do_prologue_saves 12
1923 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1924 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1927 ;; Adjust Dividend's Sign as needed
1929 ;; Compiling for Speed we know that at least one Sign must be < 0
1930 ;; Thus, if A[] >= 0 then we know B[] < 0
1934 #endif /* SPEED_DIV */
1938 ;; Adjust Divisor's Sign and SS.7 as needed
1945 com B4 $ com B5 $ com B6 $ com B7
1946 $ com B1 $ com B2 $ com B3
1948 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
1949 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
1951 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1954 ;; Adjust Result's Sign
1955 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1960 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1963 4: ;; Epilogue: Restore 12 Registers and return
1964 do_epilogue_restores 12
1966 ENDF __divdi3_moddi3
1968 #endif /* L_divdi3 */
1974 .section .text.libgcc, "ax", @progbits
1976 #define TT __tmp_reg__
1978 #if defined (L_adddi3)
1980 ;; (plus:DI (reg:DI 18)
1982 ;; Sets the V flag for signed overflow tests
1983 ;; Sets the C flag for unsigned overflow tests
1985 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
1986 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
1989 #endif /* L_adddi3 */
1991 #if defined (L_adddi3_s8)
1993 ;; (plus:DI (reg:DI 18)
1994 ;; (sign_extend:SI (reg:QI 26))))
1995 ;; Sets the V flag for signed overflow tests
1996 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
2001 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
2002 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
2005 #endif /* L_adddi3_s8 */
2007 #if defined (L_subdi3)
2009 ;; (minus:DI (reg:DI 18)
2011 ;; Sets the V flag for signed overflow tests
2012 ;; Sets the C flag for unsigned overflow tests
2014 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
2015 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
2018 #endif /* L_subdi3 */
2020 #if defined (L_cmpdi2)
2022 ;; (compare (reg:DI 18)
2025 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
2026 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
2029 #endif /* L_cmpdi2 */
2031 #if defined (L_cmpdi2_s8)
2033 ;; (compare (reg:DI 18)
2034 ;; (sign_extend:SI (reg:QI 26))))
2039 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
2040 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
2043 #endif /* L_cmpdi2_s8 */
2045 #if defined (L_negdi2)
2047 ;; (neg:DI (reg:DI 18)))
2048 ;; Sets the V flag for signed overflow tests
2051 com A4 $ com A5 $ com A6 $ com A7
2052 $ com A1 $ com A2 $ com A3
2054 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
2055 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
2059 #endif /* L_negdi2 */
2091 .section .text.libgcc.prologue, "ax", @progbits
2093 /**********************************
2094 * This is a prologue subroutine
2095 **********************************/
2096 #if defined (L_prologue)
2098 ;; This function does not clobber T-flag; 64-bit division relies on it
2099 DEFUN __prologue_saves__
2118 #if !defined (__AVR_HAVE_SPH__)
2123 #elif defined (__AVR_XMEGA__)
2135 in __tmp_reg__,__SREG__
2138 out __SREG__,__tmp_reg__
2140 #endif /* #SP = 8/16 */
2144 ENDF __prologue_saves__
2145 #endif /* defined (L_prologue) */
2148 * This is an epilogue subroutine
2150 #if defined (L_epilogue)
2152 DEFUN __epilogue_restores__
2170 #if !defined (__AVR_HAVE_SPH__)
2175 #elif defined (__AVR_XMEGA__)
2178 adc r29,__zero_reg__
2185 adc r29,__zero_reg__
2186 in __tmp_reg__,__SREG__
2189 out __SREG__,__tmp_reg__
2193 #endif /* #SP = 8/16 */
2195 ENDF __epilogue_restores__
2196 #endif /* defined (L_epilogue) */
2199 .section .fini9,"ax",@progbits
2205 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
2207 .section .fini0,"ax",@progbits
2211 #endif /* defined (L_exit) */
2219 #endif /* defined (L_cleanup) */
2222 .section .text.libgcc, "ax", @progbits
2225 DEFUN __tablejump2__
2228 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2229 ;; Word address of gs() jumptable entry in R24:Z
2232 #elif defined (__AVR_HAVE_ELPM__)
2233 ;; Word address of jumptable entry in Z
2236 out __RAMPZ__, __tmp_reg__
2239 ;; Read word address from jumptable and jump
2241 #if defined (__AVR_HAVE_ELPMX__)
2242 elpm __tmp_reg__, Z+
2244 mov r30, __tmp_reg__
2245 #ifdef __AVR_HAVE_RAMPD__
2246 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2247 out __RAMPZ__, __zero_reg__
2250 #elif defined (__AVR_HAVE_ELPM__)
2257 #elif defined (__AVR_HAVE_LPMX__)
2260 mov r30, __tmp_reg__
2271 #endif /* L_tablejump2 */
;; __do_copy_data (startup code, placed in .init4): copy the initial
;; values of the .data section from program memory at __data_load_start
;; to RAM over [__data_start, __data_end).
;; Visible register roles: X = r27:r26 destination pointer in RAM,
;; Z = r31:r30 load address in flash, r16 = hh8 byte of the load
;; address, r17 = hi8(__data_end) for the end-of-loop compare.
;; NOTE(review): this chunk is a sparse excerpt -- several instruction
;; lines of the copy loops (and the ENDF) are elided between the lines
;; kept below.
2274 .section .init4,"ax",@progbits
2275 DEFUN __do_copy_data
;; Variant 1: devices with ELPMX (elpm Rd, Z+ available).
2276 #if defined(__AVR_HAVE_ELPMX__)
2277 ldi r17, hi8(__data_end)
2278 ldi r26, lo8(__data_start)
2279 ldi r27, hi8(__data_start)
2280 ldi r30, lo8(__data_load_start)
2281 ldi r31, hi8(__data_load_start)
2282 ldi r16, hh8(__data_load_start)
2284 rjmp .L__do_copy_data_start
2285 .L__do_copy_data_loop:
2288 .L__do_copy_data_start:
;; Loop until X reaches __data_end (the high-byte compare against r17
;; is among the elided lines).
2289 cpi r26, lo8(__data_end)
2291 brne .L__do_copy_data_loop
;; Variant 2: ELPM only -- the hh8(... - 0x10000) bias plus the carry
;; label presumably handle the 64 KiB segment crossing; the RAMPZ
;; update instructions are elided in this view.
2292 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2293 ldi r17, hi8(__data_end)
2294 ldi r26, lo8(__data_start)
2295 ldi r27, hi8(__data_start)
2296 ldi r30, lo8(__data_load_start)
2297 ldi r31, hi8(__data_load_start)
2298 ldi r16, hh8(__data_load_start - 0x10000)
2299 .L__do_copy_data_carry:
2302 rjmp .L__do_copy_data_start
2303 .L__do_copy_data_loop:
2307 brcs .L__do_copy_data_carry
2308 .L__do_copy_data_start:
2309 cpi r26, lo8(__data_end)
2311 brne .L__do_copy_data_loop
;; Variant 3: small devices (plain LPM, no RAMPZ register needed).
2312 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2313 ldi r17, hi8(__data_end)
2314 ldi r26, lo8(__data_start)
2315 ldi r27, hi8(__data_start)
2316 ldi r30, lo8(__data_load_start)
2317 ldi r31, hi8(__data_load_start)
2318 rjmp .L__do_copy_data_start
2319 .L__do_copy_data_loop:
2320 #if defined (__AVR_HAVE_LPMX__)
2327 .L__do_copy_data_start:
2328 cpi r26, lo8(__data_end)
2330 brne .L__do_copy_data_loop
2331 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2332 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2333 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2334 out __RAMPZ__, __zero_reg__
2335 #endif /* ELPM && RAMPD */
2337 #endif /* L_copy_data */
2339 /* __do_clear_bss is only necessary if there is anything in .bss section. */
;; __do_clear_bss (startup code, placed in .init4): zero-fill RAM over
;; [__bss_start, __bss_end).  X = r27:r26 is the running destination
;; pointer; r17 = hi8(__bss_end) for the end-of-loop compare.
;; NOTE(review): the loop label .do_clear_bss_loop and the store that
;; presumably writes __zero_reg__ are elided in this excerpt.  These
;; labels historically lack the usual .L local-label prefix.
2342 .section .init4,"ax",@progbits
2343 DEFUN __do_clear_bss
2344 ldi r17, hi8(__bss_end)
2345 ldi r26, lo8(__bss_start)
2346 ldi r27, hi8(__bss_start)
2347 rjmp .do_clear_bss_start
2350 .do_clear_bss_start:
2351 cpi r26, lo8(__bss_end)
2353 brne .do_clear_bss_loop
2355 #endif /* L_clear_bss */
2357 /* __do_global_ctors and __do_global_dtors are only necessary
2358 if there are any constructors/destructors. */
;; __do_global_ctors (startup code, placed in .init6): walk the table
;; of constructor word addresses downwards from __ctors_end to
;; __ctors_start, dispatching each entry through __tablejump2__.
;; Y = r29:r28 is the (word-address) table cursor; r17 =
;; pm_hi8(__ctors_start) for the end compare.  On EIJMP/EICALL devices
;; r16 carries the third address byte and r24 the hh8 compare value.
;; NOTE(review): the 16-bit cursor decrement and part of the compare
;; sequence are elided in this excerpt.
2361 .section .init6,"ax",@progbits
2362 DEFUN __do_global_ctors
2363 ldi r17, pm_hi8(__ctors_start)
2364 ldi r28, pm_lo8(__ctors_end)
2365 ldi r29, pm_hi8(__ctors_end)
2366 #ifdef __AVR_HAVE_EIJMP_EICALL__
2367 ldi r16, pm_hh8(__ctors_end)
2368 #endif /* HAVE_EIJMP */
2369 rjmp .L__do_global_ctors_start
2370 .L__do_global_ctors_loop:
2372 #ifdef __AVR_HAVE_EIJMP_EICALL__
;; Propagate the borrow of the (elided) cursor decrement into the
;; third address byte.
2373 sbc r16, __zero_reg__
2375 #endif /* HAVE_EIJMP */
2378 XCALL __tablejump2__
2379 .L__do_global_ctors_start:
2380 cpi r28, pm_lo8(__ctors_start)
2382 #ifdef __AVR_HAVE_EIJMP_EICALL__
2383 ldi r24, pm_hh8(__ctors_start)
2385 #endif /* HAVE_EIJMP */
2386 brne .L__do_global_ctors_loop
2387 ENDF __do_global_ctors
2388 #endif /* L_ctors */
;; __do_global_dtors (shutdown code, placed in .fini6): mirror image of
;; __do_global_ctors -- walk the destructor table downwards from
;; __dtors_end to __dtors_start, dispatching each word-address entry
;; through __tablejump2__.  Same register roles: Y = r29:r28 cursor,
;; r17 = pm_hi8(__dtors_start); r16/r24 carry the third address byte
;; on EIJMP/EICALL devices.
;; NOTE(review): the 16-bit cursor decrement and part of the compare
;; sequence are elided in this excerpt.
2391 .section .fini6,"ax",@progbits
2392 DEFUN __do_global_dtors
2393 ldi r17, pm_hi8(__dtors_start)
2394 ldi r28, pm_lo8(__dtors_end)
2395 ldi r29, pm_hi8(__dtors_end)
2396 #ifdef __AVR_HAVE_EIJMP_EICALL__
2397 ldi r16, pm_hh8(__dtors_end)
2398 #endif /* HAVE_EIJMP */
2399 rjmp .L__do_global_dtors_start
2400 .L__do_global_dtors_loop:
2402 #ifdef __AVR_HAVE_EIJMP_EICALL__
;; Propagate the borrow of the (elided) cursor decrement into the
;; third address byte.
2403 sbc r16, __zero_reg__
2405 #endif /* HAVE_EIJMP */
2408 XCALL __tablejump2__
2409 .L__do_global_dtors_start:
2410 cpi r28, pm_lo8(__dtors_start)
2412 #ifdef __AVR_HAVE_EIJMP_EICALL__
2413 ldi r24, pm_hh8(__dtors_start)
2415 #endif /* HAVE_EIJMP */
2416 brne .L__do_global_dtors_loop
2417 ENDF __do_global_dtors
2418 #endif /* L_dtors */
2420 .section .text.libgcc, "ax", @progbits
2422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2423 ;; Loading n bytes from Flash; n = 3,4
2424 ;; R22... = Flash[Z]
2425 ;; Clobbers: __tmp_reg__
;; Fallback for devices WITHOUT lpm Rd, Z+ (no LPMX): the .load macro
;; expands one plain-LPM byte load per destination register
;; (dest = D0 .. D0+n-1); the .if below special-cases the last byte.
;; NOTE(review): most of the macro body and the __load_3 / __load_4
;; entry points are elided in this excerpt.
2427 #if (defined (L_load_3) \
2428 || defined (L_load_4)) \
2429 && !defined (__AVR_HAVE_LPMX__)
2437 .macro .load dest, n
2440 .if \dest != D0+\n-1
2447 #if defined (L_load_3)
2454 #endif /* L_load_3 */
2456 #if defined (L_load_4)
2464 #endif /* L_load_4 */
2466 #endif /* L_load_3 || L_load_4 */
2468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2469 ;; Loading n bytes from Flash or RAM; n = 1,2,3,4
2470 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2471 ;; Clobbers: __tmp_reg__, R21, R30, R31
;; The .xload macro loads one byte into register \dest with a
;; per-device flavor: ELPMX, manual-RAMPZ ELPM (carry propagated into
;; HHI8, the register holding address bits 16+), or LPMX.  After the
;; last byte (\dest == D0+\n-1) RAMPZ is reset on ELPM+RAMPD devices
;; so EBI accesses are not disturbed.  The __xload_1..__xload_4 entry
;; points follow.
;; NOTE(review): most instruction lines of the macro and of the entry
;; points are elided in this excerpt.
2473 #if (defined (L_xload_1) \
2474 || defined (L_xload_2) \
2475 || defined (L_xload_3) \
2476 || defined (L_xload_4))
2484 ;; Register containing bits 16+ of the address
2488 .macro .xload dest, n
2489 #if defined (__AVR_HAVE_ELPMX__)
2491 #elif defined (__AVR_HAVE_ELPM__)
2494 .if \dest != D0+\n-1
2496 adc HHI8, __zero_reg__
2499 #elif defined (__AVR_HAVE_LPMX__)
2504 .if \dest != D0+\n-1
2508 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2509 .if \dest == D0+\n-1
2510 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2511 out __RAMPZ__, __zero_reg__
2516 #if defined (L_xload_1)
2518 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2527 #if defined (__AVR_HAVE_ELPM__)
2529 #endif /* __AVR_HAVE_ELPM__ */
2534 #endif /* LPMx && ! ELPM */
2536 #endif /* L_xload_1 */
2538 #if defined (L_xload_2)
2542 #if defined (__AVR_HAVE_ELPM__)
2544 #endif /* __AVR_HAVE_ELPM__ */
2552 #endif /* L_xload_2 */
2554 #if defined (L_xload_3)
2558 #if defined (__AVR_HAVE_ELPM__)
2560 #endif /* __AVR_HAVE_ELPM__ */
2570 #endif /* L_xload_3 */
2572 #if defined (L_xload_4)
2576 #if defined (__AVR_HAVE_ELPM__)
2578 #endif /* __AVR_HAVE_ELPM__ */
2590 #endif /* L_xload_4 */
2592 #endif /* L_xload_{1|2|3|4} */
2594 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2595 ;; memcopy from Address Space __pgmx to RAM
2596 ;; R23:Z = Source Address
2597 ;; X = Destination Address
2598 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
;; __movmemx: byte-wise copy loop with two paths -- label 0: reads the
;; source from flash (ELPMX / manual-RAMPZ ELPM / LPMX, as in .xload),
;; label 1: reads the source from RAM.  On ELPM+RAMPD devices RAMPZ is
;; reset after the flash path.
;; NOTE(review): the path-selection test and parts of both loop bodies
;; are elided in this excerpt.
2600 #if defined (L_movmemx)
2606 ;; #Bytes to copy fits in 8 Bits (1..255)
2607 ;; Zero-extend Loop Counter
2620 #if defined (__AVR_HAVE_ELPM__)
2624 0: ;; Load 1 Byte from Flash...
2626 #if defined (__AVR_HAVE_ELPMX__)
2628 #elif defined (__AVR_HAVE_ELPM__)
;; Manual RAMPZ maintenance: propagate Z's carry into the high byte.
2631 adc HHI8, __zero_reg__
2633 #elif defined (__AVR_HAVE_LPMX__)
2640 ;; ...and store that Byte to RAM Destination
2644 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2645 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2646 out __RAMPZ__, __zero_reg__
2647 #endif /* ELPM && RAMPD */
2652 1: ;; Read 1 Byte from RAM...
2654 ;; and store that Byte to RAM Destination
2664 #endif /* L_movmemx */
2667 .section .text.libgcc.builtins, "ax", @progbits
2669 /**********************************
2670 * Find first set Bit (ffs)
2671 **********************************/
;; ffs returns 1 + the index of the least significant set bit, or 0 if
;; the argument is zero.  The 32- and 16-bit entry points funnel into
;; the shared helper __loop_ffsqi2.
;; NOTE(review): the function bodies are mostly elided in this excerpt.
2673 #if defined (L_ffssi2)
2674 ;; find first set bit
2675 ;; r25:r24 = ffs32 (r25:r22)
2676 ;; clobbers: r22, r26
2694 #endif /* defined (L_ffssi2) */
2696 #if defined (L_ffshi2)
2697 ;; find first set bit
2698 ;; r25:r24 = ffs16 (r25:r24)
2702 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2703 ;; Some cores have problem skipping 2-word instruction
2707 cpse r24, __zero_reg__
2708 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2709 1: XJMP __loop_ffsqi2
2715 #endif /* defined (L_ffshi2) */
2717 #if defined (L_loop_ffsqi2)
2718 ;; Helper for ffshi2, ffssi2
2719 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2730 #endif /* defined (L_loop_ffsqi2) */
2733 /**********************************
2734 * Count trailing Zeros (ctz)
2735 **********************************/
;; NOTE(review): the ctz function bodies are elided in this excerpt;
;; only the interface comments remain.
2737 #if defined (L_ctzsi2)
2738 ;; count trailing zeros
2739 ;; r25:r24 = ctz32 (r25:r22)
2740 ;; clobbers: r26, r22
2742 ;; Note that ctz(0) is undefined for GCC
2748 #endif /* defined (L_ctzsi2) */
2750 #if defined (L_ctzhi2)
2751 ;; count trailing zeros
2752 ;; r25:r24 = ctz16 (r25:r24)
2755 ;; Note that ctz(0) is undefined for GCC
2761 #endif /* defined (L_ctzhi2) */
2764 /**********************************
2765 * Count leading Zeros (clz)
2766 **********************************/
;; NOTE(review): the clz function bodies are elided in this excerpt;
;; only the interface comments remain.
2768 #if defined (L_clzdi2)
2769 ;; count leading zeros
2770 ;; r25:r24 = clz64 (r25:r18)
2771 ;; clobbers: r22, r23, r26
2784 #endif /* defined (L_clzdi2) */
2786 #if defined (L_clzsi2)
2787 ;; count leading zeros
2788 ;; r25:r24 = clz32 (r25:r22)
2800 #endif /* defined (L_clzsi2) */
2802 #if defined (L_clzhi2)
2803 ;; count leading zeros
2804 ;; r25:r24 = clz16 (r25:r24)
2826 #endif /* defined (L_clzhi2) */
2829 /**********************************
 * Parity
2831 **********************************/
;; Parity helpers: result is 1 if the argument has an odd number of
;; set bits, else 0.  The wider variants XOR-fold down to the 8-bit
;; tail below.
;; NOTE(review): most instruction lines are elided in this excerpt.
2833 #if defined (L_paritydi2)
2834 ;; r25:r24 = parity64 (r25:r18)
2835 ;; clobbers: __tmp_reg__
2843 #endif /* defined (L_paritydi2) */
2845 #if defined (L_paritysi2)
2846 ;; r25:r24 = parity32 (r25:r22)
2847 ;; clobbers: __tmp_reg__
2853 #endif /* defined (L_paritysi2) */
2855 #if defined (L_parityhi2)
2856 ;; r25:r24 = parity16 (r25:r24)
2857 ;; clobbers: __tmp_reg__
;; 8-bit tail: successively fold the parity of r24 into fewer bits.
2863 ;; r25:r24 = parity8 (r24)
2864 ;; clobbers: __tmp_reg__
2866 ;; parity is in r24[0..7]
2867 mov __tmp_reg__, r24
2869 eor r24, __tmp_reg__
2870 ;; parity is in r24[0..3]
2874 ;; parity is in r24[0,3]
2877 ;; parity is in r24[0]
2882 #endif /* defined (L_parityhi2) */
2885 /**********************************
 * Popcount
2887 **********************************/
;; Population-count helpers.  The 16/32/64-bit variants share the
;; __popcounthi2_tail accumulator; __popcountqi2 counts the bits of a
;; single byte with a shift/add chain (each adc folds one carried-out
;; bit into the running sum in r24).
;; NOTE(review): several instruction lines are elided in this excerpt.
2889 #if defined (L_popcounthi2)
2891 ;; r25:r24 = popcount16 (r25:r24)
2892 ;; clobbers: __tmp_reg__
2902 DEFUN __popcounthi2_tail
2904 add r24, __tmp_reg__
2906 ENDF __popcounthi2_tail
2907 #endif /* defined (L_popcounthi2) */
2909 #if defined (L_popcountsi2)
2911 ;; r25:r24 = popcount32 (r25:r22)
2912 ;; clobbers: __tmp_reg__
2919 XJMP __popcounthi2_tail
2921 #endif /* defined (L_popcountsi2) */
2923 #if defined (L_popcountdi2)
2925 ;; r25:r24 = popcount64 (r25:r18)
2926 ;; clobbers: r22, r23, __tmp_reg__
2935 XJMP __popcounthi2_tail
2937 #endif /* defined (L_popcountdi2) */
2939 #if defined (L_popcountqi2)
2941 ;; r24 = popcount8 (r24)
2942 ;; clobbers: __tmp_reg__
2944 mov __tmp_reg__, r24
2948 adc r24, __zero_reg__
2950 adc r24, __zero_reg__
2952 adc r24, __zero_reg__
2954 adc r24, __zero_reg__
2956 adc r24, __zero_reg__
2958 adc r24, __tmp_reg__
2961 #endif /* defined (L_popcountqi2) */
2964 /**********************************
 * Byte swap (bswap)
2966 **********************************/
;; Endianness reversal of a 32-bit (r25:r22) or 64-bit (r25:r18)
;; value, built on a register-swap helper.
;; NOTE(review): the swap macro/helper body and the function bodies
;; are elided in this excerpt.
2968 ;; swap two registers with different register number
2975 #if defined (L_bswapsi2)
2977 ;; r25:r22 = bswap32 (r25:r22)
2983 #endif /* defined (L_bswapsi2) */
2985 #if defined (L_bswapdi2)
2987 ;; r25:r18 = bswap64 (r25:r18)
2995 #endif /* defined (L_bswapdi2) */
2998 /**********************************
 * 64-bit shifts and rotate
3000 **********************************/
;; 64-bit operand lives in r25:r18, shift count in r17:r16.
;; NOTE(review): the shift loops are mostly elided in this excerpt;
;; only scattered setup/fix-up instructions remain visible.
3002 #if defined (L_ashrdi3)
3003 ;; Arithmetic shift right
3004 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
3011 ;; Logic shift right
3012 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
;; sbc of a register with itself yields 0x00 or 0xFF depending on
;; carry -- used as the fill byte.
3015 sbc __tmp_reg__, __tmp_reg__
3027 mov r25, __tmp_reg__
3043 #endif /* defined (L_ashrdi3) */
3045 #if defined (L_ashldi3)
3047 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
3075 #endif /* defined (L_ashldi3) */
3077 #if defined (L_rotldi3)
3079 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Save the top byte so the rotate can feed it back into the bottom.
3085 mov __tmp_reg__, r25
3093 mov r18, __tmp_reg__
3103 adc r18, __zero_reg__
3109 #endif /* defined (L_rotldi3) */
3112 .section .text.libgcc.fmul, "ax", @progbits
3114 /***********************************************************/
3115 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3116 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3117 /***********************************************************/
;; A0 (alias of __tmp_reg__) records in bit 7 whether the final
;; product must be negated; __fmuls and __fmulsu share an unsigned
;; multiply helper and negate afterwards.
;; NOTE(review): most instruction lines of the bodies are elided in
;; this excerpt.
3123 #define A0 __tmp_reg__
3126 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3127 ;;; Clobbers: r24, r25, __tmp_reg__
3129 ;; A0.7 = negate result?
3137 #endif /* L_fmuls */
3140 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3141 ;;; Clobbers: r24, r25, __tmp_reg__
3143 ;; A0.7 = negate result?
3148 ;; Helper for __fmuls and __fmulsu
3153 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3154 ;; Some cores have problem skipping 2-word instruction
3159 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3162 ;; C = -C iff A0.7 = 1
3166 #endif /* L_fmulsu */
;; __fmul: unsigned variant; same r23:r22 result convention as
;; __fmuls / __fmulsu above.  (Body largely elided in this excerpt.)
3170 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
3171 ;;; Clobbers: r24, r25, __tmp_reg__
3178 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
3199 #include "lib1funcs-fixed.S"