libgcc/config/avr/lib1funcs.S

   1 /*  -*- Mode: Asm -*-  */
   2 /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
   3    Free Software Foundation, Inc.
   4    Contributed by Denis Chertykov <chertykov@gmail.com>
   5
   6 This file is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 This file is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 Under Section 7 of GPL version 3, you are granted additional
  17 permissions described in the GCC Runtime Library Exception, version
  18 3.1, as published by the Free Software Foundation.
  19
  20 You should have received a copy of the GNU General Public License and
  21 a copy of the GCC Runtime Library Exception along with this program;
  22 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23 <http://www.gnu.org/licenses/>.  */
  24
  25 #define __zero_reg__ r1
     /* r1 is treated as a register that holds 0.  Routines in this file
        that use MUL (its product goes to r1:r0) or that borrow r1 as
        scratch execute "clr __zero_reg__" before they return.  */
  26 #define __tmp_reg__ r0
     /* r0 is a scratch register; the routines list it under "Clobbers".  */
  27 #define __SREG__ 0x3f
  28 #if defined (__AVR_HAVE_SPH__)
  29 #define __SP_H__ 0x3e
  30 #endif
  31 #define __SP_L__ 0x3d
  32 #define __RAMPZ__ 0x3B
  33 #define __EIND__  0x3C
     /* I/O addresses of the status register, of the stack pointer (the
        high byte only where the device has one), of RAMPZ and of EIND.  */
  34
  35 /* Most of the functions here are called directly from avr.md
  36    patterns, instead of using the standard libcall mechanisms.
  37    This can make better code because GCC knows exactly which
  38    of the call-used registers (not all of them) are clobbered.  */
  39
  40 /* FIXME:  At present, there is no SORT directive in the linker
  41            script so that we must not assume that different modules
  42            in the same input section like .libgcc.text.mul will be
  43            located close together.  Therefore, we cannot use
  44            RCALL/RJMP to call a function like __udivmodhi4 from
  45            __divmodhi4 and have to use lengthy XCALL/XJMP even
  46            though they are in the same input section and all same
  47            input sections together are small enough to reach every
  48            location with a RCALL/RJMP instruction.  */
  49
  50         .macro  mov_l  r_dest, r_src
             ;; Low half of a 16-bit register copy; used together with mov_h.
             ;; With MOVW the complete register pair is copied right here.
  51 #if defined (__AVR_HAVE_MOVW__)
  52         movw    \r_dest, \r_src
  53 #else
             ;; No MOVW: copy this byte only, mov_h copies the other one.
  54         mov     \r_dest, \r_src
  55 #endif
  56         .endm
  57
  58         .macro  mov_h  r_dest, r_src
             ;; High half of a 16-bit register copy; companion of mov_l.
  59 #if defined (__AVR_HAVE_MOVW__)
  60         ; empty: the MOVW emitted by mov_l already copied both bytes
  61 #else
  62         mov     \r_dest, \r_src
  63 #endif
  64         .endm
  65
  66 .macro  wmov  r_dest, r_src
     ;; Copy the 16-bit value held in the register pair starting at r_src
     ;; to the pair starting at r_dest.  The fallback without MOVW forms
     ;; the high registers by adding 1 to the operands, which is why the
     ;; routines in this file pass plain register numbers such as 24.
  67 #if defined (__AVR_HAVE_MOVW__)
  68     movw \r_dest,   \r_src
  69 #else
  70     mov \r_dest,    \r_src
  71     mov \r_dest+1,  \r_src+1
  72 #endif
  73 .endm
  74
  75 #if defined (__AVR_HAVE_JMP_CALL__)
     /* XCALL / XJMP: call of / jump to another routine of this library.
        Expands to the absolute CALL / JMP where the device provides
        them, otherwise to the relative RCALL / RJMP.  */
  76 #define XCALL call
  77 #define XJMP  jmp
  78 #else
  79 #define XCALL rcall
  80 #define XJMP  rjmp
  81 #endif
  82
  83 .macro DEFUN name
     ;; Begin a library function: make the symbol global, open a .func
     ;; region and emit the entry label.  ENDF closes what DEFUN opens.
  84 .global \name
  85 .func \name
  86 \name:
  87 .endm
  88
  89 .macro ENDF name
     ;; End a function begun with DEFUN: set the symbol size to the number
     ;; of bytes emitted since its label and close the .func region.
  90 .size \name, .-\name
  91 .endfunc
  92 .endm
  93
  94 .macro FALIAS name
     ;; Emit an additional global entry label at the current location.
     ;; The size is taken directly behind the label, so the alias symbol
     ;; has size 0 and control continues into the code after the macro.
  95 .global \name
  96 .func \name
  97 \name:
  98 .size \name, .-\name
  99 .endfunc
 100 .endm
 101
 102 ;; Negate a 2-byte value held in consecutive registers
     ;; REG is the register number of the low byte.  COM inverts the high
     ;; byte, NEG negates the low byte and sets Carry unless that byte was
     ;; zero, and SBCI with -1 then yields  ~high + 1 - Carry.
     ;; SBCI works on R16..R31 only, so REG+1 has to be in that range.
 103 .macro NEG2  reg
 104     com     \reg+1
 105     neg     \reg
 106     sbci    \reg+1, -1
 107 .endm
 108
 109 ;; Negate a 4-byte value held in consecutive registers
 110 ;; Sets the V flag for signed overflow tests if REG >= 16
     ;; REG is the register number of the lowest byte.
     ;; REG >= 16:  COM the upper three bytes, NEG the lowest one and let
     ;;             SBCI with -1 ripple the +1 upwards (SBCI needs R16..R31).
     ;; REG <  16:  COM all four bytes; COM always sets Carry, so the ADC
     ;;             chain with __zero_reg__ adds the missing 1.
 111 .macro NEG4  reg
 112     com     \reg+3
 113     com     \reg+2
 114     com     \reg+1
 115 .if \reg >= 16
 116     neg     \reg
 117     sbci    \reg+1, -1
 118     sbci    \reg+2, -1
 119     sbci    \reg+3, -1
 120 .else
 121     com     \reg
 122     adc     \reg,   __zero_reg__
 123     adc     \reg+1, __zero_reg__
 124     adc     \reg+2, __zero_reg__
 125     adc     \reg+3, __zero_reg__
 126 .endif
 127 .endm
 128
 129 #define exp_lo(N)  hlo8 ((N) << 23)
 130 #define exp_hi(N)  hhi8 ((N) << 23)
     /* Byte 2 (bits 16..23) resp. byte 3 (bits 24..31) of N shifted left
        by 23 bits.
        NOTE(review): presumably meant to place an exponent value N into the
        two upper bytes of a 32-bit float; no user is visible in this part
        of the file -- confirm against the code that uses these macros.  */
 131
 132 \f
 133 .section .text.libgcc.mul, "ax", @progbits
 134
 135 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 136 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
 137 #if !defined (__AVR_HAVE_MUL__)
 138 /*******************************************************
 139     Multiplication  8 x 8  without MUL
 140 *******************************************************/
 141 #if defined (L_mulqi3)
     ;;; R24 = R22 * R24   (low 8 bits of the product)
     ;;; Shift-and-add loop.  The result is built up in __tmp_reg__ and
     ;;; copied to R24 at the end.
     ;;; Clobbers: __tmp_reg__, R22
 142
 143 #define r_arg2  r22             /* multiplicand */
 144 #define r_arg1  r24             /* multiplier */
 145 #define r_res   __tmp_reg__     /* result */
 146
 147 DEFUN __mulqi3
 148         clr     r_res           ; clear result
 149 __mulqi3_loop:
 150         sbrc    r_arg1,0        ; if bit 0 of the multiplier is set ...
 151         add     r_res,r_arg2    ; ... add the shifted multiplicand
 152         add     r_arg2,r_arg2   ; shift multiplicand
 153         breq    __mulqi3_exit   ; while multiplicand != 0
 154         lsr     r_arg1          ; advance to the next multiplier bit
 155         brne    __mulqi3_loop   ; exit if multiplier = 0
 156 __mulqi3_exit:
 157         mov     r_arg1,r_res    ; result to return register
 158         ret
 159 ENDF __mulqi3
 160
 161 #undef r_arg2
 162 #undef r_arg1
 163 #undef r_res
 164
 165 #endif  /* defined (L_mulqi3) */
 166
 167
 168 /*******************************************************
 169     Widening Multiplication  16 = 8 x 8  without MUL
 170     Multiplication  16 x 16  without MUL
 171 *******************************************************/
 172
 173 #define A0  r22
 174 #define A1  r23
 175 #define B0  r24
 176 #define BB0 r20
 177 #define B1  r25
 178 ;; Output overlaps input, thus expand result in CC0/1
 179 #define C0  r24
 180 #define C1  r25
 181 #define CC0  __tmp_reg__
 182 #define CC1  R21
     ;; Register roles for the 8- and 16-bit routines that follow:
     ;;   A  = R23:R22          first operand
     ;;   B  = R25:R24          second operand
     ;;   C  = R25:R24          returned product (same registers as B)
     ;;   CC = R21:__tmp_reg__  accumulator used while B is still needed
     ;;   BB0 = R20             copy of B0 kept by __mulqihi3 across the call
 183
 184 #if defined (L_umulqihi3)
 185 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
 186 ;;; (C1:C0) = (unsigned int) A0  * (unsigned int) B0
 187 ;;; Clobbers: __tmp_reg__, R21..R23
 188 DEFUN __umulqihi3
 189     clr     A1                  ; zero-extend A0 to 16 bits
 190     clr     B1                  ; zero-extend B0 to 16 bits
 191     XJMP    __mulhi3            ; tail call: __mulhi3 returns to our caller
 192 ENDF __umulqihi3
 193 #endif /* L_umulqihi3 */
 194
 195 #if defined (L_mulqihi3)
 196 ;;; R25:R24 = (signed int) R22 * (signed int) R24
 197 ;;; (C1:C0) = (signed int) A0  * (signed int) B0
 198 ;;; Clobbers: __tmp_reg__, R20..R23
     ;;; B is sign-extended up front, A is only zero-extended.  For A < 0
     ;;; the zero-extended value is too large by 256, so B0 is subtracted
     ;;; from the high byte of the 16-bit product afterwards.
 199 DEFUN __mulqihi3
 200     ;; Sign-extend B0
 201     clr     B1
 202     sbrc    B0, 7
 203     com     B1
 204     ;; The multiplication runs twice as fast if A1 is zero, thus:
 205     ;; Zero-extend A0
 206     clr     A1
 207 #ifdef __AVR_HAVE_JMP_CALL__
 208     ;; Store  B0 * sign of A
 209     clr     BB0
 210     sbrc    A0, 7
 211     mov     BB0, B0             ; BB0 = B0 if A < 0, else BB0 stays 0
 212     call    __mulhi3
 213 #else /* have no CALL */
 214     ;; Skip sign-extension of A if A >= 0
 215     ;; Same size as with the first alternative but avoids errata skip
 216     ;; and is faster if A >= 0
 217     sbrs    A0, 7
 218     rjmp    __mulhi3            ; A >= 0: tail call, no fixup needed
 219     ;; If  A < 0  store B
 220     mov     BB0, B0
 221     rcall   __mulhi3
 222 #endif /* HAVE_JMP_CALL */
 223     ;; 1-extend A after the multiplication
 224     sub     C1, BB0
 225     ret
 226 ENDF __mulqihi3
 227 #endif /* L_mulqihi3 */
 228
 229 #if defined (L_mulhi3)
 230 ;;; R25:R24 = R23:R22 * R25:R24
 231 ;;; (C1:C0) = (A1:A0) * (B1:B0)
 232 ;;; Clobbers: __tmp_reg__, R21..R23
     ;;; Shift-and-add multiplication:  each round shifts one bit out of A
     ;;; (to the right) and doubles B; B is added to the accumulator CC1:CC0
     ;;; whenever the bit taken from A is 1.  The loop ends as soon as
     ;;; either A or B has become zero.
 233 DEFUN __mulhi3
 234
 235     ;; Clear result
 236     clr     CC0
 237     clr     CC1
 238     rjmp 3f                     ; enter the loop at the B == 0 test
 239 1:
 240     ;; Bit n of A is 1  -->  C += B << n
 241     add     CC0, B0
 242     adc     CC1, B1
 243 2:
     ;; B <<= 1
 244     lsl     B0
 245     rol     B1
 246 3:
 247     ;; If B == 0 we are ready
 248     sbiw    B0, 0               ; 16-bit test of B1:B0, value unchanged
 249     breq 9f
 250
 251     ;; Carry = n-th bit of A
 252     lsr     A1
 253     ror     A0
 254     ;; If bit n of A is set, then go add  B * 2^n  to  C
 255     brcs 1b
 256
 257     ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
 258     ;; Thus, it is sufficient to CPC the high part to test A against 0
 259     cpc     A1, __zero_reg__
 260     ;; Only proceed if A != 0
 261     brne    2b
 262 9:
 263     ;; Move Result into place
 264     mov     C0, CC0
 265     mov     C1, CC1
 266     ret
 267 ENDF  __mulhi3
 268 #endif /* L_mulhi3 */
 269
 270 #undef A0
 271 #undef A1
 272 #undef B0
 273 #undef BB0
 274 #undef B1
 275 #undef C0
 276 #undef C1
 277 #undef CC0
 278 #undef CC1
 279
 280 \f
 281 #define A0 22
 282 #define A1 A0+1
 283 #define A2 A0+2
 284 #define A3 A0+3
 285
 286 #define B0 18
 287 #define B1 B0+1
 288 #define B2 B0+2
 289 #define B3 B0+3
 290
 291 #define CC0 26
 292 #define CC1 CC0+1
 293 #define CC2 30
 294 #define CC3 CC2+1
 295
 296 #define C0 22
 297 #define C1 C0+1
 298 #define C2 C0+2
 299 #define C3 C0+3
     ;; Register roles for the 32-bit routines without MUL:
     ;;   A  = R25:R22           first operand
     ;;   B  = R21:R18           second operand
     ;;   C  = R25:R22           returned product (same registers as A)
     ;;   CC = R31:R30:R27:R26   accumulator for the product
     ;; The aliases are plain numbers so that they work with wmov.
 300
 301 /*******************************************************
 302     Widening Multiplication  32 = 16 x 16  without MUL
 303 *******************************************************/
 304
 305 #if defined (L_umulhisi3)
     ;;; R25:R22 = (unsigned long) R23:R22 * (unsigned long) R25:R24
     ;;; Moves the second operand to B, zero-extends both operands to
     ;;; 32 bits and tail-calls __mulsi3.
 306 DEFUN __umulhisi3
 307     wmov    B0, 24              ; B1:B0 = R25:R24
 308     ;; Zero-extend B
 309     clr     B2
 310     clr     B3
 311     ;; Zero-extend A
 312     wmov    A2, B2              ; A3:A2 = B3:B2 = 0
 313     XJMP    __mulsi3
 314 ENDF __umulhisi3
 315 #endif /* L_umulhisi3 */
 316
 317 #if defined (L_mulhisi3)
     ;;; R25:R22 = (signed long) R23:R22 * (signed long) R25:R24
     ;;; The second operand is moved to B and sign-extended; the actual
     ;;; multiplication is done by __mulsi3 resp. __mulsi3_helper.
 318 DEFUN __mulhisi3
 319     wmov    B0, 24              ; B1:B0 = R25:R24
 320     ;; Sign-extend B
 321     lsl     r25                 ; Carry = sign bit of B (R25 was copied)
 322     sbc     B2, B2              ; B2 = 0xff if Carry is set, else 0x00
 323     mov     B3, B2
 324 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
     ;; This variant does not place XJMP behind a skip instruction.
 325     ;; Sign-extend A
 326     clr     A2
 327     sbrc    A1, 7
 328     com     A2
 329     mov     A3, A2
 330     XJMP __mulsi3
 331 #else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
 332     ;; Zero-extend A and __mulsi3 will run at least twice as fast
 333     ;; compared to a sign-extended A.
 334     clr     A2
 335     clr     A3
 336     sbrs    A1, 7               ; A < 0: skip the XJMP and fix up below
 337     XJMP __mulsi3
 338     ;; If  A < 0  then perform the  B * 0xffff.... before the
 339     ;; very multiplication by initializing the high part of the
 340     ;; result CC with -B.
 341     wmov    CC2, A2             ; CC3:CC2 = 0 (A3:A2 was cleared above)
 342     sub     CC2, B0
 343     sbc     CC3, B1
     ;; Enter behind the code of __mulsi3 that would clear CC3:CC2 again.
 344     XJMP __mulsi3_helper
 345 #endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
 346 ENDF __mulhisi3
 347 #endif /* L_mulhisi3 */
 348
 349
 350 /*******************************************************
 351     Multiplication  32 x 32  without MUL
 352 *******************************************************/
 353
 354 #if defined (L_mulsi3)
     ;;; R25:R22 = R25:R22 * R21:R18
     ;;; (C3:C0) = (A3:A0) * (B3:B0)
     ;;; Shift-and-add multiplication with the accumulator
     ;;; CC = R31:R30:R27:R26.  B (R21:R18) is shifted left while the loop
     ;;; runs, so its value is lost as well.
 355 DEFUN __mulsi3
 356     ;; Clear result
 357     clr     CC2
 358     clr     CC3
 359     ;; FALLTHRU
 360 ENDF  __mulsi3
 361
     ;;; Entry for callers that have preset the high word CC3:CC2 of the
     ;;; accumulator themselves; only the low word is cleared here.
 362 DEFUN __mulsi3_helper
 363     clr     CC0
 364     clr     CC1
 365     rjmp 3f
 366
 367 1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
 368     ;; CC += B
 369     add  CC0,B0  $  adc  CC1,B1  $  adc  CC2,B2  $  adc  CC3,B3
 370
 371 2:  ;; B <<= 1
 372     lsl  B0      $  rol  B1      $  rol  B2      $  rol  B3
 373
 374 3:  ;; A >>= 1:  Carry = n-th bit of A
 375     lsr  A3      $  ror  A2      $  ror  A1      $  ror  A0
 376
 377     brcs 1b
 378     ;; Only continue if  A != 0
     ;; Carry is 0 here, so SBCI with 0 leaves A1 unchanged and keeps the
     ;; Z flag of the ROR on A0 only if A1 is zero, too:  Z = (A1:A0 == 0)
 379     sbci    A1, 0
 380     brne 2b
 381     sbiw    A2, 0               ; 16-bit test of A3:A2
 382     brne 2b
 383
 384     ;; All bits of A are consumed:  Copy result to return register C
 385     wmov    C0, CC0
 386     wmov    C2, CC2
 387     ret
 388 ENDF __mulsi3_helper
 389 #endif /* L_mulsi3 */
 390
 391 #undef A0
 392 #undef A1
 393 #undef A2
 394 #undef A3
 395 #undef B0
 396 #undef B1
 397 #undef B2
 398 #undef B3
 399 #undef C0
 400 #undef C1
 401 #undef C2
 402 #undef C3
 403 #undef CC0
 404 #undef CC1
 405 #undef CC2
 406 #undef CC3
 407
 408 #endif /* !defined (__AVR_HAVE_MUL__) */
 409 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 410 \f
 411 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 412 #if defined (__AVR_HAVE_MUL__)
 413 #define A0 26
 414 #define B0 18
 415 #define C0 22
 416
 417 #define A1 A0+1
 418
 419 #define B1 B0+1
 420 #define B2 B0+2
 421 #define B3 B0+3
 422
 423 #define C1 C0+1
 424 #define C2 C0+2
 425 #define C3 C0+3
     ;; Register roles for the routines with MUL that follow:
     ;;   A = R27:R26  (16 bits),  B = R21:R18,  result C = R25:R22
 426
 427 /*******************************************************
 428     Widening Multiplication  32 = 16 x 16  with MUL
 429 *******************************************************/
 430
 431 #if defined (L_mulhisi3)
 432 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
 433 ;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
 434 ;;; Clobbers: __tmp_reg__
     ;;; Starts from the unsigned product and subtracts  A << 16  if B < 0
     ;;; and  B << 16  if A < 0;  the latter is left to __usmulhisi3_tail.
 435 DEFUN __mulhisi3
 436     XCALL   __umulhisi3
 437     ;; Sign-extend B
 438     tst     B1
 439     brpl    1f                  ; B >= 0: no correction for B
 440     sub     C2, A0
 441     sbc     C3, A1
 442 1:  ;; Sign-extend A
 443     XJMP __usmulhisi3_tail
 444 ENDF __mulhisi3
 445 #endif /* L_mulhisi3 */
 446
 447 #if defined (L_usmulhisi3)
 448 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
 449 ;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
 450 ;;; Clobbers: __tmp_reg__
 451 DEFUN __usmulhisi3
 452     XCALL   __umulhisi3
 453     ;; FALLTHRU
 454 ENDF __usmulhisi3
 455
     ;;; Shared tail, also jumped to by __mulhisi3:  turns a product that
     ;;; was computed with A taken as unsigned into the one for signed A
     ;;; by subtracting  B << 16  if A < 0.
 456 DEFUN __usmulhisi3_tail
 457     ;; Sign-extend A
 458     sbrs    A1, 7
 459     ret                         ; A >= 0: product is already correct
 460     sub     C2, B0
 461     sbc     C3, B1
 462     ret
 463 ENDF __usmulhisi3_tail
 464 #endif /* L_usmulhisi3 */
 465
 466 #if defined (L_umulhisi3)
 467 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
 468 ;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
 469 ;;; Clobbers: __tmp_reg__
     ;;; Sum of the four 8 x 8 partial products:
     ;;;   low * low  +  (high * high << 16)  +  (both cross products << 8)
     ;;; __zero_reg__, overwritten by MUL, is zero again on return.
 470 DEFUN __umulhisi3
 471     mul     A0, B0
 472     movw    C0, r0              ; C1:C0 = low * low
 473     mul     A1, B1
 474     movw    C2, r0              ; C3:C2 = high * high
 475     mul     A0, B1              ; first cross product, to be added at C1
 476 #ifdef __AVR_HAVE_JMP_CALL__
 477     ;; This function is used by many other routines, often multiple times.
 478     ;; Therefore, if the flash size is not too limited, avoid the RCALL
 479     ;; and invest 6 Bytes to speed things up.
 480     add     C1, r0
 481     adc     C2, r1
 482     clr     __zero_reg__        ; CLR leaves Carry intact for the ADC
 483     adc     C3, __zero_reg__
 484 #else
     ;; Run the add sequence at 1: as a subroutine for the first cross
     ;; product; its RET comes back to the MUL below.
 485     rcall   1f
 486 #endif
 487     mul     A1, B0              ; second cross product
 488 1:  add     C1, r0
 489     adc     C2, r1
 490     clr     __zero_reg__        ; CLR leaves Carry intact for the ADC
 491     adc     C3, __zero_reg__
 492     ret
 493 ENDF __umulhisi3
 494 #endif /* L_umulhisi3 */
 495
 496 /*******************************************************
 497     Widening Multiplication  32 = 16 x 32  with MUL
 498 *******************************************************/
 499
 500 #if defined (L_mulshisi3)
 501 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
 502 ;;; (C3:C0) = (signed long) A1:A0   * B3:B0
 503 ;;; Clobbers: __tmp_reg__
     ;;; A >= 0:  tail call to __muluhisi3.
     ;;; A <  0:  continue with __mulohisi3, located right behind this code.
 504 DEFUN __mulshisi3
 505 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
 506     ;; Some cores have problem skipping 2-word instruction
 507     tst     A1
 508     brmi    __mulohisi3
 509 #else
 510     sbrs    A1, 7               ; A < 0: skip the XJMP and fall through
 511 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
 512     XJMP    __muluhisi3
 513     ;; FALLTHRU
 514 ENDF __mulshisi3
 515
 516 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
 517 ;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
 518 ;;; Clobbers: __tmp_reg__
     ;;; One-extended: the upper 16 bits of the first factor are taken as
     ;;; 0xffff, i.e. the factor is  A - 0x10000.  Hence  B << 16  is
     ;;; subtracted from the product computed with A zero-extended.
 519 DEFUN __mulohisi3
 520     XCALL   __muluhisi3
 521     ;; One-extend R27:R26 (A1:A0)
 522     sub     C2, B0
 523     sbc     C3, B1
 524     ret
 525 ENDF __mulohisi3
 526 #endif /* L_mulshisi3 */
 527
 528 #if defined (L_muluhisi3)
 529 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
 530 ;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
 531 ;;; Clobbers: __tmp_reg__
     ;;; Only the low 32 bits of the product are computed:  __umulhisi3
     ;;; supplies A times the low word of B; the partial products with
     ;;; B2 and B3 are then added as far as they reach into C3:C2.
 532 DEFUN __muluhisi3
 533     XCALL   __umulhisi3
 534     mul     A0, B3
 535     add     C3, r0              ; weight 2^24: only the low byte counts
 536     mul     A1, B2
 537     add     C3, r0              ; weight 2^24: only the low byte counts
 538     mul     A0, B2
 539     add     C2, r0              ; weight 2^16: both bytes count
 540     adc     C3, r1
 541     clr     __zero_reg__        ; MUL has overwritten r1
 542     ret
 543 ENDF __muluhisi3
 544 #endif /* L_muluhisi3 */
 545
 546 /*******************************************************
 547     Multiplication  32 x 32  with MUL
 548 *******************************************************/
 549
 550 #if defined (L_mulsi3)
 551 ;;; R25:R22 = R25:R22 * R21:R18
 552 ;;; (C3:C0) = C3:C0   * B3:B0
 553 ;;; Clobbers: R26, R27, __tmp_reg__
     ;;; The low word of the first operand is multiplied by all of B using
     ;;; __muluhisi3; the high word, kept on the stack meanwhile, is then
     ;;; multiplied by the low word of B and added at weight 2^16.
 554 DEFUN __mulsi3
 555     movw    A0, C0              ; A1:A0 = low word of the first operand
 556     push    C2
 557     push    C3
 558     XCALL   __muluhisi3
 559     pop     A1                  ; pops come in reverse order of the pushes
 560     pop     A0
 561     ;; A1:A0 now contains the high word of A
 562     mul     A0, B0
 563     add     C2, r0
 564     adc     C3, r1
 565     mul     A0, B1
 566     add     C3, r0              ; weight 2^24: only the low byte counts
 567     mul     A1, B0
 568     add     C3, r0              ; weight 2^24: only the low byte counts
 569     clr     __zero_reg__        ; MUL has overwritten r1
 570     ret
 571 ENDF __mulsi3
 572 #endif /* L_mulsi3 */
 573
 574 #undef A0
 575 #undef A1
 576
 577 #undef B0
 578 #undef B1
 579 #undef B2
 580 #undef B3
 581
 582 #undef C0
 583 #undef C1
 584 #undef C2
 585 #undef C3
 586
 587 #endif /* __AVR_HAVE_MUL__ */
 588
 589 /*******************************************************
 590        Multiplication 24 x 24  (with and without MUL)
 591 *******************************************************/
 592
 593 #if defined (L_mulpsi3)
 594
 595 ;; A[0..2]: In: Multiplicand; Out: Product
 596 #define A0  22
 597 #define A1  A0+1
 598 #define A2  A0+2
 599
 600 ;; B[0..2]: In: Multiplier
 601 #define B0  18
 602 #define B1  B0+1
 603 #define B2  B0+2
 604
 605 #if defined (__AVR_HAVE_MUL__)
 606
 607 ;; C[0..2]: Expand Result
 608 #define C0  22
 609 #define C1  C0+1
 610 #define C2  C0+2
 611
 612 ;; R24:R22 *= R20:R18
 613 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
     ;; __umulhisi3 multiplies the two low words (R27:R26 times R19:R18,
     ;; result in R25:R22).  The two byte products that still reach into
     ;; result byte 2 are added afterwards; R25 is left with a value that
     ;; is not part of the 24-bit result.
 614
 615 #define AA0 26
 616 #define AA2 21
 617
 618 DEFUN __mulpsi3
 619     wmov    AA0, A0             ; R27:R26 = low word of A
 620     mov     AA2, A2             ; keep A2, R24 is overwritten by the call
 621     XCALL   __umulhisi3
 622     mul     AA2, B0     $  add  C2, r0
 623     mul     AA0, B2     $  add  C2, r0
 624     clr     __zero_reg__        ; MUL has overwritten r1
 625     ret
 626 ENDF __mulpsi3
 627
 628 #undef AA2
 629 #undef AA0
 630
 631 #undef C2
 632 #undef C1
 633 #undef C0
 634
 635 #else /* !HAVE_MUL */
 636
 637 ;; C[0..2]: Expand Result
 638 #define C0  0
 639 #define C1  C0+1
 640 #define C2  21
 641
 642 ;; R24:R22 *= R20:R18
 643 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
     ;; Shift-and-add multiplication.  The product is accumulated in
     ;; C[] = R21 : R1 : R0, i.e. __zero_reg__ serves as the middle byte.
     ;; It is known to be 0 on entry and is cleared again before the return.
 644
 645 DEFUN __mulpsi3
 646
 647     ;; C[] = 0
 648     clr     __tmp_reg__
 649     clr     C2
 650
 651 0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
 652     LSR  B2     $  ror  B1     $  ror  B0
 653
 654     ;; If the N-th Bit of B[] was set...
 655     brcc    1f
 656
 657     ;; ...then add A[] * 2^N to the Result C[]
 658     ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2
 659
 660 1:  ;; Multiply A[] by 2
 661     LSL  A0     $  rol  A1     $  rol  A2
 662
 663     ;; Loop until B[] is 0
     ;; Subtracting 0 changes nothing; the SBCI chain leaves Z set only
     ;; if all three bytes of B[] are zero.
 664     subi B0,0   $  sbci B1,0   $  sbci B2,0
 665     brne    0b
 666
 667     ;; Copy C[] to the return Register A[]
 668     wmov    A0, C0
 669     mov     A2, C2
 670
 671     clr     __zero_reg__        ; was used as C1
 672     ret
 673 ENDF __mulpsi3
 674
 675 #undef C2
 676 #undef C1
 677 #undef C0
 678
 679 #endif /* HAVE_MUL */
 680
 681 #undef B2
 682 #undef B1
 683 #undef B0
 684
 685 #undef A2
 686 #undef A1
 687 #undef A0
 688
 689 #endif /* L_mulpsi3 */
 690
 691 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
 692
 693 ;; A[0..2]: In: Multiplicand
 694 #define A0  22
 695 #define A1  A0+1
 696 #define A2  A0+2
 697
 698 ;; BB: In: Multiplier
 699 #define BB  25
 700
 701 ;; C[0..2]: Result
 702 #define C0  18
 703 #define C1  C0+1
 704 #define C2  C0+2
 705
 706 ;; C[] = A[] * sign_extend (BB)
     ;; R20:R18 = R24:R22 * (signed char) R25,  low 24 bits of the product.
     ;; A[] is multiplied by BB taken as unsigned first.  If BB is negative
     ;; that factor is too large by 256, so  A[] << 8  is subtracted.
     ;; A[] and BB are left unchanged; __tmp_reg__ is overwritten by MUL.
 707 DEFUN __mulsqipsi3
 708     mul     A0, BB
 709     movw    C0, r0              ; C1:C0 = A0 * BB
 710     mul     A2, BB
 711     mov     C2, r0              ; weight 2^16: only the low byte counts
 712     mul     A1, BB
 713     add     C1, r0              ; A1 * BB is added at weight 2^8
 714     adc     C2, r1
 715     clr     __zero_reg__        ; MUL has overwritten r1
 716     sbrs    BB, 7
 717     ret                         ; BB >= 0: done
 718     ;; One-extend BB
 719     sub     C1, A0
 720     sbc     C2, A1
 721     ret
 722 ENDF __mulsqipsi3
 723
 724 #undef C2
 725 #undef C1
 726 #undef C0
 727
 728 #undef BB
 729
 730 #undef A2
 731 #undef A1
 732 #undef A0
 733
 734 #endif /* L_mulsqipsi3  &&  HAVE_MUL */
 735
 736 /*******************************************************
 737        Multiplication 64 x 64
 738 *******************************************************/
 739
 740 #if defined (L_muldi3)
 741
 742 ;; A[] = A[] * B[]
 743
 744 ;; A[0..7]: In: Multiplicand
 745 ;; Out: Product
 746 #define A0  18
 747 #define A1  A0+1
 748 #define A2  A0+2
 749 #define A3  A0+3
 750 #define A4  A0+4
 751 #define A5  A0+5
 752 #define A6  A0+6
 753 #define A7  A0+7
 754
 755 ;; B[0..7]: In: Multiplier
 756 #define B0  10
 757 #define B1  B0+1
 758 #define B2  B0+2
 759 #define B3  B0+3
 760 #define B4  B0+4
 761 #define B5  B0+5
 762 #define B6  B0+6
 763 #define B7  B0+7
 764
 765 #if defined (__AVR_HAVE_MUL__)
 766
 767 ;; Define C[] for convenience
 768 ;; Notice that parts of C[] overlap A[] respective B[]
 769 #define C0  16
 770 #define C1  C0+1
 771 #define C2  20
 772 #define C3  C2+1
 773 #define C4  28
 774 #define C5  C4+1
 775 #define C6  C4+2
 776 #define C7  C4+3
 777
 778 ;; A[]     *= B[]
 779 ;; R25:R18 *= R17:R10
 780 ;; Ordinary ABI-Function
     ;; Computes the low 64 bits of the product.  A[] and B[] are treated
     ;; as four 16-bit words each; a heading  i * j  below stands for
     ;; word i of A[] times word j of B[], which enters the result at
     ;; word i+j.  The 16 x 16 products are done by __umulhisi3, i.e.
     ;; R25:R22 = R27:R26 * R19:R18.
     ;; R16, R17, R28 and R29 are saved on entry and restored before the
     ;; return; R26, R27, R30, R31 and __tmp_reg__ are overwritten.
 781
 782 DEFUN __muldi3
 783     push    r29
 784     push    r28
 785     push    r17
 786     push    r16
 787
 788     ;; Counting in Words, we have to perform a 4 * 4 Multiplication
 789
     ;; The products for result word 3 are done byte by byte because only
     ;; their low 16 bits are needed.  They initialize C7:C6.
 790     ;; 3 * 0  +  0 * 3
 791     mul  A7,B0  $             $  mov C7,r0
 792     mul  A0,B7  $             $  add C7,r0
 793     mul  A6,B1  $             $  add C7,r0
 794     mul  A6,B0  $  mov C6,r0  $  add C7,r1
 795     mul  B6,A1  $             $  add C7,r0
 796     mul  B6,A0  $  add C6,r0  $  adc C7,r1
 797
 798     ;; 1 * 2
 799     mul  A2,B4  $  add C6,r0  $  adc C7,r1
 800     mul  A3,B4  $             $  add C7,r0
 801     mul  A2,B5  $             $  add C7,r0
 802
     ;; Save word 2 of A[], word 0 of B[] and word 1 of A[].  They are
     ;; popped further down, in reverse order, straight into the input
     ;; registers of __umulhisi3.
 803     push    A5
 804     push    A4
 805     push    B1
 806     push    B0
 807     push    A3
 808     push    A2
 809
 810     ;; 0 * 0
 811     wmov    26, B0
 812     XCALL   __umulhisi3
     ;; C1:C0 and C3:C2 take the places of B7:B6 and A3:A2 from here on
 813     wmov    C0, 22
 814     wmov    C2, 24
 815
 816     ;; 0 * 2
 817     wmov    26, B4
 818     XCALL   __umulhisi3  $  wmov C4,22            $ add C6,24 $ adc C7,25
 819
 820     wmov    26, B2
 821     ;; 0 * 1
 822     rcall   __muldi3_6
 823
     ;; R19:R18 = word 1 of A[]
 824     pop     A0
 825     pop     A1
 826     ;; 1 * 1
 827     wmov    26, B2
 828     XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
 829
     ;; R27:R26 = word 0 of B[]
 830     pop     r26
 831     pop     r27
 832     ;; 1 * 0
 833     rcall   __muldi3_6
 834
     ;; R19:R18 = word 2 of A[];  R27:R26 still holds word 0 of B[]
 835     pop     A0
 836     pop     A1
 837     ;; 2 * 0
 838     XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
 839
 840     ;; 2 * 1
 841     wmov    26, B2
 842     XCALL   __umulhisi3  $            $           $ add C6,22 $ adc C7,23
 843
 844     ;; A[] = C[]
 845     wmov    A0, C0
 846     ;; A2 = C2 already
 847     wmov    A4, C4
 848     wmov    A6, C6
 849
 850     clr     __zero_reg__
 851     pop     r16
 852     pop     r17
 853     pop     r28
 854     pop     r29
 855     ret
 856
     ;; Local helper:  multiply R27:R26 by R19:R18 and add the 32-bit
     ;; product to C5:C2.  A carry out of C5 is propagated into C7:C6.
 857 __muldi3_6:
 858     XCALL   __umulhisi3
 859     add     C2, 22
 860     adc     C3, 23
 861     adc     C4, 24
 862     adc     C5, 25
 863     brcc    0f
 864     adiw    C6, 1
 865 0:  ret
 866 ENDF __muldi3
 867
 868 #undef C7
 869 #undef C6
 870 #undef C5
 871 #undef C4
 872 #undef C3
 873 #undef C2
 874 #undef C1
 875 #undef C0
 876
 877 #else /* !HAVE_MUL */
 878
 879 #define C0  26
 880 #define C1  C0+1
 881 #define C2  C0+2
 882 #define C3  C0+3
 883 #define C4  C0+4
 884 #define C5  C0+5
 885 #define C6  0
 886 #define C7  C6+1
 887
 888 #define Loop 9
 889
 890 ;; A[]     *= B[]
 891 ;; R25:R18 *= R17:R10
 892 ;; Ordinary ABI-Function
     ;; Shift-and-add multiplication with a fixed count of 64 rounds.
     ;; The product is accumulated in C[] = R1 : R0 : R31..R26, so that
     ;; __zero_reg__ holds the top byte while the loop runs.
     ;; R9 (loop counter), R28 and R29 are saved on entry and restored
     ;; before the return; R26, R27, R30, R31 and __tmp_reg__ are
     ;; overwritten.
 893
 894 DEFUN __muldi3
 895     push    r29
 896     push    r28
 897     push    Loop
 898
     ;; LDI cannot load R9 directly, hence the detour via C0
 899     ldi     C0, 64
 900     mov     Loop, C0
 901
 902     ;; C[] = 0
     ;; R1:R0 is 0 once __tmp_reg__ is cleared; copy that pair to the
     ;; other three words of C[]
 903     clr     __tmp_reg__
 904     wmov    C0, 0
 905     wmov    C2, 0
 906     wmov    C4, 0
 907
 908 0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
 909     ;; where N = 64 - Loop.
 910     ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
 911     ;; B[] will have its initial Value again.
 912     LSR  B7     $  ror  B6     $  ror  B5     $  ror  B4
 913     ror  B3     $  ror  B2     $  ror  B1     $  ror  B0
 914
 915     ;; If the N-th Bit of B[] was set then...
 916     brcc    1f
 917     ;; ...finish Rotation...
 918     ori     B7, 1 << 7
 919
 920     ;; ...and add A[] * 2^N to the Result C[]
 921     ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2  $  adc  C3,A3
 922     adc  C4,A4  $  adc  C5,A5  $  adc  C6,A6  $  adc  C7,A7
 923
 924 1:  ;; Multiply A[] by 2
 925     LSL  A0     $  rol  A1     $  rol  A2     $  rol  A3
 926     rol  A4     $  rol  A5     $  rol  A6     $  rol  A7
 927
 928     dec     Loop
 929     brne    0b
 930
 931     ;; We expanded the Result in C[]
 932     ;; Copy Result to the Return Register A[]
 933     wmov    A0, C0
 934     wmov    A2, C2
 935     wmov    A4, C4
 936     wmov    A6, C6
 937
 938     clr     __zero_reg__        ; was used as C7
 939     pop     Loop
 940     pop     r28
 941     pop     r29
 942     ret
 943 ENDF __muldi3
 944
 945 #undef Loop
 946
 947 #undef C7
 948 #undef C6
 949 #undef C5
 950 #undef C4
 951 #undef C3
 952 #undef C2
 953 #undef C1
 954 #undef C0
 955
 956 #endif /* HAVE_MUL */
 957
 958 #undef B7
 959 #undef B6
 960 #undef B5
 961 #undef B4
 962 #undef B3
 963 #undef B2
 964 #undef B1
 965 #undef B0
 966
 967 #undef A7
 968 #undef A6
 969 #undef A5
 970 #undef A4
 971 #undef A3
 972 #undef A2
 973 #undef A1
 974 #undef A0
 975
 976 #endif /* L_muldi3 */
 977
 978 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 979
 980 \f
 981 .section .text.libgcc.div, "ax", @progbits
 982
 983 /*******************************************************
 984        Division 8 / 8 => (result + remainder)
 985 *******************************************************/
 986 #define r_rem   r25     /* remainder */
 987 #define r_arg1  r24     /* dividend, quotient */
 988 #define r_arg2  r22     /* divisor */
 989 #define r_cnt   r23     /* loop count */
 990
 991 #if defined (L_udivmodqi4)
     ;;; R24 = R24 / R22  (unsigned quotient),  R25 = remainder
     ;;; Clobbers: R23
     ;;; Restoring division, one quotient bit per round.  The dividend is
     ;;; shifted out of R24 at the top while the quotient bits are shifted
     ;;; in at the bottom.  They are collected inverted, because Carry is
     ;;; set exactly when the divisor did not fit; the final COM puts
     ;;; that right.
 992 DEFUN __udivmodqi4
 993         sub     r_rem,r_rem     ; clear remainder and carry
 994         ldi     r_cnt,9         ; init loop counter
 995         rjmp    __udivmodqi4_ep ; jump to entry point
 996 __udivmodqi4_loop:
 997         rol     r_rem           ; shift dividend into remainder
 998         cp      r_rem,r_arg2    ; compare remainder & divisor
 999         brcs    __udivmodqi4_ep ; remainder < divisor
1000         sub     r_rem,r_arg2    ; else subtract divisor (Carry = 0)
1001 __udivmodqi4_ep:
1002         rol     r_arg1          ; shift dividend (with CARRY)
1003         dec     r_cnt           ; decrement loop counter
                                     ; (DEC does not change Carry)
1004         brne    __udivmodqi4_loop
1005         com     r_arg1          ; complement result
1006                                 ; because C flag was complemented in loop
1007         ret
1008 ENDF __udivmodqi4
1009 #endif /* defined (L_udivmodqi4) */
1010
1011 #if defined (L_divmodqi4)
     ;;; R24 = R24 / R22  (signed quotient),  R25 = remainder
     ;;; Both operands are made non-negative, __udivmodqi4 does the
     ;;; division, and the signs are put back:  the remainder gets the
     ;;; sign of the dividend (kept in T), the quotient is negated if the
     ;;; operand signs differ (kept in bit 7 of __tmp_reg__).
     ;;; Clobbers: T, __tmp_reg__, R22, R23
1012 DEFUN __divmodqi4
1013         bst     r_arg1,7        ; store sign of dividend
1014         mov     __tmp_reg__,r_arg1
1015         eor     __tmp_reg__,r_arg2; r0.7 is sign of result
1016         sbrc    r_arg1,7
1017         neg     r_arg1          ; dividend negative : negate
1018         sbrc    r_arg2,7
1019         neg     r_arg2          ; divisor negative : negate
1020         XCALL   __udivmodqi4    ; do the unsigned div/mod
1021         brtc    __divmodqi4_1
1022         neg     r_rem           ; correct remainder sign
1023 __divmodqi4_1:
1024         sbrc    __tmp_reg__,7
1025         neg     r_arg1          ; correct result sign
1026 __divmodqi4_exit:
1027         ret
1028 ENDF __divmodqi4
1029 #endif /* defined (L_divmodqi4) */
1030
1031 #undef r_rem
1032 #undef r_arg1
1033 #undef r_arg2
1034 #undef r_cnt
1035
1036
1037 /*******************************************************
1038        Division 16 / 16 => (result + remainder)
1039 *******************************************************/
1040 #define r_remL  r26     /* remainder Low */
1041 #define r_remH  r27     /* remainder High */
1042
1043 /* return: remainder */
1044 #define r_arg1L r24     /* dividend Low */
1045 #define r_arg1H r25     /* dividend High */
1046
1047 /* return: quotient */
1048 #define r_arg2L r22     /* divisor Low */
1049 #define r_arg2H r23     /* divisor High */
1050
1051 #define r_cnt   r21     /* loop count */
1052
1053 #if defined (L_udivmodhi4)
     ;;; In:   R25:R24 = dividend,  R23:R22 = divisor
     ;;; Out:  R23:R22 = unsigned quotient,  R25:R24 = remainder
     ;;; Clobbers: R21, R26, R27
     ;;; Restoring division, one quotient bit per round.  The remainder is
     ;;; built in R27:R26; the quotient bits are shifted into the dividend
     ;;; registers in inverted form and complemented at the end.
1054 DEFUN __udivmodhi4
1055         sub     r_remL,r_remL
1056         sub     r_remH,r_remH   ; clear remainder and carry
1057         ldi     r_cnt,17        ; init loop counter
1058         rjmp    __udivmodhi4_ep ; jump to entry point
1059 __udivmodhi4_loop:
1060         rol     r_remL          ; shift dividend into remainder
1061         rol     r_remH
1062         cp      r_remL,r_arg2L  ; compare remainder & divisor
1063         cpc     r_remH,r_arg2H
1064         brcs    __udivmodhi4_ep ; remainder < divisor
1065         sub     r_remL,r_arg2L  ; else subtract divisor (Carry = 0)
1066         sbc     r_remH,r_arg2H
1067 __udivmodhi4_ep:
1068         rol     r_arg1L         ; shift dividend (with CARRY)
1069         rol     r_arg1H
1070         dec     r_cnt           ; decrement loop counter
1071         brne    __udivmodhi4_loop
1072         com     r_arg1L
1073         com     r_arg1H
1074 ; div/mod results to return registers, as for the div() function
1075         mov_l   r_arg2L, r_arg1L        ; quotient
1076         mov_h   r_arg2H, r_arg1H
1077         mov_l   r_arg1L, r_remL         ; remainder
1078         mov_h   r_arg1H, r_remH
1079         ret
1080 ENDF __udivmodhi4
1081 #endif /* defined (L_udivmodhi4) */
1082
1083 #if defined (L_divmodhi4)
     ;;; In:   R25:R24 = dividend,  R23:R22 = divisor
     ;;; Out:  R23:R22 = signed quotient,  R25:R24 = remainder
     ;;; The same entry point is also exported under the name _div.
     ;;; T keeps the sign of the dividend, bit 7 of __tmp_reg__ the sign
     ;;; of the quotient.  The two negation sequences at the end are used
     ;;; both as local subroutines (RCALL) and as the tail of the function.
     ;;; Clobbers: T, __tmp_reg__, and what __udivmodhi4 clobbers
1084 DEFUN __divmodhi4
1085     .global _div
1086 _div:
1087     bst     r_arg1H,7           ; store sign of dividend
1088     mov     __tmp_reg__,r_arg2H
1089     brtc    0f
1090     com     __tmp_reg__         ; r0.7 is sign of result
1091     rcall   __divmodhi4_neg1    ; dividend negative: negate
1092 0:
1093     sbrc    r_arg2H,7
1094     rcall   __divmodhi4_neg2    ; divisor negative: negate
1095     XCALL   __udivmodhi4        ; do the unsigned div/mod
1096     sbrc    __tmp_reg__,7
1097     rcall   __divmodhi4_neg2    ; correct quotient sign
1098     brtc    __divmodhi4_exit    ; dividend >= 0: remainder is final
1099 __divmodhi4_neg1:
1100     ;; correct dividend/remainder sign
1101     com     r_arg1H
1102     neg     r_arg1L
1103     sbci    r_arg1H,0xff
1104     ret
1105 __divmodhi4_neg2:
1106     ;; correct divisor/result sign
1107     com     r_arg2H
1108     neg     r_arg2L
1109     sbci    r_arg2H,0xff
1110 __divmodhi4_exit:
1111     ret
1112 ENDF __divmodhi4
1113 #endif /* defined (L_divmodhi4) */
1114
1115 #undef r_remH
1116 #undef r_remL
1117
1118 #undef r_arg1H
1119 #undef r_arg1L
1120
1121 #undef r_arg2H
1122 #undef r_arg2L
1123
1124 #undef r_cnt
1125
1126 /*******************************************************
1127        Division 24 / 24 => (result + remainder)
1128 *******************************************************/
1129
1130 ;; A[0..2]: In: Dividend; Out: Quotient
1131 #define A0  22
1132 #define A1  A0+1
1133 #define A2  A0+2
1134
1135 ;; B[0..2]: In: Divisor;   Out: Remainder
1136 #define B0  18
1137 #define B1  B0+1
1138 #define B2  B0+2
1139
1140 ;; C[0..2]: Expand remainder
     ;; The low byte lives in __zero_reg__, which is known to be 0 on
     ;; entry and is cleared again before the return.
1141 #define C0  __zero_reg__
1142 #define C1  26
1143 #define C2  25
1144
1145 ;; Loop counter
1146 #define r_cnt   21
1147
1148 #if defined (L_udivmodpsi4)
1149 ;; R24:R22 = R24:R22  udiv  R20:R18
1150 ;; R20:R18 = R24:R22  umod  R20:R18
1151 ;; Clobbers: R21, R25, R26
     ;; Restoring division, one quotient bit per round.  The quotient bits
     ;; are shifted into A[] in inverted form while the dividend bits
     ;; leave it at the top; A[] is complemented at the end.
1152
1153 DEFUN __udivmodpsi4
1154     ; init loop counter
1155     ldi     r_cnt, 24+1
1156     ; Clear remainder and carry.  C0 is already 0
1157     clr     C1
1158     sub     C2, C2
1159     ; jump to entry point
1160     rjmp    __udivmodpsi4_start
1161 __udivmodpsi4_loop:
1162     ; shift dividend into remainder
1163     rol     C0
1164     rol     C1
1165     rol     C2
1166     ; compare remainder & divisor
1167     cp      C0, B0
1168     cpc     C1, B1
1169     cpc     C2, B2
1170     brcs    __udivmodpsi4_start ; remainder < divisor
1171     sub     C0, B0              ; else subtract divisor (Carry = 0)
1172     sbc     C1, B1
1173     sbc     C2, B2
1174 __udivmodpsi4_start:
1175     ; shift dividend (with CARRY)
1176     rol     A0
1177     rol     A1
1178     rol     A2
1179     ; decrement loop counter
1180     dec     r_cnt
1181     brne    __udivmodpsi4_loop
1182     com     A0
1183     com     A1
1184     com     A2
1185     ; div/mod results to return registers
1186     ; remainder
1187     mov     B0, C0
1188     mov     B1, C1
1189     mov     B2, C2
1190     clr     __zero_reg__ ; C0
1191     ret
1192 ENDF __udivmodpsi4
1193 #endif /* defined (L_udivmodpsi4) */
1194
1195 #if defined (L_divmodpsi4)
1196 ;; R24:R22 = R24:R22  div  R20:R18
1197 ;; R20:R18 = R24:R22  mod  R20:R18
1198 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1199
1200 DEFUN __divmodpsi4
         ;; Signed 24-bit division: make both operands non-negative,
         ;; divide unsigned, then fix up the signs of the results.
         ;; T and __tmp_reg__ carry the sign information across the call
         ;; to __udivmodpsi4, so that routine must leave them untouched.
1201     ; R0.7 will contain the sign of the result:
1202     ; R0.7 = A.sign ^ B.sign
1203     mov __tmp_reg__, B2
1204     ; T-flag = sign of dividend
1205     bst     A2, 7
1206     brtc    0f
1207     com     __tmp_reg__
1208     ; Adjust dividend's sign
1209     rcall   __divmodpsi4_negA
1210 0:
1211     ; Adjust divisor's sign
1212     sbrc    B2, 7
1213     rcall   __divmodpsi4_negB
1214
1215     ; Do the unsigned div/mod
1216     XCALL   __udivmodpsi4
1217
1218     ; Adjust quotient's sign
1219     sbrc    __tmp_reg__, 7
1220     rcall   __divmodpsi4_negA
1221
1222     ; Adjust remainder's sign
         ; (the remainder takes the sign of the dividend, kept in T;
         ; if T is set, fall through into negB and return from there)
1223     brtc    __divmodpsi4_end
1224
1225 __divmodpsi4_negB:
1226     ; Correct divisor/remainder sign
         ; B[] = -B[]: complement the upper bytes, negate the low byte
         ; and propagate the borrow by subtracting -1 with carry
1227     com     B2
1228     com     B1
1229     neg     B0
1230     sbci    B1, -1
1231     sbci    B2, -1
1232     ret
1233
1234     ; Correct dividend/quotient sign
1235 __divmodpsi4_negA:
         ; A[] = -A[], same scheme as for B[] above
1236     com     A2
1237     com     A1
1238     neg     A0
1239     sbci    A1, -1
1240     sbci    A2, -1
1241 __divmodpsi4_end:
1242     ret
1243
1244 ENDF __divmodpsi4
1245 #endif /* defined (L_divmodpsi4) */
1246
1247 #undef A0
1248 #undef A1
1249 #undef A2
1250
1251 #undef B0
1252 #undef B1
1253 #undef B2
1254
1255 #undef C0
1256 #undef C1
1257 #undef C2
1258
1259 #undef r_cnt
1260
1261 /*******************************************************
1262        Division 32 / 32 => (result + remainder)
1263 *******************************************************/
1264 #define r_remHH r31     /* remainder High */
1265 #define r_remHL r30
1266 #define r_remH  r27
1267 #define r_remL  r26     /* remainder Low */
1268
1269 /* return: remainder */
1270 #define r_arg1HH r25    /* dividend High */
1271 #define r_arg1HL r24
1272 #define r_arg1H  r23
1273 #define r_arg1L  r22    /* dividend Low */
1274
1275 /* return: quotient */
1276 #define r_arg2HH r21    /* divisor High */
1277 #define r_arg2HL r20
1278 #define r_arg2H  r19
1279 #define r_arg2L  r18    /* divisor Low */
1280
1281 #define r_cnt __zero_reg__  /* loop count (0 after the loop!) */
1282
1283 #if defined (L_udivmodsi4)
1284 DEFUN __udivmodsi4
             ;; Unsigned 32-bit division by shift-and-subtract.
             ;; In:  r_arg1 = dividend, r_arg2 = divisor
             ;; Out: r_arg1 = remainder, r_arg2 = quotient
             ;; The loop counter lives in __zero_reg__ and is 0 again
             ;; when the loop terminates.
1285         ldi     r_remL, 33      ; init loop counter (32 bits + entry pass)
1286         mov     r_cnt, r_remL   ; loaded via r_remL: ldi cannot target r1
1287         sub     r_remL,r_remL
1288         sub     r_remH,r_remH   ; clear remainder and carry
1289         mov_l   r_remHL, r_remL
1290         mov_h   r_remHH, r_remH
1291         rjmp    __udivmodsi4_ep ; jump to entry point
1292 __udivmodsi4_loop:
1293         rol     r_remL          ; shift dividend into remainder
1294         rol     r_remH
1295         rol     r_remHL
1296         rol     r_remHH
1297         cp      r_remL,r_arg2L  ; compare remainder & divisor
1298         cpc     r_remH,r_arg2H
1299         cpc     r_remHL,r_arg2HL
1300         cpc     r_remHH,r_arg2HH
1301         brcs    __udivmodsi4_ep ; remainder < divisor: nothing to subtract
1302         sub     r_remL,r_arg2L  ; remainder -= divisor
1303         sbc     r_remH,r_arg2H
1304         sbc     r_remHL,r_arg2HL
1305         sbc     r_remHH,r_arg2HH
1306 __udivmodsi4_ep:
             ; The carry rotated in is the inverted quotient bit
1307         rol     r_arg1L         ; shift dividend (with CARRY)
1308         rol     r_arg1H
1309         rol     r_arg1HL
1310         rol     r_arg1HH
1311         dec     r_cnt           ; decrement loop counter
1312         brne    __udivmodsi4_loop
1313                                 ; __zero_reg__ now restored (r_cnt == 0)
1314         com     r_arg1L         ; un-invert the collected quotient bits
1315         com     r_arg1H
1316         com     r_arg1HL
1317         com     r_arg1HH
1318 ; div/mod results to return registers, as for the ldiv() function
1319         mov_l   r_arg2L,  r_arg1L       ; quotient
1320         mov_h   r_arg2H,  r_arg1H
1321         mov_l   r_arg2HL, r_arg1HL
1322         mov_h   r_arg2HH, r_arg1HH
1323         mov_l   r_arg1L,  r_remL        ; remainder
1324         mov_h   r_arg1H,  r_remH
1325         mov_l   r_arg1HL, r_remHL
1326         mov_h   r_arg1HH, r_remHH
1327         ret
1328 ENDF __udivmodsi4
1329 #endif /* defined (L_udivmodsi4) */
1330
1331 #if defined (L_divmodsi4)
1332 DEFUN __divmodsi4
         ;; Signed 32-bit division built on __udivmodsi4.
         ;; T keeps the sign of the dividend (= sign of the remainder),
         ;; r0.7 keeps the sign of the quotient; both must survive the
         ;; call to __udivmodsi4.
1333     mov     __tmp_reg__,r_arg2HH
1334     bst     r_arg1HH,7          ; store sign of dividend
1335     brtc    0f
1336     com     __tmp_reg__         ; r0.7 is sign of result
1337     XCALL   __negsi2            ; dividend negative: negate
1338 0:
1339     sbrc    r_arg2HH,7
1340     rcall   __divmodsi4_neg2    ; divisor negative: negate
1341     XCALL   __udivmodsi4        ; do the unsigned div/mod
         ;; From here on r_arg2 holds the quotient, r_arg1 the remainder
1342     sbrc    __tmp_reg__, 7      ; correct quotient sign
1343     rcall   __divmodsi4_neg2
1344     brtc    __divmodsi4_exit    ; correct remainder sign
1345     XJMP    __negsi2            ; tail call: negate r_arg1 and return
1346 __divmodsi4_neg2:
1347     ;; correct divisor/quotient sign
         ;; r_arg2 = -r_arg2: complement upper bytes, negate low byte,
         ;; then propagate the borrow by subtracting 0xff with carry
1348     com     r_arg2HH
1349     com     r_arg2HL
1350     com     r_arg2H
1351     neg     r_arg2L
1352     sbci    r_arg2H,0xff
1353     sbci    r_arg2HL,0xff
1354     sbci    r_arg2HH,0xff
1355 __divmodsi4_exit:
1356     ret
1357 ENDF __divmodsi4
1358 #endif /* defined (L_divmodsi4) */
1359
1360 #if defined (L_negsi2)
1361 ;; (set (reg:SI 22)
1362 ;;      (neg:SI (reg:SI 22)))
1363 ;; Sets the V flag for signed overflow tests
1364 DEFUN __negsi2
         ;; Negate the 32-bit value whose low byte is in R22, in place.
         ;; NOTE(review): NEG4 is a macro that is not visible in this part
         ;; of the file; confirm its flag behaviour there.
1365     NEG4    22
1366     ret
1367 ENDF __negsi2
1368 #endif /* L_negsi2 */
1369
1370 #undef r_remHH
1371 #undef r_remHL
1372 #undef r_remH
1373 #undef r_remL
1374 #undef r_arg1HH
1375 #undef r_arg1HL
1376 #undef r_arg1H
1377 #undef r_arg1L
1378 #undef r_arg2HH
1379 #undef r_arg2HL
1380 #undef r_arg2H
1381 #undef r_arg2L
1382 #undef r_cnt
1383
1384 /*******************************************************
1385        Division 64 / 64
1386        Modulo   64 % 64
1387 *******************************************************/
1388
1389 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1390 ;; at least 16k of Program Memory.  For smaller Devices, depend
1391 ;; on MOVW and SP Size.  There is a Connexion between SP Size and
1392 ;; Flash Size so that SP Size can be used to test for Flash Size.
1393
1394 #if defined (__AVR_HAVE_JMP_CALL__)
1395 #   define SPEED_DIV 8
1396 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1397 #   define SPEED_DIV 16
1398 #else
1399 #   define SPEED_DIV 0
1400 #endif
1401
1402 ;; A[0..7]: In: Dividend;
1403 ;; Out: Quotient  (T = 0)
1404 ;; Out: Remainder (T = 1)
1405 #define A0  18
1406 #define A1  A0+1
1407 #define A2  A0+2
1408 #define A3  A0+3
1409 #define A4  A0+4
1410 #define A5  A0+5
1411 #define A6  A0+6
1412 #define A7  A0+7
1413
1414 ;; B[0..7]: In: Divisor;   Out: Clobber
1415 #define B0  10
1416 #define B1  B0+1
1417 #define B2  B0+2
1418 #define B3  B0+3
1419 #define B4  B0+4
1420 #define B5  B0+5
1421 #define B6  B0+6
1422 #define B7  B0+7
1423
1424 ;; C[0..7]: Expand remainder;  Out: Remainder (unused)
1425 #define C0  8
1426 #define C1  C0+1
1427 #define C2  30
1428 #define C3  C2+1
1429 #define C4  28
1430 #define C5  C4+1
1431 #define C6  26
1432 #define C7  C6+1
1433
1434 ;; Holds Signs during Division Routine
1435 #define SS      __tmp_reg__
1436
1437 ;; Bit-Counter in Division Routine
1438 #define R_cnt   __zero_reg__
1439
1440 ;; Scratch Register for Negation
1441 #define NN      r31
1442
1443 #if defined (L_udivdi3)
1444
1445 ;; R25:R18 = R25:R18  umod  R17:R10
1446 ;; Ordinary ABI-Function
1447
1448 DEFUN __umoddi3
         ;; T = 1 selects the remainder in __udivmod64
1449     set
1450     rjmp __udivdi3_umoddi3
1451 ENDF __umoddi3
1452
1453 ;; R25:R18 = R25:R18  udiv  R17:R10
1454 ;; Ordinary ABI-Function
1455
1456 DEFUN __udivdi3
         ;; T = 0 selects the quotient in __udivmod64.
         ;; Falls through into __udivdi3_umoddi3 below.
1457     clt
1458 ENDF __udivdi3
1459
1460 DEFUN __udivdi3_umoddi3
         ;; Common tail of __udivdi3 and __umoddi3: save the registers
         ;; C0, C1, C4, C5 (R8, R9, R28, R29) that the worker uses as
         ;; part of its remainder, run the worker and restore them in
         ;; reverse order.
1461     push    C0
1462     push    C1
1463     push    C4
1464     push    C5
1465     XCALL   __udivmod64
1466     pop     C5
1467     pop     C4
1468     pop     C1
1469     pop     C0
1470     ret
1471 ENDF __udivdi3_umoddi3
1472 #endif /* L_udivdi3 */
1473
1474 #if defined (L_udivmod64)
1475
1476 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1477 ;; No Registers saved/restored; the Callers will take Care.
1478 ;; Preserves B[] and T-flag
1479 ;; T = 0: Compute Quotient  in A[]
1480 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1481
1482 DEFUN __udivmod64
1483
         ;; A[] = dividend, B[] = divisor, C[] = remainder being built.
         ;; R_cnt is __zero_reg__; it counts the bits still to be done
         ;; and is 0 again when the bit loop ends.
1484     ;; Clear Remainder (C6, C7 will follow)
1485     clr     C0
1486     clr     C1
1487     wmov    C2, C0
1488     wmov    C4, C0
1489     ldi     C7, 64
1490
1491 #if SPEED_DIV == 0 || SPEED_DIV == 16
1492     ;; Initialize Loop-Counter
1493     mov     R_cnt, C7
1494     wmov    C6, C0
1495 #endif /* SPEED_DIV */
1496
1497 #if SPEED_DIV == 8
1498
         ;; Byte-wise pre-shift: C7 temporarily serves as bit counter
         ;; (64, 56, ...) while A7 is parked on the stack.
1499     push    A7
1500     clr     C6
1501
1502 1:  ;; Compare shifted Dividend against Divisor
1503     ;; If -- even after Shifting -- it is smaller...
1504     CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
1505     cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
1506     brcc    2f
1507
1508     ;; ...then we can subtract it.  Thus, it is legal to shift left
         ;; (C[]:A[] is shifted left by one whole byte)
1509                $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
1510     mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
1511     mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
1512     mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0
1513
1514     ;; 8 Bits are done
1515     subi    C7, 8
1516     brne    1b
1517
1518     ;; Shifted 64 Bits:  A7 has traveled to C7
1519     pop     C7
1520     ;; Divisor is greater than Dividend. We have:
1521     ;; A[] % B[] = A[]
1522     ;; A[] / B[] = 0
1523     ;; Thus, we can return immediately
1524     rjmp    5f
1525
1526 2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
1527     mov     R_cnt, C7
1528
1529     ;; The pushed Copy of A7 is not needed: drop it and clear C7
1530     pop     C7
1531     clr     C7
1532
1533 #elif  SPEED_DIV == 16
1534
1535     ;; Compare shifted Dividend against Divisor
1536     cp      A7, B3
1537     cpc     C0, B4
1538     cpc     C1, B5
1539     cpc     C2, B6
1540     cpc     C3, B7
1541     brcc    2f
1542
1543     ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1544     ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1545     wmov  C2,A6  $  wmov C0,A4
1546     wmov  A6,A2  $  wmov A4,A0
1547     wmov  A2,C6  $  wmov A0,C4
1548
1549     ;; Set Bit Counter to 32
1550     lsr     R_cnt
1551 2:
1552 #elif SPEED_DIV
1553 #error SPEED_DIV = ?
1554 #endif /* SPEED_DIV */
1555
1556 ;; The very Division + Remainder Routine
1557
1558 3:  ;; Left-shift Dividend...
1559     lsl A0     $  rol A1     $  rol A2     $  rol A3
1560     rol A4     $  rol A5     $  rol A6     $  rol A7
1561
1562     ;; ...into Remainder
1563     rol C0     $  rol C1     $  rol C2     $  rol C3
1564     rol C4     $  rol C5     $  rol C6     $  rol C7
1565
1566     ;; Compare Remainder and Divisor
1567     CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
1568     cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7
1569
         ;; Remainder < Divisor: this Quotient Bit stays 0
1570     brcs 4f
1571
1572     ;; Divisor fits into Remainder:  Subtract it from Remainder...
1573     SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
1574     sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7
1575
1576     ;; ...and set according Bit in the upcoming Quotient
1577     ;; The Bit will travel to its final Position
1578     ori A0, 1
1579
1580 4:  ;; This Bit is done
1581     dec     R_cnt
1582     brne    3b
1583     ;; __zero_reg__ is 0 again
1584
1585     ;; T = 0: We are fine with the Quotient in A[]
1586     ;; T = 1: Copy Remainder to A[]
1587 5:  brtc    6f
1588     wmov    A0, C0
1589     wmov    A2, C2
1590     wmov    A4, C4
1591     wmov    A6, C6
1592     ;; Move the Sign of the Result to SS.7
         ;; (for the signed Caller SS.6 holds the Sign of the Remainder)
1593     lsl     SS
1594
1595 6:  ret
1596
1597 ENDF __udivmod64
1598 #endif /* L_udivmod64 */
1599
1600
1601 #if defined (L_divdi3)
1602
1603 ;; R25:R18 = R25:R18  mod  R17:R10
1604 ;; Ordinary ABI-Function
1605
1606 DEFUN __moddi3
         ;; T = 1 selects the remainder
1607     set
1608     rjmp    __divdi3_moddi3
1609 ENDF __moddi3
1610
1611 ;; R25:R18 = R25:R18  div  R17:R10
1612 ;; Ordinary ABI-Function
1613
1614 DEFUN __divdi3
         ;; T = 0 selects the quotient.
         ;; Falls through into __divdi3_moddi3 below.
1615     clt
1616 ENDF __divdi3
1617
1618 DEFUN  __divdi3_moddi3
1619 #if SPEED_DIV
         ;; Fast path: the N flag after the OR is set iff A or B is negative
1620     mov     r31, A7
1621     or      r31, B7
1622     brmi    0f
1623     ;; Both Signs are 0:  the following Complexity is not needed
1624     XJMP    __udivdi3_umoddi3
1625 #endif /* SPEED_DIV */
1626
1627 0:  ;; The Prologue
1628     ;; Save 12 Registers:  Y, 17...8
1629     ;; No Frame needed (X = 0)
         ;; Z holds the address of label 1 below, to which
         ;; __prologue_saves__ jumps when it is done.  Entering that
         ;; routine at an offset skips the pushes that are not needed.
1630     clr r26
1631     clr r27
1632     ldi r30, lo8(gs(1f))
1633     ldi r31, hi8(gs(1f))
1634     XJMP __prologue_saves__ + ((18 - 12) * 2)
1635
1636 1:  ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
1637     ;; SS.6 will contain the Sign of the Remainder (A.sign)
1638     mov     SS, A7
1639     asr     SS
         ;; After the ASR both SS.7 and SS.6 equal A.sign and the
         ;; N flag tested below reflects A.sign
1640     ;; Adjust Dividend's Sign as needed
1641 #if SPEED_DIV
1642     ;; Compiling for Speed we know that at least one Sign must be < 0
1643     ;; Thus, if A[] >= 0 then we know B[] < 0
1644     brpl    22f
1645 #else
1646     brpl    21f
1647 #endif /* SPEED_DIV */
1648
1649     XCALL   __negdi2
1650
1651     ;; Adjust Divisor's Sign and SS.7 as needed
1652 21: tst     B7
1653     brpl    3f
         ;; B is negative: flip the Sign of the Quotient in SS.7 ...
1654 22: ldi     NN, 1 << 7
1655     eor     SS, NN
1656
         ;; ... and negate B[].  B[] lives in Registers that do not
         ;; accept SBCI, hence SBC with NN = -1
1657     ldi NN, -1
1658     com B4     $  com B5     $  com B6     $  com B7
1659                $  com B1     $  com B2     $  com B3
1660     NEG B0
1661                $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
1662     sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN
1663
1664 3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1665     XCALL   __udivmod64
1666
1667     ;; Adjust Result's Sign
         ;; With the Skip Erratum, SBRC must not skip a 2-Word XCALL:
         ;; use an explicit Test and Branch instead
1668 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1669     tst     SS
1670     brpl    4f
1671 #else
1672     sbrc    SS, 7
1673 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1674     XCALL   __negdi2
1675
1676 4:  ;; Epilogue: Restore the Z = 12 Registers and return
         ;; Y = SP as Base for the Loads in __epilogue_restores__
1677     in r28, __SP_L__
1678 #if defined (__AVR_HAVE_SPH__)
1679     in r29, __SP_H__
1680 #else
1681     clr r29
1682 #endif /* #SP = 8/16 */
1683     ldi r30, 12
1684     XJMP __epilogue_restores__ + ((18 - 12) * 2)
1685
1686 ENDF __divdi3_moddi3
1687
1688 #undef R_cnt
1689 #undef SS
1690 #undef NN
1691
1692 #endif /* L_divdi3 */
1693
1694 .section .text.libgcc, "ax", @progbits
1695
1696 #define TT __tmp_reg__
1697
1698 #if defined (L_adddi3)
1699 ;; (set (reg:DI 18)
1700 ;;      (plus:DI (reg:DI 18)
1701 ;;               (reg:DI 10)))
1702 ;; Sets the V flag for signed overflow tests
1703 ;; Sets the C flag for unsigned overflow tests
1704 DEFUN __adddi3
         ;; A[] += B[]
         ;; 64-bit addition, low byte first; the carry ripples upwards
         ;; through the ADC chain.
1705     ADD A0,B0
         adc A1,B1
         adc A2,B2
         adc A3,B3
1706     adc A4,B4
         adc A5,B5
         adc A6,B6
         adc A7,B7
1707     ret
1708 ENDF __adddi3
1709 #endif /* L_adddi3 */
1710
1711 #if defined (L_adddi3_s8)
1712 ;; (set (reg:DI 18)
1713 ;;      (plus:DI (reg:DI 18)
1714 ;;               (sign_extend:DI (reg:QI 26))))
1715 ;; Sets the V flag for signed overflow tests
1716 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
1717 DEFUN __adddi3_s8
         ;; A[] += sign-extended R26
         ;; TT becomes the extension byte: 0x00 for R26 >= 0,
         ;; 0xff for R26 < 0.
1718     clr     TT
1719     sbrc    r26, 7
1720     com     TT
         ;; Add R26 to the low byte and the extension byte to all
         ;; higher bytes, with carry.
1721     ADD A0,r26
         adc A1,TT
         adc A2,TT
         adc A3,TT
1722     adc A4,TT
         adc A5,TT
         adc A6,TT
         adc A7,TT
1723     ret
1724 ENDF __adddi3_s8
1725 #endif /* L_adddi3_s8 */
1726
1727 #if defined (L_subdi3)
1728 ;; (set (reg:DI 18)
1729 ;;      (minus:DI (reg:DI 18)
1730 ;;                (reg:DI 10)))
1731 ;; Sets the V flag for signed overflow tests
1732 ;; Sets the C flag for unsigned overflow tests
1733 DEFUN __subdi3
         ;; A[] -= B[]
         ;; 64-bit subtraction, low byte first; the borrow ripples
         ;; upwards through the SBC chain.
1734     SUB A0,B0
         sbc A1,B1
         sbc A2,B2
         sbc A3,B3
1735     sbc A4,B4
         sbc A5,B5
         sbc A6,B6
         sbc A7,B7
1736     ret
1737 ENDF __subdi3
1738 #endif /* L_subdi3 */
1739
1740 #if defined (L_cmpdi2)
1741 ;; (set (cc0)
1742 ;;      (compare (reg:DI 18)
1743 ;;               (reg:DI 10)))
1744 DEFUN __cmpdi2
         ;; Compare A[] with B[]: only the flags are changed,
         ;; neither operand is written.
1745     CP  A0,B0
         cpc A1,B1
         cpc A2,B2
         cpc A3,B3
1746     cpc A4,B4
         cpc A5,B5
         cpc A6,B6
         cpc A7,B7
1747     ret
1748 ENDF __cmpdi2
1749 #endif /* L_cmpdi2 */
1750
1751 #if defined (L_cmpdi2_s8)
1752 ;; (set (cc0)
1753 ;;      (compare (reg:DI 18)
1754 ;;               (sign_extend:DI (reg:QI 26))))
1755 DEFUN __cmpdi2_s8
         ;; Compare A[] with sign-extended R26; only the flags change.
         ;; TT becomes the extension byte: 0x00 for R26 >= 0,
         ;; 0xff for R26 < 0.
1756     clr     TT
1757     sbrc    r26, 7
1758     com     TT
1759     CP  A0,r26
         cpc A1,TT
         cpc A2,TT
         cpc A3,TT
1760     cpc A4,TT
         cpc A5,TT
         cpc A6,TT
         cpc A7,TT
1761     ret
1762 ENDF __cmpdi2_s8
1763 #endif /* L_cmpdi2_s8 */
1764
1765 #if defined (L_negdi2)
1766 ;; (set (reg:DI 18)
1767 ;;      (neg:DI (reg:DI 18)))
1768 ;; Sets the V flag for signed overflow tests
1769 DEFUN __negdi2
1770
         ;; A[] = -A[]
         ;; Step 1: one's complement of all bytes above the low byte
1771     com  A1
         com  A2
         com  A3
1772     com  A4
         com  A5
         com  A6
         com  A7
         ;; Step 2: two's complement of the low byte ...
1773     NEG  A0
         ;; Step 3: ... and propagate its borrow: X - (-1) - C = X + 1 - C
1774     sbci A1,-1
         sbci A2,-1
         sbci A3,-1
1775     sbci A4,-1
         sbci A5,-1
         sbci A6,-1
         sbci A7,-1
1776     ret
1777
1778 ENDF __negdi2
1779 #endif /* L_negdi2 */
1780
1781 #undef TT
1782
1783 #undef C7
1784 #undef C6
1785 #undef C5
1786 #undef C4
1787 #undef C3
1788 #undef C2
1789 #undef C1
1790 #undef C0
1791
1792 #undef B7
1793 #undef B6
1794 #undef B5
1795 #undef B4
1796 #undef B3
1797 #undef B2
1798 #undef B1
1799 #undef B0
1800
1801 #undef A7
1802 #undef A6
1803 #undef A5
1804 #undef A4
1805 #undef A3
1806 #undef A2
1807 #undef A1
1808 #undef A0
1809
1810 \f
1811 .section .text.libgcc.prologue, "ax", @progbits
1812
1813 /**********************************
1814  * This is a prologue subroutine
1815  **********************************/
1816 #if defined (L_prologue)
1817
1818 ;; This function does not clobber T-flag; 64-bit division relies on it
1819 DEFUN __prologue_saves__
             ;; Shared function prologue.
             ;; In:  X (R27:R26) = frame size to subtract from SP
             ;;      Z (R31:R30) = address to continue at afterwards
             ;; Out: Y (R29:R28) = new SP
             ;; Callers may jump into the middle of the push sequence to
             ;; save only the upper part of the register list.
1820         push r2
1821         push r3
1822         push r4
1823         push r5
1824         push r6
1825         push r7
1826         push r8
1827         push r9
1828         push r10
1829         push r11
1830         push r12
1831         push r13
1832         push r14
1833         push r15
1834         push r16
1835         push r17
1836         push r28
1837         push r29
1838 #if !defined (__AVR_HAVE_SPH__)
             ;; 8-bit stack pointer: only the low byte exists
1839         in      r28,__SP_L__
1840         sub     r28,r26
1841         out     __SP_L__,r28
1842         clr     r29
1843 #elif defined (__AVR_XMEGA__)
             ;; XMEGA: SP is written without masking interrupts
1844         in      r28,__SP_L__
1845         in      r29,__SP_H__
1846         sub     r28,r26
1847         sbc     r29,r27
1848         out     __SP_L__,r28
1849         out     __SP_H__,r29
1850 #else
             ;; 16-bit SP: interrupts are off while SP_H is written.
             ;; NOTE(review): SREG is restored before SP_L is written;
             ;; presumably this relies on one more instruction executing
             ;; before a pending interrupt is served -- confirm.
1851         in      r28,__SP_L__
1852         in      r29,__SP_H__
1853         sub     r28,r26
1854         sbc     r29,r27
1855         in      __tmp_reg__,__SREG__
1856         cli
1857         out     __SP_H__,r29
1858         out     __SREG__,__tmp_reg__
1859         out     __SP_L__,r28
1860 #endif /* #SP = 8/16 */
1861
             ;; Continue at the address supplied in Z
1862 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1863         eijmp
1864 #else
1865         ijmp
1866 #endif
1867
1868 ENDF __prologue_saves__
1869 #endif /* defined (L_prologue) */
1870
1871 /*
1872  * This is an epilogue subroutine
1873  */
1874 #if defined (L_epilogue)
1875
1876 DEFUN __epilogue_restores__
             ;; Shared function epilogue, counterpart of __prologue_saves__.
             ;; In:  Y   = base address for the loads below
             ;;      R30 = number of bytes to add to Y to obtain the new SP
             ;; Callers may jump into the middle of the load sequence to
             ;; restore only the upper part of the register list.
1877         ldd     r2,Y+18
1878         ldd     r3,Y+17
1879         ldd     r4,Y+16
1880         ldd     r5,Y+15
1881         ldd     r6,Y+14
1882         ldd     r7,Y+13
1883         ldd     r8,Y+12
1884         ldd     r9,Y+11
1885         ldd     r10,Y+10
1886         ldd     r11,Y+9
1887         ldd     r12,Y+8
1888         ldd     r13,Y+7
1889         ldd     r14,Y+6
1890         ldd     r15,Y+5
1891         ldd     r16,Y+4
1892         ldd     r17,Y+3
             ;; The saved Y is fetched into R26 (and R27) first because Y
             ;; itself is still needed to compute the new SP
1893         ldd     r26,Y+2
1894 #if !defined (__AVR_HAVE_SPH__)
1895         ldd     r29,Y+1
1896         add     r28,r30
1897         out     __SP_L__,r28
1898         mov     r28, r26
1899 #elif defined (__AVR_XMEGA__)
1900         ldd  r27,Y+1
1901         add  r28,r30
1902         adc  r29,__zero_reg__
1903         out  __SP_L__,r28
1904         out  __SP_H__,r29
1905         wmov 28, 26
1906 #else
1907         ldd     r27,Y+1
1908         add     r28,r30
1909         adc     r29,__zero_reg__
             ;; Interrupts are off while SP_H is written; SREG is
             ;; restored right before SP_L is written
1910         in      __tmp_reg__,__SREG__
1911         cli
1912         out     __SP_H__,r29
1913         out     __SREG__,__tmp_reg__
1914         out     __SP_L__,r28
1915         mov_l   r28, r26
1916         mov_h   r29, r27
1917 #endif /* #SP = 8/16 */
1918         ret
1919 ENDF __epilogue_restores__
1920 #endif /* defined (L_epilogue) */
1921
1922 #ifdef L_exit
1923         .section .fini9,"ax",@progbits
             ;; _exit contains no instructions of its own: execution runs
             ;; on into whatever follows in the .fini sections.
             ;; exit is provided as a weak alias at the same address.
1924 DEFUN _exit
1925         .weak   exit
1926 exit:
1927 ENDF _exit
1928
1929         /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
1930
1931         .section .fini0,"ax",@progbits
             ;; End of the program: disable interrupts and loop forever
1932         cli
1933 __stop_program:
1934         rjmp    __stop_program
1935 #endif /* defined (L_exit) */
1936
1937 #ifdef L_cleanup
             ;; Weak default for _cleanup: does nothing and returns
1938         .weak   _cleanup
1939         .func   _cleanup
1940 _cleanup:
1941         ret
1942 .endfunc
1943 #endif /* defined (L_cleanup) */
1944
1945 \f
1946 .section .text.libgcc, "ax", @progbits
1947
1948 #ifdef L_tablejump
1949 DEFUN __tablejump2__
             ;; Z *= 2, then continue in __tablejump__ below
1950         lsl     r30
1951         rol     r31
1952     ;; FALLTHRU
1953 ENDF __tablejump2__
1954
1955 DEFUN __tablejump__
             ;; Read a 2-byte entry from flash at Z (low byte first) and
             ;; jump to the address it contains.  Clobbers Z and
             ;; __tmp_reg__.
1956 #if defined (__AVR_HAVE_LPMX__)
1957         lpm __tmp_reg__, Z+
1958         lpm r31, Z
1959         mov r30, __tmp_reg__
1960 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1961         eijmp
1962 #else
1963         ijmp
1964 #endif
1965
1966 #else /* !HAVE_LPMX */
             ;; Without LPM Rd,Z+ the entry is pushed byte by byte and
             ;; RET is used to perform the indirect jump
1967         lpm
1968         adiw r30, 1
1969         push r0
1970         lpm
1971         push r0
1972 #if defined (__AVR_HAVE_EIJMP_EICALL__)
             ;; 3-byte program counter: EIND supplies the top byte
1973         in   __tmp_reg__, __EIND__
1974         push __tmp_reg__
1975 #endif
1976         ret
1977 #endif /* !HAVE_LPMX */
1978 ENDF __tablejump__
1979 #endif /* defined (L_tablejump) */
1980
1981 #ifdef L_copy_data
1982         .section .init4,"ax",@progbits
1983 DEFUN __do_copy_data
             ;; Startup code: copy bytes from flash, starting at
             ;; __data_load_start, to RAM at __data_start until the RAM
             ;; pointer X reaches __data_end.
             ;; R17 holds hi8(__data_end) because CPC needs a register
             ;; operand.  There is no RET: the code runs on into whatever
             ;; the linker places next.
1984 #if defined(__AVR_HAVE_ELPMX__)
             ;; ELPM Rd,Z+ available: post-increment carries into RAMPZ
1985         ldi     r17, hi8(__data_end)
1986         ldi     r26, lo8(__data_start)
1987         ldi     r27, hi8(__data_start)
1988         ldi     r30, lo8(__data_load_start)
1989         ldi     r31, hi8(__data_load_start)
1990         ldi     r16, hh8(__data_load_start)
1991         out     __RAMPZ__, r16
1992         rjmp    .L__do_copy_data_start
1993 .L__do_copy_data_loop:
1994         elpm    r0, Z+
1995         st      X+, r0
1996 .L__do_copy_data_start:
1997         cpi     r26, lo8(__data_end)
1998         cpc     r27, r17
1999         brne    .L__do_copy_data_loop
2000 #elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
             ;; Plain ELPM only: RAMPZ is maintained by hand.  R16 starts
             ;; one 64 KiB page too low because the first pass through
             ;; the carry label increments it.
2001         ldi     r17, hi8(__data_end)
2002         ldi     r26, lo8(__data_start)
2003         ldi     r27, hi8(__data_start)
2004         ldi     r30, lo8(__data_load_start)
2005         ldi     r31, hi8(__data_load_start)
2006         ldi     r16, hh8(__data_load_start - 0x10000)
2007 .L__do_copy_data_carry:
2008         inc     r16
2009         out     __RAMPZ__, r16
2010         rjmp    .L__do_copy_data_start
2011 .L__do_copy_data_loop:
2012         elpm
2013         st      X+, r0
2014         adiw    r30, 1
             ;; Z wrapped around: advance RAMPZ to the next page
2015         brcs    .L__do_copy_data_carry
2016 .L__do_copy_data_start:
2017         cpi     r26, lo8(__data_end)
2018         cpc     r27, r17
2019         brne    .L__do_copy_data_loop
2020 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
             ;; No ELPM: 16-bit flash address in Z is sufficient
2021         ldi     r17, hi8(__data_end)
2022         ldi     r26, lo8(__data_start)
2023         ldi     r27, hi8(__data_start)
2024         ldi     r30, lo8(__data_load_start)
2025         ldi     r31, hi8(__data_load_start)
2026         rjmp    .L__do_copy_data_start
2027 .L__do_copy_data_loop:
2028 #if defined (__AVR_HAVE_LPMX__)
2029         lpm     r0, Z+
2030 #else
2031         lpm
2032         adiw    r30, 1
2033 #endif
2034         st      X+, r0
2035 .L__do_copy_data_start:
2036         cpi     r26, lo8(__data_end)
2037         cpc     r27, r17
2038         brne    .L__do_copy_data_loop
2039 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2040 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2041         ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2042         out     __RAMPZ__, __zero_reg__
2043 #endif /* ELPM && RAMPD */
2044 ENDF __do_copy_data
2045 #endif /* L_copy_data */
2046
2047 /* __do_clear_bss is only necessary if there is anything in .bss section.  */
2048
2049 #ifdef L_clear_bss
2050         .section .init4,"ax",@progbits
2051 DEFUN __do_clear_bss
             ;; Startup code: store zero to every byte from __bss_start up
             ;; to (not including) __bss_end, using X as the pointer.
             ;; R17 holds hi8(__bss_end) because CPC needs a register
             ;; operand.  There is no RET: the code runs on into whatever
             ;; the linker places next.
2052         ldi     r17, hi8(__bss_end)
2053         ldi     r26, lo8(__bss_start)
2054         ldi     r27, hi8(__bss_start)
2055         rjmp    .do_clear_bss_start
2056 .do_clear_bss_loop:
2057         st      X+, __zero_reg__
2058 .do_clear_bss_start:
             ;; The test comes first so that an empty .bss stores nothing
2059         cpi     r26, lo8(__bss_end)
2060         cpc     r27, r17
2061         brne    .do_clear_bss_loop
2062 ENDF __do_clear_bss
2063 #endif /* L_clear_bss */
2064
2065 /* __do_global_ctors and __do_global_dtors are only necessary
2066    if there are any constructors/destructors.  */
2067
2068 #ifdef L_ctors
2069         .section .init6,"ax",@progbits
2070 DEFUN __do_global_ctors
             ;; Call every entry of the constructor table, walking
             ;; backwards from __ctors_end down to __ctors_start.
             ;; The table pointer is kept in Y (plus R16 for bits 16+ on
             ;; ELPM devices) and copied to Z for each call.
2071 #if defined(__AVR_HAVE_ELPM__)
2072         ldi     r17, hi8(__ctors_start)
2073         ldi     r28, lo8(__ctors_end)
2074         ldi     r29, hi8(__ctors_end)
2075         ldi     r16, hh8(__ctors_end)
2076         rjmp    .L__do_global_ctors_start
2077 .L__do_global_ctors_loop:
             ;; Step back one 2-byte entry, borrowing from bits 16+
2078         sbiw    r28, 2
2079         sbc     r16, __zero_reg__
2080         mov_h   r31, r29
2081         mov_l   r30, r28
2082         out     __RAMPZ__, r16
2083         XCALL   __tablejump_elpm__
2084 .L__do_global_ctors_start:
             ;; 24-bit compare of R16:Y against __ctors_start
2085         cpi     r28, lo8(__ctors_start)
2086         cpc     r29, r17
2087         ldi     r24, hh8(__ctors_start)
2088         cpc     r16, r24
2089         brne    .L__do_global_ctors_loop
2090 #else
2091         ldi     r17, hi8(__ctors_start)
2092         ldi     r28, lo8(__ctors_end)
2093         ldi     r29, hi8(__ctors_end)
2094         rjmp    .L__do_global_ctors_start
2095 .L__do_global_ctors_loop:
             ;; Step back one 2-byte entry and call it
2096         sbiw    r28, 2
2097         mov_h   r31, r29
2098         mov_l   r30, r28
2099         XCALL   __tablejump__
2100 .L__do_global_ctors_start:
2101         cpi     r28, lo8(__ctors_start)
2102         cpc     r29, r17
2103         brne    .L__do_global_ctors_loop
2104 #endif /* defined(__AVR_HAVE_ELPM__) */
2105 ENDF __do_global_ctors
2106 #endif /* L_ctors */
2107
2108 #ifdef L_dtors
2109         .section .fini6,"ax",@progbits
2110 DEFUN __do_global_dtors
             ;; Call every entry of the destructor table, walking forwards
             ;; from __dtors_start up to (not including) __dtors_end.
             ;; The table pointer is kept in Y (plus R16 for bits 16+ on
             ;; ELPM devices) and copied to Z for each call.
2111 #if defined(__AVR_HAVE_ELPM__)
2112         ldi     r17, hi8(__dtors_end)
2113         ldi     r28, lo8(__dtors_start)
2114         ldi     r29, hi8(__dtors_start)
2115         ldi     r16, hh8(__dtors_start)
2116         rjmp    .L__do_global_dtors_start
2117 .L__do_global_dtors_loop:
             ;; Call the entry the pointer currently refers to ...
2118         mov_h   r31, r29
2119         mov_l   r30, r28
2120         out     __RAMPZ__, r16
2121         XCALL   __tablejump_elpm__
             ;; ... then advance by one 2-byte entry, carrying into bits
             ;; 16+.  The pointer starts at __dtors_start, so it has to
             ;; move upwards to reach __dtors_end, exactly like in the
             ;; non-ELPM variant below.
2122         adiw    r28, 2
2123         adc     r16, __zero_reg__
2124 .L__do_global_dtors_start:
             ;; 24-bit compare of R16:Y against __dtors_end
2125         cpi     r28, lo8(__dtors_end)
2126         cpc     r29, r17
2127         ldi     r24, hh8(__dtors_end)
2128         cpc     r16, r24
2129         brne    .L__do_global_dtors_loop
2130 #else
2131         ldi     r17, hi8(__dtors_end)
2132         ldi     r28, lo8(__dtors_start)
2133         ldi     r29, hi8(__dtors_start)
2134         rjmp    .L__do_global_dtors_start
2135 .L__do_global_dtors_loop:
             ;; Call the current entry, then advance to the next one
2136         mov_h   r31, r29
2137         mov_l   r30, r28
2138         XCALL   __tablejump__
2139         adiw    r28, 2
2140 .L__do_global_dtors_start:
2141         cpi     r28, lo8(__dtors_end)
2142         cpc     r29, r17
2143         brne    .L__do_global_dtors_loop
2144 #endif /* defined(__AVR_HAVE_ELPM__) */
2145 ENDF __do_global_dtors
2146 #endif /* L_dtors */
2147
2148 .section .text.libgcc, "ax", @progbits
2149
2150 #ifdef L_tablejump_elpm
2151 DEFUN __tablejump_elpm__
             ;; Like __tablejump__, but the 2-byte entry is read with ELPM,
             ;; i.e. from the flash address RAMPZ:Z.  The caller must have
             ;; set up RAMPZ.  Clobbers Z and __tmp_reg__.
2152 #if defined (__AVR_HAVE_ELPMX__)
2153         elpm    __tmp_reg__, Z+
2154         elpm    r31, Z
2155         mov     r30, __tmp_reg__
2156 #if defined (__AVR_HAVE_RAMPD__)
2157         ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2158         out     __RAMPZ__, __zero_reg__
2159 #endif /* RAMPD */
2160 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2161         eijmp
2162 #else
2163         ijmp
2164 #endif
2165
2166 #elif defined (__AVR_HAVE_ELPM__)
             ;; Plain ELPM only: push the entry byte by byte and use RET
             ;; to perform the indirect jump
2167         elpm
2168         adiw    r30, 1
2169         push    r0
2170         elpm
2171         push    r0
2172 #if defined (__AVR_HAVE_EIJMP_EICALL__)
             ;; 3-byte program counter: EIND supplies the top byte
2173         in      __tmp_reg__, __EIND__
2174         push    __tmp_reg__
2175 #endif
2176         ret
2177 #endif
2178 ENDF __tablejump_elpm__
2179 #endif /* defined (L_tablejump_elpm) */
2180
2181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2182 ;; Loading n bytes from Flash; n = 3,4
2183 ;; R22... = Flash[Z]
2184 ;; Clobbers: __tmp_reg__
2185
2186 #if (defined (L_load_3)        \
2187      || defined (L_load_4))    \
2188     && !defined (__AVR_HAVE_LPMX__)
2189
2190 ;; Destination
2191 #define D0  22
2192 #define D1  D0+1
2193 #define D2  D0+2
2194 #define D3  D0+3
2195
     ;; .load dest, n
     ;; Load one byte from flash at Z into register \dest (via R0).
     ;; \n is the total number of bytes of the load sequence: after every
     ;; byte but the last Z is advanced by 1; after the last byte
     ;; (\dest == D0+\n-1) Z is set back by \n-1, i.e. to where it started.
2196 .macro  .load dest, n
2197     lpm
2198     mov     \dest, r0
2199 .if \dest != D0+\n-1
2200     adiw    r30, 1
2201 .else
2202     sbiw    r30, \n-1
2203 .endif
2204 .endm
2205
2206 #if defined (L_load_3)
2207 DEFUN __load_3
         ;; Load 3 bytes from flash at Z into D0..D2.
         ;; Implemented on top of __load_4: D3 is saved around the call so
         ;; that the 4th byte loaded there is discarded again.
2208     push  D3
2209     XCALL __load_4
2210     pop   D3
2211     ret
2212 ENDF __load_3
2213 #endif /* L_load_3 */
2214
2215 #if defined (L_load_4)
2216 DEFUN __load_4
         ;; Load 4 bytes from flash at Z into D0..D3.
         ;; The last .load moves Z back by 3, so Z is unchanged on return.
2217     .load D0, 4
2218     .load D1, 4
2219     .load D2, 4
2220     .load D3, 4
2221     ret
2222 ENDF __load_4
2223 #endif /* L_load_4 */
2224
2225 #endif /* L_load_3 || L_load_4 */
2226
2227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2228 ;; Loading n bytes from Flash or RAM;  n = 1,2,3,4
2229 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2230 ;; Clobbers: __tmp_reg__, R21, R30, R31
2231
2232 #if (defined (L_xload_1)            \
2233      || defined (L_xload_2)         \
2234      || defined (L_xload_3)         \
2235      || defined (L_xload_4))
2236
2237 ;; Destination
2238 #define D0  22
2239 #define D1  D0+1
2240 #define D2  D0+2
2241 #define D3  D0+3
2242
2243 ;; Register containing bits 16+ of the address
2244
2245 #define HHI8  21
2246
     ;; .xload dest, n
     ;; Load one byte from flash into register \dest, using the best
     ;; instruction the device offers.  \n is the total number of bytes of
     ;; the load sequence; \dest == D0+\n-1 identifies the last byte, after
     ;; which the address is not advanced any more.
2247 .macro  .xload dest, n
2248 #if defined (__AVR_HAVE_ELPMX__)
2249     elpm    \dest, Z+
2250 #elif defined (__AVR_HAVE_ELPM__)
2251     elpm
2252     mov     \dest, r0
2253 .if \dest != D0+\n-1
         ;; Advance the 24-bit address HHI8:Z and update RAMPZ by hand
2254     adiw    r30, 1
2255     adc     HHI8, __zero_reg__
2256     out     __RAMPZ__, HHI8
2257 .endif
2258 #elif defined (__AVR_HAVE_LPMX__)
2259     lpm     \dest, Z+
2260 #else
2261     lpm
2262     mov     \dest, r0
2263 .if \dest != D0+\n-1
2264     adiw    r30, 1
2265 .endif
2266 #endif
2267 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2268 .if \dest == D0+\n-1
2269     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2270     out     __RAMPZ__, __zero_reg__
2271 .endif
2272 #endif
2273 .endm ; .xload
2274
2275 #if defined (L_xload_1)
2276 DEFUN __xload_1                 ;; D0 = 1 byte from RAM[Z] if HHI8.7 is set, else from flash
2277 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
2278     sbrc    HHI8, 7             ;; bit 7 clear: skip the RAM read
2279     ld      D0, Z               ;; RAM read, executed only when bit 7 is set
2280     sbrs    HHI8, 7             ;; bit 7 set: skip the flash read
2281     lpm     D0, Z               ;; flash read, executed only when bit 7 is clear
2282     ret
2283 #else
2284     sbrc    HHI8, 7             ;; bit 7 clear: skip the jump and read from flash
2285     rjmp    1f                  ;; bit 7 set: read from RAM
2286 #if defined (__AVR_HAVE_ELPM__)
2287     out     __RAMPZ__, HHI8     ;; upper address byte for elpm
2288 #endif /* __AVR_HAVE_ELPM__ */
2289     .xload  D0, 1
2290     ret
2291 1:  ld      D0, Z               ;; RAM path
2292     ret
2293 #endif /* LPMx && ! ELPM */
2294 ENDF __xload_1
2295 #endif /* L_xload_1 */
2296
2297 #if defined (L_xload_2)
2298 DEFUN __xload_2                 ;; D1:D0 = 2 bytes from RAM[Z] if HHI8.7 is set, else from flash
2299     sbrc    HHI8, 7             ;; bit 7 clear: skip the jump and read from flash
2300     rjmp    1f                  ;; bit 7 set: read from RAM
2301 #if defined (__AVR_HAVE_ELPM__)
2302     out     __RAMPZ__, HHI8     ;; upper address byte for elpm
2303 #endif /* __AVR_HAVE_ELPM__ */
2304     .xload  D0, 2               ;; lowest address goes to D0
2305     .xload  D1, 2               ;; last byte of the load
2306     ret
2307 1:  ld      D0, Z+              ;; RAM path, Z post-incremented after every byte
2308     ld      D1, Z+
2309     ret
2310 ENDF __xload_2
2311 #endif /* L_xload_2 */
2312
2313 #if defined (L_xload_3)
2314 DEFUN __xload_3                 ;; D2:D0 = 3 bytes from RAM[Z] if HHI8.7 is set, else from flash
2315     sbrc    HHI8, 7             ;; bit 7 clear: skip the jump and read from flash
2316     rjmp    1f                  ;; bit 7 set: read from RAM
2317 #if defined (__AVR_HAVE_ELPM__)
2318     out     __RAMPZ__, HHI8     ;; upper address byte for elpm
2319 #endif /* __AVR_HAVE_ELPM__ */
2320     .xload  D0, 3               ;; lowest address goes to D0
2321     .xload  D1, 3
2322     .xload  D2, 3               ;; last byte of the load
2323     ret
2324 1:  ld      D0, Z+              ;; RAM path, Z post-incremented after every byte
2325     ld      D1, Z+
2326     ld      D2, Z+
2327     ret
2328 ENDF __xload_3
2329 #endif /* L_xload_3 */
2330
2331 #if defined (L_xload_4)
2332 DEFUN __xload_4                 ;; D3:D0 = 4 bytes from RAM[Z] if HHI8.7 is set, else from flash
2333     sbrc    HHI8, 7             ;; bit 7 clear: skip the jump and read from flash
2334     rjmp    1f                  ;; bit 7 set: read from RAM
2335 #if defined (__AVR_HAVE_ELPM__)
2336     out     __RAMPZ__, HHI8     ;; upper address byte for elpm
2337 #endif /* __AVR_HAVE_ELPM__ */
2338     .xload  D0, 4               ;; lowest address goes to D0
2339     .xload  D1, 4
2340     .xload  D2, 4
2341     .xload  D3, 4               ;; last byte of the load
2342     ret
2343 1:  ld      D0, Z+              ;; RAM path, Z post-incremented after every byte
2344     ld      D1, Z+
2345     ld      D2, Z+
2346     ld      D3, Z+
2347     ret
2348 ENDF __xload_4
2349 #endif /* L_xload_4 */
2350
2351 #endif /* L_xload_{1|2|3|4} */
2352
2353 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2354 ;; memcopy from Address Space __pgmx to RAM
2355 ;; R23:Z = Source Address
2356 ;; X     = Destination Address
2357 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2358
2359 #if defined (L_movmemx)
2360
2361 #define HHI8  23
2362 #define LOOP  24
2363
2364 DEFUN __movmemx_qi
2365     ;; #Bytes to copy fit in 8 Bits (1..255)
2366     ;; Zero-extend Loop Counter to 16 Bits for __movmemx_hi
2367     clr     LOOP+1              ;; high byte of the counter pair = 0
2368     ;; FALLTHRU into __movmemx_hi, which follows directly
2369 ENDF __movmemx_qi
2370
2371 DEFUN __movmemx_hi              ;; Copy LOOP+1:LOOP bytes from HHI8:Z to RAM at X
2372
2373 ;; Read from where?  HHI8.7 set means RAM, clear means Flash
2374     sbrc    HHI8, 7             ;; bit 7 clear: skip the jump and copy from Flash
2375     rjmp    1f                  ;; bit 7 set: copy from RAM
2376
2377 ;; Read from Flash
2378
2379 #if defined (__AVR_HAVE_ELPM__)
2380     out     __RAMPZ__, HHI8     ;; upper address byte for elpm
2381 #endif
2382
2383 0:  ;; Load 1 Byte from Flash into r0 and advance the source address
2384
2385 #if defined (__AVR_HAVE_ELPMX__)
2386     elpm    r0, Z+              ;; address post-incremented by the instruction
2387 #elif defined (__AVR_HAVE_ELPM__)
2388     elpm                        ;; r0 = Flash[RAMPZ:Z]
2389     adiw    r30, 1              ;; Z += 1 ...
2390     adc     HHI8, __zero_reg__  ;; ... carry from Z goes into the upper address byte ...
2391     out     __RAMPZ__, HHI8     ;; ... which is written back to RAMPZ
2392 #elif defined (__AVR_HAVE_LPMX__)
2393     lpm     r0, Z+
2394 #else
2395     lpm                         ;; r0 = Flash[Z]
2396     adiw    r30, 1
2397 #endif
2398
2399     ;; ...and store that Byte to RAM Destination
2400     st      X+, r0
2401     sbiw    LOOP, 1             ;; 16-bit counter; it is tested only after a byte has been copied
2402     brne    0b
2403 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2404     ;; Reset RAMPZ to 0 so that EBI devices do not read garbage from RAM
2405     out __RAMPZ__, __zero_reg__
2406 #endif /* ELPM && RAMPD */
2407     ret
2408
2409 ;; Read from RAM
2410
2411 1:  ;; Read 1 Byte from RAM...
2412     ld      r0, Z+
2413     ;; ...and store that Byte to RAM Destination
2414     st      X+, r0
2415     sbiw    LOOP, 1             ;; 16-bit counter; it is tested only after a byte has been copied
2416     brne    1b
2417     ret
2418 ENDF __movmemx_hi
2419
2420 #undef HHI8
2421 #undef LOOP
2422
2423 #endif /* L_movmemx */
2424
2425 \f
2426 .section .text.libgcc.builtins, "ax", @progbits
2427
2428 /**********************************
2429  * Find first set Bit (ffs)
2430  **********************************/
2431
2432 #if defined (L_ffssi2)
2433 ;; find first set bit: 1-based index of the lowest 1 bit, 0 if the input is 0
2434 ;; r25:r24 = ffs32 (r25:r22)
2435 ;; clobbers: r22, r26
2436 DEFUN __ffssi2
2437     clr  r26                    ;; r26 = bit offset of the byte being examined
2438     tst  r22
2439     brne 1f                     ;; lowest byte is nonzero
2440     subi r26, -8                ;; r26 += 8
2441     or   r22, r23               ;; r22 was 0, so r22 = r23 and Z tells if r23 is 0
2442     brne 1f
2443     subi r26, -8                ;; r26 += 8
2444     or   r22, r24               ;; r22 = r24
2445     brne 1f
2446     subi r26, -8                ;; r26 += 8
2447     or   r22, r25               ;; r22 = r25
2448     brne 1f
2449     ret                         ;; all bytes are 0, so r25:r24 already holds the result 0
2450 1:  mov  r24, r22               ;; r24 = lowest nonzero byte
2451     XJMP __loop_ffsqi2          ;; tail call: result = r26 + ffs8 (r24)
2452 ENDF __ffssi2
2453 #endif /* defined (L_ffssi2) */
2454
2455 #if defined (L_ffshi2)
2456 ;; find first set bit: 1-based index of the lowest 1 bit, 0 if the input is 0
2457 ;; r25:r24 = ffs16 (r25:r24)
2458 ;; clobbers: r26
2459 DEFUN __ffshi2
2460     clr  r26                    ;; r26 = bit offset of the byte being examined
2461 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2462     ;; Some cores have problem skipping 2-word instruction
2463     tst  r24
2464     breq 2f                     ;; low byte is 0: look at the high byte
2465 #else
2466     cpse r24, __zero_reg__      ;; low byte is 0: skip the XJMP and land at 2
2467 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2468 1:  XJMP __loop_ffsqi2          ;; tail call: result = r26 + ffs8 (r24)
2469 2:  ldi  r26, 8                 ;; the first set bit, if any, is in the high byte
2470     or   r24, r25               ;; r24 was 0, so r24 = r25
2471     brne 1b
2472     ret                         ;; both bytes are 0, so r25:r24 already holds the result 0
2473 ENDF __ffshi2
2474 #endif /* defined (L_ffshi2) */
2475
2476 #if defined (L_loop_ffsqi2)
2477 ;; Helper for ffshi2, ffssi2
2478 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2479 ;; r24 must be != 0, otherwise the loop never sees a 1 bit
2480 ;; clobbers: r26
2481 DEFUN __loop_ffsqi2
2482     inc  r26                    ;; one more bit position consumed
2483     lsr  r24                    ;; lowest remaining bit goes to carry
2484     brcc __loop_ffsqi2          ;; it was 0: keep searching
2485     mov  r24, r26               ;; low byte of the result
2486     clr  r25                    ;; zero-extend to 16 bits
2487     ret
2488 ENDF __loop_ffsqi2
2489 #endif /* defined (L_loop_ffsqi2) */
2490
2491 \f
2492 /**********************************
2493  * Count trailing Zeros (ctz)
2494  **********************************/
2495
2496 #if defined (L_ctzsi2)
2497 ;; count trailing zeros, computed as ffs32 - 1
2498 ;; r25:r24 = ctz32 (r25:r22)
2499 ;; clobbers: r26, r22
2500 ;; ctz(0) = 255
2501 ;; Note that ctz(0) is undefined for GCC
2502 DEFUN __ctzsi2
2503     XCALL __ffssi2              ;; r25:r24 = 1-based index of the lowest set bit, 0 for input 0
2504     dec  r24                    ;; 1-based to 0-based; an ffs result of 0 wraps to 255
2505     ret
2506 ENDF __ctzsi2
2507 #endif /* defined (L_ctzsi2) */
2508
2509 #if defined (L_ctzhi2)
2510 ;; count trailing zeros, computed as ffs16 - 1
2511 ;; r25:r24 = ctz16 (r25:r24)
2512 ;; clobbers: r26
2513 ;; ctz(0) = 255
2514 ;; Note that ctz(0) is undefined for GCC
2515 DEFUN __ctzhi2
2516     XCALL __ffshi2              ;; r25:r24 = 1-based index of the lowest set bit, 0 for input 0
2517     dec  r24                    ;; 1-based to 0-based; an ffs result of 0 wraps to 255
2518     ret
2519 ENDF __ctzhi2
2520 #endif /* defined (L_ctzhi2) */
2521
2522 \f
2523 /**********************************
2524  * Count leading Zeros (clz)
2525  **********************************/
2526
2527 #if defined (L_clzdi2)
2528 ;; count leading zeros; clz64 (0) = 64
2529 ;; r25:r24 = clz64 (r25:r18)
2530 ;; clobbers: r22, r23, r26
2531 DEFUN __clzdi2
2532     XCALL __clzsi2              ;; count in the high 32 bits r25:r22, result 0..32
2533     sbrs r24, 5                 ;; bit 5 set means 32: high half is all zero, skip the ret
2534     ret                         ;; high half has a set bit: done
2535     mov_l r22, r18              ;; move the low 32 bits r21:r18 into r25:r22
2536     mov_h r23, r19
2537     mov_l r24, r20
2538     mov_h r25, r21
2539     XCALL __clzsi2              ;; count in the low 32 bits
2540     subi r24, -32               ;; add the 32 zero bits of the high half
2541     ret
2542 ENDF __clzdi2
2543 #endif /* defined (L_clzdi2) */
2544
2545 #if defined (L_clzsi2)
2546 ;; count leading zeros; clz32 (0) = 32
2547 ;; r25:r24 = clz32 (r25:r22)
2548 ;; clobbers: r26
2549 DEFUN __clzsi2
2550     XCALL __clzhi2              ;; count in the high 16 bits r25:r24, result 0..16
2551     sbrs r24, 4                 ;; bit 4 set means 16: high half is all zero, skip the ret
2552     ret                         ;; high half has a set bit: done
2553     mov_l r24, r22              ;; move the low 16 bits r23:r22 into r25:r24
2554     mov_h r25, r23
2555     XCALL __clzhi2              ;; count in the low 16 bits
2556     subi r24, -16               ;; add the 16 zero bits of the high half
2557     ret
2558 ENDF __clzsi2
2559 #endif /* defined (L_clzsi2) */
2560
2561 #if defined (L_clzhi2)
2562 ;; count leading zeros; clz16 (0) = 16
2563 ;; r25:r24 = clz16 (r25:r24)
2564 ;; clobbers: r26
2565 DEFUN __clzhi2
2566     clr  r26                    ;; r26 = running count of leading zeros
2567     tst  r25
2568     brne 1f                     ;; high byte is nonzero: examine it
2569     subi r26, -8                ;; high byte is 0: that is 8 leading zeros
2570     or   r25, r24               ;; r25 was 0, so r25 = low byte
2571     brne 1f
2572     ldi  r24, 16                ;; input is 0: result 16, r25 is already 0
2573     ret
2574 1:  cpi  r25, 16
2575     brsh 3f                     ;; a bit in the high nibble is set: go shift bit by bit
2576     subi r26, -3                ;; high nibble is 0: count 4 zeros, 3 here plus the inc at 2
2577     swap r25                    ;; low nibble becomes the high nibble
2578 2:  inc  r26                    ;; one more leading zero
2579 3:  lsl  r25                    ;; top bit goes to carry
2580     brcc 2b                     ;; it was 0: count it and continue
2581     mov  r24, r26               ;; low byte of the result
2582     clr  r25                    ;; zero-extend to 16 bits
2583     ret
2584 ENDF __clzhi2
2585 #endif /* defined (L_clzhi2) */
2586
2587 \f
2588 /**********************************
2589  * Parity
2590  **********************************/
2591
2592 #if defined (L_paritydi2)
2593 ;; r25:r24 = parity64 (r25:r18)
2594 ;; clobbers: __tmp_reg__
2595 DEFUN __paritydi2
2596     eor  r24, r18               ;; fold the low 4 bytes into r24 by XOR
2597     eor  r24, r19
2598     eor  r24, r20
2599     eor  r24, r21
2600     XJMP __paritysi2            ;; tail call on the remaining 32 bits r25:r22
2601 ENDF __paritydi2
2602 #endif /* defined (L_paritydi2) */
2603
2604 #if defined (L_paritysi2)
2605 ;; r25:r24 = parity32 (r25:r22)
2606 ;; clobbers: __tmp_reg__
2607 DEFUN __paritysi2
2608     eor  r24, r22               ;; fold the low 2 bytes into r24 by XOR
2609     eor  r24, r23
2610     XJMP __parityhi2            ;; tail call on the remaining 16 bits r25:r24
2611 ENDF __paritysi2
2612 #endif /* defined (L_paritysi2) */
2613
2614 #if defined (L_parityhi2)
2615 ;; r25:r24 = parity16 (r25:r24)
2616 ;; clobbers: __tmp_reg__
2617 DEFUN __parityhi2
2618     eor  r24, r25               ;; fold the high byte into r24 by XOR
2619 ;; FALLTHRU into __parityqi2, which follows directly
2620 ENDF __parityhi2
2621
2622 ;; r25:r24 = parity8 (r24): 1 if r24 has an odd number of set bits, else 0
2623 ;; clobbers: __tmp_reg__
2624 DEFUN __parityqi2
2625     ;; parity is in r24[0..7]
2626     mov  __tmp_reg__, r24
2627     swap __tmp_reg__            ;; exchange the nibbles of the copy
2628     eor  r24, __tmp_reg__       ;; low nibble = high nibble XOR low nibble
2629     ;; parity is in r24[0..3]
2630     subi r24, -4                ;; +4: the carry out of bit 2 gives bit 3 ^= bit 2
2631     andi r24, -5                ;; clear bit 2
2632     subi r24, -6                ;; +6: bit 1 carries through bit 2, giving bit 3 ^= bit 1
2633     ;; parity is in r24[0,3]
2634     sbrc r24, 3                 ;; bit 3 clear: skip the inc
2635     inc  r24                    ;; bit 3 set: toggle bit 0
2636     ;; parity is in r24[0]
2637     andi r24, 1                 ;; drop everything but the parity bit
2638     clr  r25                    ;; zero-extend to 16 bits
2639     ret
2640 ENDF __parityqi2
2641 #endif /* defined (L_parityhi2) */
2642
2643 \f
2644 /**********************************
2645  * Population Count
2646  **********************************/
2647
2648 #if defined (L_popcounthi2)
2649 ;; population count: number of set bits
2650 ;; r25:r24 = popcount16 (r25:r24)
2651 ;; clobbers: __tmp_reg__
2652 DEFUN __popcounthi2
2653     XCALL __popcountqi2         ;; r24 = popcount of the low byte
2654     push r24                    ;; park it on the stack for __popcounthi2_tail
2655     mov  r24, r25
2656     XCALL __popcountqi2         ;; r24 = popcount of the high byte
2657     clr  r25                    ;; high byte of the 16-bit result
2658     ;; FALLTHRU into __popcounthi2_tail, which follows directly
2659 ENDF __popcounthi2
2660
2661 DEFUN __popcounthi2_tail        ;; Shared exit: add the partial count pushed by the caller to r24
2662     pop   __tmp_reg__           ;; partial count saved earlier with push r24
2663     add   r24, __tmp_reg__      ;; r24 = sum of both partial counts
2664     ret
2665 ENDF __popcounthi2_tail
2666 #endif /* defined (L_popcounthi2) */
2667
2668 #if defined (L_popcountsi2)
2669 ;; population count: number of set bits
2670 ;; r25:r24 = popcount32 (r25:r22)
2671 ;; clobbers: __tmp_reg__
2672 DEFUN __popcountsi2
2673     XCALL __popcounthi2         ;; r24 = popcount of the high word r25:r24
2674     push  r24                   ;; park it on the stack for __popcounthi2_tail
2675     mov_l r24, r22              ;; move the low word r23:r22 into r25:r24
2676     mov_h r25, r23
2677     XCALL __popcounthi2         ;; r25:r24 = popcount of the low word
2678     XJMP  __popcounthi2_tail    ;; pops the parked count, adds it and returns
2679 ENDF __popcountsi2
2680 #endif /* defined (L_popcountsi2) */
2681
2682 #if defined (L_popcountdi2)
2683 ;; population count: number of set bits
2684 ;; r25:r24 = popcount64 (r25:r18)
2685 ;; clobbers: r22, r23, __tmp_reg__
2686 DEFUN __popcountdi2
2687     XCALL __popcountsi2         ;; r24 = popcount of the high 32 bits r25:r22
2688     push  r24                   ;; park it on the stack for __popcounthi2_tail
2689     mov_l r22, r18              ;; move the low 32 bits r21:r18 into r25:r22
2690     mov_h r23, r19
2691     mov_l r24, r20
2692     mov_h r25, r21
2693     XCALL __popcountsi2         ;; r25:r24 = popcount of the low 32 bits
2694     XJMP  __popcounthi2_tail    ;; pops the parked count, adds it and returns
2695 ENDF __popcountdi2
2696 #endif /* defined (L_popcountdi2) */
2697
2698 #if defined (L_popcountqi2)
2699 ;; population count: number of set bits; r25 is not touched
2700 ;; r24 = popcount8 (r24)
2701 ;; clobbers: __tmp_reg__
2702 DEFUN __popcountqi2
2703     mov  __tmp_reg__, r24       ;; work on a copy of the input
2704     andi r24, 1                 ;; count starts as bit 0
2705     lsr  __tmp_reg__            ;; shift out bit 0, which is already counted
2706     lsr  __tmp_reg__            ;; carry = bit 1
2707     adc  r24, __zero_reg__      ;; count += carry
2708     lsr  __tmp_reg__            ;; carry = bit 2
2709     adc  r24, __zero_reg__
2710     lsr  __tmp_reg__            ;; carry = bit 3
2711     adc  r24, __zero_reg__
2712     lsr  __tmp_reg__            ;; carry = bit 4
2713     adc  r24, __zero_reg__
2714     lsr  __tmp_reg__            ;; carry = bit 5
2715     adc  r24, __zero_reg__
2716     lsr  __tmp_reg__            ;; carry = bit 6, and the copy is now just bit 7
2717     adc  r24, __tmp_reg__       ;; count += bit 7 + carry (bit 6)
2718     ret
2719 ENDF __popcountqi2
2720 #endif /* defined (L_popcountqi2) */
2721
2722 \f
2723 /**********************************
2724  * Swap bytes
2725  **********************************/
2726
2727 ;; swap two registers with different register number (XOR swap, needs no scratch register)
2728 .macro bswap a, b
2729     eor \a, \b                  ;; a = a ^ b
2730     eor \b, \a                  ;; b = original a
2731     eor \a, \b                  ;; a = original b
2732 .endm
2733
2734 #if defined (L_bswapsi2)
2735 ;; swap bytes: reverse the byte order in place
2736 ;; r25:r22 = bswap32 (r25:r22)
2737 DEFUN __bswapsi2
2738     bswap r22, r25              ;; outer byte pair
2739     bswap r23, r24              ;; inner byte pair
2740     ret
2741 ENDF __bswapsi2
2742 #endif /* defined (L_bswapsi2) */
2743
2744 #if defined (L_bswapdi2)
2745 ;; swap bytes: reverse the byte order in place
2746 ;; r25:r18 = bswap64 (r25:r18)
2747 DEFUN __bswapdi2
2748     bswap r18, r25              ;; outermost byte pair
2749     bswap r19, r24
2750     bswap r20, r23
2751     bswap r21, r22              ;; innermost byte pair
2752     ret
2753 ENDF __bswapdi2
2754 #endif /* defined (L_bswapdi2) */
2755
2756 \f
2757 /**********************************
2758  * 64-bit shifts
2759  **********************************/
2760
2761 #if defined (L_ashrdi3)
2762 ;; Arithmetic shift right; only bits 0..5 of r16 are used as the count
2763 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
2764 DEFUN __ashrdi3
2765     push r16                    ;; r16 serves as loop counter and is restored before returning
2766     andi r16, 63                ;; count modulo 64
2767     breq 2f                     ;; count 0: nothing to shift
2768 1:  asr  r25                    ;; top byte keeps its sign bit, bit 0 goes to carry
2769     ror  r24                    ;; carry ripples down through the lower bytes
2770     ror  r23
2771     ror  r22
2772     ror  r21
2773     ror  r20
2774     ror  r19
2775     ror  r18
2776     dec  r16                    ;; one bit position per pass
2777     brne 1b
2778 2:  pop  r16
2779     ret
2780 ENDF __ashrdi3
2781 #endif /* defined (L_ashrdi3) */
2782
2783 #if defined (L_lshrdi3)
2784 ;; Logical shift right; only bits 0..5 of r16 are used as the count
2785 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
2786 DEFUN __lshrdi3
2787     push r16                    ;; r16 serves as loop counter and is restored before returning
2788     andi r16, 63                ;; count modulo 64
2789     breq 2f                     ;; count 0: nothing to shift
2790 1:  lsr  r25                    ;; top byte gets a 0 shifted in, bit 0 goes to carry
2791     ror  r24                    ;; carry ripples down through the lower bytes
2792     ror  r23
2793     ror  r22
2794     ror  r21
2795     ror  r20
2796     ror  r19
2797     ror  r18
2798     dec  r16                    ;; one bit position per pass
2799     brne 1b
2800 2:  pop  r16
2801     ret
2802 ENDF __lshrdi3
2803 #endif /* defined (L_lshrdi3) */
2804
2805 #if defined (L_ashldi3)
2806 ;; Shift left; only bits 0..5 of r16 are used as the count
2807 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
2808 DEFUN __ashldi3
2809     push r16                    ;; r16 serves as loop counter and is restored before returning
2810     andi r16, 63                ;; count modulo 64
2811     breq 2f                     ;; count 0: nothing to shift
2812 1:  lsl  r18                    ;; bottom byte gets a 0 shifted in, bit 7 goes to carry
2813     rol  r19                    ;; carry ripples up through the higher bytes
2814     rol  r20
2815     rol  r21
2816     rol  r22
2817     rol  r23
2818     rol  r24
2819     rol  r25
2820     dec  r16                    ;; one bit position per pass
2821     brne 1b
2822 2:  pop  r16
2823     ret
2824 ENDF __ashldi3
2825 #endif /* defined (L_ashldi3) */
2826
2827 #if defined (L_rotldi3)
2828 ;; Rotate left; only bits 0..5 of r16 are used as the count
2829 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
2830 DEFUN __rotldi3
2831     push r16                    ;; r16 serves as loop counter and is restored before returning
2832     andi r16, 63                ;; count modulo 64
2833     breq 2f                     ;; count 0: nothing to rotate
2834 1:  lsl  r18                    ;; bit 0 of r18 becomes 0, bit 7 goes to carry
2835     rol  r19                    ;; carry ripples up through the higher bytes
2836     rol  r20
2837     rol  r21
2838     rol  r22
2839     rol  r23
2840     rol  r24
2841     rol  r25                    ;; carry = bit 63 of the old value
2842     adc  r18, __zero_reg__      ;; wrap that bit around into bit 0
2843     dec  r16                    ;; one bit position per pass
2844     brne 1b
2845 2:  pop  r16
2846     ret
2847 ENDF __rotldi3
2848 #endif /* defined (L_rotldi3) */
2849
2850 \f
2851 .section .text.libgcc.fmul, "ax", @progbits
2852
2853 /***********************************************************/
2854 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
2855 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
2856 /***********************************************************/
2857
2858 #define A1 24
2859 #define B1 25
2860 #define C0 22
2861 #define C1 23
2862 #define A0 __tmp_reg__
2863
2864 #ifdef L_fmuls
2865 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction; both operands are signed
2866 ;;; Clobbers: r24, r25, __tmp_reg__
2867 DEFUN __fmuls
2868     ;; A0.7 = negate result?  Set iff the operand signs differ
2869     mov  A0, A1
2870     eor  A0, B1
2871     ;; B1 = |B1|
2872     sbrc B1, 7                  ;; B1 is non-negative: skip the neg
2873     neg  B1
2874     XJMP __fmulsu_exit          ;; takes |A1|, multiplies and applies the sign kept in A0.7
2875 ENDF __fmuls
2876 #endif /* L_fmuls */
2877
2878 #ifdef L_fmulsu
2879 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction; r24 is signed, r25 is unsigned
2880 ;;; Clobbers: r24, r25, __tmp_reg__
2881 DEFUN __fmulsu
2882     ;; A0.7 = negate result?  Here simply the sign of A1
2883     mov  A0, A1
2884 ;; FALLTHRU into __fmulsu_exit, which follows directly
2885 ENDF __fmulsu
2886
2887 ;; Helper for __fmuls and __fmulsu: expects the result sign in A0.7
2888 DEFUN __fmulsu_exit
2889     ;; A1 = |A1|
2890     sbrc A1, 7                  ;; A1 is non-negative: skip the neg
2891     neg  A1
2892 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2893     ;; Some cores have problem skipping 2-word instruction
2894     tst  A0
2895     brmi 1f                     ;; A0.7 set: the result has to be negated
2896 #else
2897     sbrs A0, 7                  ;; A0.7 set: skip the tail call and go on at 1
2898 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2899     XJMP  __fmul                ;; no negation needed: tail call, __fmul returns to the caller
2900 1:  XCALL __fmul                ;; negation needed: call so that control comes back here
2901     ;; C = -C iff A0.7 = 1
2902     NEG2 C0
2903     ret
2904 ENDF __fmulsu_exit
2905 #endif /* L_fmulsu */
2906
2907
2908 #ifdef L_fmul
2909 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction; both operands are unsigned
2910 ;;; Clobbers: r24, r25, __tmp_reg__
2911 DEFUN __fmul
2912     ; clear result
2913     clr   C0
2914     clr   C1
2915     clr   A0                    ;; A0 = low byte of the 16-bit addend A1:A0
2916 1:  tst   B1                    ;; N = bit 7 of B for the first pass
2917     ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
2918 2:  brpl  3f                    ;; bit 7 of B is 0: skip the addition
2919     ;; C += A
2920     add   C0, A0
2921     adc   C1, A1
2922 3:  ;; A >>= 1
2923     lsr   A1
2924     ror   A0
2925     ;; B <<= 1
2926     lsl   B1                    ;; also sets N (new bit 7) and Z for the branches below
2927     brne  2b                    ;; bits of B remain: next pass, N is still valid at 2
2928     ret
2929 ENDF __fmul
2930 #endif /* L_fmul */
2931
2932 #undef A0
2933 #undef A1
2934 #undef B1
2935 #undef C0
2936 #undef C1
2937
2938 #include "lib1funcs-fixed.S"