libgcc/config/avr/lib1funcs.S

   1 /*  -*- Mode: Asm -*-  */
   2 /* Copyright (C) 1998-2014 Free Software Foundation, Inc.
   3    Contributed by Denis Chertykov <chertykov@gmail.com>
   4
   5 This file is free software; you can redistribute it and/or modify it
   6 under the terms of the GNU General Public License as published by the
   7 Free Software Foundation; either version 3, or (at your option) any
   8 later version.
   9
  10 This file is distributed in the hope that it will be useful, but
  11 WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 General Public License for more details.
  14
  15 Under Section 7 of GPL version 3, you are granted additional
  16 permissions described in the GCC Runtime Library Exception, version
  17 3.1, as published by the Free Software Foundation.
  18
  19 You should have received a copy of the GNU General Public License and
  20 a copy of the GCC Runtime Library Exception along with this program;
  21 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24 #define __zero_reg__ r1     /* register expected to hold 0; routines here that use MUL clear it again before returning */
  25 #define __tmp_reg__ r0      /* scratch register */
  26 #define __SREG__ 0x3f       /* presumably the I/O address of the status register; not referenced in this part of the file */
  27 #if defined (__AVR_HAVE_SPH__)
  28 #define __SP_H__ 0x3e       /* I/O address read with IN for the stack pointer high byte (devices with SPH only) */
  29 #endif
  30 #define __SP_L__ 0x3d       /* I/O address read with IN for the stack pointer low byte */
  31 #define __RAMPZ__ 0x3B      /* presumably the I/O address of RAMPZ; not referenced in this part of the file */
  32 #define __EIND__  0x3C      /* presumably the I/O address of EIND; not referenced in this part of the file */
  33
  34 /* Most of the functions here are called directly from avr.md
  35    patterns, instead of using the standard libcall mechanisms.
  36    This can make better code because GCC knows exactly which
  37    of the call-used registers (not all of them) are clobbered.  */
  38
  39 /* FIXME:  At present, there is no SORT directive in the linker
  40            script so that we must not assume that different modules
  41            in the same input section like .libgcc.text.mul will be
  42            located close together.  Therefore, we cannot use
  43            RCALL/RJMP to call a function like __udivmodhi4 from
  44            __divmodhi4 and have to use lengthy XCALL/XJMP even
  45            though they are in the same input section and all same
  46            input sections together are small enough to reach every
  47            location with a RCALL/RJMP instruction.  */
  48 /* Build-time guard: a device with EIJMP/EICALL but without ELPMX is rejected.  */
  49 #if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
  50 #error device not supported
  51 #endif
  52 ;; mov_l: copy the low byte of a register pair; with MOVW the whole pair is copied at once
  53         .macro  mov_l  r_dest, r_src
  54 #ifndef __AVR_HAVE_MOVW__
  55         mov     \r_dest, \r_src
  56 #else
  57         movw    \r_dest, \r_src
  58 #endif
  59         .endm
  60 ;; mov_h: copy the high byte of a register pair; emits no instruction when MOVW is available
  61         .macro  mov_h  r_dest, r_src
  62 #ifndef __AVR_HAVE_MOVW__
  63         mov     \r_dest, \r_src
  64 #else
  65         ; empty
  66 #endif
  67         .endm
  68 ;; wmov: copy the 16-bit value in r_src+1:r_src to r_dest+1:r_dest (one MOVW or two MOVs)
  69 .macro  wmov  r_dest, r_src
  70 #ifndef __AVR_HAVE_MOVW__
  71     mov \r_dest,    \r_src
  72     mov \r_dest+1,  \r_src+1
  73 #else
  74     movw \r_dest,   \r_src
  75 #endif
  76 .endm
  77 /* XCALL / XJMP: CALL / JMP where the device has them, RCALL / RJMP otherwise.  */
  78 #ifndef __AVR_HAVE_JMP_CALL__
  79 #define XCALL rcall
  80 #define XJMP  rjmp
  81 #else
  82 #define XCALL call
  83 #define XJMP  jmp
  84 #endif
  85 /* XICALL / XIJMP: EICALL / EIJMP where the device has them, ICALL / IJMP otherwise.  */
  86 #ifndef __AVR_HAVE_EIJMP_EICALL__
  87 #define XICALL icall
  88 #define XIJMP  ijmp
  89 #else
  90 #define XICALL eicall
  91 #define XIJMP  eijmp
  92 #endif
  93
  94 ;; Prologue stuff
  95 ;; do_prologue_saves: load X and Z, then jump into __prologue_saves__ (defined elsewhere) at an offset chosen by n_pushed
  96 .macro do_prologue_saves n_pushed n_frame=0
  97     ldi r26, lo8(\n_frame)                  ; X = n_frame (frame size argument)
  98     ldi r27, hi8(\n_frame)
  99     ldi r30, lo8(gs(.L_prologue_saves.\@))  ; Z = gs() address of the local label below;
 100     ldi r31, hi8(gs(.L_prologue_saves.\@))  ; presumably __prologue_saves__ continues there - TODO confirm
 101     XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)   ; enter 2 bytes later for each register below 18 that is not pushed
 102 .L_prologue_saves.\@:
 103 .endm
 104
 105 ;; Epilogue stuff
 106 ;; do_epilogue_restores: set Y = SP + n_frame and r30 = n_pushed, then jump into __epilogue_restores__ (defined elsewhere)
 107 .macro do_epilogue_restores n_pushed n_frame=0
 108     in      r28, __SP_L__           ; Y = current stack pointer
 109 #ifdef __AVR_HAVE_SPH__
 110     in      r29, __SP_H__
 111 .if \n_frame > 63
 112     subi    r28, lo8(-\n_frame)     ; frame too large for ADIW: add by subtracting the negated size
 113     sbci    r29, hi8(-\n_frame)
 114 .elseif \n_frame > 0
 115     adiw    r28, \n_frame           ; small frame: Y += n_frame in one instruction
 116 .endif
 117 #else
 118     clr     r29                     ; no SPH: high byte of Y is 0
 119 .if \n_frame > 0
 120     subi    r28, lo8(-\n_frame)     ; Y += n_frame (8-bit stack pointer)
 121 .endif
 122 #endif /* HAVE SPH */
 123     ldi     r30, \n_pushed          ; register count handed over in r30
 124     XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)   ; enter 2 bytes later for each register below 18 that was not pushed
 125 .endm
 126
 127 ;; Support function entry and exit for convenience
 128 ;; DEFUN name: make NAME global, open a .func scope for it and place its entry label here
 129 .macro DEFUN name
 130 .global \name
 131 .func \name
 132 \name:
 133 .endm
 134 ;; ENDF name: set the size of NAME to the bytes emitted since its label and close the .func scope
 135 .macro ENDF name
 136 .size \name, .-\name
 137 .endfunc
 138 .endm
 139 ;; FALIAS name: define NAME as an extra global label at the current location; its own size is 0
 140 .macro FALIAS name
 141 .global \name
 142 .func \name
 143 \name:
 144 .size \name, .-\name
 145 .endfunc
 146 .endm
 147
 148 ;; Skip next instruction, typically a jump target (CPSE of R0 with itself always compares equal)
 149 #define skip cpse 0,0
 150
 151 ;; Negate a 2-byte value held in consecutive registers (REG+1 must accept SBCI)
 152 .macro NEG2  reg
 153     com     \reg+1          ; high = ~high
 154     neg     \reg            ; low = -low; carry is set unless low was 0
 155     sbci    \reg+1, -1      ; high = ~high + 1 - carry
 156 .endm
 157
 158 ;; Negate a 4-byte value held in consecutive registers
 159 ;; Sets the V flag for signed overflow tests if REG >= 16
 160 .macro NEG4  reg
 161     com     \reg+3          ; complement the three upper bytes first
 162     com     \reg+2
 163     com     \reg+1
 164 .if \reg >= 16
 165     neg     \reg            ; upper registers: negate byte 0 ...
 166     sbci    \reg+1, -1      ; ... and add 1 - carry to each higher byte
 167     sbci    \reg+2, -1
 168     sbci    \reg+3, -1
 169 .else
 170     com     \reg            ; lower registers: COM leaves carry set ...
 171     adc     \reg,   __zero_reg__    ; ... so this ADC chain adds 1 to the complement
 172     adc     \reg+1, __zero_reg__
 173     adc     \reg+2, __zero_reg__
 174     adc     \reg+3, __zero_reg__
 175 .endif
 176 .endm
 177
 178 #define exp_lo(N)  hlo8 ((N) << 23)   /* bits 16..23 of N << 23; presumably N is a float exponent - TODO confirm against users */
 179 #define exp_hi(N)  hhi8 ((N) << 23)   /* bits 24..31 of N << 23 */
 180
 181 \f
 182 .section .text.libgcc.mul, "ax", @progbits
 183
 184 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 185 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
 186 #if !defined (__AVR_HAVE_MUL__)
 187 /*******************************************************
 188     Multiplication  8 x 8  without MUL
 189 *******************************************************/
 190 #if defined (L_mulqi3)
 191 ;; __mulqi3: R24 = low 8 bits of R22 * R24 by shift-and-add; R22 and __tmp_reg__ are changed
 192 #define r_arg2  r22             /* multiplicand */
 193 #define r_arg1  r24             /* multiplier */
 194 #define r_res   __tmp_reg__     /* result */
 195
 196 DEFUN __mulqi3
 197         clr     r_res           ; clear result
 198 __mulqi3_loop:
 199         sbrc    r_arg1,0        ; if bit 0 of the multiplier is set ...
 200         add     r_res,r_arg2    ; ... add the shifted multiplicand
 201         add     r_arg2,r_arg2   ; shift multiplicand
 202         breq    __mulqi3_exit   ; while multiplicand != 0
 203         lsr     r_arg1          ; move the next multiplier bit into bit 0
 204         brne    __mulqi3_loop   ; exit if multiplier = 0
 205 __mulqi3_exit:
 206         mov     r_arg1,r_res    ; result to return register
 207         ret
 208 ENDF __mulqi3
 209
 210 #undef r_arg2
 211 #undef r_arg1
 212 #undef r_res
 213
 214 #endif  /* defined (L_mulqi3) */
 215
 216
 217 /*******************************************************
 218     Widening Multiplication  16 = 8 x 8  without MUL
 219     Multiplication  16 x 16  without MUL
 220 *******************************************************/
 221
 222 #define A0  r22     /* A1:A0 = R23:R22: first operand, shifted right bit by bit */
 223 #define A1  r23
 224 #define B0  r24     /* B1:B0 = R25:R24: second operand, shifted left and added */
 225 #define BB0 r20     /* copy of B0 (or 0) that __mulqihi3 keeps for its sign fix-up */
 226 #define B1  r25
 227 ;; Output overlaps input, thus expand result in CC0/1
 228 #define C0  r24     /* C1:C0 = R25:R24: return registers */
 229 #define C1  r25
 230 #define CC0  __tmp_reg__    /* CC1:CC0 = R21:R0: working copy of the result */
 231 #define CC1  R21
 232
 233 #ifdef L_umulqihi3
 234 ;;; Unsigned widening multiplication 16 = 8 x 8:
 235 ;;; R25:R24 (C1:C0) = (unsigned int) R22 * (unsigned int) R24
 236 ;;; Clobbers: __tmp_reg__, R21..R23
 237 DEFUN __umulqihi3
 238     clr     B1          ; zero-extend the operand in R24 ...
 239     clr     A1          ; ... and the operand in R22 to 16 bits
 240     XJMP    __mulhi3    ; tail-call the 16 x 16 multiplication
 241 ENDF __umulqihi3
 242 #endif /* L_umulqihi3 */
 243
 244 #if defined (L_mulqihi3)
 245 ;;; R25:R24 = (signed int) R22 * (signed int) R24
 246 ;;; (C1:C0) = (signed int) A0  * (signed int) B0
 247 ;;; Clobbers: __tmp_reg__, R20..R23
 248 DEFUN __mulqihi3
 249     ;; Sign-extend B0
 250     clr     B1
 251     sbrc    B0, 7
 252     com     B1
 253     ;; The multiplication runs twice as fast if A1 is zero, thus:
 254     ;; Zero-extend A0
 255     clr     A1
 256 #ifdef __AVR_HAVE_JMP_CALL__
 257     ;; Store  B0 * sign of A
 258     clr     BB0         ; BB0 = 0 if A >= 0 ...
 259     sbrc    A0, 7
 260     mov     BB0, B0     ; ... else BB0 = B0
 261     call    __mulhi3
 262 #else /* have no CALL */
 263     ;; Skip sign-extension of A if A >= 0
 264     ;; Same size as with the first alternative but avoids errata skip
 265     ;; and is faster if A >= 0
 266     sbrs    A0, 7
 267     rjmp    __mulhi3    ; A >= 0: tail-call, the fix-up below is not needed
 268     ;; If  A < 0  store B
 269     mov     BB0, B0
 270     rcall   __mulhi3
 271 #endif /* HAVE_JMP_CALL */
 272     ;; 1-extend A after the multiplication
 273     sub     C1, BB0     ; A was multiplied zero-extended: for A < 0 take B0 off the high byte again
 274     ret
 275 ENDF __mulqihi3
 276 #endif /* L_mulqihi3 */
 277
 278 #if defined (L_mulhi3)
 279 ;;; R25:R24 = R23:R22 * R25:R24
 280 ;;; (C1:C0) = (A1:A0) * (B1:B0)
 281 ;;; Clobbers: __tmp_reg__, R21..R23
 282 DEFUN __mulhi3
 283 ;;; Shift-and-add loop that ends as soon as the shifted B or the remaining A is zero
 284     ;; Clear result
 285     clr     CC0
 286     clr     CC1
 287     rjmp 3f             ; enter the loop at the zero test of B
 288 1:
 289     ;; Bit n of A is 1  -->  C += B << n
 290     add     CC0, B0
 291     adc     CC1, B1
 292 2:
 293     lsl     B0          ; B <<= 1
 294     rol     B1
 295 3:
 296     ;; If B == 0 we are ready
 297     sbiw    B0, 0
 298     breq 9f
 299
 300     ;; Carry = n-th bit of A
 301     lsr     A1
 302     ror     A0
 303     ;; If bit n of A is set, then go add  B * 2^n  to  C
 304     brcs 1b
 305
 306     ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
 307     ;; Thus, it is sufficient to CPC the high part to test A against 0
 308     cpc     A1, __zero_reg__
 309     ;; Only proceed if A != 0
 310     brne    2b
 311 9:
 312     ;; Move Result into place
 313     mov     C0, CC0
 314     mov     C1, CC1
 315     ret
 316 ENDF  __mulhi3
 317 #endif /* L_mulhi3 */
 318
 319 #undef A0
 320 #undef A1
 321 #undef B0
 322 #undef BB0
 323 #undef B1
 324 #undef C0
 325 #undef C1
 326 #undef CC0
 327 #undef CC1
 328
 329 \f
 330 #define A0 22      /* A[0..3] = R25:R22: operand that is shifted right bit by bit */
 331 #define A1 A0+1
 332 #define A2 A0+2
 333 #define A3 A0+3
 334
 335 #define B0 18      /* B[0..3] = R21:R18: operand that is shifted left and added */
 336 #define B1 B0+1
 337 #define B2 B0+2
 338 #define B3 B0+3
 339
 340 #define CC0 26     /* CC[0..3] = R31:R30:R27:R26: working copy of the result */
 341 #define CC1 CC0+1
 342 #define CC2 30
 343 #define CC3 CC2+1
 344
 345 #define C0 22      /* C[0..3] = R25:R22: return registers, the same registers as A[] */
 346 #define C1 C0+1
 347 #define C2 C0+2
 348 #define C3 C0+3
 349
 350 /*******************************************************
 351     Widening Multiplication  32 = 16 x 16  without MUL
 352 *******************************************************/
 353 ;; __umulhisi3 (no MUL): zero-extend the 16-bit operands in R23:R22 and R25:R24, then tail-call __mulsi3
 354 #if defined (L_umulhisi3)
 355 DEFUN __umulhisi3
 356     wmov    B0, 24      ; B1:B0 = R25:R24, which frees R25:R24 for the high word of A
 357     ;; Zero-extend B
 358     clr     B2
 359     clr     B3
 360     ;; Zero-extend A
 361     wmov    A2, B2      ; A3:A2 = 0, copied from the B3:B2 just cleared
 362     XJMP    __mulsi3
 363 ENDF __umulhisi3
 364 #endif /* L_umulhisi3 */
 365 ;; __mulhisi3 (no MUL): signed widening multiply of R23:R22 and R25:R24 on top of __mulsi3 / __mulsi3_helper
 366 #if defined (L_mulhisi3)
 367 DEFUN __mulhisi3
 368     wmov    B0, 24      ; B1:B0 = R25:R24
 369     ;; Sign-extend B
 370     lsl     r25         ; carry = sign bit of B (R25 is free, B was copied above)
 371     sbc     B2, B2      ; B2 = 0x00 or 0xff according to carry
 372     mov     B3, B2
 373 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
 374     ;; Sign-extend A
 375     clr     A2
 376     sbrc    A1, 7
 377     com     A2
 378     mov     A3, A2
 379     XJMP __mulsi3
 380 #else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
 381     ;; Zero-extend A and __mulsi3 will run at least twice as fast
 382     ;; compared to a sign-extended A.
 383     clr     A2
 384     clr     A3
 385     sbrs    A1, 7       ; A >= 0: the plain multiplication is already correct
 386     XJMP __mulsi3
 387     ;; If  A < 0  then perform the  B * 0xffff.... before the
 388     ;; very multiplication by initializing the high part of the
 389     ;; result CC with -B.
 390     wmov    CC2, A2     ; CC3:CC2 = 0 (A3:A2 was just cleared) ...
 391     sub     CC2, B0     ; ... minus B1:B0
 392     sbc     CC3, B1
 393     XJMP __mulsi3_helper    ; enter after the point where __mulsi3 would clear CC3:CC2
 394 #endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
 395 ENDF __mulhisi3
 396 #endif /* L_mulhisi3 */
 397
 398
 399 /*******************************************************
 400     Multiplication  32 x 32  without MUL
 401 *******************************************************/
 402 ;; __mulsi3: R25:R22 = R25:R22 * R21:R18.  __mulsi3_helper: same, but adds the product onto a preset R31:R30 high word
 403 #if defined (L_mulsi3)
 404 DEFUN __mulsi3
 405     ;; Clear result
 406     clr     CC2
 407     clr     CC3
 408     ;; FALLTHRU
 409 ENDF  __mulsi3
 410
 411 DEFUN __mulsi3_helper
 412     clr     CC0
 413     clr     CC1
 414     rjmp 3f             ; enter the loop at the shift of A
 415
 416 1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
 417     ;; CC += B
 418     add  CC0,B0  $  adc  CC1,B1  $  adc  CC2,B2  $  adc  CC3,B3
 419
 420 2:  ;; B <<= 1
 421     lsl  B0      $  rol  B1      $  rol  B2      $  rol  B3
 422
 423 3:  ;; A >>= 1:  Carry = n-th bit of A
 424     lsr  A3      $  ror  A2      $  ror  A1      $  ror  A0
 425
 426     brcs 1b
 427     ;; Only continue if  A != 0
 428     sbci    A1, 0       ; carry is 0 here: Z stays set only if A0 (from the ROR) and A1 are both 0
 429     brne 2b
 430     sbiw    A2, 0       ; test the high word A3:A2
 431     brne 2b
 432
 433     ;; All bits of A are consumed:  Copy result to return register C
 434     wmov    C0, CC0
 435     wmov    C2, CC2
 436     ret
 437 ENDF __mulsi3_helper
 438 #endif /* L_mulsi3 */
 439
 440 #undef A0
 441 #undef A1
 442 #undef A2
 443 #undef A3
 444 #undef B0
 445 #undef B1
 446 #undef B2
 447 #undef B3
 448 #undef C0
 449 #undef C1
 450 #undef C2
 451 #undef C3
 452 #undef CC0
 453 #undef CC1
 454 #undef CC2
 455 #undef CC3
 456
 457 #endif /* !defined (__AVR_HAVE_MUL__) */
 458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 459 \f
 460 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 461 #if defined (__AVR_HAVE_MUL__)
 462 #define A0 26      /* A1:A0 = R27:R26: 16-bit operand */
 463 #define B0 18      /* B[0..3] = R21:R18: 16- or 32-bit operand */
 464 #define C0 22      /* C[0..3] = R25:R22: 32-bit result */
 465
 466 #define A1 A0+1
 467
 468 #define B1 B0+1
 469 #define B2 B0+2
 470 #define B3 B0+3
 471
 472 #define C1 C0+1
 473 #define C2 C0+2
 474 #define C3 C0+3
 475
 476 /*******************************************************
 477     Widening Multiplication  32 = 16 x 16  with MUL
 478 *******************************************************/
 479
 480 #if defined (L_mulhisi3)
 481 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
 482 ;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
 483 ;;; Clobbers: __tmp_reg__
 484 DEFUN __mulhisi3
 485     XCALL   __umulhisi3     ; unsigned product first, corrected for the signs below
 486     ;; Sign-extend B
 487     tst     B1
 488     brpl    1f
 489     sub     C2, A0          ; B < 0: subtract A from the high word
 490     sbc     C3, A1
 491 1:  ;; Sign-extend A
 492     XJMP __usmulhisi3_tail  ; shared tail: subtracts B from the high word if A < 0
 493 ENDF __mulhisi3
 494 #endif /* L_mulhisi3 */
 495
 496 #if defined (L_usmulhisi3)
 497 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
 498 ;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
 499 ;;; Clobbers: __tmp_reg__
 500 DEFUN __usmulhisi3
 501     XCALL   __umulhisi3     ; unsigned product first
 502     ;; FALLTHRU
 503 ENDF __usmulhisi3
 504
 505 DEFUN __usmulhisi3_tail
 506     ;; Sign-extend A
 507     sbrs    A1, 7
 508     ret                     ; A >= 0: nothing to correct
 509     sub     C2, B0          ; A < 0: subtract B from the high word
 510     sbc     C3, B1
 511     ret
 512 ENDF __usmulhisi3_tail
 513 #endif /* L_usmulhisi3 */
 514
 515 #if defined (L_umulhisi3)
 516 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
 517 ;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
 518 ;;; Clobbers: __tmp_reg__
 519 DEFUN __umulhisi3
 520     mul     A0, B0
 521     movw    C0, r0          ; C1:C0 = A0 * B0
 522     mul     A1, B1
 523     movw    C2, r0          ; C3:C2 = A1 * B1
 524     mul     A0, B1          ; first cross product, added in at byte 1
 525 #ifdef __AVR_HAVE_JMP_CALL__
 526     ;; This function is used by many other routines, often multiple times.
 527     ;; Therefore, if the flash size is not too limited, avoid the RCALL
 528     ;; and invest 6 Bytes to speed things up.
 529     add     C1, r0
 530     adc     C2, r1
 531     clr     __zero_reg__    ; MUL left a product byte in R1; CLR keeps carry
 532     adc     C3, __zero_reg__
 533 #else
 534     rcall   1f              ; reuse the add sequence at 1: as a subroutine
 535 #endif
 536     mul     A1, B0          ; second cross product, added in at byte 1
 537 1:  add     C1, r0
 538     adc     C2, r1
 539     clr     __zero_reg__
 540     adc     C3, __zero_reg__
 541     ret
 542 ENDF __umulhisi3
 543 #endif /* L_umulhisi3 */
 544
 545 /*******************************************************
 546     Widening Multiplication  32 = 16 x 32  with MUL
 547 *******************************************************/
 548
 549 #if defined (L_mulshisi3)
 550 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
 551 ;;; (C3:C0) = (signed long) A1:A0   * B3:B0
 552 ;;; Clobbers: __tmp_reg__
 553 DEFUN __mulshisi3
 554 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
 555     ;; Some cores have problem skipping 2-word instruction
 556     tst     A1
 557     brmi    __mulohisi3
 558 #else
 559     sbrs    A1, 7           ; A < 0: skip the jump and fall through to __mulohisi3
 560 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
 561     XJMP    __muluhisi3     ; A >= 0: the unsigned routine gives the signed result
 562     ;; FALLTHRU
 563 ENDF __mulshisi3
 564
 565 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
 566 ;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
 567 ;;; Clobbers: __tmp_reg__
 568 DEFUN __mulohisi3
 569     XCALL   __muluhisi3
 570     ;; One-extend R27:R26 (A1:A0)
 571     sub     C2, B0          ; the ones in the upper half of A contribute -B to the high word
 572     sbc     C3, B1
 573     ret
 574 ENDF __mulohisi3
 575 #endif /* L_mulshisi3 */
 576
 577 #if defined (L_muluhisi3)
 578 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
 579 ;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
 580 ;;; Clobbers: __tmp_reg__
 581 DEFUN __muluhisi3
 582     XCALL   __umulhisi3     ; C = A * low word of B
 583     mul     A0, B3          ; products that only reach byte 3: low byte is enough
 584     add     C3, r0
 585     mul     A1, B2
 586     add     C3, r0
 587     mul     A0, B2          ; A0 * B2 lands in bytes 2 and 3
 588     add     C2, r0
 589     adc     C3, r1
 590     clr     __zero_reg__    ; restore R1 = 0 after the MULs
 591     ret
 592 ENDF __muluhisi3
 593 #endif /* L_muluhisi3 */
 594
 595 /*******************************************************
 596     Multiplication  32 x 32  with MUL
 597 *******************************************************/
 598
 599 #if defined (L_mulsi3)
 600 ;;; R25:R22 = R25:R22 * R21:R18
 601 ;;; (C3:C0) = C3:C0   * B3:B0
 602 ;;; Clobbers: R26, R27, __tmp_reg__
 603 DEFUN __mulsi3
 604     movw    A0, C0          ; A1:A0 = low word of the first operand
 605     push    C2              ; keep its high word on the stack
 606     push    C3
 607     XCALL   __muluhisi3     ; C = low word * B
 608     pop     A1              ; pops come back in reverse order: A1 = old C3, A0 = old C2
 609     pop     A0
 610     ;; A1:A0 now contains the high word of A
 611     mul     A0, B0
 612     add     C2, r0
 613     adc     C3, r1
 614     mul     A0, B1          ; remaining products only reach byte 3
 615     add     C3, r0
 616     mul     A1, B0
 617     add     C3, r0
 618     clr     __zero_reg__    ; restore R1 = 0 after the MULs
 619     ret
 620 ENDF __mulsi3
 621 #endif /* L_mulsi3 */
 622
 623 #undef A0
 624 #undef A1
 625
 626 #undef B0
 627 #undef B1
 628 #undef B2
 629 #undef B3
 630
 631 #undef C0
 632 #undef C1
 633 #undef C2
 634 #undef C3
 635
 636 #endif /* __AVR_HAVE_MUL__ */
 637
 638 /*******************************************************
 639        Multiplication 24 x 24 with or without MUL
 640 *******************************************************/
 641
 642 #if defined (L_mulpsi3)
 643 ;; __mulpsi3: 24-bit multiplication, one variant for devices with MUL and one shift-and-add variant without
 644 ;; A[0..2]: In: Multiplicand; Out: Product
 645 #define A0  22
 646 #define A1  A0+1
 647 #define A2  A0+2
 648
 649 ;; B[0..2]: In: Multiplier
 650 #define B0  18
 651 #define B1  B0+1
 652 #define B2  B0+2
 653
 654 #if defined (__AVR_HAVE_MUL__)
 655
 656 ;; C[0..2]: Expand Result
 657 #define C0  22
 658 #define C1  C0+1
 659 #define C2  C0+2
 660
 661 ;; R24:R22 *= R20:R18
 662 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
 663
 664 #define AA0 26
 665 #define AA2 21
 666
 667 DEFUN __mulpsi3
 668     wmov    AA0, A0     ; R27:R26 = A1:A0, the operand registers of __umulhisi3
 669     mov     AA2, A2     ; keep A2, R24 is overwritten by the product
 670     XCALL   __umulhisi3 ; R25:R22 = A1:A0 * B1:B0
 671     mul     AA2, B0     $  add  C2, r0
 672     mul     AA0, B2     $  add  C2, r0
 673     clr     __zero_reg__
 674     ret
 675 ENDF __mulpsi3
 676
 677 #undef AA2
 678 #undef AA0
 679
 680 #undef C2
 681 #undef C1
 682 #undef C0
 683
 684 #else /* !HAVE_MUL */
 685
 686 ;; C[0..2]: Expand Result
 687 #define C0  0
 688 #define C1  C0+1
 689 #define C2  21
 690
 691 ;; R24:R22 *= R20:R18
 692 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
 693
 694 DEFUN __mulpsi3
 695 ;; C1 is R1 (__zero_reg__): it is not cleared here, so it has to be 0 on entry
 696     ;; C[] = 0
 697     clr     __tmp_reg__
 698     clr     C2
 699
 700 0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
 701     LSR  B2     $  ror  B1     $  ror  B0
 702
 703     ;; If the N-th Bit of B[] was set...
 704     brcc    1f
 705
 706     ;; ...then add A[] * 2^N to the Result C[]
 707     ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2
 708
 709 1:  ;; Multiply A[] by 2
 710     LSL  A0     $  rol  A1     $  rol  A2
 711
 712     ;; Loop until B[] is 0
 713     subi B0,0   $  sbci B1,0   $  sbci B2,0
 714     brne    0b
 715
 716     ;; Copy C[] to the return Register A[]
 717     wmov    A0, C0
 718     mov     A2, C2
 719
 720     clr     __zero_reg__    ; R1 served as C1: make it 0 again
 721     ret
 722 ENDF __mulpsi3
 723
 724 #undef C2
 725 #undef C1
 726 #undef C0
 727
 728 #endif /* HAVE_MUL */
 729
 730 #undef B2
 731 #undef B1
 732 #undef B0
 733
 734 #undef A2
 735 #undef A1
 736 #undef A0
 737
 738 #endif /* L_mulpsi3 */
 739
 740 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
 741 ;; __mulsqipsi3: R20:R18 = R24:R22 * sign-extended R25 (low 24 bits of the product)
 742 ;; A[0..2]: In: Multiplicand
 743 #define A0  22
 744 #define A1  A0+1
 745 #define A2  A0+2
 746
 747 ;; BB: In: Multiplier
 748 #define BB  25
 749
 750 ;; C[0..2]: Result
 751 #define C0  18
 752 #define C1  C0+1
 753 #define C2  C0+2
 754
 755 ;; C[] = A[] * sign_extend (BB)
 756 DEFUN __mulsqipsi3
 757     mul     A0, BB
 758     movw    C0, r0          ; C1:C0 = A0 * BB
 759     mul     A2, BB
 760     mov     C2, r0          ; only the low byte of A2 * BB fits into the result
 761     mul     A1, BB
 762     add     C1, r0
 763     adc     C2, r1
 764     clr     __zero_reg__
 765     sbrs    BB, 7
 766     ret                     ; BB >= 0: the unsigned product is the result
 767     ;; One-extend BB
 768     sub     C1, A0          ; BB < 0: subtract A << 8
 769     sbc     C2, A1
 770     ret
 771 ENDF __mulsqipsi3
 772
 773 #undef C2
 774 #undef C1
 775 #undef C0
 776
 777 #undef BB
 778
 779 #undef A2
 780 #undef A1
 781 #undef A0
 782
 783 #endif /* L_mulsqipsi3  &&  HAVE_MUL */
 784
 785 /*******************************************************
 786        Multiplication 64 x 64
 787 *******************************************************/
 788
 789 ;; A[] = A[] * B[]
 790
 791 ;; A[0..7]: In: Multiplicand
 792 ;; Out: Product
 793 #define A0  18     /* A[0..7] = R25:R18 */
 794 #define A1  A0+1
 795 #define A2  A0+2
 796 #define A3  A0+3
 797 #define A4  A0+4
 798 #define A5  A0+5
 799 #define A6  A0+6
 800 #define A7  A0+7
 801
 802 ;; B[0..7]: In: Multiplier
 803 #define B0  10     /* B[0..7] = R17:R10 */
 804 #define B1  B0+1
 805 #define B2  B0+2
 806 #define B3  B0+3
 807 #define B4  B0+4
 808 #define B5  B0+5
 809 #define B6  B0+6
 810 #define B7  B0+7
 811
 812 #if defined (__AVR_HAVE_MUL__)
 813
 814 ;; Define C[] for convenience
 815 ;; Notice that parts of C[] overlap A[] respective B[]
 816 #define C0  16     /* C1:C0 = R17:R16, the same registers as B7:B6 */
 817 #define C1  C0+1
 818 #define C2  20     /* C3:C2 = R21:R20, the same registers as A3:A2 */
 819 #define C3  C2+1
 820 #define C4  28     /* C7:C4 = R31:R28 */
 821 #define C5  C4+1
 822 #define C6  C4+2
 823 #define C7  C4+3
 824
 825 #if defined (L_muldi3)
 826 ;; 64 x 64 -> 64 bit multiplication built from 8 x 8 MULs and the 16 x 16 products of __umulhisi3
 827 ;; A[]     *= B[]
 828 ;; R25:R18 *= R17:R10
 829 ;; Ordinary ABI-Function
 830
 831 DEFUN __muldi3
 832     push    r29             ; R29:R28 and R17:R16 hold parts of C[] below, so save them
 833     push    r28
 834     push    r17
 835     push    r16
 836
 837     ;; Counting in Words, we have to perform a 4 * 4 Multiplication
 838
 839     ;; 3 * 0  +  0 * 3
 840     mul  A7,B0  $             $  mov C7,r0
 841     mul  A0,B7  $             $  add C7,r0
 842     mul  A6,B1  $             $  add C7,r0
 843     mul  A6,B0  $  mov C6,r0  $  add C7,r1
 844     mul  B6,A1  $             $  add C7,r0
 845     mul  B6,A0  $  add C6,r0  $  adc C7,r1
 846
 847     ;; 1 * 2
 848     mul  A2,B4  $  add C6,r0  $  adc C7,r1
 849     mul  A3,B4  $             $  add C7,r0
 850     mul  A2,B5  $             $  add C7,r0
 851
 852     push    A5              ; saved operand words, popped again in reverse order below
 853     push    A4
 854     push    B1
 855     push    B0
 856     push    A3
 857     push    A2
 858
 859     ;; 0 * 0
 860     wmov    26, B0          ; __umulhisi3 computes R25:R22 = R27:R26 * R19:R18
 861     XCALL   __umulhisi3
 862     wmov    C0, 22
 863     wmov    C2, 24
 864
 865     ;; 0 * 2
 866     wmov    26, B4
 867     XCALL   __umulhisi3  $  wmov C4,22            $ add C6,24 $ adc C7,25
 868
 869     wmov    26, B2
 870     ;; 0 * 1
 871     XCALL   __muldi3_6
 872
 873     pop     A0              ; R19:R18 = saved A3:A2
 874     pop     A1
 875     ;; 1 * 1
 876     wmov    26, B2
 877     XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
 878
 879     pop     r26             ; R27:R26 = saved B1:B0
 880     pop     r27
 881     ;; 1 * 0
 882     XCALL   __muldi3_6
 883
 884     pop     A0              ; R19:R18 = saved A5:A4
 885     pop     A1
 886     ;; 2 * 0
 887     XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
 888
 889     ;; 2 * 1
 890     wmov    26, B2
 891     XCALL   __umulhisi3  $            $           $ add C6,22 $ adc C7,23
 892
 893     ;; A[] = C[]
 894     wmov    A0, C0
 895     ;; A2 = C2 already
 896     wmov    A4, C4
 897     wmov    A6, C6
 898
 899     clr     __zero_reg__
 900     pop     r16
 901     pop     r17
 902     pop     r28
 903     pop     r29
 904     ret
 905 ENDF __muldi3
 906 #endif /* L_muldi3 */
 907
 908 #if defined (L_muldi3_6)
 909 ;; A helper for some 64-bit multiplications with MUL available
 910 DEFUN __muldi3_6
 911     ;; C5:C2 += R27:R26 * R19:R18; a carry out of C5 increments C7:C6
 912     XCALL   __umulhisi3
 913     add     C2, 22
 914     adc     C3, 23
 915     adc     C4, 24
 916     adc     C5, 25
 917     brcc    0f
 918     adiw    C6, 1
 919 0:  ret
 920 ENDF __muldi3_6
 921 #endif /* L_muldi3_6 */
 922
 923 #undef C7
 924 #undef C6
 925 #undef C5
 926 #undef C4
 927 #undef C3
 928 #undef C2
 929 #undef C1
 930 #undef C0
 931
 932 #else /* !HAVE_MUL */
 933
 934 #if defined (L_muldi3)
 935 ;; 64 x 64 -> 64 bit multiplication without MUL: 64 rounds of shift-and-add
 936 #define C0  26
 937 #define C1  C0+1
 938 #define C2  C0+2
 939 #define C3  C0+3
 940 #define C4  C0+4
 941 #define C5  C0+5
 942 #define C6  0
 943 #define C7  C6+1
 944
 945 #define Loop 9
 946
 947 ;; A[]     *= B[]
 948 ;; R25:R18 *= R17:R10
 949 ;; Ordinary ABI-Function
 950
 951 DEFUN __muldi3
 952     push    r29             ; R29:R28 (C3:C2) and R9 (Loop) are restored before returning
 953     push    r28
 954     push    Loop
 955
 956     ldi     C0, 64          ; LDI needs an upper register, so load the count via R26
 957     mov     Loop, C0
 958
 959     ;; C[] = 0
 960     clr     __tmp_reg__     ; C7:C6 is R1:R0; R1 (__zero_reg__) is not cleared here, so it has to be 0 on entry
 961     wmov    C0, 0           ; the other result words are copied from R1:R0
 962     wmov    C2, 0
 963     wmov    C4, 0
 964
 965 0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
 966     ;; where N = 64 - Loop.
 967     ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
 968     ;; B[] will have its initial Value again.
 969     LSR  B7     $  ror  B6     $  ror  B5     $  ror  B4
 970     ror  B3     $  ror  B2     $  ror  B1     $  ror  B0
 971
 972     ;; If the N-th Bit of B[] was set then...
 973     brcc    1f
 974     ;; ...finish Rotation...
 975     ori     B7, 1 << 7
 976
 977     ;; ...and add A[] * 2^N to the Result C[]
 978     ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2  $  adc  C3,A3
 979     adc  C4,A4  $  adc  C5,A5  $  adc  C6,A6  $  adc  C7,A7
 980
 981 1:  ;; Multiply A[] by 2
 982     LSL  A0     $  rol  A1     $  rol  A2     $  rol  A3
 983     rol  A4     $  rol  A5     $  rol  A6     $  rol  A7
 984
 985     dec     Loop
 986     brne    0b
 987
 988     ;; We expanded the Result in C[]
 989     ;; Copy Result to the Return Register A[]
 990     wmov    A0, C0
 991     wmov    A2, C2
 992     wmov    A4, C4
 993     wmov    A6, C6
 994
 995     clr     __zero_reg__    ; R1 served as C7: make it 0 again
 996     pop     Loop
 997     pop     r28
 998     pop     r29
 999     ret
1000 ENDF __muldi3
1001
1002 #undef Loop
1003
1004 #undef C7
1005 #undef C6
1006 #undef C5
1007 #undef C4
1008 #undef C3
1009 #undef C2
1010 #undef C1
1011 #undef C0
1012
1013 #endif /* L_muldi3 */
1014 #endif /* HAVE_MUL */
1015 /* Drop the A[0..7] and B[0..7] register aliases of the 64 x 64 multiplication.  */
1016 #undef B7
1017 #undef B6
1018 #undef B5
1019 #undef B4
1020 #undef B3
1021 #undef B2
1022 #undef B1
1023 #undef B0
1024
1025 #undef A7
1026 #undef A6
1027 #undef A5
1028 #undef A4
1029 #undef A3
1030 #undef A2
1031 #undef A1
1032 #undef A0
1033
1034 /*******************************************************
1035    Widening Multiplication 64 = 32 x 32  with  MUL
1036 *******************************************************/
1037
1038 #if defined (__AVR_HAVE_MUL__)
1039 #define A0 r22     /* A[0..3] = R25:R22: first 32-bit operand */
1040 #define A1 r23
1041 #define A2 r24
1042 #define A3 r25
1043
1044 #define B0 r18     /* B[0..3] = R21:R18: second 32-bit operand */
1045 #define B1 r19
1046 #define B2 r20
1047 #define B3 r21
1048
1049 #define C0  18     /* C1:C0 = R19:R18 */
1050 #define C1  C0+1
1051 #define C2  20     /* C3:C2 = R21:R20 */
1052 #define C3  C2+1
1053 #define C4  28     /* C7:C4 = R31:R28, moved to R25:R22 at the end of the helper */
1054 #define C5  C4+1
1055 #define C6  C4+2
1056 #define C7  C4+3
1057
1058 #if defined (L_umulsidi3)
1059 ;; __umulsidi3 clears T and falls into __umulsidi3_helper, which subtracts B from the high half if T is set
1060 ;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1061
1062 ;; R18[8] = R22[4] * R18[4]
1063 ;;
1064 ;; Ordinary ABI Function, but additionally sets
1065 ;; X = R20[2] = B2[2]
1066 ;; Z = R22[2] = A0[2]
1067 DEFUN __umulsidi3
1068     clt
1069     ;; FALLTHRU
1070 ENDF  __umulsidi3
1071     ;; T = sign (A)
1072 DEFUN __umulsidi3_helper
1073     push    29  $  push    28 ; Y
1074     wmov    30, A2          ; Z = high word of A
1075     ;; Counting in Words, we have to perform 4 Multiplications
1076     ;; 0 * 0
1077     wmov    26, A0
1078     XCALL __umulhisi3
1079     push    23  $  push    22 ; C0
1080     wmov    28, B0          ; Y = low word of B
1081     wmov    18, B2          ; R19:R18 = high word of B
1082     wmov    C2, 24          ; C3:C2 = high word of the 0 * 0 product
1083     push    27  $  push    26 ; A0
1084     push    19  $  push    18 ; B2
1085     ;;
1086     ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
1087     ;;  B2  C2  --  --  --  B0  A2
1088     ;; 1 * 1
1089     wmov    26, 30      ; A2
1090     XCALL __umulhisi3
1091     ;; Sign-extend A.  T holds the sign of A
1092     brtc    0f
1093     ;; Subtract B from the high part of the result
1094     sub     22, 28
1095     sbc     23, 29
1096     sbc     24, 18
1097     sbc     25, 19
1098 0:  wmov    18, 28      ;; B0
1099     wmov    C4, 22
1100     wmov    C6, 24
1101     ;;
1102     ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
1103     ;;  B0  C2  --  --  A2  C4  C6
1104     ;;
1105     ;; 1 * 0
1106     XCALL __muldi3_6
1107     ;; 0 * 1
1108     pop     26  $   pop 27  ;; B2
1109     pop     18  $   pop 19  ;; A0
1110     XCALL __muldi3_6
1111
1112     ;; Move result C into place and save A0 in Z
1113     wmov    22, C4
1114     wmov    24, C6
1115     wmov    30, 18 ; A0
1116     pop     C0  $   pop C1
1117
1118     ;; Epilogue
1119     pop     28  $   pop 29  ;; Y
1120     ret
1121 ENDF __umulsidi3_helper
1122 #endif /* L_umulsidi3 */
1123
1124
1125 #if defined (L_mulsidi3)
1126 ;; The helper handles the sign of A through the T flag; the sign of B is handled here after the call
1127 ;; Signed widening 64 = 32 * 32 Multiplication
1128 ;;
1129 ;; R18[8] = R22[4] * R18[4]
1130 ;; Ordinary ABI Function
1131 DEFUN __mulsidi3
1132     bst     A3, 7           ; T = sign of A, evaluated by __umulsidi3_helper
1133     sbrs    B3, 7           ; Enhanced core has no skip bug
1134     XJMP __umulsidi3_helper ; B >= 0: the helper alone gives the result
1135
1136     ;; B needs sign-extension
1137     push    A3              ; keep the high word of A; the low word comes back in Z
1138     push    A2
1139     XCALL __umulsidi3_helper
1140     ;; A0 survived in Z
1141     sub     r22, r30        ; B < 0: subtract A from the high 32 bits of the result
1142     sbc     r23, r31
1143     pop     r26             ; high word of A; POP leaves the carry of the SBC chain intact
1144     pop     r27
1145     sbc     r24, r26
1146     sbc     r25, r27
1147     ret
1148 ENDF __mulsidi3
1149 #endif /* L_mulsidi3 */
1150
1151 #undef A0
1152 #undef A1
1153 #undef A2
1154 #undef A3
1155 #undef B0
1156 #undef B1
1157 #undef B2
1158 #undef B3
1159 #undef C0
1160 #undef C1
1161 #undef C2
1162 #undef C3
1163 #undef C4
1164 #undef C5
1165 #undef C6
1166 #undef C7
1167 #endif /* HAVE_MUL */
1168
1169 /**********************************************************
1170     Widening Multiplication 64 = 32 x 32  without  MUL
1171 **********************************************************/
1172
1173 #if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1174 #define A0 18      /* A[0..7] = R25:R18: first operand of __muldi3 */
1175 #define A1 A0+1
1176 #define A2 A0+2
1177 #define A3 A0+3
1178 #define A4 A0+4
1179 #define A5 A0+5
1180 #define A6 A0+6
1181 #define A7 A0+7
1182
1183 #define B0 10      /* B[0..7] = R17:R10: second operand of __muldi3 */
1184 #define B1 B0+1
1185 #define B2 B0+2
1186 #define B3 B0+3
1187 #define B4 B0+4
1188 #define B5 B0+5
1189 #define B6 B0+6
1190 #define B7 B0+7
1191
1192 #define AA0 22     /* AA[0..3] = R25:R22: incoming 32-bit operand A */
1193 #define AA1 AA0+1
1194 #define AA2 AA0+2
1195 #define AA3 AA0+3
1196
1197 #define BB0 18     /* BB[0..3] = R21:R18: incoming 32-bit operand B */
1198 #define BB1 BB0+1
1199 #define BB2 BB0+2
1200 #define BB3 BB0+3
1201
1202 #define Mask r30   /* 0xff for the signed entry, 0x7f for the unsigned entry */
1203
1204 ;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1205 ;;
1206 ;; R18[8] = R22[4] * R18[4]
1207 ;; Ordinary ABI Function
1208 DEFUN __mulsidi3
1209     set                     ; T = 1 selects signed extension
1210     skip
1211     ;; FALLTHRU
1212 ENDF  __mulsidi3
1213
1214 DEFUN __umulsidi3
1215     clt     ; skipped
1216     ;; Save 10 Registers: R10..R17, R28, R29
1217     do_prologue_saves 10
1218     ldi     Mask, 0xff
1219     bld     Mask, 7         ; bit 7 of Mask = T
1220     ;; Move B into place...
1221     wmov    B0, BB0
1222     wmov    B2, BB2
1223     ;; ...and extend it
1224     and     BB3, Mask       ; the unsigned entry clears the sign bit here (B3 already holds the original byte)
1225     lsl     BB3             ; carry = sign bit that is left
1226     sbc     B4, B4          ; B4 = 0x00 or 0xff according to carry
1227     mov     B5, B4
1228     wmov    B6, B4
1229     ;; Move A into place...
1230     wmov    A0, AA0
1231     wmov    A2, AA2
1232     ;; ...and extend it
1233     and     AA3, Mask
1234     lsl     AA3
1235     sbc     A4, A4
1236     mov     A5, A4
1237     wmov    A6, A4
1238     XCALL   __muldi3        ; 64 x 64 multiplication of the two extended operands
1239     do_epilogue_restores 10
1240 ENDF __umulsidi3
1241
1242 #undef A0
1243 #undef A1
1244 #undef A2
1245 #undef A3
1246 #undef A4
1247 #undef A5
1248 #undef A6
1249 #undef A7
1250 #undef B0
1251 #undef B1
1252 #undef B2
1253 #undef B3
1254 #undef B4
1255 #undef B5
1256 #undef B6
1257 #undef B7
1258 #undef AA0
1259 #undef AA1
1260 #undef AA2
1261 #undef AA3
1262 #undef BB0
1263 #undef BB1
1264 #undef BB2
1265 #undef BB3
1266 #undef Mask
1267 #endif /* L_mulsidi3 && !HAVE_MUL */
1268
1269 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1270
1271 \f
1272 .section .text.libgcc.div, "ax", @progbits
1273
1274 /*******************************************************
1275        Division 8 / 8 => (result + remainder)
1276 *******************************************************/
1277 #define r_rem   r25     /* remainder */
1278 #define r_arg1  r24     /* dividend, quotient */
1279 #define r_arg2  r22     /* divisor */
1280 #define r_cnt   r23     /* loop count */
1281
1282 #if defined (L_udivmodqi4)
1283 DEFUN __udivmodqi4
             ;; Unsigned 8-bit division by shift-and-subtract.
             ;; In:   r_arg1 (R24) = dividend, r_arg2 (R22) = divisor
             ;; Out:  r_arg1 (R24) = quotient, r_rem (R25) = remainder
             ;; Writes r_cnt (R23); r_arg2 is only read.
1284         sub     r_rem,r_rem     ; clear remainder and carry
1285         ldi     r_cnt,9         ; init loop counter: 8 bits plus the first pass through _ep
1286         rjmp    __udivmodqi4_ep ; jump to entry point
1287 __udivmodqi4_loop:
1288         rol     r_rem           ; shift dividend into remainder
1289         cp      r_rem,r_arg2    ; compare remainder & divisor
1290         brcs    __udivmodqi4_ep ; remainder < divisor: keep it, C = 1 is shifted in
1291         sub     r_rem,r_arg2    ; restore remainder; leaves C = 0
1292 __udivmodqi4_ep:
1293         rol     r_arg1          ; shift dividend (with CARRY)
1294         dec     r_cnt           ; decrement loop counter (DEC does not change C)
1295         brne    __udivmodqi4_loop
1296         com     r_arg1          ; complement result
1297                                 ; because C flag was complemented in loop
1298         ret
1299 ENDF __udivmodqi4
1300 #endif /* defined (L_udivmodqi4) */
1301
1302 #if defined (L_divmodqi4)
1303 DEFUN __divmodqi4
             ;; Signed 8-bit division built on __udivmodqi4.
             ;; In:   r_arg1 (R24) = dividend, r_arg2 (R22) = divisor
             ;; Out:  r_arg1 (R24) = quotient, r_rem (R25) = remainder
             ;; T keeps the dividend's sign (applied to the remainder);
             ;; __tmp_reg__ bit 7 keeps the quotient's sign.
1304         bst     r_arg1,7        ; store sign of dividend
1305         mov     __tmp_reg__,r_arg1
1306         eor     __tmp_reg__,r_arg2; r0.7 is sign of result
1307         sbrc    r_arg1,7
1308         neg     r_arg1          ; dividend negative : negate
1309         sbrc    r_arg2,7
1310         neg     r_arg2          ; divisor negative : negate
1311         XCALL   __udivmodqi4    ; do the unsigned div/mod
1312         brtc    __divmodqi4_1   ; dividend was >= 0: remainder stays as is
1313         neg     r_rem           ; correct remainder sign
1314 __divmodqi4_1:
1315         sbrc    __tmp_reg__,7
1316         neg     r_arg1          ; correct result sign
1317 __divmodqi4_exit:               ; not referenced inside this routine
1318         ret
1319 ENDF __divmodqi4
1320 #endif /* defined (L_divmodqi4) */
1321
1322 #undef r_rem
1323 #undef r_arg1
1324 #undef r_arg2
1325 #undef r_cnt
1326
1327
1328 /*******************************************************
1329        Division 16 / 16 => (result + remainder)
1330 *******************************************************/
1331 #define r_remL  r26     /* remainder Low */
1332 #define r_remH  r27     /* remainder High */
1333
1334 /* return: remainder */
1335 #define r_arg1L r24     /* dividend Low */
1336 #define r_arg1H r25     /* dividend High */
1337
1338 /* return: quotient */
1339 #define r_arg2L r22     /* divisor Low */
1340 #define r_arg2H r23     /* divisor High */
1341
1342 #define r_cnt   r21     /* loop count */
1343
1344 #if defined (L_udivmodhi4)
1345 DEFUN __udivmodhi4
             ;; Unsigned 16-bit division by shift-and-subtract.
             ;; In:   r_arg1 (R25:R24) = dividend, r_arg2 (R23:R22) = divisor
             ;; Out:  r_arg2 (R23:R22) = quotient, r_arg1 (R25:R24) = remainder
             ;; Writes r_cnt (R21) and r_rem (R27:R26).
1346         sub     r_remL,r_remL
1347         sub     r_remH,r_remH   ; clear remainder and carry
1348         ldi     r_cnt,17        ; init loop counter: 16 bits plus the first pass through _ep
1349         rjmp    __udivmodhi4_ep ; jump to entry point
1350 __udivmodhi4_loop:
1351         rol     r_remL          ; shift dividend into remainder
1352         rol     r_remH
1353         cp      r_remL,r_arg2L  ; compare remainder & divisor
1354         cpc     r_remH,r_arg2H
1355         brcs    __udivmodhi4_ep ; remainder < divisor
1356         sub     r_remL,r_arg2L  ; restore remainder
1357         sbc     r_remH,r_arg2H
1358 __udivmodhi4_ep:
1359         rol     r_arg1L         ; shift dividend (with CARRY)
1360         rol     r_arg1H
1361         dec     r_cnt           ; decrement loop counter (DEC does not change C)
1362         brne    __udivmodhi4_loop
1363         com     r_arg1L         ; quotient bits were collected inverted
1364         com     r_arg1H
1365 ; div/mod results to return registers, as for the div() function
             ;; mov_l / mov_h are macros defined elsewhere; each pair presumably
             ;; copies one 16-bit register pair.
1366         mov_l   r_arg2L, r_arg1L        ; quotient
1367         mov_h   r_arg2H, r_arg1H
1368         mov_l   r_arg1L, r_remL         ; remainder
1369         mov_h   r_arg1H, r_remH
1370         ret
1371 ENDF __udivmodhi4
1372 #endif /* defined (L_udivmodhi4) */
1373
1374 #if defined (L_divmodhi4)
1375 DEFUN __divmodhi4
         ;; Signed 16-bit division built on __udivmodhi4; also exported as _div.
         ;; In:   r_arg1 (R25:R24) = dividend, r_arg2 (R23:R22) = divisor
         ;; Out:  r_arg2 (R23:R22) = quotient, r_arg1 (R25:R24) = remainder
         ;; T keeps the dividend's sign (applied to the remainder);
         ;; __tmp_reg__ bit 7 keeps the quotient's sign.
1376     .global _div
1377 _div:
1378     bst     r_arg1H,7           ; store sign of dividend
1379     mov     __tmp_reg__,r_arg2H
1380     brtc    0f
1381     com     __tmp_reg__         ; r0.7 is sign of result
1382     rcall   __divmodhi4_neg1    ; dividend negative: negate
1383 0:
1384     sbrc    r_arg2H,7
1385     rcall   __divmodhi4_neg2    ; divisor negative: negate
1386     XCALL   __udivmodhi4        ; do the unsigned div/mod
1387     sbrc    __tmp_reg__,7
1388     rcall   __divmodhi4_neg2    ; correct quotient sign (quotient is now in r_arg2)
1389     brtc    __divmodhi4_exit    ; dividend was >= 0: remainder needs no fix
         ;; otherwise fall through: negate the remainder and return
1390 __divmodhi4_neg1:
1391     ;; correct dividend/remainder sign
1392     com     r_arg1H
1393     neg     r_arg1L
1394     sbci    r_arg1H,0xff
1395     ret
1396 __divmodhi4_neg2:
1397     ;; correct divisor/result sign
1398     com     r_arg2H
1399     neg     r_arg2L
1400     sbci    r_arg2H,0xff
1401 __divmodhi4_exit:
1402     ret
1403 ENDF __divmodhi4
1404 #endif /* defined (L_divmodhi4) */
1405
1406 #undef r_remH
1407 #undef r_remL
1408
1409 #undef r_arg1H
1410 #undef r_arg1L
1411
1412 #undef r_arg2H
1413 #undef r_arg2L
1414
1415 #undef r_cnt
1416
1417 /*******************************************************
1418        Division 24 / 24 => (result + remainder)
1419 *******************************************************/
1420
1421 ;; A[0..2]: In: Dividend; Out: Quotient
1422 #define A0  22
1423 #define A1  A0+1
1424 #define A2  A0+2
1425
1426 ;; B[0..2]: In: Divisor;   Out: Remainder
1427 #define B0  18
1428 #define B1  B0+1
1429 #define B2  B0+2
1430
1431 ;; C[0..2]: Expand remainder
1432 #define C0  __zero_reg__
1433 #define C1  26
1434 #define C2  25
1435
1436 ;; Loop counter
1437 #define r_cnt   21
1438
1439 #if defined (L_udivmodpsi4)
1440 ;; R24:R22 = R24:R22  udiv  R20:R18
1441 ;; R20:R18 = R24:R22  umod  R20:R18
1442 ;; Clobbers: R21, R25, R26
1443
1444 DEFUN __udivmodpsi4
         ;; Unsigned 24-bit division by shift-and-subtract.
         ;; The remainder is built in C2:C1:C0 = R25:R26:__zero_reg__, so
         ;; __zero_reg__ is used as working storage and cleared again on exit.
1445     ; init loop counter
1446     ldi     r_cnt, 24+1
1447     ; Clear remainder and carry.  C0 is already 0
1448     clr     C1
1449     sub     C2, C2
1450     ; jump to entry point
1451     rjmp    __udivmodpsi4_start
1452 __udivmodpsi4_loop:
1453     ; shift dividend into remainder
1454     rol     C0
1455     rol     C1
1456     rol     C2
1457     ; compare remainder & divisor
1458     cp      C0, B0
1459     cpc     C1, B1
1460     cpc     C2, B2
1461     brcs    __udivmodpsi4_start ; remainder < divisor
1462     sub     C0, B0              ; restore remainder
1463     sbc     C1, B1
1464     sbc     C2, B2
1465 __udivmodpsi4_start:
1466     ; shift dividend (with CARRY)
1467     rol     A0
1468     rol     A1
1469     rol     A2
1470     ; decrement loop counter
1471     dec     r_cnt
1472     brne    __udivmodpsi4_loop
         ; quotient bits were shifted in inverted: complement them
1473     com     A0
1474     com     A1
1475     com     A2
1476     ; div/mod results to return registers
1477     ; remainder
1478     mov     B0, C0
1479     mov     B1, C1
1480     mov     B2, C2
1481     clr     __zero_reg__ ; C0
1482     ret
1483 ENDF __udivmodpsi4
1484 #endif /* defined (L_udivmodpsi4) */
1485
1486 #if defined (L_divmodpsi4)
1487 ;; R24:R22 = R24:R22  div  R20:R18
1488 ;; R20:R18 = R24:R22  mod  R20:R18
1489 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1490
1491 DEFUN __divmodpsi4
         ; Signed 24-bit division built on __udivmodpsi4.
         ; After the unsigned call A[] holds the quotient and B[] the
         ; remainder; the remainder takes the dividend's sign (kept in T).
1492     ; R0.7 will contain the sign of the result:
1493     ; R0.7 = A.sign ^ B.sign
1494     mov __tmp_reg__, B2
1495     ; T-flag = sign of dividend
1496     bst     A2, 7
1497     brtc    0f
1498     com     __tmp_reg__
1499     ; Adjust dividend's sign
1500     rcall   __divmodpsi4_negA
1501 0:
1502     ; Adjust divisor's sign
1503     sbrc    B2, 7
1504     rcall   __divmodpsi4_negB
1505
1506     ; Do the unsigned div/mod
1507     XCALL   __udivmodpsi4
1508
1509     ; Adjust quotient's sign
1510     sbrc    __tmp_reg__, 7
1511     rcall   __divmodpsi4_negA
1512
1513     ; Adjust remainder's sign
1514     brtc    __divmodpsi4_end
         ; T = 1: fall through, negate the remainder in B[] and return
1515
1516 __divmodpsi4_negB:
1517     ; Correct divisor/remainder sign
1518     com     B2
1519     com     B1
1520     neg     B0
1521     sbci    B1, -1
1522     sbci    B2, -1
1523     ret
1524
1525     ; Correct dividend/quotient sign
1526 __divmodpsi4_negA:
1527     com     A2
1528     com     A1
1529     neg     A0
1530     sbci    A1, -1
1531     sbci    A2, -1
1532 __divmodpsi4_end:
1533     ret
1534
1535 ENDF __divmodpsi4
1536 #endif /* defined (L_divmodpsi4) */
1537
1538 #undef A0
1539 #undef A1
1540 #undef A2
1541
1542 #undef B0
1543 #undef B1
1544 #undef B2
1545
1546 #undef C0
1547 #undef C1
1548 #undef C2
1549
1550 #undef r_cnt
1551
1552 /*******************************************************
1553        Division 32 / 32 => (result + remainder)
1554 *******************************************************/
1555 #define r_remHH r31     /* remainder High */
1556 #define r_remHL r30
1557 #define r_remH  r27
1558 #define r_remL  r26     /* remainder Low */
1559
1560 /* return: remainder */
1561 #define r_arg1HH r25    /* dividend High */
1562 #define r_arg1HL r24
1563 #define r_arg1H  r23
1564 #define r_arg1L  r22    /* dividend Low */
1565
1566 /* return: quotient */
1567 #define r_arg2HH r21    /* divisor High */
1568 #define r_arg2HL r20
1569 #define r_arg2H  r19
1570 #define r_arg2L  r18    /* divisor Low */
1571
1572 #define r_cnt __zero_reg__  /* loop count (0 after the loop!) */
1573
1574 #if defined (L_udivmodsi4)
1575 DEFUN __udivmodsi4
             ;; Unsigned 32-bit division by shift-and-subtract.
             ;; In:   r_arg1 (R25:R22) = dividend, r_arg2 (R21:R18) = divisor
             ;; Out:  r_arg2 (R21:R18) = quotient, r_arg1 (R25:R22) = remainder
             ;; Writes r_rem (R31:R30:R27:R26); __zero_reg__ serves as the loop
             ;; counter and is 0 again when the loop ends.
1576         ldi     r_remL, 33      ; init loop counter (via R26: LDI cannot target R1)
1577         mov     r_cnt, r_remL
1578         sub     r_remL,r_remL
1579         sub     r_remH,r_remH   ; clear remainder and carry
1580         mov_l   r_remHL, r_remL
1581         mov_h   r_remHH, r_remH
1582         rjmp    __udivmodsi4_ep ; jump to entry point
1583 __udivmodsi4_loop:
1584         rol     r_remL          ; shift dividend into remainder
1585         rol     r_remH
1586         rol     r_remHL
1587         rol     r_remHH
1588         cp      r_remL,r_arg2L  ; compare remainder & divisor
1589         cpc     r_remH,r_arg2H
1590         cpc     r_remHL,r_arg2HL
1591         cpc     r_remHH,r_arg2HH
1592         brcs    __udivmodsi4_ep ; remainder < divisor
1593         sub     r_remL,r_arg2L  ; restore remainder
1594         sbc     r_remH,r_arg2H
1595         sbc     r_remHL,r_arg2HL
1596         sbc     r_remHH,r_arg2HH
1597 __udivmodsi4_ep:
1598         rol     r_arg1L         ; shift dividend (with CARRY)
1599         rol     r_arg1H
1600         rol     r_arg1HL
1601         rol     r_arg1HH
1602         dec     r_cnt           ; decrement loop counter (DEC does not change C)
1603         brne    __udivmodsi4_loop
1604                                 ; __zero_reg__ now restored (r_cnt == 0)
1605         com     r_arg1L         ; quotient bits were collected inverted
1606         com     r_arg1H
1607         com     r_arg1HL
1608         com     r_arg1HH
1609 ; div/mod results to return registers, as for the ldiv() function
1610         mov_l   r_arg2L,  r_arg1L       ; quotient
1611         mov_h   r_arg2H,  r_arg1H
1612         mov_l   r_arg2HL, r_arg1HL
1613         mov_h   r_arg2HH, r_arg1HH
1614         mov_l   r_arg1L,  r_remL        ; remainder
1615         mov_h   r_arg1H,  r_remH
1616         mov_l   r_arg1HL, r_remHL
1617         mov_h   r_arg1HH, r_remHH
1618         ret
1619 ENDF __udivmodsi4
1620 #endif /* defined (L_udivmodsi4) */
1621
1622 #if defined (L_divmodsi4)
1623 DEFUN __divmodsi4
         ;; Signed 32-bit division built on __udivmodsi4.
         ;; In:   r_arg1 (R25:R22) = dividend, r_arg2 (R21:R18) = divisor
         ;; Out:  r_arg2 (R21:R18) = quotient, r_arg1 (R25:R22) = remainder
         ;; T keeps the dividend's sign (applied to the remainder);
         ;; __tmp_reg__ bit 7 keeps the quotient's sign.
1624     mov     __tmp_reg__,r_arg2HH
1625     bst     r_arg1HH,7          ; store sign of dividend
1626     brtc    0f
1627     com     __tmp_reg__         ; r0.7 is sign of result
1628     XCALL   __negsi2            ; dividend negative: negate
1629 0:
1630     sbrc    r_arg2HH,7
1631     rcall   __divmodsi4_neg2    ; divisor negative: negate
1632     XCALL   __udivmodsi4        ; do the unsigned div/mod
1633     sbrc    __tmp_reg__, 7      ; correct quotient sign
1634     rcall   __divmodsi4_neg2
1635     brtc    __divmodsi4_exit    ; correct remainder sign
1636     XJMP    __negsi2            ; tail call: negate remainder in R25:R22 and return from there
1637 __divmodsi4_neg2:
1638     ;; correct divisor/quotient sign
1639     com     r_arg2HH
1640     com     r_arg2HL
1641     com     r_arg2H
1642     neg     r_arg2L
1643     sbci    r_arg2H,0xff
1644     sbci    r_arg2HL,0xff
1645     sbci    r_arg2HH,0xff
1646 __divmodsi4_exit:
1647     ret
1648 ENDF __divmodsi4
1649 #endif /* defined (L_divmodsi4) */
1650
1651 #if defined (L_negsi2)
1652 ;; (set (reg:SI 22)
1653 ;;      (neg:SI (reg:SI 22)))
1654 ;; Sets the V flag for signed overflow tests
     ;; Negates the 32-bit value in R25:R22 in place.
1655 DEFUN __negsi2
1656     NEG4    22      ; macro defined elsewhere; presumably negates the 4 bytes starting at R22
1657     ret
1658 ENDF __negsi2
1659 #endif /* L_negsi2 */
1660
1661 #undef r_remHH
1662 #undef r_remHL
1663 #undef r_remH
1664 #undef r_remL
1665 #undef r_arg1HH
1666 #undef r_arg1HL
1667 #undef r_arg1H
1668 #undef r_arg1L
1669 #undef r_arg2HH
1670 #undef r_arg2HL
1671 #undef r_arg2H
1672 #undef r_arg2L
1673 #undef r_cnt
1674
1675 /*******************************************************
1676        Division 64 / 64
1677        Modulo   64 % 64
1678 *******************************************************/
1679
1680 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1681 ;; at least 16k of Program Memory.  For smaller Devices, depend
1682 ;; on MOVW and SP Size.  There is a Connexion between SP Size and
1683 ;; Flash Size so that SP Size can be used to test for Flash Size.
1684
1685 #if defined (__AVR_HAVE_JMP_CALL__)
1686 #   define SPEED_DIV 8
1687 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1688 #   define SPEED_DIV 16
1689 #else
1690 #   define SPEED_DIV 0
1691 #endif
1692
1693 ;; A[0..7]: In: Dividend;
1694 ;; Out: Quotient  (T = 0)
1695 ;; Out: Remainder (T = 1)
1696 #define A0  18
1697 #define A1  A0+1
1698 #define A2  A0+2
1699 #define A3  A0+3
1700 #define A4  A0+4
1701 #define A5  A0+5
1702 #define A6  A0+6
1703 #define A7  A0+7
1704
1705 ;; B[0..7]: In: Divisor;   Out: Clobber
1706 #define B0  10
1707 #define B1  B0+1
1708 #define B2  B0+2
1709 #define B3  B0+3
1710 #define B4  B0+4
1711 #define B5  B0+5
1712 #define B6  B0+6
1713 #define B7  B0+7
1714
1715 ;; C[0..7]: Expand remainder;  Out: Remainder (unused)
1716 #define C0  8
1717 #define C1  C0+1
1718 #define C2  30
1719 #define C3  C2+1
1720 #define C4  28
1721 #define C5  C4+1
1722 #define C6  26
1723 #define C7  C6+1
1724
1725 ;; Holds Signs during Division Routine
1726 #define SS      __tmp_reg__
1727
1728 ;; Bit-Counter in Division Routine
1729 #define R_cnt   __zero_reg__
1730
1731 ;; Scratch Register for Negation
1732 #define NN      r31
1733
1734 #if defined (L_udivdi3)
1735
1736 ;; R25:R18 = R25:R18  umod  R17:R10
1737 ;; Ordinary ABI-Function
1738
1739 DEFUN __umoddi3
1740     set                         ; T = 1: ask the worker for the remainder
1741     rjmp __udivdi3_umoddi3
1742 ENDF __umoddi3
1743
1744 ;; R25:R18 = R25:R18  udiv  R17:R10
1745 ;; Ordinary ABI-Function
1746
1747 DEFUN __udivdi3
1748     clt                         ; T = 0: ask the worker for the quotient
         ;; no RET: execution continues in __udivdi3_umoddi3 right below
1749 ENDF __udivdi3
1750
1751 DEFUN __udivdi3_umoddi3
         ;; Common tail of __udivdi3 / __umoddi3 (T selects the result).
         ;; Saves R8, R9 (C0, C1) and R28, R29 (C4, C5) around the worker,
         ;; which uses them as part of its remainder C[].  The other C[]
         ;; registers (R26, R27, R30, R31) are not saved here.
1752     push    C0
1753     push    C1
1754     push    C4
1755     push    C5
1756     XCALL   __udivmod64
1757     pop     C5
1758     pop     C4
1759     pop     C1
1760     pop     C0
1761     ret
1762 ENDF __udivdi3_umoddi3
1763 #endif /* L_udivdi3 */
1764
1765 #if defined (L_udivmod64)
1766
1767 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1768 ;; No Registers saved/restored; the Callers will take Care.
1769 ;; Preserves B[] and T-flag
1770 ;; T = 0: Compute Quotient  in A[]
1771 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1772
1773 DEFUN __udivmod64
1774
1775     ;; Clear Remainder (C6, C7 will follow)
1776     clr     C0
1777     clr     C1
1778     wmov    C2, C0
1779     wmov    C4, C0
1780     ldi     C7, 64              ; number of bits to process
1781
1782 #if SPEED_DIV == 0 || SPEED_DIV == 16
1783     ;; Initialize Loop-Counter
1784     mov     R_cnt, C7
1785     wmov    C6, C0
1786 #endif /* SPEED_DIV */
1787
1788 #if SPEED_DIV == 8
1789
         ;; Byte-wise pre-shift: C7 counts the remaining bits here, so the
         ;; byte shift below has no C7 slot; a copy of A7 is kept on the stack.
1790     push    A7
1791     clr     C6
1792
1793 1:  ;; Compare shifted Dividend against Divisor
1794     ;; If -- even after Shifting -- it is smaller...
1795     CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
1796     cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
1797     brcc    2f
1798
1799     ;; ...then we can subtract it.  Thus, it is legal to shift left
1800                $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
1801     mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
1802     mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
1803     mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0
1804
1805     ;; 8 Bits are done
1806     subi    C7, 8
1807     brne    1b
1808
1809     ;; Shifted 64 Bits:  A7 has traveled to C7
1810     pop     C7
1811     ;; Divisor is greater than Dividend. We have:
1812     ;; A[] % B[] = A[]
1813     ;; A[] / B[] = 0
1814     ;; Thus, we can return immediately
1815     rjmp    5f
1816
1817 2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
1818     mov     R_cnt, C7
1819
1820     ;; Push of A7 is not needed because C7 is still 0
1821     pop     C7                  ; drop the saved copy (balances the PUSH above)...
1822     clr     C7                  ; ...and start with the top remainder byte at 0
1823
1824 #elif  SPEED_DIV == 16
1825
1826     ;; Compare shifted Dividend against Divisor
1827     cp      A7, B3
1828     cpc     C0, B4
1829     cpc     C1, B5
1830     cpc     C2, B6
1831     cpc     C3, B7
1832     brcc    2f
1833
1834     ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1835     ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1836     wmov  C2,A6  $  wmov C0,A4
1837     wmov  A6,A2  $  wmov A4,A0
1838     wmov  A2,C6  $  wmov A0,C4
1839
1840     ;; Set Bit Counter to 32
1841     lsr     R_cnt
1842 2:
1843 #elif SPEED_DIV
1844 #error SPEED_DIV = ?
1845 #endif /* SPEED_DIV */
1846
1847 ;; The very Division + Remainder Routine
1848
1849 3:  ;; Left-shift Dividend...
1850     lsl A0     $  rol A1     $  rol A2     $  rol A3
1851     rol A4     $  rol A5     $  rol A6     $  rol A7
1852
1853     ;; ...into Remainder
1854     rol C0     $  rol C1     $  rol C2     $  rol C3
1855     rol C4     $  rol C5     $  rol C6     $  rol C7
1856
1857     ;; Compare Remainder and Divisor
1858     CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
1859     cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7
1860
1861     brcs 4f                     ; Remainder < Divisor: quotient bit stays 0
1862
1863     ;; Divisor fits into Remainder:  Subtract it from Remainder...
1864     SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
1865     sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7
1866
1867     ;; ...and set according Bit in the upcoming Quotient
1868     ;; The Bit will travel to its final Position
1869     ori A0, 1
1870
1871 4:  ;; This Bit is done
1872     dec     R_cnt
1873     brne    3b
1874     ;; __zero_reg__ is 0 again
1875
1876     ;; T = 0: We are fine with the Quotient in A[]
1877     ;; T = 1: Copy Remainder to A[]
1878 5:  brtc    6f
1879     wmov    A0, C0
1880     wmov    A2, C2
1881     wmov    A4, C4
1882     wmov    A6, C6
1883     ;; Move the Sign of the Result to SS.7
1884     lsl     SS                  ; SS.6 (remainder sign) becomes SS.7
1885
1886 6:  ret
1887
1888 ENDF __udivmod64
1889 #endif /* L_udivmod64 */
1890
1891
1892 #if defined (L_divdi3)
1893
1894 ;; R25:R18 = R25:R18  mod  R17:R10
1895 ;; Ordinary ABI-Function
1896
1897 DEFUN __moddi3
1898     set                         ; T = 1: the remainder is wanted
1899     rjmp    __divdi3_moddi3
1900 ENDF __moddi3
1901
1902 ;; R25:R18 = R25:R18  div  R17:R10
1903 ;; Ordinary ABI-Function
1904
1905 DEFUN __divdi3
1906     clt                         ; T = 0: the quotient is wanted
         ;; no RET: execution continues in __divdi3_moddi3 right below
1907 ENDF __divdi3
1908
1909 DEFUN  __divdi3_moddi3
         ;; Common body of __divdi3 (T = 0) and __moddi3 (T = 1):
         ;; makes both operands non-negative, runs the unsigned worker
         ;; __udivmod64 and negates the result when its sign bit in SS says so.
1910 #if SPEED_DIV
1911     mov     r31, A7
1912     or      r31, B7
1913     brmi    0f                  ; bit 7 of A7|B7 set: at least one operand is negative
1914     ;; Both Signs are 0:  the following Complexity is not needed
1915     XJMP    __udivdi3_umoddi3
1916 #endif /* SPEED_DIV */
1917
1918 0:  ;; The Prologue
1919     ;; Save 12 Registers:  Y, 17...8
1920     ;; No Frame needed
1921     do_prologue_saves 12
1922
1923     ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
1924     ;; SS.6 will contain the Sign of the Remainder (A.sign)
1925     mov     SS, A7
1926     asr     SS                  ; SS.7 = SS.6 = A.sign; N reflects A.sign
1927     ;; Adjust Dividend's Sign as needed
1928 #if SPEED_DIV
1929     ;; Compiling for Speed we know that at least one Sign must be < 0
1930     ;; Thus, if A[] >= 0 then we know B[] < 0
1931     brpl    22f
1932 #else
1933     brpl    21f
1934 #endif /* SPEED_DIV */
1935
1936     XCALL   __negdi2            ; A[] = -A[]
1937
1938     ;; Adjust Divisor's Sign and SS.7 as needed
1939 21: tst     B7
1940     brpl    3f
1941 22: ldi     NN, 1 << 7
1942     eor     SS, NN              ; flip the quotient's sign bit
1943
         ;; B[] = -B[]:  complement bytes 1..7, negate byte 0, then subtract
         ;; 0xff with borrow from the upper bytes.  NN supplies the 0xff
         ;; because SBCI only accepts R16..R31 and B0..B5 are R10..R15.
1944     ldi NN, -1
1945     com B4     $  com B5     $  com B6     $  com B7
1946                $  com B1     $  com B2     $  com B3
1947     NEG B0
1948                $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
1949     sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN
1950
1951 3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1952     XCALL   __udivmod64
1953
1954     ;; Adjust Result's Sign
         ;; (for T = 1 the worker has shifted the remainder's sign into SS.7)
1955 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
         ;; presumably avoids a skip instruction in front of a 2-word CALL
1956     tst     SS
1957     brpl    4f
1958 #else
1959     sbrc    SS, 7
1960 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1961     XCALL   __negdi2
1962
1963 4:  ;; Epilogue: Restore 12 Registers and return
1964     do_epilogue_restores 12
1965
1966 ENDF __divdi3_moddi3
1967
1968 #endif /* L_divdi3 */
1969
1970 #undef R_cnt
1971 #undef SS
1972 #undef NN
1973
1974 .section .text.libgcc, "ax", @progbits
1975
1976 #define TT __tmp_reg__
1977
1978 #if defined (L_adddi3)
1979 ;; (set (reg:DI 18)
1980 ;;      (plus:DI (reg:DI 18)
1981 ;;               (reg:DI 10)))
1982 ;; Sets the V flag for signed overflow tests
1983 ;; Sets the C flag for unsigned overflow tests
     ;; A[] = R18..R25 is updated in place; B[] = R10..R17 is only read.
1984 DEFUN __adddi3
         ;; one ADD followed by seven ADC: the carry ripples from byte 0 to byte 7
1985     ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
1986     adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
1987     ret
1988 ENDF __adddi3
1989 #endif /* L_adddi3 */
1990
1991 #if defined (L_adddi3_s8)
1992 ;; (set (reg:DI 18)
1993 ;;      (plus:DI (reg:DI 18)
1994 ;;               (sign_extend:DI (reg:QI 26))))
1995 ;; Sets the V flag for signed overflow tests
1996 ;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
     ;; A[] = R18..R25 is updated in place; TT (__tmp_reg__) is overwritten.
1997 DEFUN __adddi3_s8
1998     clr     TT
1999     sbrc    r26, 7
2000     com     TT                  ; TT = 0xff if R26 is negative, else 0x00
         ;; add R26 to byte 0 and its sign-extension byte TT to bytes 1..7
2001     ADD A0,r26 $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
2002     adc A4,TT  $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
2003     ret
2004 ENDF __adddi3_s8
2005 #endif /* L_adddi3_s8 */
2006
2007 #if defined (L_subdi3)
2008 ;; (set (reg:DI 18)
2009 ;;      (minus:DI (reg:DI 18)
2010 ;;                (reg:DI 10)))
2011 ;; Sets the V flag for signed overflow tests
2012 ;; Sets the C flag for unsigned overflow tests
     ;; A[] = R18..R25 is updated in place; B[] = R10..R17 is only read.
2013 DEFUN __subdi3
         ;; one SUB followed by seven SBC: the borrow ripples from byte 0 to byte 7
2014     SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
2015     sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
2016     ret
2017 ENDF __subdi3
2018 #endif /* L_subdi3 */
2019
2020 #if defined (L_cmpdi2)
2021 ;; (set (cc0)
2022 ;;      (compare (reg:DI 18)
2023 ;;               (reg:DI 10)))
     ;; Only the status flags are produced; neither A[] (R18..R25) nor
     ;; B[] (R10..R17) is written.
2024 DEFUN __cmpdi2
         ;; one CP followed by seven CPC: flags describe the full 64-bit A[] - B[]
2025     CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
2026     cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
2027     ret
2028 ENDF __cmpdi2
2029 #endif /* L_cmpdi2 */
2030
2031 #if defined (L_cmpdi2_s8)
2032 ;; (set (cc0)
2033 ;;      (compare (reg:DI 18)
2034 ;;               (sign_extend:DI (reg:QI 26))))
     ;; Only the status flags and TT (__tmp_reg__) are written.
2035 DEFUN __cmpdi2_s8
2036     clr     TT
2037     sbrc    r26, 7
2038     com     TT                  ; TT = 0xff if R26 is negative, else 0x00
         ;; compare byte 0 with R26 and bytes 1..7 with the sign-extension byte TT
2039     CP  A0,r26 $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
2040     cpc A4,TT  $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
2041     ret
2042 ENDF __cmpdi2_s8
2043 #endif /* L_cmpdi2_s8 */
2044
2045 #if defined (L_negdi2)
2046 ;; (set (reg:DI 18)
2047 ;;      (neg:DI (reg:DI 18)))
2048 ;; Sets the V flag for signed overflow tests
2049 DEFUN __negdi2
2050
         ;; -A = ~A + 1:  complement bytes 1..7, negate byte 0, then let
         ;; SBCI x,-1 add 1 to each upper byte and take the borrow off again.
2051     com  A4    $  com  A5    $  com  A6    $  com  A7
2052                $  com  A1    $  com  A2    $  com  A3
2053     NEG  A0
2054                $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
2055     sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
2056     ret
2057
2058 ENDF __negdi2
2059 #endif /* L_negdi2 */
2060
2061 #undef TT
2062
2063 #undef C7
2064 #undef C6
2065 #undef C5
2066 #undef C4
2067 #undef C3
2068 #undef C2
2069 #undef C1
2070 #undef C0
2071
2072 #undef B7
2073 #undef B6
2074 #undef B5
2075 #undef B4
2076 #undef B3
2077 #undef B2
2078 #undef B1
2079 #undef B0
2080
2081 #undef A7
2082 #undef A6
2083 #undef A5
2084 #undef A4
2085 #undef A3
2086 #undef A2
2087 #undef A1
2088 #undef A0
2089
2090 \f
2091 .section .text.libgcc.prologue, "ax", @progbits
2092
2093 /**********************************
2094  * This is a prologue subroutine
2095  **********************************/
2096 #if defined (L_prologue)
2097
2098 ;; This function does not clobber T-flag; 64-bit division relies on it
     ;;
     ;; Pushes R2..R17, R28 and R29, lowers SP by the value in X (only R26 on
     ;; devices without SPH), leaves the new SP in Y and continues through
     ;; XIJMP (macro defined elsewhere; presumably an indirect jump via Z).
     ;; NOTE(review): users such as "do_prologue_saves 10" presumably enter
     ;; part-way into the push sequence -- confirm against that macro.
2099 DEFUN __prologue_saves__
2100         push r2
2101         push r3
2102         push r4
2103         push r5
2104         push r6
2105         push r7
2106         push r8
2107         push r9
2108         push r10
2109         push r11
2110         push r12
2111         push r13
2112         push r14
2113         push r15
2114         push r16
2115         push r17
2116         push r28
2117         push r29
2118 #if !defined (__AVR_HAVE_SPH__)
             ;; 8-bit SP: Y = SP_L - R26, high byte of Y is 0
2119         in      r28,__SP_L__
2120         sub     r28,r26
2121         out     __SP_L__,r28
2122         clr     r29
2123 #elif defined (__AVR_XMEGA__)
             ;; 16-bit SP, written low byte first without masking interrupts
2124         in      r28,__SP_L__
2125         in      r29,__SP_H__
2126         sub     r28,r26
2127         sbc     r29,r27
2128         out     __SP_L__,r28
2129         out     __SP_H__,r29
2130 #else
             ;; 16-bit SP: interrupts are disabled while SP_H is written; SREG is
             ;; restored just before SP_L is written
2131         in      r28,__SP_L__
2132         in      r29,__SP_H__
2133         sub     r28,r26
2134         sbc     r29,r27
2135         in      __tmp_reg__,__SREG__
2136         cli
2137         out     __SP_H__,r29
2138         out     __SREG__,__tmp_reg__
2139         out     __SP_L__,r28
2140 #endif /* #SP = 8/16 */
2141
2142         XIJMP
2143
2144 ENDF __prologue_saves__
2145 #endif /* defined (L_prologue) */
2146
2147 /*
2148  * This is an epilogue subroutine
2149  */
2150 #if defined (L_epilogue)
2151
2152 DEFUN __epilogue_restores__
             ;; Reloads R2..R17 from Y+18..Y+3 and the saved Y from Y+2 (low
             ;; byte) and Y+1 (high byte), sets SP = Y + R30 and returns.
             ;; NOTE(review): R30 presumably holds the number of bytes to
             ;; release, set up by the do_epilogue_restores macro (not visible
             ;; here); users restoring fewer registers presumably enter
             ;; part-way into the load sequence -- confirm.
2153         ldd     r2,Y+18
2154         ldd     r3,Y+17
2155         ldd     r4,Y+16
2156         ldd     r5,Y+15
2157         ldd     r6,Y+14
2158         ldd     r7,Y+13
2159         ldd     r8,Y+12
2160         ldd     r9,Y+11
2161         ldd     r10,Y+10
2162         ldd     r11,Y+9
2163         ldd     r12,Y+8
2164         ldd     r13,Y+7
2165         ldd     r14,Y+6
2166         ldd     r15,Y+5
2167         ldd     r16,Y+4
2168         ldd     r17,Y+3
2169         ldd     r26,Y+2         ; saved R28, parked in R26 while Y is still needed
2170 #if !defined (__AVR_HAVE_SPH__)
2171         ldd     r29,Y+1         ; saved R29 can be loaded directly: only R28 feeds SP
2172         add     r28,r30
2173         out     __SP_L__,r28
2174         mov     r28, r26
2175 #elif defined (__AVR_XMEGA__)
2176         ldd  r27,Y+1            ; saved R29, parked in R27
2177         add  r28,r30
2178         adc  r29,__zero_reg__
2179         out  __SP_L__,r28
2180         out  __SP_H__,r29
2181         wmov 28, 26             ; Y = saved Y
2182 #else
2183         ldd     r27,Y+1         ; saved R29, parked in R27
2184         add     r28,r30
2185         adc     r29,__zero_reg__
             ;; interrupts are disabled while SP_H is written; SREG is restored
             ;; just before SP_L is written
2186         in      __tmp_reg__,__SREG__
2187         cli
2188         out     __SP_H__,r29
2189         out     __SREG__,__tmp_reg__
2190         out     __SP_L__,r28
2191         mov_l   r28, r26        ; Y = saved Y
2192         mov_h   r29, r27
2193 #endif /* #SP = 8/16 */
2194         ret
2195 ENDF __epilogue_restores__
2196 #endif /* defined (L_epilogue) */
2197
2198 #ifdef L_exit
2199         .section .fini9,"ax",@progbits
2200 DEFUN _exit
             ;; _exit and the weak symbol exit both label this point in .fini9.
             ;; No instruction is emitted here, so execution simply continues
             ;; into whatever follows.
2201         .weak   exit
2202 exit:
2203 ENDF _exit
2204
2205         /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
2206
2207         .section .fini0,"ax",@progbits
             ;; final stop: disable interrupts and spin forever
2208         cli
2209 __stop_program:
2210         rjmp    __stop_program
2211 #endif /* defined (L_exit) */
2212
2213 #ifdef L_cleanup
2214         .weak   _cleanup
             ;; Weak definition of _cleanup that does nothing but return.
2215         .func   _cleanup
2216 _cleanup:
2217         ret
2218 .endfunc
2219 #endif /* defined (L_cleanup) */
2220
2221 \f
2222 .section .text.libgcc, "ax", @progbits
2223
2224 #ifdef L_tablejump2
2225 DEFUN __tablejump2__
         ;; Jumps to the code address stored in a jump-table entry.
         ;; In: Z (plus R24 on EIJMP/EICALL devices) = word address of the entry.
         ;; The word address is doubled into a byte address; the bit shifted
         ;; out goes to RAMPZ on devices that have ELPM.
2226     lsl     r30
2227     rol     r31
2228 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2229     ;; Word address of gs() jumptable entry in R24:Z
2230     rol     r24
2231     out     __RAMPZ__, r24
2232 #elif defined (__AVR_HAVE_ELPM__)
2233     ;; Word address of jumptable entry in Z
2234     clr     __tmp_reg__
2235     rol     __tmp_reg__         ; __tmp_reg__ = carry out of the shift above
2236     out     __RAMPZ__, __tmp_reg__
2237 #endif
2238
2239     ;; Read word address from jumptable and jump
2240
2241 #if defined (__AVR_HAVE_ELPMX__)
2242     elpm    __tmp_reg__, Z+     ; low byte first, via a temporary: Z is still needed
2243     elpm    r31, Z
2244     mov     r30, __tmp_reg__
2245 #ifdef __AVR_HAVE_RAMPD__
2246     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2247     out     __RAMPZ__, __zero_reg__
2248 #endif /* RAMPD */
2249     XIJMP
2250 #elif defined (__AVR_HAVE_ELPM__)
         ;; no ELPM Rd,Z+ here: push low then high byte of the target and
         ;; let RET use them as its return address
2251     elpm
2252     push    r0
2253     adiw    r30, 1
2254     elpm
2255     push    r0
2256     ret
2257 #elif defined (__AVR_HAVE_LPMX__)
2258     lpm     __tmp_reg__, Z+
2259     lpm     r31, Z
2260     mov     r30, __tmp_reg__
2261     ijmp
2262 #else
         ;; plain LPM only: same push/push/RET technique as the ELPM case
2263     lpm
2264     push    r0
2265     adiw    r30, 1
2266     lpm
2267     push    r0
2268     ret
2269 #endif
2270 ENDF __tablejump2__
2271 #endif /* L_tablejump2 */
2272
2273 #ifdef L_copy_data
2274         .section .init4,"ax",@progbits
2275 DEFUN __do_copy_data
             ;; Copies bytes from flash, starting at __data_load_start, to RAM
             ;; from __data_start up to (not including) __data_end.
             ;; X = RAM destination, Z (plus RAMPZ where available) = flash source.
             ;; R17 holds hi8(__data_end) for the CPC in the loop test.
             ;; There is no RET: this code sits in .init4 and execution continues
             ;; with whatever is placed after it.
2276 #if defined(__AVR_HAVE_ELPMX__)
2277         ldi     r17, hi8(__data_end)
2278         ldi     r26, lo8(__data_start)
2279         ldi     r27, hi8(__data_start)
2280         ldi     r30, lo8(__data_load_start)
2281         ldi     r31, hi8(__data_load_start)
2282         ldi     r16, hh8(__data_load_start)
2283         out     __RAMPZ__, r16
2284         rjmp    .L__do_copy_data_start
2285 .L__do_copy_data_loop:
2286         elpm    r0, Z+
2287         st      X+, r0
2288 .L__do_copy_data_start:
2289         cpi     r26, lo8(__data_end)
2290         cpc     r27, r17
2291         brne    .L__do_copy_data_loop
2292 #elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2293         ldi     r17, hi8(__data_end)
2294         ldi     r26, lo8(__data_start)
2295         ldi     r27, hi8(__data_start)
2296         ldi     r30, lo8(__data_load_start)
2297         ldi     r31, hi8(__data_load_start)
             ;; start one 64 KiB segment low: the INC below brings R16 to the
             ;; real segment on the first pass and advances it on every Z wrap
2298         ldi     r16, hh8(__data_load_start - 0x10000)
2299 .L__do_copy_data_carry:
2300         inc     r16
2301         out     __RAMPZ__, r16
2302         rjmp    .L__do_copy_data_start
2303 .L__do_copy_data_loop:
2304         elpm
2305         st      X+, r0
2306         adiw    r30, 1
2307         brcs    .L__do_copy_data_carry  ; Z wrapped to 0: move on to the next segment
2308 .L__do_copy_data_start:
2309         cpi     r26, lo8(__data_end)
2310         cpc     r27, r17
2311         brne    .L__do_copy_data_loop
2312 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2313         ldi     r17, hi8(__data_end)
2314         ldi     r26, lo8(__data_start)
2315         ldi     r27, hi8(__data_start)
2316         ldi     r30, lo8(__data_load_start)
2317         ldi     r31, hi8(__data_load_start)
2318         rjmp    .L__do_copy_data_start
2319 .L__do_copy_data_loop:
2320 #if defined (__AVR_HAVE_LPMX__)
2321         lpm     r0, Z+
2322 #else
2323         lpm
2324         adiw    r30, 1
2325 #endif
2326         st      X+, r0
2327 .L__do_copy_data_start:
2328         cpi     r26, lo8(__data_end)
2329         cpc     r27, r17
2330         brne    .L__do_copy_data_loop
2331 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2332 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2333         ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2334         out     __RAMPZ__, __zero_reg__
2335 #endif /* ELPM && RAMPD */
2336 ENDF __do_copy_data
2337 #endif /* L_copy_data */
2338
2339 /* __do_clear_bss is only necessary if there is anything in .bss section.  */
2340
2341 #ifdef L_clear_bss
     ;; __do_clear_bss
     ;; Store __zero_reg__ to every byte in [__bss_start, __bss_end).
     ;; X (r27:r26) is the running store pointer.  r17 caches hi8(__bss_end)
     ;; because CPC needs a register operand, whereas the low byte can be
     ;; compared against an immediate with CPI.
     ;; Clobbers: r17, r26, r27, flags.
     ;; There is no RET: the fragment lives in .init4 and presumably runs on
     ;; into whatever start-up code follows it (confirm with the linker script).
     ;; The loop labels carry the .L prefix, like the ones in __do_copy_data
     ;; and __do_global_ctors, so that they do not show up as symbols.
2342         .section .init4,"ax",@progbits
2343 DEFUN __do_clear_bss
2344         ldi     r17, hi8(__bss_end)
2345         ldi     r26, lo8(__bss_start)
2346         ldi     r27, hi8(__bss_start)
             ;; Enter at the test so that an empty .bss stores nothing.
2347         rjmp    .L__do_clear_bss_start
2348 .L__do_clear_bss_loop:
2349         st      X+, __zero_reg__
2350 .L__do_clear_bss_start:
             ;; 16-bit compare X == __bss_end
2351         cpi     r26, lo8(__bss_end)
2352         cpc     r27, r17
2353         brne    .L__do_clear_bss_loop
2354 ENDF __do_clear_bss
2355 #endif /* L_clear_bss */
2356
2357 /* __do_global_ctors and __do_global_dtors are only necessary
2358    if there are any constructors/destructors.  */
2359
2360 #ifdef L_ctors
     ;; __do_global_ctors
     ;; Walk the table between __ctors_start and __ctors_end from its end
     ;; towards its start and invoke __tablejump2__ once per entry.
     ;; All addresses are program-memory word addresses (pm_lo8 / pm_hi8 /
     ;; pm_hh8).  Y (r29:r28) is the table cursor; r16 extends it by a third
     ;; byte on devices with EIJMP/EICALL.  r17 caches pm_hi8(__ctors_start)
     ;; for the CPC in the loop test.
     ;; __tablejump2__ is called with Z = cursor (and r24 = third byte on
     ;; EIJMP/EICALL devices); presumably it fetches the function address
     ;; stored at that entry and jumps to it -- see its definition.
     ;; No RET: the fragment is placed in .init6.
2361         .section .init6,"ax",@progbits
2362 DEFUN __do_global_ctors
2363     ldi     r17, pm_hi8(__ctors_start)
2364     ldi     r28, pm_lo8(__ctors_end)
2365     ldi     r29, pm_hi8(__ctors_end)
2366 #ifdef __AVR_HAVE_EIJMP_EICALL__
2367     ldi     r16, pm_hh8(__ctors_end)
2368 #endif /* HAVE_EIJMP */
         ;; Start at the loop test so that an empty table calls nothing.
2369     rjmp    .L__do_global_ctors_start
2370 .L__do_global_ctors_loop:
         ;; Step the cursor back by one word (with borrow into r16).
2371     sbiw    r28, 1
2372 #ifdef __AVR_HAVE_EIJMP_EICALL__
2373     sbc     r16, __zero_reg__
2374     mov     r24, r16
2375 #endif /* HAVE_EIJMP */
2376     mov_h   r31, r29
2377     mov_l   r30, r28
2378     XCALL   __tablejump2__
2379 .L__do_global_ctors_start:
         ;; Multi-byte compare: cursor == __ctors_start ?
2380     cpi     r28, pm_lo8(__ctors_start)
2381     cpc     r29, r17
2382 #ifdef __AVR_HAVE_EIJMP_EICALL__
2383     ldi     r24, pm_hh8(__ctors_start)
2384     cpc     r16, r24
2385 #endif /* HAVE_EIJMP */
2386     brne    .L__do_global_ctors_loop
2387 ENDF __do_global_ctors
2388 #endif /* L_ctors */
2389
2390 #ifdef L_dtors
     ;; __do_global_dtors
     ;; Same loop as __do_global_ctors, applied to the table between
     ;; __dtors_start and __dtors_end and placed in .fini6: the cursor
     ;; Y (r29:r28, extended by r16 on EIJMP/EICALL devices) starts at
     ;; __dtors_end and is decremented by one word before each call to
     ;; __tablejump2__ until it equals __dtors_start.
     ;; r17 caches pm_hi8(__dtors_start) for the CPC in the loop test.
2391         .section .fini6,"ax",@progbits
2392 DEFUN __do_global_dtors
2393     ldi     r17, pm_hi8(__dtors_start)
2394     ldi     r28, pm_lo8(__dtors_end)
2395     ldi     r29, pm_hi8(__dtors_end)
2396 #ifdef __AVR_HAVE_EIJMP_EICALL__
2397     ldi     r16, pm_hh8(__dtors_end)
2398 #endif /* HAVE_EIJMP */
         ;; Start at the loop test so that an empty table calls nothing.
2399     rjmp    .L__do_global_dtors_start
2400 .L__do_global_dtors_loop:
         ;; Step the cursor back by one word (with borrow into r16).
2401     sbiw    r28, 1
2402 #ifdef __AVR_HAVE_EIJMP_EICALL__
2403     sbc     r16, __zero_reg__
2404     mov     r24, r16
2405 #endif /* HAVE_EIJMP */
2406     mov_h   r31, r29
2407     mov_l   r30, r28
2408     XCALL   __tablejump2__
2409 .L__do_global_dtors_start:
         ;; Multi-byte compare: cursor == __dtors_start ?
2410     cpi     r28, pm_lo8(__dtors_start)
2411     cpc     r29, r17
2412 #ifdef __AVR_HAVE_EIJMP_EICALL__
2413     ldi     r24, pm_hh8(__dtors_start)
2414     cpc     r16, r24
2415 #endif /* HAVE_EIJMP */
2416     brne    .L__do_global_dtors_loop
2417 ENDF __do_global_dtors
2418 #endif /* L_dtors */
2419
2420 .section .text.libgcc, "ax", @progbits
2421
2422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2423 ;; Loading n bytes from Flash; n = 3,4
2424 ;; R22... = Flash[Z]
2425 ;; Clobbers: __tmp_reg__
2426
2427 #if (defined (L_load_3)        \
2428      || defined (L_load_4))    \
2429     && !defined (__AVR_HAVE_LPMX__)
2430
2431 ;; Destination
     ;; D0..D3 are the result registers r22..r25, D0 being the lowest byte.
2432 #define D0  22
2433 #define D1  D0+1
2434 #define D2  D0+2
2435 #define D3  D0+3
2436
     ;; .load dest, n
     ;; Fetch one byte from Flash[Z] with plain LPM (the byte arrives in r0)
     ;; and copy it to register dest.  n is the length of the whole load
     ;; sequence the macro is used in.  For every byte except the last one
     ;; (dest == D0+n-1) Z is advanced by 1; after the last byte Z is moved
     ;; back by n-1, so a complete sequence leaves Z as it found it.
     ;; Clobbers: r0, flags.
2437 .macro  .load dest, n
2438     lpm
2439     mov     \dest, r0
2440 .if \dest != D0+\n-1
2441     adiw    r30, 1
2442 .else
2443     sbiw    r30, \n-1
2444 .endif
2445 .endm
2446
2447 #if defined (L_load_3)
     ;; __load_3: r24:r22 = Flash[Z], 3 bytes.
     ;; Implemented on top of __load_4, which also fetches a 4th byte into
     ;; D3 (r25).  D3 is saved on the stack around the call, so the caller
     ;; sees r25 unchanged.
2448 DEFUN __load_3
2449     push  D3
2450     XCALL __load_4
2451     pop   D3
2452     ret
2453 ENDF __load_3
2454 #endif /* L_load_3 */
2455
2456 #if defined (L_load_4)
     ;; __load_4: r25:r22 = Flash[Z], 4 bytes, lowest byte in r22.
     ;; Four expansions of .load; the last one rewinds Z by 3, so Z has its
     ;; entry value again on return.  Clobbers r0 and flags.
2457 DEFUN __load_4
2458     .load D0, 4
2459     .load D1, 4
2460     .load D2, 4
2461     .load D3, 4
2462     ret
2463 ENDF __load_4
2464 #endif /* L_load_4 */
2465
2466 #endif /* L_load_3 || L_load_4 */
2467
2468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2469 ;; Loading n bytes from Flash or RAM;  n = 1,2,3,4
2470 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2471 ;; Clobbers: __tmp_reg__, R21, R30, R31
2472
2473 #if (defined (L_xload_1)            \
2474      || defined (L_xload_2)         \
2475      || defined (L_xload_3)         \
2476      || defined (L_xload_4))
2477
2478 ;; Destination
     ;; D0..D3 are the result registers r22..r25, D0 being the lowest byte.
2479 #define D0  22
2480 #define D1  D0+1
2481 #define D2  D0+2
2482 #define D3  D0+3
2483
2484 ;; Register containing bits 16+ of the address
2485
2486 #define HHI8  21
2487
     ;; .xload dest, n
     ;; Fetch one byte from flash into register dest, as one step of an
     ;; n-byte load sequence.  The instruction used depends on the device:
     ;;   ELPMX:        ELPM with post-increment, straight into dest
     ;;   ELPM only:    ELPM into r0, copy to dest; unless this is the last
     ;;                 byte (dest == D0+n-1), Z is incremented, the carry is
     ;;                 added to HHI8 and HHI8 is written back to RAMPZ
     ;;   LPMX:         LPM with post-increment, straight into dest
     ;;   otherwise:    LPM into r0, copy to dest; Z is incremented unless
     ;;                 this is the last byte
     ;; On devices with both ELPM and RAMPD, RAMPZ is reset to 0 after the
     ;; last byte.
2488 .macro  .xload dest, n
2489 #if defined (__AVR_HAVE_ELPMX__)
2490     elpm    \dest, Z+
2491 #elif defined (__AVR_HAVE_ELPM__)
2492     elpm
2493     mov     \dest, r0
2494 .if \dest != D0+\n-1
2495     adiw    r30, 1
2496     adc     HHI8, __zero_reg__
2497     out     __RAMPZ__, HHI8
2498 .endif
2499 #elif defined (__AVR_HAVE_LPMX__)
2500     lpm     \dest, Z+
2501 #else
2502     lpm
2503     mov     \dest, r0
2504 .if \dest != D0+\n-1
2505     adiw    r30, 1
2506 .endif
2507 #endif
2508 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2509 .if \dest == D0+\n-1
2510     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2511     out     __RAMPZ__, __zero_reg__
2512 .endif
2513 #endif
2514 .endm ; .xload
2515
2516 #if defined (L_xload_1)
     ;; __xload_1: r22 = 1 byte from RAM[Z] if bit 7 of HHI8 (r21) is set,
     ;; otherwise from flash at HHI8:Z.
2517 DEFUN __xload_1
2518 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
         ;; Branch-free variant: each load is guarded by a skip instruction
         ;; testing HHI8.7, so exactly one of LD and LPM is executed.
2519     sbrc    HHI8, 7
2520     ld      D0, Z
2521     sbrs    HHI8, 7
2522     lpm     D0, Z
2523     ret
2524 #else
         ;; HHI8.7 set: read from RAM at label 1
2525     sbrc    HHI8, 7
2526     rjmp    1f
2527 #if defined (__AVR_HAVE_ELPM__)
         ;; Select the 64 KiB flash segment for ELPM
2528     out     __RAMPZ__, HHI8
2529 #endif /* __AVR_HAVE_ELPM__ */
2530     .xload  D0, 1
2531     ret
2532 1:  ld      D0, Z
2533     ret
2534 #endif /* LPMx && ! ELPM */
2535 ENDF __xload_1
2536 #endif /* L_xload_1 */
2537
2538 #if defined (L_xload_2)
     ;; __xload_2: r23:r22 = 2 bytes, lowest byte first.
     ;; Bit 7 of HHI8 (r21) selects the source: set = RAM[Z],
     ;; clear = flash at HHI8:Z.
2539 DEFUN __xload_2
2540     sbrc    HHI8, 7
2541     rjmp    .L__xload_2_ram
         ;; Source is flash
2542 #if defined (__AVR_HAVE_ELPM__)
2543     out     __RAMPZ__, HHI8
2544 #endif /* __AVR_HAVE_ELPM__ */
2545     .xload  D0, 2
2546     .xload  D1, 2
2547     ret
     .L__xload_2_ram:
         ;; Source is RAM
2548     ld      D0, Z+
2549     ld      D1, Z+
2550     ret
2551 ENDF __xload_2
2552 #endif /* L_xload_2 */
2553
2554 #if defined (L_xload_3)
     ;; __xload_3: r24:r22 = 3 bytes, lowest byte first.
     ;; Bit 7 of HHI8 (r21) selects the source: set = RAM[Z],
     ;; clear = flash at HHI8:Z.
2555 DEFUN __xload_3
2556     sbrc    HHI8, 7
2557     rjmp    .L__xload_3_ram
         ;; Source is flash
2558 #if defined (__AVR_HAVE_ELPM__)
2559     out     __RAMPZ__, HHI8
2560 #endif /* __AVR_HAVE_ELPM__ */
2561     .xload  D0, 3
2562     .xload  D1, 3
2563     .xload  D2, 3
2564     ret
     .L__xload_3_ram:
         ;; Source is RAM
2565     ld      D0, Z+
2566     ld      D1, Z+
2567     ld      D2, Z+
2568     ret
2569 ENDF __xload_3
2570 #endif /* L_xload_3 */
2571
2572 #if defined (L_xload_4)
     ;; __xload_4: r25:r22 = 4 bytes, lowest byte first.
     ;; Bit 7 of HHI8 (r21) selects the source: set = RAM[Z],
     ;; clear = flash at HHI8:Z.
2573 DEFUN __xload_4
2574     sbrc    HHI8, 7
2575     rjmp    .L__xload_4_ram
         ;; Source is flash
2576 #if defined (__AVR_HAVE_ELPM__)
2577     out     __RAMPZ__, HHI8
2578 #endif /* __AVR_HAVE_ELPM__ */
2579     .xload  D0, 4
2580     .xload  D1, 4
2581     .xload  D2, 4
2582     .xload  D3, 4
2583     ret
     .L__xload_4_ram:
         ;; Source is RAM
2584     ld      D0, Z+
2585     ld      D1, Z+
2586     ld      D2, Z+
2587     ld      D3, Z+
2588     ret
2589 ENDF __xload_4
2590 #endif /* L_xload_4 */
2591
2592 #endif /* L_xload_{1|2|3|4} */
2593
2594 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2595 ;; memcopy from Address Space __pgmx to RAM
2596 ;; R23:Z = Source Address
2597 ;; X     = Destination Address
2598 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2599
2600 #if defined (L_movmemx)
2601
     ;; HHI8 = bits 16+ of the source address, bit 7 selects RAM
     ;; LOOP = 16-bit byte counter in r25:r24
2602 #define HHI8  23
2603 #define LOOP  24
2604
     ;; __movmemx_qi: entry for byte counts that fit in 8 bits (1..255).
     ;; Clears the high byte of the counter and continues in __movmemx_hi.
2605 DEFUN __movmemx_qi
2608     clr     LOOP+1
2609     ;; FALLTHRU
2610 ENDF __movmemx_qi
2611
     ;; __movmemx_hi: copy LOOP bytes from the source at HHI8:Z to RAM at X.
     ;; HHI8.7 set   -> the source is RAM[Z]
     ;; HHI8.7 clear -> the source is flash at HHI8:Z
     ;; The counter is decremented after each byte is copied, so it is
     ;; expected to be non-zero on entry.
2612 DEFUN __movmemx_hi
2613
2615     sbrc    HHI8, 7
2616     rjmp    .L__movmemx_from_ram
2617
         ;; ---- Source is flash ----
2619
2620 #if defined (__AVR_HAVE_ELPM__)
2621     out     __RAMPZ__, HHI8
2622 #endif
2623
     .L__movmemx_from_flash:
         ;; Fetch one byte into r0 and advance the source address
2625
2626 #if defined (__AVR_HAVE_ELPMX__)
2627     elpm    r0, Z+
2628 #elif defined (__AVR_HAVE_ELPM__)
2629     elpm
2630     adiw    r30, 1
2631     adc     HHI8, __zero_reg__
2632     out     __RAMPZ__, HHI8
2633 #elif defined (__AVR_HAVE_LPMX__)
2634     lpm     r0, Z+
2635 #else
2636     lpm
2637     adiw    r30, 1
2638 #endif
2639
         ;; Write it to the destination and count down
2641     st      X+, r0
2642     sbiw    LOOP, 1
2643     brne    .L__movmemx_from_flash
2644 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2645     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2646     out __RAMPZ__, __zero_reg__
2647 #endif /* ELPM && RAMPD */
2648     ret
2649
         ;; ---- Source is RAM ----
2651
     .L__movmemx_from_ram:
         ;; Fetch one byte, write it to the destination and count down
2653     ld      r0, Z+
2655     st      X+, r0
2656     sbiw    LOOP, 1
2657     brne    .L__movmemx_from_ram
2658     ret
2659 ENDF __movmemx_hi
2660
2661 #undef HHI8
2662 #undef LOOP
2663
2664 #endif /* L_movmemx */
2665
2666 \f
2667 .section .text.libgcc.builtins, "ax", @progbits
2668
2669 /**********************************
2670  * Find first set Bit (ffs)
2671  **********************************/
2672
2673 #if defined (L_ffssi2)
     ;; __ffssi2 -- find first set bit of a 32-bit value
     ;; In:       r25:r22 = x
     ;; Out:      r25:r24 = 1-based index of the lowest set bit, 0 if x == 0
     ;; Clobbers: r22, r26
     ;; The bytes are examined from the least significant one upwards.
     ;; r26 holds the bit offset (0, 8, 16, 24) of the byte under test.
     ;; r22 is zero whenever another byte is OR-ed into it, so after the OR
     ;; it simply holds that byte.
2677 DEFUN __ffssi2
2678     clr  r26
2679     tst  r22
2680     brne .L__ffssi2_found
2681     subi r26, -8
2682     or   r22, r23
2683     brne .L__ffssi2_found
2684     subi r26, -8
2685     or   r22, r24
2686     brne .L__ffssi2_found
2687     subi r26, -8
2688     or   r22, r25
2689     brne .L__ffssi2_found
         ;; x == 0: r25:r24 is already 0
2690     ret
     .L__ffssi2_found:
         ;; r22 = lowest non-zero byte; let the helper add its bit position
2691     mov  r24, r22
2692     XJMP __loop_ffsqi2
2693 ENDF __ffssi2
2694 #endif /* defined (L_ffssi2) */
2695
2696 #if defined (L_ffshi2)
2697 ;; find first set bit
2698 ;; r25:r24 = ffs16 (r25:r24)
2699 ;; clobbers: r26
     ;; r26 is the bit offset handed to __loop_ffsqi2: 0 when the low byte is
     ;; non-zero, 8 when the high byte has to be scanned instead.
     ;; For an input of 0 the function returns with r25:r24 still 0.
2700 DEFUN __ffshi2
2701     clr  r26
2702 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2703     ;; Some cores have problem skipping 2-word instruction
2704     tst  r24
2705     breq 2f
2706 #else
         ;; Skip the jump below if the low byte is zero
2707     cpse r24, __zero_reg__
2708 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2709 1:  XJMP __loop_ffsqi2
         ;; Low byte is zero: continue with the high byte at offset 8
2710 2:  ldi  r26, 8
2711     or   r24, r25
2712     brne 1b
2713     ret
2714 ENDF __ffshi2
2715 #endif /* defined (L_ffshi2) */
2716
2717 #if defined (L_loop_ffsqi2)
2718 ;; Helper for ffshi2, ffssi2
2719 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2720 ;; r24 must be != 0
2721 ;; clobbers: r26
     ;; Each round counts one bit position in r26 and shifts the lowest bit
     ;; of r24 into the carry; the loop ends when that bit was a 1.
     ;; With r24 == 0 the loop would not terminate, hence the precondition.
2722 DEFUN __loop_ffsqi2
2723     inc  r26
2724     lsr  r24
2725     brcc __loop_ffsqi2
2726     mov  r24, r26
2727     clr  r25
2728     ret
2729 ENDF __loop_ffsqi2
2730 #endif /* defined (L_loop_ffsqi2) */
2731
2732 \f
2733 /**********************************
2734  * Count trailing Zeros (ctz)
2735  **********************************/
2736
2737 #if defined (L_ctzsi2)
2738 ;; count trailing zeros
2739 ;; r25:r24 = ctz32 (r25:r22)
2740 ;; clobbers: r26, r22
2741 ;; ctz(0) = 255
2742 ;; Note that ctz(0) is undefined for GCC
     ;; Computed as ffs32 (x) - 1.  Only the low byte r24 is decremented, so
     ;; the ffs result 0 for x == 0 turns into 255.
2743 DEFUN __ctzsi2
2744     XCALL __ffssi2
2745     dec  r24
2746     ret
2747 ENDF __ctzsi2
2748 #endif /* defined (L_ctzsi2) */
2749
2750 #if defined (L_ctzhi2)
2751 ;; count trailing zeros
2752 ;; r25:r24 = ctz16 (r25:r24)
2753 ;; clobbers: r26
2754 ;; ctz(0) = 255
2755 ;; Note that ctz(0) is undefined for GCC
     ;; Computed as ffs16 (x) - 1.  Only the low byte r24 is decremented, so
     ;; the ffs result 0 for x == 0 turns into 255.
2756 DEFUN __ctzhi2
2757     XCALL __ffshi2
2758     dec  r24
2759     ret
2760 ENDF __ctzhi2
2761 #endif /* defined (L_ctzhi2) */
2762
2763 \f
2764 /**********************************
2765  * Count leading Zeros (clz)
2766  **********************************/
2767
2768 #if defined (L_clzdi2)
2769 ;; count leading zeros
2770 ;; r25:r24 = clz64 (r25:r18)
2771 ;; clobbers: r22, r23, r26
     ;; Count the upper 32 bits first.  Only if that count has bit 5 set
     ;; (i.e. it is 32, the upper half is all zero) is the lower half counted
     ;; as well and 32 added to its result.
2772 DEFUN __clzdi2
2773     XCALL __clzsi2
         ;; Bit 5 clear: the upper half contains a 1, result is final
2774     sbrs r24, 5
2775     ret
         ;; Move the lower 32 bits into the argument registers of __clzsi2
2776     mov_l r22, r18
2777     mov_h r23, r19
2778     mov_l r24, r20
2779     mov_h r25, r21
2780     XCALL __clzsi2
         ;; r24 += 32
2781     subi r24, -32
2782     ret
2783 ENDF __clzdi2
2784 #endif /* defined (L_clzdi2) */
2785
2786 #if defined (L_clzsi2)
2787 ;; count leading zeros
2788 ;; r25:r24 = clz32 (r25:r22)
2789 ;; clobbers: r26
     ;; Count the upper 16 bits first.  Only if that count has bit 4 set
     ;; (i.e. it is 16, the upper half is all zero) is the lower half counted
     ;; as well and 16 added to its result.
2790 DEFUN __clzsi2
2791     XCALL __clzhi2
         ;; Bit 4 clear: the upper half contains a 1, result is final
2792     sbrs r24, 4
2793     ret
         ;; Move the lower 16 bits into the argument registers of __clzhi2
2794     mov_l r24, r22
2795     mov_h r25, r23
2796     XCALL __clzhi2
         ;; r24 += 16
2797     subi r24, -16
2798     ret
2799 ENDF __clzsi2
2800 #endif /* defined (L_clzsi2) */
2801
2802 #if defined (L_clzhi2)
     ;; __clzhi2 -- count leading zeros of a 16-bit value
     ;; In:       r25:r24 = x
     ;; Out:      r25:r24 = number of leading zero bits, 16 if x == 0
     ;; Clobbers: r26
     ;; r26 accumulates the count.  The byte to inspect ends up in r25:
     ;; the high byte if it is non-zero, otherwise the low byte (count += 8).
     ;; If the upper nibble of that byte is empty, the nibbles are swapped
     ;; and 4 is added to the count (3 here plus the INC that follows).
     ;; The remaining zeros are counted by shifting left until a 1 drops
     ;; into the carry.
2806 DEFUN __clzhi2
2807     clr  r26
2808     tst  r25
2809     brne .L__clzhi2_byte
2810     subi r26, -8
2811     or   r25, r24
2812     brne .L__clzhi2_byte
         ;; x == 0; r25 is already 0
2813     ldi  r24, 16
2814     ret
     .L__clzhi2_byte:
2815     cpi  r25, 16
2816     brsh .L__clzhi2_shift
2817     subi r26, -3
2818     swap r25
     .L__clzhi2_count:
2819     inc  r26
     .L__clzhi2_shift:
2820     lsl  r25
2821     brcc .L__clzhi2_count
2822     mov  r24, r26
2823     clr  r25
2824     ret
2825 ENDF __clzhi2
2826 #endif /* defined (L_clzhi2) */
2827
2828 \f
2829 /**********************************
2830  * Parity
2831  **********************************/
2832
2833 #if defined (L_paritydi2)
2834 ;; r25:r24 = parity64 (r25:r18)
2835 ;; clobbers: __tmp_reg__
     ;; Fold the four low bytes r18..r21 into r24 with XOR, then tail-jump
     ;; to __paritysi2 for the remaining bytes r22..r25.
2836 DEFUN __paritydi2
2837     eor  r24, r18
2838     eor  r24, r19
2839     eor  r24, r20
2840     eor  r24, r21
2841     XJMP __paritysi2
2842 ENDF __paritydi2
2843 #endif /* defined (L_paritydi2) */
2844
2845 #if defined (L_paritysi2)
2846 ;; r25:r24 = parity32 (r25:r22)
2847 ;; clobbers: __tmp_reg__
     ;; Fold the two low bytes r22 and r23 into r24 with XOR, then tail-jump
     ;; to __parityhi2 for r25:r24.
2848 DEFUN __paritysi2
2849     eor  r24, r22
2850     eor  r24, r23
2851     XJMP __parityhi2
2852 ENDF __paritysi2
2853 #endif /* defined (L_paritysi2) */
2854
2855 #if defined (L_parityhi2)
2856 ;; r25:r24 = parity16 (r25:r24)
2857 ;; clobbers: __tmp_reg__
     ;; Folds the high byte into the low byte and falls through into the
     ;; 8-bit routine below.
2858 DEFUN __parityhi2
2859     eor  r24, r25
2860 ;; FALLTHRU
2861 ENDF __parityhi2
2862
2863 ;; r25:r24 = parity8 (r24)
2864 ;; clobbers: __tmp_reg__
     ;; The parity is narrowed down step by step to fewer bits of r24, as
     ;; noted by the comments in the body, until it is left in bit 0.
2865 DEFUN __parityqi2
2866     ;; parity is in r24[0..7]
         ;; XOR the upper nibble onto the lower one
2867     mov  __tmp_reg__, r24
2868     swap __tmp_reg__
2869     eor  r24, __tmp_reg__
2870     ;; parity is in r24[0..3]
2871     subi r24, -4
2872     andi r24, -5
2873     subi r24, -6
2874     ;; parity is in r24[0,3]
         ;; Fold bit 3 into bit 0: adding 1 toggles bit 0
2875     sbrc r24, 3
2876     inc  r24
2877     ;; parity is in r24[0]
2878     andi r24, 1
2879     clr  r25
2880     ret
2881 ENDF __parityqi2
2882 #endif /* defined (L_parityhi2) */
2883
2884 \f
2885 /**********************************
2886  * Population Count
2887  **********************************/
2888
2889 #if defined (L_popcounthi2)
2890 ;; population count
2891 ;; r25:r24 = popcount16 (r25:r24)
2892 ;; clobbers: __tmp_reg__
     ;; The count of the low byte is parked on the stack while the high byte
     ;; is counted; __popcounthi2_tail adds the two.
2893 DEFUN __popcounthi2
2894     XCALL __popcountqi2
2895     push r24
2896     mov  r24, r25
2897     XCALL __popcountqi2
2898     clr  r25
2899     ;; FALLTHRU
2900 ENDF __popcounthi2
2901
     ;; Shared tail, also jumped to by __popcountsi2 and __popcountdi2:
     ;; r24 += the partial count that the caller pushed on the stack.
2902 DEFUN __popcounthi2_tail
2903     pop   __tmp_reg__
2904     add   r24, __tmp_reg__
2905     ret
2906 ENDF __popcounthi2_tail
2907 #endif /* defined (L_popcounthi2) */
2908
2909 #if defined (L_popcountsi2)
2910 ;; population count
2911 ;; r25:r24 = popcount32 (r25:r22)
2912 ;; clobbers: __tmp_reg__
     ;; Count the upper 16 bits, push that count, count the lower 16 bits
     ;; and let __popcounthi2_tail pop and add the pushed value.
2913 DEFUN __popcountsi2
2914     XCALL __popcounthi2
2915     push  r24
2916     mov_l r24, r22
2917     mov_h r25, r23
2918     XCALL __popcounthi2
2919     XJMP  __popcounthi2_tail
2920 ENDF __popcountsi2
2921 #endif /* defined (L_popcountsi2) */
2922
2923 #if defined (L_popcountdi2)
2924 ;; population count
2925 ;; r25:r24 = popcount64 (r25:r18)
2926 ;; clobbers: r22, r23, __tmp_reg__
     ;; Count the upper 32 bits, push that count, move the lower 32 bits into
     ;; r25:r22, count them and let __popcounthi2_tail pop and add the pushed
     ;; value.
2927 DEFUN __popcountdi2
2928     XCALL __popcountsi2
2929     push  r24
2930     mov_l r22, r18
2931     mov_h r23, r19
2932     mov_l r24, r20
2933     mov_h r25, r21
2934     XCALL __popcountsi2
2935     XJMP  __popcounthi2_tail
2936 ENDF __popcountdi2
2937 #endif /* defined (L_popcountdi2) */
2938
2939 #if defined (L_popcountqi2)
2940 ;; population count
2941 ;; r24 = popcount8 (r24)
2942 ;; clobbers: __tmp_reg__
     ;; r24 starts out as bit 0 of the input.  A copy of the input is then
     ;; shifted right one bit at a time and each bit that drops into the
     ;; carry is added to r24 with ADC.
2943 DEFUN __popcountqi2
2944     mov  __tmp_reg__, r24
2945     andi r24, 1
         ;; The first shift drops bit 0, which is already counted
2946     lsr  __tmp_reg__
2947     lsr  __tmp_reg__
2948     adc  r24, __zero_reg__
2949     lsr  __tmp_reg__
2950     adc  r24, __zero_reg__
2951     lsr  __tmp_reg__
2952     adc  r24, __zero_reg__
2953     lsr  __tmp_reg__
2954     adc  r24, __zero_reg__
2955     lsr  __tmp_reg__
2956     adc  r24, __zero_reg__
2957     lsr  __tmp_reg__
         ;; Carry = input bit 6 and __tmp_reg__ = input bit 7: add both at once
2958     adc  r24, __tmp_reg__
2959     ret
2960 ENDF __popcountqi2
2961 #endif /* defined (L_popcountqi2) */
2962
2963 \f
2964 /**********************************
2965  * Swap bytes
2966  **********************************/
2967
2968 ;; swap two registers with different register number
     ;; Exchanges the contents of a and b with three XORs, so no scratch
     ;; register is needed.  If both operands named the same register, the
     ;; first EOR would clear it, hence the restriction above.
2969 .macro bswap a, b
2970     eor \a, \b
2971     eor \b, \a
2972     eor \a, \b
2973 .endm
2974
2975 #if defined (L_bswapsi2)
2976 ;; swap bytes
2977 ;; r25:r22 = bswap32 (r25:r22)
     ;; Done in place: the outer bytes r22/r25 and the inner bytes r23/r24
     ;; are exchanged.  No other register is touched.
2978 DEFUN __bswapsi2
2979     bswap r22, r25
2980     bswap r23, r24
2981     ret
2982 ENDF __bswapsi2
2983 #endif /* defined (L_bswapsi2) */
2984
2985 #if defined (L_bswapdi2)
2986 ;; swap bytes
2987 ;; r25:r18 = bswap64 (r25:r18)
     ;; Done in place by exchanging the byte pairs from the outside inwards:
     ;; r18/r25, r19/r24, r20/r23, r21/r22.  No other register is touched.
2988 DEFUN __bswapdi2
2989     bswap r18, r25
2990     bswap r19, r24
2991     bswap r20, r23
2992     bswap r21, r22
2993     ret
2994 ENDF __bswapdi2
2995 #endif /* defined (L_bswapdi2) */
2996
2997 \f
2998 /**********************************
2999  * 64-bit shifts
3000  **********************************/
3001
3002 #if defined (L_ashrdi3)
     ;; __ashrdi3 -- 64-bit arithmetic shift right
     ;; r25:r18 = r25:r18 >> r16, sign bits shifted in from the top
     ;; The sign (bit 7 of r25) is handed to the common code below in bit 0
     ;; of __zero_reg__; __lshrdi3 consumes that bit and clears the register
     ;; again with its first instruction.
3005 DEFUN __ashrdi3
3006     bst     r25, 7
3007     bld     __zero_reg__, 0
3008     ;; FALLTHRU
3009 ENDF  __ashrdi3
3010
     ;; __lshrdi3 -- 64-bit logical shift right
     ;; r25:r18 = r25:r18 >> r16, zeros shifted in from the top
     ;; Only r16 of the count is read; it is saved and restored on the stack.
     ;; __tmp_reg__ becomes the fill byte: 0x00 here, 0xff when entered from
     ;; __ashrdi3 with a negative value.
3013 DEFUN __lshrdi3
         ;; Carry = bit 0 of __zero_reg__, which is 0 again afterwards
3014     lsr     __zero_reg__
         ;; __tmp_reg__ = -Carry
3015     sbc     __tmp_reg__, __tmp_reg__
3016     push    r16
     .L__lshrdi3_by_bytes:
         ;; While at least 8 bit positions remain, move whole bytes down
3017     cpi     r16, 8
3018     brlo    .L__lshrdi3_bits_left
3019     subi    r16, 8
3020     mov     r18, r19
3021     mov     r19, r20
3022     mov     r20, r21
3023     mov     r21, r22
3024     mov     r22, r23
3025     mov     r23, r24
3026     mov     r24, r25
3027     mov     r25, __tmp_reg__
3028     rjmp    .L__lshrdi3_by_bytes
     .L__lshrdi3_one_bit:
         ;; ASR leaves the fill byte as it is and copies its bit 0 to Carry
3029     asr     __tmp_reg__
3030     ror     r25
3031     ror     r24
3032     ror     r23
3033     ror     r22
3034     ror     r21
3035     ror     r20
3036     ror     r19
3037     ror     r18
     .L__lshrdi3_bits_left:
3038     dec     r16
3039     brpl    .L__lshrdi3_one_bit
3040     pop     r16
3041     ret
3042 ENDF __lshrdi3
3043 #endif /* defined (L_ashrdi3) */
3044
3045 #if defined (L_ashldi3)
     ;; __ashldi3 -- 64-bit shift left
     ;; r25:r18 = r25:r18 << r16, zeros shifted in from the bottom
     ;; Only r16 of the count is read; it is saved and restored on the stack.
3048 DEFUN __ashldi3
3049     push    r16
     .L__ashldi3_by_bytes:
         ;; While at least 8 bit positions remain, move whole bytes up
3050     cpi     r16, 8
3051     brlo    .L__ashldi3_bits_left
3052     mov     r25, r24
3053     mov     r24, r23
3054     mov     r23, r22
3055     mov     r22, r21
3056     mov     r21, r20
3057     mov     r20, r19
3058     mov     r19, r18
3059     clr     r18
3060     subi    r16, 8
3061     rjmp    .L__ashldi3_by_bytes
     .L__ashldi3_one_bit:
         ;; Shift the whole value left by one bit through the carry
3062     lsl     r18
3063     rol     r19
3064     rol     r20
3065     rol     r21
3066     rol     r22
3067     rol     r23
3068     rol     r24
3069     rol     r25
     .L__ashldi3_bits_left:
3070     dec     r16
3071     brpl    .L__ashldi3_one_bit
3072     pop     r16
3073     ret
3074 ENDF __ashldi3
3075 #endif /* defined (L_ashldi3) */
3076
3077 #if defined (L_rotldi3)
     ;; __rotldi3 -- 64-bit rotate left
     ;; r25:r18 = r25:r18 rotated left by r16 bit positions
     ;; Only r16 of the count is read; it is saved and restored on the stack.
     ;; Clobbers: __tmp_reg__
3080 DEFUN __rotldi3
3081     push    r16
     .L__rotldi3_by_bytes:
         ;; While at least 8 bit positions remain, rotate by a whole byte:
         ;; the top byte wraps around into the bottom byte.
3082     cpi     r16, 8
3083     brlo    .L__rotldi3_bits_left
3084     subi    r16, 8
3085     mov     __tmp_reg__, r25
3086     mov     r25, r24
3087     mov     r24, r23
3088     mov     r23, r22
3089     mov     r22, r21
3090     mov     r21, r20
3091     mov     r20, r19
3092     mov     r19, r18
3093     mov     r18, __tmp_reg__
3094     rjmp    .L__rotldi3_by_bytes
     .L__rotldi3_one_bit:
3095     lsl     r18
3096     rol     r19
3097     rol     r20
3098     rol     r21
3099     rol     r22
3100     rol     r23
3101     rol     r24
3102     rol     r25
         ;; Feed the bit that left r25 back into bit 0 of r18
3103     adc     r18, __zero_reg__
     .L__rotldi3_bits_left:
3104     dec     r16
3105     brpl    .L__rotldi3_one_bit
3106     pop     r16
3107     ret
3108 ENDF __rotldi3
3109 #endif /* defined (L_rotldi3) */
3110
3111 \f
3112 .section .text.libgcc.fmul, "ax", @progbits
3113
3114 /***********************************************************/
3115 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3116 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3117 /***********************************************************/
3118
3119 #define A1 24           /* 1st factor; high byte of the 16-bit A in __fmul */
3120 #define B1 25           /* 2nd factor */
3121 #define C0 22           /* result, low byte */
3122 #define C1 23           /* result, high byte */
3123 #define A0 __tmp_reg__  /* sign flag in __fmuls / __fmulsu; low byte of A in __fmul */
3124
3125 #ifdef L_fmuls
3126 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
3127 ;;; Clobbers: r24, r25, __tmp_reg__
     ;; Signed x signed: record the sign of the product, replace B1 by its
     ;; absolute value and continue in __fmulsu_exit, which does the same for
     ;; A1 and multiplies.
3128 DEFUN __fmuls
3129     ;; A0.7 = negate result?
         ;; The signs differ exactly if bit 7 of A1 ^ B1 is set
3130     mov  A0, A1
3131     eor  A0, B1
3132     ;; B1 = |B1|
3133     sbrc B1, 7
3134     neg  B1
3135     XJMP __fmulsu_exit
3136 ENDF __fmuls
3137 #endif /* L_fmuls */
3138
3139 #ifdef L_fmulsu
3140 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
3141 ;;; Clobbers: r24, r25, __tmp_reg__
     ;; Signed x unsigned: only A1 carries a sign, so the product is negative
     ;; exactly if A1 is.
3142 DEFUN __fmulsu
3143     ;; A0.7 = negate result?
3144     mov  A0, A1
3145 ;; FALLTHRU
3146 ENDF __fmulsu
3147
3148 ;; Helper for __fmuls and __fmulsu
     ;; Expects the negate flag in A0.7.  Takes the absolute value of A1 and
     ;; multiplies with __fmul.  If the flag is clear, __fmul is entered with
     ;; a jump and returns to the original caller; otherwise it is called and
     ;; the 16-bit result is negated afterwards.
3149 DEFUN __fmulsu_exit
3150     ;; A1 = |A1|
3151     sbrc A1, 7
3152     neg  A1
3153 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
3154     ;; Some cores have problem skipping 2-word instruction
3155     tst  A0
3156     brmi 1f
3157 #else
         ;; Skip the jump below if the result has to be negated
3158     sbrs A0, 7
3159 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
3160     XJMP  __fmul
3161 1:  XCALL __fmul
3162     ;; C = -C iff A0.7 = 1
3163     NEG2 C0
3164     ret
3165 ENDF __fmulsu_exit
3166 #endif /* L_fmulsu */
3167
3168
3169 #ifdef L_fmul
     ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
     ;;; Clobbers: r24, r25, __tmp_reg__
     ;; Shift-and-add multiplication.  A is the 16-bit value A1:A0, starting
     ;; out as A1:0, and is halved in every round.  B1 is shifted left in
     ;; every round, and A is added to the result C1:C0 whenever the bit
     ;; that reaches position 7 of B1 is set.  The loop ends as soon as B1
     ;; has become 0.
3172 DEFUN __fmul
         ;; C = 0 and low byte of A = 0
3174     clr   C0
3175     clr   C1
3176     clr   A0
         ;; Set the N flag from bit 7 of B1 for the first round
3177     tst   B1
3178     ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
     .L__fmul_round:
3179     brpl  .L__fmul_next
3180     ;; C += A
3181     add   C0, A0
3182     adc   C1, A1
     .L__fmul_next:
         ;; A >>= 1
3184     lsr   A1
3185     ror   A0
3186     ;; B <<= 1
         ;; This also sets N and Z for the two branches
3187     lsl   B1
3188     brne  .L__fmul_round
3189     ret
3190 ENDF __fmul
3191 #endif /* L_fmul */
3192
3193 #undef A0
3194 #undef A1
3195 #undef B1
3196 #undef C0
3197 #undef C1
3198
3199 #include "lib1funcs-fixed.S"