1 /* -*- Mode: Asm -*- */
2 /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Denis Chertykov <chertykov@gmail.com>
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 This file is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 #define __zero_reg__ r1
26 #define __tmp_reg__ r0
30 #define __RAMPZ__ 0x3B
33 /* Most of the functions here are called directly from avr.md
34 patterns, instead of using the standard libcall mechanisms.
35 This can make better code because GCC knows exactly which
36 of the call-used registers (not all of them) are clobbered. */
38 /* FIXME: At present, there is no SORT directive in the linker
39 script so that we must not assume that different modules
40 in the same input section like .libgcc.text.mul will be
41 located close together. Therefore, we cannot use
42 RCALL/RJMP to call a function like __udivmodhi4 from
43 __divmodhi4 and have to use lengthy XCALL/XJMP even
44 though they are in the same input section and all same
45 input sections together are small enough to reach every
46 location with a RCALL/RJMP instruction. */
48 .macro mov_l r_dest, r_src
49 #if defined (__AVR_HAVE_MOVW__)
56 .macro mov_h r_dest, r_src
57 #if defined (__AVR_HAVE_MOVW__)
64 .macro wmov r_dest, r_src
65 #if defined (__AVR_HAVE_MOVW__)
69 mov \r_dest+1, \r_src+1
73 #if defined (__AVR_HAVE_JMP_CALL__)
93 .section .text.libgcc.mul, "ax", @progbits
95 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
96 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
97 #if !defined (__AVR_HAVE_MUL__)
98 /*******************************************************
99 Multiplication 8 x 8 without MUL
100 *******************************************************/
101 #if defined (L_mulqi3)
103 #define r_arg2 r22 /* multiplicand */
104 #define r_arg1 r24 /* multiplier */
105 #define r_res __tmp_reg__ /* result */
108 clr r_res ; clear result
112 add r_arg2,r_arg2 ; shift multiplicand
113 breq __mulqi3_exit ; while multiplicand != 0
115 brne __mulqi3_loop ; exit if multiplier = 0
117 mov r_arg1,r_res ; result to return register
125 #endif /* defined (L_mulqi3) */
127 #if defined (L_mulqihi3)
137 #endif /* defined (L_mulqihi3) */
139 #if defined (L_umulqihi3)
145 #endif /* defined (L_umulqihi3) */
147 /*******************************************************
148 Multiplication 16 x 16 without MUL
149 *******************************************************/
150 #if defined (L_mulhi3)
151 #define r_arg1L r24 /* multiplier Low */
152 #define r_arg1H r25 /* multiplier High */
153 #define r_arg2L r22 /* multiplicand Low */
154 #define r_arg2H r23 /* multiplicand High */
155 #define r_resL __tmp_reg__ /* result Low */
156 #define r_resH r21 /* result High */
159 clr r_resH ; clear result
160 clr r_resL ; clear result
164 add r_resL,r_arg2L ; result + multiplicand
167 add r_arg2L,r_arg2L ; shift multiplicand
170 cp r_arg2L,__zero_reg__
171 cpc r_arg2H,__zero_reg__
172 breq __mulhi3_exit ; while multiplicand != 0
174 lsr r_arg1H ; gets LSB of multiplier
177 brne __mulhi3_loop ; exit if multiplier = 0
179 mov r_arg1H,r_resH ; result to return register
191 #endif /* defined (L_mulhi3) */
193 /*******************************************************
194 Widening Multiplication 32 = 16 x 16 without MUL
195 *******************************************************/
197 #if defined (L_mulhisi3)
199 ;;; FIXME: This is dead code (no one calls it)
212 #endif /* defined (L_mulhisi3) */
214 #if defined (L_umulhisi3)
216 ;;; FIXME: This is dead code (no one calls it)
225 #endif /* defined (L_umulhisi3) */
227 #if defined (L_mulsi3)
228 /*******************************************************
229 Multiplication 32 x 32 without MUL
230 *******************************************************/
231 #define r_arg1L r22 /* multiplier Low */
234 #define r_arg1HH r25 /* multiplier High */
236 #define r_arg2L r18 /* multiplicand Low */
239 #define r_arg2HH r21 /* multiplicand High */
241 #define r_resL r26 /* result Low */
244 #define r_resHH r31 /* result High */
247 clr r_resHH ; clear result
248 clr r_resHL ; clear result
249 clr r_resH ; clear result
250 clr r_resL ; clear result
254 add r_resL,r_arg2L ; result + multiplicand
259 add r_arg2L,r_arg2L ; shift multiplicand
261 adc r_arg2HL,r_arg2HL
262 adc r_arg2HH,r_arg2HH
264 lsr r_arg1HH ; gets LSB of multiplier
271 brne __mulsi3_loop ; exit if multiplier = 0
273 mov_h r_arg1HH,r_resHH ; result to return register
274 mov_l r_arg1HL,r_resHL
295 #endif /* defined (L_mulsi3) */
297 #endif /* !defined (__AVR_HAVE_MUL__) */
298 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
301 #if defined (__AVR_HAVE_MUL__)
316 /*******************************************************
317 Widening Multiplication 32 = 16 x 16
318 *******************************************************/
320 #if defined (L_mulhisi3)
321 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
322 ;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
323 ;;; Clobbers: __tmp_reg__
332 XJMP __usmulhisi3_tail
334 #endif /* L_mulhisi3 */
336 #if defined (L_usmulhisi3)
337 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
338 ;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
339 ;;; Clobbers: __tmp_reg__
345 DEFUN __usmulhisi3_tail
352 ENDF __usmulhisi3_tail
353 #endif /* L_usmulhisi3 */
355 #if defined (L_umulhisi3)
356 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
357 ;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
358 ;;; Clobbers: __tmp_reg__
373 #endif /* L_umulhisi3 */
375 /*******************************************************
376 Widening Multiplication 32 = 16 x 32
377 *******************************************************/
379 #if defined (L_mulshisi3)
380 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
381 ;;; (C3:C0) = (signed long) A1:A0 * B3:B0
382 ;;; Clobbers: __tmp_reg__
384 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
385 ;; Some cores have problem skipping 2-word instruction
390 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
395 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
396 ;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
397 ;;; Clobbers: __tmp_reg__
400 ;; One-extend R27:R26 (A1:A0)
405 #endif /* L_mulshisi3 */
407 #if defined (L_muluhisi3)
408 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
409 ;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
410 ;;; Clobbers: __tmp_reg__
423 #endif /* L_muluhisi3 */
425 /*******************************************************
426 Multiplication 32 x 32
427 *******************************************************/
429 #if defined (L_mulsi3)
430 ;;; R25:R22 = R25:R22 * R21:R18
431 ;;; (C3:C0) = C3:C0 * B3:B0
432 ;;; Clobbers: R26, R27, __tmp_reg__
440 ;; A1:A0 now contains the high word of A
451 #endif /* L_mulsi3 */
466 #endif /* __AVR_HAVE_MUL__ */
468 /*******************************************************
469 Multiplication 24 x 24
470 *******************************************************/
472 #if defined (L_mulpsi3)
474 ;; A[0..2]: In: Multiplicand; Out: Product
479 ;; B[0..2]: In: Multiplier
484 #if defined (__AVR_HAVE_MUL__)
486 ;; C[0..2]: Expand Result
491 ;; R24:R22 *= R20:R18
492 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
501 mul AA2, B0 $ add C2, r0
502 mul AA0, B2 $ add C2, r0
514 #else /* !HAVE_MUL */
516 ;; C[0..2]: Expand Result
521 ;; R24:R22 *= R20:R18
522 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
530 0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
531 LSR B2 $ ror B1 $ ror B0
533 ;; If the N-th Bit of B[] was set...
536 ;; ...then add A[] * 2^N to the Result C[]
537 ADD C0,A0 $ adc C1,A1 $ adc C2,A2
539 1: ;; Multiply A[] by 2
540 LSL A0 $ rol A1 $ rol A2
542 ;; Loop until B[] is 0
543 subi B0,0 $ sbci B1,0 $ sbci B2,0
546 ;; Copy C[] to the return Register A[]
558 #endif /* HAVE_MUL */
568 #endif /* L_mulpsi3 */
570 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
572 ;; A[0..2]: In: Multiplicand
577 ;; BB: In: Multiplier
585 ;; C[] = A[] * sign_extend (BB)
613 #endif /* L_mulsqipsi3 && HAVE_MUL */
615 /*******************************************************
616 Multiplication 64 x 64
617 *******************************************************/
619 #if defined (L_muldi3)
623 ;; A[0..7]: In: Multiplicand
634 ;; B[0..7]: In: Multiplier
644 #if defined (__AVR_HAVE_MUL__)
646 ;; Define C[] for convenience
647 ;; Notice that parts of C[] overlap A[] respective B[]
658 ;; R25:R18 *= R17:R10
659 ;; Ordinary ABI-Function
667 ;; Counting in Words, we have to perform a 4 * 4 Multiplication
670 mul A7,B0 $ $ mov C7,r0
671 mul A0,B7 $ $ add C7,r0
672 mul A6,B1 $ $ add C7,r0
673 mul A6,B0 $ mov C6,r0 $ add C7,r1
674 mul B6,A1 $ $ add C7,r0
675 mul B6,A0 $ add C6,r0 $ adc C7,r1
678 mul A2,B4 $ add C6,r0 $ adc C7,r1
679 mul A3,B4 $ $ add C7,r0
680 mul A2,B5 $ $ add C7,r0
697 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
707 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
717 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
721 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
756 #else /* !HAVE_MUL */
770 ;; R25:R18 *= R17:R10
771 ;; Ordinary ABI-Function
787 0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
788 ;; where N = 64 - Loop.
789 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
790 ;; B[] will have its initial Value again.
791 LSR B7 $ ror B6 $ ror B5 $ ror B4
792 ror B3 $ ror B2 $ ror B1 $ ror B0
794 ;; If the N-th Bit of B[] was set then...
796 ;; ...finish Rotation...
799 ;; ...and add A[] * 2^N to the Result C[]
800 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
801 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
803 1: ;; Multiply A[] by 2
804 LSL A0 $ rol A1 $ rol A2 $ rol A3
805 rol A4 $ rol A5 $ rol A6 $ rol A7
810 ;; We expanded the Result in C[]
811 ;; Copy Result to the Return Register A[]
835 #endif /* HAVE_MUL */
855 #endif /* L_muldi3 */
857 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
860 .section .text.libgcc.div, "ax", @progbits
862 /*******************************************************
863 Division 8 / 8 => (result + remainder)
864 *******************************************************/
865 #define r_rem r25 /* remainder */
866 #define r_arg1 r24 /* dividend, quotient */
867 #define r_arg2 r22 /* divisor */
868 #define r_cnt r23 /* loop count */
870 #if defined (L_udivmodqi4)
872 sub r_rem,r_rem ; clear remainder and carry
873 ldi r_cnt,9 ; init loop counter
874 rjmp __udivmodqi4_ep ; jump to entry point
876 rol r_rem ; shift dividend into remainder
877 cp r_rem,r_arg2 ; compare remainder & divisor
878 brcs __udivmodqi4_ep ; remainder < divisor: quotient bit stays 0
879 sub r_rem,r_arg2 ; remainder -= divisor
881 rol r_arg1 ; shift dividend (with CARRY)
882 dec r_cnt ; decrement loop counter
883 brne __udivmodqi4_loop
884 com r_arg1 ; complement result
885 ; because C flag was complemented in loop
888 #endif /* defined (L_udivmodqi4) */
890 #if defined (L_divmodqi4)
892 bst r_arg1,7 ; store sign of dividend
893 mov __tmp_reg__,r_arg1
894 eor __tmp_reg__,r_arg2; r0.7 is sign of result
896 neg r_arg1 ; dividend negative : negate
898 neg r_arg2 ; divisor negative : negate
899 XCALL __udivmodqi4 ; do the unsigned div/mod
901 neg r_rem ; correct remainder sign
904 neg r_arg1 ; correct result sign
908 #endif /* defined (L_divmodqi4) */
916 /*******************************************************
917 Division 16 / 16 => (result + remainder)
918 *******************************************************/
919 #define r_remL r26 /* remainder Low */
920 #define r_remH r27 /* remainder High */
922 /* return: remainder */
923 #define r_arg1L r24 /* dividend Low */
924 #define r_arg1H r25 /* dividend High */
926 /* return: quotient */
927 #define r_arg2L r22 /* divisor Low */
928 #define r_arg2H r23 /* divisor High */
930 #define r_cnt r21 /* loop count */
932 #if defined (L_udivmodhi4)
935 sub r_remH,r_remH ; clear remainder and carry
936 ldi r_cnt,17 ; init loop counter
937 rjmp __udivmodhi4_ep ; jump to entry point
939 rol r_remL ; shift dividend into remainder
941 cp r_remL,r_arg2L ; compare remainder & divisor
943 brcs __udivmodhi4_ep ; remainder < divisor
944 sub r_remL,r_arg2L ; restore remainder
947 rol r_arg1L ; shift dividend (with CARRY)
949 dec r_cnt ; decrement loop counter
950 brne __udivmodhi4_loop
953 ; div/mod results to return registers, as for the div() function
954 mov_l r_arg2L, r_arg1L ; quotient
955 mov_h r_arg2H, r_arg1H
956 mov_l r_arg1L, r_remL ; remainder
957 mov_h r_arg1H, r_remH
960 #endif /* defined (L_udivmodhi4) */
962 #if defined (L_divmodhi4)
966 bst r_arg1H,7 ; store sign of dividend
967 mov __tmp_reg__,r_arg2H
969 com __tmp_reg__ ; r0.7 is sign of result
970 rcall __divmodhi4_neg1 ; dividend negative: negate
973 rcall __divmodhi4_neg2 ; divisor negative: negate
974 XCALL __udivmodhi4 ; do the unsigned div/mod
976 rcall __divmodhi4_neg2 ; correct remainder sign
977 brtc __divmodhi4_exit
979 ;; correct dividend/remainder sign
985 ;; correct divisor/result sign
992 #endif /* defined (L_divmodhi4) */
1005 /*******************************************************
1006 Division 24 / 24 => (result + remainder)
1007 *******************************************************/
1009 ;; A[0..2]: In: Dividend; Out: Quotient
1014 ;; B[0..2]: In: Divisor; Out: Remainder
1019 ;; C[0..2]: Expand remainder
1020 #define C0 __zero_reg__
1027 #if defined (L_udivmodpsi4)
1028 ;; R24:R22 = R24:R22 udiv R20:R18
1029 ;; R20:R18 = R24:R22 umod R20:R18
1030 ;; Clobbers: R21, R25, R26
1035 ; Clear remainder and carry. C0 is already 0
1038 ; jump to entry point
1039 rjmp __udivmodpsi4_start
1041 ; shift dividend into remainder
1045 ; compare remainder & divisor
1049 brcs __udivmodpsi4_start ; remainder < divisor: quotient bit stays 0
1050 sub C0, B0 ; remainder -= divisor
1053 __udivmodpsi4_start:
1054 ; shift dividend (with CARRY)
1058 ; decrement loop counter
1060 brne __udivmodpsi4_loop
1064 ; div/mod results to return registers
1069 clr __zero_reg__ ; C0
1072 #endif /* defined (L_udivmodpsi4) */
1074 #if defined (L_divmodpsi4)
1075 ;; R24:R22 = R24:R22 div R20:R18
1076 ;; R20:R18 = R24:R22 mod R20:R18
1077 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
1080 ; R0.7 will contain the sign of the result:
1081 ; R0.7 = A.sign ^ B.sign
1083 ; T-flag = sign of dividend
1087 ; Adjust dividend's sign
1088 rcall __divmodpsi4_negA
1090 ; Adjust divisor's sign
1092 rcall __divmodpsi4_negB
1094 ; Do the unsigned div/mod
1097 ; Adjust quotient's sign
1099 rcall __divmodpsi4_negA
1101 ; Adjust remainder's sign
1102 brtc __divmodpsi4_end
1105 ; Correct divisor/remainder sign
1113 ; Correct dividend/quotient sign
1124 #endif /* defined (L_divmodpsi4) */
1140 /*******************************************************
1141 Division 32 / 32 => (result + remainder)
1142 *******************************************************/
1143 #define r_remHH r31 /* remainder High */
1146 #define r_remL r26 /* remainder Low */
1148 /* return: remainder */
1149 #define r_arg1HH r25 /* dividend High */
1150 #define r_arg1HL r24
1152 #define r_arg1L r22 /* dividend Low */
1154 /* return: quotient */
1155 #define r_arg2HH r21 /* divisor High */
1156 #define r_arg2HL r20
1158 #define r_arg2L r18 /* divisor Low */
1160 #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
1162 #if defined (L_udivmodsi4)
1164 ldi r_remL, 33 ; init loop counter
1167 sub r_remH,r_remH ; clear remainder and carry
1168 mov_l r_remHL, r_remL
1169 mov_h r_remHH, r_remH
1170 rjmp __udivmodsi4_ep ; jump to entry point
1172 rol r_remL ; shift dividend into remainder
1176 cp r_remL,r_arg2L ; compare remainder & divisor
1178 cpc r_remHL,r_arg2HL
1179 cpc r_remHH,r_arg2HH
1180 brcs __udivmodsi4_ep ; remainder < divisor: quotient bit stays 0
1181 sub r_remL,r_arg2L ; remainder -= divisor
1183 sbc r_remHL,r_arg2HL
1184 sbc r_remHH,r_arg2HH
1186 rol r_arg1L ; shift dividend (with CARRY)
1190 dec r_cnt ; decrement loop counter
1191 brne __udivmodsi4_loop
1192 ; __zero_reg__ now restored (r_cnt == 0)
1197 ; div/mod results to return registers, as for the ldiv() function
1198 mov_l r_arg2L, r_arg1L ; quotient
1199 mov_h r_arg2H, r_arg1H
1200 mov_l r_arg2HL, r_arg1HL
1201 mov_h r_arg2HH, r_arg1HH
1202 mov_l r_arg1L, r_remL ; remainder
1203 mov_h r_arg1H, r_remH
1204 mov_l r_arg1HL, r_remHL
1205 mov_h r_arg1HH, r_remHH
1208 #endif /* defined (L_udivmodsi4) */
1210 #if defined (L_divmodsi4)
1212 mov __tmp_reg__,r_arg2HH
1213 bst r_arg1HH,7 ; store sign of dividend
1215 com __tmp_reg__ ; r0.7 is sign of result
1216 rcall __divmodsi4_neg1 ; dividend negative: negate
1219 rcall __divmodsi4_neg2 ; divisor negative: negate
1220 XCALL __udivmodsi4 ; do the unsigned div/mod
1221 sbrc __tmp_reg__, 7 ; correct quotient sign
1222 rcall __divmodsi4_neg2
1223 brtc __divmodsi4_exit ; correct remainder sign
1225 ;; correct dividend/remainder sign
1235 ;; correct divisor/quotient sign
1246 #endif /* defined (L_divmodsi4) */
1249 /*******************************************************
1252 *******************************************************/
1254 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1255 ;; at least 16k of Program Memory. For smaller Devices, depend
1258 #if defined (__AVR_HAVE_JMP_CALL__)
1259 # define SPEED_DIV 8
1260 #elif defined (__AVR_HAVE_MOVW__)
1261 # define SPEED_DIV 16
1263 # define SPEED_DIV 0
1266 ;; A[0..7]: In: Dividend;
1267 ;; Out: Quotient (T = 0)
1268 ;; Out: Remainder (T = 1)
1278 ;; B[0..7]: In: Divisor; Out: Clobber
1288 ;; C[0..7]: Expand remainder; Out: Remainder (unused)
1298 ;; Holds Signs during Division Routine
1299 #define SS __tmp_reg__
1301 ;; Bit-Counter in Division Routine
1302 #define R_cnt __zero_reg__
1304 ;; Scratch Register for Negation
1307 #if defined (L_udivdi3)
1309 ;; R25:R18 = R24:R18 umod R17:R10
1310 ;; Ordinary ABI-Function
1314 rjmp __udivdi3_umoddi3
1317 ;; R25:R18 = R24:R18 udiv R17:R10
1318 ;; Ordinary ABI-Function
1324 DEFUN __udivdi3_umoddi3
1335 ENDF __udivdi3_umoddi3
1336 #endif /* L_udivdi3 */
1338 #if defined (L_udivmod64)
1340 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1341 ;; No Registers saved/restored; the Callers will take Care.
1342 ;; Preserves B[] and T-flag
1343 ;; T = 0: Compute Quotient in A[]
1344 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1348 ;; Clear Remainder (C6, C7 will follow)
1355 #if SPEED_DIV == 0 || SPEED_DIV == 16
1356 ;; Initialize Loop-Counter
1359 #endif /* SPEED_DIV */
1366 1: ;; Compare shifted Dividend against Divisor
1367 ;; If -- even after Shifting -- it is smaller...
1368 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
1369 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
1372 ;; ...then we can subtract it. Thus, it is legal to shift left
1373 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
1374 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
1375 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
1376 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
1382 ;; Shifted 64 Bits: A7 has traveled to C7
1384 ;; Divisor is greater than Dividend. We have:
1387 ;; Thus, we can return immediately
1390 2: ;; Initialize Bit-Counter with Number of Bits still to be performed
1393 ;; Push of A7 is not needed because C7 is still 0
1397 #elif SPEED_DIV == 16
1399 ;; Compare shifted Dividend against Divisor
1407 ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1408 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1409 wmov C2,A6 $ wmov C0,A4
1410 wmov A6,A2 $ wmov A4,A0
1411 wmov A2,C6 $ wmov A0,C4
1413 ;; Set Bit Counter to 32
1417 #error SPEED_DIV = ?
1418 #endif /* SPEED_DIV */
1420 ;; The very Division + Remainder Routine
1422 3: ;; Left-shift Dividend...
1423 lsl A0 $ rol A1 $ rol A2 $ rol A3
1424 rol A4 $ rol A5 $ rol A6 $ rol A7
1426 ;; ...into Remainder
1427 rol C0 $ rol C1 $ rol C2 $ rol C3
1428 rol C4 $ rol C5 $ rol C6 $ rol C7
1430 ;; Compare Remainder and Divisor
1431 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
1432 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
1436 ;; Divisor fits into Remainder: Subtract it from Remainder...
1437 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
1438 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
1440 ;; ...and set according Bit in the upcoming Quotient
1441 ;; The Bit will travel to its final Position
1444 4: ;; This Bit is done
1447 ;; __zero_reg__ is 0 again
1449 ;; T = 0: We are fine with the Quotient in A[]
1450 ;; T = 1: Copy Remainder to A[]
1456 ;; Move the Sign of the Result to SS.7
1462 #endif /* L_udivmod64 */
1465 #if defined (L_divdi3)
1467 ;; R25:R18 = R24:R18 mod R17:R10
1468 ;; Ordinary ABI-Function
1472 rjmp __divdi3_moddi3
1475 ;; R25:R18 = R24:R18 div R17:R10
1476 ;; Ordinary ABI-Function
1482 DEFUN __divdi3_moddi3
1487 ;; Both Signs are 0: the following Complexity is not needed
1488 XJMP __udivdi3_umoddi3
1489 #endif /* SPEED_DIV */
1492 ;; Save 12 Registers: Y, 17...8
1493 ;; No Frame needed (X = 0)
1496 ldi r30, lo8(gs(1f))
1497 ldi r31, hi8(gs(1f))
1498 XJMP __prologue_saves__ + ((18 - 12) * 2)
1500 1: ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
1501 ;; SS.6 will contain the Sign of the Remainder (A.sign)
1504 ;; Adjust Dividend's Sign as needed
1506 ;; Compiling for Speed we know that at least one Sign must be < 0
1507 ;; Thus, if A[] >= 0 then we know B[] < 0
1511 #endif /* SPEED_DIV */
1515 ;; Adjust Divisor's Sign and SS.7 as needed
1522 com B4 $ com B5 $ com B6 $ com B7
1523 $ com B1 $ com B2 $ com B3
1525 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
1526 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
1528 3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1531 ;; Adjust Result's Sign
1532 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1537 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1540 4: ;; Epilogue: Restore the Z = 12 Registers and return
1542 #if defined (__AVR_HAVE_8BIT_SP__)
1543 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1544 ;; so these lines are dead code. To make it work, devices without
1545 ;; SP_H must get their own multilib(s).
1549 #endif /* #SP = 8/16 */
1551 XJMP __epilogue_restores__ + ((18 - 12) * 2)
1553 ENDF __divdi3_moddi3
1559 #endif /* L_divdi3 */
1561 #if defined (L_negdi2)
1564 com A4 $ com A5 $ com A6 $ com A7
1565 $ com A1 $ com A2 $ com A3
1567 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
1568 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
1572 #endif /* L_negdi2 */
1602 .section .text.libgcc.prologue, "ax", @progbits
1604 /**********************************
1605 * This is a prologue subroutine
1606 **********************************/
1607 #if defined (L_prologue)
1609 ;; This function does not clobber T-flag; 64-bit division relies on it
1610 DEFUN __prologue_saves__
1629 #if defined (__AVR_HAVE_8BIT_SP__)
1630 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1631 ;; so these lines are dead code. To make it work, devices without
1632 ;; SP_H must get their own multilib(s).
1642 in __tmp_reg__,__SREG__
1645 out __SREG__,__tmp_reg__
1647 #endif /* #SP = 8/16 */
1649 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1655 ENDF __prologue_saves__
1656 #endif /* defined (L_prologue) */
1659 * This is an epilogue subroutine
1661 #if defined (L_epilogue)
1663 DEFUN __epilogue_restores__
1681 #if defined (__AVR_HAVE_8BIT_SP__)
1682 ;; FIXME: __AVR_HAVE_8BIT_SP__ is set on device level, not on core level
1683 ;; so these lines are dead code. To make it work, devices without
1684 ;; SP_H must get their own multilib(s).
1692 adc r29,__zero_reg__
1693 in __tmp_reg__,__SREG__
1696 out __SREG__,__tmp_reg__
1700 #endif /* #SP = 8/16 */
1702 ENDF __epilogue_restores__
1703 #endif /* defined (L_epilogue) */
1706 .section .fini9,"ax",@progbits
1712 /* Code from .fini8 ... .fini1 sections inserted by ld script. */
1714 .section .fini0,"ax",@progbits
1718 #endif /* defined (L_exit) */
1726 #endif /* defined (L_cleanup) */
1729 .section .text.libgcc, "ax", @progbits
1732 DEFUN __tablejump2__
1739 #if defined (__AVR_HAVE_LPMX__)
1742 mov r30, __tmp_reg__
1743 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1749 #else /* !HAVE_LPMX */
1755 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1756 in __tmp_reg__, __EIND__
1760 #endif /* !HAVE_LPMX */
1762 #endif /* defined (L_tablejump) */
1765 .section .init4,"ax",@progbits
1766 DEFUN __do_copy_data
1767 #if defined(__AVR_HAVE_ELPMX__)
1768 ldi r17, hi8(__data_end)
1769 ldi r26, lo8(__data_start)
1770 ldi r27, hi8(__data_start)
1771 ldi r30, lo8(__data_load_start)
1772 ldi r31, hi8(__data_load_start)
1773 ldi r16, hh8(__data_load_start)
1775 rjmp .L__do_copy_data_start
1776 .L__do_copy_data_loop:
1779 .L__do_copy_data_start:
1780 cpi r26, lo8(__data_end)
1782 brne .L__do_copy_data_loop
1783 #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
1784 ldi r17, hi8(__data_end)
1785 ldi r26, lo8(__data_start)
1786 ldi r27, hi8(__data_start)
1787 ldi r30, lo8(__data_load_start)
1788 ldi r31, hi8(__data_load_start)
1789 ldi r16, hh8(__data_load_start - 0x10000)
1790 .L__do_copy_data_carry:
1793 rjmp .L__do_copy_data_start
1794 .L__do_copy_data_loop:
1798 brcs .L__do_copy_data_carry
1799 .L__do_copy_data_start:
1800 cpi r26, lo8(__data_end)
1802 brne .L__do_copy_data_loop
1803 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
1804 ldi r17, hi8(__data_end)
1805 ldi r26, lo8(__data_start)
1806 ldi r27, hi8(__data_start)
1807 ldi r30, lo8(__data_load_start)
1808 ldi r31, hi8(__data_load_start)
1809 rjmp .L__do_copy_data_start
1810 .L__do_copy_data_loop:
1811 #if defined (__AVR_HAVE_LPMX__)
1818 .L__do_copy_data_start:
1819 cpi r26, lo8(__data_end)
1821 brne .L__do_copy_data_loop
1822 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
1824 #endif /* L_copy_data */
1826 /* __do_clear_bss is only necessary if there is anything in .bss section. */
1829 .section .init4,"ax",@progbits
1830 DEFUN __do_clear_bss
1831 ldi r17, hi8(__bss_end)
1832 ldi r26, lo8(__bss_start)
1833 ldi r27, hi8(__bss_start)
1834 rjmp .do_clear_bss_start
1837 .do_clear_bss_start:
1838 cpi r26, lo8(__bss_end)
1840 brne .do_clear_bss_loop
1842 #endif /* L_clear_bss */
1844 /* __do_global_ctors and __do_global_dtors are only necessary
1845 if there are any constructors/destructors. */
1848 .section .init6,"ax",@progbits
1849 DEFUN __do_global_ctors
1850 #if defined(__AVR_HAVE_RAMPZ__)
1851 ldi r17, hi8(__ctors_start)
1852 ldi r28, lo8(__ctors_end)
1853 ldi r29, hi8(__ctors_end)
1854 ldi r16, hh8(__ctors_end)
1855 rjmp .L__do_global_ctors_start
1856 .L__do_global_ctors_loop:
1858 sbc r16, __zero_reg__
1862 XCALL __tablejump_elpm__
1863 .L__do_global_ctors_start:
1864 cpi r28, lo8(__ctors_start)
1866 ldi r24, hh8(__ctors_start)
1868 brne .L__do_global_ctors_loop
1870 ldi r17, hi8(__ctors_start)
1871 ldi r28, lo8(__ctors_end)
1872 ldi r29, hi8(__ctors_end)
1873 rjmp .L__do_global_ctors_start
1874 .L__do_global_ctors_loop:
1879 .L__do_global_ctors_start:
1880 cpi r28, lo8(__ctors_start)
1882 brne .L__do_global_ctors_loop
1883 #endif /* defined(__AVR_HAVE_RAMPZ__) */
1884 ENDF __do_global_ctors
1885 #endif /* L_ctors */
1888 .section .fini6,"ax",@progbits
1889 DEFUN __do_global_dtors
1890 #if defined(__AVR_HAVE_RAMPZ__)
1891 ldi r17, hi8(__dtors_end)
1892 ldi r28, lo8(__dtors_start)
1893 ldi r29, hi8(__dtors_start)
1894 ldi r16, hh8(__dtors_start)
1895 rjmp .L__do_global_dtors_start
1896 .L__do_global_dtors_loop:
1898 sbc r16, __zero_reg__
1902 XCALL __tablejump_elpm__
1903 .L__do_global_dtors_start:
1904 cpi r28, lo8(__dtors_end)
1906 ldi r24, hh8(__dtors_end)
1908 brne .L__do_global_dtors_loop
1910 ldi r17, hi8(__dtors_end)
1911 ldi r28, lo8(__dtors_start)
1912 ldi r29, hi8(__dtors_start)
1913 rjmp .L__do_global_dtors_start
1914 .L__do_global_dtors_loop:
1919 .L__do_global_dtors_start:
1920 cpi r28, lo8(__dtors_end)
1922 brne .L__do_global_dtors_loop
1923 #endif /* defined(__AVR_HAVE_RAMPZ__) */
1924 ENDF __do_global_dtors
1925 #endif /* L_dtors */
1927 .section .text.libgcc, "ax", @progbits
1929 #ifdef L_tablejump_elpm
1930 DEFUN __tablejump_elpm__
1931 #if defined (__AVR_HAVE_ELPM__)
1932 #if defined (__AVR_HAVE_LPMX__)
1933 elpm __tmp_reg__, Z+
1935 mov r30, __tmp_reg__
1936 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1948 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1949 in __tmp_reg__, __EIND__
1954 #endif /* defined (__AVR_HAVE_ELPM__) */
1955 ENDF __tablejump_elpm__
1956 #endif /* defined (L_tablejump_elpm) */
1958 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1959 ;; Loading n bytes from Flash; n = 3,4
1960 ;; R22... = Flash[Z]
1961 ;; Clobbers: __tmp_reg__
1963 #if (defined (L_load_3) \
1964 || defined (L_load_4)) \
1965 && !defined (__AVR_HAVE_LPMX__)
1973 .macro .load dest, n
1976 .if \dest != D0+\n-1
1983 #if defined (L_load_3)
1990 #endif /* L_load_3 */
1992 #if defined (L_load_4)
2000 #endif /* L_load_4 */
2002 #endif /* L_load_3 || L_load_4 */
2004 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2005 ;; Loading n bytes from Flash; n = 2,3,4
2006 ;; R22... = Flash[R21:Z]
2007 ;; Clobbers: __tmp_reg__, R21, R30, R31
2009 #if (defined (L_xload_2) \
2010 || defined (L_xload_3) \
2011 || defined (L_xload_4)) \
2012 && defined (__AVR_HAVE_ELPM__) \
2013 && !defined (__AVR_HAVE_ELPMX__)
2015 #if !defined (__AVR_HAVE_RAMPZ__)
2017 #endif /* have RAMPZ */
2025 ;; Register containing bits 16+ of the address
2029 .macro .xload dest, n
2032 .if \dest != D0+\n-1
2034 adc HHI8, __zero_reg__
2039 #if defined (L_xload_2)
2046 #endif /* L_xload_2 */
2048 #if defined (L_xload_3)
2056 #endif /* L_xload_3 */
2058 #if defined (L_xload_4)
2067 #endif /* L_xload_4 */
2069 #endif /* L_xload_{2|3|4} && ELPM */
2072 .section .text.libgcc.builtins, "ax", @progbits
2074 /**********************************
2075 * Find first set Bit (ffs)
2076 **********************************/
2078 #if defined (L_ffssi2)
2079 ;; find first set bit
2080 ;; r25:r24 = ffs32 (r25:r22)
2081 ;; clobbers: r22, r26
2099 #endif /* defined (L_ffssi2) */
2101 #if defined (L_ffshi2)
2102 ;; find first set bit
2103 ;; r25:r24 = ffs16 (r25:r24)
2107 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2108 ;; Some cores have problem skipping 2-word instruction
2112 cpse r24, __zero_reg__
2113 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2114 1: XJMP __loop_ffsqi2
2120 #endif /* defined (L_ffshi2) */
2122 #if defined (L_loop_ffsqi2)
2123 ;; Helper for ffshi2, ffssi2
2124 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2135 #endif /* defined (L_loop_ffsqi2) */
2138 /**********************************
2139 * Count trailing Zeros (ctz)
2140 **********************************/
2142 #if defined (L_ctzsi2)
2143 ;; count trailing zeros
2144 ;; r25:r24 = ctz32 (r25:r22)
2145 ;; clobbers: r26, r22
2147 ;; Note that ctz(0) is undefined for GCC
2153 #endif /* defined (L_ctzsi2) */
2155 #if defined (L_ctzhi2)
2156 ;; count trailing zeros
2157 ;; r25:r24 = ctz16 (r25:r24)
2160 ;; Note that ctz(0) is undefined for GCC
2166 #endif /* defined (L_ctzhi2) */
2169 /**********************************
2170 * Count leading Zeros (clz)
2171 **********************************/
2173 #if defined (L_clzdi2)
2174 ;; count leading zeros
2175 ;; r25:r24 = clz64 (r25:r18)
2176 ;; clobbers: r22, r23, r26
2189 #endif /* defined (L_clzdi2) */
2191 #if defined (L_clzsi2)
2192 ;; count leading zeros
2193 ;; r25:r24 = clz32 (r25:r22)
2205 #endif /* defined (L_clzsi2) */
2207 #if defined (L_clzhi2)
2208 ;; count leading zeros
2209 ;; r25:r24 = clz16 (r25:r24)
2231 #endif /* defined (L_clzhi2) */
2234 /**********************************
2236 **********************************/
2238 #if defined (L_paritydi2)
2239 ;; r25:r24 = parity64 (r25:r18)
2240 ;; clobbers: __tmp_reg__
2248 #endif /* defined (L_paritydi2) */
2250 #if defined (L_paritysi2)
2251 ;; r25:r24 = parity32 (r25:r22)
2252 ;; clobbers: __tmp_reg__
2258 #endif /* defined (L_paritysi2) */
2260 #if defined (L_parityhi2)
2261 ;; r25:r24 = parity16 (r25:r24)
2262 ;; clobbers: __tmp_reg__
2268 ;; r25:r24 = parity8 (r24)
2269 ;; clobbers: __tmp_reg__
2271 ;; parity is in r24[0..7]
2272 mov __tmp_reg__, r24
2274 eor r24, __tmp_reg__
2275 ;; parity is in r24[0..3]
2279 ;; parity is in r24[0,3]
2282 ;; parity is in r24[0]
2287 #endif /* defined (L_parityhi2) */
2290 /**********************************
2292 **********************************/
2294 #if defined (L_popcounthi2)
2296 ;; r25:r24 = popcount16 (r25:r24)
2297 ;; clobbers: __tmp_reg__
2307 DEFUN __popcounthi2_tail
2309 add r24, __tmp_reg__
2311 ENDF __popcounthi2_tail
2312 #endif /* defined (L_popcounthi2) */
2314 #if defined (L_popcountsi2)
2316 ;; r25:r24 = popcount32 (r25:r22)
2317 ;; clobbers: __tmp_reg__
2324 XJMP __popcounthi2_tail
2326 #endif /* defined (L_popcountsi2) */
2328 #if defined (L_popcountdi2)
2330 ;; r25:r24 = popcount64 (r25:r18)
2331 ;; clobbers: r22, r23, __tmp_reg__
2340 XJMP __popcounthi2_tail
2342 #endif /* defined (L_popcountdi2) */
2344 #if defined (L_popcountqi2)
2346 ;; r24 = popcount8 (r24)
2347 ;; clobbers: __tmp_reg__
2349 mov __tmp_reg__, r24
2353 adc r24, __zero_reg__
2355 adc r24, __zero_reg__
2357 adc r24, __zero_reg__
2359 adc r24, __zero_reg__
2361 adc r24, __zero_reg__
2363 adc r24, __tmp_reg__
2366 #endif /* defined (L_popcountqi2) */
2369 /**********************************
2371 **********************************/
2373 ;; swap two registers with different register number
2380 #if defined (L_bswapsi2)
2382 ;; r25:r22 = bswap32 (r25:r22)
2388 #endif /* defined (L_bswapsi2) */
2390 #if defined (L_bswapdi2)
2392 ;; r25:r18 = bswap64 (r25:r18)
2400 #endif /* defined (L_bswapdi2) */
2403 /**********************************
2405 **********************************/
2407 #if defined (L_ashrdi3)
2408 ;; Arithmetic shift right
2409 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
2427 #endif /* defined (L_ashrdi3) */
2429 #if defined (L_lshrdi3)
2430 ;; Logic shift right
2431 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
2449 #endif /* defined (L_lshrdi3) */
2451 #if defined (L_ashldi3)
2453 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
2471 #endif /* defined (L_ashldi3) */
2474 .section .text.libgcc.fmul, "ax", @progbits
2476 /***********************************************************/
2477 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
2478 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
2479 /***********************************************************/
2485 #define A0 __tmp_reg__
2488 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
2489 ;;; Clobbers: r24, r25, __tmp_reg__
2491 ;; A0.7 = negate result?
2499 #endif /* L_fmuls */
2502 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
2503 ;;; Clobbers: r24, r25, __tmp_reg__
2505 ;; A0.7 = negate result?
2510 ;; Helper for __fmuls and __fmulsu
2515 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2516 ;; Some cores have problem skipping 2-word instruction
2521 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2524 ;; C = -C iff A0.7 = 1
2530 #endif /* L_fmulsu */
2534 ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
2535 ;;; Clobbers: r24, r25, __tmp_reg__
2542 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C.