libgcc/config/avr/lib1funcs.S

   1 /*  -*- Mode: Asm -*-  */
   2 /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
   3    Free Software Foundation, Inc.
   4    Contributed by Denis Chertykov <chertykov@gmail.com>
   5
   6 This file is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 This file is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 Under Section 7 of GPL version 3, you are granted additional
  17 permissions described in the GCC Runtime Library Exception, version
  18 3.1, as published by the Free Software Foundation.
  19
  20 You should have received a copy of the GNU General Public License and
  21 a copy of the GCC Runtime Library Exception along with this program;
  22 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23 <http://www.gnu.org/licenses/>.  */
  24
  25 #define __zero_reg__ r1
  26 #define __tmp_reg__ r0
  27 #define __SREG__ 0x3f
  28 #if defined (__AVR_HAVE_SPH__)
  29 #define __SP_H__ 0x3e
  30 #endif
  31 #define __SP_L__ 0x3d
  32 #define __RAMPZ__ 0x3B
  33 #define __EIND__  0x3C
  34
  35 /* Most of the functions here are called directly from avr.md
  36    patterns, instead of using the standard libcall mechanisms.
  37    This can make better code because GCC knows exactly which
  38    of the call-used registers (not all of them) are clobbered.  */
  39
  40 /* FIXME:  At present, there is no SORT directive in the linker
  41            script so that we must not assume that different modules
  42            in the same input section like .libgcc.text.mul will be
  43            located close together.  Therefore, we cannot use
  44            RCALL/RJMP to call a function like __udivmodhi4 from
  45            __divmodhi4 and have to use lengthy XCALL/XJMP even
  46            though they are in the same input section and all same
  47            input sections together are small enough to reach every
  48            location with a RCALL/RJMP instruction.  */
  49
   50         .macro  mov_l  r_dest, r_src
   51 #if defined (__AVR_HAVE_MOVW__)
   52         movw    \r_dest, \r_src         ; MOVW copies the whole register pair at once
   53 #else
   54         mov     \r_dest, \r_src         ; no MOVW: copy this byte only, mov_h copies the other one
   55 #endif
   56         .endm
  57
   58         .macro  mov_h  r_dest, r_src
   59 #if defined (__AVR_HAVE_MOVW__)
   60         ; empty: with MOVW the companion mov_l is expected to have copied this byte already
   61 #else
   62         mov     \r_dest, \r_src         ; no MOVW: copy the high byte separately
   63 #endif
   64         .endm
  65
   66 .macro  wmov  r_dest, r_src
   67 #if defined (__AVR_HAVE_MOVW__)
   68     movw \r_dest,   \r_src              ; copy the 16-bit register pair in one instruction
   69 #else
   70     mov \r_dest,    \r_src              ; low byte
   71     mov \r_dest+1,  \r_src+1            ; high byte; callers in this file pass plain register numbers
   72 #endif
   73 .endm
  74
   75 #if defined (__AVR_HAVE_JMP_CALL__)     /* JMP and CALL are available */
   76 #define XCALL call
   77 #define XJMP  jmp
   78 #else                                   /* otherwise fall back to RCALL and RJMP */
   79 #define XCALL rcall
   80 #define XJMP  rjmp
   81 #endif /* __AVR_HAVE_JMP_CALL__ */
  82
   83 .macro DEFUN name
   84 .global \name                           ; export the symbol
   85 .func \name                             ; paired with the .endfunc emitted by ENDF
   86 \name:
   87 .endm
  88
   89 .macro ENDF name
   90 .size \name, .-\name                    ; symbol size = distance from the entry label to here
   91 .endfunc                                ; closes the .func opened by DEFUN
   92 .endm
  93
   94 ;; Negate a 2-byte value held in consecutive registers:  -x = ~x + 1
   95 .macro NEG2  reg
   96     com     \reg+1                      ; high = ~high
   97     neg     \reg                        ; low = -low; carry gets set unless low was 0
   98     sbci    \reg+1, -1                  ; high = ~high + 1 - carry; SBCI needs an upper register (R16..R31)
   99 .endm
 100
  101 ;; Negate a 4-byte value held in consecutive registers:  -x = ~x + 1
  102 .macro NEG4  reg
  103     com     \reg+3                      ; complement the three upper bytes first
  104     com     \reg+2
  105     com     \reg+1
  106 .if \reg >= 16
  107     neg     \reg                        ; upper registers: NEG the low byte, then let SBCI ripple the +1
  108     sbci    \reg+1, -1
  109     sbci    \reg+2, -1
  110     sbci    \reg+3, -1
  111 .else
  112     com     \reg                        ; lower registers cannot use SBCI; COM always sets carry
  113     adc     \reg,   __zero_reg__        ; so this ADC chain adds the carry, i.e. +1
  114     adc     \reg+1, __zero_reg__
  115     adc     \reg+2, __zero_reg__
  116     adc     \reg+3, __zero_reg__
  117 .endif
  118 .endm
 119
 120 #define exp_lo(N)  hlo8 ((N) << 23)
 121 #define exp_hi(N)  hhi8 ((N) << 23)
 122
 123 \f
 124 .section .text.libgcc.mul, "ax", @progbits
 125
 126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 127 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
 128 #if !defined (__AVR_HAVE_MUL__)
 129 /*******************************************************
 130     Multiplication  8 x 8  without MUL
 131 *******************************************************/
 132 #if defined (L_mulqi3)
 133
 134 #define r_arg2  r22             /* multiplicand */
 135 #define r_arg1  r24             /* multiplier */
 136 #define r_res   __tmp_reg__     /* result */
 137
  138 DEFUN __mulqi3                  ; R24 = R24 * R22 (low 8 bits) by shift-and-add; R22 and __tmp_reg__ are overwritten
  139         clr     r_res           ; clear result
  140 __mulqi3_loop:
  141         sbrc    r_arg1,0        ; skip the add when bit 0 of the multiplier is clear
  142         add     r_res,r_arg2
  143         add     r_arg2,r_arg2   ; shift multiplicand
  144         breq    __mulqi3_exit   ; while multiplicand != 0
  145         lsr     r_arg1          ; move the next multiplier bit into bit 0
  146         brne    __mulqi3_loop   ; exit if multiplier = 0
  147 __mulqi3_exit:
  148         mov     r_arg1,r_res    ; result to return register
  149         ret
  150 ENDF __mulqi3
 151
 152 #undef r_arg2
 153 #undef r_arg1
 154 #undef r_res
 155
 156 #endif  /* defined (L_mulqi3) */
 157
 158
 159 /*******************************************************
 160     Widening Multiplication  16 = 8 x 8  without MUL
 161     Multiplication  16 x 16  without MUL
 162 *******************************************************/
 163
 164 #define A0  r22
 165 #define A1  r23
 166 #define B0  r24
 167 #define BB0 r20
 168 #define B1  r25
 169 ;; Output overlaps input, thus expand result in CC0/1
 170 #define C0  r24
 171 #define C1  r25
 172 #define CC0  __tmp_reg__
 173 #define CC1  R21
 174
 175 #if defined (L_umulqihi3)
 176 ;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
 177 ;;; (C1:C0) = (unsigned int) A0  * (unsigned int) B0
 178 ;;; Clobbers: __tmp_reg__, R21..R23
  179 DEFUN __umulqihi3
  180     clr     A1                  ; zero-extend A0 to 16 bits
  181     clr     B1                  ; zero-extend B0 to 16 bits
  182     XJMP    __mulhi3            ; tail call: the RET in __mulhi3 returns to our caller
  183 ENDF __umulqihi3
 184 #endif /* L_umulqihi3 */
 185
 186 #if defined (L_mulqihi3)
 187 ;;; R25:R24 = (signed int) R22 * (signed int) R24
 188 ;;; (C1:C0) = (signed int) A0  * (signed int) B0
 189 ;;; Clobbers: __tmp_reg__, R20..R23
  190 DEFUN __mulqihi3
  191     ;; Sign-extend B0
  192     clr     B1
  193     sbrc    B0, 7
  194     com     B1                  ; B1 = 0xff when B0 is negative
  195     ;; The multiplication runs twice as fast if A1 is zero, thus:
  196     ;; Zero-extend A0
  197     clr     A1
  198 #ifdef __AVR_HAVE_JMP_CALL__
  199     ;; Store  B0 * sign of A
  200     clr     BB0
  201     sbrc    A0, 7
  202     mov     BB0, B0             ; BB0 = B0 if A < 0, else 0
  203     call    __mulhi3            ; BB0 (R20) is not touched by __mulhi3
  204 #else /* have no CALL */
  205     ;; Skip sign-extension of A if A >= 0
  206     ;; Same size as with the first alternative but avoids errata skip
  207     ;; and is faster if A >= 0
  208     sbrs    A0, 7
  209     rjmp    __mulhi3            ; A >= 0: tail call, no correction needed afterwards
  210     ;; If  A < 0  store B
  211     mov     BB0, B0
  212     rcall   __mulhi3
  213 #endif /* HAVE_JMP_CALL */
  214     ;; 1-extend A after the multiplication
  215     sub     C1, BB0             ; a negative A was multiplied as A + 256, so take B0 off the high byte
  216     ret
  217 ENDF __mulqihi3
 218 #endif /* L_mulqihi3 */
 219
 220 #if defined (L_mulhi3)
 221 ;;; R25:R24 = R23:R22 * R25:R24
 222 ;;; (C1:C0) = (A1:A0) * (B1:B0)
 223 ;;; Clobbers: __tmp_reg__, R21..R23
  224 DEFUN __mulhi3
  225
  226     ;; Clear result
  227     clr     CC0
  228     clr     CC1
  229     rjmp 3f                     ; enter the loop at the B == 0 test
  230 1:
  231     ;; Bit n of A is 1  -->  C += B << n
  232     add     CC0, B0
  233     adc     CC1, B1
  234 2:
  235     lsl     B0                  ; B <<= 1
  236     rol     B1
  237 3:
  238     ;; If B == 0 we are ready
  239     sbiw    B0, 0               ; 16-bit test of B1:B0 that leaves the value unchanged
  240     breq 9f
  241
  242     ;; Carry = n-th bit of A
  243     lsr     A1
  244     ror     A0
  245     ;; If bit n of A is set, then go add  B * 2^n  to  C
  246     brcs 1b
  247
  248     ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
  249     ;; Thus, it is sufficient to CPC the high part to test A against 0
  250     cpc     A1, __zero_reg__
  251     ;; Only proceed if A != 0
  252     brne    2b
  253 9:
  254     ;; Move Result into place
  255     mov     C0, CC0
  256     mov     C1, CC1
  257     ret
  258 ENDF  __mulhi3
 259 #endif /* L_mulhi3 */
 260
 261 #undef A0
 262 #undef A1
 263 #undef B0
 264 #undef BB0
 265 #undef B1
 266 #undef C0
 267 #undef C1
 268 #undef CC0
 269 #undef CC1
 270
 271 \f
 272 #define A0 22
 273 #define A1 A0+1
 274 #define A2 A0+2
 275 #define A3 A0+3
 276
 277 #define B0 18
 278 #define B1 B0+1
 279 #define B2 B0+2
 280 #define B3 B0+3
 281
 282 #define CC0 26
 283 #define CC1 CC0+1
 284 #define CC2 30
 285 #define CC3 CC2+1
 286
 287 #define C0 22
 288 #define C1 C0+1
 289 #define C2 C0+2
 290 #define C3 C0+3
 291
 292 /*******************************************************
 293     Widening Multiplication  32 = 16 x 16  without MUL
 294 *******************************************************/
 295
 296 #if defined (L_umulhisi3)
  297 DEFUN __umulhisi3               ; R25:R22 = (unsigned long) R23:R22 * (unsigned long) R25:R24, via __mulsi3
  298     wmov    B0, 24              ; move the second operand from R25:R24 into B1:B0
  299     ;; Zero-extend B
  300     clr     B2
  301     clr     B3
  302     ;; Zero-extend A
  303     wmov    A2, B2              ; B3:B2 is 0, so this clears A3:A2
  304     XJMP    __mulsi3            ; tail call
  305 ENDF __umulhisi3
 306 #endif /* L_umulhisi3 */
 307
 308 #if defined (L_mulhisi3)
  309 DEFUN __mulhisi3                ; R25:R22 = (signed long) R23:R22 * (signed long) R25:R24, via __mulsi3
  310     wmov    B0, 24              ; move the second operand from R25:R24 into B1:B0
  311     ;; Sign-extend B
  312     lsl     r25                 ; carry = sign bit of B; R25 is rewritten below as A3
  313     sbc     B2, B2              ; B2 = 0x00 or 0xff depending on carry
  314     mov     B3, B2
  315 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
  316     ;; Sign-extend A
  317     clr     A2
  318     sbrc    A1, 7
  319     com     A2
  320     mov     A3, A2
  321     XJMP __mulsi3
  322 #else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
  323     ;; Zero-extend A and __mulsi3 will run at least twice as fast
  324     ;; compared to a sign-extended A.
  325     clr     A2
  326     clr     A3
  327     sbrs    A1, 7               ; A >= 0: plain multiplication is enough
  328     XJMP __mulsi3
  329     ;; If  A < 0  then perform the  B * 0xffff.... before the
  330     ;; very multiplication by initializing the high part of the
  331     ;; result CC with -B.
  332     wmov    CC2, A2             ; CC3:CC2 = 0 (A3:A2 was just cleared)
  333     sub     CC2, B0             ; CC3:CC2 = -B1:B0
  334     sbc     CC3, B1
  335     XJMP __mulsi3_helper        ; enter after the point where __mulsi3 would clear CC3:CC2
  336 #endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
  337 ENDF __mulhisi3
 338 #endif /* L_mulhisi3 */
 339
 340
 341 /*******************************************************
 342     Multiplication  32 x 32  without MUL
 343 *******************************************************/
 344
 345 #if defined (L_mulsi3)
  346 DEFUN __mulsi3                  ; R25:R22 = R25:R22 * R21:R18 (low 32 bits) by shift-and-add
  347     ;; Clear result
  348     clr     CC2
  349     clr     CC3
  350     ;; FALLTHRU
  351 ENDF  __mulsi3
  352
  353 DEFUN __mulsi3_helper           ; same as __mulsi3 but the caller presets the high result word CC3:CC2
  354     clr     CC0
  355     clr     CC1
  356     rjmp 3f                     ; start by fetching bit 0 of A
  357
  358 1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
  359     ;; CC += B
  360     add  CC0,B0  $  adc  CC1,B1  $  adc  CC2,B2  $  adc  CC3,B3
  361
  362 2:  ;; B <<= 1
  363     lsl  B0      $  rol  B1      $  rol  B2      $  rol  B3
  364
  365 3:  ;; A >>= 1:  Carry = n-th bit of A
  366     lsr  A3      $  ror  A2      $  ror  A1      $  ror  A0
  367
  368     brcs 1b
  369     ;; Only continue if  A != 0
  370     sbci    A1, 0               ; carry is 0 here; Z stays set only if ROR left A0 == 0 and A1 == 0
  371     brne 2b
  372     sbiw    A2, 0               ; test the upper word A3:A2
  373     brne 2b
  374
  375     ;; All bits of A are consumed:  Copy result to return register C
  376     wmov    C0, CC0
  377     wmov    C2, CC2
  378     ret
  379 ENDF __mulsi3_helper
 380 #endif /* L_mulsi3 */
 381
 382 #undef A0
 383 #undef A1
 384 #undef A2
 385 #undef A3
 386 #undef B0
 387 #undef B1
 388 #undef B2
 389 #undef B3
 390 #undef C0
 391 #undef C1
 392 #undef C2
 393 #undef C3
 394 #undef CC0
 395 #undef CC1
 396 #undef CC2
 397 #undef CC3
 398
 399 #endif /* !defined (__AVR_HAVE_MUL__) */
 400 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 401 \f
 402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 403 #if defined (__AVR_HAVE_MUL__)
 404 #define A0 26
 405 #define B0 18
 406 #define C0 22
 407
 408 #define A1 A0+1
 409
 410 #define B1 B0+1
 411 #define B2 B0+2
 412 #define B3 B0+3
 413
 414 #define C1 C0+1
 415 #define C2 C0+2
 416 #define C3 C0+3
 417
 418 /*******************************************************
 419     Widening Multiplication  32 = 16 x 16  with MUL
 420 *******************************************************/
 421
 422 #if defined (L_mulhisi3)
 423 ;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
 424 ;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
 425 ;;; Clobbers: __tmp_reg__
  426 DEFUN __mulhisi3
  427     XCALL   __umulhisi3         ; unsigned product first, then fix up for the signs
  428     ;; Sign-extend B
  429     tst     B1
  430     brpl    1f
  431     sub     C2, A0              ; B < 0: subtract A from the high word of the product
  432     sbc     C3, A1
  433 1:  ;; Sign-extend A
  434     XJMP __usmulhisi3_tail      ; tail call: applies the same correction for A < 0 and returns
  435 ENDF __mulhisi3
 436 #endif /* L_mulhisi3 */
 437
 438 #if defined (L_usmulhisi3)
 439 ;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
 440 ;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
 441 ;;; Clobbers: __tmp_reg__
  442 DEFUN __usmulhisi3
  443     XCALL   __umulhisi3         ; unsigned product first
  444     ;; FALLTHRU
  445 ENDF __usmulhisi3
  446
  447 DEFUN __usmulhisi3_tail         ; also entered directly from __mulhisi3
  448     ;; Sign-extend A
  449     sbrs    A1, 7
  450     ret                         ; A >= 0: the unsigned product is already correct
  451     sub     C2, B0              ; A < 0: subtract B from the high word of the product
  452     sbc     C3, B1
  453     ret
  454 ENDF __usmulhisi3_tail
 455 #endif /* L_usmulhisi3 */
 456
 457 #if defined (L_umulhisi3)
 458 ;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
 459 ;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
 460 ;;; Clobbers: __tmp_reg__
  461 DEFUN __umulhisi3
  462     mul     A0, B0              ; low * low   -> C1:C0
  463     movw    C0, r0
  464     mul     A1, B1              ; high * high -> C3:C2
  465     movw    C2, r0
  466     mul     A0, B1              ; first cross product, added in at C1 below
  467 #ifdef __AVR_HAVE_JMP_CALL__
  468     ;; This function is used by many other routines, often multiple times.
  469     ;; Therefore, if the flash size is not too limited, avoid the RCALL
  470     ;; and invest 6 Bytes to speed things up.
  471     add     C1, r0
  472     adc     C2, r1
  473     clr     __zero_reg__        ; R1 held the product high byte; CLR leaves the carry untouched
  474     adc     C3, __zero_reg__
  475 #else
  476     rcall   1f                  ; run the add sequence at 1: once as a subroutine, then continue here
  477 #endif
  478     mul     A1, B0              ; second cross product
  479 1:  add     C1, r0
  480     adc     C2, r1
  481     clr     __zero_reg__        ; restore R1 = 0 without disturbing the carry
  482     adc     C3, __zero_reg__
  483     ret
  484 ENDF __umulhisi3
 485 #endif /* L_umulhisi3 */
 486
 487 /*******************************************************
 488     Widening Multiplication  32 = 16 x 32  with MUL
 489 *******************************************************/
 490
 491 #if defined (L_mulshisi3)
 492 ;;; R25:R22 = (signed long) R27:R26 * R21:R18
 493 ;;; (C3:C0) = (signed long) A1:A0   * B3:B0
 494 ;;; Clobbers: __tmp_reg__
  495 DEFUN __mulshisi3
  496 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
  497     ;; Some cores have problem skipping 2-word instruction
  498     tst     A1
  499     brmi    __mulohisi3
  500 #else
  501     sbrs    A1, 7               ; A < 0: skip the XJMP and fall through into __mulohisi3
  502 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
  503     XJMP    __muluhisi3         ; A >= 0: the unsigned routine gives the result directly
  504     ;; FALLTHRU
  505 ENDF __mulshisi3
  506
  507 ;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
  508 ;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
  509 ;;; Clobbers: __tmp_reg__
  510 DEFUN __mulohisi3
  511     XCALL   __muluhisi3
  512     ;; One-extend R27:R26 (A1:A0)
  513     sub     C2, B0              ; treat the upper word of A as 0xffff: subtract B1:B0 from the high word
  514     sbc     C3, B1
  515     ret
  516 ENDF __mulohisi3
 517 #endif /* L_mulshisi3 */
 518
 519 #if defined (L_muluhisi3)
 520 ;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
 521 ;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
 522 ;;; Clobbers: __tmp_reg__
  523 DEFUN __muluhisi3
  524     XCALL   __umulhisi3         ; C3:C0 = A1:A0 * B1:B0
  525     mul     A0, B3              ; the remaining partial products only reach C2 and C3
  526     add     C3, r0              ; only the low product byte fits into the 32-bit result
  527     mul     A1, B2
  528     add     C3, r0
  529     mul     A0, B2
  530     add     C2, r0
  531     adc     C3, r1
  532     clr     __zero_reg__        ; MUL overwrote R1, restore it to 0
  533     ret
  534 ENDF __muluhisi3
 535 #endif /* L_muluhisi3 */
 536
 537 /*******************************************************
 538     Multiplication  32 x 32  with MUL
 539 *******************************************************/
 540
 541 #if defined (L_mulsi3)
 542 ;;; R25:R22 = R25:R22 * R21:R18
 543 ;;; (C3:C0) = C3:C0   * B3:B0
 544 ;;; Clobbers: R26, R27, __tmp_reg__
  545 DEFUN __mulsi3
  546     movw    A0, C0              ; A1:A0 = low word of the first operand
  547     push    C2                  ; keep the high word on the stack across the call
  548     push    C3
  549     XCALL   __muluhisi3         ; C3:C0 = low word * B3:B0
  550     pop     A1
  551     pop     A0
  552     ;; A1:A0 now contains the high word of A
  553     mul     A0, B0              ; high word * B only contributes to C2 and C3
  554     add     C2, r0
  555     adc     C3, r1
  556     mul     A0, B1
  557     add     C3, r0
  558     mul     A1, B0
  559     add     C3, r0
  560     clr     __zero_reg__        ; MUL overwrote R1, restore it to 0
  561     ret
  562 ENDF __mulsi3
 563 #endif /* L_mulsi3 */
 564
 565 #undef A0
 566 #undef A1
 567
 568 #undef B0
 569 #undef B1
 570 #undef B2
 571 #undef B3
 572
 573 #undef C0
 574 #undef C1
 575 #undef C2
 576 #undef C3
 577
 578 #endif /* __AVR_HAVE_MUL__ */
 579
 580 /*******************************************************
  581        Multiplication 24 x 24 with and without MUL
 582 *******************************************************/
 583
 584 #if defined (L_mulpsi3)
 585
 586 ;; A[0..2]: In: Multiplicand; Out: Product
 587 #define A0  22
 588 #define A1  A0+1
 589 #define A2  A0+2
 590
 591 ;; B[0..2]: In: Multiplier
 592 #define B0  18
 593 #define B1  B0+1
 594 #define B2  B0+2
 595
 596 #if defined (__AVR_HAVE_MUL__)
 597
 598 ;; C[0..2]: Expand Result
 599 #define C0  22
 600 #define C1  C0+1
 601 #define C2  C0+2
 602
 603 ;; R24:R22 *= R20:R18
 604 ;; Clobbers: r21, r25, r26, r27, __tmp_reg__
 605
 606 #define AA0 26
 607 #define AA2 21
 608
  609 DEFUN __mulpsi3
  610     wmov    AA0, A0             ; low word of A becomes the R27:R26 operand of __umulhisi3
  611     mov     AA2, A2             ; keep the top byte of A, the call below overwrites R24
  612     XCALL   __umulhisi3         ; R25:R22 = low word of A * low word of B
  613     mul     AA2, B0     $  add  C2, r0
  614     mul     AA0, B2     $  add  C2, r0
  615     clr     __zero_reg__        ; MUL overwrote R1, restore it to 0
  616     ret
  617 ENDF __mulpsi3
 618
 619 #undef AA2
 620 #undef AA0
 621
 622 #undef C2
 623 #undef C1
 624 #undef C0
 625
 626 #else /* !HAVE_MUL */
 627
 628 ;; C[0..2]: Expand Result
 629 #define C0  0
 630 #define C1  C0+1
 631 #define C2  21
 632
 633 ;; R24:R22 *= R20:R18
 634 ;; Clobbers: __tmp_reg__, R18, R19, R20, R21
 635
  636 DEFUN __mulpsi3
  637
  638     ;; C[] = 0
  639     clr     __tmp_reg__         ; C0 is R0; C1 is R1 (__zero_reg__), which is not cleared here
  640     clr     C2
  641
  642 0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
  643     LSR  B2     $  ror  B1     $  ror  B0
  644
  645     ;; If the N-th Bit of B[] was set...
  646     brcc    1f
  647
  648     ;; ...then add A[] * 2^N to the Result C[]
  649     ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2
  650
  651 1:  ;; Multiply A[] by 2
  652     LSL  A0     $  rol  A1     $  rol  A2
  653
  654     ;; Loop until B[] is 0
  655     subi B0,0   $  sbci B1,0   $  sbci B2,0
  656     brne    0b
  657
  658     ;; Copy C[] to the return Register A[]
  659     wmov    A0, C0
  660     mov     A2, C2
  661
  662     clr     __zero_reg__        ; R1 served as C1, restore it to 0
  663     ret
  664 ENDF __mulpsi3
 665
 666 #undef C2
 667 #undef C1
 668 #undef C0
 669
 670 #endif /* HAVE_MUL */
 671
 672 #undef B2
 673 #undef B1
 674 #undef B0
 675
 676 #undef A2
 677 #undef A1
 678 #undef A0
 679
 680 #endif /* L_mulpsi3 */
 681
 682 #if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
 683
 684 ;; A[0..2]: In: Multiplicand
 685 #define A0  22
 686 #define A1  A0+1
 687 #define A2  A0+2
 688
 689 ;; BB: In: Multiplier
 690 #define BB  25
 691
 692 ;; C[0..2]: Result
 693 #define C0  18
 694 #define C1  C0+1
 695 #define C2  C0+2
 696
 697 ;; C[] = A[] * sign_extend (BB)
  698 DEFUN __mulsqipsi3
  699     mul     A0, BB              ; MUL is unsigned: BB is taken as 0..255 here
  700     movw    C0, r0
  701     mul     A2, BB
  702     mov     C2, r0              ; only the low product byte fits into the 24-bit result
  703     mul     A1, BB
  704     add     C1, r0
  705     adc     C2, r1
  706     clr     __zero_reg__        ; MUL overwrote R1, restore it to 0
  707     sbrs    BB, 7
  708     ret                         ; BB >= 0: done
  709     ;; One-extend BB
  710     sub     C1, A0              ; a negative BB was multiplied as BB + 256, so subtract A << 8
  711     sbc     C2, A1
  712     ret
  713 ENDF __mulsqipsi3
 714
 715 #undef C2
 716 #undef C1
 717 #undef C0
 718
 719 #undef BB
 720
 721 #undef A2
 722 #undef A1
 723 #undef A0
 724
 725 #endif /* L_mulsqipsi3  &&  HAVE_MUL */
 726
 727 /*******************************************************
 728        Multiplication 64 x 64
 729 *******************************************************/
 730
 731 #if defined (L_muldi3)
 732
 733 ;; A[] = A[] * B[]
 734
 735 ;; A[0..7]: In: Multiplicand
 736 ;; Out: Product
 737 #define A0  18
 738 #define A1  A0+1
 739 #define A2  A0+2
 740 #define A3  A0+3
 741 #define A4  A0+4
 742 #define A5  A0+5
 743 #define A6  A0+6
 744 #define A7  A0+7
 745
 746 ;; B[0..7]: In: Multiplier
 747 #define B0  10
 748 #define B1  B0+1
 749 #define B2  B0+2
 750 #define B3  B0+3
 751 #define B4  B0+4
 752 #define B5  B0+5
 753 #define B6  B0+6
 754 #define B7  B0+7
 755
 756 #if defined (__AVR_HAVE_MUL__)
 757
 758 ;; Define C[] for convenience
  759 ;; Notice that parts of C[] overlap A[] and B[]
 760 #define C0  16
 761 #define C1  C0+1
 762 #define C2  20
 763 #define C3  C2+1
 764 #define C4  28
 765 #define C5  C4+1
 766 #define C6  C4+2
 767 #define C7  C4+3
 768
 769 ;; A[]     *= B[]
 770 ;; R25:R18 *= R17:R10
 771 ;; Ordinary ABI-Function
 772
  773 DEFUN __muldi3
  774     push    r29                 ; R28/R29 (C4/C5) and R16/R17 (C0/C1) are restored before returning
  775     push    r28
  776     push    r17
  777     push    r16
  778
  779     ;; Counting in Words, we have to perform a 4 * 4 Multiplication
  780
  781     ;; 3 * 0  +  0 * 3
  782     mul  A7,B0  $             $  mov C7,r0
  783     mul  A0,B7  $             $  add C7,r0
  784     mul  A6,B1  $             $  add C7,r0
  785     mul  A6,B0  $  mov C6,r0  $  add C7,r1
  786     mul  B6,A1  $             $  add C7,r0
  787     mul  B6,A0  $  add C6,r0  $  adc C7,r1
  788
  789     ;; 1 * 2
  790     mul  A2,B4  $  add C6,r0  $  adc C7,r1
  791     mul  A3,B4  $             $  add C7,r0
  792     mul  A2,B5  $             $  add C7,r0
  793
  794     push    A5                  ; operands saved here are popped further down as inputs of __umulhisi3
  795     push    A4
  796     push    B1
  797     push    B0
  798     push    A3
  799     push    A2
  800
  801     ;; 0 * 0
  802     wmov    26, B0
  803     XCALL   __umulhisi3
  804     wmov    C0, 22
  805     wmov    C2, 24              ; C3:C2 is R21:R20, i.e. A3:A2, which is why those were pushed above
  806
  807     ;; 0 * 2
  808     wmov    26, B4
  809     XCALL   __umulhisi3  $  wmov C4,22            $ add C6,24 $ adc C7,25
  810
  811     wmov    26, B2
  812     ;; 0 * 1
  813     rcall   __muldi3_6
  814
  815     pop     A0                  ; R19:R18 = saved A3:A2
  816     pop     A1
  817     ;; 1 * 1
  818     wmov    26, B2
  819     XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
  820
  821     pop     r26                 ; R27:R26 = saved B1:B0
  822     pop     r27
  823     ;; 1 * 0
  824     rcall   __muldi3_6
  825
  826     pop     A0                  ; R19:R18 = saved A5:A4
  827     pop     A1
  828     ;; 2 * 0
  829     XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
  830
  831     ;; 2 * 1
  832     wmov    26, B2
  833     XCALL   __umulhisi3  $            $           $ add C6,22 $ adc C7,23
  834
  835     ;; A[] = C[]
  836     wmov    A0, C0
  837     ;; A2 = C2 already
  838     wmov    A4, C4
  839     wmov    A6, C6
  840
  841     clr     __zero_reg__        ; MUL overwrote R1, restore it to 0
  842     pop     r16
  843     pop     r17
  844     pop     r28
  845     pop     r29
  846     ret
  847
  848 __muldi3_6:                     ; local helper: add R27:R26 * R19:R18 into C[] starting at C2
  849     XCALL   __umulhisi3
  850     add     C2, 22
  851     adc     C3, 23
  852     adc     C4, 24
  853     adc     C5, 25
  854     brcc    0f
  855     adiw    C6, 1               ; propagate the carry into C7:C6
  856 0:  ret
  857 ENDF __muldi3
 858
 859 #undef C7
 860 #undef C6
 861 #undef C5
 862 #undef C4
 863 #undef C3
 864 #undef C2
 865 #undef C1
 866 #undef C0
 867
 868 #else /* !HAVE_MUL */
 869
 870 #define C0  26
 871 #define C1  C0+1
 872 #define C2  C0+2
 873 #define C3  C0+3
 874 #define C4  C0+4
 875 #define C5  C0+5
 876 #define C6  0
 877 #define C7  C6+1
 878
 879 #define Loop 9
 880
 881 ;; A[]     *= B[]
 882 ;; R25:R18 *= R17:R10
 883 ;; Ordinary ABI-Function
 884
  885 DEFUN __muldi3
  886     push    r29                 ; R28/R29 serve as C2/C3, R9 as loop counter; all three are restored at the end
  887     push    r28
  888     push    Loop
  889
  890     ldi     C0, 64              ; LDI cannot target R9, so load the count through R26
  891     mov     Loop, C0
  892
  893     ;; C[] = 0
  894     clr     __tmp_reg__         ; R1:R0 (C7:C6) is now 0 ...
  895     wmov    C0, 0               ; ... and is copied into the other three result words
  896     wmov    C2, 0
  897     wmov    C4, 0
  898
  899 0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
  900     ;; where N = 64 - Loop.
  901     ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
  902     ;; B[] will have its initial Value again.
  903     LSR  B7     $  ror  B6     $  ror  B5     $  ror  B4
  904     ror  B3     $  ror  B2     $  ror  B1     $  ror  B0
  905
  906     ;; If the N-th Bit of B[] was set then...
  907     brcc    1f
  908     ;; ...finish Rotation...
  909     ori     B7, 1 << 7          ; put the bit that was shifted out back in at the top
  910
  911     ;; ...and add A[] * 2^N to the Result C[]
  912     ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2  $  adc  C3,A3
  913     adc  C4,A4  $  adc  C5,A5  $  adc  C6,A6  $  adc  C7,A7
  914
  915 1:  ;; Multiply A[] by 2
  916     LSL  A0     $  rol  A1     $  rol  A2     $  rol  A3
  917     rol  A4     $  rol  A5     $  rol  A6     $  rol  A7
  918
  919     dec     Loop
  920     brne    0b
  921
  922     ;; We expanded the Result in C[]
  923     ;; Copy Result to the Return Register A[]
  924     wmov    A0, C0
  925     wmov    A2, C2
  926     wmov    A4, C4
  927     wmov    A6, C6
  928
  929     clr     __zero_reg__        ; R1 served as C7, restore it to 0
  930     pop     Loop
  931     pop     r28
  932     pop     r29
  933     ret
  934 ENDF __muldi3
 935
 936 #undef Loop
 937
 938 #undef C7
 939 #undef C6
 940 #undef C5
 941 #undef C4
 942 #undef C3
 943 #undef C2
 944 #undef C1
 945 #undef C0
 946
 947 #endif /* HAVE_MUL */
 948
 949 #undef B7
 950 #undef B6
 951 #undef B5
 952 #undef B4
 953 #undef B3
 954 #undef B2
 955 #undef B1
 956 #undef B0
 957
 958 #undef A7
 959 #undef A6
 960 #undef A5
 961 #undef A4
 962 #undef A3
 963 #undef A2
 964 #undef A1
 965 #undef A0
 966
 967 #endif /* L_muldi3 */
 968
 969 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 970
 971 \f
 972 .section .text.libgcc.div, "ax", @progbits
 973
 974 /*******************************************************
 975        Division 8 / 8 => (result + remainder)
 976 *******************************************************/
 977 #define r_rem   r25     /* remainder */
 978 #define r_arg1  r24     /* dividend, quotient */
 979 #define r_arg2  r22     /* divisor */
 980 #define r_cnt   r23     /* loop count */
 981
 982 #if defined (L_udivmodqi4)
  983 DEFUN __udivmodqi4              ; R24 = R24 / R22 and R25 = R24 % R22, unsigned; R23 is the loop counter
  984         sub     r_rem,r_rem     ; clear remainder and carry
  985         ldi     r_cnt,9         ; init loop counter
  986         rjmp    __udivmodqi4_ep ; jump to entry point
  987 __udivmodqi4_loop:
  988         rol     r_rem           ; shift dividend into remainder
  989         cp      r_rem,r_arg2    ; compare remainder & divisor
  990         brcs    __udivmodqi4_ep ; remainder < divisor
  991         sub     r_rem,r_arg2    ; restore remainder
  992 __udivmodqi4_ep:
  993         rol     r_arg1          ; shift dividend (with CARRY)
  994         dec     r_cnt           ; decrement loop counter
  995         brne    __udivmodqi4_loop
  996         com     r_arg1          ; complement result
  997                                 ; because C flag was complemented in loop
  998         ret
  999 ENDF __udivmodqi4
1000 #endif /* defined (L_udivmodqi4) */
1001
1002 #if defined (L_divmodqi4)
 1003 DEFUN __divmodqi4               ; signed: R24 = R24 / R22, R25 = R24 % R22; uses T and __tmp_reg__
 1004         bst     r_arg1,7        ; store sign of dividend
 1005         mov     __tmp_reg__,r_arg1
 1006         eor     __tmp_reg__,r_arg2; r0.7 is sign of result
 1007         sbrc    r_arg1,7
 1008         neg     r_arg1          ; dividend negative : negate
 1009         sbrc    r_arg2,7
 1010         neg     r_arg2          ; divisor negative : negate
 1011         XCALL   __udivmodqi4    ; do the unsigned div/mod
 1012         brtc    __divmodqi4_1   ; dividend was not negative: remainder stays as it is
 1013         neg     r_rem           ; correct remainder sign
 1014 __divmodqi4_1:
 1015         sbrc    __tmp_reg__,7
 1016         neg     r_arg1          ; correct result sign
 1017 __divmodqi4_exit:
 1018         ret
 1019 ENDF __divmodqi4
1020 #endif /* defined (L_divmodqi4) */
1021
1022 #undef r_rem
1023 #undef r_arg1
1024 #undef r_arg2
1025 #undef r_cnt
1026
1027
1028 /*******************************************************
1029        Division 16 / 16 => (result + remainder)
1030 *******************************************************/
1031 #define r_remL  r26     /* remainder Low */
1032 #define r_remH  r27     /* remainder High */
1033
1034 /* return: remainder */
1035 #define r_arg1L r24     /* dividend Low */
1036 #define r_arg1H r25     /* dividend High */
1037
1038 /* return: quotient */
1039 #define r_arg2L r22     /* divisor Low */
1040 #define r_arg2H r23     /* divisor High */
1041
1042 #define r_cnt   r21     /* loop count */
1043
1044 #if defined (L_udivmodhi4)
 1045 DEFUN __udivmodhi4              ; unsigned R25:R24 / R23:R22; quotient -> R23:R22, remainder -> R25:R24
 1046         sub     r_remL,r_remL
 1047         sub     r_remH,r_remH   ; clear remainder and carry
 1048         ldi     r_cnt,17        ; init loop counter
 1049         rjmp    __udivmodhi4_ep ; jump to entry point
 1050 __udivmodhi4_loop:
 1051         rol     r_remL          ; shift dividend into remainder
 1052         rol     r_remH
 1053         cp      r_remL,r_arg2L  ; compare remainder & divisor
 1054         cpc     r_remH,r_arg2H
 1055         brcs    __udivmodhi4_ep ; remainder < divisor
 1056         sub     r_remL,r_arg2L  ; restore remainder
 1057         sbc     r_remH,r_arg2H
 1058 __udivmodhi4_ep:
 1059         rol     r_arg1L         ; shift dividend (with CARRY)
 1060         rol     r_arg1H
 1061         dec     r_cnt           ; decrement loop counter
 1062         brne    __udivmodhi4_loop
 1063         com     r_arg1L         ; the loop shifted in inverted quotient bits, undo that
 1064         com     r_arg1H
 1065 ; div/mod results to return registers, as for the div() function
 1066         mov_l   r_arg2L, r_arg1L        ; quotient
 1067         mov_h   r_arg2H, r_arg1H
 1068         mov_l   r_arg1L, r_remL         ; remainder
 1069         mov_h   r_arg1H, r_remH
 1070         ret
 1071 ENDF __udivmodhi4
1072 #endif /* defined (L_udivmodhi4) */
1073
1074 #if defined (L_divmodhi4)
 1075 DEFUN __divmodhi4               ; signed R25:R24 / R23:R22; quotient -> R23:R22, remainder -> R25:R24
 1076     .global _div                ; _div is a second entry point for the same code
 1077 _div:
 1078     bst     r_arg1H,7           ; store sign of dividend
 1079     mov     __tmp_reg__,r_arg2H
 1080     brtc    0f
 1081     com     __tmp_reg__         ; r0.7 is sign of result
 1082     rcall   __divmodhi4_neg1    ; dividend negative: negate
 1083 0:
 1084     sbrc    r_arg2H,7
 1085     rcall   __divmodhi4_neg2    ; divisor negative: negate
 1086     XCALL   __udivmodhi4        ; do the unsigned div/mod
 1087     sbrc    __tmp_reg__,7
 1088     rcall   __divmodhi4_neg2    ; correct quotient sign (quotient is in r_arg2 now)
 1089     brtc    __divmodhi4_exit    ; dividend was not negative: remainder stays as it is
 1090 __divmodhi4_neg1:
 1091     ;; correct dividend/remainder sign
 1092     com     r_arg1H
 1093     neg     r_arg1L
 1094     sbci    r_arg1H,0xff
 1095     ret
 1096 __divmodhi4_neg2:
 1097     ;; correct divisor/quotient sign
 1098     com     r_arg2H
 1099     neg     r_arg2L
 1100     sbci    r_arg2H,0xff
 1101 __divmodhi4_exit:
 1102     ret
 1103 ENDF __divmodhi4
1104 #endif /* defined (L_divmodhi4) */
1105
1106 #undef r_remH
1107 #undef r_remL
1108
1109 #undef r_arg1H
1110 #undef r_arg1L
1111
1112 #undef r_arg2H
1113 #undef r_arg2L
1114
1115 #undef r_cnt
1116
1117 /*******************************************************
1118        Division 24 / 24 => (result + remainder)
1119 *******************************************************/
1120
1121 ;; A[0..2]: In: Dividend; Out: Quotient
1122 #define A0  22
1123 #define A1  A0+1
1124 #define A2  A0+2
1125
1126 ;; B[0..2]: In: Divisor;   Out: Remainder
1127 #define B0  18
1128 #define B1  B0+1
1129 #define B2  B0+2
1130
1131 ;; C[0..2]: Expand remainder
1132 #define C0  __zero_reg__
1133 #define C1  26
1134 #define C2  25
1135
1136 ;; Loop counter
1137 #define r_cnt   21
1138
1139 #if defined (L_udivmodpsi4)
1140 ;; R24:R22 = R24:R22  udiv  R20:R18
1141 ;; R20:R18 = R24:R22  umod  R20:R18
1142 ;; Clobbers: R21, R25, R26
1143
 1144 DEFUN __udivmodpsi4
 1145     ; init loop counter
 1146     ldi     r_cnt, 24+1
 1147     ; Clear remainder and carry.  C0 is already 0
 1148     clr     C1
 1149     sub     C2, C2
 1150     ; jump to entry point
 1151     rjmp    __udivmodpsi4_start
 1152 __udivmodpsi4_loop:
 1153     ; shift dividend into remainder
 1154     rol     C0
 1155     rol     C1
 1156     rol     C2
 1157     ; compare remainder & divisor
 1158     cp      C0, B0
 1159     cpc     C1, B1
 1160     cpc     C2, B2
 1161     brcs    __udivmodpsi4_start ; remainder < divisor
 1162     sub     C0, B0              ; restore remainder
 1163     sbc     C1, B1
 1164     sbc     C2, B2
 1165 __udivmodpsi4_start:
 1166     ; shift dividend (with CARRY)
 1167     rol     A0
 1168     rol     A1
 1169     rol     A2
 1170     ; decrement loop counter
 1171     dec     r_cnt
 1172     brne    __udivmodpsi4_loop
 1173     com     A0                  ; the loop shifted in inverted quotient bits, undo that
 1174     com     A1
 1175     com     A2
 1176     ; div/mod results to return registers
 1177     ; remainder
 1178     mov     B0, C0
 1179     mov     B1, C1
 1180     mov     B2, C2
 1181     clr     __zero_reg__ ; C0
 1182     ret
 1183 ENDF __udivmodpsi4
1184 #endif /* defined (L_udivmodpsi4) */
1185
1186 #if defined (L_divmodpsi4)
1187 ;; R24:R22 = R24:R22  div  R20:R18
1188 ;; R20:R18 = R24:R22  mod  R20:R18
1189 ;; Clobbers: T, __tmp_reg__, R21, R25, R26
     ;;
     ;; Signed 24-bit division: both operands are made non-negative,
     ;; __udivmodpsi4 does the work, then the signs are fixed up.
     ;; The quotient is negated if the operand signs differ (R0.7);
     ;; the remainder is negated if the dividend was negative (T-flag).
1190
1191 DEFUN __divmodpsi4
1192     ; R0.7 will contain the sign of the result:
1193     ; R0.7 = A.sign ^ B.sign
1194     mov __tmp_reg__, B2
1195     ; T-flag = sign of dividend
1196     bst     A2, 7
1197     brtc    0f
1198     com     __tmp_reg__
1199     ; Adjust dividend's sign
1200     rcall   __divmodpsi4_negA
1201 0:
1202     ; Adjust divisor's sign
1203     sbrc    B2, 7
1204     rcall   __divmodpsi4_negB
1205
1206     ; Do the unsigned div/mod
1207     XCALL   __udivmodpsi4
1208
1209     ; Adjust quotient's sign
1210     sbrc    __tmp_reg__, 7
1211     rcall   __divmodpsi4_negA
1212
1213     ; Adjust remainder's sign
     ; T = 0: nothing to do, return via _end.
     ; T = 1: fall through into _negB, whose ret returns to the caller
1214     brtc    __divmodpsi4_end
1215
1216 __divmodpsi4_negB:
1217     ; Correct divisor/remainder sign
     ; B = -B  (complement the high bytes, negate the low byte, add borrow)
1218     com     B2
1219     com     B1
1220     neg     B0
1221     sbci    B1, -1
1222     sbci    B2, -1
1223     ret
1224
1225     ; Correct dividend/quotient sign
     ; A = -A, same scheme as _negB
1226 __divmodpsi4_negA:
1227     com     A2
1228     com     A1
1229     neg     A0
1230     sbci    A1, -1
1231     sbci    A2, -1
1232 __divmodpsi4_end:
1233     ret
1234
1235 ENDF __divmodpsi4
1236 #endif /* defined (L_divmodpsi4) */
1237
1238 #undef A0
1239 #undef A1
1240 #undef A2
1241
1242 #undef B0
1243 #undef B1
1244 #undef B2
1245
1246 #undef C0
1247 #undef C1
1248 #undef C2
1249
1250 #undef r_cnt
1251
1252 /*******************************************************
1253        Division 32 / 32 => (result + remainder)
1254 *******************************************************/
1255 #define r_remHH r31     /* remainder High */
1256 #define r_remHL r30
1257 #define r_remH  r27
1258 #define r_remL  r26     /* remainder Low */
1259
1260 /* return: remainder */
1261 #define r_arg1HH r25    /* dividend High */
1262 #define r_arg1HL r24
1263 #define r_arg1H  r23
1264 #define r_arg1L  r22    /* dividend Low */
1265
1266 /* return: quotient */
1267 #define r_arg2HH r21    /* divisor High */
1268 #define r_arg2HL r20
1269 #define r_arg2H  r19
1270 #define r_arg2L  r18    /* divisor Low */
1271
1272 #define r_cnt __zero_reg__  /* loop count (0 after the loop!) */
1273
1274 #if defined (L_udivmodsi4)
1275 DEFUN __udivmodsi4
     ;; Unsigned 32-bit division by shift-and-subtract.
     ;; In:  r_arg1 = dividend, r_arg2 = divisor
     ;; Out: r_arg2 = quotient, r_arg1 = remainder
     ;; r_rem* is the working remainder and r_cnt (__zero_reg__) the loop
     ;; counter; the counter is 0 again when the loop terminates.
     ;; Quotient bits are shifted into r_arg1 inverted (carry of the
     ;; compare) and complemented after the loop.
1276         ldi     r_remL, 33      ; init loop counter (32 bits + 1 initial shift)
1277         mov     r_cnt, r_remL
1278         sub     r_remL,r_remL
1279         sub     r_remH,r_remH   ; clear remainder and carry
1280         mov_l   r_remHL, r_remL
1281         mov_h   r_remHH, r_remH
1282         rjmp    __udivmodsi4_ep ; jump to entry point
1283 __udivmodsi4_loop:
1284         rol     r_remL          ; shift dividend into remainder
1285         rol     r_remH
1286         rol     r_remHL
1287         rol     r_remHH
1288         cp      r_remL,r_arg2L  ; compare remainder & divisor
1289         cpc     r_remH,r_arg2H
1290         cpc     r_remHL,r_arg2HL
1291         cpc     r_remHH,r_arg2HH
1292         brcs    __udivmodsi4_ep ; remainder < divisor: keep it (carry = 1)
1293         sub     r_remL,r_arg2L  ; remainder >= divisor: subtract (carry = 0)
1294         sbc     r_remH,r_arg2H
1295         sbc     r_remHL,r_arg2HL
1296         sbc     r_remHH,r_arg2HH
1297 __udivmodsi4_ep:
1298         rol     r_arg1L         ; shift dividend (with CARRY)
1299         rol     r_arg1H
1300         rol     r_arg1HL
1301         rol     r_arg1HH
1302         dec     r_cnt           ; decrement loop counter
1303         brne    __udivmodsi4_loop
1304                                 ; __zero_reg__ now restored (r_cnt == 0)
1305         com     r_arg1L         ; quotient bits were collected inverted
1306         com     r_arg1H
1307         com     r_arg1HL
1308         com     r_arg1HH
1309 ; div/mod results to return registers, as for the ldiv() function
1310         mov_l   r_arg2L,  r_arg1L       ; quotient
1311         mov_h   r_arg2H,  r_arg1H
1312         mov_l   r_arg2HL, r_arg1HL
1313         mov_h   r_arg2HH, r_arg1HH
1314         mov_l   r_arg1L,  r_remL        ; remainder
1315         mov_h   r_arg1H,  r_remH
1316         mov_l   r_arg1HL, r_remHL
1317         mov_h   r_arg1HH, r_remHH
1318         ret
1319 ENDF __udivmodsi4
1320 #endif /* defined (L_udivmodsi4) */
1321
1322 #if defined (L_divmodsi4)
1323 DEFUN __divmodsi4
     ;; Signed 32-bit division built on __udivmodsi4.
     ;; In:  r_arg1 = dividend, r_arg2 = divisor
     ;; Out: r_arg2 = quotient, r_arg1 = remainder
     ;; T-flag keeps the sign of the dividend (= sign of the remainder),
     ;; bit 7 of __tmp_reg__ keeps the sign of the quotient.
1324     mov     __tmp_reg__,r_arg2HH
1325     bst     r_arg1HH,7          ; store sign of dividend
1326     brtc    0f
1327     com     __tmp_reg__         ; r0.7 is sign of result
1328     rcall   __divmodsi4_neg1    ; dividend negative: negate
1329 0:
1330     sbrc    r_arg2HH,7
1331     rcall   __divmodsi4_neg2    ; divisor negative: negate
1332     XCALL   __udivmodsi4        ; do the unsigned div/mod
1333     sbrc    __tmp_reg__, 7      ; correct quotient sign
1334     rcall   __divmodsi4_neg2
1335     brtc    __divmodsi4_exit    ; correct remainder sign
     ;; T = 1: fall through into _neg1, whose ret returns to the caller
1336 __divmodsi4_neg1:
1337     ;; correct dividend/remainder sign
     ;; r_arg1 = -r_arg1
1338     com     r_arg1HH
1339     com     r_arg1HL
1340     com     r_arg1H
1341     neg     r_arg1L
1342     sbci    r_arg1H, 0xff
1343     sbci    r_arg1HL,0xff
1344     sbci    r_arg1HH,0xff
1345     ret
1346 __divmodsi4_neg2:
1347     ;; correct divisor/quotient sign
     ;; r_arg2 = -r_arg2
1348     com     r_arg2HH
1349     com     r_arg2HL
1350     com     r_arg2H
1351     neg     r_arg2L
1352     sbci    r_arg2H,0xff
1353     sbci    r_arg2HL,0xff
1354     sbci    r_arg2HH,0xff
1355 __divmodsi4_exit:
1356     ret
1357 ENDF __divmodsi4
1358 #endif /* defined (L_divmodsi4) */
1359
1360 #undef r_remHH
1361 #undef r_remHL
1362 #undef r_remH
1363 #undef r_remL
1364 #undef r_arg1HH
1365 #undef r_arg1HL
1366 #undef r_arg1H
1367 #undef r_arg1L
1368 #undef r_arg2HH
1369 #undef r_arg2HL
1370 #undef r_arg2H
1371 #undef r_arg2L
1372 #undef r_cnt
1373
1374 /*******************************************************
1375        Division 64 / 64
1376        Modulo   64 % 64
1377 *******************************************************/
1378
1379 ;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1380 ;; at least 16k of Program Memory.  For smaller Devices, depend
1381 ;; on MOVW and SP Size.  There is a Connexion between SP Size and
1382 ;; Flash Size so that SP Size can be used to test for Flash Size.
1383
1384 #if defined (__AVR_HAVE_JMP_CALL__)
1385 #   define SPEED_DIV 8
1386 #elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1387 #   define SPEED_DIV 16
1388 #else
1389 #   define SPEED_DIV 0
1390 #endif
1391
1392 ;; A[0..7]: In: Dividend;
1393 ;; Out: Quotient  (T = 0)
1394 ;; Out: Remainder (T = 1)
1395 #define A0  18
1396 #define A1  A0+1
1397 #define A2  A0+2
1398 #define A3  A0+3
1399 #define A4  A0+4
1400 #define A5  A0+5
1401 #define A6  A0+6
1402 #define A7  A0+7
1403
1404 ;; B[0..7]: In: Divisor;   Out: Clobber
1405 #define B0  10
1406 #define B1  B0+1
1407 #define B2  B0+2
1408 #define B3  B0+3
1409 #define B4  B0+4
1410 #define B5  B0+5
1411 #define B6  B0+6
1412 #define B7  B0+7
1413
1414 ;; C[0..7]: Expand remainder;  Out: Remainder (unused)
1415 #define C0  8
1416 #define C1  C0+1
1417 #define C2  30
1418 #define C3  C2+1
1419 #define C4  28
1420 #define C5  C4+1
1421 #define C6  26
1422 #define C7  C6+1
1423
1424 ;; Holds Signs during Division Routine
1425 #define SS      __tmp_reg__
1426
1427 ;; Bit-Counter in Division Routine
1428 #define R_cnt   __zero_reg__
1429
1430 ;; Scratch Register for Negation
1431 #define NN      r31
1432
1433 #if defined (L_udivdi3)
1434
1435 ;; R25:R18 = R25:R18  umod  R17:R10
1436 ;; Ordinary ABI-Function
1437
1438 DEFUN __umoddi3
1439     set                         ; T = 1: the worker returns the remainder
1440     rjmp __udivdi3_umoddi3
1441 ENDF __umoddi3
1442
1443 ;; R25:R18 = R25:R18  udiv  R17:R10
1444 ;; Ordinary ABI-Function
1445
1446 DEFUN __udivdi3
1447     clt                         ; T = 0: the worker returns the quotient
     ;; FALLTHRU
1448 ENDF __udivdi3
1449
1450 DEFUN __udivdi3_umoddi3
     ;; Common tail of __udivdi3 / __umoddi3: save the four C[] registers
     ;; pushed below around the call to the worker __udivmod64, which
     ;; itself saves nothing.  The result is selected by the T-flag.
1451     push    C0
1452     push    C1
1453     push    C4
1454     push    C5
1455     XCALL   __udivmod64
1456     pop     C5
1457     pop     C4
1458     pop     C1
1459     pop     C0
1460     ret
1461 ENDF __udivdi3_umoddi3
1462 #endif /* L_udivdi3 */
1463
1464 #if defined (L_udivmod64)
1465
1466 ;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1467 ;; No Registers saved/restored; the Callers will take Care.
1468 ;; Preserves B[] and T-flag
1469 ;; T = 0: Compute Quotient  in A[]
1470 ;; T = 1: Compute Remainder in A[] and shift SS one Bit left
     ;; In: A[] = Dividend, B[] = Divisor.  C[] is the working Remainder and
     ;; R_cnt (__zero_reg__) the Bit-Counter, which is 0 again on return.
1471
1472 DEFUN __udivmod64
1473
1474     ;; Clear Remainder (C6, C7 will follow)
1475     clr     C0
1476     clr     C1
1477     wmov    C2, C0
1478     wmov    C4, C0
1479     ldi     C7, 64
1480
1481 #if SPEED_DIV == 0 || SPEED_DIV == 16
1482     ;; Initialize Loop-Counter
1483     mov     R_cnt, C7
1484     wmov    C6, C0
1485 #endif /* SPEED_DIV */
1486
1487 #if SPEED_DIV == 8
1488
     ;; C7 serves as Bit-Counter in this Pre-Shift Loop, hence the original
     ;; A7 is saved on the Stack.
1489     push    A7
1490     clr     C6
1491
1492 1:  ;; Compare shifted Dividend against Divisor
1493     ;; If -- even after Shifting -- it is smaller...
1494     CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
1495     cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
1496     brcc    2f
1497
1498     ;; ...then the next 8 Quotient Bits are all 0.  Thus, it is legal to
     ;; shift left by one whole Byte at once
1499                $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
1500     mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
1501     mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
1502     mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0
1503
1504     ;; 8 Bits are done
1505     subi    C7, 8
1506     brne    1b
1507
1508     ;; Shifted 64 Bits:  A7 has traveled to C7
1509     pop     C7
1510     ;; Divisor is greater than Dividend. We have:
1511     ;; A[] % B[] = A[]
1512     ;; A[] / B[] = 0
1513     ;; Thus, we can return immediately
1514     rjmp    5f
1515
1516 2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
1517     mov     R_cnt, C7
1518
1519     ;; The saved A7 is not needed here: fewer than 64 Bits were shifted, so
     ;; C7 must be 0.  Pop only to rebalance the Stack, then clear C7
1520     pop     C7
1521     clr     C7
1522
1523 #elif  SPEED_DIV == 16
1524
1525     ;; Compare shifted Dividend against Divisor
1526     cp      A7, B3
1527     cpc     C0, B4
1528     cpc     C1, B5
1529     cpc     C2, B6
1530     cpc     C3, B7
1531     brcc    2f
1532
1533     ;; Divisor is greater than shifted Dividend: We can shift the Dividend
1534     ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
1535     wmov  C2,A6  $  wmov C0,A4
1536     wmov  A6,A2  $  wmov A4,A0
1537     wmov  A2,C6  $  wmov A0,C4
1538
1539     ;; Set Bit Counter to 32
1540     lsr     R_cnt
1541 2:
1542 #elif SPEED_DIV
1543 #error SPEED_DIV = ?
1544 #endif /* SPEED_DIV */
1545
1546 ;; The very Division + Remainder Routine
1547
1548 3:  ;; Left-shift Dividend...
1549     lsl A0     $  rol A1     $  rol A2     $  rol A3
1550     rol A4     $  rol A5     $  rol A6     $  rol A7
1551
1552     ;; ...into Remainder
1553     rol C0     $  rol C1     $  rol C2     $  rol C3
1554     rol C4     $  rol C5     $  rol C6     $  rol C7
1555
1556     ;; Compare Remainder and Divisor
1557     CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
1558     cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7
1559
     ;; Remainder < Divisor: this Quotient Bit stays 0
1560     brcs 4f
1561
1562     ;; Divisor fits into Remainder:  Subtract it from Remainder...
1563     SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
1564     sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7
1565
1566     ;; ...and set according Bit in the upcoming Quotient
1567     ;; The Bit will travel to its final Position
1568     ori A0, 1
1569
1570 4:  ;; This Bit is done
1571     dec     R_cnt
1572     brne    3b
1573     ;; __zero_reg__ is 0 again
1574
1575     ;; T = 0: We are fine with the Quotient in A[]
1576     ;; T = 1: Copy Remainder to A[]
1577 5:  brtc    6f
1578     wmov    A0, C0
1579     wmov    A2, C2
1580     wmov    A4, C4
1581     wmov    A6, C6
1582     ;; Move the Sign of the Result to SS.7
1583     lsl     SS
1584
1585 6:  ret
1586
1587 ENDF __udivmod64
1588 #endif /* L_udivmod64 */
1589
1590
1591 #if defined (L_divdi3)
1592
1593 ;; R25:R18 = R25:R18  mod  R17:R10
1594 ;; Ordinary ABI-Function
1595
1596 DEFUN __moddi3
1597     set                         ; T = 1: compute the remainder
1598     rjmp    __divdi3_moddi3
1599 ENDF __moddi3
1600
1601 ;; R25:R18 = R25:R18  div  R17:R10
1602 ;; Ordinary ABI-Function
1603
1604 DEFUN __divdi3
1605     clt                         ; T = 0: compute the quotient
     ;; FALLTHRU
1606 ENDF __divdi3
1607
1608 DEFUN  __divdi3_moddi3
     ;; Common body of __divdi3 / __moddi3: make A[] and B[] non-negative,
     ;; run the unsigned worker __udivmod64 and negate the result if SS.7
     ;; is set afterwards.
1609 #if SPEED_DIV
     ;; r31.7 = A.sign | B.sign
1610     mov     r31, A7
1611     or      r31, B7
1612     brmi    0f
1613     ;; Both Signs are 0:  the following Complexity is not needed
1614     XJMP    __udivdi3_umoddi3
1615 #endif /* SPEED_DIV */
1616
1617 0:  ;; The Prologue
1618     ;; Save 12 Registers:  Y, 17...8
1619     ;; No Frame needed (X = 0)
     ;; Z = Address to continue at after the Registers are saved.
     ;; Entering at an Offset of (18 - 12) Pushes skips saving R2...R7
1620     clr r26
1621     clr r27
1622     ldi r30, lo8(gs(1f))
1623     ldi r31, hi8(gs(1f))
1624     XJMP __prologue_saves__ + ((18 - 12) * 2)
1625
1626 1:  ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
1627     ;; SS.6 will contain the Sign of the Remainder (A.sign)
1628     mov     SS, A7
1629     asr     SS
1630     ;; Adjust Dividend's Sign as needed
1631 #if SPEED_DIV
1632     ;; Compiling for Speed we know that at least one Sign must be < 0
1633     ;; Thus, if A[] >= 0 then we know B[] < 0
1634     brpl    22f
1635 #else
1636     brpl    21f
1637 #endif /* SPEED_DIV */
1638
1639     XCALL   __negdi2
1640
1641     ;; Adjust Divisor's Sign and SS.7 as needed
1642 21: tst     B7
1643     brpl    3f
1644 22: ldi     NN, 1 << 7
1645     eor     SS, NN
1646
     ;; B[] = -B[]:  B[] lives in Registers below R16 where SBCI is not
     ;; available, thus subtract NN = -1 with Borrow instead
1647     ldi NN, -1
1648     com B4     $  com B5     $  com B6     $  com B7
1649                $  com B1     $  com B2     $  com B3
1650     NEG B0
1651                $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
1652     sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN
1653
1654 3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
1655     XCALL   __udivmod64
1656
1657     ;; Adjust Result's Sign
1658 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
1659     tst     SS
1660     brpl    4f
1661 #else
1662     sbrc    SS, 7
1663 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1664     XCALL   __negdi2
1665
1666 4:  ;; Epilogue: Restore the Z = 12 Registers and return
     ;; Y = current Stack Pointer
1667     in r28, __SP_L__
1668 #if defined (__AVR_HAVE_SPH__)
1669     in r29, __SP_H__
1670 #else
1671     clr r29
1672 #endif /* #SP = 8/16 */
1673     ldi r30, 12
1674     XJMP __epilogue_restores__ + ((18 - 12) * 2)
1675
1676 ENDF __divdi3_moddi3
1677
1678 #undef R_cnt
1679 #undef SS
1680 #undef NN
1681
1682 #endif /* L_divdi3 */
1683
1684 .section .text.libgcc, "ax", @progbits
1685
1686 #define TT __tmp_reg__
1687
1688 #if defined (L_adddi3)
1689 ;; (set (reg:DI 18)
1690 ;;      (plus:DI (reg:DI 18)
1691 ;;               (reg:DI 10)))
     ;; 64-bit addition A[] += B[], low byte first so the carry ripples up.
1692 DEFUN __adddi3
1693     ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
1694     adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
1695     ret
1696 ENDF __adddi3
1697 #endif /* L_adddi3 */
1698
1699 #if defined (L_adddi3_s8)
1700 ;; (set (reg:DI 18)
1701 ;;      (plus:DI (reg:DI 18)
1702 ;;               (sign_extend:DI (reg:QI 26))))
     ;; 64-bit addition of a signed 8-bit value: A[] += sign-extended R26.
     ;; TT is set to 0x00 or 0xff according to R26.7 and serves as the
     ;; upper 7 bytes of the addend.
1703 DEFUN __adddi3_s8
1704     clr     TT
1705     sbrc    r26, 7
1706     com     TT
1707     ADD A0,r26 $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
1708     adc A4,TT  $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
1709     ret
1710 ENDF __adddi3_s8
1711 #endif /* L_adddi3_s8 */
1712
1713 #if defined (L_subdi3)
1714 ;; (set (reg:DI 18)
1715 ;;      (minus:DI (reg:DI 18)
1716 ;;                (reg:DI 10)))
     ;; 64-bit subtraction A[] -= B[], low byte first so the borrow ripples up.
1717 DEFUN __subdi3
1718     SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
1719     sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
1720     ret
1721 ENDF __subdi3
1722 #endif /* L_subdi3 */
1723
1724 #if defined (L_cmpdi2)
1725 ;; (set (cc0)
1726 ;;      (compare (reg:DI 18)
1727 ;;               (reg:DI 10)))
     ;; 64-bit compare of A[] against B[]: only the status flags are changed,
     ;; no register is written.
1728 DEFUN __cmpdi2
1729     CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
1730     cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
1731     ret
1732 ENDF __cmpdi2
1733 #endif /* L_cmpdi2 */
1734
1735 #if defined (L_cmpdi2_s8)
1736 ;; (set (cc0)
1737 ;;      (compare (reg:DI 18)
1738 ;;               (sign_extend:DI (reg:QI 26))))
     ;; 64-bit compare of A[] against sign-extended R26.
     ;; TT is set to 0x00 or 0xff according to R26.7 and serves as the
     ;; upper 7 bytes of the second operand.
1739 DEFUN __cmpdi2_s8
1740     clr     TT
1741     sbrc    r26, 7
1742     com     TT
1743     CP  A0,r26 $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
1744     cpc A4,TT  $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
1745     ret
1746 ENDF __cmpdi2_s8
1747 #endif /* L_cmpdi2_s8 */
1748
1749 #if defined (L_negdi2)
1750 DEFUN __negdi2
     ;; 64-bit two's complement negation: A[] = -A[].
     ;; The upper 7 bytes are complemented, the low byte is negated, and
     ;; subtracting -1 with borrow propagates the +1 of the negation upwards.
1751
1752     com  A4    $  com  A5    $  com  A6    $  com  A7
1753                $  com  A1    $  com  A2    $  com  A3
1754     NEG  A0
1755                $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
1756     sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
1757     ret
1758
1759 ENDF __negdi2
1760 #endif /* L_negdi2 */
1761
1762 #undef TT
1763
1764 #undef C7
1765 #undef C6
1766 #undef C5
1767 #undef C4
1768 #undef C3
1769 #undef C2
1770 #undef C1
1771 #undef C0
1772
1773 #undef B7
1774 #undef B6
1775 #undef B5
1776 #undef B4
1777 #undef B3
1778 #undef B2
1779 #undef B1
1780 #undef B0
1781
1782 #undef A7
1783 #undef A6
1784 #undef A5
1785 #undef A4
1786 #undef A3
1787 #undef A2
1788 #undef A1
1789 #undef A0
1790
1791 \f
1792 .section .text.libgcc.prologue, "ax", @progbits
1793
1794 /**********************************
1795  * This is a prologue subroutine
1796  **********************************/
1797 #if defined (L_prologue)
1798
1799 ;; This function does not clobber T-flag; 64-bit division relies on it
     ;; In:  X (R27:R26) = number of bytes to subtract from the stack pointer
     ;;      Z (R31:R30) = address to continue at (reached by IJMP / EIJMP)
     ;; Out: Y (R29:R28) = new stack pointer value
     ;; Pushes R2...R17 and Y.  Callers that need fewer registers saved jump
     ;; into the push sequence at an offset to skip the leading pushes.
1800 DEFUN __prologue_saves__
1801         push r2
1802         push r3
1803         push r4
1804         push r5
1805         push r6
1806         push r7
1807         push r8
1808         push r9
1809         push r10
1810         push r11
1811         push r12
1812         push r13
1813         push r14
1814         push r15
1815         push r16
1816         push r17
1817         push r28
1818         push r29
1819 #if !defined (__AVR_HAVE_SPH__)
              ;; 8-bit stack pointer: only SP_L exists
1820         in      r28,__SP_L__
1821         sub     r28,r26
1822         out     __SP_L__,r28
1823         clr     r29
1824 #elif defined (__AVR_XMEGA__)
              ;; Here SP is written without disabling interrupts
1825         in      r28,__SP_L__
1826         in      r29,__SP_H__
1827         sub     r28,r26
1828         sbc     r29,r27
1829         out     __SP_L__,r28
1830         out     __SP_H__,r29
1831 #else
              ;; 16-bit stack pointer: interrupts are disabled while SP_H is
              ;; written; SREG is restored before SP_L is written
1832         in      r28,__SP_L__
1833         in      r29,__SP_H__
1834         sub     r28,r26
1835         sbc     r29,r27
1836         in      __tmp_reg__,__SREG__
1837         cli
1838         out     __SP_H__,r29
1839         out     __SREG__,__tmp_reg__
1840         out     __SP_L__,r28
1841 #endif /* #SP = 8/16 */
1842
              ;; Continue at the address supplied in Z
1843 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1844         eijmp
1845 #else
1846         ijmp
1847 #endif
1848
1849 ENDF __prologue_saves__
1850 #endif /* defined (L_prologue) */
1851
1852 /*
1853  * This is an epilogue subroutine
1854  */
1855 #if defined (L_epilogue)
1856
1857 DEFUN __epilogue_restores__
              ;; Counterpart of __prologue_saves__.
              ;; In:  Y (R29:R28) = address just below the saved registers
              ;;      R30         = number of bytes to add to Y to get the
              ;;                    new stack pointer
              ;; Reloads R2...R17 and Y from Y+18...Y+1, sets SP = Y + R30 and
              ;; returns.  Callers that saved fewer registers jump into the
              ;; load sequence at an offset to skip the leading loads.
1858         ldd     r2,Y+18
1859         ldd     r3,Y+17
1860         ldd     r4,Y+16
1861         ldd     r5,Y+15
1862         ldd     r6,Y+14
1863         ldd     r7,Y+13
1864         ldd     r8,Y+12
1865         ldd     r9,Y+11
1866         ldd     r10,Y+10
1867         ldd     r11,Y+9
1868         ldd     r12,Y+8
1869         ldd     r13,Y+7
1870         ldd     r14,Y+6
1871         ldd     r15,Y+5
1872         ldd     r16,Y+4
1873         ldd     r17,Y+3
1874         ldd     r26,Y+2         ; saved R28, parked in R26 until SP is set
1875 #if !defined (__AVR_HAVE_SPH__)
1876         ldd     r29,Y+1
1877         add     r28,r30
1878         out     __SP_L__,r28
1879         mov     r28, r26
1880 #elif defined (__AVR_XMEGA__)
1881         ldd  r27,Y+1            ; saved R29, parked in R27
1882         add  r28,r30
1883         adc  r29,__zero_reg__
1884         out  __SP_L__,r28
1885         out  __SP_H__,r29
1886         wmov 28, 26
1887 #else
1888         ldd     r27,Y+1         ; saved R29, parked in R27
1889         add     r28,r30
1890         adc     r29,__zero_reg__
              ;; interrupts are disabled while SP_H is written; SREG is
              ;; restored before SP_L is written
1891         in      __tmp_reg__,__SREG__
1892         cli
1893         out     __SP_H__,r29
1894         out     __SREG__,__tmp_reg__
1895         out     __SP_L__,r28
1896         mov_l   r28, r26
1897         mov_h   r29, r27
1898 #endif /* #SP = 8/16 */
1899         ret
1900 ENDF __epilogue_restores__
1901 #endif /* defined (L_epilogue) */
1902
1903 #ifdef L_exit
1904         .section .fini9,"ax",@progbits
              ;; _exit and the weak alias exit label the start of .fini9 and
              ;; contain no instructions of their own: execution runs on into
              ;; whatever follows.
1905 DEFUN _exit
1906         .weak   exit
1907 exit:
1908 ENDF _exit
1909
1910         /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
1911
1912         .section .fini0,"ax",@progbits
              ;; Final stop: disable interrupts and loop forever
1913         cli
1914 __stop_program:
1915         rjmp    __stop_program
1916 #endif /* defined (L_exit) */
1917
1918 #ifdef L_cleanup
1919         .weak   _cleanup
1920         .func   _cleanup
              ;; Weak default that does nothing but return
1921 _cleanup:
1922         ret
1923 .endfunc
1924 #endif /* defined (L_cleanup) */
1925
1926 \f
1927 .section .text.libgcc, "ax", @progbits
1928
1929 #ifdef L_tablejump
1930 DEFUN __tablejump2__
              ;; Z = 2 * Z (presumably turning a word address into a byte
              ;; address), then continue in __tablejump__
1931         lsl     r30
1932         rol     r31
1933     ;; FALLTHRU
1934 ENDF __tablejump2__
1935
1936 DEFUN __tablejump__
              ;; Read the 2-byte entry stored in flash at byte address Z and
              ;; jump to the address it contains.  Clobbers __tmp_reg__ and Z.
1937 #if defined (__AVR_HAVE_LPMX__)
1938         lpm __tmp_reg__, Z+
1939         lpm r31, Z
1940         mov r30, __tmp_reg__
1941 #if defined (__AVR_HAVE_EIJMP_EICALL__)
1942         eijmp
1943 #else
1944         ijmp
1945 #endif
1946
1947 #else /* !HAVE_LPMX */
              ;; Without LPM Rd,Z+ the entry is pushed like a return address
              ;; (low byte first) and RET performs the jump
1948         lpm
1949         adiw r30, 1
1950         push r0
1951         lpm
1952         push r0
1953 #if defined (__AVR_HAVE_EIJMP_EICALL__)
              ;; the return address has a third byte here: use EIND for it
1954         in   __tmp_reg__, __EIND__
1955         push __tmp_reg__
1956 #endif
1957         ret
1958 #endif /* !HAVE_LPMX */
1959 ENDF __tablejump__
1960 #endif /* defined (L_tablejump) */
1961
1962 #ifdef L_copy_data
1963         .section .init4,"ax",@progbits
1964 DEFUN __do_copy_data
              ;; Copy the data image from flash, starting at __data_load_start,
              ;; to RAM from __data_start up to (excluding) __data_end.
              ;; X = RAM destination, Z = flash source (extended by RAMPZ on
              ;; ELPM devices), R17 = hi8(__data_end) for the end compare.
              ;; Three variants: ELPM with post-increment, plain ELPM, LPM.
1965 #if defined(__AVR_HAVE_ELPMX__)
1966         ldi     r17, hi8(__data_end)
1967         ldi     r26, lo8(__data_start)
1968         ldi     r27, hi8(__data_start)
1969         ldi     r30, lo8(__data_load_start)
1970         ldi     r31, hi8(__data_load_start)
1971         ldi     r16, hh8(__data_load_start)
1972         out     __RAMPZ__, r16
1973         rjmp    .L__do_copy_data_start
1974 .L__do_copy_data_loop:
1975         elpm    r0, Z+
1976         st      X+, r0
1977 .L__do_copy_data_start:
1978         cpi     r26, lo8(__data_end)
1979         cpc     r27, r17
1980         brne    .L__do_copy_data_loop
1981 #elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
1982         ldi     r17, hi8(__data_end)
1983         ldi     r26, lo8(__data_start)
1984         ldi     r27, hi8(__data_start)
1985         ldi     r30, lo8(__data_load_start)
1986         ldi     r31, hi8(__data_load_start)
              ;; R16 starts one below the wanted RAMPZ value because the
              ;; _carry label below increments it before the first use
1987         ldi     r16, hh8(__data_load_start - 0x10000)
1988 .L__do_copy_data_carry:
1989         inc     r16
1990         out     __RAMPZ__, r16
1991         rjmp    .L__do_copy_data_start
1992 .L__do_copy_data_loop:
1993         elpm
1994         st      X+, r0
1995         adiw    r30, 1
              ;; Z wrapped around: advance RAMPZ
1996         brcs    .L__do_copy_data_carry
1997 .L__do_copy_data_start:
1998         cpi     r26, lo8(__data_end)
1999         cpc     r27, r17
2000         brne    .L__do_copy_data_loop
2001 #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2002         ldi     r17, hi8(__data_end)
2003         ldi     r26, lo8(__data_start)
2004         ldi     r27, hi8(__data_start)
2005         ldi     r30, lo8(__data_load_start)
2006         ldi     r31, hi8(__data_load_start)
2007         rjmp    .L__do_copy_data_start
2008 .L__do_copy_data_loop:
2009 #if defined (__AVR_HAVE_LPMX__)
2010         lpm     r0, Z+
2011 #else
2012         lpm
2013         adiw    r30, 1
2014 #endif
2015         st      X+, r0
2016 .L__do_copy_data_start:
2017         cpi     r26, lo8(__data_end)
2018         cpc     r27, r17
2019         brne    .L__do_copy_data_loop
2020 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2021 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2022         ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2023         out     __RAMPZ__, __zero_reg__
2024 #endif /* ELPM && RAMPD */
2025 ENDF __do_copy_data
2026 #endif /* L_copy_data */
2027
2028 /* __do_clear_bss is only necessary if there is anything in .bss section.  */
2029
2030 #ifdef L_clear_bss
2031         .section .init4,"ax",@progbits
2032 DEFUN __do_clear_bss
              ;; Store __zero_reg__ to every byte of RAM from __bss_start up to
              ;; (excluding) __bss_end.  X = write pointer, R17 = hi8(__bss_end)
              ;; for the end compare.  The compare comes first, so an empty
              ;; range writes nothing.
2033         ldi     r17, hi8(__bss_end)
2034         ldi     r26, lo8(__bss_start)
2035         ldi     r27, hi8(__bss_start)
2036         rjmp    .do_clear_bss_start
2037 .do_clear_bss_loop:
2038         st      X+, __zero_reg__
2039 .do_clear_bss_start:
2040         cpi     r26, lo8(__bss_end)
2041         cpc     r27, r17
2042         brne    .do_clear_bss_loop
2043 ENDF __do_clear_bss
2044 #endif /* L_clear_bss */
2045
2046 /* __do_global_ctors and __do_global_dtors are only necessary
2047    if there are any constructors/destructors.  */
2048
2049 #ifdef L_ctors
2050         .section .init6,"ax",@progbits
2051 DEFUN __do_global_ctors
              ;; Call every function whose address is stored in the flash
              ;; table between __ctors_start and __ctors_end.  The table is
              ;; walked backwards: Y starts at __ctors_end and is decremented
              ;; by one 2-byte entry before each call, until it reaches
              ;; __ctors_start.
2052 #if defined(__AVR_HAVE_ELPM__)
              ;; R16 extends Y to a 24-bit table address (written to RAMPZ)
2053         ldi     r17, hi8(__ctors_start)
2054         ldi     r28, lo8(__ctors_end)
2055         ldi     r29, hi8(__ctors_end)
2056         ldi     r16, hh8(__ctors_end)
2057         rjmp    .L__do_global_ctors_start
2058 .L__do_global_ctors_loop:
2059         sbiw    r28, 2
2060         sbc     r16, __zero_reg__
2061         mov_h   r31, r29
2062         mov_l   r30, r28
2063         out     __RAMPZ__, r16
2064         XCALL   __tablejump_elpm__
2065 .L__do_global_ctors_start:
2066         cpi     r28, lo8(__ctors_start)
2067         cpc     r29, r17
2068         ldi     r24, hh8(__ctors_start)
2069         cpc     r16, r24
2070         brne    .L__do_global_ctors_loop
2071 #else
2072         ldi     r17, hi8(__ctors_start)
2073         ldi     r28, lo8(__ctors_end)
2074         ldi     r29, hi8(__ctors_end)
2075         rjmp    .L__do_global_ctors_start
2076 .L__do_global_ctors_loop:
2077         sbiw    r28, 2
2078         mov_h   r31, r29
2079         mov_l   r30, r28
2080         XCALL   __tablejump__
2081 .L__do_global_ctors_start:
2082         cpi     r28, lo8(__ctors_start)
2083         cpc     r29, r17
2084         brne    .L__do_global_ctors_loop
2085 #endif /* defined(__AVR_HAVE_ELPM__) */
2086 ENDF __do_global_ctors
2087 #endif /* L_ctors */
2088
2089 #ifdef L_dtors
2090         .section .fini6,"ax",@progbits
2091 DEFUN __do_global_dtors
              ;; Call every function whose address is stored in the flash
              ;; table between __dtors_start and __dtors_end.  The table is
              ;; walked forwards: Y starts at __dtors_start, the entry at Y is
              ;; called, then Y advances by one 2-byte entry until it reaches
              ;; __dtors_end.  Both variants use the same order: call first,
              ;; then step.
2092 #if defined(__AVR_HAVE_ELPM__)
              ;; R16 extends Y to a 24-bit table address (written to RAMPZ)
2093         ldi     r17, hi8(__dtors_end)
2094         ldi     r28, lo8(__dtors_start)
2095         ldi     r29, hi8(__dtors_start)
2096         ldi     r16, hh8(__dtors_start)
2097         rjmp    .L__do_global_dtors_start
2098 .L__do_global_dtors_loop:
2099         mov_h   r31, r29
2100         mov_l   r30, r28
2101         out     __RAMPZ__, r16
2102         XCALL   __tablejump_elpm__
2103         adiw    r28, 2          ; next entry; carry out of Y ...
2104         adc     r16, __zero_reg__ ; ... ripples into bits 16+ of the address
2105 .L__do_global_dtors_start:
2106         cpi     r28, lo8(__dtors_end)
2107         cpc     r29, r17
2108         ldi     r24, hh8(__dtors_end)
2109         cpc     r16, r24
2110         brne    .L__do_global_dtors_loop
2111 #else
2112         ldi     r17, hi8(__dtors_end)
2113         ldi     r28, lo8(__dtors_start)
2114         ldi     r29, hi8(__dtors_start)
2115         rjmp    .L__do_global_dtors_start
2116 .L__do_global_dtors_loop:
2117         mov_h   r31, r29
2118         mov_l   r30, r28
2119         XCALL   __tablejump__
2120         adiw    r28, 2
2121 .L__do_global_dtors_start:
2122         cpi     r28, lo8(__dtors_end)
2123         cpc     r29, r17
2124         brne    .L__do_global_dtors_loop
2125 #endif /* defined(__AVR_HAVE_ELPM__) */
2126 ENDF __do_global_dtors
2127 #endif /* L_dtors */
2128
2129 .section .text.libgcc, "ax", @progbits
2130
2131 #ifdef L_tablejump_elpm
2132 DEFUN __tablejump_elpm__
              ;; Like __tablejump__, but the 2-byte entry is read with ELPM
              ;; from flash address RAMPZ:Z, then jumped to.
              ;; The body is empty on devices without ELPM.
2133 #if defined (__AVR_HAVE_ELPMX__)
2134         elpm    __tmp_reg__, Z+
2135         elpm    r31, Z
2136         mov     r30, __tmp_reg__
2137 #if defined (__AVR_HAVE_RAMPD__)
2138         ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2139         out     __RAMPZ__, __zero_reg__
2140 #endif /* RAMPD */
2141 #if defined (__AVR_HAVE_EIJMP_EICALL__)
2142         eijmp
2143 #else
2144         ijmp
2145 #endif
2146
2147 #elif defined (__AVR_HAVE_ELPM__)
              ;; Without ELPM Rd,Z+ the entry is pushed like a return address
              ;; (low byte first) and RET performs the jump
2148         elpm
2149         adiw    r30, 1
2150         push    r0
2151         elpm
2152         push    r0
2153 #if defined (__AVR_HAVE_EIJMP_EICALL__)
              ;; the return address has a third byte here: use EIND for it
2154         in      __tmp_reg__, __EIND__
2155         push    __tmp_reg__
2156 #endif
2157         ret
2158 #endif
2159 ENDF __tablejump_elpm__
2160 #endif /* defined (L_tablejump_elpm) */
2161
2162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2163 ;; Loading n bytes from Flash; n = 3,4
2164 ;; R22... = Flash[Z]
2165 ;; Clobbers: __tmp_reg__
2166
2167 #if (defined (L_load_3)        \
2168      || defined (L_load_4))    \
2169     && !defined (__AVR_HAVE_LPMX__)
2170
2171 ;; Destination
2172 #define D0  22
2173 #define D1  D0+1
2174 #define D2  D0+2
2175 #define D3  D0+3
2176
2177 .macro  .load dest, n
     ;; Load one byte from flash address Z into \dest by way of R0.
     ;; \n is the total number of bytes of the access.  For all but the
     ;; last destination register Z is advanced by 1; after the last one
     ;; Z is moved back by \n-1, i.e. restored to its initial value.
2178     lpm
2179     mov     \dest, r0
2180 .if \dest != D0+\n-1
2181     adiw    r30, 1
2182 .else
2183     sbiw    r30, \n-1
2184 .endif
2185 .endm
2186
2187 #if defined (L_load_3)
2188 DEFUN __load_3
     ;; Load 3 bytes from flash: reuse __load_4 and protect D3, which
     ;; __load_4 overwrites, by saving it on the stack.
2189     push  D3
2190     XCALL __load_4
2191     pop   D3
2192     ret
2193 ENDF __load_3
2194 #endif /* L_load_3 */
2195
2196 #if defined (L_load_4)
2197 DEFUN __load_4
     ;; Load 4 bytes from flash address Z into D0...D3.
     ;; The last .load restores Z.
2198     .load D0, 4
2199     .load D1, 4
2200     .load D2, 4
2201     .load D3, 4
2202     ret
2203 ENDF __load_4
2204 #endif /* L_load_4 */
2205
2206 #endif /* L_load_3 || L_load_4 */
2207
2208 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2209 ;; Loading n bytes from Flash or RAM;  n = 1,2,3,4
2210 ;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2211 ;; Clobbers: __tmp_reg__, R21, R30, R31
2212
2213 #if (defined (L_xload_1)            \
2214      || defined (L_xload_2)         \
2215      || defined (L_xload_3)         \
2216      || defined (L_xload_4))
2217
2218 ;; Destination
2219 #define D0  22
2220 #define D1  D0+1
2221 #define D2  D0+2
2222 #define D3  D0+3
2223
2224 ;; Register containing bits 16+ of the address
2225
2226 #define HHI8  21
2227
2228 .macro  .xload dest, n
     ;; Load one byte from flash into \dest; \n is the total number of
     ;; bytes of the access.  The instruction used depends on the device:
     ;; ELPM Rd,Z+ / ELPM / LPM Rd,Z+ / LPM.
     ;; In the variants without post-increment Z is advanced by hand,
     ;; except after the last byte.
2229 #if defined (__AVR_HAVE_ELPMX__)
2230     elpm    \dest, Z+
2231 #elif defined (__AVR_HAVE_ELPM__)
2232     elpm
2233     mov     \dest, r0
2234 .if \dest != D0+\n-1
     ;; advance the 24-bit address HHI8:Z and update RAMPZ
2235     adiw    r30, 1
2236     adc     HHI8, __zero_reg__
2237     out     __RAMPZ__, HHI8
2238 .endif
2239 #elif defined (__AVR_HAVE_LPMX__)
2240     lpm     \dest, Z+
2241 #else
2242     lpm
2243     mov     \dest, r0
2244 .if \dest != D0+\n-1
2245     adiw    r30, 1
2246 .endif
2247 #endif
2248 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2249 .if \dest == D0+\n-1
2250     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2251     out     __RAMPZ__, __zero_reg__
2252 .endif
2253 #endif
2254 .endm ; .xload
2255
2256 #if defined (L_xload_1)
2257 DEFUN __xload_1
     ;; Load 1 byte into D0: from RAM[Z] if HHI8.7 is set, else from flash.
2258 #if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
     ;; Branch-free variant: exactly one of the two loads is executed,
     ;; the other one is skipped
2259     sbrc    HHI8, 7
2260     ld      D0, Z
2261     sbrs    HHI8, 7
2262     lpm     D0, Z
2263     ret
2264 #else
2265     sbrc    HHI8, 7
2266     rjmp    1f
2267 #if defined (__AVR_HAVE_ELPM__)
2268     out     __RAMPZ__, HHI8
2269 #endif /* __AVR_HAVE_ELPM__ */
2270     .xload  D0, 1
2271     ret
2272 1:  ld      D0, Z
2273     ret
2274 #endif /* LPMx && ! ELPM */
2275 ENDF __xload_1
2276 #endif /* L_xload_1 */
2277
2278 #if defined (L_xload_2)
2279 DEFUN __xload_2
     ;; Load 2 bytes into D0, D1: from RAM[Z] if HHI8.7 is set, else from
     ;; flash (with HHI8 written to RAMPZ on ELPM devices).
2280     sbrc    HHI8, 7
2281     rjmp    1f
2282 #if defined (__AVR_HAVE_ELPM__)
2283     out     __RAMPZ__, HHI8
2284 #endif /* __AVR_HAVE_ELPM__ */
2285     .xload  D0, 2
2286     .xload  D1, 2
2287     ret
     ;; Read from RAM
2288 1:  ld      D0, Z+
2289     ld      D1, Z+
2290     ret
2291 ENDF __xload_2
2292 #endif /* L_xload_2 */
2293 ;; __xload_3:  R24:R22 = 3 bytes, read from RAM[Z] if R21.7 is set, else from flash
2294 #if defined (L_xload_3)
2295 DEFUN __xload_3
2296     sbrc    HHI8, 7             ;; bit 7 set: take the RAM path at 1:
2297     rjmp    1f
2298 #if defined (__AVR_HAVE_ELPM__)
2299     out     __RAMPZ__, HHI8     ;; address bits 16+ go to RAMPZ for ELPM
2300 #endif /* __AVR_HAVE_ELPM__ */
2301     .xload  D0, 3
2302     .xload  D1, 3
2303     .xload  D2, 3
2304     ret
2305 1:  ld      D0, Z+              ;; RAM path: ascending addresses into ascending registers
2306     ld      D1, Z+
2307     ld      D2, Z+
2308     ret
2309 ENDF __xload_3
2310 #endif /* L_xload_3 */
2311 ;; __xload_4:  R25:R22 = 4 bytes, read from RAM[Z] if R21.7 is set, else from flash
2312 #if defined (L_xload_4)
2313 DEFUN __xload_4
2314     sbrc    HHI8, 7             ;; bit 7 set: take the RAM path at 1:
2315     rjmp    1f
2316 #if defined (__AVR_HAVE_ELPM__)
2317     out     __RAMPZ__, HHI8     ;; address bits 16+ go to RAMPZ for ELPM
2318 #endif /* __AVR_HAVE_ELPM__ */
2319     .xload  D0, 4
2320     .xload  D1, 4
2321     .xload  D2, 4
2322     .xload  D3, 4
2323     ret
2324 1:  ld      D0, Z+              ;; RAM path: ascending addresses into ascending registers
2325     ld      D1, Z+
2326     ld      D2, Z+
2327     ld      D3, Z+
2328     ret
2329 ENDF __xload_4
2330 #endif /* L_xload_4 */
2331
2332 #endif /* L_xload_{1|2|3|4} */
2333
2334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2335 ;; memcopy from Address Space __pgmx to RAM
2336 ;; R23:Z   = Source Address; R23.7 set selects RAM, clear selects Flash
2337 ;; X       = Destination Address
2338 ;; R25:R24 = Number of Bytes to copy (__movmemx_qi takes it in R24 only)
2339 ;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2340 #if defined (L_movmemx)
2341
2342 #define HHI8  23
2343 #define LOOP  24
2344
2345 DEFUN __movmemx_qi
2346     ;; #Bytes to copy fit in 8 Bits (1..255)
2347     ;; Zero-extend Loop Counter
2348     clr     LOOP+1
2349     ;; FALLTHRU
2350 ENDF __movmemx_qi
2351 ;; __movmemx_hi:  same copy with the full 16-bit byte count
2352 DEFUN __movmemx_hi
2353
2354 ;; Read from where?
2355     sbrc    HHI8, 7             ;; bit 7 set: source is RAM, use the loop at 1:
2356     rjmp    1f
2357
2358 ;; Read from Flash
2359
2360 #if defined (__AVR_HAVE_ELPM__)
2361     out     __RAMPZ__, HHI8     ;; address bits 16+ go to RAMPZ for ELPM
2362 #endif
2363
2364 0:  ;; Load 1 Byte from Flash...
2365
2366 #if defined (__AVR_HAVE_ELPMX__)
2367     elpm    r0, Z+
2368 #elif defined (__AVR_HAVE_ELPM__)
2369     elpm                        ;; operand-less form: the byte arrives in r0
2370     adiw    r30, 1              ;; advance Z by hand ...
2371     adc     HHI8, __zero_reg__  ;; ... carry into address bits 16+ ...
2372     out     __RAMPZ__, HHI8     ;; ... and write them back to RAMPZ
2373 #elif defined (__AVR_HAVE_LPMX__)
2374     lpm     r0, Z+
2375 #else
2376     lpm                         ;; operand-less form: the byte arrives in r0
2377     adiw    r30, 1
2378 #endif
2379
2380     ;; ...and store that Byte to RAM Destination
2381     st      X+, r0
2382     sbiw    LOOP, 1             ;; counter is tested only after a copy, so a
2383     brne    0b                  ;; count of 0 is not handled as an empty copy
2384 #if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2385     ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2386     out __RAMPZ__, __zero_reg__
2387 #endif /* ELPM && RAMPD */
2388     ret
2389
2390 ;; Read from RAM
2391
2392 1:  ;; Read 1 Byte from RAM...
2393     ld      r0, Z+
2394     ;; and store that Byte to RAM Destination
2395     st      X+, r0
2396     sbiw    LOOP, 1             ;; same post-copy test as in the Flash loop
2397     brne    1b
2398     ret
2399 ENDF __movmemx_hi
2400
2401 #undef HHI8
2402 #undef LOOP
2403
2404 #endif /* L_movmemx */
2405
2406 \f
2407 .section .text.libgcc.builtins, "ax", @progbits
2408
2409 /**********************************
2410  * Find first set Bit (ffs)
2411  **********************************/
2412
2413 #if defined (L_ffssi2)
2414 ;; find first set bit: 1-based position of the lowest 1 bit, 0 for input 0
2415 ;; r25:r24 = ffs32 (r25:r22)
2416 ;; clobbers: r22, r26
2417 DEFUN __ffssi2
2418     clr  r26            ;; r26 = bit offset of the byte being examined
2419     tst  r22
2420     brne 1f
2421     subi r26, -8        ;; byte 0 is zero: offset += 8, look at byte 1
2422     or   r22, r23
2423     brne 1f
2424     subi r26, -8        ;; byte 1 is zero: offset += 8, look at byte 2
2425     or   r22, r24
2426     brne 1f
2427     subi r26, -8        ;; byte 2 is zero: offset += 8, look at byte 3
2428     or   r22, r25
2429     brne 1f
2430     ret                 ;; all bytes zero, so r25:r24 is already 0
2431 1:  mov  r24, r22       ;; r22 holds the first non-zero byte
2432     XJMP __loop_ffsqi2
2433 ENDF __ffssi2
2434 #endif /* defined (L_ffssi2) */
2435
2436 #if defined (L_ffshi2)
2437 ;; find first set bit: 1-based position of the lowest 1 bit, 0 for input 0
2438 ;; r25:r24 = ffs16 (r25:r24)
2439 ;; clobbers: r26
2440 DEFUN __ffshi2
2441     clr  r26            ;; bit offset 0 for the low byte
2442 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2443     ;; Some cores have problem skipping 2-word instruction
2444     tst  r24
2445     breq 2f
2446 #else
2447     cpse r24, __zero_reg__  ;; low byte is zero: skip the jump and continue at 2:
2448 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2449 1:  XJMP __loop_ffsqi2
2450 2:  ldi  r26, 8         ;; low byte is zero: search the high byte with offset 8
2451     or   r24, r25
2452     brne 1b
2453     ret                 ;; both bytes zero, so r25:r24 is already 0
2454 ENDF __ffshi2
2455 #endif /* defined (L_ffshi2) */
2456
2457 #if defined (L_loop_ffsqi2)
2458 ;; Helper for ffshi2, ffssi2
2459 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
2460 ;; r24 must be != 0 (the loop only ends when a 1 bit is shifted out)
2461 ;; clobbers: r26
2462 DEFUN __loop_ffsqi2
2463     inc  r26            ;; count one more bit position
2464     lsr  r24            ;; shift bit 0 into carry
2465     brcc __loop_ffsqi2  ;; it was 0: keep searching
2466     mov  r24, r26
2467     clr  r25            ;; zero-extend the result to 16 bits
2468     ret
2469 ENDF __loop_ffsqi2
2470 #endif /* defined (L_loop_ffsqi2) */
2471
2472 \f
2473 /**********************************
2474  * Count trailing Zeros (ctz)
2475  **********************************/
2476
2477 #if defined (L_ctzsi2)
2478 ;; count trailing zeros, computed as ffs32 - 1
2479 ;; r25:r24 = ctz32 (r25:r22)
2480 ;; clobbers: r26, r22
2481 ;; ctz(0) = 255
2482 ;; Note that ctz(0) is undefined for GCC
2483 DEFUN __ctzsi2
2484     XCALL __ffssi2
2485     dec  r24            ;; only the low byte is decremented, hence 255 for input 0
2486     ret
2487 ENDF __ctzsi2
2488 #endif /* defined (L_ctzsi2) */
2489
2490 #if defined (L_ctzhi2)
2491 ;; count trailing zeros, computed as ffs16 - 1
2492 ;; r25:r24 = ctz16 (r25:r24)
2493 ;; clobbers: r26
2494 ;; ctz(0) = 255
2495 ;; Note that ctz(0) is undefined for GCC
2496 DEFUN __ctzhi2
2497     XCALL __ffshi2
2498     dec  r24            ;; only the low byte is decremented, hence 255 for input 0
2499     ret
2500 ENDF __ctzhi2
2501 #endif /* defined (L_ctzhi2) */
2502
2503 \f
2504 /**********************************
2505  * Count leading Zeros (clz)
2506  **********************************/
2507
2508 #if defined (L_clzdi2)
2509 ;; count leading zeros; built from two __clzsi2 calls (high half, then low half)
2510 ;; r25:r24 = clz64 (r25:r18)
2511 ;; clobbers: r22, r23, r26
2512 DEFUN __clzdi2
2513     XCALL __clzsi2
2514     sbrs r24, 5         ;; bit 5 set (count is 32): high half was zero, go on
2515     ret
2516     mov_l r22, r18
2517     mov_h r23, r19
2518     mov_l r24, r20
2519     mov_h r25, r21
2520     XCALL __clzsi2
2521     subi r24, -32       ;; add the 32 zero bits of the high half
2522     ret
2523 ENDF __clzdi2
2524 #endif /* defined (L_clzdi2) */
2525
2526 #if defined (L_clzsi2)
2527 ;; count leading zeros; built from two __clzhi2 calls (high word, then low word)
2528 ;; r25:r24 = clz32 (r25:r22)
2529 ;; clobbers: r26
2530 DEFUN __clzsi2
2531     XCALL __clzhi2
2532     sbrs r24, 4         ;; bit 4 set (count is 16): high word was zero, go on
2533     ret
2534     mov_l r24, r22
2535     mov_h r25, r23
2536     XCALL __clzhi2
2537     subi r24, -16       ;; add the 16 zero bits of the high word
2538     ret
2539 ENDF __clzsi2
2540 #endif /* defined (L_clzsi2) */
2541
2542 #if defined (L_clzhi2)
2543 ;; count leading zeros; returns 16 for input 0
2544 ;; r25:r24 = clz16 (r25:r24)
2545 ;; clobbers: r26
2546 DEFUN __clzhi2
2547     clr  r26            ;; r26 = running count of leading zeros
2548     tst  r25
2549     brne 1f
2550     subi r26, -8        ;; high byte is zero: 8 leading zeros so far
2551     or   r25, r24       ;; continue with the low byte, now in r25
2552     brne 1f
2553     ldi  r24, 16        ;; input was 0 (r25 is 0 here): result is 16
2554     ret
2555 1:  cpi  r25, 16
2556     brsh 3f             ;; high nibble is non-zero: start the bit loop
2557     subi r26, -3        ;; high nibble is zero: 4 more zeros, 3 here plus the inc at 2:
2558     swap r25            ;; bring the low nibble to the top
2559 2:  inc  r26
2560 3:  lsl  r25            ;; shift the top bit into carry
2561     brcc 2b             ;; it was 0: count it and try the next bit
2562     mov  r24, r26
2563     clr  r25            ;; zero-extend the result to 16 bits
2564     ret
2565 ENDF __clzhi2
2566 #endif /* defined (L_clzhi2) */
2567
2568 \f
2569 /**********************************
2570  * Parity
2571  **********************************/
2572
2573 #if defined (L_paritydi2)
2574 ;; r25:r24 = parity64 (r25:r18)
2575 ;; clobbers: __tmp_reg__
2576 DEFUN __paritydi2
2577     eor  r24, r18       ;; fold bytes 0..3 into r24 by XOR ...
2578     eor  r24, r19
2579     eor  r24, r20
2580     eor  r24, r21       ;; ... then finish with the 32-bit routine on r25:r22
2581     XJMP __paritysi2
2582 ENDF __paritydi2
2583 #endif /* defined (L_paritydi2) */
2584
2585 #if defined (L_paritysi2)
2586 ;; r25:r24 = parity32 (r25:r22)
2587 ;; clobbers: __tmp_reg__
2588 DEFUN __paritysi2
2589     eor  r24, r22       ;; fold bytes 0 and 1 into r24 by XOR ...
2590     eor  r24, r23       ;; ... then finish with the 16-bit routine on r25:r24
2591     XJMP __parityhi2
2592 ENDF __paritysi2
2593 #endif /* defined (L_paritysi2) */
2594
2595 #if defined (L_parityhi2)
2596 ;; r25:r24 = parity16 (r25:r24)
2597 ;; clobbers: __tmp_reg__
2598 DEFUN __parityhi2
2599     eor  r24, r25       ;; fold the high byte into r24 by XOR
2600 ;; FALLTHRU
2601 ENDF __parityhi2
2602
2603 ;; r25:r24 = parity8 (r24); the result is 0 or 1
2604 ;; clobbers: __tmp_reg__
2605 DEFUN __parityqi2
2606     ;; parity is in r24[0..7]
2607     mov  __tmp_reg__, r24
2608     swap __tmp_reg__    ;; exchange the nibbles of the copy
2609     eor  r24, __tmp_reg__
2610     ;; parity is in r24[0..3]
2611     subi r24, -4
2612     andi r24, -5
2613     subi r24, -6
2614     ;; parity is in r24[0,3]
2615     sbrc r24, 3         ;; bit 3 set: the inc below toggles bit 0
2616     inc  r24
2617     ;; parity is in r24[0]
2618     andi r24, 1
2619     clr  r25            ;; zero-extend the result to 16 bits
2620     ret
2621 ENDF __parityqi2
2622 #endif /* defined (L_parityhi2) */
2623
2624 \f
2625 /**********************************
2626  * Population Count
2627  **********************************/
2628
2629 #if defined (L_popcounthi2)
2630 ;; population count: popcount8 of each byte, summed in __popcounthi2_tail
2631 ;; r25:r24 = popcount16 (r25:r24)
2632 ;; clobbers: __tmp_reg__
2633 DEFUN __popcounthi2
2634     XCALL __popcountqi2
2635     push r24            ;; park the count of the low byte on the stack
2636     mov  r24, r25
2637     XCALL __popcountqi2
2638     clr  r25            ;; high byte of the result is 0
2639     ;; FALLTHRU
2640 ENDF __popcounthi2
2641 ;; Shared tail: r24 += the byte on top of the stack; __popcountsi2 and __popcountdi2 jump here
2642 DEFUN __popcounthi2_tail
2643     pop   __tmp_reg__   ;; partial count pushed earlier
2644     add   r24, __tmp_reg__
2645     ret
2646 ENDF __popcounthi2_tail
2647 #endif /* defined (L_popcounthi2) */
2648
2649 #if defined (L_popcountsi2)
2650 ;; population count: popcount16 of each word, summed in __popcounthi2_tail
2651 ;; r25:r24 = popcount32 (r25:r22)
2652 ;; clobbers: __tmp_reg__
2653 DEFUN __popcountsi2
2654     XCALL __popcounthi2
2655     push  r24           ;; park the count of the high word; the tail pops it
2656     mov_l r24, r22
2657     mov_h r25, r23
2658     XCALL __popcounthi2
2659     XJMP  __popcounthi2_tail
2660 ENDF __popcountsi2
2661 #endif /* defined (L_popcountsi2) */
2662
2663 #if defined (L_popcountdi2)
2664 ;; population count: popcount32 of each half, summed in __popcounthi2_tail
2665 ;; r25:r24 = popcount64 (r25:r18)
2666 ;; clobbers: r22, r23, __tmp_reg__
2667 DEFUN __popcountdi2
2668     XCALL __popcountsi2
2669     push  r24           ;; park the count of the high half; the tail pops it
2670     mov_l r22, r18
2671     mov_h r23, r19
2672     mov_l r24, r20
2673     mov_h r25, r21
2674     XCALL __popcountsi2
2675     XJMP  __popcounthi2_tail
2676 ENDF __popcountdi2
2677 #endif /* defined (L_popcountdi2) */
2678
2679 #if defined (L_popcountqi2)
2680 ;; population count: bits are shifted out one by one and added via carry
2681 ;; r24 = popcount8 (r24)
2682 ;; clobbers: __tmp_reg__
2683 DEFUN __popcountqi2
2684     mov  __tmp_reg__, r24
2685     andi r24, 1             ;; start the sum with bit 0
2686     lsr  __tmp_reg__        ;; drop bit 0, it is already counted
2687     lsr  __tmp_reg__        ;; carry = bit 1
2688     adc  r24, __zero_reg__
2689     lsr  __tmp_reg__        ;; carry = bit 2
2690     adc  r24, __zero_reg__
2691     lsr  __tmp_reg__        ;; carry = bit 3
2692     adc  r24, __zero_reg__
2693     lsr  __tmp_reg__        ;; carry = bit 4
2694     adc  r24, __zero_reg__
2695     lsr  __tmp_reg__        ;; carry = bit 5
2696     adc  r24, __zero_reg__
2697     lsr  __tmp_reg__        ;; carry = bit 6; the register now holds bit 7 alone
2698     adc  r24, __tmp_reg__   ;; add bits 6 and 7 in one step
2699     ret
2700 ENDF __popcountqi2
2701 #endif /* defined (L_popcountqi2) */
2702
2703 \f
2704 /**********************************
2705  * Swap bytes
2706  **********************************/
2707
2708 ;; swap two registers with different register number (XOR swap; the same register twice would end up 0)
2709 .macro bswap a, b
2710     eor \a, \b
2711     eor \b, \a
2712     eor \a, \b
2713 .endm
2714
2715 #if defined (L_bswapsi2)
2716 ;; swap bytes: reverse the byte order in place (r22 <-> r25, r23 <-> r24)
2717 ;; r25:r22 = bswap32 (r25:r22)
2718 DEFUN __bswapsi2
2719     bswap r22, r25
2720     bswap r23, r24
2721     ret
2722 ENDF __bswapsi2
2723 #endif /* defined (L_bswapsi2) */
2724
2725 #if defined (L_bswapdi2)
2726 ;; swap bytes: reverse the byte order in place (r18 <-> r25, ..., r21 <-> r22)
2727 ;; r25:r18 = bswap64 (r25:r18)
2728 DEFUN __bswapdi2
2729     bswap r18, r25
2730     bswap r19, r24
2731     bswap r20, r23
2732     bswap r21, r22
2733     ret
2734 ENDF __bswapdi2
2735 #endif /* defined (L_bswapdi2) */
2736
2737 \f
2738 /**********************************
2739  * 64-bit shifts
2740  **********************************/
2741
2742 #if defined (L_ashrdi3)
2743 ;; Arithmetic shift right, one bit per loop iteration; r17 is not read
2744 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
2745 DEFUN __ashrdi3
2746     push r16            ;; r16 serves as loop counter and is restored at 2:
2747     andi r16, 63        ;; only bits 0..5 of the count are used
2748     breq 2f             ;; count is 0: nothing to shift
2749 1:  asr  r25            ;; top byte keeps its sign bit, bit 0 goes to carry
2750     ror  r24            ;; carry ripples down through the lower bytes
2751     ror  r23
2752     ror  r22
2753     ror  r21
2754     ror  r20
2755     ror  r19
2756     ror  r18
2757     dec  r16
2758     brne 1b
2759 2:  pop  r16
2760     ret
2761 ENDF __ashrdi3
2762 #endif /* defined (L_ashrdi3) */
2763
2764 #if defined (L_lshrdi3)
2765 ;; Logic shift right, one bit per loop iteration; r17 is not read
2766 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
2767 DEFUN __lshrdi3
2768     push r16            ;; r16 serves as loop counter and is restored at 2:
2769     andi r16, 63        ;; only bits 0..5 of the count are used
2770     breq 2f             ;; count is 0: nothing to shift
2771 1:  lsr  r25            ;; top byte gets a 0 at bit 7, bit 0 goes to carry
2772     ror  r24            ;; carry ripples down through the lower bytes
2773     ror  r23
2774     ror  r22
2775     ror  r21
2776     ror  r20
2777     ror  r19
2778     ror  r18
2779     dec  r16
2780     brne 1b
2781 2:  pop  r16
2782     ret
2783 ENDF __lshrdi3
2784 #endif /* defined (L_lshrdi3) */
2785
2786 #if defined (L_ashldi3)
2787 ;; Shift left, one bit per loop iteration; r17 is not read
2788 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
2789 DEFUN __ashldi3
2790     push r16            ;; r16 serves as loop counter and is restored at 2:
2791     andi r16, 63        ;; only bits 0..5 of the count are used
2792     breq 2f             ;; count is 0: nothing to shift
2793 1:  lsl  r18            ;; lowest byte gets a 0 at bit 0, bit 7 goes to carry
2794     rol  r19            ;; carry ripples up through the higher bytes
2795     rol  r20
2796     rol  r21
2797     rol  r22
2798     rol  r23
2799     rol  r24
2800     rol  r25
2801     dec  r16
2802     brne 1b
2803 2:  pop  r16
2804     ret
2805 ENDF __ashldi3
2806 #endif /* defined (L_ashldi3) */
2807
2808 #if defined (L_rotldi3)
2809 ;; Rotate left, one bit per loop iteration; r17 is not read
2810 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
2811 DEFUN __rotldi3
2812     push r16            ;; r16 serves as loop counter and is restored at 2:
2813     andi r16, 63        ;; only bits 0..5 of the count are used
2814     breq 2f             ;; count is 0: nothing to rotate
2815 1:  lsl  r18            ;; bit 0 becomes 0 for now, bit 7 goes to carry
2816     rol  r19            ;; carry ripples up through the higher bytes
2817     rol  r20
2818     rol  r21
2819     rol  r22
2820     rol  r23
2821     rol  r24
2822     rol  r25
2823     adc  r18, __zero_reg__  ;; the bit shifted out of r25 re-enters at bit 0
2824     dec  r16
2825     brne 1b
2826 2:  pop  r16
2827     ret
2828 ENDF __rotldi3
2829 #endif /* defined (L_rotldi3) */
2830
2831 \f
2832 .section .text.libgcc.fmul, "ax", @progbits
2833
2834 /***********************************************************/
2835 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement
2836 ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
2837 /***********************************************************/
2838
2839 #define A1 24
2840 #define B1 25
2841 #define C0 22
2842 #define C1 23
2843 #define A0 __tmp_reg__
2844
2845 #ifdef L_fmuls
2846 ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
2847 ;;; Clobbers: r24, r25, __tmp_reg__
2848 DEFUN __fmuls
2849     ;; A0.7 = negate result?
2850     mov  A0, A1
2851     eor  A0, B1         ;; bit 7 ends up set iff the operand signs differ
2852     ;; B1 = |B1|
2853     sbrc B1, 7          ;; negate only when the sign bit is set
2854     neg  B1
2855     XJMP __fmulsu_exit
2856 ENDF __fmuls
2857 #endif /* L_fmuls */
2858
2859 #ifdef L_fmulsu
2860 ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
2861 ;;; Clobbers: r24, r25, __tmp_reg__
2862 DEFUN __fmulsu
2863     ;; A0.7 = negate result?
2864     mov  A0, A1
2865 ;; FALLTHRU
2866 ENDF __fmulsu
2867
2868 ;; Helper for __fmuls and __fmulsu; bit 7 of __tmp_reg__ tells whether to negate the product
2869 DEFUN __fmulsu_exit
2870     ;; A1 = |A1|
2871     sbrc A1, 7          ;; negate only when the sign bit is set
2872     neg  A1
2873 #ifdef __AVR_ERRATA_SKIP_JMP_CALL__
2874     ;; Some cores have problem skipping 2-word instruction
2875     tst  A0
2876     brmi 1f             ;; negate flag set: call and negate at 1:
2877 #else
2878     sbrs A0, 7          ;; negate flag set: skip the jump, call and negate at 1:
2879 #endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
2880     XJMP  __fmul
2881 1:  XCALL __fmul
2882     ;; C = -C iff A0.7 = 1
2883     NEG2 C0
2884     ret
2885 ENDF __fmulsu_exit
2886 #endif /* L_fmulsu */
2887
2888
2889 #ifdef L_fmul
2890 ;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
2891 ;;; Clobbers: r24, r25, __tmp_reg__
2892 DEFUN __fmul
2893     ; clear result
2894     clr   C0
2895     clr   C1
2896     clr   A0            ;; A is the 16-bit value r24:__tmp_reg__, low byte starts at 0
2897 1:  tst   B1            ;; sets N from bit 7 of B (label 1 is not referenced in this function)
2898     ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
2899 2:  brpl  3f
2900     ;; C += A
2901     add   C0, A0
2902     adc   C1, A1
2903 3:  ;; A >>= 1
2904     lsr   A1
2905     ror   A0
2906     ;; B <<= 1
2907     lsl   B1            ;; sets Z for the brne below and N for the brpl at 2:
2908     brne  2b            ;; loop while B still has bits left
2909     ret
2910 ENDF __fmul
2911 #endif /* L_fmul */
2912
2913 #undef A0
2914 #undef A1
2915 #undef B1
2916 #undef C0
2917 #undef C1
2918
2919 #include "lib1funcs-fixed.S"