libgcc/config/avr/lib1funcs-fixed.S

   1 /*  -*- Mode: Asm -*-  */
   2 ;;    Copyright (C) 2012
   3 ;;    Free Software Foundation, Inc.
   4 ;;    Contributed by Sean D'Epagnier  (sean@depagnier.com)
   5 ;;                   Georg-Johann Lay (avr@gjlay.de)
   6
   7 ;; This file is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by the
   9 ;; Free Software Foundation; either version 3, or (at your option) any
  10 ;; later version.
  11
  12 ;; In addition to the permissions in the GNU General Public License, the
  13 ;; Free Software Foundation gives you unlimited permission to link the
  14 ;; compiled version of this file into combinations with other programs,
  15 ;; and to distribute those combinations without any restriction coming
  16 ;; from the use of this file.  (The General Public License restrictions
  17 ;; do apply in other respects; for example, they cover modification of
  18 ;; the file, and distribution when not linked into a combine
  19 ;; executable.)
  20
  21 ;; This file is distributed in the hope that it will be useful, but
  22 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  23 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  24 ;; General Public License for more details.
  25
  26 ;; You should have received a copy of the GNU General Public License
  27 ;; along with this program; see the file COPYING.  If not, write to
  28 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  29 ;; Boston, MA 02110-1301, USA.
  30
  31 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  32 ;; Fixed point library routines for AVR
  33 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  34
  35 .section .text.libgcc.fixed, "ax", @progbits
  36
  37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  38 ;; Conversions to float
  39 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  40
   41 #if defined (L_fractqqsf)
      ;; __fractqqsf: rearrange the 1-byte argument in r24 into the 4-byte
      ;; operand r25:r22 and tail-jump to __fractsasf, which does the
      ;; conversion.  On exit from this stub  r22 = 0,  r23 = input << 1
      ;; and r25 = r24 = 0x00 or 0xff according to bit 7 of the input.
   42 DEFUN __fractqqsf
   43     ;; Move in place for SA -> SF conversion
   44     clr     r22
   45     mov     r23, r24
   46     lsl     r23             ; carry = bit 7 of the input
   47     ;; Sign-extend
   48     sbc     r24, r24        ; r24 = 0 - carry  (0x00 or 0xff)
   49     mov     r25, r24
   50     XJMP    __fractsasf
   51 ENDF __fractqqsf
   52 #endif  /* L_fractqqsf */
  53
  54 #if defined (L_fractuqqsf)
  55 DEFUN __fractuqqsf
  56     ;; Move in place for USA -> SF conversion
  57     clr     r22
  58     mov     r23, r24
  59     ;; Zero-extend
  60     clr     r24
  61     clr     r25
  62     XJMP    __fractusasf
  63 ENDF __fractuqqsf
  64 #endif  /* L_fractuqqsf */
  65
   66 #if defined (L_fracthqsf)
      ;; __fracthqsf: rearrange the 2-byte argument in r25:r24 into the
      ;; 4-byte operand r25:r22 and tail-jump to __fractsasf.
      ;; r23:r22 receives the input shifted left by one bit, r25:r24 is
      ;; filled with the bit that was shifted out (the input sign bit).
      ;; NOTE(review): wmov is a macro defined outside this section;
      ;; presumably it copies the register pair 25:24 to 23:22.
   67 DEFUN __fracthqsf
   68     ;; Move in place for SA -> SF conversion
   69     wmov    22, 24
   70     lsl     r22
   71     rol     r23             ; carry = former bit 15 of the input
   72     ;; Sign-extend
   73     sbc     r24, r24        ; r24 = 0 - carry  (0x00 or 0xff)
   74     mov     r25, r24
   75     XJMP    __fractsasf
   76 ENDF __fracthqsf
   77 #endif  /* L_fracthqsf */
  78
  79 #if defined (L_fractuhqsf)
  80 DEFUN __fractuhqsf
  81     ;; Move in place for USA -> SF conversion
  82     wmov    22, 24
  83     ;; Zero-extend
  84     clr     r24
  85     clr     r25
  86     XJMP    __fractusasf
  87 ENDF __fractuhqsf
  88 #endif  /* L_fractuhqsf */
  89
   90 #if defined (L_fracthasf)
      ;; __fracthasf: rearrange the 2-byte argument in r25:r24 into the
      ;; 4-byte operand r25:r22 and tail-jump to __fractsasf.
      ;; The input is moved up by one byte (r22 = 0, r23 = low byte,
      ;; r24 = high byte) and r25 is filled with copies of the sign bit.
   91 DEFUN __fracthasf
   92     ;; Move in place for SA -> SF conversion
   93     clr     r22
   94     mov     r23, r24
   95     mov     r24, r25
   96     ;; Sign-extend
   97     lsl     r25             ; carry = sign bit of the input
   98     sbc     r25, r25        ; r25 = 0 - carry  (0x00 or 0xff)
   99     XJMP    __fractsasf
  100 ENDF __fracthasf
  101 #endif  /* L_fracthasf */
 102
 103 #if defined (L_fractuhasf)
 104 DEFUN __fractuhasf
 105     ;; Move in place for USA -> SF conversion
 106     clr     r22
 107     mov     r23, r24
 108     mov     r24, r25
 109     ;; Zero-extend
 110     clr     r25
 111     XJMP    __fractusasf
 112 ENDF __fractuhasf
 113 #endif  /* L_fractuhasf */
 114
 115
  116 #if defined (L_fractsqsf)
      ;; __fractsqsf: call __floatsisf on the argument and then scale the
      ;; returned float by 2^-31 by subtracting from its exponent field,
      ;; which straddles r25:r24.  A result whose high byte r25 is zero is
      ;; treated as zero and returned unchanged.
      ;; NOTE(review): exp_lo / exp_hi are defined outside this section;
      ;; presumably they give the r24 / r25 parts of an exponent offset.
  117 DEFUN __fractsqsf
  118     XCALL   __floatsisf
  119     ;; Divide non-zero results by 2^31 to move the
  120     ;; decimal point into place
  121     tst     r25
  122     breq    0f              ; r25 == 0: skip the exponent adjustment
  123     subi    r24, exp_lo (31)
  124     sbci    r25, exp_hi (31) ; borrow from r24 propagates into r25
  125 0:  ret
  126 ENDF __fractsqsf
  127 #endif  /* L_fractsqsf */
 128
  129 #if defined (L_fractusqsf)
      ;; __fractusqsf: call __floatunsisf on the argument and then scale
      ;; the returned float by 2^-32.  Only the high byte r25 is adjusted
      ;; here (no exp_lo part is subtracted, unlike in __fractsqsf).
      ;; A result with r25 == 0 is returned unchanged.
  130 DEFUN __fractusqsf
  131     XCALL   __floatunsisf
  132     ;; Divide non-zero results by 2^32 to move the
  133     ;; decimal point into place
  134     cpse    r25, __zero_reg__   ; skip the subi when r25 equals __zero_reg__
  135     subi    r25, exp_hi (32)
  136     ret
  137 ENDF __fractusqsf
  138 #endif  /* L_fractusqsf */
 139
  140 #if defined (L_fractsasf)
      ;; __fractsasf: call __floatsisf on the argument and scale the
      ;; returned float by 2^-16 through its high byte r25.
      ;; A result with r25 == 0 is returned unchanged.
      ;; The stubs __fractqqsf, __fracthqsf and __fracthasf jump here.
  141 DEFUN __fractsasf
  142     XCALL   __floatsisf
  143     ;; Divide non-zero results by 2^16 to move the
  144     ;; decimal point into place
  145     cpse    r25, __zero_reg__   ; skip the subi when r25 equals __zero_reg__
  146     subi    r25, exp_hi (16)
  147     ret
  148 ENDF __fractsasf
  149 #endif  /* L_fractsasf */
 150
  151 #if defined (L_fractusasf)
      ;; __fractusasf: call __floatunsisf on the argument and scale the
      ;; returned float by 2^-16 through its high byte r25.
      ;; A result with r25 == 0 is returned unchanged.
      ;; The stubs __fractuqqsf, __fractuhqsf and __fractuhasf jump here.
  152 DEFUN __fractusasf
  153     XCALL   __floatunsisf
  154     ;; Divide non-zero results by 2^16 to move the
  155     ;; decimal point into place
  156     cpse    r25, __zero_reg__   ; skip the subi when r25 equals __zero_reg__
  157     subi    r25, exp_hi (16)
  158     ret
  159 ENDF __fractusasf
  160 #endif  /* L_fractusasf */
 161
 162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 163 ;; Conversions from float
 164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 165
  166 #if defined (L_fractsfqq)
      ;; __fractsfqq: scale the float argument up by 2^31 by adjusting its
      ;; exponent field in r25:r24, convert it with __fixsfsi and return
      ;; the most significant byte of that result in r24.
      ;; NOTE(review): no range check is made before the exponent is
      ;; changed; behaviour for out-of-range inputs depends on __fixsfsi.
  167 DEFUN __fractsfqq
  168     ;; Multiply with 2^{24+7} to get a QQ result in r25
  169     subi    r24, exp_lo (-31)
  170     sbci    r25, exp_hi (-31)
  171     XCALL   __fixsfsi
  172     mov     r24, r25        ; keep only the top byte, in r24
  173     ret
  174 ENDF __fractsfqq
  175 #endif  /* L_fractsfqq */
 176
  177 #if defined (L_fractsfuqq)
      ;; __fractsfuqq: scale the float argument up by 2^32 through its
      ;; high byte r25, convert it with __fixunssfsi and return the most
      ;; significant byte of that result in r24.
  178 DEFUN __fractsfuqq
  179     ;; Multiply with 2^{24+8} to get a UQQ result in r25
  180     subi    r25, exp_hi (-32)
  181     XCALL   __fixunssfsi
  182     mov     r24, r25        ; keep only the top byte, in r24
  183     ret
  184 ENDF __fractsfuqq
  185 #endif  /* L_fractsfuqq */
 186
  187 #if defined (L_fractsfha)
      ;; __fractsfha: scale the float argument up by 2^24 through its high
      ;; byte r25 and tail-jump to __fixsfsi.  The caller takes the upper
      ;; two bytes r25:r24 of the 4-byte conversion result.
  188 DEFUN __fractsfha
  189     ;; Multiply with 2^24 to get a HA result in r25:r24
  190     subi    r25, exp_hi (-24)
  191     XJMP    __fixsfsi
  192 ENDF __fractsfha
  193 #endif  /* L_fractsfha */
 194
  195 #if defined (L_fractsfuha)
      ;; __fractsfuha: scale the float argument up by 2^24 through its high
      ;; byte r25 and tail-jump to __fixunssfsi.  The caller takes the
      ;; upper two bytes r25:r24 of the 4-byte conversion result.
  196 DEFUN __fractsfuha
  197     ;; Multiply with 2^24 to get a UHA result in r25:r24
  198     subi    r25, exp_hi (-24)
  199     XJMP    __fixunssfsi
  200 ENDF __fractsfuha
  201 #endif  /* L_fractsfuha */
 202
  203 #if defined (L_fractsfhq)
      ;; __fractsfsq has no instructions of its own and is immediately
      ;; followed by __fractsfhq, so both names share the code below.
      ;; NOTE(review): this relies on DEFUN / ENDF (defined outside this
      ;; section) emitting no code between the two entry points.
  204 DEFUN __fractsfsq
  205 ENDF  __fractsfsq
  206
      ;; __fractsfhq: scale the float argument up by 2^31 by adjusting its
      ;; exponent field in r25:r24 and tail-jump to __fixsfsi.
  207 DEFUN __fractsfhq
  208     ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
  209     ;; resp. with 2^31 to get a SQ result in r25:r22
  210     subi    r24, exp_lo (-31)
  211     sbci    r25, exp_hi (-31)
  212     XJMP    __fixsfsi
  213 ENDF __fractsfhq
  214 #endif  /* L_fractsfhq */
 215
  216 #if defined (L_fractsfuhq)
      ;; __fractsfusq has no instructions of its own and is immediately
      ;; followed by __fractsfuhq, so both names share the code below.
      ;; NOTE(review): this relies on DEFUN / ENDF (defined outside this
      ;; section) emitting no code between the two entry points.
  217 DEFUN __fractsfusq
  218 ENDF  __fractsfusq
  219
      ;; __fractsfuhq: scale the float argument up by 2^32 through its
      ;; high byte r25 and tail-jump to __fixunssfsi.
  220 DEFUN __fractsfuhq
  221     ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
  222     ;; resp. with 2^32 to get a USQ result in r25:r22
  223     subi    r25, exp_hi (-32)
  224     XJMP    __fixunssfsi
  225 ENDF __fractsfuhq
  226 #endif  /* L_fractsfuhq */
 227
  228 #if defined (L_fractsfsa)
      ;; __fractsfsa: scale the float argument up by 2^16 through its high
      ;; byte r25 and tail-jump to __fixsfsi, whose 4-byte result is the
      ;; return value.
  229 DEFUN __fractsfsa
  230     ;; Multiply with 2^16 to get a SA result in r25:r22
  231     subi    r25, exp_hi (-16)
  232     XJMP    __fixsfsi
  233 ENDF __fractsfsa
  234 #endif  /* L_fractsfsa */
 235
  236 #if defined (L_fractsfusa)
      ;; __fractsfusa: scale the float argument up by 2^16 through its high
      ;; byte r25 and tail-jump to __fixunssfsi, whose 4-byte result is the
      ;; return value.
  237 DEFUN __fractsfusa
  238     ;; Multiply with 2^16 to get a USA result in r25:r22
  239     subi    r25, exp_hi (-16)
  240     XJMP    __fixunssfsi
  241 ENDF __fractsfusa
  242 #endif  /* L_fractsfusa */
 243
 244
 245 ;; For multiplication the functions here are called directly from
 246 ;; avr-fixed.md instead of using the standard libcall mechanisms.
 247 ;; This can make better code because GCC knows exactly which
  248 ;; of the call-used registers (not all of them) are clobbered.
 249
 250 /*******************************************************
 251     Fractional  Multiplication  8 x 8  without MUL
 252 *******************************************************/
 253
  254 #if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
  255 ;;; R23 = R24 * R25
  256 ;;; Clobbers: __tmp_reg__, R22, R24, R25
  257 ;;; Rounding: ???
      ;;; The product itself is computed by __fmuls.  Afterwards a result
      ;;; byte of 0x80 is replaced by 0x7f; every other value is returned
      ;;; unchanged.
  258 DEFUN __mulqq3
  259     XCALL   __fmuls
  260     ;; TR 18037 requires that  (-1) * (-1)  does not overflow
  261     ;; The only input that can produce  -1  is  (-1)^2.
  262     dec     r23             ; V is set only if r23 was 0x80; r23 is then 0x7f
  263     brvs    0f              ; keep the saturated 0x7f
  264     inc     r23             ; otherwise undo the decrement
  265 0:  ret
  266 ENDF  __mulqq3
  267 #endif /* L_mulqq3 && ! HAVE_MUL */
 268
 269 /*******************************************************
 270     Fractional Multiply  .16 x .16  with and without MUL
 271 *******************************************************/
 272
  273 #if defined (L_mulhq3)
  274 ;;; Same code with and without MUL, but the interfaces differ:
  275 ;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
  276 ;;;         Clobbers: ABI, called by optabs
  277 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
  278 ;;;         Clobbers: __tmp_reg__, R22, R23
  279 ;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
      ;;; The 32-bit product is obtained from __mulhisi3 in R25:R22.  Its
      ;;; upper three bytes are shifted left by one bit so that R25:R24
      ;;; holds the result and bit 7 of R23 holds the first discarded bit,
      ;;; which is used for rounding.
  280 DEFUN   __mulhq3
  281     XCALL   __mulhisi3
  282     ;; Shift result into place
  283     lsl     r23
  284     rol     r24
  285     rol     r25             ; V = N xor C: set if the two top product bits differ
  286     brvs    1f
  287     ;; Round
  288     sbrc    r23, 7          ; first discarded bit set?
  289     adiw    r24, 1          ; then round up
  290     ret
  291 1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
  292     ldi     r24, lo8 (0x7fff)
  293     ldi     r25, hi8 (0x7fff)
  294     ret
  295 ENDF __mulhq3
  296 #endif  /* defined (L_mulhq3) */
 297
  298 #if defined (L_muluhq3)
  299 ;;; Same code with and without MUL, but the interfaces differ:
  300 ;;; no MUL: (R25:R24) *= (R23:R22)
  301 ;;;         Clobbers: ABI, called by optabs
  302 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
  303 ;;;         Clobbers: __tmp_reg__, R22, R23
  304 ;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
      ;;; The 32-bit product is obtained from __umulhisi3.  Its upper half
      ;;; R25:R24 is the result; bit 7 of R23, the first discarded bit,
      ;;; decides whether the result is rounded up by one.
  305 DEFUN   __muluhq3
  306     XCALL   __umulhisi3
  307     ;; Round
  308     sbrc    r23, 7          ; first discarded bit set?
  309     adiw    r24, 1          ; then round up
  310     ret
  311 ENDF __muluhq3
  312 #endif  /* L_muluhq3 */
 313
 314
 315 /*******************************************************
 316     Fixed  Multiply  8.8 x 8.8  with and without MUL
 317 *******************************************************/
 318
  319 #if defined (L_mulha3)
  320 ;;; Same code with and without MUL, but the interfaces differ:
  321 ;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
  322 ;;;         Clobbers: ABI, called by optabs
  323 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
  324 ;;;         Clobbers: __tmp_reg__, R22, R23
  325 ;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
      ;;; Signed variant: the 32-bit product comes from __mulhisi3; the
      ;;; shared tail __muluha3_round extracts and rounds the result.
  326 DEFUN   __mulha3
  327     XCALL   __mulhisi3
  328     XJMP    __muluha3_round
  329 ENDF __mulha3
  330 #endif  /* L_mulha3 */
 331
  332 #if defined (L_muluha3)
  333 ;;; Same code with and without MUL, but the interfaces differ:
  334 ;;; no MUL: (R25:R24) *= (R23:R22)
  335 ;;;         Clobbers: ABI, called by optabs
  336 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
  337 ;;;         Clobbers: __tmp_reg__, R22, R23
  338 ;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
      ;;; Unsigned variant: the 32-bit product comes from __umulhisi3; the
      ;;; shared tail __muluha3_round extracts and rounds the result.
  339 DEFUN   __muluha3
  340     XCALL   __umulhisi3
  341     XJMP    __muluha3_round
  342 ENDF __muluha3
  343 #endif  /* L_muluha3 */
 344
  345 #if defined (L_muluha3_round)
      ;; __muluha3_round: common tail of __mulha3 and __muluha3.
      ;; Takes the middle two bytes R24:R23 of the 32-bit product in
      ;; R25:R22 as the result in R25:R24 and rounds it up by one if
      ;; bit 7 of R22, the first discarded bit, is set.
  346 DEFUN   __muluha3_round
  347     ;; Shift result into place
  348     mov     r25, r24
  349     mov     r24, r23
  350     ;; Round
  351     sbrc    r22, 7          ; first discarded bit set?
  352     adiw    r24, 1          ; then round up
  353     ret
  354 ENDF __muluha3_round
  355 #endif  /* L_muluha3_round */
 356
 357
 358 /*******************************************************
 359     Fixed  Multiplication  16.16 x 16.16
 360 *******************************************************/
 361
 362 #if defined (__AVR_HAVE_MUL__)
 363
  364 ;; Multiplier
      ;; Register numbers used by the MUL variants of __mulusa3 and
      ;; __mulsa3:  A3:A0 = 19:16,  B3:B0 = 23:20,  C3:C0 = 27:24.
      ;; The names are undefined again after these two functions.
  365 #define A0  16
  366 #define A1  A0+1
  367 #define A2  A1+1
  368 #define A3  A2+1
  369
  370 ;; Multiplicand
  371 #define B0  20
  372 #define B1  B0+1
  373 #define B2  B1+1
  374 #define B3  B2+1
  375
  376 ;; Result
  377 #define C0  24
  378 #define C1  C0+1
  379 #define C2  C1+1
  380 #define C3  C2+1
 381
  382 #if defined (L_mulusa3)
  383 ;;; (C3:C0) = (A3:A0) * (B3:B0)
  384 ;;; Clobbers: __tmp_reg__
  385 ;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
      ;;; Unsigned 4-byte x 4-byte multiply built from 8 x 8 MUL steps.
      ;;; Result bytes C0..C3 correspond to product bytes 2..5; product
      ;;; bytes 0 and 1 only contribute to rounding.  The partial product
      ;;; A3 * B3 lies entirely above the result and is not computed.
      ;;; MUL leaves its result in r1:r0, so __zero_reg__ (r1) is cleared
      ;;; again before returning.
  386 DEFUN   __mulusa3
  387     ;; Some of the MUL instructions have LSBs outside the result.
  388     ;; Don't ignore these LSBs in order to tame rounding error.
  389     ;; Use C2/C3 for these LSBs.
  390
  391     clr C0
  392     clr C1
  393     mul A0, B0  $  movw C2, r0
  394
      ;; The trailing rol collects the carry out of C0 in bit 0 of C1
  395     mul A1, B0  $  add  C3, r0  $  adc C0, r1
  396     mul A0, B1  $  add  C3, r0  $  adc C0, r1  $  rol C1
  397
  398     ;; Round
  399     sbrc C3, 7
  400     adiw C0, 1
  401
  402     ;; The following MULs don't have LSBs outside the result.
  403     ;; C2/C3 is the high part.
  404
      ;; Carry counting:  sbc C2,C2 sets C2 = -carry, every following
      ;; sbci C2,0 subtracts one more carry, and the final neg turns the
      ;; negative count into the number of carries out of C1.
  405     mul  A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
  406     mul  A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
  407     mul  A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
  408     neg  C2
  409
      ;; Same carry counting scheme, now for the carries out of C2 into C3
  410     mul  A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
  411     mul  A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
  412     mul  A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
  413     mul  A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
  414     neg  C3
  415
      ;; Carries out of C3 are dropped from here on
  416     mul  A1, B3  $  add C2, r0  $  adc C3, r1
  417     mul  A2, B2  $  add C2, r0  $  adc C3, r1
  418     mul  A3, B1  $  add C2, r0  $  adc C3, r1
  419
      ;; Only the low byte of these products still fits into the result
  420     mul  A2, B3  $  add C3, r0
  421     mul  A3, B2  $  add C3, r0
  422
  423     clr  __zero_reg__
  424     ret
  425 ENDF __mulusa3
  426 #endif /* L_mulusa3 */
 427
  428 #if defined (L_mulsa3)
  429 ;;; (C3:C0) = (A3:A0) * (B3:B0)
  430 ;;; Clobbers: __tmp_reg__
  431 ;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
      ;;; Signed multiply on top of the unsigned __mulusa3.  For every
      ;;; operand whose top bit is set, the low word of the other operand
      ;;; is subtracted from the high word C3:C2 of the unsigned result.
      ;;; This relies on A[] and B[] being unchanged by __mulusa3.
  432 DEFUN __mulsa3
  433     XCALL   __mulusa3
  434     tst     B3
  435     brpl    1f              ; B >= 0: no correction for B
  436     sub     C2, A0
  437     sbc     C3, A1
  438 1:  sbrs    A3, 7           ; A < 0 ?
  439     ret                     ; no: done
  440     sub     C2, B0
  441     sbc     C3, B1
  442     ret
  443 ENDF __mulsa3
  444 #endif /* L_mulsa3 */
 445
  446 #undef A0   /* drop the register names of the MUL variant */
  447 #undef A1
  448 #undef A2
  449 #undef A3
  450 #undef B0
  451 #undef B1
  452 #undef B2
  453 #undef B3
  454 #undef C0
  455 #undef C1
  456 #undef C2
  457 #undef C3
 458
 459 #else /* __AVR_HAVE_MUL__ */
 460
  461 #define A0 18
      ;; Register numbers used by the variants of __mulusa3 / __mulsa3
      ;; for devices without MUL.  A[] is one operand (21:18).  B[] (25:22)
      ;; is the other operand and shares its registers with the result C[].
  462 #define A1 A0+1
  463 #define A2 A0+2
  464 #define A3 A0+3
  465
  466 #define B0 22
  467 #define B1 B0+1
  468 #define B2 B0+2
  469 #define B3 B0+3
  470
  471 #define C0  22
  472 #define C1  C0+1
  473 #define C2  C0+2
  474 #define C3  C0+3
  475
      ;; CC[] is the accumulator the result is built in
  476 ;; __tmp_reg__
  477 #define CC0  0
  478 ;; __zero_reg__
  479 #define CC1  1
  480 #define CC2  16
  481 #define CC3  17
  482
      ;; AA[] holds a copy of the operand A[]
  483 #define AA0  26
  484 #define AA1  AA0+1
  485 #define AA2  30
  486 #define AA3  AA2+1
 487
  488 #if defined (L_mulsa3)
  489 ;;; (R25:R22)  *=  (R21:R18)
  490 ;;; Clobbers: ABI, called by optabs
  491 ;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
      ;;; Signed multiply on top of the unsigned __mulusa3.  The low word
      ;;; of B is saved on the stack because the result overwrites B.
      ;;; The correction code at label 1 runs twice:  first as a
      ;;; subroutine (rcall) with T = sign of B and AA1:AA0 = low word of
      ;;; A, then by falling through with T = sign of A and AA1:AA0 = the
      ;;; low word of B popped from the stack.  The final ret of the
      ;;; second pass returns to the caller.
  492 DEFUN   __mulsa3
  493     push    B0
  494     push    B1
  495     bst     B3, 7           ; T = sign of B
  496     XCALL   __mulusa3
  497     ;; A survived in  31:30:27:26
  498     rcall 1f                ; first pass: correct for the sign of B
  499     pop     AA1
  500     pop     AA0
  501     bst     AA3, 7          ; T = sign of A
  502 1:  brtc  9f
  503     ;; 1-extend A/B
  504     sub     C2, AA0
  505     sbc     C3, AA1
  506 9:  ret
  507 ENDF __mulsa3
  508 #endif  /* L_mulsa3 */
 509
  510 #if defined (L_mulusa3)
  511 ;;; (R25:R22)  *=  (R21:R18)
  512 ;;; Clobbers: ABI, called by optabs and __mulsa3
  513 ;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
  514 ;;; Does not clobber T and A[] survives in 26, 27, 30, 31
      ;;; Shift-and-add multiply for devices without MUL.  The sum is
      ;;; accumulated in CC[] = 17:16:1:0.  A first loop walks the upper
      ;;; word of B (B3:B2) from its lowest bit upwards while A is shifted
      ;;; left; a second loop walks the lower word of B (B1:B0) from its
      ;;; highest bit downwards while A is shifted right.  Bits shifted out
      ;;; of A in the second loop pass through B2 and are accumulated in
      ;;; B3, whose top bit finally rounds the result.
      ;;; CC2 and CC3 are saved on entry and restored on exit.
  515 DEFUN   __mulusa3
  516     push    CC2
  517     push    CC3
  518     ; clear result
      ;; CC3:CC2 is loaded from r1:r0; r1 is __zero_reg__ and is
      ;; expected to be 0 at this point.
  519     clr     __tmp_reg__
  520     wmov    CC2, CC0
  521     ; save multiplicand
  522     wmov    AA0, A0
  523     wmov    AA2, A2
  524     rjmp 3f
  525
  526     ;; Loop the integral part
  527
  528 1:  ;; CC += A * 2^n;  n >= 0
  529     add  CC0,A0  $  adc CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
  530
  531 2:  ;; A <<= 1
  532     lsl  A0      $  rol A1      $  rol  A2      $  rol  A3
  533
  534 3:  ;; IBIT(B) >>= 1
  535     ;; Carry = n-th bit of B;  n >= 0
  536     lsr     B3
  537     ror     B2
  538     brcs 1b
      ;; Carry is clear here, so sbci leaves B3 unchanged.  It only keeps
      ;; Z set if the preceding ror produced zero as well, so Z means
      ;; B3:B2 == 0.
  539     sbci    B3, 0
  540     brne 2b
  541
  542     ;; Loop the fractional part
  543     ;; B2/B3 is 0 now, use as guard bits for rounding
  544     ;; Restore multiplicand
  545     wmov    A0, AA0
  546     wmov    A2, AA2
  547     rjmp 5f
  548
  549 4:  ;; CC += A:Guard * 2^n;  n < 0
  550     add  B3,B2 $  adc  CC0,A0  $  adc  CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
  551 5:
  552     ;; A:Guard >>= 1
  553     lsr  A3   $  ror  A2  $  ror  A1  $  ror   A0  $   ror  B2
  554
  555     ;; FBIT(B) <<= 1
  556     ;; Carry = n-th bit of B;  n < 0
  557     lsl     B0
  558     rol     B1
  559     brcs 4b
      ;; Same zero test as in the first loop, here for B1:B0
  560     sbci    B0, 0
  561     brne 5b
  562
  563     ;; Move result into place and round
      ;; lsl moves the top guard bit into carry.  wmov and clr are relied
      ;; on to leave carry intact until the adc chain adds it in.
  564     lsl     B3
  565     wmov    C2, CC2
  566     wmov    C0, CC0
  567     clr     __zero_reg__
  568     adc     C0, __zero_reg__
  569     adc     C1, __zero_reg__
  570     adc     C2, __zero_reg__
  571     adc     C3, __zero_reg__
  572
  573     ;; Epilogue
  574     pop     CC3
  575     pop     CC2
  576     ret
  577 ENDF __mulusa3
  578 #endif  /* L_mulusa3 */
 579
  580 #undef A0   /* drop the register names of the variant without MUL */
  581 #undef A1
  582 #undef A2
  583 #undef A3
  584 #undef B0
  585 #undef B1
  586 #undef B2
  587 #undef B3
  588 #undef C0
  589 #undef C1
  590 #undef C2
  591 #undef C3
  592 #undef AA0
  593 #undef AA1
  594 #undef AA2
  595 #undef AA3
  596 #undef CC0
  597 #undef CC1
  598 #undef CC2
  599 #undef CC3
 600
 601 #endif /* __AVR_HAVE_MUL__ */
 602
  603 /*******************************************************
  604       Fractional Division 8 / 8
  605 *******************************************************/
  606
      ;; Register roles shared by __divqq3 and __udivuqq3.
      ;; The names are undefined again after both functions.
  607 #define r_divd  r25     /* dividend */
  608 #define r_quo   r24     /* quotient */
  609 #define r_div   r22     /* divisor */
 610
 611 #if defined (L_divqq3)
 612 DEFUN   __divqq3
 613     mov     r0, r_divd
 614     eor     r0, r_div
 615     sbrc    r_div, 7
 616     neg     r_div
 617     sbrc    r_divd, 7
 618     neg     r_divd
 619     cp      r_divd, r_div
 620     breq    __divqq3_minus1  ; if equal return -1
 621     XCALL   __udivuqq3
 622     lsr     r_quo
 623     sbrc    r0, 7   ; negate result if needed
 624     neg     r_quo
 625     ret
 626 __divqq3_minus1:
 627     ldi     r_quo, 0x80
 628     ret
 629 ENDF __divqq3
 630 #endif  /* defined (L_divqq3) */
 631
  632 #if defined (L_udivuqq3)
      ;; __udivuqq3: unsigned 8-bit fractional division by shift and
      ;; subtract, r_quo = r_divd / r_div, 8 quotient bits.
      ;; __zero_reg__ serves as loop counter: a single 1 bit is shifted
      ;; left once per iteration and the register is 0 again after the
      ;; 8th shift.  Quotient bits are collected inverted and complemented
      ;; at the end.  r_divd is used up as the running remainder.
  633 DEFUN   __udivuqq3
  634     clr     r_quo           ; clear quotient
  635     inc     __zero_reg__    ; init loop counter, used per shift
  636 __udivuqq3_loop:
  637     lsl     r_divd          ; shift dividend
  638     brcs    0f              ; dividend overflow
  639     cp      r_divd,r_div    ; compare dividend & divisor
  640     brcc    0f              ; dividend >= divisor
  641     rol     r_quo           ; shift quotient (with CARRY)
  642     rjmp    __udivuqq3_cont
  643 0:
  644     sub     r_divd,r_div    ; restore dividend
  645     lsl     r_quo           ; shift quotient (without CARRY)
  646 __udivuqq3_cont:
  647     lsl     __zero_reg__    ; shift loop-counter bit
  648     brne    __udivuqq3_loop
  649     com     r_quo           ; complement result
  650                             ; because C flag was complemented in loop
  651     ret
  652 ENDF __udivuqq3
  653 #endif  /* defined (L_udivuqq3) */
 654
  655 #undef  r_divd  /* drop the register names of the 8 / 8 division */
  656 #undef  r_quo
  657 #undef  r_div
 658
 659
  660 /*******************************************************
  661     Fractional Division 16 / 16
  662 *******************************************************/
      /* Register numbers shared by __divhq3, __udivuhq3, __udivuha3_common,
         __divha3 and __udivuha3.  Undefined again after __udivuha3.  */
  663 #define r_divdL 26     /* dividend Low */
  664 #define r_divdH 27     /* dividend High */
  665 #define r_quoL  24     /* quotient Low */
  666 #define r_quoH  25     /* quotient High */
  667 #define r_divL  22     /* divisor Low */
  668 #define r_divH  23     /* divisor High */
  669 #define r_cnt   21     /* loop counter */
 670
 671 #if defined (L_divhq3)
 672 DEFUN   __divhq3
 673     mov     r0, r_divdH
 674     eor     r0, r_divH
 675     sbrs    r_divH, 7
 676     rjmp    1f
 677     NEG2    r_divL
 678 1:
 679     sbrs    r_divdH, 7
 680     rjmp    2f
 681     NEG2    r_divdL
 682 2:
 683     cp      r_divdL, r_divL
 684     cpc     r_divdH, r_divH
 685     breq    __divhq3_minus1  ; if equal return -1
 686     XCALL   __udivuhq3
 687     lsr     r_quoH
 688     ror     r_quoL
 689     brpl    9f
 690     ;; negate result if needed
 691     NEG2    r_quoL
 692 9:
 693     ret
 694 __divhq3_minus1:
 695     ldi     r_quoH, 0x80
 696     clr     r_quoL
 697     ret
 698 ENDF __divhq3
 699 #endif  /* defined (L_divhq3) */
 700
  701 #if defined (L_udivuhq3)
      ;; __udivuhq3: unsigned 16-bit fractional division.  It clears
      ;; r_quoH and the carry flag and then runs into __udivuha3_common.
      ;; NOTE(review): the fall-through relies on ENDF / DEFUN (defined
      ;; outside this section) emitting no code between the two bodies.
  702 DEFUN   __udivuhq3
  703     sub     r_quoH,r_quoH   ; clear quotient and carry
  704     ;; FALLTHRU
  705 ENDF __udivuhq3
  706
      ;; __udivuha3_common: 16-step shift-and-subtract division loop.
      ;; On entry the carry flag and r_quoH supply the bits that are
      ;; shifted into the dividend from below:  the carry first, then the
      ;; bits leaving r_quoH at the end of each step.  clr, ldi and dec do
      ;; not change carry, so it survives from one rol to the next.
      ;; Quotient bits are collected inverted in r_quoH:r_quoL and
      ;; complemented at the end.  r_divdH:r_divdL is used up as the
      ;; running remainder; r_cnt is the loop counter.
  707 DEFUN   __udivuha3_common
  708     clr     r_quoL          ; clear quotient
  709     ldi     r_cnt,16        ; init loop counter
  710 __udivuhq3_loop:
  711     rol     r_divdL         ; shift dividend (with CARRY)
  712     rol     r_divdH
  713     brcs    __udivuhq3_ep   ; dividend overflow
  714     cp      r_divdL,r_divL  ; compare dividend & divisor
  715     cpc     r_divdH,r_divH
  716     brcc    __udivuhq3_ep   ; dividend >= divisor
  717     rol     r_quoL          ; shift quotient (with CARRY)
  718     rjmp    __udivuhq3_cont
  719 __udivuhq3_ep:
  720     sub     r_divdL,r_divL  ; restore dividend
  721     sbc     r_divdH,r_divH
  722     lsl     r_quoL          ; shift quotient (without CARRY)
  723 __udivuhq3_cont:
  724     rol     r_quoH          ; shift quotient
  725     dec     r_cnt           ; decrement loop counter
  726     brne    __udivuhq3_loop
  727     com     r_quoL          ; complement result
  728     com     r_quoH          ; because C flag was complemented in loop
  729     ret
  730 ENDF __udivuha3_common
  731 #endif  /* defined (L_udivuhq3) */
 732
 733 /*******************************************************
 734     Fixed Division 8.8 / 8.8
 735 *******************************************************/
  736 #if defined (L_divha3)
      ;; __divha3: signed 8.8 fixed-point division
      ;;     r_quoH:r_quoL = r_divdH:r_divdL / r_divH:r_divL.
      ;; The sign of the result (XOR of both sign bits) is kept in bit 7 of
      ;; r0.  Both operands are replaced by their magnitudes, __udivuha3
      ;; divides them and the quotient is negated if the signs differ.
      ;; r0 must survive NEG2 and __udivuha3 for this to work.
  737 DEFUN   __divha3
  738     mov     r0, r_divdH
  739     eor     r0, r_divH      ; r0 bit 7 = sign of the result
  740     sbrs    r_divH, 7
  741     rjmp    1f
  742     NEG2    r_divL          ; divisor = |divisor|
  743 1:
  744     sbrs    r_divdH, 7
  745     rjmp    2f
  746     NEG2    r_divdL         ; dividend = |dividend|
  747 2:
  748     XCALL   __udivuha3
  749     sbrs    r0, 7   ; negate result if needed
  750     ret
  751     NEG2    r_quoL
  752     ret
  753 ENDF __divha3
  754 #endif  /* defined (L_divha3) */
 755
  756 #if defined (L_udivuha3)
      ;; __udivuha3: unsigned 8.8 fixed-point division.  Rearranges the
      ;; dividend for the shared loop __udivuha3_common:  its high byte
      ;; becomes the low byte of the running remainder, the remainder high
      ;; byte is cleared, and its low byte is placed in r_quoH, from where
      ;; the loop shifts it into the remainder bit by bit.  The first of
      ;; these bits is handed over in the carry flag.
  757 DEFUN   __udivuha3
  758     mov     r_quoH, r_divdL
  759     mov     r_divdL, r_divdH
  760     clr     r_divdH         ; clr leaves carry alone, lsl below sets it
  761     lsl     r_quoH     ; shift quotient into carry
  762     XJMP    __udivuha3_common ; same as fractional after rearrange
  763 ENDF __udivuha3
  764 #endif  /* defined (L_udivuha3) */
 765
  766 #undef  r_divdL /* drop the register names of the 16-bit divisions */
  767 #undef  r_divdH
  768 #undef  r_quoL
  769 #undef  r_quoH
  770 #undef  r_divL
  771 #undef  r_divH
  772 #undef  r_cnt
 773
 774 /*******************************************************
 775     Fixed Division 16.16 / 16.16
 776 *******************************************************/
 777
  778 #define r_arg1L  24    /* arg1 gets passed already in place */
      /* Register numbers shared by __divsa3 and __udivusa3.
         arg1 (27:24) overlaps the upper half of the quotient (25:24) and
         the lower half of the dividend / running remainder (27:26).  */
  779 #define r_arg1H  25
  780 #define r_arg1HL 26
  781 #define r_arg1HH 27
  782 #define r_divdL  26    /* dividend Low */
  783 #define r_divdH  27
  784 #define r_divdHL 30
  785 #define r_divdHH 31    /* dividend High */
  786 #define r_quoL   22    /* quotient Low */
  787 #define r_quoH   23
  788 #define r_quoHL  24
  789 #define r_quoHH  25    /* quotient High */
  790 #define r_divL   18    /* divisor Low */
  791 #define r_divH   19
  792 #define r_divHL  20
  793 #define r_divHH  21    /* divisor High */
  794 #define r_cnt  __zero_reg__  /* loop count (0 after the loop!) */
 795
  796 #if defined (L_divsa3)
      ;; __divsa3: signed 16.16 fixed-point division of arg1 (27:24) by
      ;; the divisor (21:18); the quotient is returned in 25:22.
      ;; The sign of the result (XOR of both sign bits) is kept in bit 7 of
      ;; r0.  Both operands are replaced by their magnitudes, __udivusa3
      ;; divides them and the quotient is negated through __negsi2 if the
      ;; signs differ.  r0 must survive NEG4 and __udivusa3 for this to work.
  797 DEFUN   __divsa3
  798     mov     r0, r_arg1HH
  799     eor     r0, r_divHH     ; r0 bit 7 = sign of the result
  800     sbrs    r_divHH, 7
  801     rjmp    1f
  802     NEG4    r_divL          ; divisor = |divisor|
  803 1:
  804     sbrs    r_arg1HH, 7
  805     rjmp    2f
  806     NEG4    r_arg1L         ; arg1 = |arg1|
  807 2:
  808     XCALL   __udivusa3
  809     sbrs    r0, 7   ; negate result if needed
  810     ret
  811     ;; negate r_quoL
  812     XJMP    __negsi2
  813 ENDF __divsa3
  814 #endif  /* defined (L_divsa3) */
 815
  816 #if defined (L_udivusa3)
      ;; __udivusa3: unsigned 16.16 fixed-point division by a 32-step
      ;; shift-and-subtract loop.
      ;; On entry arg1 is in 27:24.  Its upper word (27:26) becomes the low
      ;; word of the running remainder, whose upper word (31:30) is
      ;; cleared.  Its lower word (25:24) sits in the upper half of the
      ;; quotient register, from where it is shifted into the remainder
      ;; bit by bit through the carry flag.  dec does not change carry, so
      ;; the bit leaving r_quoHH reaches the rol at the top of the loop.
      ;; Quotient bits are collected inverted and complemented at the end.
      ;; The loop counter is __zero_reg__, which is 0 again on return.
  817 DEFUN   __udivusa3
  818     ldi     r_divdHL, 32    ; init loop counter
  819     mov     r_cnt, r_divdHL ; via r_divdHL because ldi cannot load r_cnt
  820     clr     r_divdHL
  821     clr     r_divdHH
  822     wmov    r_quoL, r_divdHL ; low word of the quotient = 0
  823     lsl     r_quoHL         ; shift quotient into carry
  824     rol     r_quoHH
  825 __udivusa3_loop:
  826     rol     r_divdL         ; shift dividend (with CARRY)
  827     rol     r_divdH
  828     rol     r_divdHL
  829     rol     r_divdHH
  830     brcs    __udivusa3_ep   ; dividend overflow
  831     cp      r_divdL,r_divL  ; compare dividend & divisor
  832     cpc     r_divdH,r_divH
  833     cpc     r_divdHL,r_divHL
  834     cpc     r_divdHH,r_divHH
  835     brcc    __udivusa3_ep   ; dividend >= divisor
  836     rol     r_quoL          ; shift quotient (with CARRY)
  837     rjmp    __udivusa3_cont
  838 __udivusa3_ep:
  839     sub     r_divdL,r_divL  ; restore dividend
  840     sbc     r_divdH,r_divH
  841     sbc     r_divdHL,r_divHL
  842     sbc     r_divdHH,r_divHH
  843     lsl     r_quoL          ; shift quotient (without CARRY)
  844 __udivusa3_cont:
  845     rol     r_quoH          ; shift quotient
  846     rol     r_quoHL
  847     rol     r_quoHH
  848     dec     r_cnt           ; decrement loop counter
  849     brne    __udivusa3_loop
  850     com     r_quoL          ; complement result
  851     com     r_quoH          ; because C flag was complemented in loop
  852     com     r_quoHL
  853     com     r_quoHH
  854     ret
  855 ENDF __udivusa3
  856 #endif  /* defined (L_udivusa3) */
 857
  858 #undef  r_arg1L /* drop the register names of the 16.16 division */
  859 #undef  r_arg1H
  860 #undef  r_arg1HL
  861 #undef  r_arg1HH
  862 #undef  r_divdL
  863 #undef  r_divdH
  864 #undef  r_divdHL
  865 #undef  r_divdHH
  866 #undef  r_quoL
  867 #undef  r_quoH
  868 #undef  r_quoHL
  869 #undef  r_quoHH
  870 #undef  r_divL
  871 #undef  r_divH
  872 #undef  r_divHL
  873 #undef  r_divHH
  874 #undef  r_cnt
 875
 876 \f
 877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 878 ;; Saturation, 2 Bytes
 879 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 880
  881 ;; First Argument and Return Register
      ;; A1:A0 = 25:24, used by __ssneg_2 and __ssabs_2
  882 #define A0  24
  883 #define A1  A0+1
 884
  885 #if defined (L_ssneg_2)
      ;; __ssneg_2: negate the 2-byte value in A1:A0.  If the negation
      ;; reports signed overflow, 1 is subtracted from the result.
      ;; NOTE(review): NEG2 is defined outside this section.  Presumably
      ;; it leaves V set only for the input 0x8000, whose negation is
      ;; 0x8000 again, so that the sbiw yields 0x7fff -- confirm.
  886 DEFUN __ssneg_2
  887     NEG2    A0
  888     brvc 0f                 ; no overflow: result is final
  889     sbiw    A0, 1
  890 0:  ret
  891 ENDF __ssneg_2
  892 #endif /* L_ssneg_2 */
 893
  894 #if defined (L_ssabs_2)
      ;; __ssabs_2: absolute value of the 2-byte value in A1:A0.
      ;; Returns at once if bit 7 of A1 is clear, otherwise continues in
      ;; __ssneg_2.
  895 DEFUN __ssabs_2
  896     sbrs    A1, 7           ; negative?
  897     ret                     ; no: value is returned unchanged
  898     XJMP    __ssneg_2
  899 ENDF __ssabs_2
  900 #endif /* L_ssabs_2 */
 901
  902 #undef A0   /* drop the register names of the 2-byte saturation code */
  903 #undef A1
 904
 905
 906 \f
 907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 908 ;; Saturation, 4 Bytes
 909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 910
  911 ;; First Argument and Return Register
      ;; A3:A0 = 25:22, used by __ssneg_4 and __ssabs_4
  912 #define A0  22
  913 #define A1  A0+1
  914 #define A2  A0+2
  915 #define A3  A0+3
 916
  917 #if defined (L_ssneg_4)
      ;; __ssneg_4: negate the 4-byte value in A3:A0 through __negsi2.
      ;; If V is set on return from __negsi2, the result is replaced by
      ;; 0x7fffffff.
      ;; NOTE(review): relies on __negsi2 (defined outside this section)
      ;; returning with V reflecting signed overflow of the negation.
  918 DEFUN __ssneg_4
  919     XCALL   __negsi2
  920     brvc 0f                 ; no overflow: result is final
  921     ldi     A3, 0x7f
  922     ldi     A2, 0xff
  923     ldi     A1, 0xff
  924     ldi     A0, 0xff
  925 0:  ret
  926 ENDF __ssneg_4
  927 #endif /* L_ssneg_4 */
 928
  929 #if defined (L_ssabs_4)
      ;; __ssabs_4: absolute value of the 4-byte value in A3:A0.
      ;; Returns at once if bit 7 of A3 is clear, otherwise continues in
      ;; __ssneg_4.
  930 DEFUN __ssabs_4
  931     sbrs    A3, 7           ; negative?
  932     ret                     ; no: value is returned unchanged
  933     XJMP    __ssneg_4
  934 ENDF __ssabs_4
  935 #endif /* L_ssabs_4 */
 936
  937 #undef A0   /* drop the register names of the 4-byte saturation code */
  938 #undef A1
  939 #undef A2
  940 #undef A3
 941
 942
 943 \f
 944 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 945 ;; Saturation, 8 Bytes
 946 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 947
  948 ;; First Argument and Return Register
      ;; A7:A0 = 25:18, used by all 8-byte saturation functions below
  949 #define A0  18
  950 #define A1  A0+1
  951 #define A2  A0+2
  952 #define A3  A0+3
  953 #define A4  A0+4
  954 #define A5  A0+5
  955 #define A6  A0+6
  956 #define A7  A0+7
 957
  958 #if defined (L_clr_8)
      ;; NOTE(review): FALIAS is defined outside this section; presumably
      ;; it makes the three names below aliases of the function that
      ;; follows, __clr_8.
  959 FALIAS __usneguta2
  960 FALIAS __usneguda2
  961 FALIAS __usnegudq2
  962
  963 ;; Clear Carry and all Bytes
      ;; __clr_8 clears the carry flag and then runs into __sbc_8, which
      ;; therefore sets all 8 bytes of A[] to zero.
  964 DEFUN __clr_8
  965     ;; Clear Carry and set Z
  966     sub     A7, A7
  967     ;; FALLTHRU
  968 ENDF  __clr_8
  969 ;; Propagate Carry to all Bytes, Carry unaltered
      ;; __sbc_8 sets every byte of A[] to 0xff if carry is set on entry
      ;; and to 0x00 otherwise.  Subtracting a register from itself with
      ;; borrow gives 0 - carry and reproduces the same carry.
  970 DEFUN __sbc_8
  971     sbc     A7, A7
  972     sbc     A6, A6
  973     wmov    A4, A6          ; copy the pair A7:A6 down to A5:A4, ...
  974     wmov    A2, A6
  975     wmov    A0, A6
  976     ret
  977 ENDF __sbc_8
  978 #endif /* L_clr_8 */
 979
  980 #if defined (L_ssneg_8)
  981 FALIAS __ssnegta2
  982 FALIAS __ssnegda2
  983 FALIAS __ssnegdq2
  984
      ;; __ssneg_8: negate the 8-byte value in A[] through __negdi2.
      ;; If V is set on return from __negdi2, the result is replaced by
      ;; 0x7fffffffffffffff:  __sbc_8 with carry set fills all bytes with
      ;; 0xff and the top byte is then overwritten with 0x7f.
      ;; NOTE(review): relies on __negdi2 (defined outside this section)
      ;; returning with V reflecting signed overflow of the negation.
  985 DEFUN __ssneg_8
  986     XCALL   __negdi2
  987     brvc 0f                 ; no overflow: result is final
  988     ;; A[] = 0x7fffffffffffffff
  989     sec
  990     XCALL   __sbc_8
  991     ldi     A7, 0x7f
  992 0:  ret
  993 ENDF __ssneg_8
  994 #endif /* L_ssneg_8 */
 995
  996 #if defined (L_ssabs_8)
  997 FALIAS __ssabsta2
  998 FALIAS __ssabsda2
  999 FALIAS __ssabsdq2
 1000
      ;; __ssabs_8: absolute value of the 8-byte value in A[].
      ;; Returns at once if bit 7 of A7 is clear, otherwise continues in
      ;; __ssneg_8.
 1001 DEFUN __ssabs_8
 1002     sbrs    A7, 7           ; negative?
 1003     ret                     ; no: value is returned unchanged
 1004     XJMP    __ssneg_8
 1005 ENDF __ssabs_8
 1006 #endif /* L_ssabs_8 */
1007
 1008 ;; Second Argument
      ;; B7:B0 = 17:10, used by the 8-byte saturating add / subtract
 1009 #define B0  10
 1010 #define B1  B0+1
 1011 #define B2  B0+2
 1012 #define B3  B0+3
 1013 #define B4  B0+4
 1014 #define B5  B0+5
 1015 #define B6  B0+6
 1016 #define B7  B0+7
1017
 1018 #if defined (L_usadd_8)
 1019 FALIAS __usadduta3
 1020 FALIAS __usadduda3
 1021 FALIAS __usaddudq3
 1022
      ;; __usadd_8: unsigned saturating 8-byte addition.  The sum is
      ;; computed by __adddi3.  If carry is set on return, the result is
      ;; replaced by all ones through __sbc_8.
      ;; NOTE(review): relies on __adddi3 (defined outside this section)
      ;; returning with the carry out of the addition in C.
 1023 DEFUN __usadd_8
 1024     XCALL   __adddi3
 1025     brcs 0f
 1026     ret
 1027         ;; A[] = 0xffffffffffffffff
 1028 0:  XJMP    __sbc_8
 1029 ENDF __usadd_8
 1030 #endif /* L_usadd_8 */
1031
 1032 #if defined (L_ussub_8)
 1033 FALIAS __ussubuta3
 1034 FALIAS __ussubuda3
 1035 FALIAS __ussubudq3
 1036
      ;; __ussub_8: unsigned saturating 8-byte subtraction.  The difference
      ;; is computed by __subdi3.  If carry (borrow) is set on return, the
      ;; result is replaced by zero through __clr_8.
      ;; NOTE(review): relies on __subdi3 (defined outside this section)
      ;; returning with the borrow of the subtraction in C.
 1037 DEFUN __ussub_8
 1038     XCALL   __subdi3
 1039     brcs 0f
 1040     ret
 1041         ;; A[] = 0
 1042 0:  XJMP    __clr_8
 1043 ENDF __ussub_8
 1044 #endif /* L_ussub_8 */
1045
 1046 #if defined (L_ssadd_8)
 1047 FALIAS __ssaddta3
 1048 FALIAS __ssaddda3
 1049 FALIAS __ssadddq3
 1050
      ;; __ssadd_8: signed saturating 8-byte addition.  The sum is computed
      ;; by __adddi3.  If V is set on return, the result is replaced by
      ;; the saturation value selected by the sign of B, see below.
      ;; NOTE(review): relies on __adddi3 (defined outside this section)
      ;; returning with V reflecting signed overflow and with B7 intact.
 1051 DEFUN __ssadd_8
 1052     ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
 1053     XCALL   __adddi3
 1054     brvc 0f                 ; no overflow: sum is final
 1055     cpi     B7, 0x80        ; carry set if B7 < 0x80, i.e. B >= 0
 1056     XCALL   __sbc_8         ; A[] = all 0xff if carry, else all 0x00
 1057     subi    A7, 0x80        ; top byte 0xff -> 0x7f,  0x00 -> 0x80
 1058 0:  ret
 1059 ENDF __ssadd_8
 1060 #endif /* L_ssadd_8 */
1061
 1062 #if defined (L_sssub_8)
 1063 FALIAS __sssubta3
 1064 FALIAS __sssubda3
 1065 FALIAS __sssubdq3
 1066
      ;; __sssub_8: signed saturating 8-byte subtraction.  The difference
      ;; is computed by __subdi3.  If V is set on return, the result is
      ;; replaced by the saturation value selected by the sign of B.
      ;; NOTE(review): relies on __subdi3 (defined outside this section)
      ;; returning with V reflecting signed overflow and with B7 intact.
 1067 DEFUN __sssub_8
 1068     XCALL   __subdi3
 1069     brvc 0f                 ; no overflow: difference is final
 1070         ;; A = (B < 0) ? INT64_MAX : INT64_MIN
 1071     ldi     A7, 0x7f
 1072     cp      A7, B7          ; carry set if 0x7f < B7, i.e. B < 0
 1073     XCALL   __sbc_8         ; A[] = all 0xff if carry, else all 0x00
 1074     subi    A7, 0x80        ; top byte 0xff -> 0x7f,  0x00 -> 0x80
 1075 0:  ret
 1076 ENDF __sssub_8
 1077 #endif /* L_sssub_8 */
1078
 1079 #undef A0   /* drop the register names of the 8-byte saturation code */
 1080 #undef A1
 1081 #undef A2
 1082 #undef A3
 1083 #undef A4
 1084 #undef A5
 1085 #undef A6
 1086 #undef A7
 1087 #undef B0
 1088 #undef B1
 1089 #undef B2
 1090 #undef B3
 1091 #undef B4
 1092 #undef B5
 1093 #undef B6
 1094 #undef B7