libgcc/config/avr/lib1funcs-fixed.S

   1 /*  -*- Mode: Asm -*-  */
   2 ;;    Copyright (C) 2012-2014 Free Software Foundation, Inc.
   3 ;;    Contributed by Sean D'Epagnier  (sean@depagnier.com)
   4 ;;                   Georg-Johann Lay (avr@gjlay.de)
   5
   6 ;; This file is free software; you can redistribute it and/or modify it
   7 ;; under the terms of the GNU General Public License as published by the
   8 ;; Free Software Foundation; either version 3, or (at your option) any
   9 ;; later version.
  10
  11 ;; In addition to the permissions in the GNU General Public License, the
  12 ;; Free Software Foundation gives you unlimited permission to link the
  13 ;; compiled version of this file into combinations with other programs,
  14 ;; and to distribute those combinations without any restriction coming
  15 ;; from the use of this file.  (The General Public License restrictions
  16 ;; do apply in other respects; for example, they cover modification of
  17 ;; the file, and distribution when not linked into a combine
  18 ;; executable.)
  19
  20 ;; This file is distributed in the hope that it will be useful, but
  21 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23 ;; General Public License for more details.
  24
  25 ;; You should have received a copy of the GNU General Public License
  26 ;; along with this program; see the file COPYING.  If not, write to
  27 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  28 ;; Boston, MA 02110-1301, USA.
  29
  30 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  31 ;; Fixed point library routines for AVR
  32 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  33
  34 .section .text.libgcc.fixed, "ax", @progbits
  35
  36 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  37 ;; Conversions to float
  38 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  39
   40 #if defined (L_fractqqsf)
      ;;; __fractqqsf: convert the signed 8-bit fractional value in r24 to float.
      ;;; The byte is moved to r23 of a 4-byte r25:r22 value (r22 cleared,
      ;;; r25:r24 filled with copies of the sign bit) and the conversion is
      ;;; finished by tail-jumping to __fractsasf.
   41 DEFUN __fractqqsf
   42     ;; Move in place for SA -> SF conversion
   43     clr     r22
   44     mov     r23, r24
   45     ;; Sign-extend
   46     lsl     r24             ; carry = sign bit of the input
   47     sbc     r24, r24        ; r24 = 0x00 or 0xff depending on carry
   48     mov     r25, r24
   49     XJMP    __fractsasf
   50 ENDF __fractqqsf
   51 #endif  /* L_fractqqsf */
  52
   53 #if defined (L_fractuqqsf)
      ;;; __fractuqqsf: convert the unsigned 8-bit fractional value in r24 to
      ;;; float.  The byte is moved to r23, all other bytes of r25:r22 are
      ;;; cleared, and __fractusasf finishes the conversion.
   54 DEFUN __fractuqqsf
   55     ;; Move in place for USA -> SF conversion
   56     clr     r22
   57     mov     r23, r24
   58     ;; Zero-extend
   59     clr     r24
   60     clr     r25
   61     XJMP    __fractusasf
   62 ENDF __fractuqqsf
   63 #endif  /* L_fractuqqsf */
  64
   65 #if defined (L_fracthqsf)
      ;;; __fracthqsf: convert the signed 16-bit fractional value in r25:r24
      ;;; to float.  The word becomes the low half of r25:r22, the high half
      ;;; is filled with the sign, and __fractsasf finishes the conversion.
      ;;; (wmov is a macro defined outside this excerpt; presumably a
      ;;; register-pair move from r25:r24 to r23:r22.)
   66 DEFUN __fracthqsf
   67     ;; Move in place for SA -> SF conversion
   68     wmov    22, 24
   69     ;; Sign-extend
   70     lsl     r25             ; carry = sign bit of the input
   71     sbc     r24, r24        ; r24 = 0x00 or 0xff depending on carry
   72     mov     r25, r24
   73     XJMP    __fractsasf
   74 ENDF __fracthqsf
   75 #endif  /* L_fracthqsf */
  76
   77 #if defined (L_fractuhqsf)
      ;;; __fractuhqsf: convert the unsigned 16-bit fractional value in
      ;;; r25:r24 to float.  The word becomes the low half of r25:r22, the
      ;;; high half is cleared, and __fractusasf finishes the conversion.
   78 DEFUN __fractuhqsf
   79     ;; Move in place for USA -> SF conversion
   80     wmov    22, 24
   81     ;; Zero-extend
   82     clr     r24
   83     clr     r25
   84     XJMP    __fractusasf
   85 ENDF __fractuhqsf
   86 #endif  /* L_fractuhqsf */
  87
   88 #if defined (L_fracthasf)
      ;;; __fracthasf: convert the signed 16-bit fixed-point value in r25:r24
      ;;; to float.  The value is shifted up by one byte inside r25:r22
      ;;; (r22 cleared, r25 filled with the sign) and __fractsasf finishes
      ;;; the conversion.
   89 DEFUN __fracthasf
   90     ;; Move in place for SA -> SF conversion
   91     clr     r22
   92     mov     r23, r24
   93     mov     r24, r25
   94     ;; Sign-extend
   95     lsl     r25             ; carry = sign bit of the input
   96     sbc     r25, r25        ; r25 = 0x00 or 0xff depending on carry
   97     XJMP    __fractsasf
   98 ENDF __fracthasf
   99 #endif  /* L_fracthasf */
 100
  101 #if defined (L_fractuhasf)
      ;;; __fractuhasf: convert the unsigned 16-bit fixed-point value in
      ;;; r25:r24 to float.  The value is shifted up by one byte inside
      ;;; r25:r22 (r22 and r25 cleared) and __fractusasf finishes the
      ;;; conversion.
  102 DEFUN __fractuhasf
  103     ;; Move in place for USA -> SF conversion
  104     clr     r22
  105     mov     r23, r24
  106     mov     r24, r25
  107     ;; Zero-extend
  108     clr     r25
  109     XJMP    __fractusasf
  110 ENDF __fractuhasf
  111 #endif  /* L_fractuhasf */
 112
 113
  114 #if defined (L_fractsqsf)
      ;;; __fractsqsf: convert a signed 32-bit fractional value to float.
      ;;; The argument is converted as a plain integer by __floatsisf; the
      ;;; upper two bytes r25:r24 of that result are then adjusted.
      ;;; exp_lo / exp_hi are macros defined outside this excerpt; presumably
      ;;; they yield the two bytes of an exponent-field offset.
  115 DEFUN __fractsqsf
  116     XCALL   __floatsisf
  117     ;; Divide non-zero results by 2^31 to move the
  118     ;; decimal point into place
  119     tst     r25
  120     breq    0f              ; r25 == 0: leave the result untouched
  121     subi    r24, exp_lo (31)
  122     sbci    r25, exp_hi (31)
  123 0:  ret
  124 ENDF __fractsqsf
  125 #endif  /* L_fractsqsf */
 126
  127 #if defined (L_fractusqsf)
      ;;; __fractusqsf: convert an unsigned 32-bit fractional value to float.
      ;;; The argument is converted as a plain integer by __floatunsisf; only
      ;;; r25 of that result is adjusted afterwards.
      ;;; NOTE(review): only the high byte is touched, presumably because
      ;;; exp_lo (32) is zero; exp_hi is defined outside this excerpt.
  128 DEFUN __fractusqsf
  129     XCALL   __floatunsisf
  130     ;; Divide non-zero results by 2^32 to move the
  131     ;; decimal point into place
  132     cpse    r25, __zero_reg__   ; skip the adjustment if r25 == 0
  133     subi    r25, exp_hi (32)
  134     ret
  135 ENDF __fractusqsf
  136 #endif  /* L_fractusqsf */
 137
  138 #if defined (L_fractsasf)
      ;;; __fractsasf: convert a signed 32-bit fixed-point value to float.
      ;;; The argument is converted as a plain integer by __floatsisf; the
      ;;; upper two bytes r25:r24 of that result are then adjusted.
      ;;; Tail-call target of __fractqqsf, __fracthqsf and __fracthasf.
  139 DEFUN __fractsasf
  140     XCALL   __floatsisf
  141     ;; Divide non-zero results by 2^15 to move the
  142     ;; decimal point into place
  143     tst     r25
  144     breq    0f              ; r25 == 0: leave the result untouched
  145     subi    r24, exp_lo (15)
  146     sbci    r25, exp_hi (15)
  147 0:  ret
  148 ENDF __fractsasf
  149 #endif  /* L_fractsasf */
 150
  151 #if defined (L_fractusasf)
      ;;; __fractusasf: convert an unsigned 32-bit fixed-point value to float.
      ;;; The argument is converted as a plain integer by __floatunsisf; only
      ;;; r25 of that result is adjusted afterwards.
      ;;; Tail-call target of __fractuqqsf, __fractuhqsf and __fractuhasf.
  152 DEFUN __fractusasf
  153     XCALL   __floatunsisf
  154     ;; Divide non-zero results by 2^16 to move the
  155     ;; decimal point into place
  156     cpse    r25, __zero_reg__   ; skip the adjustment if r25 == 0
  157     subi    r25, exp_hi (16)
  158     ret
  159 ENDF __fractusasf
  160 #endif  /* L_fractusasf */
 161
 162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 163 ;; Conversions from float
 164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 165
  166 #if defined (L_fractsfqq)
      ;;; __fractsfqq: convert a float to a signed 8-bit fractional value.
      ;;; The upper two bytes r25:r24 of the float are adjusted, __fixsfsi
      ;;; converts to a 32-bit integer, and the top byte of that integer is
      ;;; returned in r24.
  167 DEFUN __fractsfqq
  168     ;; Multiply with 2^{24+7} to get a QQ result in r25
  169     subi    r24, exp_lo (-31)
  170     sbci    r25, exp_hi (-31)
  171     XCALL   __fixsfsi
  172     mov     r24, r25        ; move the result byte to r24
  173     ret
  174 ENDF __fractsfqq
  175 #endif  /* L_fractsfqq */
 176
  177 #if defined (L_fractsfuqq)
      ;;; __fractsfuqq: convert a float to an unsigned 8-bit fractional value.
      ;;; r25 of the float is adjusted, __fixunssfsi converts to a 32-bit
      ;;; integer, and the top byte of that integer is returned in r24.
  178 DEFUN __fractsfuqq
  179     ;; Multiply with 2^{24+8} to get a UQQ result in r25
  180     subi    r25, exp_hi (-32)
  181     XCALL   __fixunssfsi
  182     mov     r24, r25        ; move the result byte to r24
  183     ret
  184 ENDF __fractsfuqq
  185 #endif  /* L_fractsfuqq */
 186
  187 #if defined (L_fractsfha)
      ;;; __fractsfha: convert a float to a signed 16-bit fixed-point value.
      ;;; Adjusts the upper two bytes r25:r24 of the float, then tail-jumps
      ;;; to __fixsfsi which produces the final value.
  188 DEFUN __fractsfha
  189     ;; Multiply with 2^{16+7} to get a HA result in r25:r24
  190     subi    r24, exp_lo (-23)
  191     sbci    r25, exp_hi (-23)
  192     XJMP    __fixsfsi
  193 ENDF __fractsfha
  194 #endif  /* L_fractsfha */
 195
  196 #if defined (L_fractsfuha)
      ;;; __fractsfuha: convert a float to an unsigned 16-bit fixed-point
      ;;; value.  Adjusts r25 of the float, then tail-jumps to __fixunssfsi
      ;;; which produces the final value.
  197 DEFUN __fractsfuha
  198     ;; Multiply with 2^24 to get a UHA result in r25:r24
  199     subi    r25, exp_hi (-24)
  200     XJMP    __fixunssfsi
  201 ENDF __fractsfuha
  202 #endif  /* L_fractsfuha */
 203
  204 #if defined (L_fractsfhq)
      ;;; __fractsfhq / __fractsfsq: convert a float to a signed 16-bit resp.
      ;;; 32-bit fractional value.  Both names share one body: the scaling is
      ;;; the same and the callers read a different part of r25:r22.
      ;;; (FALIAS is a macro defined outside this excerpt; presumably it
      ;;; declares the alias name for the function that follows.)
  205 FALIAS __fractsfsq
  206
  207 DEFUN __fractsfhq
  208     ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
  209     ;; resp. with 2^31 to get a SQ result in r25:r22
  210     subi    r24, exp_lo (-31)
  211     sbci    r25, exp_hi (-31)
  212     XJMP    __fixsfsi
  213 ENDF __fractsfhq
  214 #endif  /* L_fractsfhq */
 215
  216 #if defined (L_fractsfuhq)
      ;;; __fractsfuhq / __fractsfusq: convert a float to an unsigned 16-bit
      ;;; resp. 32-bit fractional value.  Both names share one body: the
      ;;; scaling is the same and the callers read a different part of
      ;;; r25:r22.
  217 FALIAS __fractsfusq
  218
  219 DEFUN __fractsfuhq
  220     ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
  221     ;; resp. with 2^32 to get a USQ result in r25:r22
  222     subi    r25, exp_hi (-32)
  223     XJMP    __fixunssfsi
  224 ENDF __fractsfuhq
  225 #endif  /* L_fractsfuhq */
 226
  227 #if defined (L_fractsfsa)
      ;;; __fractsfsa: convert a float to a signed 32-bit fixed-point value.
      ;;; Adjusts the upper two bytes r25:r24 of the float, then tail-jumps
      ;;; to __fixsfsi which produces the final value.
  228 DEFUN __fractsfsa
  229     ;; Multiply with 2^15 to get a SA result in r25:r22
  230     subi    r24, exp_lo (-15)
  231     sbci    r25, exp_hi (-15)
  232     XJMP    __fixsfsi
  233 ENDF __fractsfsa
  234 #endif  /* L_fractsfsa */
 235
  236 #if defined (L_fractsfusa)
      ;;; __fractsfusa: convert a float to an unsigned 32-bit fixed-point
      ;;; value.  Adjusts r25 of the float, then tail-jumps to __fixunssfsi
      ;;; which produces the final value.
  237 DEFUN __fractsfusa
  238     ;; Multiply with 2^16 to get a USA result in r25:r22
  239     subi    r25, exp_hi (-16)
  240     XJMP    __fixunssfsi
  241 ENDF __fractsfusa
  242 #endif  /* L_fractsfusa */
 243
 244
 245 ;; For multiplication the functions here are called directly from
 246 ;; avr-fixed.md instead of using the standard libcall mechanisms.
 247 ;; This can make better code because GCC knows exactly which
  248 ;; of the call-used registers (not all of them) are clobbered.
 249
 250 /*******************************************************
 251     Fractional  Multiplication  8 x 8  without MUL
 252 *******************************************************/
 253
  254 #if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
  255 ;;; R23 = R24 * R25
  256 ;;; Clobbers: __tmp_reg__, R22, R24, R25
  257 ;;; Rounding: ???
      ;;; The product itself is computed by __fmuls (defined outside this
      ;;; excerpt); this wrapper only patches the result 0x80 to 0x7f.
  258 DEFUN __mulqq3
  259     XCALL   __fmuls
  260     ;; TR 18037 requires that  (-1) * (-1)  does not overflow
  261     ;; The only input that can produce  -1  is  (-1)^2.
  262     dec     r23             ; sets V only for 0x80 -> 0x7f
  263     brvs    0f              ; keep the saturated 0x7f
  264     inc     r23             ; otherwise undo the decrement
  265 0:  ret
  266 ENDF  __mulqq3
  267 #endif /* L_mulqq3 && ! HAVE_MUL */
 268
 269 /*******************************************************
 270     Fractional Multiply  .16 x .16  with and without MUL
 271 *******************************************************/
 272
  273 #if defined (L_mulhq3)
  274 ;;; Same code with and without MUL, but the interfaces differ:
  275 ;;; no MUL: (R25:R24) = (R23:R22) * (R25:R24)
  276 ;;;         Clobbers: ABI, called by optabs
  277 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
  278 ;;;         Clobbers: __tmp_reg__, R22, R23
  279 ;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
      ;;; The 32-bit product is taken from __mulhisi3 in R25:R22.  Its upper
      ;;; three bytes are shifted left by one bit so that the result sits in
      ;;; R25:R24 and the first discarded bit in bit 7 of R23.
  280 DEFUN   __mulhq3
  281     XCALL   __mulhisi3
  282     ;; Shift result into place
  283     lsl     r23
  284     rol     r24
  285     rol     r25
  286     brvs    1f              ; V from the last rol: the shift changed the sign
  287     ;; Round
  288     sbrc    r23, 7          ; first bit below the result
  289     adiw    r24, 1
  290     ret
  291 1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
  292     ldi     r24, lo8 (0x7fff)
  293     ldi     r25, hi8 (0x7fff)
  294     ret
  295 ENDF __mulhq3
  296 #endif  /* defined (L_mulhq3) */
 297
  298 #if defined (L_muluhq3)
  299 ;;; Same code with and without MUL, but the interfaces differ:
  300 ;;; no MUL: (R25:R24) *= (R23:R22)
  301 ;;;         Clobbers: ABI, called by optabs
  302 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
  303 ;;;         Clobbers: __tmp_reg__, R22, R23
  304 ;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
      ;;; The 32-bit product is taken from __umulhisi3; its upper half
      ;;; R25:R24 is the result and bit 7 of R23 decides the rounding.
  305 DEFUN   __muluhq3
  306     XCALL   __umulhisi3
  307     ;; Round
  308     sbrc    r23, 7          ; first bit below the result
  309     adiw    r24, 1
  310     ret
  311 ENDF __muluhq3
  312 #endif  /* L_muluhq3 */
 313
 314
 315 /*******************************************************
 316     Fixed  Multiply  8.8 x 8.8  with and without MUL
 317 *******************************************************/
 318
  319 #if defined (L_mulha3)
  320 ;;; Same code with and without MUL, but the interfaces differ:
  321 ;;; no MUL: (R25:R24) = (R23:R22) * (R25:R24)
  322 ;;;         Clobbers: ABI, called by optabs
  323 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
  324 ;;;         Clobbers: __tmp_reg__, R22, R23
  325 ;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
      ;;; The 32-bit product from __mulhisi3 has its lower three bytes
      ;;; shifted left by one bit; the move into R25:R24 and the rounding are
      ;;; shared with the unsigned version via __muluha3_round.
  326 DEFUN   __mulha3
  327     XCALL   __mulhisi3
  328     lsl     r22
  329     rol     r23
  330     rol     r24
  331     XJMP    __muluha3_round
  332 ENDF __mulha3
  333 #endif  /* L_mulha3 */
 334
  335 #if defined (L_muluha3)
  336 ;;; Same code with and without MUL, but the interfaces differ:
  337 ;;; no MUL: (R25:R24) *= (R23:R22)
  338 ;;;         Clobbers: ABI, called by optabs
  339 ;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
  340 ;;;         Clobbers: __tmp_reg__, R22, R23
  341 ;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
      ;;; Takes the 32-bit product from __umulhisi3 and lets __muluha3_round
      ;;; pick the middle two bytes and round them.
  342 DEFUN   __muluha3
  343     XCALL   __umulhisi3
  344     XJMP    __muluha3_round
  345 ENDF __muluha3
  346 #endif  /* L_muluha3 */
 347
  348 #if defined (L_muluha3_round)
      ;;; __muluha3_round: common tail of __mulha3 and __muluha3.
      ;;; In:  product in R24:R22 (R24 = highest byte used)
      ;;; Out: R25:R24 = R24:R23 of the input, incremented by one if bit 7
      ;;;      of R22 is set.
  349 DEFUN   __muluha3_round
  350     ;; Shift result into place
  351     mov     r25, r24
  352     mov     r24, r23
  353     ;; Round
  354     sbrc    r22, 7          ; first bit below the result
  355     adiw    r24, 1
  356     ret
  357 ENDF __muluha3_round
  358 #endif  /* L_muluha3_round */
 359
 360
 361 /*******************************************************
 362     Fixed  Multiplication  16.16 x 16.16
 363 *******************************************************/
 364
 365 ;; Bits outside the result (below LSB), used in the signed version
 366 #define GUARD __tmp_reg__
 367
 368 #if defined (__AVR_HAVE_MUL__)
 369
 370 ;; Multiplier
 371 #define A0  16
 372 #define A1  A0+1
 373 #define A2  A1+1
 374 #define A3  A2+1
 375
 376 ;; Multiplicand
 377 #define B0  20
 378 #define B1  B0+1
 379 #define B2  B1+1
 380 #define B3  B2+1
 381
 382 ;; Result
 383 #define C0  24
 384 #define C1  C0+1
 385 #define C2  C1+1
 386 #define C3  C2+1
 387
  388 #if defined (L_mulusa3)
  389 ;;; (C3:C0) = (A3:A0) * (B3:B0)
      ;;; __mulusa3 only sets T (request rounding) and falls through into
      ;;; __mulusa3_round, which the signed __mulsa3 enters with T = 0.
  390 DEFUN __mulusa3
  391     set
  392     ;; Fallthru
  393 ENDF  __mulusa3
  394
  395 ;;; Round for last digit iff T = 1
  396 ;;; Return guard bits in GUARD (__tmp_reg__).
  397 ;;; Rounding, T = 0:  -1.0 LSB  <  error  <=  0   LSB
  398 ;;; Rounding, T = 1:  -0.5 LSB  <  error  <=  0.5 LSB
  399 DEFUN __mulusa3_round
  400     ;; Some of the MUL instructions have LSBs outside the result.
  401     ;; Don't ignore these LSBs in order to tame rounding error.
  402     ;; Use C2/C3 for these LSBs.
  403
  404     clr C0
  405     clr C1
  406     mul A0, B0  $  movw C2, r0
  407
  408     mul A1, B0  $  add  C3, r0  $  adc C0, r1
  409     mul A0, B1  $  add  C3, r0  $  adc C0, r1  $  rol C1
          ;; The rol above moves the carry of the last addition into C1,
          ;; which was cleared before.
  410
  411     ;; Round if T = 1.  Store guarding bits outside the result for rounding
  412     ;; and left-shift by the signed version (function below).
  413     brtc 0f
  414     sbrc C3, 7
  415     adiw C0, 1
  416 0:  push C3
  417
  418     ;; The following MULs don't have LSBs outside the result.
  419     ;; C2/C3 is the high part.
          ;; In each group the first sbc sets the byte to minus the carry and
          ;; every following sbci subtracts one more carry; the neg at the end
          ;; of the group turns that into the positive number of carries.
  420
  421     mul  A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
  422     mul  A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
  423     mul  A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
  424     neg  C2
  425
  426     mul  A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
  427     mul  A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
  428     mul  A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
  429     mul  A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
  430     neg  C3
  431
  432     mul  A1, B3  $  add C2, r0  $  adc C3, r1
  433     mul  A2, B2  $  add C2, r0  $  adc C3, r1
  434     mul  A3, B1  $  add C2, r0  $  adc C3, r1
  435
          ;; Only the low product byte still fits into the result here.
  436     mul  A2, B3  $  add C3, r0
  437     mul  A3, B2  $  add C3, r0
  438
  439     ;; Guard bits used in the signed version below.
  440     pop  GUARD
          ;; The products were read from r1:r0, so r1 has to be cleared again.
  441     clr  __zero_reg__
  442     ret
  443 ENDF __mulusa3_round
  444 #endif /* L_mulusa3 */
 445
  446 #if defined (L_mulsa3)
  447 ;;; (C3:C0) = (A3:A0) * (B3:B0)
  448 ;;; Clobbers: __tmp_reg__, T
  449 ;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
      ;;; Strategy: multiply as unsigned without rounding (T = 0), correct
      ;;; the high word for negative operands, then shift the result together
      ;;; with the guard byte left by one bit and round on the next guard bit.
  450 DEFUN __mulsa3
  451     clt
  452     XCALL   __mulusa3_round
  453     ;; A posteriori sign extension of the operands
  454     tst     B3
  455     brpl 1f
          ;; B is negative: subtract the low word of A from the high word
  456     sub     C2, A0
  457     sbc     C3, A1
  458 1:  sbrs    A3, 7
  459     rjmp 2f
          ;; A is negative: subtract the low word of B from the high word
  460     sub     C2, B0
  461     sbc     C3, B1
  462 2:
  463     ;;  Shift 1 bit left to adjust for 15 fractional bits
  464     lsl     GUARD
  465     rol     C0
  466     rol     C1
  467     rol     C2
  468     rol     C3
  469     ;; Round last digit
  470     lsl     GUARD           ; carry = next guard bit
  471     adc     C0, __zero_reg__
  472     adc     C1, __zero_reg__
  473     adc     C2, __zero_reg__
  474     adc     C3, __zero_reg__
  475     ret
  476 ENDF __mulsa3
  477 #endif /* L_mulsa3 */
 478
 479 #undef A0
 480 #undef A1
 481 #undef A2
 482 #undef A3
 483 #undef B0
 484 #undef B1
 485 #undef B2
 486 #undef B3
 487 #undef C0
 488 #undef C1
 489 #undef C2
 490 #undef C3
 491
 492 #else /* __AVR_HAVE_MUL__ */
 493
 494 #define A0 18
 495 #define A1 A0+1
 496 #define A2 A0+2
 497 #define A3 A0+3
 498
 499 #define B0 22
 500 #define B1 B0+1
 501 #define B2 B0+2
 502 #define B3 B0+3
 503
 504 #define C0  22
 505 #define C1  C0+1
 506 #define C2  C0+2
 507 #define C3  C0+3
 508
 509 ;; __tmp_reg__
 510 #define CC0  0
 511 ;; __zero_reg__
 512 #define CC1  1
 513 #define CC2  16
 514 #define CC3  17
 515
 516 #define AA0  26
 517 #define AA1  AA0+1
 518 #define AA2  30
 519 #define AA3  AA2+1
 520
  521 #if defined (L_mulsa3)
  522 ;;; (R25:R22)  *=  (R21:R18)
  523 ;;; Clobbers: ABI, called by optabs
  524 ;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
      ;;; Strategy: save the bytes of B needed later on the stack, multiply
      ;;; as unsigned without rounding (T = 0), correct the high word for
      ;;; negative operands, then shift left by one bit and round using the
      ;;; guard byte.  __mulusa3_round keeps a copy of A in R27:R26 and
      ;;; R31:R30.
  525 DEFUN   __mulsa3
  526     push    B0
  527     push    B1
  528     push    B3
  529     clt
  530     XCALL   __mulusa3_round
  531     pop     r30             ; saved B3
  532     ;; sign-extend B
  533     bst     r30, 7
  534     brtc 1f
  535     ;; A1, A0 survived in  R27:R26
  536     sub     C2, AA0
  537     sbc     C3, AA1
  538 1:
  539     pop     AA1  ;; B1
  540     pop     AA0  ;; B0
  541
  542     ;; sign-extend A.  A3 survived in  R31
  543     bst     AA3, 7
  544     brtc 2f
  545     sub     C2, AA0
  546     sbc     C3, AA1
  547 2:
  548     ;;  Shift 1 bit left to adjust for 15 fractional bits
  549     lsl     GUARD
  550     rol     C0
  551     rol     C1
  552     rol     C2
  553     rol     C3
  554     ;; Round last digit
  555     lsl     GUARD           ; carry = next guard bit
  556     adc     C0, __zero_reg__
  557     adc     C1, __zero_reg__
  558     adc     C2, __zero_reg__
  559     adc     C3, __zero_reg__
  560     ret
  561 ENDF __mulsa3
  562 #endif  /* L_mulsa3 */
 563
  564 #if defined (L_mulusa3)
  565 ;;; (R25:R22)  *=  (R21:R18)
  566 ;;; Clobbers: ABI, called by optabs
  567 ;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
      ;;; __mulusa3 only sets T (request rounding) and falls through into
      ;;; __mulusa3_round.
  568 DEFUN __mulusa3
  569     set
  570     ;; Fallthru
  571 ENDF  __mulusa3
  572
  573 ;;; A[] survives in 26, 27, 30, 31
  574 ;;; Also used by __mulsa3 with T = 0
  575 ;;; Round if T = 1
  576 ;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
      ;;; Shift-and-add multiplication.  The accumulator CC3:CC0 lives in
      ;;; R17:R16:R1:R0, i.e. it includes __tmp_reg__ and __zero_reg__; the
      ;;; latter is cleared again before returning.  R17:R16 are saved and
      ;;; restored on the stack.
  577 DEFUN __mulusa3_round
  578     push    CC2
  579     push    CC3
  580     ; clear result
          ; (__zero_reg__ is still 0 here, so the word move clears CC3:CC2)
  581     clr     __tmp_reg__
  582     wmov    CC2, CC0
  583     ; save multiplicand
  584     wmov    AA0, A0
  585     wmov    AA2, A2
  586     rjmp 3f
  587
  588     ;; Loop the integral part
  589
  590 1:  ;; CC += A * 2^n;  n >= 0
  591     add  CC0,A0  $  adc CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
  592
  593 2:  ;; A <<= 1
  594     lsl  A0      $  rol A1      $  rol  A2      $  rol  A3
  595
  596 3:  ;; IBIT(B) >>= 1
  597     ;; Carry = n-th bit of B;  n >= 0
  598     lsr     B3
  599     ror     B2
  600     brcs 1b
          ;; Carry is clear here, so sbci leaves B3 unchanged.  Its Z flag is
          ;; only set if Z was already set by the ror (B2 = 0) and B3 = 0, so
          ;; the loop runs as long as B3:B2 is non-zero.
  601     sbci    B3, 0
  602     brne 2b
  603
  604     ;; Loop the fractional part
  605     ;; B2/B3 is 0 now, use as guard bits for rounding
  606     ;; Restore multiplicand
  607     wmov    A0, AA0
  608     wmov    A2, AA2
  609     rjmp 5f
  610
  611 4:  ;; CC += A:Guard * 2^n;  n < 0
  612     add  B3,B2 $  adc  CC0,A0  $  adc  CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
  613 5:
  614     ;; A:Guard >>= 1
  615     lsr  A3   $  ror  A2  $  ror  A1  $  ror   A0  $   ror  B2
  616
  617     ;; FBIT(B) <<= 1
  618     ;; Carry = n-th bit of B;  n < 0
  619     lsl     B0
  620     rol     B1
  621     brcs 4b
          ;; Same Z-flag chaining as above: loop as long as B1:B0 is non-zero.
  622     sbci    B0, 0
  623     brne 5b
  624
  625     ;; Save guard bits and set carry for rounding
  626     push    B3
  627     lsl     B3
  628     ;; Move result into place
  629     wmov    C2, CC2
  630     wmov    C0, CC0
  631     clr     __zero_reg__
  632     brtc 6f
  633     ;; Round iff T = 1
  634     adc     C0, __zero_reg__
  635     adc     C1, __zero_reg__
  636     adc     C2, __zero_reg__
  637     adc     C3, __zero_reg__
  638 6:
  639     pop     GUARD
  640     ;; Epilogue
  641     pop     CC3
  642     pop     CC2
  643     ret
  644 ENDF __mulusa3_round
  645 #endif  /* L_mulusa3 */
 646
 647 #undef A0
 648 #undef A1
 649 #undef A2
 650 #undef A3
 651 #undef B0
 652 #undef B1
 653 #undef B2
 654 #undef B3
 655 #undef C0
 656 #undef C1
 657 #undef C2
 658 #undef C3
 659 #undef AA0
 660 #undef AA1
 661 #undef AA2
 662 #undef AA3
 663 #undef CC0
 664 #undef CC1
 665 #undef CC2
 666 #undef CC3
 667
 668 #endif /* __AVR_HAVE_MUL__ */
 669
 670 #undef GUARD
 671
 672 /***********************************************************
 673     Fixed  unsigned saturated Multiplication  8.8 x 8.8
 674 ***********************************************************/
 675
 676 #define C0  22
 677 #define C1  C0+1
 678 #define C2  C0+2
 679 #define C3  C0+3
 680 #define SS __tmp_reg__
 681
  682 #if defined (L_usmuluha3)
      ;;; __usmuluha3: unsigned saturating 16-bit fixed-point multiplication.
      ;;; In:  factors in R23:R22 and R25:R24
      ;;; Out: R25:R24 = rounded middle two bytes of the 32-bit product, or
      ;;;      0xffff if the product does not fit.
  683 DEFUN __usmuluha3
  684     ;; Widening multiply
  685 #ifdef __AVR_HAVE_MUL__
  686     ;; Adjust interface
  687     movw    R26, R22
  688     movw    R18, R24
  689 #endif /* HAVE MUL */
  690     XCALL   __umulhisi3
  691     tst     C3
  692     brne .Lmax              ; top byte in use: result too large
  693     ;; Round, target is in C1..C2
  694     lsl     C0              ; carry = first bit below the result
  695     adc     C1, __zero_reg__
  696     adc     C2, __zero_reg__
  697     brcs .Lmax              ; rounding carried out of the result
  698     ;; Move result into place
  699     mov     C3, C2
  700     mov     C2, C1
  701     ret
  702 .Lmax:
  703     ;; Saturate
  704     ldi     C2, 0xff
  705     ldi     C3, 0xff
  706     ret
  707 ENDF  __usmuluha3
  708 #endif /* L_usmuluha3 */
 709
 710 /***********************************************************
 711     Fixed signed saturated Multiplication  s8.7 x s8.7
 712 ***********************************************************/
 713
  714 #if defined (L_ssmulha3)
      ;;; __ssmulha3: signed saturating 16-bit fixed-point multiplication.
      ;;; In:  factors in R23:R22 and R25:R24
      ;;; Out: R25:R24 = rounded product, or 0x7fff / 0x8000 if the product
      ;;;      does not fit.
      ;;; SS (__tmp_reg__) carries the sign used to select the saturation
      ;;; value; only its bit 7 is evaluated at the end.
  715 DEFUN __ssmulha3
  716     ;; Widening multiply
  717 #ifdef __AVR_HAVE_MUL__
  718     ;; Adjust interface
  719     movw    R26, R22
  720     movw    R18, R24
  721 #endif /* HAVE MUL */
  722     XCALL   __mulhisi3
  723     ;; Adjust decimal point
  724     lsl     C0
  725     rol     C1
  726     rol     C2
  727     brvs .LsatC3.3          ; bit shifted out differs from the new sign of C2
  728     ;; The 9 MSBs must be the same
  729     rol     C3
  730     sbc     SS, SS          ; SS = 0x00 or 0xff from the bit shifted out
  731     cp      C3, SS
  732     brne .LsatSS
  733     ;; Round
  734     lsl     C0              ; carry = first bit below the result
  735     adc     C1, __zero_reg__
  736     adc     C2, __zero_reg__
  737     brvs .Lmax              ; rounding overflowed into the sign
  738     ;; Move result into place
  739     mov    C3, C2
  740     mov    C2, C1
  741     ret
  742 .Lmax:
  743     ;; Load 0x7fff
  744     clr     C3
  745 .LsatC3.3:
  746     ;; C3 <  0 -->  0x8000
  747     ;; C3 >= 0 -->  0x7fff
  748     mov     SS, C3
  749 .LsatSS:
  750     ;; Load min / max value:
  751     ;; SS = -1  -->  0x8000
  752     ;; SS =  0  -->  0x7fff
  753     ldi     C3, 0x7f
  754     ldi     C2, 0xff
  755     sbrc    SS, 7
  756     adiw    C2, 1           ; 0x7fff + 1 = 0x8000
  757     ret
  758 ENDF  __ssmulha3
  759 #endif /* L_ssmulha3 */
 760
 761 #undef C0
 762 #undef C1
 763 #undef C2
 764 #undef C3
 765 #undef SS
 766
 767 /***********************************************************
 768     Fixed  unsigned saturated Multiplication  16.16 x 16.16
 769 ***********************************************************/
 770
 771 #define C0  18
 772 #define C1  C0+1
 773 #define C2  C0+2
 774 #define C3  C0+3
 775 #define C4  C0+4
 776 #define C5  C0+5
 777 #define C6  C0+6
 778 #define C7  C0+7
 779 #define SS __tmp_reg__
 780
  781 #if defined (L_usmulusa3)
  782 ;; R22[4] = R22[4] *{usat} R18[4]
  783 ;; Ordinary ABI function
      ;; The 64-bit product from __umulsidi3 is expected in C7..C0
      ;; (R25..R18).  The result is the rounded byte range C5..C2, moved to
      ;; R25..R22, or 0xffffffff if the product does not fit.
  784 DEFUN __usmulusa3
  785     ;; Widening multiply
  786     XCALL   __umulsidi3
  787     or      C7, C6
  788     brne .Lmax              ; top two bytes in use: result too large
  789     ;; Round, target is in C2..C5
  790     lsl     C1              ; carry = first bit below the result
  791     adc     C2, __zero_reg__
  792     adc     C3, __zero_reg__
  793     adc     C4, __zero_reg__
  794     adc     C5, __zero_reg__
  795     brcs .Lmax              ; rounding carried out of the result
  796     ;; Move result into place
  797     wmov    C6, C4
  798     wmov    C4, C2
  799     ret
  800 .Lmax:
  801     ;; Saturate
  802     ldi     C7, 0xff
  803     ldi     C6, 0xff
  804     wmov    C4, C6
  805     ret
  806 ENDF  __usmulusa3
  807 #endif /* L_usmulusa3 */
 808
 809 /***********************************************************
 810     Fixed signed saturated Multiplication  s16.15 x s16.15
 811 ***********************************************************/
 812
  813 #if defined (L_ssmulsa3)
  814 ;; R22[4] = R22[4] *{ssat} R18[4]
  815 ;; Ordinary ABI function
      ;; The 64-bit product from __mulsidi3 is expected in C7..C0
      ;; (R25..R18).  It is shifted left by one bit; the result is the
      ;; rounded byte range C5..C2, moved to R25..R22, or 0x7fffffff /
      ;; 0x80000000 if the product does not fit.
  816 DEFUN __ssmulsa3
  817     ;; Widening multiply
  818     XCALL   __mulsidi3
  819     ;; Adjust decimal point
  820     lsl     C1
  821     rol     C2
  822     rol     C3
  823     rol     C4
  824     rol     C5
  825     brvs .LsatC7.7          ; bit shifted out differs from the new sign of C5
  826     ;; The 17 MSBs must be the same
  827     rol     C6
  828     rol     C7
  829     sbc     SS, SS          ; SS = 0x00 or 0xff from the bit shifted out
  830     cp      C6, SS
  831     cpc     C7, SS
  832     brne .LsatSS
  833     ;; Round
  834     lsl     C1              ; carry = first bit below the result
  835     adc     C2, __zero_reg__
  836     adc     C3, __zero_reg__
  837     adc     C4, __zero_reg__
  838     adc     C5, __zero_reg__
  839     brvs .Lmax              ; rounding overflowed into the sign
  840     ;; Move result into place
  841     wmov    C6, C4
  842     wmov    C4, C2
  843     ret
  844
  845 .Lmax:
  846     ;; Load 0x7fffffff
  847     clr     C7
  848 .LsatC7.7:
  849     ;; C7 <  0 -->  0x80000000
  850     ;; C7 >= 0 -->  0x7fffffff
  851     lsl     C7
  852     sbc     SS, SS
  853 .LsatSS:
  854     ;; Load min / max value:
  855     ;; SS = -1  -->  0x80000000
  856     ;; SS =  0  -->  0x7fffffff
  857     com     SS              ; 0x00 for the minimum, 0xff for the maximum
  858     mov     C4, SS
  859     mov     C5, C4
  860     wmov    C6, C4
  861     subi    C7, 0x80        ; 0x00 -> 0x80,  0xff -> 0x7f
  862     ret
  863 ENDF  __ssmulsa3
  864 #endif /* L_ssmulsa3 */
 865
 866 #undef C0
 867 #undef C1
 868 #undef C2
 869 #undef C3
 870 #undef C4
 871 #undef C5
 872 #undef C6
 873 #undef C7
 874 #undef SS
 875
 876 /*******************************************************
 877       Fractional Division 8 / 8
 878 *******************************************************/
 879
 880 #define r_divd  r25     /* dividend */
 881 #define r_quo   r24     /* quotient */
 882 #define r_div   r22     /* divisor */
 883 #define r_sign  __tmp_reg__
 884
  885 #if defined (L_divqq3)
      ;;; __divqq3: signed 8-bit fractional division  r24 = r25 / r22.
      ;;; The XOR of the operand signs is kept in r_sign (__tmp_reg__), both
      ;;; operands are negated if negative, __divqq_helper divides them
      ;;; unsigned, and the quotient is halved and negated if r_sign says so.
  886 DEFUN   __divqq3
  887     mov     r_sign, r_divd
  888     eor     r_sign, r_div   ; bit 7 = sign of the result
  889     sbrc    r_div, 7
  890     neg     r_div
  891     sbrc    r_divd, 7
  892     neg     r_divd
  893     XCALL   __divqq_helper
  894     lsr     r_quo           ; make room for the sign bit
  895     sbrc    r_sign, 7   ; negate result if needed
  896     neg     r_quo
  897     ret
  898 ENDF __divqq3
  899 #endif  /* L_divqq3 */
 900
  901 #if defined (L_udivuqq3)
      ;;; __udivuqq3: unsigned 8-bit fractional division  r24 = r25 / r22.
      ;;; If the dividend is not smaller than the divisor, 0xff is returned;
      ;;; otherwise the work is done by __divqq_helper.
  902 DEFUN   __udivuqq3
  903     cp      r_divd, r_div
  904     brsh    0f
  905     XJMP __divqq_helper
  906     ;; Result is out of [0, 1)  ==>  Return 1 - eps.
  907 0:  ldi     r_quo, 0xff
  908     ret
  909 ENDF __udivuqq3
  910 #endif  /* L_udivuqq3 */
 911
 912
  913 #if defined (L_divqq_helper)
      ;;; __divqq_helper: unsigned shift-and-subtract division, 8 steps.
      ;;; In:  r_divd (r25) = dividend, r_div (r22) = divisor
      ;;; Out: r_quo (r24) = quotient; r_divd is clobbered
      ;;; __zero_reg__ serves as loop counter: a single 1 bit is shifted left
      ;;; until it drops out, so the register is 0 again on return.
  914 DEFUN   __divqq_helper
  915     clr     r_quo           ; clear quotient
  916     inc     __zero_reg__    ; init loop counter, used per shift
  917 __udivuqq3_loop:
  918     lsl     r_divd          ; shift dividend
  919     brcs    0f              ; dividend overflow
  920     cp      r_divd,r_div    ; compare dividend & divisor
  921     brcc    0f              ; dividend >= divisor
  922     rol     r_quo           ; shift quotient (with CARRY)
  923     rjmp    __udivuqq3_cont
  924 0:
  925     sub     r_divd,r_div    ; restore dividend
  926     lsl     r_quo           ; shift quotient (without CARRY)
  927 __udivuqq3_cont:
  928     lsl     __zero_reg__    ; shift loop-counter bit
  929     brne    __udivuqq3_loop
  930     com     r_quo           ; complement result
  931                             ; because C flag was complemented in loop
  932     ret
  933 ENDF __divqq_helper
  934 #endif  /* L_divqq_helper */
 935
 936 #undef  r_divd
 937 #undef  r_quo
 938 #undef  r_div
 939 #undef  r_sign
 940
 941
 942 /*******************************************************
 943     Fractional Division 16 / 16
 944 *******************************************************/
  945 #define r_divdL 26     /* dividend Low */
  946 #define r_divdH 27     /* dividend High */
  947 #define r_quoL  24     /* quotient Low */
  948 #define r_quoH  25     /* quotient High */
  949 #define r_divL  22     /* divisor */
  950 #define r_divH  23     /* divisor */
  951 #define r_cnt   21
 952
 953 #if defined (L_divhq3)
 954 DEFUN   __divhq3
 955     mov     r0, r_divdH
 956     eor     r0, r_divH
 957     sbrs    r_divH, 7
 958     rjmp    1f
 959     NEG2    r_divL
 960 1:
 961     sbrs    r_divdH, 7
 962     rjmp    2f
 963     NEG2    r_divdL
 964 2:
 965     cp      r_divdL, r_divL
 966     cpc     r_divdH, r_divH
 967     breq    __divhq3_minus1  ; if equal return -1
 968     XCALL   __udivuhq3
 969     lsr     r_quoH
 970     ror     r_quoL
 971     brpl    9f
 972     ;; negate result if needed
 973     NEG2    r_quoL
 974 9:
 975     ret
 976 __divhq3_minus1:
 977     ldi     r_quoH, 0x80
 978     clr     r_quoL
 979     ret
 980 ENDF __divhq3
 981 #endif  /* defined (L_divhq3) */
 982
  983 #if defined (L_udivuhq3)
      ;;; __udivuhq3: unsigned 16-bit fractional division
      ;;;     (R25:R24) = (R27:R26) / (R23:R22)
      ;;; Clears the high quotient byte and the carry, then falls through
      ;;; into the division loop shared with __udivuha3.
  984 DEFUN   __udivuhq3
  985     sub     r_quoH,r_quoH   ; clear quotient and carry
  986     ;; FALLTHRU
  987 ENDF __udivuhq3
  988
      ;;; __udivuha3_common: shift-and-subtract division, 16 steps.
      ;;; In:  dividend in r_divdH:r_divdL, divisor in r_divH:r_divL,
      ;;;      r_quoH and the carry flag as prepared by the caller
      ;;; Out: quotient in r_quoH:r_quoL; dividend and r_cnt (R21) clobbered
  989 DEFUN   __udivuha3_common
  990     clr     r_quoL          ; clear quotient
  991     ldi     r_cnt,16        ; init loop counter
  992 __udivuhq3_loop:
  993     rol     r_divdL         ; shift dividend (with CARRY)
  994     rol     r_divdH
  995     brcs    __udivuhq3_ep   ; dividend overflow
  996     cp      r_divdL,r_divL  ; compare dividend & divisor
  997     cpc     r_divdH,r_divH
  998     brcc    __udivuhq3_ep   ; dividend >= divisor
  999     rol     r_quoL          ; shift quotient (with CARRY)
 1000     rjmp    __udivuhq3_cont
 1001 __udivuhq3_ep:
 1002     sub     r_divdL,r_divL  ; restore dividend
 1003     sbc     r_divdH,r_divH
 1004     lsl     r_quoL          ; shift quotient (without CARRY)
 1005 __udivuhq3_cont:
 1006     rol     r_quoH          ; shift quotient
 1007     dec     r_cnt           ; decrement loop counter
 1008     brne    __udivuhq3_loop
 1009     com     r_quoL          ; complement result
 1010     com     r_quoH          ; because C flag was complemented in loop
 1011     ret
 1012 ENDF __udivuha3_common
 1013 #endif  /* defined (L_udivuhq3) */
1014
1015 /*******************************************************
1016     Fixed Division 8.8 / 8.8
1017 *******************************************************/
 1018 #if defined (L_divha3)
      ;;; __divha3: signed 16-bit fixed-point division
      ;;;     (R25:R24) = (R27:R26) / (R23:R22)
      ;;; The XOR of the operand signs is kept in bit 7 of r0.  Both operands
      ;;; are negated if negative and divided unsigned by __udivuha3; the
      ;;; quotient is then halved and negated if r0 requests it.
 1019 DEFUN   __divha3
 1020     mov     r0, r_divdH
 1021     eor     r0, r_divH      ; bit 7 = sign of the result
 1022     sbrs    r_divH, 7
 1023     rjmp    1f
 1024     NEG2    r_divL
 1025 1:
 1026     sbrs    r_divdH, 7
 1027     rjmp    2f
 1028     NEG2    r_divdL
 1029 2:
 1030     XCALL   __udivuha3
 1031     lsr     r_quoH  ; adjust to 7 fractional bits
 1032     ror     r_quoL
 1033     sbrs    r0, 7   ; negate result if needed
 1034     ret
 1035     NEG2    r_quoL
 1036     ret
 1037 ENDF __divha3
 1038 #endif  /* defined (L_divha3) */
1039
 1040 #if defined (L_udivuha3)
      ;;; __udivuha3: unsigned 16-bit fixed-point division
      ;;;     (R25:R24) = (R27:R26) / (R23:R22)
      ;;; Rearranges the dividend so that the fractional division loop can
      ;;; be reused: the low dividend byte goes to r_quoH, from where the
      ;;; loop shifts it into the dividend bit by bit; the high byte becomes
      ;;; the low dividend byte and the high dividend byte is cleared.
 1041 DEFUN   __udivuha3
 1042     mov     r_quoH, r_divdL
 1043     mov     r_divdL, r_divdH
 1044     clr     r_divdH
 1045     lsl     r_quoH     ; shift quotient into carry
 1046     XJMP    __udivuha3_common ; same as fractional after rearrange
 1047 ENDF __udivuha3
 1048 #endif  /* defined (L_udivuha3) */
1049
1050 #undef  r_divdL
1051 #undef  r_divdH
1052 #undef  r_quoL
1053 #undef  r_quoH
1054 #undef  r_divL
1055 #undef  r_divH
1056 #undef  r_cnt
1057
1058 /*******************************************************
1059     Fixed Division 16.16 / 16.16
1060 *******************************************************/
1061
1062 #define r_arg1L  24    /* arg1 gets passed already in place */
1063 #define r_arg1H  25
1064 #define r_arg1HL 26
1065 #define r_arg1HH 27
1066 #define r_divdL  26    /* dividend Low */
1067 #define r_divdH  27
1068 #define r_divdHL 30
1069 #define r_divdHH 31    /* dividend High */
1070 #define r_quoL   22    /* quotient Low */
1071 #define r_quoH   23
1072 #define r_quoHL  24
1073 #define r_quoHH  25    /* quotient High */
1074 #define r_divL   18    /* divisor Low */
1075 #define r_divH   19
1076 #define r_divHL  20
1077 #define r_divHH  21    /* divisor High */
1078 #define r_cnt  __zero_reg__  /* loop count (0 after the loop!) */
1079
 1080 #if defined (L_divsa3)
      ;;; __divsa3: signed 32-bit fixed-point division
      ;;;     (R25:R22) = (R27:R24) / (R21:R18)
      ;;; The XOR of the operand signs is kept in bit 7 of r0.  Both operands
      ;;; are negated if negative and divided unsigned by __udivusa3; the
      ;;; quotient is then halved and, if r0 requests it, negated by
      ;;; tail-jumping to __negsi2.
 1081 DEFUN   __divsa3
 1082     mov     r0, r_arg1HH
 1083     eor     r0, r_divHH     ; bit 7 = sign of the result
 1084     sbrs    r_divHH, 7
 1085     rjmp    1f
 1086     NEG4    r_divL
 1087 1:
 1088     sbrs    r_arg1HH, 7
 1089     rjmp    2f
 1090     NEG4    r_arg1L
 1091 2:
 1092     XCALL   __udivusa3
 1093     lsr     r_quoHH ; adjust to 15 fractional bits
 1094     ror     r_quoHL
 1095     ror     r_quoH
 1096     ror     r_quoL
 1097     sbrs    r0, 7   ; negate result if needed
 1098     ret
 1099     ;; negate r_quoL
 1100     XJMP    __negsi2
 1101 ENDF __divsa3
 1102 #endif  /* defined (L_divsa3) */
1103
 1104 #if defined (L_udivusa3)
      ;;; __udivusa3: unsigned 32-bit fixed-point division
      ;;;     (R25:R22) = (R27:R24) / (R21:R18)
      ;;; Shift-and-subtract division, 32 steps.  The upper argument word
      ;;; R27:R26 starts as the low word of the dividend register
      ;;; R31:R30:R27:R26; the lower argument word R25:R24 sits in the upper
      ;;; quotient bytes and is shifted into the dividend bit by bit while
      ;;; quotient bits enter from below.
      ;;; __zero_reg__ is the loop counter and is 0 again on return.
 1105 DEFUN   __udivusa3
 1106     ldi     r_divdHL, 32    ; init loop counter
 1107     mov     r_cnt, r_divdHL
 1108     clr     r_divdHL
 1109     clr     r_divdHH
 1110     wmov    r_quoL, r_divdHL
 1111     lsl     r_quoHL         ; shift quotient into carry
 1112     rol     r_quoHH
 1113 __udivusa3_loop:
 1114     rol     r_divdL         ; shift dividend (with CARRY)
 1115     rol     r_divdH
 1116     rol     r_divdHL
 1117     rol     r_divdHH
 1118     brcs    __udivusa3_ep   ; dividend overflow
 1119     cp      r_divdL,r_divL  ; compare dividend & divisor
 1120     cpc     r_divdH,r_divH
 1121     cpc     r_divdHL,r_divHL
 1122     cpc     r_divdHH,r_divHH
 1123     brcc    __udivusa3_ep   ; dividend >= divisor
 1124     rol     r_quoL          ; shift quotient (with CARRY)
 1125     rjmp    __udivusa3_cont
 1126 __udivusa3_ep:
 1127     sub     r_divdL,r_divL  ; restore dividend
 1128     sbc     r_divdH,r_divH
 1129     sbc     r_divdHL,r_divHL
 1130     sbc     r_divdHH,r_divHH
 1131     lsl     r_quoL          ; shift quotient (without CARRY)
 1132 __udivusa3_cont:
 1133     rol     r_quoH          ; shift quotient
 1134     rol     r_quoHL
 1135     rol     r_quoHH
 1136     dec     r_cnt           ; decrement loop counter
 1137     brne    __udivusa3_loop
 1138     com     r_quoL          ; complement result
 1139     com     r_quoH          ; because C flag was complemented in loop
 1140     com     r_quoHL
 1141     com     r_quoHH
 1142     ret
 1143 ENDF __udivusa3
 1144 #endif  /* defined (L_udivusa3) */
1145
1146 #undef  r_arg1L
1147 #undef  r_arg1H
1148 #undef  r_arg1HL
1149 #undef  r_arg1HH
1150 #undef  r_divdL
1151 #undef  r_divdH
1152 #undef  r_divdHL
1153 #undef  r_divdHH
1154 #undef  r_quoL
1155 #undef  r_quoH
1156 #undef  r_quoHL
1157 #undef  r_quoHH
1158 #undef  r_divL
1159 #undef  r_divH
1160 #undef  r_divHL
1161 #undef  r_divHH
1162 #undef  r_cnt
1163
1164 \f
1165 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1166 ;; Saturation, 1 Byte
1167 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1168
1169 ;; First Argument and Return Register
1170 #define A0  24
1171
1172 #if defined (L_ssabs_1)
     ;; __ssabs_1: saturating absolute value of the signed byte in A0.
     ;; The result is returned in A0; an input of 0x80 yields 0x7f.
1173 DEFUN __ssabs_1
1174     sbrs    A0, 7              ; non-negative input:
1175     ret                        ;   return it unchanged
1176     neg     A0                 ; A0 = -A0  (0x80 stays 0x80)
1177     sbrc    A0,7               ; still negative, so the input was 0x80:
1178     dec     A0                 ;   saturate to 0x7f
1179     ret
1180 ENDF __ssabs_1
1181 #endif /* L_ssabs_1 */
1182
1183 #undef A0
1184
1185
1186 \f
1187 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1188 ;; Saturation, 2 Bytes
1189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1190
1191 ;; First Argument and Return Register
1192 #define A0  24
1193 #define A1  A0+1
1194
1195 #if defined (L_ssneg_2)
     ;; __ssneg_2: saturating negation of the signed 2-byte value in
     ;; A1:A0, result in place.
     ;; NEG2 is a macro defined elsewhere; this code relies on it
     ;; negating A1:A0 and leaving V set on signed overflow.
1196 DEFUN __ssneg_2
1197     NEG2    A0
1198     brvc 0f                    ; no signed overflow: done
1199     sbiw    A0, 1              ; overflow: step down by one to saturate
1200 0:  ret
1201 ENDF __ssneg_2
1202 #endif /* L_ssneg_2 */
1203
1204 #if defined (L_ssabs_2)
     ;; __ssabs_2: saturating absolute value of the signed 2-byte value
     ;; in A1:A0.  Non-negative inputs are returned unchanged, negative
     ;; ones are handed to __ssneg_2 by a tail-jump.
1205 DEFUN __ssabs_2
1206     sbrs    A1, 7              ; sign bit clear:
1207     ret                        ;   nothing to do
1208     XJMP    __ssneg_2
1209 ENDF __ssabs_2
1210 #endif /* L_ssabs_2 */
1211
1212 #undef A0
1213 #undef A1
1214
1215
1216 \f
1217 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1218 ;; Saturation, 4 Bytes
1219 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1220
1221 ;; First Argument and Return Register
1222 #define A0  22
1223 #define A1  A0+1
1224 #define A2  A0+2
1225 #define A3  A0+3
1226
1227 #if defined (L_ssneg_4)
     ;; __ssneg_4: saturating negation of the signed 4-byte value in
     ;; A3:A0 (R25:R22), result in place.
     ;; __negsi2 is defined elsewhere; this code relies on it negating
     ;; A3:A0 and leaving V set on signed overflow.
1228 DEFUN __ssneg_4
1229     XCALL   __negsi2
1230     brvc 0f                    ; no signed overflow: done
1231     ldi     A3, 0x7f           ; overflow: A[] = 0x7fffffff
1232     ldi     A2, 0xff
1233     ldi     A1, 0xff
1234     ldi     A0, 0xff
1235 0:  ret
1236 ENDF __ssneg_4
1237 #endif /* L_ssneg_4 */
1238
1239 #if defined (L_ssabs_4)
     ;; __ssabs_4: saturating absolute value of the signed 4-byte value
     ;; in A3:A0.  Non-negative inputs are returned unchanged, negative
     ;; ones are handed to __ssneg_4 by a tail-jump.
1240 DEFUN __ssabs_4
1241     sbrs    A3, 7              ; sign bit clear:
1242     ret                        ;   nothing to do
1243     XJMP    __ssneg_4
1244 ENDF __ssabs_4
1245 #endif /* L_ssabs_4 */
1246
1247 #undef A0
1248 #undef A1
1249 #undef A2
1250 #undef A3
1251
1252
1253 \f
1254 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1255 ;; Saturation, 8 Bytes
1256 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1257
1258 ;; First Argument and Return Register
     ;; A[] is the 8-byte value in R25:R18, A0 being the low byte.
1259 #define A0  18
1260 #define A1  A0+1
1261 #define A2  A0+2
1262 #define A3  A0+3
1263 #define A4  A0+4
1264 #define A5  A0+5
1265 #define A6  A0+6
1266 #define A7  A0+7
1267
1268 #if defined (L_clr_8)
     ;; The __usneg*2 symbols are emitted right in front of __clr_8.
     ;; NOTE(review): assumes FALIAS (defined elsewhere) makes each of
     ;; them an alias of the DEFUN that follows -- confirm.
1269 FALIAS __usneguta2
1270 FALIAS __usneguda2
1271 FALIAS __usnegudq2
1272
1273 ;; Clear Carry and all Bytes
     ;; __clr_8 has no "ret": it clears the carry and then runs into
     ;; __sbc_8, which with C = 0 stores 0 in all eight bytes.
1274 DEFUN __clr_8
1275     ;; Clear Carry and set Z
1276     sub     A7, A7
1277     ;; FALLTHRU
1278 ENDF  __clr_8
1279 ;; Propagate Carry to all Bytes, Carry unaltered
     ;; Every byte of A[] becomes 0xff if C is set on entry and 0x00
     ;; otherwise.
1280 DEFUN __sbc_8
1281     sbc     A7, A7             ; A7 = C ? 0xff : 0x00, C stays as it was
1282     sbc     A6, A6             ; same for A6
1283     wmov    A4, A6             ; copy the pair A7:A6 down to A5:A4,
1284     wmov    A2, A6             ;   A3:A2
1285     wmov    A0, A6             ;   and A1:A0
1286     ret
1287 ENDF __sbc_8
1288 #endif /* L_clr_8 */
1289
1290 #if defined (L_ssneg_8)
     ;; __ssneg_8: saturating negation of the signed 8-byte value A[],
     ;; result in place.
     ;; __negdi2 is defined elsewhere; this code relies on it negating
     ;; A[] and leaving V set on signed overflow.
     ;; NOTE(review): the three FALIAS names presumably become aliases
     ;; of __ssneg_8 -- confirm against the FALIAS definition.
1291 FALIAS __ssnegta2
1292 FALIAS __ssnegda2
1293 FALIAS __ssnegdq2
1294
1295 DEFUN __ssneg_8
1296     XCALL   __negdi2
1297     brvc 0f                    ; no signed overflow: done
1298     ;; A[] = 0x7fffffffffffffff
1299     sec                        ; C = 1 makes __sbc_8 store 0xff everywhere
1300     XCALL   __sbc_8
1301     ldi     A7, 0x7f           ; then clear the sign bit
1302 0:  ret
1303 ENDF __ssneg_8
1304 #endif /* L_ssneg_8 */
1305
1306 #if defined (L_ssabs_8)
     ;; __ssabs_8: saturating absolute value of the signed 8-byte value
     ;; A[].  Non-negative inputs are returned unchanged, negative ones
     ;; are handed to __ssneg_8 by a tail-jump.
     ;; NOTE(review): the three FALIAS names presumably become aliases
     ;; of __ssabs_8 -- confirm against the FALIAS definition.
1307 FALIAS __ssabsta2
1308 FALIAS __ssabsda2
1309 FALIAS __ssabsdq2
1310
1311 DEFUN __ssabs_8
1312     sbrs    A7, 7              ; sign bit clear:
1313     ret                        ;   nothing to do
1314     XJMP    __ssneg_8
1315 ENDF __ssabs_8
1316 #endif /* L_ssabs_8 */
1317
1318 ;; Second Argument
     ;; B[] is the 8-byte value in R17:R10, B0 being the low byte.
     ;; Only B7 (R17) is read directly by the routines below.
1319 #define B0  10
1320 #define B1  B0+1
1321 #define B2  B0+2
1322 #define B3  B0+3
1323 #define B4  B0+4
1324 #define B5  B0+5
1325 #define B6  B0+6
1326 #define B7  B0+7
1327
1328 #if defined (L_usadd_8)
     ;; __usadd_8: unsigned saturating 8-byte addition.
     ;; __adddi3 is defined elsewhere; this code relies on it computing
     ;; A[] += B[] and leaving C set on unsigned overflow.
     ;; NOTE(review): the three FALIAS names presumably become aliases
     ;; of __usadd_8 -- confirm against the FALIAS definition.
1329 FALIAS __usadduta3
1330 FALIAS __usadduda3
1331 FALIAS __usaddudq3
1332
1333 DEFUN __usadd_8
1334     XCALL   __adddi3
1335     brcs 0f                    ; carry out: saturate
1336     ret
1337 0:  ;; A[] = 0xffffffffffffffff  (C is set here, so __sbc_8 stores 0xff)
1338     XJMP    __sbc_8
1339 ENDF __usadd_8
1340 #endif /* L_usadd_8 */
1341
1342 #if defined (L_ussub_8)
     ;; __ussub_8: unsigned saturating 8-byte subtraction.
     ;; __subdi3 is defined elsewhere; this code relies on it computing
     ;; A[] -= B[] and leaving C set on borrow.
     ;; NOTE(review): the three FALIAS names presumably become aliases
     ;; of __ussub_8 -- confirm against the FALIAS definition.
1343 FALIAS __ussubuta3
1344 FALIAS __ussubuda3
1345 FALIAS __ussubudq3
1346
1347 DEFUN __ussub_8
1348     XCALL   __subdi3
1349     brcs 0f                    ; borrow: saturate
1350     ret
1351 0:  ;; A[] = 0
1352     XJMP    __clr_8
1353 ENDF __ussub_8
1354 #endif /* L_ussub_8 */
1355
1356 #if defined (L_ssadd_8)
     ;; __ssadd_8: signed saturating 8-byte addition.
     ;; __adddi3 is defined elsewhere; this code relies on it computing
     ;; A[] += B[] and leaving V set on signed overflow.
     ;; NOTE(review): the three FALIAS names presumably become aliases
     ;; of __ssadd_8 -- confirm against the FALIAS definition.
1357 FALIAS __ssaddta3
1358 FALIAS __ssaddda3
1359 FALIAS __ssadddq3
1360
1361 DEFUN __ssadd_8
1362     XCALL   __adddi3
1363     brvc 0f                    ; no signed overflow: done
1364     ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
1365     cpi     B7, 0x80           ; C = 1 iff B7 < 0x80, i.e. B >= 0
1366     XCALL   __sbc_8            ; A[] = C ? all 0xff : all 0x00
1367     subi    A7, 0x80           ; 0xff -> 0x7f, 0x00 -> 0x80
1368 0:  ret
1369 ENDF __ssadd_8
1370 #endif /* L_ssadd_8 */
1371
1372 #if defined (L_sssub_8)
     ;; __sssub_8: signed saturating 8-byte subtraction.
     ;; __subdi3 is defined elsewhere; this code relies on it computing
     ;; A[] -= B[] and leaving V set on signed overflow.
     ;; NOTE(review): the three FALIAS names presumably become aliases
     ;; of __sssub_8 -- confirm against the FALIAS definition.
1373 FALIAS __sssubta3
1374 FALIAS __sssubda3
1375 FALIAS __sssubdq3
1376
1377 DEFUN __sssub_8
1378     XCALL   __subdi3
1379     brvc 0f                    ; no signed overflow: done
1380     ;; A = (B < 0) ? INT64_MAX : INT64_MIN
1381     ldi     A7, 0x7f
1382     cp      A7, B7             ; C = 1 iff 0x7f < B7, i.e. B < 0
1383     XCALL   __sbc_8            ; A[] = C ? all 0xff : all 0x00
1384     subi    A7, 0x80           ; 0xff -> 0x7f, 0x00 -> 0x80
1385 0:  ret
1386 ENDF __sssub_8
1387 #endif /* L_sssub_8 */
1388
1389 #undef A0
1390 #undef A1
1391 #undef A2
1392 #undef A3
1393 #undef A4
1394 #undef A5
1395 #undef A6
1396 #undef A7
1397 #undef B0
1398 #undef B1
1399 #undef B2
1400 #undef B3
1401 #undef B4
1402 #undef B5
1403 #undef B6
1404 #undef B7
1405
1406 \f
1407 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1408 ;; Rounding Helpers
1409 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1410
1411 #ifdef L_mask1
1412
1413 #define AA 24
1414 #define CC 25
1415
1416 ;; R25 = 1 << (R24 & 7)
1417 ;; CC  = 1 << (AA  & 7)
1418 ;; Clobbers: None
     ;; The single bit is built from the three low bits of AA without a
     ;; loop: bit 1 contributes a factor of 4, bit 0 a factor of 2 and
     ;; bit 2 a factor of 16.  AA itself is only read.
1419 DEFUN __mask1
1420     ;; CC = 4 ^ AA.1, i.e. 4 if bit 1 of AA is set and 1 otherwise
1421     ldi     CC, 1 << 2
1422     sbrs    AA, 1
1423     ldi     CC, 1 << 0
1424     ;; CC *= 2 ^ AA.0
1425     sbrc    AA, 0
1426     lsl     CC
1427     ;; CC *= 16 ^ AA.2  (CC is at most 8 here, so swapping the nibbles
1428     sbrc    AA, 2
1429     swap    CC                 ; multiplies by 16)
1430     ret
1431 ENDF __mask1
1432
1433 #undef AA
1434 #undef CC
1435 #endif /* L_mask1 */
1436
1437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1438
1439 ;; The rounding point. Any bits smaller than
1440 ;; 2^{-RP} will be cleared.
     ;; RP is R24, the same register as C0 below: the 1-byte and 2-byte
     ;; rounding routines consume RP before they produce C[].
1441 #define RP R24
1442
     ;; A[]: the value to round, R23:R22.
1443 #define A0 22
1444 #define A1 A0 + 1
1445
     ;; C[]: the result, R25:R24.
1446 #define C0 24
1447 #define C1 C0 + 1
1448
1449 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1450 ;; Rounding, 1 Byte
1451 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1452
1453 #ifdef L_roundqq3
1454
1455 ;; R24 = round (R22, R24)
1456 ;; Clobbers: R22, __tmp_reg__
     ;; Signed 1-byte rounding: adds half of the rounding unit to the
     ;; value with signed saturation, then clears all bits below the
     ;; rounding point.  R25 is preserved by parking it in __tmp_reg__
     ;; because __mask1 delivers its result in R25.
1457 DEFUN  __roundqq3
1458     mov     __tmp_reg__, C1    ; save R25
1459     subi    RP, __QQ_FBIT__ - 1
1460     neg     RP                 ; RP = (__QQ_FBIT__ - 1) - RP
1461     ;; R25 = 1 << RP  (Total offset is FBIT-1 - RP)
1462     XCALL   __mask1
1463     mov     C0, C1             ; C0 = the addend; RP is consumed now
1464     ;; Add-Saturate 2^{-RP-1}
1465     add     A0, C0
1466     brvc 0f                    ; no signed overflow: go mask
1467     ldi     C0, 0x7f           ; overflow: result is the largest value
1468     rjmp 9f
1469 0:  ;; Mask out bits beyond RP
1470     lsl     C0                 ; C0 = rounding unit = 2 * addend
1471     neg     C0                 ; C0 = mask with all bits >= unit set
1472     and     C0, A0
1473 9:  mov     C1, __tmp_reg__    ; restore R25
1474     ret
1475 ENDF  __roundqq3
1476 #endif /* L_roundqq3 */
1477
1478 #ifdef L_rounduqq3
1479
1480 ;; R24 = round (R22, R24)
1481 ;; Clobbers: R22, __tmp_reg__
     ;; Unsigned 1-byte rounding: adds half of the rounding unit to the
     ;; value with unsigned saturation, then clears all bits below the
     ;; rounding point.  R25 is preserved by parking it in __tmp_reg__
     ;; because __mask1 delivers its result in R25.
1482 DEFUN  __rounduqq3
1483     mov     __tmp_reg__, C1    ; save R25
1484     subi    RP, __UQQ_FBIT__ - 1
1485     neg     RP                 ; RP = (__UQQ_FBIT__ - 1) - RP
1486     ;; R25 = 1 << RP  (Total offset is FBIT-1 - RP)
1487     XCALL   __mask1
1488     mov     C0, C1             ; C0 = the addend; RP is consumed now
1489     ;; Add-Saturate 2^{-RP-1}
1490     add     A0, C0
1491     brcc 0f                    ; no carry out: go mask
1492     ldi     C0, 0xff           ; carry out: result is the largest value
1493     rjmp 9f
1494 0:  ;; Mask out bits beyond RP
1495     lsl     C0                 ; C0 = rounding unit = 2 * addend
1496     neg     C0                 ; C0 = mask with all bits >= unit set
1497     and     C0, A0
1498 9:  mov     C1, __tmp_reg__    ; restore R25
1499     ret
1500 ENDF  __rounduqq3
1501 #endif /* L_rounduqq3 */
1502
1503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1504 ;; Rounding, 2 Bytes
1505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1506
1507 #ifdef L_addmask_2
1508
1509 ;; [ R25:R24 =  1 << (R24 & 15)
1510 ;;   R23:R22 += 1 << (R24 & 15) ]
1511 ;; SREG is set according to the addition
     ;; The byte selection below compares all of RP against 8, so the
     ;; header formula holds for RP in 0...15.
     ;; NOTE(review): callers are assumed never to pass a larger RP.
1512 DEFUN __addmask_2
1513     ;; R25 = 1 << (R24 & 7)
1514     XCALL   __mask1
1515     cpi     RP, 1 << 3         ; C = 1 iff RP < 8
1516     sbc     C0, C0             ; C0 = (RP < 8) ? 0xff : 0x00
1517     ;; Move the bit from C1 to C0 if RP < 8, i.e. RP.3 is not set;
     ;; otherwise it stays in C1 and C0 becomes 0.
1518     and     C0, C1
1519     eor     C1, C0
1520     ;; Finally, add the power-of-two:  A[] += C[]
1521     add     A0, C0
1522     adc     A1, C1
1523     ret
1524 ENDF  __addmask_2
1525 #endif /* L_addmask_2 */
1526
1527 #ifdef L_round_s2
1528
1529 ;; R25:R24 = round (R23:R22, R24)
1530 ;; Clobbers: R23, R22
     ;; Signed 2-byte rounding.  __roundhq3 only rebases the rounding
     ;; point and then runs into __roundha3; taken together it ends up
     ;; with RP = (__HQ_FBIT__ - 1) - RP.
1531 DEFUN  __roundhq3
1532     subi    RP, __HQ_FBIT__ - __HA_FBIT__
     ;; FALLTHRU
1533 ENDF   __roundhq3
1534 DEFUN  __roundha3
1535     subi    RP, __HA_FBIT__ - 1
1536     neg     RP                 ; RP = (__HA_FBIT__ - 1) - RP
1537     ;; [ R25:R24  = 1 << (FBIT-1 - RP)
1538     ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
1539     XCALL   __addmask_2
     ;; The flags of that addition are still live here;
     ;; __round_s2_const branches on V to saturate or mask.
1540     XJMP    __round_s2_const
1541 ENDF  __roundha3
1542
1543 #endif /* L_round_s2 */
1544
1545 #ifdef L_round_u2
1546
1547 ;; R25:R24 = round (R23:R22, R24)
1548 ;; Clobbers: R23, R22
     ;; Unsigned 2-byte rounding.  __rounduhq3 only rebases the rounding
     ;; point and then runs into __rounduha3; taken together it ends up
     ;; with RP = (__UHQ_FBIT__ - 1) - RP.
1549 DEFUN  __rounduhq3
1550     subi    RP, __UHQ_FBIT__ - __UHA_FBIT__
     ;; FALLTHRU
1551 ENDF   __rounduhq3
1552 DEFUN  __rounduha3
1553     subi    RP, __UHA_FBIT__ - 1
1554     neg     RP                 ; RP = (__UHA_FBIT__ - 1) - RP
1555     ;; [ R25:R24  = 1 << (FBIT-1 - RP)
1556     ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
1557     XCALL   __addmask_2
     ;; The flags of that addition are still live here;
     ;; __round_u2_const branches on C to saturate or mask.
1558     XJMP    __round_u2_const
1559 ENDF  __rounduha3
1560
1561 #endif /* L_round_u2 */
1562
1563
1564 #ifdef L_round_2_const
1565
1566 ;; Helpers for 2 byte wide rounding
     ;; Both helpers are entered with the flags of the addition done in
     ;; __addmask_2 still intact, A[] holding the sum and C[] holding
     ;; the addend.  They share everything from label 1 on.
     ;; Result in C[] (R25:R24).
1567
1568 DEFUN  __round_s2_const
1569     brvc 2f                    ; no signed overflow: go mask
1570     ldi     C1, 0x7f           ; overflow: C[] = 0x7fff via label 1
1571     rjmp 1f
1572     ;; FALLTHRU (Barrier)
1573 ENDF  __round_s2_const
1574
1575 DEFUN __round_u2_const
1576     brcc 2f                    ; no carry out: go mask
1577     ldi     C1, 0xff           ; carry out: C[] = 0xffff
1578 1:
1579     ldi     C0, 0xff           ; low byte of either saturation value
1580     rjmp 9f
1581 2:
1582     ;; Saturation is performed now.
1583     ;; Currently, we have C[] = 2^{-RP-1}
1584     ;; C[] = 2^{-RP}
1585     lsl     C0
1586     rol     C1
1587     ;; C[] = -C[]: all bits at and above the rounding point are set
1588     NEG2    C0
1589     ;; Clear the bits beyond the rounding point.
1590     and     C0, A0
1591     and     C1, A1
1592 9:  ret
1593 ENDF  __round_u2_const
1594
1595 #endif /* L_round_2_const */
1596
1597 #undef A0
1598 #undef A1
1599 #undef C0
1600 #undef C1
1601
1602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1603 ;; Rounding, 4 Bytes
1604 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1605
     ;; A[]: the value to round, R21:R18.
1606 #define A0 18
1607 #define A1 A0 + 1
1608 #define A2 A0 + 2
1609 #define A3 A0 + 3
1610
     ;; C[]: the result, R25:R22.  The rounding point RP (R24) is the
     ;; same register as C2 here.
1611 #define C0 22
1612 #define C1 C0 + 1
1613 #define C2 C0 + 2
1614 #define C3 C0 + 3
1615
1616 #ifdef L_addmask_4
1617
1618 ;; [ R25:R22 =  1 << (R24 & 31)
1619 ;;   R21:R18 += 1 << (R24 & 31) ]
1620 ;; SREG is set according to the addition
     ;; __mask1 yields the bit position inside a byte (in C3 = R25);
     ;; the code below moves that byte into the one of C3...C0 selected
     ;; by bits 4 and 3 of RP and zeroes the other three.
     ;; RP (R24) is the same register as C2 and is consumed on the way.
     ;; Bit 4 is derived from "RP < 16", so RP must be in 0...31.
1621 DEFUN __addmask_4
1622     ;; R25 = 1 << (R24 & 7)
1623     XCALL   __mask1
     ;; C1:C0 = (RP < 16) ? 0xffff : 0x0000
1624     cpi     RP, 1 << 4
1625     sbc     C0, C0
1626     sbc     C1, C1
1627     ;; Swap C2 with C3 if RP.3 is not set
     ;; Bit 3 must be isolated before the compare: comparing all of RP
     ;; against 8 would treat RP = 16...23 like "RP.3 set" and leave the
     ;; bit in C3 (bits 24...31) instead of C2 (bits 16...23).
     ;; Destroying RP is fine, it is overwritten by the SBC right below.
1628     andi    RP, 1 << 3  $  cpi     RP, 1 << 3
1629     sbc     C2, C2
1630     and     C2, C3
1631     eor     C3, C2
1632     ;; Swap C3:C2 with C1:C0 if RP.4 is not set
1633     and     C0, C2  $  eor     C2, C0
1634     and     C1, C3  $  eor     C3, C1
1635     ;; Finally, add the power-of-two:  A[] += C[]
1636     add     A0, C0
1637     adc     A1, C1
1638     adc     A2, C2
1639     adc     A3, C3
1640     ret
1641 ENDF  __addmask_4
1642 #endif /* L_addmask_4 */
1643
1644 #ifdef L_round_s4
1645
1646 ;; R25:R22 = round (R21:R18, R24)
1647 ;; Clobbers: R18...R21
     ;; Signed 4-byte rounding.  __roundsq3 only rebases the rounding
     ;; point and then runs into __roundsa3; taken together it ends up
     ;; with RP = (__SQ_FBIT__ - 1) - RP.
1648 DEFUN  __roundsq3
1649     subi    RP, __SQ_FBIT__ - __SA_FBIT__
     ;; FALLTHRU
1650 ENDF   __roundsq3
1651 DEFUN  __roundsa3
1652     subi    RP, __SA_FBIT__ - 1
1653     neg     RP                 ; RP = (__SA_FBIT__ - 1) - RP
1654     ;; [ R25:R22  = 1 << (FBIT-1 - RP)
1655     ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
1656     XCALL   __addmask_4
     ;; The flags of that addition are still live here;
     ;; __round_s4_const branches on V to saturate or mask.
1657     XJMP    __round_s4_const
1658 ENDF  __roundsa3
1659
1660 #endif /* L_round_s4 */
1661
1662 #ifdef L_round_u4
1663
1664 ;; R25:R22 = round (R21:R18, R24)
1665 ;; Clobbers: R18...R21
     ;; Unsigned 4-byte rounding.  __roundusq3 only rebases the rounding
     ;; point and then runs into __roundusa3; taken together it ends up
     ;; with RP = (__USQ_FBIT__ - 1) - RP.
1666 DEFUN  __roundusq3
1667     subi    RP, __USQ_FBIT__ - __USA_FBIT__
     ;; FALLTHRU
1668 ENDF   __roundusq3
1669 DEFUN  __roundusa3
1670     subi    RP, __USA_FBIT__ - 1
1671     neg     RP                 ; RP = (__USA_FBIT__ - 1) - RP
1672     ;; [ R25:R22  = 1 << (FBIT-1 - RP)
1673     ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
1674     XCALL   __addmask_4
     ;; The flags of that addition are still live here;
     ;; __round_u4_const branches on C to saturate or mask.
1675     XJMP    __round_u4_const
1676 ENDF  __roundusa3
1677
1678 #endif /* L_round_u4 */
1679
1680
1681 #ifdef L_round_4_const
1682
1683 ;; Helpers for 4 byte wide rounding
     ;; Both helpers are entered with the flags of the addition done in
     ;; __addmask_4 still intact, A[] holding the sum and C[] holding
     ;; the addend.  They share everything from label 1 on.
     ;; Result in C[] (R25:R22).
1684
1685 DEFUN  __round_s4_const
1686     brvc 2f                    ; no signed overflow: go mask
1687     ldi     C3, 0x7f           ; overflow: C[] = 0x7fffffff via label 1
1688     rjmp 1f
1689     ;; FALLTHRU (Barrier)
1690 ENDF  __round_s4_const
1691
1692 DEFUN __round_u4_const
1693     brcc 2f                    ; no carry out: go mask
1694     ldi     C3, 0xff           ; carry out: C[] = 0xffffffff
1695 1:
1696     ldi     C2, 0xff           ; low bytes of either saturation value
1697     ldi     C1, 0xff
1698     ldi     C0, 0xff
1699     rjmp 9f
1700 2:
1701     ;; Saturation is performed now.
1702     ;; Currently, we have C[] = 2^{-RP-1}
1703     ;; C[] = 2^{-RP}
1704     lsl     C0
1705     rol     C1
1706     rol     C2
1707     rol     C3
     ;; C[] = -C[]: all bits at and above the rounding point are set.
     ;; __negsi2 is defined elsewhere and is relied on to negate R25:R22.
1708     XCALL   __negsi2
1709     ;; Clear the bits beyond the rounding point.
1710     and     C0, A0
1711     and     C1, A1
1712     and     C2, A2
1713     and     C3, A3
1714 9:  ret
1715 ENDF  __round_u4_const
1716
1717 #endif /* L_round_4_const */
1718
1719 #undef A0
1720 #undef A1
1721 #undef A2
1722 #undef A3
1723 #undef C0
1724 #undef C1
1725 #undef C2
1726 #undef C3
1727
1728 #undef RP
1729
1730 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1731 ;; Rounding, 8 Bytes
1732 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1733
     ;; RP: the rounding point as passed by the caller (R16).
     ;; FBITm1: the number of fractional bits minus 1, loaded into R31
     ;; by the type-specific entry points before they jump to __round_x8.
1734 #define RP     16
1735 #define FBITm1 31
1736
     ;; C[]: input value and result, R25:R18.
1737 #define C0 18
1738 #define C1 C0 + 1
1739 #define C2 C0 + 2
1740 #define C3 C0 + 3
1741 #define C4 C0 + 4
1742 #define C5 C0 + 5
1743 #define C6 C0 + 6
1744 #define C7 C0 + 7
1745
     ;; A[]: work copy of the input.  It is not contiguous:
     ;; R16, R17 and R26...R31.  A0 is the same register as RP and
     ;; A7 is the same register as FBITm1.
1746 #define A0 16
1747 #define A1 17
1748 #define A2 26
1749 #define A3 27
1750 #define A4 28
1751 #define A5 29
1752 #define A6 30
1753 #define A7 31
1754
1755
1756 #ifdef L_rounddq3
1757 ;; R25:R18 = round (R25:R18, R16)
1758 ;; Clobbers: ABI
     ;; Thin entry point: passes the format to __round_x8 in FBITm1
     ;; and selects its signed saturation path with T = 0.
1759 DEFUN  __rounddq3
1760     ldi     FBITm1, __DQ_FBIT__ - 1
1761     clt                        ; T = 0: signed
1762     XJMP    __round_x8
1763 ENDF  __rounddq3
1764 #endif /* L_rounddq3 */
1765
1766 #ifdef L_roundudq3
1767 ;; R25:R18 = round (R25:R18, R16)
1768 ;; Clobbers: ABI
     ;; Thin entry point: passes the format to __round_x8 in FBITm1
     ;; and selects its unsigned saturation path with T = 1.
1769 DEFUN  __roundudq3
1770     ldi     FBITm1, __UDQ_FBIT__ - 1
1771     set                        ; T = 1: unsigned
1772     XJMP    __round_x8
1773 ENDF  __roundudq3
1774 #endif /* L_roundudq3 */
1775
1776 #ifdef L_roundda3
1777 ;; R25:R18 = round (R25:R18, R16)
1778 ;; Clobbers: ABI
     ;; Thin entry point: passes the format to __round_x8 in FBITm1
     ;; and selects its signed saturation path with T = 0.
1779 DEFUN  __roundda3
1780     ldi     FBITm1, __DA_FBIT__ - 1
1781     clt                        ; T = 0: signed
1782     XJMP    __round_x8
1783 ENDF  __roundda3
1784 #endif /* L_roundda3 */
1785
1786 #ifdef L_rounduda3
1787 ;; R25:R18 = round (R25:R18, R16)
1788 ;; Clobbers: ABI
     ;; Thin entry point: passes the format to __round_x8 in FBITm1
     ;; and selects its unsigned saturation path with T = 1.
1789 DEFUN  __rounduda3
1790     ldi     FBITm1, __UDA_FBIT__ - 1
1791     set                        ; T = 1: unsigned
1792     XJMP    __round_x8
1793 ENDF  __rounduda3
1794 #endif /* L_rounduda3 */
1795
1796 #ifdef L_roundta3
1797 ;; R25:R18 = round (R25:R18, R16)
1798 ;; Clobbers: ABI
     ;; Thin entry point: passes the format to __round_x8 in FBITm1
     ;; and selects its signed saturation path with T = 0.
1799 DEFUN  __roundta3
1800     ldi     FBITm1, __TA_FBIT__ - 1
1801     clt                        ; T = 0: signed
1802     XJMP    __round_x8
1803 ENDF  __roundta3
1804 #endif /* L_roundta3 */
1805
1806 #ifdef L_rounduta3
1807 ;; R25:R18 = round (R25:R18, R16)
1808 ;; Clobbers: ABI
     ;; Thin entry point: passes the format to __round_x8 in FBITm1
     ;; and selects its unsigned saturation path with T = 1.
1809 DEFUN  __rounduta3
1810     ldi     FBITm1, __UTA_FBIT__ - 1
1811     set                        ; T = 1: unsigned
1812     XJMP    __round_x8
1813 ENDF  __rounduta3
1814 #endif /* L_rounduta3 */
1815
1816
1817 #ifdef L_round_x8
     ;; __round_x8: common worker of all 8-byte rounding entry points.
     ;; In:  C[] (R25:R18) = value, RP (R16) = rounding point,
     ;;      FBITm1 (R31) = fractional bits - 1,
     ;;      T = 0 for signed, T = 1 for unsigned saturation
     ;; Out: C[] = rounded value
     ;; R16, R17, R28 and R29 are saved and restored; R26, R27, R30 and
     ;; R31 are overwritten.
     ;; __clr_8 is the helper from the saturation section.  __ashldi3
     ;; and __negdi2 are defined elsewhere; this code relies on
     ;; __ashldi3 shifting C[] left by the count in R16 and on __negdi2
     ;; negating C[].
1818 DEFUN __round_x8
1819     push r16
1820     push r17
1821     push r28
1822     push r29
1823     ;; Compute log2 of addend from rounding point
1824     sub     RP, FBITm1
1825     neg     RP                 ; RP = FBITm1 - RP
1826     ;; Move input to work register A[]
1827     push    C0                 ; A0 is RP and still busy: park C0 on the stack
1828     mov     A1, C1
1829     wmov    A2, C2
1830     wmov    A4, C4
1831     wmov    A6, C6             ; overwrites FBITm1, which is no longer needed
1832     ;; C[] = 1 << (FBIT-1 - RP)
1833     XCALL   __clr_8
1834     inc     C0
1835     XCALL   __ashldi3
1836     pop     A0                 ; shift count consumed: A0 = original C0
1837     ;; A[] += C[]
1838     add     A0, C0
1839     adc     A1, C1
1840     adc     A2, C2
1841     adc     A3, C3
1842     adc     A4, C4
1843     adc     A5, C5
1844     adc     A6, C6
1845     adc     A7, C7
1846     brts    1f                 ; T = 1: unsigned overflow test
1847     ;; Signed
1848     brvc    3f
1849     ;; Signed overflow: A[] = 0x7f...
1850     brvs    2f                 ; V is known to be set here: always taken
1851 1:  ;; Unsigned
1852     brcc    3f
1853     ;; Unsigned overflow: A[] = 0xff...
1854 2:  ldi     C7, 0xff
1855     ldi     C6, 0xff
1856     wmov    C0, C6
1857     wmov    C2, C6
1858     wmov    C4, C6
1859     bld     C7, 7              ; C7.7 = T: 0x7f for signed, 0xff for unsigned
1860     rjmp 9f
1861 3:
1862     ;;  C[] = -C[] - C[]
1863     push    A0                 ; R16 is needed for the shift count again
1864     ldi     r16, 1
1865     XCALL   __ashldi3
1866     pop     A0
1867     XCALL   __negdi2
1868     ;; Clear the bits beyond the rounding point.
1869     and     C0, A0
1870     and     C1, A1
1871     and     C2, A2
1872     and     C3, A3
1873     and     C4, A4
1874     and     C5, A5
1875     and     C6, A6
1876     and     C7, A7
1877 9:  ;; Epilogue
1878     pop r29
1879     pop r28
1880     pop r17
1881     pop r16
1882     ret
1883 ENDF  __round_x8
1884
1885 #endif /* L_round_x8 */
1886
1887 #undef A0
1888 #undef A1
1889 #undef A2
1890 #undef A3
1891 #undef A4
1892 #undef A5
1893 #undef A6
1894 #undef A7
1895
1896 #undef C0
1897 #undef C1
1898 #undef C2
1899 #undef C3
1900 #undef C4
1901 #undef C5
1902 #undef C6
1903 #undef C7
1904
1905 #undef RP
1906 #undef FBITm1
1907
1908
1909 ;; Supply implementations / symbols for the bit-banging functions
1910 ;; __builtin_avr_bitsfx and __builtin_avr_fxbits
     ;; __ret does nothing but return: all registers are left as they
     ;; were on entry.
1911 #ifdef L_ret
1912 DEFUN __ret
1913     ret
1914 ENDF  __ret
1915 #endif /* L_ret */