gcc/config/c4x/libgcc.S

   1 /* libgcc routines for the Texas Instruments TMS320C[34]x
   2    Copyright (C) 1997,98, 1999 Free Software Foundation, Inc.
   3
   4  Contributed by Michael Hayes (m.hayes@elec.canterbury.ac.nz)
   5             and Herman Ten Brugge (Haj.Ten.Brugge@net.HCC.nl).
   6
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it
  11 under the terms of the GNU General Public License as published by the
  12 Free Software Foundation; either version 2, or (at your option) any
  13 later version.
  14
  15 In addition to the permissions in the GNU General Public License, the
  16 Free Software Foundation gives you unlimited permission to link the
  17 compiled version of this file into combinations with other programs,
  18 and to distribute those combinations without any restriction coming
  19 from the use of this file.  (The General Public License restrictions
  20 do apply in other respects; for example, they cover modification of
  21 the file, and distribution when not linked into a combine
  22 executable.)
  23
  24 This file is distributed in the hope that it will be useful, but
  25 WITHOUT ANY WARRANTY; without even the implied warranty of
  26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27 General Public License for more details.
  28
  29 You should have received a copy of the GNU General Public License
  30 along with this program; see the file COPYING.  If not, write to
  31 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  32 Boston, MA 02110-1301, USA.  */
  33
  34 ; These routines are called using the standard TI register argument
  35 ; passing model.
  36 ; The following registers do not have to be saved:
  37 ; r0, r1, r2, r3, ar0, ar1, ar2, ir0, ir1, bk, rs, rc, re, (r9, r10, r11)
  38 ;
  39 ; Perform floating point divqf3
  40 ;
  41 ; This routine performs a reciprocal of the divisor using the method
  42 ; described in the C30/C40 user manuals.  It then multiplies that
  43 ; result by the dividend.
  44 ;
  45 ; Let r be the reciprocal of the divisor v and let the ith estimate
  46 ; of r be denoted by r[i].  An iterative approach can be used to
  47 ; improve the estimate of r, given an initial estimate r[0], where
  48 ;
  49 ; r[i + 1] = r[i] * (2.0 - v * r[i])
  50 ;
  51 ; The normalized error e[i] at the ith iteration is
  52 ;
  53 ; e[i] = (r - r[i]) / r = (1 / v - r[i]) * v = (1 - v * r[i])
  54 ;
  55 ; Note that
  56 ;
  57 ; e[i + 1]  = (1 - v * r[i + 1]) = 1 - 2 * v * r[i] + v^2 * (r[i])^2
  58 ;           = (1 - v * r[i])^2 = (e[i])^2
  59
  60 ; r2 dividend, r3 divisor, r0 quotient
  61 ; clobbers r1, ar1
     ; When .REGPARM == 0 the operands are read from the stack instead
     ; (divisor at *-sp(2), dividend at *-sp(1) on entry) and ar0 is
     ; clobbered as well.
  62 #ifdef L_divsf3
  63         .text
  64         .global ___divqf3
  65 ___divqf3:
  66
  67 #ifdef _TMS320C4x
     ; C4x version: RCPF supplies the initial estimate, followed by two
     ; refinement iterations.
  68         .if .REGPARM == 0
  69         lda     sp,ar0          ; ar0 = stack pointer on entry
  70         ldf     *-ar0(2), r3    ; Fetch divisor from the stack
  71         .endif
  72
  73         pop     ar1             ; Pop return address
  74
  75 ; r0 = estimate of r, r1 = tmp, r2 = dividend, r3 = divisor
  76         rcpf    r3, r0          ; Compute initial estimate r[0]
  77
  78         mpyf3   r0, r3, r1      ; r1 = r[0] * v
  79         subrf   2.0, r1         ; r1 = 2.0 - r[0] * v
  80         mpyf    r1, r0          ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
  81 ; End of 1st iteration (16 bits accuracy)
  82
  83         mpyf3   r0, r3, r1      ; r1 = r[1] * v
  84         subrf   2.0, r1         ; r1 = 2.0 - r[1] * v
  85
  86         bud     ar1             ; Delayed branch (next 3 instructions still run)
  87         mpyf    r1, r0          ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
  88 ; End of 2nd iteration (32 bits accuracy)
  89         .if .REGPARM == 0
  90         mpyf    *-ar0(1), r0    ; Multiply by the dividend
  91         .else
  92         mpyf    r2, r0          ; Multiply by the dividend
  93         .endif
  94         rnd     r0              ; Round the quotient (last delay slot)
  95         ; Branch occurs here
  96 #else
     ; Non-C4x version: no RCPF, so the initial estimate is built by
     ; complementing the bit pattern of the divisor; three plain
     ; iterations plus one modified iteration follow.
  97         .if .REGPARM == 0
  98         ldiu    sp,ar0          ; ar0 = stack pointer on entry
  99         ldf     *-ar0(2), r3    ; Fetch divisor from the stack
 100         .endif
 101
 102         pop     ar1             ; Pop return address
 103
 104 ; Initial estimate       r[0] = 1.0 * 2^(-e - 1)
 105 ; where                  v = m * 2^e
 106
 107 ; r0 = estimate of r, r1 = tmp, r2 = dividend, r3 = divisor
 108
 109 ; Calculate initial estimate r[0]
 110         pushf   r3              ; Move the float bit pattern of v ...
 111         pop     r0              ; ... into r0 as an integer
 112         not     r0              ; r0 = -e
 113                                 ; complement exponent = -e -1
 114                                 ; complement sign (side effect)
 115                                 ; complement mantissa (almost 3 bit accurate)
 116         push    r0              ; Move the integer bit pattern back ...
 117         popf    r0              ; r0 = 1.0 * 2^(-e - 1) + inverted mantissa
 118         ldf     -1.0, r1        ; undo complement sign bit
 119         xor     r1, r0
 120
 121         mpyf3   r0, r3, r1      ; r1 = r[0] * v
 122         subrf   2.0, r1         ; r1 = 2.0 - r[0] * v
 123         mpyf    r1, r0          ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
 124 ; End of 1st iteration
 125
 126         mpyf3   r0, r3, r1      ; r1 = r[1] * v
 127         subrf   2.0, r1         ; r1 = 2.0 - r[1] * v
 128         mpyf    r1, r0          ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
 129 ; End of 2nd iteration
 130
 131         mpyf3   r0, r3, r1      ; r1 = r[2] * v
 132         subrf   2.0, r1         ; r1 = 2.0 - r[2] * v
 133         mpyf    r1, r0          ; r0 = r[2] * (2.0 - r[2] * v) = r[3]
 134 ; End of 3rd iteration
 135
 136         rnd     r0              ; Minimize error in r[3]'s LSBs
 137
 138 ; Use modified last iteration
 139 ; r[4] = (r[3] * (1.0 - (v * r[3]))) + r[3]
 140         mpyf3   r0, r3, r1      ; r1 = r[3] * v
 141         subrf   1.0, r1         ; r1 = 1.0 - r[3] * v
 142         mpyf    r0, r1          ; r1 = r[3] * (1.0 - r[3] * v)
 143         addf    r1, r0          ; r0 = r[3] * (1.0 - r[3] * v) + r[3] = r[4]
 144
 145         rnd     r0              ; Minimize error in r[4]'s LSBs
 146
 147         bud     ar1             ; Delayed branch (next 3 instructions still run)
 148
 149         .if .REGPARM == 0
 150         ldfu    *-ar0(1), r2    ; Dividend in mem has only 24 bits significance
 151         .else
 152         rnd     r2              ; Minimize error in reg dividend's LSBs
 153                                 ; since this may have 32 bit significance
 154         .endif
 155
 156         mpyf    r2, r0          ; Multiply by the dividend
 157         rnd     r0              ; Round result to 32 bits
 158
 159         ; Branch occurs here
 160 #endif
 161
 162 #endif
 163 ;
 164 ; Integer signed division
 165 ;
 166 ; ar2 dividend, r2 divisor, r0 quotient
 167 ; clobbers r1, r3, ar0, ar1, ir0, ir1, rc, rs, re
     ; When .REGPARM == 0 the dividend is read from *-sp(1) and the
     ; divisor from *-sp(2).
 168 #ifdef L_divsi3
 169         .text
 170         .global ___divqi3
 171         .ref    udivqi3n
 172 ___divqi3:
 173         .if .REGPARM == 0
 174 #ifdef _TMS320C4x
 175         lda     sp,ar0          ; ar0 = stack pointer on entry
 176 #else
 177         ldiu    sp,ar0          ; ar0 = stack pointer on entry
 178 #endif
 179         ldi     *-ar0(1), ar2   ; Fetch dividend from the stack
 180         ldi     *-ar0(2), r2    ; Fetch divisor from the stack
 181         .endif
 182
 183         xor3    ar2, r2, r3     ; Get the sign
 184         absi    ar2, r0         ; r0 = abs(dividend); overflows for 0x80000000
 185         bvd     divq32          ; Delayed: the next 3 instructions still run
 186         ldi     r0, ar2         ; ar2 = abs(dividend)
 187         absi    r2, r2          ; r2 = abs(divisor)
 188         cmpi    ar2, r2         ; Divisor > dividend?
 189
 190         pop     ir1             ; Pop return address
 191         bhid    zero            ; If so, return 0
 192
 193 ;
 194 ; Normalize operands.  Use difference exponents as shift count
 195 ; for divisor, and as repeat count for "subc"
 196 ;
 197         float   ar2, r1         ; Normalize dividend
 198         pushf   r1              ; Get as integer
 199         pop     ar0
 200         lsh     -24, ar0        ; Get exponent
 201
 202         float   r2, r1          ; Normalize divisor
 203         pushf   r1              ; Get as integer
 204         pop     ir0
 205         lsh     -24, ir0        ; Get exponent
 206
 207         subi    ir0, ar0        ; Get difference of exponents
 208         lsh     ar0, r2         ; Align divisor with dividend
 209
 210 ;
 211 ; Do count + 1 subtracts and shifts
 212 ;
 213         rpts    ar0
 214                 subc    r2, ar2
 215
 216 ;
 217 ; Mask off the lower count+1 bits of ar2
 218 ;
 219         subri   31, ar0         ; Shift count is (32 - (ar0 + 1))
 220         lsh     ar0, ar2        ; Shift left
 221         negi    ar0, ar0
 222         lsh3    ar0, ar2, r0    ; Shift right and put result in r0
 223
 224 ;
 225 ; Check sign and negate result if necessary
 226 ;
 227         bud     ir1             ; Delayed return
 228         negi    r0, r1          ; Negate result
 229         ash     -31, r3         ; Check sign
 230         ldinz   r1, r0          ; If set, use negative result
 231         ; Branch occurs here
 232
 233 zero:   bud     ir1             ; Delayed branch
 234         ldi     0, r0           ; Quotient is 0
 235         nop
 236         nop
 237         ; Branch occurs here
 238 ;
 239 ; special case where ar2 = abs(ar2) = 0x80000000.  We handle this by
 240 ; calling unsigned divide and negating the result if necessary.
 241 ;
 242 divq32:
 243         push    r3              ; Save sign
 244         call    udivqi3n        ; Unsigned divide; ar2, r2 were set in the delay slots of the bvd above
 245         pop     r3              ; Restore sign
 246         pop     ir1             ; Pop our own return address
 247         bd      ir1             ; Delayed return
 248         negi    r0, r1          ; Negate result
 249         ash     -31, r3         ; Check sign
 250         ldinz   r1, r0          ; If set, use negative result
 251         ; Branch occurs here
 252 #endif
 253 ;
 254 ; Integer unsigned division
     ;
 255 ; ar2 dividend, r2 divisor, r0 quotient,
 256 ; clobbers r1, r3, ar0, ar1, ir0, ir1, rc, rs, re
     ; udivqi3n is a second entry point that skips the stack argument
     ; load; it is referenced by the signed divide routine.
 257 #ifdef L_udivsi3
 258         .text
 259         .global ___udivqi3
 260         .global udivqi3n
 261 ___udivqi3:
 262         .if .REGPARM == 0
 263 #ifdef _TMS320C4x
 264         lda     sp,ar0          ; ar0 = stack pointer on entry
 265 #else
 266         ldiu    sp,ar0          ; ar0 = stack pointer on entry
 267 #endif
 268         ldi     *-ar0(1), ar2   ; Fetch dividend from the stack
 269         ldi     *-ar0(2), r2    ; Fetch divisor from the stack
 270         .endif
 271
 272 udivqi3n:
 273         pop     ir1             ; Pop return address
 274
 275         cmpi    ar2, r2         ; If divisor > dividend
 276         bhi     qzero           ; return zero
 277         ldi     r2, ar1         ; Store divisor in ar1
 278
 279         tstb    ar2, ar2        ; Check top bit, jump if set to special handler
 280         bld     div_32          ; Delayed branch
 281
 282 ;
 283 ; Get divisor exponent
 284 ;
 285         float   ar1, r1         ; Normalize the divisor
 286         pushf   r1              ; Get into int register
 287         pop     rc
 288         ; branch occurs here
 289
 290         bzd     qzero           ; if (float) divisor zero, return zero
 291
 292         float   ar2, r1         ; Normalize the dividend
 293         pushf   r1              ; Get into int register
 294         pop     ar0
 295         lsh     -24, ar0        ; Get both the exponents
 296         lsh     -24, rc
 297
 298         subi    rc, ar0         ; Get the difference between the exponents
 299         lsh     ar0, ar1        ; Normalize the divisor with the dividend
 300
 301 ;
 302 ; Do count+1 subtracts and shifts
 303 ;
 304         rpts    ar0
 305                 subc    ar1, ar2
 306
 307 ;
 308 ; mask off the lower count+1 bits
 309 ;
 310         subri   31, ar0         ; Shift count is (32 - (ar0 + 1))
 311         bud     ir1             ; Delayed return
 312         lsh3    ar0, ar2, r0    ; Shift left into r0
 313         negi    ar0, ar0
 314         lsh     ar0, r0         ; Shift right again
 315         ; Branch occurs here
 316
 317 ;
 318 ; Handle a full 32-bit dividend
 319 ;
 320 div_32: tstb    ar1, ar1
 321         bld     qone            ; if divisor high bit is one, the result is one
 322         lsh     -24, rc         ; rc = divisor exponent
 323         subri   31, rc          ; rc = 31 - exponent
 324         lsh     rc, ar1         ; Line up the divisor
 325
 326 ;
 327 ; Now divisor and dividend are aligned.  Do first SUBC by hand, save
 328 ; of the first quotient digit.  Then, shift divisor right rather
 329 ; than shifting dividend left.  This leaves a zero in the top bit of
 330 ; the dividend
 331 ;
 332         ldi     1, ar0          ; Initialize MSB of quotient
 333         lsh     rc, ar0         ; create a mask for MSBs
 334         subi    1, ar0          ; mask is (1 << rc) - 1
 335
 336         subi3   ar1, ar2, r1    ; r1 = dividend - aligned divisor
 337         ldihs   r1, ar2         ; if no borrow, keep the difference
 338         ldihs   1, r1           ; ... and the first quotient digit is 1
 339         ldilo   0, r1           ; otherwise the first quotient digit is 0
 340         lsh     rc, r1          ; move the digit to its bit position
 341
 342         lsh     -1, ar1
 343         subi    1, rc
 344 ;
 345 ; do the rest of the shifts and subtracts
 346 ;
 347         rpts    rc
 348                 subc    ar1, ar2
 349
 350         bud     ir1             ; Delayed return
 351         and     ar0, ar2        ; keep the quotient bits produced by subc
 352         or3     r1, ar2, r0     ; merge in the first quotient digit
 353         nop
 354
 355 qone:
 356         bud     ir1             ; Delayed return
 357         ldi     1, r0           ; Quotient is 1
 358         nop
 359         nop
 360
 361 qzero:
 362         bud     ir1             ; Delayed return
 363         ldi     0, r0           ; Quotient is 0
 364         nop
 365         nop
 366 #endif
 367
 368 #ifdef L_umodsi3
     ;
     ; Integer unsigned modulo
     ;
     ; ar2 dividend, r2 divisor, r0 remainder
     ; umodqi3n is a second entry point that skips the stack argument
     ; load; it is referenced by the signed modulo routine.
     ;
 369         .text
 370         .global ___umodqi3
 371         .global umodqi3n
 372 ___umodqi3:
 373         .if .REGPARM == 0
 374 #ifdef _TMS320C4x
 375         lda     sp,ar0          ; ar0 = stack pointer on entry
 376 #else
 377         ldiu    sp,ar0          ; ar0 = stack pointer on entry
 378 #endif
 379         ldi     *-ar0(1), ar2   ; fetch dividend from the stack
 380         ldi     *-ar0(2), r2    ; fetch divisor from the stack
 381         .endif
 382
 383 umodqi3n:
 384         pop     ir1             ; return address
 385         cmpi    ar2, r2         ; divisor > dividend ?
 386         bhi     uzero           ;    if so, return dividend
 387         ldi     r2, ar1         ; load divisor
 388 ;
 389 ; If top bit of dividend is set, handle specially.
 390 ;
 391         tstb    ar2, ar2        ; check top bit
 392         bld     umod_32         ; get divisor exponent, then jump.
 393 ;
 394 ; Get divisor exponent by converting to float.
 395 ;
 396         float   ar1, r1         ; normalize divisor
 397         pushf   r1              ; push as float
 398         pop     rc              ; pop as int to get exponent
 399         bzd     uzero           ; if (float)divisor was zero, return
 400 ;
 401 ; 31 or less bits in dividend.  Get dividend exponent.
 402 ;
 403         float   ar2, r1         ; normalize dividend
 404         pushf   r1              ; push as float
 405         pop     ar0             ; pop as int to get exponent
 406 ;
 407 ; Use difference in exponents as shift count to line up MSBs.
 408 ;
 409         lsh     -24, rc         ; divisor exponent
 410         lsh     -24, ar0        ; dividend exponent
 411         subi    rc, ar0         ; difference
 412         lsh     ar0, ar1        ; shift divisor up
 413 ;
 414 ; Do COUNT+1 subtract & shifts.
 415 ;
 416         rpts    ar0
 417                 subc    ar1, ar2
 418 ;
 419 ;  Remainder is in upper 31-COUNT bits.
 420 ;
 421         bud     ir1             ; delayed branch to return
 422         addi    1, ar0          ; shift count is COUNT+1
 423         negi    ar0, ar0        ; negate for right shift
 424         lsh3    ar0, ar2, r0    ; shift to get result
 425         ; Return occurs here
 426
 427 ;
 428 ; The following code handles cases of a full 32-bit dividend.  Before
 429 ; SUBC can be used, the top bit must be cleared (otherwise SUBC can
 430 ; possibly shift a significant 1 out the top of the dividend).  This
 431 ; is accomplished by first doing a normal subtraction, then proceeding
 432 ; with SUBCs.
 433 ;
 434 umod_32:
 435 ;
 436 ; If the top bit of the divisor is set too, the remainder is simply
 437 ; the difference between the dividend and divisor.  Otherwise, shift
 438 ; the divisor up to line up the MSBs.
 439 ;
 440         tstb    ar1, ar1        ; check divisor
 441         bld     uone            ; if negative, remainder is diff
 442
 443         lsh     -24, rc         ; divisor exponent
 444         subri   31, rc          ; shift count = 31 - exp
 445         negi    rc, ar0         ; used later as shift count
 446         lsh     rc, ar1         ; shift up to line up MSBs
 447 ;
 448 ; Now MSBs are aligned.  Do first SUBC by hand using a plain subtraction.
 449 ; Then, shift divisor right rather than shifting dividend left.  This leaves
 450 ; a 0 in the top bit of the dividend.
 451 ;
 452         subi3   ar1, ar2, r1    ; subtract
 453         ldihs   r1, ar2         ; if positive, replace dividend
 454         subi    1, rc           ; first iteration is done
 455         lsh     -1, ar1         ; shift divisor down
 456 ;
 457 ; Do EXP subtract & shifts.
 458 ;
 459         rpts    rc
 460                 subc    ar1, ar2
 461 ;
 462 ;  Quotient is in EXP+1 LSBs; shift remainder (in MSBs) down.
 463 ;
 464         bud     ir1             ; delayed branch to return
 465         lsh3    ar0, ar2, r0    ; COUNT contains -(EXP+1)
 466         nop
 467         nop
 468 ;
 469 ;  Return (dividend - divisor).
 470 ;
 471 uone:   bud     ir1             ; delayed branch to return
 472         subi3   r2, ar2, r0     ; r0 = dividend - divisor
 473         nop
 474         nop
 475 ;
 476 ;  Return dividend.
 477 ;
 478 uzero:  bud     ir1             ; delayed branch to return
 479         ldi     ar2, r0         ; set status from result
 480         nop
 481         nop
 482 #endif
 483
 484 #ifdef L_modsi3
     ;
     ; Integer signed modulo
     ;
     ; ar2 dividend, r2 divisor, r0 remainder (sign follows the dividend)
     ; clobbers r1, r2, ar0, ar1, ir1, rc, rs, re
     ;
 485         .text
 486         .global ___modqi3
 487         .ref umodqi3n
 488 ___modqi3:
 489         .if .REGPARM == 0
 490 #ifdef _TMS320C4x
 491         lda     sp,ar0          ; ar0 = stack pointer on entry
 492 #else
 493         ldiu    sp,ar0          ; ar0 = stack pointer on entry
 494 #endif
 495         ldi     *-ar0(1), ar2   ; fetch dividend from the stack
 496         ldi     *-ar0(2), r2    ; fetch divisor from the stack
 497         .endif
 498
 499 ;
 500 ; Determine sign of result.  Get absolute value of operands.
 501 ;
 502         ldi     ar2, ar0        ; sign of result same as dividend
 503         absi    ar2, r0         ; make dividend positive
 504         bvd     mod_32          ; if still negative, escape
 505         absi    r2, r1          ; make divisor positive
 506         ldi     r1, ar1         ; save in ar1
 507         cmpi    r0, ar1         ; divisor > dividend ?
 508
 509         pop     ir1            ; return address
 510         bhid    return          ;   if so, return dividend
 511 ;
 512 ; Normalize operands.  Use difference in exponents as shift count
 513 ; for divisor, and as repeat count for SUBC.
 514 ;
 515         float   r1, r1          ; normalize divisor
 516         pushf   r1              ; push as float
 517         pop     rc              ; pop as int
 518         bzd     return          ; if (float)divisor was zero, return
 519
 520         float   r0, r1          ; normalize dividend
 521         pushf   r1              ; push as float
 522         pop     r1              ; pop as int
 523
 524         lsh     -24, rc         ; get divisor exponent
 525         lsh     -24, r1         ; get dividend exponent
 526         subi    rc, r1          ; get difference in exponents
 527         lsh     r1, ar1         ; align divisor with dividend
 528 ;
 529 ; Do COUNT+1 subtract & shifts.
 530 ;
 531         rpts    r1
 532                 subc    ar1, r0
 533 ;
 534 ;  Remainder is in upper bits of R0
 535 ;
 536         addi    1, r1           ; shift count is -(r1+1)
 537         negi    r1, r1
 538         lsh     r1, r0          ; shift right
 539 ;
 540 ;  Check sign and negate result if necessary.
 541 ;
 542 return:
 543         bud     ir1             ; delayed branch to return
 544         negi    r0, r1          ; negate result
 545         cmpi    0, ar0          ; check sign
 546         ldin    r1, r0          ; if set, use negative result
 547         ; Return occurs here
 548 ;
 549 ; The following code handles cases of a full 32-bit dividend.  This occurs
 550 ; when R0 = abs(R0) = 080000000h.  Handle this by calling the unsigned mod
 551 ; function, then negating the result if necessary.
 552 ;
     ; umodqi3n takes its divisor in r2 and treats it as unsigned, so it
     ; must be given abs(divisor).  That value was computed into r1 in the
     ; delay slots of the bvd above; without this copy a negative divisor
     ; would be seen as a huge unsigned value and the dividend would be
     ; returned unchanged.
     ;
 553 mod_32:
             ldi     r1, r2          ; r2 = abs(divisor) for the unsigned mod
 554         push    ar0             ; remember sign
 555         call    umodqi3n        ; do divide
 556
 557         brd     return          ; return
 558         pop     ar0             ; restore sign
 559         pop     ir1             ; return address
 560         nop
 561 #endif
 562
 563 #ifdef L_unsfltconst
     ; Float constant 2^32, placed in the .const section.
     ; NOTE(review): presumably used when converting unsigned integers
     ; to float -- confirm against the compiler's users of this symbol.
 564         .section .const
 565         .global ___unsfltconst
 566 ___unsfltconst:   .float 4294967296.0
 567 #endif
 568
 569 #ifdef L_unsfltcompare
     ; Float constant 2^31, placed in the .const section.
     ; NOTE(review): presumably the threshold used when converting float
     ; to unsigned integer -- confirm against the compiler's users of
     ; this symbol.
 570         .section .const
 571         .global ___unsfltcompare
 572 ___unsfltcompare: .float 2147483648.0
 573 #endif
 574
 575 ; Integer 32-bit signed multiplication
 576 ;
 577 ; The TMS320C3x MPYI instruction takes two 24-bit signed integers
 578 ; and produces a 48-bit signed result which is truncated to 32-bits.
 579 ;
 580 ; A 32-bit by 32-bit multiplication thus requires a number of steps.
 581 ;
 582 ; Consider the product of two 32-bit signed integers,
 583 ;
 584 ;       z = x * y
 585 ;
 586 ; where x = (b << 16) + a,  y = (d << 16) + c
 587 ;
 588 ; This can be expressed as
 589 ;
 590 ;       z = ((b << 16) + a) * ((d << 16) + c)
 591 ;
 592 ;          = ((b * d) << 32) + ((b * c + a * d) << 16) + a * c
 593 ;
 594 ; Let z = (f << 16) + e where f < (1 << 16).
 595 ;
 596 ; Since we are only interested in a 32-bit result, we can ignore the
 597 ; (b * d) << 32 term, and thus
 598 ;
 599 ;       f = b * c + a * d,  e = a * c
 600 ;
 601 ; We can simplify things if we have some a priori knowledge of the
 602 ; operands, for example, if -32768 <= y <= 32767, then y = c and d = 0 and thus
 603 ;
 604 ;       f = b * c,  e = a * c
 605 ;
 606 ; ar2 multiplier, r2 multiplicand, r0 product
 607 ; clobbers r1, r2, r3
     ; The code below also modifies ar2 and ir1, and ar0 when
     ; .REGPARM == 0 (arguments are then read from *-sp(1) and *-sp(2)).
 608 #ifdef L_mulsi3
 609         .text
 610         .global ___mulqi3
 611 ___mulqi3:
 612         .if .REGPARM == 0
 613 #ifdef _TMS320C4x
 614         lda     sp,ar0          ; ar0 = stack pointer on entry
 615 #else
 616         ldiu    sp,ar0          ; ar0 = stack pointer on entry
 617 #endif
 618         ldi     *-ar0(1), ar2   ; fetch multiplier from the stack
 619         ldi     *-ar0(2), r2    ; fetch multiplicand from the stack
 620         .endif
 621
 622         pop     ir1             ; return address
 623         ldi     ar2, r0         ;
 624         and     0ffffh, r0      ; a
 625         lsh     -16, ar2        ; b
 626         ldi     r2, r3          ;
 627         and     0ffffh, r3      ; c
 628         mpyi    r3, ar2         ; c * b
 629         lsh     -16, r2         ; d
 630         mpyi    r0, r2          ; a * d
 631         addi    ar2, r2         ; c * b + a * d
 632         bd      ir1             ; delayed branch to return
 633         lsh     16, r2          ; (c * b + a * d) << 16
 634         mpyi    r3, r0          ; a * c
 635         addi    r2, r0          ; a * c + (c * b + a * d) << 16
 636 ; branch occurs here
 637
 638 #endif
 639
 640 ;
 641 ; Integer 64 by 64 multiply
 642 ; long1 and long2 on stack
 643 ; result in r0,r1
 644 ;
 645 #ifdef L_muldi3
     ; Stack layout on entry, as read by the code below:
     ;   *-sp(1) high word of long1, *-sp(2) low word of long1,
     ;   *-sp(3) high word of long2, *-sp(4) low word of long2.
     ; r0 receives the low word and r1 the high word of the product.
 646         .text
 647         .global ___mulhi3
 648 #ifdef _TMS320C4x
 649 ___mulhi3:
 650         pop     ar0             ; return address (sp is now one lower)
 651         ldi     sp,ar2
 652         ldi     *-ar2(1),r2     ; r2 = low word of long1
 653         ldi     *-ar2(3),r3     ; r3 = low word of long2
 654         mpyi3   r2,r3,r0        ; r0 = low word of low1 * low2
 655         mpyuhi3 r2,r3,r1        ; r1 = high word of unsigned low1 * low2
 656         mpyi    *-ar2(2),r2     ; r2 = low1 * high2
 657         bd      ar0             ; delayed return
 658         mpyi    *-ar2(0),r3     ; r3 = low2 * high1
 659         addi    r2,r1           ; add both cross products into the
 660         addi    r3,r1           ; high word
 661 #else
     ; Non-C4x version: every 32-bit word is split into 16-bit halves and
     ; the partial products are accumulated into r0 (low) and r1 (high).
 662 ___mulhi3:
 663         ldi     sp,ar2
 664         ldi     -16,rs          ; rs = shift count for "logical right by 16"
 665         ldi     *-ar2(2),ar0    ; low word of long1
 666         ldi     *-ar2(4),ar1    ; low word of long2
 667         ldi     ar0,r2
 668         and     0ffffh,r2       ; r2 = lower 16 bits of low1
 669         ldi     ar1,r3
 670         and     0ffffh,r3       ; r3 = lower 16 bits of low2
 671         lsh     rs,ar0          ; ar0 = upper 16 bits of low1
 672         lsh     rs,ar1          ; ar1 = upper 16 bits of low2
 673
 674         mpyi    r2,r3,r0        ; r0 = lower * lower
 675         mpyi    ar0,ar1,r1      ; r1 = upper * upper
 676         mpyi    r2,ar1,rc       ; first cross product
 677         lsh     rs,rc,re        ; its upper 16 bits ...
 678         addi    re,r1           ; ... go into the high word
 679         lsh     16,rc           ; its lower 16 bits ...
 680         addi    rc,r0           ; ... go into the low word
 681         addc    0,r1            ; propagate the carry
 682         mpyi    r3,ar0,rc       ; second cross product, handled the same way
 683         lsh     rs,rc,re
 684         addi    re,r1
 685         lsh     16,rc
 686         addi    rc,r0
 687         addc    0,r1
 688
     ; Add (high word of long1) * (low word of long2) into the high word;
     ; r3/ar1 still hold the halves of the low word of long2.
 689         ldi     *-ar2(1),ar0
 690         ldi     ar0,r2
 691         and     0ffffh,r2
 692         lsh     rs,ar0
 693         mpyi    r2,r3,rc
 694         addi    rc,r1
 695         mpyi    r2,ar1,rc
 696         mpyi    r3,ar0,re
 697         addi    re,rc
 698         lsh     16,rc
 699         addi    rc,r1
 700
     ; Add (low word of long1) * (high word of long2) into the high word.
 701         ldi     *-ar2(2),ar0
 702         ldi     *-ar2(3),ar1
 703         ldi     ar0,r2
 704         and     0ffffh,r2
 705         ldi     ar1,r3
 706         and     0ffffh,r3
 707         lsh     rs,ar0
 708         lsh     rs,ar1
 709         mpyi    r2,r3,rc
 710         addi    rc,r1
 711         mpyi    r2,ar1,rc
 712         mpyi    r3,ar0,re
 713         pop     ar0             ; return address
 714         bd      ar0             ; delayed return
 715         addi    re,rc
 716         lsh     16,rc
 717         addi    rc,r1
 718 #endif
 719 #endif
 720
 721 ;
 722 ; Integer 32 by 32 multiply highpart unsigned
 723 ; src1 in ar2
 724 ; src2 in r2
 725 ; result in r0
 726 ;
 727 #ifdef L_umuldi3_high
     ; Both operands are split into 16-bit halves.  r1 accumulates the low
     ; word of the product only so that its carries can be propagated
     ; into r0, which holds the high word that is returned.
 728         .text
 729         .global ___umulhi3_high
 730 ___umulhi3_high:
 731         .if .REGPARM == 0
 732 #ifdef _TMS320C4x
 733         lda     sp,ar0          ; ar0 = stack pointer on entry
 734 #else
 735         ldiu    sp,ar0          ; ar0 = stack pointer on entry
 736 #endif
 737         ldi     *-ar0(1), ar2   ; fetch src1 from the stack
 738         ldi     *-ar0(2), r2    ; fetch src2 from the stack
 739         .endif
 740
 741         ldi     -16,rs          ; rs = shift count for "logical right by 16"
 742         ldi     r2,r3
 743         and     0ffffh,r2       ; r2 = lower 16 bits of src2
 744         ldi     ar2,ar1
 745         and     0ffffh,ar2      ; ar2 = lower 16 bits of src1
 746         lsh     rs,r3           ; r3 = upper 16 bits of src2
 747         lsh     rs,ar1          ; ar1 = upper 16 bits of src1
 748
 749         mpyi    ar2,r2,r1       ; r1 = lower * lower
 750         mpyi    ar1,r3,r0       ; r0 = upper * upper
 751         mpyi    ar2,r3,rc       ; first cross product
 752         lsh     rs,rc,re        ; its upper 16 bits ...
 753         addi    re,r0           ; ... go into the high word
 754         lsh     16,rc           ; its lower 16 bits ...
 755         addi    rc,r1           ; ... go into the low word
 756         addc    0,r0            ; propagate the carry
 757         mpyi    r2,ar1,rc       ; second cross product, handled the same way
 758         lsh     rs,rc,re
 759         addi    re,r0
 760         pop     ar0             ; return address
 761         bd      ar0             ; delayed return
 762         lsh     16,rc
 763         addi    rc,r1
 764         addc    0,r0
 765 #endif
 766
 767 ;
 768 ; Integer 32 by 32 multiply highpart signed
 769 ; src1 in ar2
 770 ; src2 in r2
 771 ; result in r0
 772 ;
 773 #ifdef L_smuldi3_high
     ; Computes the high word the same way as the unsigned routine (16-bit
     ; halves, logical shifts) and adds a correction term rc:
     ;   rc = 0, minus src1 if src2 < 0, minus src2 if src1 < 0.
 774         .text
 775         .global ___smulhi3_high
 776 ___smulhi3_high:
 777         .if .REGPARM == 0
 778 #ifdef _TMS320C4x
 779         lda     sp,ar0          ; ar0 = stack pointer on entry
 780 #else
 781         ldiu    sp,ar0          ; ar0 = stack pointer on entry
 782 #endif
 783         ldi     *-ar0(1), ar2   ; fetch src1 from the stack
 784         ldi     *-ar0(2), r2    ; fetch src2 from the stack
 785         .endif
 786
 787         ldi     -16,rs          ; rs = shift count for "logical right by 16"
 788         ldi     0,rc            ; rc = correction term, initially 0
 789         subi3   ar2,rc,r0       ; r0 = 0 - src1
 790         ldi     r2,r3           ; r3 = src2; the ldilt below tests this value
 791         ldilt   r0,rc           ; if src2 < 0, rc = -src1
 792         subi3   r2,rc,r0        ; r0 = rc - src2
 793         ldi     ar2,ar1
 794         tstb    ar1,ar1         ; test the sign of src1
 795         ldilt   r0,rc           ; if src1 < 0, rc = rc - src2
 796         and     0ffffh,r2       ; r2 = lower 16 bits of src2
 797         and     0ffffh,ar2      ; ar2 = lower 16 bits of src1
 798         lsh     rs,r3           ; r3 = upper 16 bits of src2
 799         lsh     rs,ar1          ; ar1 = upper 16 bits of src1
 800
 801         mpyi    ar2,r2,r1       ; r1 = lower * lower
 802         mpyi    ar1,r3,r0       ; r0 = upper * upper
 803         addi    rc,r0           ; apply the sign correction
 804         mpyi    ar2,r3,rc       ; first cross product
 805         lsh     rs,rc,re        ; its upper 16 bits ...
 806         addi    re,r0           ; ... go into the high word
 807         lsh     16,rc           ; its lower 16 bits ...
 808         addi    rc,r1           ; ... go into the low word
 809         addc    0,r0            ; propagate the carry
 810         mpyi    r2,ar1,rc       ; second cross product, handled the same way
 811         lsh     rs,rc,re
 812         addi    re,r0
 813         pop     ar0             ; return address
 814         bd      ar0             ; delayed return
 815         lsh     16,rc
 816         addi    rc,r1
 817         addc    0,r0
 818 #endif
 819
 820 ;
 821 ; Integer 64 by 64 unsigned divide
 822 ; long1 and long2 on stack
 823 ; divide in r0,r1
 824 ; modulo in r2,r3
 825 ; routine takes a maximum of 64*8+23=535 cycles = 21.4 us @ 50Mhz
 826 ;
 827 #ifdef L_udivdi3
     ; Entry points:
     ;   ___udivhi3  loads both operands from the stack, then falls into
     ;               ___udivide.
     ;   ___udivide  dividend in r0 (low), r1 (high); divisor in ar0 (low),
     ;               ar1 (high); quotient returned in r0, r1.
     ;   ___umodulo  same inputs; remainder returned in r2, r3.
     ; When both high words are zero the 32-bit routines are used and only
     ; the requested result is produced.  Otherwise the shift-and-subtract
     ; loops at udiv0/udiv1 produce quotient and remainder together.
 828         .text
 829         .global ___udivhi3
 830         .global ___udivide
 831         .global ___umodulo
 832         .ref udivqi3n
 833         .ref umodqi3n
 834 ___udivhi3:
 835         ldi     sp,ar2
 836         ldi     *-ar2(4),ar0    ; divisor, low word
 837         ldi     *-ar2(3),ar1    ; divisor, high word
 838         ldi     *-ar2(2),r0     ; dividend, low word
 839         ldi     *-ar2(1),r1     ; dividend, high word
 840
 841 ___udivide:
 842         or      r1,ar1,r2       ; either high word nonzero?
 843         bne     udiv0           ;   then do the full 64-bit divide
 844         ldi     ar0,r2          ; r2 = divisor for the 32-bit routine
 845         ldi     r0,ar2          ; ar2 = dividend for the 32-bit routine
 846         call    udivqi3n
 847         ldiu    0,r1            ; high word of the quotient is zero
 848         rets
 849
 850 ___umodulo:
 851         or      r1,ar1,r2       ; either high word nonzero?
 852         bne     udiv0           ;   then do the full 64-bit divide
 853         ldi     ar0,r2          ; r2 = divisor for the 32-bit routine
 854         ldi     r0,ar2          ; ar2 = dividend for the 32-bit routine
 855         call    umodqi3n
 856         ldi     r0,r2           ; remainder low word
 857         ldiu    0,r3            ; remainder high word is zero
 858         rets
 859
     ; udiv0 handles a divisor that fits in 31 bits (high word zero, top
     ; bit of the low word clear); every other divisor goes to udiv1.
 860 udiv0:
 861         tstb    ar1,ar1
 862         bne     udiv1
 863         tstb    ar0,ar0
 864         bn      udiv1
 865
 866         ldiu    63,rc           ; repeat count for the block loop
 867 #ifdef _TMS320C4x
 868         rptbd   udivend0
 869         ldiu    0,r2
 870         addi    r0,r0
 871         rolc    r1
 872 #else
 873         ldiu    0,r2            ; r2 = partial remainder
 874         addi    r0,r0           ; shift the dividend left by one,
 875         rolc    r1              ; top bit ends up in carry
 876         rptb    udivend0
 877 #endif
 878
 879         rolc    r2              ; shift next dividend bit into the remainder
 880         subi3   ar0,r2,r3       ; trial subtraction of the divisor
 881         ldinc   r3,r2           ; keep the difference if there was no borrow
 882         rolc    r0              ; shift the borrow flag into r0, r1
 883 udivend0:
 884         rolc    r1
 885
 886         not     r0              ; borrow bits were collected, so complement
 887         not     r1              ; them to obtain the quotient
 888         ldiu    0,r3            ; remainder high word is zero
 889         rets
 890 udiv1:
 891         push    r4              ; r4, r5 are used as scratch and restored
 892         push    r5
 893         ldiu    63,rc           ; repeat count for the block loop
 894         ldiu    0,r2
 895 #ifdef _TMS320C4x
 896         rptbd   udivend1
 897         ldiu    0,r3
 898         addi    r0,r0
 899         rolc    r1
 900 #else
 901         ldiu    0,r3            ; r2, r3 = partial remainder
 902         addi    r0,r0           ; shift the dividend left by one,
 903         rolc    r1              ; top bit ends up in carry
 904         rptb    udivend1
 905 #endif
 906
 907         rolc    r2              ; shift next dividend bit into the
 908         rolc    r3              ; 64-bit remainder
 909         subi3   ar0,r2,r4       ; trial subtraction, low word
 910         subb3   ar1,r3,r5       ; trial subtraction, high word with borrow
 911         ldinc   r4,r2           ; keep the difference if there was no borrow
 912         ldinc   r5,r3
 913         rolc    r0              ; shift the borrow flag into r0, r1
 914 udivend1:
 915         rolc    r1
 916
 917         not     r0              ; borrow bits were collected, so complement
 918         not     r1              ; them to obtain the quotient
 919         pop     r5
 920         pop     r4
 921         rets
 922 #endif
 923
 924 ;
 925 ; Integer 64 by 64 unsigned modulo
 926 ; long1 and long2 on stack
 927 ; result in r0,r1
 928 ;
 929 #ifdef L_umoddi3
     ; Loads both 64-bit operands from the stack, calls ___umodulo (which
     ; leaves the remainder in r2, r3) and copies the remainder to r0, r1.
     ; The external reference names ___umodulo, the routine that is
     ; actually called.
 930         .text
 931         .global ___umodhi3
 932         .ref ___umodulo
 933 ___umodhi3:
 934         ldi     sp,ar2
 935         ldi     *-ar2(4),ar0    ; divisor, low word
 936         ldi     *-ar2(3),ar1    ; divisor, high word
 937         ldi     *-ar2(2),r0     ; dividend, low word
 938         ldi     *-ar2(1),r1     ; dividend, high word
 939         call    ___umodulo
 940         pop     ar0             ; return address
 941         bd      ar0             ; delayed return
 942         ldi     r2,r0           ; remainder, low word
 943         ldi     r3,r1           ; remainder, high word
 944         nop
 945 #endif
 946
 947 ;
 948 ; Integer 64 by 64 signed divide
 949 ; long1 and long2 on stack
 950 ; result in r0,r1
 951 ;
 952 #ifdef L_divdi3
     ; Makes both operands non-negative, calls ___udivide, and negates the
     ; quotient when exactly one operand was negative.  ir0 starts at 0 and
     ; is complemented once per negative operand, so it is negative only
     ; when the signs differ.
 953         .text
 954         .global ___divhi3
 955         .ref ___udivide
 956 ___divhi3:
 957         ldi     0,ir0           ; ir0 = sign flag
 958         ldi     sp,ar2
 959         ldi     *-ar2(4),r0     ; divisor, low word
 960         ldi     *-ar2(3),r1     ; divisor, high word (sets flags)
 961         bge     div1
 962         not     ir0             ; divisor negative: toggle flag, negate
 963         negi    r0
 964         negb    r1
 965 div1:
 966         ldi     r0,ar0          ; ar0, ar1 = divisor for ___udivide
 967         ldi     r1,ar1
 968         ldi     *-ar2(2),r0     ; dividend, low word
 969         ldi     *-ar2(1),r1     ; dividend, high word (sets flags)
 970         bge     div2
 971         not     ir0             ; dividend negative: toggle flag, negate
 972         negi    r0
 973         negb    r1
 974 div2:
 975         call    ___udivide
 976         tstb    ir0,ir0
 977         bge     div3            ; signs equal: quotient stays positive
 978         negi    r0
 979         negb    r1
 980 div3:
 981         rets
 982 #endif
 983
 984 ;
 985 ; Integer 64 by 64 signed modulo
 986 ; long1 and long2 on stack
 987 ; result in r0,r1
 988 ;
 989 #ifdef L_moddi3
     ; Makes both operands non-negative, calls ___umodulo (remainder in
     ; r2, r3), and negates the remainder when the dividend was negative.
     ; The sign of the remainder follows the dividend only, so ir0 is
     ; toggled for the dividend and not for the divisor.
 990         .text
 991         .global ___modhi3
 992         .ref ___umodulo
 993 ___modhi3:
 994         ldi     0,ir0           ; ir0 = sign flag
 995         ldi     sp,ar2
 996         ldi     *-ar2(4),r0     ; divisor, low word
 997         ldi     *-ar2(3),r1     ; divisor, high word (sets flags)
 998         bge     mod1
 999                                 ; divisor sign does not affect the result sign
1000         negi    r0
1001         negb    r1
1002 mod1:
1003         ldi     r0,ar0          ; ar0, ar1 = divisor for ___umodulo
1004         ldi     r1,ar1
1005         ldi     *-ar2(2),r0     ; dividend, low word
1006         ldi     *-ar2(1),r1     ; dividend, high word (sets flags)
1007         bge     mod2
1008         not     ir0             ; dividend negative: remember to negate
1009         negi    r0
1010         negb    r1
1011 mod2:
1012         call    ___umodulo
1013         ldi     r2,r0           ; remainder, low word
1014         ldi     r3,r1           ; remainder, high word
1015         tstb    ir0,ir0
1016         bge     mod3            ; dividend was non-negative: done
1017         negi    r0
1018         negb    r1
1019 mod3:
1020         rets
1021 #endif
1022
1023 ;
1024 ; double to signed long long conversion
1025 ; input in r2
1026 ; result in r0,r1
1027 ;
1028 #ifdef L_fix_truncsfdi2
     ; Non-negative inputs branch straight into the unsigned conversion,
     ; which then returns to our caller.  Negative inputs are negated,
     ; converted, and the 64-bit result is negated again.
1029         .text
1030         .global ___fix_truncqfhi2
1031         .ref ufix_truncqfhi2n
1032 ___fix_truncqfhi2:
1033         .if .REGPARM == 0
1034 #ifdef _TMS320C4x
1035         lda     sp,ar0          ; ar0 = stack pointer on entry
1036 #else
1037         ldiu    sp,ar0          ; ar0 = stack pointer on entry
1038 #endif
1039         ldf     *-ar0(1), r2    ; fetch the input from the stack
1040         .endif
1041
1042         cmpf    0.0,r2
1043         bge     ufix_truncqfhi2n
1044         negf    r2              ; make the input positive
1045         call    ufix_truncqfhi2n
1046         negi    r0              ; negate the 64-bit result in r0 (low),
1047         negb    r1              ; r1 (high)
1048         rets
1049 #endif
1050
1051 ;
1052 ; double to unsigned long long conversion
1053 ; input in r2
1054 ; result in r0,r1
1055 ;
1056 #ifdef L_ufix_truncsfdi2
1057         .text
1058         .global ___ufix_truncqfhi2
1059         .global ufix_truncqfhi2n
1060 ___ufix_truncqfhi2:             ; float in r2 -> unsigned two-word integer in r0 (low), r1 (high)
1061         .if .REGPARM == 0
1062 #ifdef _TMS320C4x
1063         lda     sp,ar0          ; stack-argument build: address the argument through ar0
1064 #else
1065         ldiu    sp,ar0
1066 #endif
1067         ldf     *-ar0(1), r2    ; r2 = the float argument stored at sp-1
1068         .endif
1069
1070 ufix_truncqfhi2n:               ; register entry point, also reached from ___fix_truncqfhi2
1071         cmpf    0.0,r2
1072         ble     ufix1           ; zero or negative input gives 0
1073         pushf   r2              ; copy the float bit pattern through the stack ...
1074         pop     r3              ; ... into r3 as an integer
1075         ash     -24,r3          ; keep the sign-extended top 8 bits (presumably the exponent field)
1076         subi    31,r3           ; r3 = shift count for the 32-bit mantissa word
1077         cmpi    32,r3
1078         bgt     ufix1           ; shift above 32: give 0 (presumably out of range - confirm)
1079         cmpi    -32,r3
1080         ble     ufix1           ; shift of -32 or less: every bit is shifted out, give 0
1081         ldi     1,r0
1082         ash     31,r0           ; r0 = 1 shifted into bit 31
1083         or3     r0,r2,r0        ; integer view of r2 with bit 31 forced on (presumably the implied leading one)
1084         ldi     r0,r1           ; the same mantissa word feeds both result halves
1085         lsh3    r3,r0,r0        ; low word = mantissa shifted by r3
1086         subi    32,r3           ; the high word uses the shift reduced by 32
1087         cmpi    -32,r3
1088         ldile   0,r1            ; reduced shift of -32 or less: the high word is 0
1089         lsh3    r3,r1,r1        ; high word = mantissa shifted by r3-32
1090         rets
1091 ufix1:                          ; common exit returning zero
1092         ldi     0,r0
1093         ldi     0,r1
1094         rets
1095 #endif
1096
1097 ;
1098 ; signed long long to double conversion
1099 ; input on stack
1100 ; result in r0
1101 ;
1102 #ifdef L_floatdisf2
1103         .text
1104         .global ___floathiqf2
1105         .ref ufloathiqf2n
1106 ___floathiqf2:                  ; signed two-word integer on the stack -> float in r0
1107         ldi     sp,ar2          ; ar2 = stack pointer, base for reaching the argument
1108         ldi     *-ar2(2),r0     ; low word of the argument
1109         ldi     *-ar2(1),r1     ; high word; the bge below tests the sign of this load
1110         bge     ufloathiqf2n    ; non-negative: branch (not call), so the helper returns to our caller
1111         negi    r0              ; negative: form the magnitude, low word first ...
1112         negb    r1              ; ... then the high word with borrow
1113         call    ufloathiqf2n    ; convert the magnitude as an unsigned value
1114         negf    r0              ; and restore the sign on the float result
1115         rets
1116 #endif
1117
1118 ;
1119 ; unsigned long long to double conversion
1120 ; input on stack
1121 ; result in r0
1122 ;
1123 #ifdef L_ufloatdisf2
1124         .text
1125         .global ___ufloathiqf2
1126         .global ufloathiqf2n
1127         .ref ___unsfltconst
1128 ___ufloathiqf2:                 ; unsigned two-word integer on the stack -> float in r0
1129         ldi     sp,ar2          ; ar2 = stack pointer, base for reaching the argument
1130         ldi     *-ar2(2),r0     ; low word of the argument
1131         ldi     *-ar2(1),r1     ; high word of the argument
1132 ufloathiqf2n:                   ; register entry point (r0 = low, r1 = high), also used by ___floathiqf2
1133         .if .BIGMODEL
1134 #ifdef _TMS320C4x
1135         ldpk    @___unsfltconst ; big memory model: point the data page at the constant first
1136 #else
1137         ldp     @___unsfltconst
1138 #endif
1139         .endif
1140         ldf     @___unsfltconst,r2 ; r2 = external constant (presumably 2**32 - confirm at its definition)
1141         float   r0              ; low word converted as a signed integer
1142         bge     uflt1
1143         addf    r2,r0           ; came out negative: add the constant to read the word as unsigned
1144 uflt1:
1145         float   r1              ; same treatment for the high word
1146         bge     uflt2
1147         addf    r2,r1
1148 uflt2:
1149 #ifdef _TMS320C4x
1150         pop     r3              ; r3 = return address
1151         bd      r3              ; delayed return: the next three instructions still execute
1152         mpyf    r2,r1           ; scale the high word by the constant
1153         addf    r1,r0           ; r0 = low + scaled high
1154         nop                     ; fills the third delay slot
1155 #else
1156         ldf     r1,r3           ; NOTE(review): r3 appears to carry the low 8 mantissa bits of r1
1157         and     0ffh,r3         ; so they can be scaled separately - confirm against mpyf operand width
1158         norm    r3,r3
1159         mpyf    r2,r3           ; scaled contribution of those low bits
1160         pop     ar2             ; ar2 = return address
1161         bd      ar2             ; delayed return: the next three instructions still execute
1162         addf    r3,r0           ; add the low-bit contribution
1163         mpyf    r2,r1           ; scale the high word by the constant
1164         addf    r1,r0           ; r0 = final sum
1165 #endif
1166 #endif
1167
1168 ;
1169 ; long double to signed long long conversion
1170 ; input in r2
1171 ; result in r0,r1
1172 ;
1173 #ifdef L_fix_truncdfdi2
1174         .text
1175         .global ___fix_trunchfhi2
1176         .ref ufix_trunchfhi2n
1177 ___fix_trunchfhi2:              ; long double in r2 -> signed two-word integer in r0 (low), r1 (high)
1178         .if .REGPARM == 0
1179 #ifdef _TMS320C4x
1180         lda     sp,ar0          ; stack-argument build: address the argument through ar0
1181 #else
1182         ldiu    sp,ar0
1183 #endif
1184         ldf     *-ar0(2), r2    ; the argument occupies two stack words: ldf takes the word at sp-2 ...
1185         ldi     *-ar0(1), r2    ; ... and ldi the word at sp-1 (presumably the low mantissa bits - confirm)
1186         .endif
1187
1188         cmpf    0.0,r2
1189         bge     ufix_trunchfhi2n ; non-negative: branch (not call), so the helper returns to our caller
1190         negf    r2              ; negative: convert the magnitude ...
1191         call    ufix_trunchfhi2n
1192         negi    r0              ; ... then negate the two-word result, low word first
1193         negb    r1              ; and the high word with borrow
1194         rets
1195 #endif
1196
1197 ;
1198 ; long double to unsigned long long conversion
1199 ; input in r2
1200 ; result in r0,r1
1201 ;
1202 #ifdef L_ufix_truncdfdi2
1203         .text
1204         .global ___ufix_trunchfhi2
1205         .global ufix_trunchfhi2n
1206 ___ufix_trunchfhi2:             ; long double in r2 -> unsigned two-word integer in r0 (low), r1 (high)
1207         .if .REGPARM == 0
1208 #ifdef _TMS320C4x
1209         lda     sp,ar0          ; stack-argument build: address the argument through ar0
1210 #else
1211         ldiu    sp,ar0
1212 #endif
1213         ldf     *-ar0(2), r2    ; the argument occupies two stack words: ldf takes the word at sp-2 ...
1214         ldi     *-ar0(1), r2    ; ... and ldi the word at sp-1 (presumably the low mantissa bits - confirm)
1215         .endif
1216
1217 ufix_trunchfhi2n:               ; register entry point, also reached from ___fix_trunchfhi2
1218         cmpf    0.0,r2
1219         ble     ufixh1          ; zero or negative input gives 0
1220         pushf   r2              ; copy the float bit pattern through the stack ...
1221         pop     r3              ; ... into r3 as an integer
1222         ash     -24,r3          ; keep the sign-extended top 8 bits (presumably the exponent field)
1223         subi    31,r3           ; r3 = shift count for the 32-bit mantissa word
1224         cmpi    32,r3
1225         bgt     ufixh1          ; shift above 32: give 0 (presumably out of range - confirm)
1226         cmpi    -32,r3
1227         ble     ufixh1          ; shift of -32 or less: every bit is shifted out, give 0
1228         ldi     1,r0
1229         ash     31,r0           ; r0 = 1 shifted into bit 31
1230         or3     r0,r2,r0        ; integer view of r2 with bit 31 forced on (presumably the implied leading one)
1231         ldi     r0,r1           ; the same mantissa word feeds both result halves
1232         lsh3    r3,r0,r0        ; low word = mantissa shifted by r3
1233         subi    32,r3           ; the high word uses the shift reduced by 32
1234         cmpi    -32,r3
1235         ldile   0,r1            ; reduced shift of -32 or less: the high word is 0
1236         lsh3    r3,r1,r1        ; high word = mantissa shifted by r3-32
1237         rets
1238 ufixh1:                         ; common exit returning zero
1239         ldi     0,r0
1240         ldi     0,r1
1241         rets
1242 #endif
1243
1244 ;
1245 ; signed long long to long double conversion
1246 ; input on stack
1247 ; result in r0
1248 ;
1249 #ifdef L_floatdidf2
1250         .text
1251         .global ___floathihf2
1252         .ref ufloathihf2n
1253 ___floathihf2:                  ; signed two-word integer on the stack -> long double in r0
1254         ldi     sp,ar2          ; ar2 = stack pointer, base for reaching the argument
1255         ldi     *-ar2(2),r0     ; low word of the argument
1256         ldi     *-ar2(1),r1     ; high word; the bge below tests the sign of this load
1257         bge     ufloathihf2n    ; non-negative: branch (not call), so the helper returns to our caller
1258         negi    r0              ; negative: form the magnitude, low word first ...
1259         negb    r1              ; ... then the high word with borrow
1260         call    ufloathihf2n    ; convert the magnitude as an unsigned value
1261         negf    r0              ; and restore the sign on the float result
1262         rets
1263 #endif
1264
1265 ;
1266 ; unsigned long long to long double conversion
1267 ; input on stack
1268 ; result in r0
1269 ;
1270 #ifdef L_ufloatdidf2
1271         .text
1272         .global ___ufloathihf2
1273         .global ufloathihf2n
1274         .ref ___unsfltconst
1275 ___ufloathihf2:                 ; unsigned two-word integer on the stack -> long double in r0
1276         ldi     sp,ar2          ; ar2 = stack pointer, base for reaching the argument
1277         ldi     *-ar2(2),r0     ; low word of the argument
1278         ldi     *-ar2(1),r1     ; high word of the argument
1279 ufloathihf2n:                   ; register entry point (r0 = low, r1 = high), also used by ___floathihf2
1280         .if .BIGMODEL
1281 #ifdef _TMS320C4x
1282         ldpk    @___unsfltconst ; big memory model: point the data page at the constant first
1283 #else
1284         ldp     @___unsfltconst
1285 #endif
1286         .endif
1287         ldf     @___unsfltconst,r2 ; r2 = external constant (presumably 2**32 - confirm at its definition)
1288         float   r0              ; low word converted as a signed integer
1289         bge     uflth1
1290         addf    r2,r0           ; came out negative: add the constant to read the word as unsigned
1291 uflth1:
1292         float   r1              ; same treatment for the high word
1293         bge     uflth2
1294         addf    r2,r1
1295 uflth2:
1296 #ifdef _TMS320C4x
1297         pop     r3              ; r3 = return address
1298         bd      r3              ; delayed return: the next three instructions still execute
1299         mpyf    r2,r1           ; scale the high word by the constant
1300         addf    r1,r0           ; r0 = low + scaled high
1301         nop                     ; fills the third delay slot
1302 #else
1303         ldf     r1,r3           ; NOTE(review): r3 appears to carry the low 8 mantissa bits of r1
1304         and     0ffh,r3         ; so they can be scaled separately - confirm against mpyf operand width
1305         norm    r3,r3
1306         mpyf    r2,r3           ; scaled contribution of those low bits
1307         pop     ar2             ; ar2 = return address
1308         bd      ar2             ; delayed return: the next three instructions still execute
1309         addf    r3,r0           ; add the low-bit contribution
1310         mpyf    r2,r1           ; scale the high word by the constant
1311         addf    r1,r0           ; r0 = final sum
1312 #endif
1313 #endif
1314
1315 ;
1316 ; calculate ffs
1317 ; input in ar2
1318 ; result in r0
1319 ;
1320 #ifdef L_ffs
1321         .global ___ffs
1322         .ref ___unsfltconst
1323         .text
1324 ___ffs:                         ; find first set bit of ar2, result in r0
1325         .if .REGPARM == 0
1326 #ifdef _TMS320C4x
1327         lda     sp,ar0          ; stack-argument build: address the argument through ar0
1328 #else
1329         ldiu    sp,ar0
1330 #endif
1331         ldi     *-ar0(1), ar2   ; ar2 = the argument stored at sp-1
1332         .endif
1333
1334         negi    ar2,r0          ; r0 = -x
1335         and     ar2,r0          ; x & -x isolates the lowest set bit
1336         float   r0,r0           ; convert it to float; the flags reflect the sign of the result
1337         ldfu    0.0,r1          ; default correction term is zero
1338         .if .BIGMODEL
1339 #ifdef _TMS320C4x
1340         ldpk    @___unsfltconst ; big memory model: point the data page at the constant first
1341 #else
1342         ldp     @___unsfltconst
1343 #endif
1344         .endif
1345         ldflt   @___unsfltconst,r1 ; negative result (only bit 31 was set): correct with the constant instead
1346         addf    r1,r0           ; r0 is now a non-negative float (presumably a power of two - confirm constant)
1347         pushf   r0              ; copy the float bit pattern through the stack ...
1348         pop     r0              ; ... into r0 as an integer
1349         pop     ar0             ; ar0 = return address
1350         bd      ar0             ; delayed return: the next three instructions still execute
1351         ash     -24,r0          ; keep the sign-extended top 8 bits (presumably the exponent = bit index)
1352         ldilt   -1,r0           ; negative field (presumably only for a zero input): use -1 so the sum is 0
1353         addi    1,r0            ; result is the bit index plus one
1354 #endif
1355
1356 ;
1357 ; calculate long double * long double
1358 ; input in r2, r3
1359 ; output in r0
1360 ;
1361 #ifdef L_muldf3
1362         .global ___mulhf3
1363         .text
1364 ___mulhf3:                      ; long double multiply: r0 = r2 * r3, built from partial products
1365         .if .REGPARM == 0
1366 #ifdef _TMS320C4x
1367         lda     sp,ar0          ; stack-argument build: address the arguments through ar0
1368 #else
1369         ldiu    sp,ar0
1370 #endif
1371         ldf     *-ar0(2), r2    ; first operand occupies two stack words (sp-2, sp-1)
1372         ldi     *-ar0(1), r2
1373         ldf     *-ar0(4), r3    ; second operand occupies the next two (sp-4, sp-3)
1374         ldi     *-ar0(3), r3
1375         .endif
1376
1377         pop     ar2             ; return ad
1378         ldf     r2,r0           ; copy lsb0
1379         ldf     r3,r1           ; copy lsb1
1380         and     0ffh,r0         ; mask lsb0
1381         and     0ffh,r1         ; mask lsb1
1382         norm    r0,r0           ; correct lsb0
1383         norm    r1,r1           ; correct lsb1
1384         mpyf    r2,r1           ; arg0*lsb1
1385         mpyf    r3,r0           ; arg1*lsb0
1386         bd      ar2             ; return (delayed)
1387         addf    r0,r1           ; arg0*lsb1 + arg1*lsb0
1388         mpyf    r2,r3,r0        ; msb0*msb1
1389         addf    r1,r0           ; msb0*msb1 + arg0*lsb1 + arg1*lsb0
1390 #endif
1391
1392 ;
1393 ; calculate long double / long double
1394 ; r2 dividend, r3 divisor, r0 quotient
1395 ;
1396 #ifdef L_divdf3
1397         .global ___divhf3
1398         .text
1399 ___divhf3:                      ; long double divide: r0 = r2 / r3 via an iterated reciprocal of r3
1400         .if .REGPARM == 0
1401 #ifdef _TMS320C4x
1402         lda     sp,ar0          ; stack-argument build: address the arguments through ar0
1403 #else
1404         ldiu    sp,ar0
1405 #endif
1406         ldf     *-ar0(2), r2    ; dividend occupies two stack words (sp-2, sp-1)
1407         ldi     *-ar0(1), r2
1408         ldf     *-ar0(4), r3    ; divisor occupies the next two (sp-4, sp-3)
1409         ldi     *-ar0(3), r3
1410         .endif
1411
1412 #ifdef _TMS320C4x
1413         pop     ar1             ; ar1 = return address
1414         rcpf    r3, r0          ; r0 = initial reciprocal estimate of the divisor
1415         mpyf3   r0, r3, r1      ; refinement step 1: r1 = r0 * v
1416         subrf   2.0, r1         ; r1 = 2.0 - r0 * v
1417         mpyf    r1, r0          ; r0 = r0 * (2.0 - r0 * v)
1418         mpyf3   r0, r3, r1      ; refinement step 2: r1 = r0 * v
1419         bud     ar1             ; delayed return: the next three instructions still execute
1420         subrf   2.0, r1
1421         mpyf    r1, r0          ; r0 = refined reciprocal
1422         mpyf    r2, r0          ; r0 = dividend * reciprocal
1423 #else
1424         pop     ar1             ; ar1 = return address
1425         pushf   r3              ; build the seed estimate r[0] from the divisor bit pattern:
1426         pop     r0              ; take the float bits as an integer,
1427         not     r0              ; complement them,
1428         push    r0              ; and move the result back ...
1429         popf    r0              ; ... into r0 as a float
1430         ldf     -1.0, r1
1431         xor     r1, r0          ; NOTE(review): combines with the pattern of -1.0 - confirm against the TI manual method
1432
1433         mpyf3   r0, r3, r1      ; r1 = r[0] * v
1434         subrf   2.0, r1         ; r1 = 2.0 - r[0] * v
1435         mpyf    r1, r0          ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
1436 ; End of 1st iteration
1437
1438         mpyf3   r0, r3, r1      ; r1 = r[1] * v
1439         subrf   2.0, r1         ; r1 = 2.0 - r[1] * v
1440         mpyf    r1, r0          ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
1441 ; End of 2nd iteration
1442
1443         mpyf3   r0, r3, r1      ; r1 = r[2] * v
1444         subrf   2.0, r1         ; r1 = 2.0 - r[2] * v
1445         mpyf    r1, r0          ; r0 = r[2] * (2.0 - r[2] * v) = r[3]
1446 ; End of 3rd iteration
1447
1448         or      080h, r0        ; NOTE(review): sets bit 7 before rnd, presumably to bias the rounding - confirm
1449         rnd     r0
1450
1451 ;       mpyf3   r0, r3, r1      ; r1 = r[3] * v
1452         push    r4              ; r4 is not in the scratch list at the top of the file:
1453         pushf   r4              ; save both its integer and float views
1454         mpyf    r0, r3, r1      ; r1 = r[3] * v, main product
1455
1456         ldf     r0, r4          ; cross term from the low 8 bits of r[3]
1457         and     0ffh, r4
1458         norm    r4, r4
1459         mpyf    r3, r4
1460         addf    r4, r1
1461
1462         ldf     r3, r4          ; cross term from the low 8 bits of v
1463         and     0ffh, r4
1464         norm    r4, r4
1465         mpyf    r0, r4
1466         addf    r4, r1
1467
1468         subrf   2.0, r1         ; r1 = 2.0 - r[3] * v
1469
1470         mpyf    r1, r0, r3      ; r3 = r[3] * (2.0 - r[3] * v) = r[4]
1471
1472         ldf     r1, r4          ; cross term from the low 8 bits of r1
1473         and     0ffh, r4
1474         norm    r4, r4
1475         mpyf    r0, r4
1476         addf    r4, r3
1477
1478         ldf     r0, r4          ; cross term from the low 8 bits of r[3]
1479         and     0ffh, r4
1480         norm    r4, r4
1481         mpyf    r1, r4
1482         addf    r4, r3
1483
1484         mpyf    r2, r3, r0      ; Multiply by the dividend
1485
1486         ldf     r2, r4          ; cross term from the low 8 bits of the dividend
1487         and     0ffh, r4
1488         norm    r4, r4
1489         mpyf    r3, r4
1490         addf    r4, r0
1491
1492         ldf     r3, r4          ; cross term from the low 8 bits of the reciprocal
1493         and     0ffh, r4
1494         norm    r4, r4
1495         mpyf    r2, r4
1496         bd      ar1             ; delayed return: the next three instructions still execute
1497         addf    r4, r0          ; last cross term completes the quotient in r0
1498
1499         popf    r4              ; restore r4 in the delay slots, reverse of the save order
1500         pop     r4
1501 #endif
1502 #endif