sysdeps/alpha/remqu.S

   1 /* Copyright (C) 2004-2023 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library.  If not, see
  16    <https://www.gnu.org/licenses/>.  */
  17
  18 #include "div_libc.h"
  19
  20
  21 /* 64-bit unsigned long remainder.  These are not normal C functions.  Argument
  22    registers are t10 and t11, the result goes in t12.  Only t12 and AT may be
  23    clobbered.
  24
  25    Theory of operation here is that we can use the FPU divider for virtually
  26    all operands that we see: all dividend values between -2**53 and 2**53-1
  27    can be computed directly.  Note that divisor values need not be checked
  28    against that range because the rounded fp value will be close enough such
  29    that the quotient is < 1, which will properly be truncated to zero when we
  30    convert back to integer.
  31
  32    When the dividend is outside the range for which we can compute exact
  33    results, we use the fp quotent as an estimate from which we begin refining
  34    an exact integral value.  This reduces the number of iterations in the
  35    shift-and-subtract loop significantly.
  36
  37    The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
  38    for cvttq/c even without /sui being set.  It will not, however, properly
  39    raise the exception, so we don't have to worry about FPCR_INED being clear
  40    and so dying by SIGFPE.  */
  41
  42         .text
  43         .align  4
  44         .globl  __remqu
  45         .type   __remqu, @funcnoplt
  46         .usepv  __remqu, no
  47
  48         cfi_startproc
  49         cfi_return_column (RA)
  50 __remqu:
  51         lda     sp, -FRAME(sp)
  52         cfi_def_cfa_offset (FRAME)
  53         CALL_MCOUNT
  54
  55         /* Get the fp divide insn issued as quickly as possible.  After
  56            that's done, we have at least 22 cycles until its results are
  57            ready -- all the time in the world to figure out how we're
  58            going to use the results.  */
  59         subq    Y, 1, AT
  60         and     Y, AT, AT
  61         beq     AT, $powerof2
  62
  63         stt     $f0, 0(sp)
  64         excb
  65         stt     $f1, 8(sp)
  66         stt     $f3, 48(sp)
  67         cfi_rel_offset ($f0, 0)
  68         cfi_rel_offset ($f1, 8)
  69         cfi_rel_offset ($f3, 48)
  70         mf_fpcr $f3
  71
  72         _ITOFT2 X, $f0, 16, Y, $f1, 24
  73         cvtqt   $f0, $f0
  74         cvtqt   $f1, $f1
  75
  76         blt     X, $x_is_neg
  77         divt/c  $f0, $f1, $f0
  78
  79         /* Check to see if Y was mis-converted as signed value.  */
  80         ldt     $f1, 8(sp)
  81         blt     Y, $y_is_neg
  82
  83         /* Check to see if X fit in the double as an exact value.  */
  84         srl     X, 53, AT
  85         bne     AT, $x_big
  86
  87         /* If we get here, we're expecting exact results from the division.
  88            Do nothing else besides convert, compute remainder, clean up.  */
  89         cvttq/c $f0, $f0
  90         excb
  91         mt_fpcr $f3
  92         _FTOIT  $f0, AT, 16
  93
  94         mulq    AT, Y, AT
  95         ldt     $f0, 0(sp)
  96         ldt     $f3, 48(sp)
  97         lda     sp, FRAME(sp)
  98         cfi_remember_state
  99         cfi_restore ($f0)
 100         cfi_restore ($f1)
 101         cfi_restore ($f3)
 102         cfi_def_cfa_offset (0)
 103
 104         .align  4
 105         subq    X, AT, RV
 106         ret     $31, (RA), 1
 107
 108         .align  4
 109         cfi_restore_state
 110 $x_is_neg:
 111         /* If we get here, X is so big that bit 63 is set, which made the
 112            conversion come out negative.  Fix it up lest we not even get
 113            a good estimate.  */
 114         ldah    AT, 0x5f80              /* 2**64 as float.  */
 115         stt     $f2, 24(sp)
 116         cfi_rel_offset ($f2, 24)
 117         _ITOFS  AT, $f2, 16
 118
 119         .align  4
 120         addt    $f0, $f2, $f0
 121         unop
 122         divt/c  $f0, $f1, $f0
 123         unop
 124
 125         /* Ok, we've now the divide issued.  Continue with other checks.  */
 126         ldt     $f1, 8(sp)
 127         unop
 128         ldt     $f2, 24(sp)
 129         blt     Y, $y_is_neg
 130         cfi_restore ($f1)
 131         cfi_restore ($f2)
 132         cfi_remember_state      /* for y_is_neg */
 133
 134         .align  4
 135 $x_big:
 136         /* If we get here, X is large enough that we don't expect exact
 137            results, and neither X nor Y got mis-translated for the fp
 138            division.  Our task is to take the fp result, figure out how
 139            far it's off from the correct result and compute a fixup.  */
 140         stq     t0, 16(sp)
 141         stq     t1, 24(sp)
 142         stq     t2, 32(sp)
 143         stq     t3, 40(sp)
 144         cfi_rel_offset (t0, 16)
 145         cfi_rel_offset (t1, 24)
 146         cfi_rel_offset (t2, 32)
 147         cfi_rel_offset (t3, 40)
 148
 149 #define Q       t0              /* quotient */
 150 #define R       RV              /* remainder */
 151 #define SY      t1              /* scaled Y */
 152 #define S       t2              /* scalar */
 153 #define QY      t3              /* Q*Y */
 154
 155         cvttq/c $f0, $f0
 156         _FTOIT  $f0, Q, 8
 157         mulq    Q, Y, QY
 158
 159         .align  4
 160         stq     t4, 8(sp)
 161         excb
 162         ldt     $f0, 0(sp)
 163         mt_fpcr $f3
 164         cfi_rel_offset (t4, 8)
 165         cfi_restore ($f0)
 166
 167         subq    QY, X, R
 168         mov     Y, SY
 169         mov     1, S
 170         bgt     R, $q_high
 171
 172 $q_high_ret:
 173         subq    X, QY, R
 174         mov     Y, SY
 175         mov     1, S
 176         bgt     R, $q_low
 177
 178 $q_low_ret:
 179         ldq     t4, 8(sp)
 180         ldq     t0, 16(sp)
 181         ldq     t1, 24(sp)
 182         ldq     t2, 32(sp)
 183
 184         ldq     t3, 40(sp)
 185         ldt     $f3, 48(sp)
 186         lda     sp, FRAME(sp)
 187         cfi_remember_state
 188         cfi_restore (t0)
 189         cfi_restore (t1)
 190         cfi_restore (t2)
 191         cfi_restore (t3)
 192         cfi_restore (t4)
 193         cfi_restore ($f3)
 194         cfi_def_cfa_offset (0)
 195         ret     $31, (RA), 1
 196
 197         .align  4
 198         cfi_restore_state
 199         /* The quotient that we computed was too large.  We need to reduce
 200            it by S such that Y*S >= R.  Obviously the closer we get to the
 201            correct value the better, but overshooting high is ok, as we'll
 202            fix that up later.  */
 203 0:
 204         addq    SY, SY, SY
 205         addq    S, S, S
 206 $q_high:
 207         cmpult  SY, R, AT
 208         bne     AT, 0b
 209
 210         subq    Q, S, Q
 211         unop
 212         subq    QY, SY, QY
 213         br      $q_high_ret
 214
 215         .align  4
 216         /* The quotient that we computed was too small.  Divide Y by the
 217            current remainder (R) and add that to the existing quotient (Q).
 218            The expectation, of course, is that R is much smaller than X.  */
 219         /* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
 220            already have a copy of Y in SY and the value 1 in S.  */
 221 0:
 222         addq    SY, SY, SY
 223         addq    S, S, S
 224 $q_low:
 225         cmpult  SY, R, AT
 226         bne     AT, 0b
 227
 228         /* Shift-down and subtract loop.  Each iteration compares our scaled
 229            Y (SY) with the remainder (R); if SY <= R then X is divisible by
 230            Y's scalar (S) so add it to the quotient (Q).  */
 231 2:      addq    Q, S, t3
 232         srl     S, 1, S
 233         cmpule  SY, R, AT
 234         subq    R, SY, t4
 235
 236         cmovne  AT, t3, Q
 237         cmovne  AT, t4, R
 238         srl     SY, 1, SY
 239         bne     S, 2b
 240
 241         br      $q_low_ret
 242
 243         .align  4
 244         cfi_restore_state
 245 $y_is_neg:
 246         /* If we get here, Y is so big that bit 63 is set.  The results
 247            from the divide will be completely wrong.  Fortunately, the
 248            quotient must be either 0 or 1, so the remainder must be X
 249            or X-Y, so just compute it directly.  */
 250         cmpule  Y, X, AT
 251         excb
 252         mt_fpcr $f3
 253         subq    X, Y, RV
 254         ldt     $f0, 0(sp)
 255         ldt     $f3, 48(sp)
 256         cmoveq  AT, X, RV
 257
 258         lda     sp, FRAME(sp)
 259         cfi_restore ($f0)
 260         cfi_restore ($f3)
 261         cfi_def_cfa_offset (0)
 262         ret     $31, (RA), 1
 263
 264         .align  4
 265         cfi_def_cfa_offset (FRAME)
 266 $powerof2:
 267         subq    Y, 1, AT
 268         beq     Y, DIVBYZERO
 269         and     X, AT, RV
 270         lda     sp, FRAME(sp)
 271         cfi_def_cfa_offset (0)
 272         ret     $31, (RA), 1
 273
 274         cfi_endproc
 275         .size   __remqu, .-__remqu
 276
 277         DO_DIVBYZERO