beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / sparc64 / divrem_1.c
blob531494a94f6c8d35cc6e7301b1680c8e3c8a4ea1
1 /* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
3 Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
4 Inc.
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
19 later version.
21 or both in parallel, as here.
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 for more details.
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library. If not,
30 see https://www.gnu.org/licenses/. */
32 #include "gmp.h"
33 #include "gmp-impl.h"
34 #include "longlong.h"
36 #include "mpn/sparc64/sparc64.h"
/*                  64-bit divisor       32-bit divisor
                      cycles/limb          cycles/limb
                       (approx)             (approx)
                   integer  fraction    integer  fraction
   Ultrasparc 2i:    160      160         122       96
*/
47 /* 32-bit divisors are treated in special case code. This requires 4 mulx
48 per limb instead of 8 in the general case.
50 For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
51 addressing, to get the two halves of each limb read in the correct order.
52 This is kept in an adj variable. Doing that measures about 4 c/l faster
53 than just writing HALF_ENDIAN_ADJ(i) in the integer loop. The latter
54 shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
55 (on gcc 3.2.1 at least). The fraction loop doesn't seem affected, but we
56 still use a variable since that ought to work out best. */
58 mp_limb_t
59 mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
60 mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
62 mp_size_t total_size_limbs;
63 mp_size_t i;
65 ASSERT (xsize_limbs >= 0);
66 ASSERT (size_limbs >= 0);
67 ASSERT (d_limb != 0);
68 /* FIXME: What's the correct overlap rule when xsize!=0? */
69 ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
70 ap_limbptr, size_limbs));
72 total_size_limbs = size_limbs + xsize_limbs;
73 if (UNLIKELY (total_size_limbs == 0))
74 return 0;
76 /* udivx is good for total_size==1, and no need to bother checking
77 limb<divisor, since if that's likely the caller should check */
78 if (UNLIKELY (total_size_limbs == 1))
80 mp_limb_t a, q;
81 a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
82 q = a / d_limb;
83 qp_limbptr[0] = q;
84 return a - q*d_limb;
87 if (d_limb <= CNST_LIMB(0xFFFFFFFF))
89 mp_size_t size, xsize, total_size, adj;
90 unsigned *qp, n1, n0, q, r, nshift, norm_rmask;
91 mp_limb_t dinv_limb;
92 const unsigned *ap;
93 int norm, norm_rshift;
95 size = 2 * size_limbs;
96 xsize = 2 * xsize_limbs;
97 total_size = size + xsize;
99 ap = (unsigned *) ap_limbptr;
100 qp = (unsigned *) qp_limbptr;
102 qp += xsize;
103 r = 0; /* initial remainder */
105 if (LIKELY (size != 0))
107 n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];
109 /* If the length of the source is uniformly distributed, then
110 there's a 50% chance of the high 32-bits being zero, which we
111 can skip. */
112 if (n1 == 0)
114 n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
115 total_size--;
116 size--;
117 ASSERT (size > 0); /* because always even */
118 qp[size + HALF_ENDIAN_ADJ(1)] = 0;
121 /* Skip a division if high < divisor (high quotient 0). Testing
122 here before before normalizing will still skip as often as
123 possible. */
124 if (n1 < d_limb)
126 r = n1;
127 size--;
128 qp[size + HALF_ENDIAN_ADJ(size)] = 0;
129 total_size--;
130 if (total_size == 0)
131 return r;
135 count_leading_zeros_32 (norm, d_limb);
136 norm -= 32;
137 d_limb <<= norm;
138 r <<= norm;
140 norm_rshift = 32 - norm;
141 norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
143 invert_half_limb (dinv_limb, d_limb);
145 if (LIKELY (size != 0))
147 i = size - 1;
148 adj = HALF_ENDIAN_ADJ (i);
149 n1 = ap[i + adj];
150 adj = -adj;
151 r |= ((n1 >> norm_rshift) & norm_rmask);
152 for ( ; i > 0; i--)
154 n0 = ap[i-1 + adj];
155 adj = -adj;
156 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
157 udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
158 qp[i + adj] = q;
159 n1 = n0;
161 nshift = n1 << norm;
162 udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
163 qp[0 + HALF_ENDIAN_ADJ(0)] = q;
165 qp -= xsize;
166 adj = HALF_ENDIAN_ADJ (0);
167 for (i = xsize-1; i >= 0; i--)
169 udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
170 adj = -adj;
171 qp[i + adj] = q;
174 return r >> norm;
176 else
178 mp_srcptr ap;
179 mp_ptr qp;
180 mp_size_t size, xsize, total_size;
181 mp_limb_t d, n1, n0, q, r, dinv, nshift, norm_rmask;
182 int norm, norm_rshift;
184 ap = ap_limbptr;
185 qp = qp_limbptr;
186 size = size_limbs;
187 xsize = xsize_limbs;
188 total_size = total_size_limbs;
189 d = d_limb;
191 qp += total_size; /* above high limb */
192 r = 0; /* initial remainder */
194 if (LIKELY (size != 0))
196 /* Skip a division if high < divisor (high quotient 0). Testing
197 here before before normalizing will still skip as often as
198 possible. */
199 n1 = ap[size-1];
200 if (n1 < d)
202 r = n1;
203 *--qp = 0;
204 total_size--;
205 if (total_size == 0)
206 return r;
207 size--;
211 count_leading_zeros (norm, d);
212 d <<= norm;
213 r <<= norm;
215 norm_rshift = GMP_LIMB_BITS - norm;
216 norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));
218 invert_limb (dinv, d);
220 if (LIKELY (size != 0))
222 n1 = ap[size-1];
223 r |= ((n1 >> norm_rshift) & norm_rmask);
224 for (i = size-2; i >= 0; i--)
226 n0 = ap[i];
227 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
228 udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
229 *--qp = q;
230 n1 = n0;
232 nshift = n1 << norm;
233 udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
234 *--qp = q;
236 for (i = 0; i < xsize; i++)
238 udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
239 *--qp = q;
241 return r >> norm;