bignum.c

   1 /**********************************************************************
   2
   3   bignum.c -
   4
   5   $Author$
   6   created at: Fri Jun 10 00:48:55 JST 1994
   7
   8   Copyright (C) 1993-2007 Yukihiro Matsumoto
   9
  10 **********************************************************************/
  11
  12 #include "ruby/internal/config.h"
  13
  14 #include <ctype.h>
  15 #include <float.h>
  16 #include <math.h>
  17
  18 #ifdef HAVE_STRINGS_H
  19 # include <strings.h>
  20 #endif
  21
  22 #ifdef HAVE_IEEEFP_H
  23 # include <ieeefp.h>
  24 #endif
  25
  26 #if defined(HAVE_LIBGMP) && defined(HAVE_GMP_H)
  27 # define USE_GMP
  28 # include <gmp.h>
  29 #endif
  30
  31 #include "id.h"
  32 #include "internal.h"
  33 #include "internal/bignum.h"
  34 #include "internal/complex.h"
  35 #include "internal/gc.h"
  36 #include "internal/numeric.h"
  37 #include "internal/object.h"
  38 #include "internal/sanitizers.h"
  39 #include "internal/variable.h"
  40 #include "internal/warnings.h"
  41 #include "ruby/thread.h"
  42 #include "ruby/util.h"
  43 #include "ruby_assert.h"
  44
  45 const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
  46
  47 #ifndef SIZEOF_BDIGIT_DBL
  48 # if SIZEOF_INT*2 <= SIZEOF_LONG_LONG
  49 #  define SIZEOF_BDIGIT_DBL SIZEOF_LONG_LONG
  50 # else
  51 #  define SIZEOF_BDIGIT_DBL SIZEOF_LONG
  52 # endif
  53 #endif
  54
  55 STATIC_ASSERT(sizeof_bdigit_dbl, sizeof(BDIGIT_DBL) == SIZEOF_BDIGIT_DBL);
  56 STATIC_ASSERT(sizeof_bdigit_dbl_signed, sizeof(BDIGIT_DBL_SIGNED) == SIZEOF_BDIGIT_DBL);
  57 STATIC_ASSERT(sizeof_bdigit, SIZEOF_BDIGIT <= sizeof(BDIGIT));
  58 STATIC_ASSERT(sizeof_bdigit_and_dbl, SIZEOF_BDIGIT*2 <= SIZEOF_BDIGIT_DBL);
  59 STATIC_ASSERT(bdigit_signedness, 0 < (BDIGIT)-1);
  60 STATIC_ASSERT(bdigit_dbl_signedness, 0 < (BDIGIT_DBL)-1);
  61 STATIC_ASSERT(bdigit_dbl_signed_signedness, 0 > (BDIGIT_DBL_SIGNED)-1);
  62 STATIC_ASSERT(rbignum_embed_len_max, BIGNUM_EMBED_LEN_MAX <= (BIGNUM_EMBED_LEN_MASK >> BIGNUM_EMBED_LEN_SHIFT));
  63
  64 #if SIZEOF_BDIGIT < SIZEOF_LONG
  65 STATIC_ASSERT(sizeof_long_and_sizeof_bdigit, SIZEOF_LONG % SIZEOF_BDIGIT == 0);
  66 #else
  67 STATIC_ASSERT(sizeof_long_and_sizeof_bdigit, SIZEOF_BDIGIT % SIZEOF_LONG == 0);
  68 #endif
  69
  70 #ifdef WORDS_BIGENDIAN
  71 #   define HOST_BIGENDIAN_P 1
  72 #else
  73 #   define HOST_BIGENDIAN_P 0
  74 #endif
  75 /* (!LSHIFTABLE(d, n) ? 0 : (n)) is the same as n but suppress a warning, C4293, by Visual Studio.  */
  76 #define LSHIFTABLE(d, n) ((n) < sizeof(d) * CHAR_BIT)
  77 #define LSHIFTX(d, n) (!LSHIFTABLE(d, n) ? 0 : ((d) << (!LSHIFTABLE(d, n) ? 0 : (n))))
  78 #define CLEAR_LOWBITS(d, numbits) ((d) & LSHIFTX(~((d)*0), (numbits)))
  79 #define FILL_LOWBITS(d, numbits) ((d) | (LSHIFTX(((d)*0+1), (numbits))-1))
  80 #define POW2_P(x) (((x)&((x)-1))==0)
  81
  82 #define BDIGITS(x) (BIGNUM_DIGITS(x))
  83 #define BITSPERDIG (SIZEOF_BDIGIT*CHAR_BIT)
  84 #define BIGRAD ((BDIGIT_DBL)1 << BITSPERDIG)
  85 #define BIGRAD_HALF ((BDIGIT)(BIGRAD >> 1))
  86 #define BDIGIT_MSB(d) (((d) & BIGRAD_HALF) != 0)
  87 #define BIGUP(x) LSHIFTX(((x) + (BDIGIT_DBL)0), BITSPERDIG)
  88 #define BIGDN(x) RSHIFT((x),BITSPERDIG)
  89 #define BIGLO(x) ((BDIGIT)((x) & BDIGMAX))
  90 #define BDIGMAX ((BDIGIT)(BIGRAD-1))
  91 #define BDIGIT_DBL_MAX (~(BDIGIT_DBL)0)
  92
  93 #if SIZEOF_BDIGIT == 2
  94 #   define swap_bdigit(x) swap16(x)
  95 #elif SIZEOF_BDIGIT == 4
  96 #   define swap_bdigit(x) swap32(x)
  97 #elif SIZEOF_BDIGIT == 8
  98 #   define swap_bdigit(x) swap64(x)
  99 #endif
 100
 101 #define BIGZEROP(x) (BIGNUM_LEN(x) == 0 || \
 102                      (BDIGITS(x)[0] == 0 && \
 103                       (BIGNUM_LEN(x) == 1 || bigzero_p(x))))
 104 #define BIGSIZE(x) (BIGNUM_LEN(x) == 0 ? (size_t)0 : \
 105     BDIGITS(x)[BIGNUM_LEN(x)-1] ? \
 106         (size_t)(BIGNUM_LEN(x)*SIZEOF_BDIGIT - nlz(BDIGITS(x)[BIGNUM_LEN(x)-1])/CHAR_BIT) : \
 107     rb_absint_size(x, NULL))
 108
 109 #define BIGDIVREM_EXTRA_WORDS 1
 110 #define bdigit_roomof(n) roomof(n, SIZEOF_BDIGIT)
 111 #define BARY_ARGS(ary) ary, numberof(ary)
 112
 113 #define BARY_ADD(z, x, y) bary_add(BARY_ARGS(z), BARY_ARGS(x), BARY_ARGS(y))
 114 #define BARY_SUB(z, x, y) bary_sub(BARY_ARGS(z), BARY_ARGS(x), BARY_ARGS(y))
 115 #define BARY_SHORT_MUL(z, x, y) bary_short_mul(BARY_ARGS(z), BARY_ARGS(x), BARY_ARGS(y))
 116 #define BARY_DIVMOD(q, r, x, y) bary_divmod(BARY_ARGS(q), BARY_ARGS(r), BARY_ARGS(x), BARY_ARGS(y))
 117 #define BARY_ZERO_P(x) bary_zero_p(BARY_ARGS(x))
 118
 119 #define BIGNUM_SET_NEGATIVE_SIGN(b) BIGNUM_SET_SIGN(b, 0)
 120 #define BIGNUM_SET_POSITIVE_SIGN(b) BIGNUM_SET_SIGN(b, 1)
 121
 122 #define bignew(len,sign) bignew_1(rb_cInteger,(len),(sign))
 123
 124 #define BDIGITS_ZERO(ptr, n) do { \
 125   BDIGIT *bdigitz_zero_ptr = (ptr); \
 126   size_t bdigitz_zero_n = (n); \
 127   while (bdigitz_zero_n) { \
 128     *bdigitz_zero_ptr++ = 0; \
 129     bdigitz_zero_n--; \
 130   } \
 131 } while (0)
 132
 133 #define BARY_TRUNC(ds, n) do { \
 134         while (0 < (n) && (ds)[(n)-1] == 0) \
 135             (n)--; \
 136     } while (0)
 137
 138 #define KARATSUBA_BALANCED(xn, yn) ((yn)/2 < (xn))
 139 #define TOOM3_BALANCED(xn, yn) (((yn)+2)/3 * 2 < (xn))
 140
 141 #define GMP_MUL_DIGITS 20
 142 #define KARATSUBA_MUL_DIGITS 70
 143 #define TOOM3_MUL_DIGITS 150
 144
 145 #define GMP_DIV_DIGITS 20
 146 #define GMP_BIG2STR_DIGITS 20
 147 #define GMP_STR2BIG_DIGITS 20
 148 #ifdef USE_GMP
 149 # define NAIVE_MUL_DIGITS GMP_MUL_DIGITS
 150 #else
 151 # define NAIVE_MUL_DIGITS KARATSUBA_MUL_DIGITS
 152 #endif
 153
 154 typedef void (mulfunc_t)(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn);
 155
 156 static mulfunc_t bary_mul_toom3_start;
 157 static mulfunc_t bary_mul_karatsuba_start;
 158 static BDIGIT bigdivrem_single(BDIGIT *qds, const BDIGIT *xds, size_t xn, BDIGIT y);
 159
 160 static VALUE bignew_1(VALUE klass, size_t len, int sign);
 161 static inline VALUE bigtrunc(VALUE x);
 162
 163 static VALUE bigsq(VALUE x);
 164 static inline VALUE power_cache_get_power(int base, int power_level, size_t *numdigits_ret);
 165
 166 #if SIZEOF_BDIGIT <= SIZEOF_INT
 167 static int nlz(BDIGIT x) { return nlz_int((unsigned int)x) - (SIZEOF_INT-SIZEOF_BDIGIT) * CHAR_BIT; }
 168 #elif SIZEOF_BDIGIT <= SIZEOF_LONG
 169 static int nlz(BDIGIT x) { return nlz_long((unsigned long)x) - (SIZEOF_LONG-SIZEOF_BDIGIT) * CHAR_BIT; }
 170 #elif SIZEOF_BDIGIT <= SIZEOF_LONG_LONG
 171 static int nlz(BDIGIT x) { return nlz_long_long((unsigned LONG_LONG)x) - (SIZEOF_LONG_LONG-SIZEOF_BDIGIT) * CHAR_BIT; }
 172 #elif SIZEOF_BDIGIT <= SIZEOF_INT128_T
 173 static int nlz(BDIGIT x) { return nlz_int128((uint128_t)x) - (SIZEOF_INT128_T-SIZEOF_BDIGIT) * CHAR_BIT; }
 174 #endif
 175
 176 #define U16(a) ((uint16_t)(a))
 177 #define U32(a) ((uint32_t)(a))
 178 #ifdef HAVE_UINT64_T
 179 #define U64(a,b) (((uint64_t)(a) << 32) | (b))
 180 #endif
 181 #ifdef HAVE_UINT128_T
 182 #define U128(a,b,c,d) (((uint128_t)U64(a,b) << 64) | U64(c,d))
 183 #endif
 184
 185 /* The following script, maxpow.rb, generates the tables follows.
 186
 187 def big(n, bits)
 188   ns = []
 189   ((bits+31)/32).times {
 190     ns << sprintf("0x%08x", n & 0xffff_ffff)
 191     n >>= 32
 192   }
 193   "U#{bits}(" + ns.reverse.join(",") + ")"
 194 end
 195 def values(ary, width, indent)
 196   lines = [""]
 197   ary.each {|e|
 198     lines << "" if !ary.last.empty? && width < (lines.last + e + ", ").length
 199     lines.last << e + ", "
 200   }
 201   lines.map {|line| " " * indent + line.chomp(" ") + "\n" }.join
 202 end
 203 [16,32,64,128].each {|bits|
 204   max = 2**bits-1
 205   exps = []
 206   nums = []
 207   2.upto(36) {|base|
 208     exp = 0
 209     n = 1
 210     while n * base <= max
 211       exp += 1
 212       n *= base
 213     end
 214     exps << exp.to_s
 215     nums << big(n, bits)
 216   }
 217   puts "#ifdef HAVE_UINT#{bits}_T"
 218   puts "static const int maxpow#{bits}_exp[35] = {"
 219   print values(exps, 70, 4)
 220   puts "};"
 221   puts "static const uint#{bits}_t maxpow#{bits}_num[35] = {"
 222   print values(nums, 70, 4)
 223   puts "};"
 224   puts "#endif"
 225 }
 226
 227  */
 228
 229 #if SIZEOF_BDIGIT_DBL == 2
 230 static const int maxpow16_exp[35] = {
 231     15, 10, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
 232     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 233 };
 234 static const uint16_t maxpow16_num[35] = {
 235     U16(0x00008000), U16(0x0000e6a9), U16(0x00004000), U16(0x00003d09),
 236     U16(0x0000b640), U16(0x000041a7), U16(0x00008000), U16(0x0000e6a9),
 237     U16(0x00002710), U16(0x00003931), U16(0x00005100), U16(0x00006f91),
 238     U16(0x00009610), U16(0x0000c5c1), U16(0x00001000), U16(0x00001331),
 239     U16(0x000016c8), U16(0x00001acb), U16(0x00001f40), U16(0x0000242d),
 240     U16(0x00002998), U16(0x00002f87), U16(0x00003600), U16(0x00003d09),
 241     U16(0x000044a8), U16(0x00004ce3), U16(0x000055c0), U16(0x00005f45),
 242     U16(0x00006978), U16(0x0000745f), U16(0x00008000), U16(0x00008c61),
 243     U16(0x00009988), U16(0x0000a77b), U16(0x0000b640),
 244 };
 245 #elif SIZEOF_BDIGIT_DBL == 4
 246 static const int maxpow32_exp[35] = {
 247     31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7,
 248     7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 249 };
 250 static const uint32_t maxpow32_num[35] = {
 251     U32(0x80000000), U32(0xcfd41b91), U32(0x40000000), U32(0x48c27395),
 252     U32(0x81bf1000), U32(0x75db9c97), U32(0x40000000), U32(0xcfd41b91),
 253     U32(0x3b9aca00), U32(0x8c8b6d2b), U32(0x19a10000), U32(0x309f1021),
 254     U32(0x57f6c100), U32(0x98c29b81), U32(0x10000000), U32(0x18754571),
 255     U32(0x247dbc80), U32(0x3547667b), U32(0x4c4b4000), U32(0x6b5a6e1d),
 256     U32(0x94ace180), U32(0xcaf18367), U32(0x0b640000), U32(0x0e8d4a51),
 257     U32(0x1269ae40), U32(0x17179149), U32(0x1cb91000), U32(0x23744899),
 258     U32(0x2b73a840), U32(0x34e63b41), U32(0x40000000), U32(0x4cfa3cc1),
 259     U32(0x5c13d840), U32(0x6d91b519), U32(0x81bf1000),
 260 };
 261 #elif SIZEOF_BDIGIT_DBL == 8 && defined HAVE_UINT64_T
 262 static const int maxpow64_exp[35] = {
 263     63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 17, 17, 16, 16, 15, 15, 15,
 264     15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12,
 265     12,
 266 };
 267 static const uint64_t maxpow64_num[35] = {
 268     U64(0x80000000,0x00000000), U64(0xa8b8b452,0x291fe821),
 269     U64(0x40000000,0x00000000), U64(0x6765c793,0xfa10079d),
 270     U64(0x41c21cb8,0xe1000000), U64(0x36427987,0x50226111),
 271     U64(0x80000000,0x00000000), U64(0xa8b8b452,0x291fe821),
 272     U64(0x8ac72304,0x89e80000), U64(0x4d28cb56,0xc33fa539),
 273     U64(0x1eca170c,0x00000000), U64(0x780c7372,0x621bd74d),
 274     U64(0x1e39a505,0x7d810000), U64(0x5b27ac99,0x3df97701),
 275     U64(0x10000000,0x00000000), U64(0x27b95e99,0x7e21d9f1),
 276     U64(0x5da0e1e5,0x3c5c8000), U64(0xd2ae3299,0xc1c4aedb),
 277     U64(0x16bcc41e,0x90000000), U64(0x2d04b7fd,0xd9c0ef49),
 278     U64(0x5658597b,0xcaa24000), U64(0xa0e20737,0x37609371),
 279     U64(0x0c29e980,0x00000000), U64(0x14adf4b7,0x320334b9),
 280     U64(0x226ed364,0x78bfa000), U64(0x383d9170,0xb85ff80b),
 281     U64(0x5a3c23e3,0x9c000000), U64(0x8e651373,0x88122bcd),
 282     U64(0xdd41bb36,0xd259e000), U64(0x0aee5720,0xee830681),
 283     U64(0x10000000,0x00000000), U64(0x172588ad,0x4f5f0981),
 284     U64(0x211e44f7,0xd02c1000), U64(0x2ee56725,0xf06e5c71),
 285     U64(0x41c21cb8,0xe1000000),
 286 };
 287 #elif SIZEOF_BDIGIT_DBL == 16 && defined HAVE_UINT128_T
 288 static const int maxpow128_exp[35] = {
 289     127, 80, 63, 55, 49, 45, 42, 40, 38, 37, 35, 34, 33, 32, 31, 31, 30,
 290     30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 24,
 291     24,
 292 };
 293 static const uint128_t maxpow128_num[35] = {
 294     U128(0x80000000,0x00000000,0x00000000,0x00000000),
 295     U128(0x6f32f1ef,0x8b18a2bc,0x3cea5978,0x9c79d441),
 296     U128(0x40000000,0x00000000,0x00000000,0x00000000),
 297     U128(0xd0cf4b50,0xcfe20765,0xfff4b4e3,0xf741cf6d),
 298     U128(0x6558e2a0,0x921fe069,0x42860000,0x00000000),
 299     U128(0x5080c7b7,0xd0e31ba7,0x5911a67d,0xdd3d35e7),
 300     U128(0x40000000,0x00000000,0x00000000,0x00000000),
 301     U128(0x6f32f1ef,0x8b18a2bc,0x3cea5978,0x9c79d441),
 302     U128(0x4b3b4ca8,0x5a86c47a,0x098a2240,0x00000000),
 303     U128(0xffd1390a,0x0adc2fb8,0xdabbb817,0x4d95c99b),
 304     U128(0x2c6fdb36,0x4c25e6c0,0x00000000,0x00000000),
 305     U128(0x384bacd6,0x42c343b4,0xe90c4272,0x13506d29),
 306     U128(0x31f5db32,0xa34aced6,0x0bf13a0e,0x00000000),
 307     U128(0x20753ada,0xfd1e839f,0x53686d01,0x3143ee01),
 308     U128(0x10000000,0x00000000,0x00000000,0x00000000),
 309     U128(0x68ca11d6,0xb4f6d1d1,0xfaa82667,0x8073c2f1),
 310     U128(0x223e493b,0xb3bb69ff,0xa4b87d6c,0x40000000),
 311     U128(0xad62418d,0x14ea8247,0x01c4b488,0x6cc66f59),
 312     U128(0x2863c1f5,0xcdae42f9,0x54000000,0x00000000),
 313     U128(0xa63fd833,0xb9386b07,0x36039e82,0xbe651b25),
 314     U128(0x1d1f7a9c,0xd087a14d,0x28cdf3d5,0x10000000),
 315     U128(0x651b5095,0xc2ea8fc1,0xb30e2c57,0x77aaf7e1),
 316     U128(0x0ddef20e,0xff760000,0x00000000,0x00000000),
 317     U128(0x29c30f10,0x29939b14,0x6664242d,0x97d9f649),
 318     U128(0x786a435a,0xe9558b0e,0x6aaf6d63,0xa8000000),
 319     U128(0x0c5afe6f,0xf302bcbf,0x94fd9829,0xd87f5079),
 320     U128(0x1fce575c,0xe1692706,0x07100000,0x00000000),
 321     U128(0x4f34497c,0x8597e144,0x36e91802,0x00528229),
 322     U128(0xbf3a8e1d,0x41ef2170,0x7802130d,0x84000000),
 323     U128(0x0e7819e1,0x7f1eb0fb,0x6ee4fb89,0x01d9531f),
 324     U128(0x20000000,0x00000000,0x00000000,0x00000000),
 325     U128(0x4510460d,0xd9e879c0,0x14a82375,0x2f22b321),
 326     U128(0x91abce3c,0x4b4117ad,0xe76d35db,0x22000000),
 327     U128(0x08973ea3,0x55d75bc2,0x2e42c391,0x727d69e1),
 328     U128(0x10e425c5,0x6daffabc,0x35c10000,0x00000000),
 329 };
 330 #endif
 331
 332 static BDIGIT_DBL
 333 maxpow_in_bdigit_dbl(int base, int *exp_ret)
 334 {
 335     BDIGIT_DBL maxpow;
 336     int exponent;
 337
 338     assert(2 <= base && base <= 36);
 339
 340     {
 341 #if SIZEOF_BDIGIT_DBL == 2
 342         maxpow = maxpow16_num[base-2];
 343         exponent = maxpow16_exp[base-2];
 344 #elif SIZEOF_BDIGIT_DBL == 4
 345         maxpow = maxpow32_num[base-2];
 346         exponent = maxpow32_exp[base-2];
 347 #elif SIZEOF_BDIGIT_DBL == 8 && defined HAVE_UINT64_T
 348         maxpow = maxpow64_num[base-2];
 349         exponent = maxpow64_exp[base-2];
 350 #elif SIZEOF_BDIGIT_DBL == 16 && defined HAVE_UINT128_T
 351         maxpow = maxpow128_num[base-2];
 352         exponent = maxpow128_exp[base-2];
 353 #else
 354         maxpow = base;
 355         exponent = 1;
 356         while (maxpow <= BDIGIT_DBL_MAX / base) {
 357             maxpow *= base;
 358             exponent++;
 359         }
 360 #endif
 361     }
 362
 363     *exp_ret = exponent;
 364     return maxpow;
 365 }
 366
 367 static inline BDIGIT_DBL
 368 bary2bdigitdbl(const BDIGIT *ds, size_t n)
 369 {
 370     assert(n <= 2);
 371
 372     if (n == 2)
 373         return ds[0] | BIGUP(ds[1]);
 374     if (n == 1)
 375         return ds[0];
 376     return 0;
 377 }
 378
 379 static inline void
 380 bdigitdbl2bary(BDIGIT *ds, size_t n, BDIGIT_DBL num)
 381 {
 382     assert(n == 2);
 383
 384     ds[0] = BIGLO(num);
 385     ds[1] = (BDIGIT)BIGDN(num);
 386 }
 387
 388 static int
 389 bary_cmp(const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
 390 {
 391     size_t i;
 392     BARY_TRUNC(xds, xn);
 393     BARY_TRUNC(yds, yn);
 394
 395     if (xn < yn)
 396         return -1;
 397     if (xn > yn)
 398         return 1;
 399
 400     for (i = 0; i < xn; i++)
 401         if (xds[xn - i - 1] != yds[yn - i - 1])
 402             break;
 403     if (i == xn)
 404         return 0;
 405     return xds[xn - i - 1] < yds[yn - i - 1] ? -1 : 1;
 406 }
 407
 408 static BDIGIT
 409 bary_small_lshift(BDIGIT *zds, const BDIGIT *xds, size_t n, int shift)
 410 {
 411     size_t i;
 412     BDIGIT_DBL num = 0;
 413     assert(0 <= shift && shift < BITSPERDIG);
 414
 415     for (i=0; i<n; i++) {
 416         num = num | (BDIGIT_DBL)*xds++ << shift;
 417         *zds++ = BIGLO(num);
 418         num = BIGDN(num);
 419     }
 420     return BIGLO(num);
 421 }
 422
 423 static void
 424 bary_small_rshift(BDIGIT *zds, const BDIGIT *xds, size_t n, int shift, BDIGIT higher_bdigit)
 425 {
 426     size_t i;
 427     BDIGIT_DBL num = 0;
 428
 429     assert(0 <= shift && shift < BITSPERDIG);
 430
 431     num = BIGUP(higher_bdigit);
 432     for (i = 0; i < n; i++) {
 433         BDIGIT x = xds[n - i - 1];
 434         num = (num | x) >> shift;
 435         zds[n - i - 1] = BIGLO(num);
 436         num = BIGUP(x);
 437     }
 438 }
 439
 440 static int
 441 bary_zero_p(const BDIGIT *xds, size_t xn)
 442 {
 443     if (xn == 0)
 444         return 1;
 445     do {
 446         if (xds[--xn]) return 0;
 447     } while (xn);
 448     return 1;
 449 }
 450
 451 static void
 452 bary_neg(BDIGIT *ds, size_t n)
 453 {
 454     size_t i;
 455     for (i = 0; i < n; i++)
 456         ds[n - i - 1] = BIGLO(~ds[n - i - 1]);
 457 }
 458
 459 static int
 460 bary_2comp(BDIGIT *ds, size_t n)
 461 {
 462     size_t i;
 463     for (i = 0; i < n; i++) {
 464         if (ds[i] != 0) {
 465             goto non_zero;
 466         }
 467     }
 468     return 1;
 469
 470   non_zero:
 471     ds[i] = BIGLO(~ds[i] + 1);
 472     i++;
 473     for (; i < n; i++) {
 474         ds[i] = BIGLO(~ds[i]);
 475     }
 476     return 0;
 477 }
 478
 479 static void
 480 bary_swap(BDIGIT *ds, size_t num_bdigits)
 481 {
 482     BDIGIT *p1 = ds;
 483     BDIGIT *p2 = ds + num_bdigits - 1;
 484     for (; p1 < p2; p1++, p2--) {
 485         BDIGIT tmp = *p1;
 486         *p1 = *p2;
 487         *p2 = tmp;
 488     }
 489 }
 490
 491 #define INTEGER_PACK_WORDORDER_MASK \
 492     (INTEGER_PACK_MSWORD_FIRST | \
 493      INTEGER_PACK_LSWORD_FIRST)
 494 #define INTEGER_PACK_BYTEORDER_MASK \
 495     (INTEGER_PACK_MSBYTE_FIRST | \
 496      INTEGER_PACK_LSBYTE_FIRST | \
 497      INTEGER_PACK_NATIVE_BYTE_ORDER)
 498
 499 static void
 500 validate_integer_pack_format(size_t numwords, size_t wordsize, size_t nails, int flags, int supported_flags)
 501 {
 502     int wordorder_bits = flags & INTEGER_PACK_WORDORDER_MASK;
 503     int byteorder_bits = flags & INTEGER_PACK_BYTEORDER_MASK;
 504
 505     if (flags & ~supported_flags) {
 506         rb_raise(rb_eArgError, "unsupported flags specified");
 507     }
 508     if (wordorder_bits == 0) {
 509         if (1 < numwords)
 510             rb_raise(rb_eArgError, "word order not specified");
 511     }
 512     else if (wordorder_bits != INTEGER_PACK_MSWORD_FIRST &&
 513         wordorder_bits != INTEGER_PACK_LSWORD_FIRST)
 514         rb_raise(rb_eArgError, "unexpected word order");
 515     if (byteorder_bits == 0) {
 516         rb_raise(rb_eArgError, "byte order not specified");
 517     }
 518     else if (byteorder_bits != INTEGER_PACK_MSBYTE_FIRST &&
 519         byteorder_bits != INTEGER_PACK_LSBYTE_FIRST &&
 520         byteorder_bits != INTEGER_PACK_NATIVE_BYTE_ORDER)
 521         rb_raise(rb_eArgError, "unexpected byte order");
 522     if (wordsize == 0)
 523         rb_raise(rb_eArgError, "invalid wordsize: %"PRI_SIZE_PREFIX"u", wordsize);
 524     if (SSIZE_MAX < wordsize)
 525         rb_raise(rb_eArgError, "too big wordsize: %"PRI_SIZE_PREFIX"u", wordsize);
 526     if (wordsize <= nails / CHAR_BIT)
 527         rb_raise(rb_eArgError, "too big nails: %"PRI_SIZE_PREFIX"u", nails);
 528     if (SIZE_MAX / wordsize < numwords)
 529         rb_raise(rb_eArgError, "too big numwords * wordsize: %"PRI_SIZE_PREFIX"u * %"PRI_SIZE_PREFIX"u", numwords, wordsize);
 530 }
 531
 532 static void
 533 integer_pack_loop_setup(
 534     size_t numwords, size_t wordsize, size_t nails, int flags,
 535     size_t *word_num_fullbytes_ret,
 536     int *word_num_partialbits_ret,
 537     size_t *word_start_ret,
 538     ssize_t *word_step_ret,
 539     size_t *word_last_ret,
 540     size_t *byte_start_ret,
 541     int *byte_step_ret)
 542 {
 543     int wordorder_bits = flags & INTEGER_PACK_WORDORDER_MASK;
 544     int byteorder_bits = flags & INTEGER_PACK_BYTEORDER_MASK;
 545     size_t word_num_fullbytes;
 546     int word_num_partialbits;
 547     size_t word_start;
 548     ssize_t word_step;
 549     size_t word_last;
 550     size_t byte_start;
 551     int byte_step;
 552
 553     word_num_partialbits = CHAR_BIT - (int)(nails % CHAR_BIT);
 554     if (word_num_partialbits == CHAR_BIT)
 555         word_num_partialbits = 0;
 556     word_num_fullbytes = wordsize - (nails / CHAR_BIT);
 557     if (word_num_partialbits != 0) {
 558         word_num_fullbytes--;
 559     }
 560
 561     if (wordorder_bits == INTEGER_PACK_MSWORD_FIRST) {
 562         word_start = wordsize*(numwords-1);
 563         word_step = -(ssize_t)wordsize;
 564         word_last = 0;
 565     }
 566     else {
 567         word_start = 0;
 568         word_step = wordsize;
 569         word_last = wordsize*(numwords-1);
 570     }
 571
 572     if (byteorder_bits == INTEGER_PACK_NATIVE_BYTE_ORDER) {
 573 #ifdef WORDS_BIGENDIAN
 574         byteorder_bits = INTEGER_PACK_MSBYTE_FIRST;
 575 #else
 576         byteorder_bits = INTEGER_PACK_LSBYTE_FIRST;
 577 #endif
 578     }
 579     if (byteorder_bits == INTEGER_PACK_MSBYTE_FIRST) {
 580         byte_start = wordsize-1;
 581         byte_step = -1;
 582     }
 583     else {
 584         byte_start = 0;
 585         byte_step = 1;
 586     }
 587
 588     *word_num_partialbits_ret = word_num_partialbits;
 589     *word_num_fullbytes_ret = word_num_fullbytes;
 590     *word_start_ret = word_start;
 591     *word_step_ret = word_step;
 592     *word_last_ret = word_last;
 593     *byte_start_ret = byte_start;
 594     *byte_step_ret = byte_step;
 595 }
 596
 597 static inline void
 598 integer_pack_fill_dd(BDIGIT **dpp, BDIGIT **dep, BDIGIT_DBL *ddp, int *numbits_in_dd_p)
 599 {
 600     if (*dpp < *dep && BITSPERDIG <= (int)sizeof(*ddp) * CHAR_BIT - *numbits_in_dd_p) {
 601         *ddp |= (BDIGIT_DBL)(*(*dpp)++) << *numbits_in_dd_p;
 602         *numbits_in_dd_p += BITSPERDIG;
 603     }
 604     else if (*dpp == *dep) {
 605         /* higher bits are infinity zeros */
 606         *numbits_in_dd_p = (int)sizeof(*ddp) * CHAR_BIT;
 607     }
 608 }
 609
 610 static inline BDIGIT_DBL
 611 integer_pack_take_lowbits(int n, BDIGIT_DBL *ddp, int *numbits_in_dd_p)
 612 {
 613     BDIGIT_DBL ret;
 614     ret = (*ddp) & (((BDIGIT_DBL)1 << n) - 1);
 615     *ddp >>= n;
 616     *numbits_in_dd_p -= n;
 617     return ret;
 618 }
 619
 620 #if !defined(WORDS_BIGENDIAN)
 621 static int
 622 bytes_2comp(unsigned char *buf, size_t len)
 623 {
 624     size_t i;
 625     for (i = 0; i < len; i++) {
 626         signed char c = buf[i];
 627         signed int d = ~c;
 628         unsigned int e = d & 0xFF;
 629         buf[i] = e;
 630     }
 631     for (i = 0; i < len; i++) {
 632         buf[i]++;
 633         if (buf[i] != 0)
 634             return 0;
 635     }
 636     return 1;
 637 }
 638 #endif
 639
 640 static int
 641 bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
 642 {
 643     BDIGIT *dp, *de;
 644     unsigned char *buf, *bufend;
 645
 646     dp = ds;
 647     de = ds + num_bdigits;
 648
 649     validate_integer_pack_format(numwords, wordsize, nails, flags,
 650             INTEGER_PACK_MSWORD_FIRST|
 651             INTEGER_PACK_LSWORD_FIRST|
 652             INTEGER_PACK_MSBYTE_FIRST|
 653             INTEGER_PACK_LSBYTE_FIRST|
 654             INTEGER_PACK_NATIVE_BYTE_ORDER|
 655             INTEGER_PACK_2COMP|
 656             INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
 657
 658     while (dp < de && de[-1] == 0)
 659         de--;
 660     if (dp == de) {
 661         sign = 0;
 662     }
 663
 664     if (!(flags & INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION)) {
 665         if (sign == 0) {
 666             MEMZERO(words, unsigned char, numwords * wordsize);
 667             return 0;
 668         }
 669         if (nails == 0 && numwords == 1) {
 670             int need_swap = wordsize != 1 &&
 671                 (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_NATIVE_BYTE_ORDER &&
 672                 ((flags & INTEGER_PACK_MSBYTE_FIRST) ? !HOST_BIGENDIAN_P : HOST_BIGENDIAN_P);
 673             if (0 < sign || !(flags & INTEGER_PACK_2COMP)) {
 674                 BDIGIT d;
 675                 if (wordsize == 1) {
 676                     *((unsigned char *)words) = (unsigned char)(d = dp[0]);
 677                     return ((1 < de - dp || CLEAR_LOWBITS(d, 8) != 0) ? 2 : 1) * sign;
 678                 }
 679 #if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGIT
 680                 if (wordsize == 2 && (uintptr_t)words % RUBY_ALIGNOF(uint16_t) == 0) {
 681                     uint16_t u = (uint16_t)(d = dp[0]);
 682                     if (need_swap) u = swap16(u);
 683                     *((uint16_t *)words) = u;
 684                     return ((1 < de - dp || CLEAR_LOWBITS(d, 16) != 0) ? 2 : 1) * sign;
 685                 }
 686 #endif
 687 #if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGIT
 688                 if (wordsize == 4 && (uintptr_t)words % RUBY_ALIGNOF(uint32_t) == 0) {
 689                     uint32_t u = (uint32_t)(d = dp[0]);
 690                     if (need_swap) u = swap32(u);
 691                     *((uint32_t *)words) = u;
 692                     return ((1 < de - dp || CLEAR_LOWBITS(d, 32) != 0) ? 2 : 1) * sign;
 693                 }
 694 #endif
 695 #if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGIT
 696                 if (wordsize == 8 && (uintptr_t)words % RUBY_ALIGNOF(uint64_t) == 0) {
 697                     uint64_t u = (uint64_t)(d = dp[0]);
 698                     if (need_swap) u = swap64(u);
 699                     *((uint64_t *)words) = u;
 700                     return ((1 < de - dp || CLEAR_LOWBITS(d, 64) != 0) ? 2 : 1) * sign;
 701                 }
 702 #endif
 703             }
 704             else { /* sign < 0 && (flags & INTEGER_PACK_2COMP) */
 705                 BDIGIT_DBL_SIGNED d;
 706                 if (wordsize == 1) {
 707                     *((unsigned char *)words) = (unsigned char)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
 708                     return (1 < de - dp || FILL_LOWBITS(d, 8) != -1) ? -2 : -1;
 709                 }
 710 #if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGIT
 711                 if (wordsize == 2 && (uintptr_t)words % RUBY_ALIGNOF(uint16_t) == 0) {
 712                     uint16_t u = (uint16_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
 713                     if (need_swap) u = swap16(u);
 714                     *((uint16_t *)words) = u;
 715                     return (wordsize == SIZEOF_BDIGIT && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
 716                         (1 < de - dp || FILL_LOWBITS(d, 16) != -1) ? -2 : -1;
 717                 }
 718 #endif
 719 #if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGIT
 720                 if (wordsize == 4 && (uintptr_t)words % RUBY_ALIGNOF(uint32_t) == 0) {
 721                     uint32_t u = (uint32_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
 722                     if (need_swap) u = swap32(u);
 723                     *((uint32_t *)words) = u;
 724                     return (wordsize == SIZEOF_BDIGIT && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
 725                         (1 < de - dp || FILL_LOWBITS(d, 32) != -1) ? -2 : -1;
 726                 }
 727 #endif
 728 #if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGIT
 729                 if (wordsize == 8 && (uintptr_t)words % RUBY_ALIGNOF(uint64_t) == 0) {
 730                     uint64_t u = (uint64_t)(d = -(BDIGIT_DBL_SIGNED)dp[0]);
 731                     if (need_swap) u = swap64(u);
 732                     *((uint64_t *)words) = u;
 733                     return (wordsize == SIZEOF_BDIGIT && de - dp == 2 && dp[1] == 1 && dp[0] == 0) ? -1 :
 734                         (1 < de - dp || FILL_LOWBITS(d, 64) != -1) ? -2 : -1;
 735                 }
 736 #endif
 737             }
 738         }
 739 #if !defined(WORDS_BIGENDIAN)
 740         if (nails == 0 && SIZEOF_BDIGIT == sizeof(BDIGIT) &&
 741             (flags & INTEGER_PACK_WORDORDER_MASK) == INTEGER_PACK_LSWORD_FIRST &&
 742             (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_MSBYTE_FIRST) {
 743             size_t src_size = (de - dp) * SIZEOF_BDIGIT;
 744             size_t dst_size = numwords * wordsize;
 745             int overflow = 0;
 746             while (0 < src_size && ((unsigned char *)ds)[src_size-1] == 0)
 747                 src_size--;
 748             if (src_size <= dst_size) {
 749                 MEMCPY(words, dp, char, src_size);
 750                 MEMZERO((char*)words + src_size, char, dst_size - src_size);
 751             }
 752             else {
 753                 MEMCPY(words, dp, char, dst_size);
 754                 overflow = 1;
 755             }
 756             if (sign < 0 && (flags & INTEGER_PACK_2COMP)) {
 757                 int zero_p = bytes_2comp(words, dst_size);
 758                 if (zero_p && overflow) {
 759                     unsigned char *p = (unsigned char *)dp;
 760                     if (dst_size == src_size-1 &&
 761                         p[dst_size] == 1) {
 762                         overflow = 0;
 763                     }
 764                 }
 765             }
 766             if (overflow)
 767                 sign *= 2;
 768             return sign;
 769         }
 770 #endif
 771         if (nails == 0 && SIZEOF_BDIGIT == sizeof(BDIGIT) &&
 772             wordsize % SIZEOF_BDIGIT == 0 && (uintptr_t)words % RUBY_ALIGNOF(BDIGIT) == 0) {
 773             size_t bdigits_per_word = wordsize / SIZEOF_BDIGIT;
 774             size_t src_num_bdigits = de - dp;
 775             size_t dst_num_bdigits = numwords * bdigits_per_word;
 776             int overflow = 0;
 777             int mswordfirst_p = (flags & INTEGER_PACK_MSWORD_FIRST) != 0;
 778             int msbytefirst_p = (flags & INTEGER_PACK_NATIVE_BYTE_ORDER) ? HOST_BIGENDIAN_P :
 779                 (flags & INTEGER_PACK_MSBYTE_FIRST) != 0;
 780             if (src_num_bdigits <= dst_num_bdigits) {
 781                 MEMCPY(words, dp, BDIGIT, src_num_bdigits);
 782                 BDIGITS_ZERO((BDIGIT*)words + src_num_bdigits, dst_num_bdigits - src_num_bdigits);
 783             }
 784             else {
 785                 MEMCPY(words, dp, BDIGIT, dst_num_bdigits);
 786                 overflow = 1;
 787             }
 788             if (sign < 0 && (flags & INTEGER_PACK_2COMP)) {
 789                 int zero_p = bary_2comp(words, dst_num_bdigits);
 790                 if (zero_p && overflow &&
 791                     dst_num_bdigits == src_num_bdigits-1 &&
 792                     dp[dst_num_bdigits] == 1)
 793                     overflow = 0;
 794             }
 795             if (msbytefirst_p != HOST_BIGENDIAN_P) {
 796                 size_t i;
 797                 for (i = 0; i < dst_num_bdigits; i++) {
 798                     BDIGIT d = ((BDIGIT*)words)[i];
 799                     ((BDIGIT*)words)[i] = swap_bdigit(d);
 800                 }
 801             }
 802             if (mswordfirst_p ? !msbytefirst_p : msbytefirst_p) {
 803                 size_t i;
 804                 BDIGIT *p = words;
 805                 for (i = 0; i < numwords; i++) {
 806                     bary_swap(p, bdigits_per_word);
 807                     p += bdigits_per_word;
 808                 }
 809             }
 810             if (mswordfirst_p) {
 811                 bary_swap(words, dst_num_bdigits);
 812             }
 813             if (overflow)
 814                 sign *= 2;
 815             return sign;
 816         }
 817     }
 818
 819     buf = words;
 820     bufend = buf + numwords * wordsize;
 821
 822     if (buf == bufend) {
 823         /* overflow if non-zero*/
 824         if (!(flags & INTEGER_PACK_2COMP) || 0 <= sign)
 825             sign *= 2;
 826         else {
 827             if (de - dp == 1 && dp[0] == 1)
 828                 sign = -1; /* val == -1 == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
 829             else
 830                 sign = -2; /* val < -1 == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
 831         }
 832     }
 833     else if (dp == de) {
 834         memset(buf, '\0', bufend - buf);
 835     }
 836     else if (dp < de && buf < bufend) {
 837         int word_num_partialbits;
 838         size_t word_num_fullbytes;
 839
 840         ssize_t word_step;
 841         size_t byte_start;
 842         int byte_step;
 843
 844         size_t word_start, word_last;
 845         unsigned char *wordp, *last_wordp;
 846         BDIGIT_DBL dd;
 847         int numbits_in_dd;
 848
 849         integer_pack_loop_setup(numwords, wordsize, nails, flags,
 850             &word_num_fullbytes, &word_num_partialbits,
 851             &word_start, &word_step, &word_last, &byte_start, &byte_step);
 852
 853         wordp = buf + word_start;
 854         last_wordp = buf + word_last;
 855
 856         dd = 0;
 857         numbits_in_dd = 0;
 858
 859 #define FILL_DD \
 860     integer_pack_fill_dd(&dp, &de, &dd, &numbits_in_dd)
 861 #define TAKE_LOWBITS(n) \
 862     integer_pack_take_lowbits(n, &dd, &numbits_in_dd)
 863
 864         while (1) {
 865             size_t index_in_word = 0;
 866             unsigned char *bytep = wordp + byte_start;
 867             while (index_in_word < word_num_fullbytes) {
 868                 FILL_DD;
 869                 *bytep = TAKE_LOWBITS(CHAR_BIT);
 870                 bytep += byte_step;
 871                 index_in_word++;
 872             }
 873             if (word_num_partialbits) {
 874                 FILL_DD;
 875                 *bytep = TAKE_LOWBITS(word_num_partialbits);
 876                 bytep += byte_step;
 877                 index_in_word++;
 878             }
 879             while (index_in_word < wordsize) {
 880                 *bytep = 0;
 881                 bytep += byte_step;
 882                 index_in_word++;
 883             }
 884
 885             if (wordp == last_wordp)
 886                 break;
 887
 888             wordp += word_step;
 889         }
 890         FILL_DD;
 891         /* overflow tests */
 892         if (dp != de || 1 < dd) {
 893             /* 2**(numwords*(wordsize*CHAR_BIT-nails)+1) <= abs(val) */
 894             sign *= 2;
 895         }
 896         else if (dd == 1) {
 897             /* 2**(numwords*(wordsize*CHAR_BIT-nails)) <= abs(val) < 2**(numwords*(wordsize*CHAR_BIT-nails)+1) */
 898             if (!(flags & INTEGER_PACK_2COMP) || 0 <= sign)
 899                 sign *= 2;
 900             else { /* overflow_2comp && sign == -1 */
 901                 /* test lower bits are all zero. */
 902                 dp = ds;
 903                 while (dp < de && *dp == 0)
 904                     dp++;
 905                 if (de - dp == 1 && /* only one non-zero word. */
 906                     POW2_P(*dp)) /* *dp contains only one bit set. */
 907                     sign = -1; /* val == -2**(numwords*(wordsize*CHAR_BIT-nails)) */
 908                 else
 909                     sign = -2; /* val < -2**(numwords*(wordsize*CHAR_BIT-nails)) */
 910             }
 911         }
 912     }
 913
 914     if ((flags & INTEGER_PACK_2COMP) && (sign < 0 && numwords != 0)) {
 915         int word_num_partialbits;
 916         size_t word_num_fullbytes;
 917
 918         ssize_t word_step;
 919         size_t byte_start;
 920         int byte_step;
 921
 922         size_t word_start, word_last;
 923         unsigned char *wordp, *last_wordp;
 924
 925         unsigned int partialbits_mask;
 926         int carry;
 927
 928         integer_pack_loop_setup(numwords, wordsize, nails, flags,
 929             &word_num_fullbytes, &word_num_partialbits,
 930             &word_start, &word_step, &word_last, &byte_start, &byte_step);
 931
 932         partialbits_mask = (1 << word_num_partialbits) - 1;
 933
 934         buf = words;
 935         wordp = buf + word_start;
 936         last_wordp = buf + word_last;
 937
 938         carry = 1;
 939         while (1) {
 940             size_t index_in_word = 0;
 941             unsigned char *bytep = wordp + byte_start;
 942             while (index_in_word < word_num_fullbytes) {
 943                 carry += (unsigned char)~*bytep;
 944                 *bytep = (unsigned char)carry;
 945                 carry >>= CHAR_BIT;
 946                 bytep += byte_step;
 947                 index_in_word++;
 948             }
 949             if (word_num_partialbits) {
 950                 carry += (*bytep & partialbits_mask) ^ partialbits_mask;
 951                 *bytep = carry & partialbits_mask;
 952                 carry >>= word_num_partialbits;
 953                 bytep += byte_step;
 954                 index_in_word++;
 955             }
 956
 957             if (wordp == last_wordp)
 958                 break;
 959
 960             wordp += word_step;
 961         }
 962     }
 963
 964     return sign;
 965 #undef FILL_DD
 966 #undef TAKE_LOWBITS
 967 }
 968
 969 static size_t
 970 integer_unpack_num_bdigits_small(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
 971 {
 972     /* nlp_bits stands for number of leading padding bits */
 973     size_t num_bits = (wordsize * CHAR_BIT - nails) * numwords;
 974     size_t num_bdigits = (num_bits + BITSPERDIG - 1) / BITSPERDIG;
 975     *nlp_bits_ret = (int)(num_bdigits * BITSPERDIG - num_bits);
 976     return num_bdigits;
 977 }
 978
 979 static size_t
 980 integer_unpack_num_bdigits_generic(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
 981 {
 982     /* BITSPERDIG = SIZEOF_BDIGIT * CHAR_BIT */
 983     /* num_bits = (wordsize * CHAR_BIT - nails) * numwords */
 984     /* num_bdigits = (num_bits + BITSPERDIG - 1) / BITSPERDIG */
 985
 986     /* num_bits = CHAR_BIT * (wordsize * numwords) - nails * numwords = CHAR_BIT * num_bytes1 - nails * numwords */
 987     size_t num_bytes1 = wordsize * numwords;
 988
 989     /* q1 * CHAR_BIT + r1 = numwords */
 990     size_t q1 = numwords / CHAR_BIT;
 991     size_t r1 = numwords % CHAR_BIT;
 992
 993     /* num_bits = CHAR_BIT * num_bytes1 - nails * (q1 * CHAR_BIT + r1) = CHAR_BIT * num_bytes2 - nails * r1 */
 994     size_t num_bytes2 = num_bytes1 - nails * q1;
 995
 996     /* q2 * CHAR_BIT + r2 = nails */
 997     size_t q2 = nails / CHAR_BIT;
 998     size_t r2 = nails % CHAR_BIT;
 999
1000     /* num_bits = CHAR_BIT * num_bytes2 - (q2 * CHAR_BIT + r2) * r1 = CHAR_BIT * num_bytes3 - r1 * r2 */
1001     size_t num_bytes3 = num_bytes2 - q2 * r1;
1002
1003     /* q3 * BITSPERDIG + r3 = num_bytes3 */
1004     size_t q3 = num_bytes3 / BITSPERDIG;
1005     size_t r3 = num_bytes3 % BITSPERDIG;
1006
1007     /* num_bits = CHAR_BIT * (q3 * BITSPERDIG + r3) - r1 * r2 = BITSPERDIG * num_digits1 + CHAR_BIT * r3 - r1 * r2 */
1008     size_t num_digits1 = CHAR_BIT * q3;
1009
1010     /*
1011      * if CHAR_BIT * r3 >= r1 * r2
1012      *   CHAR_BIT * r3 - r1 * r2 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2))
1013      *   q4 * BITSPERDIG + r4 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2)
1014      *   num_bits = BITSPERDIG * num_digits1 + CHAR_BIT * BITSPERDIG - (q4 * BITSPERDIG + r4) = BITSPERDIG * num_digits2 - r4
1015      * else
1016      *   q4 * BITSPERDIG + r4 = -(CHAR_BIT * r3 - r1 * r2)
1017      *   num_bits = BITSPERDIG * num_digits1 - (q4 * BITSPERDIG + r4) = BITSPERDIG * num_digits2 - r4
1018      * end
1019      */
1020
1021     if (CHAR_BIT * r3 >= r1 * r2) {
1022         size_t tmp1 = CHAR_BIT * BITSPERDIG - (CHAR_BIT * r3 - r1 * r2);
1023         size_t q4 = tmp1 / BITSPERDIG;
1024         int r4 = (int)(tmp1 % BITSPERDIG);
1025         size_t num_digits2 = num_digits1 + CHAR_BIT - q4;
1026         *nlp_bits_ret = r4;
1027         return num_digits2;
1028     }
1029     else {
1030         size_t tmp1 = r1 * r2 - CHAR_BIT * r3;
1031         size_t q4 = tmp1 / BITSPERDIG;
1032         int r4 = (int)(tmp1 % BITSPERDIG);
1033         size_t num_digits2 = num_digits1 - q4;
1034         *nlp_bits_ret = r4;
1035         return num_digits2;
1036     }
1037 }
1038
1039 static size_t
1040 integer_unpack_num_bdigits(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret)
1041 {
1042     size_t num_bdigits;
1043
1044     if (numwords <= (SIZE_MAX - (BITSPERDIG-1)) / CHAR_BIT / wordsize) {
1045         num_bdigits = integer_unpack_num_bdigits_small(numwords, wordsize, nails, nlp_bits_ret);
1046 #ifdef DEBUG_INTEGER_PACK
1047         {
1048             int nlp_bits1;
1049             size_t num_bdigits1 = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, &nlp_bits1);
1050             assert(num_bdigits == num_bdigits1);
1051             assert(*nlp_bits_ret == nlp_bits1);
1052             (void)num_bdigits1;
1053         }
1054 #endif
1055     }
1056     else {
1057         num_bdigits = integer_unpack_num_bdigits_generic(numwords, wordsize, nails, nlp_bits_ret);
1058     }
1059     return num_bdigits;
1060 }
1061
1062 static inline void
1063 integer_unpack_push_bits(int data, int numbits, BDIGIT_DBL *ddp, int *numbits_in_dd_p, BDIGIT **dpp)
1064 {
1065     (*ddp) |= ((BDIGIT_DBL)data) << (*numbits_in_dd_p);
1066     *numbits_in_dd_p += numbits;
1067     while (BITSPERDIG <= *numbits_in_dd_p) {
1068         *(*dpp)++ = BIGLO(*ddp);
1069         *ddp = BIGDN(*ddp);
1070         *numbits_in_dd_p -= BITSPERDIG;
1071     }
1072 }
1073
1074 static int
1075 integer_unpack_single_bdigit(BDIGIT u, size_t size, int flags, BDIGIT *dp)
1076 {
1077     int sign;
1078     if (flags & INTEGER_PACK_2COMP) {
1079         sign = (flags & INTEGER_PACK_NEGATIVE) ?
1080             ((size == SIZEOF_BDIGIT && u == 0) ? -2 : -1) :
1081             ((u >> (size * CHAR_BIT - 1)) ? -1 : 1);
1082         if (sign < 0) {
1083             u |= LSHIFTX(BDIGMAX, size * CHAR_BIT);
1084             u = BIGLO(1 + ~u);
1085         }
1086     }
1087     else
1088         sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
1089     *dp = u;
1090     return sign;
1091 }
1092
1093 #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1094 #define reinterpret_cast(type, value) (type) \
1095     __builtin_assume_aligned((value), sizeof(*(type)NULL));
1096 #else
1097 #define reinterpret_cast(type, value) (type)value
1098 #endif
1099
1100 static int
1101 bary_unpack_internal(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags, int nlp_bits)
1102 {
1103     int sign;
1104     const unsigned char *buf = words;
1105     BDIGIT *dp;
1106     BDIGIT *de;
1107
1108     dp = bdigits;
1109     de = dp + num_bdigits;
1110
1111     if (!(flags & INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION)) {
1112         if (nails == 0 && numwords == 1) {
1113             int need_swap = wordsize != 1 &&
1114                 (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_NATIVE_BYTE_ORDER &&
1115                 ((flags & INTEGER_PACK_MSBYTE_FIRST) ? !HOST_BIGENDIAN_P : HOST_BIGENDIAN_P);
1116             if (wordsize == 1) {
1117                 return integer_unpack_single_bdigit(*(uint8_t *)buf, sizeof(uint8_t), flags, dp);
1118             }
1119 #if defined(HAVE_UINT16_T) && 2 <= SIZEOF_BDIGIT
1120             if (wordsize == 2 && (uintptr_t)words % RUBY_ALIGNOF(uint16_t) == 0) {
1121                 uint16_t u = *reinterpret_cast(const uint16_t *, buf);
1122                 return integer_unpack_single_bdigit(need_swap ? swap16(u) : u, sizeof(uint16_t), flags, dp);
1123             }
1124 #endif
1125 #if defined(HAVE_UINT32_T) && 4 <= SIZEOF_BDIGIT
1126             if (wordsize == 4 && (uintptr_t)words % RUBY_ALIGNOF(uint32_t) == 0) {
1127                 uint32_t u = *reinterpret_cast(const uint32_t *, buf);
1128                 return integer_unpack_single_bdigit(need_swap ? swap32(u) : u, sizeof(uint32_t), flags, dp);
1129             }
1130 #endif
1131 #if defined(HAVE_UINT64_T) && 8 <= SIZEOF_BDIGIT
1132             if (wordsize == 8 && (uintptr_t)words % RUBY_ALIGNOF(uint64_t) == 0) {
1133                 uint64_t u = *reinterpret_cast(const uint64_t *, buf);
1134                 return integer_unpack_single_bdigit(need_swap ? swap64(u) : u, sizeof(uint64_t), flags, dp);
1135             }
1136 #endif
1137 #undef reinterpret_cast
1138         }
1139 #if !defined(WORDS_BIGENDIAN)
1140         if (nails == 0 && SIZEOF_BDIGIT == sizeof(BDIGIT) &&
1141             (flags & INTEGER_PACK_WORDORDER_MASK) == INTEGER_PACK_LSWORD_FIRST &&
1142             (flags & INTEGER_PACK_BYTEORDER_MASK) != INTEGER_PACK_MSBYTE_FIRST) {
1143             size_t src_size = numwords * wordsize;
1144             size_t dst_size = num_bdigits * SIZEOF_BDIGIT;
1145             MEMCPY(dp, words, char, src_size);
1146             if (flags & INTEGER_PACK_2COMP) {
1147                 if (flags & INTEGER_PACK_NEGATIVE) {
1148                     int zero_p;
1149                     memset((char*)dp + src_size, 0xff, dst_size - src_size);
1150                     zero_p = bary_2comp(dp, num_bdigits);
1151                     sign = zero_p ? -2 : -1;
1152                 }
1153                 else if (buf[src_size-1] >> (CHAR_BIT-1)) {
1154                     memset((char*)dp + src_size, 0xff, dst_size - src_size);
1155                     bary_2comp(dp, num_bdigits);
1156                     sign = -1;
1157                 }
1158                 else {
1159                     MEMZERO((char*)dp + src_size, char, dst_size - src_size);
1160                     sign = 1;
1161                 }
1162             }
1163             else {
1164                 MEMZERO((char*)dp + src_size, char, dst_size - src_size);
1165                 sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
1166             }
1167             return sign;
1168         }
1169 #endif
1170         if (nails == 0 && SIZEOF_BDIGIT == sizeof(BDIGIT) &&
1171             wordsize % SIZEOF_BDIGIT == 0) {
1172             size_t bdigits_per_word = wordsize / SIZEOF_BDIGIT;
1173             int mswordfirst_p = (flags & INTEGER_PACK_MSWORD_FIRST) != 0;
1174             int msbytefirst_p = (flags & INTEGER_PACK_NATIVE_BYTE_ORDER) ? HOST_BIGENDIAN_P :
1175                 (flags & INTEGER_PACK_MSBYTE_FIRST) != 0;
1176             MEMCPY(dp, words, BDIGIT, numwords*bdigits_per_word);
1177             if (mswordfirst_p) {
1178                 bary_swap(dp, num_bdigits);
1179             }
1180             if (mswordfirst_p ? !msbytefirst_p : msbytefirst_p) {
1181                 size_t i;
1182                 BDIGIT *p = dp;
1183                 for (i = 0; i < numwords; i++) {
1184                     bary_swap(p, bdigits_per_word);
1185                     p += bdigits_per_word;
1186                 }
1187             }
1188             if (msbytefirst_p != HOST_BIGENDIAN_P) {
1189                 BDIGIT *p;
1190                 for (p = dp; p < de; p++) {
1191                     BDIGIT d = *p;
1192                     *p = swap_bdigit(d);
1193                 }
1194             }
1195             if (flags & INTEGER_PACK_2COMP) {
1196                 if (flags & INTEGER_PACK_NEGATIVE) {
1197                     int zero_p = bary_2comp(dp, num_bdigits);
1198                     sign = zero_p ? -2 : -1;
1199                 }
1200                 else if (BDIGIT_MSB(de[-1])) {
1201                     bary_2comp(dp, num_bdigits);
1202                     sign = -1;
1203                 }
1204                 else {
1205                     sign = 1;
1206                 }
1207             }
1208             else {
1209                 sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
1210             }
1211             return sign;
1212         }
1213     }
1214
1215     if (num_bdigits != 0) {
1216         int word_num_partialbits;
1217         size_t word_num_fullbytes;
1218
1219         ssize_t word_step;
1220         size_t byte_start;
1221         int byte_step;
1222
1223         size_t word_start, word_last;
1224         const unsigned char *wordp, *last_wordp;
1225         BDIGIT_DBL dd;
1226         int numbits_in_dd;
1227
1228         integer_pack_loop_setup(numwords, wordsize, nails, flags,
1229             &word_num_fullbytes, &word_num_partialbits,
1230             &word_start, &word_step, &word_last, &byte_start, &byte_step);
1231
1232         wordp = buf + word_start;
1233         last_wordp = buf + word_last;
1234
1235         dd = 0;
1236         numbits_in_dd = 0;
1237
1238 #define PUSH_BITS(data, numbits) \
1239         integer_unpack_push_bits(data, numbits, &dd, &numbits_in_dd, &dp)
1240
1241         while (1) {
1242             size_t index_in_word = 0;
1243             const unsigned char *bytep = wordp + byte_start;
1244             while (index_in_word < word_num_fullbytes) {
1245                 PUSH_BITS(*bytep, CHAR_BIT);
1246                 bytep += byte_step;
1247                 index_in_word++;
1248             }
1249             if (word_num_partialbits) {
1250                 PUSH_BITS(*bytep & ((1 << word_num_partialbits) - 1), word_num_partialbits);
1251                 bytep += byte_step;
1252                 index_in_word++;
1253             }
1254
1255             if (wordp == last_wordp)
1256                 break;
1257
1258             wordp += word_step;
1259         }
1260         if (dd)
1261             *dp++ = (BDIGIT)dd;
1262         assert(dp <= de);
1263         while (dp < de)
1264             *dp++ = 0;
1265 #undef PUSH_BITS
1266     }
1267
1268     if (!(flags & INTEGER_PACK_2COMP)) {
1269         sign = (flags & INTEGER_PACK_NEGATIVE) ? -1 : 1;
1270     }
1271     else {
1272         if (nlp_bits) {
1273             if ((flags & INTEGER_PACK_NEGATIVE) ||
1274                 (bdigits[num_bdigits-1] >> (BITSPERDIG - nlp_bits - 1))) {
1275                 bdigits[num_bdigits-1] |= BIGLO(BDIGMAX << (BITSPERDIG - nlp_bits));
1276                 sign = -1;
1277             }
1278             else {
1279                 sign = 1;
1280             }
1281         }
1282         else {
1283             if (flags & INTEGER_PACK_NEGATIVE) {
1284                 sign = bary_zero_p(bdigits, num_bdigits) ? -2 : -1;
1285             }
1286             else {
1287                 if (num_bdigits != 0 && BDIGIT_MSB(bdigits[num_bdigits-1]))
1288                     sign = -1;
1289                 else
1290                     sign = 1;
1291             }
1292         }
1293         if (sign == -1 && num_bdigits != 0) {
1294             bary_2comp(bdigits, num_bdigits);
1295         }
1296     }
1297
1298     return sign;
1299 }
1300
1301 static void
1302 bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
1303 {
1304     size_t num_bdigits0;
1305     int nlp_bits;
1306     int sign;
1307
1308     validate_integer_pack_format(numwords, wordsize, nails, flags,
1309             INTEGER_PACK_MSWORD_FIRST|
1310             INTEGER_PACK_LSWORD_FIRST|
1311             INTEGER_PACK_MSBYTE_FIRST|
1312             INTEGER_PACK_LSBYTE_FIRST|
1313             INTEGER_PACK_NATIVE_BYTE_ORDER|
1314             INTEGER_PACK_2COMP|
1315             INTEGER_PACK_FORCE_BIGNUM|
1316             INTEGER_PACK_NEGATIVE|
1317             INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
1318
1319     num_bdigits0 = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
1320
1321     assert(num_bdigits0 <= num_bdigits);
1322
1323     sign = bary_unpack_internal(bdigits, num_bdigits0, words, numwords, wordsize, nails, flags, nlp_bits);
1324
1325     if (num_bdigits0 < num_bdigits) {
1326         BDIGITS_ZERO(bdigits + num_bdigits0, num_bdigits - num_bdigits0);
1327         if (sign == -2) {
1328             bdigits[num_bdigits0] = 1;
1329         }
1330     }
1331 }
1332
1333 static int
1334 bary_subb(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, int borrow)
1335 {
1336     BDIGIT_DBL_SIGNED num;
1337     size_t i;
1338     size_t sn;
1339
1340     assert(xn <= zn);
1341     assert(yn <= zn);
1342
1343     sn = xn < yn ? xn : yn;
1344
1345     num = borrow ? -1 : 0;
1346     for (i = 0; i < sn; i++) {
1347         num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i];
1348         zds[i] = BIGLO(num);
1349         num = BIGDN(num);
1350     }
1351     if (yn <= xn) {
1352         for (; i < xn; i++) {
1353             if (num == 0) goto num_is_zero;
1354             num += xds[i];
1355             zds[i] = BIGLO(num);
1356             num = BIGDN(num);
1357         }
1358     }
1359     else {
1360         for (; i < yn; i++) {
1361             num -= yds[i];
1362             zds[i] = BIGLO(num);
1363             num = BIGDN(num);
1364         }
1365     }
1366     if (num == 0) goto num_is_zero;
1367     for (; i < zn; i++) {
1368         zds[i] = BDIGMAX;
1369     }
1370     return 1;
1371
1372   num_is_zero:
1373     if (xds == zds && xn == zn)
1374         return 0;
1375     for (; i < xn; i++) {
1376         zds[i] = xds[i];
1377     }
1378     for (; i < zn; i++) {
1379         zds[i] = 0;
1380     }
1381     return 0;
1382 }
1383
1384 static int
1385 bary_sub(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
1386 {
1387     return bary_subb(zds, zn, xds, xn, yds, yn, 0);
1388 }
1389
1390 static int
1391 bary_sub_one(BDIGIT *zds, size_t zn)
1392 {
1393     return bary_subb(zds, zn, zds, zn, NULL, 0, 1);
1394 }
1395
1396 static int
1397 bary_addc(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, int carry)
1398 {
1399     BDIGIT_DBL num;
1400     size_t i;
1401
1402     assert(xn <= zn);
1403     assert(yn <= zn);
1404
1405     if (xn > yn) {
1406         const BDIGIT *tds;
1407         tds = xds; xds = yds; yds = tds;
1408         i = xn; xn = yn; yn = i;
1409     }
1410
1411     num = carry ? 1 : 0;
1412     for (i = 0; i < xn; i++) {
1413         num += (BDIGIT_DBL)xds[i] + yds[i];
1414         zds[i] = BIGLO(num);
1415         num = BIGDN(num);
1416     }
1417     for (; i < yn; i++) {
1418         if (num == 0) goto num_is_zero;
1419         num += yds[i];
1420         zds[i] = BIGLO(num);
1421         num = BIGDN(num);
1422     }
1423     for (; i < zn; i++) {
1424         if (num == 0) goto num_is_zero;
1425         zds[i] = BIGLO(num);
1426         num = BIGDN(num);
1427     }
1428     return num != 0;
1429
1430   num_is_zero:
1431     if (yds == zds && yn == zn)
1432         return 0;
1433     for (; i < yn; i++) {
1434         zds[i] = yds[i];
1435     }
1436     for (; i < zn; i++) {
1437         zds[i] = 0;
1438     }
1439     return 0;
1440 }
1441
1442 static int
1443 bary_add(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
1444 {
1445     return bary_addc(zds, zn, xds, xn, yds, yn, 0);
1446 }
1447
1448 static int
1449 bary_add_one(BDIGIT *ds, size_t n)
1450 {
1451     size_t i;
1452     for (i = 0; i < n; i++) {
1453         BDIGIT_DBL n = ds[i];
1454         n += 1;
1455         ds[i] = BIGLO(n);
1456         if (ds[i] != 0)
1457             return 0;
1458     }
1459     return 1;
1460 }
1461
1462 static void
1463 bary_mul_single(BDIGIT *zds, size_t zn, BDIGIT x, BDIGIT y)
1464 {
1465     BDIGIT_DBL n;
1466
1467     assert(2 <= zn);
1468
1469     n = (BDIGIT_DBL)x * y;
1470     bdigitdbl2bary(zds, 2, n);
1471     BDIGITS_ZERO(zds + 2, zn - 2);
1472 }
1473
1474 static int
1475 bary_muladd_1xN(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
1476 {
1477     BDIGIT_DBL n;
1478     BDIGIT_DBL dd;
1479     size_t j;
1480
1481     assert(zn > yn);
1482
1483     if (x == 0)
1484         return 0;
1485     dd = x;
1486     n = 0;
1487     for (j = 0; j < yn; j++) {
1488         BDIGIT_DBL ee = n + dd * yds[j];
1489         if (ee) {
1490             n = zds[j] + ee;
1491             zds[j] = BIGLO(n);
1492             n = BIGDN(n);
1493         }
1494         else {
1495             n = 0;
1496         }
1497
1498     }
1499     for (; j < zn; j++) {
1500         if (n == 0)
1501             break;
1502         n += zds[j];
1503         zds[j] = BIGLO(n);
1504         n = BIGDN(n);
1505     }
1506     return n != 0;
1507 }
1508
1509 static BDIGIT_DBL_SIGNED
1510 bigdivrem_mulsub(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
1511 {
1512     size_t i;
1513     BDIGIT_DBL t2;
1514     BDIGIT_DBL_SIGNED num;
1515
1516     assert(zn == yn + 1);
1517
1518     num = 0;
1519     t2 = 0;
1520     i = 0;
1521
1522     do {
1523         BDIGIT_DBL_SIGNED ee;
1524         t2 += (BDIGIT_DBL)yds[i] * x;
1525         ee = num - BIGLO(t2);
1526         num = (BDIGIT_DBL_SIGNED)zds[i] + ee;
1527         if (ee) zds[i] = BIGLO(num);
1528         num = BIGDN(num);
1529         t2 = BIGDN(t2);
1530     } while (++i < yn);
1531     num -= (BDIGIT_DBL_SIGNED)t2;
1532     num += (BDIGIT_DBL_SIGNED)zds[yn]; /* borrow from high digit; don't update */
1533     return num;
1534 }
1535
1536 static int
1537 bary_mulsub_1xN(BDIGIT *zds, size_t zn, BDIGIT x, const BDIGIT *yds, size_t yn)
1538 {
1539     BDIGIT_DBL_SIGNED num;
1540
1541     assert(zn == yn + 1);
1542
1543     num = bigdivrem_mulsub(zds, zn, x, yds, yn);
1544     zds[yn] = BIGLO(num);
1545     if (BIGDN(num))
1546         return 1;
1547     return 0;
1548 }
1549
1550 static void
1551 bary_mul_normal(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
1552 {
1553     size_t i;
1554
1555     assert(xn + yn <= zn);
1556
1557     BDIGITS_ZERO(zds, zn);
1558     for (i = 0; i < xn; i++) {
1559         bary_muladd_1xN(zds+i, zn-i, xds[i], yds, yn);
1560     }
1561 }
1562
1563 VALUE
1564 rb_big_mul_normal(VALUE x, VALUE y)
1565 {
1566     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
1567     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
1568     bary_mul_normal(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn);
1569     RB_GC_GUARD(x);
1570     RB_GC_GUARD(y);
1571     return z;
1572 }
1573
1574 /* efficient squaring (2 times faster than normal multiplication)
1575  * ref: Handbook of Applied Cryptography, Algorithm 14.16
1576  *      http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf
1577  */
1578 static void
1579 bary_sq_fast(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn)
1580 {
1581     size_t i, j;
1582     BDIGIT_DBL c, v, w;
1583     BDIGIT vl;
1584     int vh;
1585
1586     assert(xn * 2 <= zn);
1587
1588     BDIGITS_ZERO(zds, zn);
1589
1590     if (xn == 0)
1591         return;
1592
1593     for (i = 0; i < xn-1; i++) {
1594         v = (BDIGIT_DBL)xds[i];
1595         if (!v)
1596             continue;
1597         c = (BDIGIT_DBL)zds[i + i] + v * v;
1598         zds[i + i] = BIGLO(c);
1599         c = BIGDN(c);
1600         v *= 2;
1601         vl = BIGLO(v);
1602         vh = (int)BIGDN(v);
1603         for (j = i + 1; j < xn; j++) {
1604             w = (BDIGIT_DBL)xds[j];
1605             c += (BDIGIT_DBL)zds[i + j] + vl * w;
1606             zds[i + j] = BIGLO(c);
1607             c = BIGDN(c);
1608             if (vh)
1609                 c += w;
1610         }
1611         if (c) {
1612             c += (BDIGIT_DBL)zds[i + xn];
1613             zds[i + xn] = BIGLO(c);
1614             c = BIGDN(c);
1615             if (c)
1616                 zds[i + xn + 1] += (BDIGIT)c;
1617         }
1618     }
1619
1620     /* i == xn-1 */
1621     v = (BDIGIT_DBL)xds[i];
1622     if (!v)
1623         return;
1624     c = (BDIGIT_DBL)zds[i + i] + v * v;
1625     zds[i + i] = BIGLO(c);
1626     c = BIGDN(c);
1627     if (c) {
1628         zds[i + xn] += BIGLO(c);
1629     }
1630 }
1631
1632 VALUE
1633 rb_big_sq_fast(VALUE x)
1634 {
1635     size_t xn = BIGNUM_LEN(x), zn = 2 * xn;
1636     VALUE z = bignew(zn, 1);
1637     bary_sq_fast(BDIGITS(z), zn, BDIGITS(x), xn);
1638     RB_GC_GUARD(x);
1639     return z;
1640 }
1641
1642 /* balancing multiplication by slicing larger argument */
1643 static void
1644 bary_mul_balance_with_mulfunc(BDIGIT *const zds, const size_t zn,
1645                               const BDIGIT *const xds, const size_t xn,
1646                               const BDIGIT *const yds, const size_t yn,
1647                               BDIGIT *wds, size_t wn, mulfunc_t *const mulfunc)
1648 {
1649     VALUE work = 0;
1650     size_t n;
1651
1652     assert(xn + yn <= zn);
1653     assert(xn <= yn);
1654     assert(!KARATSUBA_BALANCED(xn, yn) || !TOOM3_BALANCED(xn, yn));
1655
1656     BDIGITS_ZERO(zds, xn);
1657
1658     if (wn < xn) {
1659         const size_t r = (yn % xn) ? (yn % xn) : xn;
1660         if ((2 * xn + yn + r) > zn) {
1661             wn = xn;
1662             wds = ALLOCV_N(BDIGIT, work, wn);
1663         }
1664     }
1665
1666     n = 0;
1667     while (yn > n) {
1668         const size_t r = (xn > (yn - n) ? (yn - n) : xn);
1669         const size_t tn = (xn + r);
1670         if (2 * (xn + r) <= zn - n) {
1671             BDIGIT *const tds = zds + n + xn + r;
1672             mulfunc(tds, tn, xds, xn, yds + n, r, wds, wn);
1673             BDIGITS_ZERO(zds + n + xn, r);
1674             bary_add(zds + n, tn,
1675                      zds + n, tn,
1676                      tds, tn);
1677         }
1678         else {
1679             BDIGIT *const tds = zds + n;
1680             if (wn < xn) {
1681                 /* xn is invariant, only once here */
1682 #if 0
1683                 wn = xn;
1684                 wds = ALLOCV_N(BDIGIT, work, wn);
1685 #else
1686                 rb_bug("wds is not enough: %" PRIdSIZE " for %" PRIdSIZE, wn, xn);
1687 #endif
1688             }
1689             MEMCPY(wds, zds + n, BDIGIT, xn);
1690             mulfunc(tds, tn, xds, xn, yds + n, r, wds+xn, wn-xn);
1691             bary_add(zds + n, tn,
1692                      zds + n, tn,
1693                      wds, xn);
1694         }
1695         n += r;
1696     }
1697     BDIGITS_ZERO(zds+xn+yn, zn - (xn+yn));
1698
1699     if (work)
1700         ALLOCV_END(work);
1701 }
1702
1703 VALUE
1704 rb_big_mul_balance(VALUE x, VALUE y)
1705 {
1706     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
1707     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
1708     bary_mul_balance_with_mulfunc(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, NULL, 0, bary_mul_toom3_start);
1709     RB_GC_GUARD(x);
1710     RB_GC_GUARD(y);
1711     return z;
1712 }
1713
1714 /* multiplication by karatsuba method */
1715 static void
1716 bary_mul_karatsuba(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
1717 {
1718     VALUE work = 0;
1719
1720     size_t n;
1721     int sub_p, borrow, carry1, carry2, carry3;
1722
1723     int odd_y = 0;
1724     int odd_xy = 0;
1725     int sq;
1726
1727     const BDIGIT *xds0, *xds1, *yds0, *yds1;
1728     BDIGIT *zds0, *zds1, *zds2, *zds3;
1729
1730     assert(xn + yn <= zn);
1731     assert(xn <= yn);
1732     assert(yn < 2 * xn);
1733
1734     sq = xds == yds && xn == yn;
1735
1736     if (yn & 1) {
1737         odd_y = 1;
1738         yn--;
1739         if (yn < xn) {
1740             odd_xy = 1;
1741             xn--;
1742         }
1743     }
1744
1745     n = yn / 2;
1746
1747     assert(n < xn);
1748
1749     if (wn < n) {
1750         /* This function itself needs only n BDIGITs for work area.
1751          * However this function calls bary_mul_karatsuba and
1752          * bary_mul_balance recursively.
1753          * 2n BDIGITs are enough to avoid allocations in
1754          * the recursively called functions.
1755          */
1756         wn = 2*n;
1757         wds = ALLOCV_N(BDIGIT, work, wn);
1758     }
1759
1760     /* Karatsuba algorithm:
1761      *
1762      * x = x0 + r*x1
1763      * y = y0 + r*y1
1764      * z = x*y
1765      *   = (x0 + r*x1) * (y0 + r*y1)
1766      *   = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1
1767      *   = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1
1768      *   = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1
1769      */
1770
1771     xds0 = xds;
1772     xds1 = xds + n;
1773     yds0 = yds;
1774     yds1 = yds + n;
1775     zds0 = zds;
1776     zds1 = zds + n;
1777     zds2 = zds + 2*n;
1778     zds3 = zds + 3*n;
1779
1780     sub_p = 1;
1781
1782     /* zds0:? zds1:? zds2:? zds3:? wds:? */
1783
1784     if (bary_sub(zds0, n, xds, n, xds+n, xn-n)) {
1785         bary_2comp(zds0, n);
1786         sub_p = !sub_p;
1787     }
1788
1789     /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */
1790
1791     if (sq) {
1792         sub_p = 1;
1793         bary_mul_karatsuba_start(zds1, 2*n, zds0, n, zds0, n, wds, wn);
1794     }
1795     else {
1796         if (bary_sub(wds, n, yds, n, yds+n, n)) {
1797             bary_2comp(wds, n);
1798             sub_p = !sub_p;
1799         }
1800
1801         /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */
1802
1803         bary_mul_karatsuba_start(zds1, 2*n, zds0, n, wds, n, wds+n, wn-n);
1804     }
1805
1806     /* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
1807
1808     borrow = 0;
1809     if (sub_p) {
1810         borrow = !bary_2comp(zds1, 2*n);
1811     }
1812     /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
1813
1814     MEMCPY(wds, zds1, BDIGIT, n);
1815
1816     /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
1817
1818     bary_mul_karatsuba_start(zds0, 2*n, xds0, n, yds0, n, wds+n, wn-n);
1819
1820     /* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
1821
1822     carry1 = bary_add(wds, n, wds, n, zds0, n);
1823     carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1);
1824
1825     /* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
1826
1827     carry2 = bary_add(zds1, n, zds1, n, wds, n);
1828
1829     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
1830
1831     MEMCPY(wds, zds2, BDIGIT, n);
1832
1833     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
1834
1835     bary_mul_karatsuba_start(zds2, zn-2*n, xds1, xn-n, yds1, n, wds+n, wn-n);
1836
1837     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
1838
1839     carry3 = bary_add(zds1, n, zds1, n, zds2, n);
1840
1841     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
1842
1843     carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zn ? n : zn-3*n), carry3);
1844
1845     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
1846
1847     bary_add(zds2, zn-2*n, zds2, zn-2*n, wds, n);
1848
1849     /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */
1850
1851     if (carry2)
1852         bary_add_one(zds2, zn-2*n);
1853
1854     if (carry1 + carry3 - borrow < 0)
1855         bary_sub_one(zds3, zn-3*n);
1856     else if (carry1 + carry3 - borrow > 0) {
1857         BDIGIT c = carry1 + carry3 - borrow;
1858         bary_add(zds3, zn-3*n, zds3, zn-3*n, &c, 1);
1859     }
1860
1861     /*
1862     if (SIZEOF_BDIGIT * zn <= 16) {
1863         uint128_t z, x, y;
1864         ssize_t i;
1865         for (x = 0, i = xn-1; 0 <= i; i--) { x <<= SIZEOF_BDIGIT*CHAR_BIT; x |= xds[i]; }
1866         for (y = 0, i = yn-1; 0 <= i; i--) { y <<= SIZEOF_BDIGIT*CHAR_BIT; y |= yds[i]; }
1867         for (z = 0, i = zn-1; 0 <= i; i--) { z <<= SIZEOF_BDIGIT*CHAR_BIT; z |= zds[i]; }
1868         assert(z == x * y);
1869     }
1870     */
1871
1872     if (odd_xy) {
1873         bary_muladd_1xN(zds+yn, zn-yn, yds[yn], xds, xn);
1874         bary_muladd_1xN(zds+xn, zn-xn, xds[xn], yds, yn+1);
1875     }
1876     else if (odd_y) {
1877         bary_muladd_1xN(zds+yn, zn-yn, yds[yn], xds, xn);
1878     }
1879
1880     if (work)
1881         ALLOCV_END(work);
1882 }
1883
1884 VALUE
1885 rb_big_mul_karatsuba(VALUE x, VALUE y)
1886 {
1887     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
1888     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
1889     if (!((xn <= yn && yn < 2) || KARATSUBA_BALANCED(xn, yn)))
1890         rb_raise(rb_eArgError, "unexpected bignum length for karatsuba");
1891     bary_mul_karatsuba(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, NULL, 0);
1892     RB_GC_GUARD(x);
1893     RB_GC_GUARD(y);
1894     return z;
1895 }
1896
1897 static void
1898 bary_mul_toom3(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
1899 {
1900     size_t n;
1901     size_t wnc;
1902     VALUE work = 0;
1903
1904     /* "p" stands for "positive".  Actually it means "non-negative", though. */
1905     size_t x0n; const BDIGIT *x0ds;
1906     size_t x1n; const BDIGIT *x1ds;
1907     size_t x2n; const BDIGIT *x2ds;
1908     size_t y0n; const BDIGIT *y0ds;
1909     size_t y1n; const BDIGIT *y1ds;
1910     size_t y2n; const BDIGIT *y2ds;
1911
1912     size_t u1n; BDIGIT *u1ds; int u1p;
1913     size_t u2n; BDIGIT *u2ds; int u2p;
1914     size_t u3n; BDIGIT *u3ds; int u3p;
1915
1916     size_t v1n; BDIGIT *v1ds; int v1p;
1917     size_t v2n; BDIGIT *v2ds; int v2p;
1918     size_t v3n; BDIGIT *v3ds; int v3p;
1919
1920     size_t t0n; BDIGIT *t0ds; int t0p;
1921     size_t t1n; BDIGIT *t1ds; int t1p;
1922     size_t t2n; BDIGIT *t2ds; int t2p;
1923     size_t t3n; BDIGIT *t3ds; int t3p;
1924     size_t t4n; BDIGIT *t4ds; int t4p;
1925
1926     size_t z0n; BDIGIT *z0ds;
1927     size_t z1n; BDIGIT *z1ds; int z1p;
1928     size_t z2n; BDIGIT *z2ds; int z2p;
1929     size_t z3n; BDIGIT *z3ds; int z3p;
1930     size_t z4n; BDIGIT *z4ds;
1931
1932     size_t zzn; BDIGIT *zzds;
1933
1934     int sq = xds == yds && xn == yn;
1935
1936     assert(xn <= yn);  /* assume y >= x */
1937     assert(xn + yn <= zn);
1938
1939     n = (yn + 2) / 3;
1940     assert(2*n < xn);
1941
1942     wnc = 0;
1943
1944     wnc += (u1n = n+1); /* BITSPERDIG*n+2 bits */
1945     wnc += (u2n = n+1); /* BITSPERDIG*n+1 bits */
1946     wnc += (u3n = n+1); /* BITSPERDIG*n+3 bits */
1947     wnc += (v1n = n+1); /* BITSPERDIG*n+2 bits */
1948     wnc += (v2n = n+1); /* BITSPERDIG*n+1 bits */
1949     wnc += (v3n = n+1); /* BITSPERDIG*n+3 bits */
1950
1951     wnc += (t0n = 2*n); /* BITSPERDIG*2*n bits */
1952     wnc += (t1n = 2*n+2); /* BITSPERDIG*2*n+4 bits but bary_mul needs u1n+v1n */
1953     wnc += (t2n = 2*n+2); /* BITSPERDIG*2*n+2 bits but bary_mul needs u2n+v2n */
1954     wnc += (t3n = 2*n+2); /* BITSPERDIG*2*n+6 bits but bary_mul needs u3n+v3n */
1955     wnc += (t4n = 2*n); /* BITSPERDIG*2*n bits */
1956
1957     wnc += (z1n = 2*n+1); /* BITSPERDIG*2*n+5 bits */
1958     wnc += (z2n = 2*n+1); /* BITSPERDIG*2*n+6 bits */
1959     wnc += (z3n = 2*n+1); /* BITSPERDIG*2*n+8 bits */
1960
1961     if (wn < wnc) {
1962         wn = wnc * 3 / 2; /* Allocate working memory for whole recursion at once. */
1963         wds = ALLOCV_N(BDIGIT, work, wn);
1964     }
1965
1966     u1ds = wds; wds += u1n;
1967     u2ds = wds; wds += u2n;
1968     u3ds = wds; wds += u3n;
1969
1970     v1ds = wds; wds += v1n;
1971     v2ds = wds; wds += v2n;
1972     v3ds = wds; wds += v3n;
1973
1974     t0ds = wds; wds += t0n;
1975     t1ds = wds; wds += t1n;
1976     t2ds = wds; wds += t2n;
1977     t3ds = wds; wds += t3n;
1978     t4ds = wds; wds += t4n;
1979
1980     z1ds = wds; wds += z1n;
1981     z2ds = wds; wds += z2n;
1982     z3ds = wds; wds += z3n;
1983
1984     wn -= wnc;
1985
1986     zzds = u1ds;
1987     zzn = 6*n+1;
1988
1989     x0n = n;
1990     x1n = n;
1991     x2n = xn - 2*n;
1992     x0ds = xds;
1993     x1ds = xds + n;
1994     x2ds = xds + 2*n;
1995
1996     if (sq) {
1997         y0n = x0n;
1998         y1n = x1n;
1999         y2n = x2n;
2000         y0ds = x0ds;
2001         y1ds = x1ds;
2002         y2ds = x2ds;
2003     }
2004     else {
2005         y0n = n;
2006         y1n = n;
2007         y2n = yn - 2*n;
2008         y0ds = yds;
2009         y1ds = yds + n;
2010         y2ds = yds + 2*n;
2011     }
2012
2013     /*
2014      * ref. https://en.wikipedia.org/wiki/Toom%E2%80%93Cook_multiplication
2015      *
2016      * x(b) = x0 * b^0 + x1 * b^1 + x2 * b^2
2017      * y(b) = y0 * b^0 + y1 * b^1 + y2 * b^2
2018      *
2019      * z(b) = x(b) * y(b)
2020      * z(b) = z0 * b^0 + z1 * b^1 + z2 * b^2 + z3 * b^3 + z4 * b^4
2021      * where:
2022      *   z0 = x0 * y0
2023      *   z1 = x0 * y1 + x1 * y0
2024      *   z2 = x0 * y2 + x1 * y1 + x2 * y0
2025      *   z3 = x1 * y2 + x2 * y1
2026      *   z4 = x2 * y2
2027      *
2028      * Toom3 method (a.k.a. Toom-Cook method):
2029      * (Step1) calculating 5 points z(b0), z(b1), z(b2), z(b3), z(b4),
2030      * where:
2031      *   b0 = 0, b1 = 1, b2 = -1, b3 = -2, b4 = inf,
2032      *   z(0)   = x(0)   * y(0)   = x0 * y0
2033      *   z(1)   = x(1)   * y(1)   = (x0 + x1 + x2) * (y0 + y1 + y2)
2034      *   z(-1)  = x(-1)  * y(-1)  = (x0 - x1 + x2) * (y0 - y1 + y2)
2035      *   z(-2)  = x(-2)  * y(-2)  = (x0 - 2 * (x1 - 2 * x2)) * (y0 - 2 * (y1 - 2 * y2))
2036      *   z(inf) = x(inf) * y(inf) = x2 * y2
2037      *
2038      * (Step2) interpolating z0, z1, z2, z3 and z4.
2039      *
2040      * (Step3) Substituting base value into b of the polynomial z(b),
2041      */
2042
2043     /*
2044      * [Step1] calculating 5 points z(b0), z(b1), z(b2), z(b3), z(b4)
2045      */
2046
2047     /* u1 <- x0 + x2 */
2048     bary_add(u1ds, u1n, x0ds, x0n, x2ds, x2n);
2049     u1p = 1;
2050
2051     /* x(-1) : u2 <- u1 - x1 = x0 - x1 + x2 */
2052     if (bary_sub(u2ds, u2n, u1ds, u1n, x1ds, x1n)) {
2053         bary_2comp(u2ds, u2n);
2054         u2p = 0;
2055     }
2056     else {
2057         u2p = 1;
2058     }
2059
2060     /* x(1) : u1 <- u1 + x1 = x0 + x1 + x2 */
2061     bary_add(u1ds, u1n, u1ds, u1n, x1ds, x1n);
2062
2063     /* x(-2) : u3 <- 2 * (u2 + x2) - x0 = x0 - 2 * (x1 - 2 * x2) */
2064     u3p = 1;
2065     if (u2p) {
2066         bary_add(u3ds, u3n, u2ds, u2n, x2ds, x2n);
2067     }
2068     else if (bary_sub(u3ds, u3n, x2ds, x2n, u2ds, u2n)) {
2069         bary_2comp(u3ds, u3n);
2070         u3p = 0;
2071     }
2072     bary_small_lshift(u3ds, u3ds, u3n, 1);
2073     if (!u3p) {
2074         bary_add(u3ds, u3n, u3ds, u3n, x0ds, x0n);
2075     }
2076     else if (bary_sub(u3ds, u3n, u3ds, u3n, x0ds, x0n)) {
2077         bary_2comp(u3ds, u3n);
2078         u3p = 0;
2079     }
2080
2081     if (sq) {
2082         v1n = u1n; v1ds = u1ds; v1p = u1p;
2083         v2n = u2n; v2ds = u2ds; v2p = u2p;
2084         v3n = u3n; v3ds = u3ds; v3p = u3p;
2085     }
2086     else {
2087         /* v1 <- y0 + y2 */
2088         bary_add(v1ds, v1n, y0ds, y0n, y2ds, y2n);
2089         v1p = 1;
2090
2091         /* y(-1) : v2 <- v1 - y1 = y0 - y1 + y2 */
2092         v2p = 1;
2093         if (bary_sub(v2ds, v2n, v1ds, v1n, y1ds, y1n)) {
2094             bary_2comp(v2ds, v2n);
2095             v2p = 0;
2096         }
2097
2098         /* y(1) : v1 <- v1 + y1 = y0 + y1 + y2 */
2099         bary_add(v1ds, v1n, v1ds, v1n, y1ds, y1n);
2100
2101         /* y(-2) : v3 <- 2 * (v2 + y2) - y0 = y0 - 2 * (y1 - 2 * y2) */
2102         v3p = 1;
2103         if (v2p) {
2104             bary_add(v3ds, v3n, v2ds, v2n, y2ds, y2n);
2105         }
2106         else if (bary_sub(v3ds, v3n, y2ds, y2n, v2ds, v2n)) {
2107             bary_2comp(v3ds, v3n);
2108             v3p = 0;
2109         }
2110         bary_small_lshift(v3ds, v3ds, v3n, 1);
2111         if (!v3p) {
2112             bary_add(v3ds, v3n, v3ds, v3n, y0ds, y0n);
2113         }
2114         else if (bary_sub(v3ds, v3n, v3ds, v3n, y0ds, y0n)) {
2115             bary_2comp(v3ds, v3n);
2116             v3p = 0;
2117         }
2118     }
2119
2120     /* z(0) : t0 <- x0 * y0 */
2121     bary_mul_toom3_start(t0ds, t0n, x0ds, x0n, y0ds, y0n, wds, wn);
2122     t0p = 1;
2123
2124     /* z(1) : t1 <- u1 * v1 */
2125     bary_mul_toom3_start(t1ds, t1n, u1ds, u1n, v1ds, v1n, wds, wn);
2126     t1p = u1p == v1p;
2127     assert(t1ds[t1n-1] == 0);
2128     t1n--;
2129
2130     /* z(-1) : t2 <- u2 * v2 */
2131     bary_mul_toom3_start(t2ds, t2n, u2ds, u2n, v2ds, v2n, wds, wn);
2132     t2p = u2p == v2p;
2133     assert(t2ds[t2n-1] == 0);
2134     t2n--;
2135
2136     /* z(-2) : t3 <- u3 * v3 */
2137     bary_mul_toom3_start(t3ds, t3n, u3ds, u3n, v3ds, v3n, wds, wn);
2138     t3p = u3p == v3p;
2139     assert(t3ds[t3n-1] == 0);
2140     t3n--;
2141
2142     /* z(inf) : t4 <- x2 * y2 */
2143     bary_mul_toom3_start(t4ds, t4n, x2ds, x2n, y2ds, y2n, wds, wn);
2144     t4p = 1;
2145
2146     /*
2147      * [Step2] interpolating z0, z1, z2, z3 and z4.
2148      */
2149
2150     /* z0 <- z(0) == t0 */
2151     z0n = t0n; z0ds = t0ds;
2152
2153     /* z4 <- z(inf) == t4 */
2154     z4n = t4n; z4ds = t4ds;
2155
2156     /* z3 <- (z(-2) - z(1)) / 3 == (t3 - t1) / 3 */
2157     if (t3p == t1p) {
2158         z3p = t3p;
2159         if (bary_sub(z3ds, z3n, t3ds, t3n, t1ds, t1n)) {
2160             bary_2comp(z3ds, z3n);
2161             z3p = !z3p;
2162         }
2163     }
2164     else {
2165         z3p = t3p;
2166         bary_add(z3ds, z3n, t3ds, t3n, t1ds, t1n);
2167     }
2168     bigdivrem_single(z3ds, z3ds, z3n, 3);
2169
2170     /* z1 <- (z(1) - z(-1)) / 2 == (t1 - t2) / 2 */
2171     if (t1p == t2p) {
2172         z1p = t1p;
2173         if (bary_sub(z1ds, z1n, t1ds, t1n, t2ds, t2n)) {
2174             bary_2comp(z1ds, z1n);
2175             z1p = !z1p;
2176         }
2177     }
2178     else {
2179         z1p = t1p;
2180         bary_add(z1ds, z1n, t1ds, t1n, t2ds, t2n);
2181     }
2182     bary_small_rshift(z1ds, z1ds, z1n, 1, 0);
2183
2184     /* z2 <- z(-1) - z(0) == t2 - t0 */
2185     if (t2p == t0p) {
2186         z2p = t2p;
2187         if (bary_sub(z2ds, z2n, t2ds, t2n, t0ds, t0n)) {
2188             bary_2comp(z2ds, z2n);
2189             z2p = !z2p;
2190         }
2191     }
2192     else {
2193         z2p = t2p;
2194         bary_add(z2ds, z2n, t2ds, t2n, t0ds, t0n);
2195     }
2196
2197     /* z3 <- (z2 - z3) / 2 + 2 * z(inf) == (z2 - z3) / 2 + 2 * t4 */
2198     if (z2p == z3p) {
2199         z3p = z2p;
2200         if (bary_sub(z3ds, z3n, z2ds, z2n, z3ds, z3n)) {
2201             bary_2comp(z3ds, z3n);
2202             z3p = !z3p;
2203         }
2204     }
2205     else {
2206         z3p = z2p;
2207         bary_add(z3ds, z3n, z2ds, z2n, z3ds, z3n);
2208     }
2209     bary_small_rshift(z3ds, z3ds, z3n, 1, 0);
2210     if (z3p == t4p) {
2211         bary_muladd_1xN(z3ds, z3n, 2, t4ds, t4n);
2212     }
2213     else {
2214         if (bary_mulsub_1xN(z3ds, z3n, 2, t4ds, t4n)) {
2215             bary_2comp(z3ds, z3n);
2216             z3p = !z3p;
2217         }
2218     }
2219
2220     /* z2 <- z2 + z1 - z(inf) == z2 + z1 - t4 */
2221     if (z2p == z1p) {
2222         bary_add(z2ds, z2n, z2ds, z2n, z1ds, z1n);
2223     }
2224     else {
2225         if (bary_sub(z2ds, z2n, z2ds, z2n, z1ds, z1n)) {
2226             bary_2comp(z2ds, z2n);
2227             z2p = !z2p;
2228         }
2229     }
2230
2231     if (z2p == t4p) {
2232         if (bary_sub(z2ds, z2n, z2ds, z2n, t4ds, t4n)) {
2233             bary_2comp(z2ds, z2n);
2234             z2p = !z2p;
2235         }
2236     }
2237     else {
2238         bary_add(z2ds, z2n, z2ds, z2n, t4ds, t4n);
2239     }
2240
2241     /* z1 <- z1 - z3 */
2242     if (z1p == z3p) {
2243         if (bary_sub(z1ds, z1n, z1ds, z1n, z3ds, z3n)) {
2244             bary_2comp(z1ds, z1n);
2245             z1p = !z1p;
2246         }
2247     }
2248     else {
2249         bary_add(z1ds, z1n, z1ds, z1n, z3ds, z3n);
2250     }
2251
2252     /*
2253      * [Step3] Substituting base value into b of the polynomial z(b),
2254      */
2255
2256     MEMCPY(zzds, z0ds, BDIGIT, z0n);
2257     BDIGITS_ZERO(zzds + z0n, 4*n - z0n);
2258     MEMCPY(zzds + 4*n, z4ds, BDIGIT, z4n);
2259     BDIGITS_ZERO(zzds + 4*n + z4n, zzn - (4*n + z4n));
2260     if (z1p)
2261         bary_add(zzds +   n, zzn -   n, zzds +   n, zzn -   n, z1ds, z1n);
2262     else
2263         bary_sub(zzds +   n, zzn -   n, zzds +   n, zzn -   n, z1ds, z1n);
2264     if (z2p)
2265         bary_add(zzds + 2*n, zzn - 2*n, zzds + 2*n, zzn - 2*n, z2ds, z2n);
2266     else
2267         bary_sub(zzds + 2*n, zzn - 2*n, zzds + 2*n, zzn - 2*n, z2ds, z2n);
2268     if (z3p)
2269         bary_add(zzds + 3*n, zzn - 3*n, zzds + 3*n, zzn - 3*n, z3ds, z3n);
2270     else
2271         bary_sub(zzds + 3*n, zzn - 3*n, zzds + 3*n, zzn - 3*n, z3ds, z3n);
2272
2273     BARY_TRUNC(zzds, zzn);
2274     MEMCPY(zds, zzds, BDIGIT, zzn);
2275     BDIGITS_ZERO(zds + zzn, zn - zzn);
2276
2277     if (work)
2278         ALLOCV_END(work);
2279 }
2280
2281 VALUE
2282 rb_big_mul_toom3(VALUE x, VALUE y)
2283 {
2284     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
2285     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
2286     if (xn > yn || yn < 3 || !TOOM3_BALANCED(xn,yn))
2287         rb_raise(rb_eArgError, "unexpected bignum length for toom3");
2288     bary_mul_toom3(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, NULL, 0);
2289     RB_GC_GUARD(x);
2290     RB_GC_GUARD(y);
2291     return z;
2292 }
2293
2294 #ifdef USE_GMP
2295 static inline void
2296 bdigits_to_mpz(mpz_t mp, const BDIGIT *digits, size_t len)
2297 {
2298     const size_t nails = (sizeof(BDIGIT)-SIZEOF_BDIGIT)*CHAR_BIT;
2299     mpz_import(mp, len, -1, sizeof(BDIGIT), 0, nails, digits);
2300 }
2301
2302 static inline void
2303 bdigits_from_mpz(mpz_t mp, BDIGIT *digits, size_t *len)
2304 {
2305     const size_t nails = (sizeof(BDIGIT)-SIZEOF_BDIGIT)*CHAR_BIT;
2306     mpz_export(digits, len, -1, sizeof(BDIGIT), 0, nails, mp);
2307 }
2308
2309 static void
2310 bary_mul_gmp(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2311 {
2312     mpz_t x, y, z;
2313     size_t count;
2314
2315     assert(xn + yn <= zn);
2316
2317     mpz_init(x);
2318     mpz_init(y);
2319     mpz_init(z);
2320     bdigits_to_mpz(x, xds, xn);
2321     if (xds == yds && xn == yn) {
2322         mpz_mul(z, x, x);
2323     }
2324     else {
2325         bdigits_to_mpz(y, yds, yn);
2326         mpz_mul(z, x, y);
2327     }
2328     bdigits_from_mpz(z, zds, &count);
2329     BDIGITS_ZERO(zds+count, zn-count);
2330     mpz_clear(x);
2331     mpz_clear(y);
2332     mpz_clear(z);
2333 }
2334
2335 VALUE
2336 rb_big_mul_gmp(VALUE x, VALUE y)
2337 {
2338     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), zn = xn + yn;
2339     VALUE z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
2340     bary_mul_gmp(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn);
2341     RB_GC_GUARD(x);
2342     RB_GC_GUARD(y);
2343     return z;
2344 }
2345 #endif
2346
2347 static void
2348 bary_short_mul(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2349 {
2350     assert(xn + yn <= zn);
2351
2352     if (xn == 1 && yn == 1) {
2353         bary_mul_single(zds, zn, xds[0], yds[0]);
2354     }
2355     else {
2356         bary_mul_normal(zds, zn, xds, xn, yds, yn);
2357         rb_thread_check_ints();
2358     }
2359 }
2360
2361 /* determine whether a bignum is sparse or not by random sampling */
2362 static inline int
2363 bary_sparse_p(const BDIGIT *ds, size_t n)
2364 {
2365     long c = 0;
2366
2367     if (          ds[2 * n / 5]) c++;
2368     if (c <= 1 && ds[    n / 2]) c++;
2369     if (c <= 1 && ds[3 * n / 5]) c++;
2370
2371     return (c <= 1) ? 1 : 0;
2372 }
2373
2374 static int
2375 bary_mul_precheck(BDIGIT **zdsp, size_t *znp, const BDIGIT **xdsp, size_t *xnp, const BDIGIT **ydsp, size_t *ynp)
2376 {
2377     size_t nlsz; /* number of least significant zero BDIGITs */
2378
2379     BDIGIT *zds = *zdsp;
2380     size_t zn = *znp;
2381     const BDIGIT *xds = *xdsp;
2382     size_t xn = *xnp;
2383     const BDIGIT *yds = *ydsp;
2384     size_t yn = *ynp;
2385
2386     assert(xn + yn <= zn);
2387
2388     nlsz = 0;
2389
2390     while (0 < xn) {
2391         if (xds[xn-1] == 0) {
2392             xn--;
2393         }
2394         else {
2395             do {
2396                 if (xds[0] != 0)
2397                     break;
2398                 xds++;
2399                 xn--;
2400                 nlsz++;
2401             } while (0 < xn);
2402             break;
2403         }
2404     }
2405
2406     while (0 < yn) {
2407         if (yds[yn-1] == 0) {
2408             yn--;
2409         }
2410         else {
2411             do {
2412                 if (yds[0] != 0)
2413                     break;
2414                 yds++;
2415                 yn--;
2416                 nlsz++;
2417             } while (0 < yn);
2418             break;
2419         }
2420     }
2421
2422     if (nlsz) {
2423         BDIGITS_ZERO(zds, nlsz);
2424         zds += nlsz;
2425         zn -= nlsz;
2426     }
2427
2428     /* make sure that y is longer than x */
2429     if (xn > yn) {
2430         const BDIGIT *tds;
2431         size_t tn;
2432         tds = xds; xds = yds; yds = tds;
2433         tn = xn; xn = yn; yn = tn;
2434     }
2435     assert(xn <= yn);
2436
2437     if (xn <= 1) {
2438         if (xn == 0) {
2439             BDIGITS_ZERO(zds, zn);
2440             return 1;
2441         }
2442
2443         if (xds[0] == 1) {
2444             MEMCPY(zds, yds, BDIGIT, yn);
2445             BDIGITS_ZERO(zds+yn, zn-yn);
2446             return 1;
2447         }
2448         if (POW2_P(xds[0])) {
2449             zds[yn] = bary_small_lshift(zds, yds, yn, bit_length(xds[0])-1);
2450             BDIGITS_ZERO(zds+yn+1, zn-yn-1);
2451             return 1;
2452         }
2453         if (yn == 1 && yds[0] == 1) {
2454             zds[0] = xds[0];
2455             BDIGITS_ZERO(zds+1, zn-1);
2456             return 1;
2457         }
2458         bary_mul_normal(zds, zn, xds, xn, yds, yn);
2459         return 1;
2460     }
2461
2462     *zdsp = zds;
2463     *znp = zn;
2464     *xdsp = xds;
2465     *xnp = xn;
2466     *ydsp = yds;
2467     *ynp = yn;
2468
2469     return 0;
2470 }
2471
2472 static void
2473 bary_mul_karatsuba_branch(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
2474 {
2475     /* normal multiplication when x is small */
2476     if (xn < KARATSUBA_MUL_DIGITS) {
2477         goto normal;
2478     }
2479
2480     /* normal multiplication when x or y is a sparse bignum */
2481     if (bary_sparse_p(xds, xn)) goto normal;
2482     if (bary_sparse_p(yds, yn)) {
2483         bary_short_mul(zds, zn, yds, yn, xds, xn);
2484         return;
2485     }
2486
2487     /* balance multiplication by slicing y when x is much smaller than y */
2488     if (!KARATSUBA_BALANCED(xn, yn)) {
2489         bary_mul_balance_with_mulfunc(zds, zn, xds, xn, yds, yn, wds, wn, bary_mul_karatsuba_start);
2490         return;
2491     }
2492
2493     /* multiplication by karatsuba method */
2494     bary_mul_karatsuba(zds, zn, xds, xn, yds, yn, wds, wn);
2495     return;
2496
2497   normal:
2498     if (xds == yds && xn == yn) {
2499         bary_sq_fast(zds, zn, xds, xn);
2500     }
2501     else {
2502         bary_short_mul(zds, zn, xds, xn, yds, yn);
2503     }
2504 }
2505
2506 static void
2507 bary_mul_karatsuba_start(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
2508 {
2509     if (bary_mul_precheck(&zds, &zn, &xds, &xn, &yds, &yn))
2510         return;
2511
2512     bary_mul_karatsuba_branch(zds, zn, xds, xn, yds, yn, wds, wn);
2513 }
2514
2515 static void
2516 bary_mul_toom3_branch(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
2517 {
2518     if (xn < TOOM3_MUL_DIGITS) {
2519         bary_mul_karatsuba_branch(zds, zn, xds, xn, yds, yn, wds, wn);
2520         return;
2521     }
2522
2523     if (!TOOM3_BALANCED(xn, yn)) {
2524         bary_mul_balance_with_mulfunc(zds, zn, xds, xn, yds, yn, wds, wn, bary_mul_toom3_start);
2525         return;
2526     }
2527
2528     bary_mul_toom3(zds, zn, xds, xn, yds, yn, wds, wn);
2529 }
2530
2531 static void
2532 bary_mul_toom3_start(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, BDIGIT *wds, size_t wn)
2533 {
2534     if (bary_mul_precheck(&zds, &zn, &xds, &xn, &yds, &yn))
2535         return;
2536
2537     bary_mul_toom3_branch(zds, zn, xds, xn, yds, yn, wds, wn);
2538 }
2539
2540 static void
2541 bary_mul(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2542 {
2543     if (xn <= yn) {
2544         if (xn < NAIVE_MUL_DIGITS) {
2545             if (xds == yds && xn == yn)
2546                 bary_sq_fast(zds, zn, xds, xn);
2547             else
2548                 bary_short_mul(zds, zn, xds, xn, yds, yn);
2549             return;
2550         }
2551     }
2552     else {
2553         if (yn < NAIVE_MUL_DIGITS) {
2554             bary_short_mul(zds, zn, yds, yn, xds, xn);
2555             return;
2556         }
2557     }
2558
2559 #ifdef USE_GMP
2560     bary_mul_gmp(zds, zn, xds, xn, yds, yn);
2561 #else
2562     bary_mul_toom3_start(zds, zn, xds, xn, yds, yn, NULL, 0);
2563 #endif
2564 }
2565
2566 struct big_div_struct {
2567     size_t yn, zn;
2568     BDIGIT *yds, *zds;
2569     volatile VALUE stop;
2570 };
2571
2572 static void *
2573 bigdivrem1(void *ptr)
2574 {
2575     struct big_div_struct *bds = (struct big_div_struct*)ptr;
2576     size_t yn = bds->yn;
2577     size_t zn = bds->zn;
2578     BDIGIT *yds = bds->yds, *zds = bds->zds;
2579     BDIGIT_DBL_SIGNED num;
2580     BDIGIT q;
2581
2582     do {
2583         if (bds->stop) {
2584             bds->zn = zn;
2585             return 0;
2586         }
2587         if (zds[zn-1] == yds[yn-1]) q = BDIGMAX;
2588         else q = (BDIGIT)((BIGUP(zds[zn-1]) + zds[zn-2])/yds[yn-1]);
2589         if (q) {
2590             num = bigdivrem_mulsub(zds+zn-(yn+1), yn+1,
2591                                    q,
2592                                    yds, yn);
2593             while (num) { /* "add back" required */
2594                 q--;
2595                 num = bary_add(zds+zn-(yn+1), yn,
2596                                zds+zn-(yn+1), yn,
2597                                yds, yn);
2598                 num--;
2599             }
2600         }
2601         zn--;
2602         zds[zn] = q;
2603     } while (zn > yn);
2604     return 0;
2605 }
2606
2607 /* async-signal-safe */
2608 static void
2609 rb_big_stop(void *ptr)
2610 {
2611     struct big_div_struct *bds = ptr;
2612     bds->stop = Qtrue;
2613 }
2614
2615 static BDIGIT
2616 bigdivrem_single1(BDIGIT *qds, const BDIGIT *xds, size_t xn, BDIGIT x_higher_bdigit, BDIGIT y)
2617 {
2618     assert(0 < xn);
2619     assert(x_higher_bdigit < y);
2620     if (POW2_P(y)) {
2621         BDIGIT r;
2622         r = xds[0] & (y-1);
2623         bary_small_rshift(qds, xds, xn, bit_length(y)-1, x_higher_bdigit);
2624         return r;
2625     }
2626     else {
2627         size_t i;
2628         BDIGIT_DBL t2;
2629         t2 = x_higher_bdigit;
2630         for (i = 0; i < xn; i++) {
2631             t2 = BIGUP(t2) + xds[xn - i - 1];
2632             qds[xn - i - 1] = (BDIGIT)(t2 / y);
2633             t2 %= y;
2634         }
2635         return (BDIGIT)t2;
2636     }
2637 }
2638
2639 static BDIGIT
2640 bigdivrem_single(BDIGIT *qds, const BDIGIT *xds, size_t xn, BDIGIT y)
2641 {
2642     return bigdivrem_single1(qds, xds, xn, 0, y);
2643 }
2644
2645 static void
2646 bigdivrem_restoring(BDIGIT *zds, size_t zn, BDIGIT *yds, size_t yn)
2647 {
2648     struct big_div_struct bds;
2649     size_t ynzero;
2650
2651     assert(yn < zn);
2652     assert(BDIGIT_MSB(yds[yn-1]));
2653     assert(zds[zn-1] < yds[yn-1]);
2654
2655     for (ynzero = 0; !yds[ynzero]; ynzero++);
2656
2657     if (ynzero+1 == yn) {
2658         BDIGIT r;
2659         r = bigdivrem_single1(zds+yn, zds+ynzero, zn-yn, zds[zn-1], yds[ynzero]);
2660         zds[ynzero] = r;
2661         return;
2662     }
2663
2664     bds.yn = yn - ynzero;
2665     bds.zds = zds + ynzero;
2666     bds.yds = yds + ynzero;
2667     bds.stop = Qfalse;
2668     bds.zn = zn - ynzero;
2669     if (bds.zn > 10000 || bds.yn > 10000) {
2670       retry:
2671         bds.stop = Qfalse;
2672         rb_nogvl(bigdivrem1, &bds, rb_big_stop, &bds, RB_NOGVL_UBF_ASYNC_SAFE);
2673
2674         if (bds.stop == Qtrue) {
2675             /* execute trap handler, but exception was not raised. */
2676             goto retry;
2677         }
2678     }
2679     else {
2680         bigdivrem1(&bds);
2681     }
2682 }
2683
2684 static void
2685 bary_divmod_normal(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2686 {
2687     int shift;
2688     BDIGIT *zds, *yyds;
2689     size_t zn;
2690     VALUE tmpyz = 0;
2691
2692     assert(yn < xn || (xn == yn && yds[yn - 1] <= xds[xn - 1]));
2693     assert(qds ? (xn - yn + 1) <= qn : 1);
2694     assert(rds ? yn <= rn : 1);
2695
2696     zn = xn + BIGDIVREM_EXTRA_WORDS;
2697
2698     shift = nlz(yds[yn-1]);
2699     if (shift) {
2700         int alloc_y = !rds;
2701         int alloc_z = !qds || qn < zn;
2702         if (alloc_y && alloc_z) {
2703             yyds = ALLOCV_N(BDIGIT, tmpyz, yn+zn);
2704             zds = yyds + yn;
2705         }
2706         else {
2707             yyds = alloc_y ? ALLOCV_N(BDIGIT, tmpyz, yn) : rds;
2708             zds = alloc_z ? ALLOCV_N(BDIGIT, tmpyz, zn) : qds;
2709         }
2710         zds[xn] = bary_small_lshift(zds, xds, xn, shift);
2711         bary_small_lshift(yyds, yds, yn, shift);
2712     }
2713     else {
2714         if (qds && zn <= qn)
2715             zds = qds;
2716         else
2717             zds = ALLOCV_N(BDIGIT, tmpyz, zn);
2718         MEMCPY(zds, xds, BDIGIT, xn);
2719         zds[xn] = 0;
2720         /* bigdivrem_restoring will not modify y.
2721          * So use yds directly.  */
2722         yyds = (BDIGIT *)yds;
2723     }
2724
2725     bigdivrem_restoring(zds, zn, yyds, yn);
2726
2727     if (rds) {
2728         if (shift)
2729             bary_small_rshift(rds, zds, yn, shift, 0);
2730         else
2731             MEMCPY(rds, zds, BDIGIT, yn);
2732         BDIGITS_ZERO(rds+yn, rn-yn);
2733     }
2734
2735     if (qds) {
2736         size_t j = zn - yn;
2737         MEMMOVE(qds, zds+yn, BDIGIT, j);
2738         BDIGITS_ZERO(qds+j, qn-j);
2739     }
2740
2741     if (tmpyz)
2742         ALLOCV_END(tmpyz);
2743 }
2744
2745 VALUE
2746 rb_big_divrem_normal(VALUE x, VALUE y)
2747 {
2748     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), qn, rn;
2749     BDIGIT *xds = BDIGITS(x), *yds = BDIGITS(y), *qds, *rds;
2750     VALUE q, r;
2751
2752     BARY_TRUNC(yds, yn);
2753     if (yn == 0)
2754         rb_num_zerodiv();
2755     BARY_TRUNC(xds, xn);
2756
2757     if (xn < yn || (xn == yn && xds[xn - 1] < yds[yn - 1]))
2758         return rb_assoc_new(LONG2FIX(0), x);
2759
2760     qn = xn + BIGDIVREM_EXTRA_WORDS;
2761     q = bignew(qn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
2762     qds = BDIGITS(q);
2763
2764     rn = yn;
2765     r = bignew(rn, BIGNUM_SIGN(x));
2766     rds = BDIGITS(r);
2767
2768     bary_divmod_normal(qds, qn, rds, rn, xds, xn, yds, yn);
2769
2770     bigtrunc(q);
2771     bigtrunc(r);
2772
2773     RB_GC_GUARD(x);
2774     RB_GC_GUARD(y);
2775
2776     return rb_assoc_new(q, r);
2777 }
2778
2779 #ifdef USE_GMP
2780 static void
2781 bary_divmod_gmp(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2782 {
2783     mpz_t x, y, q, r;
2784     size_t count;
2785
2786     assert(yn < xn || (xn == yn && yds[yn - 1] <= xds[xn - 1]));
2787     assert(qds ? (xn - yn + 1) <= qn : 1);
2788     assert(rds ? yn <= rn : 1);
2789     assert(qds || rds);
2790
2791     mpz_init(x);
2792     mpz_init(y);
2793     if (qds) mpz_init(q);
2794     if (rds) mpz_init(r);
2795
2796     bdigits_to_mpz(x, xds, xn);
2797     bdigits_to_mpz(y, yds, yn);
2798
2799     if (!rds) {
2800         mpz_fdiv_q(q, x, y);
2801     }
2802     else if (!qds) {
2803         mpz_fdiv_r(r, x, y);
2804     }
2805     else {
2806         mpz_fdiv_qr(q, r, x, y);
2807     }
2808
2809     mpz_clear(x);
2810     mpz_clear(y);
2811
2812     if (qds) {
2813         bdigits_from_mpz(q, qds, &count);
2814         BDIGITS_ZERO(qds+count, qn-count);
2815         mpz_clear(q);
2816     }
2817
2818     if (rds) {
2819         bdigits_from_mpz(r, rds, &count);
2820         BDIGITS_ZERO(rds+count, rn-count);
2821         mpz_clear(r);
2822     }
2823 }
2824
2825 VALUE
2826 rb_big_divrem_gmp(VALUE x, VALUE y)
2827 {
2828     size_t xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y), qn, rn;
2829     BDIGIT *xds = BDIGITS(x), *yds = BDIGITS(y), *qds, *rds;
2830     VALUE q, r;
2831
2832     BARY_TRUNC(yds, yn);
2833     if (yn == 0)
2834         rb_num_zerodiv();
2835     BARY_TRUNC(xds, xn);
2836
2837     if (xn < yn || (xn == yn && xds[xn - 1] < yds[yn - 1]))
2838         return rb_assoc_new(LONG2FIX(0), x);
2839
2840     qn = xn - yn + 1;
2841     q = bignew(qn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
2842     qds = BDIGITS(q);
2843
2844     rn = yn;
2845     r = bignew(rn, BIGNUM_SIGN(x));
2846     rds = BDIGITS(r);
2847
2848     bary_divmod_gmp(qds, qn, rds, rn, xds, xn, yds, yn);
2849
2850     bigtrunc(q);
2851     bigtrunc(r);
2852
2853     RB_GC_GUARD(x);
2854     RB_GC_GUARD(y);
2855
2856     return rb_assoc_new(q, r);
2857 }
2858 #endif
2859
2860 static void
2861 bary_divmod_branch(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2862 {
2863 #ifdef USE_GMP
2864     if (GMP_DIV_DIGITS < xn) {
2865         bary_divmod_gmp(qds, qn, rds, rn, xds, xn, yds, yn);
2866         return;
2867     }
2868 #endif
2869     bary_divmod_normal(qds, qn, rds, rn, xds, xn, yds, yn);
2870 }
2871
2872 static void
2873 bary_divmod(BDIGIT *qds, size_t qn, BDIGIT *rds, size_t rn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn)
2874 {
2875     assert(xn <= qn);
2876     assert(yn <= rn);
2877
2878     BARY_TRUNC(yds, yn);
2879     if (yn == 0)
2880         rb_num_zerodiv();
2881
2882     BARY_TRUNC(xds, xn);
2883     if (xn == 0) {
2884         BDIGITS_ZERO(qds, qn);
2885         BDIGITS_ZERO(rds, rn);
2886         return;
2887     }
2888
2889     if (xn < yn || (xn == yn && xds[xn - 1] < yds[yn - 1])) {
2890         MEMCPY(rds, xds, BDIGIT, xn);
2891         BDIGITS_ZERO(rds+xn, rn-xn);
2892         BDIGITS_ZERO(qds, qn);
2893     }
2894     else if (yn == 1) {
2895         MEMCPY(qds, xds, BDIGIT, xn);
2896         BDIGITS_ZERO(qds+xn, qn-xn);
2897         rds[0] = bigdivrem_single(qds, xds, xn, yds[0]);
2898         BDIGITS_ZERO(rds+1, rn-1);
2899     }
2900     else if (xn == 2 && yn == 2) {
2901         BDIGIT_DBL x = bary2bdigitdbl(xds, 2);
2902         BDIGIT_DBL y = bary2bdigitdbl(yds, 2);
2903         BDIGIT_DBL q = x / y;
2904         BDIGIT_DBL r = x % y;
2905         qds[0] = BIGLO(q);
2906         qds[1] = BIGLO(BIGDN(q));
2907         BDIGITS_ZERO(qds+2, qn-2);
2908         rds[0] = BIGLO(r);
2909         rds[1] = BIGLO(BIGDN(r));
2910         BDIGITS_ZERO(rds+2, rn-2);
2911     }
2912     else {
2913         bary_divmod_branch(qds, qn, rds, rn, xds, xn, yds, yn);
2914     }
2915 }
2916
2917
2918 #ifndef BIGNUM_DEBUG
2919 # define BIGNUM_DEBUG (0+RUBY_DEBUG)
2920 #endif
2921
2922 static int
2923 bigzero_p(VALUE x)
2924 {
2925     return bary_zero_p(BDIGITS(x), BIGNUM_LEN(x));
2926 }
2927
2928 int
2929 rb_bigzero_p(VALUE x)
2930 {
2931     return BIGZEROP(x);
2932 }
2933
2934 int
2935 rb_cmpint(VALUE val, VALUE a, VALUE b)
2936 {
2937     if (NIL_P(val)) {
2938         rb_cmperr(a, b);
2939     }
2940     if (FIXNUM_P(val)) {
2941         long l = FIX2LONG(val);
2942         if (l > 0) return 1;
2943         if (l < 0) return -1;
2944         return 0;
2945     }
2946     if (RB_BIGNUM_TYPE_P(val)) {
2947         if (BIGZEROP(val)) return 0;
2948         if (BIGNUM_SIGN(val)) return 1;
2949         return -1;
2950     }
2951     if (RTEST(rb_funcall(val, '>', 1, INT2FIX(0)))) return 1;
2952     if (RTEST(rb_funcall(val, '<', 1, INT2FIX(0)))) return -1;
2953     return 0;
2954 }
2955
2956 #define BIGNUM_SET_LEN(b,l) \
2957     (BIGNUM_EMBED_P(b) ? \
2958      (void)(RBASIC(b)->flags = \
2959             (RBASIC(b)->flags & ~BIGNUM_EMBED_LEN_MASK) | \
2960             ((l) << BIGNUM_EMBED_LEN_SHIFT)) : \
2961      (void)(RBIGNUM(b)->as.heap.len = (l)))
2962
2963 static void
2964 rb_big_realloc(VALUE big, size_t len)
2965 {
2966     BDIGIT *ds;
2967     if (BIGNUM_EMBED_P(big)) {
2968         if (BIGNUM_EMBED_LEN_MAX < len) {
2969             ds = ALLOC_N(BDIGIT, len);
2970             MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, BIGNUM_EMBED_LEN_MAX);
2971             RBIGNUM(big)->as.heap.len = BIGNUM_LEN(big);
2972             RBIGNUM(big)->as.heap.digits = ds;
2973             FL_UNSET_RAW(big, BIGNUM_EMBED_FLAG);
2974         }
2975     }
2976     else {
2977         if (len <= BIGNUM_EMBED_LEN_MAX) {
2978             ds = RBIGNUM(big)->as.heap.digits;
2979             FL_SET_RAW(big, BIGNUM_EMBED_FLAG);
2980             BIGNUM_SET_LEN(big, len);
2981             (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)RBIGNUM(big)->as.ary, sizeof(RBIGNUM(big)->as.ary));
2982             if (ds) {
2983                 MEMCPY(RBIGNUM(big)->as.ary, ds, BDIGIT, len);
2984                 xfree(ds);
2985             }
2986         }
2987         else {
2988             if (BIGNUM_LEN(big) == 0) {
2989                 RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len);
2990             }
2991             else {
2992                 REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len);
2993             }
2994         }
2995     }
2996 }
2997
2998 void
2999 rb_big_resize(VALUE big, size_t len)
3000 {
3001     rb_big_realloc(big, len);
3002     BIGNUM_SET_LEN(big, len);
3003 }
3004
3005 static VALUE
3006 bignew_1(VALUE klass, size_t len, int sign)
3007 {
3008     NEWOBJ_OF(big, struct RBignum, klass, T_BIGNUM | (RGENGC_WB_PROTECTED_BIGNUM ? FL_WB_PROTECTED : 0));
3009     VALUE bigv = (VALUE)big;
3010     BIGNUM_SET_SIGN(bigv, sign);
3011     if (len <= BIGNUM_EMBED_LEN_MAX) {
3012         FL_SET_RAW(bigv, BIGNUM_EMBED_FLAG);
3013         BIGNUM_SET_LEN(bigv, len);
3014         (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)big->as.ary, sizeof(big->as.ary));
3015     }
3016     else {
3017         big->as.heap.digits = ALLOC_N(BDIGIT, len);
3018         big->as.heap.len = len;
3019     }
3020     OBJ_FREEZE(bigv);
3021     return bigv;
3022 }
3023
3024 VALUE
3025 rb_big_new(size_t len, int sign)
3026 {
3027     return bignew(len, sign != 0);
3028 }
3029
3030 VALUE
3031 rb_big_clone(VALUE x)
3032 {
3033     size_t len = BIGNUM_LEN(x);
3034     VALUE z = bignew_1(CLASS_OF(x), len, BIGNUM_SIGN(x));
3035
3036     MEMCPY(BDIGITS(z), BDIGITS(x), BDIGIT, len);
3037     return z;
3038 }
3039
3040 static void
3041 big_extend_carry(VALUE x)
3042 {
3043     rb_big_resize(x, BIGNUM_LEN(x)+1);
3044     BDIGITS(x)[BIGNUM_LEN(x)-1] = 1;
3045 }
3046
3047 /* modify a bignum by 2's complement */
3048 static void
3049 get2comp(VALUE x)
3050 {
3051     long i = BIGNUM_LEN(x);
3052     BDIGIT *ds = BDIGITS(x);
3053
3054     if (bary_2comp(ds, i)) {
3055         big_extend_carry(x);
3056     }
3057 }
3058
3059 void
3060 rb_big_2comp(VALUE x)                   /* get 2's complement */
3061 {
3062     get2comp(x);
3063 }
3064
3065 static BDIGIT
3066 abs2twocomp(VALUE *xp, long *n_ret)
3067 {
3068     VALUE x = *xp;
3069     long n = BIGNUM_LEN(x);
3070     BDIGIT *ds = BDIGITS(x);
3071     BDIGIT hibits = 0;
3072
3073     BARY_TRUNC(ds, n);
3074
3075     if (n != 0 && BIGNUM_NEGATIVE_P(x)) {
3076         VALUE z = bignew_1(CLASS_OF(x), n, 0);
3077         MEMCPY(BDIGITS(z), ds, BDIGIT, n);
3078         bary_2comp(BDIGITS(z), n);
3079         hibits = BDIGMAX;
3080         *xp = z;
3081     }
3082     *n_ret = n;
3083     return hibits;
3084 }
3085
3086 static void
3087 twocomp2abs_bang(VALUE x, int hibits)
3088 {
3089     BIGNUM_SET_SIGN(x, !hibits);
3090     if (hibits) {
3091         get2comp(x);
3092     }
3093 }
3094
3095 static inline VALUE
3096 bigtrunc(VALUE x)
3097 {
3098     size_t len = BIGNUM_LEN(x);
3099     BDIGIT *ds = BDIGITS(x);
3100
3101     if (len == 0) return x;
3102     while (--len && !ds[len]);
3103     if (BIGNUM_LEN(x) > len+1) {
3104         rb_big_resize(x, len+1);
3105     }
3106     return x;
3107 }
3108
3109 static inline VALUE
3110 bigfixize(VALUE x)
3111 {
3112     size_t n = BIGNUM_LEN(x);
3113     BDIGIT *ds = BDIGITS(x);
3114 #if SIZEOF_BDIGIT < SIZEOF_LONG
3115     unsigned long u;
3116 #else
3117     BDIGIT u;
3118 #endif
3119
3120     BARY_TRUNC(ds, n);
3121
3122     if (n == 0) return INT2FIX(0);
3123
3124 #if SIZEOF_BDIGIT < SIZEOF_LONG
3125     if (sizeof(long)/SIZEOF_BDIGIT < n)
3126         goto return_big;
3127     else {
3128         int i = (int)n;
3129         u = 0;
3130         while (i--) {
3131             u = (unsigned long)(BIGUP(u) + ds[i]);
3132         }
3133     }
3134 #else /* SIZEOF_BDIGIT >= SIZEOF_LONG */
3135     if (1 < n)
3136         goto return_big;
3137     else
3138         u = ds[0];
3139 #endif
3140
3141     if (BIGNUM_POSITIVE_P(x)) {
3142         if (POSFIXABLE(u)) return LONG2FIX((long)u);
3143     }
3144     else {
3145         if (u <= -FIXNUM_MIN) return LONG2FIX(-(long)u);
3146     }
3147
3148   return_big:
3149     rb_big_resize(x, n);
3150     return x;
3151 }
3152
3153 static VALUE
3154 bignorm(VALUE x)
3155 {
3156     if (RB_BIGNUM_TYPE_P(x)) {
3157         x = bigfixize(x);
3158     }
3159     return x;
3160 }
3161
3162 VALUE
3163 rb_big_norm(VALUE x)
3164 {
3165     return bignorm(x);
3166 }
3167
3168 VALUE
3169 rb_uint2big(uintptr_t n)
3170 {
3171     long i;
3172     VALUE big = bignew(bdigit_roomof(SIZEOF_VALUE), 1);
3173     BDIGIT *digits = BDIGITS(big);
3174
3175 #if SIZEOF_BDIGIT >= SIZEOF_VALUE
3176     digits[0] = n;
3177 #else
3178     for (i = 0; i < bdigit_roomof(SIZEOF_VALUE); i++) {
3179         digits[i] = BIGLO(n);
3180         n = BIGDN(n);
3181     }
3182 #endif
3183
3184     i = bdigit_roomof(SIZEOF_VALUE);
3185     while (--i && !digits[i]) ;
3186     BIGNUM_SET_LEN(big, i+1);
3187     return big;
3188 }
3189
3190 VALUE
3191 rb_int2big(intptr_t n)
3192 {
3193     long neg = 0;
3194     VALUE u;
3195     VALUE big;
3196
3197     if (n < 0) {
3198         u = 1 + (VALUE)(-(n + 1)); /* u = -n avoiding overflow */
3199         neg = 1;
3200     }
3201     else {
3202         u = n;
3203     }
3204     big = rb_uint2big(u);
3205     if (neg) {
3206         BIGNUM_SET_NEGATIVE_SIGN(big);
3207     }
3208     return big;
3209 }
3210
3211 VALUE
3212 rb_uint2inum(uintptr_t n)
3213 {
3214     if (POSFIXABLE(n)) return LONG2FIX(n);
3215     return rb_uint2big(n);
3216 }
3217
3218 VALUE
3219 rb_int2inum(intptr_t n)
3220 {
3221     if (FIXABLE(n)) return LONG2FIX(n);
3222     return rb_int2big(n);
3223 }
3224
3225 void
3226 rb_big_pack(VALUE val, unsigned long *buf, long num_longs)
3227 {
3228     rb_integer_pack(val, buf, num_longs, sizeof(long), 0,
3229             INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
3230             INTEGER_PACK_2COMP);
3231 }
3232
3233 VALUE
3234 rb_big_unpack(unsigned long *buf, long num_longs)
3235 {
3236     return rb_integer_unpack(buf, num_longs, sizeof(long), 0,
3237             INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
3238             INTEGER_PACK_2COMP);
3239 }
3240
3241 /*
3242  * Calculate the number of bytes to be required to represent
3243  * the absolute value of the integer given as _val_.
3244  *
3245  * [val] an integer.
3246  * [nlz_bits_ret] number of leading zero bits in the most significant byte is returned if not NULL.
3247  *
3248  * This function returns ((val_numbits * CHAR_BIT + CHAR_BIT - 1) / CHAR_BIT)
3249  * where val_numbits is the number of bits of abs(val).
3250  * This function should not overflow.
3251  *
3252  * If nlz_bits_ret is not NULL,
3253  * (return_value * CHAR_BIT - val_numbits) is stored in *nlz_bits_ret.
3254  * In this case, 0 <= *nlz_bits_ret < CHAR_BIT.
3255  *
3256  */
3257 size_t
3258 rb_absint_size(VALUE val, int *nlz_bits_ret)
3259 {
3260     BDIGIT *dp;
3261     BDIGIT *de;
3262     BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
3263
3264     int num_leading_zeros;
3265
3266     val = rb_to_int(val);
3267
3268     if (FIXNUM_P(val)) {
3269         long v = FIX2LONG(val);
3270         if (v < 0) {
3271             v = -v;
3272         }
3273 #if SIZEOF_BDIGIT >= SIZEOF_LONG
3274         fixbuf[0] = v;
3275 #else
3276         {
3277             int i;
3278             for (i = 0; i < numberof(fixbuf); i++) {
3279                 fixbuf[i] = BIGLO(v);
3280                 v = BIGDN(v);
3281             }
3282         }
3283 #endif
3284         dp = fixbuf;
3285         de = fixbuf + numberof(fixbuf);
3286     }
3287     else {
3288         dp = BDIGITS(val);
3289         de = dp + BIGNUM_LEN(val);
3290     }
3291     while (dp < de && de[-1] == 0)
3292         de--;
3293     if (dp == de) {
3294         if (nlz_bits_ret)
3295             *nlz_bits_ret = 0;
3296         return 0;
3297     }
3298     num_leading_zeros = nlz(de[-1]);
3299     if (nlz_bits_ret)
3300         *nlz_bits_ret = num_leading_zeros % CHAR_BIT;
3301     return (de - dp) * SIZEOF_BDIGIT - num_leading_zeros / CHAR_BIT;
3302 }
3303
3304 static size_t
3305 absint_numwords_small(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret)
3306 {
3307     size_t val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte;
3308     size_t div = val_numbits / word_numbits;
3309     size_t mod = val_numbits % word_numbits;
3310     size_t numwords;
3311     size_t nlz_bits;
3312     numwords = mod == 0 ? div : div + 1;
3313     nlz_bits = mod == 0 ? 0 : word_numbits - mod;
3314     *nlz_bits_ret = nlz_bits;
3315     return numwords;
3316 }
3317
3318 static size_t
3319 absint_numwords_generic(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret)
3320 {
3321     static const BDIGIT char_bit[1] = { CHAR_BIT };
3322     BDIGIT numbytes_bary[bdigit_roomof(sizeof(numbytes))];
3323     BDIGIT val_numbits_bary[bdigit_roomof(sizeof(numbytes) + 1)];
3324     BDIGIT nlz_bits_in_msbyte_bary[1];
3325     BDIGIT word_numbits_bary[bdigit_roomof(sizeof(word_numbits))];
3326     BDIGIT div_bary[numberof(val_numbits_bary) + BIGDIVREM_EXTRA_WORDS];
3327     BDIGIT mod_bary[numberof(word_numbits_bary)];
3328     BDIGIT one[1] = { 1 };
3329     size_t nlz_bits;
3330     size_t mod;
3331     int sign;
3332     size_t numwords;
3333
3334     nlz_bits_in_msbyte_bary[0] = nlz_bits_in_msbyte;
3335
3336     /*
3337      * val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte
3338      * div, mod = val_numbits.divmod(word_numbits)
3339      * numwords = mod == 0 ? div : div + 1
3340      * nlz_bits = mod == 0 ? 0 : word_numbits - mod
3341      */
3342
3343     bary_unpack(BARY_ARGS(numbytes_bary), &numbytes, 1, sizeof(numbytes), 0,
3344         INTEGER_PACK_NATIVE_BYTE_ORDER);
3345     BARY_SHORT_MUL(val_numbits_bary, numbytes_bary, char_bit);
3346     if (nlz_bits_in_msbyte)
3347         BARY_SUB(val_numbits_bary, val_numbits_bary, nlz_bits_in_msbyte_bary);
3348     bary_unpack(BARY_ARGS(word_numbits_bary), &word_numbits, 1, sizeof(word_numbits), 0,
3349         INTEGER_PACK_NATIVE_BYTE_ORDER);
3350     BARY_DIVMOD(div_bary, mod_bary, val_numbits_bary, word_numbits_bary);
3351     if (BARY_ZERO_P(mod_bary)) {
3352         nlz_bits = 0;
3353     }
3354     else {
3355         BARY_ADD(div_bary, div_bary, one);
3356         bary_pack(+1, BARY_ARGS(mod_bary), &mod, 1, sizeof(mod), 0,
3357             INTEGER_PACK_NATIVE_BYTE_ORDER);
3358         nlz_bits = word_numbits - mod;
3359     }
3360     sign = bary_pack(+1, BARY_ARGS(div_bary), &numwords, 1, sizeof(numwords), 0,
3361         INTEGER_PACK_NATIVE_BYTE_ORDER);
3362
3363     if (sign == 2) {
3364 #if defined __GNUC__ && (__GNUC__ == 4 && __GNUC_MINOR__ == 4)
3365         *nlz_bits_ret = 0;
3366 #endif
3367         return (size_t)-1;
3368     }
3369     *nlz_bits_ret = nlz_bits;
3370     return numwords;
3371 }
3372
3373 /*
3374  * Calculate the number of words to be required to represent
3375  * the absolute value of the integer given as _val_.
3376  *
3377  * [val] an integer.
3378  * [word_numbits] number of bits in a word.
3379  * [nlz_bits_ret] number of leading zero bits in the most significant word is returned if not NULL.
3380  *
3381  * This function returns ((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits)
3382  * where val_numbits is the number of bits of abs(val).
3383  *
3384  * This function can overflow.
3385  * When overflow occur, (size_t)-1 is returned.
3386  *
3387  * If nlz_bits_ret is not NULL and overflow is not occur,
3388  * (return_value * word_numbits - val_numbits) is stored in *nlz_bits_ret.
3389  * In this case, 0 <= *nlz_bits_ret < word_numbits.
3390  *
3391  */
3392 size_t
3393 rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
3394 {
3395     size_t numbytes;
3396     int nlz_bits_in_msbyte;
3397     size_t numwords;
3398     size_t nlz_bits = 0;
3399
3400     if (word_numbits == 0)
3401         return (size_t)-1;
3402
3403     numbytes = rb_absint_size(val, &nlz_bits_in_msbyte);
3404
3405     if (numbytes <= SIZE_MAX / CHAR_BIT) {
3406         numwords = absint_numwords_small(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
3407 #ifdef DEBUG_INTEGER_PACK
3408         {
3409             size_t numwords0, nlz_bits0;
3410             numwords0 = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits0);
3411             assert(numwords0 == numwords);
3412             assert(nlz_bits0 == nlz_bits);
3413             (void)numwords0;
3414         }
3415 #endif
3416     }
3417     else {
3418         numwords = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits);
3419     }
3420     if (numwords == (size_t)-1)
3421         return numwords;
3422
3423     if (nlz_bits_ret)
3424         *nlz_bits_ret = nlz_bits;
3425
3426     return numwords;
3427 }
3428
3429 /* Test abs(val) consists only a bit or not.
3430  *
3431  * Returns 1 if abs(val) == 1 << n for some n >= 0.
3432  * Returns 0 otherwise.
3433  *
3434  * rb_absint_singlebit_p can be used to determine required buffer size
3435  * for rb_integer_pack used with INTEGER_PACK_2COMP (two's complement).
3436  *
3437  * Following example calculates number of bits required to
3438  * represent val in two's complement number, without sign bit.
3439  *
3440  *   size_t size;
3441  *   int neg = FIXNUM_P(val) ? FIX2LONG(val) < 0 : BIGNUM_NEGATIVE_P(val);
3442  *   size = rb_absint_numwords(val, 1, NULL)
3443  *   if (size == (size_t)-1) ...overflow...
3444  *   if (neg && rb_absint_singlebit_p(val))
3445  *     size--;
3446  *
3447  * Following example calculates number of bytes required to
3448  * represent val in two's complement number, with sign bit.
3449  *
3450  *   size_t size;
3451  *   int neg = FIXNUM_P(val) ? FIX2LONG(val) < 0 : BIGNUM_NEGATIVE_P(val);
3452  *   int nlz_bits;
3453  *   size = rb_absint_size(val, &nlz_bits);
3454  *   if (nlz_bits == 0 && !(neg && rb_absint_singlebit_p(val)))
3455  *     size++;
3456  */
3457 int
3458 rb_absint_singlebit_p(VALUE val)
3459 {
3460     BDIGIT *dp;
3461     BDIGIT *de;
3462     BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
3463     BDIGIT d;
3464
3465     val = rb_to_int(val);
3466
3467     if (FIXNUM_P(val)) {
3468         long v = FIX2LONG(val);
3469         if (v < 0) {
3470             v = -v;
3471         }
3472 #if SIZEOF_BDIGIT >= SIZEOF_LONG
3473         fixbuf[0] = v;
3474 #else
3475         {
3476             int i;
3477             for (i = 0; i < numberof(fixbuf); i++) {
3478                 fixbuf[i] = BIGLO(v);
3479                 v = BIGDN(v);
3480             }
3481         }
3482 #endif
3483         dp = fixbuf;
3484         de = fixbuf + numberof(fixbuf);
3485     }
3486     else {
3487         dp = BDIGITS(val);
3488         de = dp + BIGNUM_LEN(val);
3489     }
3490     while (dp < de && de[-1] == 0)
3491         de--;
3492     while (dp < de && dp[0] == 0)
3493         dp++;
3494     if (dp == de) /* no bit set. */
3495         return 0;
3496     if (dp != de-1) /* two non-zero words. two bits set, at least. */
3497         return 0;
3498     d = *dp;
3499     return POW2_P(d);
3500 }
3501
3502
3503 /*
3504  * Export an integer into a buffer.
3505  *
3506  * This function fills the buffer specified by _words_ and _numwords_ as
3507  * val in the format specified by _wordsize_, _nails_ and _flags_.
3508  *
3509  * [val] Fixnum, Bignum or another integer like object which has to_int method.
3510  * [words] buffer to export abs(val).
3511  * [numwords] the size of given buffer as number of words.
3512  * [wordsize] the size of word as number of bytes.
3513  * [nails] number of padding bits in a word.
3514  *   Most significant nails bits of each word are filled by zero.
3515  * [flags] bitwise or of constants which name starts "INTEGER_PACK_".
3516  *
3517  * flags:
3518  * [INTEGER_PACK_MSWORD_FIRST] Store the most significant word as the first word.
3519  * [INTEGER_PACK_LSWORD_FIRST] Store the least significant word as the first word.
3520  * [INTEGER_PACK_MSBYTE_FIRST] Store the most significant byte in a word as the first byte in the word.
3521  * [INTEGER_PACK_LSBYTE_FIRST] Store the least significant byte in a word as the first byte in the word.
3522  * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian.
3523  * [INTEGER_PACK_2COMP] Use 2's complement representation.
3524  * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST
3525  * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST
3526  * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug).
3527  *
3528  * This function fills the buffer specified by _words_
3529  * as abs(val) if INTEGER_PACK_2COMP is not specified in _flags_.
3530  * If INTEGER_PACK_2COMP is specified, 2's complement representation of val is
3531  * filled in the buffer.
3532  *
3533  * This function returns the signedness and overflow condition.
3534  * The overflow condition depends on INTEGER_PACK_2COMP.
3535  *
3536  * INTEGER_PACK_2COMP is not specified:
3537  *   -2 : negative overflow.  val <= -2**(numwords*(wordsize*CHAR_BIT-nails))
3538  *   -1 : negative without overflow.  -2**(numwords*(wordsize*CHAR_BIT-nails)) < val < 0
3539  *   0 : zero.  val == 0
3540  *   1 : positive without overflow.  0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))
3541  *   2 : positive overflow.  2**(numwords*(wordsize*CHAR_BIT-nails)) <= val
3542  *
3543  * INTEGER_PACK_2COMP is specified:
3544  *   -2 : negative overflow.  val < -2**(numwords*(wordsize*CHAR_BIT-nails))
3545  *   -1 : negative without overflow.  -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val < 0
3546  *   0 : zero.  val == 0
3547  *   1 : positive without overflow.  0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails))
3548  *   2 : positive overflow.  2**(numwords*(wordsize*CHAR_BIT-nails)) <= val
3549  *
3550  * The value, -2**(numwords*(wordsize*CHAR_BIT-nails)), is representable
3551  * in 2's complement representation but not representable in absolute value.
3552  * So -1 is returned for the value if INTEGER_PACK_2COMP is specified
3553  * but returns -2 if INTEGER_PACK_2COMP is not specified.
3554  *
3555  * The least significant words are filled in the buffer when overflow occur.
3556  */
3557
3558 int
3559 rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
3560 {
3561     int sign;
3562     BDIGIT *ds;
3563     size_t num_bdigits;
3564     BDIGIT fixbuf[bdigit_roomof(sizeof(long))];
3565
3566     RB_GC_GUARD(val) = rb_to_int(val);
3567
3568     if (FIXNUM_P(val)) {
3569         long v = FIX2LONG(val);
3570         if (v < 0) {
3571             sign = -1;
3572             v = -v;
3573         }
3574         else {
3575             sign = 1;
3576         }
3577 #if SIZEOF_BDIGIT >= SIZEOF_LONG
3578         fixbuf[0] = v;
3579 #else
3580         {
3581             int i;
3582             for (i = 0; i < numberof(fixbuf); i++) {
3583                 fixbuf[i] = BIGLO(v);
3584                 v = BIGDN(v);
3585             }
3586         }
3587 #endif
3588         ds = fixbuf;
3589         num_bdigits = numberof(fixbuf);
3590     }
3591     else {
3592         sign = BIGNUM_POSITIVE_P(val) ? 1 : -1;
3593         ds = BDIGITS(val);
3594         num_bdigits = BIGNUM_LEN(val);
3595     }
3596
3597     return bary_pack(sign, ds, num_bdigits, words, numwords, wordsize, nails, flags);
3598 }
3599
3600 /*
3601  * Import an integer from a buffer.
3602  *
3603  * [words] buffer to import.
3604  * [numwords] the size of given buffer as number of words.
3605  * [wordsize] the size of word as number of bytes.
3606  * [nails] number of padding bits in a word.
3607  *   Most significant nails bits of each word are ignored.
3608  * [flags] bitwise or of constants which name starts "INTEGER_PACK_".
3609  *
3610  * flags:
3611  * [INTEGER_PACK_MSWORD_FIRST] Interpret the first word as the most significant word.
3612  * [INTEGER_PACK_LSWORD_FIRST] Interpret the first word as the least significant word.
3613  * [INTEGER_PACK_MSBYTE_FIRST] Interpret the first byte in a word as the most significant byte in the word.
3614  * [INTEGER_PACK_LSBYTE_FIRST] Interpret the first byte in a word as the least significant byte in the word.
3615  * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian.
3616  * [INTEGER_PACK_2COMP] Use 2's complement representation.
3617  * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST
3618  * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST
3619  * [INTEGER_PACK_FORCE_BIGNUM] the result will be a Bignum
3620  *   even if it is representable as a Fixnum.
3621  * [INTEGER_PACK_NEGATIVE] Returns non-positive value.
3622  *   (Returns non-negative value if not specified.)
3623  * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug).
3624  *
3625  * This function returns the imported integer as Fixnum or Bignum.
3626  *
3627  * The range of the result value depends on INTEGER_PACK_2COMP and INTEGER_PACK_NEGATIVE.
3628  *
3629  * INTEGER_PACK_2COMP is not set:
3630  *   0 <= val < 2**(numwords*(wordsize*CHAR_BIT-nails)) if !INTEGER_PACK_NEGATIVE
3631  *   -2**(numwords*(wordsize*CHAR_BIT-nails)) < val <= 0 if INTEGER_PACK_NEGATIVE
3632  *
3633  * INTEGER_PACK_2COMP is set:
3634  *   -2**(numwords*(wordsize*CHAR_BIT-nails)-1) <= val <= 2**(numwords*(wordsize*CHAR_BIT-nails)-1)-1 if !INTEGER_PACK_NEGATIVE
3635  *   -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val <= -1 if INTEGER_PACK_NEGATIVE
3636  *
3637  * INTEGER_PACK_2COMP without INTEGER_PACK_NEGATIVE means sign extension.
3638  * INTEGER_PACK_2COMP with INTEGER_PACK_NEGATIVE mean assuming the higher bits are 1.
3639  *
3640  * Note that this function returns 0 when numwords is zero and
3641  * INTEGER_PACK_2COMP is set but INTEGER_PACK_NEGATIVE is not set.
3642  */
3643
3644 VALUE
3645 rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
3646 {
3647     VALUE val;
3648     size_t num_bdigits;
3649     int sign;
3650     int nlp_bits;
3651     BDIGIT *ds;
3652     BDIGIT fixbuf[2] = { 0, 0 };
3653
3654     validate_integer_pack_format(numwords, wordsize, nails, flags,
3655             INTEGER_PACK_MSWORD_FIRST|
3656             INTEGER_PACK_LSWORD_FIRST|
3657             INTEGER_PACK_MSBYTE_FIRST|
3658             INTEGER_PACK_LSBYTE_FIRST|
3659             INTEGER_PACK_NATIVE_BYTE_ORDER|
3660             INTEGER_PACK_2COMP|
3661             INTEGER_PACK_FORCE_BIGNUM|
3662             INTEGER_PACK_NEGATIVE|
3663             INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION);
3664
3665     num_bdigits = integer_unpack_num_bdigits(numwords, wordsize, nails, &nlp_bits);
3666
3667     if (LONG_MAX-1 < num_bdigits)
3668         rb_raise(rb_eArgError, "too big to unpack as an integer");
3669     if (num_bdigits <= numberof(fixbuf) && !(flags & INTEGER_PACK_FORCE_BIGNUM)) {
3670         val = Qfalse;
3671         ds = fixbuf;
3672     }
3673     else {
3674         val = bignew((long)num_bdigits, 0);
3675         ds = BDIGITS(val);
3676     }
3677     sign = bary_unpack_internal(ds, num_bdigits, words, numwords, wordsize, nails, flags, nlp_bits);
3678
3679     if (sign == -2) {
3680         if (val) {
3681             big_extend_carry(val);
3682         }
3683         else if (num_bdigits == numberof(fixbuf)) {
3684             val = bignew((long)num_bdigits+1, 0);
3685             MEMCPY(BDIGITS(val), fixbuf, BDIGIT, num_bdigits);
3686             BDIGITS(val)[num_bdigits++] = 1;
3687         }
3688         else {
3689             ds[num_bdigits++] = 1;
3690         }
3691     }
3692
3693     if (!val) {
3694         BDIGIT_DBL u = fixbuf[0] + BIGUP(fixbuf[1]);
3695         if (u == 0)
3696             return LONG2FIX(0);
3697         if (0 < sign && POSFIXABLE(u))
3698             return LONG2FIX((long)u);
3699         if (sign < 0 && BDIGIT_MSB(fixbuf[1]) == 0 &&
3700                 NEGFIXABLE(-(BDIGIT_DBL_SIGNED)u))
3701             return LONG2FIX((long)-(BDIGIT_DBL_SIGNED)u);
3702         val = bignew((long)num_bdigits, 0 <= sign);
3703         MEMCPY(BDIGITS(val), fixbuf, BDIGIT, num_bdigits);
3704     }
3705
3706     if ((flags & INTEGER_PACK_FORCE_BIGNUM) && sign != 0 &&
3707         bary_zero_p(BDIGITS(val), BIGNUM_LEN(val)))
3708         sign = 0;
3709     BIGNUM_SET_SIGN(val, 0 <= sign);
3710
3711     if (flags & INTEGER_PACK_FORCE_BIGNUM)
3712         return bigtrunc(val);
3713     return bignorm(val);
3714 }
3715
3716 #define conv_digit(c) (ruby_digit36_to_number_table[(unsigned char)(c)])
3717
3718 NORETURN(static inline void invalid_radix(int base));
3719 NORETURN(static inline void invalid_integer(VALUE s));
3720
3721 static inline int
3722 valid_radix_p(int base)
3723 {
3724     return (1 < base && base <= 36);
3725 }
3726
3727 static inline void
3728 invalid_radix(int base)
3729 {
3730     rb_raise(rb_eArgError, "invalid radix %d", base);
3731 }
3732
3733 static inline void
3734 invalid_integer(VALUE s)
3735 {
3736     rb_raise(rb_eArgError, "invalid value for Integer(): %+"PRIsVALUE, s);
3737 }
3738
3739 static int
3740 str2big_scan_digits(const char *s, const char *str, int base, int badcheck, size_t *num_digits_p, ssize_t *len_p)
3741 {
3742     char nondigit = 0;
3743     size_t num_digits = 0;
3744     const char *digits_start = str;
3745     const char *digits_end = str;
3746     ssize_t len = *len_p;
3747
3748     int c;
3749
3750     if (!len) {
3751         *num_digits_p = 0;
3752         *len_p = 0;
3753         return TRUE;
3754     }
3755
3756     if (badcheck && *str == '_') return FALSE;
3757
3758     while ((c = *str++) != 0) {
3759         if (c == '_') {
3760             if (nondigit) {
3761                 if (badcheck) return FALSE;
3762                 break;
3763             }
3764             nondigit = (char) c;
3765         }
3766         else if ((c = conv_digit(c)) < 0 || c >= base) {
3767             break;
3768         }
3769         else {
3770             nondigit = 0;
3771             num_digits++;
3772             digits_end = str;
3773         }
3774         if (len > 0 && !--len) break;
3775     }
3776     if (badcheck && nondigit) return FALSE;
3777     if (badcheck && len) {
3778         str--;
3779         while (*str && ISSPACE(*str)) {
3780             str++;
3781             if (len > 0 && !--len) break;
3782         }
3783         if (len && *str) {
3784             return FALSE;
3785         }
3786     }
3787     *num_digits_p = num_digits;
3788     *len_p = digits_end - digits_start;
3789     return TRUE;
3790 }
3791
3792 static VALUE
3793 str2big_poweroftwo(
3794     int sign,
3795     const char *digits_start,
3796     const char *digits_end,
3797     size_t num_digits,
3798     int bits_per_digit)
3799 {
3800     BDIGIT *dp;
3801     BDIGIT_DBL dd;
3802     int numbits;
3803
3804     size_t num_bdigits;
3805     const char *p;
3806     int c;
3807     VALUE z;
3808
3809     num_bdigits = (num_digits / BITSPERDIG) * bits_per_digit + roomof((num_digits % BITSPERDIG) * bits_per_digit, BITSPERDIG);
3810     z = bignew(num_bdigits, sign);
3811     dp = BDIGITS(z);
3812     dd = 0;
3813     numbits = 0;
3814     for (p = digits_end; digits_start < p; p--) {
3815         if ((c = conv_digit(p[-1])) < 0)
3816             continue;
3817         dd |= (BDIGIT_DBL)c << numbits;
3818         numbits += bits_per_digit;
3819         if (BITSPERDIG <= numbits) {
3820             *dp++ = BIGLO(dd);
3821             dd = BIGDN(dd);
3822             numbits -= BITSPERDIG;
3823         }
3824     }
3825     if (numbits) {
3826         *dp++ = BIGLO(dd);
3827     }
3828     assert((size_t)(dp - BDIGITS(z)) == num_bdigits);
3829
3830     return z;
3831 }
3832
3833 static VALUE
3834 str2big_normal(
3835     int sign,
3836     const char *digits_start,
3837     const char *digits_end,
3838     size_t num_bdigits,
3839     int base)
3840 {
3841     size_t blen = 1;
3842     BDIGIT *zds;
3843     BDIGIT_DBL num;
3844
3845     size_t i;
3846     const char *p;
3847     int c;
3848     VALUE z;
3849
3850     z = bignew(num_bdigits, sign);
3851     zds = BDIGITS(z);
3852     BDIGITS_ZERO(zds, num_bdigits);
3853
3854     for (p = digits_start; p < digits_end; p++) {
3855         if ((c = conv_digit(*p)) < 0)
3856             continue;
3857         num = c;
3858         i = 0;
3859         for (;;) {
3860             while (i<blen) {
3861                 num += (BDIGIT_DBL)zds[i]*base;
3862                 zds[i++] = BIGLO(num);
3863                 num = BIGDN(num);
3864             }
3865             if (num) {
3866                 blen++;
3867                 continue;
3868             }
3869             break;
3870         }
3871         assert(blen <= num_bdigits);
3872     }
3873
3874     return z;
3875 }
3876
3877 static VALUE
3878 str2big_karatsuba(
3879     int sign,
3880     const char *digits_start,
3881     const char *digits_end,
3882     size_t num_digits,
3883     size_t num_bdigits,
3884     int digits_per_bdigits_dbl,
3885     int base)
3886 {
3887     VALUE powerv;
3888     size_t unit;
3889     VALUE tmpuv = 0;
3890     BDIGIT *uds, *vds, *tds;
3891     BDIGIT_DBL dd;
3892     BDIGIT_DBL current_base;
3893     int m;
3894     int power_level = 0;
3895
3896     size_t i;
3897     const char *p;
3898     int c;
3899     VALUE z;
3900
3901     uds = ALLOCV_N(BDIGIT, tmpuv, 2*num_bdigits);
3902     vds = uds + num_bdigits;
3903
3904     powerv = power_cache_get_power(base, power_level, NULL);
3905
3906     i = 0;
3907     dd = 0;
3908     current_base = 1;
3909     m = digits_per_bdigits_dbl;
3910     if (num_digits < (size_t)m)
3911         m = (int)num_digits;
3912     for (p = digits_end; digits_start < p; p--) {
3913         if ((c = conv_digit(p[-1])) < 0)
3914             continue;
3915         dd = dd + c * current_base;
3916         current_base *= base;
3917         num_digits--;
3918         m--;
3919         if (m == 0) {
3920             uds[i++] = BIGLO(dd);
3921             uds[i++] = (BDIGIT)BIGDN(dd);
3922             dd = 0;
3923             m = digits_per_bdigits_dbl;
3924             if (num_digits < (size_t)m)
3925                 m = (int)num_digits;
3926             current_base = 1;
3927         }
3928     }
3929     assert(i == num_bdigits);
3930     for (unit = 2; unit < num_bdigits; unit *= 2) {
3931         for (i = 0; i < num_bdigits; i += unit*2) {
3932             if (2*unit <= num_bdigits - i) {
3933                 bary_mul(vds+i, unit*2, BDIGITS(powerv), BIGNUM_LEN(powerv), uds+i+unit, unit);
3934                 bary_add(vds+i, unit*2, vds+i, unit*2, uds+i, unit);
3935             }
3936             else if (unit <= num_bdigits - i) {
3937                 bary_mul(vds+i, num_bdigits-i, BDIGITS(powerv), BIGNUM_LEN(powerv), uds+i+unit, num_bdigits-(i+unit));
3938                 bary_add(vds+i, num_bdigits-i, vds+i, num_bdigits-i, uds+i, unit);
3939             }
3940             else {
3941                 MEMCPY(vds+i, uds+i, BDIGIT, num_bdigits-i);
3942             }
3943         }
3944         power_level++;
3945         powerv = power_cache_get_power(base, power_level, NULL);
3946         tds = vds;
3947         vds = uds;
3948         uds = tds;
3949     }
3950     BARY_TRUNC(uds, num_bdigits);
3951     z = bignew(num_bdigits, sign);
3952     MEMCPY(BDIGITS(z), uds, BDIGIT, num_bdigits);
3953
3954     if (tmpuv)
3955         ALLOCV_END(tmpuv);
3956
3957     return z;
3958 }
3959
3960 #ifdef USE_GMP
3961 static VALUE
3962 str2big_gmp(
3963     int sign,
3964     const char *digits_start,
3965     const char *digits_end,
3966     size_t num_digits,
3967     size_t num_bdigits,
3968     int base)
3969 {
3970     char *buf, *p;
3971     const char *q;
3972     VALUE tmps;
3973     mpz_t mz;
3974     VALUE z;
3975     BDIGIT *zds;
3976     size_t zn, count;
3977
3978     buf = ALLOCV_N(char, tmps, num_digits+1);
3979     p = buf;
3980     for (q = digits_start; q < digits_end; q++) {
3981         if (conv_digit(*q) < 0)
3982             continue;
3983         *p++ = *q;
3984     }
3985     *p = '\0';
3986
3987     mpz_init(mz);
3988     mpz_set_str(mz, buf, base);
3989     zn = num_bdigits;
3990     z = bignew(zn, sign);
3991     zds = BDIGITS(z);
3992     bdigits_from_mpz(mz, BDIGITS(z), &count);
3993     BDIGITS_ZERO(zds+count, zn-count);
3994     mpz_clear(mz);
3995
3996     if (tmps)
3997         ALLOCV_END(tmps);
3998
3999     return z;
4000 }
4001 #endif
4002
4003 static VALUE rb_cstr_parse_inum(const char *str, ssize_t len, char **endp, int base);
4004
4005 /*
4006  * Parse +str+ as Ruby Integer, i.e., underscores, 0d and 0b prefixes.
4007  *
4008  * str:      pointer to the string to be parsed.
4009  *           should be NUL-terminated.
4010  * base:     base of conversion, must be 2..36, or -36..0.
4011  *           if +base+ > 0, the conversion is done according to the +base+
4012  *           and unmatched prefix is parsed as a part of the result if
4013  *           present.
4014  *           if +base+ <= 0, the conversion is done according to the
4015  *           prefix if present, in base <code>-base</code> if +base+ < -1,
4016  *           or in base 10.
4017  * badcheck: if non-zero, +ArgumentError+ is raised when +str+ is not
4018  *           valid as an Integer.  if zero, Fixnum 0 is returned in
4019  *           that case.
4020  */
4021 VALUE
4022 rb_cstr_to_inum(const char *str, int base, int badcheck)
4023 {
4024     char *end;
4025     VALUE ret = rb_cstr_parse_inum(str, -1, (badcheck ? NULL : &end), base);
4026     if (NIL_P(ret)) {
4027         if (badcheck) rb_invalid_str(str, "Integer()");
4028         ret = INT2FIX(0);
4029     }
4030     return ret;
4031 }
4032
4033 /*
4034  * Parse +str+ as Ruby Integer, i.e., underscores, 0d and 0b prefixes.
4035  *
4036  * str:  pointer to the string to be parsed.
4037  *       should be NUL-terminated if +len+ is negative.
4038  * len:  length of +str+ if >= 0.  if +len+ is negative, +str+ should
4039  *       be NUL-terminated.
4040  * endp: if non-NULL, the address after parsed part is stored.  if
4041  *       NULL, Qnil is returned when +str+ is not valid as an Integer.
4042  * ndigits: if non-NULL, the number of parsed digits is stored.
4043  * base: see +rb_cstr_to_inum+
4044  * flags: bitwise OR of below flags:
4045  *       RB_INT_PARSE_SIGN: allow preceding spaces and +/- sign
4046  *       RB_INT_PARSE_UNDERSCORE: allow an underscore between digits
4047  *       RB_INT_PARSE_PREFIX: allow preceding prefix
4048  */
4049
4050 VALUE
4051 rb_int_parse_cstr(const char *str, ssize_t len, char **endp, size_t *ndigits,
4052                   int base, int flags)
4053 {
4054     const char *const s = str;
4055     char sign = 1;
4056     int c;
4057     VALUE z = Qnil;
4058
4059     unsigned long val;
4060     int ov;
4061
4062     const char *digits_start, *digits_end;
4063     size_t num_digits = 0;
4064     size_t num_bdigits;
4065     const ssize_t len0 = len;
4066     const int badcheck = !endp;
4067
4068 #define ADV(n) do {\
4069         if (len > 0 && len <= (n)) goto bad; \
4070         str += (n); \
4071         len -= (n); \
4072     } while (0)
4073 #define ASSERT_LEN() do {\
4074         assert(len != 0); \
4075         if (len0 >= 0) assert(s + len0 == str + len); \
4076     } while (0)
4077
4078     if (!str) {
4079         goto bad;
4080     }
4081     if (len && (flags & RB_INT_PARSE_SIGN)) {
4082         while (ISSPACE(*str)) ADV(1);
4083
4084         if (str[0] == '+') {
4085             ADV(1);
4086         }
4087         else if (str[0] == '-') {
4088             ADV(1);
4089             sign = 0;
4090         }
4091         ASSERT_LEN();
4092     }
4093     if (base <= 0) {
4094         if (str[0] == '0' && len > 1) {
4095             switch (str[1]) {
4096               case 'x': case 'X':
4097                 base = 16;
4098                 ADV(2);
4099                 break;
4100               case 'b': case 'B':
4101                 base = 2;
4102                 ADV(2);
4103                 break;
4104               case 'o': case 'O':
4105                 base = 8;
4106                 ADV(2);
4107                 break;
4108               case 'd': case 'D':
4109                 base = 10;
4110                 ADV(2);
4111                 break;
4112               default:
4113                 base = 8;
4114             }
4115         }
4116         else if (base < -1) {
4117             base = -base;
4118         }
4119         else {
4120             base = 10;
4121         }
4122     }
4123     else if (len == 1 || !(flags & RB_INT_PARSE_PREFIX)) {
4124         /* no prefix */
4125     }
4126     else if (base == 2) {
4127         if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
4128             ADV(2);
4129         }
4130     }
4131     else if (base == 8) {
4132         if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
4133             ADV(2);
4134         }
4135     }
4136     else if (base == 10) {
4137         if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
4138             ADV(2);
4139         }
4140     }
4141     else if (base == 16) {
4142         if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
4143             ADV(2);
4144         }
4145     }
4146     if (!valid_radix_p(base)) {
4147         invalid_radix(base);
4148     }
4149     if (!len) goto bad;
4150     num_digits = str - s;
4151     if (*str == '0' && len != 1) { /* squeeze preceding 0s */
4152         int us = 0;
4153         const char *end = len < 0 ? NULL : str + len;
4154         ++num_digits;
4155         while ((c = *++str) == '0' ||
4156                ((flags & RB_INT_PARSE_UNDERSCORE) && c == '_')) {
4157             if (c == '_') {
4158                 if (++us >= 2)
4159                     break;
4160             }
4161             else {
4162                 ++num_digits;
4163                 us = 0;
4164             }
4165             if (str == end) break;
4166         }
4167         if (!c || ISSPACE(c)) --str;
4168         if (end) len = end - str;
4169         ASSERT_LEN();
4170     }
4171     c = *str;
4172     c = conv_digit(c);
4173     if (c < 0 || c >= base) {
4174         if (!badcheck && num_digits) z = INT2FIX(0);
4175         goto bad;
4176     }
4177
4178     if (ndigits) *ndigits = num_digits;
4179     val = ruby_scan_digits(str, len, base, &num_digits, &ov);
4180     if (!ov) {
4181         const char *end = &str[num_digits];
4182         if (num_digits > 0 && *end == '_' && (flags & RB_INT_PARSE_UNDERSCORE))
4183             goto bigparse;
4184         if (endp) *endp = (char *)end;
4185         if (ndigits) *ndigits += num_digits;
4186         if (badcheck) {
4187             if (num_digits == 0) return Qnil; /* no number */
4188             while (len < 0 ? *end : end < str + len) {
4189                 if (!ISSPACE(*end)) return Qnil; /* trailing garbage */
4190                 end++;
4191             }
4192         }
4193
4194         if (POSFIXABLE(val)) {
4195             if (sign) return LONG2FIX(val);
4196             else {
4197                 long result = -(long)val;
4198                 return LONG2FIX(result);
4199             }
4200         }
4201         else {
4202             VALUE big = rb_uint2big(val);
4203             BIGNUM_SET_SIGN(big, sign);
4204             return bignorm(big);
4205         }
4206     }
4207
4208   bigparse:
4209     digits_start = str;
4210     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4211         goto bad;
4212     if (endp) *endp = (char *)(str + len);
4213     if (ndigits) *ndigits += num_digits;
4214     digits_end = digits_start + len;
4215
4216     if (POW2_P(base)) {
4217         z = str2big_poweroftwo(sign, digits_start, digits_end, num_digits,
4218                                bit_length(base-1));
4219     }
4220     else {
4221         int digits_per_bdigits_dbl;
4222         maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
4223         num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
4224
4225 #ifdef USE_GMP
4226         if (GMP_STR2BIG_DIGITS < num_bdigits) {
4227             z = str2big_gmp(sign, digits_start, digits_end, num_digits,
4228                     num_bdigits, base);
4229         }
4230         else
4231 #endif
4232         if (num_bdigits < KARATSUBA_MUL_DIGITS) {
4233             z = str2big_normal(sign, digits_start, digits_end,
4234                     num_bdigits, base);
4235         }
4236         else {
4237             z = str2big_karatsuba(sign, digits_start, digits_end, num_digits,
4238                     num_bdigits, digits_per_bdigits_dbl, base);
4239         }
4240     }
4241
4242     return bignorm(z);
4243
4244   bad:
4245     if (endp) *endp = (char *)str;
4246     if (ndigits) *ndigits = num_digits;
4247     return z;
4248 }
4249
4250 static VALUE
4251 rb_cstr_parse_inum(const char *str, ssize_t len, char **endp, int base)
4252 {
4253     return rb_int_parse_cstr(str, len, endp, NULL, base,
4254                              RB_INT_PARSE_DEFAULT);
4255 }
4256
4257 VALUE
4258 rb_str_convert_to_inum(VALUE str, int base, int badcheck, int raise_exception)
4259 {
4260     VALUE ret;
4261     const char *s;
4262     long len;
4263     char *end;
4264
4265     StringValue(str);
4266     rb_must_asciicompat(str);
4267     RSTRING_GETMEM(str, s, len);
4268     ret = rb_cstr_parse_inum(s, len, (badcheck ? NULL : &end), base);
4269     if (NIL_P(ret)) {
4270         if (badcheck) {
4271             if (!raise_exception) return Qnil;
4272             invalid_integer(str);
4273         }
4274         ret = INT2FIX(0);
4275     }
4276     return ret;
4277 }
4278
4279 VALUE
4280 rb_str_to_inum(VALUE str, int base, int badcheck)
4281 {
4282     return rb_str_convert_to_inum(str, base, badcheck, TRUE);
4283 }
4284
4285 VALUE
4286 rb_str2big_poweroftwo(VALUE arg, int base, int badcheck)
4287 {
4288     int positive_p = 1;
4289     const char *s, *str;
4290     const char *digits_start, *digits_end;
4291     size_t num_digits;
4292     ssize_t len;
4293     VALUE z;
4294
4295     if (!valid_radix_p(base) || !POW2_P(base)) {
4296         invalid_radix(base);
4297     }
4298
4299     rb_must_asciicompat(arg);
4300     s = str = StringValueCStr(arg);
4301     len = RSTRING_LEN(arg);
4302     if (*str == '-') {
4303         len--;
4304         str++;
4305         positive_p = 0;
4306     }
4307
4308     digits_start = str;
4309     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4310         invalid_integer(arg);
4311     digits_end = digits_start + len;
4312
4313     z = str2big_poweroftwo(positive_p, digits_start, digits_end, num_digits,
4314             bit_length(base-1));
4315
4316     RB_GC_GUARD(arg);
4317
4318     return bignorm(z);
4319 }
4320
4321 VALUE
4322 rb_str2big_normal(VALUE arg, int base, int badcheck)
4323 {
4324     int positive_p = 1;
4325     const char *s, *str;
4326     const char *digits_start, *digits_end;
4327     size_t num_digits;
4328     ssize_t len;
4329     VALUE z;
4330
4331     int digits_per_bdigits_dbl;
4332     size_t num_bdigits;
4333
4334     if (!valid_radix_p(base)) {
4335         invalid_radix(base);
4336     }
4337
4338     rb_must_asciicompat(arg);
4339     s = str = StringValuePtr(arg);
4340     len = RSTRING_LEN(arg);
4341     if (len > 0 && *str == '-') {
4342         len--;
4343         str++;
4344         positive_p = 0;
4345     }
4346
4347     digits_start = str;
4348     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4349         invalid_integer(arg);
4350     digits_end = digits_start + len;
4351
4352     maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
4353     num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
4354
4355     z = str2big_normal(positive_p, digits_start, digits_end,
4356             num_bdigits, base);
4357
4358     RB_GC_GUARD(arg);
4359
4360     return bignorm(z);
4361 }
4362
4363 VALUE
4364 rb_str2big_karatsuba(VALUE arg, int base, int badcheck)
4365 {
4366     int positive_p = 1;
4367     const char *s, *str;
4368     const char *digits_start, *digits_end;
4369     size_t num_digits;
4370     ssize_t len;
4371     VALUE z;
4372
4373     int digits_per_bdigits_dbl;
4374     size_t num_bdigits;
4375
4376     if (!valid_radix_p(base)) {
4377         invalid_radix(base);
4378     }
4379
4380     rb_must_asciicompat(arg);
4381     s = str = StringValuePtr(arg);
4382     len = RSTRING_LEN(arg);
4383     if (len > 0 && *str == '-') {
4384         len--;
4385         str++;
4386         positive_p = 0;
4387     }
4388
4389     digits_start = str;
4390     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4391         invalid_integer(arg);
4392     digits_end = digits_start + len;
4393
4394     maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
4395     num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
4396
4397     z = str2big_karatsuba(positive_p, digits_start, digits_end, num_digits,
4398             num_bdigits, digits_per_bdigits_dbl, base);
4399
4400     RB_GC_GUARD(arg);
4401
4402     return bignorm(z);
4403 }
4404
4405 #ifdef USE_GMP
4406 VALUE
4407 rb_str2big_gmp(VALUE arg, int base, int badcheck)
4408 {
4409     int positive_p = 1;
4410     const char *s, *str;
4411     const char *digits_start, *digits_end;
4412     size_t num_digits;
4413     ssize_t len;
4414     VALUE z;
4415
4416     int digits_per_bdigits_dbl;
4417     size_t num_bdigits;
4418
4419     if (!valid_radix_p(base)) {
4420         invalid_radix(base);
4421     }
4422
4423     rb_must_asciicompat(arg);
4424     s = str = StringValuePtr(arg);
4425     len = RSTRING_LEN(arg);
4426     if (len > 0 && *str == '-') {
4427         len--;
4428         str++;
4429         positive_p = 0;
4430     }
4431
4432     digits_start = str;
4433     if (!str2big_scan_digits(s, str, base, badcheck, &num_digits, &len))
4434         invalid_integer(arg);
4435     digits_end = digits_start + len;
4436
4437     maxpow_in_bdigit_dbl(base, &digits_per_bdigits_dbl);
4438     num_bdigits = roomof(num_digits, digits_per_bdigits_dbl)*2;
4439
4440     z = str2big_gmp(positive_p, digits_start, digits_end, num_digits, num_bdigits, base);
4441
4442     RB_GC_GUARD(arg);
4443
4444     return bignorm(z);
4445 }
4446 #endif
4447
4448 #if HAVE_LONG_LONG
4449
4450 static VALUE
4451 rb_ull2big(unsigned LONG_LONG n)
4452 {
4453     long i;
4454     VALUE big = bignew(bdigit_roomof(SIZEOF_LONG_LONG), 1);
4455     BDIGIT *digits = BDIGITS(big);
4456
4457 #if SIZEOF_BDIGIT >= SIZEOF_LONG_LONG
4458     digits[0] = n;
4459 #else
4460     for (i = 0; i < bdigit_roomof(SIZEOF_LONG_LONG); i++) {
4461         digits[i] = BIGLO(n);
4462         n = BIGDN(n);
4463     }
4464 #endif
4465
4466     i = bdigit_roomof(SIZEOF_LONG_LONG);
4467     while (i-- && !digits[i]) ;
4468     BIGNUM_SET_LEN(big, i+1);
4469     return big;
4470 }
4471
4472 static VALUE
4473 rb_ll2big(LONG_LONG n)
4474 {
4475     long neg = 0;
4476     unsigned LONG_LONG u;
4477     VALUE big;
4478
4479     if (n < 0) {
4480         u = 1 + (unsigned LONG_LONG)(-(n + 1)); /* u = -n avoiding overflow */
4481         neg = 1;
4482     }
4483     else {
4484         u = n;
4485     }
4486     big = rb_ull2big(u);
4487     if (neg) {
4488         BIGNUM_SET_NEGATIVE_SIGN(big);
4489     }
4490     return big;
4491 }
4492
4493 VALUE
4494 rb_ull2inum(unsigned LONG_LONG n)
4495 {
4496     if (POSFIXABLE(n)) return LONG2FIX((long)n);
4497     return rb_ull2big(n);
4498 }
4499
4500 VALUE
4501 rb_ll2inum(LONG_LONG n)
4502 {
4503     if (FIXABLE(n)) return LONG2FIX((long)n);
4504     return rb_ll2big(n);
4505 }
4506
4507 #endif  /* HAVE_LONG_LONG */
4508
4509 #ifdef HAVE_INT128_T
4510 static VALUE
4511 rb_uint128t2big(uint128_t n)
4512 {
4513     long i;
4514     VALUE big = bignew(bdigit_roomof(SIZEOF_INT128_T), 1);
4515     BDIGIT *digits = BDIGITS(big);
4516
4517     for (i = 0; i < bdigit_roomof(SIZEOF_INT128_T); i++) {
4518         digits[i] = BIGLO(RSHIFT(n ,BITSPERDIG*i));
4519     }
4520
4521     i = bdigit_roomof(SIZEOF_INT128_T);
4522     while (i-- && !digits[i]) ;
4523     BIGNUM_SET_LEN(big, i+1);
4524     return big;
4525 }
4526
4527 MJIT_FUNC_EXPORTED VALUE
4528 rb_int128t2big(int128_t n)
4529 {
4530     int neg = 0;
4531     uint128_t u;
4532     VALUE big;
4533
4534     if (n < 0) {
4535         u = 1 + (uint128_t)(-(n + 1)); /* u = -n avoiding overflow */
4536         neg = 1;
4537     }
4538     else {
4539         u = n;
4540     }
4541     big = rb_uint128t2big(u);
4542     if (neg) {
4543         BIGNUM_SET_NEGATIVE_SIGN(big);
4544     }
4545     return big;
4546 }
4547 #endif
4548
4549 VALUE
4550 rb_cstr2inum(const char *str, int base)
4551 {
4552     return rb_cstr_to_inum(str, base, base==0);
4553 }
4554
4555 VALUE
4556 rb_str2inum(VALUE str, int base)
4557 {
4558     return rb_str_to_inum(str, base, base==0);
4559 }
4560
4561 static VALUE
4562 big_shift3(VALUE x, int lshift_p, size_t shift_numdigits, int shift_numbits)
4563 {
4564     BDIGIT *xds, *zds;
4565     long s1;
4566     int s2;
4567     VALUE z;
4568     long xn;
4569
4570     if (lshift_p) {
4571         if (LONG_MAX < shift_numdigits) {
4572             rb_raise(rb_eArgError, "too big number");
4573         }
4574         s1 = shift_numdigits;
4575         s2 = shift_numbits;
4576         xn = BIGNUM_LEN(x);
4577         z = bignew(xn+s1+1, BIGNUM_SIGN(x));
4578         zds = BDIGITS(z);
4579         BDIGITS_ZERO(zds, s1);
4580         xds = BDIGITS(x);
4581         zds[xn+s1] = bary_small_lshift(zds+s1, xds, xn, s2);
4582     }
4583     else {
4584         long zn;
4585         BDIGIT hibitsx;
4586         if (LONG_MAX < shift_numdigits || (size_t)BIGNUM_LEN(x) <= shift_numdigits) {
4587             if (BIGNUM_POSITIVE_P(x) ||
4588                 bary_zero_p(BDIGITS(x), BIGNUM_LEN(x)))
4589                 return INT2FIX(0);
4590             else
4591                 return INT2FIX(-1);
4592         }
4593         s1 = shift_numdigits;
4594         s2 = shift_numbits;
4595         hibitsx = abs2twocomp(&x, &xn);
4596         xds = BDIGITS(x);
4597         if (xn <= s1) {
4598             return hibitsx ? INT2FIX(-1) : INT2FIX(0);
4599         }
4600         zn = xn - s1;
4601         z = bignew(zn, 0);
4602         zds = BDIGITS(z);
4603         bary_small_rshift(zds, xds+s1, zn, s2, hibitsx != 0 ? BDIGMAX : 0);
4604         twocomp2abs_bang(z, hibitsx != 0);
4605     }
4606     RB_GC_GUARD(x);
4607     return z;
4608 }
4609
4610 static VALUE
4611 big_shift2(VALUE x, int lshift_p, VALUE y)
4612 {
4613     int sign;
4614     size_t lens[2];
4615     size_t shift_numdigits;
4616     int shift_numbits;
4617
4618     assert(POW2_P(CHAR_BIT));
4619     assert(POW2_P(BITSPERDIG));
4620
4621     if (BIGZEROP(x))
4622         return INT2FIX(0);
4623     sign = rb_integer_pack(y, lens, numberof(lens), sizeof(size_t), 0,
4624         INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER);
4625     if (sign < 0) {
4626         lshift_p = !lshift_p;
4627         sign = -sign;
4628     }
4629     if (lshift_p) {
4630         if (1 < sign || CHAR_BIT <= lens[1])
4631             rb_raise(rb_eRangeError, "shift width too big");
4632     }
4633     else {
4634         if (1 < sign || CHAR_BIT <= lens[1])
4635             return BIGNUM_POSITIVE_P(x) ? INT2FIX(0) : INT2FIX(-1);
4636     }
4637     shift_numbits = (int)(lens[0] & (BITSPERDIG-1));
4638     shift_numdigits = (lens[0] >> bit_length(BITSPERDIG-1)) |
4639       (lens[1] << (CHAR_BIT*SIZEOF_SIZE_T - bit_length(BITSPERDIG-1)));
4640     return big_shift3(x, lshift_p, shift_numdigits, shift_numbits);
4641 }
4642
4643 static VALUE
4644 big_lshift(VALUE x, unsigned long shift)
4645 {
4646     long s1 = shift/BITSPERDIG;
4647     int s2 = (int)(shift%BITSPERDIG);
4648     return big_shift3(x, 1, s1, s2);
4649 }
4650
4651 static VALUE
4652 big_rshift(VALUE x, unsigned long shift)
4653 {
4654     long s1 = shift/BITSPERDIG;
4655     int s2 = (int)(shift%BITSPERDIG);
4656     return big_shift3(x, 0, s1, s2);
4657 }
4658
4659 #define MAX_BASE36_POWER_TABLE_ENTRIES (SIZEOF_SIZE_T * CHAR_BIT + 1)
4660
4661 static VALUE base36_power_cache[35][MAX_BASE36_POWER_TABLE_ENTRIES];
4662 static size_t base36_numdigits_cache[35][MAX_BASE36_POWER_TABLE_ENTRIES];
4663
4664 static void
4665 power_cache_init(void)
4666 {
4667 }
4668
4669 static inline VALUE
4670 power_cache_get_power(int base, int power_level, size_t *numdigits_ret)
4671 {
4672     /*
4673      * MAX_BASE36_POWER_TABLE_ENTRIES is big enough to that
4674      * base36_power_cache[base][MAX_BASE36_POWER_TABLE_ENTRIES-1] fills whole memory.
4675      * So MAX_BASE36_POWER_TABLE_ENTRIES <= power_level is not possible to calculate.
4676      *
4677      * number-of-bytes =
4678      * log256(base36_power_cache[base][MAX_BASE36_POWER_TABLE_ENTRIES-1]) =
4679      * log256(maxpow_in_bdigit_dbl(base)**(2**(MAX_BASE36_POWER_TABLE_ENTRIES-1))) =
4680      * log256(maxpow_in_bdigit_dbl(base)**(2**(SIZEOF_SIZE_T*CHAR_BIT))) =
4681      * (2**(SIZEOF_SIZE_T*CHAR_BIT))*log256(maxpow_in_bdigit_dbl(base)) =
4682      * (256**SIZEOF_SIZE_T)*log256(maxpow_in_bdigit_dbl(base)) >
4683      * (256**SIZEOF_SIZE_T)*(sizeof(BDIGIT_DBL)-1) >
4684      * 256**SIZEOF_SIZE_T
4685      */
4686     if (MAX_BASE36_POWER_TABLE_ENTRIES <= power_level)
4687         rb_bug("too big power number requested: maxpow_in_bdigit_dbl(%d)**(2**%d)", base, power_level);
4688
4689     VALUE power = base36_power_cache[base - 2][power_level];
4690     if (!power) {
4691         size_t numdigits;
4692         if (power_level == 0) {
4693             int numdigits0;
4694             BDIGIT_DBL dd = maxpow_in_bdigit_dbl(base, &numdigits0);
4695             power = bignew(2, 1);
4696             bdigitdbl2bary(BDIGITS(power), 2, dd);
4697             numdigits = numdigits0;
4698         }
4699         else {
4700             power = bigtrunc(bigsq(power_cache_get_power(base, power_level - 1, &numdigits)));
4701             numdigits *= 2;
4702         }
4703         rb_obj_hide(power);
4704         base36_power_cache[base - 2][power_level] = power;
4705         base36_numdigits_cache[base - 2][power_level] = numdigits;
4706         rb_gc_register_mark_object(power);
4707     }
4708     if (numdigits_ret)
4709         *numdigits_ret = base36_numdigits_cache[base - 2][power_level];
4710     return power;
4711 }
4712
4713 struct big2str_struct {
4714     int negative;
4715     int base;
4716     BDIGIT_DBL hbase2;
4717     int hbase2_numdigits;
4718     VALUE result;
4719     char *ptr;
4720 };
4721
4722 static void
4723 big2str_alloc(struct big2str_struct *b2s, size_t len)
4724 {
4725     if (LONG_MAX-1 < len)
4726         rb_raise(rb_eArgError, "too big number");
4727     b2s->result = rb_usascii_str_new(0, (long)(len + 1)); /* plus one for sign */
4728     b2s->ptr = RSTRING_PTR(b2s->result);
4729     if (b2s->negative)
4730         *b2s->ptr++ = '-';
4731 }
4732
4733 static void
4734 big2str_2bdigits(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t taillen)
4735 {
4736     size_t j;
4737     BDIGIT_DBL num;
4738     char buf[SIZEOF_BDIGIT_DBL*CHAR_BIT], *p;
4739     int beginning = !b2s->ptr;
4740     size_t len = 0;
4741
4742     assert(xn <= 2);
4743     num = bary2bdigitdbl(xds, xn);
4744
4745     if (beginning) {
4746         if (num == 0)
4747             return;
4748         p = buf;
4749         j = sizeof(buf);
4750         do {
4751             BDIGIT_DBL idx = num % b2s->base;
4752             num /= b2s->base;
4753             p[--j] = ruby_digitmap[idx];
4754         } while (num);
4755         len = sizeof(buf) - j;
4756         big2str_alloc(b2s, len + taillen);
4757         MEMCPY(b2s->ptr, buf + j, char, len);
4758     }
4759     else {
4760         p = b2s->ptr;
4761         j = b2s->hbase2_numdigits;
4762         do {
4763             BDIGIT_DBL idx = num % b2s->base;
4764             num /= b2s->base;
4765             p[--j] = ruby_digitmap[idx];
4766         } while (j);
4767         len = b2s->hbase2_numdigits;
4768     }
4769     b2s->ptr += len;
4770 }
4771
4772 static void
4773 big2str_karatsuba(struct big2str_struct *b2s, BDIGIT *xds, size_t xn, size_t wn,
4774                   int power_level, size_t taillen)
4775 {
4776     VALUE b;
4777     size_t half_numdigits, lower_numdigits;
4778     int lower_power_level;
4779     size_t bn;
4780     const BDIGIT *bds;
4781     size_t len;
4782
4783     /*
4784      * Precondition:
4785      * abs(x) < maxpow**(2**power_level)
4786      * where
4787      *   maxpow = maxpow_in_bdigit_dbl(base, &numdigits)
4788      *
4789      * This function generates sequence of zeros, and then stringized abs(x) into b2s->ptr.
4790      *
4791      * b2s->ptr can be NULL.
4792      * It is allocated when the first character is generated via big2str_alloc.
4793      *
4794      * The prefix zeros should be generated if and only if b2s->ptr is not NULL.
4795      * When the zeros are generated, the zeros and abs(x) consists
4796      * numdigits*(2**power_level) characters at total.
4797      *
4798      * Note:
4799      * power_cache_get_power(base, power_level, &len) may not be cached yet. It should not be called.
4800      * power_cache_get_power(base, power_level-1, &len) should be cached already if 0 <= power_level-1.
4801      */
4802
4803     if (xn == 0 || bary_zero_p(xds, xn)) {
4804         if (b2s->ptr) {
4805             /* When x is zero, power_cache_get_power(base, power_level) should be cached already. */
4806             power_cache_get_power(b2s->base, power_level, &len);
4807             memset(b2s->ptr, '0', len);
4808             b2s->ptr += len;
4809         }
4810         return;
4811     }
4812
4813     if (power_level == 0) {
4814         big2str_2bdigits(b2s, xds, xn, taillen);
4815         return;
4816     }
4817
4818     lower_power_level = power_level-1;
4819     b = power_cache_get_power(b2s->base, lower_power_level, &lower_numdigits);
4820     bn = BIGNUM_LEN(b);
4821     bds = BDIGITS(b);
4822
4823     half_numdigits = lower_numdigits;
4824
4825     while (0 < lower_power_level &&
4826             (xn < bn ||
4827              (xn == bn && bary_cmp(xds, xn, bds, bn) < 0))) {
4828         lower_power_level--;
4829         b = power_cache_get_power(b2s->base, lower_power_level, &lower_numdigits);
4830         bn = BIGNUM_LEN(b);
4831         bds = BDIGITS(b);
4832     }
4833
4834     if (lower_power_level == 0 &&
4835             (xn < bn ||
4836              (xn == bn && bary_cmp(xds, xn, bds, bn) < 0))) {
4837         if (b2s->ptr) {
4838             len = half_numdigits * 2 - lower_numdigits;
4839             memset(b2s->ptr, '0', len);
4840             b2s->ptr += len;
4841         }
4842         big2str_2bdigits(b2s, xds, xn, taillen);
4843     }
4844     else {
4845         BDIGIT *qds, *rds;
4846         size_t qn, rn;
4847         BDIGIT *tds;
4848         int shift;
4849
4850         if (lower_power_level != power_level-1 && b2s->ptr) {
4851             len = (half_numdigits - lower_numdigits) * 2;
4852             memset(b2s->ptr, '0', len);
4853             b2s->ptr += len;
4854         }
4855
4856         shift = nlz(bds[bn-1]);
4857
4858         qn = xn + BIGDIVREM_EXTRA_WORDS;
4859
4860         if (shift == 0) {
4861             /* bigdivrem_restoring will not modify y.
4862              * So use bds directly.  */
4863             tds = (BDIGIT *)bds;
4864             xds[xn] = 0;
4865         }
4866         else {
4867             /* bigdivrem_restoring will modify y.
4868              * So use temporary buffer.  */
4869             tds = xds + qn;
4870             assert(qn + bn <= xn + wn);
4871             bary_small_lshift(tds, bds, bn, shift);
4872             xds[xn] = bary_small_lshift(xds, xds, xn, shift);
4873         }
4874
4875         bigdivrem_restoring(xds, qn, tds, bn);
4876
4877         rds = xds;
4878         rn = bn;
4879
4880         qds = xds + bn;
4881         qn = qn - bn;
4882
4883         if (shift) {
4884             bary_small_rshift(rds, rds, rn, shift, 0);
4885         }
4886
4887         BARY_TRUNC(qds, qn);
4888         assert(qn <= bn);
4889         big2str_karatsuba(b2s, qds, qn, xn+wn - (rn+qn), lower_power_level, lower_numdigits+taillen);
4890         BARY_TRUNC(rds, rn);
4891         big2str_karatsuba(b2s, rds, rn, xn+wn - rn, lower_power_level, taillen);
4892     }
4893 }
4894
4895 static VALUE
4896 big2str_base_poweroftwo(VALUE x, int base)
4897 {
4898     int word_numbits = ffs(base) - 1;
4899     size_t numwords;
4900     VALUE result;
4901     char *ptr;
4902     numwords = rb_absint_numwords(x, word_numbits, NULL);
4903     if (BIGNUM_NEGATIVE_P(x)) {
4904         if (LONG_MAX-1 < numwords)
4905             rb_raise(rb_eArgError, "too big number");
4906         result = rb_usascii_str_new(0, 1+numwords);
4907         ptr = RSTRING_PTR(result);
4908         *ptr++ = BIGNUM_POSITIVE_P(x) ? '+' : '-';
4909     }
4910     else {
4911         if (LONG_MAX < numwords)
4912             rb_raise(rb_eArgError, "too big number");
4913         result = rb_usascii_str_new(0, numwords);
4914         ptr = RSTRING_PTR(result);
4915     }
4916     rb_integer_pack(x, ptr, numwords, 1, CHAR_BIT-word_numbits,
4917                     INTEGER_PACK_BIG_ENDIAN);
4918     while (0 < numwords) {
4919         *ptr = ruby_digitmap[*(unsigned char *)ptr];
4920         ptr++;
4921         numwords--;
4922     }
4923     return result;
4924 }
4925
4926 VALUE
4927 rb_big2str_poweroftwo(VALUE x, int base)
4928 {
4929     return big2str_base_poweroftwo(x, base);
4930 }
4931
4932 static VALUE
4933 big2str_generic(VALUE x, int base)
4934 {
4935     BDIGIT *xds;
4936     size_t xn;
4937     struct big2str_struct b2s_data;
4938     int power_level;
4939     VALUE power;
4940
4941     xds = BDIGITS(x);
4942     xn = BIGNUM_LEN(x);
4943     BARY_TRUNC(xds, xn);
4944
4945     if (xn == 0) {
4946         return rb_usascii_str_new2("0");
4947     }
4948
4949     if (!valid_radix_p(base))
4950         invalid_radix(base);
4951
4952     if (xn >= LONG_MAX/BITSPERDIG) {
4953         rb_raise(rb_eRangeError, "bignum too big to convert into `string'");
4954     }
4955
4956     power_level = 0;
4957     power = power_cache_get_power(base, power_level, NULL);
4958     while (power_level < MAX_BASE36_POWER_TABLE_ENTRIES &&
4959            (size_t)BIGNUM_LEN(power) <= (xn+1)/2) {
4960         power_level++;
4961         power = power_cache_get_power(base, power_level, NULL);
4962     }
4963     assert(power_level != MAX_BASE36_POWER_TABLE_ENTRIES);
4964
4965     if ((size_t)BIGNUM_LEN(power) <= xn) {
4966         /*
4967          * This increment guarantees x < power_cache_get_power(base, power_level)
4968          * without invoking it actually.
4969          * (power_cache_get_power(base, power_level) can be slow and not used
4970          * in big2str_karatsuba.)
4971          *
4972          * Although it is possible that x < power_cache_get_power(base, power_level-1),
4973          * it is no problem because big2str_karatsuba checks it and
4974          * doesn't affect the result when b2s_data.ptr is NULL.
4975          */
4976         power_level++;
4977     }
4978
4979     b2s_data.negative = BIGNUM_NEGATIVE_P(x);
4980     b2s_data.base = base;
4981     b2s_data.hbase2 = maxpow_in_bdigit_dbl(base, &b2s_data.hbase2_numdigits);
4982
4983     b2s_data.result = Qnil;
4984     b2s_data.ptr = NULL;
4985
4986     if (power_level == 0) {
4987         big2str_2bdigits(&b2s_data, xds, xn, 0);
4988     }
4989     else {
4990         VALUE tmpw = 0;
4991         BDIGIT *wds;
4992         size_t wn;
4993         wn = power_level * BIGDIVREM_EXTRA_WORDS + BIGNUM_LEN(power);
4994         wds = ALLOCV_N(BDIGIT, tmpw, xn + wn);
4995         MEMCPY(wds, xds, BDIGIT, xn);
4996         big2str_karatsuba(&b2s_data, wds, xn, wn, power_level, 0);
4997         if (tmpw)
4998             ALLOCV_END(tmpw);
4999     }
5000     RB_GC_GUARD(x);
5001
5002     *b2s_data.ptr = '\0';
5003     rb_str_resize(b2s_data.result, (long)(b2s_data.ptr - RSTRING_PTR(b2s_data.result)));
5004
5005     RB_GC_GUARD(x);
5006     return b2s_data.result;
5007 }
5008
5009 VALUE
5010 rb_big2str_generic(VALUE x, int base)
5011 {
5012     return big2str_generic(x, base);
5013 }
5014
5015 #ifdef USE_GMP
5016 static VALUE
5017 big2str_gmp(VALUE x, int base)
5018 {
5019     mpz_t mx;
5020     size_t size;
5021     VALUE str;
5022     BDIGIT *xds = BDIGITS(x);
5023     size_t xn = BIGNUM_LEN(x);
5024
5025     mpz_init(mx);
5026     bdigits_to_mpz(mx, xds, xn);
5027
5028     size = mpz_sizeinbase(mx, base);
5029
5030     if (BIGNUM_NEGATIVE_P(x)) {
5031         mpz_neg(mx, mx);
5032         str = rb_usascii_str_new(0, size+1);
5033     }
5034     else {
5035         str = rb_usascii_str_new(0, size);
5036     }
5037     mpz_get_str(RSTRING_PTR(str), base, mx);
5038     mpz_clear(mx);
5039
5040     if (RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\0') {
5041         rb_str_set_len(str, RSTRING_LEN(str)-1);
5042     }
5043
5044     RB_GC_GUARD(x);
5045     return str;
5046 }
5047
5048 VALUE
5049 rb_big2str_gmp(VALUE x, int base)
5050 {
5051     return big2str_gmp(x, base);
5052 }
5053 #endif
5054
5055 static VALUE
5056 rb_big2str1(VALUE x, int base)
5057 {
5058     BDIGIT *xds;
5059     size_t xn;
5060
5061     if (FIXNUM_P(x)) {
5062         return rb_fix2str(x, base);
5063     }
5064
5065     bigtrunc(x);
5066     xds = BDIGITS(x);
5067     xn = BIGNUM_LEN(x);
5068     BARY_TRUNC(xds, xn);
5069
5070     if (xn == 0) {
5071         return rb_usascii_str_new2("0");
5072     }
5073
5074     if (!valid_radix_p(base))
5075         invalid_radix(base);
5076
5077     if (xn >= LONG_MAX/BITSPERDIG) {
5078         rb_raise(rb_eRangeError, "bignum too big to convert into `string'");
5079     }
5080
5081     if (POW2_P(base)) {
5082         /* base == 2 || base == 4 || base == 8 || base == 16 || base == 32 */
5083         return big2str_base_poweroftwo(x, base);
5084     }
5085
5086 #ifdef USE_GMP
5087     if (GMP_BIG2STR_DIGITS < xn) {
5088         return big2str_gmp(x, base);
5089     }
5090 #endif
5091
5092     return big2str_generic(x, base);
5093 }
5094
5095 VALUE
5096 rb_big2str(VALUE x, int base)
5097 {
5098     return rb_big2str1(x, base);
5099 }
5100
5101 static unsigned long
5102 big2ulong(VALUE x, const char *type)
5103 {
5104 #if SIZEOF_LONG > SIZEOF_BDIGIT
5105     size_t i;
5106 #endif
5107     size_t len = BIGNUM_LEN(x);
5108     unsigned long num;
5109     BDIGIT *ds;
5110
5111     if (len == 0)
5112         return 0;
5113     if (BIGSIZE(x) > sizeof(long)) {
5114         rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type);
5115     }
5116     ds = BDIGITS(x);
5117 #if SIZEOF_LONG <= SIZEOF_BDIGIT
5118     num = (unsigned long)ds[0];
5119 #else
5120     num = 0;
5121     for (i = 0; i < len; i++) {
5122         num <<= BITSPERDIG;
5123         num += (unsigned long)ds[len - i - 1]; /* overflow is already checked */
5124     }
5125 #endif
5126     return num;
5127 }
5128
5129 unsigned long
5130 rb_big2ulong(VALUE x)
5131 {
5132     unsigned long num = big2ulong(x, "unsigned long");
5133
5134     if (BIGNUM_POSITIVE_P(x)) {
5135         return num;
5136     }
5137     else {
5138         if (num <= 1+(unsigned long)(-(LONG_MIN+1)))
5139             return -(long)(num-1)-1;
5140     }
5141     rb_raise(rb_eRangeError, "bignum out of range of unsigned long");
5142 }
5143
5144 long
5145 rb_big2long(VALUE x)
5146 {
5147     unsigned long num = big2ulong(x, "long");
5148
5149     if (BIGNUM_POSITIVE_P(x)) {
5150         if (num <= LONG_MAX)
5151             return num;
5152     }
5153     else {
5154         if (num <= 1+(unsigned long)(-(LONG_MIN+1)))
5155             return -(long)(num-1)-1;
5156     }
5157     rb_raise(rb_eRangeError, "bignum too big to convert into `long'");
5158 }
5159
5160 #if HAVE_LONG_LONG
5161
5162 static unsigned LONG_LONG
5163 big2ull(VALUE x, const char *type)
5164 {
5165 #if SIZEOF_LONG_LONG > SIZEOF_BDIGIT
5166     size_t i;
5167 #endif
5168     size_t len = BIGNUM_LEN(x);
5169     unsigned LONG_LONG num;
5170     BDIGIT *ds = BDIGITS(x);
5171
5172     if (len == 0)
5173         return 0;
5174     if (BIGSIZE(x) > SIZEOF_LONG_LONG)
5175         rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type);
5176 #if SIZEOF_LONG_LONG <= SIZEOF_BDIGIT
5177     num = (unsigned LONG_LONG)ds[0];
5178 #else
5179     num = 0;
5180     for (i = 0; i < len; i++) {
5181         num = BIGUP(num);
5182         num += ds[len - i - 1];
5183     }
5184 #endif
5185     return num;
5186 }
5187
5188 unsigned LONG_LONG
5189 rb_big2ull(VALUE x)
5190 {
5191     unsigned LONG_LONG num = big2ull(x, "unsigned long long");
5192
5193     if (BIGNUM_POSITIVE_P(x)) {
5194         return num;
5195     }
5196     else {
5197         if (num <= 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
5198             return -(LONG_LONG)(num-1)-1;
5199     }
5200     rb_raise(rb_eRangeError, "bignum out of range of unsigned long long");
5201 }
5202
5203 LONG_LONG
5204 rb_big2ll(VALUE x)
5205 {
5206     unsigned LONG_LONG num = big2ull(x, "long long");
5207
5208     if (BIGNUM_POSITIVE_P(x)) {
5209         if (num <= LLONG_MAX)
5210             return num;
5211     }
5212     else {
5213         if (num <= 1+(unsigned LONG_LONG)(-(LLONG_MIN+1)))
5214             return -(LONG_LONG)(num-1)-1;
5215     }
5216     rb_raise(rb_eRangeError, "bignum too big to convert into `long long'");
5217 }
5218
5219 #endif  /* HAVE_LONG_LONG */
5220
5221 static VALUE
5222 dbl2big(double d)
5223 {
5224     long i = 0;
5225     BDIGIT c;
5226     BDIGIT *digits;
5227     VALUE z;
5228     double u = (d < 0)?-d:d;
5229
5230     if (isinf(d)) {
5231         rb_raise(rb_eFloatDomainError, d < 0 ? "-Infinity" : "Infinity");
5232     }
5233     if (isnan(d)) {
5234         rb_raise(rb_eFloatDomainError, "NaN");
5235     }
5236
5237     while (1.0 <= u) {
5238         u /= (double)(BIGRAD);
5239         i++;
5240     }
5241     z = bignew(i, d>=0);
5242     digits = BDIGITS(z);
5243     while (i--) {
5244         u *= BIGRAD;
5245         c = (BDIGIT)u;
5246         u -= c;
5247         digits[i] = c;
5248     }
5249
5250     return z;
5251 }
5252
5253 VALUE
5254 rb_dbl2big(double d)
5255 {
5256     return bignorm(dbl2big(d));
5257 }
5258
5259 static double
5260 big2dbl(VALUE x)
5261 {
5262     double d = 0.0;
5263     long i = (bigtrunc(x), BIGNUM_LEN(x)), lo = 0, bits;
5264     BDIGIT *ds = BDIGITS(x), dl;
5265
5266     if (i) {
5267         bits = i * BITSPERDIG - nlz(ds[i-1]);
5268         if (bits > DBL_MANT_DIG+DBL_MAX_EXP) {
5269             d = HUGE_VAL;
5270         }
5271         else {
5272             if (bits > DBL_MANT_DIG+1)
5273                 lo = (bits -= DBL_MANT_DIG+1) / BITSPERDIG;
5274             else
5275                 bits = 0;
5276             while (--i > lo) {
5277                 d = ds[i] + BIGRAD*d;
5278             }
5279             dl = ds[i];
5280             if (bits && (dl & ((BDIGIT)1 << (bits %= BITSPERDIG)))) {
5281                 int carry = (dl & ~(BDIGMAX << bits)) != 0;
5282                 if (!carry) {
5283                     while (i-- > 0) {
5284                         carry = ds[i] != 0;
5285                         if (carry) break;
5286                     }
5287                 }
5288                 if (carry) {
5289                     BDIGIT mask = BDIGMAX;
5290                     BDIGIT bit = 1;
5291                     mask <<= bits;
5292                     bit <<= bits;
5293                     dl &= mask;
5294                     dl += bit;
5295                     dl = BIGLO(dl);
5296                     if (!dl) d += 1;
5297                 }
5298             }
5299             d = dl + BIGRAD*d;
5300             if (lo) {
5301                 if (lo > INT_MAX / BITSPERDIG)
5302                     d = HUGE_VAL;
5303                 else if (lo < INT_MIN / BITSPERDIG)
5304                     d = 0.0;
5305                 else
5306                     d = ldexp(d, (int)(lo * BITSPERDIG));
5307             }
5308         }
5309     }
5310     if (BIGNUM_NEGATIVE_P(x)) d = -d;
5311     return d;
5312 }
5313
5314 double
5315 rb_big2dbl(VALUE x)
5316 {
5317     double d = big2dbl(x);
5318
5319     if (isinf(d)) {
5320         rb_warning("Integer out of Float range");
5321         if (d < 0.0)
5322             d = -HUGE_VAL;
5323         else
5324             d = HUGE_VAL;
5325     }
5326     return d;
5327 }
5328
5329 VALUE
5330 rb_integer_float_cmp(VALUE x, VALUE y)
5331 {
5332     double yd = RFLOAT_VALUE(y);
5333     double yi, yf;
5334     VALUE rel;
5335
5336     if (isnan(yd))
5337         return Qnil;
5338     if (isinf(yd)) {
5339         if (yd > 0.0) return INT2FIX(-1);
5340         else return INT2FIX(1);
5341     }
5342     yf = modf(yd, &yi);
5343     if (FIXNUM_P(x)) {
5344 #if SIZEOF_LONG * CHAR_BIT < DBL_MANT_DIG /* assume FLT_RADIX == 2 */
5345         double xd = (double)FIX2LONG(x);
5346         if (xd < yd)
5347             return INT2FIX(-1);
5348         if (xd > yd)
5349             return INT2FIX(1);
5350         return INT2FIX(0);
5351 #else
5352         long xn, yn;
5353         if (yi < FIXNUM_MIN)
5354             return INT2FIX(1);
5355         if (FIXNUM_MAX+1 <= yi)
5356             return INT2FIX(-1);
5357         xn = FIX2LONG(x);
5358         yn = (long)yi;
5359         if (xn < yn)
5360             return INT2FIX(-1);
5361         if (xn > yn)
5362             return INT2FIX(1);
5363         if (yf < 0.0)
5364             return INT2FIX(1);
5365         if (0.0 < yf)
5366             return INT2FIX(-1);
5367         return INT2FIX(0);
5368 #endif
5369     }
5370     y = rb_dbl2big(yi);
5371     rel = rb_big_cmp(x, y);
5372     if (yf == 0.0 || rel != INT2FIX(0))
5373         return rel;
5374     if (yf < 0.0)
5375         return INT2FIX(1);
5376     return INT2FIX(-1);
5377 }
5378
5379 #if SIZEOF_LONG * CHAR_BIT >= DBL_MANT_DIG /* assume FLT_RADIX == 2 */
5380 COMPILER_WARNING_PUSH
5381 #if __has_warning("-Wimplicit-int-float-conversion")
5382 COMPILER_WARNING_IGNORED(-Wimplicit-int-float-conversion)
5383 #endif
5384 static const double LONG_MAX_as_double = LONG_MAX;
5385 COMPILER_WARNING_POP
5386 #endif
5387
5388 VALUE
5389 rb_integer_float_eq(VALUE x, VALUE y)
5390 {
5391     double yd = RFLOAT_VALUE(y);
5392     double yi, yf;
5393
5394     if (!isfinite(yd))
5395         return Qfalse;
5396     yf = modf(yd, &yi);
5397     if (yf != 0)
5398         return Qfalse;
5399     if (FIXNUM_P(x)) {
5400 #if SIZEOF_LONG * CHAR_BIT < DBL_MANT_DIG /* assume FLT_RADIX == 2 */
5401         double xd = (double)FIX2LONG(x);
5402         return RBOOL(xd == yd);
5403 #else
5404         long xn, yn;
5405         if (yi < LONG_MIN || LONG_MAX_as_double <= yi)
5406             return Qfalse;
5407         xn = FIX2LONG(x);
5408         yn = (long)yi;
5409         return RBOOL(xn == yn);
5410 #endif
5411     }
5412     y = rb_dbl2big(yi);
5413     return rb_big_eq(x, y);
5414 }
5415
5416
5417 VALUE
5418 rb_big_cmp(VALUE x, VALUE y)
5419 {
5420     if (FIXNUM_P(y)) {
5421         x = bigfixize(x);
5422         if (FIXNUM_P(x)) {
5423             /* SIGNED_VALUE and Fixnum have same sign-bits, same
5424              * order */
5425             SIGNED_VALUE sx = (SIGNED_VALUE)x, sy = (SIGNED_VALUE)y;
5426             if (sx < sy) return INT2FIX(-1);
5427             return INT2FIX(sx > sy);
5428         }
5429     }
5430     else if (RB_BIGNUM_TYPE_P(y)) {
5431         if (BIGNUM_SIGN(x) == BIGNUM_SIGN(y)) {
5432             int cmp = bary_cmp(BDIGITS(x), BIGNUM_LEN(x), BDIGITS(y), BIGNUM_LEN(y));
5433             return INT2FIX(BIGNUM_SIGN(x) ? cmp : -cmp);
5434         }
5435     }
5436     else if (RB_FLOAT_TYPE_P(y)) {
5437         return rb_integer_float_cmp(x, y);
5438     }
5439     else {
5440         return rb_num_coerce_cmp(x, y, idCmp);
5441     }
5442     return INT2FIX(BIGNUM_SIGN(x) ? 1 : -1);
5443 }
5444
5445 enum big_op_t {
5446     big_op_gt,
5447     big_op_ge,
5448     big_op_lt,
5449     big_op_le
5450 };
5451
5452 static VALUE
5453 big_op(VALUE x, VALUE y, enum big_op_t op)
5454 {
5455     VALUE rel;
5456     int n;
5457
5458     if (RB_INTEGER_TYPE_P(y)) {
5459         rel = rb_big_cmp(x, y);
5460     }
5461     else if (RB_FLOAT_TYPE_P(y)) {
5462         rel = rb_integer_float_cmp(x, y);
5463     }
5464     else {
5465         ID id = 0;
5466         switch (op) {
5467           case big_op_gt: id = '>'; break;
5468           case big_op_ge: id = idGE; break;
5469           case big_op_lt: id = '<'; break;
5470           case big_op_le: id = idLE; break;
5471         }
5472         return rb_num_coerce_relop(x, y, id);
5473     }
5474
5475     if (NIL_P(rel)) return Qfalse;
5476     n = FIX2INT(rel);
5477
5478     switch (op) {
5479         case big_op_gt: return RBOOL(n >  0);
5480         case big_op_ge: return RBOOL(n >= 0);
5481         case big_op_lt: return RBOOL(n <  0);
5482         case big_op_le: return RBOOL(n <= 0);
5483     }
5484     return Qundef;
5485 }
5486
5487 VALUE
5488 rb_big_gt(VALUE x, VALUE y)
5489 {
5490     return big_op(x, y, big_op_gt);
5491 }
5492
5493 VALUE
5494 rb_big_ge(VALUE x, VALUE y)
5495 {
5496     return big_op(x, y, big_op_ge);
5497 }
5498
5499 VALUE
5500 rb_big_lt(VALUE x, VALUE y)
5501 {
5502     return big_op(x, y, big_op_lt);
5503 }
5504
5505 VALUE
5506 rb_big_le(VALUE x, VALUE y)
5507 {
5508     return big_op(x, y, big_op_le);
5509 }
5510
5511 /*
5512  *  call-seq:
5513  *     big == obj  -> true or false
5514  *
5515  *  Returns <code>true</code> only if <i>obj</i> has the same value
5516  *  as <i>big</i>. Contrast this with Integer#eql?, which requires
5517  *  <i>obj</i> to be an Integer.
5518  *
5519  *     68719476736 == 68719476736.0   #=> true
5520  */
5521
5522 VALUE
5523 rb_big_eq(VALUE x, VALUE y)
5524 {
5525     if (FIXNUM_P(y)) {
5526         return RBOOL(bignorm(x) == y);
5527     }
5528     else if (RB_BIGNUM_TYPE_P(y)) {
5529     }
5530     else if (RB_FLOAT_TYPE_P(y)) {
5531         return rb_integer_float_eq(x, y);
5532     }
5533     else {
5534         return rb_equal(y, x);
5535     }
5536     if (BIGNUM_SIGN(x) != BIGNUM_SIGN(y)) return Qfalse;
5537     if (BIGNUM_LEN(x) != BIGNUM_LEN(y)) return Qfalse;
5538     return RBOOL(MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,BIGNUM_LEN(y)) == 0);
5539 }
5540
5541 VALUE
5542 rb_big_eql(VALUE x, VALUE y)
5543 {
5544     if (!RB_BIGNUM_TYPE_P(y)) return Qfalse;
5545     if (BIGNUM_SIGN(x) != BIGNUM_SIGN(y)) return Qfalse;
5546     if (BIGNUM_LEN(x) != BIGNUM_LEN(y)) return Qfalse;
5547     return RBOOL(MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,BIGNUM_LEN(y)) == 0);
5548 }
5549
5550 VALUE
5551 rb_big_uminus(VALUE x)
5552 {
5553     VALUE z = rb_big_clone(x);
5554
5555     BIGNUM_NEGATE(z);
5556
5557     return bignorm(z);
5558 }
5559
5560 VALUE
5561 rb_big_comp(VALUE x)
5562 {
5563     VALUE z = rb_big_clone(x);
5564     BDIGIT *ds = BDIGITS(z);
5565     long n = BIGNUM_LEN(z);
5566
5567     if (!n) return INT2FIX(-1);
5568
5569     if (BIGNUM_POSITIVE_P(z)) {
5570         if (bary_add_one(ds, n)) {
5571             big_extend_carry(z);
5572         }
5573         BIGNUM_SET_NEGATIVE_SIGN(z);
5574     }
5575     else {
5576         bary_neg(ds, n);
5577         if (bary_add_one(ds, n))
5578             return INT2FIX(-1);
5579         bary_neg(ds, n);
5580         BIGNUM_SET_POSITIVE_SIGN(z);
5581     }
5582
5583     return bignorm(z);
5584 }
5585
5586 static VALUE
5587 bigsub(VALUE x, VALUE y)
5588 {
5589     VALUE z;
5590     BDIGIT *xds, *yds, *zds;
5591     long xn, yn, zn;
5592
5593     xn = BIGNUM_LEN(x);
5594     yn = BIGNUM_LEN(y);
5595     zn = xn < yn ? yn : xn;
5596
5597     z = bignew(zn, 1);
5598
5599     xds = BDIGITS(x);
5600     yds = BDIGITS(y);
5601     zds = BDIGITS(z);
5602
5603     if (bary_sub(zds, zn, xds, xn, yds, yn)) {
5604         bary_2comp(zds, zn);
5605         BIGNUM_SET_NEGATIVE_SIGN(z);
5606     }
5607
5608     return z;
5609 }
5610
5611 static VALUE bigadd_int(VALUE x, long y);
5612
5613 static VALUE
5614 bigsub_int(VALUE x, long y0)
5615 {
5616     VALUE z;
5617     BDIGIT *xds, *zds;
5618     long xn, zn;
5619     BDIGIT_DBL_SIGNED num;
5620     long i, y;
5621
5622     y = y0;
5623     xds = BDIGITS(x);
5624     xn = BIGNUM_LEN(x);
5625
5626     if (xn == 0)
5627         return LONG2NUM(-y0);
5628
5629     zn = xn;
5630 #if SIZEOF_BDIGIT < SIZEOF_LONG
5631     if (zn < bdigit_roomof(SIZEOF_LONG))
5632         zn = bdigit_roomof(SIZEOF_LONG);
5633 #endif
5634     z = bignew(zn, BIGNUM_SIGN(x));
5635     zds = BDIGITS(z);
5636
5637 #if SIZEOF_BDIGIT >= SIZEOF_LONG
5638     assert(xn == zn);
5639     num = (BDIGIT_DBL_SIGNED)xds[0] - y;
5640     if (xn == 1 && num < 0) {
5641         BIGNUM_NEGATE(z);
5642         zds[0] = (BDIGIT)-num;
5643         RB_GC_GUARD(x);
5644         return bignorm(z);
5645     }
5646     zds[0] = BIGLO(num);
5647     num = BIGDN(num);
5648     i = 1;
5649     if (i < xn)
5650         goto y_is_zero_x;
5651     goto finish;
5652 #else
5653     num = 0;
5654     for (i=0; i < xn; i++) {
5655         if (y == 0) goto y_is_zero_x;
5656         num += (BDIGIT_DBL_SIGNED)xds[i] - BIGLO(y);
5657         zds[i] = BIGLO(num);
5658         num = BIGDN(num);
5659         y = BIGDN(y);
5660     }
5661     for (; i < zn; i++) {
5662         if (y == 0) goto y_is_zero_z;
5663         num -= BIGLO(y);
5664         zds[i] = BIGLO(num);
5665         num = BIGDN(num);
5666         y = BIGDN(y);
5667     }
5668     goto finish;
5669 #endif
5670
5671     for (; i < xn; i++) {
5672       y_is_zero_x:
5673         if (num == 0) goto num_is_zero_x;
5674         num += xds[i];
5675         zds[i] = BIGLO(num);
5676         num = BIGDN(num);
5677     }
5678 #if SIZEOF_BDIGIT < SIZEOF_LONG
5679     for (; i < zn; i++) {
5680       y_is_zero_z:
5681         if (num == 0) goto num_is_zero_z;
5682         zds[i] = BIGLO(num);
5683         num = BIGDN(num);
5684     }
5685 #endif
5686     goto finish;
5687
5688     for (; i < xn; i++) {
5689       num_is_zero_x:
5690         zds[i] = xds[i];
5691     }
5692 #if SIZEOF_BDIGIT < SIZEOF_LONG
5693     for (; i < zn; i++) {
5694       num_is_zero_z:
5695         zds[i] = 0;
5696     }
5697 #endif
5698     goto finish;
5699
5700   finish:
5701     assert(num == 0 || num == -1);
5702     if (num < 0) {
5703         get2comp(z);
5704         BIGNUM_NEGATE(z);
5705     }
5706     RB_GC_GUARD(x);
5707     return bignorm(z);
5708 }
5709
5710 static VALUE
5711 bigadd_int(VALUE x, long y)
5712 {
5713     VALUE z;
5714     BDIGIT *xds, *zds;
5715     long xn, zn;
5716     BDIGIT_DBL num;
5717     long i;
5718
5719     xds = BDIGITS(x);
5720     xn = BIGNUM_LEN(x);
5721
5722     if (xn == 0)
5723         return LONG2NUM(y);
5724
5725     zn = xn;
5726 #if SIZEOF_BDIGIT < SIZEOF_LONG
5727     if (zn < bdigit_roomof(SIZEOF_LONG))
5728         zn = bdigit_roomof(SIZEOF_LONG);
5729 #endif
5730     zn++;
5731
5732     z = bignew(zn, BIGNUM_SIGN(x));
5733     zds = BDIGITS(z);
5734
5735 #if SIZEOF_BDIGIT >= SIZEOF_LONG
5736     num = (BDIGIT_DBL)xds[0] + y;
5737     zds[0] = BIGLO(num);
5738     num = BIGDN(num);
5739     i = 1;
5740     if (i < xn)
5741         goto y_is_zero_x;
5742     goto y_is_zero_z;
5743 #else
5744     num = 0;
5745     for (i=0; i < xn; i++) {
5746         if (y == 0) goto y_is_zero_x;
5747         num += (BDIGIT_DBL)xds[i] + BIGLO(y);
5748         zds[i] = BIGLO(num);
5749         num = BIGDN(num);
5750         y = BIGDN(y);
5751     }
5752     for (; i < zn; i++) {
5753         if (y == 0) goto y_is_zero_z;
5754         num += BIGLO(y);
5755         zds[i] = BIGLO(num);
5756         num = BIGDN(num);
5757         y = BIGDN(y);
5758     }
5759     goto finish;
5760
5761 #endif
5762
5763     for (;i < xn; i++) {
5764       y_is_zero_x:
5765         if (num == 0) goto num_is_zero_x;
5766         num += (BDIGIT_DBL)xds[i];
5767         zds[i] = BIGLO(num);
5768         num = BIGDN(num);
5769     }
5770     for (; i < zn; i++) {
5771       y_is_zero_z:
5772         if (num == 0) goto num_is_zero_z;
5773         zds[i] = BIGLO(num);
5774         num = BIGDN(num);
5775     }
5776     goto finish;
5777
5778     for (;i < xn; i++) {
5779       num_is_zero_x:
5780         zds[i] = xds[i];
5781     }
5782     for (; i < zn; i++) {
5783       num_is_zero_z:
5784         zds[i] = 0;
5785     }
5786     goto finish;
5787
5788   finish:
5789     RB_GC_GUARD(x);
5790     return bignorm(z);
5791 }
5792
5793 static VALUE
5794 bigadd(VALUE x, VALUE y, int sign)
5795 {
5796     VALUE z;
5797     size_t len;
5798
5799     sign = (sign == BIGNUM_SIGN(y));
5800     if (BIGNUM_SIGN(x) != sign) {
5801         if (sign) return bigsub(y, x);
5802         return bigsub(x, y);
5803     }
5804
5805     if (BIGNUM_LEN(x) > BIGNUM_LEN(y)) {
5806         len = BIGNUM_LEN(x) + 1;
5807     }
5808     else {
5809         len = BIGNUM_LEN(y) + 1;
5810     }
5811     z = bignew(len, sign);
5812
5813     bary_add(BDIGITS(z), BIGNUM_LEN(z),
5814              BDIGITS(x), BIGNUM_LEN(x),
5815              BDIGITS(y), BIGNUM_LEN(y));
5816
5817     return z;
5818 }
5819
5820 VALUE
5821 rb_big_plus(VALUE x, VALUE y)
5822 {
5823     long n;
5824
5825     if (FIXNUM_P(y)) {
5826         n = FIX2LONG(y);
5827         if ((n > 0) != BIGNUM_SIGN(x)) {
5828             if (n < 0) {
5829                 n = -n;
5830             }
5831             return bigsub_int(x, n);
5832         }
5833         if (n < 0) {
5834             n = -n;
5835         }
5836         return bigadd_int(x, n);
5837     }
5838     else if (RB_BIGNUM_TYPE_P(y)) {
5839         return bignorm(bigadd(x, y, 1));
5840     }
5841     else if (RB_FLOAT_TYPE_P(y)) {
5842         return DBL2NUM(rb_big2dbl(x) + RFLOAT_VALUE(y));
5843     }
5844     else {
5845         return rb_num_coerce_bin(x, y, '+');
5846     }
5847 }
5848
5849 VALUE
5850 rb_big_minus(VALUE x, VALUE y)
5851 {
5852     long n;
5853
5854     if (FIXNUM_P(y)) {
5855         n = FIX2LONG(y);
5856         if ((n > 0) != BIGNUM_SIGN(x)) {
5857             if (n < 0) {
5858                 n = -n;
5859             }
5860             return bigadd_int(x, n);
5861         }
5862         if (n < 0) {
5863             n = -n;
5864         }
5865         return bigsub_int(x, n);
5866     }
5867     else if (RB_BIGNUM_TYPE_P(y)) {
5868         return bignorm(bigadd(x, y, 0));
5869     }
5870     else if (RB_FLOAT_TYPE_P(y)) {
5871         return DBL2NUM(rb_big2dbl(x) - RFLOAT_VALUE(y));
5872     }
5873     else {
5874         return rb_num_coerce_bin(x, y, '-');
5875     }
5876 }
5877
5878 static VALUE
5879 bigsq(VALUE x)
5880 {
5881     long xn, zn;
5882     VALUE z;
5883     BDIGIT *xds, *zds;
5884
5885     xn = BIGNUM_LEN(x);
5886     zn = 2 * xn;
5887
5888     z = bignew(zn, 1);
5889
5890     xds = BDIGITS(x);
5891     zds = BDIGITS(z);
5892
5893     if (xn < NAIVE_MUL_DIGITS)
5894         bary_sq_fast(zds, zn, xds, xn);
5895     else
5896         bary_mul(zds, zn, xds, xn, xds, xn);
5897
5898     RB_GC_GUARD(x);
5899     return z;
5900 }
5901
5902 static VALUE
5903 bigmul0(VALUE x, VALUE y)
5904 {
5905     long xn, yn, zn;
5906     VALUE z;
5907     BDIGIT *xds, *yds, *zds;
5908
5909     if (x == y)
5910         return bigsq(x);
5911
5912     xn = BIGNUM_LEN(x);
5913     yn = BIGNUM_LEN(y);
5914     zn = xn + yn;
5915
5916     z = bignew(zn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
5917
5918     xds = BDIGITS(x);
5919     yds = BDIGITS(y);
5920     zds = BDIGITS(z);
5921
5922     bary_mul(zds, zn, xds, xn, yds, yn);
5923
5924     RB_GC_GUARD(x);
5925     RB_GC_GUARD(y);
5926     return z;
5927 }
5928
5929 VALUE
5930 rb_big_mul(VALUE x, VALUE y)
5931 {
5932     if (FIXNUM_P(y)) {
5933         y = rb_int2big(FIX2LONG(y));
5934     }
5935     else if (RB_BIGNUM_TYPE_P(y)) {
5936     }
5937     else if (RB_FLOAT_TYPE_P(y)) {
5938         return DBL2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
5939     }
5940     else {
5941         return rb_num_coerce_bin(x, y, '*');
5942     }
5943
5944     return bignorm(bigmul0(x, y));
5945 }
5946
5947 static VALUE
5948 bigdivrem(VALUE x, VALUE y, volatile VALUE *divp, volatile VALUE *modp)
5949 {
5950     long xn = BIGNUM_LEN(x), yn = BIGNUM_LEN(y);
5951     VALUE z;
5952     BDIGIT *xds, *yds, *zds;
5953     BDIGIT dd;
5954
5955     VALUE q = Qnil, r = Qnil;
5956     BDIGIT *qds, *rds;
5957     long qn, rn;
5958
5959     yds = BDIGITS(y);
5960     BARY_TRUNC(yds, yn);
5961     if (yn == 0)
5962         rb_num_zerodiv();
5963
5964     xds = BDIGITS(x);
5965     BARY_TRUNC(xds, xn);
5966
5967     if (xn < yn || (xn == yn && xds[xn - 1] < yds[yn - 1])) {
5968         if (divp) *divp = rb_int2big(0);
5969         if (modp) *modp = x;
5970         return Qnil;
5971     }
5972     if (yn == 1) {
5973         dd = yds[0];
5974         z = bignew(xn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
5975         zds = BDIGITS(z);
5976         dd = bigdivrem_single(zds, xds, xn, dd);
5977         if (modp) {
5978             *modp = rb_uint2big((uintptr_t)dd);
5979             BIGNUM_SET_SIGN(*modp, BIGNUM_SIGN(x));
5980         }
5981         if (divp) *divp = z;
5982         return Qnil;
5983     }
5984     if (xn == 2 && yn == 2) {
5985         BDIGIT_DBL x0 = bary2bdigitdbl(xds, 2);
5986         BDIGIT_DBL y0 = bary2bdigitdbl(yds, 2);
5987         BDIGIT_DBL q0 = x0 / y0;
5988         BDIGIT_DBL r0 = x0 % y0;
5989         if (divp) {
5990             z = bignew(bdigit_roomof(sizeof(BDIGIT_DBL)), BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
5991             zds = BDIGITS(z);
5992             zds[0] = BIGLO(q0);
5993             zds[1] = BIGLO(BIGDN(q0));
5994             *divp = z;
5995         }
5996         if (modp) {
5997             z = bignew(bdigit_roomof(sizeof(BDIGIT_DBL)), BIGNUM_SIGN(x));
5998             zds = BDIGITS(z);
5999             zds[0] = BIGLO(r0);
6000             zds[1] = BIGLO(BIGDN(r0));
6001             *modp = z;
6002         }
6003         return Qnil;
6004     }
6005
6006     if (divp) {
6007         qn = xn + BIGDIVREM_EXTRA_WORDS;
6008         q = bignew(qn, BIGNUM_SIGN(x)==BIGNUM_SIGN(y));
6009         qds = BDIGITS(q);
6010     }
6011     else {
6012         qn = 0;
6013         qds = NULL;
6014     }
6015
6016     if (modp) {
6017         rn = yn;
6018         r = bignew(rn, BIGNUM_SIGN(x));
6019         rds = BDIGITS(r);
6020     }
6021     else {
6022         rn = 0;
6023         rds = NULL;
6024     }
6025
6026     bary_divmod_branch(qds, qn, rds, rn, xds, xn, yds, yn);
6027
6028     if (divp) {
6029         bigtrunc(q);
6030         *divp = q;
6031     }
6032     if (modp) {
6033         bigtrunc(r);
6034         *modp = r;
6035     }
6036
6037     return Qnil;
6038 }
6039
6040 static void
6041 bigdivmod(VALUE x, VALUE y, volatile VALUE *divp, volatile VALUE *modp)
6042 {
6043     VALUE mod;
6044
6045     bigdivrem(x, y, divp, &mod);
6046     if (BIGNUM_SIGN(x) != BIGNUM_SIGN(y) && !BIGZEROP(mod)) {
6047         if (divp) *divp = bigadd(*divp, rb_int2big(1), 0);
6048         if (modp) *modp = bigadd(mod, y, 1);
6049     }
6050     else if (modp) {
6051         *modp = mod;
6052     }
6053 }
6054
6055
6056 static VALUE
6057 rb_big_divide(VALUE x, VALUE y, ID op)
6058 {
6059     VALUE z;
6060
6061     if (FIXNUM_P(y)) {
6062         y = rb_int2big(FIX2LONG(y));
6063     }
6064     else if (RB_BIGNUM_TYPE_P(y)) {
6065     }
6066     else if (RB_FLOAT_TYPE_P(y)) {
6067         if (op == '/') {
6068             double dx = rb_big2dbl(x);
6069             return rb_flo_div_flo(DBL2NUM(dx), y);
6070         }
6071         else {
6072             VALUE v;
6073             double dy = RFLOAT_VALUE(y);
6074             if (dy == 0.0) rb_num_zerodiv();
6075             v = rb_big_divide(x, y, '/');
6076             return rb_dbl2big(RFLOAT_VALUE(v));
6077         }
6078     }
6079     else {
6080         return rb_num_coerce_bin(x, y, op);
6081     }
6082     bigdivmod(x, y, &z, 0);
6083
6084     return bignorm(z);
6085 }
6086
6087 VALUE
6088 rb_big_div(VALUE x, VALUE y)
6089 {
6090     return rb_big_divide(x, y, '/');
6091 }
6092
6093 VALUE
6094 rb_big_idiv(VALUE x, VALUE y)
6095 {
6096     return rb_big_divide(x, y, idDiv);
6097 }
6098
6099 VALUE
6100 rb_big_modulo(VALUE x, VALUE y)
6101 {
6102     VALUE z;
6103
6104     if (FIXNUM_P(y)) {
6105         y = rb_int2big(FIX2LONG(y));
6106     }
6107     else if (!RB_BIGNUM_TYPE_P(y)) {
6108         return rb_num_coerce_bin(x, y, '%');
6109     }
6110     bigdivmod(x, y, 0, &z);
6111
6112     return bignorm(z);
6113 }
6114
6115 VALUE
6116 rb_big_remainder(VALUE x, VALUE y)
6117 {
6118     VALUE z;
6119
6120     if (FIXNUM_P(y)) {
6121         y = rb_int2big(FIX2LONG(y));
6122     }
6123     else if (!RB_BIGNUM_TYPE_P(y)) {
6124         return rb_num_coerce_bin(x, y, rb_intern("remainder"));
6125     }
6126     bigdivrem(x, y, 0, &z);
6127
6128     return bignorm(z);
6129 }
6130
6131 VALUE
6132 rb_big_divmod(VALUE x, VALUE y)
6133 {
6134     VALUE div, mod;
6135
6136     if (FIXNUM_P(y)) {
6137         y = rb_int2big(FIX2LONG(y));
6138     }
6139     else if (!RB_BIGNUM_TYPE_P(y)) {
6140         return rb_num_coerce_bin(x, y, idDivmod);
6141     }
6142     bigdivmod(x, y, &div, &mod);
6143
6144     return rb_assoc_new(bignorm(div), bignorm(mod));
6145 }
6146
6147 static VALUE
6148 big_shift(VALUE x, long n)
6149 {
6150     if (n < 0)
6151         return big_lshift(x, 1+(unsigned long)(-(n+1)));
6152     else if (n > 0)
6153         return big_rshift(x, (unsigned long)n);
6154     return x;
6155 }
6156
6157 enum {DBL_BIGDIG = ((DBL_MANT_DIG + BITSPERDIG) / BITSPERDIG)};
6158
6159 static double
6160 big_fdiv(VALUE x, VALUE y, long ey)
6161 {
6162     VALUE z;
6163     long l, ex;
6164
6165     bigtrunc(x);
6166     l = BIGNUM_LEN(x);
6167     ex = l * BITSPERDIG - nlz(BDIGITS(x)[l-1]);
6168     ex -= 2 * DBL_BIGDIG * BITSPERDIG;
6169     if (ex > BITSPERDIG) ex -= BITSPERDIG;
6170     else if (ex > 0) ex = 0;
6171     if (ex) x = big_shift(x, ex);
6172
6173     bigdivrem(x, y, &z, 0);
6174     l = ex - ey;
6175 #if SIZEOF_LONG > SIZEOF_INT
6176     {
6177         /* Visual C++ can't be here */
6178         if (l > INT_MAX) return HUGE_VAL;
6179         if (l < INT_MIN) return 0.0;
6180     }
6181 #endif
6182     return ldexp(big2dbl(z), (int)l);
6183 }
6184
6185 static double
6186 big_fdiv_int(VALUE x, VALUE y)
6187 {
6188     long l, ey;
6189     bigtrunc(y);
6190     l = BIGNUM_LEN(y);
6191     ey = l * BITSPERDIG - nlz(BDIGITS(y)[l-1]);
6192     ey -= DBL_BIGDIG * BITSPERDIG;
6193     if (ey) y = big_shift(y, ey);
6194     return big_fdiv(x, y, ey);
6195 }
6196
6197 static double
6198 big_fdiv_float(VALUE x, VALUE y)
6199 {
6200     int i;
6201     y = dbl2big(ldexp(frexp(RFLOAT_VALUE(y), &i), DBL_MANT_DIG));
6202     return big_fdiv(x, y, i - DBL_MANT_DIG);
6203 }
6204
6205 double
6206 rb_big_fdiv_double(VALUE x, VALUE y)
6207 {
6208     double dx, dy;
6209     VALUE v;
6210
6211     dx = big2dbl(x);
6212     if (FIXNUM_P(y)) {
6213         dy = (double)FIX2LONG(y);
6214         if (isinf(dx))
6215             return big_fdiv_int(x, rb_int2big(FIX2LONG(y)));
6216     }
6217     else if (RB_BIGNUM_TYPE_P(y)) {
6218         return big_fdiv_int(x, y);
6219     }
6220     else if (RB_FLOAT_TYPE_P(y)) {
6221         dy = RFLOAT_VALUE(y);
6222         if (isnan(dy))
6223             return dy;
6224         if (isinf(dx))
6225             return big_fdiv_float(x, y);
6226     }
6227     else {
6228         return NUM2DBL(rb_num_coerce_bin(x, y, idFdiv));
6229     }
6230     v = rb_flo_div_flo(DBL2NUM(dx), DBL2NUM(dy));
6231     return NUM2DBL(v);
6232 }
6233
6234 VALUE
6235 rb_big_fdiv(VALUE x, VALUE y)
6236 {
6237     return DBL2NUM(rb_big_fdiv_double(x, y));
6238 }
6239
6240 VALUE
6241 rb_big_pow(VALUE x, VALUE y)
6242 {
6243     double d;
6244     SIGNED_VALUE yy;
6245
6246   again:
6247     if (y == INT2FIX(0)) return INT2FIX(1);
6248     if (y == INT2FIX(1)) return x;
6249     if (RB_FLOAT_TYPE_P(y)) {
6250         d = RFLOAT_VALUE(y);
6251         if ((BIGNUM_NEGATIVE_P(x) && !BIGZEROP(x))) {
6252             return rb_dbl_complex_new_polar_pi(pow(-rb_big2dbl(x), d), d);
6253         }
6254     }
6255     else if (RB_BIGNUM_TYPE_P(y)) {
6256         y = bignorm(y);
6257         if (FIXNUM_P(y))
6258             goto again;
6259         rb_warn("in a**b, b may be too big");
6260         d = rb_big2dbl(y);
6261     }
6262     else if (FIXNUM_P(y)) {
6263         yy = FIX2LONG(y);
6264
6265         if (yy < 0) {
6266             x = rb_big_pow(x, LONG2NUM(-yy));
6267             if (RB_INTEGER_TYPE_P(x))
6268                 return rb_rational_raw(INT2FIX(1), x);
6269             else
6270                 return DBL2NUM(1.0 / NUM2DBL(x));
6271         }
6272         else {
6273             VALUE z = 0;
6274             SIGNED_VALUE mask;
6275             const size_t xbits = rb_absint_numwords(x, 1, NULL);
6276             const size_t BIGLEN_LIMIT = 32*1024*1024;
6277
6278             if (xbits == (size_t)-1 ||
6279                 (xbits > BIGLEN_LIMIT) ||
6280                 (xbits * yy > BIGLEN_LIMIT)) {
6281                 rb_warn("in a**b, b may be too big");
6282                 d = (double)yy;
6283             }
6284             else {
6285                 for (mask = FIXNUM_MAX + 1; mask; mask >>= 1) {
6286                     if (z) z = bigsq(z);
6287                     if (yy & mask) {
6288                         z = z ? bigtrunc(bigmul0(z, x)) : x;
6289                     }
6290                 }
6291                 return bignorm(z);
6292             }
6293         }
6294     }
6295     else {
6296         return rb_num_coerce_bin(x, y, idPow);
6297     }
6298     return DBL2NUM(pow(rb_big2dbl(x), d));
6299 }
6300
6301 static VALUE
6302 bigand_int(VALUE x, long xn, BDIGIT hibitsx, long y)
6303 {
6304     VALUE z;
6305     BDIGIT *xds, *zds;
6306     long zn;
6307     long i;
6308     BDIGIT hibitsy;
6309
6310     if (y == 0) return INT2FIX(0);
6311     if (xn == 0) return hibitsx ? LONG2NUM(y) : 0;
6312     hibitsy = 0 <= y ? 0 : BDIGMAX;
6313     xds = BDIGITS(x);
6314 #if SIZEOF_BDIGIT >= SIZEOF_LONG
6315     if (!hibitsy) {
6316         y &= xds[0];
6317         return LONG2NUM(y);
6318     }
6319 #endif
6320
6321     zn = xn;
6322 #if SIZEOF_BDIGIT < SIZEOF_LONG
6323     if (hibitsx && zn < bdigit_roomof(SIZEOF_LONG))
6324         zn = bdigit_roomof(SIZEOF_LONG);
6325 #endif
6326
6327     z = bignew(zn, 0);
6328     zds = BDIGITS(z);
6329
6330 #if SIZEOF_BDIGIT >= SIZEOF_LONG
6331     i = 1;
6332     zds[0] = xds[0] & BIGLO(y);
6333 #else
6334     for (i=0; i < xn; i++) {
6335         if (y == 0 || y == -1) break;
6336         zds[i] = xds[i] & BIGLO(y);
6337         y = BIGDN(y);
6338     }
6339     for (; i < zn; i++) {
6340         if (y == 0 || y == -1) break;
6341         zds[i] = hibitsx & BIGLO(y);
6342         y = BIGDN(y);
6343     }
6344 #endif
6345     for (;i < xn; i++) {
6346         zds[i] = xds[i] & hibitsy;
6347     }
6348     for (;i < zn; i++) {
6349         zds[i] = hibitsx & hibitsy;
6350     }
6351     twocomp2abs_bang(z, hibitsx && hibitsy);
6352     RB_GC_GUARD(x);
6353     return bignorm(z);
6354 }
6355
6356 VALUE
6357 rb_big_and(VALUE x, VALUE y)
6358 {
6359     VALUE z;
6360     BDIGIT *ds1, *ds2, *zds;
6361     long i, xn, yn, n1, n2;
6362     BDIGIT hibitsx, hibitsy;
6363     BDIGIT hibits1, hibits2;
6364     VALUE tmpv;
6365     BDIGIT tmph;
6366     long tmpn;
6367
6368     if (!RB_INTEGER_TYPE_P(y)) {
6369         return rb_num_coerce_bit(x, y, '&');
6370     }
6371
6372     hibitsx = abs2twocomp(&x, &xn);
6373     if (FIXNUM_P(y)) {
6374         return bigand_int(x, xn, hibitsx, FIX2LONG(y));
6375     }
6376     hibitsy = abs2twocomp(&y, &yn);
6377     if (xn > yn) {
6378         tmpv = x; x = y; y = tmpv;
6379         tmpn = xn; xn = yn; yn = tmpn;
6380         tmph = hibitsx; hibitsx = hibitsy; hibitsy = tmph;
6381     }
6382     n1 = xn;
6383     n2 = yn;
6384     ds1 = BDIGITS(x);
6385     ds2 = BDIGITS(y);
6386     hibits1 = hibitsx;
6387     hibits2 = hibitsy;
6388
6389     if (!hibits1)
6390         n2 = n1;
6391
6392     z = bignew(n2, 0);
6393     zds = BDIGITS(z);
6394
6395     for (i=0; i<n1; i++) {
6396         zds[i] = ds1[i] & ds2[i];
6397     }
6398     for (; i<n2; i++) {
6399         zds[i] = hibits1 & ds2[i];
6400     }
6401     twocomp2abs_bang(z, hibits1 && hibits2);
6402     RB_GC_GUARD(x);
6403     RB_GC_GUARD(y);
6404     return bignorm(z);
6405 }
6406
6407 static VALUE
6408 bigor_int(VALUE x, long xn, BDIGIT hibitsx, long y)
6409 {
6410     VALUE z;
6411     BDIGIT *xds, *zds;
6412     long zn;
6413     long i;
6414     BDIGIT hibitsy;
6415
6416     if (y == -1) return INT2FIX(-1);
6417     if (xn == 0) return hibitsx ? INT2FIX(-1) : LONG2FIX(y);
6418     hibitsy = 0 <= y ? 0 : BDIGMAX;
6419     xds = BDIGITS(x);
6420
6421     zn = BIGNUM_LEN(x);
6422 #if SIZEOF_BDIGIT < SIZEOF_LONG
6423     if (zn < bdigit_roomof(SIZEOF_LONG))
6424         zn = bdigit_roomof(SIZEOF_LONG);
6425 #endif
6426     z = bignew(zn, 0);
6427     zds = BDIGITS(z);
6428
6429 #if SIZEOF_BDIGIT >= SIZEOF_LONG
6430     i = 1;
6431     zds[0] = xds[0] | BIGLO(y);
6432     if (i < zn)
6433         goto y_is_fixed_point;
6434     goto finish;
6435 #else
6436     for (i=0; i < xn; i++) {
6437         if (y == 0 || y == -1) goto y_is_fixed_point;
6438         zds[i] = xds[i] | BIGLO(y);
6439         y = BIGDN(y);
6440     }
6441     if (hibitsx)
6442         goto fill_hibits;
6443     for (; i < zn; i++) {
6444         if (y == 0 || y == -1) goto y_is_fixed_point;
6445         zds[i] = BIGLO(y);
6446         y = BIGDN(y);
6447     }
6448   goto finish;
6449 #endif
6450
6451   y_is_fixed_point:
6452     if (hibitsy)
6453         goto fill_hibits;
6454     for (; i < xn; i++) {
6455         zds[i] = xds[i];
6456     }
6457     if (hibitsx)
6458         goto fill_hibits;
6459     for (; i < zn; i++) {
6460         zds[i] = 0;
6461     }
6462   goto finish;
6463
6464   fill_hibits:
6465     for (; i < zn; i++) {
6466         zds[i] = BDIGMAX;
6467     }
6468
6469   finish:
6470     twocomp2abs_bang(z, hibitsx || hibitsy);
6471     RB_GC_GUARD(x);
6472     return bignorm(z);
6473 }
6474
6475 VALUE
6476 rb_big_or(VALUE x, VALUE y)
6477 {
6478     VALUE z;
6479     BDIGIT *ds1, *ds2, *zds;
6480     long i, xn, yn, n1, n2;
6481     BDIGIT hibitsx, hibitsy;
6482     BDIGIT hibits1, hibits2;
6483     VALUE tmpv;
6484     BDIGIT tmph;
6485     long tmpn;
6486
6487     if (!RB_INTEGER_TYPE_P(y)) {
6488         return rb_num_coerce_bit(x, y, '|');
6489     }
6490
6491     hibitsx = abs2twocomp(&x, &xn);
6492     if (FIXNUM_P(y)) {
6493         return bigor_int(x, xn, hibitsx, FIX2LONG(y));
6494     }
6495     hibitsy = abs2twocomp(&y, &yn);
6496     if (xn > yn) {
6497         tmpv = x; x = y; y = tmpv;
6498         tmpn = xn; xn = yn; yn = tmpn;
6499         tmph = hibitsx; hibitsx = hibitsy; hibitsy = tmph;
6500     }
6501     n1 = xn;
6502     n2 = yn;
6503     ds1 = BDIGITS(x);
6504     ds2 = BDIGITS(y);
6505     hibits1 = hibitsx;
6506     hibits2 = hibitsy;
6507
6508     if (hibits1)
6509         n2 = n1;
6510
6511     z = bignew(n2, 0);
6512     zds = BDIGITS(z);
6513
6514     for (i=0; i<n1; i++) {
6515         zds[i] = ds1[i] | ds2[i];
6516     }
6517     for (; i<n2; i++) {
6518         zds[i] = hibits1 | ds2[i];
6519     }
6520     twocomp2abs_bang(z, hibits1 || hibits2);
6521     RB_GC_GUARD(x);
6522     RB_GC_GUARD(y);
6523     return bignorm(z);
6524 }
6525
6526 static VALUE
6527 bigxor_int(VALUE x, long xn, BDIGIT hibitsx, long y)
6528 {
6529     VALUE z;
6530     BDIGIT *xds, *zds;
6531     long zn;
6532     long i;
6533     BDIGIT hibitsy;
6534
6535     hibitsy = 0 <= y ? 0 : BDIGMAX;
6536     xds = BDIGITS(x);
6537     zn = BIGNUM_LEN(x);
6538 #if SIZEOF_BDIGIT < SIZEOF_LONG
6539     if (zn < bdigit_roomof(SIZEOF_LONG))
6540         zn = bdigit_roomof(SIZEOF_LONG);
6541 #endif
6542     z = bignew(zn, 0);
6543     zds = BDIGITS(z);
6544
6545 #if SIZEOF_BDIGIT >= SIZEOF_LONG
6546     i = 1;
6547     zds[0] = xds[0] ^ BIGLO(y);
6548 #else
6549     for (i = 0; i < xn; i++) {
6550         zds[i] = xds[i] ^ BIGLO(y);
6551         y = BIGDN(y);
6552     }
6553     for (; i < zn; i++) {
6554         zds[i] = hibitsx ^ BIGLO(y);
6555         y = BIGDN(y);
6556     }
6557 #endif
6558     for (; i < xn; i++) {
6559         zds[i] = xds[i] ^ hibitsy;
6560     }
6561     for (; i < zn; i++) {
6562         zds[i] = hibitsx ^ hibitsy;
6563     }
6564     twocomp2abs_bang(z, (hibitsx ^ hibitsy) != 0);
6565     RB_GC_GUARD(x);
6566     return bignorm(z);
6567 }
6568
6569 VALUE
6570 rb_big_xor(VALUE x, VALUE y)
6571 {
6572     VALUE z;
6573     BDIGIT *ds1, *ds2, *zds;
6574     long i, xn, yn, n1, n2;
6575     BDIGIT hibitsx, hibitsy;
6576     BDIGIT hibits1, hibits2;
6577     VALUE tmpv;
6578     BDIGIT tmph;
6579     long tmpn;
6580
6581     if (!RB_INTEGER_TYPE_P(y)) {
6582         return rb_num_coerce_bit(x, y, '^');
6583     }
6584
6585     hibitsx = abs2twocomp(&x, &xn);
6586     if (FIXNUM_P(y)) {
6587         return bigxor_int(x, xn, hibitsx, FIX2LONG(y));
6588     }
6589     hibitsy = abs2twocomp(&y, &yn);
6590     if (xn > yn) {
6591         tmpv = x; x = y; y = tmpv;
6592         tmpn = xn; xn = yn; yn = tmpn;
6593         tmph = hibitsx; hibitsx = hibitsy; hibitsy = tmph;
6594     }
6595     n1 = xn;
6596     n2 = yn;
6597     ds1 = BDIGITS(x);
6598     ds2 = BDIGITS(y);
6599     hibits1 = hibitsx;
6600     hibits2 = hibitsy;
6601
6602     z = bignew(n2, 0);
6603     zds = BDIGITS(z);
6604
6605     for (i=0; i<n1; i++) {
6606         zds[i] = ds1[i] ^ ds2[i];
6607     }
6608     for (; i<n2; i++) {
6609         zds[i] = hibitsx ^ ds2[i];
6610     }
6611     twocomp2abs_bang(z, (hibits1 ^ hibits2) != 0);
6612     RB_GC_GUARD(x);
6613     RB_GC_GUARD(y);
6614     return bignorm(z);
6615 }
6616
6617 VALUE
6618 rb_big_lshift(VALUE x, VALUE y)
6619 {
6620     int lshift_p;
6621     size_t shift_numdigits;
6622     int shift_numbits;
6623
6624     for (;;) {
6625         if (FIXNUM_P(y)) {
6626             long l = FIX2LONG(y);
6627             unsigned long shift;
6628             if (0 <= l) {
6629                 lshift_p = 1;
6630                 shift = l;
6631             }
6632             else {
6633                 lshift_p = 0;
6634                 shift = 1+(unsigned long)(-(l+1));
6635             }
6636             shift_numbits = (int)(shift & (BITSPERDIG-1));
6637             shift_numdigits = shift >> bit_length(BITSPERDIG-1);
6638             return bignorm(big_shift3(x, lshift_p, shift_numdigits, shift_numbits));
6639         }
6640         else if (RB_BIGNUM_TYPE_P(y)) {
6641             return bignorm(big_shift2(x, 1, y));
6642         }
6643         y = rb_to_int(y);
6644     }
6645 }
6646
6647 VALUE
6648 rb_big_rshift(VALUE x, VALUE y)
6649 {
6650     int lshift_p;
6651     size_t shift_numdigits;
6652     int shift_numbits;
6653
6654     for (;;) {
6655         if (FIXNUM_P(y)) {
6656             long l = FIX2LONG(y);
6657             unsigned long shift;
6658             if (0 <= l) {
6659                 lshift_p = 0;
6660                 shift = l;
6661             }
6662             else {
6663                 lshift_p = 1;
6664                 shift = 1+(unsigned long)(-(l+1));
6665             }
6666             shift_numbits = (int)(shift & (BITSPERDIG-1));
6667             shift_numdigits = shift >> bit_length(BITSPERDIG-1);
6668             return bignorm(big_shift3(x, lshift_p, shift_numdigits, shift_numbits));
6669         }
6670         else if (RB_BIGNUM_TYPE_P(y)) {
6671             return bignorm(big_shift2(x, 0, y));
6672         }
6673         y = rb_to_int(y);
6674     }
6675 }
6676
6677 VALUE
6678 rb_big_aref(VALUE x, VALUE y)
6679 {
6680     BDIGIT *xds;
6681     size_t shift;
6682     size_t i, s1, s2;
6683     long l;
6684     BDIGIT bit;
6685
6686     if (RB_BIGNUM_TYPE_P(y)) {
6687         if (BIGNUM_NEGATIVE_P(y))
6688             return INT2FIX(0);
6689         bigtrunc(y);
6690         if (BIGSIZE(y) > sizeof(size_t)) {
6691             return BIGNUM_SIGN(x) ? INT2FIX(0) : INT2FIX(1);
6692         }
6693 #if SIZEOF_SIZE_T <= SIZEOF_LONG
6694         shift = big2ulong(y, "long");
6695 #else
6696         shift = big2ull(y, "long long");
6697 #endif
6698     }
6699     else {
6700         l = NUM2LONG(y);
6701         if (l < 0) return INT2FIX(0);
6702         shift = (size_t)l;
6703     }
6704     s1 = shift/BITSPERDIG;
6705     s2 = shift%BITSPERDIG;
6706     bit = (BDIGIT)1 << s2;
6707
6708     if (s1 >= BIGNUM_LEN(x))
6709         return BIGNUM_SIGN(x) ? INT2FIX(0) : INT2FIX(1);
6710
6711     xds = BDIGITS(x);
6712     if (BIGNUM_POSITIVE_P(x))
6713         return (xds[s1] & bit) ? INT2FIX(1) : INT2FIX(0);
6714     if (xds[s1] & (bit-1))
6715         return (xds[s1] & bit) ? INT2FIX(0) : INT2FIX(1);
6716     for (i = 0; i < s1; i++)
6717         if (xds[i])
6718             return (xds[s1] & bit) ? INT2FIX(0) : INT2FIX(1);
6719     return (xds[s1] & bit) ? INT2FIX(1) : INT2FIX(0);
6720 }
6721
6722 VALUE
6723 rb_big_hash(VALUE x)
6724 {
6725     st_index_t hash;
6726
6727     hash = rb_memhash(BDIGITS(x), sizeof(BDIGIT)*BIGNUM_LEN(x)) ^ BIGNUM_SIGN(x);
6728     return ST2FIX(hash);
6729 }
6730
6731 /*
6732  * call-seq:
6733  *   int.coerce(numeric)  ->  array
6734  *
6735  * Returns an array with both a +numeric+ and a +int+ represented as
6736  * Integer objects or Float objects.
6737  *
6738  * This is achieved by converting +numeric+ to an Integer or a Float.
6739  *
6740  * A TypeError is raised if the +numeric+ is not an Integer or a Float
6741  * type.
6742  *
6743  *     (0x3FFFFFFFFFFFFFFF+1).coerce(42)   #=> [42, 4611686018427387904]
6744  */
6745
6746 static VALUE
6747 rb_int_coerce(VALUE x, VALUE y)
6748 {
6749     if (RB_INTEGER_TYPE_P(y)) {
6750         return rb_assoc_new(y, x);
6751     }
6752     else {
6753         x = rb_Float(x);
6754         y = rb_Float(y);
6755         return rb_assoc_new(y, x);
6756     }
6757 }
6758
6759 VALUE
6760 rb_big_abs(VALUE x)
6761 {
6762     if (BIGNUM_NEGATIVE_P(x)) {
6763         x = rb_big_clone(x);
6764         BIGNUM_SET_POSITIVE_SIGN(x);
6765     }
6766     return x;
6767 }
6768
6769 int
6770 rb_big_sign(VALUE x)
6771 {
6772     return BIGNUM_SIGN(x);
6773 }
6774
6775 size_t
6776 rb_big_size(VALUE big)
6777 {
6778     return BIGSIZE(big);
6779 }
6780
6781 VALUE
6782 rb_big_size_m(VALUE big)
6783 {
6784     return SIZET2NUM(rb_big_size(big));
6785 }
6786
6787 VALUE
6788 rb_big_bit_length(VALUE big)
6789 {
6790     int nlz_bits;
6791     size_t numbytes;
6792
6793     static const BDIGIT char_bit[1] = { CHAR_BIT };
6794     BDIGIT numbytes_bary[bdigit_roomof(sizeof(size_t))];
6795     BDIGIT nlz_bary[1];
6796     BDIGIT result_bary[bdigit_roomof(sizeof(size_t)+1)];
6797
6798     numbytes = rb_absint_size(big, &nlz_bits);
6799
6800     if (numbytes == 0)
6801         return LONG2FIX(0);
6802
6803     if (BIGNUM_NEGATIVE_P(big) && rb_absint_singlebit_p(big)) {
6804         if (nlz_bits != CHAR_BIT-1) {
6805             nlz_bits++;
6806         }
6807         else {
6808             nlz_bits = 0;
6809             numbytes--;
6810         }
6811     }
6812
6813     if (numbytes <= SIZE_MAX / CHAR_BIT) {
6814         return SIZET2NUM(numbytes * CHAR_BIT - nlz_bits);
6815     }
6816
6817     nlz_bary[0] = nlz_bits;
6818
6819     bary_unpack(BARY_ARGS(numbytes_bary), &numbytes, 1, sizeof(numbytes), 0,
6820             INTEGER_PACK_NATIVE_BYTE_ORDER);
6821     BARY_SHORT_MUL(result_bary, numbytes_bary, char_bit);
6822     BARY_SUB(result_bary, result_bary, nlz_bary);
6823
6824     return rb_integer_unpack(result_bary, numberof(result_bary), sizeof(BDIGIT), 0,
6825             INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER);
6826 }
6827
6828 VALUE
6829 rb_big_odd_p(VALUE num)
6830 {
6831     return RBOOL(BIGNUM_LEN(num) != 0 && BDIGITS(num)[0] & 1);
6832 }
6833
6834 VALUE
6835 rb_big_even_p(VALUE num)
6836 {
6837     if (BIGNUM_LEN(num) != 0 && BDIGITS(num)[0] & 1) {
6838         return Qfalse;
6839     }
6840     return Qtrue;
6841 }
6842
6843 unsigned long rb_ulong_isqrt(unsigned long);
6844 #if SIZEOF_BDIGIT*2 > SIZEOF_LONG
6845 BDIGIT rb_bdigit_dbl_isqrt(BDIGIT_DBL);
6846 # ifdef ULL_TO_DOUBLE
6847 #   define BDIGIT_DBL_TO_DOUBLE(n) ULL_TO_DOUBLE(n)
6848 # endif
6849 #else
6850 # define rb_bdigit_dbl_isqrt(x) (BDIGIT)rb_ulong_isqrt(x)
6851 #endif
6852 #ifndef BDIGIT_DBL_TO_DOUBLE
6853 # define BDIGIT_DBL_TO_DOUBLE(n) (double)(n)
6854 #endif
6855
6856 static BDIGIT *
6857 estimate_initial_sqrt(VALUE *xp, const size_t xn, const BDIGIT *nds, size_t len)
6858 {
6859     enum {dbl_per_bdig = roomof(DBL_MANT_DIG,BITSPERDIG)};
6860     const int zbits = nlz(nds[len-1]);
6861     VALUE x = *xp = bignew_1(0, xn, 1); /* division may release the GVL */
6862     BDIGIT *xds = BDIGITS(x);
6863     BDIGIT_DBL d = bary2bdigitdbl(nds+len-dbl_per_bdig, dbl_per_bdig);
6864     BDIGIT lowbits = 1;
6865     int rshift = (int)((BITSPERDIG*2-zbits+(len&BITSPERDIG&1) - DBL_MANT_DIG + 1) & ~1);
6866     double f;
6867
6868     if (rshift > 0) {
6869         lowbits = (BDIGIT)d & ~(~(BDIGIT)1U << rshift);
6870         d >>= rshift;
6871     }
6872     else if (rshift < 0) {
6873         d <<= -rshift;
6874         d |= nds[len-dbl_per_bdig-1] >> (BITSPERDIG+rshift);
6875     }
6876     f = sqrt(BDIGIT_DBL_TO_DOUBLE(d));
6877     d = (BDIGIT_DBL)ceil(f);
6878     if (BDIGIT_DBL_TO_DOUBLE(d) == f) {
6879         if (lowbits || (lowbits = !bary_zero_p(nds, len-dbl_per_bdig)))
6880             ++d;
6881     }
6882     else {
6883         lowbits = 1;
6884     }
6885     rshift /= 2;
6886     rshift += (2-(len&1))*BITSPERDIG/2;
6887     if (rshift >= 0) {
6888         if (nlz((BDIGIT)d) + rshift >= BITSPERDIG) {
6889             /* (d << rshift) does cause overflow.
6890              * example: Integer.sqrt(0xffff_ffff_ffff_ffff ** 2)
6891              */
6892             d = ~(BDIGIT_DBL)0;
6893         }
6894         else {
6895             d <<= rshift;
6896         }
6897     }
6898     BDIGITS_ZERO(xds, xn-2);
6899     bdigitdbl2bary(&xds[xn-2], 2, d);
6900
6901     if (!lowbits) return NULL; /* special case, exact result */
6902     return xds;
6903 }
6904
6905 VALUE
6906 rb_big_isqrt(VALUE n)
6907 {
6908     BDIGIT *nds = BDIGITS(n);
6909     size_t len = BIGNUM_LEN(n);
6910     size_t xn = (len+1) / 2;
6911     VALUE x;
6912     BDIGIT *xds;
6913
6914     if (len <= 2) {
6915         BDIGIT sq = rb_bdigit_dbl_isqrt(bary2bdigitdbl(nds, len));
6916 #if SIZEOF_BDIGIT > SIZEOF_LONG
6917         return ULL2NUM(sq);
6918 #else
6919         return ULONG2NUM(sq);
6920 #endif
6921     }
6922     else if ((xds = estimate_initial_sqrt(&x, xn, nds, len)) != 0) {
6923         size_t tn = xn + BIGDIVREM_EXTRA_WORDS;
6924         VALUE t = bignew_1(0, tn, 1);
6925         BDIGIT *tds = BDIGITS(t);
6926         tn = BIGNUM_LEN(t);
6927
6928         /* t = n/x */
6929         while (bary_divmod_branch(tds, tn, NULL, 0, nds, len, xds, xn),
6930                bary_cmp(tds, tn, xds, xn) < 0) {
6931             int carry;
6932             BARY_TRUNC(tds, tn);
6933             /* x = (x+t)/2 */
6934             carry = bary_add(xds, xn, xds, xn, tds, tn);
6935             bary_small_rshift(xds, xds, xn, 1, carry);
6936             tn = BIGNUM_LEN(t);
6937         }
6938     }
6939     RBASIC_SET_CLASS_RAW(x, rb_cInteger);
6940     return x;
6941 }
6942
6943 #ifdef USE_GMP
6944 static void
6945 bary_powm_gmp(BDIGIT *zds, size_t zn, const BDIGIT *xds, size_t xn, const BDIGIT *yds, size_t yn, const BDIGIT *mds, size_t mn)
6946 {
6947     mpz_t z, x, y, m;
6948     size_t count;
6949     mpz_init(x);
6950     mpz_init(y);
6951     mpz_init(m);
6952     mpz_init(z);
6953     bdigits_to_mpz(x, xds, xn);
6954     bdigits_to_mpz(y, yds, yn);
6955     bdigits_to_mpz(m, mds, mn);
6956     mpz_powm(z, x, y, m);
6957     bdigits_from_mpz(z, zds, &count);
6958     BDIGITS_ZERO(zds+count, zn-count);
6959     mpz_clear(x);
6960     mpz_clear(y);
6961     mpz_clear(m);
6962     mpz_clear(z);
6963 }
6964 #endif
6965
6966 static VALUE
6967 int_pow_tmp3(VALUE x, VALUE y, VALUE m, int nega_flg)
6968 {
6969 #ifdef USE_GMP
6970     VALUE z;
6971     size_t xn, yn, mn, zn;
6972
6973     if (FIXNUM_P(x)) {
6974        x = rb_int2big(FIX2LONG(x));
6975     }
6976     if (FIXNUM_P(y)) {
6977        y = rb_int2big(FIX2LONG(y));
6978     }
6979     assert(RB_BIGNUM_TYPE_P(m));
6980     xn = BIGNUM_LEN(x);
6981     yn = BIGNUM_LEN(y);
6982     mn = BIGNUM_LEN(m);
6983     zn = mn;
6984     z = bignew(zn, 1);
6985     bary_powm_gmp(BDIGITS(z), zn, BDIGITS(x), xn, BDIGITS(y), yn, BDIGITS(m), mn);
6986     if (nega_flg & BIGNUM_POSITIVE_P(z)) {
6987         z = rb_big_minus(z, m);
6988     }
6989     RB_GC_GUARD(x);
6990     RB_GC_GUARD(y);
6991     RB_GC_GUARD(m);
6992     return rb_big_norm(z);
6993 #else
6994     VALUE tmp = LONG2FIX(1L);
6995     long yy;
6996
6997     for (/*NOP*/; ! FIXNUM_P(y); y = rb_big_rshift(y, LONG2FIX(1L))) {
6998         if (RTEST(rb_int_odd_p(y))) {
6999             tmp = rb_int_mul(tmp, x);
7000             tmp = rb_int_modulo(tmp, m);
7001         }
7002         x = rb_int_mul(x, x);
7003         x = rb_int_modulo(x, m);
7004     }
7005     for (yy = FIX2LONG(y); yy; yy >>= 1L) {
7006         if (yy & 1L) {
7007             tmp = rb_int_mul(tmp, x);
7008             tmp = rb_int_modulo(tmp, m);
7009         }
7010         x = rb_int_mul(x, x);
7011         x = rb_int_modulo(x, m);
7012     }
7013
7014     if (nega_flg && rb_int_positive_p(tmp)) {
7015         tmp = rb_int_minus(tmp, m);
7016     }
7017     return tmp;
7018 #endif
7019 }
7020
7021 /*
7022  * Integer#pow
7023  */
7024
7025 static VALUE
7026 int_pow_tmp1(VALUE x, VALUE y, long mm, int nega_flg)
7027 {
7028     long xx = FIX2LONG(x);
7029     long tmp = 1L;
7030     long yy;
7031
7032     for (/*NOP*/; ! FIXNUM_P(y); y = rb_big_rshift(y, LONG2FIX(1L))) {
7033         if (RTEST(rb_int_odd_p(y))) {
7034             tmp = (tmp * xx) % mm;
7035         }
7036         xx = (xx * xx) % mm;
7037     }
7038     for (yy = FIX2LONG(y); yy; yy >>= 1L) {
7039         if (yy & 1L) {
7040             tmp = (tmp * xx) % mm;
7041         }
7042         xx = (xx * xx) % mm;
7043     }
7044
7045     if (nega_flg && tmp) {
7046         tmp -= mm;
7047     }
7048     return LONG2FIX(tmp);
7049 }
7050
7051 static VALUE
7052 int_pow_tmp2(VALUE x, VALUE y, long mm, int nega_flg)
7053 {
7054     long tmp = 1L;
7055     long yy;
7056 #ifdef DLONG
7057     const DLONG m = mm;
7058     long tmp2 = tmp;
7059     long xx = FIX2LONG(x);
7060 # define MUL_MODULO(a, b, c) (long)(((DLONG)(a) * (DLONG)(b)) % (c))
7061 #else
7062     const VALUE m = LONG2FIX(mm);
7063     VALUE tmp2 = LONG2FIX(tmp);
7064     VALUE xx = x;
7065 # define MUL_MODULO(a, b, c) rb_int_modulo(rb_fix_mul_fix((a), (b)), (c))
7066 #endif
7067
7068     for (/*NOP*/; ! FIXNUM_P(y); y = rb_big_rshift(y, LONG2FIX(1L))) {
7069         if (RTEST(rb_int_odd_p(y))) {
7070             tmp2 = MUL_MODULO(tmp2, xx, m);
7071         }
7072         xx = MUL_MODULO(xx, xx, m);
7073     }
7074     for (yy = FIX2LONG(y); yy; yy >>= 1L) {
7075         if (yy & 1L) {
7076             tmp2 = MUL_MODULO(tmp2, xx, m);
7077         }
7078         xx = MUL_MODULO(xx, xx, m);
7079     }
7080
7081 #ifdef DLONG
7082     tmp = tmp2;
7083 #else
7084     tmp = FIX2LONG(tmp2);
7085 #endif
7086     if (nega_flg && tmp) {
7087         tmp -= mm;
7088     }
7089     return LONG2FIX(tmp);
7090 }
7091
7092 /*
7093  * Document-method: Integer#pow
7094  * call-seq:
7095  *    integer.pow(numeric)           ->  numeric
7096  *    integer.pow(integer, integer)  ->  integer
7097  *
7098  * Returns (modular) exponentiation as:
7099  *
7100  *   a.pow(b)     #=> same as a**b
7101  *   a.pow(b, m)  #=> same as (a**b) % m, but avoids huge temporary values
7102  */
7103 VALUE
7104 rb_int_powm(int const argc, VALUE * const argv, VALUE const num)
7105 {
7106     rb_check_arity(argc, 1, 2);
7107
7108     if (argc == 1) {
7109         return rb_int_pow(num, argv[0]);
7110     }
7111     else {
7112         VALUE const a = num;
7113         VALUE const b = argv[0];
7114         VALUE m = argv[1];
7115         int nega_flg = 0;
7116         if ( ! RB_INTEGER_TYPE_P(b)) {
7117             rb_raise(rb_eTypeError, "Integer#pow() 2nd argument not allowed unless a 1st argument is integer");
7118         }
7119         if (rb_int_negative_p(b)) {
7120             rb_raise(rb_eRangeError, "Integer#pow() 1st argument cannot be negative when 2nd argument specified");
7121         }
7122         if (!RB_INTEGER_TYPE_P(m)) {
7123             rb_raise(rb_eTypeError, "Integer#pow() 2nd argument not allowed unless all arguments are integers");
7124         }
7125
7126         if (rb_int_negative_p(m)) {
7127             m = rb_int_uminus(m);
7128             nega_flg = 1;
7129         }
7130
7131         if (FIXNUM_P(m)) {
7132             long const half_val = (long)HALF_LONG_MSB;
7133             long const mm = FIX2LONG(m);
7134             if (!mm) rb_num_zerodiv();
7135             if (mm == 1) return INT2FIX(0);
7136             if (mm <= half_val) {
7137                 return int_pow_tmp1(rb_int_modulo(a, m), b, mm, nega_flg);
7138             }
7139             else {
7140                 return int_pow_tmp2(rb_int_modulo(a, m), b, mm, nega_flg);
7141             }
7142         }
7143         else {
7144             if (rb_bigzero_p(m)) rb_num_zerodiv();
7145             if (bignorm(m) == INT2FIX(1)) return INT2FIX(0);
7146             return int_pow_tmp3(rb_int_modulo(a, m), b, m, nega_flg);
7147         }
7148     }
7149     UNREACHABLE_RETURN(Qnil);
7150 }
7151
7152 /*
7153  *  Bignum objects hold integers outside the range of
7154  *  Fixnum. Bignum objects are created
7155  *  automatically when integer calculations would otherwise overflow a
7156  *  Fixnum. When a calculation involving
7157  *  Bignum objects returns a result that will fit in a
7158  *  Fixnum, the result is automatically converted.
7159  *
7160  *  For the purposes of the bitwise operations and <code>[]</code>, a
7161  *  Bignum is treated as if it were an infinite-length
7162  *  bitstring with 2's complement representation.
7163  *
7164  *  While Fixnum values are immediate, Bignum
7165  *  objects are not---assignment and parameter passing work with
7166  *  references to objects, not the objects themselves.
7167  *
7168  */
7169
7170 void
7171 Init_Bignum(void)
7172 {
7173     rb_define_method(rb_cInteger, "coerce", rb_int_coerce, 1);
7174
7175 #ifdef USE_GMP
7176     /* The version of loaded GMP. */
7177     rb_define_const(rb_cInteger, "GMP_VERSION", rb_sprintf("GMP %s", gmp_version));
7178 #endif
7179
7180     power_cache_init();
7181 }