/* Copyright (C) 2007  Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
#ifndef _DIV_MACROS_H_
#define _DIV_MACROS_H_

#include "bid_internal.h"
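
/* Helper routines used by the BID128 decimal floating-point division
   code: __div_128_by_128 returns a 128-bit quotient and remainder,
   and __div_256_by_128 divides a 256-bit dividend by a 128-bit
   divisor.  Two implementations follow: one based on the 80-bit
   BINARY80 type, and a portable one (after the #else) that uses only
   64-bit doubles for the quotient estimates.

   A minimal usage sketch (hypothetical caller, not part of this file):

     UINT128 q, r, x, y;
     x.w[1] = 0; x.w[0] = 1000ull;
     y.w[1] = 0; y.w[0] = 7ull;
     __div_128_by_128 (&q, &r, x, y);  // q.w[0] == 142, r.w[0] == 6
*/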

//#define DOUBLE_EXTENDED_ON

#if DOUBLE_EXTENDED_ON
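
/* With DOUBLE_EXTENDED_ON enabled, quotient estimates are formed in
   80-bit extended precision; the #else branch below provides the same
   interface using 64-bit double arithmetic only.  */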

__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX, UINT128 CY) {
  UINT128 CB, CB2, CB4, CB8, CQB, CA;
  int_double d64, dm64, ds;
  int_double t64;
  BINARY80 dx, dq, lx, ly, lq;
  UINT64 Rh, R, B2, B4, Ph, Ql, Ql2, carry, Qh;

  if (!CX.w[1] && !CY.w[1]) {
    pCQ->w[1] = 0;
    pCQ->w[0] = CX.w[0] / CY.w[0];
    pCR->w[1] = 0;
    pCR->w[0] = CX.w[0] - pCQ->w[0] * CY.w[0];
    return;
  }
  // This path works for CX<2^116 only

  if (!CY.w[1]) {
    d64.i = 0x43f0000000000000;
    dm64.i = 0x3bf0000000000000;
    ds.i = 0x3cb8000000000000;
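    /* The constants above, read as IEEE-754 double bit patterns:
       d64.d = 2^64, dm64.d = 2^(-64), ds.d = 1.5*2^(-52).  */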
    dx = (BINARY80) CX.w[1] * d64.d + (BINARY80) CX.w[0];
    dq = dx / (BINARY80) CY.w[0];
    Qh = (UINT64) (dq * dm64.d);
    Ql = (UINT64) (dq - ((double) Qh) * d64.d);

    Rh = CX.w[0] - Ql * CY.w[0];
    Ql2 = Rh / CY.w[0];
    pCR->w[0] = Rh - Ql2 * CY.w[0];
    __add_carry_out ((pCQ->w[0]), carry, Ql, Ql2);
    pCQ->w[1] = Qh + carry;
    pCR->w[1] = 0;
    return;
  }
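
  /* General case: estimate CX/CY in extended precision from both words
     of each operand, then fix up the integer quotient with exact
     128-bit arithmetic.  */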

  // 2^64
  t64.i = 0x43f0000000000000ull;
  lx = (BINARY80) CX.w[1] * (BINARY80) t64.d + (BINARY80) CX.w[0];
  ly = (BINARY80) CY.w[1] * (BINARY80) t64.d + (BINARY80) CY.w[0];
  lq = lx / ly;
  pCQ->w[0] = (UINT64) lq;
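
  /* The truncated estimate may be off by a few units; the branches
     below recompute the remainder exactly and adjust the quotient.  */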

  /*if(__unsigned_compare_ge_128(CX,CY))
     __sub_128_128((*pCR), CX, CY); */

  if (CY.w[1] >= 16 || pCQ->w[0] <= 0x1000000000000000ull) {
    pCQ->w[0] = (UINT64) lq - 1;
    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);
    if (__unsigned_compare_ge_128 (CA, CY)) {
      __sub_128_128 (CA, CA, CY);
      pCQ->w[0]++;
      if (__unsigned_compare_ge_128 (CA, CY)) {
        __sub_128_128 (CA, CA, CY);
        pCQ->w[0]++;
      }
    }
    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  } else {
    pCQ->w[0] = (UINT64) lq - 6;

    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);

    CB8.w[1] = (CY.w[1] << 3) | (CY.w[0] >> 61);
    CB8.w[0] = CY.w[0] << 3;
    CB4.w[1] = (CY.w[1] << 2) | (CY.w[0] >> 62);
    CB4.w[0] = CY.w[0] << 2;
    CB2.w[1] = (CY.w[1] << 1) | (CY.w[0] >> 63);
    CB2.w[0] = CY.w[0] << 1;
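
    /* CB8, CB4, CB2 hold 8*CY, 4*CY and 2*CY; the ladder below restores
       the quotient one power of two at a time.  */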
    if (__unsigned_compare_ge_128 (CA, CB8)) {
      pCQ->w[0] += 8;
      __sub_128_128 (CA, CA, CB8);
    }
    if (__unsigned_compare_ge_128 (CA, CB4)) {
      pCQ->w[0] += 4;
      __sub_128_128 (CA, CA, CB4);
    }
    if (__unsigned_compare_ge_128 (CA, CB2)) {
      pCQ->w[0] += 2;
      __sub_128_128 (CA, CA, CB2);
    }
    if (__unsigned_compare_ge_128 (CA, CY)) {
      pCQ->w[0]++;
      __sub_128_128 (CA, CA, CY);
    }
    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  }
}

__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CQ2Y;
  UINT128 CQ2, CQ3Y;
  UINT64 Q3, carry64;
  int_double d64;
  BINARY80 lx, ly, lq, l64, l128;

  // 2^64
  d64.i = 0x43f0000000000000ull;
  l64 = (BINARY80) d64.d;
  // 2^128
  l128 = l64 * l64;

  lx = ((BINARY80) (*pCA4).w[3] * l64 +
        (BINARY80) (*pCA4).w[2]) * l128 +
    (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0];
  ly = (BINARY80) CY.w[1] * l128 + (BINARY80) CY.w[0] * l64;

  lq = lx / ly;
  CQ2.w[1] = (UINT64) lq;
  lq = (lq - CQ2.w[1]) * l64;
  CQ2.w[0] = (UINT64) lq;

  __mul_128x128_to_256 (CQ2Y, CY, CQ2);
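
  /* CQ2 is a 128-bit quotient estimate; comparing CQ2*CY with the
     dividend decides whether it is an under-estimate (first branch)
     or an over-estimate (else branch).  */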
  if (CQ2Y.w[3] < (*pCA4).w[3]
      || (CQ2Y.w[3] == (*pCA4).w[3]
          && (CQ2Y.w[2] < (*pCA4).w[2]
              || (CQ2Y.w[2] == (*pCA4).w[2]
                  && (CQ2Y.w[1] < (*pCA4).w[1]
                      || (CQ2Y.w[1] == (*pCA4).w[1]
                          && (CQ2Y.w[0] <= (*pCA4).w[0]))))))) {

    // (*pCA4) - CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ2Y.w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, (*pCA4).w[1], CQ2Y.w[1],
                         carry64);
    (*pCA4).w[2] = (*pCA4).w[2] - CQ2Y.w[2] - carry64;

    lx = ((BINARY80) (*pCA4).w[2] * l128 +
          ((BINARY80) (*pCA4).w[1] * l64 +
           (BINARY80) (*pCA4).w[0])) * l64;
    lq = lx / ly;
    Q3 = (UINT64) lq;

    __mul_64x128_short (CQ3Y, Q3, CY);
    __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ3Y.w[0]);
    (*pCA4).w[1] = (*pCA4).w[1] - CQ3Y.w[1] - carry64;

    if ((*pCA4).w[1] > CY.w[1]
        || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
      Q3++;
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
      if ((*pCA4).w[1] > CY.w[1]
          || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
        Q3++;
        __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0],
                          CY.w[0]);
        (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
      }
    }
    // add Q3 to Q2
    __add_carry_out (CQ2.w[0], carry64, Q3, CQ2.w[0]);
    CQ2.w[1] += carry64;
  } else {
    // CQ2Y - (*pCA4), guaranteed below 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ2Y.w[0], (*pCA4).w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, CQ2Y.w[1], (*pCA4).w[1],
                         carry64);
    (*pCA4).w[2] = CQ2Y.w[2] - (*pCA4).w[2] - carry64;

    lx = ((BINARY80) (*pCA4).w[2] * l128 +
          (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0]) * l64;
    lq = lx / ly;
    Q3 = 1 + (UINT64) lq;

    __mul_64x128_short (CQ3Y, Q3, CY);
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ3Y.w[0], (*pCA4).w[0]);
    (*pCA4).w[1] = CQ3Y.w[1] - (*pCA4).w[1] - carry64;

    if ((SINT64) (*pCA4).w[1] > (SINT64) CY.w[1]
        || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
      Q3--;
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
    } else if ((SINT64) (*pCA4).w[1] < 0) {
      Q3++;
      __add_carry_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] + CY.w[1] + carry64;
    }
    // subtract Q3 from Q2
    __sub_borrow_out (CQ2.w[0], carry64, CQ2.w[0], Q3);
    CQ2.w[1] -= carry64;
  }

  // (*pCQ) + CQ2 + carry
  __add_carry_out ((*pCQ).w[0], carry64, CQ2.w[0], (*pCQ).w[0]);
  (*pCQ).w[1] = (*pCQ).w[1] + CQ2.w[1] + carry64;
}

#else

__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX0, UINT128 CY) {
  UINT128 CY36, CY51, CQ, A2, CX, CQT;
  UINT64 Q;
  int_double t64, d49, d60;
  double lx, ly, lq;

  if (!CX0.w[1] && !CY.w[1]) {
    pCQ->w[0] = CX0.w[0] / CY.w[0];
    pCQ->w[1] = 0;
    pCR->w[1] = pCR->w[0] = 0;
    pCR->w[0] = CX0.w[0] - pCQ->w[0] * CY.w[0];
    return;
  }

  CX.w[1] = CX0.w[1];
  CX.w[0] = CX0.w[0];

  // 2^64
  t64.i = 0x43f0000000000000ull;
  lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;

  CY36.w[1] = CY.w[0] >> (64 - 36);
  CY36.w[0] = CY.w[0] << 36;

  CQ.w[1] = CQ.w[0] = 0;
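
  /* CY36 = CY << 36: when CX >= CY*2^100 the quotient exceeds 100 bits
     and a high-order chunk of it is computed first.  */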

  if (!CY.w[1] && !CY36.w[1] && (CX.w[1] >= CY36.w[0])) {
    // 2^(-60)
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    Q = (UINT64) lq - 4ull;

    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);

    // A2 <<= 60
    A2.w[1] = (A2.w[1] << 60) | (A2.w[0] >> (64 - 60));
    A2.w[0] <<= 60;

    __sub_128_128 (CX, CX, A2);

    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;

    CQ.w[1] = Q >> (64 - 60);
    CQ.w[0] = Q << 60;
  }

  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;

  if (CY.w[1] < (UINT64) (1 << (64 - 51))
      && (__unsigned_compare_gt_128 (CX, CY51))) {
    // 2^(-49)
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = (UINT64) lq - 1ull;

    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    A2.w[1] += Q * CY.w[1];

    // A2 <<= 49
    A2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    A2.w[0] <<= 49;

    __sub_128_128 (CX, CX, A2);

    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;
  }

  Q = (UINT64) lq;

  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CX, CX, A2);
  if ((SINT64) CX.w[1] < 0) {
    Q--;
    CX.w[0] += CY.w[0];
    if (CX.w[0] < CY.w[0])
      CX.w[1]++;
    CX.w[1] += CY.w[1];
    if ((SINT64) CX.w[1] < 0) {
      Q--;
      CX.w[0] += CY.w[0];
      if (CX.w[0] < CY.w[0])
        CX.w[1]++;
      CX.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CX, CY)) {
    Q++;
    __sub_128_128 (CX, CX, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCR->w[1] = CX.w[1];
  pCR->w[0] = CX.w[0];
}

__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CA4, CA2, CY51, CY36;
  UINT128 CQ, A2, A2h, CQT;
  UINT64 Q, carry64;
  int_double t64, d49, d60;
  double lx, ly, lq, d128, d192;

  // the quotient is assumed to be at most 113 bits,
  // as needed by BID128 divide routines
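  // (BID128 coefficients are below 10^34 < 2^113, hence the 113-bit bound.)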

  // initial dividend
  CA4.w[3] = (*pCA4).w[3];
  CA4.w[2] = (*pCA4).w[2];
  CA4.w[1] = (*pCA4).w[1];
  CA4.w[0] = (*pCA4).w[0];
  CQ.w[1] = (*pCQ).w[1];
  CQ.w[0] = (*pCQ).w[0];

  // 2^64, 2^128, 2^192 as doubles
  t64.i = 0x43f0000000000000ull;
  d128 = t64.d * t64.d;
  d192 = d128 * t64.d;
  lx = (double) CA4.w[3] * d192 + ((double) CA4.w[2] * d128 +
                                   ((double) CA4.w[1] * t64.d +
                                    (double) CA4.w[0]));
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;

  CY36.w[2] = CY.w[1] >> (64 - 36);
  CY36.w[1] = (CY.w[1] << 36) | (CY.w[0] >> (64 - 36));
  CY36.w[0] = CY.w[0] << 36;

  CQ.w[1] = (*pCQ).w[1];
  CQ.w[0] = (*pCQ).w[0];

  // Q >= 2^100 ?
  if (CA4.w[3] > CY36.w[2]
      || (CA4.w[3] == CY36.w[2]
          && (CA4.w[2] > CY36.w[1]
              || (CA4.w[2] == CY36.w[1] && CA4.w[1] >= CY36.w[0])))) {
    // 2^(-60)
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    Q = (UINT64) lq - 4ull;

    __mul_64x128_to_192 (CA2, Q, CY);

    // CA2 <<= 60
    // CA2.w[3] = CA2.w[2] >> (64-60);
    CA2.w[2] = (CA2.w[2] << 60) | (CA2.w[1] >> (64 - 60));
    CA2.w[1] = (CA2.w[1] << 60) | (CA2.w[0] >> (64 - 60));
    CA2.w[0] = CA2.w[0] << 60;

    // CA4 -= CA2
    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
                         carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;

    lx = ((double) CA4.w[2] * d128 +
          ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;

    CQT.w[1] = Q >> (64 - 60);
    CQT.w[0] = Q << 60;
    __add_128_128 (CQ, CQ, CQT);
  }

  CY51.w[2] = CY.w[1] >> (64 - 51);
  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;
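
  /* CY51 = CY << 51: if the remaining dividend still exceeds it, the
     quotient has more than ~51 bits left and another 49-bit-shifted
     chunk is removed before the final step.  */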

  if (CA4.w[2] > CY51.w[2] || ((CA4.w[2] == CY51.w[2])
                               && (__unsigned_compare_gt_128 (CA4, CY51))))
  {
    // Q > 2^51
    // 2^(-49)
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = (UINT64) lq - 1ull;

    // Q*CY = A2h:A2 (two 64x64 partial products)
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    __mul_64x64_to_128 (A2h, Q, CY.w[1]);
    A2.w[1] += A2h.w[0];
    if (A2.w[1] < A2h.w[0])
      A2h.w[1]++;

    // (A2h:A2) <<= 49
    CA2.w[2] = (A2h.w[1] << 49) | (A2.w[1] >> (64 - 49));
    CA2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    CA2.w[0] = A2.w[0] << 49;

    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
                         carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;

    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = ((double) CA4.w[2] * d128 +
          ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;
  }

  Q = (UINT64) lq;

  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CA4, CA4, A2);
  if ((SINT64) CA4.w[1] < 0) {
    Q--;
    CA4.w[0] += CY.w[0];
    if (CA4.w[0] < CY.w[0])
      CA4.w[1]++;
    CA4.w[1] += CY.w[1];
    if ((SINT64) CA4.w[1] < 0) {
      Q--;
      CA4.w[0] += CY.w[0];
      if (CA4.w[0] < CY.w[0])
        CA4.w[1]++;
      CA4.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CA4, CY)) {
    Q++;
    __sub_128_128 (CA4, CA4, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCA4->w[1] = CA4.w[1];
  pCA4->w[0] = CA4.w[0];
}

#endif
#endif