1 /* Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _DIV_MACROS_H_
25 #define _DIV_MACROS_H_
27 #include "bid_internal.h"
32 //#define DOUBLE_EXTENDED_ON
34 #if DOUBLE_EXTENDED_ON
38 __div_128_by_128 (UINT128
* pCQ
, UINT128
* pCR
, UINT128 CX
, UINT128 CY
) {
39 UINT128 CB
, CB2
, CB4
, CB8
, CQB
, CA
;
40 int_double d64
, dm64
, ds
;
44 UINT64 Rh
, R
, B2
, B4
, Ph
, Ql
, Ql2
, carry
, Qh
;
50 pCQ
->w
[0] = CX
.w
[0] / CY
.w
[0];
53 pCR
->w
[0] = CX
.w
[0] - pCQ
->w
[0] * CY
.w
[0];
56 // This path works for CX<2^116 only
59 d64
.i
= 0x43f0000000000000;
61 dm64
.i
= 0x3bf0000000000000;
63 ds
.i
= 0x3cb8000000000000;
64 dx
= (BINARY80
) CX
.w
[1] * d64
.d
+ (BINARY80
) CX
.w
[0];
65 dq
= dx
/ (BINARY80
) CY
.w
[0];
69 Ql
= (UINT64
) (dq
- ((double) Qh
) * d64
.d
);
71 Rh
= CX
.w
[0] - Ql
* CY
.w
[0];
73 pCR
->w
[0] = Rh
- Ql2
* CY
.w
[0];
74 __add_carry_out ((pCQ
->w
[0]), carry
, Ql
, Ql2
);
75 pCQ
->w
[1] = Qh
+ carry
;
84 lx
= (BINARY80
) CX
.w
[1] * (BINARY80
) t64
.d
+ (BINARY80
) CX
.w
[0];
85 ly
= (BINARY80
) CY
.w
[1] * (BINARY80
) t64
.d
+ (BINARY80
) CY
.w
[0];
87 pCQ
->w
[0] = (UINT64
) lq
;
92 /*if(__unsigned_compare_ge_128(CX,CY))
95 __sub_128_128((*pCR), CX, CY);
105 if (CY
.w
[1] >= 16 || pCQ
->w
[0] <= 0x1000000000000000ull
) {
106 pCQ
->w
[0] = (UINT64
) lq
- 1;
107 __mul_64x128_full (Ph
, CQB
, (pCQ
->w
[0]), CY
);
108 __sub_128_128 (CA
, CX
, CQB
);
109 if (__unsigned_compare_ge_128 (CA
, CY
)) {
110 __sub_128_128 (CA
, CA
, CY
);
112 if (__unsigned_compare_ge_128 (CA
, CY
)) {
113 __sub_128_128 (CA
, CA
, CY
);
120 pCQ
->w
[0] = (UINT64
) lq
- 6;
122 __mul_64x128_full (Ph
, CQB
, (pCQ
->w
[0]), CY
);
123 __sub_128_128 (CA
, CX
, CQB
);
125 CB8
.w
[1] = (CY
.w
[1] << 3) | (CY
.w
[0] >> 61);
126 CB8
.w
[0] = CY
.w
[0] << 3;
127 CB4
.w
[1] = (CY
.w
[1] << 2) | (CY
.w
[0] >> 62);
128 CB4
.w
[0] = CY
.w
[0] << 2;
129 CB2
.w
[1] = (CY
.w
[1] << 1) | (CY
.w
[0] >> 63);
130 CB2
.w
[0] = CY
.w
[0] << 1;
132 if (__unsigned_compare_ge_128 (CA
, CB8
)) {
134 __sub_128_128 (CA
, CA
, CB8
);
136 if (__unsigned_compare_ge_128 (CA
, CB4
)) {
138 __sub_128_128 (CA
, CA
, CB4
);
140 if (__unsigned_compare_ge_128 (CA
, CB2
)) {
142 __sub_128_128 (CA
, CA
, CB2
);
144 if (__unsigned_compare_ge_128 (CA
, CY
)) {
146 __sub_128_128 (CA
, CA
, CY
);
160 __div_256_by_128 (UINT128
* pCQ
, UINT256
* pCA4
, UINT128 CY
) {
165 BINARY80 lx
, ly
, lq
, l64
, l128
;
168 d64
.i
= 0x43f0000000000000ull
;
169 l64
= (BINARY80
) d64
.d
;
174 ((BINARY80
) (*pCA4
).w
[3] * l64
+
175 (BINARY80
) (*pCA4
).w
[2]) * l128
+
176 (BINARY80
) (*pCA4
).w
[1] * l64
+ (BINARY80
) (*pCA4
).w
[0];
177 ly
= (BINARY80
) CY
.w
[1] * l128
+ (BINARY80
) CY
.w
[0] * l64
;
180 CQ2
.w
[1] = (UINT64
) lq
;
181 lq
= (lq
- CQ2
.w
[1]) * l64
;
182 CQ2
.w
[0] = (UINT64
) lq
;
185 __mul_128x128_to_256 (CQ2Y
, CY
, CQ2
);
188 if (CQ2Y
.w
[3] < (*pCA4
).w
[3]
189 || (CQ2Y
.w
[3] == (*pCA4
).w
[3]
190 && (CQ2Y
.w
[2] < (*pCA4
).w
[2]
191 || (CQ2Y
.w
[2] == (*pCA4
).w
[2]
192 && (CQ2Y
.w
[1] < (*pCA4
).w
[1]
193 || (CQ2Y
.w
[1] == (*pCA4
).w
[1]
194 && (CQ2Y
.w
[0] <= (*pCA4
).w
[0]))))))) {
196 // (*pCA4) -CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128)
197 __sub_borrow_out ((*pCA4
).w
[0], carry64
, (*pCA4
).w
[0], CQ2Y
.w
[0]);
198 __sub_borrow_in_out ((*pCA4
).w
[1], carry64
, (*pCA4
).w
[1], CQ2Y
.w
[1],
200 (*pCA4
).w
[2] = (*pCA4
).w
[2] - CQ2Y
.w
[2] - carry64
;
202 lx
= ((BINARY80
) (*pCA4
).w
[2] * l128
+
203 ((BINARY80
) (*pCA4
).w
[1] * l64
+
204 (BINARY80
) (*pCA4
).w
[0])) * l64
;
210 __mul_64x128_short (CQ3Y
, Q3
, CY
);
211 __sub_borrow_out ((*pCA4
).w
[0], carry64
, (*pCA4
).w
[0], CQ3Y
.w
[0]);
212 (*pCA4
).w
[1] = (*pCA4
).w
[1] - CQ3Y
.w
[1] - carry64
;
214 if ((*pCA4
).w
[1] > CY
.w
[1]
215 || ((*pCA4
).w
[1] == CY
.w
[1] && (*pCA4
).w
[0] >= CY
.w
[0])) {
217 __sub_borrow_out ((*pCA4
).w
[0], carry64
, (*pCA4
).w
[0], CY
.w
[0]);
218 (*pCA4
).w
[1] = (*pCA4
).w
[1] - CY
.w
[1] - carry64
;
219 if ((*pCA4
).w
[1] > CY
.w
[1]
220 || ((*pCA4
).w
[1] == CY
.w
[1] && (*pCA4
).w
[0] >= CY
.w
[0])) {
222 __sub_borrow_out ((*pCA4
).w
[0], carry64
, (*pCA4
).w
[0],
224 (*pCA4
).w
[1] = (*pCA4
).w
[1] - CY
.w
[1] - carry64
;
228 __add_carry_out (CQ2
.w
[0], carry64
, Q3
, CQ2
.w
[0]);
232 // CQ2Y - (*pCA4), guaranteed below 5*2^(49+128)
233 __sub_borrow_out ((*pCA4
).w
[0], carry64
, CQ2Y
.w
[0], (*pCA4
).w
[0]);
234 __sub_borrow_in_out ((*pCA4
).w
[1], carry64
, CQ2Y
.w
[1], (*pCA4
).w
[1],
236 (*pCA4
).w
[2] = CQ2Y
.w
[2] - (*pCA4
).w
[2] - carry64
;
239 ((BINARY80
) (*pCA4
).w
[2] * l128
+
240 (BINARY80
) (*pCA4
).w
[1] * l64
+ (BINARY80
) (*pCA4
).w
[0]) * l64
;
242 Q3
= 1 + (UINT64
) lq
;
244 __mul_64x128_short (CQ3Y
, Q3
, CY
);
245 __sub_borrow_out ((*pCA4
).w
[0], carry64
, CQ3Y
.w
[0], (*pCA4
).w
[0]);
246 (*pCA4
).w
[1] = CQ3Y
.w
[1] - (*pCA4
).w
[1] - carry64
;
248 if ((SINT64
) (*pCA4
).w
[1] > (SINT64
) CY
.w
[1]
249 || ((*pCA4
).w
[1] == CY
.w
[1] && (*pCA4
).w
[0] >= CY
.w
[0])) {
251 __sub_borrow_out ((*pCA4
).w
[0], carry64
, (*pCA4
).w
[0], CY
.w
[0]);
252 (*pCA4
).w
[1] = (*pCA4
).w
[1] - CY
.w
[1] - carry64
;
253 } else if ((SINT64
) (*pCA4
).w
[1] < 0) {
255 __add_carry_out ((*pCA4
).w
[0], carry64
, (*pCA4
).w
[0], CY
.w
[0]);
256 (*pCA4
).w
[1] = (*pCA4
).w
[1] + CY
.w
[1] + carry64
;
258 // subtract Q3 from Q2
259 __sub_borrow_out (CQ2
.w
[0], carry64
, CQ2
.w
[0], Q3
);
263 // (*pCQ) + CQ2 + carry
264 __add_carry_out ((*pCQ
).w
[0], carry64
, CQ2
.w
[0], (*pCQ
).w
[0]);
265 (*pCQ
).w
[1] = (*pCQ
).w
[1] + CQ2
.w
[1] + carry64
;
272 __div_128_by_128 (UINT128
* pCQ
, UINT128
* pCR
, UINT128 CX0
, UINT128 CY
) {
273 UINT128 CY36
, CY51
, CQ
, A2
, CX
, CQT
;
275 int_double t64
, d49
, d60
;
278 if (!CX0
.w
[1] && !CY
.w
[1]) {
279 pCQ
->w
[0] = CX0
.w
[0] / CY
.w
[0];
281 pCR
->w
[1] = pCR
->w
[0] = 0;
282 pCR
->w
[0] = CX0
.w
[0] - pCQ
->w
[0] * CY
.w
[0];
290 t64
.i
= 0x43f0000000000000ull
;
291 lx
= (double) CX
.w
[1] * t64
.d
+ (double) CX
.w
[0];
292 ly
= (double) CY
.w
[1] * t64
.d
+ (double) CY
.w
[0];
295 CY36
.w
[1] = CY
.w
[0] >> (64 - 36);
296 CY36
.w
[0] = CY
.w
[0] << 36;
298 CQ
.w
[1] = CQ
.w
[0] = 0;
301 if (!CY
.w
[1] && !CY36
.w
[1] && (CX
.w
[1] >= CY36
.w
[0])) {
305 d60
.i
= 0x3c30000000000000ull
;
307 Q
= (UINT64
) lq
- 4ull;
310 __mul_64x64_to_128 (A2
, Q
, CY
.w
[0]);
313 A2
.w
[1] = (A2
.w
[1] << 60) | (A2
.w
[0] >> (64 - 60));
316 __sub_128_128 (CX
, CX
, A2
);
318 lx
= (double) CX
.w
[1] * t64
.d
+ (double) CX
.w
[0];
321 CQ
.w
[1] = Q
>> (64 - 60);
326 CY51
.w
[1] = (CY
.w
[1] << 51) | (CY
.w
[0] >> (64 - 51));
327 CY51
.w
[0] = CY
.w
[0] << 51;
329 if (CY
.w
[1] < (UINT64
) (1 << (64 - 51))
330 && (__unsigned_compare_gt_128 (CX
, CY51
))) {
334 d49
.i
= 0x3ce0000000000000ull
;
337 Q
= (UINT64
) lq
- 1ull;
340 __mul_64x64_to_128 (A2
, Q
, CY
.w
[0]);
341 A2
.w
[1] += Q
* CY
.w
[1];
344 A2
.w
[1] = (A2
.w
[1] << 49) | (A2
.w
[0] >> (64 - 49));
347 __sub_128_128 (CX
, CX
, A2
);
349 CQT
.w
[1] = Q
>> (64 - 49);
351 __add_128_128 (CQ
, CQ
, CQT
);
353 lx
= (double) CX
.w
[1] * t64
.d
+ (double) CX
.w
[0];
359 __mul_64x64_to_128 (A2
, Q
, CY
.w
[0]);
360 A2
.w
[1] += Q
* CY
.w
[1];
362 __sub_128_128 (CX
, CX
, A2
);
363 if ((SINT64
) CX
.w
[1] < 0) {
366 if (CX
.w
[0] < CY
.w
[0])
369 if ((SINT64
) CX
.w
[1] < 0) {
372 if (CX
.w
[0] < CY
.w
[0])
376 } else if (__unsigned_compare_ge_128 (CX
, CY
)) {
378 __sub_128_128 (CX
, CX
, CY
);
381 __add_128_64 (CQ
, CQ
, Q
);
393 __div_256_by_128 (UINT128
* pCQ
, UINT256
* pCA4
, UINT128 CY
) {
394 UINT256 CA4
, CA2
, CY51
, CY36
;
395 UINT128 CQ
, A2
, A2h
, CQT
;
397 int_double t64
, d49
, d60
;
398 double lx
, ly
, lq
, d128
, d192
;
400 // the quotient is assumed to be at most 113 bits,
401 // as needed by BID128 divide routines
404 CA4
.w
[3] = (*pCA4
).w
[3];
405 CA4
.w
[2] = (*pCA4
).w
[2];
406 CA4
.w
[1] = (*pCA4
).w
[1];
407 CA4
.w
[0] = (*pCA4
).w
[0];
408 CQ
.w
[1] = (*pCQ
).w
[1];
409 CQ
.w
[0] = (*pCQ
).w
[0];
412 t64
.i
= 0x43f0000000000000ull
;
413 d128
= t64
.d
* t64
.d
;
415 lx
= (double) CA4
.w
[3] * d192
+ ((double) CA4
.w
[2] * d128
+
416 ((double) CA4
.w
[1] * t64
.d
+
418 ly
= (double) CY
.w
[1] * t64
.d
+ (double) CY
.w
[0];
421 CY36
.w
[2] = CY
.w
[1] >> (64 - 36);
422 CY36
.w
[1] = (CY
.w
[1] << 36) | (CY
.w
[0] >> (64 - 36));
423 CY36
.w
[0] = CY
.w
[0] << 36;
425 CQ
.w
[1] = (*pCQ
).w
[1];
426 CQ
.w
[0] = (*pCQ
).w
[0];
429 if (CA4
.w
[3] > CY36
.w
[2]
430 || (CA4
.w
[3] == CY36
.w
[2]
431 && (CA4
.w
[2] > CY36
.w
[1]
432 || (CA4
.w
[2] == CY36
.w
[1] && CA4
.w
[1] >= CY36
.w
[0])))) {
434 d60
.i
= 0x3c30000000000000ull
;
436 Q
= (UINT64
) lq
- 4ull;
439 __mul_64x128_to_192 (CA2
, Q
, CY
);
442 // CA2.w[3] = CA2.w[2] >> (64-60);
443 CA2
.w
[2] = (CA2
.w
[2] << 60) | (CA2
.w
[1] >> (64 - 60));
444 CA2
.w
[1] = (CA2
.w
[1] << 60) | (CA2
.w
[0] >> (64 - 60));
448 __sub_borrow_out (CA4
.w
[0], carry64
, CA4
.w
[0], CA2
.w
[0]);
449 __sub_borrow_in_out (CA4
.w
[1], carry64
, CA4
.w
[1], CA2
.w
[1],
451 CA4
.w
[2] = CA4
.w
[2] - CA2
.w
[2] - carry64
;
453 lx
= ((double) CA4
.w
[2] * d128
+
454 ((double) CA4
.w
[1] * t64
.d
+ (double) CA4
.w
[0]));
457 CQT
.w
[1] = Q
>> (64 - 60);
459 __add_128_128 (CQ
, CQ
, CQT
);
462 CY51
.w
[2] = CY
.w
[1] >> (64 - 51);
463 CY51
.w
[1] = (CY
.w
[1] << 51) | (CY
.w
[0] >> (64 - 51));
464 CY51
.w
[0] = CY
.w
[0] << 51;
466 if (CA4
.w
[2] > CY51
.w
[2] || ((CA4
.w
[2] == CY51
.w
[2])
468 (__unsigned_compare_gt_128 (CA4
, CY51
))))
473 d49
.i
= 0x3ce0000000000000ull
;
476 Q
= (UINT64
) lq
- 1ull;
479 __mul_64x64_to_128 (A2
, Q
, CY
.w
[0]);
480 __mul_64x64_to_128 (A2h
, Q
, CY
.w
[1]);
482 if (A2
.w
[1] < A2h
.w
[0])
486 CA2
.w
[2] = (A2h
.w
[1] << 49) | (A2
.w
[1] >> (64 - 49));
487 CA2
.w
[1] = (A2
.w
[1] << 49) | (A2
.w
[0] >> (64 - 49));
488 CA2
.w
[0] = A2
.w
[0] << 49;
490 __sub_borrow_out (CA4
.w
[0], carry64
, CA4
.w
[0], CA2
.w
[0]);
491 __sub_borrow_in_out (CA4
.w
[1], carry64
, CA4
.w
[1], CA2
.w
[1],
493 CA4
.w
[2] = CA4
.w
[2] - CA2
.w
[2] - carry64
;
495 CQT
.w
[1] = Q
>> (64 - 49);
497 __add_128_128 (CQ
, CQ
, CQT
);
499 lx
= ((double) CA4
.w
[2] * d128
+
500 ((double) CA4
.w
[1] * t64
.d
+ (double) CA4
.w
[0]));
505 __mul_64x64_to_128 (A2
, Q
, CY
.w
[0]);
506 A2
.w
[1] += Q
* CY
.w
[1];
508 __sub_128_128 (CA4
, CA4
, A2
);
509 if ((SINT64
) CA4
.w
[1] < 0) {
512 if (CA4
.w
[0] < CY
.w
[0])
515 if ((SINT64
) CA4
.w
[1] < 0) {
518 if (CA4
.w
[0] < CY
.w
[0])
522 } else if (__unsigned_compare_ge_128 (CA4
, CY
)) {
524 __sub_128_128 (CA4
, CA4
, CY
);
527 __add_128_64 (CQ
, CQ
, Q
);
531 pCA4
->w
[1] = CA4
.w
[1];
532 pCA4
->w
[0] = CA4
.w
[0];