1 /* cairo - a vector graphics library with display and print output
3 * Copyright © 2004 Keith Packard
5 * This library is free software; you can redistribute it and/or
6 * modify it either under the terms of the GNU Lesser General Public
7 * License version 2.1 as published by the Free Software Foundation
8 * (the "LGPL") or, at your option, under the terms of the Mozilla
9 * Public License Version 1.1 (the "MPL"). If you do not alter this
10 * notice, a recipient may use your version of this file under either
11 * the MPL or the LGPL.
13 * You should have received a copy of the LGPL along with this library
14 * in the file COPYING-LGPL-2.1; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
16 * You should have received a copy of the MPL along with this library
17 * in the file COPYING-MPL-1.1
19 * The contents of this file are subject to the Mozilla Public License
20 * Version 1.1 (the "License"); you may not use this file except in
21 * compliance with the License. You may obtain a copy of the License at
22 * http://www.mozilla.org/MPL/
24 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
25 * OF ANY KIND, either express or implied. See the LGPL or the MPL for
26 * the specific language governing rights and limitations.
28 * The Original Code is the cairo graphics library.
30 * The Initial Developer of the Original Code is Keith Packard
33 * Keith R. Packard <keithp@keithp.com>
40 #define uint64_lo32(i) ((i) & 0xffffffff)
41 #define uint64_hi32(i) ((i) >> 32)
42 #define uint64_lo(i) ((i) & 0xffffffff)
43 #define uint64_hi(i) ((i) >> 32)
44 #define uint64_shift32(i) ((i) << 32)
45 #define uint64_carry32 (((uint64_t) 1) << 32)
47 #define _cairo_uint32s_to_uint64(h,l) ((uint64_t) (h) << 32 | (l))
51 #define uint64_lo32(i) ((i).lo)
52 #define uint64_hi32(i) ((i).hi)
55 uint64_lo (cairo_uint64_t i
)
65 uint64_hi (cairo_uint64_t i
)
75 uint64_shift32 (cairo_uint64_t i
)
84 static const cairo_uint64_t uint64_carry32
= { 0, 1 };
87 _cairo_double_to_uint64 (double i
)
91 q
.hi
= i
* (1. / 4294967296.);
92 q
.lo
= i
- q
.hi
* 4294967296.;
97 _cairo_uint64_to_double (cairo_uint64_t i
)
99 return i
.hi
* 4294967296. + i
.lo
;
103 _cairo_double_to_int64 (double i
)
107 q
.hi
= i
* (1. / INT32_MAX
);
108 q
.lo
= i
- q
.hi
* (double)INT32_MAX
;
113 _cairo_int64_to_double (cairo_int64_t i
)
115 return i
.hi
* INT32_MAX
+ i
.lo
;
119 _cairo_uint32_to_uint64 (uint32_t i
)
129 _cairo_int32_to_int64 (int32_t i
)
134 q
.hi
= i
< 0 ? -1 : 0;
138 static cairo_uint64_t
139 _cairo_uint32s_to_uint64 (uint32_t h
, uint32_t l
)
149 _cairo_uint64_add (cairo_uint64_t a
, cairo_uint64_t b
)
161 _cairo_uint64_sub (cairo_uint64_t a
, cairo_uint64_t b
)
172 #define uint32_lo(i) ((i) & 0xffff)
173 #define uint32_hi(i) ((i) >> 16)
174 #define uint32_carry16 ((1) << 16)
177 _cairo_uint32x32_64_mul (uint32_t a
, uint32_t b
)
181 uint16_t ah
, al
, bh
, bl
;
182 uint32_t r0
, r1
, r2
, r3
;
189 r0
= (uint32_t) al
* bl
;
190 r1
= (uint32_t) al
* bh
;
191 r2
= (uint32_t) ah
* bl
;
192 r3
= (uint32_t) ah
* bh
;
194 r1
+= uint32_hi(r0
); /* no carry possible */
195 r1
+= r2
; /* but this can carry */
196 if (r1
< r2
) /* check */
197 r3
+= uint32_carry16
;
199 s
.hi
= r3
+ uint32_hi(r1
);
200 s
.lo
= (uint32_lo (r1
) << 16) + uint32_lo (r0
);
205 _cairo_int32x32_64_mul (int32_t a
, int32_t b
)
208 s
= _cairo_uint32x32_64_mul ((uint32_t) a
, (uint32_t) b
);
217 _cairo_uint64_mul (cairo_uint64_t a
, cairo_uint64_t b
)
221 s
= _cairo_uint32x32_64_mul (a
.lo
, b
.lo
);
222 s
.hi
+= a
.lo
* b
.hi
+ a
.hi
* b
.lo
;
227 _cairo_uint64_lsl (cairo_uint64_t a
, int shift
)
237 a
.hi
= a
.hi
<< shift
| a
.lo
>> (32 - shift
);
238 a
.lo
= a
.lo
<< shift
;
244 _cairo_uint64_rsl (cairo_uint64_t a
, int shift
)
254 a
.lo
= a
.lo
>> shift
| a
.hi
<< (32 - shift
);
255 a
.hi
= a
.hi
>> shift
;
260 #define _cairo_uint32_rsa(a,n) ((uint32_t) (((int32_t) (a)) >> (n)))
263 _cairo_uint64_rsa (cairo_int64_t a
, int shift
)
268 a
.hi
= _cairo_uint32_rsa (a
.hi
, 31);
273 a
.lo
= a
.lo
>> shift
| a
.hi
<< (32 - shift
);
274 a
.hi
= _cairo_uint32_rsa (a
.hi
, shift
);
280 _cairo_uint64_lt (cairo_uint64_t a
, cairo_uint64_t b
)
282 return (a
.hi
< b
.hi
||
283 (a
.hi
== b
.hi
&& a
.lo
< b
.lo
));
287 _cairo_uint64_eq (cairo_uint64_t a
, cairo_uint64_t b
)
289 return a
.hi
== b
.hi
&& a
.lo
== b
.lo
;
293 _cairo_int64_lt (cairo_int64_t a
, cairo_int64_t b
)
295 if (_cairo_int64_negative (a
) && !_cairo_int64_negative (b
))
297 if (!_cairo_int64_negative (a
) && _cairo_int64_negative (b
))
299 return _cairo_uint64_lt (a
, b
);
303 _cairo_uint64_cmp (cairo_uint64_t a
, cairo_uint64_t b
)
307 else if (a
.hi
> b
.hi
)
309 else if (a
.lo
< b
.lo
)
311 else if (a
.lo
> b
.lo
)
318 _cairo_int64_cmp (cairo_int64_t a
, cairo_int64_t b
)
320 if (_cairo_int64_negative (a
) && !_cairo_int64_negative (b
))
322 if (!_cairo_int64_negative (a
) && _cairo_int64_negative (b
))
325 return _cairo_uint64_cmp (a
, b
);
329 _cairo_uint64_not (cairo_uint64_t a
)
337 _cairo_uint64_negate (cairo_uint64_t a
)
347 * Simple bit-at-a-time divide.
350 _cairo_uint64_divrem (cairo_uint64_t num
, cairo_uint64_t den
)
352 cairo_uquorem64_t qr
;
356 bit
= _cairo_uint32_to_uint64 (1);
358 /* normalize to make den >= num, but not overflow */
359 while (_cairo_uint64_lt (den
, num
) && (den
.hi
& 0x80000000) == 0)
361 bit
= _cairo_uint64_lsl (bit
, 1);
362 den
= _cairo_uint64_lsl (den
, 1);
364 quo
= _cairo_uint32_to_uint64 (0);
366 /* generate quotient, one bit at a time */
367 while (bit
.hi
| bit
.lo
)
369 if (_cairo_uint64_le (den
, num
))
371 num
= _cairo_uint64_sub (num
, den
);
372 quo
= _cairo_uint64_add (quo
, bit
);
374 bit
= _cairo_uint64_rsl (bit
, 1);
375 den
= _cairo_uint64_rsl (den
, 1);
382 #endif /* !HAVE_UINT64_T */
386 _cairo_uint128_divrem (cairo_uint128_t num
, cairo_uint128_t den
)
388 cairo_uquorem128_t qr
;
398 _cairo_uint32_to_uint128 (uint32_t i
)
402 q
.lo
= _cairo_uint32_to_uint64 (i
);
403 q
.hi
= _cairo_uint32_to_uint64 (0);
408 _cairo_int32_to_int128 (int32_t i
)
412 q
.lo
= _cairo_int32_to_int64 (i
);
413 q
.hi
= _cairo_int32_to_int64 (i
< 0 ? -1 : 0);
418 _cairo_uint64_to_uint128 (cairo_uint64_t i
)
423 q
.hi
= _cairo_uint32_to_uint64 (0);
428 _cairo_int64_to_int128 (cairo_int64_t i
)
433 q
.hi
= _cairo_int32_to_int64 (_cairo_int64_negative(i
) ? -1 : 0);
438 _cairo_uint128_add (cairo_uint128_t a
, cairo_uint128_t b
)
442 s
.hi
= _cairo_uint64_add (a
.hi
, b
.hi
);
443 s
.lo
= _cairo_uint64_add (a
.lo
, b
.lo
);
444 if (_cairo_uint64_lt (s
.lo
, a
.lo
))
445 s
.hi
= _cairo_uint64_add (s
.hi
, _cairo_uint32_to_uint64 (1));
450 _cairo_uint128_sub (cairo_uint128_t a
, cairo_uint128_t b
)
454 s
.hi
= _cairo_uint64_sub (a
.hi
, b
.hi
);
455 s
.lo
= _cairo_uint64_sub (a
.lo
, b
.lo
);
456 if (_cairo_uint64_gt (s
.lo
, a
.lo
))
457 s
.hi
= _cairo_uint64_sub (s
.hi
, _cairo_uint32_to_uint64(1));
462 _cairo_uint64x64_128_mul (cairo_uint64_t a
, cairo_uint64_t b
)
465 uint32_t ah
, al
, bh
, bl
;
466 cairo_uint64_t r0
, r1
, r2
, r3
;
468 al
= uint64_lo32 (a
);
469 ah
= uint64_hi32 (a
);
470 bl
= uint64_lo32 (b
);
471 bh
= uint64_hi32 (b
);
473 r0
= _cairo_uint32x32_64_mul (al
, bl
);
474 r1
= _cairo_uint32x32_64_mul (al
, bh
);
475 r2
= _cairo_uint32x32_64_mul (ah
, bl
);
476 r3
= _cairo_uint32x32_64_mul (ah
, bh
);
478 r1
= _cairo_uint64_add (r1
, uint64_hi (r0
)); /* no carry possible */
479 r1
= _cairo_uint64_add (r1
, r2
); /* but this can carry */
480 if (_cairo_uint64_lt (r1
, r2
)) /* check */
481 r3
= _cairo_uint64_add (r3
, uint64_carry32
);
483 s
.hi
= _cairo_uint64_add (r3
, uint64_hi(r1
));
484 s
.lo
= _cairo_uint64_add (uint64_shift32 (r1
),
490 _cairo_int64x64_128_mul (cairo_int64_t a
, cairo_int64_t b
)
493 s
= _cairo_uint64x64_128_mul (_cairo_int64_to_uint64(a
),
494 _cairo_int64_to_uint64(b
));
495 if (_cairo_int64_negative (a
))
496 s
.hi
= _cairo_uint64_sub (s
.hi
,
497 _cairo_int64_to_uint64 (b
));
498 if (_cairo_int64_negative (b
))
499 s
.hi
= _cairo_uint64_sub (s
.hi
,
500 _cairo_int64_to_uint64 (a
));
505 _cairo_uint128_mul (cairo_uint128_t a
, cairo_uint128_t b
)
509 s
= _cairo_uint64x64_128_mul (a
.lo
, b
.lo
);
510 s
.hi
= _cairo_uint64_add (s
.hi
,
511 _cairo_uint64_mul (a
.lo
, b
.hi
));
512 s
.hi
= _cairo_uint64_add (s
.hi
,
513 _cairo_uint64_mul (a
.hi
, b
.lo
));
518 _cairo_uint128_lsl (cairo_uint128_t a
, int shift
)
523 a
.lo
= _cairo_uint32_to_uint64 (0);
528 a
.hi
= _cairo_uint64_add (_cairo_uint64_lsl (a
.hi
, shift
),
529 _cairo_uint64_rsl (a
.lo
, (64 - shift
)));
530 a
.lo
= _cairo_uint64_lsl (a
.lo
, shift
);
536 _cairo_uint128_rsl (cairo_uint128_t a
, int shift
)
541 a
.hi
= _cairo_uint32_to_uint64 (0);
546 a
.lo
= _cairo_uint64_add (_cairo_uint64_rsl (a
.lo
, shift
),
547 _cairo_uint64_lsl (a
.hi
, (64 - shift
)));
548 a
.hi
= _cairo_uint64_rsl (a
.hi
, shift
);
554 _cairo_uint128_rsa (cairo_int128_t a
, int shift
)
559 a
.hi
= _cairo_uint64_rsa (a
.hi
, 64-1);
564 a
.lo
= _cairo_uint64_add (_cairo_uint64_rsl (a
.lo
, shift
),
565 _cairo_uint64_lsl (a
.hi
, (64 - shift
)));
566 a
.hi
= _cairo_uint64_rsa (a
.hi
, shift
);
572 _cairo_uint128_lt (cairo_uint128_t a
, cairo_uint128_t b
)
574 return (_cairo_uint64_lt (a
.hi
, b
.hi
) ||
575 (_cairo_uint64_eq (a
.hi
, b
.hi
) &&
576 _cairo_uint64_lt (a
.lo
, b
.lo
)));
580 _cairo_int128_lt (cairo_int128_t a
, cairo_int128_t b
)
582 if (_cairo_int128_negative (a
) && !_cairo_int128_negative (b
))
584 if (!_cairo_int128_negative (a
) && _cairo_int128_negative (b
))
586 return _cairo_uint128_lt (a
, b
);
590 _cairo_uint128_cmp (cairo_uint128_t a
, cairo_uint128_t b
)
594 cmp
= _cairo_uint64_cmp (a
.hi
, b
.hi
);
597 return _cairo_uint64_cmp (a
.lo
, b
.lo
);
601 _cairo_int128_cmp (cairo_int128_t a
, cairo_int128_t b
)
603 if (_cairo_int128_negative (a
) && !_cairo_int128_negative (b
))
605 if (!_cairo_int128_negative (a
) && _cairo_int128_negative (b
))
608 return _cairo_uint128_cmp (a
, b
);
612 _cairo_uint128_eq (cairo_uint128_t a
, cairo_uint128_t b
)
614 return (_cairo_uint64_eq (a
.hi
, b
.hi
) &&
615 _cairo_uint64_eq (a
.lo
, b
.lo
));
619 #define _cairo_msbset64(q) (q & ((uint64_t) 1 << 63))
621 #define _cairo_msbset64(q) (q.hi & ((uint32_t) 1 << 31))
625 _cairo_uint128_divrem (cairo_uint128_t num
, cairo_uint128_t den
)
627 cairo_uquorem128_t qr
;
631 bit
= _cairo_uint32_to_uint128 (1);
633 /* normalize to make den >= num, but not overflow */
634 while (_cairo_uint128_lt (den
, num
) && !_cairo_msbset64(den
.hi
))
636 bit
= _cairo_uint128_lsl (bit
, 1);
637 den
= _cairo_uint128_lsl (den
, 1);
639 quo
= _cairo_uint32_to_uint128 (0);
641 /* generate quotient, one bit at a time */
642 while (_cairo_uint128_ne (bit
, _cairo_uint32_to_uint128(0)))
644 if (_cairo_uint128_le (den
, num
))
646 num
= _cairo_uint128_sub (num
, den
);
647 quo
= _cairo_uint128_add (quo
, bit
);
649 bit
= _cairo_uint128_rsl (bit
, 1);
650 den
= _cairo_uint128_rsl (den
, 1);
658 _cairo_uint128_negate (cairo_uint128_t a
)
660 a
.lo
= _cairo_uint64_not (a
.lo
);
661 a
.hi
= _cairo_uint64_not (a
.hi
);
662 return _cairo_uint128_add (a
, _cairo_uint32_to_uint128 (1));
666 _cairo_uint128_not (cairo_uint128_t a
)
668 a
.lo
= _cairo_uint64_not (a
.lo
);
669 a
.hi
= _cairo_uint64_not (a
.hi
);
673 #endif /* !HAVE_UINT128_T */
676 _cairo_int128_divrem (cairo_int128_t num
, cairo_int128_t den
)
678 int num_neg
= _cairo_int128_negative (num
);
679 int den_neg
= _cairo_int128_negative (den
);
680 cairo_uquorem128_t uqr
;
681 cairo_quorem128_t qr
;
684 num
= _cairo_int128_negate (num
);
686 den
= _cairo_int128_negate (den
);
687 uqr
= _cairo_uint128_divrem (num
, den
);
689 qr
.rem
= _cairo_int128_negate (uqr
.rem
);
692 if (num_neg
!= den_neg
)
693 qr
.quo
= _cairo_int128_negate (uqr
.quo
);
700 * _cairo_uint_96by64_32x64_divrem:
702 * Compute a 32 bit quotient and 64 bit remainder of a 96 bit unsigned
703 * dividend and 64 bit divisor. If the quotient doesn't fit into 32
704 * bits then the returned remainder is equal to the divisor, and the
705 * quotient is the largest representable 64 bit integer. It is an
706 * error to call this function with the high 32 bits of @num being
710 _cairo_uint_96by64_32x64_divrem (cairo_uint128_t num
,
713 cairo_uquorem64_t result
;
714 cairo_uint64_t B
= _cairo_uint32s_to_uint64 (1, 0);
716 /* These are the high 64 bits of the *96* bit numerator. We're
717 * going to represent the numerator as xB + y, where x is a 64,
718 * and y is a 32 bit number. */
719 cairo_uint64_t x
= _cairo_uint128_to_uint64 (_cairo_uint128_rsl(num
, 32));
721 /* Initialise the result to indicate overflow. */
722 result
.quo
= _cairo_uint32s_to_uint64 (-1U, -1U);
725 /* Don't bother if the quotient is going to overflow. */
726 if (_cairo_uint64_ge (x
, den
)) {
727 return /* overflow */ result
;
730 if (_cairo_uint64_lt (x
, B
)) {
731 /* When the final quotient is known to fit in 32 bits, then
732 * num < 2^64 if and only if den < 2^32. */
733 return _cairo_uint64_divrem (_cairo_uint128_to_uint64 (num
), den
);
736 /* Denominator is >= 2^32. the numerator is >= 2^64, and the
737 * division won't overflow: need two divrems. Write the
738 * numerator and denominator as
740 * num = xB + y x : 64 bits, y : 32 bits
741 * den = uB + v u, v : 32 bits
743 uint32_t y
= _cairo_uint128_to_uint32 (num
);
744 uint32_t u
= uint64_hi32 (den
);
745 uint32_t v
= _cairo_uint64_to_uint32 (den
);
747 /* Compute a lower bound approximate quotient of num/den
748 * from x/(u+1). Then we have
750 * x = q(u+1) + r ; q : 32 bits, r <= u : 32 bits.
752 * xB + y = q(u+1)B + (rB+y)
753 * = q(uB + B + v - v) + (rB+y)
754 * = q(uB + v) + qB - qv + (rB+y)
755 * = q(uB + v) + q(B-v) + (rB+y)
757 * The true quotient of num/den then is q plus the
758 * contribution of q(B-v) + (rB+y). The main contribution
759 * comes from the term q(B-v), with the term (rB+y) only
760 * contributing at most one part.
762 * The term q(B-v) must fit into 64 bits, since q fits into 32
763 * bits on account of being a lower bound to the true
764 * quotient, and as B-v <= 2^32, we may safely use a single
765 * 64/64 bit division to find its contribution. */
767 cairo_uquorem64_t quorem
;
768 cairo_uint64_t remainder
; /* will contain final remainder */
769 uint32_t quotient
; /* will contain final quotient. */
773 /* Approximate quotient by dividing the high 64 bits of num by
774 * u+1. Watch out for overflow of u+1. */
776 quorem
= _cairo_uint64_divrem (x
, _cairo_uint32_to_uint64 (u
+1));
777 q
= _cairo_uint64_to_uint32 (quorem
.quo
);
778 r
= _cairo_uint64_to_uint32 (quorem
.rem
);
782 r
= _cairo_uint64_to_uint32 (x
);
786 /* Add the main term's contribution to quotient. Note B-v =
787 * -v as an uint32 (unless v = 0) */
789 quorem
= _cairo_uint64_divrem (_cairo_uint32x32_64_mul (q
, -v
), den
);
791 quorem
= _cairo_uint64_divrem (_cairo_uint32s_to_uint64 (q
, 0), den
);
792 quotient
+= _cairo_uint64_to_uint32 (quorem
.quo
);
794 /* Add the contribution of the subterm and start computing the
796 remainder
= _cairo_uint32s_to_uint64 (r
, y
);
797 if (_cairo_uint64_ge (remainder
, den
)) {
798 remainder
= _cairo_uint64_sub (remainder
, den
);
802 /* Add the contribution of the main term's remainder. The
803 * funky test here checks that remainder + main_rem >= den,
804 * taking into account overflow of the addition. */
805 remainder
= _cairo_uint64_add (remainder
, quorem
.rem
);
806 if (_cairo_uint64_ge (remainder
, den
) ||
807 _cairo_uint64_lt (remainder
, quorem
.rem
))
809 remainder
= _cairo_uint64_sub (remainder
, den
);
813 result
.quo
= _cairo_uint32_to_uint64 (quotient
);
814 result
.rem
= remainder
;
820 _cairo_int_96by64_32x64_divrem (cairo_int128_t num
, cairo_int64_t den
)
822 int num_neg
= _cairo_int128_negative (num
);
823 int den_neg
= _cairo_int64_negative (den
);
824 cairo_uint64_t nonneg_den
;
825 cairo_uquorem64_t uqr
;
829 num
= _cairo_int128_negate (num
);
831 nonneg_den
= _cairo_int64_negate (den
);
835 uqr
= _cairo_uint_96by64_32x64_divrem (num
, nonneg_den
);
836 if (_cairo_uint64_eq (uqr
.rem
, nonneg_den
)) {
837 /* bail on overflow. */
838 qr
.quo
= _cairo_uint32s_to_uint64 (0x7FFFFFFF, -1U);
844 qr
.rem
= _cairo_int64_negate (uqr
.rem
);
847 if (num_neg
!= den_neg
)
848 qr
.quo
= _cairo_int64_negate (uqr
.quo
);