1 // VirtualDub - Video processing and capture application
2 // System library component
3 // Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
5 // Beginning with 1.6.0, the VirtualDub system library is licensed
6 // differently than the remainder of VirtualDub. This particular file is
7 // thus licensed as follows (the "zlib" license):
9 // This software is provided 'as-is', without any express or implied
10 // warranty. In no event will the authors be held liable for any
11 // damages arising from the use of this software.
13 // Permission is granted to anyone to use this software for any purpose,
14 // including commercial applications, and to alter it and redistribute it
15 // freely, subject to the following restrictions:
17 // 1. The origin of this software must not be misrepresented; you must
18 // not claim that you wrote the original software. If you use this
19 // software in a product, an acknowledgment in the product
20 // documentation would be appreciated but is not required.
21 // 2. Altered source versions must be plainly marked as such, and must
22 // not be misrepresented as being the original software.
23 // 3. This notice may not be removed or altered from any source distribution.
29 #include <vd2/system/int128.h>
31 #if defined(VD_CPU_X86) && defined(VD_COMPILER_MSVC)
32 void __declspec(naked
) __cdecl
vdasm_uint128_add(uint64 dst
[2], const uint64 x
[2], const uint64 y
[2]) {
58 void __declspec(naked
) __cdecl
vdasm_uint128_sub(uint64 dst
[2], const uint64 x
[2], const uint64 y
[2]) {
84 void __declspec(naked
) vdint128::setSquare(sint64 v
) {
124 const vdint128
__declspec(naked
) vdint128::operator<<(int v
) const {
188 const vdint128
__declspec(naked
) vdint128::operator>>(int v
) const {
252 const vduint128
__declspec(naked
) vduint128::operator<<(int v
) const {
316 const vduint128
__declspec(naked
) vduint128::operator>>(int v
) const {
380 #elif !defined(VD_CPU_AMD64)
382 // These aren't really assembly routines, but we define them so we aren't asm dependent.
// Portable 128-bit add: dst = x + y. Each operand is two 64-bit words with
// the low word at index 0.
//
// The operands are read into locals before anything is stored so that dst may
// alias x or y (in-place add). Comparing dst[0] against x[0] after the store
// would compare a value with itself when dst == x and silently drop the carry.
void vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	const uint64 xlo = x[0];
	const uint64 lo = xlo + y[0];

	// Unsigned addition wraps, so a sum smaller than an addend means a carry
	// out of the low word.
	const uint64 hi = x[1] + y[1] + (lo < xlo);

	dst[0] = lo;
	dst[1] = hi;
}
// Portable 128-bit subtract: dst = x - y. Each operand is two 64-bit words
// with the low word at index 0.
//
// The operands are read into locals before anything is stored so that dst may
// alias x or y (in-place subtract). Comparing dst[0] against x[0] after the
// store would compare a value with itself when dst == x and silently drop the
// borrow.
void vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	const uint64 xlo = x[0];
	const uint64 lo = xlo - y[0];

	// Unsigned subtraction wraps, so a difference larger than the minuend
	// means a borrow from the high word.
	const uint64 hi = x[1] - y[1] - (lo > xlo);

	dst[0] = lo;
	dst[1] = hi;
}
394 void vdint128::setSquare(sint64 v
) {
397 uint32 u0
= (uint32
)v
;
398 uint32 u1
= (uint32
)(v
>> 32);
400 uint64 m1
= u0
*u1
; // added twice
402 uint32 s0
= (uint32
)m0
;
403 uint32 s1a
= (uint32
)(m0
>> 32);
404 uint32 s1b
= (uint32
)m1
;
405 uint32 s2a
= (uint32
)(m1
>> 32);
420 const vdint128
vdint128::operator<<(int v
) const {
440 r
.q
[1] = (r
.q
[1] << v
) + ((uint64
)r
.q
[0] >> (64 - v
));
447 const vdint128
vdint128::operator>>(int v
) const {
454 sint64 sign
= q
[1] >> 63;
469 r
.q
[0] = ((uint64
)r
.q
[0] >> v
) + (r
.q
[1] << (64 - v
));
476 const vduint128
vduint128::operator<<(int v
) const {
496 r
.q
[1] = (r
.q
[1] << v
) + (r
.q
[0] >> (64 - v
));
503 const vduint128
vduint128::operator>>(int v
) const {
523 r
.q
[0] = (r
.q
[0] >> v
) + (r
.q
[1] << (64 - v
));
531 const vdint128
vdint128::operator*(const vdint128
& x
) const {
532 vdint128 X
= x
.abs();
535 vduint128
bd(VDUMul64x64To128(X
.q
[0], Y
.q
[0]));
537 bd
.q
[1] += X
.q
[0]*Y
.q
[1] + X
.q
[1]*Y
.q
[0];
539 return (q
[1]^x
.q
[1])<0 ? -vdint128(bd
) : vdint128(bd
);
542 const vdint128
vdint128::operator/(int x
) const {
548 accum
= ((sint64
)(d
[3] % x
) << 32) + d
[2];
549 r
.d
[2] = (sint32
)(accum
/ x
);
551 accum
= ((accum
% x
) << 32) + d
[1];
552 r
.d
[1] = (sint32
)(accum
/ x
);
554 accum
= ((accum
% x
) << 32) + d
[0];
555 r
.d
[0] = (sint32
)(accum
/ x
);
560 vdint128::operator double() const {
561 return (double)(unsigned long)q
[0]
562 + ldexp((double)(unsigned long)((unsigned __int64
)q
[0]>>32), 32)
563 + ldexp((double)q
[1], 64);
566 /////////////////////////////////////////////////////////////////////////////
568 const vduint128
vduint128::operator*(const vduint128
& x
) const {
569 vduint128
result(VDUMul64x64To128(q
[0], x
.q
[0]));
571 result
.q
[1] += q
[0]*x
.q
[1] + q
[1]*x
.q
[0];
576 #if defined(VD_CPU_X86) && defined(VD_COMPILER_MSVC)
577 vduint128
__declspec(naked
) __cdecl
VDUMul64x64To128(uint64 x
, uint64 y
) {
582 mul dword ptr
[esp
+16] ;EDX
:EAX
= BD
587 mul dword ptr
[esp
+20] ;EDX
:EAX
= AC
592 mul dword ptr
[esp
+20] ;EDX
:EAX
= BC
595 adc dword ptr
[ecx
+12], 0
598 mul dword ptr
[esp
+16] ;EDX
:EAX
= AD
601 adc dword ptr
[ecx
+12], 0
607 #elif !defined(VD_CPU_AMD64)
608 vduint128
VDUMul64x64To128(uint64 x
, uint64 y
) {
609 uint32 x0
= (uint32
)x
;
610 uint32 x1
= (uint32
)(x
>> 32);
611 uint32 y0
= (uint32
)y
;
612 uint32 y1
= (uint32
)(y
>> 32);
614 uint64 m0
= (uint64
)x0
*y0
;
615 uint64 m1a
= (uint64
)x1
*y0
;
616 uint64 m1b
= (uint64
)x0
*y1
;
617 uint64 m2
= (uint64
)x1
*y1
;
619 uint32 s0
= (uint32
)m0
;
620 uint32 s1a
= (uint32
)(m0
>> 32);
621 uint32 s1b
= (uint32
)m1a
;
622 uint32 s1c
= (uint32
)m1b
;
623 uint32 s2a
= (uint32
)(m1a
>> 32);
624 uint32 s2b
= (uint32
)(m1b
>> 32);
625 uint32 s2c
= (uint32
)m2
;
626 uint32 s3
= (uint32
)(m2
>> 32);
631 r
.d
[2] = r
.d
[1] < s1b
;
633 r
.d
[2] += r
.d
[1] < s1c
;
635 r
.d
[3] = r
.d
[2] < s2a
;
637 r
.d
[3] += r
.d
[2] < s2b
;
639 r
.d
[3] += r
.d
[2] < s2c
;
646 uint64
VDUDiv128x64To64(const vduint128
& dividend
, uint64 divisor
, uint64
& remainder
) {
647 vduint128
temp(dividend
);
648 vduint128
divisor2(divisor
);
653 for(int i
=0; i
<64; ++i
) {
655 if (temp
>= divisor2
) {
662 remainder
= temp
.q
[1];