Visual Studio 2012 Support
[xy_vsfilter.git] / src / thirdparty / VirtualDub / system / source / int128.cpp
blobadbe7d69764fa43ee53b68182ca76b3620c078f4
1 // VirtualDub - Video processing and capture application
2 // System library component
3 // Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
4 //
5 // Beginning with 1.6.0, the VirtualDub system library is licensed
6 // differently than the remainder of VirtualDub. This particular file is
7 // thus licensed as follows (the "zlib" license):
8 //
9 // This software is provided 'as-is', without any express or implied
10 // warranty. In no event will the authors be held liable for any
11 // damages arising from the use of this software.
13 // Permission is granted to anyone to use this software for any purpose,
14 // including commercial applications, and to alter it and redistribute it
15 // freely, subject to the following restrictions:
17 // 1. The origin of this software must not be misrepresented; you must
18 // not claim that you wrote the original software. If you use this
19 // software in a product, an acknowledgment in the product
20 // documentation would be appreciated but is not required.
21 // 2. Altered source versions must be plainly marked as such, and must
22 // not be misrepresented as being the original software.
23 // 3. This notice may not be removed or altered from any source
24 // distribution.
26 #include "stdafx.h"
27 #include <math.h>
29 #include <vd2/system/int128.h>
31 #if defined(VD_CPU_X86) && defined(VD_COMPILER_MSVC)
// 128-bit unsigned add: dst = x + y.
// __cdecl, x86 only: the sum is formed as four 32-bit limbs chained
// through the carry flag; safe for dst aliasing x or y since each limb
// is read before its destination slot is written.
void __declspec(naked) __cdecl vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	__asm {
		push	ebx

		mov	ebx, [esp+16]	;ebx = y
		mov	ecx, [esp+12]	;ecx = x
		mov	edx, [esp+8]	;edx = dst

		mov	eax, [ecx+0]	;limb 0: plain add seeds the carry chain
		add	eax, [ebx+0]
		mov	[edx+0],eax
		mov	eax, [ecx+4]	;limbs 1..3: add-with-carry propagates it
		adc	eax, [ebx+4]
		mov	[edx+4],eax
		mov	eax, [ecx+8]
		adc	eax, [ebx+8]
		mov	[edx+8],eax
		mov	eax, [ecx+12]
		adc	eax, [ebx+12]
		mov	[edx+12],eax

		pop	ebx
		ret
	}
}
// 128-bit unsigned subtract: dst = x - y.
// Mirror of vdasm_uint128_add using sub/sbb so the borrow ripples
// through all four 32-bit limbs.
void __declspec(naked) __cdecl vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	__asm {
		push	ebx

		mov	ebx, [esp+16]	;ebx = y
		mov	ecx, [esp+12]	;ecx = x
		mov	edx, [esp+8]	;edx = dst

		mov	eax, [ecx+0]	;limb 0: plain subtract seeds the borrow chain
		sub	eax, [ebx+0]
		mov	[edx+0],eax
		mov	eax, [ecx+4]	;limbs 1..3: subtract-with-borrow propagates it
		sbb	eax, [ebx+4]
		mov	[edx+4],eax
		mov	eax, [ecx+8]
		sbb	eax, [ebx+8]
		mov	[edx+8],eax
		mov	eax, [ecx+12]
		sbb	eax, [ebx+12]
		mov	[edx+12],eax

		pop	ebx
		ret
	}
}
// this = v*v, computed as |v|^2 from three 32x32->64 partial products:
// |v|^2 = hi^2*2^64 + 2*lo*hi*2^32 + lo^2.
// Member function: ecx = this; after the three pushes v is at
// [esp+16] (low dword) / [esp+20] (high dword); 'ret 8' pops it.
void __declspec(naked) vdint128::setSquare(sint64 v) {
	__asm {
		push	edi
		push	esi
		push	ebx

		;compute |v| -- squaring the magnitude gives v^2 regardless of sign
		mov	eax, [esp+20]	;eax = high dword of v
		cdq			;edx = sign mask (0 or -1)
		mov	esi, eax
		mov	eax, [esp+16]	;eax = low dword of v
		xor	eax, edx	;conditional negate: (v ^ mask) - mask
		xor	esi, edx
		sub	eax, edx
		sbb	esi, edx

		;eax = lo, esi = hi of |v|
		mov	ebx, eax
		mul	eax		;edx:eax = lo^2
		mov	[ecx], eax	;d[0]
		mov	edi, edx	;edi = high half of lo^2, carries into d[1]
		mov	eax, ebx
		mul	esi		;edx:eax = lo*hi (cross product)
		mov	ebx, 0
		add	eax, eax	;double the cross product
		adc	edx, edx	;(no carry out: hi <= 2^31, so 2*lo*hi < 2^64)
		add	eax, edi	;fold in high half of lo^2
		adc	edx, 0
		mov	edi, edx
		adc	ebx, 0		;ebx = carry destined for d[3]
		mov	[ecx+4], eax	;d[1]
		mov	eax, esi
		mul	esi		;edx:eax = hi^2
		add	eax, edi	;fold in the middle carries
		adc	edx, ebx
		mov	[ecx+8], eax	;d[2]
		mov	[ecx+12], edx	;d[3]

		pop	ebx
		pop	esi
		pop	edi
		ret	8
	}
}
// Returns *this << v. The result is built in a caller-supplied temporary:
// after the four pushes, [esp+20] is the hidden return-value pointer and
// [esp+24] is the shift count; 'ret 8' pops both.
const vdint128 __declspec(naked) vdint128::operator<<(int v) const {
	__asm {
		push	ebp
		push	ebx
		push	esi
		push	edi

		mov	esi,ecx		;esi = this
		mov	edx,[esp+20]	;edx = result pointer

		mov	ecx,[esp+24]	;ecx = shift count
		cmp	ecx,128
		jae	zeroit		;everything shifts out -> result is zero

		mov	eax,[esi+12]	;load the four dwords, eax = most significant
		mov	ebx,[esi+8]
		mov	edi,[esi+4]
		mov	ebp,[esi]

dwordloop:
		cmp	ecx,32
		jb	bits

		;shift left one full dword at a time, zero-filling the bottom
		mov	eax,ebx
		mov	ebx,edi
		mov	edi,ebp
		xor	ebp,ebp
		sub	ecx,32
		jmp	short dwordloop

bits:
		;sub-dword shift: shld pulls bits up from the next-lower dword
		shld	eax,ebx,cl
		shld	ebx,edi,cl
		mov	[edx+12],eax
		mov	[edx+8],ebx
		shld	edi,ebp,cl

		shl	ebp,cl
		mov	[edx+4],edi
		mov	[edx],ebp

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8

zeroit:
		xor	eax,eax
		mov	[edx+0],eax
		mov	[edx+4],eax
		mov	[edx+8],eax
		mov	[edx+12],eax

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8
	}
}
// Arithmetic (sign-filling) right shift: returns *this >> v.
// After the four pushes, [esp+20] is the hidden return-value pointer and
// [esp+24] is the shift count; 'ret 8' pops both.
const vdint128 __declspec(naked) vdint128::operator>>(int v) const {
	__asm {
		push	ebp
		push	ebx
		push	esi
		push	edi

		mov	esi,ecx		;esi = this
		mov	edx,[esp+20]	;edx = result pointer

		mov	eax,[esi+12]	;eax = most significant dword (holds the sign)
		mov	ecx,[esp+24]	;ecx = shift count
		cmp	ecx,127
		jae	clearit		;shifts >= 127 leave only replicated sign bits

		mov	ebx,[esi+8]
		mov	edi,[esi+4]
		mov	ebp,[esi]

dwordloop:
		cmp	ecx,32
		jb	bits

		;shift right one full dword at a time, sign-filling the top
		mov	ebp,edi
		mov	edi,ebx
		mov	ebx,eax
		sar	eax,31
		sub	ecx,32
		jmp	short dwordloop

bits:
		;sub-dword shift: shrd pulls bits down from the next-higher dword
		shrd	ebp,edi,cl
		shrd	edi,ebx,cl
		mov	[edx],ebp
		mov	[edx+4],edi
		shrd	ebx,eax,cl

		sar	eax,cl		;arithmetic shift preserves the sign
		mov	[edx+8],ebx
		mov	[edx+12],eax

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8

clearit:
		sar	eax, 31		;replicate the sign through all 128 bits
		mov	[edx+0],eax
		mov	[edx+4],eax
		mov	[edx+8],eax
		mov	[edx+12],eax

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8
	}
}
// Unsigned 128-bit left shift: returns *this << v.
// Identical mechanics to the signed version (left shift has no sign
// handling): [esp+20] = hidden return-value pointer after the pushes,
// [esp+24] = shift count; 'ret 8' pops both.
const vduint128 __declspec(naked) vduint128::operator<<(int v) const {
	__asm {
		push	ebp
		push	ebx
		push	esi
		push	edi

		mov	esi,ecx		;esi = this
		mov	edx,[esp+20]	;edx = result pointer

		mov	ecx,[esp+24]	;ecx = shift count
		cmp	ecx,128
		jae	zeroit		;everything shifts out -> result is zero

		mov	eax,[esi+12]	;load the four dwords, eax = most significant
		mov	ebx,[esi+8]
		mov	edi,[esi+4]
		mov	ebp,[esi]

dwordloop:
		cmp	ecx,32
		jb	bits

		;shift left one full dword at a time, zero-filling the bottom
		mov	eax,ebx
		mov	ebx,edi
		mov	edi,ebp
		xor	ebp,ebp
		sub	ecx,32
		jmp	short dwordloop

bits:
		;sub-dword shift: shld pulls bits up from the next-lower dword
		shld	eax,ebx,cl
		shld	ebx,edi,cl
		mov	[edx+12],eax
		mov	[edx+8],ebx
		shld	edi,ebp,cl

		shl	ebp,cl
		mov	[edx+4],edi
		mov	[edx],ebp

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8

zeroit:
		xor	eax,eax
		mov	[edx+0],eax
		mov	[edx+4],eax
		mov	[edx+8],eax
		mov	[edx+12],eax

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8
	}
}
316 const vduint128 __declspec(naked) vduint128::operator>>(int v) const {
317 __asm {
318 push ebp
319 push ebx
320 push esi
321 push edi
323 mov esi,ecx
324 mov edx,[esp+20]
326 mov eax,[esi+12]
327 mov ecx,[esp+24]
328 cmp ecx,127
329 jae clearit
331 mov ebx,[esi+8]
332 mov edi,[esi+4]
333 mov ebp,[esi]
335 dwordloop:
336 cmp ecx,32
337 jb bits
339 mov ebp,edi
340 mov edi,ebx
341 mov ebx,eax
342 xor eax,eax
343 sub ecx,32
344 jmp short dwordloop
346 bits:
347 shrd ebp,edi,cl
348 shrd edi,ebx,cl
349 mov [edx],ebp
350 mov [edx+4],edi
351 shrd ebx,eax,cl
353 shr eax,cl
354 mov [edx+8],ebx
355 mov [edx+12],eax
357 pop edi
358 pop esi
359 pop ebx
360 pop ebp
361 mov eax,[esp+4]
362 ret 8
364 clearit:
365 sar eax, 31
366 mov [edx+0],eax
367 mov [edx+4],eax
368 mov [edx+8],eax
369 mov [edx+12],eax
371 pop edi
372 pop esi
373 pop ebx
374 pop ebp
375 mov eax,[esp+4]
376 ret 8
380 #elif !defined(VD_CPU_AMD64)
382 // These aren't really assembly routines, but we define them so we aren't asm dependent.
// 128-bit unsigned add: dst = x + y, as two 64-bit limbs.
// Reads both inputs before writing so the call is safe when dst aliases
// x or y -- the asm version is alias-safe, but the old expression here
// tested dst[0] < x[0] after overwriting x[0] through the alias and
// missed the carry whenever dst == x.
void vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	const uint64 lo = x[0] + y[0];
	const uint64 carry = lo < y[0] ? 1 : 0;	// unsigned wrap detects carry-out

	dst[1] = x[1] + y[1] + carry;
	dst[0] = lo;
}
// 128-bit unsigned subtract: dst = x - y, as two 64-bit limbs.
// The borrow is computed from the original operands (x[0] < y[0]) before
// anything is stored, so the result is correct even when dst aliases x
// or y -- the old expression compared dst[0] > x[0] after the store and
// lost the borrow whenever dst == x.
void vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	const uint64 borrow = x[0] < y[0] ? 1 : 0;
	const uint64 lo = x[0] - y[0];

	dst[1] = x[1] - y[1] - borrow;
	dst[0] = lo;
}
394 void vdint128::setSquare(sint64 v) {
395 vdint128 r;
397 uint32 u0 = (uint32)v;
398 uint32 u1 = (uint32)(v >> 32);
399 uint64 m0 = u0*u0;
400 uint64 m1 = u0*u1; // added twice
401 uint64 m2 = u1*u1;
402 uint32 s0 = (uint32)m0;
403 uint32 s1a = (uint32)(m0 >> 32);
404 uint32 s1b = (uint32)m1;
405 uint32 s2a = (uint32)(m1 >> 32);
407 q[1] = m2 + s2a;
409 d[0] = s0;
411 d[1] = s1a + s1b;
412 if (d[1] < s1b)
413 ++q[1];
415 d[1] += s1b;
416 if (d[1] < s1b)
417 ++q[1];
420 const vdint128 vdint128::operator<<(int v) const {
421 vdint128 r;
423 r.q[0] = q[0];
424 r.q[1] = q[1];
426 if (v >= 64) {
427 if (v >= 128) {
428 r.q[0] = 0;
429 r.q[1] = 0;
430 return r;
433 r.q[1] = r.q[0];
434 r.q[0] = 0;
436 v -= 64;
439 if (v) {
440 r.q[1] = (r.q[1] << v) + ((uint64)r.q[0] >> (64 - v));
441 r.q[0] <<= v;
444 return r;
447 const vdint128 vdint128::operator>>(int v) const {
448 vdint128 r;
450 r.q[0] = q[0];
451 r.q[1] = q[1];
453 if (v >= 64) {
454 sint64 sign = q[1] >> 63;
456 if (v >= 128) {
457 r.q[0] = sign;
458 r.q[1] = sign;
459 return r;
462 r.q[0] = r.q[1];
463 r.q[1] = sign;
465 v -= 64;
468 if (v) {
469 r.q[0] = ((uint64)r.q[0] >> v) + (r.q[1] << (64 - v));
470 r.q[1] >>= v;
473 return r;
476 const vduint128 vduint128::operator<<(int v) const {
477 vduint128 r;
479 r.q[0] = q[0];
480 r.q[1] = q[1];
482 if (v >= 64) {
483 if (v >= 128) {
484 r.q[0] = 0;
485 r.q[1] = 0;
486 return r;
489 r.q[1] = r.q[0];
490 r.q[0] = 0;
492 v -= 64;
495 if (v) {
496 r.q[1] = (r.q[1] << v) + (r.q[0] >> (64 - v));
497 r.q[0] <<= v;
500 return r;
503 const vduint128 vduint128::operator>>(int v) const {
504 vduint128 r;
506 r.q[0] = q[0];
507 r.q[1] = q[1];
509 if (v >= 64) {
510 if (v >= 128) {
511 r.q[0] = 0;
512 r.q[1] = 0;
513 return r;
516 r.q[0] = r.q[1];
517 r.q[1] = 0;
519 v -= 64;
522 if (v) {
523 r.q[0] = (r.q[0] >> v) + (r.q[1] << (64 - v));
524 r.q[1] >>= v;
527 return r;
529 #endif
531 const vdint128 vdint128::operator*(const vdint128& x) const {
532 vdint128 X = x.abs();
533 vdint128 Y = abs();
535 vduint128 bd(VDUMul64x64To128(X.q[0], Y.q[0]));
537 bd.q[1] += X.q[0]*Y.q[1] + X.q[1]*Y.q[0];
539 return (q[1]^x.q[1])<0 ? -vdint128(bd) : vdint128(bd);
// Divide the 128-bit value by a 32-bit divisor using schoolbook long
// division, one 32-bit digit at a time from the most significant down;
// each step's remainder is carried into the next digit through 'accum'.
// NOTE(review): per-digit results follow C++ signed '/' and '%'
// semantics; behavior for negative dividends depends on that chain --
// verify against the asm build if exact negative-value results matter.
// There is no guard for x == 0 (UB, as with built-in division).
const vdint128 vdint128::operator/(int x) const {
	vdint128 r;
	sint64 accum;

	r.d[3] = d[3] / x;	// top digit; its remainder seeds the chain

	accum = ((sint64)(d[3] % x) << 32) + d[2];
	r.d[2] = (sint32)(accum / x);

	accum = ((accum % x) << 32) + d[1];
	r.d[1] = (sint32)(accum / x);

	accum = ((accum % x) << 32) + d[0];
	r.d[0] = (sint32)(accum / x);

	return r;
}
560 vdint128::operator double() const {
561 return (double)(unsigned long)q[0]
562 + ldexp((double)(unsigned long)((unsigned __int64)q[0]>>32), 32)
563 + ldexp((double)q[1], 64);
566 /////////////////////////////////////////////////////////////////////////////
568 const vduint128 vduint128::operator*(const vduint128& x) const {
569 vduint128 result(VDUMul64x64To128(q[0], x.q[0]));
571 result.q[1] += q[0]*x.q[1] + q[1]*x.q[0];
573 return result;
576 #if defined(VD_CPU_X86) && defined(VD_COMPILER_MSVC)
// Full 64x64 -> 128-bit unsigned multiply from four 32x32 partial
// products: with x = A:B and y = C:D (high:low dwords),
// result = AC<<64 + (AD + BC)<<32 + BD.
// __cdecl returning a struct: [esp+4] holds the hidden result pointer,
// which is also returned in eax; x is at [esp+8..12], y at [esp+16..20].
vduint128 __declspec(naked) __cdecl VDUMul64x64To128(uint64 x, uint64 y) {
	__asm {
		mov	ecx,[esp+4]	;ecx = result pointer

		mov	eax,[esp+8]
		mul	dword ptr [esp+16]	;EDX:EAX = BD
		mov	[ecx+0],eax
		mov	[ecx+4],edx

		mov	eax,[esp+12]
		mul	dword ptr [esp+20]	;EDX:EAX = AC
		mov	[ecx+8],eax
		mov	[ecx+12],edx

		mov	eax,[esp+8]
		mul	dword ptr [esp+20]	;EDX:EAX = BC
		add	[ecx+4],eax	;fold into bits 32..95, carry to the top
		adc	[ecx+8],edx
		adc	dword ptr [ecx+12], 0

		mov	eax,[esp+12]
		mul	dword ptr [esp+16]	;EDX:EAX = AD
		add	[ecx+4],eax
		adc	[ecx+8],edx
		adc	dword ptr [ecx+12], 0

		mov	eax, ecx	;return the hidden result pointer
		ret
	}
}
607 #elif !defined(VD_CPU_AMD64)
608 vduint128 VDUMul64x64To128(uint64 x, uint64 y) {
609 uint32 x0 = (uint32)x;
610 uint32 x1 = (uint32)(x >> 32);
611 uint32 y0 = (uint32)y;
612 uint32 y1 = (uint32)(y >> 32);
614 uint64 m0 = (uint64)x0*y0;
615 uint64 m1a = (uint64)x1*y0;
616 uint64 m1b = (uint64)x0*y1;
617 uint64 m2 = (uint64)x1*y1;
619 uint32 s0 = (uint32)m0;
620 uint32 s1a = (uint32)(m0 >> 32);
621 uint32 s1b = (uint32)m1a;
622 uint32 s1c = (uint32)m1b;
623 uint32 s2a = (uint32)(m1a >> 32);
624 uint32 s2b = (uint32)(m1b >> 32);
625 uint32 s2c = (uint32)m2;
626 uint32 s3 = (uint32)(m2 >> 32);
628 vduint128 r;
629 r.d[0] = s0;
630 r.d[1] = s1a + s1b;
631 r.d[2] = r.d[1] < s1b;
632 r.d[1] += s1c;
633 r.d[2] += r.d[1] < s1c;
634 r.d[2] += s2a;
635 r.d[3] = r.d[2] < s2a;
636 r.d[2] += s2b;
637 r.d[3] += r.d[2] < s2b;
638 r.d[2] += s2c;
639 r.d[3] += r.d[2] < s2c;
640 r.d[3] += s3;
642 return r;
644 #endif
// 128/64 -> 64-bit unsigned division via bit-at-a-time restoring long
// division; returns the low 64 bits of the quotient and writes the
// remainder through the out-parameter.
// NOTE(review): no guard against divisor == 0 or a quotient wider than
// 64 bits (dividend.q[1] >= divisor) -- callers must ensure both.
uint64 VDUDiv128x64To64(const vduint128& dividend, uint64 divisor, uint64& remainder) {
	vduint128 temp(dividend);
	vduint128 divisor2(divisor);

	divisor2 <<= 63;	// align the divisor with the dividend's top half

	uint64 result = 0;
	for(int i=0; i<64; ++i) {
		result += result;	// shift a 0 into the quotient
		if (temp >= divisor2) {
			temp -= divisor2;
			++result;	// divisor fit: this quotient bit is 1
		}
		temp += temp;		// shift the working dividend up one bit
	}

	// after 64 doublings the remainder sits in the high half of temp
	remainder = temp.q[1];

	return result;