Visual Studio 2012 Support
[xy_vsfilter.git] / src / thirdparty / VirtualDub / system / source / int128.cpp
blobadbe7d69764fa43ee53b68182ca76b3620c078f4
1 // VirtualDub - Video processing and capture application
2 // System library component
3 // Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
4 //
5 // Beginning with 1.6.0, the VirtualDub system library is licensed
6 // differently than the remainder of VirtualDub. This particular file is
7 // thus licensed as follows (the "zlib" license):
8 //
9 // This software is provided 'as-is', without any express or implied
10 // warranty. In no event will the authors be held liable for any
11 // damages arising from the use of this software.
13 // Permission is granted to anyone to use this software for any purpose,
14 // including commercial applications, and to alter it and redistribute it
15 // freely, subject to the following restrictions:
17 // 1. The origin of this software must not be misrepresented; you must
18 // not claim that you wrote the original software. If you use this
19 // software in a product, an acknowledgment in the product
20 // documentation would be appreciated but is not required.
21 // 2. Altered source versions must be plainly marked as such, and must
22 // not be misrepresented as being the original software.
23 // 3. This notice may not be removed or altered from any source
24 // distribution.
26 #include "stdafx.h"
27 #include <math.h>
29 #include <vd2/system/int128.h>
31 #if defined(VD_CPU_X86) && defined(VD_COMPILER_MSVC)
// 128-bit unsigned add: dst = x + y.
// __cdecl, x86 only: the sum is formed as four 32-bit limbs chained
// through the carry flag; safe for dst aliasing x or y since each limb
// is read before its destination slot is written.
void __declspec(naked) __cdecl vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	__asm {
		push	ebx

		mov	ebx, [esp+16]	;ebx = y
		mov	ecx, [esp+12]	;ecx = x
		mov	edx, [esp+8]	;edx = dst

		mov	eax, [ecx+0]	;limb 0: plain add seeds the carry chain
		add	eax, [ebx+0]
		mov	[edx+0],eax
		mov	eax, [ecx+4]	;limbs 1..3: add-with-carry propagates it
		adc	eax, [ebx+4]
		mov	[edx+4],eax
		mov	eax, [ecx+8]
		adc	eax, [ebx+8]
		mov	[edx+8],eax
		mov	eax, [ecx+12]
		adc	eax, [ebx+12]
		mov	[edx+12],eax

		pop	ebx
		ret
	}
}
// 128-bit unsigned subtract: dst = x - y.
// Mirror of vdasm_uint128_add using sub/sbb so the borrow ripples
// through all four 32-bit limbs.
void __declspec(naked) __cdecl vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	__asm {
		push	ebx

		mov	ebx, [esp+16]	;ebx = y
		mov	ecx, [esp+12]	;ecx = x
		mov	edx, [esp+8]	;edx = dst

		mov	eax, [ecx+0]	;limb 0: plain subtract seeds the borrow chain
		sub	eax, [ebx+0]
		mov	[edx+0],eax
		mov	eax, [ecx+4]	;limbs 1..3: subtract-with-borrow propagates it
		sbb	eax, [ebx+4]
		mov	[edx+4],eax
		mov	eax, [ecx+8]
		sbb	eax, [ebx+8]
		mov	[edx+8],eax
		mov	eax, [ecx+12]
		sbb	eax, [ebx+12]
		mov	[edx+12],eax

		pop	ebx
		ret
	}
}
// this = v*v, computed as |v|^2 from three 32x32->64 partial products:
// |v|^2 = hi^2*2^64 + 2*lo*hi*2^32 + lo^2.
// Member function: ecx = this; after the three pushes v is at
// [esp+16] (low dword) / [esp+20] (high dword); 'ret 8' pops it.
void __declspec(naked) vdint128::setSquare(sint64 v) {
	__asm {
		push	edi
		push	esi
		push	ebx

		;compute |v| -- squaring the magnitude gives v^2 regardless of sign
		mov	eax, [esp+20]	;eax = high dword of v
		cdq			;edx = sign mask (0 or -1)
		mov	esi, eax
		mov	eax, [esp+16]	;eax = low dword of v
		xor	eax, edx	;conditional negate: (v ^ mask) - mask
		xor	esi, edx
		sub	eax, edx
		sbb	esi, edx

		;eax = lo, esi = hi of |v|
		mov	ebx, eax
		mul	eax		;edx:eax = lo^2
		mov	[ecx], eax	;d[0]
		mov	edi, edx	;edi = high half of lo^2, carries into d[1]
		mov	eax, ebx
		mul	esi		;edx:eax = lo*hi (cross product)
		mov	ebx, 0
		add	eax, eax	;double the cross product
		adc	edx, edx	;(no carry out: hi <= 2^31, so 2*lo*hi < 2^64)
		add	eax, edi	;fold in high half of lo^2
		adc	edx, 0
		mov	edi, edx
		adc	ebx, 0		;ebx = carry destined for d[3]
		mov	[ecx+4], eax	;d[1]
		mov	eax, esi
		mul	esi		;edx:eax = hi^2
		add	eax, edi	;fold in the middle carries
		adc	edx, ebx
		mov	[ecx+8], eax	;d[2]
		mov	[ecx+12], edx	;d[3]

		pop	ebx
		pop	esi
		pop	edi
		ret	8
	}
}
// Returns *this << v. The result is built in a caller-supplied temporary:
// after the four pushes, [esp+20] is the hidden return-value pointer and
// [esp+24] is the shift count; 'ret 8' pops both.
const vdint128 __declspec(naked) vdint128::operator<<(int v) const {
	__asm {
		push	ebp
		push	ebx
		push	esi
		push	edi

		mov	esi,ecx		;esi = this
		mov	edx,[esp+20]	;edx = result pointer

		mov	ecx,[esp+24]	;ecx = shift count
		cmp	ecx,128
		jae	zeroit		;everything shifts out -> result is zero

		mov	eax,[esi+12]	;load the four dwords, eax = most significant
		mov	ebx,[esi+8]
		mov	edi,[esi+4]
		mov	ebp,[esi]

dwordloop:
		cmp	ecx,32
		jb	bits

		;shift left one full dword at a time, zero-filling the bottom
		mov	eax,ebx
		mov	ebx,edi
		mov	edi,ebp
		xor	ebp,ebp
		sub	ecx,32
		jmp	short dwordloop

bits:
		;sub-dword shift: shld pulls bits up from the next-lower dword
		shld	eax,ebx,cl
		shld	ebx,edi,cl
		mov	[edx+12],eax
		mov	[edx+8],ebx
		shld	edi,ebp,cl

		shl	ebp,cl
		mov	[edx+4],edi
		mov	[edx],ebp

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8

zeroit:
		xor	eax,eax
		mov	[edx+0],eax
		mov	[edx+4],eax
		mov	[edx+8],eax
		mov	[edx+12],eax

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8
	}
}
// Arithmetic (sign-filling) right shift: returns *this >> v.
// After the four pushes, [esp+20] is the hidden return-value pointer and
// [esp+24] is the shift count; 'ret 8' pops both.
const vdint128 __declspec(naked) vdint128::operator>>(int v) const {
	__asm {
		push	ebp
		push	ebx
		push	esi
		push	edi

		mov	esi,ecx		;esi = this
		mov	edx,[esp+20]	;edx = result pointer

		mov	eax,[esi+12]	;eax = most significant dword (holds the sign)
		mov	ecx,[esp+24]	;ecx = shift count
		cmp	ecx,127
		jae	clearit		;shifts >= 127 leave only replicated sign bits

		mov	ebx,[esi+8]
		mov	edi,[esi+4]
		mov	ebp,[esi]

dwordloop:
		cmp	ecx,32
		jb	bits

		;shift right one full dword at a time, sign-filling the top
		mov	ebp,edi
		mov	edi,ebx
		mov	ebx,eax
		sar	eax,31
		sub	ecx,32
		jmp	short dwordloop

bits:
		;sub-dword shift: shrd pulls bits down from the next-higher dword
		shrd	ebp,edi,cl
		shrd	edi,ebx,cl
		mov	[edx],ebp
		mov	[edx+4],edi
		shrd	ebx,eax,cl

		sar	eax,cl		;arithmetic shift preserves the sign
		mov	[edx+8],ebx
		mov	[edx+12],eax

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8

clearit:
		sar	eax, 31		;replicate the sign through all 128 bits
		mov	[edx+0],eax
		mov	[edx+4],eax
		mov	[edx+8],eax
		mov	[edx+12],eax

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8
	}
}
// Unsigned 128-bit left shift: returns *this << v.
// Identical mechanics to the signed version (left shift has no sign
// handling): [esp+20] = hidden return-value pointer after the pushes,
// [esp+24] = shift count; 'ret 8' pops both.
const vduint128 __declspec(naked) vduint128::operator<<(int v) const {
	__asm {
		push	ebp
		push	ebx
		push	esi
		push	edi

		mov	esi,ecx		;esi = this
		mov	edx,[esp+20]	;edx = result pointer

		mov	ecx,[esp+24]	;ecx = shift count
		cmp	ecx,128
		jae	zeroit		;everything shifts out -> result is zero

		mov	eax,[esi+12]	;load the four dwords, eax = most significant
		mov	ebx,[esi+8]
		mov	edi,[esi+4]
		mov	ebp,[esi]

dwordloop:
		cmp	ecx,32
		jb	bits

		;shift left one full dword at a time, zero-filling the bottom
		mov	eax,ebx
		mov	ebx,edi
		mov	edi,ebp
		xor	ebp,ebp
		sub	ecx,32
		jmp	short dwordloop

bits:
		;sub-dword shift: shld pulls bits up from the next-lower dword
		shld	eax,ebx,cl
		shld	ebx,edi,cl
		mov	[edx+12],eax
		mov	[edx+8],ebx
		shld	edi,ebp,cl

		shl	ebp,cl
		mov	[edx+4],edi
		mov	[edx],ebp

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8

zeroit:
		xor	eax,eax
		mov	[edx+0],eax
		mov	[edx+4],eax
		mov	[edx+8],eax
		mov	[edx+12],eax

		pop	edi
		pop	esi
		pop	ebx
		pop	ebp
		mov	eax,[esp+4]	;return the hidden result pointer
		ret	8
	}
}
316 const vduint128 __declspec(naked) vduint128::operator>>(int v) const {
317 __asm {
318 push ebp
319 push ebx
320 push esi
321 push edi
323 mov esi,ecx
324 mov edx,[esp+20]
326 mov eax,[esi+12]
327 mov ecx,[esp+24]
328 cmp ecx,127
329 jae clearit
331 mov ebx,[esi+8]
332 mov edi,[esi+4]
333 mov ebp,[esi]
335 dwordloop:
336 cmp ecx,32
337 jb bits
339 mov ebp,edi
340 mov edi,ebx
341 mov ebx,eax
342 xor eax,eax
343 sub ecx,32
344 jmp short dwordloop
346 bits:
347 shrd ebp,edi,cl
348 shrd edi,ebx,cl
349 mov [edx],ebp
350 mov [edx+4],edi
351 shrd ebx,eax,cl
353 shr eax,cl
354 mov [edx+8],ebx
355 mov [edx+12],eax
357 pop edi
358 pop esi
359 pop ebx
360 pop ebp
361 mov eax,[esp+4]
362 ret 8
364 clearit:
365 sar eax, 31
366 mov [edx+0],eax
367 mov [edx+4],eax
368 mov [edx+8],eax
369 mov [edx+12],eax
371 pop edi
372 pop esi
373 pop ebx
374 pop ebp
375 mov eax,[esp+4]
376 ret 8
380 #elif !defined(VD_CPU_AMD64)
382 // These aren't really assembly routines, but we define them so we aren't asm dependent.
// 128-bit unsigned add: dst = x + y, as two 64-bit limbs.
// Reads both inputs before writing so the call is safe when dst aliases
// x or y -- the asm version is alias-safe, but the old expression here
// tested dst[0] < x[0] after overwriting x[0] through the alias and
// missed the carry whenever dst == x.
void vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	const uint64 lo = x[0] + y[0];
	const uint64 carry = lo < y[0] ? 1 : 0;	// unsigned wrap detects carry-out

	dst[1] = x[1] + y[1] + carry;
	dst[0] = lo;
}
// 128-bit unsigned subtract: dst = x - y, as two 64-bit limbs.
// The borrow is computed from the original operands (x[0] < y[0]) before
// anything is stored, so the result is correct even when dst aliases x
// or y -- the old expression compared dst[0] > x[0] after the store and
// lost the borrow whenever dst == x.
void vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
	const uint64 borrow = x[0] < y[0] ? 1 : 0;
	const uint64 lo = x[0] - y[0];

	dst[1] = x[1] - y[1] - borrow;
	dst[0] = lo;
}
394 void vdint128::setSquare(sint64 v) {
395 vdint128 r;
397 uint32 u0 = (uint32)v;
398 uint32 u1 = (uint32)(v >> 32);
399 uint64 m0 = u0*u0;
400 uint64 m1 = u0*u1; // added twice
401 uint64 m2 = u1*u1;
402 uint32 s0 = (uint32)m0;
403 uint32 s1a = (uint32)(m0 >> 32);
404 uint32 s1b = (uint32)m1;
405 uint32 s2a = (uint32)(m1 >> 32);
407 q[1] = m2 + s2a;
409 d[0] = s0;
411 d[1] = s1a + s1b;
412 if (d[1] < s1b)
413 ++q[1];
415 d[1] += s1b;
416 if (d[1] < s1b)
417 ++q[1];
420 const vdint128 vdint128::operator<<(int v) const {
421 vdint128 r;
423 r.q[0] = q[0];
424 r.q[1] = q[1];
426 if (v >= 64) {
427 if (v >= 128) {
428 r.q[0] = 0;
429 r.q[1] = 0;
430 return r;
433 r.q[1] = r.q[0];
434 r.q[0] = 0;
436 v -= 64;
439 if (v) {
440 r.q[1] = (r.q[1] << v) + ((uint64)r.q[0] >> (64 - v));
441 r.q[0] <<= v;
444 return r;
447 const vdint128 vdint128::operator>>(int v) const {
448 vdint128 r;
450 r.q[0] = q[0];
451 r.q[1] = q[1];
453 if (v >= 64) {
454 sint64 sign = q[1] >> 63;
456 if (v >= 128) {
457 r.q[0] = sign;
458 r.q[1] = sign;
459 return r;
462 r.q[0] = r.q[1];
463 r.q[1] = sign;
465 v -= 64;
468 if (v) {
469 r.q[0] = ((uint64)r.q[0] >> v) + (r.q[1] << (64 - v));
470 r.q[1] >>= v;
473 return r;
476 const vduint128 vduint128::operator<<(int v) const {
477 vduint128 r;
479 r.q[0] = q[0];
480 r.q[1] = q[1];
482 if (v >= 64) {
483 if (v >= 128) {
484 r.q[0] = 0;
485 r.q[1] = 0;
486 return r;
489 r.q[1] = r.q[0];
490 r.q[0] = 0;
492 v -= 64;
495 if (v) {
496 r.q[1] = (r.q[1] << v) + (r.q[0] >> (64 - v));
497 r.q[0] <<= v;
500 return r;
503 const vduint128 vduint128::operator>>(int v) const {
504 vduint128 r;
506 r.q[0] = q[0];
507 r.q[1] = q[1];
509 if (v >= 64) {
510 if (v >= 128) {
511 r.q[0] = 0;
512 r.q[1] = 0;
513 return r;
516 r.q[0] = r.q[1];
517 r.q[1] = 0;
519 v -= 64;
522 if (v) {
523 r.q[0] = (r.q[0] >> v) + (r.q[1] << (64 - v));
524 r.q[1] >>= v;
527 return r;
529 #endif
531 const vdint128 vdint128::operator*(const vdint128& x) const {
532 vdint128 X = x.abs();
533 vdint128 Y = abs();
535 vduint128 bd(VDUMul64x64To128(X.q[0], Y.q[0]));
537 bd.q[1] += X.q[0]*Y.q[1] + X.q[1]*Y.q[0];
539 return (q[1]^x.q[1])<0 ? -vdint128(bd) : vdint128(bd);
// Divide the 128-bit value by a 32-bit divisor using schoolbook long
// division, one 32-bit digit at a time from the most significant down;
// each step's remainder is carried into the next digit through 'accum'.
// NOTE(review): per-digit results follow C++ signed '/' and '%'
// semantics; behavior for negative dividends depends on that chain --
// verify against the asm build if exact negative-value results matter.
// There is no guard for x == 0 (UB, as with built-in division).
const vdint128 vdint128::operator/(int x) const {
	vdint128 r;
	sint64 accum;

	r.d[3] = d[3] / x;	// top digit; its remainder seeds the chain

	accum = ((sint64)(d[3] % x) << 32) + d[2];
	r.d[2] = (sint32)(accum / x);

	accum = ((accum % x) << 32) + d[1];
	r.d[1] = (sint32)(accum / x);

	accum = ((accum % x) << 32) + d[0];
	r.d[0] = (sint32)(accum / x);

	return r;
}
560 vdint128::operator double() const {
561 return (double)(unsigned long)q[0]
562 + ldexp((double)(unsigned long)((unsigned __int64)q[0]>>32), 32)
563 + ldexp((double)q[1], 64);
566 /////////////////////////////////////////////////////////////////////////////
568 const vduint128 vduint128::operator*(const vduint128& x) const {
569 vduint128 result(VDUMul64x64To128(q[0], x.q[0]));
571 result.q[1] += q[0]*x.q[1] + q[1]*x.q[0];
573 return result;
576 #if defined(VD_CPU_X86) && defined(VD_COMPILER_MSVC)
// Full 64x64 -> 128-bit unsigned multiply from four 32x32 partial
// products: with x = A:B and y = C:D (high:low dwords),
// result = AC<<64 + (AD + BC)<<32 + BD.
// __cdecl returning a struct: [esp+4] holds the hidden result pointer,
// which is also returned in eax; x is at [esp+8..12], y at [esp+16..20].
vduint128 __declspec(naked) __cdecl VDUMul64x64To128(uint64 x, uint64 y) {
	__asm {
		mov	ecx,[esp+4]	;ecx = result pointer

		mov	eax,[esp+8]
		mul	dword ptr [esp+16]	;EDX:EAX = BD
		mov	[ecx+0],eax
		mov	[ecx+4],edx

		mov	eax,[esp+12]
		mul	dword ptr [esp+20]	;EDX:EAX = AC
		mov	[ecx+8],eax
		mov	[ecx+12],edx

		mov	eax,[esp+8]
		mul	dword ptr [esp+20]	;EDX:EAX = BC
		add	[ecx+4],eax	;fold into bits 32..95, carry to the top
		adc	[ecx+8],edx
		adc	dword ptr [ecx+12], 0

		mov	eax,[esp+12]
		mul	dword ptr [esp+16]	;EDX:EAX = AD
		add	[ecx+4],eax
		adc	[ecx+8],edx
		adc	dword ptr [ecx+12], 0

		mov	eax, ecx	;return the hidden result pointer
		ret
	}
}
607 #elif !defined(VD_CPU_AMD64)
608 vduint128 VDUMul64x64To128(uint64 x, uint64 y) {
609 uint32 x0 = (uint32)x;
610 uint32 x1 = (uint32)(x >> 32);
611 uint32 y0 = (uint32)y;
612 uint32 y1 = (uint32)(y >> 32);
614 uint64 m0 = (uint64)x0*y0;
615 uint64 m1a = (uint64)x1*y0;
616 uint64 m1b = (uint64)x0*y1;
617 uint64 m2 = (uint64)x1*y1;
619 uint32 s0 = (uint32)m0;
620 uint32 s1a = (uint32)(m0 >> 32);
621 uint32 s1b = (uint32)m1a;
622 uint32 s1c = (uint32)m1b;
623 uint32 s2a = (uint32)(m1a >> 32);
624 uint32 s2b = (uint32)(m1b >> 32);
625 uint32 s2c = (uint32)m2;
626 uint32 s3 = (uint32)(m2 >> 32);
628 vduint128 r;
629 r.d[0] = s0;
630 r.d[1] = s1a + s1b;
631 r.d[2] = r.d[1] < s1b;
632 r.d[1] += s1c;
633 r.d[2] += r.d[1] < s1c;
634 r.d[2] += s2a;
635 r.d[3] = r.d[2] < s2a;
636 r.d[2] += s2b;
637 r.d[3] += r.d[2] < s2b;
638 r.d[2] += s2c;
639 r.d[3] += r.d[2] < s2c;
640 r.d[3] += s3;
642 return r;
644 #endif
// 128/64 -> 64-bit unsigned division via bit-at-a-time restoring long
// division; returns the low 64 bits of the quotient and writes the
// remainder through the out-parameter.
// NOTE(review): no guard against divisor == 0 or a quotient wider than
// 64 bits (dividend.q[1] >= divisor) -- callers must ensure both.
uint64 VDUDiv128x64To64(const vduint128& dividend, uint64 divisor, uint64& remainder) {
	vduint128 temp(dividend);
	vduint128 divisor2(divisor);

	divisor2 <<= 63;	// align the divisor with the dividend's top half

	uint64 result = 0;
	for(int i=0; i<64; ++i) {
		result += result;	// shift a 0 into the quotient
		if (temp >= divisor2) {
			temp -= divisor2;
			++result;	// divisor fit: this quotient bit is 1
		}
		temp += temp;		// shift the working dividend up one bit
	}

	// after 64 doublings the remainder sits in the high half of temp
	remainder = temp.q[1];

	return result;