VEX/priv/guest_generic_x87.c

   1
   2 /*---------------------------------------------------------------*/
   3 /*--- begin                               guest_generic_x87.c ---*/
   4 /*---------------------------------------------------------------*/
   5
   6 /*
   7    This file is part of Valgrind, a dynamic binary instrumentation
   8    framework.
   9
  10    Copyright (C) 2004-2017 OpenWorks LLP
  11       info@open-works.net
  12
  13    This program is free software; you can redistribute it and/or
  14    modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation; either version 2 of the
  16    License, or (at your option) any later version.
  17
  18    This program is distributed in the hope that it will be useful, but
  19    WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21    General Public License for more details.
  22
  23    You should have received a copy of the GNU General Public License
  24    along with this program; if not, see <http://www.gnu.org/licenses/>.
  25
  26    The GNU General Public License is contained in the file COPYING.
  27
  28    Neither the names of the U.S. Department of Energy nor the
  29    University of California nor the names of its contributors may be
  30    used to endorse or promote products derived from this software
  31    without prior written permission.
  32 */
  33
  34 /* This file contains functions for doing some x87-specific
  35    operations.  Both the amd64 and x86 front ends (guests) indirectly
  36    call these functions via guest helper calls.  By putting them here,
  37    code duplication is avoided.  Some of these functions are tricky
  38    and hard to verify, so there is much to be said for only having one
  39    copy thereof.
  40 */
  41
  42 #include "libvex_basictypes.h"
  43
  44 #include "main_util.h"
  45 #include "guest_generic_x87.h"
  46
  47
  48 /* 80 and 64-bit floating point formats:
  49
  50    80-bit:
  51
  52     S  0       0-------0      zero
  53     S  0       0X------X      denormals
  54     S  1-7FFE  1X------X      normals (all normals have leading 1)
  55     S  7FFF    10------0      infinity
  56     S  7FFF    10X-----X      snan
  57     S  7FFF    11X-----X      qnan
  58
  59    S is the sign bit.  For runs X----X, at least one of the Xs must be
  60    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
  61    there is an explicitly represented leading 1, and a sign bit,
  62    giving 80 in total.
  63
  64    64-bit avoids the confusion of an explicitly represented leading 1
  65    and so is simpler:
  66
  67     S  0      0------0   zero
  68     S  0      X------X   denormals
  69     S  1-7FE  any        normals
  70     S  7FF    0------0   infinity
  71     S  7FF    0X-----X   snan
  72     S  7FF    1X-----X   qnan
  73
  74    Exponent is 11 bits, fractional part is 52 bits, and there is a
  75    sign bit, giving 64 in total.
  76 */
  77
  78
  79 static inline UInt read_bit_array ( UChar* arr, UInt n )
  80 {
  81    UChar c = arr[n >> 3];
  82    c >>= (n&7);
  83    return c & 1;
  84 }
  85
  86 static inline void write_bit_array ( UChar* arr, UInt n, UInt b )
  87 {
  88    UChar c = arr[n >> 3];
  89    c = toUChar( c & ~(1 << (n&7)) );
  90    c = toUChar( c | ((b&1) << (n&7)) );
  91    arr[n >> 3] = c;
  92 }
  93
  94 /* Convert an IEEE754 double (64-bit) into an x87 extended double
  95    (80-bit), mimicing the hardware fairly closely.  Both numbers are
  96    stored little-endian.  Limitations, all of which could be fixed,
  97    given some level of hassle:
  98
  99    * Identity of NaNs is not preserved.
 100
 101    See comments in the code for more details.
 102 */
 103 void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
 104 {
 105    Bool  mantissaIsZero;
 106    Int   bexp, i, j, shift;
 107    UChar sign;
 108
 109    sign = toUChar( (f64[7] >> 7) & 1 );
 110    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
 111    bexp &= 0x7FF;
 112
 113    mantissaIsZero = False;
 114    if (bexp == 0 || bexp == 0x7FF) {
 115       /* We'll need to know whether or not the mantissa (bits 51:0) is
 116          all zeroes in order to handle these cases.  So figure it
 117          out. */
 118       mantissaIsZero
 119          = toBool(
 120               (f64[6] & 0x0F) == 0
 121               && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
 122               && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
 123            );
 124    }
 125
 126    /* If the exponent is zero, either we have a zero or a denormal.
 127       Produce a zero.  This is a hack in that it forces denormals to
 128       zero.  Could do better. */
 129    if (bexp == 0) {
 130       f80[9] = toUChar( sign << 7 );
 131       f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
 132              = f80[3] = f80[2] = f80[1] = f80[0] = 0;
 133
 134       if (mantissaIsZero)
 135          /* It really is zero, so that's all we can do. */
 136          return;
 137
 138       /* There is at least one 1-bit in the mantissa.  So it's a
 139          potentially denormalised double -- but we can produce a
 140          normalised long double.  Count the leading zeroes in the
 141          mantissa so as to decide how much to bump the exponent down
 142          by.  Note, this is SLOW. */
 143       shift = 0;
 144       for (i = 51; i >= 0; i--) {
 145         if (read_bit_array(f64, i))
 146            break;
 147         shift++;
 148       }
 149
 150       /* and copy into place as many bits as we can get our hands on. */
 151       j = 63;
 152       for (i = 51 - shift; i >= 0; i--) {
 153          write_bit_array( f80, j,
 154          read_bit_array( f64, i ) );
 155          j--;
 156       }
 157
 158       /* Set the exponent appropriately, and we're done. */
 159       bexp -= shift;
 160       bexp += (16383 - 1023);
 161       f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
 162       f80[8] = toUChar( bexp & 0xFF );
 163       return;
 164    }
 165
 166    /* If the exponent is 7FF, this is either an Infinity, a SNaN or
 167       QNaN, as determined by examining bits 51:0, thus:
 168           0  ... 0    Inf
 169           0X ... X    SNaN
 170           1X ... X    QNaN
 171       where at least one of the Xs is not zero.
 172    */
 173    if (bexp == 0x7FF) {
 174       if (mantissaIsZero) {
 175          /* Produce an appropriately signed infinity:
 176             S 1--1 (15)  1  0--0 (63)
 177          */
 178          f80[9] = toUChar( (sign << 7) | 0x7F );
 179          f80[8] = 0xFF;
 180          f80[7] = 0x80;
 181          f80[6] = f80[5] = f80[4] = f80[3]
 182                 = f80[2] = f80[1] = f80[0] = 0;
 183          return;
 184       }
 185       /* So it's either a QNaN or SNaN.  Distinguish by considering
 186          bit 51.  Note, this destroys all the trailing bits
 187          (identity?) of the NaN.  IEEE754 doesn't require preserving
 188          these (it only requires that there be one QNaN value and one
 189          SNaN value), but x87 does seem to have some ability to
 190          preserve them.  Anyway, here, the NaN's identity is
 191          destroyed.  Could be improved. */
 192       if (f64[6] & 8) {
 193          /* QNaN.  Make a canonical QNaN:
 194             S 1--1 (15)  1 1  0--0 (62)
 195          */
 196          f80[9] = toUChar( (sign << 7) | 0x7F );
 197          f80[8] = 0xFF;
 198          f80[7] = 0xC0;
 199          f80[6] = f80[5] = f80[4] = f80[3]
 200                 = f80[2] = f80[1] = f80[0] = 0x00;
 201       } else {
 202          /* SNaN.  Make a SNaN:
 203             S 1--1 (15)  1 0  1--1 (62)
 204          */
 205          f80[9] = toUChar( (sign << 7) | 0x7F );
 206          f80[8] = 0xFF;
 207          f80[7] = 0xBF;
 208          f80[6] = f80[5] = f80[4] = f80[3]
 209                 = f80[2] = f80[1] = f80[0] = 0xFF;
 210       }
 211       return;
 212    }
 213
 214    /* It's not a zero, denormal, infinity or nan.  So it must be a
 215       normalised number.  Rebias the exponent and build the new
 216       number.  */
 217    bexp += (16383 - 1023);
 218
 219    f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
 220    f80[8] = toUChar( bexp & 0xFF );
 221    f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
 222                               | ((f64[5] >> 5) & 7) );
 223    f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
 224    f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
 225    f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
 226    f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
 227    f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
 228    f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
 229    f80[0] = toUChar( 0 );
 230 }
 231
 232
 233 /* Convert an x87 extended double (80-bit) into an IEEE 754 double
 234    (64-bit), mimicking the hardware fairly closely.  Both numbers are
 235    stored little-endian.  Limitations, both of which could be fixed,
 236    given some level of hassle:
 237
 238    * Rounding following truncation could be a bit better.
 239
 240    * Identity of NaNs is not preserved.
 241
 242    See comments in the code for more details.
 243 */
 244 void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
 245 {
 246    Bool  isInf;
 247    Int   bexp, i, j;
 248    UChar sign;
 249
 250    sign = toUChar((f80[9] >> 7) & 1);
 251    bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
 252    bexp &= 0x7FFF;
 253
 254    /* If the exponent is zero, either we have a zero or a denormal.
 255       But an extended precision denormal becomes a double precision
 256       zero, so in either case, just produce the appropriately signed
 257       zero. */
 258    if (bexp == 0) {
 259       f64[7] = toUChar(sign << 7);
 260       f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
 261       return;
 262    }
 263
 264    /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
 265       QNaN, as determined by examining bits 62:0, thus:
 266           10  ... 0    Inf
 267           10X ... X    SNaN
 268           11X ... X    QNaN
 269       where at least one of the Xs is not zero.
 270    */
 271    if (bexp == 0x7FFF) {
 272       isInf = toBool(
 273                  (f80[7] & 0x7F) == 0
 274                  && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
 275                  && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
 276                  && f80[0] == 0
 277               );
 278       if (isInf) {
 279          if (0 == (f80[7] & 0x80))
 280             goto wierd_NaN;
 281          /* Produce an appropriately signed infinity:
 282             S 1--1 (11)  0--0 (52)
 283          */
 284          f64[7] = toUChar((sign << 7) | 0x7F);
 285          f64[6] = 0xF0;
 286          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
 287          return;
 288       }
 289       /* So it's either a QNaN or SNaN.  Distinguish by considering
 290          bit 61.  Note, this destroys all the trailing bits
 291          (identity?) of the NaN.  IEEE754 doesn't require preserving
 292          these (it only requires that there be one QNaN value and one
 293          SNaN value), but x87 does seem to have some ability to
 294          preserve them.  Anyway, here, the NaN's identity is
 295          destroyed.  Could be improved. */
 296       if (f80[7] & 0x40) {
 297          /* QNaN.  Make a canonical QNaN:
 298             S 1--1 (11)  1  0--0 (51)
 299          */
 300          f64[7] = toUChar((sign << 7) | 0x7F);
 301          f64[6] = 0xF8;
 302          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0x00;
 303       } else {
 304          /* SNaN.  Make a SNaN:
 305             S 1--1 (11)  0  1--1 (51)
 306          */
 307          f64[7] = toUChar((sign << 7) | 0x7F);
 308          f64[6] = 0xF7;
 309          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
 310       }
 311       return;
 312    }
 313
 314    /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
 315       zero, the x87 FPU appears to consider the number denormalised
 316       and converts it to a QNaN. */
 317    if (0 == (f80[7] & 0x80)) {
 318       wierd_NaN:
 319       /* Strange hardware QNaN:
 320          S 1--1 (11)  1  0--0 (51)
 321       */
 322       /* On a PIII, these QNaNs always appear with sign==1.  I have
 323          no idea why. */
 324       f64[7] = (1 /*sign*/ << 7) | 0x7F;
 325       f64[6] = 0xF8;
 326       f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
 327       return;
 328    }
 329
 330    /* It's not a zero, denormal, infinity or nan.  So it must be a
 331       normalised number.  Rebias the exponent and consider. */
 332    bexp -= (16383 - 1023);
 333    if (bexp >= 0x7FF) {
 334       /* It's too big for a double.  Construct an infinity. */
 335       f64[7] = toUChar((sign << 7) | 0x7F);
 336       f64[6] = 0xF0;
 337       f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
 338       return;
 339    }
 340
 341    if (bexp <= 0) {
 342       /* It's too small for a normalised double.  First construct a
 343          zero and then see if it can be improved into a denormal.  */
 344       f64[7] = toUChar(sign << 7);
 345       f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
 346
 347       if (bexp < -52)
 348          /* Too small even for a denormal. */
 349          return;
 350
 351       /* Ok, let's make a denormal.  Note, this is SLOW. */
 352       /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
 353          indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
 354       /* bexp is in range -52 .. 0 inclusive */
 355       for (i = 63; i >= 0; i--) {
 356          j = i - 12 + bexp;
 357          if (j < 0) break;
 358          /* We shouldn't really call vassert from generated code. */
 359          vassert(j >= 0 && j < 52);
 360          write_bit_array ( f64,
 361                            j,
 362                            read_bit_array ( f80, i ) );
 363       }
 364       /* and now we might have to round ... */
 365       if (read_bit_array(f80, 10+1 - bexp) == 1)
 366          goto do_rounding;
 367
 368       return;
 369    }
 370
 371    /* Ok, it's a normalised number which is representable as a double.
 372       Copy the exponent and mantissa into place. */
 373    /*
 374    for (i = 0; i < 52; i++)
 375       write_bit_array ( f64,
 376                         i,
 377                         read_bit_array ( f80, i+11 ) );
 378    */
 379    f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
 380    f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
 381    f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
 382    f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
 383    f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
 384    f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
 385
 386    f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
 387
 388    f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
 389
 390    /* Now consider any rounding that needs to happen as a result of
 391       truncating the mantissa. */
 392    if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
 393
 394       /* If the bottom bits of f80 are "100 0000 0000", then the
 395          infinitely precise value is deemed to be mid-way between the
 396          two closest representable values.  Since we're doing
 397          round-to-nearest (the default mode), in that case it is the
 398          bit immediately above which indicates whether we should round
 399          upwards or not -- if 0, we don't.  All that is encapsulated
 400          in the following simple test. */
 401       if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
 402          return;
 403
 404       do_rounding:
 405       /* Round upwards.  This is a kludge.  Once in every 2^24
 406          roundings (statistically) the bottom three bytes are all 0xFF
 407          and so we don't round at all.  Could be improved. */
 408       if (f64[0] != 0xFF) {
 409          f64[0]++;
 410       }
 411       else
 412       if (f64[0] == 0xFF && f64[1] != 0xFF) {
 413          f64[0] = 0;
 414          f64[1]++;
 415       }
 416       else
 417       if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
 418          f64[0] = 0;
 419          f64[1] = 0;
 420          f64[2]++;
 421       }
 422       /* else we don't round, but we should. */
 423    }
 424 }
 425
 426
 427 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 428 /* Extract the signed significand or exponent component as per
 429    fxtract.  Arg and result are doubles travelling under the guise of
 430    ULongs.  Returns significand when getExp is zero and exponent
 431    otherwise. */
 432 ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp )
 433 {
 434    ULong  uSig, uExp;
 435    /* Long   sSig; */
 436    Int    sExp, i;
 437    UInt   sign, expExp;
 438
 439    /*
 440     S  7FF    0------0   infinity
 441     S  7FF    0X-----X   snan
 442     S  7FF    1X-----X   qnan
 443    */
 444    const ULong posInf  = 0x7FF0000000000000ULL;
 445    const ULong negInf  = 0xFFF0000000000000ULL;
 446    const ULong nanMask = 0x7FF0000000000000ULL;
 447    const ULong qNan    = 0x7FF8000000000000ULL;
 448    const ULong posZero = 0x0000000000000000ULL;
 449    const ULong negZero = 0x8000000000000000ULL;
 450    const ULong bit51   = 1ULL << 51;
 451    const ULong bit52   = 1ULL << 52;
 452    const ULong sigMask = bit52 - 1;
 453
 454    /* Mimic Core i5 behaviour for special cases. */
 455    if (arg == posInf)
 456       return posInf; /* Both significand and exponent are posInf. */
 457    if (arg == negInf)
 458       return getExp ? posInf : negInf;
 459    if ((arg & nanMask) == nanMask)
 460       return qNan | (arg & (1ULL << 63));
 461    if (arg == posZero)
 462       return getExp ? negInf : posZero;
 463    if (arg == negZero)
 464       return getExp ? negInf : negZero;
 465
 466    /* Split into sign, exponent and significand. */
 467    sign = ((UInt)(arg >> 63)) & 1;
 468
 469    /* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */
 470    uSig = arg & sigMask;
 471
 472    /* Get the exponent. */
 473    sExp = ((Int)(arg >> 52)) & 0x7FF;
 474
 475    /* Deal with denormals: if the exponent is zero, then the
 476       significand cannot possibly be zero (negZero/posZero are handled
 477       above).  Shift the significand left until bit 51 of it becomes
 478       1, and decrease the exponent accordingly.
 479    */
 480    if (sExp == 0) {
 481       for (i = 0; i < 52; i++) {
 482          if (uSig & bit51)
 483             break;
 484          uSig <<= 1;
 485          sExp--;
 486       }
 487       uSig <<= 1;
 488    } else {
 489       /* Add the implied leading-1 in the significand. */
 490       uSig |= bit52;
 491    }
 492
 493    /* Roll in the sign. */
 494    /* sSig = uSig; */
 495    /* if (sign) sSig =- sSig; */
 496
 497    /* Convert sig into a double.  This should be an exact conversion.
 498       Then divide by 2^52, which should give a value in the range 1.0
 499       to 2.0-epsilon, at least for normalised args. */
 500    /* dSig = (Double)sSig; */
 501    /* dSig /= 67108864.0;  */ /* 2^26 */
 502    /* dSig /= 67108864.0;  */ /* 2^26 */
 503    uSig &= sigMask;
 504    uSig |= 0x3FF0000000000000ULL;
 505    if (sign)
 506       uSig ^= negZero;
 507
 508    /* Convert exp into a double.  Also an exact conversion. */
 509    /* dExp = (Double)(sExp - 1023); */
 510    sExp -= 1023;
 511    if (sExp == 0) {
 512       uExp = 0;
 513    } else {
 514       uExp   = sExp < 0 ? -sExp : sExp;
 515       expExp = 0x3FF +52;
 516       /* 1 <= uExp <= 1074 */
 517       /* Skip first 42 iterations of normalisation loop as we know they
 518          will always happen */
 519       uExp <<= 42;
 520       expExp -= 42;
 521       for (i = 0; i < 52-42; i++) {
 522          if (uExp & bit52)
 523             break;
 524          uExp <<= 1;
 525          expExp--;
 526       }
 527       uExp &= sigMask;
 528       uExp |= ((ULong)expExp) << 52;
 529       if (sExp < 0) uExp ^= negZero;
 530    }
 531
 532    return getExp ? uExp : uSig;
 533 }
 534
 535
 536
 537 /*---------------------------------------------------------*/
 538 /*--- SSE4.2 PCMP{E,I}STR{I,M} helpers                  ---*/
 539 /*---------------------------------------------------------*/
 540
 541 /* We need the definitions for OSZACP eflags/rflags offsets.
 542    #including guest_{amd64,x86}_defs.h causes chaos, so just copy the
 543    required values directly.  They are not going to change in the
 544    foreseeable future :-)
 545 */
 546
 547 #define SHIFT_O   11
 548 #define SHIFT_S   7
 549 #define SHIFT_Z   6
 550 #define SHIFT_A   4
 551 #define SHIFT_C   0
 552 #define SHIFT_P   2
 553
 554 #define MASK_O    (1 << SHIFT_O)
 555 #define MASK_S    (1 << SHIFT_S)
 556 #define MASK_Z    (1 << SHIFT_Z)
 557 #define MASK_A    (1 << SHIFT_A)
 558 #define MASK_C    (1 << SHIFT_C)
 559 #define MASK_P    (1 << SHIFT_P)
 560
 561
 562 /* Count leading zeroes, w/ 0-produces-32 semantics, a la Hacker's
 563    Delight. */
 564 static UInt clz32 ( UInt x )
 565 {
 566    Int y, m, n;
 567    y = -(x >> 16);
 568    m = (y >> 16) & 16;
 569    n = 16 - m;
 570    x = x >> m;
 571    y = x - 0x100;
 572    m = (y >> 16) & 8;
 573    n = n + m;
 574    x = x << m;
 575    y = x - 0x1000;
 576    m = (y >> 16) & 4;
 577    n = n + m;
 578    x = x << m;
 579    y = x - 0x4000;
 580    m = (y >> 16) & 2;
 581    n = n + m;
 582    x = x << m;
 583    y = x >> 14;
 584    m = y & ~(y >> 1);
 585    return n + 2 - m;
 586 }
 587
 588 static UInt ctz32 ( UInt x )
 589 {
 590    return 32 - clz32((~x) & (x-1));
 591 }
 592
 593 /* Convert a 4-bit value to a 32-bit value by cloning each bit 8
 594    times.  There's surely a better way to do this, but I don't know
 595    what it is. */
 596 static UInt bits4_to_bytes4 ( UInt bits4 )
 597 {
 598    UInt r = 0;
 599    r |= (bits4 & 1) ? 0x000000FF : 0;
 600    r |= (bits4 & 2) ? 0x0000FF00 : 0;
 601    r |= (bits4 & 4) ? 0x00FF0000 : 0;
 602    r |= (bits4 & 8) ? 0xFF000000 : 0;
 603    return r;
 604 }
 605
 606
 607 /* Convert a 2-bit value to a 32-bit value by cloning each bit 16
 608    times.  There's surely a better way to do this, but I don't know
 609    what it is. */
 610 static UInt bits2_to_bytes4 ( UInt bits2 )
 611 {
 612    UInt r = 0;
 613    r |= (bits2 & 1) ? 0x0000FFFF : 0;
 614    r |= (bits2 & 2) ? 0xFFFF0000 : 0;
 615    return r;
 616 }
 617
 618
 619 /* Given partial results from a pcmpXstrX operation (intRes1,
 620    basically), generate an I- or M-format output value, also the new
 621    OSZACP flags.  */
 622 static
 623 void compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV,
 624                                    /*OUT*/UInt* resOSZACP,
 625                                    UInt intRes1,
 626                                    UInt zmaskL, UInt zmaskR,
 627                                    UInt validL,
 628                                    UInt pol, UInt idx,
 629                                    Bool isxSTRM )
 630 {
 631    vassert((pol >> 2) == 0);
 632    vassert((idx >> 1) == 0);
 633
 634    UInt intRes2 = 0;
 635    switch (pol) {
 636       case 0: intRes2 = intRes1;          break; // pol +
 637       case 1: intRes2 = ~intRes1;         break; // pol -
 638       case 2: intRes2 = intRes1;          break; // pol m+
 639       case 3: intRes2 = intRes1 ^ validL; break; // pol m-
 640    }
 641    intRes2 &= 0xFFFF;
 642
 643    if (isxSTRM) {
 644
 645       // generate M-format output (a bit or byte mask in XMM0)
 646       if (idx) {
 647          resV->w32[0] = bits4_to_bytes4( (intRes2 >>  0) & 0xF );
 648          resV->w32[1] = bits4_to_bytes4( (intRes2 >>  4) & 0xF );
 649          resV->w32[2] = bits4_to_bytes4( (intRes2 >>  8) & 0xF );
 650          resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF );
 651       } else {
 652          resV->w32[0] = intRes2 & 0xFFFF;
 653          resV->w32[1] = 0;
 654          resV->w32[2] = 0;
 655          resV->w32[3] = 0;
 656       }
 657
 658    } else {
 659
 660       // generate I-format output (an index in ECX)
 661       // generate ecx value
 662       UInt newECX = 0;
 663       if (idx) {
 664          // index of ms-1-bit
 665          newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
 666       } else {
 667          // index of ls-1-bit
 668          newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
 669       }
 670
 671       resV->w32[0] = newECX;
 672       resV->w32[1] = 0;
 673       resV->w32[2] = 0;
 674       resV->w32[3] = 0;
 675
 676    }
 677
 678    // generate new flags, common to all ISTRI and ISTRM cases
 679    *resOSZACP    // A, P are zero
 680      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
 681      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
 682      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
 683      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
 684 }
 685
 686
 687 /* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
 688    basically), generate an I- or M-format output value, also the new
 689    OSZACP flags.  */
 690 static
 691 void compute_PCMPxSTRx_gen_output_wide (/*OUT*/V128* resV,
 692                                         /*OUT*/UInt* resOSZACP,
 693                                         UInt intRes1,
 694                                         UInt zmaskL, UInt zmaskR,
 695                                         UInt validL,
 696                                         UInt pol, UInt idx,
 697                                         Bool isxSTRM )
 698 {
 699    vassert((pol >> 2) == 0);
 700    vassert((idx >> 1) == 0);
 701
 702    UInt intRes2 = 0;
 703    switch (pol) {
 704       case 0: intRes2 = intRes1;          break; // pol +
 705       case 1: intRes2 = ~intRes1;         break; // pol -
 706       case 2: intRes2 = intRes1;          break; // pol m+
 707       case 3: intRes2 = intRes1 ^ validL; break; // pol m-
 708    }
 709    intRes2 &= 0xFF;
 710
 711    if (isxSTRM) {
 712
 713       // generate M-format output (a bit or byte mask in XMM0)
 714       if (idx) {
 715          resV->w32[0] = bits2_to_bytes4( (intRes2 >> 0) & 0x3 );
 716          resV->w32[1] = bits2_to_bytes4( (intRes2 >> 2) & 0x3 );
 717          resV->w32[2] = bits2_to_bytes4( (intRes2 >> 4) & 0x3 );
 718          resV->w32[3] = bits2_to_bytes4( (intRes2 >> 6) & 0x3 );
 719       } else {
 720          resV->w32[0] = intRes2 & 0xFF;
 721          resV->w32[1] = 0;
 722          resV->w32[2] = 0;
 723          resV->w32[3] = 0;
 724       }
 725
 726    } else {
 727
 728       // generate I-format output (an index in ECX)
 729       // generate ecx value
 730       UInt newECX = 0;
 731       if (idx) {
 732          // index of ms-1-bit
 733          newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
 734       } else {
 735          // index of ls-1-bit
 736          newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
 737       }
 738
 739       resV->w32[0] = newECX;
 740       resV->w32[1] = 0;
 741       resV->w32[2] = 0;
 742       resV->w32[3] = 0;
 743
 744    }
 745
 746    // generate new flags, common to all ISTRI and ISTRM cases
 747    *resOSZACP    // A, P are zero
 748      = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
 749      | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
 750      | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
 751      | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
 752 }
 753
 754
 755 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
 756    variants on 8-bit data.
 757
 758    For xSTRI variants, the new ECX value is placed in the 32 bits
 759    pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
 760    variants, the result is a 128 bit value and is placed at *resV in
 761    the obvious way.
 762
 763    For all variants, the new OSZACP value is placed at *resOSZACP.
 764
 765    argLV and argRV are the vector args.  The caller must prepare a
 766    16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
  767    must be 1 for each zero byte of the respective arg.  For ESTRx
 768    variants this is derived from the explicit length indication, and
 769    must be 0 in all places except at the bit index corresponding to
 770    the valid length (0 .. 16).  If the valid length is 16 then the
 771    mask must be all zeroes.  In all cases, bits 31:16 must be zero.
 772
  773    imm8 is the original immediate from the instruction.  isxSTRM
  774    indicates whether this is an xSTRM or xSTRI variant, which controls
  775    how much of *resV is written.
 776
 777    If the given imm8 case can be handled, the return value is True.
  778    If not, False is returned, and neither *resV nor *resOSZACP is
  779    altered.
 780 */
 781
 782 Bool compute_PCMPxSTRx ( /*OUT*/V128* resV,
 783                          /*OUT*/UInt* resOSZACP,
 784                          V128* argLV,  V128* argRV,
 785                          UInt zmaskL, UInt zmaskR,
 786                          UInt imm8,   Bool isxSTRM )
 787 {
 788    vassert(imm8 < 0x80);
 789    vassert((zmaskL >> 16) == 0);
 790    vassert((zmaskR >> 16) == 0);
 791
 792    /* Explicitly reject any imm8 values that haven't been validated,
 793       even if they would probably work.  Life is too short to have
 794       unvalidated cases in the code base. */
 795    switch (imm8) {
 796       case 0x00: case 0x02:
 797       case 0x08: case 0x0A: case 0x0C: case 0x0E:
 798       case 0x10: case 0x12: case 0x14:
 799       case 0x18: case 0x1A:
 800       case 0x30:            case 0x34:
 801       case 0x38: case 0x3A:
 802       case 0x40: case 0x42: case 0x44: case 0x46:
 803                  case 0x4A:
 804                  case 0x62:
 805       case 0x70: case 0x72:
 806          break;
 807       default:
 808          return False;
 809    }
 810
 811    UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
 812    UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
 813    UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
 814    UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
 815
 816    /*----------------------------------------*/
 817    /*-- strcmp on byte data                --*/
 818    /*----------------------------------------*/
 819
 820    if (agg == 2/*equal each, aka strcmp*/
 821        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
 822       Int    i;
 823       UChar* argL = (UChar*)argLV;
 824       UChar* argR = (UChar*)argRV;
 825       UInt boolResII = 0;
 826       for (i = 15; i >= 0; i--) {
 827          UChar cL  = argL[i];
 828          UChar cR  = argR[i];
 829          boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
 830       }
 831       UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
 832       UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
 833
 834       // do invalidation, common to all equal-each cases
 835       UInt intRes1
 836          = (boolResII & validL & validR)  // if both valid, use cmpres
 837            | (~ (validL | validR));       // if both invalid, force 1
 838                                           // else force 0
 839       intRes1 &= 0xFFFF;
 840
 841       // generate I-format output
 842       compute_PCMPxSTRx_gen_output(
 843          resV, resOSZACP,
 844          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
 845       );
 846
 847       return True;
 848    }
 849
 850    /*----------------------------------------*/
 851    /*-- set membership on byte data        --*/
 852    /*----------------------------------------*/
 853
 854    if (agg == 0/*equal any, aka find chars in a set*/
 855        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
 856       /* argL: the string,  argR: charset */
 857       UInt   si, ci;
 858       UChar* argL    = (UChar*)argLV;
 859       UChar* argR    = (UChar*)argRV;
 860       UInt   boolRes = 0;
 861       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
 862       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
 863
 864       for (si = 0; si < 16; si++) {
 865          if ((validL & (1 << si)) == 0)
 866             // run off the end of the string.
 867             break;
 868          UInt m = 0;
 869          for (ci = 0; ci < 16; ci++) {
 870             if ((validR & (1 << ci)) == 0) break;
 871             if (argR[ci] == argL[si]) { m = 1; break; }
 872          }
 873          boolRes |= (m << si);
 874       }
 875
 876       // boolRes is "pre-invalidated"
 877       UInt intRes1 = boolRes & 0xFFFF;
 878
 879       // generate I-format output
 880       compute_PCMPxSTRx_gen_output(
 881          resV, resOSZACP,
 882          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
 883       );
 884
 885       return True;
 886    }
 887
 888    /*----------------------------------------*/
 889    /*-- substring search on byte data      --*/
 890    /*----------------------------------------*/
 891
 892    if (agg == 3/*equal ordered, aka substring search*/
 893        && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
 894
 895       /* argL: haystack,  argR: needle */
 896       UInt   ni, hi;
 897       UChar* argL    = (UChar*)argLV;
 898       UChar* argR    = (UChar*)argRV;
 899       UInt   boolRes = 0;
 900       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
 901       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
 902       for (hi = 0; hi < 16; hi++) {
 903          UInt m = 1;
 904          for (ni = 0; ni < 16; ni++) {
 905             if ((validR & (1 << ni)) == 0) break;
 906             UInt i = ni + hi;
 907             if (i >= 16) break;
 908             if (argL[i] != argR[ni]) { m = 0; break; }
 909          }
 910          boolRes |= (m << hi);
 911          if ((validL & (1 << hi)) == 0)
 912             // run off the end of the haystack
 913             break;
 914       }
 915
 916       // boolRes is "pre-invalidated"
 917       UInt intRes1 = boolRes & 0xFFFF;
 918
 919       // generate I-format output
 920       compute_PCMPxSTRx_gen_output(
 921          resV, resOSZACP,
 922          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
 923       );
 924
 925       return True;
 926    }
 927
 928    /*----------------------------------------*/
 929    /*-- ranges, unsigned byte data         --*/
 930    /*----------------------------------------*/
 931
 932    if (agg == 1/*ranges*/
 933        && fmt == 0/*ub*/) {
 934
 935       /* argL: string,  argR: range-pairs */
 936       UInt   ri, si;
 937       UChar* argL    = (UChar*)argLV;
 938       UChar* argR    = (UChar*)argRV;
 939       UInt   boolRes = 0;
 940       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
 941       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
 942       for (si = 0; si < 16; si++) {
 943          if ((validL & (1 << si)) == 0)
 944             // run off the end of the string
 945             break;
 946          UInt m = 0;
 947          for (ri = 0; ri < 16; ri += 2) {
 948             if ((validR & (3 << ri)) != (3 << ri)) break;
 949             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
 950                m = 1; break;
 951             }
 952          }
 953          boolRes |= (m << si);
 954       }
 955
 956       // boolRes is "pre-invalidated"
 957       UInt intRes1 = boolRes & 0xFFFF;
 958
 959       // generate I-format output
 960       compute_PCMPxSTRx_gen_output(
 961          resV, resOSZACP,
 962          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
 963       );
 964
 965       return True;
 966    }
 967
 968    /*----------------------------------------*/
 969    /*-- ranges, signed byte data           --*/
 970    /*----------------------------------------*/
 971
 972    if (agg == 1/*ranges*/
 973        && fmt == 2/*sb*/) {
 974
 975       /* argL: string,  argR: range-pairs */
 976       UInt   ri, si;
 977       Char*  argL    = (Char*)argLV;
 978       Char*  argR    = (Char*)argRV;
 979       UInt   boolRes = 0;
 980       UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
 981       UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
 982       for (si = 0; si < 16; si++) {
 983          if ((validL & (1 << si)) == 0)
 984             // run off the end of the string
 985             break;
 986          UInt m = 0;
 987          for (ri = 0; ri < 16; ri += 2) {
 988             if ((validR & (3 << ri)) != (3 << ri)) break;
 989             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
 990                m = 1; break;
 991             }
 992          }
 993          boolRes |= (m << si);
 994       }
 995
 996       // boolRes is "pre-invalidated"
 997       UInt intRes1 = boolRes & 0xFFFF;
 998
 999       // generate I-format output
1000       compute_PCMPxSTRx_gen_output(
1001          resV, resOSZACP,
1002          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1003       );
1004
1005       return True;
1006    }
1007
1008    return False;
1009 }
1010
1011
1012 /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
1013    variants on 16-bit characters.
1014
1015    For xSTRI variants, the new ECX value is placed in the 32 bits
1016    pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
1017    variants, the result is a 128 bit value and is placed at *resV in
1018    the obvious way.
1019
1020    For all variants, the new OSZACP value is placed at *resOSZACP.
1021
1022    argLV and argRV are the vector args.  The caller must prepare a
1023    8-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
 1024    must be 1 for each zero 16-bit element of the respective arg.  For ESTRx
1025    variants this is derived from the explicit length indication, and
1026    must be 0 in all places except at the bit index corresponding to
1027    the valid length (0 .. 8).  If the valid length is 8 then the
1028    mask must be all zeroes.  In all cases, bits 31:8 must be zero.
1029
 1030    imm8 is the original immediate from the instruction.  isxSTRM
 1031    indicates whether this is a xSTRM or xSTRI variant, which controls
 1032    how much of *resV is written.
1033
1034    If the given imm8 case can be handled, the return value is True.
 1035    If not, False is returned, and neither *resV nor *resOSZACP is
1036    altered.
1037 */
1038
1039 Bool compute_PCMPxSTRx_wide ( /*OUT*/V128* resV,
1040                               /*OUT*/UInt* resOSZACP,
1041                               V128* argLV,  V128* argRV,
1042                               UInt zmaskL, UInt zmaskR,
1043                               UInt imm8,   Bool isxSTRM )
1044 {
1045    vassert(imm8 < 0x80);
1046    vassert((zmaskL >> 8) == 0);
1047    vassert((zmaskR >> 8) == 0);
1048
1049    /* Explicitly reject any imm8 values that haven't been validated,
1050       even if they would probably work.  Life is too short to have
1051       unvalidated cases in the code base. */
1052    switch (imm8) {
1053       //    1,9        3,B        5,D        7,F
1054       case 0x01: case 0x03:
1055       case 0x09: case 0x0B: case 0x0D:
1056                  case 0x13:
1057       case 0x19: case 0x1B:
1058       case 0x39: case 0x3B:
1059       case 0x41:            case 0x45:
1060                  case 0x4B:
1061          break;
1062       default:
1063          return False;
1064    }
1065
1066    UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
1067    UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
1068    UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
1069    UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
1070
1071    /*----------------------------------------*/
1072    /*-- strcmp on wide data                --*/
1073    /*----------------------------------------*/
1074
1075    if (agg == 2/*equal each, aka strcmp*/
1076        && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1077       Int     i;
1078       UShort* argL = (UShort*)argLV;
1079       UShort* argR = (UShort*)argRV;
1080       UInt boolResII = 0;
1081       for (i = 7; i >= 0; i--) {
1082          UShort cL  = argL[i];
1083          UShort cR  = argR[i];
1084          boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
1085       }
1086       UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
1087       UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
1088
1089       // do invalidation, common to all equal-each cases
1090       UInt intRes1
1091          = (boolResII & validL & validR)  // if both valid, use cmpres
1092            | (~ (validL | validR));       // if both invalid, force 1
1093                                           // else force 0
1094       intRes1 &= 0xFF;
1095
1096       // generate I-format output
1097       compute_PCMPxSTRx_gen_output_wide(
1098          resV, resOSZACP,
1099          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1100       );
1101
1102       return True;
1103    }
1104
1105    /*----------------------------------------*/
1106    /*-- set membership on wide data        --*/
1107    /*----------------------------------------*/
1108
1109    if (agg == 0/*equal any, aka find chars in a set*/
1110        && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1111       /* argL: the string,  argR: charset */
1112       UInt    si, ci;
1113       UShort* argL    = (UShort*)argLV;
1114       UShort* argR    = (UShort*)argRV;
1115       UInt    boolRes = 0;
1116       UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
1117       UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
1118
1119       for (si = 0; si < 8; si++) {
1120          if ((validL & (1 << si)) == 0)
1121             // run off the end of the string.
1122             break;
1123          UInt m = 0;
1124          for (ci = 0; ci < 8; ci++) {
1125             if ((validR & (1 << ci)) == 0) break;
1126             if (argR[ci] == argL[si]) { m = 1; break; }
1127          }
1128          boolRes |= (m << si);
1129       }
1130
1131       // boolRes is "pre-invalidated"
1132       UInt intRes1 = boolRes & 0xFF;
1133
1134       // generate I-format output
1135       compute_PCMPxSTRx_gen_output_wide(
1136          resV, resOSZACP,
1137          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1138       );
1139
1140       return True;
1141    }
1142
1143    /*----------------------------------------*/
1144    /*-- substring search on wide data      --*/
1145    /*----------------------------------------*/
1146
1147    if (agg == 3/*equal ordered, aka substring search*/
1148        && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1149
1150       /* argL: haystack,  argR: needle */
1151       UInt    ni, hi;
1152       UShort* argL    = (UShort*)argLV;
1153       UShort* argR    = (UShort*)argRV;
1154       UInt    boolRes = 0;
1155       UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
1156       UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
1157       for (hi = 0; hi < 8; hi++) {
1158          UInt m = 1;
1159          for (ni = 0; ni < 8; ni++) {
1160             if ((validR & (1 << ni)) == 0) break;
1161             UInt i = ni + hi;
1162             if (i >= 8) break;
1163             if (argL[i] != argR[ni]) { m = 0; break; }
1164          }
1165          boolRes |= (m << hi);
1166          if ((validL & (1 << hi)) == 0)
1167             // run off the end of the haystack
1168             break;
1169       }
1170
1171       // boolRes is "pre-invalidated"
1172       UInt intRes1 = boolRes & 0xFF;
1173
1174       // generate I-format output
1175       compute_PCMPxSTRx_gen_output_wide(
1176          resV, resOSZACP,
1177          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1178       );
1179
1180       return True;
1181    }
1182
1183    /*----------------------------------------*/
1184    /*-- ranges, unsigned wide data         --*/
1185    /*----------------------------------------*/
1186
1187    if (agg == 1/*ranges*/
1188        && fmt == 1/*uw*/) {
1189
1190       /* argL: string,  argR: range-pairs */
1191       UInt    ri, si;
1192       UShort* argL    = (UShort*)argLV;
1193       UShort* argR    = (UShort*)argRV;
1194       UInt    boolRes = 0;
1195       UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
1196       UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
1197       for (si = 0; si < 8; si++) {
1198          if ((validL & (1 << si)) == 0)
1199             // run off the end of the string
1200             break;
1201          UInt m = 0;
1202          for (ri = 0; ri < 8; ri += 2) {
1203             if ((validR & (3 << ri)) != (3 << ri)) break;
1204             if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
1205                m = 1; break;
1206             }
1207          }
1208          boolRes |= (m << si);
1209       }
1210
1211       // boolRes is "pre-invalidated"
1212       UInt intRes1 = boolRes & 0xFF;
1213
1214       // generate I-format output
1215       compute_PCMPxSTRx_gen_output_wide(
1216          resV, resOSZACP,
1217          intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1218       );
1219
1220       return True;
1221    }
1222
1223    return False;
1224 }
1225
1226
1227 /*---------------------------------------------------------------*/
1228 /*--- end                                 guest_generic_x87.c ---*/
1229 /*---------------------------------------------------------------*/