VEX/priv/guest_arm64_helpers.c

   1
   2 /*---------------------------------------------------------------*/
   3 /*--- begin                             guest_arm64_helpers.c ---*/
   4 /*---------------------------------------------------------------*/
   5
   6 /*
   7    This file is part of Valgrind, a dynamic binary instrumentation
   8    framework.
   9
  10    Copyright (C) 2013-2017 OpenWorks
  11       info@open-works.net
  12
  13    This program is free software; you can redistribute it and/or
  14    modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation; either version 2 of the
  16    License, or (at your option) any later version.
  17
  18    This program is distributed in the hope that it will be useful, but
  19    WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21    General Public License for more details.
  22
  23    You should have received a copy of the GNU General Public License
  24    along with this program; if not, write to the Free Software
  25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  26    02110-1301, USA.
  27
  28    The GNU General Public License is contained in the file COPYING.
  29 */
  30
  31 #include "libvex_basictypes.h"
  32 #include "libvex_emnote.h"
  33 #include "libvex_guest_arm64.h"
  34 #include "libvex_ir.h"
  35 #include "libvex.h"
  36
  37 #include "main_util.h"
  38 #include "main_globals.h"
  39 #include "guest_generic_bb_to_IR.h"
  40 #include "guest_arm64_defs.h"
  41
  42
/* This file contains helper functions for arm64 guest code.  Calls to
  44    these functions are generated by the back end.  These calls are of
  45    course in the host machine code and this file will be compiled to
  46    host machine code, so that all makes sense.
  47
  48    Only change the signatures of these helper functions very
  49    carefully.  If you change the signature here, you'll have to change
  50    the parameters passed to it in the IR calls constructed by
  51    guest_arm64_toIR.c.
  52 */
  53
  54
  55 /* Set to 1 to get detailed profiling info about individual N, Z, C
  56    and V flag evaluation. */
  57 #define PROFILE_NZCV_FLAGS 0
  58
  59 #if PROFILE_NZCV_FLAGS
  60
  61 static UInt tab_eval[ARM64G_CC_OP_NUMBER][16];
  62 static UInt initted = 0;
  63 static UInt tot_evals = 0;
  64
  65 static void initCounts ( void )
  66 {
  67    UInt i, j;
  68    for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
  69       for (j = 0; j < 16; j++) {
  70          tab_eval[i][j] = 0;
  71       }
  72    }
  73    initted = 1;
  74 }
  75
  76 static void showCounts ( void )
  77 {
  78    const HChar* nameCC[16]
  79       = { "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
  80           "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV" };
  81    UInt i, j;
  82    ULong sum = 0;
  83    vex_printf("\nCC_OP          0         1         2         3    "
  84               "     4         5         6\n");
  85    vex_printf(  "--------------------------------------------------"
  86               "--------------------------\n");
  87    for (j = 0; j < 16; j++) {
  88       vex_printf("%2d %s  ", j, nameCC[j]);
  89       for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
  90          vex_printf("%9d ", tab_eval[i][j]);
  91          sum += tab_eval[i][j];
  92       }
  93       vex_printf("\n");
  94    }
  95    vex_printf("(In total %llu calls)\n", sum);
  96 }
  97
  98 #define NOTE_EVAL(_cc_op, _cond) \
  99    do { \
 100       if (!initted) initCounts(); \
 101       vassert( ((UInt)(_cc_op)) < ARM64G_CC_OP_NUMBER); \
 102       vassert( ((UInt)(_cond)) < 16); \
 103       tab_eval[(UInt)(_cc_op)][(UInt)(cond)]++;  \
 104       tot_evals++; \
 105       if (0 == (tot_evals & 0x7FFF)) \
 106         showCounts(); \
 107    } while (0)
 108
 109 #endif /* PROFILE_NZCV_FLAGS */
 110
 111
 112 /* Calculate the N flag from the supplied thunk components, in the
 113    least significant bit of the word.  Returned bits 63:1 are zero. */
 114 static
 115 ULong arm64g_calculate_flag_n ( ULong cc_op, ULong cc_dep1,
 116                                 ULong cc_dep2, ULong cc_dep3 )
 117 {
 118    switch (cc_op) {
 119       case ARM64G_CC_OP_COPY: {
 120          /* (nzcv:28x0, unused, unused) */
 121          ULong nf   = (cc_dep1 >> ARM64G_CC_SHIFT_N) & 1;
 122          return nf;
 123       }
 124       case ARM64G_CC_OP_ADD32: {
 125          /* (argL, argR, unused) */
 126          UInt  argL = (UInt)cc_dep1;
 127          UInt  argR = (UInt)cc_dep2;
 128          UInt  res  = argL + argR;
 129          ULong nf   = (ULong)(res >> 31);
 130          return nf;
 131       }
 132       case ARM64G_CC_OP_ADD64: {
 133          /* (argL, argR, unused) */
 134          ULong argL = cc_dep1;
 135          ULong argR = cc_dep2;
 136          ULong res  = argL + argR;
 137          ULong nf   = (ULong)(res >> 63);
 138          return nf;
 139       }
 140       case ARM64G_CC_OP_SUB32: {
 141          /* (argL, argR, unused) */
 142          UInt  argL = (UInt)cc_dep1;
 143          UInt  argR = (UInt)cc_dep2;
 144          UInt  res  = argL - argR;
 145          ULong nf   = (ULong)(res >> 31);
 146          return nf;
 147       }
 148       case ARM64G_CC_OP_SUB64: {
 149          /* (argL, argR, unused) */
 150          ULong argL = cc_dep1;
 151          ULong argR = cc_dep2;
 152          ULong res  = argL - argR;
 153          ULong nf   = res >> 63;
 154          return nf;
 155       }
 156       case ARM64G_CC_OP_ADC32: {
 157          /* (argL, argR, oldC) */
 158          UInt  argL = cc_dep1;
 159          UInt  argR = cc_dep2;
 160          UInt  oldC = cc_dep3;
 161          vassert((oldC & ~1) == 0);
 162          UInt  res  = argL + argR + oldC;
 163          ULong nf   = (ULong)(res >> 31);
 164          return nf;
 165       }
 166       case ARM64G_CC_OP_ADC64: {
 167          /* (argL, argR, oldC) */
 168          ULong argL = cc_dep1;
 169          ULong argR = cc_dep2;
 170          ULong oldC = cc_dep3;
 171          vassert((oldC & ~1) == 0);
 172          ULong res  = argL + argR + oldC;
 173          ULong nf   = res >> 63;
 174          return nf;
 175       }
 176       case ARM64G_CC_OP_SBC32: {
 177          /* (argL, argR, oldC) */
 178          UInt  argL = cc_dep1;
 179          UInt  argR = cc_dep2;
 180          UInt  oldC = cc_dep3;
 181          vassert((oldC & ~1) == 0);
 182          UInt  res  = argL - argR - (oldC ^ 1);
 183          ULong nf   = (ULong)(res >> 31);
 184          return nf;
 185       }
 186       case ARM64G_CC_OP_SBC64: {
 187          /* (argL, argR, oldC) */
 188          ULong argL = cc_dep1;
 189          ULong argR = cc_dep2;
 190          ULong oldC = cc_dep3;
 191          vassert((oldC & ~1) == 0);
 192          ULong res  = argL - argR - (oldC ^ 1);
 193          ULong nf   = res >> 63;
 194          return nf;
 195       }
 196       case ARM64G_CC_OP_LOGIC32: {
 197          /* (res, unused, unused) */
 198          UInt  res = (UInt)cc_dep1;
 199          ULong nf  = res >> 31;
 200          return nf;
 201       }
 202       case ARM64G_CC_OP_LOGIC64: {
 203          /* (res, unused, unused) */
 204          ULong res = cc_dep1;
 205          ULong nf  = res >> 63;
 206          return nf;
 207       }
 208 //ZZ       case ARMG_CC_OP_MUL: {
 209 //ZZ          /* (res, unused, oldC:oldV) */
 210 //ZZ          UInt res  = cc_dep1;
 211 //ZZ          UInt nf   = res >> 31;
 212 //ZZ          return nf;
 213 //ZZ       }
 214 //ZZ       case ARMG_CC_OP_MULL: {
 215 //ZZ          /* (resLo32, resHi32, oldC:oldV) */
 216 //ZZ          UInt resHi32 = cc_dep2;
 217 //ZZ          UInt nf      = resHi32 >> 31;
 218 //ZZ          return nf;
 219 //ZZ       }
 220       default:
 221          /* shouldn't really make these calls from generated code */
 222          vex_printf("arm64g_calculate_flag_n"
 223                     "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
 224                     cc_op, cc_dep1, cc_dep2, cc_dep3 );
 225          vpanic("arm64g_calculate_flag_n");
 226    }
 227 }
 228
 229
 230 /* Calculate the Z flag from the supplied thunk components, in the
 231    least significant bit of the word.  Returned bits 63:1 are zero. */
 232 static
 233 ULong arm64g_calculate_flag_z ( ULong cc_op, ULong cc_dep1,
 234                                 ULong cc_dep2, ULong cc_dep3 )
 235 {
 236    switch (cc_op) {
 237       case ARM64G_CC_OP_COPY: {
 238          /* (nzcv:28x0, unused, unused) */
 239          ULong zf   = (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1;
 240          return zf;
 241       }
 242       case ARM64G_CC_OP_ADD32: {
 243          /* (argL, argR, unused) */
 244          UInt  argL = (UInt)cc_dep1;
 245          UInt  argR = (UInt)cc_dep2;
 246          UInt  res  = argL + argR;
 247          ULong zf   = res == 0;
 248          return zf;
 249       }
 250       case ARM64G_CC_OP_ADD64: {
 251          /* (argL, argR, unused) */
 252          ULong argL = cc_dep1;
 253          ULong argR = cc_dep2;
 254          ULong res  = argL + argR;
 255          ULong zf   = res == 0;
 256          return zf;
 257       }
 258       case ARM64G_CC_OP_SUB32: {
 259          /* (argL, argR, unused) */
 260          UInt  argL = (UInt)cc_dep1;
 261          UInt  argR = (UInt)cc_dep2;
 262          UInt  res  = argL - argR;
 263          ULong zf   = res == 0;
 264          return zf;
 265       }
 266       case ARM64G_CC_OP_SUB64: {
 267          /* (argL, argR, unused) */
 268          ULong argL = cc_dep1;
 269          ULong argR = cc_dep2;
 270          ULong res  = argL - argR;
 271          ULong zf   = res == 0;
 272          return zf;
 273       }
 274       case ARM64G_CC_OP_ADC32: {
 275          /* (argL, argR, oldC) */
 276          UInt  argL = cc_dep1;
 277          UInt  argR = cc_dep2;
 278          UInt  oldC = cc_dep3;
 279          vassert((oldC & ~1) == 0);
 280          UInt  res  = argL + argR + oldC;
 281          ULong zf   = res == 0;
 282          return zf;
 283       }
 284       case ARM64G_CC_OP_ADC64: {
 285          /* (argL, argR, oldC) */
 286          ULong argL = cc_dep1;
 287          ULong argR = cc_dep2;
 288          ULong oldC = cc_dep3;
 289          vassert((oldC & ~1) == 0);
 290          ULong res  = argL + argR + oldC;
 291          ULong zf   = res == 0;
 292          return zf;
 293       }
 294       case ARM64G_CC_OP_SBC32: {
 295          /* (argL, argR, oldC) */
 296          UInt  argL = cc_dep1;
 297          UInt  argR = cc_dep2;
 298          UInt  oldC = cc_dep3;
 299          vassert((oldC & ~1) == 0);
 300          UInt  res  = argL - argR - (oldC ^ 1);
 301          ULong zf   = res == 0;
 302          return zf;
 303       }
 304       case ARM64G_CC_OP_SBC64: {
 305          /* (argL, argR, oldC) */
 306          ULong argL = cc_dep1;
 307          ULong argR = cc_dep2;
 308          ULong oldC = cc_dep3;
 309          vassert((oldC & ~1) == 0);
 310          ULong res  = argL - argR - (oldC ^ 1);
 311          ULong zf   = res == 0;
 312          return zf;
 313       }
 314       case ARM64G_CC_OP_LOGIC32: {
 315          /* (res, unused, unused) */
 316          UInt  res  = (UInt)cc_dep1;
 317          ULong zf   = res == 0;
 318          return zf;
 319       }
 320       case ARM64G_CC_OP_LOGIC64: {
 321          /* (res, unused, unused) */
 322          ULong res  = cc_dep1;
 323          ULong zf   = res == 0;
 324          return zf;
 325       }
 326 //ZZ       case ARMG_CC_OP_MUL: {
 327 //ZZ          /* (res, unused, oldC:oldV) */
 328 //ZZ          UInt res  = cc_dep1;
 329 //ZZ          UInt zf   = res == 0;
 330 //ZZ          return zf;
 331 //ZZ       }
 332 //ZZ       case ARMG_CC_OP_MULL: {
 333 //ZZ          /* (resLo32, resHi32, oldC:oldV) */
 334 //ZZ          UInt resLo32 = cc_dep1;
 335 //ZZ          UInt resHi32 = cc_dep2;
 336 //ZZ          UInt zf      = (resHi32|resLo32) == 0;
 337 //ZZ          return zf;
 338 //ZZ       }
 339       default:
 340          /* shouldn't really make these calls from generated code */
 341          vex_printf("arm64g_calculate_flag_z"
 342                     "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
 343                     cc_op, cc_dep1, cc_dep2, cc_dep3 );
 344          vpanic("arm64g_calculate_flag_z");
 345    }
 346 }
 347
 348
 349 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 350 /* Calculate the C flag from the supplied thunk components, in the
 351    least significant bit of the word.  Returned bits 63:1 are zero. */
 352 ULong arm64g_calculate_flag_c ( ULong cc_op, ULong cc_dep1,
 353                                 ULong cc_dep2, ULong cc_dep3 )
 354 {
 355    switch (cc_op) {
 356       case ARM64G_CC_OP_COPY: {
 357          /* (nzcv:28x0, unused, unused) */
 358          ULong cf = (cc_dep1 >> ARM64G_CC_SHIFT_C) & 1;
 359          return cf;
 360       }
 361       case ARM64G_CC_OP_ADD32: {
 362          /* (argL, argR, unused) */
 363          UInt  argL = (UInt)cc_dep1;
 364          UInt  argR = (UInt)cc_dep2;
 365          UInt  res  = argL + argR;
 366          ULong cf   = res < argL;
 367          return cf;
 368       }
 369       case ARM64G_CC_OP_ADD64: {
 370          /* (argL, argR, unused) */
 371          ULong argL = cc_dep1;
 372          ULong argR = cc_dep2;
 373          ULong res  = argL + argR;
 374          ULong cf   = res < argL;
 375          return cf;
 376       }
 377       case ARM64G_CC_OP_SUB32: {
 378          /* (argL, argR, unused) */
 379          UInt  argL = (UInt)cc_dep1;
 380          UInt  argR = (UInt)cc_dep2;
 381          ULong cf   = argL >= argR;
 382          return cf;
 383       }
 384       case ARM64G_CC_OP_SUB64: {
 385          /* (argL, argR, unused) */
 386          ULong argL = cc_dep1;
 387          ULong argR = cc_dep2;
 388          ULong cf   = argL >= argR;
 389          return cf;
 390       }
 391       case ARM64G_CC_OP_ADC32: {
 392          /* (argL, argR, oldC) */
 393          UInt  argL = cc_dep1;
 394          UInt  argR = cc_dep2;
 395          UInt  oldC = cc_dep3;
 396          vassert((oldC & ~1) == 0);
 397          UInt  res  = argL + argR + oldC;
 398          ULong cf   = oldC ? (res <= argL) : (res < argL);
 399          return cf;
 400       }
 401       case ARM64G_CC_OP_ADC64: {
 402          /* (argL, argR, oldC) */
 403          ULong argL = cc_dep1;
 404          ULong argR = cc_dep2;
 405          ULong oldC = cc_dep3;
 406          vassert((oldC & ~1) == 0);
 407          ULong res  = argL + argR + oldC;
 408          ULong cf   = oldC ? (res <= argL) : (res < argL);
 409          return cf;
 410       }
 411       case ARM64G_CC_OP_SBC32: {
 412          /* (argL, argR, oldC) */
 413          UInt  argL = cc_dep1;
 414          UInt  argR = cc_dep2;
 415          UInt  oldC = cc_dep3;
 416          vassert((oldC & ~1) == 0);
 417          ULong cf   = oldC ? (argL >= argR) : (argL > argR);
 418          return cf;
 419       }
 420       case ARM64G_CC_OP_SBC64: {
 421          /* (argL, argR, oldC) */
 422          ULong argL = cc_dep1;
 423          ULong argR = cc_dep2;
 424          ULong oldC = cc_dep3;
 425          vassert((oldC & ~1) == 0);
 426          ULong cf   = oldC ? (argL >= argR) : (argL > argR);
 427          return cf;
 428       }
 429       case ARM64G_CC_OP_LOGIC32:
 430       case ARM64G_CC_OP_LOGIC64: {
 431          /* (res, unused, unused) */
 432          return 0; // C after logic is zero on arm64
 433       }
 434 //ZZ       case ARMG_CC_OP_MUL: {
 435 //ZZ          /* (res, unused, oldC:oldV) */
 436 //ZZ          UInt oldC = (cc_dep3 >> 1) & 1;
 437 //ZZ          vassert((cc_dep3 & ~3) == 0);
 438 //ZZ          UInt cf   = oldC;
 439 //ZZ          return cf;
 440 //ZZ       }
 441 //ZZ       case ARMG_CC_OP_MULL: {
 442 //ZZ          /* (resLo32, resHi32, oldC:oldV) */
 443 //ZZ          UInt oldC    = (cc_dep3 >> 1) & 1;
 444 //ZZ          vassert((cc_dep3 & ~3) == 0);
 445 //ZZ          UInt cf      = oldC;
 446 //ZZ          return cf;
 447 //ZZ       }
 448       default:
 449          /* shouldn't really make these calls from generated code */
 450          vex_printf("arm64g_calculate_flag_c"
 451                     "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
 452                     cc_op, cc_dep1, cc_dep2, cc_dep3 );
 453          vpanic("arm64g_calculate_flag_c");
 454    }
 455 }
 456
 457
 458 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 459 /* Calculate the V flag from the supplied thunk components, in the
 460    least significant bit of the word.  Returned bits 63:1 are zero. */
 461 static
 462 ULong arm64g_calculate_flag_v ( ULong cc_op, ULong cc_dep1,
 463                                 ULong cc_dep2, ULong cc_dep3 )
 464 {
 465    switch (cc_op) {
 466       case ARM64G_CC_OP_COPY: {
 467          /* (nzcv:28x0, unused, unused) */
 468          ULong vf   = (cc_dep1 >> ARM64G_CC_SHIFT_V) & 1;
 469          return vf;
 470       }
 471       case ARM64G_CC_OP_ADD32: {
 472          /* (argL, argR, unused) */
 473          UInt  argL = (UInt)cc_dep1;
 474          UInt  argR = (UInt)cc_dep2;
 475          UInt  res  = argL + argR;
 476          ULong vf   = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
 477          return vf;
 478       }
 479       case ARM64G_CC_OP_ADD64: {
 480          /* (argL, argR, unused) */
 481          ULong argL = cc_dep1;
 482          ULong argR = cc_dep2;
 483          ULong res  = argL + argR;
 484          ULong vf   = ((res ^ argL) & (res ^ argR)) >> 63;
 485          return vf;
 486       }
 487       case ARM64G_CC_OP_SUB32: {
 488          /* (argL, argR, unused) */
 489          UInt  argL = (UInt)cc_dep1;
 490          UInt  argR = (UInt)cc_dep2;
 491          UInt  res  = argL - argR;
 492          ULong vf   = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
 493          return vf;
 494       }
 495       case ARM64G_CC_OP_SUB64: {
 496          /* (argL, argR, unused) */
 497          ULong argL = cc_dep1;
 498          ULong argR = cc_dep2;
 499          ULong res  = argL - argR;
 500          ULong vf   = (((argL ^ argR) & (argL ^ res))) >> 63;
 501          return vf;
 502       }
 503       case ARM64G_CC_OP_ADC32: {
 504          /* (argL, argR, oldC) */
 505          UInt  argL = cc_dep1;
 506          UInt  argR = cc_dep2;
 507          UInt  oldC = cc_dep3;
 508          vassert((oldC & ~1) == 0);
 509          UInt  res  = argL + argR + oldC;
 510          ULong vf   = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
 511          return vf;
 512       }
 513       case ARM64G_CC_OP_ADC64: {
 514          /* (argL, argR, oldC) */
 515          ULong argL = cc_dep1;
 516          ULong argR = cc_dep2;
 517          ULong oldC = cc_dep3;
 518          vassert((oldC & ~1) == 0);
 519          ULong res  = argL + argR + oldC;
 520          ULong vf   = ((res ^ argL) & (res ^ argR)) >> 63;
 521          return vf;
 522       }
 523       case ARM64G_CC_OP_SBC32: {
 524          /* (argL, argR, oldC) */
 525          UInt  argL = cc_dep1;
 526          UInt  argR = cc_dep2;
 527          UInt  oldC = cc_dep3;
 528          vassert((oldC & ~1) == 0);
 529          UInt  res  = argL - argR - (oldC ^ 1);
 530          ULong vf   = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
 531          return vf;
 532       }
 533       case ARM64G_CC_OP_SBC64: {
 534          /* (argL, argR, oldC) */
 535          ULong argL = cc_dep1;
 536          ULong argR = cc_dep2;
 537          ULong oldC = cc_dep3;
 538          vassert((oldC & ~1) == 0);
 539          ULong res  = argL - argR - (oldC ^ 1);
 540          ULong vf   = ((argL ^ argR) & (argL ^ res)) >> 63;
 541          return vf;
 542       }
 543       case ARM64G_CC_OP_LOGIC32:
 544       case ARM64G_CC_OP_LOGIC64: {
 545          /* (res, unused, unused) */
 546          return 0; // V after logic is zero on arm64
 547       }
 548 //ZZ       case ARMG_CC_OP_MUL: {
 549 //ZZ          /* (res, unused, oldC:oldV) */
 550 //ZZ          UInt oldV = (cc_dep3 >> 0) & 1;
 551 //ZZ          vassert((cc_dep3 & ~3) == 0);
 552 //ZZ          UInt vf   = oldV;
 553 //ZZ          return vf;
 554 //ZZ       }
 555 //ZZ       case ARMG_CC_OP_MULL: {
 556 //ZZ          /* (resLo32, resHi32, oldC:oldV) */
 557 //ZZ          UInt oldV    = (cc_dep3 >> 0) & 1;
 558 //ZZ          vassert((cc_dep3 & ~3) == 0);
 559 //ZZ          UInt vf      = oldV;
 560 //ZZ          return vf;
 561 //ZZ       }
 562       default:
 563          /* shouldn't really make these calls from generated code */
 564          vex_printf("arm64g_calculate_flag_v"
 565                     "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
 566                     cc_op, cc_dep1, cc_dep2, cc_dep3 );
 567          vpanic("arm64g_calculate_flag_v");
 568    }
 569 }
 570
 571
 572 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 573 /* Calculate NZCV from the supplied thunk components, in the positions
 574    they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
 575    Returned bits 27:0 are zero. */
 576 ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
 577                                     ULong cc_dep2, ULong cc_dep3 )
 578 {
 579    ULong f;
 580    ULong res = 0;
 581    f = 1 & arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
 582    res |= (f << ARM64G_CC_SHIFT_N);
 583    f = 1 & arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
 584    res |= (f << ARM64G_CC_SHIFT_Z);
 585    f = 1 & arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
 586    res |= (f << ARM64G_CC_SHIFT_C);
 587    f = 1 & arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
 588    res |= (f << ARM64G_CC_SHIFT_V);
 589    return res;
 590 }
 591
 592 //ZZ
 593 //ZZ /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 594 //ZZ /* Calculate the QC flag from the arguments, in the lowest bit
 595 //ZZ    of the word (bit 0).  Urr, having this out of line is bizarre.
 596 //ZZ    Push back inline. */
 597 //ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
 598 //ZZ                               UInt resR1, UInt resR2 )
 599 //ZZ {
 600 //ZZ    if (resL1 != resR1 || resL2 != resR2)
 601 //ZZ       return 1;
 602 //ZZ    else
 603 //ZZ       return 0;
 604 //ZZ }
 605
 606 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 607 /* Calculate the specified condition from the thunk components, in the
 608    lowest bit of the word (bit 0).  Returned bits 63:1 are zero. */
 609 ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
 610                                    ULong cond_n_op ,
 611                                    ULong cc_dep1,
 612                                    ULong cc_dep2, ULong cc_dep3 )
 613 {
 614    ULong cond  = cond_n_op >> 4;
 615    ULong cc_op = cond_n_op & 0xF;
 616    ULong inv   = cond & 1;
 617    ULong nf, zf, vf, cf;
 618
 619 #  if PROFILE_NZCV_FLAGS
 620    NOTE_EVAL(cc_op, cond);
 621 #  endif
 622
 623    //   vex_printf("XXXXXXXX %llx %llx %llx %llx\n",
 624    //              cond_n_op, cc_dep1, cc_dep2, cc_dep3);
 625
 626    switch (cond) {
 627       case ARM64CondEQ:    // Z=1         => z
 628       case ARM64CondNE:    // Z=0
 629          zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
 630          return inv ^ zf;
 631
 632       case ARM64CondCS:    // C=1         => c
 633       case ARM64CondCC:    // C=0
 634          cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
 635          return inv ^ cf;
 636
 637       case ARM64CondMI:    // N=1         => n
 638       case ARM64CondPL:    // N=0
 639          nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
 640          return inv ^ nf;
 641
 642       case ARM64CondVS:    // V=1         => v
 643       case ARM64CondVC:    // V=0
 644          vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
 645          return inv ^ vf;
 646
 647       case ARM64CondHI:    // C=1 && Z=0   => c & ~z
 648       case ARM64CondLS:    // C=0 || Z=1
 649          cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
 650          zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
 651          return inv ^ (1 & (cf & ~zf));
 652
 653       case ARM64CondGE:    // N=V          => ~(n^v)
 654       case ARM64CondLT:    // N!=V
 655          nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
 656          vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
 657          return inv ^ (1 & ~(nf ^ vf));
 658
 659       case ARM64CondGT:    // Z=0 && N=V   => ~z & ~(n^v)  =>  ~(z | (n^v))
 660       case ARM64CondLE:    // Z=1 || N!=V
 661          nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
 662          vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
 663          zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
 664          return inv ^ (1 & ~(zf | (nf ^ vf)));
 665
 666       case ARM64CondAL:    // 1
 667       case ARM64CondNV:    // 1
 668          return 1;
 669
 670       default:
 671          /* shouldn't really make these calls from generated code */
 672          vex_printf("arm64g_calculate_condition(ARM64)"
 673                     "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
 674                     cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
 675          vpanic("armg_calculate_condition(ARM64)");
 676    }
 677 }
 678
 679
 680 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 681 ULong arm64g_calc_crc32b ( ULong acc, ULong bits )
 682 {
 683    UInt  i;
 684    ULong crc = (bits & 0xFFULL) ^ acc;
 685    for (i = 0; i < 8; i++)
 686       crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
 687    return crc;
 688 }
 689
 690 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 691 ULong arm64g_calc_crc32h ( ULong acc, ULong bits )
 692 {
 693    UInt  i;
 694    ULong crc = (bits & 0xFFFFULL) ^ acc;
 695    for (i = 0; i < 16; i++)
 696       crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
 697    return crc;
 698 }
 699
 700 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 701 ULong arm64g_calc_crc32w ( ULong acc, ULong bits )
 702 {
 703    UInt  i;
 704    ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
 705    for (i = 0; i < 32; i++)
 706       crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
 707    return crc;
 708 }
 709
 710 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 711 ULong arm64g_calc_crc32x ( ULong acc, ULong bits )
 712 {
 713    UInt  i;
 714    ULong crc = bits ^ acc;
 715    for (i = 0; i < 64; i++)
 716       crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
 717    return crc;
 718
 719 }
 720
 721 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 722 ULong arm64g_calc_crc32cb ( ULong acc, ULong bits )
 723 {
 724    UInt  i;
 725    ULong crc = (bits & 0xFFULL) ^ acc;
 726    for (i = 0; i < 8; i++)
 727       crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
 728    return crc;
 729 }
 730
 731 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 732 ULong arm64g_calc_crc32ch ( ULong acc, ULong bits )
 733 {
 734    UInt  i;
 735    ULong crc = (bits & 0xFFFFULL) ^ acc;
 736    for (i = 0; i < 16; i++)
 737       crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
 738    return crc;
 739 }
 740
 741 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 742 ULong arm64g_calc_crc32cw ( ULong acc, ULong bits )
 743 {
 744    UInt  i;
 745    ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
 746    for (i = 0; i < 32; i++)
 747       crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
 748    return crc;
 749 }
 750
 751 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 752 ULong arm64g_calc_crc32cx ( ULong acc, ULong bits )
 753 {
 754    UInt  i;
 755    ULong crc = bits ^ acc;
 756    for (i = 0; i < 64; i++)
 757       crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
 758    return crc;
 759 }
 760
 761
 762 /* CALLED FROM GENERATED CODE */
 763 /* DIRTY HELPER (non-referentially-transparent) */
 764 /* Horrible hack.  On non-arm64 platforms, return 0. */
 765 ULong arm64g_dirtyhelper_MRS_CNTVCT_EL0 ( void )
 766 {
 767 #  if defined(__aarch64__) && !defined(__arm__)
 768    ULong w = 0x5555555555555555ULL; /* overwritten */
 769    __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(w));
 770    return w;
 771 #  else
 772    return 0ULL;
 773 #  endif
 774 }
 775
 776
 777 /* CALLED FROM GENERATED CODE */
 778 /* DIRTY HELPER (non-referentially-transparent) */
 779 /* Horrible hack.  On non-arm64 platforms, return 0. */
 780 ULong arm64g_dirtyhelper_MRS_CNTFRQ_EL0 ( void )
 781 {
 782 #  if defined(__aarch64__) && !defined(__arm__)
 783    ULong w = 0x5555555555555555ULL; /* overwritten */
 784    __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(w));
 785    return w;
 786 #  else
 787    return 0ULL;
 788 #  endif
 789 }
 790
 791
 792 void arm64g_dirtyhelper_PMULLQ ( /*OUT*/V128* res, ULong arg1, ULong arg2 )
 793 {
 794    /* This doesn't need to be a dirty helper, except for the fact that
 795       a clean helper can't return a 128 bit value.  This is a pretty
 796       lame implementation of PMULLQ, but at least it doesn't contain any
 797       data dependent branches, and has lots of ILP.  I guess we could unroll
 798       the loop completely and offer extensive prayers to the gods of ILP
 799       if more performance is needed. */
 800    UInt i;
 801    ULong accHi = 0, accLo = 0;
 802    ULong op2Hi = 0, op2Lo = arg2;
 803    for (i = 0; i < 64; i++) {
 804       /* Make |mask| be all 0s or all 1s, a copy of arg1[i] */
 805       Long mask = arg1 << (63-i);
 806       mask >>= 63;
 807       accHi ^= (op2Hi & mask);
 808       accLo ^= (op2Lo & mask);
 809       /* do: op2Hi:op2Lo <<=u 1 */
 810       op2Hi <<= 1;
 811       op2Hi |= ((op2Lo >> 63) & 1);
 812       op2Lo <<= 1;
 813    }
 814    res->w64[1] = accHi;
 815    res->w64[0] = accLo;
 816 }
 817
 818
 819 /*---------------------------------------------------------------*/
 820 /*--- Crypto instruction helpers                              ---*/
 821 /*---------------------------------------------------------------*/
 822
 823 /* DIRTY HELPERS for doing AES support:
 824    * AESE (SubBytes, then ShiftRows)
 825    * AESD (InvShiftRows, then InvSubBytes)
 826    * AESMC (MixColumns)
 827    * AESIMC (InvMixColumns)
 828    These don't actually have to be dirty helpers -- they could be
 829    clean, but for the fact that they return a V128 and a clean helper
 830    can't do that.
 831
 832    The ARMv8 manual seems to imply that AESE first performs ShiftRows,
 833    then SubBytes.  This seems to contradict FIPS 197, so the
 834    implementation below is consistent with FIPS 197.  One can observe
 835    that the two transformations commute -- the order in which they
 836    happen makes no difference to the result.  So the ambiguity doesn't
 837    actually matter, but it is confusing.  The v8 manual looks correct
 838    about AESD, though.
 839
 840    The three functions rj_xtime, aesMixColumn and aesInvMixColumn only,
 841    are taken from "A byte-oriented AES-256 implementation" and are subject
 842    to the following usage terms:
 843
 844      Byte-oriented AES-256 implementation.
 845      All lookup tables replaced with 'on the fly' calculations.
 846
 847      Copyright (c) 2007-2011 Ilya O. Levin, http://www.literatecode.com
 848      Other contributors: Hal Finney
 849
 850      Permission to use, copy, modify, and distribute this software for any
 851      purpose with or without fee is hereby granted, provided that the above
 852      copyright notice and this permission notice appear in all copies.
 853
 854      THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 855      WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 856      MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 857      ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 858      WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 859      ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 860      OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 861 */
 862
 863 const UChar aesMapSubBytes[256]
 864    = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
 865        0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
 866        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
 867        0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
 868        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
 869        0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
 870        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
 871        0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
 872        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
 873        0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
 874        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
 875        0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
 876        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
 877        0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
 878        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
 879        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
 880        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
 881        0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
 882        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
 883        0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
 884        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
 885        0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
 886        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
 887        0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
 888        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
 889        0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
 890        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
 891        0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
 892        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
 893        0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
 894        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
 895        0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
 896      };
 897
 898 const UChar aesMapInvSubBytes[256]
 899    = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
 900        0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
 901        0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
 902        0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
 903        0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
 904        0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
 905        0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
 906        0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
 907        0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
 908        0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
 909        0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
 910        0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
 911        0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
 912        0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
 913        0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
 914        0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
 915        0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
 916        0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
 917        0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
 918        0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
 919        0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
 920        0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
 921        0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
 922        0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
 923        0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
 924        0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
 925        0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
 926        0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
 927        0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
 928        0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
 929        0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
 930        0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 931      };
 932
 933 static inline UChar rj_xtime ( UChar x )
 934 {
 935    UChar y = (UChar)(x << 1);
 936    return (x & 0x80) ? (y ^ 0x1b) : y;
 937 }
 938
 939 static void aesMixColumn ( /*MOD*/UChar* r )
 940 {
 941    UChar a = r[0];
 942    UChar b = r[1];
 943    UChar c = r[2];
 944    UChar d = r[3];
 945    UChar e = a ^ b ^ c ^ d;
 946    r[0] ^= e ^ rj_xtime(a ^ b);
 947    r[1] ^= e ^ rj_xtime(b ^ c);
 948    r[2] ^= e ^ rj_xtime(c ^ d);
 949    r[3] ^= e ^ rj_xtime(d ^ a);
 950 }
 951
 952 static void aesInvMixColumn ( /*MOD*/UChar* r )
 953 {
 954    UChar a = r[0];
 955    UChar b = r[1];
 956    UChar c = r[2];
 957    UChar d = r[3];
 958    UChar e = a ^ b ^ c ^ d;
 959    UChar z = rj_xtime(e);
 960    UChar x = e ^ rj_xtime(rj_xtime(z ^ a ^ c));
 961    UChar y = e ^ rj_xtime(rj_xtime(z ^ b ^ d));
 962    r[0] ^= x ^ rj_xtime(a ^ b);
 963    r[1] ^= y ^ rj_xtime(b ^ c);
 964    r[2] ^= x ^ rj_xtime(c ^ d);
 965    r[3] ^= y ^ rj_xtime(d ^ a);
 966 }
 967
 968
 969 /* CALLED FROM GENERATED CODE */
 970 void arm64g_dirtyhelper_AESE ( /*OUT*/V128* res, ULong argHi, ULong argLo )
 971 {
 972    res->w64[1] = argHi;
 973    res->w64[0] = argLo;
 974
 975    /* First do SubBytes on the State. */
 976    UInt i;
 977    for (i = 0; i < 16; i++) {
 978       res->w8[i] = aesMapSubBytes[res->w8[i] & 0xFF];
 979    }
 980
 981    /* Then do ShiftRows on the State. */
 982 #  define XX(_ix) res->w8[_ix]
 983    { UChar old1 = XX(1);
 984      XX(1) = XX(5); XX(5) = XX(9); XX(9) = XX(13); XX(13) = old1;
 985    }
 986    { UChar old2 = XX(2); UChar old6 = XX(6);
 987      XX(2) = XX(10); XX(6) = XX(14); XX(10) = old2; XX(14) = old6;
 988    }
 989    { UChar old15 = XX(15);
 990      XX(15) = XX(11); XX(11) = XX(7); XX(7) = XX(3); XX(3) = old15;
 991    }
 992 #  undef XX
 993 }
 994
 995
 996 /* CALLED FROM GENERATED CODE */
 997 void arm64g_dirtyhelper_AESD ( /*OUT*/V128* res, ULong argHi, ULong argLo )
 998 {
 999    res->w64[1] = argHi;
1000    res->w64[0] = argLo;
1001
1002    /* First do InvShiftRows on the State. */
1003 #  define XX(_ix) res->w8[_ix]
1004    { UChar old13 = XX(13);
1005      XX(13) = XX(9); XX(9) = XX(5); XX(5) = XX(1); XX(1) = old13;
1006    }
1007    { UChar old14 = XX(14); UChar old10 = XX(10);
1008      XX(14) = XX(6); XX(10) = XX(2); XX(6) = old14; XX(2) = old10;
1009    }
1010    { UChar old3 = XX(3);
1011      XX(3) = XX(7); XX(7) = XX(11); XX(11) = XX(15); XX(15) = old3;
1012    }
1013 #  undef XX
1014
1015 /* Then do InvSubBytes on the State. */
1016    UInt i;
1017    for (i = 0; i < 16; i++) {
1018       res->w8[i] = aesMapInvSubBytes[res->w8[i] & 0xFF];
1019    }
1020 }
1021
1022
1023 /* CALLED FROM GENERATED CODE */
1024 void arm64g_dirtyhelper_AESMC ( /*OUT*/V128* res, ULong argHi, ULong argLo )
1025 {
1026    res->w64[1] = argHi;
1027    res->w64[0] = argLo;
1028    aesMixColumn(&res->w8[0]);
1029    aesMixColumn(&res->w8[4]);
1030    aesMixColumn(&res->w8[8]);
1031    aesMixColumn(&res->w8[12]);
1032 }
1033
1034
1035 /* CALLED FROM GENERATED CODE */
1036 void arm64g_dirtyhelper_AESIMC ( /*OUT*/V128* res, ULong argHi, ULong argLo )
1037 {
1038    res->w64[1] = argHi;
1039    res->w64[0] = argLo;
1040    aesInvMixColumn(&res->w8[0]);
1041    aesInvMixColumn(&res->w8[4]);
1042    aesInvMixColumn(&res->w8[8]);
1043    aesInvMixColumn(&res->w8[12]);
1044 }
1045
1046
1047 /* DIRTY HELPERS for SHA instruction support.  As with the AES helpers
1048    above, these are actually pure functions and are only dirty because
1049    clean helpers can't return a V128. */
1050
1051 static inline UInt ROL32 ( UInt x, UInt sh ) {
1052    vassert(sh > 0 && sh < 32);
1053    return (x << sh) | (x >> (32 - sh));
1054 }
1055
1056 static inline UInt ROR32 ( UInt x, UInt sh ) {
1057    vassert(sh > 0 && sh < 32);
1058    return (x >> sh) | (x << (32 - sh));
1059 }
1060
1061 static inline UInt SHAchoose ( UInt x, UInt y, UInt z ) {
1062    return ((y ^ z) & x) ^ z;
1063 }
1064
1065 static inline UInt SHAmajority ( UInt x, UInt y, UInt z ) {
1066    return (x & y) | ((x | y) & z);
1067 }
1068
1069 static inline UInt SHAparity ( UInt x, UInt y, UInt z ) {
1070    return x ^ y ^ z;
1071 }
1072
1073 static inline UInt SHAhashSIGMA0 ( UInt x ) {
1074    return ROR32(x, 2) ^ ROR32(x, 13) ^ ROR32(x, 22);
1075 }
1076
1077 static inline UInt SHAhashSIGMA1 ( UInt x ) {
1078    return ROR32(x, 6) ^ ROR32(x, 11) ^ ROR32(x, 25);
1079 }
1080
1081 static void SHA256hash ( /*MOD*/V128* X, /*MOD*/V128* Y, const V128* W )
1082 {
1083    UInt e;
1084    for (e = 0; e <= 3; e++) {
1085       UInt chs = SHAchoose(Y->w32[0], Y->w32[1], Y->w32[2]);
1086       UInt maj = SHAmajority(X->w32[0], X->w32[1], X->w32[2]);
1087       UInt t   = Y->w32[3] + SHAhashSIGMA1(Y->w32[0]) + chs + W->w32[e];
1088       X->w32[3] = t + X->w32[3];
1089       Y->w32[3] = t + SHAhashSIGMA0(X->w32[0]) + maj;
1090       UInt ts = Y->w32[3];
1091       Y->w32[3] = Y->w32[2];
1092       Y->w32[2] = Y->w32[1];
1093       Y->w32[1] = Y->w32[0];
1094       Y->w32[0] = X->w32[3];
1095       X->w32[3] = X->w32[2];
1096       X->w32[2] = X->w32[1];
1097       X->w32[1] = X->w32[0];
1098       X->w32[0] = ts;
1099    }
1100 }
1101
1102 /* CALLED FROM GENERATED CODE */
1103 void arm64g_dirtyhelper_SHA1C ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1104                                 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1105 {
1106    vassert(nHi == 0);
1107    vassert((nLo >> 32) == 0);
1108    V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1109    UInt Y; Y = (UInt)nLo;
1110    V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1111    UInt e;
1112    for (e = 0; e <= 3; e++) {
1113       UInt t = SHAchoose(X.w32[1], X.w32[2], X.w32[3]);
1114       Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1115       X.w32[1] = ROL32(X.w32[1], 30);
1116       UInt oldY = Y;
1117       Y = X.w32[3];
1118       X.w32[3] = X.w32[2];
1119       X.w32[2] = X.w32[1];
1120       X.w32[1] = X.w32[0];
1121       X.w32[0] = oldY;
1122    }
1123    res->w64[1] = X.w64[1];
1124    res->w64[0] = X.w64[0];
1125 }
1126
1127 /* CALLED FROM GENERATED CODE */
1128 void arm64g_dirtyhelper_SHA1H ( /*OUT*/V128* res, ULong nHi, ULong nLo )
1129 {
1130    vassert(nHi == 0);
1131    vassert((nLo >> 32) == 0);
1132    res->w32[3] = res->w32[2] = res->w32[1] = 0;
1133    res->w32[0] = ROL32((UInt)nLo, 30);
1134 }
1135
1136 /* CALLED FROM GENERATED CODE */
1137 void arm64g_dirtyhelper_SHA1M ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1138                                 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1139 {
1140    vassert(nHi == 0);
1141    vassert((nLo >> 32) == 0);
1142    V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1143    UInt Y; Y = (UInt)nLo;
1144    V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1145    UInt e;
1146    for (e = 0; e <= 3; e++) {
1147       UInt t = SHAmajority(X.w32[1], X.w32[2], X.w32[3]);
1148       Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1149       X.w32[1] = ROL32(X.w32[1], 30);
1150       UInt oldY = Y;
1151       Y = X.w32[3];
1152       X.w32[3] = X.w32[2];
1153       X.w32[2] = X.w32[1];
1154       X.w32[1] = X.w32[0];
1155       X.w32[0] = oldY;
1156    }
1157    res->w64[1] = X.w64[1];
1158    res->w64[0] = X.w64[0];
1159 }
1160
1161 /* CALLED FROM GENERATED CODE */
1162 void arm64g_dirtyhelper_SHA1P ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1163                                 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1164 {
1165    vassert(nHi == 0);
1166    vassert((nLo >> 32) == 0);
1167    V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1168    UInt Y; Y = (UInt)nLo;
1169    V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1170    UInt e;
1171    for (e = 0; e <= 3; e++) {
1172       UInt t = SHAparity(X.w32[1], X.w32[2], X.w32[3]);
1173       Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1174       X.w32[1] = ROL32(X.w32[1], 30);
1175       UInt oldY = Y;
1176       Y = X.w32[3];
1177       X.w32[3] = X.w32[2];
1178       X.w32[2] = X.w32[1];
1179       X.w32[1] = X.w32[0];
1180       X.w32[0] = oldY;
1181    }
1182    res->w64[1] = X.w64[1];
1183    res->w64[0] = X.w64[0];
1184 }
1185
1186 /* CALLED FROM GENERATED CODE */
1187 void arm64g_dirtyhelper_SHA1SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1188                                   ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1189 {
1190    res->w64[1] = nLo;
1191    res->w64[0] = dHi;
1192    res->w64[1] ^= dHi ^ mHi;
1193    res->w64[0] ^= dLo ^ mLo;
1194 }
1195
1196 /* CALLED FROM GENERATED CODE */
1197 void arm64g_dirtyhelper_SHA1SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1198                                   ULong nHi, ULong nLo )
1199 {
1200    /* This computes "T = Vd ^ (Vn >>u 32)" */
1201    V128 T; T.w64[1] = nHi; T.w64[0] = nLo;
1202    T.w32[0] = T.w32[1];
1203    T.w32[1] = T.w32[2];
1204    T.w32[2] = T.w32[3];
1205    T.w32[3] = 0;
1206    T.w64[1] ^= dHi;
1207    T.w64[0] ^= dLo;
1208    /* */
1209    res->w32[0] = ROL32(T.w32[0], 1);
1210    res->w32[1] = ROL32(T.w32[1], 1);
1211    res->w32[2] = ROL32(T.w32[2], 1);
1212    res->w32[3] = ROL32(T.w32[3], 1) ^ ROL32(T.w32[0], 2);
1213 }
1214
1215 /* CALLED FROM GENERATED CODE */
1216 void arm64g_dirtyhelper_SHA256H2 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1217                                    ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1218 {
1219    V128 X; X.w64[1] = nHi; X.w64[0] = nLo;
1220    V128 Y; Y.w64[1] = dHi; Y.w64[0] = dLo;
1221    V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1222    SHA256hash(&X, &Y, &W);
1223    res->w64[1] = Y.w64[1];
1224    res->w64[0] = Y.w64[0];
1225 }
1226
1227 /* CALLED FROM GENERATED CODE */
1228 void arm64g_dirtyhelper_SHA256H ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1229                                   ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1230 {
1231    V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1232    V128 Y; Y.w64[1] = nHi; Y.w64[0] = nLo;
1233    V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1234    SHA256hash(&X, &Y, &W);
1235    res->w64[1] = X.w64[1];
1236    res->w64[0] = X.w64[0];
1237 }
1238
1239 /* CALLED FROM GENERATED CODE */
1240 void arm64g_dirtyhelper_SHA256SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1241                                     ULong nHi, ULong nLo )
1242
1243 {
1244    res->w64[1] = res->w64[0] = 0;
1245    V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
1246    V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
1247    V128 T;
1248    T.w32[3] = op2.w32[0];
1249    T.w32[2] = op1.w32[3];
1250    T.w32[1] = op1.w32[2];
1251    T.w32[0] = op1.w32[1];
1252    UInt e;
1253    for (e = 0; e <= 3; e++) {
1254       UInt elt = T.w32[e];
1255       elt = ROR32(elt, 7) ^ ROR32(elt, 18) ^ (elt >> 3);
1256       res->w32[e] = elt + op1.w32[e];
1257    }
1258 }
1259
1260 /* CALLED FROM GENERATED CODE */
1261 void arm64g_dirtyhelper_SHA256SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1262                                     ULong nHi, ULong nLo,
1263                                     ULong mHi, ULong mLo )
1264 {
1265    res->w64[0] = res->w64[1] = 0;
1266    V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
1267    V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
1268    V128 op3; op3.w64[1] = mHi; op3.w64[0] = mLo;
1269    V128 T0;
1270    T0.w32[3] = op3.w32[0];
1271    T0.w32[2] = op2.w32[3];
1272    T0.w32[1] = op2.w32[2];
1273    T0.w32[0] = op2.w32[1];
1274    UInt T1[2];
1275    UInt e;
1276    T1[1] = op3.w32[3];
1277    T1[0] = op3.w32[2];
1278    for (e = 0; e <= 1; e++) {
1279       UInt elt = T1[e];
1280       elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
1281       elt = elt + op1.w32[e] + T0.w32[e];
1282       res->w32[e] = elt;
1283    }
1284    T1[1] = res->w32[1];
1285    T1[0] = res->w32[0];
1286    for (e = 2; e <= 3; e++) {
1287       UInt elt = T1[e-2];
1288       elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
1289       elt = elt + op1.w32[e] + T0.w32[e];
1290       res->w32[e] = elt;
1291    }
1292 }
1293
1294
1295 /*---------------------------------------------------------------*/
1296 /*--- Flag-helpers translation-time function specialisers.    ---*/
1297 /*--- These help iropt specialise calls to the above run-time ---*/
1298 /*--- flags functions.                                        ---*/
1299 /*---------------------------------------------------------------*/
1300
1301 /* Used by the optimiser to try specialisations.  Returns an
1302    equivalent expression, or NULL if none. */
1303
1304 static Bool isU64 ( IRExpr* e, ULong n )
1305 {
1306    return
1307       toBool( e->tag == Iex_Const
1308               && e->Iex.Const.con->tag == Ico_U64
1309               && e->Iex.Const.con->Ico.U64 == n );
1310 }
1311
1312 IRExpr* guest_arm64_spechelper ( const HChar* function_name,
1313                                  IRExpr** args,
1314                                  IRStmt** precedingStmts,
1315                                  Int      n_precedingStmts )
1316 {
1317 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
1318 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
1319 #  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
1320 #  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
1321 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
1322
1323    Int i, arity = 0;
1324    for (i = 0; args[i]; i++)
1325       arity++;
1326 //ZZ #  if 0
1327 //ZZ    vex_printf("spec request:\n");
1328 //ZZ    vex_printf("   %s  ", function_name);
1329 //ZZ    for (i = 0; i < arity; i++) {
1330 //ZZ       vex_printf("  ");
1331 //ZZ       ppIRExpr(args[i]);
1332 //ZZ    }
1333 //ZZ    vex_printf("\n");
1334 //ZZ #  endif
1335
1336    /* --------- specialising "arm64g_calculate_condition" --------- */
1337
1338    if (vex_streq(function_name, "arm64g_calculate_condition")) {
1339
1340       /* specialise calls to the "arm64g_calculate_condition" function.
1341          Not sure whether this is strictly necessary, but: the
1342          replacement IR must produce only the values 0 or 1.  Bits
1343          63:1 are required to be zero. */
1344       IRExpr *cond_n_op, *cc_dep1, *cc_dep2  ; //, *cc_ndep;
1345       vassert(arity == 4);
1346       cond_n_op = args[0]; /* (ARM64Condcode << 4)  |  ARM64G_CC_OP_* */
1347       cc_dep1   = args[1];
1348       cc_dep2   = args[2];
1349       //cc_ndep   = args[3];
1350
1351       /*---------------- SUB64 ----------------*/
1352
1353       /* 0, 1 */
1354       if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64)) {
1355          /* EQ after SUB --> test argL == argR */
1356          return unop(Iop_1Uto64,
1357                      binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1358       }
1359       if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64)) {
1360          /* NE after SUB --> test argL != argR */
1361          return unop(Iop_1Uto64,
1362                      binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1363       }
1364
1365       /* 2, 3 */
1366       if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB64)) {
1367          /* CS after SUB --> test argL >=u argR
1368                          --> test argR <=u argL */
1369          return unop(Iop_1Uto64,
1370                      binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1371       }
1372       if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB64)) {
1373          /* CC after SUB --> test argL <u argR */
1374          return unop(Iop_1Uto64,
1375                      binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1376       }
1377
1378       /* 8, 9 */
1379       if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB64)) {
1380          /* LS after SUB --> test argL <=u argR */
1381          return unop(Iop_1Uto64,
1382                      binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1383       }
1384       if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB64)) {
1385          /* HI after SUB --> test argL >u argR
1386                          --> test argR <u argL */
1387          return unop(Iop_1Uto64,
1388                      binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
1389       }
1390
1391       /* 10, 11 */
1392       if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB64)) {
1393          /* LT after SUB --> test argL <s argR */
1394          return unop(Iop_1Uto64,
1395                      binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1396       }
1397       if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB64)) {
1398          /* GE after SUB --> test argL >=s argR
1399                          --> test argR <=s argL */
1400          return unop(Iop_1Uto64,
1401                      binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1402       }
1403
1404       /* 12, 13 */
1405       if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB64)) {
1406          /* GT after SUB --> test argL >s argR
1407                          --> test argR <s argL */
1408          return unop(Iop_1Uto64,
1409                      binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1410       }
1411       if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB64)) {
1412          /* LE after SUB --> test argL <=s argR */
1413          return unop(Iop_1Uto64,
1414                      binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1415       }
1416
1417       /*---------------- SUB32 ----------------*/
1418
1419       /* 0, 1 */
1420       if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB32)) {
1421          /* EQ after SUB --> test argL == argR */
1422          return unop(Iop_1Uto64,
1423                      binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
1424                                         unop(Iop_64to32, cc_dep2)));
1425       }
1426       if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB32)) {
1427          /* NE after SUB --> test argL != argR */
1428          return unop(Iop_1Uto64,
1429                      binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
1430                                         unop(Iop_64to32, cc_dep2)));
1431       }
1432
1433       /* 2, 3 */
1434       if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB32)) {
1435          /* CS after SUB --> test argL >=u argR
1436                          --> test argR <=u argL */
1437          return unop(Iop_1Uto64,
1438                      binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2),
1439                                          unop(Iop_64to32, cc_dep1)));
1440       }
1441       if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB32)) {
1442          /* CC after SUB --> test argL <u argR */
1443          return unop(Iop_1Uto64,
1444                      binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep1),
1445                                          unop(Iop_64to32, cc_dep2)));
1446       }
1447
1448       /* 8, 9 */
1449       if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB32)) {
1450          /* LS after SUB --> test argL <=u argR */
1451          return unop(Iop_1Uto64,
1452                      binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep1),
1453                                          unop(Iop_64to32, cc_dep2)));
1454       }
1455       if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB32)) {
1456          /* HI after SUB --> test argL >u argR
1457                          --> test argR <u argL */
1458          return unop(Iop_1Uto64,
1459                      binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep2),
1460                                          unop(Iop_64to32, cc_dep1)));
1461       }
1462
1463       /* 10, 11 */
1464       if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB32)) {
1465          /* LT after SUB --> test argL <s argR */
1466          return unop(Iop_1Uto64,
1467                      binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep1),
1468                                          unop(Iop_64to32, cc_dep2)));
1469       }
1470       if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB32)) {
1471          /* GE after SUB --> test argL >=s argR
1472                          --> test argR <=s argL */
1473          return unop(Iop_1Uto64,
1474                      binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep2),
1475                                          unop(Iop_64to32, cc_dep1)));
1476       }
1477
1478       /* 12, 13 */
1479       if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB32)) {
1480          /* GT after SUB --> test argL >s argR
1481                          --> test argR <s argL */
1482          return unop(Iop_1Uto64,
1483                      binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep2),
1484                                          unop(Iop_64to32, cc_dep1)));
1485       }
1486       if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB32)) {
1487          /* LE after SUB --> test argL <=s argR */
1488          return unop(Iop_1Uto64,
1489                      binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep1),
1490                                          unop(Iop_64to32, cc_dep2)));
1491       }
1492
1493 //ZZ       /*---------------- SBB ----------------*/
1494 //ZZ
1495 //ZZ       if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
1496 //ZZ          /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
1497 //ZZ          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
1498 //ZZ          /* HS after SBB (same as C after SBB below)
1499 //ZZ             --> oldC ? (argL >=u argR) : (argL >u argR)
1500 //ZZ             --> oldC ? (argR <=u argL) : (argR <u argL)
1501 //ZZ          */
1502 //ZZ          return
1503 //ZZ             IRExpr_ITE(
1504 //ZZ                binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
1505 //ZZ                /* case oldC != 0 */
1506 //ZZ                unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
1507 //ZZ                /* case oldC == 0 */
1508 //ZZ                unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
1509 //ZZ             );
1510 //ZZ       }
1511
1512       /*---------------- LOGIC32 ----------------*/
1513
1514       if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_LOGIC32)) {
1515          /* EQ after LOGIC32 --> test res[31:0] == 0 */
1516          return unop(Iop_1Uto64,
1517                      binop(Iop_CmpEQ32,
1518                            unop(Iop_64to32, cc_dep1), mkU32(0)));
1519       }
1520       if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_LOGIC32)) {
1521          /* NE after LOGIC32 --> test res[31:0] != 0 */
1522          return unop(Iop_1Uto64,
1523                      binop(Iop_CmpNE32,
1524                            unop(Iop_64to32, cc_dep1), mkU32(0)));
1525       }
1526
1527       /*---------------- LOGIC64 ----------------*/
1528
1529       if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_LOGIC64)) {
1530          /* EQ after LOGIC64 --> test res[63:0] == 0 */
1531          return unop(Iop_1Uto64,
1532                      binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1533       }
1534       if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_LOGIC64)) {
1535          /* NE after LOGIC64 --> test res[63:0] != 0 */
1536          return unop(Iop_1Uto64,
1537                      binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1538       }
1539
1540 //ZZ       if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
1541 //ZZ          /* NE after LOGIC --> test res != 0 */
1542 //ZZ          return unop(Iop_1Uto32,
1543 //ZZ                      binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
1544 //ZZ       }
1545 //ZZ
1546 //ZZ       if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
1547 //ZZ          /* PL after LOGIC --> test (res >> 31) == 0 */
1548 //ZZ          return unop(Iop_1Uto32,
1549 //ZZ                      binop(Iop_CmpEQ32,
1550 //ZZ                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
1551 //ZZ                            mkU32(0)));
1552 //ZZ       }
1553 //ZZ       if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
1554 //ZZ          /* MI after LOGIC --> test (res >> 31) == 1 */
1555 //ZZ          return unop(Iop_1Uto32,
1556 //ZZ                      binop(Iop_CmpEQ32,
1557 //ZZ                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
1558 //ZZ                            mkU32(1)));
1559 //ZZ       }
1560
1561       /*---------------- COPY ----------------*/
1562
1563       if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_COPY)) {
1564          /* EQ after COPY --> (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1 */
1565          return binop(Iop_And64,
1566                       binop(Iop_Shr64, cc_dep1,
1567                                        mkU8(ARM64G_CC_SHIFT_Z)),
1568                       mkU64(1));
1569       }
1570       if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_COPY)) {
1571          /* NE after COPY --> ((cc_dep1 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1 */
1572          return binop(Iop_And64,
1573                       binop(Iop_Xor64,
1574                             binop(Iop_Shr64, cc_dep1,
1575                                              mkU8(ARM64G_CC_SHIFT_Z)),
1576                             mkU64(1)),
1577                       mkU64(1));
1578       }
1579
1580 //ZZ       /*----------------- AL -----------------*/
1581 //ZZ
1582 //ZZ       /* A critically important case for Thumb code.
1583 //ZZ
1584 //ZZ          What we're trying to spot is the case where cond_n_op is an
1585 //ZZ          expression of the form Or32(..., 0xE0) since that means the
1586 //ZZ          caller is asking for CondAL and we can simply return 1
1587 //ZZ          without caring what the ... part is.  This is a potentially
1588 //ZZ          dodgy kludge in that it assumes that the ... part has zeroes
1589 //ZZ          in bits 7:4, so that the result of the Or32 is guaranteed to
1590 //ZZ          be 0xE in bits 7:4.  Given that the places where this first
1591 //ZZ          arg are constructed (in guest_arm_toIR.c) are very
1592 //ZZ          constrained, we can get away with this.  To make this
1593 //ZZ          guaranteed safe would require to have a new primop, Slice44
1594 //ZZ          or some such, thusly
1595 //ZZ
1596 //ZZ          Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]
1597 //ZZ
1598 //ZZ          and we would then look for Slice44(0xE0, ...)
1599 //ZZ          which would give the required safety property.
1600 //ZZ
1601 //ZZ          It would be infeasibly expensive to scan backwards through
1602 //ZZ          the entire block looking for an assignment to the temp, so
1603 //ZZ          just look at the previous 16 statements.  That should find it
1604 //ZZ          if it is an interesting case, as a result of how the
1605 //ZZ          boilerplate guff at the start of each Thumb insn translation
1606 //ZZ          is made.
1607 //ZZ       */
1608 //ZZ       if (cond_n_op->tag == Iex_RdTmp) {
1609 //ZZ          Int    j;
1610 //ZZ          IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
1611 //ZZ          Int    limit    = n_precedingStmts - 16;
1612 //ZZ          if (limit < 0) limit = 0;
1613 //ZZ          if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
1614 //ZZ          for (j = n_precedingStmts - 1; j >= limit; j--) {
1615 //ZZ             IRStmt* st = precedingStmts[j];
1616 //ZZ             if (st->tag == Ist_WrTmp
1617 //ZZ                 && st->Ist.WrTmp.tmp == look_for
1618 //ZZ                 && st->Ist.WrTmp.data->tag == Iex_Binop
1619 //ZZ                 && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
1620 //ZZ                 && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
1621 //ZZ                return mkU32(1);
1622 //ZZ          }
1623 //ZZ          /* Didn't find any useful binding to the first arg
1624 //ZZ             in the previous 16 stmts. */
1625 //ZZ       }
1626    }
1627
1628 //ZZ    /* --------- specialising "armg_calculate_flag_c" --------- */
1629 //ZZ
1630 //ZZ    else
1631 //ZZ    if (vex_streq(function_name, "armg_calculate_flag_c")) {
1632 //ZZ
1633 //ZZ       /* specialise calls to the "armg_calculate_flag_c" function.
1634 //ZZ          Note that the returned value must be either 0 or 1; nonzero
1635 //ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
1636 //ZZ          values (from the thunk) are assumed to have bits 31:1
1637 //ZZ          clear. */
1638 //ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1639 //ZZ       vassert(arity == 4);
1640 //ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
1641 //ZZ       cc_dep1 = args[1];
1642 //ZZ       cc_dep2 = args[2];
1643 //ZZ       cc_ndep = args[3];
1644 //ZZ
1645 //ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
1646 //ZZ          /* Thunk args are (result, shco, oldV) */
1647 //ZZ          /* C after LOGIC --> shco */
1648 //ZZ          return cc_dep2;
1649 //ZZ       }
1650 //ZZ
1651 //ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
1652 //ZZ          /* Thunk args are (argL, argR, unused) */
1653 //ZZ          /* C after SUB --> argL >=u argR
1654 //ZZ                         --> argR <=u argL */
1655 //ZZ          return unop(Iop_1Uto32,
1656 //ZZ                      binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
1657 //ZZ       }
1658 //ZZ
1659 //ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
1660 //ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
1661 //ZZ          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
1662 //ZZ          /* C after SBB (same as HS after SBB above)
1663 //ZZ             --> oldC ? (argL >=u argR) : (argL >u argR)
1664 //ZZ             --> oldC ? (argR <=u argL) : (argR <u argL)
1665 //ZZ          */
1666 //ZZ          return
1667 //ZZ             IRExpr_ITE(
1668 //ZZ                binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
1669 //ZZ                /* case oldC != 0 */
1670 //ZZ                unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
1671 //ZZ                /* case oldC == 0 */
1672 //ZZ                unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
1673 //ZZ             );
1674 //ZZ       }
1675 //ZZ
1676 //ZZ    }
1677 //ZZ
1678 //ZZ    /* --------- specialising "armg_calculate_flag_v" --------- */
1679 //ZZ
1680 //ZZ    else
1681 //ZZ    if (vex_streq(function_name, "armg_calculate_flag_v")) {
1682 //ZZ
1683 //ZZ       /* specialise calls to the "armg_calculate_flag_v" function.
1684 //ZZ          Note that the returned value must be either 0 or 1; nonzero
1685 //ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
1686 //ZZ          values (from the thunk) are assumed to have bits 31:1
1687 //ZZ          clear. */
1688 //ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1689 //ZZ       vassert(arity == 4);
1690 //ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
1691 //ZZ       cc_dep1 = args[1];
1692 //ZZ       cc_dep2 = args[2];
1693 //ZZ       cc_ndep = args[3];
1694 //ZZ
1695 //ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
1696 //ZZ          /* Thunk args are (result, shco, oldV) */
1697 //ZZ          /* V after LOGIC --> oldV */
1698 //ZZ          return cc_ndep;
1699 //ZZ       }
1700 //ZZ
1701 //ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
1702 //ZZ          /* Thunk args are (argL, argR, unused) */
1703 //ZZ          /* V after SUB
1704 //ZZ             --> let res = argL - argR
1705 //ZZ                 in ((argL ^ argR) & (argL ^ res)) >> 31
1706 //ZZ             --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
1707 //ZZ          */
1708 //ZZ          IRExpr* argL = cc_dep1;
1709 //ZZ          IRExpr* argR = cc_dep2;
1710 //ZZ          return
1711 //ZZ             binop(Iop_Shr32,
1712 //ZZ                   binop(Iop_And32,
1713 //ZZ                         binop(Iop_Xor32, argL, argR),
1714 //ZZ                         binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
1715 //ZZ                   ),
1716 //ZZ                   mkU8(31)
1717 //ZZ             );
1718 //ZZ       }
1719 //ZZ
1720 //ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
1721 //ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
1722 //ZZ          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
1723 //ZZ          /* V after SBB
1724 //ZZ             --> let res = argL - argR - (oldC ^ 1)
1725 //ZZ                 in  (argL ^ argR) & (argL ^ res) & 1
1726 //ZZ          */
1727 //ZZ          return
1728 //ZZ             binop(
1729 //ZZ                Iop_And32,
1730 //ZZ                binop(
1731 //ZZ                   Iop_And32,
1732 //ZZ                   // argL ^ argR
1733 //ZZ                   binop(Iop_Xor32, cc_dep1, cc_dep2),
1734 //ZZ                   // argL ^ (argL - argR - (oldC ^ 1))
1735 //ZZ                   binop(Iop_Xor32,
1736 //ZZ                         cc_dep1,
1737 //ZZ                         binop(Iop_Sub32,
1738 //ZZ                               binop(Iop_Sub32, cc_dep1, cc_dep2),
1739 //ZZ                               binop(Iop_Xor32, cc_ndep, mkU32(1)))
1740 //ZZ                   )
1741 //ZZ                ),
1742 //ZZ                mkU32(1)
1743 //ZZ             );
1744 //ZZ       }
1745 //ZZ
1746 //ZZ    }
1747
1748 #  undef unop
1749 #  undef binop
1750 #  undef mkU64
1751 #  undef mkU8
1752
1753    return NULL;
1754 }
1755
1756
1757 /*----------------------------------------------*/
1758 /*--- The exported fns ..                    ---*/
1759 /*----------------------------------------------*/
1760
1761 //ZZ /* VISIBLE TO LIBVEX CLIENT */
1762 //ZZ #if 0
1763 //ZZ void LibVEX_GuestARM_put_flags ( UInt flags_native,
1764 //ZZ                                  /*OUT*/VexGuestARMState* vex_state )
1765 //ZZ {
1766 //ZZ    vassert(0); // FIXME
1767 //ZZ
1768 //ZZ    /* Mask out everything except N Z V C. */
1769 //ZZ    flags_native
1770 //ZZ       &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);
1771 //ZZ
1772 //ZZ    vex_state->guest_CC_OP   = ARMG_CC_OP_COPY;
1773 //ZZ    vex_state->guest_CC_DEP1 = flags_native;
1774 //ZZ    vex_state->guest_CC_DEP2 = 0;
1775 //ZZ    vex_state->guest_CC_NDEP = 0;
1776 //ZZ }
1777 //ZZ #endif
1778
1779 /* VISIBLE TO LIBVEX CLIENT */
1780 ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/const VexGuestARM64State* vex_state )
1781 {
1782    ULong nzcv = 0;
1783    // NZCV
1784    nzcv |= arm64g_calculate_flags_nzcv(
1785                vex_state->guest_CC_OP,
1786                vex_state->guest_CC_DEP1,
1787                vex_state->guest_CC_DEP2,
1788                vex_state->guest_CC_NDEP
1789             );
1790    vassert(0 == (nzcv & 0xFFFFFFFF0FFFFFFFULL));
1791 //ZZ    // Q
1792 //ZZ    if (vex_state->guest_QFLAG32 > 0)
1793 //ZZ       cpsr |= (1 << 27);
1794 //ZZ    // GE
1795 //ZZ    if (vex_state->guest_GEFLAG0 > 0)
1796 //ZZ       cpsr |= (1 << 16);
1797 //ZZ    if (vex_state->guest_GEFLAG1 > 0)
1798 //ZZ       cpsr |= (1 << 17);
1799 //ZZ    if (vex_state->guest_GEFLAG2 > 0)
1800 //ZZ       cpsr |= (1 << 18);
1801 //ZZ    if (vex_state->guest_GEFLAG3 > 0)
1802 //ZZ       cpsr |= (1 << 19);
1803 //ZZ    // M
1804 //ZZ    cpsr |= (1 << 4); // 0b10000 means user-mode
1805 //ZZ    // J,T   J (bit 24) is zero by initialisation above
1806 //ZZ    // T  we copy from R15T[0]
1807 //ZZ    if (vex_state->guest_R15T & 1)
1808 //ZZ       cpsr |= (1 << 5);
1809 //ZZ    // ITSTATE we punt on for the time being.  Could compute it
1810 //ZZ    // if needed though.
1811 //ZZ    // E, endianness, 0 (littleendian) from initialisation above
1812 //ZZ    // A,I,F disable some async exceptions.  Not sure about these.
1813 //ZZ    // Leave as zero for the time being.
1814    return nzcv;
1815 }
1816
1817 /* VISIBLE TO LIBVEX CLIENT */
1818 ULong LibVEX_GuestARM64_get_fpsr ( const VexGuestARM64State* vex_state )
1819 {
1820    UInt w32 = vex_state->guest_QCFLAG[0] | vex_state->guest_QCFLAG[1]
1821               | vex_state->guest_QCFLAG[2] | vex_state->guest_QCFLAG[3];
1822    ULong fpsr = 0;
1823    // QC
1824    if (w32 != 0)
1825       fpsr |= (1 << 27);
1826    return fpsr;
1827 }
1828
1829 void LibVEX_GuestARM64_set_fpsr ( /*MOD*/VexGuestARM64State* vex_state,
1830                                   ULong fpsr )
1831 {
1832    // QC
1833    vex_state->guest_QCFLAG[0] = (UInt)((fpsr >> 27) & 1);
1834    vex_state->guest_QCFLAG[1] = 0;
1835    vex_state->guest_QCFLAG[2] = 0;
1836    vex_state->guest_QCFLAG[3] = 0;
1837 }
1838
1839 /* VISIBLE TO LIBVEX CLIENT */
1840 void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state )
1841 {
1842    vex_bzero(vex_state, sizeof(*vex_state));
1843 //ZZ    vex_state->host_EvC_FAILADDR = 0;
1844 //ZZ    vex_state->host_EvC_COUNTER = 0;
1845 //ZZ
1846 //ZZ    vex_state->guest_R0  = 0;
1847 //ZZ    vex_state->guest_R1  = 0;
1848 //ZZ    vex_state->guest_R2  = 0;
1849 //ZZ    vex_state->guest_R3  = 0;
1850 //ZZ    vex_state->guest_R4  = 0;
1851 //ZZ    vex_state->guest_R5  = 0;
1852 //ZZ    vex_state->guest_R6  = 0;
1853 //ZZ    vex_state->guest_R7  = 0;
1854 //ZZ    vex_state->guest_R8  = 0;
1855 //ZZ    vex_state->guest_R9  = 0;
1856 //ZZ    vex_state->guest_R10 = 0;
1857 //ZZ    vex_state->guest_R11 = 0;
1858 //ZZ    vex_state->guest_R12 = 0;
1859 //ZZ    vex_state->guest_R13 = 0;
1860 //ZZ    vex_state->guest_R14 = 0;
1861 //ZZ    vex_state->guest_R15T = 0;  /* NB: implies ARM mode */
1862 //ZZ
1863    vex_state->guest_CC_OP   = ARM64G_CC_OP_COPY;
1864 //ZZ    vex_state->guest_CC_DEP1 = 0;
1865 //ZZ    vex_state->guest_CC_DEP2 = 0;
1866 //ZZ    vex_state->guest_CC_NDEP = 0;
1867 //ZZ    vex_state->guest_QFLAG32 = 0;
1868 //ZZ    vex_state->guest_GEFLAG0 = 0;
1869 //ZZ    vex_state->guest_GEFLAG1 = 0;
1870 //ZZ    vex_state->guest_GEFLAG2 = 0;
1871 //ZZ    vex_state->guest_GEFLAG3 = 0;
1872 //ZZ
1873 //ZZ    vex_state->guest_EMNOTE  = EmNote_NONE;
1874 //ZZ    vex_state->guest_CMSTART = 0;
1875 //ZZ    vex_state->guest_CMLEN   = 0;
1876 //ZZ    vex_state->guest_NRADDR  = 0;
1877 //ZZ    vex_state->guest_IP_AT_SYSCALL = 0;
1878 //ZZ
1879 //ZZ    vex_state->guest_D0  = 0;
1880 //ZZ    vex_state->guest_D1  = 0;
1881 //ZZ    vex_state->guest_D2  = 0;
1882 //ZZ    vex_state->guest_D3  = 0;
1883 //ZZ    vex_state->guest_D4  = 0;
1884 //ZZ    vex_state->guest_D5  = 0;
1885 //ZZ    vex_state->guest_D6  = 0;
1886 //ZZ    vex_state->guest_D7  = 0;
1887 //ZZ    vex_state->guest_D8  = 0;
1888 //ZZ    vex_state->guest_D9  = 0;
1889 //ZZ    vex_state->guest_D10 = 0;
1890 //ZZ    vex_state->guest_D11 = 0;
1891 //ZZ    vex_state->guest_D12 = 0;
1892 //ZZ    vex_state->guest_D13 = 0;
1893 //ZZ    vex_state->guest_D14 = 0;
1894 //ZZ    vex_state->guest_D15 = 0;
1895 //ZZ    vex_state->guest_D16 = 0;
1896 //ZZ    vex_state->guest_D17 = 0;
1897 //ZZ    vex_state->guest_D18 = 0;
1898 //ZZ    vex_state->guest_D19 = 0;
1899 //ZZ    vex_state->guest_D20 = 0;
1900 //ZZ    vex_state->guest_D21 = 0;
1901 //ZZ    vex_state->guest_D22 = 0;
1902 //ZZ    vex_state->guest_D23 = 0;
1903 //ZZ    vex_state->guest_D24 = 0;
1904 //ZZ    vex_state->guest_D25 = 0;
1905 //ZZ    vex_state->guest_D26 = 0;
1906 //ZZ    vex_state->guest_D27 = 0;
1907 //ZZ    vex_state->guest_D28 = 0;
1908 //ZZ    vex_state->guest_D29 = 0;
1909 //ZZ    vex_state->guest_D30 = 0;
1910 //ZZ    vex_state->guest_D31 = 0;
1911 //ZZ
1912 //ZZ    /* ARM encoded; zero is the default as it happens (result flags
1913 //ZZ       (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
1914 //ZZ       all exns masked, all exn sticky bits cleared). */
1915 //ZZ    vex_state->guest_FPSCR = 0;
1916 //ZZ
1917 //ZZ    vex_state->guest_TPIDRURO = 0;
1918 //ZZ
1919 //ZZ    /* Not in a Thumb IT block. */
1920 //ZZ    vex_state->guest_ITSTATE = 0;
1921 //ZZ
1922 //ZZ    vex_state->padding1 = 0;
1923 //ZZ    vex_state->padding2 = 0;
1924 //ZZ    vex_state->padding3 = 0;
1925 //ZZ    vex_state->padding4 = 0;
1926 //ZZ    vex_state->padding5 = 0;
1927 }
1928
1929
1930 /*-----------------------------------------------------------*/
/*--- Describing the arm64 guest state, for the benefit   ---*/
1932 /*--- of iropt and instrumenters.                         ---*/
1933 /*-----------------------------------------------------------*/
1934
1935 /* Figure out if any part of the guest state contained in minoff
1936    .. maxoff requires precise memory exceptions.  If in doubt return
1937    True (but this generates significantly slower code).
1938
1939    We enforce precise exns for guest SP, PC, 29(FP), 30(LR).
1940    That might be overkill (for 29 and 30); I don't know.
1941 */
1942 Bool guest_arm64_state_requires_precise_mem_exns (
1943         Int minoff, Int maxoff, VexRegisterUpdates pxControl
1944      )
1945 {
1946    Int xsp_min = offsetof(VexGuestARM64State, guest_XSP);
1947    Int xsp_max = xsp_min + 8 - 1;
1948    Int pc_min  = offsetof(VexGuestARM64State, guest_PC);
1949    Int pc_max  = pc_min + 8 - 1;
1950
1951    if (maxoff < xsp_min || minoff > xsp_max) {
1952       /* no overlap with xsp */
1953       if (pxControl == VexRegUpdSpAtMemAccess)
1954          return False; // We only need to check stack pointer.
1955    } else {
1956       return True;
1957    }
1958
1959    if (maxoff < pc_min || minoff > pc_max) {
1960       /* no overlap with pc */
1961    } else {
1962       return True;
1963    }
1964
1965    /* Guessing that we need PX for FP, but I don't really know. */
1966    Int x29_min = offsetof(VexGuestARM64State, guest_X29);
1967    Int x29_max = x29_min + 8 - 1;
1968
1969    if (maxoff < x29_min || minoff > x29_max) {
1970       /* no overlap with x29 */
1971    } else {
1972       return True;
1973    }
1974
1975    /* Guessing that we need PX for LR, but I don't really know. */
1976    Int x30_min = offsetof(VexGuestARM64State, guest_X30);
1977    Int x30_max = x30_min + 8 - 1;
1978
1979    if (maxoff < x30_min || minoff > x30_max) {
1980       /* no overlap with r30 */
1981    } else {
1982       return True;
1983    }
1984
1985    return False;
1986 }
1987
1988
1989 #define ALWAYSDEFD(field)                             \
1990     { offsetof(VexGuestARM64State, field),            \
1991       (sizeof ((VexGuestARM64State*)0)->field) }
1992 VexGuestLayout
1993    arm64Guest_layout
1994       = {
1995           /* Total size of the guest state, in bytes. */
1996           .total_sizeB = sizeof(VexGuestARM64State),
1997
1998           /* Describe the stack pointer. */
1999           .offset_SP = offsetof(VexGuestARM64State,guest_XSP),
2000           .sizeof_SP = 8,
2001
2002           /* Describe the instruction pointer. */
2003           .offset_IP = offsetof(VexGuestARM64State,guest_PC),
2004           .sizeof_IP = 8,
2005
2006           /* Describe any sections to be regarded by Memcheck as
2007              'always-defined'. */
2008           .n_alwaysDefd = 9,
2009
2010           /* flags thunk: OP is always defd, whereas DEP1 and DEP2
2011              have to be tracked.  See detailed comment in gdefs.h on
2012              meaning of thunk fields. */
2013           .alwaysDefd
2014              = { /* 0 */ ALWAYSDEFD(guest_PC),
2015                  /* 1 */ ALWAYSDEFD(guest_CC_OP),
2016                  /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
2017                  /* 3 */ ALWAYSDEFD(guest_EMNOTE),
2018                  /* 4 */ ALWAYSDEFD(guest_CMSTART),
2019                  /* 5 */ ALWAYSDEFD(guest_CMLEN),
2020                  /* 6 */ ALWAYSDEFD(guest_NRADDR),
2021                  /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
2022                  /* 8 */ ALWAYSDEFD(guest_TPIDR_EL0)
2023                }
2024         };
2025
2026
2027 /*---------------------------------------------------------------*/
2028 /*--- end                               guest_arm64_helpers.c ---*/
2029 /*---------------------------------------------------------------*/