2 /*---------------------------------------------------------------*/
3 /*--- begin guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex_emnote.h"
36 #include "libvex_guest_amd64.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
40 #include "main_util.h"
41 #include "main_globals.h"
42 #include "guest_generic_bb_to_IR.h"
43 #include "guest_amd64_defs.h"
44 #include "guest_generic_x87.h"
47 /* This file contains helper functions for amd64 guest code.
48 Calls to these functions are generated by the back end.
49 These calls are of course in the host machine code and
50 this file will be compiled to host machine code, so that
51 all makes sense.
53 Only change the signatures of these helper functions very
54 carefully. If you change the signature here, you'll have to change
55 the parameters passed to it in the IR calls constructed by
56 guest-amd64/toIR.c.
58 The convention used is that all functions called from generated
59 code are named amd64g_<something>, and any function whose name lacks
60 that prefix is not called from generated code. Note that some
61 LibVEX_* functions can however be called by VEX's client, but that
62 is not the same as calling them from VEX-generated code.
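/* Illustration only (a sketch of the calling convention, not a definitive
   description): these helpers read the lazily-maintained "flags thunk" --
   an operation tag plus up to three operands -- that toIR.c keeps in the
   guest state.  Conceptually, after a 32-bit add the thunk holds

      CC_OP   = AMD64G_CC_OP_ADDL
      CC_DEP1 = argL
      CC_DEP2 = argR
      CC_NDEP = (unused for plain ADD)

   and a later flag-consuming instruction turns into a call such as
   amd64g_calculate_rflags_all(CC_OP, CC_DEP1, CC_DEP2, CC_NDEP). */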
66 /* Set to 1 to get detailed profiling info about use of the flag
67 machinery. */
68 #define PROFILE_RFLAGS 0
71 /*---------------------------------------------------------------*/
72 /*--- %rflags run-time helpers. ---*/
73 /*---------------------------------------------------------------*/
75 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
76 after imulq/mulq. */
78 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
80 const Long halfMask = 0xFFFFFFFFLL;
81 ULong u0, v0, w0;
82 Long u1, v1, w1, w2, t;
83 u0 = u & halfMask;
84 u1 = u >> 32;
85 v0 = v & halfMask;
86 v1 = v >> 32;
87 w0 = u0 * v0;
88 t = u1 * v0 + (w0 >> 32);
89 w1 = t & halfMask;
90 w2 = t >> 32;
91 w1 = u0 * v1 + w1;
92 *rHi = u1 * v1 + w2 + (w1 >> 32);
93 *rLo = (Long)((ULong)u * (ULong)v);
96 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
98 const ULong halfMask = 0xFFFFFFFFULL;
99 ULong u0, v0, w0;
100 ULong u1, v1, w1,w2,t;
101 u0 = u & halfMask;
102 u1 = u >> 32;
103 v0 = v & halfMask;
104 v1 = v >> 32;
105 w0 = u0 * v0;
106 t = u1 * v0 + (w0 >> 32);
107 w1 = t & halfMask;
108 w2 = t >> 32;
109 w1 = u0 * v1 + w1;
110 *rHi = u1 * v1 + w2 + (w1 >> 32);
111 *rLo = u * v;
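/* Illustration only, not part of the build: on compilers that provide
   unsigned __int128, the schoolbook decomposition in mullU64 can be
   cross-checked directly.  A minimal sketch, assuming such a compiler:

      static void check_mullU64 ( ULong u, ULong v )
      {
         ULong hi, lo;
         mullU64(u, v, &hi, &lo);
         unsigned __int128 p = (unsigned __int128)u * v;
         vassert(lo == (ULong)p);
         vassert(hi == (ULong)(p >> 64));
      }
*/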
115 static const UChar parity_table[256] = {
116 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
117 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
118 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
119 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
120 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
123 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
124 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
125 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
126 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
127 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
128 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
131 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
132 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
135 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
136 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
139 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
140 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
141 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
142 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
143 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
144 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
147 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
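/* Each entry above is AMD64G_CC_MASK_P when its index has an even number
   of 1 bits, and 0 otherwise: x86 PF is the even parity of the result's
   low byte.  A sketch (illustration only; "table" is a hypothetical
   writable scratch array) of how the entries could be generated:

      for (UInt i = 0; i < 256; i++)
         table[i] = (__builtin_popcount(i) & 1) ? 0 : AMD64G_CC_MASK_P;
*/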
150 /* generalised left-shifter */
151 static inline Long lshift ( Long x, Int n )
153 if (n >= 0)
154 return (ULong)x << n;
155 else
156 return x >> (-n);
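/* For example, with DATA_BITS == 32 the idiom "lshift(res, 8 - DATA_BITS)"
   used below is a right shift by 24, moving result bit 31 down to bit 7
   where the SF mask (0x80) expects it; with DATA_BITS == 8 the shift count
   is 0 and the value is used as-is. */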
159 /* identity on ULong */
160 static inline ULong idULong ( ULong x )
162 return x;
166 #define PREAMBLE(__data_bits) \
167 /* const */ ULong DATA_MASK \
168 = __data_bits==8 \
169 ? 0xFFULL \
170 : (__data_bits==16 \
171 ? 0xFFFFULL \
172 : (__data_bits==32 \
173 ? 0xFFFFFFFFULL \
174 : 0xFFFFFFFFFFFFFFFFULL)); \
175 /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \
176 /* const */ ULong CC_DEP1 = cc_dep1_formal; \
177 /* const */ ULong CC_DEP2 = cc_dep2_formal; \
178 /* const */ ULong CC_NDEP = cc_ndep_formal; \
179 /* Four bogus assignments, which hopefully gcc can */ \
180 /* optimise away, and which stop it complaining about */ \
181 /* unused variables. */ \
182 SIGN_MASK = SIGN_MASK; \
183 DATA_MASK = DATA_MASK; \
184 CC_DEP2 = CC_DEP2; \
185 CC_NDEP = CC_NDEP;
188 /*-------------------------------------------------------------*/
190 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
192 PREAMBLE(DATA_BITS); \
193 { ULong cf, pf, af, zf, sf, of; \
194 ULong argL, argR, res; \
195 argL = CC_DEP1; \
196 argR = CC_DEP2; \
197 res = argL + argR; \
198 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
199 pf = parity_table[(UChar)res]; \
200 af = (res ^ argL ^ argR) & 0x10; \
201 zf = ((DATA_UTYPE)res == 0) << 6; \
202 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
203 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
204 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
205 return cf | pf | af | zf | sf | of; \
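/* Worked example (illustration only): an 8-bit add of 0x7F + 0x01 gives
   res == 0x80.  Then cf == 0 (no unsigned wraparound), af == 0x10 (carry
   out of bit 3), zf == 0, sf == 0x80, pf == 0 (0x80 has odd parity), and
   of == AMD64G_CC_MASK_O, since both operands are non-negative but the
   result is negative -- signed overflow. */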
209 /*-------------------------------------------------------------*/
211 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
213 PREAMBLE(DATA_BITS); \
214 { ULong cf, pf, af, zf, sf, of; \
215 ULong argL, argR, res; \
216 argL = CC_DEP1; \
217 argR = CC_DEP2; \
218 res = argL - argR; \
219 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
220 pf = parity_table[(UChar)res]; \
221 af = (res ^ argL ^ argR) & 0x10; \
222 zf = ((DATA_UTYPE)res == 0) << 6; \
223 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
224 of = lshift((argL ^ argR) & (argL ^ res), \
225 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
226 return cf | pf | af | zf | sf | of; \
230 /*-------------------------------------------------------------*/
232 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
234 PREAMBLE(DATA_BITS); \
235 { ULong cf, pf, af, zf, sf, of; \
236 ULong argL, argR, oldC, res; \
237 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
238 argL = CC_DEP1; \
239 argR = CC_DEP2 ^ oldC; \
240 res = (argL + argR) + oldC; \
241 if (oldC) \
242 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
243 else \
244 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
245 pf = parity_table[(UChar)res]; \
246 af = (res ^ argL ^ argR) & 0x10; \
247 zf = ((DATA_UTYPE)res == 0) << 6; \
248 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
249 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
250 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
251 return cf | pf | af | zf | sf | of; \
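/* Note: CC_DEP2 as supplied holds the second operand xor'd with the old
   carry, so "CC_DEP2 ^ oldC" above recovers the true argR.  For example
   (illustration only), with oldC == 1 and a true argR of 0x10 the thunk
   carries CC_DEP2 == 0x11, and the computation becomes
   res = argL + 0x10 + 1, as expected for ADC. */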
255 /*-------------------------------------------------------------*/
257 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
259 PREAMBLE(DATA_BITS); \
260 { ULong cf, pf, af, zf, sf, of; \
261 ULong argL, argR, oldC, res; \
262 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
263 argL = CC_DEP1; \
264 argR = CC_DEP2 ^ oldC; \
265 res = (argL - argR) - oldC; \
266 if (oldC) \
267 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
268 else \
269 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
270 pf = parity_table[(UChar)res]; \
271 af = (res ^ argL ^ argR) & 0x10; \
272 zf = ((DATA_UTYPE)res == 0) << 6; \
273 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
274 of = lshift((argL ^ argR) & (argL ^ res), \
275 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
276 return cf | pf | af | zf | sf | of; \
280 /*-------------------------------------------------------------*/
282 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
284 PREAMBLE(DATA_BITS); \
285 { ULong cf, pf, af, zf, sf, of; \
286 cf = 0; \
287 pf = parity_table[(UChar)CC_DEP1]; \
288 af = 0; \
289 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
290 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
291 of = 0; \
292 return cf | pf | af | zf | sf | of; \
296 /*-------------------------------------------------------------*/
298 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
300 PREAMBLE(DATA_BITS); \
301 { ULong cf, pf, af, zf, sf, of; \
302 ULong argL, argR, res; \
303 res = CC_DEP1; \
304 argL = res - 1; \
305 argR = 1; \
306 cf = CC_NDEP & AMD64G_CC_MASK_C; \
307 pf = parity_table[(UChar)res]; \
308 af = (res ^ argL ^ argR) & 0x10; \
309 zf = ((DATA_UTYPE)res == 0) << 6; \
310 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
311 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
312 return cf | pf | af | zf | sf | of; \
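/* For example (8-bit INC, illustration only): incrementing 0x7F yields
   res == 0x80 == SIGN_MASK, the only case in which INC overflows, so "of"
   is set.  INC and DEC leave CF untouched, which is why "cf" is taken from
   CC_NDEP (the old flags) rather than recomputed. */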
316 /*-------------------------------------------------------------*/
318 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
320 PREAMBLE(DATA_BITS); \
321 { ULong cf, pf, af, zf, sf, of; \
322 ULong argL, argR, res; \
323 res = CC_DEP1; \
324 argL = res + 1; \
325 argR = 1; \
326 cf = CC_NDEP & AMD64G_CC_MASK_C; \
327 pf = parity_table[(UChar)res]; \
328 af = (res ^ argL ^ argR) & 0x10; \
329 zf = ((DATA_UTYPE)res == 0) << 6; \
330 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
331 of = ((res & DATA_MASK) \
332 == ((ULong)SIGN_MASK - 1)) << 11; \
333 return cf | pf | af | zf | sf | of; \
337 /*-------------------------------------------------------------*/
339 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
341 PREAMBLE(DATA_BITS); \
342 { ULong cf, pf, af, zf, sf, of; \
343 cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
344 pf = parity_table[(UChar)CC_DEP1]; \
345 af = 0; /* undefined */ \
346 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
347 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
348 /* of is defined if shift count == 1 */ \
349 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
350 & AMD64G_CC_MASK_O; \
351 return cf | pf | af | zf | sf | of; \
355 /*-------------------------------------------------------------*/
357 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
359 PREAMBLE(DATA_BITS); \
360 { ULong cf, pf, af, zf, sf, of; \
361 cf = CC_DEP2 & 1; \
362 pf = parity_table[(UChar)CC_DEP1]; \
363 af = 0; /* undefined */ \
364 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
365 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
366 /* of is defined if shift count == 1 */ \
367 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
368 & AMD64G_CC_MASK_O; \
369 return cf | pf | af | zf | sf | of; \
373 /*-------------------------------------------------------------*/
375 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
376 /* DEP1 = result, NDEP = old flags */
377 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
379 PREAMBLE(DATA_BITS); \
380 { ULong fl \
381 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
382 | (AMD64G_CC_MASK_C & CC_DEP1) \
383 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
384 11-(DATA_BITS-1)) \
385 ^ lshift(CC_DEP1, 11))); \
386 return fl; \
390 /*-------------------------------------------------------------*/
392 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
393 /* DEP1 = result, NDEP = old flags */
394 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
396 PREAMBLE(DATA_BITS); \
397 { ULong fl \
398 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
399 | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
400 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
401 11-(DATA_BITS-1)) \
402 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
403 return fl; \
407 /*-------------------------------------------------------------*/
409 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
410 DATA_U2TYPE, NARROWto2U) \
412 PREAMBLE(DATA_BITS); \
413 { ULong cf, pf, af, zf, sf, of; \
414 DATA_UTYPE hi; \
415 DATA_UTYPE lo \
416 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
417 * ((DATA_UTYPE)CC_DEP2) ); \
418 DATA_U2TYPE rr \
419 = NARROWto2U( \
420 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
421 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
422 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
423 cf = (hi != 0); \
424 pf = parity_table[(UChar)lo]; \
425 af = 0; /* undefined */ \
426 zf = (lo == 0) << 6; \
427 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
428 of = cf << 11; \
429 return cf | pf | af | zf | sf | of; \
433 /*-------------------------------------------------------------*/
435 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
436 DATA_S2TYPE, NARROWto2S) \
438 PREAMBLE(DATA_BITS); \
439 { ULong cf, pf, af, zf, sf, of; \
440 DATA_STYPE hi; \
441 DATA_STYPE lo \
442 = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
443 * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
444 DATA_S2TYPE rr \
445 = NARROWto2S( \
446 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
447 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
448 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
449 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
450 pf = parity_table[(UChar)lo]; \
451 af = 0; /* undefined */ \
452 zf = (lo == 0) << 6; \
453 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
454 of = cf << 11; \
455 return cf | pf | af | zf | sf | of; \
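/* Worked example (8-bit SMUL, illustration only): (-2) * 3 == -6 fits in a
   signed byte, so hi is -1, equal to the sign-replication (lo >>s 7), and
   cf == of == 0.  By contrast 100 * 3 == 300 does not fit: hi == 1 differs
   from the sign-replication of lo (0x2C), so cf == of == 1.  In the
   unsigned variants above, cf/of are simply "hi != 0". */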
459 /*-------------------------------------------------------------*/
461 #define ACTIONS_UMULQ \
463 PREAMBLE(64); \
464 { ULong cf, pf, af, zf, sf, of; \
465 ULong lo, hi; \
466 mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
467 cf = (hi != 0); \
468 pf = parity_table[(UChar)lo]; \
469 af = 0; /* undefined */ \
470 zf = (lo == 0) << 6; \
471 sf = lshift(lo, 8 - 64) & 0x80; \
472 of = cf << 11; \
473 return cf | pf | af | zf | sf | of; \
477 /*-------------------------------------------------------------*/
479 #define ACTIONS_SMULQ \
481 PREAMBLE(64); \
482 { ULong cf, pf, af, zf, sf, of; \
483 Long lo, hi; \
484 mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
485 cf = (hi != (lo >>/*s*/ (64-1))); \
486 pf = parity_table[(UChar)lo]; \
487 af = 0; /* undefined */ \
488 zf = (lo == 0) << 6; \
489 sf = lshift(lo, 8 - 64) & 0x80; \
490 of = cf << 11; \
491 return cf | pf | af | zf | sf | of; \
495 /*-------------------------------------------------------------*/
497 #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \
499 PREAMBLE(DATA_BITS); \
500 { ULong cf, pf, af, zf, sf, of; \
501 cf = 0; \
502 pf = 0; \
503 af = 0; \
504 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
505 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
506 of = 0; \
507 return cf | pf | af | zf | sf | of; \
511 /*-------------------------------------------------------------*/
513 #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \
515 PREAMBLE(DATA_BITS); \
516 { ULong cf, pf, af, zf, sf, of; \
517 cf = ((DATA_UTYPE)CC_DEP2 != 0); \
518 pf = 0; \
519 af = 0; \
520 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
521 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
522 of = 0; \
523 return cf | pf | af | zf | sf | of; \
527 /*-------------------------------------------------------------*/
529 #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE) \
531 PREAMBLE(DATA_BITS); \
532 { Long cf, pf, af, zf, sf, of; \
533 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
534 pf = 0; \
535 af = 0; \
536 zf = 0; \
537 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
538 of = 0; \
539 return cf | pf | af | zf | sf | of; \
543 /*-------------------------------------------------------------*/
545 #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \
547 PREAMBLE(DATA_BITS); \
548 { ULong cf, pf, af, zf, sf, of; \
549 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
550 pf = 0; \
551 af = 0; \
552 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
553 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
554 of = 0; \
555 return cf | pf | af | zf | sf | of; \
559 /*-------------------------------------------------------------*/
561 #define ACTIONS_ADX(DATA_BITS,DATA_UTYPE,FLAGNAME) \
563 PREAMBLE(DATA_BITS); \
564 { ULong ocf; /* o or c */ \
565 ULong argL, argR, oldOC, res; \
566 oldOC = (CC_NDEP >> AMD64G_CC_SHIFT_##FLAGNAME) & 1; \
567 argL = CC_DEP1; \
568 argR = CC_DEP2 ^ oldOC; \
569 res = (argL + argR) + oldOC; \
570 if (oldOC) \
571 ocf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
572 else \
573 ocf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
574 return (CC_NDEP & ~AMD64G_CC_MASK_##FLAGNAME) \
575 | (ocf << AMD64G_CC_SHIFT_##FLAGNAME); \
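/* ADCX/ADOX update exactly one flag (CF or OF respectively) and leave the
   rest of RFLAGS alone, which is why the result is spliced into CC_NDEP
   (the old flags) rather than rebuilt.  For example (illustration only):
   with an incoming flag of 1 and argL == argR == 0x8000000000000000ULL,
   res == 1 with a carry out of bit 63, so the returned value is CC_NDEP
   with just the selected flag bit forced to 1. */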
579 /*-------------------------------------------------------------*/
582 #if PROFILE_RFLAGS
584 static Bool initted = False;
586 /* C flag, fast route */
587 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
588 /* C flag, slow route */
589 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
590 /* table for calculate_cond */
591 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
592 /* total entry counts for calc_all, calc_c, calc_cond. */
593 static UInt n_calc_all = 0;
594 static UInt n_calc_c = 0;
595 static UInt n_calc_cond = 0;
597 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
600 static void showCounts ( void )
602 Int op, co;
603 HChar ch;
604 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
605 n_calc_all, n_calc_cond, n_calc_c);
607 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
608 " S NS P NP L NL LE NLE\n");
609 vex_printf(" -----------------------------------------------------"
610 "----------------------------------------\n");
611 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
613 ch = ' ';
614 if (op > 0 && (op-1) % 4 == 0)
615 ch = 'B';
616 if (op > 0 && (op-1) % 4 == 1)
617 ch = 'W';
618 if (op > 0 && (op-1) % 4 == 2)
619 ch = 'L';
620 if (op > 0 && (op-1) % 4 == 3)
621 ch = 'Q';
623 vex_printf("%2d%c: ", op, ch);
624 vex_printf("%6u ", tabc_slow[op]);
625 vex_printf("%6u ", tabc_fast[op]);
626 for (co = 0; co < 16; co++) {
627 Int n = tab_cond[op][co];
628 if (n >= 1000) {
629 vex_printf(" %3dK", n / 1000);
630 } else
631 if (n >= 0) {
632 vex_printf(" %3d ", n );
633 } else {
634 vex_printf(" ");
637 vex_printf("\n");
639 vex_printf("\n");
642 static void initCounts ( void )
644 Int op, co;
645 initted = True;
646 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
647 tabc_fast[op] = tabc_slow[op] = 0;
648 for (co = 0; co < 16; co++)
649 tab_cond[op][co] = 0;
653 #endif /* PROFILE_RFLAGS */
/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code; the CLEAN
   HELPER entry points further down call it. */
659 static
660 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
661 ULong cc_dep1_formal,
662 ULong cc_dep2_formal,
663 ULong cc_ndep_formal )
665 switch (cc_op) {
666 case AMD64G_CC_OP_COPY:
667 return cc_dep1_formal
668 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
669 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
671 case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
672 case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
673 case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
674 case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
676 case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
677 case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
678 case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
679 case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
681 case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
682 case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
683 case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
684 case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
686 case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
687 case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
688 case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
689 case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
691 case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
692 case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
693 case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
694 case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
696 case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
697 case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
698 case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
699 case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
701 case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
702 case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
703 case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
704 case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
706 case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
707 case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
708 case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
709 case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
711 case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
712 case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
713 case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
714 case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
716 case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
717 case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
718 case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
719 case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
721 case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
722 case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
723 case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
724 case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
726 case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
727 UShort, toUShort );
728 case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
729 UInt, toUInt );
730 case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
731 ULong, idULong );
733 case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
735 case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
736 Short, toUShort );
737 case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
738 Int, toUInt );
739 case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
740 Long, idULong );
742 case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
744 case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt );
745 case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );
747 case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt );
748 case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );
750 case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt );
751 case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );
753 case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt );
754 case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );
756 case AMD64G_CC_OP_ADCX32: ACTIONS_ADX( 32, UInt, C );
757 case AMD64G_CC_OP_ADCX64: ACTIONS_ADX( 64, ULong, C );
759 case AMD64G_CC_OP_ADOX32: ACTIONS_ADX( 32, UInt, O );
760 case AMD64G_CC_OP_ADOX64: ACTIONS_ADX( 64, ULong, O );
762 default:
763 /* shouldn't really make these calls from generated code */
764 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
765 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
766 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
767 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
771 #if defined(VGO_freebsd) || defined(VGO_darwin)
773 /* This dummy function exists only to provide an address immediately after
774 amd64g_calculate_rflags_all_WRK. */
776 static
777 void _______VVVVVVVV_amd64g_calculate_rflags_all_WRK_VVVVVVVV_______ (void)
781 /* Export addresses of amd64g_calculate_rflags_all_WRK and
782 _______VVVVVVVV_amd64g_calculate_rflags_all_WRK_VVVVVVVV_______
783 Used in syswrap-main.c / VG_(post_syscall) in the case where
784 the above function was interrupted and we need to work out
785 what needs to be done for the resumption */
787 Addr addr_amd64g_calculate_rflags_all_WRK = (Addr)amd64g_calculate_rflags_all_WRK;
788 Addr addr________VVVVVVVV_amd64g_calculate_rflags_all_WRK_VVVVVVVV_______ = (Addr)_______VVVVVVVV_amd64g_calculate_rflags_all_WRK_VVVVVVVV_______;
789 #endif
791 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
792 /* Calculate all 6 flags from the supplied thunk parameters. */
793 ULong amd64g_calculate_rflags_all ( ULong cc_op,
794 ULong cc_dep1,
795 ULong cc_dep2,
796 ULong cc_ndep )
798 # if PROFILE_RFLAGS
799 if (!initted) initCounts();
800 n_calc_all++;
801 if (SHOW_COUNTS_NOW) showCounts();
802 # endif
803 return
804 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
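/* Usage sketch (illustration only): for a thunk describing the 32-bit add
   0xFFFFFFFF + 1,

      ULong fl = amd64g_calculate_rflags_all(AMD64G_CC_OP_ADDL,
                                             0xFFFFFFFFULL, 1ULL, 0ULL);

   returns a value with AMD64G_CC_MASK_C, AMD64G_CC_MASK_Z, AMD64G_CC_MASK_A
   and AMD64G_CC_MASK_P set (wraparound to zero, carries out of bits 3 and
   31, and a zero low byte has even parity), with O and S clear. */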
808 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
809 /* Calculate just the carry flag from the supplied thunk parameters. */
810 ULong amd64g_calculate_rflags_c ( ULong cc_op,
811 ULong cc_dep1,
812 ULong cc_dep2,
813 ULong cc_ndep )
815 # if PROFILE_RFLAGS
816 if (!initted) initCounts();
817 n_calc_c++;
818 tabc_fast[cc_op]++;
819 if (SHOW_COUNTS_NOW) showCounts();
820 # endif
822 /* Fast-case some common ones. */
823 switch (cc_op) {
824 case AMD64G_CC_OP_COPY:
825 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
826 case AMD64G_CC_OP_LOGICQ:
827 case AMD64G_CC_OP_LOGICL:
828 case AMD64G_CC_OP_LOGICW:
829 case AMD64G_CC_OP_LOGICB:
830 return 0;
831 // case AMD64G_CC_OP_SUBL:
832 // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
833 // ? AMD64G_CC_MASK_C : 0;
834 // case AMD64G_CC_OP_SUBW:
835 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
836 // ? AMD64G_CC_MASK_C : 0;
837 // case AMD64G_CC_OP_SUBB:
838 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
839 // ? AMD64G_CC_MASK_C : 0;
840 // case AMD64G_CC_OP_INCL:
841 // case AMD64G_CC_OP_DECL:
842 // return cc_ndep & AMD64G_CC_MASK_C;
843 default:
844 break;
847 # if PROFILE_RFLAGS
848 tabc_fast[cc_op]--;
849 tabc_slow[cc_op]++;
850 # endif
852 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
853 & AMD64G_CC_MASK_C;
857 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
858 /* returns 1 or 0 */
859 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
860 ULong cc_op,
861 ULong cc_dep1,
862 ULong cc_dep2,
863 ULong cc_ndep )
865 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
866 cc_dep2, cc_ndep);
867 ULong of,sf,zf,cf,pf;
868 ULong inv = cond & 1;
870 # if PROFILE_RFLAGS
871 if (!initted) initCounts();
872 tab_cond[cc_op][cond]++;
873 n_calc_cond++;
874 if (SHOW_COUNTS_NOW) showCounts();
875 # endif
877 switch (cond) {
878 case AMD64CondNO:
879 case AMD64CondO: /* OF == 1 */
880 of = rflags >> AMD64G_CC_SHIFT_O;
881 return 1 & (inv ^ of);
883 case AMD64CondNZ:
884 case AMD64CondZ: /* ZF == 1 */
885 zf = rflags >> AMD64G_CC_SHIFT_Z;
886 return 1 & (inv ^ zf);
888 case AMD64CondNB:
889 case AMD64CondB: /* CF == 1 */
890 cf = rflags >> AMD64G_CC_SHIFT_C;
891 return 1 & (inv ^ cf);
892 break;
894 case AMD64CondNBE:
895 case AMD64CondBE: /* (CF or ZF) == 1 */
896 cf = rflags >> AMD64G_CC_SHIFT_C;
897 zf = rflags >> AMD64G_CC_SHIFT_Z;
898 return 1 & (inv ^ (cf | zf));
899 break;
901 case AMD64CondNS:
902 case AMD64CondS: /* SF == 1 */
903 sf = rflags >> AMD64G_CC_SHIFT_S;
904 return 1 & (inv ^ sf);
906 case AMD64CondNP:
907 case AMD64CondP: /* PF == 1 */
908 pf = rflags >> AMD64G_CC_SHIFT_P;
909 return 1 & (inv ^ pf);
911 case AMD64CondNL:
912 case AMD64CondL: /* (SF xor OF) == 1 */
913 sf = rflags >> AMD64G_CC_SHIFT_S;
914 of = rflags >> AMD64G_CC_SHIFT_O;
915 return 1 & (inv ^ (sf ^ of));
916 break;
918 case AMD64CondNLE:
919 case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
920 sf = rflags >> AMD64G_CC_SHIFT_S;
921 of = rflags >> AMD64G_CC_SHIFT_O;
922 zf = rflags >> AMD64G_CC_SHIFT_Z;
923 return 1 & (inv ^ ((sf ^ of) | zf));
924 break;
926 default:
927 /* shouldn't really make these calls from generated code */
928 vex_printf("amd64g_calculate_condition"
929 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
930 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
931 vpanic("amd64g_calculate_condition");
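/* Illustration only: condition codes come in even/odd pairs whose low bit
   selects negation, which is what "inv = cond & 1" exploits above.  For
   instance AMD64CondZ and AMD64CondNZ share the same ZF extraction, so

      amd64g_calculate_condition(AMD64CondZ, AMD64G_CC_OP_SUBL, 7, 7, 0)

   returns 1 (7 - 7 == 0), while the same call with AMD64CondNZ returns 0. */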
936 /* VISIBLE TO LIBVEX CLIENT */
937 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
939 ULong rflags = amd64g_calculate_rflags_all_WRK(
940 vex_state->guest_CC_OP,
941 vex_state->guest_CC_DEP1,
942 vex_state->guest_CC_DEP2,
943 vex_state->guest_CC_NDEP
945 Long dflag = vex_state->guest_DFLAG;
946 vassert(dflag == 1 || dflag == -1);
947 if (dflag == -1)
948 rflags |= (1<<10);
949 if (vex_state->guest_IDFLAG == 1)
950 rflags |= (1<<21);
951 if (vex_state->guest_ACFLAG == 1)
952 rflags |= (1<<18);
954 return rflags;
957 /* VISIBLE TO LIBVEX CLIENT */
958 void
959 LibVEX_GuestAMD64_put_rflags ( ULong rflags,
960 /*MOD*/VexGuestAMD64State* vex_state )
962 /* D flag */
963 if (rflags & AMD64G_CC_MASK_D) {
964 vex_state->guest_DFLAG = -1;
965 rflags &= ~AMD64G_CC_MASK_D;
967 else
968 vex_state->guest_DFLAG = 1;
970 /* ID flag */
971 if (rflags & AMD64G_CC_MASK_ID) {
972 vex_state->guest_IDFLAG = 1;
973 rflags &= ~AMD64G_CC_MASK_ID;
975 else
976 vex_state->guest_IDFLAG = 0;
978 /* AC flag */
979 if (rflags & AMD64G_CC_MASK_AC) {
980 vex_state->guest_ACFLAG = 1;
981 rflags &= ~AMD64G_CC_MASK_AC;
983 else
984 vex_state->guest_ACFLAG = 0;
986 UInt cc_mask = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
987 AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P;
988 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
989 vex_state->guest_CC_DEP1 = rflags & cc_mask;
990 vex_state->guest_CC_DEP2 = 0;
991 vex_state->guest_CC_NDEP = 0;
994 /* VISIBLE TO LIBVEX CLIENT */
995 void
996 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
997 /*MOD*/VexGuestAMD64State* vex_state )
999 ULong oszacp = amd64g_calculate_rflags_all_WRK(
1000 vex_state->guest_CC_OP,
1001 vex_state->guest_CC_DEP1,
1002 vex_state->guest_CC_DEP2,
1003 vex_state->guest_CC_NDEP
1005 if (new_carry_flag & 1) {
1006 oszacp |= AMD64G_CC_MASK_C;
1007 } else {
1008 oszacp &= ~AMD64G_CC_MASK_C;
1010 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
1011 vex_state->guest_CC_DEP1 = oszacp;
1012 vex_state->guest_CC_DEP2 = 0;
1013 vex_state->guest_CC_NDEP = 0;
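/* Usage sketch (illustration only; "gst" is a hypothetical
   VexGuestAMD64State owned by the client): a client that wants to flip the
   guest's carry flag could combine the getter and setters above:

      ULong fl = LibVEX_GuestAMD64_get_rflags(&gst);
      LibVEX_GuestAMD64_put_rflag_c((fl & AMD64G_CC_MASK_C) ? 0 : 1, &gst);

   Both setters squash the thunk to the COPY encoding, so later flag reads
   simply return the OSZACP bits stored in CC_DEP1. */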
1016 #if defined(VGO_freebsd) || defined(VGO_darwin)
1017 /* Used in syswrap-main.c / VG_(post_syscall) in the case where
1018 the above function was interrupted and we need to work out
1019 what needs to be done for the resumption. These functions
1020 are extern so no need for 'addr' global variables */
1021 void _______VVVVVVVV_after_GuestAMD64_put_rflag_c_VVVVVVVV_______ (void)
1024 #endif
1026 /*---------------------------------------------------------------*/
1027 /*--- %rflags translation-time function specialisers. ---*/
1028 /*--- These help iropt specialise calls the above run-time ---*/
1029 /*--- %rflags functions. ---*/
1030 /*---------------------------------------------------------------*/
1032 /* Used by the optimiser to try specialisations. Returns an
1033 equivalent expression, or NULL if none. */
1035 static inline Bool isU64 ( IRExpr* e, ULong n )
1037 return e->tag == Iex_Const
1038 && e->Iex.Const.con->tag == Ico_U64
1039 && e->Iex.Const.con->Ico.U64 == n;
1042 /* Returns N if W64 is a value of the form 1 << N for N in 1 to 31,
1043 and zero in any other case. */
1044 static Int isU64_1_shl_N_literal ( ULong w64 )
1046 if (w64 < (1ULL << 1) || w64 > (1ULL << 31))
1047 return 0;
1048 if ((w64 & (w64 - 1)) != 0)
1049 return 0;
1050 /* At this point, we know w64 is a power of two in the range 2^1 .. 2^31,
1051 and we only need to find out which one it is. */
1052 for (Int n = 1; n <= 31; n++) {
1053 if (w64 == (1ULL << n))
1054 return n;
1056 /* Consequently we should never get here. */
1057 /*UNREACHED*/
1058 vassert(0);
1059 return 0;
1062 /* Returns N if E is an immediate of the form 1 << N for N in 1 to 31,
1063 and zero in any other case. */
1064 static Int isU64_1_shl_N ( IRExpr* e )
1066 if (e->tag != Iex_Const || e->Iex.Const.con->tag != Ico_U64)
1067 return 0;
1068 ULong w64 = e->Iex.Const.con->Ico.U64;
1069 return isU64_1_shl_N_literal(w64);
1072 /* Returns N if E is an immediate of the form (1 << N) - 1 for N in 1 to 31,
1073 and zero in any other case. */
1074 static Int isU64_1_shl_N_minus_1 ( IRExpr* e )
1076 if (e->tag != Iex_Const || e->Iex.Const.con->tag != Ico_U64)
1077 return 0;
1078 ULong w64 = e->Iex.Const.con->Ico.U64;
1079 // This isn't actually necessary since isU64_1_shl_N_literal will return
1080 // zero given a zero argument, but still ..
1081 if (w64 == 0xFFFFFFFFFFFFFFFFULL)
1082 return 0;
1083 return isU64_1_shl_N_literal(w64 + 1);
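/* For example: a U64 constant of 0x1000 makes isU64_1_shl_N return 12, and
   a constant of 7 makes isU64_1_shl_N_minus_1 return 3.  Non-matching or
   out-of-range values (0, 1, non-powers-of-two, anything past 2^31) yield
   0, meaning "no match". */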
1086 IRExpr* guest_amd64_spechelper ( const HChar* function_name,
1087 IRExpr** args,
1088 IRStmt** precedingStmts,
1089 Int n_precedingStmts )
1091 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
1092 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
1093 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
1094 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
1095 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
1097 Int i, arity = 0;
1098 for (i = 0; args[i]; i++)
1099 arity++;
1100 # if 0
1101 vex_printf("spec request:\n");
1102 vex_printf(" %s ", function_name);
1103 for (i = 0; i < arity; i++) {
1104 vex_printf(" ");
1105 ppIRExpr(args[i]);
1107 vex_printf("\n");
1108 # endif
1110 /* --------- specialising "amd64g_calculate_condition" --------- */
1112 if (vex_streq(function_name, "amd64g_calculate_condition")) {
1113 /* specialise calls to above "calculate condition" function */
1114 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
1115 vassert(arity == 5);
1116 cond = args[0];
1117 cc_op = args[1];
1118 cc_dep1 = args[2];
1119 cc_dep2 = args[3];
1121 /*---------------- ADDQ ----------------*/
1123 /* 4, */
1124 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
1125 /* long long add, then Z --> test (dst+src == 0) */
1126 return unop(Iop_1Uto64,
1127 binop(Iop_CmpEQ64,
1128 binop(Iop_Add64, cc_dep1, cc_dep2),
1129 mkU64(0)));
1132 /* 8, */
1133 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondS)) {
1134 /* long long add, then S (negative)
1135 --> (dst+src)[63]
1136 --> ((dst + src) >>u 63) & 1
1138 return binop(Iop_And64,
1139 binop(Iop_Shr64,
1140 binop(Iop_Add64, cc_dep1, cc_dep2),
1141 mkU8(63)),
1142 mkU64(1));
1145 /*---------------- ADDL ----------------*/
1147 /* 0, */
1148 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
1149 /* This is very commonly generated by Javascript JITs, for
1150 the idiom "do a 32-bit add and jump to out-of-line code if
1151 an overflow occurs". */
1152 /* long add, then O (overflow)
1153 --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
1154 --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1155 --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1157 vassert(isIRAtom(cc_dep1));
1158 vassert(isIRAtom(cc_dep2));
1159 return
1160 binop(Iop_And64,
1161 binop(Iop_Shr64,
1162 binop(Iop_And64,
1163 unop(Iop_Not64,
1164 binop(Iop_Xor64, cc_dep1, cc_dep2)),
1165 binop(Iop_Xor64,
1166 cc_dep1,
1167 binop(Iop_Add64, cc_dep1, cc_dep2))),
1168 mkU8(31)),
1169 mkU64(1));
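/* The expression just above is the classic bitwise test for 32-bit signed
   overflow of an addition.  A standalone C sketch of the same identity
   (illustration only, not part of the build):

      static int addl_overflows ( UInt a, UInt b )
      {
         ULong sum = (ULong)a + (ULong)b;   // 64-bit add, as in the IR
         return (int)((~(a ^ b) & (a ^ (UInt)sum)) >> 31) & 1;
      }

   e.g. addl_overflows(0x7FFFFFFF, 1) == 1 and addl_overflows(1, 1) == 0. */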
1173 /* 8, 9 */
1174 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondS)) {
1175 /* long add, then S (negative)
1176 --> (dst+src)[31]
1177 --> ((dst +64 src) >>u 31) & 1
1178 Pointless to narrow the args to 32 bit before the add. */
1179 return binop(Iop_And64,
1180 binop(Iop_Shr64,
1181 binop(Iop_Add64, cc_dep1, cc_dep2),
1182 mkU8(31)),
1183 mkU64(1));
1185 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondNS)) {
1186 /* long add, then NS (not negative)
1187 --> (dst+src)[31] ^ 1
1188 --> (((dst +64 src) >>u 31) & 1) ^ 1
1189 Pointless to narrow the args to 32 bit before the add. */
1190 return binop(Iop_Xor64,
1191 binop(Iop_And64,
1192 binop(Iop_Shr64,
1193 binop(Iop_Add64, cc_dep1, cc_dep2),
1194 mkU8(31)),
1195 mkU64(1)),
1196 mkU64(1));
1199 /*---------------- SUBQ ----------------*/
1201 /* 0, */
1202 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
1203 /* long long sub/cmp, then O (overflow)
1204 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
1205 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
1207 vassert(isIRAtom(cc_dep1));
1208 vassert(isIRAtom(cc_dep2));
1209 return binop(Iop_Shr64,
1210 binop(Iop_And64,
1211 binop(Iop_Xor64, cc_dep1, cc_dep2),
1212 binop(Iop_Xor64,
1213 cc_dep1,
1214 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1215 mkU8(63));
1217 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
1218 /* No action. Never yet found a test case. */
1221 /* 2, 3 */
1222 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
1223 /* long long sub/cmp, then B (unsigned less than)
1224 --> test dst <u src */
1225 return unop(Iop_1Uto64,
1226 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1228 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
1229 /* long long sub/cmp, then NB (unsigned greater than or equal)
1230 --> test src <=u dst */
1231 /* Note, args are opposite way round from the usual */
1232 return unop(Iop_1Uto64,
1233 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1236 /* 4, 5 */
1237 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
1238 /* long long sub/cmp, then Z --> test dst==src */
1239 return unop(Iop_1Uto64,
1240 binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
1242 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
1243 /* long long sub/cmp, then NZ --> test dst!=src */
1244 return unop(Iop_1Uto64,
1245 binop(Iop_CmpNE64,cc_dep1,cc_dep2));
1248 /* 6, 7 */
1249 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
1250 /* long long sub/cmp, then BE (unsigned less than or equal)
1251 --> test dst <=u src */
1252 return unop(Iop_1Uto64,
1253 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1255 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
1256 /* long long sub/cmp, then NBE (unsigned greater than)
1257 --> test !(dst <=u src) */
1258 return binop(Iop_Xor64,
1259 unop(Iop_1Uto64,
1260 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
1261 mkU64(1));
1264 /* 8, 9 */
1265 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
1266 /* long long sub/cmp, then S (negative)
1267 --> (dst-src)[63]
1268 --> (dst-src) >>u 63 */
1269 return binop(Iop_Shr64,
1270 binop(Iop_Sub64, cc_dep1, cc_dep2),
1271 mkU8(63));
1273 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
1274 /* long long sub/cmp, then NS (not negative)
1275 --> (dst-src)[63] ^ 1
1276 --> ((dst-src) >>u 63) ^ 1 */
1277 return binop(Iop_Xor64,
1278 binop(Iop_Shr64,
1279 binop(Iop_Sub64, cc_dep1, cc_dep2),
1280 mkU8(63)),
1281 mkU64(1));
1284 /* 12, 13 */
1285 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
1286 /* long long sub/cmp, then L (signed less than)
1287 --> test dst <s src */
1288 return unop(Iop_1Uto64,
1289 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1291 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
1292 /* long long sub/cmp, then NL (signed greater than or equal)
1293 --> test dst >=s src
1294 --> test src <=s dst */
1295 return unop(Iop_1Uto64,
1296 binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1299 /* 14, 15 */
1300 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
1301 /* long long sub/cmp, then LE (signed less than or equal)
1302 --> test dst <=s src */
1303 return unop(Iop_1Uto64,
1304 binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1306 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
1307 /* long sub/cmp, then NLE (signed greater than)
1308 --> test !(dst <=s src)
1309 --> test (dst >s src)
1310 --> test (src <s dst) */
1311 return unop(Iop_1Uto64,
1312 binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1316 /*---------------- SUBL ----------------*/
1318 /* 0, */
1319 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
1320 /* This is very commonly generated by Javascript JITs, for
1321 the idiom "do a 32-bit subtract and jump to out-of-line
1322 code if an overflow occurs". */
1323 /* long sub/cmp, then O (overflow)
1324 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
1325 --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
1327 vassert(isIRAtom(cc_dep1));
1328 vassert(isIRAtom(cc_dep2));
1329 return
1330 binop(Iop_And64,
1331 binop(Iop_Shr64,
1332 binop(Iop_And64,
1333 binop(Iop_Xor64, cc_dep1, cc_dep2),
1334 binop(Iop_Xor64,
1335 cc_dep1,
1336 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1337 mkU8(31)),
1338 mkU64(1));
1340 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
1341 /* No action. Never yet found a test case. */
1344 /* 2, 3 */
1346 /* It appears that LLVM 5.0 and later have a new way to find out
1347 whether the top N bits of a word W are all zero, by computing
1349 W <u 0---(N-1)---0 1 0---0 or
1350 W <=u 0---(N-1)---0 0 1---1
1352 In particular, the result will be defined if the top N bits of W
1353 are defined, even if the trailing bits -- those corresponding to
1354 the rightmost 0---0 / 1---1 section -- are undefined. Rather than
1355 make Memcheck more complex, we detect this case where we can and
1356 shift out the irrelevant and potentially undefined bits. */
1357 Int n = 0;
1358 Bool is_NB_or_NBE = False;
1359 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1360 if (isU64(cond, AMD64CondB) || isU64(cond, AMD64CondNB)) {
1361 /* long sub/cmp, then B (unsigned less than),
1362 where dep2 is a power of 2:
1363 -> CmpLT32U(dep1, 1 << N)
1364 -> CmpEQ32(dep1 >>u N, 0)
1366 long sub/cmp, then NB (unsigned greater than or equal),
1367 where dep2 is a power of 2:
1368 -> CmpGE32U(dep1, 1 << N)
1369 -> CmpNE32(dep1 >>u N, 0)
1370 This avoids CmpLT32U/CmpGE32U being applied to potentially
1371 uninitialised bits in the area being shifted out. */
1372 n = isU64_1_shl_N(cc_dep2);
1373 is_NB_or_NBE = isU64(cond, AMD64CondNB);
1374 } else if (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE)) {
1375 /* long sub/cmp, then BE (unsigned less than or equal),
1376 where dep2 is a power of 2 minus 1:
1377 -> CmpLE32U(dep1, (1 << N) - 1)
1378 -> CmpEQ32(dep1 >>u N, 0)
1380 long sub/cmp, then NBE (unsigned greater than),
1381 where dep2 is a power of 2 minus 1:
1382 -> CmpGT32U(dep1, (1 << N) - 1)
1383 -> CmpNE32(dep1 >>u N, 0)
1384 This avoids CmpLE32U/CmpGT32U being applied to potentially
1385 uninitialised bits in the area being shifted out. */
1386 n = isU64_1_shl_N_minus_1(cc_dep2);
1387 is_NB_or_NBE = isU64(cond, AMD64CondNBE);
1390 if (n > 0) {
1391 vassert(n >= 1 && n <= 31);
1392 return unop(Iop_1Uto64,
1393 binop(is_NB_or_NBE ? Iop_CmpNE32 : Iop_CmpEQ32,
1394 binop(Iop_Shr32, unop(Iop_64to32, cc_dep1),
1395 mkU8(n)),
1396 mkU32(0)));
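/* The rewrites above rely on the identities

      W <u  (1 << N)         <==>  (W >>u N) == 0
      W <=u ((1 << N) - 1)   <==>  (W >>u N) == 0

   For N == 8, say, any W below 0x100 shifts down to zero and any W of
   0x100 or more does not, so the test never inspects W's low 8 bits --
   which is exactly what keeps Memcheck quiet when those bits are
   undefined. */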
1399 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1400 /* long sub/cmp, then B (unsigned less than)
1401 --> test dst <u src */
1402 return unop(Iop_1Uto64,
1403 binop(Iop_CmpLT32U,
1404 unop(Iop_64to32, cc_dep1),
1405 unop(Iop_64to32, cc_dep2)));
1407 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
1408 /* long sub/cmp, then NB (unsigned greater than or equal)
1409 --> test src <=u dst */
1410 /* Note, args are opposite way round from the usual */
1411 return unop(Iop_1Uto64,
1412 binop(Iop_CmpLE32U,
1413 unop(Iop_64to32, cc_dep2),
1414 unop(Iop_64to32, cc_dep1)));
1417 /* 4, 5 */
1418 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
1419 /* long sub/cmp, then Z --> test dst==src */
1420 return unop(Iop_1Uto64,
1421 binop(Iop_CmpEQ32,
1422 unop(Iop_64to32, cc_dep1),
1423 unop(Iop_64to32, cc_dep2)));
1425 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
1426 /* long sub/cmp, then NZ --> test dst!=src */
1427 return unop(Iop_1Uto64,
1428 binop(Iop_CmpNE32,
1429 unop(Iop_64to32, cc_dep1),
1430 unop(Iop_64to32, cc_dep2)));
1433 /* 6, 7 */
1434 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1435 /* long sub/cmp, then BE (unsigned less than or equal)
1436 --> test dst <=u src */
1437 return unop(Iop_1Uto64,
1438 binop(Iop_CmpLE32U,
1439 unop(Iop_64to32, cc_dep1),
1440 unop(Iop_64to32, cc_dep2)));
1442 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1443 /* long sub/cmp, then NBE (unsigned greater than)
1444 --> test src <u dst */
1445 /* Note, args are opposite way round from the usual */
1446 return unop(Iop_1Uto64,
1447 binop(Iop_CmpLT32U,
1448 unop(Iop_64to32, cc_dep2),
1449 unop(Iop_64to32, cc_dep1)));
1452 /* 8, 9 */
1453 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1454 /* long sub/cmp, then S (negative)
1455 --> (dst-src)[31]
1456 --> ((dst -64 src) >>u 31) & 1
1457 Pointless to narrow the args to 32 bit before the subtract. */
1458 return binop(Iop_And64,
1459 binop(Iop_Shr64,
1460 binop(Iop_Sub64, cc_dep1, cc_dep2),
1461 mkU8(31)),
1462 mkU64(1));
1464 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
1465 /* long sub/cmp, then NS (not negative)
1466 --> (dst-src)[31] ^ 1
1467 --> (((dst -64 src) >>u 31) & 1) ^ 1
1468 Pointless to narrow the args to 32 bit before the subtract. */
1469 return binop(Iop_Xor64,
1470 binop(Iop_And64,
1471 binop(Iop_Shr64,
1472 binop(Iop_Sub64, cc_dep1, cc_dep2),
1473 mkU8(31)),
1474 mkU64(1)),
1475 mkU64(1));
1478 /* 12, 13 */
1479 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
1480 /* long sub/cmp, then L (signed less than)
1481 --> test dst <s src */
1482 return unop(Iop_1Uto64,
1483 binop(Iop_CmpLT32S,
1484 unop(Iop_64to32, cc_dep1),
1485 unop(Iop_64to32, cc_dep2)));
1487 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
1488 /* long sub/cmp, then NL (signed greater than or equal)
1489 --> test dst >=s src
1490 --> test src <=s dst */
1491 return unop(Iop_1Uto64,
1492 binop(Iop_CmpLE32S,
1493 unop(Iop_64to32, cc_dep2),
1494 unop(Iop_64to32, cc_dep1)));
1497 /* 14, 15 */
1498 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
1499 /* long sub/cmp, then LE (signed less than or equal)
1500 --> test dst <=s src */
1501 return unop(Iop_1Uto64,
1502 binop(Iop_CmpLE32S,
1503 unop(Iop_64to32, cc_dep1),
1504 unop(Iop_64to32, cc_dep2)));
1507 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
1508 /* long sub/cmp, then NLE (signed greater than)
1509 --> test !(dst <=s src)
1510 --> test (dst >s src)
1511 --> test (src <s dst) */
1512 return unop(Iop_1Uto64,
1513 binop(Iop_CmpLT32S,
1514 unop(Iop_64to32, cc_dep2),
1515 unop(Iop_64to32, cc_dep1)));
1519 /*---------------- SUBW ----------------*/
1521 /* 4, 5 */
1522 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1523 /* word sub/cmp, then Z --> test dst==src */
1524 return unop(Iop_1Uto64,
1525 binop(Iop_CmpEQ16,
1526 unop(Iop_64to16,cc_dep1),
1527 unop(Iop_64to16,cc_dep2)));
1529 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1530 /* word sub/cmp, then NZ --> test dst!=src */
1531 return unop(Iop_1Uto64,
1532 binop(Iop_CmpNE16,
1533 unop(Iop_64to16,cc_dep1),
1534 unop(Iop_64to16,cc_dep2)));
1537 /* 6, */
1538 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
1539 /* word sub/cmp, then BE (unsigned less than or equal)
1540 --> test dst <=u src */
1541 return unop(Iop_1Uto64,
1542 binop(Iop_CmpLE64U,
1543 binop(Iop_Shl64, cc_dep1, mkU8(48)),
1544 binop(Iop_Shl64, cc_dep2, mkU8(48))));
1547 /* 8, 9 */
1548 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondS)
1549 && isU64(cc_dep2, 0)) {
1550 /* word sub/cmp of zero, then S --> test (dst-0 <s 0)
1551 --> test dst <s 0
1552 --> (ULong)dst[15]
1553 This is yet another scheme by which clang figures out if the
1554 top bit of a word is 1 or 0. See also LOGICB/CondS below. */
1555 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1556 for a 16-bit comparison, since the args to the helper
1557 function are always U64s. */
1558 return binop(Iop_And64,
1559 binop(Iop_Shr64,cc_dep1,mkU8(15)),
1560 mkU64(1));
1562 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNS)
1563 && isU64(cc_dep2, 0)) {
1564 /* word sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1565 --> test !(dst <s 0)
1566 --> (ULong) !dst[15]
1568 return binop(Iop_Xor64,
1569 binop(Iop_And64,
1570 binop(Iop_Shr64,cc_dep1,mkU8(15)),
1571 mkU64(1)),
1572 mkU64(1));
1575 /* 14, */
1576 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1577 /* word sub/cmp, then LE (signed less than or equal)
1578 --> test dst <=s src */
1579 return unop(Iop_1Uto64,
1580 binop(Iop_CmpLE64S,
1581 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1582 binop(Iop_Shl64,cc_dep2,mkU8(48))));
1586 /*---------------- SUBB ----------------*/
1588 /* 2, 3 */
1589 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
1590 /* byte sub/cmp, then B (unsigned less than)
1591 --> test dst <u src */
1592 return unop(Iop_1Uto64,
1593 binop(Iop_CmpLT64U,
1594 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1595 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1597 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
1598 /* byte sub/cmp, then NB (unsigned greater than or equal)
1599 --> test src <=u dst */
1600 /* Note, args are opposite way round from the usual */
1601 return unop(Iop_1Uto64,
1602 binop(Iop_CmpLE64U,
1603 binop(Iop_And64, cc_dep2, mkU64(0xFF)),
1604 binop(Iop_And64, cc_dep1, mkU64(0xFF))));
1607 /* 4, 5 */
1608 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1609 /* byte sub/cmp, then Z --> test dst==src */
1610 return unop(Iop_1Uto64,
1611 binop(Iop_CmpEQ8,
1612 unop(Iop_64to8,cc_dep1),
1613 unop(Iop_64to8,cc_dep2)));
1615 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1616 /* byte sub/cmp, then NZ --> test dst!=src */
1617 return unop(Iop_1Uto64,
1618 binop(Iop_CmpNE8,
1619 unop(Iop_64to8,cc_dep1),
1620 unop(Iop_64to8,cc_dep2)));
1623 /* 6, */
1624 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1625 /* byte sub/cmp, then BE (unsigned less than or equal)
1626 --> test dst <=u src */
1627 return unop(Iop_1Uto64,
1628 binop(Iop_CmpLE64U,
1629 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1630 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1633 /* 8, 9 */
1634 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1635 && isU64(cc_dep2, 0)) {
1636 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1637 --> test dst <s 0
1638 --> (ULong)dst[7]
1639 This is yet another scheme by which gcc figures out if the
1640 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1641 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1642 for an 8-bit comparison, since the args to the helper
1643 function are always U64s. */
1644 return binop(Iop_And64,
1645 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1646 mkU64(1));
1648 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1649 && isU64(cc_dep2, 0)) {
1650 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1651 --> test !(dst <s 0)
1652 --> (ULong) !dst[7]
1654 return binop(Iop_Xor64,
1655 binop(Iop_And64,
1656 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1657 mkU64(1)),
1658 mkU64(1));
1661 /*---------------- LOGICQ ----------------*/
1663 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1664 /* long long and/or/xor, then Z --> test dst==0 */
1665 return unop(Iop_1Uto64,
1666 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1668 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1669 /* long long and/or/xor, then NZ --> test dst!=0 */
1670 return unop(Iop_1Uto64,
1671 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1674 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1675 /* long long and/or/xor, then L
1676 LOGIC sets SF and ZF according to the
1677 result and makes OF be zero. L computes SF ^ OF, but
1678 OF is zero, so this reduces to SF -- which will be 1 iff
1679 the result is < signed 0. Hence ...
1681 return unop(Iop_1Uto64,
1682 binop(Iop_CmpLT64S,
1683 cc_dep1,
1684 mkU64(0)));
1687 // Verified
1688 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondS)) {
1689 /* long long and/or/xor, then S --> (ULong)result[63] */
1690 return binop(Iop_Shr64, cc_dep1, mkU8(63));
1692 // Verified
1693 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNS)) {
1694 /* long long and/or/xor, then NS --> (ULong) ~ result[63] */
1695 return binop(Iop_Xor64,
1696 binop(Iop_Shr64, cc_dep1, mkU8(63)),
1697 mkU64(1));
1700 /*---------------- LOGICL ----------------*/
1702 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1703 /* long and/or/xor, then Z --> test dst==0 */
1704 return unop(Iop_1Uto64,
1705 binop(Iop_CmpEQ32,
1706 unop(Iop_64to32, cc_dep1),
1707 mkU32(0)));
1709 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1710 /* long and/or/xor, then NZ --> test dst!=0 */
1711 return unop(Iop_1Uto64,
1712 binop(Iop_CmpNE32,
1713 unop(Iop_64to32, cc_dep1),
1714 mkU32(0)));
1717 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1718 /* long and/or/xor, then LE
1719 This is pretty subtle. LOGIC sets SF and ZF according to the
1720 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
1721 OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1722 the result is <=signed 0. Hence ...
1724 return unop(Iop_1Uto64,
1725 binop(Iop_CmpLE32S,
1726 unop(Iop_64to32, cc_dep1),
1727 mkU32(0)));
1730 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1731 /* long and/or/xor, then S --> (ULong)result[31] */
1732 return binop(Iop_And64,
1733 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1734 mkU64(1));
1736 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1737 /* long and/or/xor, then NS --> (ULong) ~ result[31] */
1738 return binop(Iop_Xor64,
1739 binop(Iop_And64,
1740 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1741 mkU64(1)),
1742 mkU64(1));
1745 /*---------------- LOGICW ----------------*/
1747 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
1748 /* word and/or/xor, then Z --> test dst==0 */
1749 // Use CmpEQ32 rather than CmpEQ64 here, so that Memcheck instruments
1750 // it exactly at EdcAUTO.
1751 return unop(Iop_1Uto64,
1752 binop(Iop_CmpEQ32,
1753 unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1754 mkU32(0)));
1756 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
1757 /* word and/or/xor, then NZ --> test dst!=0 */
1758 // Use CmpNE32 rather than CmpNE64 here, so that Memcheck instruments
1759 // it exactly at EdcAUTO.
1760 return unop(Iop_1Uto64,
1761 binop(Iop_CmpNE32,
1762 unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1763 mkU32(0)));
1766 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondS)) {
1767 /* word and/or/xor, then S --> (ULong)result[15] */
1768 return binop(Iop_And64,
1769 binop(Iop_Shr64, cc_dep1, mkU8(15)),
1770 mkU64(1));
1772 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNS)) {
1773 /* word and/or/xor, then NS --> (ULong) ~ result[15] */
1774 return binop(Iop_Xor64,
1775 binop(Iop_And64,
1776 binop(Iop_Shr64, cc_dep1, mkU8(15)),
1777 mkU64(1)),
1778 mkU64(1));
1781 /*---------------- LOGICB ----------------*/
1783 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1784 /* byte and/or/xor, then Z --> test dst==0 */
1785 // Use CmpEQ32 rather than CmpEQ64 here, so that Memcheck instruments
1786 // it exactly at EdcAUTO.
1787 return unop(Iop_1Uto64,
1788 binop(Iop_CmpEQ32,
1789 unop(Iop_8Uto32, unop(Iop_64to8, cc_dep1)),
1790 mkU32(0)));
1792 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1793 /* byte and/or/xor, then NZ --> test dst!=0 */
1794 // Use CmpNE32 rather than CmpNE64 here, so that Memcheck instruments
1795 // it exactly at EdcAUTO.
1796 return unop(Iop_1Uto64,
1797 binop(Iop_CmpNE32,
1798 unop(Iop_8Uto32, unop(Iop_64to8, cc_dep1)),
1799 mkU32(0)));
1802 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1803 /* this is an idiom gcc sometimes uses to find out if the top
1804 bit of a byte register is set: eg testb %al,%al; js ..
1805 Since it just depends on the top bit of the byte, extract
1806 that bit and explicitly get rid of all the rest. This
1807 helps memcheck avoid false positives in the case where any
1808 of the other bits in the byte are undefined. */
1809 /* byte and/or/xor, then S --> (UInt)result[7] */
1810 return binop(Iop_And64,
1811 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1812 mkU64(1));
1814 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1815 /* byte and/or/xor, then NS --> (UInt)!result[7] */
1816 return binop(Iop_Xor64,
1817 binop(Iop_And64,
1818 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1819 mkU64(1)),
1820 mkU64(1));
1823 /*---------------- INCB ----------------*/
1825 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1826 /* 8-bit inc, then LE --> sign bit of the arg */
1827 return binop(Iop_And64,
1828 binop(Iop_Shr64,
1829 binop(Iop_Sub64, cc_dep1, mkU64(1)),
1830 mkU8(7)),
1831 mkU64(1));
1834 /*---------------- INCW ----------------*/
1836 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1837 /* 16-bit inc, then Z --> test dst == 0 */
1838 return unop(Iop_1Uto64,
1839 binop(Iop_CmpEQ64,
1840 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1841 mkU64(0)));
1844 /*---------------- DECL ----------------*/
1846 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1847 /* dec L, then Z --> test dst == 0 */
1848 return unop(Iop_1Uto64,
1849 binop(Iop_CmpEQ32,
1850 unop(Iop_64to32, cc_dep1),
1851 mkU32(0)));
1854 /*---------------- DECW ----------------*/
1856 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1857 /* 16-bit dec, then NZ --> test dst != 0 */
1858 return unop(Iop_1Uto64,
1859 binop(Iop_CmpNE64,
1860 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1861 mkU64(0)));
1864 /*---------------- SHRQ ----------------*/
1866 if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondZ)) {
1867 /* SHRQ, then Z --> test result[63:0] == 0 */
1868 return unop(Iop_1Uto64,
1869 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1871 if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNZ)) {
1872 /* SHRQ, then NZ --> test result[63:0] != 0 */
1873 return unop(Iop_1Uto64,
1874 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1877 if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondS)) {
1878 /* SHRQ, then S --> (ULong)result[63] (result is in dep1) */
1879 return binop(Iop_Shr64, cc_dep1, mkU8(63));
1881 // No known test case for this, hence disabled:
1882 //if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNS)) {
1883 // /* SHRQ, then NS --> (ULong) ~ result[63] */
1884 // vassert(0);
1887 /*---------------- SHRL ----------------*/
1889 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondZ)) {
1890 /* SHRL, then Z --> test dep1 == 0 */
1891 return unop(Iop_1Uto64,
1892 binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
1893 mkU32(0)));
1895 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondNZ)) {
1896 /* SHRL, then NZ --> test dep1 != 0 */
1897 return unop(Iop_1Uto64,
1898 binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
1899 mkU32(0)));
1902 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondS)) {
1903 /* SHRL/SARL, then S --> (ULong)result[31] */
1904 return binop(Iop_And64,
1905 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1906 mkU64(1));
1908 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondNS)) {
1909 /* SHRL/SARL, then NS --> (ULong) ~ result[31] */
1910 return binop(Iop_Xor64,
1911 binop(Iop_And64,
1912 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1913 mkU64(1)),
1914 mkU64(1));
1917 /*---------------- SHRW ----------------*/
1919 if (isU64(cc_op, AMD64G_CC_OP_SHRW) && isU64(cond, AMD64CondZ)) {
1920 /* SHRW, then Z --> test dep1 == 0 */
1921 return unop(Iop_1Uto64,
1922 binop(Iop_CmpEQ32,
1923 unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1924 mkU32(0)));
1926 // No known test case for this, hence disabled:
1927 //if (isU64(cc_op, AMD64G_CC_OP_SHRW) && isU64(cond, AMD64CondNZ)) {
1928 // /* SHRW, then NZ --> test dep1 != 0 */
1929 // return unop(Iop_1Uto64,
1930 // binop(Iop_CmpNE32,
1931 // unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1932 // mkU32(0)));
1935 /*---------------- SHLQ ----------------*/
1937 if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondZ)) {
1938 /* SHLQ, then Z --> test dep1 == 0 */
1939 return unop(Iop_1Uto64,
1940 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1942 if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNZ)) {
1943 /* SHLQ, then NZ --> test dep1 != 0 */
1944 return unop(Iop_1Uto64,
1945 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1948 // Verified
1949 if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondS)) {
1950 /* SHLQ, then S --> (ULong)result[63] */
1951 return binop(Iop_Shr64, cc_dep1, mkU8(63));
1953 // No known test case
1954 //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNS)) {
1955 // /* SHLQ, then NS --> (ULong) ~ result[63] */
1956 // vassert(0);
1959 /*---------------- SHLL ----------------*/
1961 if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondZ)) {
1962 /* SHLL, then Z --> test result[31:0] == 0 */
1963 return unop(Iop_1Uto64,
1964 binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
1965 mkU32(0)));
1967 // Verified
1968 if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNZ)) {
1969 /* SHLL, then NZ --> test dep1 != 0 */
1970 return unop(Iop_1Uto64,
1971 binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
1972 mkU32(0)));
1975 if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondS)) {
1976 /* SHLL, then S --> (ULong)result[31] */
1977 return binop(Iop_And64,
1978 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1979 mkU64(1));
1981 // No known test case
1982 //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNS)) {
1983 // /* SHLL, then NS --> (ULong) ~ result[31] */
1984 // vassert(0);
1987 /*---------------- COPY ----------------*/
1988 /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1989 jbe" for example. */
1991 if (isU64(cc_op, AMD64G_CC_OP_COPY)
1992 && (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1993 /* COPY, then BE --> extract C and Z from dep1, and test (C
1994 or Z == 1). */
1995 /* COPY, then NBE --> extract C and Z from dep1, and test (C
1996 or Z == 0). */
1997 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1998 return
1999 unop(
2000 Iop_1Uto64,
2001 binop(
2002 Iop_CmpEQ64,
2003 binop(
2004 Iop_And64,
2005 binop(
2006 Iop_Or64,
2007 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
2008 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
2010 mkU64(1)
2012 mkU64(nnn)
2017 if (isU64(cc_op, AMD64G_CC_OP_COPY)
2018 && (isU64(cond, AMD64CondB) || isU64(cond, AMD64CondNB))) {
2019 /* COPY, then B --> extract C from dep1, and test (C == 1). */
2020 /* COPY, then NB --> extract C from dep1, and test (C == 0). */
2021 ULong nnn = isU64(cond, AMD64CondB) ? 1 : 0;
2022 return
2023 unop(
2024 Iop_1Uto64,
2025 binop(
2026 Iop_CmpEQ64,
2027 binop(
2028 Iop_And64,
2029 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
2030 mkU64(1)
2032 mkU64(nnn)
2037 if (isU64(cc_op, AMD64G_CC_OP_COPY)
2038 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
2039 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
2040 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
2041 ULong nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
2042 return
2043 unop(
2044 Iop_1Uto64,
2045 binop(
2046 Iop_CmpEQ64,
2047 binop(
2048 Iop_And64,
2049 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
2050 mkU64(1)
2052 mkU64(nnn)
2057 if (isU64(cc_op, AMD64G_CC_OP_COPY)
2058 && (isU64(cond, AMD64CondP) || isU64(cond, AMD64CondNP))) {
2059 /* COPY, then P --> extract P from dep1, and test (P == 1). */
2060 /* COPY, then NP --> extract P from dep1, and test (P == 0). */
2061 ULong nnn = isU64(cond, AMD64CondP) ? 1 : 0;
2062 return
2063 unop(
2064 Iop_1Uto64,
2065 binop(
2066 Iop_CmpEQ64,
2067 binop(
2068 Iop_And64,
2069 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
2070 mkU64(1)
2072 mkU64(nnn)
2077 # if 0
2078 if (cond->tag == Iex_Const && cc_op->tag == Iex_Const) {
2079 vex_printf("spec request failed: ");
2080 vex_printf(" %s ", function_name);
2081 for (i = 0; i < 2/*arity*/; i++) {
2082 vex_printf(" ");
2083 ppIRExpr(args[i]);
2085 vex_printf("\n");
2087 # endif
2089 return NULL;
2092 /* --------- specialising "amd64g_calculate_rflags_c" --------- */
2094 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
2095 /* specialise calls to above "calculate_rflags_c" function */
2096 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
2097 vassert(arity == 4);
2098 cc_op = args[0];
2099 cc_dep1 = args[1];
2100 cc_dep2 = args[2];
2101 cc_ndep = args[3];
2103 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
2104 /* C after sub denotes unsigned less than */
2105 return unop(Iop_1Uto64,
2106 binop(Iop_CmpLT64U,
2107 cc_dep1,
2108 cc_dep2));
2110 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
2111 /* C after sub denotes unsigned less than */
2112 return unop(Iop_1Uto64,
2113 binop(Iop_CmpLT32U,
2114 unop(Iop_64to32, cc_dep1),
2115 unop(Iop_64to32, cc_dep2)));
2117 if (isU64(cc_op, AMD64G_CC_OP_SUBW)) {
2118 /* C after sub denotes unsigned less than */
2119 return unop(Iop_1Uto64,
2120 binop(Iop_CmpLT64U,
2121 binop(Iop_And64,cc_dep1,mkU64(0xFFFF)),
2122 binop(Iop_And64,cc_dep2,mkU64(0xFFFF))));
2124 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
2125 /* C after sub denotes unsigned less than */
2126 return unop(Iop_1Uto64,
2127 binop(Iop_CmpLT64U,
2128 binop(Iop_And64,cc_dep1,mkU64(0xFF)),
2129 binop(Iop_And64,cc_dep2,mkU64(0xFF))));
2131 if (isU64(cc_op, AMD64G_CC_OP_ADDQ)) {
2132 /* C after add denotes sum <u either arg */
2133 return unop(Iop_1Uto64,
2134 binop(Iop_CmpLT64U,
2135 binop(Iop_Add64, cc_dep1, cc_dep2),
2136 cc_dep1));
2138 if (isU64(cc_op, AMD64G_CC_OP_ADDL)) {
2139 /* C after add denotes sum <u either arg */
2140 return unop(Iop_1Uto64,
2141 binop(Iop_CmpLT32U,
2142 unop(Iop_64to32, binop(Iop_Add64, cc_dep1, cc_dep2)),
2143 unop(Iop_64to32, cc_dep1)));
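/* Worked examples for the two carry reductions above (illustrative
   only; operand values are arbitrary):
     SUB:          dep1=5, dep2=7          -> borrow,    C=1 ; 5 <u 7 is 1
                   dep1=7, dep2=5          -> no borrow, C=0 ; 7 <u 5 is 0
     ADD (32-bit): dep1=0xFFFFFFFF, dep2=2 -> sum wraps to 1, C=1 ;
                                              1 <u 0xFFFFFFFF is 1
                   dep1=3, dep2=4          -> sum=7, no carry, C=0 ;
                                              7 <u 3 is 0 */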
2145 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
2146 || isU64(cc_op, AMD64G_CC_OP_LOGICL)
2147 || isU64(cc_op, AMD64G_CC_OP_LOGICW)
2148 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
2149 /* cflag after logic is zero */
2150 return mkU64(0);
2152 if (isU64(cc_op, AMD64G_CC_OP_DECL)
2153 || isU64(cc_op, AMD64G_CC_OP_INCL)
2154 || isU64(cc_op, AMD64G_CC_OP_DECQ)
2155 || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
2156 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
2157 return cc_ndep;
2160 # if 0
2161 if (cc_op->tag == Iex_Const) {
2162 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
2164 # endif
2166 # if 0
2167 if (cc_op->tag == Iex_Const) {
2168 vex_printf("spec request failed: ");
2169 vex_printf(" %s ", function_name);
2170 for (i = 0; i < 2/*arity*/; i++) {
2171 vex_printf(" ");
2172 ppIRExpr(args[i]);
2174 vex_printf("\n");
2176 # endif
2178 return NULL;
2181 # undef unop
2182 # undef binop
2183 # undef mkU64
2184 # undef mkU32
2185 # undef mkU8
2187 return NULL;
2191 /*---------------------------------------------------------------*/
2192 /*--- Supporting functions for x87 FPU activities. ---*/
2193 /*---------------------------------------------------------------*/
2195 static inline Bool host_is_little_endian ( void )
2197 UInt x = 0x76543210;
2198 UChar* p = (UChar*)(&x);
2199 return toBool(*p == 0x10);
2202 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
2203 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2204 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
2206 Bool mantissaIsZero;
2207 Int bexp;
2208 UChar sign;
2209 UChar* f64;
2211 vassert(host_is_little_endian());
2213 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
2215 f64 = (UChar*)(&dbl);
2216 sign = toUChar( (f64[7] >> 7) & 1 );
2218 /* First off, if the tag indicates the register was empty,
2219 return 1,0,sign,1 */
2220 if (tag == 0) {
2221 /* vex_printf("Empty\n"); */
2222 return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
2223 | AMD64G_FC_MASK_C0;
2226 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
2227 bexp &= 0x7FF;
2229 mantissaIsZero
2230 = toBool(
2231 (f64[6] & 0x0F) == 0
2232 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
2235 /* If both exponent and mantissa are zero, the value is zero.
2236 Return 1,0,sign,0. */
2237 if (bexp == 0 && mantissaIsZero) {
2238 /* vex_printf("Zero\n"); */
2239 return AMD64G_FC_MASK_C3 | 0
2240 | (sign << AMD64G_FC_SHIFT_C1) | 0;
2243 /* If exponent is zero but mantissa isn't, it's a denormal.
2244 Return 1,1,sign,0. */
2245 if (bexp == 0 && !mantissaIsZero) {
2246 /* vex_printf("Denormal\n"); */
2247 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
2248 | (sign << AMD64G_FC_SHIFT_C1) | 0;
2251 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
2252 Return 0,1,sign,1. */
2253 if (bexp == 0x7FF && mantissaIsZero) {
2254 /* vex_printf("Inf\n"); */
2255 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
2256 | AMD64G_FC_MASK_C0;
2259 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
2260 Return 0,0,sign,1. */
2261 if (bexp == 0x7FF && !mantissaIsZero) {
2262 /* vex_printf("NaN\n"); */
2263 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
2266 /* Uh, ok, we give up. It must be a normal finite number.
2267 Return 0,1,sign,0.
2269 /* vex_printf("normal\n"); */
2270 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
2274 /* This is used to implement both 'frstor' and 'fldenv'. The latter
2275 appears to differ from the former only in that the 8 FP registers
2276 themselves are not transferred into the guest state. */
2277 static
2278 VexEmNote do_put_x87 ( Bool moveRegs,
2279 /*IN*/Fpu_State* x87_state,
2280 /*OUT*/VexGuestAMD64State* vex_state )
2282 Int stno, preg;
2283 UInt tag;
2284 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2285 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2286 UInt ftop = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
2287 UInt tagw = x87_state->env[FP_ENV_TAG];
2288 UInt fpucw = x87_state->env[FP_ENV_CTRL];
2289 UInt c3210 = x87_state->env[FP_ENV_STAT] & 0x4700;
2290 VexEmNote ew;
2291 UInt fpround;
2292 ULong pair;
2294 /* Copy registers and tags */
2295 for (stno = 0; stno < 8; stno++) {
2296 preg = (stno + ftop) & 7;
2297 tag = (tagw >> (2*preg)) & 3;
2298 if (tag == 3) {
2299 /* register is empty */
2300 /* hmm, if it's empty, does it still get written? Probably
2301 safer to say it does. If we don't, memcheck could get out
2302 of sync, in that it thinks all FP registers are defined by
2303 this helper, but in reality some have not been updated. */
2304 if (moveRegs)
2305 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2306 vexTags[preg] = 0;
2307 } else {
2308 /* register is non-empty */
2309 if (moveRegs)
2310 convert_f80le_to_f64le( &x87_state->reg[10*stno],
2311 (UChar*)&vexRegs[preg] );
2312 vexTags[preg] = 1;
2316 /* stack pointer */
2317 vex_state->guest_FTOP = ftop;
2319 /* status word */
2320 vex_state->guest_FC3210 = c3210;
2322 /* handle the control word, setting FPROUND and detecting any
2323 emulation warnings. */
2324 pair = amd64g_check_fldcw ( (ULong)fpucw );
2325 fpround = (UInt)pair & 0xFFFFFFFFULL;
2326 ew = (VexEmNote)(pair >> 32);
2328 vex_state->guest_FPROUND = fpround & 3;
2330 /* emulation warnings --> caller */
2331 return ew;
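/* Concretely (see the dirty helpers further below):
   amd64g_dirtyhelper_FRSTOR and amd64g_dirtyhelper_XRSTOR_COMPONENT_0
   call this with moveRegs == True, whereas amd64g_dirtyhelper_FLDENV
   calls it with moveRegs == False. */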
2335 /* Create an x87 FPU state from the guest state, as close as
2336 we can approximate it. */
2337 static
2338 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
2339 /*OUT*/Fpu_State* x87_state )
2341 Int i, stno, preg;
2342 UInt tagw;
2343 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2344 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2345 UInt ftop = vex_state->guest_FTOP;
2346 UInt c3210 = vex_state->guest_FC3210;
2348 for (i = 0; i < 14; i++)
2349 x87_state->env[i] = 0;
2351 x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
2352 = x87_state->env[13] = 0xFFFF;
2353 x87_state->env[FP_ENV_STAT]
2354 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2355 x87_state->env[FP_ENV_CTRL]
2356 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2358 /* Dump the register stack in ST order. */
2359 tagw = 0;
2360 for (stno = 0; stno < 8; stno++) {
2361 preg = (stno + ftop) & 7;
2362 if (vexTags[preg] == 0) {
2363 /* register is empty */
2364 tagw |= (3 << (2*preg));
2365 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2366 &x87_state->reg[10*stno] );
2367 } else {
2368 /* register is full. */
2369 tagw |= (0 << (2*preg));
2370 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2371 &x87_state->reg[10*stno] );
2374 x87_state->env[FP_ENV_TAG] = toUShort(tagw);
2378 /*---------------------------------------------------------------*/
2379 /*--- Supporting functions for XSAVE/FXSAVE. ---*/
2380 /*---------------------------------------------------------------*/
2382 /* CALLED FROM GENERATED CODE */
2383 /* DIRTY HELPER (reads guest state, writes guest mem) */
2384 /* XSAVE component 0 is the x87 FPU state. */
2385 void amd64g_dirtyhelper_XSAVE_COMPONENT_0
2386 ( VexGuestAMD64State* gst, HWord addr )
2388 /* Derived from values obtained from
2389 vendor_id : AuthenticAMD
2390 cpu family : 15
2391 model : 12
2392 model name : AMD Athlon(tm) 64 Processor 3200+
2393 stepping : 0
2394 cpu MHz : 2200.000
2395 cache size : 512 KB
2397 /* Somewhat roundabout, but at least it's simple. */
2398 Fpu_State tmp;
2399 UShort* addrS = (UShort*)addr;
2400 UChar* addrC = (UChar*)addr;
2401 UShort fp_tags;
2402 UInt summary_tags;
2403 Int r, stno;
2404 UShort *srcS, *dstS;
2406 do_get_x87( gst, &tmp );
2408 /* Now build the proper fxsave x87 image from the fsave x87 image
2409 we just made. */
2411 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
2412 addrS[1] = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
2414 /* set addrS[2] in an endian-independent way */
2415 summary_tags = 0;
2416 fp_tags = tmp.env[FP_ENV_TAG];
2417 for (r = 0; r < 8; r++) {
2418 if ( ((fp_tags >> (2*r)) & 3) != 3 )
2419 summary_tags |= (1 << r);
2421 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
2422 addrC[5] = 0; /* pad */
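/* Worked example (illustrative only): if the full tag word has physical
   regs 0 and 1 in use (tag pairs != 3) and regs 2..7 empty (tag pairs
   == 3), i.e. fp_tags == 0xFFF0, the loop above sets bits 0 and 1 only,
   so addrC[4] ends up as 0x03. */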
2424 /* FOP: faulting fpu opcode. From experimentation, the real CPU
2425 does not write this field. (?!) */
2426 addrS[3] = 0; /* BOGUS */
2428 /* RIP (Last x87 instruction pointer). From experimentation, the
2429 real CPU does not write this field. (?!) */
2430 addrS[4] = 0; /* BOGUS */
2431 addrS[5] = 0; /* BOGUS */
2432 addrS[6] = 0; /* BOGUS */
2433 addrS[7] = 0; /* BOGUS */
2435 /* RDP (Last x87 data pointer). From experimentation, the real CPU
2436 does not write this field. (?!) */
2437 addrS[8] = 0; /* BOGUS */
2438 addrS[9] = 0; /* BOGUS */
2439 addrS[10] = 0; /* BOGUS */
2440 addrS[11] = 0; /* BOGUS */
2442 /* addrS[13,12] are MXCSR -- not written */
2443 /* addrS[15,14] are MXCSR_MASK -- not written */
2445 /* Copy in the FP registers, in ST order. */
2446 for (stno = 0; stno < 8; stno++) {
2447 srcS = (UShort*)(&tmp.reg[10*stno]);
2448 dstS = (UShort*)(&addrS[16 + 8*stno]);
2449 dstS[0] = srcS[0];
2450 dstS[1] = srcS[1];
2451 dstS[2] = srcS[2];
2452 dstS[3] = srcS[3];
2453 dstS[4] = srcS[4];
2454 dstS[5] = 0;
2455 dstS[6] = 0;
2456 dstS[7] = 0;
2461 /* CALLED FROM GENERATED CODE */
2462 /* DIRTY HELPER (reads guest state, writes guest mem) */
2463 /* XSAVE component 1 is the SSE state. */
2464 void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
2465 ( VexGuestAMD64State* gst, HWord addr )
2467 UShort* addrS = (UShort*)addr;
2468 UInt mxcsr;
2470 /* The only non-register parts of the SSE state are MXCSR and
2471 MXCSR_MASK. */
2472 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
2474 addrS[12] = toUShort(mxcsr); /* MXCSR */
2475 addrS[13] = toUShort(mxcsr >> 16);
2477 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
2478 addrS[15] = 0x0000; /* MXCSR mask (hi16) */
2482 /* VISIBLE TO LIBVEX CLIENT */
2483 /* Do FXSAVE from the supplied VexGuestAMD64State structure and store
2484 the result at the given address which represents a buffer of at
2485 least 416 bytes.
2487 This function is not called from generated code. FXSAVE is dealt
2488 with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1}
2489 functions above plus some in-line IR. This function is merely a
2490 convenience function for VEX's users.
2492 void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst,
2493 /*OUT*/HWord fp_state )
2495 /* Do the x87 part */
2496 amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state);
2498 /* And now the SSE part, except for the registers themselves. */
2499 amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2501 /* That's the first 160 bytes of the image done. */
2502 /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is
2503 big-endian, these need to be byte-swapped. */
2504 U128 *xmm = (U128 *)(fp_state + 160);
2505 vassert(host_is_little_endian());
2507 # define COPY_U128(_dst,_src) \
2508 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2509 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2510 while (0)
2512 COPY_U128( xmm[0], gst->guest_YMM0 );
2513 COPY_U128( xmm[1], gst->guest_YMM1 );
2514 COPY_U128( xmm[2], gst->guest_YMM2 );
2515 COPY_U128( xmm[3], gst->guest_YMM3 );
2516 COPY_U128( xmm[4], gst->guest_YMM4 );
2517 COPY_U128( xmm[5], gst->guest_YMM5 );
2518 COPY_U128( xmm[6], gst->guest_YMM6 );
2519 COPY_U128( xmm[7], gst->guest_YMM7 );
2520 COPY_U128( xmm[8], gst->guest_YMM8 );
2521 COPY_U128( xmm[9], gst->guest_YMM9 );
2522 COPY_U128( xmm[10], gst->guest_YMM10 );
2523 COPY_U128( xmm[11], gst->guest_YMM11 );
2524 COPY_U128( xmm[12], gst->guest_YMM12 );
2525 COPY_U128( xmm[13], gst->guest_YMM13 );
2526 COPY_U128( xmm[14], gst->guest_YMM14 );
2527 COPY_U128( xmm[15], gst->guest_YMM15 );
2528 # undef COPY_U128
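/* A minimal client-side usage sketch for the convenience function above.
   'gst' and 'buf' are illustrative names, not part of this file:

     UChar buf[416];        // at least 416 bytes, as documented above
     LibVEX_GuestAMD64_fxsave( &gst, (HWord)&buf[0] );
     // buf[0..159]   : x87 env, MXCSR/MXCSR_MASK and %st(0)..%st(7)
     // buf[160..415] : %xmm0 .. %xmm15, 16 bytes each
*/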
2532 /*---------------------------------------------------------------*/
2533 /*--- Supporting functions for XRSTOR/FXRSTOR. ---*/
2534 /*---------------------------------------------------------------*/
2536 /* CALLED FROM GENERATED CODE */
2537 /* DIRTY HELPER (writes guest state, reads guest mem) */
2538 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
2539 ( VexGuestAMD64State* gst, HWord addr )
2541 Fpu_State tmp;
2542 UShort* addrS = (UShort*)addr;
2543 UChar* addrC = (UChar*)addr;
2544 UShort fp_tags;
2545 Int r, stno, i;
2547 /* Copy the x87 registers out of the image, into a temporary
2548 Fpu_State struct. */
2549 for (i = 0; i < 14; i++) tmp.env[i] = 0;
2550 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
2551 /* fill in tmp.reg[0..7] */
2552 for (stno = 0; stno < 8; stno++) {
2553 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
2554 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
2555 dstS[0] = srcS[0];
2556 dstS[1] = srcS[1];
2557 dstS[2] = srcS[2];
2558 dstS[3] = srcS[3];
2559 dstS[4] = srcS[4];
2561 /* fill in tmp.env[0..13] */
2562 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
2563 tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
2565 fp_tags = 0;
2566 for (r = 0; r < 8; r++) {
2567 if (addrC[4] & (1<<r))
2568 fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
2569 else
2570 fp_tags |= (3 << (2*r)); /* EMPTY */
2572 tmp.env[FP_ENV_TAG] = fp_tags;
2574 /* Now write 'tmp' into the guest state. */
2575 VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );
2577 return warnX87;
2581 /* CALLED FROM GENERATED CODE */
2582 /* DIRTY HELPER (writes guest state, reads guest mem) */
2583 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
2584 ( VexGuestAMD64State* gst, HWord addr )
2586 UShort* addrS = (UShort*)addr;
2587 UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
2588 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
2589 ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
2591 VexEmNote warnXMM = (VexEmNote)(w64 >> 32);
2593 gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
2594 return warnXMM;
2598 /* VISIBLE TO LIBVEX CLIENT */
2599 /* Do FXRSTOR from the supplied address and store the values read
2600 into the given VexGuestAMD64State structure.
2602 This function is not called from generated code. FXRSTOR is dealt
2603 with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
2604 functions above plus some in-line IR. This function is merely a
2605 convenience function for VEX's users.
2607 VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
2608 /*MOD*/VexGuestAMD64State* gst )
2610 /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
2611 to be byte-swapped. */
2612 U128 *xmm = (U128 *)(fp_state + 160);
2614 vassert(host_is_little_endian());
2616 # define COPY_U128(_dst,_src) \
2617 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2618 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2619 while (0)
2621 COPY_U128( gst->guest_YMM0, xmm[0] );
2622 COPY_U128( gst->guest_YMM1, xmm[1] );
2623 COPY_U128( gst->guest_YMM2, xmm[2] );
2624 COPY_U128( gst->guest_YMM3, xmm[3] );
2625 COPY_U128( gst->guest_YMM4, xmm[4] );
2626 COPY_U128( gst->guest_YMM5, xmm[5] );
2627 COPY_U128( gst->guest_YMM6, xmm[6] );
2628 COPY_U128( gst->guest_YMM7, xmm[7] );
2629 COPY_U128( gst->guest_YMM8, xmm[8] );
2630 COPY_U128( gst->guest_YMM9, xmm[9] );
2631 COPY_U128( gst->guest_YMM10, xmm[10] );
2632 COPY_U128( gst->guest_YMM11, xmm[11] );
2633 COPY_U128( gst->guest_YMM12, xmm[12] );
2634 COPY_U128( gst->guest_YMM13, xmm[13] );
2635 COPY_U128( gst->guest_YMM14, xmm[14] );
2636 COPY_U128( gst->guest_YMM15, xmm[15] );
2638 # undef COPY_U128
2640 VexEmNote warnXMM
2641 = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2642 VexEmNote warnX87
2643 = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state);
2645 /* Prefer an X87 emwarn over an XMM one, if both exist. */
2646 if (warnX87 != EmNote_NONE)
2647 return warnX87;
2648 else
2649 return warnXMM;
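/* A minimal round-trip sketch using the two convenience functions;
   'gst' and 'buf' are illustrative names:

     UChar buf[416];
     LibVEX_GuestAMD64_fxsave ( &gst, (HWord)&buf[0] );
     VexEmNote note = LibVEX_GuestAMD64_fxrstor( (HWord)&buf[0], &gst );
     // per the code above, an x87 warning is reported in preference
     // to an XMM one
*/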
2653 /*---------------------------------------------------------------*/
2654 /*--- Supporting functions for FSAVE/FRSTOR ---*/
2655 /*---------------------------------------------------------------*/
2657 /* DIRTY HELPER (writes guest state) */
2658 /* Initialise the x87 FPU state as per 'finit'. */
2659 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
2661 Int i;
2662 gst->guest_FTOP = 0;
2663 for (i = 0; i < 8; i++) {
2664 gst->guest_FPTAG[i] = 0; /* empty */
2665 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
2667 gst->guest_FPROUND = (ULong)Irrm_NEAREST;
2668 gst->guest_FC3210 = 0;
2672 /* CALLED FROM GENERATED CODE */
2673 /* DIRTY HELPER (reads guest memory) */
2674 ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
2676 ULong f64;
2677 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
2678 return f64;
2681 /* CALLED FROM GENERATED CODE */
2682 /* DIRTY HELPER (writes guest memory) */
2683 void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
2685 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
2689 /* CALLED FROM GENERATED CODE */
2690 /* CLEAN HELPER */
2691 /* mxcsr[15:0] contains an SSE native format MXCSR value.
2692 Extract from it the required SSEROUND value and any resulting
2693 emulation warning, and return (warn << 32) | sseround value.
2695 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
2697 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
2698 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2699 ULong rmode = (mxcsr >> 13) & 3;
2701 /* Detect any required emulation warnings. */
2702 VexEmNote ew = EmNote_NONE;
2704 if ((mxcsr & 0x1F80) != 0x1F80) {
2705 /* unmasked exceptions! */
2706 ew = EmWarn_X86_sseExns;
2708 else
2709 if (mxcsr & (1<<15)) {
2710 /* FZ is set */
2711 ew = EmWarn_X86_fz;
2713 else
2714 if (mxcsr & (1<<6)) {
2715 /* DAZ is set */
2716 ew = EmWarn_X86_daz;
2719 return (((ULong)ew) << 32) | ((ULong)rmode);
2723 /* CALLED FROM GENERATED CODE */
2724 /* CLEAN HELPER */
2725 /* Given sseround as an IRRoundingMode value, create a suitable SSE
2726 native format MXCSR value. */
2727 ULong amd64g_create_mxcsr ( ULong sseround )
2729 sseround &= 3;
2730 return 0x1F80 | (sseround << 13);
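/* How the two MXCSR helpers pair up (illustrative sketch only).
   amd64g_create_mxcsr builds 0x1F80 | (rmode << 13): all exceptions
   masked, FZ (bit 15) and DAZ (bit 6) clear.  Feeding that back through
   amd64g_check_ldmxcsr therefore recovers the same rounding mode with no
   emulation warning:

     ULong pair   = amd64g_check_ldmxcsr( amd64g_create_mxcsr(rmode) );
     ULong rmode2 = pair & 0xFFFFFFFFULL;       // == rmode
     VexEmNote ew = (VexEmNote)(pair >> 32);    // == EmNote_NONE
*/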
2734 /* CLEAN HELPER */
2735 /* fpucw[15:0] contains an x87 native format FPU control word.
2736 Extract from it the required FPROUND value and any resulting
2737 emulation warning, and return (warn << 32) | fpround value.
2739 ULong amd64g_check_fldcw ( ULong fpucw )
2741 /* Decide on a rounding mode. fpucw[11:10] holds it. */
2742 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2743 ULong rmode = (fpucw >> 10) & 3;
2745 /* Detect any required emulation warnings. */
2746 VexEmNote ew = EmNote_NONE;
2748 if ((fpucw & 0x3F) != 0x3F) {
2749 /* unmasked exceptions! */
2750 ew = EmWarn_X86_x87exns;
2752 else
2753 if (((fpucw >> 8) & 3) != 3) {
2754 /* unsupported precision */
2755 ew = EmWarn_X86_x87precision;
2758 return (((ULong)ew) << 32) | ((ULong)rmode);
2762 /* CLEAN HELPER */
2763 /* Given fpround as an IRRoundingMode value, create a suitable x87
2764 native format FPU control word. */
2765 ULong amd64g_create_fpucw ( ULong fpround )
2767 fpround &= 3;
2768 return 0x037F | (fpround << 10);
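/* The x87 control-word helpers pair up the same way (illustrative sketch,
   mirroring how do_put_x87 unpacks the result of amd64g_check_fldcw).
   amd64g_create_fpucw builds 0x037F | (rmode << 10) -- all exceptions
   masked, precision field == 3 -- so pushing it back through
   amd64g_check_fldcw yields the same rounding mode and no warning:

     ULong pair    = amd64g_check_fldcw( amd64g_create_fpucw(rmode) );
     UInt  fpround = (UInt)pair & 0xFFFFFFFFULL;  // == rmode
     VexEmNote ew  = (VexEmNote)(pair >> 32);     // == EmNote_NONE
*/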
2772 /* This is used to implement 'fldenv'.
2773 Reads 28 bytes at x87_state[0 .. 27]. */
2774 /* CALLED FROM GENERATED CODE */
2775 /* DIRTY HELPER */
2776 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
2777 /*IN*/HWord x87_state)
2779 return do_put_x87( False, (Fpu_State*)x87_state, vex_state );
2783 /* CALLED FROM GENERATED CODE */
2784 /* DIRTY HELPER */
2785 /* Create an x87 FPU env from the guest state, as close as we can
2786 approximate it. Writes 28 bytes at x87_state[0..27]. */
2787 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
2788 /*OUT*/HWord x87_state )
2790 Int i, stno, preg;
2791 UInt tagw;
2792 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2793 Fpu_State* x87 = (Fpu_State*)x87_state;
2794 UInt ftop = vex_state->guest_FTOP;
2795 ULong c3210 = vex_state->guest_FC3210;
2797 for (i = 0; i < 14; i++)
2798 x87->env[i] = 0;
2800 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
2801 x87->env[FP_ENV_STAT]
2802 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
2803 x87->env[FP_ENV_CTRL]
2804 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
2806 /* Compute the x87 tag word. */
2807 tagw = 0;
2808 for (stno = 0; stno < 8; stno++) {
2809 preg = (stno + ftop) & 7;
2810 if (vexTags[preg] == 0) {
2811 /* register is empty */
2812 tagw |= (3 << (2*preg));
2813 } else {
2814 /* register is full. */
2815 tagw |= (0 << (2*preg));
2818 x87->env[FP_ENV_TAG] = toUShort(tagw);
2820 /* We don't dump the x87 registers, tho. */
2824 /* This is used to implement 'fnsave'.
2825 Writes 108 bytes at x87_state[0 .. 107]. */
2826 /* CALLED FROM GENERATED CODE */
2827 /* DIRTY HELPER */
2828 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
2829 /*OUT*/HWord x87_state)
2831 do_get_x87( vex_state, (Fpu_State*)x87_state );
2835 /* This is used to implement 'fnsaves'.
2836 Writes 94 bytes at x87_state[0 .. 93]. */
2837 /* CALLED FROM GENERATED CODE */
2838 /* DIRTY HELPER */
2839 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
2840 /*OUT*/HWord x87_state)
2842 Int i, stno, preg;
2843 UInt tagw;
2844 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2845 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2846 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2847 UInt ftop = vex_state->guest_FTOP;
2848 UInt c3210 = vex_state->guest_FC3210;
2850 for (i = 0; i < 7; i++)
2851 x87->env[i] = 0;
2853 x87->env[FPS_ENV_STAT]
2854 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2855 x87->env[FPS_ENV_CTRL]
2856 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2858 /* Dump the register stack in ST order. */
2859 tagw = 0;
2860 for (stno = 0; stno < 8; stno++) {
2861 preg = (stno + ftop) & 7;
2862 if (vexTags[preg] == 0) {
2863 /* register is empty */
2864 tagw |= (3 << (2*preg));
2865 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2866 &x87->reg[10*stno] );
2867 } else {
2868 /* register is full. */
2869 tagw |= (0 << (2*preg));
2870 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2871 &x87->reg[10*stno] );
2874 x87->env[FPS_ENV_TAG] = toUShort(tagw);
2878 /* This is used to implement 'frstor'.
2879 Reads 108 bytes at x87_state[0 .. 107]. */
2880 /* CALLED FROM GENERATED CODE */
2881 /* DIRTY HELPER */
2882 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
2883 /*IN*/HWord x87_state)
2885 return do_put_x87( True, (Fpu_State*)x87_state, vex_state );
2889 /* This is used to implement 'frstors'.
2890 Reads 94 bytes at x87_state[0 .. 93]. */
2891 /* CALLED FROM GENERATED CODE */
2892 /* DIRTY HELPER */
2893 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
2894 /*IN*/HWord x87_state)
2896 Int stno, preg;
2897 UInt tag;
2898 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2899 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2900 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2901 UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7;
2902 UInt tagw = x87->env[FPS_ENV_TAG];
2903 UInt fpucw = x87->env[FPS_ENV_CTRL];
2904 UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700;
2905 VexEmNote ew;
2906 UInt fpround;
2907 ULong pair;
2909 /* Copy registers and tags */
2910 for (stno = 0; stno < 8; stno++) {
2911 preg = (stno + ftop) & 7;
2912 tag = (tagw >> (2*preg)) & 3;
2913 if (tag == 3) {
2914 /* register is empty */
2915 /* hmm, if it's empty, does it still get written? Probably
2916 safer to say it does. If we don't, memcheck could get out
2917 of sync, in that it thinks all FP registers are defined by
2918 this helper, but in reality some have not been updated. */
2919 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2920 vexTags[preg] = 0;
2921 } else {
2922 /* register is non-empty */
2923 convert_f80le_to_f64le( &x87->reg[10*stno],
2924 (UChar*)&vexRegs[preg] );
2925 vexTags[preg] = 1;
2929 /* stack pointer */
2930 vex_state->guest_FTOP = ftop;
2932 /* status word */
2933 vex_state->guest_FC3210 = c3210;
2935 /* handle the control word, setting FPROUND and detecting any
2936 emulation warnings. */
2937 pair = amd64g_check_fldcw ( (ULong)fpucw );
2938 fpround = (UInt)pair & 0xFFFFFFFFULL;
2939 ew = (VexEmNote)(pair >> 32);
2941 vex_state->guest_FPROUND = fpround & 3;
2943 /* emulation warnings --> caller */
2944 return ew;
2948 /*---------------------------------------------------------------*/
2949 /*--- CPUID helpers. ---*/
2950 /*---------------------------------------------------------------*/
2952 /* Claim to be the following CPU, which is probably representative of
2953 the lowliest (earliest) amd64 offerings. It can do neither sse3
2954 nor cx16.
2956 vendor_id : AuthenticAMD
2957 cpu family : 15
2958 model : 5
2959 model name : AMD Opteron (tm) Processor 848
2960 stepping : 10
2961 cpu MHz : 1797.682
2962 cache size : 1024 KB
2963 fpu : yes
2964 fpu_exception : yes
2965 cpuid level : 1
2966 wp : yes
2967 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2968 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2969 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2970 bogomips : 3600.62
2971 TLB size : 1088 4K pages
2972 clflush size : 64
2973 cache_alignment : 64
2974 address sizes : 40 bits physical, 48 bits virtual
2975 power management: ts fid vid ttp
2977 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2978 we don't support them. See #291568. 3dnow is 80000001.EDX.31
2979 and 3dnowext is 80000001.EDX.30.
2981 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2983 # define SET_ABCD(_a,_b,_c,_d) \
2984 do { st->guest_RAX = (ULong)(_a); \
2985 st->guest_RBX = (ULong)(_b); \
2986 st->guest_RCX = (ULong)(_c); \
2987 st->guest_RDX = (ULong)(_d); \
2988 } while (0)
2990 switch (0xFFFFFFFF & st->guest_RAX) {
2991 case 0x00000000:
2992 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2993 break;
2994 case 0x00000001:
2995 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2996 break;
2997 case 0x80000000:
2998 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
2999 break;
3000 case 0x80000001:
3001 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is
3002 the original it-is-supported value that the h/w provides.
3003 See #291568. */
3004 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
3005 0x21d3fbff);
3006 break;
3007 case 0x80000002:
3008 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
3009 break;
3010 case 0x80000003:
3011 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
3012 break;
3013 case 0x80000004:
3014 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3015 break;
3016 case 0x80000005:
3017 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
3018 break;
3019 case 0x80000006:
3020 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
3021 break;
3022 case 0x80000007:
3023 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
3024 break;
3025 case 0x80000008:
3026 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
3027 break;
3028 default:
3029 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3030 break;
3032 # undef SET_ABCD
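/* Sketch of how these CPUID helpers are driven (illustrative only): the
   leaf comes from the guest's RAX (and, for the later helpers, the
   subleaf from RCX), and the results are written back to RAX..RDX.
   For example, with the baseline helper and 'st' a pointer to the
   guest state:

     st->guest_RAX = 0x80000001ULL;
     amd64g_dirtyhelper_CPUID_baseline( st );
     // st->guest_RDX now holds 0x21d3fbff, i.e. with the 3dnow (bit 31)
     // and 3dnowext (bit 30) bits cleared, as noted above.
*/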
3036 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
3037 capable.
3039 vendor_id : GenuineIntel
3040 cpu family : 6
3041 model : 15
3042 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
3043 stepping : 6
3044 cpu MHz : 2394.000
3045 cache size : 4096 KB
3046 physical id : 0
3047 siblings : 2
3048 core id : 0
3049 cpu cores : 2
3050 fpu : yes
3051 fpu_exception : yes
3052 cpuid level : 10
3053 wp : yes
3054 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3055 mtrr pge mca cmov pat pse36 clflush dts acpi
3056 mmx fxsr sse sse2 ss ht tm syscall nx lm
3057 constant_tsc pni monitor ds_cpl vmx est tm2
3058 cx16 xtpr lahf_lm
3059 bogomips : 4798.78
3060 clflush size : 64
3061 cache_alignment : 64
3062 address sizes : 36 bits physical, 48 bits virtual
3063 power management:
3065 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
3067 # define SET_ABCD(_a,_b,_c,_d) \
3068 do { st->guest_RAX = (ULong)(_a); \
3069 st->guest_RBX = (ULong)(_b); \
3070 st->guest_RCX = (ULong)(_c); \
3071 st->guest_RDX = (ULong)(_d); \
3072 } while (0)
3074 switch (0xFFFFFFFF & st->guest_RAX) {
3075 case 0x00000000:
3076 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
3077 break;
3078 case 0x00000001:
3079 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
3080 break;
3081 case 0x00000002:
3082 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
3083 break;
3084 case 0x00000003:
3085 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3086 break;
3087 case 0x00000004: {
3088 switch (0xFFFFFFFF & st->guest_RCX) {
3089 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
3090 0x0000003f, 0x00000001); break;
3091 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
3092 0x0000003f, 0x00000001); break;
3093 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
3094 0x00000fff, 0x00000001); break;
3095 default: SET_ABCD(0x00000000, 0x00000000,
3096 0x00000000, 0x00000000); break;
3098 break;
3100 case 0x00000005:
3101 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
3102 break;
3103 case 0x00000006:
3104 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
3105 break;
3106 case 0x00000007:
3107 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3108 break;
3109 case 0x00000008:
3110 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
3111 break;
3112 case 0x00000009:
3113 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3114 break;
3115 case 0x0000000a:
3116 unhandled_eax_value:
3117 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
3118 break;
3119 case 0x80000000:
3120 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3121 break;
3122 case 0x80000001:
3123 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
3124 break;
3125 case 0x80000002:
3126 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3127 break;
3128 case 0x80000003:
3129 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
3130 break;
3131 case 0x80000004:
3132 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
3133 break;
3134 case 0x80000005:
3135 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3136 break;
3137 case 0x80000006:
3138 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
3139 break;
3140 case 0x80000007:
3141 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3142 break;
3143 case 0x80000008:
3144 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3145 break;
3146 default:
3147 goto unhandled_eax_value;
3149 # undef SET_ABCD
3153 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
3154 capable.
3156 vendor_id : GenuineIntel
3157 cpu family : 6
3158 model : 37
3159 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
3160 stepping : 2
3161 cpu MHz : 3334.000
3162 cache size : 4096 KB
3163 physical id : 0
3164 siblings : 4
3165 core id : 0
3166 cpu cores : 2
3167 apicid : 0
3168 initial apicid : 0
3169 fpu : yes
3170 fpu_exception : yes
3171 cpuid level : 11
3172 wp : yes
3173 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3174 mtrr pge mca cmov pat pse36 clflush dts acpi
3175 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
3176 lm constant_tsc arch_perfmon pebs bts rep_good
3177 xtopology nonstop_tsc aperfmperf pni pclmulqdq
3178 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
3179 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
3180 arat tpr_shadow vnmi flexpriority ept vpid
3181 bogomips : 6957.57
3182 clflush size : 64
3183 cache_alignment : 64
3184 address sizes : 36 bits physical, 48 bits virtual
3185 power management:
3187 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
3189 # define SET_ABCD(_a,_b,_c,_d) \
3190 do { st->guest_RAX = (ULong)(_a); \
3191 st->guest_RBX = (ULong)(_b); \
3192 st->guest_RCX = (ULong)(_c); \
3193 st->guest_RDX = (ULong)(_d); \
3194 } while (0)
3196 UInt old_eax = (UInt)st->guest_RAX;
3197 UInt old_ecx = (UInt)st->guest_RCX;
3199 switch (old_eax) {
3200 case 0x00000000:
3201 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
3202 break;
3203 case 0x00000001:
3204 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
3205 break;
3206 case 0x00000002:
3207 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
3208 break;
3209 case 0x00000003:
3210 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3211 break;
3212 case 0x00000004:
3213 switch (old_ecx) {
3214 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3215 0x0000003f, 0x00000000); break;
3216 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
3217 0x0000007f, 0x00000000); break;
3218 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3219 0x000001ff, 0x00000000); break;
3220 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3221 0x00000fff, 0x00000002); break;
3222 default: SET_ABCD(0x00000000, 0x00000000,
3223 0x00000000, 0x00000000); break;
3225 break;
3226 case 0x00000005:
3227 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3228 break;
3229 case 0x00000006:
3230 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
3231 break;
3232 case 0x00000007:
3233 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3234 break;
3235 case 0x00000008:
3236 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3237 break;
3238 case 0x00000009:
3239 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3240 break;
3241 case 0x0000000a:
3242 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
3243 break;
3244 case 0x0000000b:
3245 switch (old_ecx) {
3246 case 0x00000000:
3247 SET_ABCD(0x00000001, 0x00000002,
3248 0x00000100, 0x00000000); break;
3249 case 0x00000001:
3250 SET_ABCD(0x00000004, 0x00000004,
3251 0x00000201, 0x00000000); break;
3252 default:
3253 SET_ABCD(0x00000000, 0x00000000,
3254 old_ecx, 0x00000000); break;
3256 break;
3257 case 0x0000000c:
3258 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
3259 break;
3260 case 0x0000000d:
3261 switch (old_ecx) {
3262 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3263 0x00000100, 0x00000000); break;
3264 case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
3265 0x00000201, 0x00000000); break;
3266 default: SET_ABCD(0x00000000, 0x00000000,
3267 old_ecx, 0x00000000); break;
3269 break;
3270 case 0x80000000:
3271 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3272 break;
3273 case 0x80000001:
3274 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3275 break;
3276 case 0x80000002:
3277 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3278 break;
3279 case 0x80000003:
3280 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
3281 break;
3282 case 0x80000004:
3283 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
3284 break;
3285 case 0x80000005:
3286 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3287 break;
3288 case 0x80000006:
3289 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3290 break;
3291 case 0x80000007:
3292 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3293 break;
3294 case 0x80000008:
3295 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3296 break;
3297 default:
3298 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
3299 break;
3301 # undef SET_ABCD
3305 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
3306 capable. Plus (kludge!) it "supports" HTM.
3308 Also with the following change: claim that XSaveOpt is not
3309 available, by cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1
3310 on the real CPU. Consequently, programs that correctly observe
3311 these CPUID values should only try to use 3 of the 8 XSave-family
3312 instructions: XGETBV, XSAVE and XRSTOR. In particular this avoids
3313 having to implement the compacted or optimised save/restore
3314 variants.
3316 vendor_id : GenuineIntel
3317 cpu family : 6
3318 model : 42
3319 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
3320 stepping : 7
3321 cpu MHz : 1600.000
3322 cache size : 6144 KB
3323 physical id : 0
3324 siblings : 4
3325 core id : 3
3326 cpu cores : 4
3327 apicid : 6
3328 initial apicid : 6
3329 fpu : yes
3330 fpu_exception : yes
3331 cpuid level : 13
3332 wp : yes
3333 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3334 mtrr pge mca cmov pat pse36 clflush dts acpi
3335 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
3336 lm constant_tsc arch_perfmon pebs bts rep_good
3337 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
3338 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
3339 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
3340 lahf_lm ida arat epb xsaveopt pln pts dts
3341 tpr_shadow vnmi flexpriority ept vpid
3343 bogomips : 5768.94
3344 clflush size : 64
3345 cache_alignment : 64
3346 address sizes : 36 bits physical, 48 bits virtual
3347 power management:
3349 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st,
3350 ULong hasF16C, ULong hasRDRAND,
3351 ULong hasRDSEED )
3353 vassert((hasF16C >> 1) == 0ULL);
3354 vassert((hasRDRAND >> 1) == 0ULL);
3355 # define SET_ABCD(_a,_b,_c,_d) \
3356 do { st->guest_RAX = (ULong)(_a); \
3357 st->guest_RBX = (ULong)(_b); \
3358 st->guest_RCX = (ULong)(_c); \
3359 st->guest_RDX = (ULong)(_d); \
3360 } while (0)
3362 UInt old_eax = (UInt)st->guest_RAX;
3363 UInt old_ecx = (UInt)st->guest_RCX;
3365 switch (old_eax) {
3366 case 0x00000000:
3367 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3368 break;
3369 case 0x00000001: {
3370 // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
3371 // but patch in support for them as directed by the caller.
3372 UInt ecx_extra
3373 = (hasF16C ? (1U << 29) : 0) | (hasRDRAND ? (1U << 30) : 0);
3374 SET_ABCD(0x000206a7, 0x00100800, (0x1f9ae3bf | ecx_extra), 0xbfebfbff);
3375 break;
3377 case 0x00000002:
3378 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
3379 break;
3380 case 0x00000003:
3381 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3382 break;
3383 case 0x00000004:
3384 switch (old_ecx) {
3385 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3386 0x0000003f, 0x00000000); break;
3387 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3388 0x0000003f, 0x00000000); break;
3389 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3390 0x000001ff, 0x00000000); break;
3391 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
3392 0x00001fff, 0x00000006); break;
3393 default: SET_ABCD(0x00000000, 0x00000000,
3394 0x00000000, 0x00000000); break;
3396 break;
3397 case 0x00000005:
3398 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3399 break;
3400 case 0x00000006:
3401 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3402 break;
3403 case 0x00000007: {
3404 UInt ebx_extra = 0;
3405 if (old_ecx == 0)
3406 ebx_extra = hasRDSEED ? (1U << 18) : 0;
3407 SET_ABCD(0x00000000, 0x00000800 | ebx_extra, 0x00000000,
3408 0x00000000);
3409 break;
3411 case 0x00000008:
3412 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3413 break;
3414 case 0x00000009:
3415 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3416 break;
3417 case 0x0000000a:
3418 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3419 break;
3420 case 0x0000000b:
3421 switch (old_ecx) {
3422 case 0x00000000:
3423 SET_ABCD(0x00000001, 0x00000001,
3424 0x00000100, 0x00000000); break;
3425 case 0x00000001:
3426 SET_ABCD(0x00000004, 0x00000004,
3427 0x00000201, 0x00000000); break;
3428 default:
3429 SET_ABCD(0x00000000, 0x00000000,
3430 old_ecx, 0x00000000); break;
3432 break;
3433 case 0x0000000c:
3434 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3435 break;
3436 case 0x0000000d:
3437 switch (old_ecx) {
3438 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3439 0x00000340, 0x00000000); break;
3440 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3441 0x00000000, 0x00000000); break;
3442 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3443 0x00000000, 0x00000000); break;
3444 default: SET_ABCD(0x00000000, 0x00000000,
3445 0x00000000, 0x00000000); break;
3447 break;
3448 case 0x0000000e:
3449 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3450 break;
3451 case 0x0000000f:
3452 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3453 break;
3454 case 0x80000000:
3455 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3456 break;
3457 case 0x80000001:
3458 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3459 break;
3460 case 0x80000002:
3461 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
3462 break;
3463 case 0x80000003:
3464 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
3465 break;
3466 case 0x80000004:
3467 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
3468 break;
3469 case 0x80000005:
3470 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3471 break;
3472 case 0x80000006:
3473 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3474 break;
3475 case 0x80000007:
3476 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3477 break;
3478 case 0x80000008:
3479 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3480 break;
3481 default:
3482 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3483 break;
3485 # undef SET_ABCD
3489 /* Claim to be the following CPU (4 x ...), which is AVX2 capable.
3491 With the following change: claim that XSaveOpt is not available, by
3492 cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 on the real
3493 CPU. Consequently, programs that correctly observe these CPUID
3494 values should only try to use 3 of the 8 XSave-family instructions:
3495 XGETBV, XSAVE and XRSTOR. In particular this avoids having to
3496 implement the compacted or optimised save/restore variants.
3498 vendor_id : GenuineIntel
3499 cpu family : 6
3500 model : 60
3501 model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
3502 stepping : 3
3503 microcode : 0x1c
3504 cpu MHz : 919.957
3505 cache size : 8192 KB
3506 physical id : 0
3507 siblings : 4
3508 core id : 3
3509 cpu cores : 4
3510 apicid : 6
3511 initial apicid : 6
3512 fpu : yes
3513 fpu_exception : yes
3514 cpuid level : 13
3515 wp : yes
3516 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
3517 cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
3518 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc
3519 arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc
3520 aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl
3521 vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1
3522 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave
3523 avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm
3524 tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust
3525 bmi1 avx2 smep bmi2 erms invpcid xsaveopt
3526 bugs :
3527 bogomips : 5786.68
3528 clflush size : 64
3529 cache_alignment : 64
3530 address sizes : 39 bits physical, 48 bits virtual
3531 power management:
3533 void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st,
3534 ULong hasF16C, ULong hasRDRAND,
3535 ULong hasRDSEED )
3537 vassert((hasF16C >> 1) == 0ULL);
3538 vassert((hasRDRAND >> 1) == 0ULL);
3539 # define SET_ABCD(_a,_b,_c,_d) \
3540 do { st->guest_RAX = (ULong)(_a); \
3541 st->guest_RBX = (ULong)(_b); \
3542 st->guest_RCX = (ULong)(_c); \
3543 st->guest_RDX = (ULong)(_d); \
3544 } while (0)
3546 UInt old_eax = (UInt)st->guest_RAX;
3547 UInt old_ecx = (UInt)st->guest_RCX;
3549 switch (old_eax) {
3550 case 0x00000000:
3551 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3552 break;
3553 case 0x00000001: {
3554 // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
3555 // but patch in support for them as directed by the caller.
3556 UInt ecx_extra
3557 = (hasF16C ? (1U << 29) : 0) | (hasRDRAND ? (1U << 30) : 0);
3558 SET_ABCD(0x000306c3, 0x02100800, (0x1ffafbff | ecx_extra), 0xbfebfbff);
3559 break;
3561 case 0x00000002:
3562 SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
3563 break;
3564 case 0x00000003:
3565 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3566 break;
3567 case 0x00000004:
3568 switch (old_ecx) {
3569 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3570 0x0000003f, 0x00000000); break;
3571 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3572 0x0000003f, 0x00000000); break;
3573 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3574 0x000001ff, 0x00000000); break;
3575 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3576 0x00001fff, 0x00000006); break;
3577 default: SET_ABCD(0x00000000, 0x00000000,
3578 0x00000000, 0x00000000); break;
3580 break;
3581 case 0x00000005:
3582 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120);
3583 break;
3584 case 0x00000006:
3585 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3586 break;
3587 case 0x00000007:
3588 switch (old_ecx) {
3589 /* Don't advertise FSGSBASE support, bit 0 in EBX. */
3591 case 0x00000000: {
3592 UInt ebx_extra = hasRDSEED ? (1U << 18) : 0;
3593 SET_ABCD(0x00000000, 0x000027aa | ebx_extra,
3594 0x00000000, 0x00000000); break;
3596 default: SET_ABCD(0x00000000, 0x00000000,
3597 0x00000000, 0x00000000); break;
3599 break;
3600 case 0x00000008:
3601 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3602 break;
3603 case 0x00000009:
3604 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3605 break;
3606 case 0x0000000a:
3607 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3608 break;
3609 case 0x0000000b:
3610 switch (old_ecx) {
3611 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3612 0x00000100, 0x00000002); break;
3613 case 0x00000001: SET_ABCD(0x00000004, 0x00000008,
3614 0x00000201, 0x00000002); break;
3615 default: SET_ABCD(0x00000000, 0x00000000,
3616 old_ecx, 0x00000002); break;
3618 break;
3619 case 0x0000000c:
3620 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3621 break;
3622 case 0x0000000d:
3623 switch (old_ecx) {
3624 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3625 0x00000340, 0x00000000); break;
3626 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3627 0x00000000, 0x00000000); break;
3628 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3629 0x00000000, 0x00000000); break;
3630 default: SET_ABCD(0x00000000, 0x00000000,
3631 0x00000000, 0x00000000); break;
3633 break;
3634 case 0x80000000:
3635 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3636 break;
3637 case 0x80000001:
3638 SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800);
3639 break;
3640 case 0x80000002:
3641 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3642 break;
3643 case 0x80000003:
3644 SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043);
3645 break;
3646 case 0x80000004:
3647 SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000);
3648 break;
3649 case 0x80000005:
3650 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3651 break;
3652 case 0x80000006:
3653 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3654 break;
3655 case 0x80000007:
3656 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3657 break;
3658 case 0x80000008:
3659 SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000);
3660 break;
3661 default:
3662 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3663 break;
3665 # undef SET_ABCD
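/* For reference, the constant leaves above decode as follows: leaf 0x0
   returns "GenuineIntel" in EBX:EDX:ECX (0x756e6547 = "Genu",
   0x49656e69 = "ineI", 0x6c65746e = "ntel"), and leaves 0x80000002..4
   spell out the brand string "Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz",
   so the table apparently models a Haswell-class mobile i7. */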
3669 /*---------------------------------------------------------------*/
3670 /*--- Misc integer helpers, including rotates and crypto. ---*/
3671 /*---------------------------------------------------------------*/
3673 ULong amd64g_calculate_RCR ( ULong arg,
3674 ULong rot_amt,
3675 ULong rflags_in,
3676 Long szIN )
3678 Bool wantRflags = toBool(szIN < 0);
3679 ULong sz = wantRflags ? (-szIN) : szIN;
3680 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3681 ULong cf=0, of=0, tempcf;
3683 switch (sz) {
3684 case 8:
3685 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3686 of = ((arg >> 63) ^ cf) & 1;
3687 while (tempCOUNT > 0) {
3688 tempcf = arg & 1;
3689 arg = (arg >> 1) | (cf << 63);
3690 cf = tempcf;
3691 tempCOUNT--;
3693 break;
3694 case 4:
3695 while (tempCOUNT >= 33) tempCOUNT -= 33;
3696 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3697 of = ((arg >> 31) ^ cf) & 1;
3698 while (tempCOUNT > 0) {
3699 tempcf = arg & 1;
3700 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
3701 cf = tempcf;
3702 tempCOUNT--;
3704 break;
3705 case 2:
3706 while (tempCOUNT >= 17) tempCOUNT -= 17;
3707 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3708 of = ((arg >> 15) ^ cf) & 1;
3709 while (tempCOUNT > 0) {
3710 tempcf = arg & 1;
3711 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
3712 cf = tempcf;
3713 tempCOUNT--;
3715 break;
3716 case 1:
3717 while (tempCOUNT >= 9) tempCOUNT -= 9;
3718 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3719 of = ((arg >> 7) ^ cf) & 1;
3720 while (tempCOUNT > 0) {
3721 tempcf = arg & 1;
3722 arg = ((arg >> 1) & 0x7FULL) | (cf << 7);
3723 cf = tempcf;
3724 tempCOUNT--;
3726 break;
3727 default:
3728 vpanic("calculate_RCR(amd64g): invalid size");
3731 cf &= 1;
3732 of &= 1;
3733 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3734 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3736 /* caller can ask to have back either the resulting flags or
3737 resulting value, but not both */
3738 return wantRflags ? rflags_in : arg;
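/* Worked example for the 1-byte case: RCR of 0x01 by 1 with CF=0 moves
   the old CF into bit 7 and the old bit 0 into CF, giving arg = 0x00,
   CF = 1 and OF = (old bit 7) ^ (old CF) = 0. */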
3741 ULong amd64g_calculate_RCL ( ULong arg,
3742 ULong rot_amt,
3743 ULong rflags_in,
3744 Long szIN )
3746 Bool wantRflags = toBool(szIN < 0);
3747 ULong sz = wantRflags ? (-szIN) : szIN;
3748 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3749 ULong cf=0, of=0, tempcf;
3751 switch (sz) {
3752 case 8:
3753 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3754 while (tempCOUNT > 0) {
3755 tempcf = (arg >> 63) & 1;
3756 arg = (arg << 1) | (cf & 1);
3757 cf = tempcf;
3758 tempCOUNT--;
3760 of = ((arg >> 63) ^ cf) & 1;
3761 break;
3762 case 4:
3763 while (tempCOUNT >= 33) tempCOUNT -= 33;
3764 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3765 while (tempCOUNT > 0) {
3766 tempcf = (arg >> 31) & 1;
3767 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
3768 cf = tempcf;
3769 tempCOUNT--;
3771 of = ((arg >> 31) ^ cf) & 1;
3772 break;
3773 case 2:
3774 while (tempCOUNT >= 17) tempCOUNT -= 17;
3775 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3776 while (tempCOUNT > 0) {
3777 tempcf = (arg >> 15) & 1;
3778 arg = 0xFFFFULL & ((arg << 1) | (cf & 1));
3779 cf = tempcf;
3780 tempCOUNT--;
3782 of = ((arg >> 15) ^ cf) & 1;
3783 break;
3784 case 1:
3785 while (tempCOUNT >= 9) tempCOUNT -= 9;
3786 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3787 while (tempCOUNT > 0) {
3788 tempcf = (arg >> 7) & 1;
3789 arg = 0xFFULL & ((arg << 1) | (cf & 1));
3790 cf = tempcf;
3791 tempCOUNT--;
3793 of = ((arg >> 7) ^ cf) & 1;
3794 break;
3795 default:
3796 vpanic("calculate_RCL(amd64g): invalid size");
3799 cf &= 1;
3800 of &= 1;
3801 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3802 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3804 return wantRflags ? rflags_in : arg;
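/* As with RCR, a negative szIN requests the updated rflags rather than
   the rotated value, so the front end presumably calls the helper twice
   per instruction.  Worked 1-byte example: RCL of 0x80 by 1 with CF=0
   gives arg = 0x00, CF = 1 and OF = (new bit 7) ^ CF = 1. */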
3807 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
3808 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
3810 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
3812 ULong hi, lo, tmp, A[16];
3814 A[0] = 0; A[1] = a;
3815 A[2] = A[1] << 1; A[3] = A[2] ^ a;
3816 A[4] = A[2] << 1; A[5] = A[4] ^ a;
3817 A[6] = A[3] << 1; A[7] = A[6] ^ a;
3818 A[8] = A[4] << 1; A[9] = A[8] ^ a;
3819 A[10] = A[5] << 1; A[11] = A[10] ^ a;
3820 A[12] = A[6] << 1; A[13] = A[12] ^ a;
3821 A[14] = A[7] << 1; A[15] = A[14] ^ a;
3823 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
3824 hi = lo >> 56;
3825 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
3826 hi = (hi << 8) | (lo >> 56);
3827 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
3828 hi = (hi << 8) | (lo >> 56);
3829 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
3830 hi = (hi << 8) | (lo >> 56);
3831 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
3832 hi = (hi << 8) | (lo >> 56);
3833 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
3834 hi = (hi << 8) | (lo >> 56);
3835 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
3836 hi = (hi << 8) | (lo >> 56);
3837 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
3839 ULong m0 = -1;
3840 m0 /= 255;
3841 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
3842 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
3843 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
3844 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
3845 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
3846 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
3847 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
3849 return which ? hi : lo;
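/* The code above is a 4-bits-at-a-time carryless (GF(2)[x]) multiply; the
   trailing mask-and-xor lines repair the high half for the top bits of 'a'
   that the 64-bit table entries cannot hold.  Small sanity check: a
   carryless 3 * 3 is (x+1)*(x+1) = x^2 + 1, so
   amd64g_calculate_pclmul(3, 3, 0) should yield 5, with a zero high half. */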
3853 /* CALLED FROM GENERATED CODE */
3854 /* DIRTY HELPER (non-referentially-transparent) */
3855 /* Horrible hack. On non-amd64 platforms, return 1. */
3856 ULong amd64g_dirtyhelper_RDTSC ( void )
3858 # if defined(__x86_64__)
3859 UInt eax, edx;
3860 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
3861 return (((ULong)edx) << 32) | ((ULong)eax);
3862 # else
3863 return 1ULL;
3864 # endif
3867 /* CALLED FROM GENERATED CODE */
3868 /* DIRTY HELPER (non-referentially-transparent) */
3869 /* Horrible hack. On non-amd64 platforms, return 1. */
3870 /* This uses a different calling convention from _RDTSC just above
3871 only because of the difficulty of returning 96 bits from a C
3872 function -- on amd64, RDTSC returns only 64 bits and so is simple
3873 by comparison. */
3874 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
3876 # if defined(__x86_64__)
3877 UInt eax, ecx, edx;
3878 __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
3879 st->guest_RAX = (ULong)eax;
3880 st->guest_RCX = (ULong)ecx;
3881 st->guest_RDX = (ULong)edx;
3882 # else
3883 /* Do nothing. */
3884 # endif
3887 /* CALLED FROM GENERATED CODE */
3888 /* DIRTY HELPER (non-referentially-transparent) */
3889 /* Horrible hack. On non-amd64 platforms, return 0. */
3890 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
3892 # if defined(__x86_64__)
3893 ULong r = 0;
3894 portno &= 0xFFFF;
3895 switch (sz) {
3896 case 4:
3897 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
3898 : "=a" (r) : "Nd" (portno));
3899 break;
3900 case 2:
3901 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
3902 : "=a" (r) : "Nd" (portno));
3903 break;
3904 case 1:
3905 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
3906 : "=a" (r) : "Nd" (portno));
3907 break;
3908 default:
3909 break; /* note: no 64-bit version of insn exists */
3911 return r;
3912 # else
3913 return 0;
3914 # endif
3918 /* CALLED FROM GENERATED CODE */
3919 /* DIRTY HELPER (non-referentially-transparent) */
3920 /* Horrible hack. On non-amd64 platforms, do nothing. */
3921 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
3923 # if defined(__x86_64__)
3924 portno &= 0xFFFF;
3925 switch (sz) {
3926 case 4:
3927 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
3928 : : "a" (data), "Nd" (portno));
3929 break;
3930 case 2:
3931 __asm__ __volatile__("outw %w0, %w1"
3932 : : "a" (data), "Nd" (portno));
3933 break;
3934 case 1:
3935 __asm__ __volatile__("outb %b0, %w1"
3936 : : "a" (data), "Nd" (portno));
3937 break;
3938 default:
3939 break; /* note: no 64-bit version of insn exists */
3941 # else
3942 /* do nothing */
3943 # endif
3946 /* CALLED FROM GENERATED CODE */
3947 /* DIRTY HELPER (non-referentially-transparent) */
3948 /* Horrible hack. On non-amd64 platforms, fake a zeroed-out result. */
3949 /* op = 0: call the native SGDT instruction.
3950 op = 1: call the native SIDT instruction.
3952 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
3953 # if defined(__x86_64__)
3954 switch (op) {
3955 case 0:
3956 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
3957 break;
3958 case 1:
3959 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
3960 break;
3961 default:
3962 vpanic("amd64g_dirtyhelper_SxDT");
3964 # else
3965 /* Fake a result: zero out the 10-byte descriptor the caller expects. */
3966 UChar* p = (UChar*)address;
3967 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
3968 p[6] = p[7] = p[8] = p[9] = 0;
3969 # endif
3972 /* CALLED FROM GENERATED CODE */
3973 /* DIRTY HELPER (non-referentially-transparent) */
3974 /* Horrible hack. On amd64 targets, get a 32 bit random number using
3975 RDRAND and return it together with the associated rflags.C value; on
3976 non-amd64 platforms, return a fixed fallback value. */
3977 ULong amd64g_dirtyhelper_RDRAND ( void ) {
3978 # if defined(__x86_64__)
3979 ULong res = 0;
3980 ULong cflag = 0;
3981 __asm__ __volatile__(
3982 "movq $0, %%r11 ; "
3983 "movq $0, %%r12 ; "
3984 "rdrand %%r11d ; "
3985 "setc %%r12b ; "
3986 "movq %%r11, %0 ; "
3987 "movq %%r12, %1"
3988 : "=r"(res), "=r"(cflag) : : "r11", "r12"
3990 res &= 0xFFFFFFFFULL;
3991 cflag &= 1ULL;
3992 return (cflag << 32) | res;
3993 # else
3994 /* There's nothing we can sensibly do. Return a value denoting
3995 "I succeeded, and the random bits are all zero" :-/ */
3996 return 1ULL << 32;
3997 # endif
4000 ULong amd64g_dirtyhelper_RDSEED ( void ) {
4001 # if defined(__x86_64__)
4002 ULong res = 0;
4003 ULong cflag = 0;
4004 __asm__ __volatile__(
4005 "movq $0, %%r11 ; "
4006 "movq $0, %%r12 ; "
4007 "rdseed %%r11d ; "
4008 "setc %%r12b ; "
4009 "movq %%r11, %0 ; "
4010 "movq %%r12, %1"
4011 : "=r"(res), "=r"(cflag) : : "r11", "r12"
4013 res &= 0xFFFFFFFFULL;
4014 cflag &= 1ULL;
4015 return (cflag << 32) | res;
4016 # else
4017 /* There's nothing we can sensibly do. Return a value denoting
4018 "I succeeded, and the random bits are all zero" :-/ */
4019 return 1ULL << 32;
4020 # endif
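/* For both the RDRAND and RDSEED helpers the return value packs the 32-bit
   random result in bits 31:0 and the instruction's carry ("success") flag
   in bit 32, so a caller can split it as, for example,
     UInt  value = (UInt)(res & 0xFFFFFFFFULL);
     ULong carry = (res >> 32) & 1;
*/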
4023 /*---------------------------------------------------------------*/
4024 /*--- Helpers for MMX/SSE/SSE2. ---*/
4025 /*---------------------------------------------------------------*/
4027 static inline UChar abdU8 ( UChar xx, UChar yy ) {
4028 return toUChar(xx>yy ? xx-yy : yy-xx);
4031 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
4032 return (((ULong)w1) << 32) | ((ULong)w0);
4035 static inline UShort sel16x4_3 ( ULong w64 ) {
4036 UInt hi32 = toUInt(w64 >> 32);
4037 return toUShort(hi32 >> 16);
4039 static inline UShort sel16x4_2 ( ULong w64 ) {
4040 UInt hi32 = toUInt(w64 >> 32);
4041 return toUShort(hi32);
4043 static inline UShort sel16x4_1 ( ULong w64 ) {
4044 UInt lo32 = toUInt(w64);
4045 return toUShort(lo32 >> 16);
4047 static inline UShort sel16x4_0 ( ULong w64 ) {
4048 UInt lo32 = toUInt(w64);
4049 return toUShort(lo32);
4052 static inline UChar sel8x8_7 ( ULong w64 ) {
4053 UInt hi32 = toUInt(w64 >> 32);
4054 return toUChar(hi32 >> 24);
4056 static inline UChar sel8x8_6 ( ULong w64 ) {
4057 UInt hi32 = toUInt(w64 >> 32);
4058 return toUChar(hi32 >> 16);
4060 static inline UChar sel8x8_5 ( ULong w64 ) {
4061 UInt hi32 = toUInt(w64 >> 32);
4062 return toUChar(hi32 >> 8);
4064 static inline UChar sel8x8_4 ( ULong w64 ) {
4065 UInt hi32 = toUInt(w64 >> 32);
4066 return toUChar(hi32 >> 0);
4068 static inline UChar sel8x8_3 ( ULong w64 ) {
4069 UInt lo32 = toUInt(w64);
4070 return toUChar(lo32 >> 24);
4072 static inline UChar sel8x8_2 ( ULong w64 ) {
4073 UInt lo32 = toUInt(w64);
4074 return toUChar(lo32 >> 16);
4076 static inline UChar sel8x8_1 ( ULong w64 ) {
4077 UInt lo32 = toUInt(w64);
4078 return toUChar(lo32 >> 8);
4080 static inline UChar sel8x8_0 ( ULong w64 ) {
4081 UInt lo32 = toUInt(w64);
4082 return toUChar(lo32 >> 0);
4085 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4086 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
4088 return
4089 mk32x2(
4090 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
4091 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
4092 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
4093 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
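/* Example: with xx holding the 16-bit lanes {1,2,3,4} (low to high) and yy
   holding {10,20,30,40}, the result is mk32x2(3*30 + 4*40, 1*10 + 2*20)
   = mk32x2(250, 50). */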
4097 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4098 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
4100 UInt t = 0;
4101 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
4102 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
4103 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
4104 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
4105 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
4106 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
4107 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
4108 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
4109 t &= 0xFFFF;
4110 return (ULong)t;
4113 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4114 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
4116 UShort t, min;
4117 UInt idx;
4118 t = sel16x4_0(sLo); if (True) { min = t; idx = 0; }
4119 t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
4120 t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
4121 t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
4122 t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
4123 t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
4124 t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
4125 t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
4126 return ((ULong)(idx << 16)) | ((ULong)min);
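/* The result packs the minimum unsigned 16-bit value in bits 15:0 and its
   lane index in bits 18:16, as PHMINPOSUW requires.  Example: if the eight
   lanes are {5,3,9,7,4,3,8,1} (low to high), the helper returns
   (7 << 16) | 1 = 0x70001. */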
4129 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4130 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
4132 UInt i;
4133 ULong crc = (b & 0xFFULL) ^ crcIn;
4134 for (i = 0; i < 8; i++)
4135 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
4136 return crc;
4139 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4140 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
4142 UInt i;
4143 ULong crc = (w & 0xFFFFULL) ^ crcIn;
4144 for (i = 0; i < 16; i++)
4145 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
4146 return crc;
4149 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4150 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
4152 UInt i;
4153 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
4154 for (i = 0; i < 32; i++)
4155 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
4156 return crc;
4159 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4160 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
4162 ULong crc = amd64g_calc_crc32l(crcIn, q);
4163 return amd64g_calc_crc32l(crc, q >> 32);
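/* All four helpers use the bit-reflected CRC-32C (Castagnoli) polynomial
   0x82F63B78 (the reversed form of 0x1EDC6F41), one bit per iteration, as
   the SSE4.2 CRC32 instruction specifies; the 64-bit variant simply chains
   the two 32-bit halves. */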
4167 /* .. helper for next fn .. */
4168 static inline ULong sad_8x4 ( ULong xx, ULong yy )
4170 UInt t = 0;
4171 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
4172 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
4173 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
4174 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
4175 return (ULong)t;
4178 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4179 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
4180 ULong dHi, ULong dLo,
4181 ULong imm_and_return_control_bit )
4183 UInt imm8 = imm_and_return_control_bit & 7;
4184 Bool calcHi = (imm_and_return_control_bit >> 7) & 1;
4185 UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */
4186 UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
4187 /* For src we only need 32 bits, so get them into the
4188 lower half of a 64 bit word. */
4189 ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
4190 /* For dst we need to get hold of 56 bits (7 bytes) from a total of
4191 11 bytes. If calculating the low part of the result, need bytes
4192 dstOffsL * 4 + (0 .. 6); if calculating the high part,
4193 dstOffsL * 4 + (4 .. 10). */
4194 ULong dst;
4195 /* dstOffL = 0, Lo -> 0 .. 6
4196 dstOffL = 1, Lo -> 4 .. 10
4197 dstOffL = 0, Hi -> 4 .. 10
4198 dstOffL = 1, Hi -> 8 .. 14
4200 if (calcHi && dstOffsL) {
4201 /* 8 .. 14 */
4202 dst = dHi & 0x00FFFFFFFFFFFFFFULL;
4204 else if (!calcHi && !dstOffsL) {
4205 /* 0 .. 6 */
4206 dst = dLo & 0x00FFFFFFFFFFFFFFULL;
4208 else {
4209 /* 4 .. 10 */
4210 dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
4212 ULong r0 = sad_8x4( dst >> 0, src );
4213 ULong r1 = sad_8x4( dst >> 8, src );
4214 ULong r2 = sad_8x4( dst >> 16, src );
4215 ULong r3 = sad_8x4( dst >> 24, src );
4216 ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
4217 return res;
4220 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4221 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
4223 ULong dst = 0;
4224 ULong src_bit;
4225 ULong dst_bit = 1;
4226 for (src_bit = 1; src_bit; src_bit <<= 1) {
4227 if (mask & src_bit) {
4228 if (src_masked & src_bit) dst |= dst_bit;
4229 dst_bit <<= 1;
4232 return dst;
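/* Example: amd64g_calculate_pext(0xA0, 0xF0) gathers bits 7:4 of the
   (pre-masked) source into the low bits of the result, giving 0xA. */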
4235 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4236 ULong amd64g_calculate_pdep ( ULong src, ULong mask )
4238 ULong dst = 0;
4239 ULong dst_bit;
4240 ULong src_bit = 1;
4241 for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
4242 if (mask & dst_bit) {
4243 if (src & src_bit) dst |= dst_bit;
4244 src_bit <<= 1;
4247 return dst;
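/* Example: amd64g_calculate_pdep(0xA, 0xF0) scatters the low four source
   bits into the mask positions 7:4, giving 0xA0. */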
4250 /*---------------------------------------------------------------*/
4251 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
4252 /*---------------------------------------------------------------*/
4254 static UInt zmask_from_V128 ( V128* arg )
4256 UInt i, res = 0;
4257 for (i = 0; i < 16; i++) {
4258 res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
4260 return res;
4263 static UInt zmask_from_V128_wide ( V128* arg )
4265 UInt i, res = 0;
4266 for (i = 0; i < 8; i++) {
4267 res |= ((arg->w16[i] == 0) ? 1 : 0) << i;
4269 return res;
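/* Each zmask has bit i set iff element i of the vector is zero.  These
   provide the validity information for the implicit-length (ISTRx) forms;
   the explicit-length (ESTRx) forms instead derive equivalent masks from
   the edx/eax lengths below. */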
4272 /* Helps with PCMP{I,E}STR{I,M}.
4274 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (Not really dirty --
4275 it could be a clean helper, except that we can't pass 2 x V128 by
4276 value to a clean helper, nor have one returned.)
4277 Reads guest state, writes to guest state for the xSTRM cases, no
4278 accesses of memory, is a pure function.
4280 opc4_and_imm contains ((4th byte of opcode) << 8) | the imm8 byte, so
4281 the callee knows which I/E and I/M variant it is dealing with and
4282 what the specific operation is. 4th byte of opcode is in the range
4283 0x60 to 0x63:
4284 istri 66 0F 3A 63
4285 istrm 66 0F 3A 62
4286 estri 66 0F 3A 61
4287 estrm 66 0F 3A 60
4289 gstOffL and gstOffR are the guest state offsets for the two XMM
4290 register inputs. We never have to deal with the memory case since
4291 that is handled by pre-loading the relevant value into the fake
4292 XMM16 register.
4294 For ESTRx variants, edxIN and eaxIN hold the values of those two
4295 registers.
4297 In all cases, the bottom 16 bits of the result contain the new
4298 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
4299 result hold the new %ecx value. For xSTRM variants, the helper
4300 writes the result directly to the guest XMM0.
4302 Declarable side effects: in all cases, reads guest state at
4303 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
4304 guest_XMM0.
4306 Is expected to be called only with opc4_and_imm combinations which
4307 have actually been validated, and will assert otherwise. The front
4308 end should ensure we're only called with verified values.
4310 ULong amd64g_dirtyhelper_PCMPxSTRx (
4311 VexGuestAMD64State* gst,
4312 HWord opc4_and_imm,
4313 HWord gstOffL, HWord gstOffR,
4314 HWord edxIN, HWord eaxIN
4317 HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
4318 HWord imm8 = opc4_and_imm & 0xFF;
4319 HWord isISTRx = opc4 & 2;
4320 HWord isxSTRM = (opc4 & 1) ^ 1;
4321 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
4322 HWord wide = (imm8 & 1);
4324 // where the args are
4325 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4326 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4328 /* Create the arg validity masks, either from the vectors
4329 themselves or from the supplied edx/eax values. */
4330 // FIXME: this is only right for the 8-bit data cases.
4331 // At least that is asserted above.
4332 UInt zmaskL, zmaskR;
4334 // temp spot for the resulting flags and vector.
4335 V128 resV;
4336 UInt resOSZACP;
4338 // for checking whether case was handled
4339 Bool ok = False;
4341 if (wide) {
4342 if (isISTRx) {
4343 zmaskL = zmask_from_V128_wide(argL);
4344 zmaskR = zmask_from_V128_wide(argR);
4345 } else {
4346 Int tmp;
4347 tmp = edxIN & 0xFFFFFFFF;
4348 if (tmp < -8) tmp = -8;
4349 if (tmp > 8) tmp = 8;
4350 if (tmp < 0) tmp = -tmp;
4351 vassert(tmp >= 0 && tmp <= 8);
4352 zmaskL = (1 << tmp) & 0xFF;
4353 tmp = eaxIN & 0xFFFFFFFF;
4354 if (tmp < -8) tmp = -8;
4355 if (tmp > 8) tmp = 8;
4356 if (tmp < 0) tmp = -tmp;
4357 vassert(tmp >= 0 && tmp <= 8);
4358 zmaskR = (1 << tmp) & 0xFF;
4360 // do the math
4361 ok = compute_PCMPxSTRx_wide (
4362 &resV, &resOSZACP, argL, argR,
4363 zmaskL, zmaskR, imm8, (Bool)isxSTRM
4365 } else {
4366 if (isISTRx) {
4367 zmaskL = zmask_from_V128(argL);
4368 zmaskR = zmask_from_V128(argR);
4369 } else {
4370 Int tmp;
4371 tmp = edxIN & 0xFFFFFFFF;
4372 if (tmp < -16) tmp = -16;
4373 if (tmp > 16) tmp = 16;
4374 if (tmp < 0) tmp = -tmp;
4375 vassert(tmp >= 0 && tmp <= 16);
4376 zmaskL = (1 << tmp) & 0xFFFF;
4377 tmp = eaxIN & 0xFFFFFFFF;
4378 if (tmp < -16) tmp = -16;
4379 if (tmp > 16) tmp = 16;
4380 if (tmp < 0) tmp = -tmp;
4381 vassert(tmp >= 0 && tmp <= 16);
4382 zmaskR = (1 << tmp) & 0xFFFF;
4384 // do the math
4385 ok = compute_PCMPxSTRx (
4386 &resV, &resOSZACP, argL, argR,
4387 zmaskL, zmaskR, imm8, (Bool)isxSTRM
4391 // front end shouldn't pass us any imm8 variants we can't
4392 // handle. Hence:
4393 vassert(ok);
4395 // So, finally we need to get the results back to the caller.
4396 // In all cases, the new OSZACP value is the lowest 16 of
4397 // the return value.
4398 if (isxSTRM) {
4399 gst->guest_YMM0[0] = resV.w32[0];
4400 gst->guest_YMM0[1] = resV.w32[1];
4401 gst->guest_YMM0[2] = resV.w32[2];
4402 gst->guest_YMM0[3] = resV.w32[3];
4403 return resOSZACP & 0x8D5;
4404 } else {
4405 UInt newECX = resV.w32[0] & 0xFFFF;
4406 return (newECX << 16) | (resOSZACP & 0x8D5);
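/* 0x8D5 selects exactly the OSZACP bits at their rflags positions:
   (1<<11)|(1<<7)|(1<<6)|(1<<4)|(1<<2)|(1<<0) = 0x8D5 for O,S,Z,A,P,C. */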
4410 /*---------------------------------------------------------------*/
4411 /*--- AES primitives and helpers ---*/
4412 /*---------------------------------------------------------------*/
4413 /* a 16 x 16 matrix */
4414 static const UChar sbox[256] = { // row nr
4415 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
4416 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
4417 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
4418 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
4419 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
4420 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
4421 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
4422 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
4423 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
4424 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
4425 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
4426 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
4427 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
4428 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
4429 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
4430 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
4431 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
4432 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
4433 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
4434 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
4435 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
4436 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
4437 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
4438 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
4439 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
4440 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
4441 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
4442 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
4443 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
4444 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
4445 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
4446 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
4448 static void SubBytes (V128* v)
4450 V128 r;
4451 UInt i;
4452 for (i = 0; i < 16; i++)
4453 r.w8[i] = sbox[v->w8[i]];
4454 *v = r;
4457 /* a 16 x 16 matrix */
4458 static const UChar invsbox[256] = { // row nr
4459 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
4460 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
4461 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
4462 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
4463 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
4464 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
4465 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
4466 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
4467 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
4468 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
4469 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
4470 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
4471 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
4472 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
4473 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
4474 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
4475 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
4476 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
4477 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
4478 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
4479 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
4480 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
4481 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
4482 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
4483 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
4484 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
4485 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
4486 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
4487 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
4488 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
4489 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
4490 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
4492 static void InvSubBytes (V128* v)
4494 V128 r;
4495 UInt i;
4496 for (i = 0; i < 16; i++)
4497 r.w8[i] = invsbox[v->w8[i]];
4498 *v = r;
4501 static const UChar ShiftRows_op[16] =
4502 {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
4503 static void ShiftRows (V128* v)
4505 V128 r;
4506 UInt i;
4507 for (i = 0; i < 16; i++)
4508 r.w8[i] = v->w8[ShiftRows_op[15-i]];
4509 *v = r;
4512 static const UChar InvShiftRows_op[16] =
4513 {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
4514 static void InvShiftRows (V128* v)
4516 V128 r;
4517 UInt i;
4518 for (i = 0; i < 16; i++)
4519 r.w8[i] = v->w8[InvShiftRows_op[15-i]];
4520 *v = r;
4523 /* Multiplication of the finite field elements of AES.
4524 See "A Specification for The AES Algorithm Rijndael
4525 (by Joan Daemen & Vincent Rijmen)"
4526 Dr. Brian Gladman, v3.1, 3rd March 2001. */
4527 /* N values such that (hex) xy = 0x03^N.
4528 N is undefined for xy = 0x00; we store 0xff in that slot. */
4529 /* a 16 x 16 matrix */
4530 static const UChar Nxy[256] = { // row nr
4531 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
4532 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
4533 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
4534 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
4535 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
4536 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
4537 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
4538 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
4539 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
4540 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
4541 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
4542 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
4543 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
4544 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
4545 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
4546 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
4547 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
4548 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
4549 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
4550 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
4551 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
4552 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
4553 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
4554 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
4555 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
4556 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
4557 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
4558 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
4559 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
4560 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
4561 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
4562 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
4565 /* E values so that E = 0x03^xy. */
4566 static const UChar Exy[256] = { // row nr
4567 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
4568 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
4569 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
4570 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
4571 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
4572 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
4573 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
4574 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
4575 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
4576 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
4577 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
4578 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
4579 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
4580 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
4581 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
4582 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
4583 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
4584 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
4585 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
4586 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
4587 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
4588 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
4589 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
4590 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
4591 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
4592 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
4593 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
4594 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
4595 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
4596 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
4597 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
4598 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
4600 static inline UChar ff_mul(UChar u1, UChar u2)
4602 if ((u1 > 0) && (u2 > 0)) {
4603 UInt ui = Nxy[u1] + Nxy[u2];
4604 if (ui >= 255)
4605 ui = ui - 255;
4606 return Exy[ui];
4607 } else {
4608 return 0;
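/* ff_mul multiplies via the log/antilog tables above.  For instance,
   ff_mul(0x02, 0x03) looks up Nxy[0x02] = 0x19 and Nxy[0x03] = 0x01, sums
   them to 0x1a, and returns Exy[0x1a] = 0x06, which is indeed 0x02 * 0x03
   in the AES field. */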
4612 static void MixColumns (V128* v)
4614 V128 r;
4615 Int j;
4616 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4617 for (j = 0; j < 4; j++) {
4618 P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
4619 ^ P(v,j,2) ^ P(v,j,3);
4620 P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
4621 ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
4622 P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
4623 ^ ff_mul(0x03, P(v,j,3) );
4624 P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
4625 ^ ff_mul( 0x02, P(v,j,3) );
4627 *v = r;
4628 #undef P
4631 static void InvMixColumns (V128* v)
4633 V128 r;
4634 Int j;
4635 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4636 for (j = 0; j < 4; j++) {
4637 P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
4638 ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
4639 P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
4640 ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
4641 P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
4642 ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
4643 P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
4644 ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
4646 *v = r;
4647 #undef P
4651 /* For description, see definition in guest_amd64_defs.h */
4652 void amd64g_dirtyhelper_AES (
4653 VexGuestAMD64State* gst,
4654 HWord opc4, HWord gstOffD,
4655 HWord gstOffL, HWord gstOffR
4658 // where the args are
4659 V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
4660 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4661 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4662 V128 r;
4664 switch (opc4) {
4665 case 0xDC: /* AESENC */
4666 case 0xDD: /* AESENCLAST */
4667 r = *argR;
4668 ShiftRows (&r);
4669 SubBytes (&r);
4670 if (opc4 == 0xDC)
4671 MixColumns (&r);
4672 argD->w64[0] = r.w64[0] ^ argL->w64[0];
4673 argD->w64[1] = r.w64[1] ^ argL->w64[1];
4674 break;
4676 case 0xDE: /* AESDEC */
4677 case 0xDF: /* AESDECLAST */
4678 r = *argR;
4679 InvShiftRows (&r);
4680 InvSubBytes (&r);
4681 if (opc4 == 0xDE)
4682 InvMixColumns (&r);
4683 argD->w64[0] = r.w64[0] ^ argL->w64[0];
4684 argD->w64[1] = r.w64[1] ^ argL->w64[1];
4685 break;
4687 case 0xDB: /* AESIMC */
4688 *argD = *argL;
4689 InvMixColumns (argD);
4690 break;
4691 default: vassert(0);
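/* Note the operand roles implied above: argR is the state being
   transformed, argL supplies the round key that is XORed in last, and the
   "LAST" opcodes simply skip the (Inv)MixColumns step, matching the
   AESENC/AESENCLAST/AESDEC/AESDECLAST definitions. */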
4695 static inline UInt RotWord (UInt w32)
4697 return ((w32 >> 8) | (w32 << 24));
4700 static inline UInt SubWord (UInt w32)
4702 UChar *w8;
4703 UChar *r8;
4704 UInt res;
4705 w8 = (UChar*) &w32;
4706 r8 = (UChar*) &res;
4707 r8[0] = sbox[w8[0]];
4708 r8[1] = sbox[w8[1]];
4709 r8[2] = sbox[w8[2]];
4710 r8[3] = sbox[w8[3]];
4711 return res;
4714 /* For description, see definition in guest_amd64_defs.h */
4715 extern void amd64g_dirtyhelper_AESKEYGENASSIST (
4716 VexGuestAMD64State* gst,
4717 HWord imm8,
4718 HWord gstOffL, HWord gstOffR
4721 // where the args are
4722 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4723 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4725 // We have to create the result in a temporary in the
4726 // case where the src and dst regs are the same. See #341698.
4727 V128 tmp;
4729 tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
4730 tmp.w32[2] = SubWord (argL->w32[3]);
4731 tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
4732 tmp.w32[0] = SubWord (argL->w32[1]);
4734 argR->w32[3] = tmp.w32[3];
4735 argR->w32[2] = tmp.w32[2];
4736 argR->w32[1] = tmp.w32[1];
4737 argR->w32[0] = tmp.w32[0];
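/* This follows the AESKEYGENASSIST definition: each result half is built
   from source dword 3 or 1 respectively as
   { RotWord(SubWord(X)) ^ imm8, SubWord(X) }; the temporary exists so the
   helper still works when source and destination registers coincide
   (see #341698 above). */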
4742 /*---------------------------------------------------------------*/
4743 /*--- Helpers for dealing with, and describing, ---*/
4744 /*--- guest state as a whole. ---*/
4745 /*---------------------------------------------------------------*/
4747 /* Initialise the entire amd64 guest state. */
4748 /* VISIBLE TO LIBVEX CLIENT */
4749 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
4751 vex_state->host_EvC_FAILADDR = 0;
4752 vex_state->host_EvC_COUNTER = 0;
4753 vex_state->pad0 = 0;
4755 vex_state->guest_RAX = 0;
4756 vex_state->guest_RCX = 0;
4757 vex_state->guest_RDX = 0;
4758 vex_state->guest_RBX = 0;
4759 vex_state->guest_RSP = 0;
4760 vex_state->guest_RBP = 0;
4761 vex_state->guest_RSI = 0;
4762 vex_state->guest_RDI = 0;
4763 vex_state->guest_R8 = 0;
4764 vex_state->guest_R9 = 0;
4765 vex_state->guest_R10 = 0;
4766 vex_state->guest_R11 = 0;
4767 vex_state->guest_R12 = 0;
4768 vex_state->guest_R13 = 0;
4769 vex_state->guest_R14 = 0;
4770 vex_state->guest_R15 = 0;
4772 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
4773 vex_state->guest_CC_DEP1 = 0;
4774 vex_state->guest_CC_DEP2 = 0;
4775 vex_state->guest_CC_NDEP = 0;
4777 vex_state->guest_DFLAG = 1; /* forwards */
4778 vex_state->guest_IDFLAG = 0;
4779 vex_state->guest_ACFLAG = 0;
4781 /* HACK: represent the offset associated with a constant %fs value.
4782 On Linux this typically assumes that %fs only ever holds zero (main
4783 thread) or 0x63. */
4784 vex_state->guest_FS_CONST = 0;
4786 vex_state->guest_RIP = 0;
4788 /* Initialise the simulated FPU */
4789 amd64g_dirtyhelper_FINIT( vex_state );
4791 /* Initialise the AVX state. */
4792 # define AVXZERO(_ymm) \
4793 do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
4794 _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
4795 } while (0)
4796 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
4797 AVXZERO(vex_state->guest_YMM0);
4798 AVXZERO(vex_state->guest_YMM1);
4799 AVXZERO(vex_state->guest_YMM2);
4800 AVXZERO(vex_state->guest_YMM3);
4801 AVXZERO(vex_state->guest_YMM4);
4802 AVXZERO(vex_state->guest_YMM5);
4803 AVXZERO(vex_state->guest_YMM6);
4804 AVXZERO(vex_state->guest_YMM7);
4805 AVXZERO(vex_state->guest_YMM8);
4806 AVXZERO(vex_state->guest_YMM9);
4807 AVXZERO(vex_state->guest_YMM10);
4808 AVXZERO(vex_state->guest_YMM11);
4809 AVXZERO(vex_state->guest_YMM12);
4810 AVXZERO(vex_state->guest_YMM13);
4811 AVXZERO(vex_state->guest_YMM14);
4812 AVXZERO(vex_state->guest_YMM15);
4813 AVXZERO(vex_state->guest_YMM16);
4815 # undef AVXZERO
4817 vex_state->guest_EMNOTE = EmNote_NONE;
4819 /* These should never be either read or written, but we initialise
4820 them anyway. */
4821 vex_state->guest_CMSTART = 0;
4822 vex_state->guest_CMLEN = 0;
4824 vex_state->guest_NRADDR = 0;
4825 vex_state->guest_SC_CLASS = 0;
4826 vex_state->guest_GS_CONST = 0;
4828 vex_state->guest_IP_AT_SYSCALL = 0;
4829 vex_state->pad1 = 0;
4833 /* Figure out if any part of the guest state contained in minoff
4834 .. maxoff requires precise memory exceptions. If in doubt return
4835 True (but this generates significantly slower code).
4837 By default we enforce precise exns for guest %RSP, %RBP and %RIP
4838 only. These are the minimum needed to extract correct stack
4839 backtraces from amd64 code.
4841 Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
4843 Bool guest_amd64_state_requires_precise_mem_exns (
4844 Int minoff, Int maxoff, VexRegisterUpdates pxControl
4847 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
4848 Int rbp_max = rbp_min + 8 - 1;
4849 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
4850 Int rsp_max = rsp_min + 8 - 1;
4851 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
4852 Int rip_max = rip_min + 8 - 1;
4854 if (maxoff < rsp_min || minoff > rsp_max) {
4855 /* no overlap with rsp */
4856 if (pxControl == VexRegUpdSpAtMemAccess)
4857 return False; // We only need to check stack pointer.
4858 } else {
4859 return True;
4862 if (maxoff < rbp_min || minoff > rbp_max) {
4863 /* no overlap with rbp */
4864 } else {
4865 return True;
4868 if (maxoff < rip_min || minoff > rip_max) {
4869 /* no overlap with rip */
4870 } else {
4871 return True;
4874 return False;
4878 #define ALWAYSDEFD(field) \
4879 { offsetof(VexGuestAMD64State, field), \
4880 (sizeof ((VexGuestAMD64State*)0)->field) }
4882 VexGuestLayout
4883 amd64guest_layout
4885 /* Total size of the guest state, in bytes. */
4886 .total_sizeB = sizeof(VexGuestAMD64State),
4888 /* Describe the stack pointer. */
4889 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
4890 .sizeof_SP = 8,
4892 /* Describe the frame pointer. */
4893 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
4894 .sizeof_FP = 8,
4896 /* Describe the instruction pointer. */
4897 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
4898 .sizeof_IP = 8,
4900 /* Describe any sections to be regarded by Memcheck as
4901 'always-defined'. */
4902 .n_alwaysDefd = 16,
4904 /* flags thunk: OP and NDEP are always defd, whereas DEP1
4905 and DEP2 have to be tracked. See detailed comment in
4906 gdefs.h on meaning of thunk fields. */
4907 .alwaysDefd
4908 = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
4909 /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
4910 /* 2 */ ALWAYSDEFD(guest_DFLAG),
4911 /* 3 */ ALWAYSDEFD(guest_IDFLAG),
4912 /* 4 */ ALWAYSDEFD(guest_RIP),
4913 /* 5 */ ALWAYSDEFD(guest_FS_CONST),
4914 /* 6 */ ALWAYSDEFD(guest_FTOP),
4915 /* 7 */ ALWAYSDEFD(guest_FPTAG),
4916 /* 8 */ ALWAYSDEFD(guest_FPROUND),
4917 /* 9 */ ALWAYSDEFD(guest_FC3210),
4918 // /* */ ALWAYSDEFD(guest_CS),
4919 // /* */ ALWAYSDEFD(guest_DS),
4920 // /* */ ALWAYSDEFD(guest_ES),
4921 // /* */ ALWAYSDEFD(guest_FS),
4922 // /* */ ALWAYSDEFD(guest_GS),
4923 // /* */ ALWAYSDEFD(guest_SS),
4924 // /* */ ALWAYSDEFD(guest_LDT),
4925 // /* */ ALWAYSDEFD(guest_GDT),
4926 /* 10 */ ALWAYSDEFD(guest_EMNOTE),
4927 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
4928 /* 12 */ ALWAYSDEFD(guest_CMSTART),
4929 /* 13 */ ALWAYSDEFD(guest_CMLEN),
4930 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
4931 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
4936 /*---------------------------------------------------------------*/
4937 /*--- end guest_amd64_helpers.c ---*/
4938 /*---------------------------------------------------------------*/