/*---------------------------------------------------------------*/
/*--- begin                             guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_amd64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_amd64_defs.h"
#include "guest_generic_x87.h"
/* This file contains helper functions for amd64 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-amd64/toIR.c.

   The convention used is that all functions called from generated
   code are named amd64g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/
/* Set to 1 to get detailed profiling info about use of the flag
   functions.  Enables the counting tables and showCounts() below. */
#define PROFILE_RFLAGS 0
/*---------------------------------------------------------------*/
/*--- %rflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
   after imulq/mulq. */
78 static void mullS64 ( Long u
, Long v
, Long
* rHi
, Long
* rLo
)
80 const Long halfMask
= 0xFFFFFFFFLL
;
82 Long u1
, v1
, w1
, w2
, t
;
88 t
= u1
* v0
+ (w0
>> 32);
92 *rHi
= u1
* v1
+ w2
+ (w1
>> 32);
93 *rLo
= (Long
)((ULong
)u
* (ULong
)v
);
96 static void mullU64 ( ULong u
, ULong v
, ULong
* rHi
, ULong
* rLo
)
98 const ULong halfMask
= 0xFFFFFFFFULL
;
100 ULong u1
, v1
, w1
,w2
,t
;
106 t
= u1
* v0
+ (w0
>> 32);
110 *rHi
= u1
* v1
+ w2
+ (w1
>> 32);
115 static const UChar parity_table
[256] = {
116 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
117 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
118 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
119 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
120 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
121 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
122 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
123 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
124 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
125 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
126 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
127 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
128 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
129 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
130 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
131 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
132 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
133 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
134 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
135 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
136 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
137 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
138 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
139 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
140 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
141 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
142 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
143 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
144 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
145 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
146 AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0,
147 0, AMD64G_CC_MASK_P
, AMD64G_CC_MASK_P
, 0, AMD64G_CC_MASK_P
, 0, 0, AMD64G_CC_MASK_P
,
150 /* generalised left-shifter */
151 static inline Long
lshift ( Long x
, Int n
)
154 return (ULong
)x
<< n
;
159 /* identity on ULong */
160 static inline ULong
idULong ( ULong x
)
/* Establish the per-width context used by every ACTIONS_* macro:
   DATA_MASK/SIGN_MASK for the operand width, plus local copies of the
   formal thunk parameters.  The trailing self-assignments suppress
   unused-variable warnings in instantiations that don't touch every
   value. */
#define PREAMBLE(__data_bits)                                   \
   /* const */ ULong DATA_MASK                                  \
      = __data_bits==8                                          \
           ? 0xFFULL                                            \
           : (__data_bits==16                                   \
                ? 0xFFFFULL                                     \
                : (__data_bits==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four bogus assignments, which hopefully gcc can      */   \
   /* optimise away, and which stop it complaining about   */   \
   /* unused variables.                                    */   \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
/*-------------------------------------------------------------*/

/* Flags after ADD: DEP1 = argL, DEP2 = argR, result = argL + argR. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after SUB/CMP: DEP1 = argL, DEP2 = argR, result = argL - argR. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after ADC: NDEP carries the old C flag; DEP2 is argR ^ oldC
   (the xor-encoding used by the thunk builder), so undo it here. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after SBB: NDEP carries the old C flag; DEP2 is argR ^ oldC. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after AND/OR/XOR: DEP1 = result.  CF and OF are cleared. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after INC: DEP1 = result; NDEP preserves the old C flag
   (INC does not modify CF).  OF set iff result == signed minimum. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after DEC: DEP1 = result; NDEP preserves the old C flag.
   OF set iff result == signed maximum (SIGN_MASK - 1). */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((ULong)SIGN_MASK - 1)) << 11;                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after SHL: DEP1 = result, DEP2 = result before the final
   shift step (so its top bit is the bit shifted into CF). */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after SHR/SAR: DEP1 = result, DEP2 = result before the final
   shift step (so its low bit is the bit shifted into CF). */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after unsigned widening multiply: CF = OF = (high half != 0).
   NARROWtoU / NARROWto2U truncate to the narrow / double-width type. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after signed widening multiply: CF = OF = (high half is not
   the sign-extension of the low half). */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* 64-bit unsigned multiply: no 128-bit integer type available, so use
   the mullU64 helper for the high half. */
#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* 64-bit signed multiply, via the mullS64 helper. */
#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after BMI ANDN: DEP1 = result.  CF, OF cleared; PF, AF
   undefined (left as zero). */
#define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after BMI BLSI: DEP1 = result, DEP2 = source operand.
   CF = (source != 0); OF cleared; PF, AF undefined (zero). */
#define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 != 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after BMI BLSMSK: DEP1 = result, DEP2 = source operand.
   CF = (source == 0); ZF, OF cleared; PF, AF undefined (zero). */
#define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE)                    \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = 0;                                                    \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after BMI BLSR: DEP1 = result, DEP2 = source operand.
   CF = (source == 0); OF cleared; PF, AF undefined (zero). */
#define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* Flags after ADCX/ADOX: only the named flag (C or O) is updated;
   all other flags pass through unchanged from NDEP.  DEP2 is
   argR ^ oldOC, the same xor-encoding used by ACTIONS_ADC. */
#define ACTIONS_ADX(DATA_BITS,DATA_UTYPE,FLAGNAME)              \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong ocf; /* o or c */                                    \
     ULong argL, argR, oldOC, res;                              \
     oldOC = (CC_NDEP >> AMD64G_CC_SHIFT_##FLAGNAME) & 1;       \
     argL  = CC_DEP1;                                           \
     argR  = CC_DEP2 ^ oldOC;                                   \
     res   = (argL + argR) + oldOC;                             \
     if (oldOC)                                                 \
        ocf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;              \
     else                                                       \
        ocf = (DATA_UTYPE)res < (DATA_UTYPE)argL;               \
     return (CC_NDEP & ~AMD64G_CC_MASK_##FLAGNAME)              \
            | (ocf << AMD64G_CC_SHIFT_##FLAGNAME);              \
   }                                                            \
}
579 /*-------------------------------------------------------------*/
584 static Bool initted
= False
;
586 /* C flag, fast route */
587 static UInt tabc_fast
[AMD64G_CC_OP_NUMBER
];
588 /* C flag, slow route */
589 static UInt tabc_slow
[AMD64G_CC_OP_NUMBER
];
590 /* table for calculate_cond */
591 static UInt tab_cond
[AMD64G_CC_OP_NUMBER
][16];
592 /* total entry counts for calc_all, calc_c, calc_cond. */
593 static UInt n_calc_all
= 0;
594 static UInt n_calc_c
= 0;
595 static UInt n_calc_cond
= 0;
597 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
600 static void showCounts ( void )
604 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
605 n_calc_all
, n_calc_cond
, n_calc_c
);
607 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
608 " S NS P NP L NL LE NLE\n");
609 vex_printf(" -----------------------------------------------------"
610 "----------------------------------------\n");
611 for (op
= 0; op
< AMD64G_CC_OP_NUMBER
; op
++) {
614 if (op
> 0 && (op
-1) % 4 == 0)
616 if (op
> 0 && (op
-1) % 4 == 1)
618 if (op
> 0 && (op
-1) % 4 == 2)
620 if (op
> 0 && (op
-1) % 4 == 3)
623 vex_printf("%2d%c: ", op
, ch
);
624 vex_printf("%6u ", tabc_slow
[op
]);
625 vex_printf("%6u ", tabc_fast
[op
]);
626 for (co
= 0; co
< 16; co
++) {
627 Int n
= tab_cond
[op
][co
];
629 vex_printf(" %3dK", n
/ 1000);
632 vex_printf(" %3d ", n
);
642 static void initCounts ( void )
646 for (op
= 0; op
< AMD64G_CC_OP_NUMBER
; op
++) {
647 tabc_fast
[op
] = tabc_slow
[op
] = 0;
648 for (co
= 0; co
< 16; co
++)
649 tab_cond
[op
][co
] = 0;
653 #endif /* PROFILE_RFLAGS */
656 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
657 /* Calculate all the 6 flags from the supplied thunk parameters.
658 Worker function, not directly called from generated code. */
660 ULong
amd64g_calculate_rflags_all_WRK ( ULong cc_op
,
661 ULong cc_dep1_formal
,
662 ULong cc_dep2_formal
,
663 ULong cc_ndep_formal
)
666 case AMD64G_CC_OP_COPY
:
667 return cc_dep1_formal
668 & (AMD64G_CC_MASK_O
| AMD64G_CC_MASK_S
| AMD64G_CC_MASK_Z
669 | AMD64G_CC_MASK_A
| AMD64G_CC_MASK_C
| AMD64G_CC_MASK_P
);
671 case AMD64G_CC_OP_ADDB
: ACTIONS_ADD( 8, UChar
);
672 case AMD64G_CC_OP_ADDW
: ACTIONS_ADD( 16, UShort
);
673 case AMD64G_CC_OP_ADDL
: ACTIONS_ADD( 32, UInt
);
674 case AMD64G_CC_OP_ADDQ
: ACTIONS_ADD( 64, ULong
);
676 case AMD64G_CC_OP_ADCB
: ACTIONS_ADC( 8, UChar
);
677 case AMD64G_CC_OP_ADCW
: ACTIONS_ADC( 16, UShort
);
678 case AMD64G_CC_OP_ADCL
: ACTIONS_ADC( 32, UInt
);
679 case AMD64G_CC_OP_ADCQ
: ACTIONS_ADC( 64, ULong
);
681 case AMD64G_CC_OP_SUBB
: ACTIONS_SUB( 8, UChar
);
682 case AMD64G_CC_OP_SUBW
: ACTIONS_SUB( 16, UShort
);
683 case AMD64G_CC_OP_SUBL
: ACTIONS_SUB( 32, UInt
);
684 case AMD64G_CC_OP_SUBQ
: ACTIONS_SUB( 64, ULong
);
686 case AMD64G_CC_OP_SBBB
: ACTIONS_SBB( 8, UChar
);
687 case AMD64G_CC_OP_SBBW
: ACTIONS_SBB( 16, UShort
);
688 case AMD64G_CC_OP_SBBL
: ACTIONS_SBB( 32, UInt
);
689 case AMD64G_CC_OP_SBBQ
: ACTIONS_SBB( 64, ULong
);
691 case AMD64G_CC_OP_LOGICB
: ACTIONS_LOGIC( 8, UChar
);
692 case AMD64G_CC_OP_LOGICW
: ACTIONS_LOGIC( 16, UShort
);
693 case AMD64G_CC_OP_LOGICL
: ACTIONS_LOGIC( 32, UInt
);
694 case AMD64G_CC_OP_LOGICQ
: ACTIONS_LOGIC( 64, ULong
);
696 case AMD64G_CC_OP_INCB
: ACTIONS_INC( 8, UChar
);
697 case AMD64G_CC_OP_INCW
: ACTIONS_INC( 16, UShort
);
698 case AMD64G_CC_OP_INCL
: ACTIONS_INC( 32, UInt
);
699 case AMD64G_CC_OP_INCQ
: ACTIONS_INC( 64, ULong
);
701 case AMD64G_CC_OP_DECB
: ACTIONS_DEC( 8, UChar
);
702 case AMD64G_CC_OP_DECW
: ACTIONS_DEC( 16, UShort
);
703 case AMD64G_CC_OP_DECL
: ACTIONS_DEC( 32, UInt
);
704 case AMD64G_CC_OP_DECQ
: ACTIONS_DEC( 64, ULong
);
706 case AMD64G_CC_OP_SHLB
: ACTIONS_SHL( 8, UChar
);
707 case AMD64G_CC_OP_SHLW
: ACTIONS_SHL( 16, UShort
);
708 case AMD64G_CC_OP_SHLL
: ACTIONS_SHL( 32, UInt
);
709 case AMD64G_CC_OP_SHLQ
: ACTIONS_SHL( 64, ULong
);
711 case AMD64G_CC_OP_SHRB
: ACTIONS_SHR( 8, UChar
);
712 case AMD64G_CC_OP_SHRW
: ACTIONS_SHR( 16, UShort
);
713 case AMD64G_CC_OP_SHRL
: ACTIONS_SHR( 32, UInt
);
714 case AMD64G_CC_OP_SHRQ
: ACTIONS_SHR( 64, ULong
);
716 case AMD64G_CC_OP_ROLB
: ACTIONS_ROL( 8, UChar
);
717 case AMD64G_CC_OP_ROLW
: ACTIONS_ROL( 16, UShort
);
718 case AMD64G_CC_OP_ROLL
: ACTIONS_ROL( 32, UInt
);
719 case AMD64G_CC_OP_ROLQ
: ACTIONS_ROL( 64, ULong
);
721 case AMD64G_CC_OP_RORB
: ACTIONS_ROR( 8, UChar
);
722 case AMD64G_CC_OP_RORW
: ACTIONS_ROR( 16, UShort
);
723 case AMD64G_CC_OP_RORL
: ACTIONS_ROR( 32, UInt
);
724 case AMD64G_CC_OP_RORQ
: ACTIONS_ROR( 64, ULong
);
726 case AMD64G_CC_OP_UMULB
: ACTIONS_UMUL( 8, UChar
, toUChar
,
728 case AMD64G_CC_OP_UMULW
: ACTIONS_UMUL( 16, UShort
, toUShort
,
730 case AMD64G_CC_OP_UMULL
: ACTIONS_UMUL( 32, UInt
, toUInt
,
733 case AMD64G_CC_OP_UMULQ
: ACTIONS_UMULQ
;
735 case AMD64G_CC_OP_SMULB
: ACTIONS_SMUL( 8, Char
, toUChar
,
737 case AMD64G_CC_OP_SMULW
: ACTIONS_SMUL( 16, Short
, toUShort
,
739 case AMD64G_CC_OP_SMULL
: ACTIONS_SMUL( 32, Int
, toUInt
,
742 case AMD64G_CC_OP_SMULQ
: ACTIONS_SMULQ
;
744 case AMD64G_CC_OP_ANDN32
: ACTIONS_ANDN( 32, UInt
);
745 case AMD64G_CC_OP_ANDN64
: ACTIONS_ANDN( 64, ULong
);
747 case AMD64G_CC_OP_BLSI32
: ACTIONS_BLSI( 32, UInt
);
748 case AMD64G_CC_OP_BLSI64
: ACTIONS_BLSI( 64, ULong
);
750 case AMD64G_CC_OP_BLSMSK32
: ACTIONS_BLSMSK( 32, UInt
);
751 case AMD64G_CC_OP_BLSMSK64
: ACTIONS_BLSMSK( 64, ULong
);
753 case AMD64G_CC_OP_BLSR32
: ACTIONS_BLSR( 32, UInt
);
754 case AMD64G_CC_OP_BLSR64
: ACTIONS_BLSR( 64, ULong
);
756 case AMD64G_CC_OP_ADCX32
: ACTIONS_ADX( 32, UInt
, C
);
757 case AMD64G_CC_OP_ADCX64
: ACTIONS_ADX( 64, ULong
, C
);
759 case AMD64G_CC_OP_ADOX32
: ACTIONS_ADX( 32, UInt
, O
);
760 case AMD64G_CC_OP_ADOX64
: ACTIONS_ADX( 64, ULong
, O
);
763 /* shouldn't really make these calls from generated code */
764 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
765 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
766 cc_op
, cc_dep1_formal
, cc_dep2_formal
, cc_ndep_formal
);
767 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
#if defined(VGO_freebsd) || defined(VGO_darwin)

/* This dummy function is just used to have an address just after
   amd64g_calculate_rflags_all_WRK */
void _______VVVVVVVV_amd64g_calculate_rflags_all_WRK_VVVVVVVV_______ (void)
{
}

/* Export addresses of amd64g_calculate_rflags_all_WRK and
   _______VVVVVVVV_amd64g_calculate_rflags_all_WRK_VVVVVVVV_______
   Used in syswrap-main.c / VG_(post_syscall) in the case where
   the above function was interrupted and we need to work out
   what needs to be done for the resumption */
Addr addr_amd64g_calculate_rflags_all_WRK
   = (Addr)amd64g_calculate_rflags_all_WRK;
Addr addr________VVVVVVVV_amd64g_calculate_rflags_all_WRK_VVVVVVVV_______
   = (Addr)_______VVVVVVVV_amd64g_calculate_rflags_all_WRK_VVVVVVVV_______;

#endif
791 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
792 /* Calculate all the 6 flags from the supplied thunk parameters. */
793 ULong
amd64g_calculate_rflags_all ( ULong cc_op
,
799 if (!initted
) initCounts();
801 if (SHOW_COUNTS_NOW
) showCounts();
804 amd64g_calculate_rflags_all_WRK ( cc_op
, cc_dep1
, cc_dep2
, cc_ndep
);
808 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
809 /* Calculate just the carry flag from the supplied thunk parameters. */
810 ULong
amd64g_calculate_rflags_c ( ULong cc_op
,
816 if (!initted
) initCounts();
819 if (SHOW_COUNTS_NOW
) showCounts();
822 /* Fast-case some common ones. */
824 case AMD64G_CC_OP_COPY
:
825 return (cc_dep1
>> AMD64G_CC_SHIFT_C
) & 1;
826 case AMD64G_CC_OP_LOGICQ
:
827 case AMD64G_CC_OP_LOGICL
:
828 case AMD64G_CC_OP_LOGICW
:
829 case AMD64G_CC_OP_LOGICB
:
831 // case AMD64G_CC_OP_SUBL:
832 // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
833 // ? AMD64G_CC_MASK_C : 0;
834 // case AMD64G_CC_OP_SUBW:
835 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
836 // ? AMD64G_CC_MASK_C : 0;
837 // case AMD64G_CC_OP_SUBB:
838 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
839 // ? AMD64G_CC_MASK_C : 0;
840 // case AMD64G_CC_OP_INCL:
841 // case AMD64G_CC_OP_DECL:
842 // return cc_ndep & AMD64G_CC_MASK_C;
852 return amd64g_calculate_rflags_all_WRK(cc_op
,cc_dep1
,cc_dep2
,cc_ndep
)
857 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
859 ULong
amd64g_calculate_condition ( ULong
/*AMD64Condcode*/ cond
,
865 ULong rflags
= amd64g_calculate_rflags_all_WRK(cc_op
, cc_dep1
,
867 ULong of
,sf
,zf
,cf
,pf
;
868 ULong inv
= cond
& 1;
871 if (!initted
) initCounts();
872 tab_cond
[cc_op
][cond
]++;
874 if (SHOW_COUNTS_NOW
) showCounts();
879 case AMD64CondO
: /* OF == 1 */
880 of
= rflags
>> AMD64G_CC_SHIFT_O
;
881 return 1 & (inv
^ of
);
884 case AMD64CondZ
: /* ZF == 1 */
885 zf
= rflags
>> AMD64G_CC_SHIFT_Z
;
886 return 1 & (inv
^ zf
);
889 case AMD64CondB
: /* CF == 1 */
890 cf
= rflags
>> AMD64G_CC_SHIFT_C
;
891 return 1 & (inv
^ cf
);
895 case AMD64CondBE
: /* (CF or ZF) == 1 */
896 cf
= rflags
>> AMD64G_CC_SHIFT_C
;
897 zf
= rflags
>> AMD64G_CC_SHIFT_Z
;
898 return 1 & (inv
^ (cf
| zf
));
902 case AMD64CondS
: /* SF == 1 */
903 sf
= rflags
>> AMD64G_CC_SHIFT_S
;
904 return 1 & (inv
^ sf
);
907 case AMD64CondP
: /* PF == 1 */
908 pf
= rflags
>> AMD64G_CC_SHIFT_P
;
909 return 1 & (inv
^ pf
);
912 case AMD64CondL
: /* (SF xor OF) == 1 */
913 sf
= rflags
>> AMD64G_CC_SHIFT_S
;
914 of
= rflags
>> AMD64G_CC_SHIFT_O
;
915 return 1 & (inv
^ (sf
^ of
));
919 case AMD64CondLE
: /* ((SF xor OF) or ZF) == 1 */
920 sf
= rflags
>> AMD64G_CC_SHIFT_S
;
921 of
= rflags
>> AMD64G_CC_SHIFT_O
;
922 zf
= rflags
>> AMD64G_CC_SHIFT_Z
;
923 return 1 & (inv
^ ((sf
^ of
) | zf
));
927 /* shouldn't really make these calls from generated code */
928 vex_printf("amd64g_calculate_condition"
929 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
930 cond
, cc_op
, cc_dep1
, cc_dep2
, cc_ndep
);
931 vpanic("amd64g_calculate_condition");
936 /* VISIBLE TO LIBVEX CLIENT */
937 ULong
LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State
* vex_state
)
939 ULong rflags
= amd64g_calculate_rflags_all_WRK(
940 vex_state
->guest_CC_OP
,
941 vex_state
->guest_CC_DEP1
,
942 vex_state
->guest_CC_DEP2
,
943 vex_state
->guest_CC_NDEP
945 Long dflag
= vex_state
->guest_DFLAG
;
946 vassert(dflag
== 1 || dflag
== -1);
949 if (vex_state
->guest_IDFLAG
== 1)
951 if (vex_state
->guest_ACFLAG
== 1)
957 /* VISIBLE TO LIBVEX CLIENT */
959 LibVEX_GuestAMD64_put_rflags ( ULong rflags
,
960 /*MOD*/VexGuestAMD64State
* vex_state
)
963 if (rflags
& AMD64G_CC_MASK_D
) {
964 vex_state
->guest_DFLAG
= -1;
965 rflags
&= ~AMD64G_CC_MASK_D
;
968 vex_state
->guest_DFLAG
= 1;
971 if (rflags
& AMD64G_CC_MASK_ID
) {
972 vex_state
->guest_IDFLAG
= 1;
973 rflags
&= ~AMD64G_CC_MASK_ID
;
976 vex_state
->guest_IDFLAG
= 0;
979 if (rflags
& AMD64G_CC_MASK_AC
) {
980 vex_state
->guest_ACFLAG
= 1;
981 rflags
&= ~AMD64G_CC_MASK_AC
;
984 vex_state
->guest_ACFLAG
= 0;
986 UInt cc_mask
= AMD64G_CC_MASK_O
| AMD64G_CC_MASK_S
| AMD64G_CC_MASK_Z
|
987 AMD64G_CC_MASK_A
| AMD64G_CC_MASK_C
| AMD64G_CC_MASK_P
;
988 vex_state
->guest_CC_OP
= AMD64G_CC_OP_COPY
;
989 vex_state
->guest_CC_DEP1
= rflags
& cc_mask
;
990 vex_state
->guest_CC_DEP2
= 0;
991 vex_state
->guest_CC_NDEP
= 0;
994 /* VISIBLE TO LIBVEX CLIENT */
996 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag
,
997 /*MOD*/VexGuestAMD64State
* vex_state
)
999 ULong oszacp
= amd64g_calculate_rflags_all_WRK(
1000 vex_state
->guest_CC_OP
,
1001 vex_state
->guest_CC_DEP1
,
1002 vex_state
->guest_CC_DEP2
,
1003 vex_state
->guest_CC_NDEP
1005 if (new_carry_flag
& 1) {
1006 oszacp
|= AMD64G_CC_MASK_C
;
1008 oszacp
&= ~AMD64G_CC_MASK_C
;
1010 vex_state
->guest_CC_OP
= AMD64G_CC_OP_COPY
;
1011 vex_state
->guest_CC_DEP1
= oszacp
;
1012 vex_state
->guest_CC_DEP2
= 0;
1013 vex_state
->guest_CC_NDEP
= 0;
#if defined(VGO_freebsd) || defined(VGO_darwin)
/* Used in syswrap-main.c / VG_(post_syscall) in the case where
   the above function was interrupted and we need to work out
   what needs to be done for the resumption.  These functions
   are extern so no need for 'addr' global variables */
void _______VVVVVVVV_after_GuestAMD64_put_rflag_c_VVVVVVVV_______ (void)
{
}
#endif
/*---------------------------------------------------------------*/
/*--- %rflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls the above run-time    ---*/
/*--- %rflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */
1035 static inline Bool
isU64 ( IRExpr
* e
, ULong n
)
1037 return e
->tag
== Iex_Const
1038 && e
->Iex
.Const
.con
->tag
== Ico_U64
1039 && e
->Iex
.Const
.con
->Ico
.U64
== n
;
1042 /* Returns N if W64 is a value of the form 1 << N for N in 1 to 31,
1043 and zero in any other case. */
1044 static Int
isU64_1_shl_N_literal ( ULong w64
)
1046 if (w64
< (1ULL << 1) || w64
> (1ULL << 31))
1048 if ((w64
& (w64
- 1)) != 0)
1050 /* At this point, we know w64 is a power of two in the range 2^1 .. 2^31,
1051 and we only need to find out which one it is. */
1052 for (Int n
= 1; n
<= 31; n
++) {
1053 if (w64
== (1ULL << n
))
1056 /* Consequently we should never get here. */
1062 /* Returns N if E is an immediate of the form 1 << N for N in 1 to 31,
1063 and zero in any other case. */
1064 static Int
isU64_1_shl_N ( IRExpr
* e
)
1066 if (e
->tag
!= Iex_Const
|| e
->Iex
.Const
.con
->tag
!= Ico_U64
)
1068 ULong w64
= e
->Iex
.Const
.con
->Ico
.U64
;
1069 return isU64_1_shl_N_literal(w64
);
1072 /* Returns N if E is an immediate of the form (1 << N) - 1 for N in 1 to 31,
1073 and zero in any other case. */
1074 static Int
isU64_1_shl_N_minus_1 ( IRExpr
* e
)
1076 if (e
->tag
!= Iex_Const
|| e
->Iex
.Const
.con
->tag
!= Ico_U64
)
1078 ULong w64
= e
->Iex
.Const
.con
->Ico
.U64
;
1079 // This isn't actually necessary since isU64_1_shl_N_literal will return
1080 // zero given a zero argument, but still ..
1081 if (w64
== 0xFFFFFFFFFFFFFFFFULL
)
1083 return isU64_1_shl_N_literal(w64
+ 1);
1086 IRExpr
* guest_amd64_spechelper ( const HChar
* function_name
,
1088 IRStmt
** precedingStmts
,
1089 Int n_precedingStmts
)
1091 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
1092 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
1093 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
1094 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
1095 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
1098 for (i
= 0; args
[i
]; i
++)
1101 vex_printf("spec request:\n");
1102 vex_printf(" %s ", function_name
);
1103 for (i
= 0; i
< arity
; i
++) {
1110 /* --------- specialising "amd64g_calculate_condition" --------- */
1112 if (vex_streq(function_name
, "amd64g_calculate_condition")) {
1113 /* specialise calls to above "calculate condition" function */
1114 IRExpr
*cond
, *cc_op
, *cc_dep1
, *cc_dep2
;
1115 vassert(arity
== 5);
1121 /*---------------- ADDQ ----------------*/
1124 if (isU64(cc_op
, AMD64G_CC_OP_ADDQ
) && isU64(cond
, AMD64CondZ
)) {
1125 /* long long add, then Z --> test (dst+src == 0) */
1126 return unop(Iop_1Uto64
,
1128 binop(Iop_Add64
, cc_dep1
, cc_dep2
),
1133 if (isU64(cc_op
, AMD64G_CC_OP_ADDQ
) && isU64(cond
, AMD64CondS
)) {
1134 /* long long add, then S (negative)
1136 --> ((dst + src) >>u 63) & 1
1138 return binop(Iop_And64
,
1140 binop(Iop_Add64
, cc_dep1
, cc_dep2
),
1145 /*---------------- ADDL ----------------*/
1148 if (isU64(cc_op
, AMD64G_CC_OP_ADDL
) && isU64(cond
, AMD64CondO
)) {
1149 /* This is very commonly generated by Javascript JITs, for
1150 the idiom "do a 32-bit add and jump to out-of-line code if
1151 an overflow occurs". */
1152 /* long add, then O (overflow)
1153 --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
1154 --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1155 --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1157 vassert(isIRAtom(cc_dep1
));
1158 vassert(isIRAtom(cc_dep2
));
1164 binop(Iop_Xor64
, cc_dep1
, cc_dep2
)),
1167 binop(Iop_Add64
, cc_dep1
, cc_dep2
))),
1174 if (isU64(cc_op
, AMD64G_CC_OP_ADDL
) && isU64(cond
, AMD64CondS
)) {
1175 /* long add, then S (negative)
1177 --> ((dst +64 src) >>u 31) & 1
1178 Pointless to narrow the args to 32 bit before the add. */
1179 return binop(Iop_And64
,
1181 binop(Iop_Add64
, cc_dep1
, cc_dep2
),
1185 if (isU64(cc_op
, AMD64G_CC_OP_ADDL
) && isU64(cond
, AMD64CondNS
)) {
1186 /* long add, then NS (not negative)
1187 --> (dst+src)[31] ^ 1
1188 --> (((dst +64 src) >>u 31) & 1) ^ 1
1189 Pointless to narrow the args to 32 bit before the add. */
1190 return binop(Iop_Xor64
,
1193 binop(Iop_Add64
, cc_dep1
, cc_dep2
),
1199 /*---------------- SUBQ ----------------*/
1202 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondO
)) {
1203 /* long long sub/cmp, then O (overflow)
1204 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
1205 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
1207 vassert(isIRAtom(cc_dep1
));
1208 vassert(isIRAtom(cc_dep2
));
1209 return binop(Iop_Shr64
,
1211 binop(Iop_Xor64
, cc_dep1
, cc_dep2
),
1214 binop(Iop_Sub64
, cc_dep1
, cc_dep2
))),
1217 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondNO
)) {
1218 /* No action. Never yet found a test case. */
1222 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondB
)) {
1223 /* long long sub/cmp, then B (unsigned less than)
1224 --> test dst <u src */
1225 return unop(Iop_1Uto64
,
1226 binop(Iop_CmpLT64U
, cc_dep1
, cc_dep2
));
1228 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondNB
)) {
1229 /* long long sub/cmp, then NB (unsigned greater than or equal)
1230 --> test src <=u dst */
1231 /* Note, args are opposite way round from the usual */
1232 return unop(Iop_1Uto64
,
1233 binop(Iop_CmpLE64U
, cc_dep2
, cc_dep1
));
1237 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondZ
)) {
1238 /* long long sub/cmp, then Z --> test dst==src */
1239 return unop(Iop_1Uto64
,
1240 binop(Iop_CmpEQ64
,cc_dep1
,cc_dep2
));
1242 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondNZ
)) {
1243 /* long long sub/cmp, then NZ --> test dst!=src */
1244 return unop(Iop_1Uto64
,
1245 binop(Iop_CmpNE64
,cc_dep1
,cc_dep2
));
1249 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondBE
)) {
1250 /* long long sub/cmp, then BE (unsigned less than or equal)
1251 --> test dst <=u src */
1252 return unop(Iop_1Uto64
,
1253 binop(Iop_CmpLE64U
, cc_dep1
, cc_dep2
));
1255 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondNBE
)) {
1256 /* long long sub/cmp, then NBE (unsigned greater than)
1257 --> test !(dst <=u src) */
1258 return binop(Iop_Xor64
,
1260 binop(Iop_CmpLE64U
, cc_dep1
, cc_dep2
)),
1265 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondS
)) {
1266 /* long long sub/cmp, then S (negative)
1268 --> (dst-src) >>u 63 */
1269 return binop(Iop_Shr64
,
1270 binop(Iop_Sub64
, cc_dep1
, cc_dep2
),
1273 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondNS
)) {
1274 /* long long sub/cmp, then NS (not negative)
1275 --> (dst-src)[63] ^ 1
1276 --> ((dst-src) >>u 63) ^ 1 */
1277 return binop(Iop_Xor64
,
1279 binop(Iop_Sub64
, cc_dep1
, cc_dep2
),
1285 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondL
)) {
1286 /* long long sub/cmp, then L (signed less than)
1287 --> test dst <s src */
1288 return unop(Iop_1Uto64
,
1289 binop(Iop_CmpLT64S
, cc_dep1
, cc_dep2
));
1291 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondNL
)) {
1292 /* long long sub/cmp, then NL (signed greater than or equal)
1293 --> test dst >=s src
1294 --> test src <=s dst */
1295 return unop(Iop_1Uto64
,
1296 binop(Iop_CmpLE64S
, cc_dep2
, cc_dep1
));
1300 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondLE
)) {
1301 /* long long sub/cmp, then LE (signed less than or equal)
1302 --> test dst <=s src */
1303 return unop(Iop_1Uto64
,
1304 binop(Iop_CmpLE64S
, cc_dep1
, cc_dep2
));
1306 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
) && isU64(cond
, AMD64CondNLE
)) {
1307 /* long sub/cmp, then NLE (signed greater than)
1308 --> test !(dst <=s src)
1309 --> test (dst >s src)
1310 --> test (src <s dst) */
1311 return unop(Iop_1Uto64
,
1312 binop(Iop_CmpLT64S
, cc_dep2
, cc_dep1
));
1316 /*---------------- SUBL ----------------*/
1319 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondO
)) {
1320 /* This is very commonly generated by Javascript JITs, for
1321 the idiom "do a 32-bit subtract and jump to out-of-line
1322 code if an overflow occurs". */
1323 /* long sub/cmp, then O (overflow)
1324 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
1325 --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
1327 vassert(isIRAtom(cc_dep1
));
1328 vassert(isIRAtom(cc_dep2
));
1333 binop(Iop_Xor64
, cc_dep1
, cc_dep2
),
1336 binop(Iop_Sub64
, cc_dep1
, cc_dep2
))),
1340 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondNO
)) {
1341 /* No action. Never yet found a test case. */
1346 /* It appears that LLVM 5.0 and later have a new way to find out
1347 whether the top N bits of a word W are all zero, by computing
1349 W <u 0---(N-1)---0 1 0---0 or
1350 W <=u 0---(N-1)---0 0 1---1
1352 In particular, the result will be defined if the top N bits of W
1353 are defined, even if the trailing bits -- those corresponding to
1354 the rightmost 0---0 / 1---1 section -- are undefined. Rather than
1355 make Memcheck more complex, we detect this case where we can and
1356 shift out the irrelevant and potentially undefined bits. */
1358 Bool is_NB_or_NBE
= False
;
1359 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
)) {
1360 if (isU64(cond
, AMD64CondB
) || isU64(cond
, AMD64CondNB
)) {
1361 /* long sub/cmp, then B (unsigned less than),
1362 where dep2 is a power of 2:
1363 -> CmpLT32U(dep1, 1 << N)
1364 -> CmpEQ32(dep1 >>u N, 0)
1366 long sub/cmp, then NB (unsigned greater than or equal),
1367 where dep2 is a power of 2:
1368 -> CmpGE32U(dep1, 1 << N)
1369 -> CmpNE32(dep1 >>u N, 0)
1370 This avoids CmpLT32U/CmpGE32U being applied to potentially
1371 uninitialised bits in the area being shifted out. */
1372 n
= isU64_1_shl_N(cc_dep2
);
1373 is_NB_or_NBE
= isU64(cond
, AMD64CondNB
);
1374 } else if (isU64(cond
, AMD64CondBE
) || isU64(cond
, AMD64CondNBE
)) {
1375 /* long sub/cmp, then BE (unsigned less than or equal),
1376 where dep2 is a power of 2 minus 1:
1377 -> CmpLE32U(dep1, (1 << N) - 1)
1378 -> CmpEQ32(dep1 >>u N, 0)
1380 long sub/cmp, then NBE (unsigned greater than),
1381 where dep2 is a power of 2 minus 1:
1382 -> CmpGT32U(dep1, (1 << N) - 1)
1383 -> CmpNE32(dep1 >>u N, 0)
1384 This avoids CmpLE32U/CmpGT32U being applied to potentially
1385 uninitialised bits in the area being shifted out. */
1386 n
= isU64_1_shl_N_minus_1(cc_dep2
);
1387 is_NB_or_NBE
= isU64(cond
, AMD64CondNBE
);
1391 vassert(n
>= 1 && n
<= 31);
1392 return unop(Iop_1Uto64
,
1393 binop(is_NB_or_NBE
? Iop_CmpNE32
: Iop_CmpEQ32
,
1394 binop(Iop_Shr32
, unop(Iop_64to32
, cc_dep1
),
1399 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondB
)) {
1400 /* long sub/cmp, then B (unsigned less than)
1401 --> test dst <u src */
1402 return unop(Iop_1Uto64
,
1404 unop(Iop_64to32
, cc_dep1
),
1405 unop(Iop_64to32
, cc_dep2
)));
1407 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondNB
)) {
1408 /* long sub/cmp, then NB (unsigned greater than or equal)
1409 --> test src <=u dst */
1410 /* Note, args are opposite way round from the usual */
1411 return unop(Iop_1Uto64
,
1413 unop(Iop_64to32
, cc_dep2
),
1414 unop(Iop_64to32
, cc_dep1
)));
1418 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondZ
)) {
1419 /* long sub/cmp, then Z --> test dst==src */
1420 return unop(Iop_1Uto64
,
1422 unop(Iop_64to32
, cc_dep1
),
1423 unop(Iop_64to32
, cc_dep2
)));
1425 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondNZ
)) {
1426 /* long sub/cmp, then NZ --> test dst!=src */
1427 return unop(Iop_1Uto64
,
1429 unop(Iop_64to32
, cc_dep1
),
1430 unop(Iop_64to32
, cc_dep2
)));
1434 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondBE
)) {
1435 /* long sub/cmp, then BE (unsigned less than or equal)
1436 --> test dst <=u src */
1437 return unop(Iop_1Uto64
,
1439 unop(Iop_64to32
, cc_dep1
),
1440 unop(Iop_64to32
, cc_dep2
)));
1442 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondNBE
)) {
1443 /* long sub/cmp, then NBE (unsigned greater than)
1444 --> test src <u dst */
1445 /* Note, args are opposite way round from the usual */
1446 return unop(Iop_1Uto64
,
1448 unop(Iop_64to32
, cc_dep2
),
1449 unop(Iop_64to32
, cc_dep1
)));
1453 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondS
)) {
1454 /* long sub/cmp, then S (negative)
1456 --> ((dst -64 src) >>u 31) & 1
1457 Pointless to narrow the args to 32 bit before the subtract. */
1458 return binop(Iop_And64
,
1460 binop(Iop_Sub64
, cc_dep1
, cc_dep2
),
1464 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondNS
)) {
1465 /* long sub/cmp, then NS (not negative)
1466 --> (dst-src)[31] ^ 1
1467 --> (((dst -64 src) >>u 31) & 1) ^ 1
1468 Pointless to narrow the args to 32 bit before the subtract. */
1469 return binop(Iop_Xor64
,
1472 binop(Iop_Sub64
, cc_dep1
, cc_dep2
),
1479 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondL
)) {
1480 /* long sub/cmp, then L (signed less than)
1481 --> test dst <s src */
1482 return unop(Iop_1Uto64
,
1484 unop(Iop_64to32
, cc_dep1
),
1485 unop(Iop_64to32
, cc_dep2
)));
1487 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondNL
)) {
1488 /* long sub/cmp, then NL (signed greater than or equal)
1489 --> test dst >=s src
1490 --> test src <=s dst */
1491 return unop(Iop_1Uto64
,
1493 unop(Iop_64to32
, cc_dep2
),
1494 unop(Iop_64to32
, cc_dep1
)));
1498 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondLE
)) {
1499 /* long sub/cmp, then LE (signed less than or equal)
1500 --> test dst <=s src */
1501 return unop(Iop_1Uto64
,
1503 unop(Iop_64to32
, cc_dep1
),
1504 unop(Iop_64to32
, cc_dep2
)));
1507 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
) && isU64(cond
, AMD64CondNLE
)) {
1508 /* long sub/cmp, then NLE (signed greater than)
1509 --> test !(dst <=s src)
1510 --> test (dst >s src)
1511 --> test (src <s dst) */
1512 return unop(Iop_1Uto64
,
1514 unop(Iop_64to32
, cc_dep2
),
1515 unop(Iop_64to32
, cc_dep1
)));
1519 /*---------------- SUBW ----------------*/
1522 if (isU64(cc_op
, AMD64G_CC_OP_SUBW
) && isU64(cond
, AMD64CondZ
)) {
1523 /* word sub/cmp, then Z --> test dst==src */
1524 return unop(Iop_1Uto64
,
1526 unop(Iop_64to16
,cc_dep1
),
1527 unop(Iop_64to16
,cc_dep2
)));
1529 if (isU64(cc_op
, AMD64G_CC_OP_SUBW
) && isU64(cond
, AMD64CondNZ
)) {
1530 /* word sub/cmp, then NZ --> test dst!=src */
1531 return unop(Iop_1Uto64
,
1533 unop(Iop_64to16
,cc_dep1
),
1534 unop(Iop_64to16
,cc_dep2
)));
1538 if (isU64(cc_op
, AMD64G_CC_OP_SUBW
) && isU64(cond
, AMD64CondBE
)) {
1539 /* word sub/cmp, then BE (unsigned less than or equal)
1540 --> test dst <=u src */
1541 return unop(Iop_1Uto64
,
1543 binop(Iop_Shl64
, cc_dep1
, mkU8(48)),
1544 binop(Iop_Shl64
, cc_dep2
, mkU8(48))));
1548 if (isU64(cc_op
, AMD64G_CC_OP_SUBW
) && isU64(cond
, AMD64CondS
)
1549 && isU64(cc_dep2
, 0)) {
1550 /* word sub/cmp of zero, then S --> test (dst-0 <s 0)
1553 This is yet another scheme by which clang figures out if the
1554 top bit of a word is 1 or 0. See also LOGICB/CondS below. */
1555 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1556 for an 16-bit comparison, since the args to the helper
1557 function are always U64s. */
1558 return binop(Iop_And64
,
1559 binop(Iop_Shr64
,cc_dep1
,mkU8(15)),
1562 if (isU64(cc_op
, AMD64G_CC_OP_SUBW
) && isU64(cond
, AMD64CondNS
)
1563 && isU64(cc_dep2
, 0)) {
1564 /* word sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1565 --> test !(dst <s 0)
1566 --> (ULong) !dst[15]
1568 return binop(Iop_Xor64
,
1570 binop(Iop_Shr64
,cc_dep1
,mkU8(15)),
1576 if (isU64(cc_op
, AMD64G_CC_OP_SUBW
) && isU64(cond
, AMD64CondLE
)) {
1577 /* word sub/cmp, then LE (signed less than or equal)
1578 --> test dst <=s src */
1579 return unop(Iop_1Uto64
,
1581 binop(Iop_Shl64
,cc_dep1
,mkU8(48)),
1582 binop(Iop_Shl64
,cc_dep2
,mkU8(48))));
1586 /*---------------- SUBB ----------------*/
1589 if (isU64(cc_op
, AMD64G_CC_OP_SUBB
) && isU64(cond
, AMD64CondB
)) {
1590 /* byte sub/cmp, then B (unsigned less than)
1591 --> test dst <u src */
1592 return unop(Iop_1Uto64
,
1594 binop(Iop_And64
, cc_dep1
, mkU64(0xFF)),
1595 binop(Iop_And64
, cc_dep2
, mkU64(0xFF))));
1597 if (isU64(cc_op
, AMD64G_CC_OP_SUBB
) && isU64(cond
, AMD64CondNB
)) {
1598 /* byte sub/cmp, then NB (unsigned greater than or equal)
1599 --> test src <=u dst */
1600 /* Note, args are opposite way round from the usual */
1601 return unop(Iop_1Uto64
,
1603 binop(Iop_And64
, cc_dep2
, mkU64(0xFF)),
1604 binop(Iop_And64
, cc_dep1
, mkU64(0xFF))));
1608 if (isU64(cc_op
, AMD64G_CC_OP_SUBB
) && isU64(cond
, AMD64CondZ
)) {
1609 /* byte sub/cmp, then Z --> test dst==src */
1610 return unop(Iop_1Uto64
,
1612 unop(Iop_64to8
,cc_dep1
),
1613 unop(Iop_64to8
,cc_dep2
)));
1615 if (isU64(cc_op
, AMD64G_CC_OP_SUBB
) && isU64(cond
, AMD64CondNZ
)) {
1616 /* byte sub/cmp, then NZ --> test dst!=src */
1617 return unop(Iop_1Uto64
,
1619 unop(Iop_64to8
,cc_dep1
),
1620 unop(Iop_64to8
,cc_dep2
)));
1624 if (isU64(cc_op
, AMD64G_CC_OP_SUBB
) && isU64(cond
, AMD64CondBE
)) {
1625 /* byte sub/cmp, then BE (unsigned less than or equal)
1626 --> test dst <=u src */
1627 return unop(Iop_1Uto64
,
1629 binop(Iop_And64
, cc_dep1
, mkU64(0xFF)),
1630 binop(Iop_And64
, cc_dep2
, mkU64(0xFF))));
1634 if (isU64(cc_op
, AMD64G_CC_OP_SUBB
) && isU64(cond
, AMD64CondS
)
1635 && isU64(cc_dep2
, 0)) {
1636 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1639 This is yet another scheme by which gcc figures out if the
1640 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1641 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1642 for an 8-bit comparison, since the args to the helper
1643 function are always U64s. */
1644 return binop(Iop_And64
,
1645 binop(Iop_Shr64
,cc_dep1
,mkU8(7)),
1648 if (isU64(cc_op
, AMD64G_CC_OP_SUBB
) && isU64(cond
, AMD64CondNS
)
1649 && isU64(cc_dep2
, 0)) {
1650 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1651 --> test !(dst <s 0)
1654 return binop(Iop_Xor64
,
1656 binop(Iop_Shr64
,cc_dep1
,mkU8(7)),
1661 /*---------------- LOGICQ ----------------*/
1663 if (isU64(cc_op
, AMD64G_CC_OP_LOGICQ
) && isU64(cond
, AMD64CondZ
)) {
1664 /* long long and/or/xor, then Z --> test dst==0 */
1665 return unop(Iop_1Uto64
,
1666 binop(Iop_CmpEQ64
, cc_dep1
, mkU64(0)));
1668 if (isU64(cc_op
, AMD64G_CC_OP_LOGICQ
) && isU64(cond
, AMD64CondNZ
)) {
1669 /* long long and/or/xor, then NZ --> test dst!=0 */
1670 return unop(Iop_1Uto64
,
1671 binop(Iop_CmpNE64
, cc_dep1
, mkU64(0)));
1674 if (isU64(cc_op
, AMD64G_CC_OP_LOGICQ
) && isU64(cond
, AMD64CondL
)) {
1675 /* long long and/or/xor, then L
1676 LOGIC sets SF and ZF according to the
1677 result and makes OF be zero. L computes SF ^ OF, but
1678 OF is zero, so this reduces to SF -- which will be 1 iff
1679 the result is < signed 0. Hence ...
1681 return unop(Iop_1Uto64
,
1688 if (isU64(cc_op
, AMD64G_CC_OP_LOGICQ
) && isU64(cond
, AMD64CondS
)) {
1689 /* long long and/or/xor, then S --> (ULong)result[63] */
1690 return binop(Iop_Shr64
, cc_dep1
, mkU8(63));
1693 if (isU64(cc_op
, AMD64G_CC_OP_LOGICQ
) && isU64(cond
, AMD64CondNS
)) {
1694 /* long long and/or/xor, then S --> (ULong) ~ result[63] */
1695 return binop(Iop_Xor64
,
1696 binop(Iop_Shr64
, cc_dep1
, mkU8(63)),
1700 /*---------------- LOGICL ----------------*/
1702 if (isU64(cc_op
, AMD64G_CC_OP_LOGICL
) && isU64(cond
, AMD64CondZ
)) {
1703 /* long and/or/xor, then Z --> test dst==0 */
1704 return unop(Iop_1Uto64
,
1706 unop(Iop_64to32
, cc_dep1
),
1709 if (isU64(cc_op
, AMD64G_CC_OP_LOGICL
) && isU64(cond
, AMD64CondNZ
)) {
1710 /* long and/or/xor, then NZ --> test dst!=0 */
1711 return unop(Iop_1Uto64
,
1713 unop(Iop_64to32
, cc_dep1
),
1717 if (isU64(cc_op
, AMD64G_CC_OP_LOGICL
) && isU64(cond
, AMD64CondLE
)) {
1718 /* long and/or/xor, then LE
1719 This is pretty subtle. LOGIC sets SF and ZF according to the
1720 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
1721 OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1722 the result is <=signed 0. Hence ...
1724 return unop(Iop_1Uto64
,
1726 unop(Iop_64to32
, cc_dep1
),
1730 if (isU64(cc_op
, AMD64G_CC_OP_LOGICL
) && isU64(cond
, AMD64CondS
)) {
1731 /* long and/or/xor, then S --> (ULong)result[31] */
1732 return binop(Iop_And64
,
1733 binop(Iop_Shr64
, cc_dep1
, mkU8(31)),
1736 if (isU64(cc_op
, AMD64G_CC_OP_LOGICL
) && isU64(cond
, AMD64CondNS
)) {
1737 /* long and/or/xor, then S --> (ULong) ~ result[31] */
1738 return binop(Iop_Xor64
,
1740 binop(Iop_Shr64
, cc_dep1
, mkU8(31)),
1745 /*---------------- LOGICW ----------------*/
1747 if (isU64(cc_op
, AMD64G_CC_OP_LOGICW
) && isU64(cond
, AMD64CondZ
)) {
1748 /* word and/or/xor, then Z --> test dst==0 */
1749 // Use CmpEQ32 rather than CmpEQ64 here, so that Memcheck instruments
1750 // it exactly at EdcAUTO.
1751 return unop(Iop_1Uto64
,
1753 unop(Iop_16Uto32
, unop(Iop_64to16
, cc_dep1
)),
1756 if (isU64(cc_op
, AMD64G_CC_OP_LOGICW
) && isU64(cond
, AMD64CondNZ
)) {
1757 /* word and/or/xor, then NZ --> test dst!=0 */
1758 // Use CmpNE32 rather than CmpNE64 here, so that Memcheck instruments
1759 // it exactly at EdcAUTO.
1760 return unop(Iop_1Uto64
,
1762 unop(Iop_16Uto32
, unop(Iop_64to16
, cc_dep1
)),
1766 if (isU64(cc_op
, AMD64G_CC_OP_LOGICW
) && isU64(cond
, AMD64CondS
)) {
1767 /* word and/or/xor, then S --> (ULong)result[15] */
1768 return binop(Iop_And64
,
1769 binop(Iop_Shr64
, cc_dep1
, mkU8(15)),
1772 if (isU64(cc_op
, AMD64G_CC_OP_LOGICW
) && isU64(cond
, AMD64CondNS
)) {
1773 /* word and/or/xor, then S --> (ULong) ~ result[15] */
1774 return binop(Iop_Xor64
,
1776 binop(Iop_Shr64
, cc_dep1
, mkU8(15)),
1781 /*---------------- LOGICB ----------------*/
1783 if (isU64(cc_op
, AMD64G_CC_OP_LOGICB
) && isU64(cond
, AMD64CondZ
)) {
1784 /* byte and/or/xor, then Z --> test dst==0 */
1785 // Use CmpEQ32 rather than CmpEQ64 here, so that Memcheck instruments
1786 // it exactly at EdcAUTO.
1787 return unop(Iop_1Uto64
,
1789 unop(Iop_8Uto32
, unop(Iop_64to8
, cc_dep1
)),
1792 if (isU64(cc_op
, AMD64G_CC_OP_LOGICB
) && isU64(cond
, AMD64CondNZ
)) {
1793 /* byte and/or/xor, then NZ --> test dst!=0 */
1794 // Use CmpNE32 rather than CmpNE64 here, so that Memcheck instruments
1795 // it exactly at EdcAUTO.
1796 return unop(Iop_1Uto64
,
1798 unop(Iop_8Uto32
, unop(Iop_64to8
, cc_dep1
)),
1802 if (isU64(cc_op
, AMD64G_CC_OP_LOGICB
) && isU64(cond
, AMD64CondS
)) {
1803 /* this is an idiom gcc sometimes uses to find out if the top
1804 bit of a byte register is set: eg testb %al,%al; js ..
1805 Since it just depends on the top bit of the byte, extract
1806 that bit and explicitly get rid of all the rest. This
1807 helps memcheck avoid false positives in the case where any
1808 of the other bits in the byte are undefined. */
1809 /* byte and/or/xor, then S --> (UInt)result[7] */
1810 return binop(Iop_And64
,
1811 binop(Iop_Shr64
,cc_dep1
,mkU8(7)),
1814 if (isU64(cc_op
, AMD64G_CC_OP_LOGICB
) && isU64(cond
, AMD64CondNS
)) {
1815 /* byte and/or/xor, then NS --> (UInt)!result[7] */
1816 return binop(Iop_Xor64
,
1818 binop(Iop_Shr64
,cc_dep1
,mkU8(7)),
1823 /*---------------- INCB ----------------*/
1825 if (isU64(cc_op
, AMD64G_CC_OP_INCB
) && isU64(cond
, AMD64CondLE
)) {
1826 /* 8-bit inc, then LE --> sign bit of the arg */
1827 return binop(Iop_And64
,
1829 binop(Iop_Sub64
, cc_dep1
, mkU64(1)),
1834 /*---------------- INCW ----------------*/
1836 if (isU64(cc_op
, AMD64G_CC_OP_INCW
) && isU64(cond
, AMD64CondZ
)) {
1837 /* 16-bit inc, then Z --> test dst == 0 */
1838 return unop(Iop_1Uto64
,
1840 binop(Iop_Shl64
,cc_dep1
,mkU8(48)),
1844 /*---------------- DECL ----------------*/
1846 if (isU64(cc_op
, AMD64G_CC_OP_DECL
) && isU64(cond
, AMD64CondZ
)) {
1847 /* dec L, then Z --> test dst == 0 */
1848 return unop(Iop_1Uto64
,
1850 unop(Iop_64to32
, cc_dep1
),
1854 /*---------------- DECW ----------------*/
1856 if (isU64(cc_op
, AMD64G_CC_OP_DECW
) && isU64(cond
, AMD64CondNZ
)) {
1857 /* 16-bit dec, then NZ --> test dst != 0 */
1858 return unop(Iop_1Uto64
,
1860 binop(Iop_Shl64
,cc_dep1
,mkU8(48)),
1864 /*---------------- SHRQ ----------------*/
1866 if (isU64(cc_op
, AMD64G_CC_OP_SHRQ
) && isU64(cond
, AMD64CondZ
)) {
1867 /* SHRQ, then Z --> test result[63:0] == 0 */
1868 return unop(Iop_1Uto64
,
1869 binop(Iop_CmpEQ64
, cc_dep1
, mkU64(0)));
1871 if (isU64(cc_op
, AMD64G_CC_OP_SHRQ
) && isU64(cond
, AMD64CondNZ
)) {
1872 /* SHRQ, then NZ --> test result[63:0] != 0 */
1873 return unop(Iop_1Uto64
,
1874 binop(Iop_CmpNE64
, cc_dep1
, mkU64(0)));
1877 if (isU64(cc_op
, AMD64G_CC_OP_SHRQ
) && isU64(cond
, AMD64CondS
)) {
1878 /* SHRQ, then S --> (ULong)result[63] (result is in dep1) */
1879 return binop(Iop_Shr64
, cc_dep1
, mkU8(63));
1881 // No known test case for this, hence disabled:
1882 //if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNS)) {
1883 // /* SHRQ, then NS --> (ULong) ~ result[63] */
1887 /*---------------- SHRL ----------------*/
1889 if (isU64(cc_op
, AMD64G_CC_OP_SHRL
) && isU64(cond
, AMD64CondZ
)) {
1890 /* SHRL, then Z --> test dep1 == 0 */
1891 return unop(Iop_1Uto64
,
1892 binop(Iop_CmpEQ32
, unop(Iop_64to32
, cc_dep1
),
1895 if (isU64(cc_op
, AMD64G_CC_OP_SHRL
) && isU64(cond
, AMD64CondNZ
)) {
1896 /* SHRL, then NZ --> test dep1 != 0 */
1897 return unop(Iop_1Uto64
,
1898 binop(Iop_CmpNE32
, unop(Iop_64to32
, cc_dep1
),
1902 if (isU64(cc_op
, AMD64G_CC_OP_SHRL
) && isU64(cond
, AMD64CondS
)) {
1903 /* SHRL/SARL, then S --> (ULong)result[31] */
1904 return binop(Iop_And64
,
1905 binop(Iop_Shr64
, cc_dep1
, mkU8(31)),
1908 if (isU64(cc_op
, AMD64G_CC_OP_SHRL
) && isU64(cond
, AMD64CondNS
)) {
1909 /* SHRL/SARL, then NS --> (ULong) ~ result[31] */
1910 return binop(Iop_Xor64
,
1912 binop(Iop_Shr64
, cc_dep1
, mkU8(31)),
1917 /*---------------- SHRW ----------------*/
1919 if (isU64(cc_op
, AMD64G_CC_OP_SHRW
) && isU64(cond
, AMD64CondZ
)) {
1920 /* SHRW, then Z --> test dep1 == 0 */
1921 return unop(Iop_1Uto64
,
1923 unop(Iop_16Uto32
, unop(Iop_64to16
, cc_dep1
)),
1926 // No known test case for this, hence disabled:
1927 //if (isU64(cc_op, AMD64G_CC_OP_SHRW) && isU64(cond, AMD64CondNZ)) {
1928 // /* SHRW, then NZ --> test dep1 == 0 */
1929 // return unop(Iop_1Uto64,
1930 // binop(Iop_CmpNE32,
1931 // unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1935 /*---------------- SHLQ ----------------*/
1937 if (isU64(cc_op
, AMD64G_CC_OP_SHLQ
) && isU64(cond
, AMD64CondZ
)) {
1938 /* SHLQ, then Z --> test dep1 == 0 */
1939 return unop(Iop_1Uto64
,
1940 binop(Iop_CmpEQ64
, cc_dep1
, mkU64(0)));
1942 if (isU64(cc_op
, AMD64G_CC_OP_SHLQ
) && isU64(cond
, AMD64CondNZ
)) {
1943 /* SHLQ, then NZ --> test dep1 != 0 */
1944 return unop(Iop_1Uto64
,
1945 binop(Iop_CmpNE64
, cc_dep1
, mkU64(0)));
1949 if (isU64(cc_op
, AMD64G_CC_OP_SHLQ
) && isU64(cond
, AMD64CondS
)) {
1950 /* SHLQ, then S --> (ULong)result[63] */
1951 return binop(Iop_Shr64
, cc_dep1
, mkU8(63));
1953 // No known test case
1954 //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNS)) {
1955 // /* SHLQ, then NS --> (ULong) ~ result[63] */
1959 /*---------------- SHLL ----------------*/
1961 if (isU64(cc_op
, AMD64G_CC_OP_SHLL
) && isU64(cond
, AMD64CondZ
)) {
1962 /* SHLL, then Z --> test result[31:0] == 0 */
1963 return unop(Iop_1Uto64
,
1964 binop(Iop_CmpEQ32
, unop(Iop_64to32
, cc_dep1
),
1968 if (isU64(cc_op
, AMD64G_CC_OP_SHLL
) && isU64(cond
, AMD64CondNZ
)) {
1969 /* SHLL, then NZ --> test dep1 != 0 */
1970 return unop(Iop_1Uto64
,
1971 binop(Iop_CmpNE32
, unop(Iop_64to32
, cc_dep1
),
1975 if (isU64(cc_op
, AMD64G_CC_OP_SHLL
) && isU64(cond
, AMD64CondS
)) {
1976 /* SHLL, then S --> (ULong)result[31] */
1977 return binop(Iop_And64
,
1978 binop(Iop_Shr64
, cc_dep1
, mkU8(31)),
1981 // No known test case
1982 //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNS)) {
1983 // /* SHLL, then NS --> (ULong) ~ result[31] */
1987 /*---------------- COPY ----------------*/
1988 /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1989 jbe" for example. */
1991 if (isU64(cc_op
, AMD64G_CC_OP_COPY
)
1992 && (isU64(cond
, AMD64CondBE
) || isU64(cond
, AMD64CondNBE
))) {
1993 /* COPY, then BE --> extract C and Z from dep1, and test (C
1995 /* COPY, then NBE --> extract C and Z from dep1, and test (C
1997 ULong nnn
= isU64(cond
, AMD64CondBE
) ? 1 : 0;
2007 binop(Iop_Shr64
, cc_dep1
, mkU8(AMD64G_CC_SHIFT_C
)),
2008 binop(Iop_Shr64
, cc_dep1
, mkU8(AMD64G_CC_SHIFT_Z
))
2017 if (isU64(cc_op
, AMD64G_CC_OP_COPY
)
2018 && (isU64(cond
, AMD64CondB
) || isU64(cond
, AMD64CondNB
))) {
2019 /* COPY, then B --> extract C from dep1, and test (C == 1). */
2020 /* COPY, then NB --> extract C from dep1, and test (C == 0). */
2021 ULong nnn
= isU64(cond
, AMD64CondB
) ? 1 : 0;
2029 binop(Iop_Shr64
, cc_dep1
, mkU8(AMD64G_CC_SHIFT_C
)),
2037 if (isU64(cc_op
, AMD64G_CC_OP_COPY
)
2038 && (isU64(cond
, AMD64CondZ
) || isU64(cond
, AMD64CondNZ
))) {
2039 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
2040 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
2041 ULong nnn
= isU64(cond
, AMD64CondZ
) ? 1 : 0;
2049 binop(Iop_Shr64
, cc_dep1
, mkU8(AMD64G_CC_SHIFT_Z
)),
2057 if (isU64(cc_op
, AMD64G_CC_OP_COPY
)
2058 && (isU64(cond
, AMD64CondP
) || isU64(cond
, AMD64CondNP
))) {
2059 /* COPY, then P --> extract P from dep1, and test (P == 1). */
2060 /* COPY, then NP --> extract P from dep1, and test (P == 0). */
2061 ULong nnn
= isU64(cond
, AMD64CondP
) ? 1 : 0;
2069 binop(Iop_Shr64
, cc_dep1
, mkU8(AMD64G_CC_SHIFT_P
)),
2078 if (cond
->tag
== Iex_Const
&& cc_op
->tag
== Iex_Const
) {
2079 vex_printf("spec request failed: ");
2080 vex_printf(" %s ", function_name
);
2081 for (i
= 0; i
< 2/*arity*/; i
++) {
2092 /* --------- specialising "amd64g_calculate_rflags_c" --------- */
2094 if (vex_streq(function_name
, "amd64g_calculate_rflags_c")) {
2095 /* specialise calls to above "calculate_rflags_c" function */
2096 IRExpr
*cc_op
, *cc_dep1
, *cc_dep2
, *cc_ndep
;
2097 vassert(arity
== 4);
2103 if (isU64(cc_op
, AMD64G_CC_OP_SUBQ
)) {
2104 /* C after sub denotes unsigned less than */
2105 return unop(Iop_1Uto64
,
2110 if (isU64(cc_op
, AMD64G_CC_OP_SUBL
)) {
2111 /* C after sub denotes unsigned less than */
2112 return unop(Iop_1Uto64
,
2114 unop(Iop_64to32
, cc_dep1
),
2115 unop(Iop_64to32
, cc_dep2
)));
2117 if (isU64(cc_op
, AMD64G_CC_OP_SUBW
)) {
2118 /* C after sub denotes unsigned less than */
2119 return unop(Iop_1Uto64
,
2121 binop(Iop_And64
,cc_dep1
,mkU64(0xFFFF)),
2122 binop(Iop_And64
,cc_dep2
,mkU64(0xFFFF))));
2124 if (isU64(cc_op
, AMD64G_CC_OP_SUBB
)) {
2125 /* C after sub denotes unsigned less than */
2126 return unop(Iop_1Uto64
,
2128 binop(Iop_And64
,cc_dep1
,mkU64(0xFF)),
2129 binop(Iop_And64
,cc_dep2
,mkU64(0xFF))));
2131 if (isU64(cc_op
, AMD64G_CC_OP_ADDQ
)) {
2132 /* C after add denotes sum <u either arg */
2133 return unop(Iop_1Uto64
,
2135 binop(Iop_Add64
, cc_dep1
, cc_dep2
),
2138 if (isU64(cc_op
, AMD64G_CC_OP_ADDL
)) {
2139 /* C after add denotes sum <u either arg */
2140 return unop(Iop_1Uto64
,
2142 unop(Iop_64to32
, binop(Iop_Add64
, cc_dep1
, cc_dep2
)),
2143 unop(Iop_64to32
, cc_dep1
)));
2145 if (isU64(cc_op
, AMD64G_CC_OP_LOGICQ
)
2146 || isU64(cc_op
, AMD64G_CC_OP_LOGICL
)
2147 || isU64(cc_op
, AMD64G_CC_OP_LOGICW
)
2148 || isU64(cc_op
, AMD64G_CC_OP_LOGICB
)) {
2149 /* cflag after logic is zero */
2152 if (isU64(cc_op
, AMD64G_CC_OP_DECL
)
2153 || isU64(cc_op
, AMD64G_CC_OP_INCL
)
2154 || isU64(cc_op
, AMD64G_CC_OP_DECQ
)
2155 || isU64(cc_op
, AMD64G_CC_OP_INCQ
)) {
2156 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
2161 if (cc_op
->tag
== Iex_Const
) {
2162 vex_printf("CFLAG "); ppIRExpr(cc_op
); vex_printf("\n");
2167 if (cc_op
->tag
== Iex_Const
) {
2168 vex_printf("spec request failed: ");
2169 vex_printf(" %s ", function_name
);
2170 for (i
= 0; i
< 2/*arity*/; i
++) {
2191 /*---------------------------------------------------------------*/
2192 /*--- Supporting functions for x87 FPU activities. ---*/
2193 /*---------------------------------------------------------------*/
2195 static inline Bool
host_is_little_endian ( void )
2197 UInt x
= 0x76543210;
2198 UChar
* p
= (UChar
*)(&x
);
2199 return toBool(*p
== 0x10);
2202 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
2203 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2204 ULong
amd64g_calculate_FXAM ( ULong tag
, ULong dbl
)
2206 Bool mantissaIsZero
;
2211 vassert(host_is_little_endian());
2213 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
2215 f64
= (UChar
*)(&dbl
);
2216 sign
= toUChar( (f64
[7] >> 7) & 1 );
2218 /* First off, if the tag indicates the register was empty,
2219 return 1,0,sign,1 */
2221 /* vex_printf("Empty\n"); */
2222 return AMD64G_FC_MASK_C3
| 0 | (sign
<< AMD64G_FC_SHIFT_C1
)
2223 | AMD64G_FC_MASK_C0
;
2226 bexp
= (f64
[7] << 4) | ((f64
[6] >> 4) & 0x0F);
2231 (f64
[6] & 0x0F) == 0
2232 && (f64
[5] | f64
[4] | f64
[3] | f64
[2] | f64
[1] | f64
[0]) == 0
2235 /* If both exponent and mantissa are zero, the value is zero.
2236 Return 1,0,sign,0. */
2237 if (bexp
== 0 && mantissaIsZero
) {
2238 /* vex_printf("Zero\n"); */
2239 return AMD64G_FC_MASK_C3
| 0
2240 | (sign
<< AMD64G_FC_SHIFT_C1
) | 0;
2243 /* If exponent is zero but mantissa isn't, it's a denormal.
2244 Return 1,1,sign,0. */
2245 if (bexp
== 0 && !mantissaIsZero
) {
2246 /* vex_printf("Denormal\n"); */
2247 return AMD64G_FC_MASK_C3
| AMD64G_FC_MASK_C2
2248 | (sign
<< AMD64G_FC_SHIFT_C1
) | 0;
2251 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
2252 Return 0,1,sign,1. */
2253 if (bexp
== 0x7FF && mantissaIsZero
) {
2254 /* vex_printf("Inf\n"); */
2255 return 0 | AMD64G_FC_MASK_C2
| (sign
<< AMD64G_FC_SHIFT_C1
)
2256 | AMD64G_FC_MASK_C0
;
2259 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
2260 Return 0,0,sign,1. */
2261 if (bexp
== 0x7FF && !mantissaIsZero
) {
2262 /* vex_printf("NaN\n"); */
2263 return 0 | 0 | (sign
<< AMD64G_FC_SHIFT_C1
) | AMD64G_FC_MASK_C0
;
2266 /* Uh, ok, we give up. It must be a normal finite number.
2269 /* vex_printf("normal\n"); */
2270 return 0 | AMD64G_FC_MASK_C2
| (sign
<< AMD64G_FC_SHIFT_C1
) | 0;
2274 /* This is used to implement both 'frstor' and 'fldenv'. The latter
2275 appears to differ from the former only in that the 8 FP registers
2276 themselves are not transferred into the guest state. */
2278 VexEmNote
do_put_x87 ( Bool moveRegs
,
2279 /*IN*/Fpu_State
* x87_state
,
2280 /*OUT*/VexGuestAMD64State
* vex_state
)
2284 ULong
* vexRegs
= (ULong
*)(&vex_state
->guest_FPREG
[0]);
2285 UChar
* vexTags
= (UChar
*)(&vex_state
->guest_FPTAG
[0]);
2286 UInt ftop
= (x87_state
->env
[FP_ENV_STAT
] >> 11) & 7;
2287 UInt tagw
= x87_state
->env
[FP_ENV_TAG
];
2288 UInt fpucw
= x87_state
->env
[FP_ENV_CTRL
];
2289 UInt c3210
= x87_state
->env
[FP_ENV_STAT
] & 0x4700;
2294 /* Copy registers and tags */
2295 for (stno
= 0; stno
< 8; stno
++) {
2296 preg
= (stno
+ ftop
) & 7;
2297 tag
= (tagw
>> (2*preg
)) & 3;
2299 /* register is empty */
2300 /* hmm, if it's empty, does it still get written? Probably
2301 safer to say it does. If we don't, memcheck could get out
2302 of sync, in that it thinks all FP registers are defined by
2303 this helper, but in reality some have not been updated. */
2305 vexRegs
[preg
] = 0; /* IEEE754 64-bit zero */
2308 /* register is non-empty */
2310 convert_f80le_to_f64le( &x87_state
->reg
[10*stno
],
2311 (UChar
*)&vexRegs
[preg
] );
2317 vex_state
->guest_FTOP
= ftop
;
2320 vex_state
->guest_FC3210
= c3210
;
2322 /* handle the control word, setting FPROUND and detecting any
2323 emulation warnings. */
2324 pair
= amd64g_check_fldcw ( (ULong
)fpucw
);
2325 fpround
= (UInt
)pair
& 0xFFFFFFFFULL
;
2326 ew
= (VexEmNote
)(pair
>> 32);
2328 vex_state
->guest_FPROUND
= fpround
& 3;
2330 /* emulation warnings --> caller */
2335 /* Create an x87 FPU state from the guest state, as close as
2336 we can approximate it. */
2338 void do_get_x87 ( /*IN*/VexGuestAMD64State
* vex_state
,
2339 /*OUT*/Fpu_State
* x87_state
)
2343 ULong
* vexRegs
= (ULong
*)(&vex_state
->guest_FPREG
[0]);
2344 UChar
* vexTags
= (UChar
*)(&vex_state
->guest_FPTAG
[0]);
2345 UInt ftop
= vex_state
->guest_FTOP
;
2346 UInt c3210
= vex_state
->guest_FC3210
;
2348 for (i
= 0; i
< 14; i
++)
2349 x87_state
->env
[i
] = 0;
2351 x87_state
->env
[1] = x87_state
->env
[3] = x87_state
->env
[5]
2352 = x87_state
->env
[13] = 0xFFFF;
2353 x87_state
->env
[FP_ENV_STAT
]
2354 = toUShort(((ftop
& 7) << 11) | (c3210
& 0x4700));
2355 x87_state
->env
[FP_ENV_CTRL
]
2356 = toUShort(amd64g_create_fpucw( vex_state
->guest_FPROUND
));
2358 /* Dump the register stack in ST order. */
2360 for (stno
= 0; stno
< 8; stno
++) {
2361 preg
= (stno
+ ftop
) & 7;
2362 if (vexTags
[preg
] == 0) {
2363 /* register is empty */
2364 tagw
|= (3 << (2*preg
));
2365 convert_f64le_to_f80le( (UChar
*)&vexRegs
[preg
],
2366 &x87_state
->reg
[10*stno
] );
2368 /* register is full. */
2369 tagw
|= (0 << (2*preg
));
2370 convert_f64le_to_f80le( (UChar
*)&vexRegs
[preg
],
2371 &x87_state
->reg
[10*stno
] );
2374 x87_state
->env
[FP_ENV_TAG
] = toUShort(tagw
);
2378 /*---------------------------------------------------------------*/
2379 /*--- Supporting functions for XSAVE/FXSAVE. ---*/
2380 /*---------------------------------------------------------------*/
2382 /* CALLED FROM GENERATED CODE */
2383 /* DIRTY HELPER (reads guest state, writes guest mem) */
2384 /* XSAVE component 0 is the x87 FPU state. */
2385 void amd64g_dirtyhelper_XSAVE_COMPONENT_0
2386 ( VexGuestAMD64State
* gst
, HWord addr
)
2388 /* Derived from values obtained from
2389 vendor_id : AuthenticAMD
2392 model name : AMD Athlon(tm) 64 Processor 3200+
2397 /* Somewhat roundabout, but at least it's simple. */
2399 UShort
* addrS
= (UShort
*)addr
;
2400 UChar
* addrC
= (UChar
*)addr
;
2404 UShort
*srcS
, *dstS
;
2406 do_get_x87( gst
, &tmp
);
2408 /* Now build the proper fxsave x87 image from the fsave x87 image
2411 addrS
[0] = tmp
.env
[FP_ENV_CTRL
]; /* FCW: fpu control word */
2412 addrS
[1] = tmp
.env
[FP_ENV_STAT
]; /* FCW: fpu status word */
2414 /* set addrS[2] in an endian-independent way */
2416 fp_tags
= tmp
.env
[FP_ENV_TAG
];
2417 for (r
= 0; r
< 8; r
++) {
2418 if ( ((fp_tags
>> (2*r
)) & 3) != 3 )
2419 summary_tags
|= (1 << r
);
2421 addrC
[4] = toUChar(summary_tags
); /* FTW: tag summary byte */
2422 addrC
[5] = 0; /* pad */
2424 /* FOP: faulting fpu opcode. From experimentation, the real CPU
2425 does not write this field. (?!) */
2426 addrS
[3] = 0; /* BOGUS */
2428 /* RIP (Last x87 instruction pointer). From experimentation, the
2429 real CPU does not write this field. (?!) */
2430 addrS
[4] = 0; /* BOGUS */
2431 addrS
[5] = 0; /* BOGUS */
2432 addrS
[6] = 0; /* BOGUS */
2433 addrS
[7] = 0; /* BOGUS */
2435 /* RDP (Last x87 data pointer). From experimentation, the real CPU
2436 does not write this field. (?!) */
2437 addrS
[8] = 0; /* BOGUS */
2438 addrS
[9] = 0; /* BOGUS */
2439 addrS
[10] = 0; /* BOGUS */
2440 addrS
[11] = 0; /* BOGUS */
2442 /* addrS[13,12] are MXCSR -- not written */
2443 /* addrS[15,14] are MXCSR_MASK -- not written */
2445 /* Copy in the FP registers, in ST order. */
2446 for (stno
= 0; stno
< 8; stno
++) {
2447 srcS
= (UShort
*)(&tmp
.reg
[10*stno
]);
2448 dstS
= (UShort
*)(&addrS
[16 + 8*stno
]);
2461 /* CALLED FROM GENERATED CODE */
2462 /* DIRTY HELPER (reads guest state, writes guest mem) */
2463 /* XSAVE component 1 is the SSE state. */
2464 void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
2465 ( VexGuestAMD64State
* gst
, HWord addr
)
2467 UShort
* addrS
= (UShort
*)addr
;
2470 /* The only non-register parts of the SSE state are MXCSR and
2472 mxcsr
= amd64g_create_mxcsr( gst
->guest_SSEROUND
);
2474 addrS
[12] = toUShort(mxcsr
); /* MXCSR */
2475 addrS
[13] = toUShort(mxcsr
>> 16);
2477 addrS
[14] = 0xFFFF; /* MXCSR mask (lo16) */
2478 addrS
[15] = 0x0000; /* MXCSR mask (hi16) */
2482 /* VISIBLE TO LIBVEX CLIENT */
2483 /* Do FXSAVE from the supplied VexGuestAMD64State structure and store
2484 the result at the given address which represents a buffer of at
2487 This function is not called from generated code. FXSAVE is dealt
2488 with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1}
2489 functions above plus some in-line IR. This function is merely a
2490 convenience function for VEX's users.
2492 void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State
* gst
,
2493 /*OUT*/HWord fp_state
)
2495 /* Do the x87 part */
2496 amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst
, fp_state
);
2498 /* And now the SSE part, except for the registers themselves. */
2499 amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst
, fp_state
);
2501 /* That's the first 160 bytes of the image done. */
2502 /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is
2503 big-endian, these need to be byte-swapped. */
2504 U128
*xmm
= (U128
*)(fp_state
+ 160);
2505 vassert(host_is_little_endian());
2507 # define COPY_U128(_dst,_src) \
2508 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2509 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2512 COPY_U128( xmm
[0], gst
->guest_YMM0
);
2513 COPY_U128( xmm
[1], gst
->guest_YMM1
);
2514 COPY_U128( xmm
[2], gst
->guest_YMM2
);
2515 COPY_U128( xmm
[3], gst
->guest_YMM3
);
2516 COPY_U128( xmm
[4], gst
->guest_YMM4
);
2517 COPY_U128( xmm
[5], gst
->guest_YMM5
);
2518 COPY_U128( xmm
[6], gst
->guest_YMM6
);
2519 COPY_U128( xmm
[7], gst
->guest_YMM7
);
2520 COPY_U128( xmm
[8], gst
->guest_YMM8
);
2521 COPY_U128( xmm
[9], gst
->guest_YMM9
);
2522 COPY_U128( xmm
[10], gst
->guest_YMM10
);
2523 COPY_U128( xmm
[11], gst
->guest_YMM11
);
2524 COPY_U128( xmm
[12], gst
->guest_YMM12
);
2525 COPY_U128( xmm
[13], gst
->guest_YMM13
);
2526 COPY_U128( xmm
[14], gst
->guest_YMM14
);
2527 COPY_U128( xmm
[15], gst
->guest_YMM15
);
2532 /*---------------------------------------------------------------*/
2533 /*--- Supporting functions for XRSTOR/FXRSTOR. ---*/
2534 /*---------------------------------------------------------------*/
2536 /* CALLED FROM GENERATED CODE */
2537 /* DIRTY HELPER (writes guest state, reads guest mem) */
2538 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
2539 ( VexGuestAMD64State
* gst
, HWord addr
)
2542 UShort
* addrS
= (UShort
*)addr
;
2543 UChar
* addrC
= (UChar
*)addr
;
2547 /* Copy the x87 registers out of the image, into a temporary
2548 Fpu_State struct. */
2549 for (i
= 0; i
< 14; i
++) tmp
.env
[i
] = 0;
2550 for (i
= 0; i
< 80; i
++) tmp
.reg
[i
] = 0;
2551 /* fill in tmp.reg[0..7] */
2552 for (stno
= 0; stno
< 8; stno
++) {
2553 UShort
* dstS
= (UShort
*)(&tmp
.reg
[10*stno
]);
2554 UShort
* srcS
= (UShort
*)(&addrS
[16 + 8*stno
]);
2561 /* fill in tmp.env[0..13] */
2562 tmp
.env
[FP_ENV_CTRL
] = addrS
[0]; /* FCW: fpu control word */
2563 tmp
.env
[FP_ENV_STAT
] = addrS
[1]; /* FCW: fpu status word */
2566 for (r
= 0; r
< 8; r
++) {
2567 if (addrC
[4] & (1<<r
))
2568 fp_tags
|= (0 << (2*r
)); /* EMPTY */
2570 fp_tags
|= (3 << (2*r
)); /* VALID -- not really precise enough. */
2572 tmp
.env
[FP_ENV_TAG
] = fp_tags
;
2574 /* Now write 'tmp' into the guest state. */
2575 VexEmNote warnX87
= do_put_x87( True
/*moveRegs*/, &tmp
, gst
);
2581 /* CALLED FROM GENERATED CODE */
2582 /* DIRTY HELPER (writes guest state, reads guest mem) */
2583 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
2584 ( VexGuestAMD64State
* gst
, HWord addr
)
2586 UShort
* addrS
= (UShort
*)addr
;
2587 UInt w32
= (((UInt
)addrS
[12]) & 0xFFFF)
2588 | ((((UInt
)addrS
[13]) & 0xFFFF) << 16);
2589 ULong w64
= amd64g_check_ldmxcsr( (ULong
)w32
);
2591 VexEmNote warnXMM
= (VexEmNote
)(w64
>> 32);
2593 gst
->guest_SSEROUND
= w64
& 0xFFFFFFFFULL
;
2598 /* VISIBLE TO LIBVEX CLIENT */
2599 /* Do FXRSTOR from the supplied address and store read values to the given
2600 VexGuestAMD64State structure.
2602 This function is not called from generated code. FXRSTOR is dealt
2603 with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
2604 functions above plus some in-line IR. This function is merely a
2605 convenience function for VEX's users.
2607 VexEmNote
LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state
,
2608 /*MOD*/VexGuestAMD64State
* gst
)
2610 /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
2611 to be byte-swapped. */
2612 U128
*xmm
= (U128
*)(fp_state
+ 160);
2614 vassert(host_is_little_endian());
2616 # define COPY_U128(_dst,_src) \
2617 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2618 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2621 COPY_U128( gst
->guest_YMM0
, xmm
[0] );
2622 COPY_U128( gst
->guest_YMM1
, xmm
[1] );
2623 COPY_U128( gst
->guest_YMM2
, xmm
[2] );
2624 COPY_U128( gst
->guest_YMM3
, xmm
[3] );
2625 COPY_U128( gst
->guest_YMM4
, xmm
[4] );
2626 COPY_U128( gst
->guest_YMM5
, xmm
[5] );
2627 COPY_U128( gst
->guest_YMM6
, xmm
[6] );
2628 COPY_U128( gst
->guest_YMM7
, xmm
[7] );
2629 COPY_U128( gst
->guest_YMM8
, xmm
[8] );
2630 COPY_U128( gst
->guest_YMM9
, xmm
[9] );
2631 COPY_U128( gst
->guest_YMM10
, xmm
[10] );
2632 COPY_U128( gst
->guest_YMM11
, xmm
[11] );
2633 COPY_U128( gst
->guest_YMM12
, xmm
[12] );
2634 COPY_U128( gst
->guest_YMM13
, xmm
[13] );
2635 COPY_U128( gst
->guest_YMM14
, xmm
[14] );
2636 COPY_U128( gst
->guest_YMM15
, xmm
[15] );
2641 = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst
, fp_state
);
2643 = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst
, fp_state
);
2645 /* Prefer an X87 emwarn over an XMM one, if both exist. */
2646 if (warnX87
!= EmNote_NONE
)
2653 /*---------------------------------------------------------------*/
2654 /*--- Supporting functions for FSAVE/FRSTOR ---*/
2655 /*---------------------------------------------------------------*/
2657 /* DIRTY HELPER (writes guest state) */
2658 /* Initialise the x87 FPU state as per 'finit'. */
2659 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State
* gst
)
2662 gst
->guest_FTOP
= 0;
2663 for (i
= 0; i
< 8; i
++) {
2664 gst
->guest_FPTAG
[i
] = 0; /* empty */
2665 gst
->guest_FPREG
[i
] = 0; /* IEEE754 64-bit zero */
2667 gst
->guest_FPROUND
= (ULong
)Irrm_NEAREST
;
2668 gst
->guest_FC3210
= 0;
2672 /* CALLED FROM GENERATED CODE */
2673 /* DIRTY HELPER (reads guest memory) */
2674 ULong
amd64g_dirtyhelper_loadF80le ( Addr addrU
)
2677 convert_f80le_to_f64le ( (UChar
*)addrU
, (UChar
*)&f64
);
2681 /* CALLED FROM GENERATED CODE */
2682 /* DIRTY HELPER (writes guest memory) */
2683 void amd64g_dirtyhelper_storeF80le ( Addr addrU
, ULong f64
)
2685 convert_f64le_to_f80le( (UChar
*)&f64
, (UChar
*)addrU
);
2689 /* CALLED FROM GENERATED CODE */
2691 /* mxcsr[15:0] contains a SSE native format MXCSR value.
2692 Extract from it the required SSEROUND value and any resulting
2693 emulation warning, and return (warn << 32) | sseround value.
2695 ULong
amd64g_check_ldmxcsr ( ULong mxcsr
)
2697 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
2698 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2699 ULong rmode
= (mxcsr
>> 13) & 3;
2701 /* Detect any required emulation warnings. */
2702 VexEmNote ew
= EmNote_NONE
;
2704 if ((mxcsr
& 0x1F80) != 0x1F80) {
2705 /* unmasked exceptions! */
2706 ew
= EmWarn_X86_sseExns
;
2709 if (mxcsr
& (1<<15)) {
2714 if (mxcsr
& (1<<6)) {
2716 ew
= EmWarn_X86_daz
;
2719 return (((ULong
)ew
) << 32) | ((ULong
)rmode
);
2723 /* CALLED FROM GENERATED CODE */
2725 /* Given sseround as an IRRoundingMode value, create a suitable SSE
2726 native format MXCSR value. */
2727 ULong
amd64g_create_mxcsr ( ULong sseround
)
2730 return 0x1F80 | (sseround
<< 13);
2735 /* fpucw[15:0] contains a x87 native format FPU control word.
2736 Extract from it the required FPROUND value and any resulting
2737 emulation warning, and return (warn << 32) | fpround value.
2739 ULong
amd64g_check_fldcw ( ULong fpucw
)
2741 /* Decide on a rounding mode. fpucw[11:10] holds it. */
2742 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2743 ULong rmode
= (fpucw
>> 10) & 3;
2745 /* Detect any required emulation warnings. */
2746 VexEmNote ew
= EmNote_NONE
;
2748 if ((fpucw
& 0x3F) != 0x3F) {
2749 /* unmasked exceptions! */
2750 ew
= EmWarn_X86_x87exns
;
2753 if (((fpucw
>> 8) & 3) != 3) {
2754 /* unsupported precision */
2755 ew
= EmWarn_X86_x87precision
;
2758 return (((ULong
)ew
) << 32) | ((ULong
)rmode
);
2763 /* Given fpround as an IRRoundingMode value, create a suitable x87
2764 native format FPU control word. */
2765 ULong
amd64g_create_fpucw ( ULong fpround
)
2768 return 0x037F | (fpround
<< 10);
2772 /* This is used to implement 'fldenv'.
2773 Reads 28 bytes at x87_state[0 .. 27]. */
2774 /* CALLED FROM GENERATED CODE */
2776 VexEmNote
amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State
* vex_state
,
2777 /*IN*/HWord x87_state
)
2779 return do_put_x87( False
, (Fpu_State
*)x87_state
, vex_state
);
2783 /* CALLED FROM GENERATED CODE */
2785 /* Create an x87 FPU env from the guest state, as close as we can
2786 approximate it. Writes 28 bytes at x87_state[0..27]. */
2787 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State
* vex_state
,
2788 /*OUT*/HWord x87_state
)
2792 UChar
* vexTags
= (UChar
*)(&vex_state
->guest_FPTAG
[0]);
2793 Fpu_State
* x87
= (Fpu_State
*)x87_state
;
2794 UInt ftop
= vex_state
->guest_FTOP
;
2795 ULong c3210
= vex_state
->guest_FC3210
;
2797 for (i
= 0; i
< 14; i
++)
2800 x87
->env
[1] = x87
->env
[3] = x87
->env
[5] = x87
->env
[13] = 0xFFFF;
2801 x87
->env
[FP_ENV_STAT
]
2802 = toUShort(toUInt( ((ftop
& 7) << 11) | (c3210
& 0x4700) ));
2803 x87
->env
[FP_ENV_CTRL
]
2804 = toUShort(toUInt( amd64g_create_fpucw( vex_state
->guest_FPROUND
) ));
2806 /* Compute the x87 tag word. */
2808 for (stno
= 0; stno
< 8; stno
++) {
2809 preg
= (stno
+ ftop
) & 7;
2810 if (vexTags
[preg
] == 0) {
2811 /* register is empty */
2812 tagw
|= (3 << (2*preg
));
2814 /* register is full. */
2815 tagw
|= (0 << (2*preg
));
2818 x87
->env
[FP_ENV_TAG
] = toUShort(tagw
);
2820 /* We don't dump the x87 registers, tho. */
2824 /* This is used to implement 'fnsave'.
2825 Writes 108 bytes at x87_state[0 .. 107]. */
2826 /* CALLED FROM GENERATED CODE */
2828 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State
* vex_state
,
2829 /*OUT*/HWord x87_state
)
2831 do_get_x87( vex_state
, (Fpu_State
*)x87_state
);
2835 /* This is used to implement 'fnsaves'.
2836 Writes 94 bytes at x87_state[0 .. 93]. */
2837 /* CALLED FROM GENERATED CODE */
2839 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State
* vex_state
,
2840 /*OUT*/HWord x87_state
)
2844 ULong
* vexRegs
= (ULong
*)(&vex_state
->guest_FPREG
[0]);
2845 UChar
* vexTags
= (UChar
*)(&vex_state
->guest_FPTAG
[0]);
2846 Fpu_State_16
* x87
= (Fpu_State_16
*)x87_state
;
2847 UInt ftop
= vex_state
->guest_FTOP
;
2848 UInt c3210
= vex_state
->guest_FC3210
;
2850 for (i
= 0; i
< 7; i
++)
2853 x87
->env
[FPS_ENV_STAT
]
2854 = toUShort(((ftop
& 7) << 11) | (c3210
& 0x4700));
2855 x87
->env
[FPS_ENV_CTRL
]
2856 = toUShort(amd64g_create_fpucw( vex_state
->guest_FPROUND
));
2858 /* Dump the register stack in ST order. */
2860 for (stno
= 0; stno
< 8; stno
++) {
2861 preg
= (stno
+ ftop
) & 7;
2862 if (vexTags
[preg
] == 0) {
2863 /* register is empty */
2864 tagw
|= (3 << (2*preg
));
2865 convert_f64le_to_f80le( (UChar
*)&vexRegs
[preg
],
2866 &x87
->reg
[10*stno
] );
2868 /* register is full. */
2869 tagw
|= (0 << (2*preg
));
2870 convert_f64le_to_f80le( (UChar
*)&vexRegs
[preg
],
2871 &x87
->reg
[10*stno
] );
2874 x87
->env
[FPS_ENV_TAG
] = toUShort(tagw
);
2878 /* This is used to implement 'frstor'.
2879 Reads 108 bytes at x87_state[0 .. 107]. */
2880 /* CALLED FROM GENERATED CODE */
2882 VexEmNote
amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State
* vex_state
,
2883 /*IN*/HWord x87_state
)
2885 return do_put_x87( True
, (Fpu_State
*)x87_state
, vex_state
);
2889 /* This is used to implement 'frstors'.
2890 Reads 94 bytes at x87_state[0 .. 93]. */
2891 /* CALLED FROM GENERATED CODE */
2893 VexEmNote
amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State
* vex_state
,
2894 /*IN*/HWord x87_state
)
2898 ULong
* vexRegs
= (ULong
*)(&vex_state
->guest_FPREG
[0]);
2899 UChar
* vexTags
= (UChar
*)(&vex_state
->guest_FPTAG
[0]);
2900 Fpu_State_16
* x87
= (Fpu_State_16
*)x87_state
;
2901 UInt ftop
= (x87
->env
[FPS_ENV_STAT
] >> 11) & 7;
2902 UInt tagw
= x87
->env
[FPS_ENV_TAG
];
2903 UInt fpucw
= x87
->env
[FPS_ENV_CTRL
];
2904 UInt c3210
= x87
->env
[FPS_ENV_STAT
] & 0x4700;
2909 /* Copy registers and tags */
2910 for (stno
= 0; stno
< 8; stno
++) {
2911 preg
= (stno
+ ftop
) & 7;
2912 tag
= (tagw
>> (2*preg
)) & 3;
2914 /* register is empty */
2915 /* hmm, if it's empty, does it still get written? Probably
2916 safer to say it does. If we don't, memcheck could get out
2917 of sync, in that it thinks all FP registers are defined by
2918 this helper, but in reality some have not been updated. */
2919 vexRegs
[preg
] = 0; /* IEEE754 64-bit zero */
2922 /* register is non-empty */
2923 convert_f80le_to_f64le( &x87
->reg
[10*stno
],
2924 (UChar
*)&vexRegs
[preg
] );
2930 vex_state
->guest_FTOP
= ftop
;
2933 vex_state
->guest_FC3210
= c3210
;
2935 /* handle the control word, setting FPROUND and detecting any
2936 emulation warnings. */
2937 pair
= amd64g_check_fldcw ( (ULong
)fpucw
);
2938 fpround
= (UInt
)pair
& 0xFFFFFFFFULL
;
2939 ew
= (VexEmNote
)(pair
>> 32);
2941 vex_state
->guest_FPROUND
= fpround
& 3;
2943 /* emulation warnings --> caller */
2948 /*---------------------------------------------------------------*/
2949 /*--- CPUID helpers. ---*/
2950 /*---------------------------------------------------------------*/
2952 /* Claim to be the following CPU, which is probably representative of
2953 the lowliest (earliest) amd64 offerings. It can do neither sse3
2956 vendor_id : AuthenticAMD
2959 model name : AMD Opteron (tm) Processor 848
2962 cache size : 1024 KB
2967 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2968 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2969 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2971 TLB size : 1088 4K pages
2973 cache_alignment : 64
2974 address sizes : 40 bits physical, 48 bits virtual
2975 power management: ts fid vid ttp
2977 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2978 we don't support them. See #291568. 3dnow is 80000001.EDX.31
2979 and 3dnowext is 80000001.EDX.30.
2981 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State
* st
)
2983 # define SET_ABCD(_a,_b,_c,_d) \
2984 do { st->guest_RAX = (ULong)(_a); \
2985 st->guest_RBX = (ULong)(_b); \
2986 st->guest_RCX = (ULong)(_c); \
2987 st->guest_RDX = (ULong)(_d); \
2990 switch (0xFFFFFFFF & st
->guest_RAX
) {
2992 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2995 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2998 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
3001 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is
3002 the original it-is-supported value that the h/w provides.
3004 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
3008 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
3011 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
3014 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3017 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
3020 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
3023 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
3026 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
3029 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3036 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
3039 vendor_id : GenuineIntel
3042 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
3045 cache size : 4096 KB
3054 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3055 mtrr pge mca cmov pat pse36 clflush dts acpi
3056 mmx fxsr sse sse2 ss ht tm syscall nx lm
3057 constant_tsc pni monitor ds_cpl vmx est tm2
3061 cache_alignment : 64
3062 address sizes : 36 bits physical, 48 bits virtual
3065 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State
* st
)
3067 # define SET_ABCD(_a,_b,_c,_d) \
3068 do { st->guest_RAX = (ULong)(_a); \
3069 st->guest_RBX = (ULong)(_b); \
3070 st->guest_RCX = (ULong)(_c); \
3071 st->guest_RDX = (ULong)(_d); \
3074 switch (0xFFFFFFFF & st
->guest_RAX
) {
3076 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
3079 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
3082 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
3085 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3088 switch (0xFFFFFFFF & st
->guest_RCX
) {
3089 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
3090 0x0000003f, 0x00000001); break;
3091 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
3092 0x0000003f, 0x00000001); break;
3093 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
3094 0x00000fff, 0x00000001); break;
3095 default: SET_ABCD(0x00000000, 0x00000000,
3096 0x00000000, 0x00000000); break;
3101 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
3104 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
3107 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3110 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
3113 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3116 unhandled_eax_value
:
3117 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
3120 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3123 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
3126 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3129 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
3132 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
3135 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3138 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
3141 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3144 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3147 goto unhandled_eax_value
;
3153 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
3156 vendor_id : GenuineIntel
3159 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
3162 cache size : 4096 KB
3173 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3174 mtrr pge mca cmov pat pse36 clflush dts acpi
3175 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
3176 lm constant_tsc arch_perfmon pebs bts rep_good
3177 xtopology nonstop_tsc aperfmperf pni pclmulqdq
3178 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
3179 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
3180 arat tpr_shadow vnmi flexpriority ept vpid
3183 cache_alignment : 64
3184 address sizes : 36 bits physical, 48 bits virtual
3187 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State
* st
)
3189 # define SET_ABCD(_a,_b,_c,_d) \
3190 do { st->guest_RAX = (ULong)(_a); \
3191 st->guest_RBX = (ULong)(_b); \
3192 st->guest_RCX = (ULong)(_c); \
3193 st->guest_RDX = (ULong)(_d); \
3196 UInt old_eax
= (UInt
)st
->guest_RAX
;
3197 UInt old_ecx
= (UInt
)st
->guest_RCX
;
3201 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
3204 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
3207 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
3210 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3214 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3215 0x0000003f, 0x00000000); break;
3216 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
3217 0x0000007f, 0x00000000); break;
3218 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3219 0x000001ff, 0x00000000); break;
3220 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3221 0x00000fff, 0x00000002); break;
3222 default: SET_ABCD(0x00000000, 0x00000000,
3223 0x00000000, 0x00000000); break;
3227 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3230 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
3233 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3236 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3239 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3242 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
3247 SET_ABCD(0x00000001, 0x00000002,
3248 0x00000100, 0x00000000); break;
3250 SET_ABCD(0x00000004, 0x00000004,
3251 0x00000201, 0x00000000); break;
3253 SET_ABCD(0x00000000, 0x00000000,
3254 old_ecx
, 0x00000000); break;
3258 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
3262 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3263 0x00000100, 0x00000000); break;
3264 case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
3265 0x00000201, 0x00000000); break;
3266 default: SET_ABCD(0x00000000, 0x00000000,
3267 old_ecx
, 0x00000000); break;
3271 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3274 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3277 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3280 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
3283 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
3286 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3289 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3292 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3295 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3298 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
3305 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
3306 capable. Plus (kludge!) it "supports" HTM.
3308 Also with the following change: claim that XSaveOpt is not
3309 available, by cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1
3310 on the real CPU. Consequently, programs that correctly observe
3311 these CPUID values should only try to use 3 of the 8 XSave-family
3312 instructions: XGETBV, XSAVE and XRSTOR. In particular this avoids
3313 having to implement the compacted or optimised save/restore
3316 vendor_id : GenuineIntel
3319 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
3322 cache size : 6144 KB
3333 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3334 mtrr pge mca cmov pat pse36 clflush dts acpi
3335 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
3336 lm constant_tsc arch_perfmon pebs bts rep_good
3337 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
3338 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
3339 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
3340 lahf_lm ida arat epb xsaveopt pln pts dts
3341 tpr_shadow vnmi flexpriority ept vpid
3345 cache_alignment : 64
3346 address sizes : 36 bits physical, 48 bits virtual
3349 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State
* st
,
3350 ULong hasF16C
, ULong hasRDRAND
,
3353 vassert((hasF16C
>> 1) == 0ULL);
3354 vassert((hasRDRAND
>> 1) == 0ULL);
3355 # define SET_ABCD(_a,_b,_c,_d) \
3356 do { st->guest_RAX = (ULong)(_a); \
3357 st->guest_RBX = (ULong)(_b); \
3358 st->guest_RCX = (ULong)(_c); \
3359 st->guest_RDX = (ULong)(_d); \
3362 UInt old_eax
= (UInt
)st
->guest_RAX
;
3363 UInt old_ecx
= (UInt
)st
->guest_RCX
;
3367 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3370 // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
3371 // but patch in support for them as directed by the caller.
3373 = (hasF16C
? (1U << 29) : 0) | (hasRDRAND
? (1U << 30) : 0);
3374 SET_ABCD(0x000206a7, 0x00100800, (0x1f9ae3bf | ecx_extra
), 0xbfebfbff);
3378 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
3381 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3385 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3386 0x0000003f, 0x00000000); break;
3387 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3388 0x0000003f, 0x00000000); break;
3389 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3390 0x000001ff, 0x00000000); break;
3391 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
3392 0x00001fff, 0x00000006); break;
3393 default: SET_ABCD(0x00000000, 0x00000000,
3394 0x00000000, 0x00000000); break;
3398 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3401 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3406 ebx_extra
= hasRDSEED
? (1U << 18) : 0;
3407 SET_ABCD(0x00000000, 0x00000800 | ebx_extra
, 0x00000000,
3412 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3415 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3418 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3423 SET_ABCD(0x00000001, 0x00000001,
3424 0x00000100, 0x00000000); break;
3426 SET_ABCD(0x00000004, 0x00000004,
3427 0x00000201, 0x00000000); break;
3429 SET_ABCD(0x00000000, 0x00000000,
3430 old_ecx
, 0x00000000); break;
3434 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3438 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3439 0x00000340, 0x00000000); break;
3440 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3441 0x00000000, 0x00000000); break;
3442 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3443 0x00000000, 0x00000000); break;
3444 default: SET_ABCD(0x00000000, 0x00000000,
3445 0x00000000, 0x00000000); break;
3449 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3452 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3455 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3458 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3461 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
3464 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
3467 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
3470 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3473 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3476 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3479 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3482 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3489 /* Claim to be the following CPU (4 x ...), which is AVX2 capable.
3491 With the following change: claim that XSaveOpt is not available, by
3492 cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 on the real
3493 CPU. Consequently, programs that correctly observe these CPUID
3494 values should only try to use 3 of the 8 XSave-family instructions:
3495 XGETBV, XSAVE and XRSTOR. In particular this avoids having to
3496 implement the compacted or optimised save/restore variants.
3498 vendor_id : GenuineIntel
3501 model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
3505 cache size : 8192 KB
3516 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
3517 cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
3518 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc
3519 arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc
3520 aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl
3521 vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1
3522 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave
3523 avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm
3524 tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust
3525 bmi1 avx2 smep bmi2 erms invpcid xsaveopt
3529 cache_alignment : 64
3530 address sizes : 39 bits physical, 48 bits virtual
3533 void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State
* st
,
3534 ULong hasF16C
, ULong hasRDRAND
,
3537 vassert((hasF16C
>> 1) == 0ULL);
3538 vassert((hasRDRAND
>> 1) == 0ULL);
3539 # define SET_ABCD(_a,_b,_c,_d) \
3540 do { st->guest_RAX = (ULong)(_a); \
3541 st->guest_RBX = (ULong)(_b); \
3542 st->guest_RCX = (ULong)(_c); \
3543 st->guest_RDX = (ULong)(_d); \
3546 UInt old_eax
= (UInt
)st
->guest_RAX
;
3547 UInt old_ecx
= (UInt
)st
->guest_RCX
;
3551 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3554 // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
3555 // but patch in support for them as directed by the caller.
3557 = (hasF16C
? (1U << 29) : 0) | (hasRDRAND
? (1U << 30) : 0);
3558 SET_ABCD(0x000306c3, 0x02100800, (0x1ffafbff | ecx_extra
), 0xbfebfbff);
3562 SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
3565 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3569 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3570 0x0000003f, 0x00000000); break;
3571 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3572 0x0000003f, 0x00000000); break;
3573 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3574 0x000001ff, 0x00000000); break;
3575 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3576 0x00001fff, 0x00000006); break;
3577 default: SET_ABCD(0x00000000, 0x00000000,
3578 0x00000000, 0x00000000); break;
3582 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120);
3585 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3589 /* Don't advertise FSGSBASE support, bit 0 in EBX. */
3592 UInt ebx_extra
= hasRDSEED
? (1U << 18) : 0;
3593 SET_ABCD(0x00000000, 0x000027aa | ebx_extra
,
3594 0x00000000, 0x00000000); break;
3596 default: SET_ABCD(0x00000000, 0x00000000,
3597 0x00000000, 0x00000000); break;
3601 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3604 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3607 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3611 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3612 0x00000100, 0x00000002); break;
3613 case 0x00000001: SET_ABCD(0x00000004, 0x00000008,
3614 0x00000201, 0x00000002); break;
3615 default: SET_ABCD(0x00000000, 0x00000000,
3616 old_ecx
, 0x00000002); break;
3620 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3624 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3625 0x00000340, 0x00000000); break;
3626 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3627 0x00000000, 0x00000000); break;
3628 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3629 0x00000000, 0x00000000); break;
3630 default: SET_ABCD(0x00000000, 0x00000000,
3631 0x00000000, 0x00000000); break;
3635 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3638 SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800);
3641 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3644 SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043);
3647 SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000);
3650 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3653 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3656 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3659 SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000);
3662 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3669 /*---------------------------------------------------------------*/
3670 /*--- Misc integer helpers, including rotates and crypto. ---*/
3671 /*---------------------------------------------------------------*/
3673 ULong
amd64g_calculate_RCR ( ULong arg
,
3678 Bool wantRflags
= toBool(szIN
< 0);
3679 ULong sz
= wantRflags
? (-szIN
) : szIN
;
3680 ULong tempCOUNT
= rot_amt
& (sz
== 8 ? 0x3F : 0x1F);
3681 ULong cf
=0, of
=0, tempcf
;
3685 cf
= (rflags_in
>> AMD64G_CC_SHIFT_C
) & 1;
3686 of
= ((arg
>> 63) ^ cf
) & 1;
3687 while (tempCOUNT
> 0) {
3689 arg
= (arg
>> 1) | (cf
<< 63);
3695 while (tempCOUNT
>= 33) tempCOUNT
-= 33;
3696 cf
= (rflags_in
>> AMD64G_CC_SHIFT_C
) & 1;
3697 of
= ((arg
>> 31) ^ cf
) & 1;
3698 while (tempCOUNT
> 0) {
3700 arg
= ((arg
>> 1) & 0x7FFFFFFFULL
) | (cf
<< 31);
3706 while (tempCOUNT
>= 17) tempCOUNT
-= 17;
3707 cf
= (rflags_in
>> AMD64G_CC_SHIFT_C
) & 1;
3708 of
= ((arg
>> 15) ^ cf
) & 1;
3709 while (tempCOUNT
> 0) {
3711 arg
= ((arg
>> 1) & 0x7FFFULL
) | (cf
<< 15);
3717 while (tempCOUNT
>= 9) tempCOUNT
-= 9;
3718 cf
= (rflags_in
>> AMD64G_CC_SHIFT_C
) & 1;
3719 of
= ((arg
>> 7) ^ cf
) & 1;
3720 while (tempCOUNT
> 0) {
3722 arg
= ((arg
>> 1) & 0x7FULL
) | (cf
<< 7);
3728 vpanic("calculate_RCR(amd64g): invalid size");
3733 rflags_in
&= ~(AMD64G_CC_MASK_C
| AMD64G_CC_MASK_O
);
3734 rflags_in
|= (cf
<< AMD64G_CC_SHIFT_C
) | (of
<< AMD64G_CC_SHIFT_O
);
3736 /* caller can ask to have back either the resulting flags or
3737 resulting value, but not both */
3738 return wantRflags
? rflags_in
: arg
;
3741 ULong
amd64g_calculate_RCL ( ULong arg
,
3746 Bool wantRflags
= toBool(szIN
< 0);
3747 ULong sz
= wantRflags
? (-szIN
) : szIN
;
3748 ULong tempCOUNT
= rot_amt
& (sz
== 8 ? 0x3F : 0x1F);
3749 ULong cf
=0, of
=0, tempcf
;
3753 cf
= (rflags_in
>> AMD64G_CC_SHIFT_C
) & 1;
3754 while (tempCOUNT
> 0) {
3755 tempcf
= (arg
>> 63) & 1;
3756 arg
= (arg
<< 1) | (cf
& 1);
3760 of
= ((arg
>> 63) ^ cf
) & 1;
3763 while (tempCOUNT
>= 33) tempCOUNT
-= 33;
3764 cf
= (rflags_in
>> AMD64G_CC_SHIFT_C
) & 1;
3765 while (tempCOUNT
> 0) {
3766 tempcf
= (arg
>> 31) & 1;
3767 arg
= 0xFFFFFFFFULL
& ((arg
<< 1) | (cf
& 1));
3771 of
= ((arg
>> 31) ^ cf
) & 1;
3774 while (tempCOUNT
>= 17) tempCOUNT
-= 17;
3775 cf
= (rflags_in
>> AMD64G_CC_SHIFT_C
) & 1;
3776 while (tempCOUNT
> 0) {
3777 tempcf
= (arg
>> 15) & 1;
3778 arg
= 0xFFFFULL
& ((arg
<< 1) | (cf
& 1));
3782 of
= ((arg
>> 15) ^ cf
) & 1;
3785 while (tempCOUNT
>= 9) tempCOUNT
-= 9;
3786 cf
= (rflags_in
>> AMD64G_CC_SHIFT_C
) & 1;
3787 while (tempCOUNT
> 0) {
3788 tempcf
= (arg
>> 7) & 1;
3789 arg
= 0xFFULL
& ((arg
<< 1) | (cf
& 1));
3793 of
= ((arg
>> 7) ^ cf
) & 1;
3796 vpanic("calculate_RCL(amd64g): invalid size");
3801 rflags_in
&= ~(AMD64G_CC_MASK_C
| AMD64G_CC_MASK_O
);
3802 rflags_in
|= (cf
<< AMD64G_CC_SHIFT_C
) | (of
<< AMD64G_CC_SHIFT_O
);
3804 return wantRflags
? rflags_in
: arg
;
3807 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
3808 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
3810 ULong
amd64g_calculate_pclmul(ULong a
, ULong b
, ULong which
)
3812 ULong hi
, lo
, tmp
, A
[16];
3815 A
[2] = A
[1] << 1; A
[3] = A
[2] ^ a
;
3816 A
[4] = A
[2] << 1; A
[5] = A
[4] ^ a
;
3817 A
[6] = A
[3] << 1; A
[7] = A
[6] ^ a
;
3818 A
[8] = A
[4] << 1; A
[9] = A
[8] ^ a
;
3819 A
[10] = A
[5] << 1; A
[11] = A
[10] ^ a
;
3820 A
[12] = A
[6] << 1; A
[13] = A
[12] ^ a
;
3821 A
[14] = A
[7] << 1; A
[15] = A
[14] ^ a
;
3823 lo
= (A
[b
>> 60] << 4) ^ A
[(b
>> 56) & 15];
3825 lo
= (lo
<< 8) ^ (A
[(b
>> 52) & 15] << 4) ^ A
[(b
>> 48) & 15];
3826 hi
= (hi
<< 8) | (lo
>> 56);
3827 lo
= (lo
<< 8) ^ (A
[(b
>> 44) & 15] << 4) ^ A
[(b
>> 40) & 15];
3828 hi
= (hi
<< 8) | (lo
>> 56);
3829 lo
= (lo
<< 8) ^ (A
[(b
>> 36) & 15] << 4) ^ A
[(b
>> 32) & 15];
3830 hi
= (hi
<< 8) | (lo
>> 56);
3831 lo
= (lo
<< 8) ^ (A
[(b
>> 28) & 15] << 4) ^ A
[(b
>> 24) & 15];
3832 hi
= (hi
<< 8) | (lo
>> 56);
3833 lo
= (lo
<< 8) ^ (A
[(b
>> 20) & 15] << 4) ^ A
[(b
>> 16) & 15];
3834 hi
= (hi
<< 8) | (lo
>> 56);
3835 lo
= (lo
<< 8) ^ (A
[(b
>> 12) & 15] << 4) ^ A
[(b
>> 8) & 15];
3836 hi
= (hi
<< 8) | (lo
>> 56);
3837 lo
= (lo
<< 8) ^ (A
[(b
>> 4) & 15] << 4) ^ A
[b
& 15];
3841 tmp
= -((a
>> 63) & 1); tmp
&= ((b
& (m0
* 0xfe)) >> 1); hi
= hi
^ tmp
;
3842 tmp
= -((a
>> 62) & 1); tmp
&= ((b
& (m0
* 0xfc)) >> 2); hi
= hi
^ tmp
;
3843 tmp
= -((a
>> 61) & 1); tmp
&= ((b
& (m0
* 0xf8)) >> 3); hi
= hi
^ tmp
;
3844 tmp
= -((a
>> 60) & 1); tmp
&= ((b
& (m0
* 0xf0)) >> 4); hi
= hi
^ tmp
;
3845 tmp
= -((a
>> 59) & 1); tmp
&= ((b
& (m0
* 0xe0)) >> 5); hi
= hi
^ tmp
;
3846 tmp
= -((a
>> 58) & 1); tmp
&= ((b
& (m0
* 0xc0)) >> 6); hi
= hi
^ tmp
;
3847 tmp
= -((a
>> 57) & 1); tmp
&= ((b
& (m0
* 0x80)) >> 7); hi
= hi
^ tmp
;
3849 return which
? hi
: lo
;
3853 /* CALLED FROM GENERATED CODE */
3854 /* DIRTY HELPER (non-referentially-transparent) */
3855 /* Horrible hack. On non-amd64 platforms, return 1. */
3856 ULong
amd64g_dirtyhelper_RDTSC ( void )
3858 # if defined(__x86_64__)
3860 __asm__
__volatile__("rdtsc" : "=a" (eax
), "=d" (edx
));
3861 return (((ULong
)edx
) << 32) | ((ULong
)eax
);
3867 /* CALLED FROM GENERATED CODE */
3868 /* DIRTY HELPER (non-referentially-transparent) */
3869 /* Horrible hack. On non-amd64 platforms, return 1. */
3870 /* This uses a different calling convention from _RDTSC just above
3871 only because of the difficulty of returning 96 bits from a C
3872 function -- RDTSC returns 64 bits and so is simple by comparison,
3874 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State
* st
)
3876 # if defined(__x86_64__)
3878 __asm__
__volatile__("rdtscp" : "=a" (eax
), "=d" (edx
), "=c" (ecx
));
3879 st
->guest_RAX
= (ULong
)eax
;
3880 st
->guest_RCX
= (ULong
)ecx
;
3881 st
->guest_RDX
= (ULong
)edx
;
3887 /* CALLED FROM GENERATED CODE */
3888 /* DIRTY HELPER (non-referentially-transparent) */
3889 /* Horrible hack. On non-amd64 platforms, return 0. */
3890 ULong
amd64g_dirtyhelper_IN ( ULong portno
, ULong sz
/*1,2 or 4*/ )
3892 # if defined(__x86_64__)
3897 __asm__
__volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
3898 : "=a" (r
) : "Nd" (portno
));
3901 __asm__
__volatile__("movq $0,%%rax; inw %w1,%w0"
3902 : "=a" (r
) : "Nd" (portno
));
3905 __asm__
__volatile__("movq $0,%%rax; inb %w1,%b0"
3906 : "=a" (r
) : "Nd" (portno
));
3909 break; /* note: no 64-bit version of insn exists */
3918 /* CALLED FROM GENERATED CODE */
3919 /* DIRTY HELPER (non-referentially-transparent) */
3920 /* Horrible hack. On non-amd64 platforms, do nothing. */
3921 void amd64g_dirtyhelper_OUT ( ULong portno
, ULong data
, ULong sz
/*1,2 or 4*/ )
3923 # if defined(__x86_64__)
3927 __asm__
__volatile__("movq %0,%%rax; outl %%eax, %w1"
3928 : : "a" (data
), "Nd" (portno
));
3931 __asm__
__volatile__("outw %w0, %w1"
3932 : : "a" (data
), "Nd" (portno
));
3935 __asm__
__volatile__("outb %b0, %w1"
3936 : : "a" (data
), "Nd" (portno
));
3939 break; /* note: no 64-bit version of insn exists */
3946 /* CALLED FROM GENERATED CODE */
3947 /* DIRTY HELPER (non-referentially-transparent) */
3948 /* Horrible hack. On non-amd64 platforms, do nothing. */
3949 /* op = 0: call the native SGDT instruction.
3950 op = 1: call the native SIDT instruction.
3952 void amd64g_dirtyhelper_SxDT ( void *address
, ULong op
) {
3953 # if defined(__x86_64__)
3956 __asm__
__volatile__("sgdt (%0)" : : "r" (address
) : "memory");
3959 __asm__
__volatile__("sidt (%0)" : : "r" (address
) : "memory");
3962 vpanic("amd64g_dirtyhelper_SxDT");
3966 UChar
* p
= (UChar
*)address
;
3967 p
[0] = p
[1] = p
[2] = p
[3] = p
[4] = p
[5] = 0;
3968 p
[6] = p
[7] = p
[8] = p
[9] = 0;
3972 /* CALLED FROM GENERATED CODE */
3973 /* DIRTY HELPER (non-referentially-transparent) */
3974 /* Horrible hack. On non-amd64 platforms, do nothing. On amd64 targets, get a
3975 32 bit random number using RDRAND, and return it and the associated rflags.C
3977 ULong
amd64g_dirtyhelper_RDRAND ( void ) {
3978 # if defined(__x86_64__)
3981 __asm__
__volatile__(
3988 : "=r"(res
), "=r"(cflag
) : : "r11", "r12"
3990 res
&= 0xFFFFFFFFULL
;
3992 return (cflag
<< 32) | res
;
3994 /* There's nothing we can sensibly do. Return a value denoting
3995 "I succeeded, and the random bits are all zero" :-/ */
4000 ULong
amd64g_dirtyhelper_RDSEED ( void ) {
4001 # if defined(__x86_64__)
4004 __asm__
__volatile__(
4011 : "=r"(res
), "=r"(cflag
) : : "r11", "r12"
4013 res
&= 0xFFFFFFFFULL
;
4015 return (cflag
<< 32) | res
;
4017 /* There's nothing we can sensibly do. Return a value denoting
4018 "I succeeded, and the random bits are all zero" :-/ */
4023 /*---------------------------------------------------------------*/
4024 /*--- Helpers for MMX/SSE/SSE2. ---*/
4025 /*---------------------------------------------------------------*/
4027 static inline UChar
abdU8 ( UChar xx
, UChar yy
) {
4028 return toUChar(xx
>yy
? xx
-yy
: yy
-xx
);
4031 static inline ULong
mk32x2 ( UInt w1
, UInt w0
) {
4032 return (((ULong
)w1
) << 32) | ((ULong
)w0
);
4035 static inline UShort
sel16x4_3 ( ULong w64
) {
4036 UInt hi32
= toUInt(w64
>> 32);
4037 return toUShort(hi32
>> 16);
4039 static inline UShort
sel16x4_2 ( ULong w64
) {
4040 UInt hi32
= toUInt(w64
>> 32);
4041 return toUShort(hi32
);
4043 static inline UShort
sel16x4_1 ( ULong w64
) {
4044 UInt lo32
= toUInt(w64
);
4045 return toUShort(lo32
>> 16);
4047 static inline UShort
sel16x4_0 ( ULong w64
) {
4048 UInt lo32
= toUInt(w64
);
4049 return toUShort(lo32
);
4052 static inline UChar
sel8x8_7 ( ULong w64
) {
4053 UInt hi32
= toUInt(w64
>> 32);
4054 return toUChar(hi32
>> 24);
4056 static inline UChar
sel8x8_6 ( ULong w64
) {
4057 UInt hi32
= toUInt(w64
>> 32);
4058 return toUChar(hi32
>> 16);
4060 static inline UChar
sel8x8_5 ( ULong w64
) {
4061 UInt hi32
= toUInt(w64
>> 32);
4062 return toUChar(hi32
>> 8);
4064 static inline UChar
sel8x8_4 ( ULong w64
) {
4065 UInt hi32
= toUInt(w64
>> 32);
4066 return toUChar(hi32
>> 0);
4068 static inline UChar
sel8x8_3 ( ULong w64
) {
4069 UInt lo32
= toUInt(w64
);
4070 return toUChar(lo32
>> 24);
4072 static inline UChar
sel8x8_2 ( ULong w64
) {
4073 UInt lo32
= toUInt(w64
);
4074 return toUChar(lo32
>> 16);
4076 static inline UChar
sel8x8_1 ( ULong w64
) {
4077 UInt lo32
= toUInt(w64
);
4078 return toUChar(lo32
>> 8);
4080 static inline UChar
sel8x8_0 ( ULong w64
) {
4081 UInt lo32
= toUInt(w64
);
4082 return toUChar(lo32
>> 0);
4085 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4086 ULong
amd64g_calculate_mmx_pmaddwd ( ULong xx
, ULong yy
)
4090 (((Int
)(Short
)sel16x4_3(xx
)) * ((Int
)(Short
)sel16x4_3(yy
)))
4091 + (((Int
)(Short
)sel16x4_2(xx
)) * ((Int
)(Short
)sel16x4_2(yy
))),
4092 (((Int
)(Short
)sel16x4_1(xx
)) * ((Int
)(Short
)sel16x4_1(yy
)))
4093 + (((Int
)(Short
)sel16x4_0(xx
)) * ((Int
)(Short
)sel16x4_0(yy
)))
4097 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4098 ULong
amd64g_calculate_mmx_psadbw ( ULong xx
, ULong yy
)
4101 t
+= (UInt
)abdU8( sel8x8_7(xx
), sel8x8_7(yy
) );
4102 t
+= (UInt
)abdU8( sel8x8_6(xx
), sel8x8_6(yy
) );
4103 t
+= (UInt
)abdU8( sel8x8_5(xx
), sel8x8_5(yy
) );
4104 t
+= (UInt
)abdU8( sel8x8_4(xx
), sel8x8_4(yy
) );
4105 t
+= (UInt
)abdU8( sel8x8_3(xx
), sel8x8_3(yy
) );
4106 t
+= (UInt
)abdU8( sel8x8_2(xx
), sel8x8_2(yy
) );
4107 t
+= (UInt
)abdU8( sel8x8_1(xx
), sel8x8_1(yy
) );
4108 t
+= (UInt
)abdU8( sel8x8_0(xx
), sel8x8_0(yy
) );
4113 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4114 ULong
amd64g_calculate_sse_phminposuw ( ULong sLo
, ULong sHi
)
4118 t
= sel16x4_0(sLo
); if (True
) { min
= t
; idx
= 0; }
4119 t
= sel16x4_1(sLo
); if (t
< min
) { min
= t
; idx
= 1; }
4120 t
= sel16x4_2(sLo
); if (t
< min
) { min
= t
; idx
= 2; }
4121 t
= sel16x4_3(sLo
); if (t
< min
) { min
= t
; idx
= 3; }
4122 t
= sel16x4_0(sHi
); if (t
< min
) { min
= t
; idx
= 4; }
4123 t
= sel16x4_1(sHi
); if (t
< min
) { min
= t
; idx
= 5; }
4124 t
= sel16x4_2(sHi
); if (t
< min
) { min
= t
; idx
= 6; }
4125 t
= sel16x4_3(sHi
); if (t
< min
) { min
= t
; idx
= 7; }
4126 return ((ULong
)(idx
<< 16)) | ((ULong
)min
);
4129 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4130 ULong
amd64g_calc_crc32b ( ULong crcIn
, ULong b
)
4133 ULong crc
= (b
& 0xFFULL
) ^ crcIn
;
4134 for (i
= 0; i
< 8; i
++)
4135 crc
= (crc
>> 1) ^ ((crc
& 1) ? 0x82f63b78ULL
: 0);
4139 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4140 ULong
amd64g_calc_crc32w ( ULong crcIn
, ULong w
)
4143 ULong crc
= (w
& 0xFFFFULL
) ^ crcIn
;
4144 for (i
= 0; i
< 16; i
++)
4145 crc
= (crc
>> 1) ^ ((crc
& 1) ? 0x82f63b78ULL
: 0);
4149 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4150 ULong
amd64g_calc_crc32l ( ULong crcIn
, ULong l
)
4153 ULong crc
= (l
& 0xFFFFFFFFULL
) ^ crcIn
;
4154 for (i
= 0; i
< 32; i
++)
4155 crc
= (crc
>> 1) ^ ((crc
& 1) ? 0x82f63b78ULL
: 0);
4159 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4160 ULong
amd64g_calc_crc32q ( ULong crcIn
, ULong q
)
4162 ULong crc
= amd64g_calc_crc32l(crcIn
, q
);
4163 return amd64g_calc_crc32l(crc
, q
>> 32);
4167 /* .. helper for next fn .. */
4168 static inline ULong
sad_8x4 ( ULong xx
, ULong yy
)
4171 t
+= (UInt
)abdU8( sel8x8_3(xx
), sel8x8_3(yy
) );
4172 t
+= (UInt
)abdU8( sel8x8_2(xx
), sel8x8_2(yy
) );
4173 t
+= (UInt
)abdU8( sel8x8_1(xx
), sel8x8_1(yy
) );
4174 t
+= (UInt
)abdU8( sel8x8_0(xx
), sel8x8_0(yy
) );
4178 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4179 ULong
amd64g_calc_mpsadbw ( ULong sHi
, ULong sLo
,
4180 ULong dHi
, ULong dLo
,
4181 ULong imm_and_return_control_bit
)
4183 UInt imm8
= imm_and_return_control_bit
& 7;
4184 Bool calcHi
= (imm_and_return_control_bit
>> 7) & 1;
4185 UInt srcOffsL
= imm8
& 3; /* src offs in 32-bit (L) chunks */
4186 UInt dstOffsL
= (imm8
>> 2) & 1; /* dst offs in ditto chunks */
4187 /* For src we only need 32 bits, so get them into the
4188 lower half of a 64 bit word. */
4189 ULong src
= ((srcOffsL
& 2) ? sHi
: sLo
) >> (32 * (srcOffsL
& 1));
4190 /* For dst we need to get hold of 56 bits (7 bytes) from a total of
4191 11 bytes. If calculating the low part of the result, need bytes
4192 dstOffsL * 4 + (0 .. 6); if calculating the high part,
4193 dstOffsL * 4 + (4 .. 10). */
4195 /* dstOffL = 0, Lo -> 0 .. 6
4196 dstOffL = 1, Lo -> 4 .. 10
4197 dstOffL = 0, Hi -> 4 .. 10
4198 dstOffL = 1, Hi -> 8 .. 14
4200 if (calcHi
&& dstOffsL
) {
4202 dst
= dHi
& 0x00FFFFFFFFFFFFFFULL
;
4204 else if (!calcHi
&& !dstOffsL
) {
4206 dst
= dLo
& 0x00FFFFFFFFFFFFFFULL
;
4210 dst
= (dLo
>> 32) | ((dHi
& 0x00FFFFFFULL
) << 32);
4212 ULong r0
= sad_8x4( dst
>> 0, src
);
4213 ULong r1
= sad_8x4( dst
>> 8, src
);
4214 ULong r2
= sad_8x4( dst
>> 16, src
);
4215 ULong r3
= sad_8x4( dst
>> 24, src
);
4216 ULong res
= (r3
<< 48) | (r2
<< 32) | (r1
<< 16) | r0
;
4220 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4221 ULong
amd64g_calculate_pext ( ULong src_masked
, ULong mask
)
4226 for (src_bit
= 1; src_bit
; src_bit
<<= 1) {
4227 if (mask
& src_bit
) {
4228 if (src_masked
& src_bit
) dst
|= dst_bit
;
4235 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4236 ULong
amd64g_calculate_pdep ( ULong src
, ULong mask
)
4241 for (dst_bit
= 1; dst_bit
; dst_bit
<<= 1) {
4242 if (mask
& dst_bit
) {
4243 if (src
& src_bit
) dst
|= dst_bit
;
4250 /*---------------------------------------------------------------*/
4251 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
4252 /*---------------------------------------------------------------*/
4254 static UInt
zmask_from_V128 ( V128
* arg
)
4257 for (i
= 0; i
< 16; i
++) {
4258 res
|= ((arg
->w8
[i
] == 0) ? 1 : 0) << i
;
4263 static UInt
zmask_from_V128_wide ( V128
* arg
)
4266 for (i
= 0; i
< 8; i
++) {
4267 res
|= ((arg
->w16
[i
] == 0) ? 1 : 0) << i
;
4272 /* Helps with PCMP{I,E}STR{I,M}.
4274 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really,
4275 actually it could be a clean helper, but for the fact that we can't
4276 pass by value 2 x V128 to a clean helper, nor have one returned.)
4277 Reads guest state, writes to guest state for the xSTRM cases, no
4278 accesses of memory, is a pure function.
4280 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
4281 the callee knows which I/E and I/M variant it is dealing with and
4282 what the specific operation is. 4th byte of opcode is in the range
4289 gstOffL and gstOffR are the guest state offsets for the two XMM
4290 register inputs. We never have to deal with the memory case since
4291 that is handled by pre-loading the relevant value into the fake
4294 For ESTRx variants, edxIN and eaxIN hold the values of those two
4297 In all cases, the bottom 16 bits of the result contain the new
4298 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
4299 result hold the new %ecx value. For xSTRM variants, the helper
4300 writes the result directly to the guest XMM0.
4302 Declarable side effects: in all cases, reads guest state at
4303 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
4306 Is expected to be called with opc_and_imm combinations which have
4307 actually been validated, and will assert if otherwise. The front
4308 end should ensure we're only called with verified values.
4310 ULong
amd64g_dirtyhelper_PCMPxSTRx (
4311 VexGuestAMD64State
* gst
,
4313 HWord gstOffL
, HWord gstOffR
,
4314 HWord edxIN
, HWord eaxIN
4317 HWord opc4
= (opc4_and_imm
>> 8) & 0xFF;
4318 HWord imm8
= opc4_and_imm
& 0xFF;
4319 HWord isISTRx
= opc4
& 2;
4320 HWord isxSTRM
= (opc4
& 1) ^ 1;
4321 vassert((opc4
& 0xFC) == 0x60); /* 0x60 .. 0x63 */
4322 HWord wide
= (imm8
& 1);
4324 // where the args are
4325 V128
* argL
= (V128
*)( ((UChar
*)gst
) + gstOffL
);
4326 V128
* argR
= (V128
*)( ((UChar
*)gst
) + gstOffR
);
4328 /* Create the arg validity masks, either from the vectors
4329 themselves or from the supplied edx/eax values. */
4330 // FIXME: this is only right for the 8-bit data cases.
4331 // At least that is asserted above.
4332 UInt zmaskL
, zmaskR
;
4334 // temp spot for the resulting flags and vector.
4338 // for checking whether case was handled
4343 zmaskL
= zmask_from_V128_wide(argL
);
4344 zmaskR
= zmask_from_V128_wide(argR
);
4347 tmp
= edxIN
& 0xFFFFFFFF;
4348 if (tmp
< -8) tmp
= -8;
4349 if (tmp
> 8) tmp
= 8;
4350 if (tmp
< 0) tmp
= -tmp
;
4351 vassert(tmp
>= 0 && tmp
<= 8);
4352 zmaskL
= (1 << tmp
) & 0xFF;
4353 tmp
= eaxIN
& 0xFFFFFFFF;
4354 if (tmp
< -8) tmp
= -8;
4355 if (tmp
> 8) tmp
= 8;
4356 if (tmp
< 0) tmp
= -tmp
;
4357 vassert(tmp
>= 0 && tmp
<= 8);
4358 zmaskR
= (1 << tmp
) & 0xFF;
4361 ok
= compute_PCMPxSTRx_wide (
4362 &resV
, &resOSZACP
, argL
, argR
,
4363 zmaskL
, zmaskR
, imm8
, (Bool
)isxSTRM
4367 zmaskL
= zmask_from_V128(argL
);
4368 zmaskR
= zmask_from_V128(argR
);
4371 tmp
= edxIN
& 0xFFFFFFFF;
4372 if (tmp
< -16) tmp
= -16;
4373 if (tmp
> 16) tmp
= 16;
4374 if (tmp
< 0) tmp
= -tmp
;
4375 vassert(tmp
>= 0 && tmp
<= 16);
4376 zmaskL
= (1 << tmp
) & 0xFFFF;
4377 tmp
= eaxIN
& 0xFFFFFFFF;
4378 if (tmp
< -16) tmp
= -16;
4379 if (tmp
> 16) tmp
= 16;
4380 if (tmp
< 0) tmp
= -tmp
;
4381 vassert(tmp
>= 0 && tmp
<= 16);
4382 zmaskR
= (1 << tmp
) & 0xFFFF;
4385 ok
= compute_PCMPxSTRx (
4386 &resV
, &resOSZACP
, argL
, argR
,
4387 zmaskL
, zmaskR
, imm8
, (Bool
)isxSTRM
4391 // front end shouldn't pass us any imm8 variants we can't
4395 // So, finally we need to get the results back to the caller.
4396 // In all cases, the new OSZACP value is the lowest 16 of
4397 // the return value.
4399 gst
->guest_YMM0
[0] = resV
.w32
[0];
4400 gst
->guest_YMM0
[1] = resV
.w32
[1];
4401 gst
->guest_YMM0
[2] = resV
.w32
[2];
4402 gst
->guest_YMM0
[3] = resV
.w32
[3];
4403 return resOSZACP
& 0x8D5;
4405 UInt newECX
= resV
.w32
[0] & 0xFFFF;
4406 return (newECX
<< 16) | (resOSZACP
& 0x8D5);
4410 /*---------------------------------------------------------------*/
4411 /*--- AES primitives and helpers ---*/
4412 /*---------------------------------------------------------------*/
4413 /* a 16 x 16 matrix */
4414 static const UChar sbox
[256] = { // row nr
4415 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
4416 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
4417 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
4418 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
4419 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
4420 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
4421 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
4422 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
4423 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
4424 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
4425 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
4426 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
4427 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
4428 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
4429 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
4430 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
4431 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
4432 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
4433 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
4434 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
4435 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
4436 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
4437 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
4438 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
4439 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
4440 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
4441 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
4442 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
4443 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
4444 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
4445 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
4446 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
4448 static void SubBytes (V128
* v
)
4452 for (i
= 0; i
< 16; i
++)
4453 r
.w8
[i
] = sbox
[v
->w8
[i
]];
4457 /* a 16 x 16 matrix */
4458 static const UChar invsbox
[256] = { // row nr
4459 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
4460 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
4461 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
4462 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
4463 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
4464 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
4465 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
4466 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
4467 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
4468 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
4469 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
4470 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
4471 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
4472 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
4473 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
4474 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
4475 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
4476 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
4477 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
4478 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
4479 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
4480 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
4481 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
4482 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
4483 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
4484 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
4485 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
4486 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
4487 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
4488 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
4489 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
4490 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
4492 static void InvSubBytes (V128
* v
)
4496 for (i
= 0; i
< 16; i
++)
4497 r
.w8
[i
] = invsbox
[v
->w8
[i
]];
4501 static const UChar ShiftRows_op
[16] =
4502 {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
4503 static void ShiftRows (V128
* v
)
4507 for (i
= 0; i
< 16; i
++)
4508 r
.w8
[i
] = v
->w8
[ShiftRows_op
[15-i
]];
4512 static const UChar InvShiftRows_op
[16] =
4513 {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
4514 static void InvShiftRows (V128
* v
)
4518 for (i
= 0; i
< 16; i
++)
4519 r
.w8
[i
] = v
->w8
[InvShiftRows_op
[15-i
]];
4523 /* Multiplication of the finite fields elements of AES.
4524 See "A Specification for The AES Algorithm Rijndael
4525 (by Joan Daemen & Vincent Rijmen)"
4526 Dr. Brian Gladman, v3.1, 3rd March 2001. */
4527 /* N values so that (hex) xy = 0x03^N.
4528 0x00 cannot be used. We put 0xff for this value.*/
4529 /* a 16 x 16 matrix */
4530 static const UChar Nxy
[256] = { // row nr
4531 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
4532 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
4533 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
4534 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
4535 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
4536 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
4537 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
4538 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
4539 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
4540 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
4541 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
4542 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
4543 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
4544 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
4545 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
4546 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
4547 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
4548 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
4549 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
4550 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
4551 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
4552 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
4553 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
4554 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
4555 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
4556 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
4557 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
4558 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
4559 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
4560 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
4561 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
4562 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
4565 /* E values so that E = 0x03^xy. */
4566 static const UChar Exy
[256] = { // row nr
4567 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
4568 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
4569 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
4570 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
4571 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
4572 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
4573 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
4574 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
4575 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
4576 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
4577 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
4578 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
4579 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
4580 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
4581 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
4582 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
4583 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
4584 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
4585 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
4586 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
4587 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
4588 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
4589 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
4590 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
4591 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
4592 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
4593 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
4594 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
4595 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
4596 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
4597 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
4598 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
4600 static inline UChar
ff_mul(UChar u1
, UChar u2
)
4602 if ((u1
> 0) && (u2
> 0)) {
4603 UInt ui
= Nxy
[u1
] + Nxy
[u2
];
/* AES MixColumns step, applied in place to the 16-byte state *v.
   P(x,row,col) addresses byte (row*4 + col) of a V128.  For each j,
   the four outputs are GF(2^8) linear combinations of the four inputs
   with coefficients {02,03,01,01} rotated per output (visible below),
   using ff_mul for the multiplications.
   NOTE(review): mangled chunk; the original declarations of the
   temporaries (r, j) and the trailing lines 4626..4630 (write-back of
   r into *v, #undef P, closing brace) are missing from this view, so
   the code is left byte-identical rather than reconstructed. */
4612 static void MixColumns (V128
* v
)
4616 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4617 for (j
= 0; j
< 4; j
++) {
4618 P(&r
,j
,0) = ff_mul(0x02, P(v
,j
,0)) ^ ff_mul(0x03, P(v
,j
,1))
4619 ^ P(v
,j
,2) ^ P(v
,j
,3);
4620 P(&r
,j
,1) = P(v
,j
,0) ^ ff_mul( 0x02, P(v
,j
,1) )
4621 ^ ff_mul(0x03, P(v
,j
,2) ) ^ P(v
,j
,3);
4622 P(&r
,j
,2) = P(v
,j
,0) ^ P(v
,j
,1) ^ ff_mul( 0x02, P(v
,j
,2) )
4623 ^ ff_mul(0x03, P(v
,j
,3) );
4624 P(&r
,j
,3) = ff_mul(0x03, P(v
,j
,0) ) ^ P(v
,j
,1) ^ P(v
,j
,2)
4625 ^ ff_mul( 0x02, P(v
,j
,3) );
/* Inverse of MixColumns, applied in place to the 16-byte state *v.
   Same structure as MixColumns above, but with the inverse-matrix
   coefficients {0e,0b,0d,09} (rotated per output, visible below).
   NOTE(review): mangled chunk; the declarations of the temporaries
   (r, j) and the trailing lines of the function (write-back, #undef P,
   closing brace) are missing from this view, so the code is left
   byte-identical rather than reconstructed. */
4635 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4631 static void InvMixColumns (V128
* v
)
4636 for (j
= 0; j
< 4; j
++) {
4637 P(&r
,j
,0) = ff_mul(0x0e, P(v
,j
,0) ) ^ ff_mul(0x0b, P(v
,j
,1) )
4638 ^ ff_mul(0x0d,P(v
,j
,2) ) ^ ff_mul(0x09, P(v
,j
,3) );
4639 P(&r
,j
,1) = ff_mul(0x09, P(v
,j
,0) ) ^ ff_mul(0x0e, P(v
,j
,1) )
4640 ^ ff_mul(0x0b,P(v
,j
,2) ) ^ ff_mul(0x0d, P(v
,j
,3) );
4641 P(&r
,j
,2) = ff_mul(0x0d, P(v
,j
,0) ) ^ ff_mul(0x09, P(v
,j
,1) )
4642 ^ ff_mul(0x0e,P(v
,j
,2) ) ^ ff_mul(0x0b, P(v
,j
,3) );
4643 P(&r
,j
,3) = ff_mul(0x0b, P(v
,j
,0) ) ^ ff_mul(0x0d, P(v
,j
,1) )
4644 ^ ff_mul(0x09,P(v
,j
,2) ) ^ ff_mul(0x0e, P(v
,j
,3) );
/* Dirty helper implementing the AES-NI data-path instructions
   (opc4 selects: 0xDC AESENC, 0xDD AESENCLAST, 0xDE AESDEC,
   0xDF AESDECLAST, 0xDB AESIMC).  The three HWord offsets locate the
   destination and the two source V128 operands inside the guest
   state.
   NOTE(review): mangled chunk; the switch header, the declaration of
   the temporary r, and the per-case round-transform call lines (the
   lines between each case label and the visible XOR-with-round-key
   stores) are missing from this view, so the code is left
   byte-identical rather than reconstructed. */
4651 /* For description, see definition in guest_amd64_defs.h */
4652 void amd64g_dirtyhelper_AES (
4653 VexGuestAMD64State
* gst
,
4654 HWord opc4
, HWord gstOffD
,
4655 HWord gstOffL
, HWord gstOffR
/* Resolve the guest-state offsets to V128 pointers. */
4658 // where the args are
4659 V128
* argD
= (V128
*)( ((UChar
*)gst
) + gstOffD
);
4660 V128
* argL
= (V128
*)( ((UChar
*)gst
) + gstOffL
);
4661 V128
* argR
= (V128
*)( ((UChar
*)gst
) + gstOffR
);
/* Encrypt round(s): result is the transformed state XORed with the
   round key in argL (XOR visible below). */
4665 case 0xDC: /* AESENC */
4666 case 0xDD: /* AESENCLAST */
4672 argD
->w64
[0] = r
.w64
[0] ^ argL
->w64
[0];
4673 argD
->w64
[1] = r
.w64
[1] ^ argL
->w64
[1];
/* Decrypt round(s): same final XOR with the round key. */
4676 case 0xDE: /* AESDEC */
4677 case 0xDF: /* AESDECLAST */
4683 argD
->w64
[0] = r
.w64
[0] ^ argL
->w64
[0];
4684 argD
->w64
[1] = r
.w64
[1] ^ argL
->w64
[1];
/* AESIMC: InvMixColumns applied to the destination. */
4687 case 0xDB: /* AESIMC */
4689 InvMixColumns (argD
);
4691 default: vassert(0);
4695 static inline UInt
RotWord (UInt w32
)
4697 return ((w32
>> 8) | (w32
<< 24));
4700 static inline UInt
SubWord (UInt w32
)
4707 r8
[0] = sbox
[w8
[0]];
4708 r8
[1] = sbox
[w8
[1]];
4709 r8
[2] = sbox
[w8
[2]];
4710 r8
[3] = sbox
[w8
[3]];
4714 /* For description, see definition in guest_amd64_defs.h */
4715 extern void amd64g_dirtyhelper_AESKEYGENASSIST (
4716 VexGuestAMD64State
* gst
,
4718 HWord gstOffL
, HWord gstOffR
4721 // where the args are
4722 V128
* argL
= (V128
*)( ((UChar
*)gst
) + gstOffL
);
4723 V128
* argR
= (V128
*)( ((UChar
*)gst
) + gstOffR
);
4725 // We have to create the result in a temporary in the
4726 // case where the src and dst regs are the same. See #341698.
4729 tmp
.w32
[3] = RotWord (SubWord (argL
->w32
[3])) ^ imm8
;
4730 tmp
.w32
[2] = SubWord (argL
->w32
[3]);
4731 tmp
.w32
[1] = RotWord (SubWord (argL
->w32
[1])) ^ imm8
;
4732 tmp
.w32
[0] = SubWord (argL
->w32
[1]);
4734 argR
->w32
[3] = tmp
.w32
[3];
4735 argR
->w32
[2] = tmp
.w32
[2];
4736 argR
->w32
[1] = tmp
.w32
[1];
4737 argR
->w32
[0] = tmp
.w32
[0];
4742 /*---------------------------------------------------------------*/
4743 /*--- Helpers for dealing with, and describing, ---*/
4744 /*--- guest state as a whole. ---*/
4745 /*---------------------------------------------------------------*/
4747 /* Initialise the entire amd64 guest state. */
4748 /* VISIBLE TO LIBVEX CLIENT */
4749 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State
* vex_state
)
4751 vex_state
->host_EvC_FAILADDR
= 0;
4752 vex_state
->host_EvC_COUNTER
= 0;
4753 vex_state
->pad0
= 0;
4755 vex_state
->guest_RAX
= 0;
4756 vex_state
->guest_RCX
= 0;
4757 vex_state
->guest_RDX
= 0;
4758 vex_state
->guest_RBX
= 0;
4759 vex_state
->guest_RSP
= 0;
4760 vex_state
->guest_RBP
= 0;
4761 vex_state
->guest_RSI
= 0;
4762 vex_state
->guest_RDI
= 0;
4763 vex_state
->guest_R8
= 0;
4764 vex_state
->guest_R9
= 0;
4765 vex_state
->guest_R10
= 0;
4766 vex_state
->guest_R11
= 0;
4767 vex_state
->guest_R12
= 0;
4768 vex_state
->guest_R13
= 0;
4769 vex_state
->guest_R14
= 0;
4770 vex_state
->guest_R15
= 0;
4772 vex_state
->guest_CC_OP
= AMD64G_CC_OP_COPY
;
4773 vex_state
->guest_CC_DEP1
= 0;
4774 vex_state
->guest_CC_DEP2
= 0;
4775 vex_state
->guest_CC_NDEP
= 0;
4777 vex_state
->guest_DFLAG
= 1; /* forwards */
4778 vex_state
->guest_IDFLAG
= 0;
4779 vex_state
->guest_ACFLAG
= 0;
4781 /* HACK: represent the offset associated with a constant %fs.
4782 Typically, on linux, this assumes that %fs is only ever zero (main
4784 vex_state
->guest_FS_CONST
= 0;
4786 vex_state
->guest_RIP
= 0;
4788 /* Initialise the simulated FPU */
4789 amd64g_dirtyhelper_FINIT( vex_state
);
4791 /* Initialise the AVX state. */
4792 # define AVXZERO(_ymm) \
4793 do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
4794 _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
4796 vex_state
->guest_SSEROUND
= (ULong
)Irrm_NEAREST
;
4797 AVXZERO(vex_state
->guest_YMM0
);
4798 AVXZERO(vex_state
->guest_YMM1
);
4799 AVXZERO(vex_state
->guest_YMM2
);
4800 AVXZERO(vex_state
->guest_YMM3
);
4801 AVXZERO(vex_state
->guest_YMM4
);
4802 AVXZERO(vex_state
->guest_YMM5
);
4803 AVXZERO(vex_state
->guest_YMM6
);
4804 AVXZERO(vex_state
->guest_YMM7
);
4805 AVXZERO(vex_state
->guest_YMM8
);
4806 AVXZERO(vex_state
->guest_YMM9
);
4807 AVXZERO(vex_state
->guest_YMM10
);
4808 AVXZERO(vex_state
->guest_YMM11
);
4809 AVXZERO(vex_state
->guest_YMM12
);
4810 AVXZERO(vex_state
->guest_YMM13
);
4811 AVXZERO(vex_state
->guest_YMM14
);
4812 AVXZERO(vex_state
->guest_YMM15
);
4813 AVXZERO(vex_state
->guest_YMM16
);
4817 vex_state
->guest_EMNOTE
= EmNote_NONE
;
4819 /* These should not ever be either read or written, but we
4820 initialise them anyway. */
4821 vex_state
->guest_CMSTART
= 0;
4822 vex_state
->guest_CMLEN
= 0;
4824 vex_state
->guest_NRADDR
= 0;
4825 vex_state
->guest_SC_CLASS
= 0;
4826 vex_state
->guest_GS_CONST
= 0;
4828 vex_state
->guest_IP_AT_SYSCALL
= 0;
4829 vex_state
->pad1
= 0;
/* NOTE(review): mangled chunk; the else-branches of the three overlap
   tests and the function's final return statement (original lines
   4858..4876) are missing from this view, so the control flow is left
   byte-identical rather than reconstructed. */
4833 /* Figure out if any part of the guest state contained in minoff
4834 .. maxoff requires precise memory exceptions. If in doubt return
4835 True (but this generates significantly slower code).
4837 By default we enforce precise exns for guest %RSP, %RBP and %RIP
4838 only. These are the minimum needed to extract correct stack
4839 backtraces from amd64 code.
4841 Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
4843 Bool
guest_amd64_state_requires_precise_mem_exns (
4844 Int minoff
, Int maxoff
, VexRegisterUpdates pxControl
/* Byte ranges [min..max] of the three registers of interest. */
4847 Int rbp_min
= offsetof(VexGuestAMD64State
, guest_RBP
);
4848 Int rbp_max
= rbp_min
+ 8 - 1;
4849 Int rsp_min
= offsetof(VexGuestAMD64State
, guest_RSP
);
4850 Int rsp_max
= rsp_min
+ 8 - 1;
4851 Int rip_min
= offsetof(VexGuestAMD64State
, guest_RIP
);
4852 Int rip_max
= rip_min
+ 8 - 1;
4854 if (maxoff
< rsp_min
|| minoff
> rsp_max
) {
4855 /* no overlap with rsp */
4856 if (pxControl
== VexRegUpdSpAtMemAccess
)
4857 return False
; // We only need to check stack pointer.
4862 if (maxoff
< rbp_min
|| minoff
> rbp_max
) {
4863 /* no overlap with rbp */
4868 if (maxoff
< rip_min
|| minoff
> rip_max
) {
4869 /* no overlap with eip */
/* ALWAYSDEFD(field) expands to an { offset, size } pair describing
   one always-defined region of VexGuestAMD64State, for Memcheck. */
4878 #define ALWAYSDEFD(field) \
4879 { offsetof(VexGuestAMD64State, field), \
4880 (sizeof ((VexGuestAMD64State*)0)->field) }
/* NOTE(review): mangled chunk; the opening of the VexGuestLayout
   initializer (original lines 4881..4884) and several field
   initializers / the closing lines (4887, 4890..4891, 4894..4895,
   4898..4899, 4902..4903, 4907, 4932..4935) are missing from this
   view, so the initializer is left byte-identical rather than
   reconstructed. */
4885 /* Total size of the guest state, in bytes. */
4886 .total_sizeB
= sizeof(VexGuestAMD64State
),
4888 /* Describe the stack pointer. */
4889 .offset_SP
= offsetof(VexGuestAMD64State
,guest_RSP
),
4892 /* Describe the frame pointer. */
4893 .offset_FP
= offsetof(VexGuestAMD64State
,guest_RBP
),
4896 /* Describe the instruction pointer. */
4897 .offset_IP
= offsetof(VexGuestAMD64State
,guest_RIP
),
4900 /* Describe any sections to be regarded by Memcheck as
4901 'always-defined'. */
4904 /* flags thunk: OP and NDEP are always defd, whereas DEP1
4905 and DEP2 have to be tracked. See detailed comment in
4906 gdefs.h on meaning of thunk fields. */
4908 = { /* 0 */ ALWAYSDEFD(guest_CC_OP
),
4909 /* 1 */ ALWAYSDEFD(guest_CC_NDEP
),
4910 /* 2 */ ALWAYSDEFD(guest_DFLAG
),
4911 /* 3 */ ALWAYSDEFD(guest_IDFLAG
),
4912 /* 4 */ ALWAYSDEFD(guest_RIP
),
4913 /* 5 */ ALWAYSDEFD(guest_FS_CONST
),
4914 /* 6 */ ALWAYSDEFD(guest_FTOP
),
4915 /* 7 */ ALWAYSDEFD(guest_FPTAG
),
4916 /* 8 */ ALWAYSDEFD(guest_FPROUND
),
4917 /* 9 */ ALWAYSDEFD(guest_FC3210
),
4918 // /* */ ALWAYSDEFD(guest_CS),
4919 // /* */ ALWAYSDEFD(guest_DS),
4920 // /* */ ALWAYSDEFD(guest_ES),
4921 // /* */ ALWAYSDEFD(guest_FS),
4922 // /* */ ALWAYSDEFD(guest_GS),
4923 // /* */ ALWAYSDEFD(guest_SS),
4924 // /* */ ALWAYSDEFD(guest_LDT),
4925 // /* */ ALWAYSDEFD(guest_GDT),
4926 /* 10 */ ALWAYSDEFD(guest_EMNOTE
),
4927 /* 11 */ ALWAYSDEFD(guest_SSEROUND
),
4928 /* 12 */ ALWAYSDEFD(guest_CMSTART
),
4929 /* 13 */ ALWAYSDEFD(guest_CMLEN
),
4930 /* 14 */ ALWAYSDEFD(guest_SC_CLASS
),
4931 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL
)
4936 /*---------------------------------------------------------------*/
4937 /*--- end guest_amd64_helpers.c ---*/
4938 /*---------------------------------------------------------------*/