/*---------------------------------------------------------------*/
/*--- begin                               guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"
/* This file contains helper functions for x86 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/

/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0
/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/
static const UChar parity_table[256] = {
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
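/* A quick cross-check of the table: a byte with an even number of 1
   bits gets X86G_CC_MASK_P, one with an odd count gets 0.  So entries
   0 (no bits) and 3 (two bits) are X86G_CC_MASK_P while entries 1 and
   2 are 0 -- exactly the first four values above.  The sketch below
   is illustrative only (ref_parity is not part of VEX) and shows how
   any entry could be regenerated. */
#if 0
static UChar ref_parity ( UInt b )
{
   UInt i, nSet = 0;
   for (i = 0; i < 8; i++)
      nSet += (b >> i) & 1;
   /* even popcount -> PF set */
   return (nSet & 1) ? 0 : X86G_CC_MASK_P;
}
#endif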
/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return (UInt)x << n;
   else
      return x >> (-n);
}
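/* For example, with 32-bit data the SF computation in the ACTIONS
   macros below becomes lshift(res, 8 - 32), i.e. res >> 24, which
   moves the sign bit (bit 31) down to bit 7 (0x80) -- the SF position
   in %eflags.  With 8-bit data the shift count is zero and the byte
   is used as-is. */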
/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}

#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can     */    \
   /* optimise away, and which stop it complaining about  */    \
   /* unused variables.                                   */    \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
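/* A worked 8-bit instance of the above: adding 0xFF and 0x01 gives
   res == 0x100, which the UChar casts truncate to 0x00.  Then
   cf = ((UChar)0x100 < (UChar)0xFF) = 1, pf = parity_table[0]
   = X86G_CC_MASK_P, af = 0x10 (bit 4 of 0x100 ^ 0xFF ^ 0x01),
   zf = 1 << 6, and sf = of = 0.  So CF, PF, AF and ZF come out set,
   matching what real hardware does for 'addb $1, %al' with
   %al == 0xFF. */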
/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
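/* Why 'argR = CC_DEP2 ^ oldC' in ADC/SBB above: the IR built by
   guest-x86/toIR.c stores (argR ^ oldC) in the DEP2 thunk slot, so
   xoring with oldC once more recovers the original right-hand
   operand.  E.g. for an 8-bit ADC with argR == 0x10 and the carry
   set, DEP2 holds 0x11, and the line above turns it back into 0x10
   before computing res = (argL + argR) + oldC. */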
/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((UInt)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
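/* Concretely, for an 8-bit unsigned multiply of 0x20 by 0x10 the
   widened product rr is 0x200, so lo == 0x00 and hi == 0x02; cf is
   therefore 1 and of == cf << 11, giving both CF and OF set -- the
   x86 rule for MUL, where the flags say whether the high half of the
   result is nonzero.  SMUL's cf instead compares the high half
   against the sign extension of the low half, which is the IMUL
   rule. */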
#if PROFILE_EFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))

static void showCounts ( void )
{
   Int   op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c );

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else {
            vex_printf(" %3d ", n );
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB( 8,  UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB( 8,  UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC( 8,  UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC( 8,  UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC( 8,  UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL( 8,  UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR( 8,  UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL( 8,  UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR( 8,  UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL( 8,  UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL( 8,  Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
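/* A minimal sketch of the thunk convention, for orientation only
   (example_flags_after_addl is not part of VEX): after the guest
   executes 'addl %ebx, %eax', toIR.c leaves CC_OP = X86G_CC_OP_ADDL,
   CC_DEP1 = argL and CC_DEP2 = argR in the thunk, and the full OSZACP
   set can then be rematerialised like this. */
#if 0
static UInt example_flags_after_addl ( UInt argL, UInt argR )
{
   /* NDEP is unused for plain ADD, so 0 is fine here. */
   return x86g_calculate_eflags_all_WRK( X86G_CC_OP_ADDL,
                                         argL, argR, 0 );
}
#endif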
#if defined(VGO_freebsd) || defined(VGO_darwin)
/* see guest_amd64_helpers.c
   Used in syswrap-main.c / VG_(post_syscall) for signal
   resumption. */
static void _______VVVVVVVV_after_x86g_calculate_eflags_all_WRK_VVVVVVVV_______ (void)
{
}

Addr addr_x86g_calculate_eflags_all_WRK
   = (Addr)x86g_calculate_eflags_all_WRK;
Addr addr________VVVVVVVV_x86g_calculate_eflags_all_WRK_VVVVVVVV_______
   = (Addr)_______VVVVVVVV_after_x86g_calculate_eflags_all_WRK_VVVVVVVV_______;
#endif
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op, cc_dep1, cc_dep2, cc_ndep)
          & X86G_CC_MASK_C;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of, sf, zf, cf, pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}
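/* Note how the low bit of 'cond' does the negation: the X86Condcode
   encoding pairs each condition with its inverse (X86CondO = 0 /
   X86CondNO = 1, ..., X86CondLE = 14 / X86CondNLE = 15), so
   'inv = cond & 1' flips the result for the N-variants and each flag
   combination above only needs to be written out once. */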
/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= X86G_CC_MASK_D;
   if (vex_state->guest_IDFLAG == 1)
      eflags |= X86G_CC_MASK_ID;
   if (vex_state->guest_ACFLAG == 1)
      eflags |= X86G_CC_MASK_AC;
   return eflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflags ( UInt eflags,
                             /*MOD*/VexGuestX86State* vex_state )
{
   /* D flag */
   if (eflags & X86G_CC_MASK_D) {
      vex_state->guest_DFLAG = 0xFFFFFFFF;
      eflags &= ~X86G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   /* ID flag */
   if (eflags & X86G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      eflags &= ~X86G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   /* AC flag */
   if (eflags & X86G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      eflags &= ~X86G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
                  X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = eflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}

#if defined(VGO_freebsd) || defined(VGO_darwin)
/* Used in syswrap-main.c / VG_(post_syscall) for signal resumption */
void _______VVVVVVVV_after_LibVEX_GuestX86_put_eflag_c_VVVVVVVV_______ (void)
{
}
#endif
/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls the above run-time    ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}
IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif
   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)
          && isU32(cc_dep2, 0)) {
         /* long sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1, mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)
          && isU32(cc_dep2, 0)) {
         /* long sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }
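      /* To see these rules in action: for guest code
         'cmpl %ebx, %eax ; jl target', toIR.c emits a call to
         x86g_calculate_condition with (X86CondL, X86G_CC_OP_SUBL,
         %eax, %ebx, ...), and the (SUBL, CondL) rule above lets iropt
         collapse the whole call to 1Uto32(CmpLT32S(%eax, %ebx)) --
         no flags ever get computed at run time. */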
      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16, cc_dep1),
                           unop(Iop_32to16, cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16, cc_dep1),
                           unop(Iop_32to16, cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8, cc_dep1),
                           unop(Iop_32to8, cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8, cc_dep1),
                           unop(Iop_32to8, cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32, cc_dep2, mkU32(0xFF)),
                           binop(Iop_And32, cc_dep1, mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1, mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32, cc_dep1, mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }
      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32, binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32, binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32, binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32, binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1, mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }
      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_And32, cc_dep1, mkU32(0xFFFF)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1, mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_And32, cc_dep1, mkU32(255)),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0        test   %al,%al
            b9acb:       75 0d        jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32,
                           binop(Iop_And32, cc_dep1, mkU32(255)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1, mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32, cc_dep1, mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }
      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32, binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32, binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32, cc_dep1, mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32, cc_dep1, mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1(result) == 0 */
         return unop(Iop_1Uto32, binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondNZ)) {
         /* SHRL, then NZ --> test dep1(result) != 0 */
         return unop(Iop_1Uto32, binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      /*---------------- SHLL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHLL) && isU32(cond, X86CondZ)) {
         /* SHLL, then Z --> test dep1(result) == 0 */
         return unop(Iop_1Uto32, binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      //if (isU32(cc_op, X86G_CC_OP_SHLL) && isU32(cond, X86CondNZ)) {
      //   /* SHLL, then NZ --> test dep1(result) != 0 */
      //   vassert(0); // No test case yet observed
      //}
      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }
   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32, cc_dep1, mkU32(0xFF)),
                           binop(Iop_And32, cc_dep2, mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }
   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}
/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt   x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}
/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0-------0      zero
    S  0       0X------X      denormals
    S  1-7FFE  1X------X      normals (all normals have leading 1)
    S  7FFF    10------0      infinity
    S  7FFF    10X-----X      snan
    S  7FFF    11X-----X      qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      0------0   zero
    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
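/* For instance, 1.0 is 0x3FF0000000000000 in the 64-bit format
   (biased exponent 0x3FF, fraction zero, leading 1 implied), but in
   the 80-bit format it is exponent 0x3FFF with mantissa
   0x8000000000000000 -- the leading 1 appears explicitly as the top
   mantissa bit.  The conversion helpers used below
   (convert_f80le_to_f64le and convert_f64le_to_f80le, from
   guest_generic_x87.c) have to add or drop that explicit bit. */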
/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
                             | X86G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return X86G_FC_MASK_C3 | 0
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
               | X86G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0. */
   /* vex_printf("normal\n"); */
   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}
/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   guest_generic_x87.h */
/* fpucw[15:0] contains a x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong x86g_check_fldcw ( UInt fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}

/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}
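/* Round-trip check of the two functions above, illustrative only
   (example_fpucw_roundtrip is not part of VEX): the x87 power-up
   control word 0x037F has all six exception bits masked (0x3F),
   precision control 3 and rounding bits 0, so it decodes to
   round-to-nearest with no warning, and encoding rmode 0 hands back
   exactly 0x037F. */
#if 0
static void example_fpucw_roundtrip ( void )
{
   ULong pair = x86g_check_fldcw( 0x037F );
   vassert( (pair >> 32) == EmNote_NONE );  /* no emulation warning */
   vassert( (pair & 0xFFFFFFFFULL) == 0 );  /* Irrm_NEAREST */
   vassert( x86g_create_fpucw( 0 ) == 0x037F );
}
#endif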
/* mxcsr[15:0] contains a SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong x86g_check_ldmxcsr ( UInt mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}

/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value. */
UInt x86g_create_mxcsr ( UInt sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}
/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/Fpu_State* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int       stno, preg;
   UInt      tag;
   ULong*    vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*    vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt      ftop    = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
   UInt      tagw    = x87_state->env[FP_ENV_TAG];
   UInt      fpucw   = x87_state->env[FP_ENV_CTRL];
   UInt      c3210   = x87_state->env[FP_ENV_STAT] & 0x4700;
   VexEmNote ew;
   UInt      fpround;
   ULong     pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag  = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87_state->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                  /*OUT*/Fpu_State* x87_state )
{
   Int    i, stno, preg;
   UInt   tagw;
   ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt   ftop    = vex_state->guest_FTOP;
   UInt   c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87_state->env[i] = 0;

   x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
      = x87_state->env[13] = 0xFFFF;
   x87_state->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87_state->env[FP_ENV_CTRL]
      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      }
   }
   x87_state->env[FP_ENV_TAG] = toUShort(tagw);
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, &tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1] = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5] = 0;                     /* pad */

   addrS[3] = 0; /* FOP: fpu opcode (bogus) */
   addrS[4] = 0;
   addrS[5] = 0; /* FPU IP (bogus) */
   addrS[6] = 0; /* FPU IP's segment selector (bogus) (although we
                    could conceivably dump %CS here) */

   addrS[7] = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);       /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise
                                     enough; the summary byte only
                                     says "non-empty". */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
   do_get_x87( gst, (Fpu_State*)addr );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Int       i;
   UShort*   addrP = (UShort*)addr;
   Fpu_State tmp;
   do_get_x87( gst, &tmp );
   for (i = 0; i < 14; i++)
      addrP[i] = tmp.env[i];
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst );
}
/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 save from the supplied VexGuestX86State structure and store the
   result at the given address which represents a buffer of at least 108
   bytes. */
void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                               /*OUT*/UChar* x87_state )
{
   do_get_x87 ( vex_state, (Fpu_State*)x87_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 restore from the supplied address and store read values to the given
   VexGuestX86State structure. */
VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
                                    /*MOD*/VexGuestX86State* vex_state )
{
   return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Return mxcsr from the supplied VexGuestX86State structure. */
UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
{
   return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Modify the given VexGuestX86State structure according to the passed mxcsr
   value. */
VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
                                      /*MOD*/VexGuestX86State* vex_state )
{
   ULong w64 = x86g_check_ldmxcsr( mxcsr );
   vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
   return (VexEmNote)(w64 >> 32);
}
/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID.      ---*/
/*---------------------------------------------------------------*/
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate left
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
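
/* Illustrative sketch (not part of the original file): the RCR/RCL
   helpers above return the rotated value in the low 32 bits and the
   updated OSZACP flags in the high 32 bits.  This hypothetical check
   shows how a caller would split the packed result. */
#if 0
static void example_split_rcl_result ( void )
{
   /* Rotate 0x80000000 left by 1 through a set carry flag: the old CF
      becomes bit 0 of the result and the old bit 31 becomes the new CF. */
   ULong packed    = x86g_calculate_RCL( 0x80000000U, 1, X86G_CC_MASK_C, 4 );
   UInt  new_value = (UInt)packed;           /* low half: rotated value  */
   UInt  new_flags = (UInt)(packed >> 32);   /* high half: OSZACP flags  */
   vassert(new_value == 0x00000001U);
   vassert((new_flags & X86G_CC_MASK_C) != 0);
}
#endif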
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   AX value in low half of arg, OSZACP in upper half.
   See guest-x86/toIR.c usage point for details.
*/
static UInt calc_parity_8bit ( UInt w32 ) {
   UInt i;
   UInt p = 1;
   for (i = 0; i < 8; i++)
      p ^= (1 & (w32 >> i));
   return p;
}
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A  = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;
            r_AL = (r_AL - 6) & 0xFF;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A  = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = (r_AL - 0x60) & 0xFF;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = (r_AL + 6) & 0xF;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = (r_AL - 6) & 0xF;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         break;
   }

   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0xD4: { /* AAM */
         r_AH = r_AL / 10;
         r_AL = r_AL % 10;
         break;
      }
      case 0xD5: { /* AAD */
         r_AL = ((r_AH * 10) + r_AL) & 0xff;
         r_AH = 0;
         break;
      }
      default:
         break;
   }

   r_O = 0; /* let's say (undefined) */
   r_C = 0; /* let's say (undefined) */
   r_A = 0; /* let's say (undefined) */
   r_S = (r_AL & 0x80) ? 1 : 0;
   r_Z = (r_AL == 0) ? 1 : 0;
   r_P = calc_parity_8bit( r_AL );

   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
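
/* Illustrative sketch (not part of the original file): the BCD helpers
   above take AX in the low 16 bits and the OSZACP flags shifted up by
   16, and return the updated AX/flags in the same packed format.  A
   hypothetical check for AAM (opcode 0xD4), which splits AL into
   tens/units: */
#if 0
static void example_aam_packing ( void )
{
   UInt ax_in  = 0x0025;                 /* AH = 0x00, AL = 0x25 (37) */
   UInt packed = x86g_calculate_aad_aam( ax_in, 0xD4 );
   UInt al_out = (packed >> 0) & 0xFF;   /* 37 % 10 = 7 */
   UInt ah_out = (packed >> 8) & 0xFF;   /* 37 / 10 = 3 */
   vassert(al_out == 7 && ah_out == 3);
}
#endif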
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a P55C (Intel Pentium/MMX) */
void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      default:
         st->guest_EAX = 0x543;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x8001bf;
         break;
   }
}
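
/* Illustrative sketch (not part of the original file): with EAX=0 the
   CPUID helpers above return the 12-character vendor string packed
   little-endian into EBX, EDX, ECX.  This hypothetical snippet decodes
   those three words back into text. */
#if 0
static void example_decode_vendor_string ( VexGuestX86State* st )
{
   UInt  words[3];
   UChar vendor[13];
   Int   i;
   st->guest_EAX = 0;
   x86g_dirtyhelper_CPUID_sse0( st );
   words[0] = st->guest_EBX;   /* 0x756e6547 = "Genu" */
   words[1] = st->guest_EDX;   /* 0x49656e69 = "ineI" */
   words[2] = st->guest_ECX;   /* 0x6c65746e = "ntel" */
   for (i = 0; i < 12; i++)
      vendor[i] = (UChar)(words[i/4] >> (8 * (i % 4)));
   vendor[12] = 0;             /* "GenuineIntel" */
}
#endif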
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
/* But without 3DNow support (weird, but we really don't support it). */
void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      case 1:
         st->guest_EAX = 0x621;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x183f9ff;
         break;
      /* Highest Extended Function Supported (0x80000004 brand string) */
      case 0x80000000:
         st->guest_EAX = 0x80000004;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* Extended Processor Info and Feature Bits */
      case 0x80000001:
         st->guest_EAX = 0x721;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
         break;
      /* Processor Brand String "AMD Athlon(tm) Processor" */
      case 0x80000002:
         st->guest_EAX = 0x20444d41;
         st->guest_EBX = 0x6c687441;
         st->guest_ECX = 0x74286e6f;
         st->guest_EDX = 0x5020296d;
         break;
      case 0x80000003:
         st->guest_EAX = 0x65636f72;
         st->guest_EBX = 0x726f7373;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
      default:
         st->guest_EAX = 0x0;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
   }
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE1-capable CPU:
   vendor_id  : GenuineIntel
   model name : Intel(R) Pentium(R) III CPU family 1133MHz
*/
void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x000006b1;
         st->guest_EBX = 0x00000004;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0383fbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE2-capable CPU:
   vendor_id       : GenuineIntel
   model name      : Intel(R) Pentium(R) 4 CPU 3.00GHz
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
                     pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
   cache_alignment : 128
   address sizes   : 36 bits physical, 32 bits virtual
*/
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x00000f29;
         st->guest_EBX = 0x01020809;
         st->guest_ECX = 0x00004400;
         st->guest_EDX = 0xbfebfbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSSE3-capable CPU (2 x ...):
   vendor_id       : GenuineIntel
   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   cache size      : 4096 KB
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush dts acpi
                     mmx fxsr sse sse2 ss ht tm syscall nx lm
                     constant_tsc pni monitor ds_cpl vmx est tm2
   cache_alignment : 64
   address sizes   : 36 bits physical, 48 bits virtual
*/
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)               \
      do { st->guest_EAX = (UInt)(_a);        \
           st->guest_EBX = (UInt)(_b);        \
           st->guest_ECX = (UInt)(_c);        \
           st->guest_EDX = (UInt)(_d);        \
      } while (0)

   switch (st->guest_EAX) {
      case 0x00000000:
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         switch (st->guest_ECX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
         break;
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break;
   }
   return r;
#  else
   return 0;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break;
   }
#  else
   /* do nothing */
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, do nothing. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
#  if defined(__i386__)
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* do nothing */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}
/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/
static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
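
/* Illustrative sketch (not part of the original file): PSADBW sums the
   absolute differences of the eight byte lanes.  A hypothetical check
   of the helper above on two small vectors: */
#if 0
static void example_psadbw ( void )
{
   /* Lanes of xx are all 0x10; lanes of yy are 0x01..0x08, with byte 0
      the least significant byte of each 64-bit value. */
   ULong xx = 0x1010101010101010ULL;
   ULong yy = 0x0807060504030201ULL;
   /* |0x10-0x01| + ... + |0x10-0x08| = 15+14+13+12+11+10+9+8 = 92 */
   vassert(x86g_calculate_mmx_psadbw(xx, yy) == 92);
}
#endif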
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with segment overrides.             ---*/
/*---------------------------------------------------------------*/
static
UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
{
   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   return (hi << 24) | (mid << 16) | lo;
}
static
UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
{
   UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   UInt limit = (hi << 16) | lo;
   if (ent->LdtEnt.Bits.Granularity)
      limit = (limit << 12) | 0xFFF;
   return limit;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                              UInt seg_selector, UInt virtual_addr )
{
   UInt tiBit, base, limit;
   VexGuestX86SegDescr* the_descrs;

   Bool verboze = False;

   /* If this isn't true, we're in Big Trouble. */
   vassert(8 == sizeof(VexGuestX86SegDescr));

   if (verboze)
      vex_printf("x86h_use_seg_selector: "
                 "seg_selector = 0x%x, vaddr = 0x%x\n",
                 seg_selector, virtual_addr);

   /* Check for wildly invalid selector. */
   if (seg_selector & ~0xFFFF)
      goto bad;

   seg_selector &= 0x0000FFFF;

   /* Sanity check the segment selector.  Ensure that RPL=11b (least
      privilege).  This forms the bottom 2 bits of the selector. */
   if ((seg_selector & 3) != 3)
      goto bad;

   /* Extract the TI bit (0 means GDT, 1 means LDT) */
   tiBit = (seg_selector >> 2) & 1;

   /* Convert the segment selector onto a table index */
   seg_selector >>= 3;
   vassert(seg_selector >= 0 && seg_selector < 8192);

   if (tiBit == 0) {

      /* GDT access. */
      /* Do we actually have a GDT to look at? */
      if (gdt == 0)
         goto bad;

      /* Check for access to non-existent entry. */
      if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)gdt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   } else {

      /* All the same stuff, except for the LDT. */
      if (ldt == 0)
         goto bad;

      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)ldt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   }

   /* Do the limit check.  Note, this check is just slightly too
      slack.  Really it should be "if (virtual_addr + size - 1 >=
      limit)," but we don't have the size info to hand.  Getting it
      could be significantly complex. */
   if (virtual_addr >= limit)
      goto bad;

   if (verboze)
      vex_printf("x86h_use_seg_selector: "
                 "base = 0x%x, addr = 0x%x\n",
                 base, base + virtual_addr);

   /* High 32 bits are zero, indicating success. */
   return (ULong)( ((UInt)virtual_addr) + base );

 bad:
   return 1ULL << 32;
}
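
/* Illustrative sketch (not part of the original file): how the result
   of x86g_use_seg_selector is conceptually consumed.  A non-zero high
   half signals failure; otherwise the low half is the linear address.
   The table pointers and selector used here are hypothetical. */
#if 0
static void example_use_seg_selector ( HWord ldt, HWord gdt )
{
   UInt  selector = (13 << 3) | 3;   /* GDT entry 13, TI=0, RPL=3 */
   ULong res      = x86g_use_seg_selector( ldt, gdt, selector, 0x1000 );
   if ((res >> 32) != 0) {
      /* Translation failed; the guest would fault here. */
   } else {
      UInt linear = (UInt)res;       /* descriptor base + 0x1000 */
      (void)linear;
   }
}
#endif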
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/
/* Initialise the entire x86 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);

#  undef SSEZERO

   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->padding1 = 0;
   vex_state->padding2 = 0;
   vex_state->padding3 = 0;
}
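
/* Illustrative sketch (not part of the original file): a LibVEX client
   typically zero-initialises the guest state with the call above and
   then fills in the registers it cares about.  The values used here
   are hypothetical. */
#if 0
static void example_client_setup ( VexGuestX86State* gst )
{
   LibVEX_GuestX86_initialise( gst );
   gst->guest_EIP = 0x08048000;   /* hypothetical program entry point   */
   gst->guest_ESP = 0xBFFFF000;   /* hypothetical initial stack pointer */
}
#endif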
/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %ESP, %EBP and %EIP
   only.  These are the minimum needed to extract correct stack
   backtraces from x86 code.

   Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_x86_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   Int ebp_max = ebp_min + 4 - 1;
   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   Int esp_max = esp_min + 4 - 1;
   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   Int eip_max = eip_min + 4 - 1;

   if (maxoff < esp_min || minoff > esp_max) {
      /* no overlap with esp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < ebp_min || minoff > ebp_max) {
      /* no overlap with ebp */
   } else {
      return True;
   }

   if (maxoff < eip_min || minoff > eip_max) {
      /* no overlap with eip */
   } else {
      return True;
   }

   return False;
}
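
/* Illustrative sketch (not part of the original file): a query whose
   offset range overlaps guest_ESP must report True regardless of
   pxControl, since %ESP always needs precise exceptions for stack
   unwinding. */
#if 0
static void example_precise_exns_query ( void )
{
   Int lo = offsetof(VexGuestX86State, guest_ESP);
   vassert( guest_x86_state_requires_precise_mem_exns(
               lo, lo + 3, VexRegUpdAllregsAtEachInsn ) );
}
#endif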
#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
/*---------------------------------------------------------------*/
/*--- end                                 guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/