2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations. ---*/
4 /*--- mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of MemCheck, a heavyweight Valgrind tool for
9 detecting memory errors.
11 Copyright (C) 2000-2017 Julian Seward
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 #include "pub_tool_basics.h"
31 #include "pub_tool_poolalloc.h" // For mc_include.h
32 #include "pub_tool_hashtable.h" // For mc_include.h
33 #include "pub_tool_libcassert.h"
34 #include "pub_tool_libcprint.h"
35 #include "pub_tool_tooliface.h"
36 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
37 #include "pub_tool_xarray.h"
38 #include "pub_tool_mallocfree.h"
39 #include "pub_tool_libcbase.h"
41 #include "mc_include.h"
44 /* FIXMEs JRS 2011-June-16.
46 Check the interpretation for vector narrowing and widening ops,
47 particularly the saturating ones. I suspect they are either overly
48 pessimistic and/or wrong.
50 Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
51 saturating shifts): the interpretation is overly pessimistic.
52 See comments on the relevant cases below for details.
54 Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
55 both rounding and non-rounding variants): ditto
58 /* This file implements the Memcheck instrumentation, and in
59 particular contains the core of its undefined value detection
60 machinery. For a comprehensive background of the terminology,
61 algorithms and rationale used herein, read:
63 Using Valgrind to detect undefined value errors with
66 Julian Seward and Nicholas Nethercote
68 2005 USENIX Annual Technical Conference (General Track),
69 Anaheim, CA, USA, April 10-15, 2005.
73 Here is as good a place as any to record exactly when V bits are and
74 should be checked, why, and what function is responsible.
77 Memcheck complains when an undefined value is used:
79 1. In the condition of a conditional branch. Because it could cause
80 incorrect control flow, and thus cause incorrect externally-visible
81 behaviour. [mc_translate.c:complainIfUndefined]
83 2. As an argument to a system call, or as the value that specifies
84 the system call number. Because it could cause an incorrect
85 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
87 3. As the address in a load or store. Because it could cause an
88 incorrect value to be used later, which could cause externally-visible
89 behaviour (eg. via incorrect control flow or an incorrect system call
90 argument) [complainIfUndefined]
92 4. As the target address of a branch. Because it could cause incorrect
93 control flow. [complainIfUndefined]
95 5. As an argument to setenv, unsetenv, or putenv. Because it could put
96 an incorrect value into the external environment.
97 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
99 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
100 [complainIfUndefined]
102 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
103 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
104 requested it. [in memcheck.h]
107 Memcheck also complains, but should not, when an undefined value is used:
109 8. As the shift value in certain SIMD shift operations (but not in the
110 standard integer shift operations). This inconsistency is due to
111 historical reasons.) [complainIfUndefined]
114 Memcheck does not complain, but should, when an undefined value is used:
116 9. As an input to a client request. Because the client request may
117 affect the visible behaviour -- see bug #144362 for an example
118 involving the malloc replacements in vg_replace_malloc.c and
119 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
120 isn't identified. That bug report also has some info on how to solve
121 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
124 In practice, 1 and 2 account for the vast majority of cases.
127 /* Generation of addr-definedness, addr-validity and
128 guard-definedness checks pertaining to loads and stores (Iex_Load,
129 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
130 loads/stores) was re-checked 11 May 2013. */
133 /*------------------------------------------------------------*/
134 /*--- Forward decls ---*/
135 /*------------------------------------------------------------*/
139 // See below for comments explaining what this is for.
141 enum __attribute__((packed
)) { HuUnU
=0, HuPCa
=1, HuOth
=2 }
144 static IRType
shadowTypeV ( IRType ty
);
145 static IRExpr
* expr2vbits ( struct _MCEnv
* mce
, IRExpr
* e
,
146 HowUsed hu
/*use HuOth if unknown*/ );
147 static IRTemp
findShadowTmpB ( struct _MCEnv
* mce
, IRTemp orig
);
149 static IRExpr
*i128_const_zero(void);
152 /*------------------------------------------------------------*/
153 /*--- Memcheck running state, and tmp management. ---*/
154 /*------------------------------------------------------------*/
/* For a few (maybe 1%) IROps, we have both a cheaper, less exact vbit
   propagation scheme, and a more expensive, more precise vbit propagation
   scheme.  This enum describes, for such an IROp, which scheme to use. */
typedef
   enum {
      // Use the cheaper, less-exact variant.
      DLcheap=4,
      // Choose between cheap and expensive based on analysis of the block
      // to be instrumented.  Note that the choice may be done on a
      // per-instance basis of the IROp that this DetailLevel describes.
      DLauto=5,
      // Use the more expensive, more-exact variant.
      DLexpensive=6
   }
   DetailLevel;


/* A readonly part of the running state.  For IROps that have both a
   less-exact and more-exact interpretation, records which interpretation is
   to be used. */
typedef
   struct {
      // For Add32/64 and Sub32/64, all 3 settings are allowed.  For the
      // DLauto case, a per-instance decision is to be made by inspecting
      // the associated tmp's entry in MCEnv.tmpHowUsed.
      DetailLevel dl_Add32;
      DetailLevel dl_Add64;
      DetailLevel dl_Sub32;
      DetailLevel dl_Sub64;
      // For Cmp{EQ,NE}{64,32,16,8}, only DLcheap and DLexpensive are
      // allowed.
      DetailLevel dl_CmpEQ64_CmpNE64;
      DetailLevel dl_CmpEQ32_CmpNE32;
      DetailLevel dl_CmpEQ16_CmpNE16;
      DetailLevel dl_CmpEQ8_CmpNE8;
   }
   DetailLevelByOp;
194 static void DetailLevelByOp__set_all ( /*OUT*/DetailLevelByOp
* dlbo
,
201 dlbo
->dl_CmpEQ64_CmpNE64
= dl
;
202 dlbo
->dl_CmpEQ32_CmpNE32
= dl
;
203 dlbo
->dl_CmpEQ16_CmpNE16
= dl
;
204 dlbo
->dl_CmpEQ8_CmpNE8
= dl
;
207 static void DetailLevelByOp__check_sanity ( const DetailLevelByOp
* dlbo
)
209 tl_assert(dlbo
->dl_Add32
>= DLcheap
&& dlbo
->dl_Add32
<= DLexpensive
);
210 tl_assert(dlbo
->dl_Add64
>= DLcheap
&& dlbo
->dl_Add64
<= DLexpensive
);
211 tl_assert(dlbo
->dl_Sub32
>= DLcheap
&& dlbo
->dl_Sub32
<= DLexpensive
);
212 tl_assert(dlbo
->dl_Sub64
>= DLcheap
&& dlbo
->dl_Sub64
<= DLexpensive
);
213 tl_assert(dlbo
->dl_CmpEQ64_CmpNE64
== DLcheap
214 || dlbo
->dl_CmpEQ64_CmpNE64
== DLexpensive
);
215 tl_assert(dlbo
->dl_CmpEQ32_CmpNE32
== DLcheap
216 || dlbo
->dl_CmpEQ32_CmpNE32
== DLexpensive
);
217 tl_assert(dlbo
->dl_CmpEQ16_CmpNE16
== DLcheap
218 || dlbo
->dl_CmpEQ16_CmpNE16
== DLexpensive
);
219 tl_assert(dlbo
->dl_CmpEQ8_CmpNE8
== DLcheap
220 || dlbo
->dl_CmpEQ8_CmpNE8
== DLexpensive
);
223 static UInt
DetailLevelByOp__count ( const DetailLevelByOp
* dlbo
,
227 n
+= (dlbo
->dl_Add32
== dl
? 1 : 0);
228 n
+= (dlbo
->dl_Add64
== dl
? 1 : 0);
229 n
+= (dlbo
->dl_Sub32
== dl
? 1 : 0);
230 n
+= (dlbo
->dl_Sub64
== dl
? 1 : 0);
231 n
+= (dlbo
->dl_CmpEQ64_CmpNE64
== dl
? 1 : 0);
232 n
+= (dlbo
->dl_CmpEQ32_CmpNE32
== dl
? 1 : 0);
233 n
+= (dlbo
->dl_CmpEQ16_CmpNE16
== dl
? 1 : 0);
234 n
+= (dlbo
->dl_CmpEQ8_CmpNE8
== dl
? 1 : 0);
239 /* Carries info about a particular tmp. The tmp's number is not
240 recorded, as this is implied by (equal to) its index in the tmpMap
241 in MCEnv. The tmp's type is also not recorded, as this is present
244 When .kind is Orig, .shadowV and .shadowB may give the identities
245 of the temps currently holding the associated definedness (shadowV)
246 and origin (shadowB) values, or these may be IRTemp_INVALID if code
247 to compute such values has not yet been emitted.
249 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
250 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
251 illogical for a shadow tmp itself to be shadowed.
254 enum { Orig
=1, VSh
=2, BSh
=3 }
266 /* A |HowUsed| value carries analysis results about how values are used,
267 pertaining to whether we need to instrument integer adds expensively or
268 not. The running state carries a (readonly) mapping from original tmp to
269 a HowUsed value for it. A usage value can be one of three values,
270 forming a 3-point chain lattice.
272 HuOth ("Other") used in some arbitrary way
274 HuPCa ("PCast") used *only* in effectively a PCast, in which all
275 | we care about is the all-defined vs not-all-defined distinction
277 HuUnU ("Unused") not used at all.
279 The "safe" (don't-know) end of the lattice is "HuOth". See comments
280 below in |preInstrumentationAnalysis| for further details.
284 enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
288 // Not actually necessary, but we don't want to waste D1 space.
289 STATIC_ASSERT(sizeof(HowUsed
) == 1);
292 /* Carries around state during memcheck instrumentation. */
295 /* MODIFIED: the superblock being constructed. IRStmts are
300 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
301 current kind and possibly shadow temps for each temp in the
302 IRSB being constructed. Note that it does not contain the
303 type of each tmp. If you want to know the type, look at the
304 relevant entry in sb->tyenv. It follows that at all times
305 during the instrumentation process, the valid indices for
306 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
307 total number of Orig, V- and B- temps allocated so far.
309 The reason for this strange split (types in one place, all
310 other info in another) is that we need the types to be
311 attached to sb so as to make it possible to do
312 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
313 instrumentation process. */
314 XArray
* /* of TempMapEnt */ tmpMap
;
316 /* READONLY: contains details of which ops should be expensively
318 DetailLevelByOp dlbo
;
320 /* READONLY: for each original tmp, how the tmp is used. This is
321 computed by |preInstrumentationAnalysis|. Valid indices are
322 0 .. #temps_in_sb-1 (same as for tmpMap). */
325 /* READONLY: the guest layout. This indicates which parts of
326 the guest state should be regarded as 'always defined'. */
327 const VexGuestLayout
* layout
;
329 /* READONLY: the host word type. Needed for constructing
330 arguments of type 'HWord' to be passed to helper functions.
331 Ity_I32 or Ity_I64 only. */
337 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
338 demand), as they are encountered. This is for two reasons.
340 (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
344 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
345 table indexed [0 .. n_types-1], which gives the current shadow for
346 each original tmp, or INVALID_IRTEMP if none is so far assigned.
347 It is necessary to support making multiple assignments to a shadow
348 -- specifically, after testing a shadow for definedness, it needs
349 to be made defined. But IR's SSA property disallows this.
351 (2) (more important reason): Therefore, when a shadow needs to get
352 a new value, a new temporary is created, the value is assigned to
353 that, and the tmpMap is updated to reflect the new binding.
355 A corollary is that if the tmpMap maps a given tmp to
356 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
357 there's a read-before-write error in the original tmps. The IR
358 sanity checker should catch all such anomalies, however.
361 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
362 both the table in mce->sb and to our auxiliary mapping. Note that
363 newTemp may cause mce->tmpMap to resize, hence previous results
364 from VG_(indexXA)(mce->tmpMap) are invalidated. */
365 static IRTemp
newTemp ( MCEnv
* mce
, IRType ty
, TempKind kind
)
369 IRTemp tmp
= newIRTemp(mce
->sb
->tyenv
, ty
);
371 ent
.shadowV
= IRTemp_INVALID
;
372 ent
.shadowB
= IRTemp_INVALID
;
373 newIx
= VG_(addToXA
)( mce
->tmpMap
, &ent
);
374 tl_assert(newIx
== (Word
)tmp
);
379 /* Find the tmp currently shadowing the given original tmp. If none
380 so far exists, allocate one. */
381 static IRTemp
findShadowTmpV ( MCEnv
* mce
, IRTemp orig
)
384 /* VG_(indexXA) range-checks 'orig', hence no need to check
386 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
387 tl_assert(ent
->kind
== Orig
);
388 if (ent
->shadowV
== IRTemp_INVALID
) {
390 = newTemp( mce
, shadowTypeV(mce
->sb
->tyenv
->types
[orig
]), VSh
);
391 /* newTemp may cause mce->tmpMap to resize, hence previous results
392 from VG_(indexXA) are invalid. */
393 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
394 tl_assert(ent
->kind
== Orig
);
395 tl_assert(ent
->shadowV
== IRTemp_INVALID
);
401 /* Allocate a new shadow for the given original tmp. This means any
402 previous shadow is abandoned. This is needed because it is
403 necessary to give a new value to a shadow once it has been tested
404 for undefinedness, but unfortunately IR's SSA property disallows
405 this. Instead we must abandon the old shadow, allocate a new one
406 and use that instead.
408 This is the same as findShadowTmpV, except we don't bother to see
409 if a shadow temp already existed -- we simply allocate a new one
411 static void newShadowTmpV ( MCEnv
* mce
, IRTemp orig
)
414 /* VG_(indexXA) range-checks 'orig', hence no need to check
416 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
417 tl_assert(ent
->kind
== Orig
);
420 = newTemp( mce
, shadowTypeV(mce
->sb
->tyenv
->types
[orig
]), VSh
);
421 /* newTemp may cause mce->tmpMap to resize, hence previous results
422 from VG_(indexXA) are invalid. */
423 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
424 tl_assert(ent
->kind
== Orig
);
430 /*------------------------------------------------------------*/
431 /*--- IRAtoms -- a subset of IRExprs ---*/
432 /*------------------------------------------------------------*/
434 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
435 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
436 input, most of this code deals in atoms. Usefully, a value atom
437 always has a V-value which is also an atom: constants are shadowed
438 by constants, and temps are shadowed by the corresponding shadow
441 typedef IRExpr IRAtom
;
443 /* (used for sanity checks only): is this an atom which looks
444 like it's from original code? */
445 static Bool
isOriginalAtom ( MCEnv
* mce
, IRAtom
* a1
)
447 if (a1
->tag
== Iex_Const
)
449 if (a1
->tag
== Iex_RdTmp
) {
450 TempMapEnt
* ent
= VG_(indexXA
)( mce
->tmpMap
, a1
->Iex
.RdTmp
.tmp
);
451 return ent
->kind
== Orig
;
456 /* (used for sanity checks only): is this an atom which looks
457 like it's from shadow code? */
458 static Bool
isShadowAtom ( MCEnv
* mce
, IRAtom
* a1
)
460 if (a1
->tag
== Iex_Const
)
462 if (a1
->tag
== Iex_RdTmp
) {
463 TempMapEnt
* ent
= VG_(indexXA
)( mce
->tmpMap
, a1
->Iex
.RdTmp
.tmp
);
464 return ent
->kind
== VSh
|| ent
->kind
== BSh
;
469 /* (used for sanity checks only): check that both args are atoms and
470 are identically-kinded. */
471 static Bool
sameKindedAtoms ( IRAtom
* a1
, IRAtom
* a2
)
473 if (a1
->tag
== Iex_RdTmp
&& a2
->tag
== Iex_RdTmp
)
475 if (a1
->tag
== Iex_Const
&& a2
->tag
== Iex_Const
)
481 /*------------------------------------------------------------*/
482 /*--- Type management ---*/
483 /*------------------------------------------------------------*/
485 /* Shadow state is always accessed using integer types. This returns
486 an integer type with the same size (as per sizeofIRType) as the
487 given type. The only valid shadow types are Bit, I8, I16, I32,
488 I64, I128, V128, V256. */
490 static IRType
shadowTypeV ( IRType ty
)
498 case Ity_I128
: return ty
;
499 case Ity_F16
: return Ity_I16
;
500 case Ity_F32
: return Ity_I32
;
501 case Ity_D32
: return Ity_I32
;
502 case Ity_F64
: return Ity_I64
;
503 case Ity_D64
: return Ity_I64
;
504 case Ity_F128
: return Ity_I128
;
505 case Ity_D128
: return Ity_I128
;
506 case Ity_V128
: return Ity_V128
;
507 case Ity_V256
: return Ity_V256
;
508 default: ppIRType(ty
);
509 VG_(tool_panic
)("memcheck:shadowTypeV");
513 /* Produce a 'defined' value of the given shadow type. Should only be
514 supplied shadow types (Bit/I8/I16/I32/UI64). */
515 static IRExpr
* definedOfType ( IRType ty
) {
517 case Ity_I1
: return IRExpr_Const(IRConst_U1(False
));
518 case Ity_I8
: return IRExpr_Const(IRConst_U8(0));
519 case Ity_I16
: return IRExpr_Const(IRConst_U16(0));
520 case Ity_I32
: return IRExpr_Const(IRConst_U32(0));
521 case Ity_I64
: return IRExpr_Const(IRConst_U64(0));
522 case Ity_I128
: return i128_const_zero();
523 case Ity_V128
: return IRExpr_Const(IRConst_V128(0x0000));
524 case Ity_V256
: return IRExpr_Const(IRConst_V256(0x00000000));
525 default: VG_(tool_panic
)("memcheck:definedOfType");
530 /*------------------------------------------------------------*/
531 /*--- Constructing IR fragments ---*/
532 /*------------------------------------------------------------*/
534 /* add stmt to a bb */
535 static inline void stmt ( HChar cat
, MCEnv
* mce
, IRStmt
* st
) {
537 VG_(printf
)(" %c: ", cat
);
541 addStmtToIRSB(mce
->sb
, st
);
544 /* assign value to tmp */
546 void assign ( HChar cat
, MCEnv
* mce
, IRTemp tmp
, IRExpr
* expr
) {
547 stmt(cat
, mce
, IRStmt_WrTmp(tmp
,expr
));
/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU1(_n)                 IRExpr_Const(IRConst_U1(_n))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
563 /* Bind the given expression to a new temporary, and return the
564 temporary. This effectively converts an arbitrary expression into
567 'ty' is the type of 'e' and hence the type that the new temporary
568 needs to be. But passing it in is redundant, since we can deduce
569 the type merely by inspecting 'e'. So at least use that fact to
570 assert that the two types agree. */
571 static IRAtom
* assignNew ( HChar cat
, MCEnv
* mce
, IRType ty
, IRExpr
* e
)
575 IRType tyE
= typeOfIRExpr(mce
->sb
->tyenv
, e
);
577 tl_assert(tyE
== ty
); /* so 'ty' is redundant (!) */
579 case 'V': k
= VSh
; break;
580 case 'B': k
= BSh
; break;
581 case 'C': k
= Orig
; break;
582 /* happens when we are making up new "orig"
583 expressions, for IRCAS handling */
584 default: tl_assert(0);
586 t
= newTemp(mce
, ty
, k
);
587 assign(cat
, mce
, t
, e
);
592 /*------------------------------------------------------------*/
593 /*--- Helper functions for 128-bit ops ---*/
594 /*------------------------------------------------------------*/
596 static IRExpr
*i128_const_zero(void)
598 IRAtom
* z64
= IRExpr_Const(IRConst_U64(0));
599 return binop(Iop_64HLto128
, z64
, z64
);
602 /* There are no I128-bit loads and/or stores [as generated by any
603 current front ends]. So we do not need to worry about that in
607 /*------------------------------------------------------------*/
608 /*--- Constructing definedness primitive ops ---*/
609 /*------------------------------------------------------------*/
611 /* --------- Defined-if-either-defined --------- */
613 static IRAtom
* mkDifD1 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
614 tl_assert(isShadowAtom(mce
,a1
));
615 tl_assert(isShadowAtom(mce
,a2
));
616 return assignNew('V', mce
, Ity_I1
, binop(Iop_And1
, a1
, a2
));
619 static IRAtom
* mkDifD8 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
620 tl_assert(isShadowAtom(mce
,a1
));
621 tl_assert(isShadowAtom(mce
,a2
));
622 return assignNew('V', mce
, Ity_I8
, binop(Iop_And8
, a1
, a2
));
625 static IRAtom
* mkDifD16 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
626 tl_assert(isShadowAtom(mce
,a1
));
627 tl_assert(isShadowAtom(mce
,a2
));
628 return assignNew('V', mce
, Ity_I16
, binop(Iop_And16
, a1
, a2
));
631 static IRAtom
* mkDifD32 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
632 tl_assert(isShadowAtom(mce
,a1
));
633 tl_assert(isShadowAtom(mce
,a2
));
634 return assignNew('V', mce
, Ity_I32
, binop(Iop_And32
, a1
, a2
));
637 static IRAtom
* mkDifD64 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
638 tl_assert(isShadowAtom(mce
,a1
));
639 tl_assert(isShadowAtom(mce
,a2
));
640 return assignNew('V', mce
, Ity_I64
, binop(Iop_And64
, a1
, a2
));
643 static IRAtom
* mkDifDV128 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
644 tl_assert(isShadowAtom(mce
,a1
));
645 tl_assert(isShadowAtom(mce
,a2
));
646 return assignNew('V', mce
, Ity_V128
, binop(Iop_AndV128
, a1
, a2
));
649 static IRAtom
* mkDifDV256 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
650 tl_assert(isShadowAtom(mce
,a1
));
651 tl_assert(isShadowAtom(mce
,a2
));
652 return assignNew('V', mce
, Ity_V256
, binop(Iop_AndV256
, a1
, a2
));
655 /* --------- Undefined-if-either-undefined --------- */
657 static IRAtom
* mkUifU1 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
658 tl_assert(isShadowAtom(mce
,a1
));
659 tl_assert(isShadowAtom(mce
,a2
));
660 return assignNew('V', mce
, Ity_I1
, binop(Iop_Or1
, a1
, a2
));
663 static IRAtom
* mkUifU8 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
664 tl_assert(isShadowAtom(mce
,a1
));
665 tl_assert(isShadowAtom(mce
,a2
));
666 return assignNew('V', mce
, Ity_I8
, binop(Iop_Or8
, a1
, a2
));
669 static IRAtom
* mkUifU16 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
670 tl_assert(isShadowAtom(mce
,a1
));
671 tl_assert(isShadowAtom(mce
,a2
));
672 return assignNew('V', mce
, Ity_I16
, binop(Iop_Or16
, a1
, a2
));
675 static IRAtom
* mkUifU32 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
676 tl_assert(isShadowAtom(mce
,a1
));
677 tl_assert(isShadowAtom(mce
,a2
));
678 return assignNew('V', mce
, Ity_I32
, binop(Iop_Or32
, a1
, a2
));
681 static IRAtom
* mkUifU64 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
682 tl_assert(isShadowAtom(mce
,a1
));
683 tl_assert(isShadowAtom(mce
,a2
));
684 return assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, a1
, a2
));
687 static IRAtom
* mkUifU128 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
688 IRAtom
*tmp1
, *tmp2
, *tmp3
, *tmp4
, *tmp5
, *tmp6
;
689 tl_assert(isShadowAtom(mce
,a1
));
690 tl_assert(isShadowAtom(mce
,a2
));
691 tmp1
= assignNew('V', mce
, Ity_I64
, unop(Iop_128to64
, a1
));
692 tmp2
= assignNew('V', mce
, Ity_I64
, unop(Iop_128HIto64
, a1
));
693 tmp3
= assignNew('V', mce
, Ity_I64
, unop(Iop_128to64
, a2
));
694 tmp4
= assignNew('V', mce
, Ity_I64
, unop(Iop_128HIto64
, a2
));
695 tmp5
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, tmp1
, tmp3
));
696 tmp6
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, tmp2
, tmp4
));
698 return assignNew('V', mce
, Ity_I128
, binop(Iop_64HLto128
, tmp6
, tmp5
));
701 static IRAtom
* mkUifUV128 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
702 tl_assert(isShadowAtom(mce
,a1
));
703 tl_assert(isShadowAtom(mce
,a2
));
704 return assignNew('V', mce
, Ity_V128
, binop(Iop_OrV128
, a1
, a2
));
707 static IRAtom
* mkUifUV256 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
708 tl_assert(isShadowAtom(mce
,a1
));
709 tl_assert(isShadowAtom(mce
,a2
));
710 return assignNew('V', mce
, Ity_V256
, binop(Iop_OrV256
, a1
, a2
));
713 static IRAtom
* mkUifU ( MCEnv
* mce
, IRType vty
, IRAtom
* a1
, IRAtom
* a2
) {
715 case Ity_I8
: return mkUifU8(mce
, a1
, a2
);
716 case Ity_I16
: return mkUifU16(mce
, a1
, a2
);
717 case Ity_I32
: return mkUifU32(mce
, a1
, a2
);
718 case Ity_I64
: return mkUifU64(mce
, a1
, a2
);
719 case Ity_I128
: return mkUifU128(mce
, a1
, a2
);
720 case Ity_V128
: return mkUifUV128(mce
, a1
, a2
);
721 case Ity_V256
: return mkUifUV256(mce
, a1
, a2
);
723 VG_(printf
)("\n"); ppIRType(vty
); VG_(printf
)("\n");
724 VG_(tool_panic
)("memcheck:mkUifU");
728 /* --------- The Left-family of operations. --------- */
730 static IRAtom
* mkLeft8 ( MCEnv
* mce
, IRAtom
* a1
) {
731 tl_assert(isShadowAtom(mce
,a1
));
732 return assignNew('V', mce
, Ity_I8
, unop(Iop_Left8
, a1
));
735 static IRAtom
* mkLeft16 ( MCEnv
* mce
, IRAtom
* a1
) {
736 tl_assert(isShadowAtom(mce
,a1
));
737 return assignNew('V', mce
, Ity_I16
, unop(Iop_Left16
, a1
));
740 static IRAtom
* mkLeft32 ( MCEnv
* mce
, IRAtom
* a1
) {
741 tl_assert(isShadowAtom(mce
,a1
));
742 return assignNew('V', mce
, Ity_I32
, unop(Iop_Left32
, a1
));
745 static IRAtom
* mkLeft64 ( MCEnv
* mce
, IRAtom
* a1
) {
746 tl_assert(isShadowAtom(mce
,a1
));
747 return assignNew('V', mce
, Ity_I64
, unop(Iop_Left64
, a1
));
750 /* --------- The Right-family of operations. --------- */
752 /* Unfortunately these are a lot more expensive then their Left
753 counterparts. Fortunately they are only very rarely used -- only for
754 count-leading-zeroes instrumentation. */
756 static IRAtom
* mkRight32 ( MCEnv
* mce
, IRAtom
* a1
)
758 for (Int i
= 1; i
<= 16; i
*= 2) {
761 = assignNew('V', mce
, Ity_I32
, binop(Iop_Shr32
, a1
, mkU8(i
)));
762 a1
= assignNew('V', mce
, Ity_I32
, binop(Iop_Or32
, a1
, tmp
));
767 static IRAtom
* mkRight64 ( MCEnv
* mce
, IRAtom
* a1
)
769 for (Int i
= 1; i
<= 32; i
*= 2) {
772 = assignNew('V', mce
, Ity_I64
, binop(Iop_Shr64
, a1
, mkU8(i
)));
773 a1
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, a1
, tmp
));
778 /* --------- 'Improvement' functions for AND/OR. --------- */
780 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
781 defined (0); all other -> undefined (1).
783 static IRAtom
* mkImproveAND1 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
785 tl_assert(isOriginalAtom(mce
, data
));
786 tl_assert(isShadowAtom(mce
, vbits
));
787 tl_assert(sameKindedAtoms(data
, vbits
));
788 return assignNew('V', mce
, Ity_I1
, binop(Iop_Or1
, data
, vbits
));
791 static IRAtom
* mkImproveAND8 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
793 tl_assert(isOriginalAtom(mce
, data
));
794 tl_assert(isShadowAtom(mce
, vbits
));
795 tl_assert(sameKindedAtoms(data
, vbits
));
796 return assignNew('V', mce
, Ity_I8
, binop(Iop_Or8
, data
, vbits
));
799 static IRAtom
* mkImproveAND16 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
801 tl_assert(isOriginalAtom(mce
, data
));
802 tl_assert(isShadowAtom(mce
, vbits
));
803 tl_assert(sameKindedAtoms(data
, vbits
));
804 return assignNew('V', mce
, Ity_I16
, binop(Iop_Or16
, data
, vbits
));
807 static IRAtom
* mkImproveAND32 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
809 tl_assert(isOriginalAtom(mce
, data
));
810 tl_assert(isShadowAtom(mce
, vbits
));
811 tl_assert(sameKindedAtoms(data
, vbits
));
812 return assignNew('V', mce
, Ity_I32
, binop(Iop_Or32
, data
, vbits
));
815 static IRAtom
* mkImproveAND64 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
817 tl_assert(isOriginalAtom(mce
, data
));
818 tl_assert(isShadowAtom(mce
, vbits
));
819 tl_assert(sameKindedAtoms(data
, vbits
));
820 return assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, data
, vbits
));
823 static IRAtom
* mkImproveANDV128 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
825 tl_assert(isOriginalAtom(mce
, data
));
826 tl_assert(isShadowAtom(mce
, vbits
));
827 tl_assert(sameKindedAtoms(data
, vbits
));
828 return assignNew('V', mce
, Ity_V128
, binop(Iop_OrV128
, data
, vbits
));
831 static IRAtom
* mkImproveANDV256 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
833 tl_assert(isOriginalAtom(mce
, data
));
834 tl_assert(isShadowAtom(mce
, vbits
));
835 tl_assert(sameKindedAtoms(data
, vbits
));
836 return assignNew('V', mce
, Ity_V256
, binop(Iop_OrV256
, data
, vbits
));
839 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
840 defined (0); all other -> undefined (1).
842 static IRAtom
* mkImproveOR1 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
844 tl_assert(isOriginalAtom(mce
, data
));
845 tl_assert(isShadowAtom(mce
, vbits
));
846 tl_assert(sameKindedAtoms(data
, vbits
));
850 assignNew('V', mce
, Ity_I1
, unop(Iop_Not1
, data
)),
854 static IRAtom
* mkImproveOR8 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
856 tl_assert(isOriginalAtom(mce
, data
));
857 tl_assert(isShadowAtom(mce
, vbits
));
858 tl_assert(sameKindedAtoms(data
, vbits
));
862 assignNew('V', mce
, Ity_I8
, unop(Iop_Not8
, data
)),
866 static IRAtom
* mkImproveOR16 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
868 tl_assert(isOriginalAtom(mce
, data
));
869 tl_assert(isShadowAtom(mce
, vbits
));
870 tl_assert(sameKindedAtoms(data
, vbits
));
874 assignNew('V', mce
, Ity_I16
, unop(Iop_Not16
, data
)),
878 static IRAtom
* mkImproveOR32 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
880 tl_assert(isOriginalAtom(mce
, data
));
881 tl_assert(isShadowAtom(mce
, vbits
));
882 tl_assert(sameKindedAtoms(data
, vbits
));
886 assignNew('V', mce
, Ity_I32
, unop(Iop_Not32
, data
)),
890 static IRAtom
* mkImproveOR64 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
892 tl_assert(isOriginalAtom(mce
, data
));
893 tl_assert(isShadowAtom(mce
, vbits
));
894 tl_assert(sameKindedAtoms(data
, vbits
));
898 assignNew('V', mce
, Ity_I64
, unop(Iop_Not64
, data
)),
902 static IRAtom
* mkImproveORV128 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
904 tl_assert(isOriginalAtom(mce
, data
));
905 tl_assert(isShadowAtom(mce
, vbits
));
906 tl_assert(sameKindedAtoms(data
, vbits
));
910 assignNew('V', mce
, Ity_V128
, unop(Iop_NotV128
, data
)),
914 static IRAtom
* mkImproveORV256 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
916 tl_assert(isOriginalAtom(mce
, data
));
917 tl_assert(isShadowAtom(mce
, vbits
));
918 tl_assert(sameKindedAtoms(data
, vbits
));
922 assignNew('V', mce
, Ity_V256
, unop(Iop_NotV256
, data
)),
926 /* --------- Pessimising casts. --------- */
928 /* The function returns an expression of type DST_TY. If any of the VBITS
929 is undefined (value == 1) the resulting expression has all bits set to
930 1. Otherwise, all bits are 0. */
932 static IRAtom
* mkPCastTo( MCEnv
* mce
, IRType dst_ty
, IRAtom
* vbits
)
937 /* Note, dst_ty is a shadow type, not an original type. */
938 tl_assert(isShadowAtom(mce
,vbits
));
939 src_ty
= typeOfIRExpr(mce
->sb
->tyenv
, vbits
);
941 /* Fast-track some common cases */
942 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I32
)
943 return assignNew('V', mce
, Ity_I32
, unop(Iop_CmpwNEZ32
, vbits
));
945 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I64
)
946 return assignNew('V', mce
, Ity_I64
, unop(Iop_CmpwNEZ64
, vbits
));
948 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I64
) {
949 /* PCast the arg, then clone it. */
950 IRAtom
* tmp
= assignNew('V', mce
, Ity_I32
, unop(Iop_CmpwNEZ32
, vbits
));
951 return assignNew('V', mce
, Ity_I64
, binop(Iop_32HLto64
, tmp
, tmp
));
954 if (src_ty
== Ity_I32
&& dst_ty
== Ity_V128
) {
955 /* PCast the arg, then clone it 4 times. */
956 IRAtom
* tmp
= assignNew('V', mce
, Ity_I32
, unop(Iop_CmpwNEZ32
, vbits
));
957 tmp
= assignNew('V', mce
, Ity_I64
, binop(Iop_32HLto64
, tmp
, tmp
));
958 return assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
, tmp
, tmp
));
961 if (src_ty
== Ity_I32
&& dst_ty
== Ity_V256
) {
962 /* PCast the arg, then clone it 8 times. */
963 IRAtom
* tmp
= assignNew('V', mce
, Ity_I32
, unop(Iop_CmpwNEZ32
, vbits
));
964 tmp
= assignNew('V', mce
, Ity_I64
, binop(Iop_32HLto64
, tmp
, tmp
));
965 tmp
= assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
, tmp
, tmp
));
966 return assignNew('V', mce
, Ity_V256
, binop(Iop_V128HLtoV256
, tmp
, tmp
));
969 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I32
) {
970 /* PCast the arg. This gives all 0s or all 1s. Then throw away
972 IRAtom
* tmp
= assignNew('V', mce
, Ity_I64
, unop(Iop_CmpwNEZ64
, vbits
));
973 return assignNew('V', mce
, Ity_I32
, unop(Iop_64to32
, tmp
));
976 if (src_ty
== Ity_V128
&& dst_ty
== Ity_I64
) {
977 /* Use InterleaveHI64x2 to copy the top half of the vector into
978 the bottom half. Then we can UifU it with the original, throw
979 away the upper half of the result, and PCast-I64-to-I64
981 // Generates vbits[127:64] : vbits[127:64]
983 = assignNew('V', mce
, Ity_V128
,
984 binop(Iop_InterleaveHI64x2
, vbits
, vbits
));
986 // UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
987 // == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
989 = mkUifUV128(mce
, hi64hi64
, vbits
);
990 // Generates UifU(vbits[127:64],vbits[63:0])
992 = assignNew('V', mce
, Ity_I64
, unop(Iop_V128to64
, lohi64
));
994 // PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
995 // == PCast-to-I64( vbits[127:0] )
997 = assignNew('V', mce
, Ity_I64
, unop(Iop_CmpwNEZ64
, lo64
));
1001 /* Else do it the slow way .. */
1002 /* First of all, collapse vbits down to a single bit. */
1009 tmp1
= assignNew('V', mce
, Ity_I1
, unop(Iop_CmpNEZ8
, vbits
));
1012 tmp1
= assignNew('V', mce
, Ity_I1
, unop(Iop_CmpNEZ16
, vbits
));
1015 tmp1
= assignNew('V', mce
, Ity_I1
, unop(Iop_CmpNEZ32
, vbits
));
1018 tmp1
= assignNew('V', mce
, Ity_I1
, unop(Iop_CmpNEZ64
, vbits
));
1021 /* Gah. Chop it in half, OR the halves together, and compare
1023 IRAtom
* tmp2
= assignNew('V', mce
, Ity_I64
, unop(Iop_128HIto64
, vbits
));
1024 IRAtom
* tmp3
= assignNew('V', mce
, Ity_I64
, unop(Iop_128to64
, vbits
));
1025 IRAtom
* tmp4
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, tmp2
, tmp3
));
1026 tmp1
= assignNew('V', mce
, Ity_I1
,
1027 unop(Iop_CmpNEZ64
, tmp4
));
1031 /* Chop it in half, OR the halves together, and compare that
1034 IRAtom
* tmp2
= assignNew('V', mce
, Ity_I64
, unop(Iop_V128HIto64
, vbits
));
1035 IRAtom
* tmp3
= assignNew('V', mce
, Ity_I64
, unop(Iop_V128to64
, vbits
));
1036 IRAtom
* tmp4
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, tmp2
, tmp3
));
1037 tmp1
= assignNew('V', mce
, Ity_I1
,
1038 unop(Iop_CmpNEZ64
, tmp4
));
1043 VG_(tool_panic
)("mkPCastTo(1)");
1046 /* Now widen up to the dst type. */
1051 return assignNew('V', mce
, Ity_I8
, unop(Iop_1Sto8
, tmp1
));
1053 return assignNew('V', mce
, Ity_I16
, unop(Iop_1Sto16
, tmp1
));
1055 return assignNew('V', mce
, Ity_I32
, unop(Iop_1Sto32
, tmp1
));
1057 return assignNew('V', mce
, Ity_I64
, unop(Iop_1Sto64
, tmp1
));
1059 tmp1
= assignNew('V', mce
, Ity_I64
, unop(Iop_1Sto64
, tmp1
));
1060 tmp1
= assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
, tmp1
, tmp1
));
1063 tmp1
= assignNew('V', mce
, Ity_I64
, unop(Iop_1Sto64
, tmp1
));
1064 tmp1
= assignNew('V', mce
, Ity_I128
, binop(Iop_64HLto128
, tmp1
, tmp1
));
1067 tmp1
= assignNew('V', mce
, Ity_I64
, unop(Iop_1Sto64
, tmp1
));
1068 tmp1
= assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
,
1070 tmp1
= assignNew('V', mce
, Ity_V256
, binop(Iop_V128HLtoV256
,
1075 VG_(tool_panic
)("mkPCastTo(2)");
1079 /* This is a minor variant. It takes an arg of some type and returns
1080 a value of the same type. The result consists entirely of Defined
1081 (zero) bits except its least significant bit, which is a PCast of
1082 the entire argument down to a single bit. */
1083 static IRAtom
* mkPCastXXtoXXlsb ( MCEnv
* mce
, IRAtom
* varg
, IRType ty
)
1085 if (ty
== Ity_V128
) {
1086 /* --- Case for V128 --- */
1087 IRAtom
* varg128
= varg
;
1088 // generates: PCast-to-I64(varg128)
1089 IRAtom
* pcdTo64
= mkPCastTo(mce
, Ity_I64
, varg128
);
1090 // Now introduce zeros (defined bits) in the top 63 places
1091 // generates: Def--(63)--Def PCast-to-I1(varg128)
1093 = assignNew('V', mce
, Ity_I64
, binop(Iop_And64
, pcdTo64
, mkU64(1)));
1094 // generates: Def--(64)--Def
1096 = definedOfType(Ity_I64
);
1097 // generates: Def--(127)--Def PCast-to-I1(varg128)
1099 = assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
, d64
, d63pc
));
1102 if (ty
== Ity_I64
) {
1103 /* --- Case for I64 --- */
1105 IRAtom
* pcd
= mkPCastTo(mce
, Ity_I64
, varg
);
1106 // Zero (Def) out the top 63 bits
1108 = assignNew('V', mce
, Ity_I64
, binop(Iop_And64
, pcd
, mkU64(1)));
1115 /* --------- Optimistic casts. --------- */
1117 /* The function takes and returns an expression of type TY. If any of the
1118 VBITS indicate defined (value == 0) the resulting expression has all bits
1119 set to 0. Otherwise, all bits are 1. In words, if any bits are defined
1120 then all bits are made to be defined.
1122 In short we compute (vbits - (vbits >>u 1)) >>s (bitsize(vbits)-1).
1124 static IRAtom
* mkOCastAt( MCEnv
* mce
, IRType ty
, IRAtom
* vbits
)
1126 IROp opSUB
, opSHR
, opSAR
;
1131 opSUB
= Iop_Sub64
; opSHR
= Iop_Shr64
; opSAR
= Iop_Sar64
; sh
= 63;
1134 opSUB
= Iop_Sub32
; opSHR
= Iop_Shr32
; opSAR
= Iop_Sar32
; sh
= 31;
1137 opSUB
= Iop_Sub16
; opSHR
= Iop_Shr16
; opSAR
= Iop_Sar16
; sh
= 15;
1140 opSUB
= Iop_Sub8
; opSHR
= Iop_Shr8
; opSAR
= Iop_Sar8
; sh
= 7;
1144 VG_(tool_panic
)("mkOCastTo");
1148 shr1
= assignNew('V', mce
,ty
, binop(opSHR
, vbits
, mkU8(1)));
1149 at
= assignNew('V', mce
,ty
, binop(opSUB
, vbits
, shr1
));
1150 at
= assignNew('V', mce
,ty
, binop(opSAR
, at
, mkU8(sh
)));
1155 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
1157 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
1158 PCasting to Ity_U1. However, sometimes it is necessary to be more
1159 accurate. The insight is that the result is defined if two
1160 corresponding bits can be found, one from each argument, so that
1161 both bits are defined but are different -- that makes EQ say "No"
1162 and NE say "Yes". Hence, we compute an improvement term and DifD
1163 it onto the "normal" (UifU) result.
1178 vec contains 0 (defined) bits where the corresponding arg bits
1179 are defined but different, and 1 bits otherwise.
1181 vec = Or<sz>( vxx, // 0 iff bit defined
1182 vyy, // 0 iff bit defined
1183 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
1186 If any bit of vec is 0, the result is defined and so the
1187 improvement term should produce 0...0, else it should produce
1190 Hence require for the improvement term:
1192 OCast(vec) = if vec == 1...1 then 1...1 else 0...0
1194 which you can think of as an "optimistic cast" (OCast, the opposite of
1195 the normal "pessimistic cast" (PCast) family. An OCast says all bits
1196 are defined if any bit is defined.
1198 It is possible to show that
1200 if vec == 1...1 then 1...1 else 0...0
1202 can be implemented in straight-line code as
1204 (vec - (vec >>u 1)) >>s (word-size-in-bits - 1)
1206 We note that vec contains the sub-term Or<sz>(vxx, vyy). Since UifU is
1207 implemented with Or (since 1 signifies undefinedness), this is a
1208 duplicate of the UifU<sz>(vxx, vyy) term and so we can CSE it out, giving
1211 let naive = UifU<sz>(vxx, vyy)
1212 vec = Or<sz>(naive, Not<sz>(Xor<sz>(xx, yy)))
1214 PCastTo<1>( DifD<sz>(naive, OCast<sz>(vec)) )
1216 This was extensively re-analysed and checked on 6 July 05 and again
1219 static IRAtom
* expensiveCmpEQorNE ( MCEnv
* mce
,
1221 IRAtom
* vxx
, IRAtom
* vyy
,
1222 IRAtom
* xx
, IRAtom
* yy
)
1224 IRAtom
*naive
, *vec
, *improved
, *final_cast
;
1225 IROp opDIFD
, opUIFU
, opOR
, opXOR
, opNOT
;
1227 tl_assert(isShadowAtom(mce
,vxx
));
1228 tl_assert(isShadowAtom(mce
,vyy
));
1229 tl_assert(isOriginalAtom(mce
,xx
));
1230 tl_assert(isOriginalAtom(mce
,yy
));
1231 tl_assert(sameKindedAtoms(vxx
,xx
));
1232 tl_assert(sameKindedAtoms(vyy
,yy
));
1264 VG_(tool_panic
)("expensiveCmpEQorNE");
1268 = assignNew('V', mce
, ty
, binop(opUIFU
, vxx
, vyy
));
1278 assignNew('V', mce
,ty
, binop(opXOR
, xx
, yy
))))));
1281 = assignNew( 'V', mce
,ty
,
1282 binop(opDIFD
, naive
, mkOCastAt(mce
, ty
, vec
)));
1285 = mkPCastTo( mce
, Ity_I1
, improved
);
1291 /* --------- Semi-accurate interpretation of CmpORD. --------- */
1293 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
1295 CmpORD32S(x,y) = 1<<3 if x <s y
1299 and similarly the unsigned variant. The default interpretation is:
1301 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
1304 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
1305 are zero and therefore defined (viz, zero).
1307 Also deal with a special case better:
1311 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1312 will be defined even if the rest of x isn't. In which case we do:
1314 CmpORD32S#(x,x#,0,{impliedly 0}#)
1315 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1316 | (x# >>u 31) << 3 -- LT# = x#[31]
1318 Analogous handling for CmpORD64{S,U}.
1320 static Bool
isZeroU32 ( IRAtom
* e
)
1323 toBool( e
->tag
== Iex_Const
1324 && e
->Iex
.Const
.con
->tag
== Ico_U32
1325 && e
->Iex
.Const
.con
->Ico
.U32
== 0 );
1328 static Bool
isZeroU64 ( IRAtom
* e
)
1331 toBool( e
->tag
== Iex_Const
1332 && e
->Iex
.Const
.con
->tag
== Ico_U64
1333 && e
->Iex
.Const
.con
->Ico
.U64
== 0 );
1336 static IRAtom
* doCmpORD ( MCEnv
* mce
,
1338 IRAtom
* xxhash
, IRAtom
* yyhash
,
1339 IRAtom
* xx
, IRAtom
* yy
)
1341 Bool m64
= cmp_op
== Iop_CmpORD64S
|| cmp_op
== Iop_CmpORD64U
;
1342 Bool syned
= cmp_op
== Iop_CmpORD64S
|| cmp_op
== Iop_CmpORD32S
;
1343 IROp opOR
= m64
? Iop_Or64
: Iop_Or32
;
1344 IROp opAND
= m64
? Iop_And64
: Iop_And32
;
1345 IROp opSHL
= m64
? Iop_Shl64
: Iop_Shl32
;
1346 IROp opSHR
= m64
? Iop_Shr64
: Iop_Shr32
;
1347 IROp op1UtoWS
= m64
? Iop_1Uto64
: Iop_1Uto32
;
1348 IRType ty
= m64
? Ity_I64
: Ity_I32
;
1349 Int width
= m64
? 64 : 32;
1351 Bool (*isZero
)(IRAtom
*) = m64
? isZeroU64
: isZeroU32
;
1353 tl_assert(isShadowAtom(mce
,xxhash
));
1354 tl_assert(isShadowAtom(mce
,yyhash
));
1355 tl_assert(isOriginalAtom(mce
,xx
));
1356 tl_assert(isOriginalAtom(mce
,yy
));
1357 tl_assert(sameKindedAtoms(xxhash
,xx
));
1358 tl_assert(sameKindedAtoms(yyhash
,yy
));
1359 tl_assert(cmp_op
== Iop_CmpORD32S
|| cmp_op
== Iop_CmpORD32U
1360 || cmp_op
== Iop_CmpORD64S
|| cmp_op
== Iop_CmpORD64U
);
1363 ppIROp(cmp_op
); VG_(printf
)(" ");
1364 ppIRExpr(xx
); VG_(printf
)(" "); ppIRExpr( yy
); VG_(printf
)("\n");
1367 if (syned
&& isZero(yy
)) {
1368 /* fancy interpretation */
1369 /* if yy is zero, then it must be fully defined (zero#). */
1370 tl_assert(isZero(yyhash
));
1371 // This is still inaccurate, but I don't think it matters, since
1372 // nobody writes code of the form
1373 // "is <partially-undefined-value> signedly greater than zero?".
1374 // We therefore simply declare "x >s 0" to be undefined if any bit in
1375 // x is undefined. That's clearly suboptimal in some cases. Eg, if
1376 // the highest order bit is a defined 1 then x is negative so it
1377 // doesn't matter whether the remaining bits are defined or not.
1383 mkPCastTo(mce
,ty
, xxhash
),
1384 m64
? mkU64(1<<2) : mkU32(1<<2)
1386 // For "x <s 0", we can just copy the definedness of the top bit of x
1387 // and we have a precise result.
1395 binop(opSHR
, xxhash
, mkU8(width
-1))),
1398 // For "x == 0" we can hand the problem off to expensiveCmpEQorNE.
1404 assignNew('V', mce
,ty
,
1407 expensiveCmpEQorNE(mce
, ty
, xxhash
, yyhash
, xx
, yy
))
1414 assignNew('V', mce
,ty
, binop(opOR
, t_lt_0_0_0
, t_0_gt_0_0
)),
1418 /* standard interpretation */
1419 IRAtom
* sevenLeft1
= m64
? mkU64(7<<1) : mkU32(7<<1);
1424 mkUifU(mce
,ty
, xxhash
,yyhash
)),
1431 /*------------------------------------------------------------*/
1432 /*--- Emit a test and complaint if something is undefined. ---*/
1433 /*------------------------------------------------------------*/
1435 static IRAtom
* schemeE ( MCEnv
* mce
, IRExpr
* e
); /* fwds */
1438 /* Set the annotations on a dirty helper to indicate that the stack
1439 pointer and instruction pointers might be read. This is the
1440 behaviour of all 'emit-a-complaint' style functions we might
1443 static void setHelperAnns ( MCEnv
* mce
, IRDirty
* di
) {
1445 di
->fxState
[0].fx
= Ifx_Read
;
1446 di
->fxState
[0].offset
= mce
->layout
->offset_SP
;
1447 di
->fxState
[0].size
= mce
->layout
->sizeof_SP
;
1448 di
->fxState
[0].nRepeats
= 0;
1449 di
->fxState
[0].repeatLen
= 0;
1450 di
->fxState
[1].fx
= Ifx_Read
;
1451 di
->fxState
[1].offset
= mce
->layout
->offset_IP
;
1452 di
->fxState
[1].size
= mce
->layout
->sizeof_IP
;
1453 di
->fxState
[1].nRepeats
= 0;
1454 di
->fxState
[1].repeatLen
= 0;
1458 /* Check the supplied *original* |atom| for undefinedness, and emit a
1459 complaint if so. Once that happens, mark it as defined. This is
1460 possible because the atom is either a tmp or literal. If it's a
1461 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1462 be defined. In fact as mentioned above, we will have to allocate a
1463 new tmp to carry the new 'defined' shadow value, and update the
1464 original->tmp mapping accordingly; we cannot simply assign a new
1465 value to an existing shadow tmp as this breaks SSAness.
1467 The checks are performed, any resulting complaint emitted, and
1468 |atom|'s shadow temp set to 'defined', ONLY in the case that
1469 |guard| evaluates to True at run-time. If it evaluates to False
1470 then no action is performed. If |guard| is NULL (the usual case)
1471 then it is assumed to be always-true, and hence these actions are
1472 performed unconditionally.
1474 This routine does not generate code to check the definedness of
1475 |guard|. The caller is assumed to have taken care of that already.
1477 static void complainIfUndefined ( MCEnv
* mce
, IRAtom
* atom
, IRExpr
*guard
)
1490 // Don't do V bit tests if we're not reporting undefined value errors.
1491 if (MC_(clo_mc_level
) == 1)
1495 tl_assert(isOriginalAtom(mce
, guard
));
1497 /* Since the original expression is atomic, there's no duplicated
1498 work generated by making multiple V-expressions for it. So we
1499 don't really care about the possibility that someone else may
1500 also create a V-interpretion for it. */
1501 tl_assert(isOriginalAtom(mce
, atom
));
1502 vatom
= expr2vbits( mce
, atom
, HuOth
);
1503 tl_assert(isShadowAtom(mce
, vatom
));
1504 tl_assert(sameKindedAtoms(atom
, vatom
));
1506 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vatom
);
1508 /* sz is only used for constructing the error message */
1509 sz
= ty
==Ity_I1
? 0 : sizeofIRType(ty
);
1511 cond
= mkPCastTo( mce
, Ity_I1
, vatom
);
1512 /* cond will be 0 if all defined, and 1 if any not defined. */
1514 /* Get the origin info for the value we are about to check. At
1515 least, if we are doing origin tracking. If not, use a dummy
1517 if (MC_(clo_mc_level
) == 3) {
1518 origin
= schemeE( mce
, atom
);
1519 if (mce
->hWordTy
== Ity_I64
) {
1520 origin
= assignNew( 'B', mce
, Ity_I64
, unop(Iop_32Uto64
, origin
) );
1534 fn
= &MC_(helperc_value_check0_fail_w_o
);
1535 nm
= "MC_(helperc_value_check0_fail_w_o)";
1536 args
= mkIRExprVec_1(origin
);
1539 fn
= &MC_(helperc_value_check0_fail_no_o
);
1540 nm
= "MC_(helperc_value_check0_fail_no_o)";
1541 args
= mkIRExprVec_0();
1547 fn
= &MC_(helperc_value_check1_fail_w_o
);
1548 nm
= "MC_(helperc_value_check1_fail_w_o)";
1549 args
= mkIRExprVec_1(origin
);
1552 fn
= &MC_(helperc_value_check1_fail_no_o
);
1553 nm
= "MC_(helperc_value_check1_fail_no_o)";
1554 args
= mkIRExprVec_0();
1560 fn
= &MC_(helperc_value_check4_fail_w_o
);
1561 nm
= "MC_(helperc_value_check4_fail_w_o)";
1562 args
= mkIRExprVec_1(origin
);
1565 fn
= &MC_(helperc_value_check4_fail_no_o
);
1566 nm
= "MC_(helperc_value_check4_fail_no_o)";
1567 args
= mkIRExprVec_0();
1573 fn
= &MC_(helperc_value_check8_fail_w_o
);
1574 nm
= "MC_(helperc_value_check8_fail_w_o)";
1575 args
= mkIRExprVec_1(origin
);
1578 fn
= &MC_(helperc_value_check8_fail_no_o
);
1579 nm
= "MC_(helperc_value_check8_fail_no_o)";
1580 args
= mkIRExprVec_0();
1587 fn
= &MC_(helperc_value_checkN_fail_w_o
);
1588 nm
= "MC_(helperc_value_checkN_fail_w_o)";
1589 args
= mkIRExprVec_2( mkIRExpr_HWord( sz
), origin
);
1592 fn
= &MC_(helperc_value_checkN_fail_no_o
);
1593 nm
= "MC_(helperc_value_checkN_fail_no_o)";
1594 args
= mkIRExprVec_1( mkIRExpr_HWord( sz
) );
1599 VG_(tool_panic
)("unexpected szB");
1605 tl_assert(nargs
>= 0 && nargs
<= 2);
1606 tl_assert( (MC_(clo_mc_level
) == 3 && origin
!= NULL
)
1607 || (MC_(clo_mc_level
) == 2 && origin
== NULL
) );
1609 di
= unsafeIRDirty_0_N( nargs
/*regparms*/, nm
,
1610 VG_(fnptr_to_fnentry
)( fn
), args
);
1611 di
->guard
= cond
; // and cond is PCast-to-1(atom#)
1613 /* If the complaint is to be issued under a guard condition, AND
1614 that into the guard condition for the helper call. */
1616 IRAtom
*g1
= assignNew('V', mce
, Ity_I32
, unop(Iop_1Uto32
, di
->guard
));
1617 IRAtom
*g2
= assignNew('V', mce
, Ity_I32
, unop(Iop_1Uto32
, guard
));
1618 IRAtom
*e
= assignNew('V', mce
, Ity_I32
, binop(Iop_And32
, g1
, g2
));
1619 di
->guard
= assignNew('V', mce
, Ity_I1
, unop(Iop_32to1
, e
));
1622 setHelperAnns( mce
, di
);
1623 stmt( 'V', mce
, IRStmt_Dirty(di
));
1625 /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
1626 defined -- but only in the case where the guard evaluates to
1627 True at run-time. Do the update by setting the orig->shadow
1628 mapping for tmp to reflect the fact that this shadow is getting
1630 tl_assert(isIRAtom(vatom
));
1631 /* sameKindedAtoms ... */
1632 if (vatom
->tag
== Iex_RdTmp
) {
1633 tl_assert(atom
->tag
== Iex_RdTmp
);
1634 if (guard
== NULL
) {
1635 // guard is 'always True', hence update unconditionally
1636 newShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
);
1637 assign('V', mce
, findShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
),
1640 // update the temp only conditionally. Do this by copying
1641 // its old value when the guard is False.
1643 IRTemp old_tmpV
= findShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
);
1644 newShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
);
1646 = assignNew('V', mce
, shadowTypeV(ty
),
1647 IRExpr_ITE(guard
, definedOfType(ty
),
1649 assign('V', mce
, findShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
), new_tmpV
);
1655 /*------------------------------------------------------------*/
1656 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1657 /*------------------------------------------------------------*/
1659 /* Examine the always-defined sections declared in layout to see if
1660 the (offset,size) section is within one. Note, it is an error to
1661 partially fall into such a region: (offset,size) should either be
1662 completely in such a region or completely not-in such a region.
1664 static Bool
isAlwaysDefd ( MCEnv
* mce
, Int offset
, Int size
)
1666 Int minoffD
, maxoffD
, i
;
1667 Int minoff
= offset
;
1668 Int maxoff
= minoff
+ size
- 1;
1669 tl_assert((minoff
& ~0xFFFF) == 0);
1670 tl_assert((maxoff
& ~0xFFFF) == 0);
1672 for (i
= 0; i
< mce
->layout
->n_alwaysDefd
; i
++) {
1673 minoffD
= mce
->layout
->alwaysDefd
[i
].offset
;
1674 maxoffD
= minoffD
+ mce
->layout
->alwaysDefd
[i
].size
- 1;
1675 tl_assert((minoffD
& ~0xFFFF) == 0);
1676 tl_assert((maxoffD
& ~0xFFFF) == 0);
1678 if (maxoff
< minoffD
|| maxoffD
< minoff
)
1679 continue; /* no overlap */
1680 if (minoff
>= minoffD
&& maxoff
<= maxoffD
)
1681 return True
; /* completely contained in an always-defd section */
1683 VG_(tool_panic
)("memcheck:isAlwaysDefd:partial overlap");
1685 return False
; /* could not find any containing section */
1689 /* Generate into bb suitable actions to shadow this Put. If the state
1690 slice is marked 'always defined', do nothing. Otherwise, write the
1691 supplied V bits to the shadow state. We can pass in either an
1692 original atom or a V-atom, but not both. In the former case the
1693 relevant V-bits are then generated from the original.
1694 We assume here, that the definedness of GUARD has already been checked.
1697 void do_shadow_PUT ( MCEnv
* mce
, Int offset
,
1698 IRAtom
* atom
, IRAtom
* vatom
, IRExpr
*guard
)
1702 // Don't do shadow PUTs if we're not doing undefined value checking.
1703 // Their absence lets Vex's optimiser remove all the shadow computation
1704 // that they depend on, which includes GETs of the shadow registers.
1705 if (MC_(clo_mc_level
) == 1)
1710 tl_assert(isOriginalAtom(mce
, atom
));
1711 vatom
= expr2vbits( mce
, atom
, HuOth
);
1714 tl_assert(isShadowAtom(mce
, vatom
));
1717 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vatom
);
1718 tl_assert(ty
!= Ity_I1
);
1719 if (isAlwaysDefd(mce
, offset
, sizeofIRType(ty
))) {
1721 /* emit code to emit a complaint if any of the vbits are 1. */
1722 /* complainIfUndefined(mce, atom); */
1724 /* Do a plain shadow Put. */
1726 /* If the guard expression evaluates to false we simply Put the value
1727 that is already stored in the guest state slot */
1728 IRAtom
*cond
, *iffalse
;
1730 cond
= assignNew('V', mce
, Ity_I1
, guard
);
1731 iffalse
= assignNew('V', mce
, ty
,
1732 IRExpr_Get(offset
+ mce
->layout
->total_sizeB
, ty
));
1733 vatom
= assignNew('V', mce
, ty
, IRExpr_ITE(cond
, vatom
, iffalse
));
1735 stmt( 'V', mce
, IRStmt_Put( offset
+ mce
->layout
->total_sizeB
, vatom
));
1740 /* Return an expression which contains the V bits corresponding to the
1741 given GETI (passed in in pieces).
1744 void do_shadow_PUTI ( MCEnv
* mce
, IRPutI
*puti
)
1749 IRRegArray
* descr
= puti
->descr
;
1750 IRAtom
* ix
= puti
->ix
;
1751 Int bias
= puti
->bias
;
1752 IRAtom
* atom
= puti
->data
;
1754 // Don't do shadow PUTIs if we're not doing undefined value checking.
1755 // Their absence lets Vex's optimiser remove all the shadow computation
1756 // that they depend on, which includes GETIs of the shadow registers.
1757 if (MC_(clo_mc_level
) == 1)
1760 tl_assert(isOriginalAtom(mce
,atom
));
1761 vatom
= expr2vbits( mce
, atom
, HuOth
);
1762 tl_assert(sameKindedAtoms(atom
, vatom
));
1764 tyS
= shadowTypeV(ty
);
1765 arrSize
= descr
->nElems
* sizeofIRType(ty
);
1766 tl_assert(ty
!= Ity_I1
);
1767 tl_assert(isOriginalAtom(mce
,ix
));
1768 complainIfUndefined(mce
, ix
, NULL
);
1769 if (isAlwaysDefd(mce
, descr
->base
, arrSize
)) {
1771 /* emit code to emit a complaint if any of the vbits are 1. */
1772 /* complainIfUndefined(mce, atom); */
1774 /* Do a cloned version of the Put that refers to the shadow
1776 IRRegArray
* new_descr
1777 = mkIRRegArray( descr
->base
+ mce
->layout
->total_sizeB
,
1778 tyS
, descr
->nElems
);
1779 stmt( 'V', mce
, IRStmt_PutI( mkIRPutI(new_descr
, ix
, bias
, vatom
) ));
1784 /* Return an expression which contains the V bits corresponding to the
1785 given GET (passed in in pieces).
1788 IRExpr
* shadow_GET ( MCEnv
* mce
, Int offset
, IRType ty
)
1790 IRType tyS
= shadowTypeV(ty
);
1791 tl_assert(ty
!= Ity_I1
);
1792 tl_assert(ty
!= Ity_I128
);
1793 if (isAlwaysDefd(mce
, offset
, sizeofIRType(ty
))) {
1794 /* Always defined, return all zeroes of the relevant type */
1795 return definedOfType(tyS
);
1797 /* return a cloned version of the Get that refers to the shadow
1799 /* FIXME: this isn't an atom! */
1800 return IRExpr_Get( offset
+ mce
->layout
->total_sizeB
, tyS
);
1805 /* Return an expression which contains the V bits corresponding to the
1806 given GETI (passed in in pieces).
1809 IRExpr
* shadow_GETI ( MCEnv
* mce
,
1810 IRRegArray
* descr
, IRAtom
* ix
, Int bias
)
1812 IRType ty
= descr
->elemTy
;
1813 IRType tyS
= shadowTypeV(ty
);
1814 Int arrSize
= descr
->nElems
* sizeofIRType(ty
);
1815 tl_assert(ty
!= Ity_I1
);
1816 tl_assert(isOriginalAtom(mce
,ix
));
1817 complainIfUndefined(mce
, ix
, NULL
);
1818 if (isAlwaysDefd(mce
, descr
->base
, arrSize
)) {
1819 /* Always defined, return all zeroes of the relevant type */
1820 return definedOfType(tyS
);
1822 /* return a cloned version of the Get that refers to the shadow
1824 IRRegArray
* new_descr
1825 = mkIRRegArray( descr
->base
+ mce
->layout
->total_sizeB
,
1826 tyS
, descr
->nElems
);
1827 return IRExpr_GetI( new_descr
, ix
, bias
);
1832 /*------------------------------------------------------------*/
1833 /*--- Generating approximations for unknown operations, ---*/
1834 /*--- using lazy-propagate semantics ---*/
1835 /*------------------------------------------------------------*/
1837 /* Lazy propagation of undefinedness from two values, resulting in the
1838 specified shadow type.
1841 IRAtom
* mkLazy2 ( MCEnv
* mce
, IRType finalVty
, IRAtom
* va1
, IRAtom
* va2
)
1844 IRType t1
= typeOfIRExpr(mce
->sb
->tyenv
, va1
);
1845 IRType t2
= typeOfIRExpr(mce
->sb
->tyenv
, va2
);
1846 tl_assert(isShadowAtom(mce
,va1
));
1847 tl_assert(isShadowAtom(mce
,va2
));
1849 /* The general case is inefficient because PCast is an expensive
1850 operation. Here are some special cases which use PCast only
1851 once rather than twice. */
1853 /* I64 x I64 -> I64 */
1854 if (t1
== Ity_I64
&& t2
== Ity_I64
&& finalVty
== Ity_I64
) {
1855 if (0) VG_(printf
)("mkLazy2: I64 x I64 -> I64\n");
1856 at
= mkUifU(mce
, Ity_I64
, va1
, va2
);
1857 at
= mkPCastTo(mce
, Ity_I64
, at
);
1861 /* I64 x I64 -> I32 */
1862 if (t1
== Ity_I64
&& t2
== Ity_I64
&& finalVty
== Ity_I32
) {
1863 if (0) VG_(printf
)("mkLazy2: I64 x I64 -> I32\n");
1864 at
= mkUifU(mce
, Ity_I64
, va1
, va2
);
1865 at
= mkPCastTo(mce
, Ity_I32
, at
);
1869 /* I32 x I32 -> I32 */
1870 if (t1
== Ity_I32
&& t2
== Ity_I32
&& finalVty
== Ity_I32
) {
1871 if (0) VG_(printf
)("mkLazy2: I32 x I32 -> I32\n");
1872 at
= mkUifU(mce
, Ity_I32
, va1
, va2
);
1873 at
= mkPCastTo(mce
, Ity_I32
, at
);
1878 VG_(printf
)("mkLazy2 ");
1887 /* General case: force everything via 32-bit intermediaries. */
1888 at
= mkPCastTo(mce
, Ity_I32
, va1
);
1889 at
= mkUifU(mce
, Ity_I32
, at
, mkPCastTo(mce
, Ity_I32
, va2
));
1890 at
= mkPCastTo(mce
, finalVty
, at
);
1895 /* 3-arg version of the above. */
1897 IRAtom
* mkLazy3 ( MCEnv
* mce
, IRType finalVty
,
1898 IRAtom
* va1
, IRAtom
* va2
, IRAtom
* va3
)
1901 IRType t1
= typeOfIRExpr(mce
->sb
->tyenv
, va1
);
1902 IRType t2
= typeOfIRExpr(mce
->sb
->tyenv
, va2
);
1903 IRType t3
= typeOfIRExpr(mce
->sb
->tyenv
, va3
);
1904 tl_assert(isShadowAtom(mce
,va1
));
1905 tl_assert(isShadowAtom(mce
,va2
));
1906 tl_assert(isShadowAtom(mce
,va3
));
1908 /* The general case is inefficient because PCast is an expensive
1909 operation. Here are some special cases which use PCast only
1910 twice rather than three times. */
1912 /* I32 x I64 x I64 -> I64 */
1913 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1914 if (t1
== Ity_I32
&& t2
== Ity_I64
&& t3
== Ity_I64
1915 && finalVty
== Ity_I64
) {
1916 if (0) VG_(printf
)("mkLazy3: I32 x I64 x I64 -> I64\n");
1917 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1918 mode indication which is fully defined, this should get
1919 folded out later. */
1920 at
= mkPCastTo(mce
, Ity_I64
, va1
);
1921 /* Now fold in 2nd and 3rd args. */
1922 at
= mkUifU(mce
, Ity_I64
, at
, va2
);
1923 at
= mkUifU(mce
, Ity_I64
, at
, va3
);
1924 /* and PCast once again. */
1925 at
= mkPCastTo(mce
, Ity_I64
, at
);
1929 /* I32 x I8 x I64 -> I64 */
1930 if (t1
== Ity_I32
&& t2
== Ity_I8
&& t3
== Ity_I64
1931 && finalVty
== Ity_I64
) {
1932 if (0) VG_(printf
)("mkLazy3: I32 x I8 x I64 -> I64\n");
1933 /* Widen 1st and 2nd args to I64. Since 1st arg is typically a
1934 * rounding mode indication which is fully defined, this should
1935 * get folded out later.
1937 IRAtom
* at1
= mkPCastTo(mce
, Ity_I64
, va1
);
1938 IRAtom
* at2
= mkPCastTo(mce
, Ity_I64
, va2
);
1939 at
= mkUifU(mce
, Ity_I64
, at1
, at2
); // UifU(PCast(va1), PCast(va2))
1940 at
= mkUifU(mce
, Ity_I64
, at
, va3
);
1941 /* and PCast once again. */
1942 at
= mkPCastTo(mce
, Ity_I64
, at
);
1946 /* I32 x I64 x I64 -> I32 */
1947 if (t1
== Ity_I32
&& t2
== Ity_I64
&& t3
== Ity_I64
1948 && finalVty
== Ity_I32
) {
1949 if (0) VG_(printf
)("mkLazy3: I32 x I64 x I64 -> I32\n");
1950 at
= mkPCastTo(mce
, Ity_I64
, va1
);
1951 at
= mkUifU(mce
, Ity_I64
, at
, va2
);
1952 at
= mkUifU(mce
, Ity_I64
, at
, va3
);
1953 at
= mkPCastTo(mce
, Ity_I32
, at
);
1957 /* I32 x I32 x I32 -> I32 */
1958 /* 32-bit FP idiom, as (eg) happens on ARM */
1959 if (t1
== Ity_I32
&& t2
== Ity_I32
&& t3
== Ity_I32
1960 && finalVty
== Ity_I32
) {
1961 if (0) VG_(printf
)("mkLazy3: I32 x I32 x I32 -> I32\n");
1963 at
= mkUifU(mce
, Ity_I32
, at
, va2
);
1964 at
= mkUifU(mce
, Ity_I32
, at
, va3
);
1965 at
= mkPCastTo(mce
, Ity_I32
, at
);
1969 /* I32 x I16 x I16 -> I16 */
1970 /* 16-bit half-precision FP idiom, as (eg) happens on arm64 v8.2 onwards */
1971 if (t1
== Ity_I32
&& t2
== Ity_I16
&& t3
== Ity_I16
1972 && finalVty
== Ity_I16
) {
1973 if (0) VG_(printf
)("mkLazy3: I32 x I16 x I16 -> I16\n");
1974 at
= mkPCastTo(mce
, Ity_I16
, va1
);
1975 at
= mkUifU(mce
, Ity_I16
, at
, va2
);
1976 at
= mkUifU(mce
, Ity_I16
, at
, va3
);
1977 at
= mkPCastTo(mce
, Ity_I16
, at
);
1981 /* I32 x I128 x I128 -> I128 */
1982 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1983 if (t1
== Ity_I32
&& t2
== Ity_I128
&& t3
== Ity_I128
1984 && finalVty
== Ity_I128
) {
1985 if (0) VG_(printf
)("mkLazy3: I32 x I128 x I128 -> I128\n");
1986 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1987 mode indication which is fully defined, this should get
1988 folded out later. */
1989 at
= mkPCastTo(mce
, Ity_I128
, va1
);
1990 /* Now fold in 2nd and 3rd args. */
1991 at
= mkUifU(mce
, Ity_I128
, at
, va2
);
1992 at
= mkUifU(mce
, Ity_I128
, at
, va3
);
1993 /* and PCast once again. */
1994 at
= mkPCastTo(mce
, Ity_I128
, at
);
1998 /* I32 x I8 x I128 -> I128 */
1999 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
2000 if (t1
== Ity_I32
&& t2
== Ity_I8
&& t3
== Ity_I128
2001 && finalVty
== Ity_I128
) {
2002 if (0) VG_(printf
)("mkLazy3: I32 x I8 x I128 -> I128\n");
2003 /* Use I64 as an intermediate type, which means PCasting all 3
2004 args to I64 to start with. 1st arg is typically a rounding
2005 mode indication which is fully defined, so we hope that it
2006 will get folded out later. */
2007 IRAtom
* at1
= mkPCastTo(mce
, Ity_I64
, va1
);
2008 IRAtom
* at2
= mkPCastTo(mce
, Ity_I64
, va2
);
2009 IRAtom
* at3
= mkPCastTo(mce
, Ity_I64
, va3
);
2010 /* Now UifU all three together. */
2011 at
= mkUifU(mce
, Ity_I64
, at1
, at2
); // UifU(PCast(va1), PCast(va2))
2012 at
= mkUifU(mce
, Ity_I64
, at
, at3
); // ... `UifU` PCast(va3)
2013 /* and PCast once again. */
2014 at
= mkPCastTo(mce
, Ity_I128
, at
);
2018 VG_(printf
)("mkLazy3: ");
2024 VG_(printf
)(" -> ");
2030 /* General case: force everything via 32-bit intermediaries. */
2032 at = mkPCastTo(mce, Ity_I32, va1);
2033 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
2034 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
2035 at = mkPCastTo(mce, finalVty, at);
2041 /* 4-arg version of the above. */
2043 IRAtom
* mkLazy4 ( MCEnv
* mce
, IRType finalVty
,
2044 IRAtom
* va1
, IRAtom
* va2
, IRAtom
* va3
, IRAtom
* va4
)
2047 IRType t1
= typeOfIRExpr(mce
->sb
->tyenv
, va1
);
2048 IRType t2
= typeOfIRExpr(mce
->sb
->tyenv
, va2
);
2049 IRType t3
= typeOfIRExpr(mce
->sb
->tyenv
, va3
);
2050 IRType t4
= typeOfIRExpr(mce
->sb
->tyenv
, va4
);
2051 tl_assert(isShadowAtom(mce
,va1
));
2052 tl_assert(isShadowAtom(mce
,va2
));
2053 tl_assert(isShadowAtom(mce
,va3
));
2054 tl_assert(isShadowAtom(mce
,va4
));
2056 /* The general case is inefficient because PCast is an expensive
2057 operation. Here are some special cases which use PCast only
2058 twice rather than three times. */
2060 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
2062 if (t1
== Ity_I32
&& t2
== Ity_I128
&& t3
== Ity_I128
&& t4
== Ity_I128
2063 && finalVty
== Ity_I128
) {
2064 if (0) VG_(printf
)("mkLazy4: I32 x I128 x I128 x I128 -> I128\n");
2065 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
2066 mode indication which is fully defined, this should get
2067 folded out later. */
2068 at
= mkPCastTo(mce
, Ity_I128
, va1
);
2069 /* Now fold in 2nd, 3rd, 4th args. */
2070 at
= mkUifU(mce
, Ity_I128
, at
, va2
);
2071 at
= mkUifU(mce
, Ity_I128
, at
, va3
);
2072 at
= mkUifU(mce
, Ity_I128
, at
, va4
);
2073 /* and PCast once again. */
2074 at
= mkPCastTo(mce
, Ity_I128
, at
);
2078 /* I32 x I64 x I64 x I64 -> I64 */
2079 if (t1
== Ity_I32
&& t2
== Ity_I64
&& t3
== Ity_I64
&& t4
== Ity_I64
2080 && finalVty
== Ity_I64
) {
2081 if (0) VG_(printf
)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
2082 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
2083 mode indication which is fully defined, this should get
2084 folded out later. */
2085 at
= mkPCastTo(mce
, Ity_I64
, va1
);
2086 /* Now fold in 2nd, 3rd, 4th args. */
2087 at
= mkUifU(mce
, Ity_I64
, at
, va2
);
2088 at
= mkUifU(mce
, Ity_I64
, at
, va3
);
2089 at
= mkUifU(mce
, Ity_I64
, at
, va4
);
2090 /* and PCast once again. */
2091 at
= mkPCastTo(mce
, Ity_I64
, at
);
2094 /* I32 x I32 x I32 x I32 -> I32 */
2095 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
2096 if (t1
== Ity_I32
&& t2
== Ity_I32
&& t3
== Ity_I32
&& t4
== Ity_I32
2097 && finalVty
== Ity_I32
) {
2098 if (0) VG_(printf
)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
2100 /* Now fold in 2nd, 3rd, 4th args. */
2101 at
= mkUifU(mce
, Ity_I32
, at
, va2
);
2102 at
= mkUifU(mce
, Ity_I32
, at
, va3
);
2103 at
= mkUifU(mce
, Ity_I32
, at
, va4
);
2104 at
= mkPCastTo(mce
, Ity_I32
, at
);
2108 if (t1
== Ity_I32
&& t2
== Ity_I8
&& t3
== Ity_I8
&& t4
== Ity_I8
2109 && finalVty
== Ity_I32
) {
2110 if (0) VG_(printf
)("mkLazy4: I32 x I8 x I8 x I8 -> I32\n");
2111 at
= mkPCastTo(mce
, Ity_I8
, va1
);
2112 /* Now fold in 2nd, 3rd, 4th args. */
2113 at
= mkUifU(mce
, Ity_I8
, at
, va2
);
2114 at
= mkUifU(mce
, Ity_I8
, at
, va3
);
2115 at
= mkUifU(mce
, Ity_I8
, at
, va4
);
2116 at
= mkPCastTo(mce
, Ity_I32
, at
);
2120 if (t1
== Ity_I64
&& t2
== Ity_I8
&& t3
== Ity_I8
&& t4
== Ity_I8
2121 && finalVty
== Ity_I64
) {
2122 if (0) VG_(printf
)("mkLazy4: I64 x I8 x I8 x I8 -> I64\n");
2123 at
= mkPCastTo(mce
, Ity_I8
, va1
);
2124 /* Now fold in 2nd, 3rd, 4th args. */
2125 at
= mkUifU(mce
, Ity_I8
, at
, va2
);
2126 at
= mkUifU(mce
, Ity_I8
, at
, va3
);
2127 at
= mkUifU(mce
, Ity_I8
, at
, va4
);
2128 at
= mkPCastTo(mce
, Ity_I64
, at
);
2133 VG_(printf
)("mkLazy4: ");
2141 VG_(printf
)(" -> ");
2150 /* Do the lazy propagation game from a null-terminated vector of
2151 atoms. This is presumably the arguments to a helper call, so the
2152 IRCallee info is also supplied in order that we can know which
2153 arguments should be ignored (via the .mcx_mask field).
2156 IRAtom
* mkLazyN ( MCEnv
* mce
,
2157 IRAtom
** exprvec
, IRType finalVtype
, IRCallee
* cee
)
2163 Bool mergeTy64
= True
;
2165 /* Decide on the type of the merge intermediary. If all relevant
2166 args are I64, then it's I64. In all other circumstances, use
2168 for (i
= 0; exprvec
[i
]; i
++) {
2170 tl_assert(isOriginalAtom(mce
, exprvec
[i
]));
2171 if (cee
->mcx_mask
& (1<<i
))
2173 if (typeOfIRExpr(mce
->sb
->tyenv
, exprvec
[i
]) != Ity_I64
)
2177 mergeTy
= mergeTy64
? Ity_I64
: Ity_I32
;
2178 curr
= definedOfType(mergeTy
);
2180 for (i
= 0; exprvec
[i
]; i
++) {
2182 tl_assert(isOriginalAtom(mce
, exprvec
[i
]));
2183 /* Only take notice of this arg if the callee's mc-exclusion
2184 mask does not say it is to be excluded. */
2185 if (cee
->mcx_mask
& (1<<i
)) {
2186 /* the arg is to be excluded from definedness checking. Do
2188 if (0) VG_(printf
)("excluding %s(%d)\n", cee
->name
, i
);
2190 /* calculate the arg's definedness, and pessimistically merge
2192 here
= mkPCastTo( mce
, mergeTy
, expr2vbits(mce
, exprvec
[i
], HuOth
) );
2194 ? mkUifU64(mce
, here
, curr
)
2195 : mkUifU32(mce
, here
, curr
);
2198 return mkPCastTo(mce
, finalVtype
, curr
);
2202 /*------------------------------------------------------------*/
2203 /*--- Generating expensive sequences for exact carry-chain ---*/
2204 /*--- propagation in add/sub and related operations. ---*/
2205 /*------------------------------------------------------------*/
2208 IRAtom
* expensiveAddSub ( MCEnv
* mce
,
2211 IRAtom
* qaa
, IRAtom
* qbb
,
2212 IRAtom
* aa
, IRAtom
* bb
)
2214 IRAtom
*a_min
, *b_min
, *a_max
, *b_max
;
2215 IROp opAND
, opOR
, opXOR
, opNOT
, opADD
, opSUB
;
2217 tl_assert(isShadowAtom(mce
,qaa
));
2218 tl_assert(isShadowAtom(mce
,qbb
));
2219 tl_assert(isOriginalAtom(mce
,aa
));
2220 tl_assert(isOriginalAtom(mce
,bb
));
2221 tl_assert(sameKindedAtoms(qaa
,aa
));
2222 tl_assert(sameKindedAtoms(qbb
,bb
));
2242 VG_(tool_panic
)("expensiveAddSub");
2245 // a_min = aa & ~qaa
2246 a_min
= assignNew('V', mce
,ty
,
2248 assignNew('V', mce
,ty
, unop(opNOT
, qaa
))));
2250 // b_min = bb & ~qbb
2251 b_min
= assignNew('V', mce
,ty
,
2253 assignNew('V', mce
,ty
, unop(opNOT
, qbb
))));
2256 a_max
= assignNew('V', mce
,ty
, binop(opOR
, aa
, qaa
));
2259 b_max
= assignNew('V', mce
,ty
, binop(opOR
, bb
, qbb
));
2262 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
2264 assignNew('V', mce
,ty
,
2266 assignNew('V', mce
,ty
, binop(opOR
, qaa
, qbb
)),
2267 assignNew('V', mce
,ty
,
2269 assignNew('V', mce
,ty
, binop(opADD
, a_min
, b_min
)),
2270 assignNew('V', mce
,ty
, binop(opADD
, a_max
, b_max
))
2276 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
2278 assignNew('V', mce
,ty
,
2280 assignNew('V', mce
,ty
, binop(opOR
, qaa
, qbb
)),
2281 assignNew('V', mce
,ty
,
2283 assignNew('V', mce
,ty
, binop(opSUB
, a_min
, b_max
)),
2284 assignNew('V', mce
,ty
, binop(opSUB
, a_max
, b_min
))
2295 IRAtom
* expensiveCountTrailingZeroes ( MCEnv
* mce
, IROp czop
,
2296 IRAtom
* atom
, IRAtom
* vatom
)
2299 IROp xorOp
, subOp
, andOp
;
2301 IRAtom
*improver
, *improved
;
2302 tl_assert(isShadowAtom(mce
,vatom
));
2303 tl_assert(isOriginalAtom(mce
,atom
));
2304 tl_assert(sameKindedAtoms(atom
,vatom
));
2307 case Iop_Ctz32
: case Iop_CtzNat32
:
2314 case Iop_Ctz64
: case Iop_CtzNat64
:
2323 VG_(tool_panic
)("memcheck:expensiveCountTrailingZeroes");
2326 // improver = atom ^ (atom - 1)
2328 // That is, improver has its low ctz(atom)+1 bits equal to one;
2329 // higher bits (if any) equal to zero. So it's exactly the right
2330 // mask to use to remove the irrelevant undefined input bits.
2331 /* Here are some examples:
2332 atom = U...U 1 0...0
2333 atom-1 = U...U 0 1...1
2334 ^ed = 0...0 1 11111, which correctly describes which bits of |atom|
2335 actually influence the result
2339 ^ed = 11111, also a correct mask for the input: all input bits
2341 Another boundary case
2344 ^ed = 0..0 1, also a correct mask: only the rightmost input bit
2346 Now with misc U bits interspersed:
2347 atom = U...U 1 0 U...U 0 1 0...0
2348 atom-1 = U...U 1 0 U...U 0 0 1...1
2349 ^ed = 0...0 0 0 0...0 0 1 1...1, also correct
2350 (Per re-check/analysis of 14 Nov 2018)
2352 improver
= assignNew('V', mce
,ty
,
2355 assignNew('V', mce
, ty
,
2356 binop(subOp
, atom
, one
))));
2358 // improved = vatom & improver
2360 // That is, treat any V bits to the left of the rightmost ctz(atom)+1
2361 // bits as "defined".
2362 improved
= assignNew('V', mce
, ty
,
2363 binop(andOp
, vatom
, improver
));
2365 // Return pessimizing cast of improved.
2366 return mkPCastTo(mce
, ty
, improved
);
2370 IRAtom
* expensiveCountLeadingZeroes ( MCEnv
* mce
, IROp czop
,
2371 IRAtom
* atom
, IRAtom
* vatom
)
2374 IROp shrOp
, notOp
, andOp
;
2375 IRAtom
* (*mkRight
)(MCEnv
*, IRAtom
*);
2376 IRAtom
*improver
, *improved
;
2377 tl_assert(isShadowAtom(mce
,vatom
));
2378 tl_assert(isOriginalAtom(mce
,atom
));
2379 tl_assert(sameKindedAtoms(atom
,vatom
));
2382 case Iop_Clz32
: case Iop_ClzNat32
:
2387 mkRight
= mkRight32
;
2389 case Iop_Clz64
: case Iop_ClzNat64
:
2394 mkRight
= mkRight64
;
2398 VG_(tool_panic
)("memcheck:expensiveCountLeadingZeroes");
2401 // This is in principle very similar to how expensiveCountTrailingZeroes
2402 // works. That function computed an "improver", which it used to mask
2403 // off all but the rightmost 1-bit and the zeroes to the right of it,
2404 // hence removing irrelevant bits from the input. Here, we play the
2405 // exact same game but with the left-vs-right roles interchanged.
2406 // Unfortunately calculation of the improver in this case is
2407 // significantly more expensive.
2409 // improver = ~(RIGHT(atom) >>u 1)
2411 // That is, improver has its upper clz(atom)+1 bits equal to one;
2412 // lower bits (if any) equal to zero. So it's exactly the right
2413 // mask to use to remove the irrelevant undefined input bits.
2414 /* Here are some examples:
2415 atom = 0...0 1 U...U
2416 R(atom) = 0...0 1 1...1
2417 R(atom) >>u 1 = 0...0 0 1...1
2418 ~(R(atom) >>u 1) = 1...1 1 0...0
2419 which correctly describes which bits of |atom|
2420 actually influence the result
2424 R(atom) >>u 1 = 0...0
2425 ~(R(atom) >>u 1) = 1...1
2426 also a correct mask for the input: all input bits
2428 Another boundary case
2431 R(atom) >>u 1 = 0 1..1
2432 ~(R(atom) >>u 1) = 1 0..0
2433 also a correct mask: only the leftmost input bit
2435 Now with misc U bits interspersed:
2436 atom = 0...0 1 U...U 0 1 U...U
2437 R(atom) = 0...0 1 1...1 1 1 1...1
2438 R(atom) >>u 1 = 0...0 0 1...1 1 1 1...1
2439 ~(R(atom) >>u 1) = 1...1 1 0...0 0 0 0...0, also correct
2440 (Per initial implementation of 15 Nov 2018)
2442 improver
= mkRight(mce
, atom
);
2443 improver
= assignNew('V', mce
, ty
, binop(shrOp
, improver
, mkU8(1)));
2444 improver
= assignNew('V', mce
, ty
, unop(notOp
, improver
));
2446 // improved = vatom & improver
2448 // That is, treat any V bits to the right of the leftmost clz(atom)+1
2449 // bits as "defined".
2450 improved
= assignNew('V', mce
, ty
,
2451 binop(andOp
, vatom
, improver
));
2453 // Return pessimizing cast of improved.
2454 return mkPCastTo(mce
, ty
, improved
);
2458 /*------------------------------------------------------------*/
2459 /*--- Scalar shifts. ---*/
2460 /*------------------------------------------------------------*/
2462 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
2463 idea is to shift the definedness bits by the original shift amount.
2464 This introduces 0s ("defined") in new positions for left shifts and
2465 unsigned right shifts, and copies the top definedness bit for
2466 signed right shifts. So, conveniently, applying the original shift
2467 operator to the definedness bits for the left arg is exactly the
2472 However if the shift amount is undefined then the whole result
2473 is undefined. Hence need:
2475 (qaa << bb) `UifU` PCast(qbb)
2477 If the shift amount bb is a literal then qbb will say 'all defined'
2478 and the UifU and PCast will get folded out by post-instrumentation
2481 static IRAtom
* scalarShift ( MCEnv
* mce
,
2484 IRAtom
* qaa
, IRAtom
* qbb
,
2485 IRAtom
* aa
, IRAtom
* bb
)
2487 tl_assert(isShadowAtom(mce
,qaa
));
2488 tl_assert(isShadowAtom(mce
,qbb
));
2489 tl_assert(isOriginalAtom(mce
,aa
));
2490 tl_assert(isOriginalAtom(mce
,bb
));
2491 tl_assert(sameKindedAtoms(qaa
,aa
));
2492 tl_assert(sameKindedAtoms(qbb
,bb
));
2497 assignNew('V', mce
, ty
, binop(original_op
, qaa
, bb
)),
2498 mkPCastTo(mce
, ty
, qbb
)
2504 /*------------------------------------------------------------*/
2505 /*--- Helpers for dealing with vector primops. ---*/
2506 /*------------------------------------------------------------*/
2508 /* Vector pessimisation -- pessimise within each lane individually. */
2510 static IRAtom
* mkPCast8x16 ( MCEnv
* mce
, IRAtom
* at
)
2512 return assignNew('V', mce
, Ity_V128
, unop(Iop_CmpNEZ8x16
, at
));
2515 static IRAtom
* mkPCast16x8 ( MCEnv
* mce
, IRAtom
* at
)
2517 return assignNew('V', mce
, Ity_V128
, unop(Iop_CmpNEZ16x8
, at
));
2520 static IRAtom
* mkPCast32x4 ( MCEnv
* mce
, IRAtom
* at
)
2522 return assignNew('V', mce
, Ity_V128
, unop(Iop_CmpNEZ32x4
, at
));
2525 static IRAtom
* mkPCast64x2 ( MCEnv
* mce
, IRAtom
* at
)
2527 return assignNew('V', mce
, Ity_V128
, unop(Iop_CmpNEZ64x2
, at
));
2530 static IRAtom
* mkPCast128x1 ( MCEnv
* mce
, IRAtom
* at
)
2532 return assignNew('V', mce
, Ity_V128
, unop(Iop_CmpNEZ128x1
, at
));
2535 static IRAtom
* mkPCast64x4 ( MCEnv
* mce
, IRAtom
* at
)
2537 return assignNew('V', mce
, Ity_V256
, unop(Iop_CmpNEZ64x4
, at
));
2540 static IRAtom
* mkPCast32x8 ( MCEnv
* mce
, IRAtom
* at
)
2542 return assignNew('V', mce
, Ity_V256
, unop(Iop_CmpNEZ32x8
, at
));
2545 static IRAtom
* mkPCast32x2 ( MCEnv
* mce
, IRAtom
* at
)
2547 return assignNew('V', mce
, Ity_I64
, unop(Iop_CmpNEZ32x2
, at
));
2550 static IRAtom
* mkPCast16x16 ( MCEnv
* mce
, IRAtom
* at
)
2552 return assignNew('V', mce
, Ity_V256
, unop(Iop_CmpNEZ16x16
, at
));
2555 static IRAtom
* mkPCast16x4 ( MCEnv
* mce
, IRAtom
* at
)
2557 return assignNew('V', mce
, Ity_I64
, unop(Iop_CmpNEZ16x4
, at
));
2560 static IRAtom
* mkPCast8x32 ( MCEnv
* mce
, IRAtom
* at
)
2562 return assignNew('V', mce
, Ity_V256
, unop(Iop_CmpNEZ8x32
, at
));
2565 static IRAtom
* mkPCast8x8 ( MCEnv
* mce
, IRAtom
* at
)
2567 return assignNew('V', mce
, Ity_I64
, unop(Iop_CmpNEZ8x8
, at
));
2570 static IRAtom
* mkPCast16x2 ( MCEnv
* mce
, IRAtom
* at
)
2572 return assignNew('V', mce
, Ity_I32
, unop(Iop_CmpNEZ16x2
, at
));
2575 static IRAtom
* mkPCast8x4 ( MCEnv
* mce
, IRAtom
* at
)
2577 return assignNew('V', mce
, Ity_I32
, unop(Iop_CmpNEZ8x4
, at
));
2581 /* Here's a simple scheme capable of handling ops derived from SSE1
2582 code and while only generating ops that can be efficiently
2583 implemented in SSE1. */
2585 /* All-lanes versions are straightforward:
2587 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
2589 unary32Fx4(x,y) ==> PCast32x4(x#)
2591 Lowest-lane-only versions are more complex:
2593 binary32F0x4(x,y) ==> SetV128lo32(
2595 PCast32(V128to32(UifUV128(x#,y#)))
2598 This is perhaps not so obvious. In particular, it's faster to
2599 do a V128-bit UifU and then take the bottom 32 bits than the more
2600 obvious scheme of taking the bottom 32 bits of each operand
2601 and doing a 32-bit UifU. Basically since UifU is fast and
2602 chopping lanes off vector values is slow.
2606 unary32F0x4(x) ==> SetV128lo32(
2608 PCast32(V128to32(x#))
2613 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2614 PCast32x4(v#) = CmpNEZ32x4(v#)
2618 IRAtom
* binary32Fx4 ( MCEnv
* mce
, IRAtom
* vatomX
, IRAtom
* vatomY
)
2621 tl_assert(isShadowAtom(mce
, vatomX
));
2622 tl_assert(isShadowAtom(mce
, vatomY
));
2623 at
= mkUifUV128(mce
, vatomX
, vatomY
);
2624 at
= assignNew('V', mce
, Ity_V128
, mkPCast32x4(mce
, at
));
2629 IRAtom
* unary32Fx4 ( MCEnv
* mce
, IRAtom
* vatomX
)
2632 tl_assert(isShadowAtom(mce
, vatomX
));
2633 at
= assignNew('V', mce
, Ity_V128
, mkPCast32x4(mce
, vatomX
));
2638 IRAtom
* binary32F0x4 ( MCEnv
* mce
, IRAtom
* vatomX
, IRAtom
* vatomY
)
2641 tl_assert(isShadowAtom(mce
, vatomX
));
2642 tl_assert(isShadowAtom(mce
, vatomY
));
2643 at
= mkUifUV128(mce
, vatomX
, vatomY
);
2644 at
= assignNew('V', mce
, Ity_I32
, unop(Iop_V128to32
, at
));
2645 at
= mkPCastTo(mce
, Ity_I32
, at
);
2646 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_SetV128lo32
, vatomX
, at
));
2651 IRAtom
* unary32F0x4 ( MCEnv
* mce
, IRAtom
* vatomX
)
2654 tl_assert(isShadowAtom(mce
, vatomX
));
2655 at
= assignNew('V', mce
, Ity_I32
, unop(Iop_V128to32
, vatomX
));
2656 at
= mkPCastTo(mce
, Ity_I32
, at
);
2657 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_SetV128lo32
, vatomX
, at
));
2661 /* --- ... and ... 64Fx2 versions of the same ... --- */
2664 IRAtom
* binary64Fx2 ( MCEnv
* mce
, IRAtom
* vatomX
, IRAtom
* vatomY
)
2667 tl_assert(isShadowAtom(mce
, vatomX
));
2668 tl_assert(isShadowAtom(mce
, vatomY
));
2669 at
= mkUifUV128(mce
, vatomX
, vatomY
);
2670 at
= assignNew('V', mce
, Ity_V128
, mkPCast64x2(mce
, at
));
2675 IRAtom
* unary64Fx2 ( MCEnv
* mce
, IRAtom
* vatomX
)
2678 tl_assert(isShadowAtom(mce
, vatomX
));
2679 at
= assignNew('V', mce
, Ity_V128
, mkPCast64x2(mce
, vatomX
));
2684 IRAtom
* binary64F0x2 ( MCEnv
* mce
, IRAtom
* vatomX
, IRAtom
* vatomY
)
2687 tl_assert(isShadowAtom(mce
, vatomX
));
2688 tl_assert(isShadowAtom(mce
, vatomY
));
2689 at
= mkUifUV128(mce
, vatomX
, vatomY
);
2690 at
= assignNew('V', mce
, Ity_I64
, unop(Iop_V128to64
, at
));
2691 at
= mkPCastTo(mce
, Ity_I64
, at
);
2692 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_SetV128lo64
, vatomX
, at
));
2697 IRAtom
* unary64F0x2 ( MCEnv
* mce
, IRAtom
* vatomX
)
2700 tl_assert(isShadowAtom(mce
, vatomX
));
2701 at
= assignNew('V', mce
, Ity_I64
, unop(Iop_V128to64
, vatomX
));
2702 at
= mkPCastTo(mce
, Ity_I64
, at
);
2703 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_SetV128lo64
, vatomX
, at
));
2707 /* --- --- ... and ... 16Fx8 versions of the same --- --- */
2710 IRAtom
* binary16Fx8 ( MCEnv
* mce
, IRAtom
* vatomX
, IRAtom
* vatomY
)
2713 tl_assert(isShadowAtom(mce
, vatomX
));
2714 tl_assert(isShadowAtom(mce
, vatomY
));
2715 at
= mkUifUV128(mce
, vatomX
, vatomY
);
2716 at
= assignNew('V', mce
, Ity_V128
, mkPCast16x8(mce
, at
));
2721 IRAtom
* unary16Fx8 ( MCEnv
* mce
, IRAtom
* vatomX
)
2724 tl_assert(isShadowAtom(mce
, vatomX
));
2725 at
= assignNew('V', mce
, Ity_V128
, mkPCast16x8(mce
, vatomX
));
2729 /* TODO: remaining versions of 16x4 FP ops when more of the half-precision IR is
2733 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2736 IRAtom
* binary32Fx2 ( MCEnv
* mce
, IRAtom
* vatomX
, IRAtom
* vatomY
)
2739 tl_assert(isShadowAtom(mce
, vatomX
));
2740 tl_assert(isShadowAtom(mce
, vatomY
));
2741 at
= mkUifU64(mce
, vatomX
, vatomY
);
2742 at
= assignNew('V', mce
, Ity_I64
, mkPCast32x2(mce
, at
));
2747 IRAtom
* unary32Fx2 ( MCEnv
* mce
, IRAtom
* vatomX
)
2750 tl_assert(isShadowAtom(mce
, vatomX
));
2751 at
= assignNew('V', mce
, Ity_I64
, mkPCast32x2(mce
, vatomX
));
2755 /* --- ... and ... 64Fx4 versions of the same ... --- */
2758 IRAtom
* binary64Fx4 ( MCEnv
* mce
, IRAtom
* vatomX
, IRAtom
* vatomY
)
2761 tl_assert(isShadowAtom(mce
, vatomX
));
2762 tl_assert(isShadowAtom(mce
, vatomY
));
2763 at
= mkUifUV256(mce
, vatomX
, vatomY
);
2764 at
= assignNew('V', mce
, Ity_V256
, mkPCast64x4(mce
, at
));
2769 IRAtom
* unary64Fx4 ( MCEnv
* mce
, IRAtom
* vatomX
)
2772 tl_assert(isShadowAtom(mce
, vatomX
));
2773 at
= assignNew('V', mce
, Ity_V256
, mkPCast64x4(mce
, vatomX
));
2777 /* --- ... and ... 32Fx8 versions of the same ... --- */
2780 IRAtom
* binary32Fx8 ( MCEnv
* mce
, IRAtom
* vatomX
, IRAtom
* vatomY
)
2783 tl_assert(isShadowAtom(mce
, vatomX
));
2784 tl_assert(isShadowAtom(mce
, vatomY
));
2785 at
= mkUifUV256(mce
, vatomX
, vatomY
);
2786 at
= assignNew('V', mce
, Ity_V256
, mkPCast32x8(mce
, at
));
2791 IRAtom
* unary32Fx8 ( MCEnv
* mce
, IRAtom
* vatomX
)
2794 tl_assert(isShadowAtom(mce
, vatomX
));
2795 at
= assignNew('V', mce
, Ity_V256
, mkPCast32x8(mce
, vatomX
));
2799 /* --- 64Fx2 binary FP ops, with rounding mode --- */
2802 IRAtom
* binary64Fx2_w_rm ( MCEnv
* mce
, IRAtom
* vRM
,
2803 IRAtom
* vatomX
, IRAtom
* vatomY
)
2805 /* This is the same as binary64Fx2, except that we subsequently
2806 pessimise vRM (definedness of the rounding mode), widen to 128
2807 bits and UifU it into the result. As with the scalar cases, if
2808 the RM is a constant then it is defined and so this extra bit
2809 will get constant-folded out later. */
2810 // "do" the vector args
2811 IRAtom
* t1
= binary64Fx2(mce
, vatomX
, vatomY
);
2812 // PCast the RM, and widen it to 128 bits
2813 IRAtom
* t2
= mkPCastTo(mce
, Ity_V128
, vRM
);
2814 // Roll it into the result
2815 t1
= mkUifUV128(mce
, t1
, t2
);
2819 /* --- ... and ... 32Fx4 versions of the same --- */
2822 IRAtom
* binary32Fx4_w_rm ( MCEnv
* mce
, IRAtom
* vRM
,
2823 IRAtom
* vatomX
, IRAtom
* vatomY
)
2825 IRAtom
* t1
= binary32Fx4(mce
, vatomX
, vatomY
);
2826 // PCast the RM, and widen it to 128 bits
2827 IRAtom
* t2
= mkPCastTo(mce
, Ity_V128
, vRM
);
2828 // Roll it into the result
2829 t1
= mkUifUV128(mce
, t1
, t2
);
2833 /* --- ... and ... 64Fx4 versions of the same --- */
2836 IRAtom
* binary64Fx4_w_rm ( MCEnv
* mce
, IRAtom
* vRM
,
2837 IRAtom
* vatomX
, IRAtom
* vatomY
)
2839 IRAtom
* t1
= binary64Fx4(mce
, vatomX
, vatomY
);
2840 // PCast the RM, and widen it to 256 bits
2841 IRAtom
* t2
= mkPCastTo(mce
, Ity_V256
, vRM
);
2842 // Roll it into the result
2843 t1
= mkUifUV256(mce
, t1
, t2
);
2847 /* --- ... and ... 16Fx8 versions of the same --- */
2850 IRAtom
* binary16Fx8_w_rm ( MCEnv
* mce
, IRAtom
* vRM
,
2851 IRAtom
* vatomX
, IRAtom
* vatomY
)
2853 IRAtom
* t1
= binary16Fx8(mce
, vatomX
, vatomY
);
2854 // PCast the RM, and widen it to 128 bits
2855 IRAtom
* t2
= mkPCastTo(mce
, Ity_V128
, vRM
);
2856 // Roll it into the result
2857 t1
= mkUifUV128(mce
, t1
, t2
);
2861 /* TODO: remaining versions of 16x4 FP ops when more of the half-precision IR is
2865 /* --- ... and ... 32Fx8 versions of the same --- */
2868 IRAtom
* binary32Fx8_w_rm ( MCEnv
* mce
, IRAtom
* vRM
,
2869 IRAtom
* vatomX
, IRAtom
* vatomY
)
2871 IRAtom
* t1
= binary32Fx8(mce
, vatomX
, vatomY
);
2872 // PCast the RM, and widen it to 256 bits
2873 IRAtom
* t2
= mkPCastTo(mce
, Ity_V256
, vRM
);
2874 // Roll it into the result
2875 t1
= mkUifUV256(mce
, t1
, t2
);
2879 /* --- 64Fx2 unary FP ops, with rounding mode --- */
2882 IRAtom
* unary64Fx2_w_rm ( MCEnv
* mce
, IRAtom
* vRM
, IRAtom
* vatomX
)
2884 /* Same scheme as binary64Fx2_w_rm. */
2885 // "do" the vector arg
2886 IRAtom
* t1
= unary64Fx2(mce
, vatomX
);
2887 // PCast the RM, and widen it to 128 bits
2888 IRAtom
* t2
= mkPCastTo(mce
, Ity_V128
, vRM
);
2889 // Roll it into the result
2890 t1
= mkUifUV128(mce
, t1
, t2
);
2894 /* --- ... and ... 32Fx4 versions of the same --- */
2897 IRAtom
* unary32Fx4_w_rm ( MCEnv
* mce
, IRAtom
* vRM
, IRAtom
* vatomX
)
2899 /* Same scheme as binaryFx4_w_rm. */
2900 IRAtom
* t1
= unary32Fx4(mce
, vatomX
);
2901 // PCast the RM, and widen it to 128 bits
2902 IRAtom
* t2
= mkPCastTo(mce
, Ity_V128
, vRM
);
2903 // Roll it into the result
2904 t1
= mkUifUV128(mce
, t1
, t2
);
2908 /* --- ... and ... 16Fx8 versions of the same --- */
2911 IRAtom
* unary16Fx8_w_rm ( MCEnv
* mce
, IRAtom
* vRM
, IRAtom
* vatomX
)
2913 /* Same scheme as binaryFx4_w_rm. */
2914 IRAtom
* t1
= unary16Fx8(mce
, vatomX
);
2915 // PCast the RM, and widen it to 128 bits
2916 IRAtom
* t2
= mkPCastTo(mce
, Ity_V128
, vRM
);
2917 // Roll it into the result
2918 t1
= mkUifUV128(mce
, t1
, t2
);
2922 /* --- ... and ... 32Fx8 versions of the same --- */
2925 IRAtom
* unary32Fx8_w_rm ( MCEnv
* mce
, IRAtom
* vRM
, IRAtom
* vatomX
)
2927 /* Same scheme as unary32Fx8_w_rm. */
2928 IRAtom
* t1
= unary32Fx8(mce
, vatomX
);
2929 // PCast the RM, and widen it to 256 bits
2930 IRAtom
* t2
= mkPCastTo(mce
, Ity_V256
, vRM
);
2931 // Roll it into the result
2932 t1
= mkUifUV256(mce
, t1
, t2
);
2937 /* --- --- Vector saturated narrowing --- --- */
2939 /* We used to do something very clever here, but on closer inspection
2940 (2011-Jun-15), and in particular bug #279698, it turns out to be
2941 wrong. Part of the problem came from the fact that for a long
2942 time, the IR primops to do with saturated narrowing were
2943 underspecified and managed to confuse multiple cases which needed
2944 to be separate: the op names had a signedness qualifier, but in
2945 fact the source and destination signednesses needed to be specified
2946 independently, so the op names really need two independent
2947 signedness specifiers.
2949 As of 2011-Jun-15 (ish) the underspecification was sorted out
2950 properly. The incorrect instrumentation remained, though. That
2951 has now (2011-Oct-22) been fixed.
2953 What we now do is simple:
2955 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2956 number of lanes, X is the source lane width and signedness, and Y
2957 is the destination lane width and signedness. In all cases the
2958 destination lane width is half the source lane width, so the names
2959 have a bit of redundancy, but are at least easy to read.
2961 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2964 Let Vanilla(OP) be a function that takes OP, one of these
2965 saturating narrowing ops, and produces the same "shaped" narrowing
2966 op which is not saturating, but merely dumps the most significant
2967 bits. "same shape" means that the lane numbers and widths are the
2970 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2971 = Iop_NarrowBin32to16x8,
2972 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2973 dumping the top half of each lane.
2975 So, with that in place, the scheme is simple, and it is simple to
2976 pessimise each lane individually and then apply Vanilla(OP) so as
2977 to get the result in the right "shape". If the original OP is
2978 QNarrowBinXtoYxZ then we produce
2980 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2982 or for the case when OP is unary (Iop_QNarrowUn*)
2984 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2987 IROp
vanillaNarrowingOpOfShape ( IROp qnarrowOp
)
2989 switch (qnarrowOp
) {
2990 /* Binary: (128, 128) -> 128 */
2991 case Iop_QNarrowBin16Sto8Ux16
:
2992 case Iop_QNarrowBin16Sto8Sx16
:
2993 case Iop_QNarrowBin16Uto8Ux16
:
2994 case Iop_QNarrowBin64Sto32Sx4
:
2995 case Iop_QNarrowBin64Uto32Ux4
:
2996 return Iop_NarrowBin16to8x16
;
2997 case Iop_QNarrowBin32Sto16Ux8
:
2998 case Iop_QNarrowBin32Sto16Sx8
:
2999 case Iop_QNarrowBin32Uto16Ux8
:
3000 return Iop_NarrowBin32to16x8
;
3001 /* Binary: (64, 64) -> 64 */
3002 case Iop_QNarrowBin32Sto16Sx4
:
3003 return Iop_NarrowBin32to16x4
;
3004 case Iop_QNarrowBin16Sto8Ux8
:
3005 case Iop_QNarrowBin16Sto8Sx8
:
3006 return Iop_NarrowBin16to8x8
;
3007 /* Unary: 128 -> 64 */
3008 case Iop_QNarrowUn64Uto32Ux2
:
3009 case Iop_QNarrowUn64Sto32Sx2
:
3010 case Iop_QNarrowUn64Sto32Ux2
:
3011 return Iop_NarrowUn64to32x2
;
3012 case Iop_QNarrowUn32Uto16Ux4
:
3013 case Iop_QNarrowUn32Sto16Sx4
:
3014 case Iop_QNarrowUn32Sto16Ux4
:
3015 case Iop_F32toF16x4_DEP
:
3016 return Iop_NarrowUn32to16x4
;
3017 case Iop_QNarrowUn16Uto8Ux8
:
3018 case Iop_QNarrowUn16Sto8Sx8
:
3019 case Iop_QNarrowUn16Sto8Ux8
:
3020 return Iop_NarrowUn16to8x8
;
3023 VG_(tool_panic
)("vanillaNarrowOpOfShape");
3028 IRAtom
* vectorNarrowBinV128 ( MCEnv
* mce
, IROp narrow_op
,
3029 IRAtom
* vatom1
, IRAtom
* vatom2
)
3031 IRAtom
*at1
, *at2
, *at3
;
3032 IRAtom
* (*pcast
)( MCEnv
*, IRAtom
* );
3033 switch (narrow_op
) {
3034 case Iop_QNarrowBin64Sto32Sx4
: pcast
= mkPCast32x4
; break;
3035 case Iop_QNarrowBin64Uto32Ux4
: pcast
= mkPCast32x4
; break;
3036 case Iop_QNarrowBin32Sto16Sx8
: pcast
= mkPCast32x4
; break;
3037 case Iop_QNarrowBin32Uto16Ux8
: pcast
= mkPCast32x4
; break;
3038 case Iop_QNarrowBin32Sto16Ux8
: pcast
= mkPCast32x4
; break;
3039 case Iop_QNarrowBin16Sto8Sx16
: pcast
= mkPCast16x8
; break;
3040 case Iop_QNarrowBin16Uto8Ux16
: pcast
= mkPCast16x8
; break;
3041 case Iop_QNarrowBin16Sto8Ux16
: pcast
= mkPCast16x8
; break;
3042 default: VG_(tool_panic
)("vectorNarrowBinV128");
3044 IROp vanilla_narrow
= vanillaNarrowingOpOfShape(narrow_op
);
3045 tl_assert(isShadowAtom(mce
,vatom1
));
3046 tl_assert(isShadowAtom(mce
,vatom2
));
3047 at1
= assignNew('V', mce
, Ity_V128
, pcast(mce
, vatom1
));
3048 at2
= assignNew('V', mce
, Ity_V128
, pcast(mce
, vatom2
));
3049 at3
= assignNew('V', mce
, Ity_V128
, binop(vanilla_narrow
, at1
, at2
));
3054 IRAtom
* vectorNarrowBin64 ( MCEnv
* mce
, IROp narrow_op
,
3055 IRAtom
* vatom1
, IRAtom
* vatom2
)
3057 IRAtom
*at1
, *at2
, *at3
;
3058 IRAtom
* (*pcast
)( MCEnv
*, IRAtom
* );
3059 switch (narrow_op
) {
3060 case Iop_QNarrowBin32Sto16Sx4
: pcast
= mkPCast32x2
; break;
3061 case Iop_QNarrowBin16Sto8Sx8
: pcast
= mkPCast16x4
; break;
3062 case Iop_QNarrowBin16Sto8Ux8
: pcast
= mkPCast16x4
; break;
3063 default: VG_(tool_panic
)("vectorNarrowBin64");
3065 IROp vanilla_narrow
= vanillaNarrowingOpOfShape(narrow_op
);
3066 tl_assert(isShadowAtom(mce
,vatom1
));
3067 tl_assert(isShadowAtom(mce
,vatom2
));
3068 at1
= assignNew('V', mce
, Ity_I64
, pcast(mce
, vatom1
));
3069 at2
= assignNew('V', mce
, Ity_I64
, pcast(mce
, vatom2
));
3070 at3
= assignNew('V', mce
, Ity_I64
, binop(vanilla_narrow
, at1
, at2
));
3075 IRAtom
* vectorNarrowUnV128 ( MCEnv
* mce
, IROp narrow_op
,
3079 IRAtom
* (*pcast
)( MCEnv
*, IRAtom
* );
3080 tl_assert(isShadowAtom(mce
,vatom1
));
3081 /* For vanilla narrowing (non-saturating), we can just apply
3082 the op directly to the V bits. */
3083 switch (narrow_op
) {
3084 case Iop_NarrowUn16to8x8
:
3085 case Iop_NarrowUn32to16x4
:
3086 case Iop_NarrowUn64to32x2
:
3087 case Iop_F32toF16x4_DEP
:
3088 at1
= assignNew('V', mce
, Ity_I64
, unop(narrow_op
, vatom1
));
3091 break; /* Do Plan B */
3093 /* Plan B: for ops that involve a saturation operation on the args,
3094 we must PCast before the vanilla narrow. */
3095 switch (narrow_op
) {
3096 case Iop_QNarrowUn16Sto8Sx8
: pcast
= mkPCast16x8
; break;
3097 case Iop_QNarrowUn16Sto8Ux8
: pcast
= mkPCast16x8
; break;
3098 case Iop_QNarrowUn16Uto8Ux8
: pcast
= mkPCast16x8
; break;
3099 case Iop_QNarrowUn32Sto16Sx4
: pcast
= mkPCast32x4
; break;
3100 case Iop_QNarrowUn32Sto16Ux4
: pcast
= mkPCast32x4
; break;
3101 case Iop_QNarrowUn32Uto16Ux4
: pcast
= mkPCast32x4
; break;
3102 case Iop_QNarrowUn64Sto32Sx2
: pcast
= mkPCast64x2
; break;
3103 case Iop_QNarrowUn64Sto32Ux2
: pcast
= mkPCast64x2
; break;
3104 case Iop_QNarrowUn64Uto32Ux2
: pcast
= mkPCast64x2
; break;
3105 default: VG_(tool_panic
)("vectorNarrowUnV128");
3107 IROp vanilla_narrow
= vanillaNarrowingOpOfShape(narrow_op
);
3108 at1
= assignNew('V', mce
, Ity_V128
, pcast(mce
, vatom1
));
3109 at2
= assignNew('V', mce
, Ity_I64
, unop(vanilla_narrow
, at1
));
3114 IRAtom
* vectorWidenI64 ( MCEnv
* mce
, IROp longen_op
,
3118 IRAtom
* (*pcast
)( MCEnv
*, IRAtom
* );
3119 switch (longen_op
) {
3120 case Iop_Widen8Uto16x8
: pcast
= mkPCast16x8
; break;
3121 case Iop_Widen8Sto16x8
: pcast
= mkPCast16x8
; break;
3122 case Iop_Widen16Uto32x4
: pcast
= mkPCast32x4
; break;
3123 case Iop_Widen16Sto32x4
: pcast
= mkPCast32x4
; break;
3124 case Iop_Widen32Uto64x2
: pcast
= mkPCast64x2
; break;
3125 case Iop_Widen32Sto64x2
: pcast
= mkPCast64x2
; break;
3126 case Iop_F16toF32x4
: pcast
= mkPCast32x4
; break;
3127 default: VG_(tool_panic
)("vectorWidenI64");
3129 tl_assert(isShadowAtom(mce
,vatom1
));
3130 at1
= assignNew('V', mce
, Ity_V128
, unop(longen_op
, vatom1
));
3131 at2
= assignNew('V', mce
, Ity_V128
, pcast(mce
, at1
));
3136 /* --- --- Vector integer arithmetic --- --- */
3138 /* Simple ... UifU the args and per-lane pessimise the results. */
3140 /* --- V256-bit versions --- */
3143 IRAtom
* binary8Ix32 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3146 at
= mkUifUV256(mce
, vatom1
, vatom2
);
3147 at
= mkPCast8x32(mce
, at
);
3152 IRAtom
* binary16Ix16 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3155 at
= mkUifUV256(mce
, vatom1
, vatom2
);
3156 at
= mkPCast16x16(mce
, at
);
3161 IRAtom
* binary32Ix8 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3164 at
= mkUifUV256(mce
, vatom1
, vatom2
);
3165 at
= mkPCast32x8(mce
, at
);
3170 IRAtom
* binary64Ix4 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3173 at
= mkUifUV256(mce
, vatom1
, vatom2
);
3174 at
= mkPCast64x4(mce
, at
);
3178 /* --- V128-bit versions --- */
3181 IRAtom
* binary8Ix16 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3184 at
= mkUifUV128(mce
, vatom1
, vatom2
);
3185 at
= mkPCast8x16(mce
, at
);
3190 IRAtom
* binary16Ix8 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3193 at
= mkUifUV128(mce
, vatom1
, vatom2
);
3194 at
= mkPCast16x8(mce
, at
);
3199 IRAtom
* binary32Ix4 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3202 at
= mkUifUV128(mce
, vatom1
, vatom2
);
3203 at
= mkPCast32x4(mce
, at
);
3208 IRAtom
* binary64Ix2 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3211 at
= mkUifUV128(mce
, vatom1
, vatom2
);
3212 at
= mkPCast64x2(mce
, at
);
3217 IRAtom
* binary128Ix1 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3220 at
= mkUifUV128(mce
, vatom1
, vatom2
);
3221 at
= mkPCast128x1(mce
, at
);
3225 /* --- 64-bit versions --- */
3228 IRAtom
* binary8Ix8 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3231 at
= mkUifU64(mce
, vatom1
, vatom2
);
3232 at
= mkPCast8x8(mce
, at
);
3237 IRAtom
* binary16Ix4 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3240 at
= mkUifU64(mce
, vatom1
, vatom2
);
3241 at
= mkPCast16x4(mce
, at
);
3246 IRAtom
* binary32Ix2 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3249 at
= mkUifU64(mce
, vatom1
, vatom2
);
3250 at
= mkPCast32x2(mce
, at
);
3255 IRAtom
* binary64Ix1 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3258 at
= mkUifU64(mce
, vatom1
, vatom2
);
3259 at
= mkPCastTo(mce
, Ity_I64
, at
);
3263 /* --- 32-bit versions --- */
3266 IRAtom
* binary8Ix4 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3269 at
= mkUifU32(mce
, vatom1
, vatom2
);
3270 at
= mkPCast8x4(mce
, at
);
3275 IRAtom
* binary16Ix2 ( MCEnv
* mce
, IRAtom
* vatom1
, IRAtom
* vatom2
)
3278 at
= mkUifU32(mce
, vatom1
, vatom2
);
3279 at
= mkPCast16x2(mce
, at
);
3284 /*------------------------------------------------------------*/
3285 /*--- Generate shadow values from all kinds of IRExprs. ---*/
3286 /*------------------------------------------------------------*/
3289 IRAtom
* expr2vbits_Qop ( MCEnv
* mce
,
3291 IRAtom
* atom1
, IRAtom
* atom2
,
3292 IRAtom
* atom3
, IRAtom
* atom4
)
3294 IRAtom
* vatom1
= expr2vbits( mce
, atom1
, HuOth
);
3295 IRAtom
* vatom2
= expr2vbits( mce
, atom2
, HuOth
);
3296 IRAtom
* vatom3
= expr2vbits( mce
, atom3
, HuOth
);
3297 IRAtom
* vatom4
= expr2vbits( mce
, atom4
, HuOth
);
3299 tl_assert(isOriginalAtom(mce
,atom1
));
3300 tl_assert(isOriginalAtom(mce
,atom2
));
3301 tl_assert(isOriginalAtom(mce
,atom3
));
3302 tl_assert(isOriginalAtom(mce
,atom4
));
3303 tl_assert(isShadowAtom(mce
,vatom1
));
3304 tl_assert(isShadowAtom(mce
,vatom2
));
3305 tl_assert(isShadowAtom(mce
,vatom3
));
3306 tl_assert(isShadowAtom(mce
,vatom4
));
3307 tl_assert(sameKindedAtoms(atom1
,vatom1
));
3308 tl_assert(sameKindedAtoms(atom2
,vatom2
));
3309 tl_assert(sameKindedAtoms(atom3
,vatom3
));
3310 tl_assert(sameKindedAtoms(atom4
,vatom4
));
3313 case Iop_MAddF64r32
:
3315 case Iop_MSubF64r32
:
3316 /* I32(rm) x F64 x F64 x F64 -> F64 */
3317 return mkLazy4(mce
, Ity_I64
, vatom1
, vatom2
, vatom3
, vatom4
);
3321 /* I32(rm) x F32 x F32 x F32 -> F32 */
3322 return mkLazy4(mce
, Ity_I32
, vatom1
, vatom2
, vatom3
, vatom4
);
3326 case Iop_NegMAddF128
:
3327 case Iop_NegMSubF128
:
3328 /* I32(rm) x F128 x F128 x F128 -> F128 */
3329 return mkLazy4(mce
, Ity_I128
, vatom1
, vatom2
, vatom3
, vatom4
);
3331 /* V256-bit data-steering */
3332 case Iop_64x4toV256
:
3333 return assignNew('V', mce
, Ity_V256
,
3334 IRExpr_Qop(op
, vatom1
, vatom2
, vatom3
, vatom4
));
3336 /* I32/I64 x I8 x I8 x I8 -> I32/I64 */
3338 return mkLazy4(mce
, Ity_I32
, vatom1
, vatom2
, vatom3
, vatom4
);
3340 return mkLazy4(mce
, Ity_I64
, vatom1
, vatom2
, vatom3
, vatom4
);
3343 VG_(tool_panic
)("memcheck:expr2vbits_Qop");
3349 IRAtom
* expr2vbits_Triop ( MCEnv
* mce
,
3351 IRAtom
* atom1
, IRAtom
* atom2
, IRAtom
* atom3
)
3353 IRAtom
* vatom1
= expr2vbits( mce
, atom1
, HuOth
);
3354 IRAtom
* vatom2
= expr2vbits( mce
, atom2
, HuOth
);
3355 IRAtom
* vatom3
= expr2vbits( mce
, atom3
, HuOth
);
3357 tl_assert(isOriginalAtom(mce
,atom1
));
3358 tl_assert(isOriginalAtom(mce
,atom2
));
3359 tl_assert(isOriginalAtom(mce
,atom3
));
3360 tl_assert(isShadowAtom(mce
,vatom1
));
3361 tl_assert(isShadowAtom(mce
,vatom2
));
3362 tl_assert(isShadowAtom(mce
,vatom3
));
3363 tl_assert(sameKindedAtoms(atom1
,vatom1
));
3364 tl_assert(sameKindedAtoms(atom2
,vatom2
));
3365 tl_assert(sameKindedAtoms(atom3
,vatom3
));
3375 case Iop_QuantizeD128
:
3376 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
3377 return mkLazy3(mce
, Ity_I128
, vatom1
, vatom2
, vatom3
);
3396 case Iop_QuantizeD64
:
3397 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
3398 return mkLazy3(mce
, Ity_I64
, vatom1
, vatom2
, vatom3
);
3399 case Iop_PRemC3210F64
:
3400 case Iop_PRem1C3210F64
:
3401 /* I32(rm) x F64 x F64 -> I32 */
3402 return mkLazy3(mce
, Ity_I32
, vatom1
, vatom2
, vatom3
);
3407 /* I32(rm) x F32 x F32 -> I32 */
3408 return mkLazy3(mce
, Ity_I32
, vatom1
, vatom2
, vatom3
);
3411 /* I32(rm) x F16 x F16 -> I16 */
3412 return mkLazy3(mce
, Ity_I16
, vatom1
, vatom2
, vatom3
);
3413 case Iop_SignificanceRoundD64
:
3414 /* IRRoundingMode(I32) x I8 x D64 -> D64 */
3415 return mkLazy3(mce
, Ity_I64
, vatom1
, vatom2
, vatom3
);
3416 case Iop_SignificanceRoundD128
:
3417 /* IRRoundingMode(I32) x I8 x D128 -> D128 */
3418 return mkLazy3(mce
, Ity_I128
, vatom1
, vatom2
, vatom3
);
3420 /* (V128, V128, I8) -> V128 */
3421 complainIfUndefined(mce
, atom3
, NULL
);
3422 return assignNew('V', mce
, Ity_V128
, triop(op
, vatom1
, vatom2
, atom3
));
3424 /* (I64, I64, I8) -> I64 */
3425 complainIfUndefined(mce
, atom3
, NULL
);
3426 return assignNew('V', mce
, Ity_I64
, triop(op
, vatom1
, vatom2
, atom3
));
3427 case Iop_SetElem8x8
:
3428 case Iop_SetElem16x4
:
3429 case Iop_SetElem32x2
:
3430 complainIfUndefined(mce
, atom2
, NULL
);
3431 return assignNew('V', mce
, Ity_I64
, triop(op
, vatom1
, atom2
, vatom3
));
3433 case Iop_SetElem8x16
:
3434 case Iop_SetElem16x8
:
3435 case Iop_SetElem32x4
:
3436 case Iop_SetElem64x2
:
3437 complainIfUndefined(mce
, atom2
, NULL
);
3438 return assignNew('V', mce
, Ity_V128
, triop(op
, vatom1
, atom2
, vatom3
));
3440 /* Int 128-bit Integer three arg */
3441 case Iop_2xMultU64Add128CarryOut
:
3442 case Iop_Perm8x16x2
:
3443 /* (V128, V128, V128) -> V128 */
3444 complainIfUndefined(mce
, atom3
, NULL
);
3447 assignNew('V', mce
, Ity_V128
, triop(op
, vatom1
, vatom2
, atom3
)),
3448 mkPCast8x16(mce
, vatom3
)
3451 /* Vector FP with rounding mode as the first arg */
3456 case Iop_Scale2_64Fx2
:
3457 return binary64Fx2_w_rm(mce
, vatom1
, vatom2
, vatom3
);
3463 case Iop_Scale2_32Fx4
:
3464 return binary32Fx4_w_rm(mce
, vatom1
, vatom2
, vatom3
);
3470 return binary64Fx4_w_rm(mce
, vatom1
, vatom2
, vatom3
);
3472 /* TODO: remaining versions of 16x4 FP ops when more of the half-precision
3477 return binary16Fx8_w_rm(mce
, vatom1
, vatom2
, vatom3
);
3483 return binary32Fx8_w_rm(mce
, vatom1
, vatom2
, vatom3
);
3485 case Iop_F32x4_2toQ16x8
:
3486 return assignNew('V', mce
, Ity_V128
,
3487 binop(Iop_PackEvenLanes16x8
,
3488 unary32Fx4_w_rm(mce
, vatom1
, vatom2
),
3489 unary32Fx4_w_rm(mce
, vatom1
, vatom3
)));
3490 case Iop_F64x2_2toQ32x4
:
3491 return assignNew('V', mce
, Ity_V128
,
3492 binop(Iop_PackEvenLanes32x4
,
3493 unary64Fx2_w_rm(mce
, vatom1
, vatom2
),
3494 unary64Fx2_w_rm(mce
, vatom1
, vatom3
)));
3498 VG_(tool_panic
)("memcheck:expr2vbits_Triop");
3504 IRAtom
* expr2vbits_Binop ( MCEnv
* mce
,
3506 IRAtom
* atom1
, IRAtom
* atom2
,
3507 HowUsed hu
/*use HuOth if unknown*/ )
3509 IRType and_or_ty
= Ity_INVALID
;
3510 IRAtom
* (*uifu
) (MCEnv
*, IRAtom
*, IRAtom
*) = NULL
;
3511 IRAtom
* (*difd
) (MCEnv
*, IRAtom
*, IRAtom
*) = NULL
;
3512 IRAtom
* (*improve
) (MCEnv
*, IRAtom
*, IRAtom
*) = NULL
;
3514 IRAtom
* vatom1
= expr2vbits( mce
, atom1
, HuOth
);
3515 IRAtom
* vatom2
= expr2vbits( mce
, atom2
, HuOth
);
3517 tl_assert(isOriginalAtom(mce
,atom1
));
3518 tl_assert(isOriginalAtom(mce
,atom2
));
3519 tl_assert(isShadowAtom(mce
,vatom1
));
3520 tl_assert(isShadowAtom(mce
,vatom2
));
3521 tl_assert(sameKindedAtoms(atom1
,vatom1
));
3522 tl_assert(sameKindedAtoms(atom2
,vatom2
));
3537 return binary16Ix2(mce
, vatom1
, vatom2
);
3549 return binary8Ix4(mce
, vatom1
, vatom2
);
3562 /* Same scheme as with all other shifts. */
3563 complainIfUndefined(mce
, atom2
, NULL
);
3564 return assignNew('V', mce
, Ity_I64
, binop(op
, vatom1
, atom2
));
3566 case Iop_QNarrowBin32Sto16Sx4
:
3567 case Iop_QNarrowBin16Sto8Sx8
:
3568 case Iop_QNarrowBin16Sto8Ux8
:
3569 return vectorNarrowBin64(mce
, op
, vatom1
, vatom2
);
3588 case Iop_PolynomialMul8x8
:
3589 return binary8Ix8(mce
, vatom1
, vatom2
);
3600 case Iop_MulHi16Sx4
:
3601 case Iop_MulHi16Ux4
:
3602 case Iop_CmpGT16Sx4
:
3603 case Iop_CmpGT16Ux4
:
3610 case Iop_QDMulHi16Sx4
:
3611 case Iop_QRDMulHi16Sx4
:
3612 return binary16Ix4(mce
, vatom1
, vatom2
);
3620 case Iop_CmpGT32Sx2
:
3621 case Iop_CmpGT32Ux2
:
3630 case Iop_QDMulHi32Sx2
:
3631 case Iop_QRDMulHi32Sx2
:
3632 return binary32Ix2(mce
, vatom1
, vatom2
);
3641 return binary64Ix1(mce
, vatom1
, vatom2
);
3643 case Iop_QShlNsatSU8x8
:
3644 case Iop_QShlNsatUU8x8
:
3645 case Iop_QShlNsatSS8x8
:
3646 complainIfUndefined(mce
, atom2
, NULL
);
3647 return mkPCast8x8(mce
, vatom1
);
3649 case Iop_QShlNsatSU16x4
:
3650 case Iop_QShlNsatUU16x4
:
3651 case Iop_QShlNsatSS16x4
:
3652 complainIfUndefined(mce
, atom2
, NULL
);
3653 return mkPCast16x4(mce
, vatom1
);
3655 case Iop_QShlNsatSU32x2
:
3656 case Iop_QShlNsatUU32x2
:
3657 case Iop_QShlNsatSS32x2
:
3658 complainIfUndefined(mce
, atom2
, NULL
);
3659 return mkPCast32x2(mce
, vatom1
);
3661 case Iop_QShlNsatSU64x1
:
3662 case Iop_QShlNsatUU64x1
:
3663 case Iop_QShlNsatSS64x1
:
3664 complainIfUndefined(mce
, atom2
, NULL
);
3665 return mkPCast32x2(mce
, vatom1
);
3667 case Iop_PwMax32Sx2
:
3668 case Iop_PwMax32Ux2
:
3669 case Iop_PwMin32Sx2
:
3670 case Iop_PwMin32Ux2
:
3671 case Iop_PwMax32Fx2
:
3672 case Iop_PwMin32Fx2
:
3673 return assignNew('V', mce
, Ity_I64
,
3674 binop(Iop_PwMax32Ux2
,
3675 mkPCast32x2(mce
, vatom1
),
3676 mkPCast32x2(mce
, vatom2
)));
3678 case Iop_PwMax16Sx4
:
3679 case Iop_PwMax16Ux4
:
3680 case Iop_PwMin16Sx4
:
3681 case Iop_PwMin16Ux4
:
3682 return assignNew('V', mce
, Ity_I64
,
3683 binop(Iop_PwMax16Ux4
,
3684 mkPCast16x4(mce
, vatom1
),
3685 mkPCast16x4(mce
, vatom2
)));
3691 return assignNew('V', mce
, Ity_I64
,
3692 binop(Iop_PwMax8Ux8
,
3693 mkPCast8x8(mce
, vatom1
),
3694 mkPCast8x8(mce
, vatom2
)));
3697 case Iop_PwAdd32Fx2
:
3698 return mkPCast32x2(mce
,
3699 assignNew('V', mce
, Ity_I64
,
3700 binop(Iop_PwAdd32x2
,
3701 mkPCast32x2(mce
, vatom1
),
3702 mkPCast32x2(mce
, vatom2
))));
3705 return mkPCast16x4(mce
,
3706 assignNew('V', mce
, Ity_I64
,
3707 binop(op
, mkPCast16x4(mce
, vatom1
),
3708 mkPCast16x4(mce
, vatom2
))));
3711 return mkPCast8x8(mce
,
3712 assignNew('V', mce
, Ity_I64
,
3713 binop(op
, mkPCast8x8(mce
, vatom1
),
3714 mkPCast8x8(mce
, vatom2
))));
3720 return mkUifU64(mce
,
3721 assignNew('V', mce
, Ity_I64
, binop(op
, vatom1
, atom2
)),
3722 mkPCast8x8(mce
,vatom2
)
3729 return mkUifU64(mce
,
3730 assignNew('V', mce
, Ity_I64
, binop(op
, vatom1
, atom2
)),
3731 mkPCast16x4(mce
,vatom2
)
3738 return mkUifU64(mce
,
3739 assignNew('V', mce
, Ity_I64
, binop(op
, vatom1
, atom2
)),
3740 mkPCast32x2(mce
,vatom2
)
3743 /* 64-bit data-steering */
3744 case Iop_InterleaveLO32x2
:
3745 case Iop_InterleaveLO16x4
:
3746 case Iop_InterleaveLO8x8
:
3747 case Iop_InterleaveHI32x2
:
3748 case Iop_InterleaveHI16x4
:
3749 case Iop_InterleaveHI8x8
:
3750 case Iop_CatOddLanes8x8
:
3751 case Iop_CatEvenLanes8x8
:
3752 case Iop_CatOddLanes16x4
:
3753 case Iop_CatEvenLanes16x4
:
3754 case Iop_InterleaveOddLanes8x8
:
3755 case Iop_InterleaveEvenLanes8x8
:
3756 case Iop_InterleaveOddLanes16x4
:
3757 case Iop_InterleaveEvenLanes16x4
:
3758 return assignNew('V', mce
, Ity_I64
, binop(op
, vatom1
, vatom2
));
3760 case Iop_GetElem8x8
:
3761 complainIfUndefined(mce
, atom2
, NULL
);
3762 return assignNew('V', mce
, Ity_I8
, binop(op
, vatom1
, atom2
));
3763 case Iop_GetElem16x4
:
3764 complainIfUndefined(mce
, atom2
, NULL
);
3765 return assignNew('V', mce
, Ity_I16
, binop(op
, vatom1
, atom2
));
3766 case Iop_GetElem32x2
:
3767 complainIfUndefined(mce
, atom2
, NULL
);
3768 return assignNew('V', mce
, Ity_I32
, binop(op
, vatom1
, atom2
));
3770 /* Perm8x8: rearrange values in left arg using steering values from
3771 right arg. So rearrange the vbits in the same way but pessimise wrt
3772 steering values. We assume that unused bits in the steering value
3773 are defined zeros, so we can safely PCast within each lane of the
3774 steering value without having to take precautions to avoid a
3775 dependency on those unused bits.
3777 This is also correct for PermOrZero8x8, but it is a bit subtle. For
3778 each lane, if bit 7 of the steering value is zero, then we'll steer
3779 the shadow value exactly as per Perm8x8. If that bit is one, then
3780 the operation will set the resulting (concrete) value to zero. That
3781 means it is defined, and should have a shadow value of zero. Hence
3782 in both cases (bit 7 is 0 or 1) we can self-shadow (in the same way
3783 as Perm8x8) and then pessimise against the steering values. */
3785 case Iop_PermOrZero8x8
:
3788 assignNew('V', mce
, Ity_I64
, binop(op
, vatom1
, atom2
)),
3789 mkPCast8x8(mce
, vatom2
)
3794 case Iop_I32StoF32x4
:
3795 case Iop_F32toI32Sx4
:
3797 return unary16Fx8_w_rm(mce
, vatom1
, vatom2
);
3799 return unary32Fx4_w_rm(mce
, vatom1
, vatom2
);
3801 return unary64Fx2_w_rm(mce
, vatom1
, vatom2
);
3815 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3816 this is wrong now, scalar shifts are done properly lazily.
3817 Vector shifts should be fixed too. */
3818 complainIfUndefined(mce
, atom2
, NULL
);
3819 return assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
));
3821 /* V x V shifts/rotates are done using the standard lazy scheme. */
3822 /* For the non-rounding variants of bi-di vector x vector
3823 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
3824 But note that this is overly pessimistic, because in fact only
3825 the bottom 8 bits of each lane of the second argument are taken
3826 into account when shifting. So really we ought to ignore
3827 undefinedness in bits 8 and above of each lane in the
3836 return mkUifUV128(mce
,
3837 assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
)),
3838 mkPCast8x16(mce
,vatom2
)
3848 return mkUifUV128(mce
,
3849 assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
)),
3850 mkPCast16x8(mce
,vatom2
)
3860 return mkUifUV128(mce
,
3861 assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
)),
3862 mkPCast32x4(mce
,vatom2
)
3872 return mkUifUV128(mce
,
3873 assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
)),
3874 mkPCast64x2(mce
,vatom2
)
3877 /* For the rounding variants of bi-di vector x vector shifts, the
3878 rounding adjustment can cause undefinedness to propagate through
3879 the entire lane, in the worst case. Too complex to handle
3880 properly .. just UifU the arguments and then PCast them.
3881 Suboptimal but safe. */
3884 return binary8Ix16(mce
, vatom1
, vatom2
);
3887 return binary16Ix8(mce
, vatom1
, vatom2
);
3890 return binary32Ix4(mce
, vatom1
, vatom2
);
3893 return binary64Ix2(mce
, vatom1
, vatom2
);
3895 case Iop_F32ToFixed32Ux4_RZ
:
3896 case Iop_F32ToFixed32Sx4_RZ
:
3897 case Iop_Fixed32UToF32x4_RN
:
3898 case Iop_Fixed32SToF32x4_RN
:
3899 complainIfUndefined(mce
, atom2
, NULL
);
3900 return mkPCast32x4(mce
, vatom1
);
3902 case Iop_F32ToFixed32Ux2_RZ
:
3903 case Iop_F32ToFixed32Sx2_RZ
:
3904 case Iop_Fixed32UToF32x2_RN
:
3905 case Iop_Fixed32SToF32x2_RN
:
3906 complainIfUndefined(mce
, atom2
, NULL
);
3907 return mkPCast32x2(mce
, vatom1
);
3916 case Iop_CmpGT8Sx16
:
3917 case Iop_CmpGT8Ux16
:
3923 case Iop_QAddExtUSsatSS8x16
:
3924 case Iop_QAddExtSUsatUU8x16
:
3929 case Iop_MulHi8Sx16
:
3930 case Iop_MulHi8Ux16
:
3931 case Iop_PolynomialMul8x16
:
3932 case Iop_PolynomialMulAdd8x16
:
3933 return binary8Ix16(mce
, vatom1
, vatom2
);
3939 case Iop_MulHi16Sx8
:
3940 case Iop_MulHi16Ux8
:
3945 case Iop_CmpGT16Sx8
:
3946 case Iop_CmpGT16Ux8
:
3952 case Iop_QAddExtUSsatSS16x8
:
3953 case Iop_QAddExtSUsatUU16x8
:
3957 case Iop_QDMulHi16Sx8
:
3958 case Iop_QRDMulHi16Sx8
:
3959 case Iop_PolynomialMulAdd16x8
:
3960 /* PwExtUSMulQAdd8x16 is a bit subtle. The effect of it is that each
3961 16-bit chunk of the output is formed from corresponding 16-bit chunks
3962 of the input args, so we can treat it like any other binary 16x8
3963 operation. That's despite it having '8x16' in its name. */
3964 case Iop_PwExtUSMulQAdd8x16
:
3965 return binary16Ix8(mce
, vatom1
, vatom2
);
3968 case Iop_CmpGT32Sx4
:
3969 case Iop_CmpGT32Ux4
:
3975 case Iop_QAddExtUSsatSS32x4
:
3976 case Iop_QAddExtSUsatUU32x4
:
3987 case Iop_MulHi32Sx4
:
3988 case Iop_MulHi32Ux4
:
3989 case Iop_QDMulHi32Sx4
:
3990 case Iop_QRDMulHi32Sx4
:
3991 case Iop_PolynomialMulAdd32x4
:
3992 return binary32Ix4(mce
, vatom1
, vatom2
);
4003 case Iop_CmpGT64Sx2
:
4004 case Iop_CmpGT64Ux2
:
4011 case Iop_QAddExtUSsatSS64x2
:
4012 case Iop_QAddExtSUsatUU64x2
:
4013 case Iop_PolynomialMulAdd64x2
:
4014 case Iop_CipherV128
:
4015 case Iop_CipherLV128
:
4016 case Iop_NCipherV128
:
4017 case Iop_NCipherLV128
:
4018 case Iop_MulI128by10E
:
4019 case Iop_MulI128by10ECarry
:
4020 return binary64Ix2(mce
, vatom1
, vatom2
);
4024 case Iop_CmpNEZ128x1
:
4025 return binary128Ix1(mce
, vatom1
, vatom2
);
4033 /* I128 x I128 -> I128 */
4034 return mkLazy2(mce
, Ity_V128
, vatom1
, vatom2
);
4036 case Iop_QNarrowBin64Sto32Sx4
:
4037 case Iop_QNarrowBin64Uto32Ux4
:
4038 case Iop_QNarrowBin32Sto16Sx8
:
4039 case Iop_QNarrowBin32Uto16Ux8
:
4040 case Iop_QNarrowBin32Sto16Ux8
:
4041 case Iop_QNarrowBin16Sto8Sx16
:
4042 case Iop_QNarrowBin16Uto8Ux16
:
4043 case Iop_QNarrowBin16Sto8Ux16
:
4044 return vectorNarrowBinV128(mce
, op
, vatom1
, vatom2
);
4048 case Iop_CmpLT64Fx2
:
4049 case Iop_CmpLE64Fx2
:
4050 case Iop_CmpEQ64Fx2
:
4051 case Iop_CmpUN64Fx2
:
4052 case Iop_RecipStep64Fx2
:
4053 case Iop_RSqrtStep64Fx2
:
4054 return binary64Fx2(mce
, vatom1
, vatom2
);
4056 case Iop_CmpLT16Fx8
:
4057 case Iop_CmpLE16Fx8
:
4058 case Iop_CmpEQ16Fx8
:
4059 return binary16Fx8(mce
, vatom1
, vatom2
);
4066 case Iop_CmpLT64F0x2
:
4067 case Iop_CmpLE64F0x2
:
4068 case Iop_CmpEQ64F0x2
:
4069 case Iop_CmpUN64F0x2
:
4071 return binary64F0x2(mce
, vatom1
, vatom2
);
4075 case Iop_CmpLT32Fx4
:
4076 case Iop_CmpLE32Fx4
:
4077 case Iop_CmpEQ32Fx4
:
4078 case Iop_CmpUN32Fx4
:
4079 case Iop_CmpGT32Fx4
:
4080 case Iop_CmpGE32Fx4
:
4081 case Iop_RecipStep32Fx4
:
4082 case Iop_RSqrtStep32Fx4
:
4083 return binary32Fx4(mce
, vatom1
, vatom2
);
4089 case Iop_CmpEQ32Fx2
:
4090 case Iop_CmpGT32Fx2
:
4091 case Iop_CmpGE32Fx2
:
4093 case Iop_RecipStep32Fx2
:
4094 case Iop_RSqrtStep32Fx2
:
4095 return binary32Fx2(mce
, vatom1
, vatom2
);
4102 case Iop_CmpLT32F0x4
:
4103 case Iop_CmpLE32F0x4
:
4104 case Iop_CmpEQ32F0x4
:
4105 case Iop_CmpUN32F0x4
:
4107 return binary32F0x4(mce
, vatom1
, vatom2
);
4109 case Iop_QShlNsatSU8x16
:
4110 case Iop_QShlNsatUU8x16
:
4111 case Iop_QShlNsatSS8x16
:
4112 complainIfUndefined(mce
, atom2
, NULL
);
4113 return mkPCast8x16(mce
, vatom1
);
4115 case Iop_QShlNsatSU16x8
:
4116 case Iop_QShlNsatUU16x8
:
4117 case Iop_QShlNsatSS16x8
:
4118 complainIfUndefined(mce
, atom2
, NULL
);
4119 return mkPCast16x8(mce
, vatom1
);
4121 case Iop_QShlNsatSU32x4
:
4122 case Iop_QShlNsatUU32x4
:
4123 case Iop_QShlNsatSS32x4
:
4124 complainIfUndefined(mce
, atom2
, NULL
);
4125 return mkPCast32x4(mce
, vatom1
);
4127 case Iop_QShlNsatSU64x2
:
4128 case Iop_QShlNsatUU64x2
:
4129 case Iop_QShlNsatSS64x2
:
4130 complainIfUndefined(mce
, atom2
, NULL
);
4131 return mkPCast32x4(mce
, vatom1
);
4133 /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
4134 To make this simpler, do the following:
4135 * complain if the shift amount (the I8) is undefined
4136 * pcast each lane at the wide width
4137 * truncate each lane to half width
4138 * pcast the resulting 64-bit value to a single bit and use
4139 that as the least significant bit of the upper half of the
4141 case Iop_QandQShrNnarrow64Uto32Ux2
:
4142 case Iop_QandQSarNnarrow64Sto32Sx2
:
4143 case Iop_QandQSarNnarrow64Sto32Ux2
:
4144 case Iop_QandQRShrNnarrow64Uto32Ux2
:
4145 case Iop_QandQRSarNnarrow64Sto32Sx2
:
4146 case Iop_QandQRSarNnarrow64Sto32Ux2
:
4147 case Iop_QandQShrNnarrow32Uto16Ux4
:
4148 case Iop_QandQSarNnarrow32Sto16Sx4
:
4149 case Iop_QandQSarNnarrow32Sto16Ux4
:
4150 case Iop_QandQRShrNnarrow32Uto16Ux4
:
4151 case Iop_QandQRSarNnarrow32Sto16Sx4
:
4152 case Iop_QandQRSarNnarrow32Sto16Ux4
:
4153 case Iop_QandQShrNnarrow16Uto8Ux8
:
4154 case Iop_QandQSarNnarrow16Sto8Sx8
:
4155 case Iop_QandQSarNnarrow16Sto8Ux8
:
4156 case Iop_QandQRShrNnarrow16Uto8Ux8
:
4157 case Iop_QandQRSarNnarrow16Sto8Sx8
:
4158 case Iop_QandQRSarNnarrow16Sto8Ux8
:
4160 IRAtom
* (*fnPessim
) (MCEnv
*, IRAtom
*) = NULL
;
4161 IROp opNarrow
= Iop_INVALID
;
4163 case Iop_QandQShrNnarrow64Uto32Ux2
:
4164 case Iop_QandQSarNnarrow64Sto32Sx2
:
4165 case Iop_QandQSarNnarrow64Sto32Ux2
:
4166 case Iop_QandQRShrNnarrow64Uto32Ux2
:
4167 case Iop_QandQRSarNnarrow64Sto32Sx2
:
4168 case Iop_QandQRSarNnarrow64Sto32Ux2
:
4169 fnPessim
= mkPCast64x2
;
4170 opNarrow
= Iop_NarrowUn64to32x2
;
4172 case Iop_QandQShrNnarrow32Uto16Ux4
:
4173 case Iop_QandQSarNnarrow32Sto16Sx4
:
4174 case Iop_QandQSarNnarrow32Sto16Ux4
:
4175 case Iop_QandQRShrNnarrow32Uto16Ux4
:
4176 case Iop_QandQRSarNnarrow32Sto16Sx4
:
4177 case Iop_QandQRSarNnarrow32Sto16Ux4
:
4178 fnPessim
= mkPCast32x4
;
4179 opNarrow
= Iop_NarrowUn32to16x4
;
4181 case Iop_QandQShrNnarrow16Uto8Ux8
:
4182 case Iop_QandQSarNnarrow16Sto8Sx8
:
4183 case Iop_QandQSarNnarrow16Sto8Ux8
:
4184 case Iop_QandQRShrNnarrow16Uto8Ux8
:
4185 case Iop_QandQRSarNnarrow16Sto8Sx8
:
4186 case Iop_QandQRSarNnarrow16Sto8Ux8
:
4187 fnPessim
= mkPCast16x8
;
4188 opNarrow
= Iop_NarrowUn16to8x8
;
4193 complainIfUndefined(mce
, atom2
, NULL
);
4194 // Pessimised shift result
4196 = fnPessim(mce
, vatom1
);
4197 // Narrowed, pessimised shift result
4199 = assignNew('V', mce
, Ity_I64
, unop(opNarrow
, shV
));
4200 // Generates: Def--(63)--Def PCast-to-I1(narrowed)
4201 IRAtom
* qV
= mkPCastXXtoXXlsb(mce
, shVnarrowed
, Ity_I64
);
4202 // and assemble the result
4203 return assignNew('V', mce
, Ity_V128
,
4204 binop(Iop_64HLtoV128
, qV
, shVnarrowed
));
4209 case Iop_QDMull32Sx2
:
4210 return vectorWidenI64(mce
, Iop_Widen32Sto64x2
,
4211 mkUifU64(mce
, vatom1
, vatom2
));
4215 case Iop_QDMull16Sx4
:
4216 return vectorWidenI64(mce
, Iop_Widen16Sto32x4
,
4217 mkUifU64(mce
, vatom1
, vatom2
));
4221 case Iop_PolynomialMull8x8
:
4222 return vectorWidenI64(mce
, Iop_Widen8Sto16x8
,
4223 mkUifU64(mce
, vatom1
, vatom2
));
4226 return mkPCast32x4(mce
,
4227 assignNew('V', mce
, Ity_V128
, binop(op
, mkPCast32x4(mce
, vatom1
),
4228 mkPCast32x4(mce
, vatom2
))));
4231 return mkPCast16x8(mce
,
4232 assignNew('V', mce
, Ity_V128
, binop(op
, mkPCast16x8(mce
, vatom1
),
4233 mkPCast16x8(mce
, vatom2
))));
4236 return mkPCast8x16(mce
,
4237 assignNew('V', mce
, Ity_V128
, binop(op
, mkPCast8x16(mce
, vatom1
),
4238 mkPCast8x16(mce
, vatom2
))));
4240 /* V128-bit data-steering */
4241 case Iop_SetV128lo32
:
4242 case Iop_SetV128lo64
:
4243 case Iop_64HLtoV128
:
4244 case Iop_InterleaveLO64x2
:
4245 case Iop_InterleaveLO32x4
:
4246 case Iop_InterleaveLO16x8
:
4247 case Iop_InterleaveLO8x16
:
4248 case Iop_InterleaveHI64x2
:
4249 case Iop_InterleaveHI32x4
:
4250 case Iop_InterleaveHI16x8
:
4251 case Iop_InterleaveHI8x16
:
4252 case Iop_CatOddLanes8x16
:
4253 case Iop_CatOddLanes16x8
:
4254 case Iop_CatOddLanes32x4
:
4255 case Iop_CatEvenLanes8x16
:
4256 case Iop_CatEvenLanes16x8
:
4257 case Iop_CatEvenLanes32x4
:
4258 case Iop_InterleaveOddLanes8x16
:
4259 case Iop_InterleaveOddLanes16x8
:
4260 case Iop_InterleaveOddLanes32x4
:
4261 case Iop_InterleaveEvenLanes8x16
:
4262 case Iop_InterleaveEvenLanes16x8
:
4263 case Iop_InterleaveEvenLanes32x4
:
4264 case Iop_PackOddLanes8x16
:
4265 case Iop_PackOddLanes16x8
:
4266 case Iop_PackOddLanes32x4
:
4267 case Iop_PackEvenLanes8x16
:
4268 case Iop_PackEvenLanes16x8
:
4269 case Iop_PackEvenLanes32x4
:
4270 return assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, vatom2
));
4272 case Iop_GetElem8x16
:
4273 complainIfUndefined(mce
, atom2
, NULL
);
4274 return assignNew('V', mce
, Ity_I8
, binop(op
, vatom1
, atom2
));
4275 case Iop_GetElem16x8
:
4276 complainIfUndefined(mce
, atom2
, NULL
);
4277 return assignNew('V', mce
, Ity_I16
, binop(op
, vatom1
, atom2
));
4278 case Iop_GetElem32x4
:
4279 complainIfUndefined(mce
, atom2
, NULL
);
4280 return assignNew('V', mce
, Ity_I32
, binop(op
, vatom1
, atom2
));
4281 case Iop_GetElem64x2
:
4282 complainIfUndefined(mce
, atom2
, NULL
);
4283 return assignNew('V', mce
, Ity_I64
, binop(op
, vatom1
, atom2
));
4285 /* Perm8x16: rearrange values in left arg using steering values
4286 from right arg. So rearrange the vbits in the same way but
4287 pessimise wrt steering values. Perm32x4 ditto. */
4288 /* PermOrZero8x16: see comments above for PermOrZero8x8. */
4290 case Iop_PermOrZero8x16
:
4293 assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
)),
4294 mkPCast8x16(mce
, vatom2
)
4299 assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
)),
4300 mkPCast32x4(mce
, vatom2
)
4303 /* These two take the lower half of each 16-bit lane, sign/zero
4304 extend it to 32, and multiply together, producing a 32x4
4305 result (and implicitly ignoring half the operand bits). So
4306 treat it as a bunch of independent 16x8 operations, but then
4307 do 32-bit shifts left-right to copy the lower half results
4308 (which are all 0s or all 1s due to PCasting in binary16Ix8)
4309 into the upper half of each result lane. */
4310 case Iop_MullEven16Ux8
:
4311 case Iop_MullEven16Sx8
: {
4313 at
= binary16Ix8(mce
,vatom1
,vatom2
);
4314 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_ShlN32x4
, at
, mkU8(16)));
4315 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_SarN32x4
, at
, mkU8(16)));
4319 /* Same deal as Iop_MullEven16{S,U}x8 */
4320 case Iop_MullEven8Ux16
:
4321 case Iop_MullEven8Sx16
: {
4323 at
= binary8Ix16(mce
,vatom1
,vatom2
);
4324 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_ShlN16x8
, at
, mkU8(8)));
4325 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_SarN16x8
, at
, mkU8(8)));
4329 /* Same deal as Iop_MullEven16{S,U}x8 */
4330 case Iop_MullEven32Ux4
:
4331 case Iop_MullEven32Sx4
: {
4333 at
= binary32Ix4(mce
,vatom1
,vatom2
);
4334 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_ShlN64x2
, at
, mkU8(32)));
4335 at
= assignNew('V', mce
, Ity_V128
, binop(Iop_SarN64x2
, at
, mkU8(32)));
4339 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
4340 32x4 -> 16x8 laneage, discarding the upper half of each lane.
4341 Simply apply same op to the V bits, since this really no more
4342 than a data steering operation. */
4343 case Iop_NarrowBin32to16x8
:
4344 case Iop_NarrowBin16to8x16
:
4345 case Iop_NarrowBin64to32x4
:
4346 return assignNew('V', mce
, Ity_V128
,
4347 binop(op
, vatom1
, vatom2
));
4352 case Iop_I128StoBCD128
:
4353 /* Same scheme as with all other shifts. Note: 10 Nov 05:
4354 this is wrong now, scalar shifts are done properly lazily.
4355 Vector shifts should be fixed too. */
4356 complainIfUndefined(mce
, atom2
, NULL
);
4357 return assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
));
4359 case Iop_I128UtoF128
: /* I128 -> F128 */
4360 case Iop_I128StoF128
: /* I128 -> F128 */
4361 return mkLazy2(mce
, Ity_I128
, vatom1
, vatom2
);
4365 return mkLazy2(mce
, Ity_V128
, vatom1
, vatom2
);
4370 complainIfUndefined(mce
, atom2
, NULL
);
4371 return assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
));
4373 /* I128-bit data-steering */
4375 return assignNew('V', mce
, Ity_I128
, binop(op
, vatom1
, vatom2
));
4381 return binary64Fx4(mce
, vatom1
, vatom2
);
4385 return binary32Fx8(mce
, vatom1
, vatom2
);
4387 /* V256-bit data-steering */
4388 case Iop_V128HLtoV256
:
4389 return assignNew('V', mce
, Ity_V256
, binop(op
, vatom1
, vatom2
));
4391 /* Scalar floating point */
4395 /* I32(rm) x F32 -> I64 */
4396 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4399 /* I32(rm) x I64 -> F32 */
4400 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4402 case Iop_RoundF64toInt
:
4403 case Iop_RoundF64toF32
:
4413 case Iop_RecpExpF64
:
4414 /* I32(rm) x I64/F64 -> I64/F64 */
4415 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4419 case Iop_RoundD64toInt
:
4420 /* I32(rm) x D64 -> D64 */
4421 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4425 case Iop_RoundD128toInt
:
4426 /* I32(rm) x D128 -> D128 */
4427 return mkLazy2(mce
, Ity_I128
, vatom1
, vatom2
);
4429 case Iop_RoundF128toInt
:
4430 /* I32(rm) x F128 -> F128 */
4431 return mkLazy2(mce
, Ity_I128
, vatom1
, vatom2
);
4437 /* I32(rm) x I64/D64 -> D64/I64 */
4438 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4446 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
4447 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4455 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
4456 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4460 case Iop_F128toD128
:
4463 case Iop_D128toF128
:
4464 case Iop_I128StoD128
:
4465 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
4466 return mkLazy2(mce
, Ity_I128
, vatom1
, vatom2
);
4469 /* I32(rm) x F16 -> F16 */
4470 return mkLazy2(mce
, Ity_I16
, vatom1
, vatom2
);
4472 case Iop_RoundF32toInt
:
4474 case Iop_RecpExpF32
:
4475 /* I32(rm) x I32/F32 -> I32/F32 */
4476 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4479 /* I32(rm) x F128 -> F128 */
4480 return mkLazy2(mce
, Ity_I128
, vatom1
, vatom2
);
4486 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
4487 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4491 /* First arg is I32 (rounding mode), second is F64/F32 (data). */
4492 return mkLazy2(mce
, Ity_I16
, vatom1
, vatom2
);
4494 case Iop_F128toI32S
: /* IRRoundingMode(I32) x F128 -> signed I32 */
4495 case Iop_F128toI32U
: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
4496 case Iop_F128toF32
: /* IRRoundingMode(I32) x F128 -> F32 */
4497 case Iop_D128toI32S
: /* IRRoundingMode(I32) x D128 -> signed I32 */
4498 case Iop_D128toI32U
: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
4499 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4501 case Iop_F128toI128S
: /* IRRoundingMode(I32) x F128 -> signed I128 */
4502 case Iop_RndF128
: /* IRRoundingMode(I32) x F128 -> F128 */
4503 case Iop_D128toI128S
: /* IRRoundingMode(I32) x D128 -> signed I128 */
4504 return mkLazy2(mce
, Ity_I128
, vatom1
, vatom2
);
4506 case Iop_F128toI64S
: /* IRRoundingMode(I32) x F128 -> signed I64 */
4507 case Iop_F128toI64U
: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
4508 case Iop_F128toF64
: /* IRRoundingMode(I32) x F128 -> F64 */
4509 case Iop_D128toD64
: /* IRRoundingMode(I64) x D128 -> D64 */
4510 case Iop_D128toI64S
: /* IRRoundingMode(I64) x D128 -> signed I64 */
4511 case Iop_D128toI64U
: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
4512 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4514 case Iop_F64HLtoF128
:
4515 case Iop_D64HLtoD128
:
4516 return assignNew('V', mce
, Ity_I128
,
4517 binop(Iop_64HLto128
, vatom1
, vatom2
));
4525 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
4526 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4529 /* First arg is I32 (rounding mode), second is D64 (data). */
4530 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4533 /* First arg is I32 (rounding mode), second is F64 (data). */
4534 return mkLazy2(mce
, Ity_I16
, vatom1
, vatom2
);
4536 case Iop_InsertExpD64
:
4537 /* I64 x I64 -> D64 */
4538 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4540 case Iop_InsertExpD128
:
4541 /* I64 x I128 -> D128 */
4542 return mkLazy2(mce
, Ity_I128
, vatom1
, vatom2
);
4551 case Iop_CmpExpD128
:
4552 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4556 /* F32 x F32 -> F32 */
4557 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4561 /* F64 x F64 -> F64 */
4562 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4564 /* non-FP after here */
4566 case Iop_DivModU64to32
:
4567 case Iop_DivModS64to32
:
4568 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4570 case Iop_DivModU128to64
:
4571 case Iop_DivModS128to64
:
4572 return mkLazy2(mce
, Ity_I128
, vatom1
, vatom2
);
4575 return assignNew('V', mce
, Ity_I16
, binop(op
, vatom1
, vatom2
));
4577 return assignNew('V', mce
, Ity_I32
, binop(op
, vatom1
, vatom2
));
4579 return assignNew('V', mce
, Ity_I64
, binop(op
, vatom1
, vatom2
));
4581 case Iop_DivModU64to64
:
4582 case Iop_DivModS64to64
: {
4583 IRAtom
* vTmp64
= mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4584 return assignNew('V', mce
, Ity_I128
,
4585 binop(Iop_64HLto128
, vTmp64
, vTmp64
));
4590 IRAtom
* vLo64
= mkLeft64(mce
, mkUifU64(mce
, vatom1
,vatom2
));
4591 IRAtom
* vHi64
= mkPCastTo(mce
, Ity_I64
, vLo64
);
4592 return assignNew('V', mce
, Ity_I128
,
4593 binop(Iop_64HLto128
, vHi64
, vLo64
));
4596 case Iop_DivModU32to32
:
4597 case Iop_DivModS32to32
: {
4598 IRAtom
* vTmp32
= mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4599 return assignNew('V', mce
, Ity_I64
,
4600 binop(Iop_32HLto64
, vTmp32
, vTmp32
));
4605 IRAtom
* vLo32
= mkLeft32(mce
, mkUifU32(mce
, vatom1
,vatom2
));
4606 IRAtom
* vHi32
= mkPCastTo(mce
, Ity_I32
, vLo32
);
4607 return assignNew('V', mce
, Ity_I64
,
4608 binop(Iop_32HLto64
, vHi32
, vLo32
));
4613 IRAtom
* vLo16
= mkLeft16(mce
, mkUifU16(mce
, vatom1
,vatom2
));
4614 IRAtom
* vHi16
= mkPCastTo(mce
, Ity_I16
, vLo16
);
4615 return assignNew('V', mce
, Ity_I32
,
4616 binop(Iop_16HLto32
, vHi16
, vLo16
));
4621 IRAtom
* vLo8
= mkLeft8(mce
, mkUifU8(mce
, vatom1
,vatom2
));
4622 IRAtom
* vHi8
= mkPCastTo(mce
, Ity_I8
, vLo8
);
4623 return assignNew('V', mce
, Ity_I16
, binop(Iop_8HLto16
, vHi8
, vLo8
));
4626 case Iop_Sad8Ux4
: /* maybe we could do better? ftm, do mkLazy2. */
4631 case Iop_QAdd32S
: /* could probably do better */
4632 case Iop_QSub32S
: /* could probably do better */
4633 return mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4639 return mkLazy2(mce
, Ity_I64
, vatom1
, vatom2
);
4642 if (mce
->dlbo
.dl_Add32
== DLexpensive
4643 || (mce
->dlbo
.dl_Add32
== DLauto
&& hu
== HuOth
)) {
4644 return expensiveAddSub(mce
,True
,Ity_I32
,
4645 vatom1
,vatom2
, atom1
,atom2
);
4647 goto cheap_AddSub32
;
4650 if (mce
->dlbo
.dl_Sub32
== DLexpensive
4651 || (mce
->dlbo
.dl_Sub32
== DLauto
&& hu
== HuOth
)) {
4652 return expensiveAddSub(mce
,False
,Ity_I32
,
4653 vatom1
,vatom2
, atom1
,atom2
);
4655 goto cheap_AddSub32
;
4660 return mkLeft32(mce
, mkUifU32(mce
, vatom1
,vatom2
));
4666 return doCmpORD(mce
, op
, vatom1
,vatom2
, atom1
,atom2
);
4669 if (mce
->dlbo
.dl_Add64
== DLexpensive
4670 || (mce
->dlbo
.dl_Add64
== DLauto
&& hu
== HuOth
)) {
4671 return expensiveAddSub(mce
,True
,Ity_I64
,
4672 vatom1
,vatom2
, atom1
,atom2
);
4674 goto cheap_AddSub64
;
4677 if (mce
->dlbo
.dl_Sub64
== DLexpensive
4678 || (mce
->dlbo
.dl_Sub64
== DLauto
&& hu
== HuOth
)) {
4679 return expensiveAddSub(mce
,False
,Ity_I64
,
4680 vatom1
,vatom2
, atom1
,atom2
);
4682 goto cheap_AddSub64
;
4687 return mkLeft64(mce
, mkUifU64(mce
, vatom1
,vatom2
));
4692 return mkLeft16(mce
, mkUifU16(mce
, vatom1
,vatom2
));
4697 return mkLeft8(mce
, mkUifU8(mce
, vatom1
,vatom2
));
4700 case Iop_CmpEQ64
: case Iop_CmpNE64
:
4701 if (mce
->dlbo
.dl_CmpEQ64_CmpNE64
== DLexpensive
)
4702 goto expensive_cmp64
;
4707 case Iop_ExpCmpNE64
:
4708 return expensiveCmpEQorNE(mce
,Ity_I64
, vatom1
,vatom2
, atom1
,atom2
);
4711 case Iop_CmpLE64S
: case Iop_CmpLE64U
:
4712 case Iop_CmpLT64U
: case Iop_CmpLT64S
:
4713 return mkPCastTo(mce
, Ity_I1
, mkUifU64(mce
, vatom1
,vatom2
));
4716 case Iop_CmpEQ32
: case Iop_CmpNE32
:
4717 if (mce
->dlbo
.dl_CmpEQ32_CmpNE32
== DLexpensive
)
4718 goto expensive_cmp32
;
4723 case Iop_ExpCmpNE32
:
4724 return expensiveCmpEQorNE(mce
,Ity_I32
, vatom1
,vatom2
, atom1
,atom2
);
4727 case Iop_CmpLE32S
: case Iop_CmpLE32U
:
4728 case Iop_CmpLT32U
: case Iop_CmpLT32S
:
4729 return mkPCastTo(mce
, Ity_I1
, mkUifU32(mce
, vatom1
,vatom2
));
4732 case Iop_CmpEQ16
: case Iop_CmpNE16
:
4733 if (mce
->dlbo
.dl_CmpEQ16_CmpNE16
== DLexpensive
)
4734 goto expensive_cmp16
;
4739 case Iop_ExpCmpNE16
:
4740 return expensiveCmpEQorNE(mce
,Ity_I16
, vatom1
,vatom2
, atom1
,atom2
);
4743 return mkPCastTo(mce
, Ity_I1
, mkUifU16(mce
, vatom1
,vatom2
));
4746 case Iop_CmpEQ8
: case Iop_CmpNE8
:
4747 if (mce
->dlbo
.dl_CmpEQ8_CmpNE8
== DLexpensive
)
4748 goto expensive_cmp8
;
4753 return expensiveCmpEQorNE(mce
,Ity_I8
, vatom1
,vatom2
, atom1
,atom2
);
4756 return mkPCastTo(mce
, Ity_I1
, mkUifU8(mce
, vatom1
,vatom2
));
4758 ////---- end CmpXX{64,32,16,8}
4760 case Iop_CasCmpEQ8
: case Iop_CasCmpNE8
:
4761 case Iop_CasCmpEQ16
: case Iop_CasCmpNE16
:
4762 case Iop_CasCmpEQ32
: case Iop_CasCmpNE32
:
4763 case Iop_CasCmpEQ64
: case Iop_CasCmpNE64
:
4764 /* Just say these all produce a defined result, regardless
4765 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
4766 return assignNew('V', mce
, Ity_I1
, definedOfType(Ity_I1
));
4768 case Iop_Shl64
: case Iop_Shr64
: case Iop_Sar64
:
4769 return scalarShift( mce
, Ity_I64
, op
, vatom1
,vatom2
, atom1
,atom2
);
4771 case Iop_Shl32
: case Iop_Shr32
: case Iop_Sar32
:
4772 return scalarShift( mce
, Ity_I32
, op
, vatom1
,vatom2
, atom1
,atom2
);
4774 case Iop_Shl16
: case Iop_Shr16
: case Iop_Sar16
:
4775 return scalarShift( mce
, Ity_I16
, op
, vatom1
,vatom2
, atom1
,atom2
);
4777 case Iop_Shl8
: case Iop_Shr8
: case Iop_Sar8
:
4778 return scalarShift( mce
, Ity_I8
, op
, vatom1
,vatom2
, atom1
,atom2
);
4781 uifu
= mkUifUV256
; difd
= mkDifDV256
;
4782 and_or_ty
= Ity_V256
; improve
= mkImproveANDV256
; goto do_And_Or
;
4784 uifu
= mkUifUV128
; difd
= mkDifDV128
;
4785 and_or_ty
= Ity_V128
; improve
= mkImproveANDV128
; goto do_And_Or
;
4787 uifu
= mkUifU64
; difd
= mkDifD64
;
4788 and_or_ty
= Ity_I64
; improve
= mkImproveAND64
; goto do_And_Or
;
4790 uifu
= mkUifU32
; difd
= mkDifD32
;
4791 and_or_ty
= Ity_I32
; improve
= mkImproveAND32
; goto do_And_Or
;
4793 uifu
= mkUifU16
; difd
= mkDifD16
;
4794 and_or_ty
= Ity_I16
; improve
= mkImproveAND16
; goto do_And_Or
;
4796 uifu
= mkUifU8
; difd
= mkDifD8
;
4797 and_or_ty
= Ity_I8
; improve
= mkImproveAND8
; goto do_And_Or
;
4799 uifu
= mkUifU1
; difd
= mkDifD1
;
4800 and_or_ty
= Ity_I1
; improve
= mkImproveAND1
; goto do_And_Or
;
4803 uifu
= mkUifUV256
; difd
= mkDifDV256
;
4804 and_or_ty
= Ity_V256
; improve
= mkImproveORV256
; goto do_And_Or
;
4806 uifu
= mkUifUV128
; difd
= mkDifDV128
;
4807 and_or_ty
= Ity_V128
; improve
= mkImproveORV128
; goto do_And_Or
;
4809 uifu
= mkUifU64
; difd
= mkDifD64
;
4810 and_or_ty
= Ity_I64
; improve
= mkImproveOR64
; goto do_And_Or
;
4812 uifu
= mkUifU32
; difd
= mkDifD32
;
4813 and_or_ty
= Ity_I32
; improve
= mkImproveOR32
; goto do_And_Or
;
4815 uifu
= mkUifU16
; difd
= mkDifD16
;
4816 and_or_ty
= Ity_I16
; improve
= mkImproveOR16
; goto do_And_Or
;
4818 uifu
= mkUifU8
; difd
= mkDifD8
;
4819 and_or_ty
= Ity_I8
; improve
= mkImproveOR8
; goto do_And_Or
;
4821 uifu
= mkUifU1
; difd
= mkDifD1
;
4822 and_or_ty
= Ity_I1
; improve
= mkImproveOR1
; goto do_And_Or
;
4829 difd(mce
, uifu(mce
, vatom1
, vatom2
),
4830 difd(mce
, improve(mce
, atom1
, vatom1
),
4831 improve(mce
, atom2
, vatom2
) ) ) );
4833 return assignNew('V', mce
, and_or_ty
,
4834 difd(mce
, uifu(mce
, vatom1
, vatom2
),
4835 difd(mce
, improve(mce
, atom1
, vatom1
),
4836 improve(mce
, atom2
, vatom2
) ) ) );
4838 return mkUifU8(mce
, vatom1
, vatom2
);
4840 return mkUifU16(mce
, vatom1
, vatom2
);
4842 return mkUifU32(mce
, vatom1
, vatom2
);
4844 return mkUifU64(mce
, vatom1
, vatom2
);
4846 return mkUifUV128(mce
, vatom1
, vatom2
);
4848 return mkUifUV256(mce
, vatom1
, vatom2
);
4860 /* Same scheme as with all other shifts. Note: 22 Oct 05:
4861 this is wrong now, scalar shifts are done properly lazily.
4862 Vector shifts should be fixed too. */
4863 complainIfUndefined(mce
, atom2
, NULL
);
4864 return assignNew('V', mce
, Ity_V256
, binop(op
, vatom1
, atom2
));
4873 case Iop_CmpGT8Sx32
:
4879 return binary8Ix32(mce
, vatom1
, vatom2
);
4881 case Iop_QSub16Ux16
:
4882 case Iop_QSub16Sx16
:
4885 case Iop_MulHi16Sx16
:
4886 case Iop_MulHi16Ux16
:
4891 case Iop_CmpGT16Sx16
:
4892 case Iop_CmpEQ16x16
:
4894 case Iop_QAdd16Ux16
:
4895 case Iop_QAdd16Sx16
:
4897 return binary16Ix16(mce
, vatom1
, vatom2
);
4900 case Iop_CmpGT32Sx8
:
4908 return binary32Ix8(mce
, vatom1
, vatom2
);
4913 case Iop_CmpGT64Sx4
:
4914 return binary64Ix4(mce
, vatom1
, vatom2
);
4916 case Iop_I32StoF32x8
:
4917 case Iop_F32toI32Sx8
:
4918 return unary32Fx8_w_rm(mce
, vatom1
, vatom2
);
4920 /* Perm32x8: rearrange values in left arg using steering values
4921 from right arg. So rearrange the vbits in the same way but
4922 pessimise wrt steering values. */
4926 assignNew('V', mce
, Ity_V256
, binop(op
, vatom1
, atom2
)),
4927 mkPCast32x8(mce
, vatom2
)
4930 /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
4931 Handle the shifted results in the same way that other
4932 binary Q ops are handled, eg QSub: UifU the two args,
4933 then pessimise -- which is binaryNIxM. But for the upper
4934 V128, we require to generate just 1 bit which is the
4935 pessimised shift result, with 127 defined zeroes above it.
4937 Note that this overly pessimistic in that in fact only the
4938 bottom 8 bits of each lane of the second arg determine the shift
4939 amount. Really we ought to ignore any undefinedness in the
4940 rest of the lanes of the second arg. */
4941 case Iop_QandSQsh64x2
: case Iop_QandUQsh64x2
:
4942 case Iop_QandSQRsh64x2
: case Iop_QandUQRsh64x2
:
4943 case Iop_QandSQsh32x4
: case Iop_QandUQsh32x4
:
4944 case Iop_QandSQRsh32x4
: case Iop_QandUQRsh32x4
:
4945 case Iop_QandSQsh16x8
: case Iop_QandUQsh16x8
:
4946 case Iop_QandSQRsh16x8
: case Iop_QandUQRsh16x8
:
4947 case Iop_QandSQsh8x16
: case Iop_QandUQsh8x16
:
4948 case Iop_QandSQRsh8x16
: case Iop_QandUQRsh8x16
:
4950 // The function to generate the pessimised shift result
4951 IRAtom
* (*binaryNIxM
)(MCEnv
*,IRAtom
*,IRAtom
*) = NULL
;
4953 case Iop_QandSQsh64x2
:
4954 case Iop_QandUQsh64x2
:
4955 case Iop_QandSQRsh64x2
:
4956 case Iop_QandUQRsh64x2
:
4957 binaryNIxM
= binary64Ix2
;
4959 case Iop_QandSQsh32x4
:
4960 case Iop_QandUQsh32x4
:
4961 case Iop_QandSQRsh32x4
:
4962 case Iop_QandUQRsh32x4
:
4963 binaryNIxM
= binary32Ix4
;
4965 case Iop_QandSQsh16x8
:
4966 case Iop_QandUQsh16x8
:
4967 case Iop_QandSQRsh16x8
:
4968 case Iop_QandUQRsh16x8
:
4969 binaryNIxM
= binary16Ix8
;
4971 case Iop_QandSQsh8x16
:
4972 case Iop_QandUQsh8x16
:
4973 case Iop_QandSQRsh8x16
:
4974 case Iop_QandUQRsh8x16
:
4975 binaryNIxM
= binary8Ix16
;
4980 tl_assert(binaryNIxM
);
4981 // Pessimised shift result, shV[127:0]
4982 IRAtom
* shV
= binaryNIxM(mce
, vatom1
, vatom2
);
4983 // Generates: Def--(127)--Def PCast-to-I1(shV)
4984 IRAtom
* qV
= mkPCastXXtoXXlsb(mce
, shV
, Ity_V128
);
4985 // and assemble the result
4986 return assignNew('V', mce
, Ity_V256
,
4987 binop(Iop_V128HLtoV256
, qV
, shV
));
4990 case Iop_F32toF16x4
: {
4991 // First, PCast the input vector, retaining the 32x4 format.
4992 IRAtom
* pcasted
= mkPCast32x4(mce
, vatom2
); // :: 32x4
4993 // Now truncate each 32 bit lane to 16 bits. Since we already PCasted
4994 // the input, we're not going to lose any information.
4996 = assignNew('V', mce
, Ity_I64
, unop(Iop_V128HIto64
, pcasted
));//32x2
4998 = assignNew('V', mce
, Ity_I64
, unop(Iop_V128to64
, pcasted
)); // 32x2
5000 = assignNew('V', mce
, Ity_I64
, binop(Iop_NarrowBin32to16x4
,
5001 pcHI64
, pcLO64
)); // 16x4
5002 // Finally, roll in any badness from the rounding mode.
5003 IRAtom
* rmPCasted
= mkPCastTo(mce
, Ity_I64
, vatom1
);
5004 return mkUifU64(mce
, narrowed
, rmPCasted
);
5007 case Iop_F32toF16x8
: {
5008 // Same scheme as for Iop_F32toF16x4.
5009 IRAtom
* pcasted
= mkPCast32x8(mce
, vatom2
); // :: 32x8
5011 = assignNew('V', mce
, Ity_V128
, unop(Iop_V256toV128_1
,
5014 = assignNew('V', mce
, Ity_V128
, unop(Iop_V256toV128_0
,
5017 = assignNew('V', mce
, Ity_V128
, binop(Iop_NarrowBin32to16x8
,
5018 pcHI128
, pcLO128
)); // 16x8
5019 // Finally, roll in any badness from the rounding mode.
5020 IRAtom
* rmPCasted
= mkPCastTo(mce
, Ity_V128
, vatom1
);
5021 return mkUifUV128(mce
, narrowed
, rmPCasted
);
5026 VG_(tool_panic
)("memcheck:expr2vbits_Binop");
5032 IRExpr
* expr2vbits_Unop ( MCEnv
* mce
, IROp op
, IRAtom
* atom
)
5034 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
5035 selection of shadow operation implicitly duplicates the logic in
5036 do_shadow_LoadG and should be kept in sync (in the very unlikely
5037 event that the interpretation of such widening ops changes in
5038 future). See comment in do_shadow_LoadG. */
5039 IRAtom
* vatom
= expr2vbits( mce
, atom
, HuOth
);
5040 tl_assert(isOriginalAtom(mce
,atom
));
5045 case Iop_RSqrtEst64Fx2
:
5046 case Iop_RecipEst64Fx2
:
5047 case Iop_Log2_64Fx2
:
5048 return unary64Fx2(mce
, vatom
);
5050 case Iop_Sqrt64F0x2
:
5051 return unary64F0x2(mce
, vatom
);
5054 case Iop_RSqrtEst32Fx8
:
5055 case Iop_RecipEst32Fx8
:
5056 return unary32Fx8(mce
, vatom
);
5059 return unary64Fx4(mce
, vatom
);
5061 case Iop_RecipEst32Fx4
:
5062 case Iop_I32UtoF32x4_DEP
:
5063 case Iop_I32StoF32x4_DEP
:
5064 case Iop_QF32toI32Ux4_RZ
:
5065 case Iop_QF32toI32Sx4_RZ
:
5066 case Iop_RoundF32x4_RM
:
5067 case Iop_RoundF32x4_RP
:
5068 case Iop_RoundF32x4_RN
:
5069 case Iop_RoundF32x4_RZ
:
5070 case Iop_RecipEst32Ux4
:
5073 case Iop_RSqrtEst32Fx4
:
5074 case Iop_Log2_32Fx4
:
5075 case Iop_Exp2_32Fx4
:
5076 return unary32Fx4(mce
, vatom
);
5078 case Iop_I32UtoF32x2_DEP
:
5079 case Iop_I32StoF32x2_DEP
:
5080 case Iop_RecipEst32Fx2
:
5081 case Iop_RecipEst32Ux2
:
5084 case Iop_RSqrtEst32Fx2
:
5085 return unary32Fx2(mce
, vatom
);
5087 case Iop_Sqrt32F0x4
:
5088 case Iop_RSqrtEst32F0x4
:
5089 case Iop_RecipEst32F0x4
:
5090 return unary32F0x4(mce
, vatom
);
5094 return unary16Fx8(mce
, vatom
);
5096 // These are self-shadowing.
5102 case Iop_Reverse1sIn8_x16
:
5103 case Iop_Reverse8sIn16_x8
:
5104 case Iop_Reverse8sIn32_x4
:
5105 case Iop_Reverse16sIn32_x4
:
5106 case Iop_Reverse8sIn64_x2
:
5107 case Iop_Reverse16sIn64_x2
:
5108 case Iop_Reverse32sIn64_x2
:
5109 case Iop_V256toV128_1
: case Iop_V256toV128_0
:
5110 case Iop_ZeroHI64ofV128
:
5111 case Iop_ZeroHI96ofV128
:
5112 case Iop_ZeroHI112ofV128
:
5113 case Iop_ZeroHI120ofV128
:
5114 case Iop_ReinterpI128asV128
: /* I128 -> V128 */
5115 return assignNew('V', mce
, Ity_V128
, unop(op
, vatom
));
5117 case Iop_F128HItoF64
: /* F128 -> high half of F128 */
5118 case Iop_D128HItoD64
: /* D128 -> high half of D128 */
5119 return assignNew('V', mce
, Ity_I64
, unop(Iop_128HIto64
, vatom
));
5121 case Iop_F128LOtoF64
: /* F128 -> low half of F128 */
5122 case Iop_D128LOtoD64
: /* D128 -> low half of D128 */
5123 return assignNew('V', mce
, Ity_I64
, unop(Iop_128to64
, vatom
));
5128 case Iop_TruncF128toI128S
: /* F128 -> I128S */
5129 case Iop_TruncF128toI128U
: /* F128 -> I128U */
5130 case Iop_ReinterpV128asI128
: /* V128 -> I128 */
5131 case Iop_ReinterpI128asF128
:
5132 case Iop_ReinterpF128asI128
:
5133 return mkPCastTo(mce
, Ity_I128
, vatom
);
5135 case Iop_BCD128toI128S
:
5136 case Iop_MulI128by10
:
5137 case Iop_MulI128by10Carry
:
5138 case Iop_F16toF64x2
:
5139 case Iop_F64toF16x2_DEP
:
5140 // FIXME JRS 2018-Nov-15. This is surely not correct!
5143 case Iop_ReinterpI32asF32
:
5144 case Iop_ReinterpF32asI32
:
5145 return assignNew('V', mce
, Ity_I32
, vatom
);
5147 case Iop_ReinterpF64asI64
:
5148 case Iop_ReinterpI64asF64
:
5149 case Iop_ReinterpI64asD64
:
5150 case Iop_ReinterpD64asI64
:
5151 return assignNew('V', mce
, Ity_I64
, vatom
);
5153 case Iop_I32StoF128
: /* signed I32 -> F128 */
5154 case Iop_I64StoF128
: /* signed I64 -> F128 */
5155 case Iop_I32UtoF128
: /* unsigned I32 -> F128 */
5156 case Iop_I64UtoF128
: /* unsigned I64 -> F128 */
5157 case Iop_F32toF128
: /* F32 -> F128 */
5158 case Iop_F64toF128
: /* F64 -> F128 */
5159 case Iop_I32StoD128
: /* signed I64 -> D128 */
5160 case Iop_I64StoD128
: /* signed I64 -> D128 */
5161 case Iop_I32UtoD128
: /* unsigned I32 -> D128 */
5162 case Iop_I64UtoD128
: /* unsigned I64 -> D128 */
5163 return mkPCastTo(mce
, Ity_I128
, vatom
);
5171 case Iop_RSqrtEst5GoodF64
:
5172 case Iop_RoundF64toF64_NEAREST
:
5173 case Iop_RoundF64toF64_NegINF
:
5174 case Iop_RoundF64toF64_PosINF
:
5175 case Iop_RoundF64toF64_ZERO
:
5179 case Iop_ExtractExpD64
: /* D64 -> I64 */
5180 case Iop_ExtractExpD128
: /* D128 -> I64 */
5181 case Iop_ExtractSigD64
: /* D64 -> I64 */
5182 case Iop_ExtractSigD128
: /* D128 -> I64 */
5185 return mkPCastTo(mce
, Ity_I64
, vatom
);
5188 return mkPCastTo(mce
, Ity_I128
, vatom
);
5190 case Iop_TruncF64asF32
:
5194 return mkPCastTo(mce
, Ity_I32
, vatom
);
5198 return mkPCastTo(mce
, Ity_I16
, vatom
);
5200 case Iop_Ctz32
: case Iop_CtzNat32
:
5201 case Iop_Ctz64
: case Iop_CtzNat64
:
5202 return expensiveCountTrailingZeroes(mce
, op
, atom
, vatom
);
5204 case Iop_Clz32
: case Iop_ClzNat32
:
5205 case Iop_Clz64
: case Iop_ClzNat64
:
5206 return expensiveCountLeadingZeroes(mce
, op
, atom
, vatom
);
5208 // PopCount32: this is slightly pessimistic. It is true that the
5209 // result depends on all input bits, so that aspect of the PCast is
5210 // correct. However, regardless of the input, only the lowest 5 bits
5211 // out of the output can ever be undefined. So we could actually
5212 // "improve" the results here by marking the top 27 bits of output as
5213 // defined. A similar comment applies for PopCount64.
5214 case Iop_PopCount32
:
5215 return mkPCastTo(mce
, Ity_I32
, vatom
);
5216 case Iop_PopCount64
:
5217 return mkPCastTo(mce
, Ity_I64
, vatom
);
5219 // These are self-shadowing.
5229 case Iop_V128HIto64
:
5235 case Iop_Reverse8sIn16_x4
:
5236 case Iop_Reverse8sIn32_x2
:
5237 case Iop_Reverse16sIn32_x2
:
5238 case Iop_Reverse8sIn64_x1
:
5239 case Iop_Reverse16sIn64_x1
:
5240 case Iop_Reverse32sIn64_x1
:
5241 case Iop_V256to64_0
: case Iop_V256to64_1
:
5242 case Iop_V256to64_2
: case Iop_V256to64_3
:
5243 return assignNew('V', mce
, Ity_I64
, unop(op
, vatom
));
5245 // These are self-shadowing.
5255 case Iop_Reverse8sIn32_x1
:
5256 return assignNew('V', mce
, Ity_I32
, unop(op
, vatom
));
5258 // These are self-shadowing.
5265 case Iop_GetMSBs8x16
:
5266 return assignNew('V', mce
, Ity_I16
, unop(op
, vatom
));
5268 // These are self-shadowing.
5275 case Iop_GetMSBs8x8
:
5276 return assignNew('V', mce
, Ity_I8
, unop(op
, vatom
));
5279 return assignNew('V', mce
, Ity_I1
, unop(Iop_32to1
, vatom
));
5282 return assignNew('V', mce
, Ity_I1
, unop(Iop_64to1
, vatom
));
5291 // FIXME JRS 2018-Nov-15. This is surely not correct!
5299 return mkPCast8x8(mce
, vatom
);
5301 case Iop_CmpNEZ8x16
:
5307 return mkPCast8x16(mce
, vatom
);
5309 case Iop_CmpNEZ16x4
:
5313 return mkPCast16x4(mce
, vatom
);
5315 case Iop_CmpNEZ16x8
:
5320 return mkPCast16x8(mce
, vatom
);
5322 case Iop_CmpNEZ32x2
:
5325 case Iop_F32toI32Ux2_RZ
:
5326 case Iop_F32toI32Sx2_RZ
:
5328 return mkPCast32x2(mce
, vatom
);
5330 case Iop_CmpNEZ32x4
:
5333 case Iop_F32toI32Ux4_RZ
:
5334 case Iop_F32toI32Sx4_RZ
:
5336 case Iop_RSqrtEst32Ux4
:
5338 return mkPCast32x4(mce
, vatom
);
5340 case Iop_TruncF128toI32S
: /* F128 -> I32S (result stored in 64-bits) */
5341 case Iop_TruncF128toI32U
: /* F128 -> I32U (result stored in 64-bits) */
5343 return mkPCastTo(mce
, Ity_I32
, vatom
);
5345 case Iop_TruncF128toI64S
: /* F128 -> I64S */
5346 case Iop_TruncF128toI64U
: /* F128 -> I64U */
5348 return mkPCastTo(mce
, Ity_I64
, vatom
);
5350 case Iop_CmpNEZ64x2
:
5351 case Iop_CipherSV128
:
5355 return mkPCast64x2(mce
, vatom
);
5357 // This is self-shadowing.
5358 case Iop_PwBitMtxXpose64x2
:
5359 return assignNew('V', mce
, Ity_V128
, unop(op
, vatom
));
5361 case Iop_NarrowUn16to8x8
:
5362 case Iop_NarrowUn32to16x4
:
5363 case Iop_NarrowUn64to32x2
:
5364 case Iop_QNarrowUn16Sto8Sx8
:
5365 case Iop_QNarrowUn16Sto8Ux8
:
5366 case Iop_QNarrowUn16Uto8Ux8
:
5367 case Iop_QNarrowUn32Sto16Sx4
:
5368 case Iop_QNarrowUn32Sto16Ux4
:
5369 case Iop_QNarrowUn32Uto16Ux4
:
5370 case Iop_QNarrowUn64Sto32Sx2
:
5371 case Iop_QNarrowUn64Sto32Ux2
:
5372 case Iop_QNarrowUn64Uto32Ux2
:
5373 return vectorNarrowUnV128(mce
, op
, vatom
);
5375 // JRS FIXME 2019 Mar 17: per comments on F16toF32x4, this is probably not
5377 case Iop_F32toF16x4_DEP
:
5378 return vectorNarrowUnV128(mce
, op
, vatom
);
5380 case Iop_Widen8Sto16x8
:
5381 case Iop_Widen8Uto16x8
:
5382 case Iop_Widen16Sto32x4
:
5383 case Iop_Widen16Uto32x4
:
5384 case Iop_Widen32Sto64x2
:
5385 case Iop_Widen32Uto64x2
:
5386 return vectorWidenI64(mce
, op
, vatom
);
5388 case Iop_F16toF32x4
:
5389 // JRS 2019 Mar 17: this definitely isn't right, but it probably works
5390 // OK by accident if -- as seems likely -- the F16 to F32 conversion
5391 // preserves will generate an output 32 bits with at least one 1 bit
5392 // set if there's one or more 1 bits set in the input 16 bits. More
5393 // correct code for this is just below, but commented out, so as to
5394 // avoid short-term backend failures on targets that can't do
5395 // Iop_Interleave{LO,HI}16x4.
5396 return vectorWidenI64(mce
, op
, vatom
);
5398 case Iop_F16toF32x8
: {
5399 // PCast the input at 16x8. This makes each lane hold either all
5400 // zeroes or all ones.
5401 IRAtom
* pcasted
= mkPCast16x8(mce
, vatom
); // :: I16x8
5402 // Now double the width of each lane to 32 bits. Because the lanes are
5403 // all zeroes or all ones, we can just copy the each lane twice into
5404 // the result. Here's the low half:
5405 IRAtom
* widenedLO
// :: I32x4
5406 = assignNew('V', mce
, Ity_V128
, binop(Iop_InterleaveLO16x8
,
5408 // And the high half:
5409 IRAtom
* widenedHI
// :: I32x4
5410 = assignNew('V', mce
, Ity_V128
, binop(Iop_InterleaveHI16x8
,
5412 // Glue them back together:
5413 return assignNew('V', mce
, Ity_V256
, binop(Iop_V128HLtoV256
,
5414 widenedHI
, widenedLO
));
5417 // See comment just above, for Iop_F16toF32x4
5418 //case Iop_F16toF32x4: {
5419 // // Same scheme as F16toF32x4
5420 // IRAtom* pcasted = mkPCast16x4(mce, vatom); // :: I16x4
5421 // IRAtom* widenedLO // :: I32x2
5422 // = assignNew('V', mce, Ity_I64, binop(Iop_InterleaveLO16x4,
5423 // pcasted, pcasted));
5424 // IRAtom* widenedHI // :: I32x4
5425 // = assignNew('V', mce, Ity_I64, binop(Iop_InterleaveHI16x4,
5426 // pcasted, pcasted));
5427 // // Glue them back together:
5428 // return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
5429 // widenedHI, widenedLO));
5432 case Iop_PwAddL32Ux2
:
5433 case Iop_PwAddL32Sx2
:
5434 return mkPCastTo(mce
, Ity_I64
,
5435 assignNew('V', mce
, Ity_I64
, unop(op
, mkPCast32x2(mce
, vatom
))));
5437 case Iop_PwAddL16Ux4
:
5438 case Iop_PwAddL16Sx4
:
5439 return mkPCast32x2(mce
,
5440 assignNew('V', mce
, Ity_I64
, unop(op
, mkPCast16x4(mce
, vatom
))));
5442 case Iop_PwAddL8Ux8
:
5443 case Iop_PwAddL8Sx8
:
5444 return mkPCast16x4(mce
,
5445 assignNew('V', mce
, Ity_I64
, unop(op
, mkPCast8x8(mce
, vatom
))));
5447 case Iop_PwAddL32Ux4
:
5448 case Iop_PwAddL32Sx4
:
5449 return mkPCast64x2(mce
,
5450 assignNew('V', mce
, Ity_V128
, unop(op
, mkPCast32x4(mce
, vatom
))));
5452 case Iop_PwAddL64Ux2
:
5453 return mkPCast128x1(mce
,
5454 assignNew('V', mce
, Ity_V128
, unop(op
, mkPCast64x2(mce
, vatom
))));
5456 case Iop_PwAddL16Ux8
:
5457 case Iop_PwAddL16Sx8
:
5458 return mkPCast32x4(mce
,
5459 assignNew('V', mce
, Ity_V128
, unop(op
, mkPCast16x8(mce
, vatom
))));
5461 case Iop_PwAddL8Ux16
:
5462 case Iop_PwAddL8Sx16
:
5463 return mkPCast16x8(mce
,
5464 assignNew('V', mce
, Ity_V128
, unop(op
, mkPCast8x16(mce
, vatom
))));
5469 VG_(tool_panic
)("memcheck:expr2vbits_Unop");
5474 /* Worker function -- do not call directly. See comments on
5475 expr2vbits_Load for the meaning of |guard|.
5477 Generates IR to (1) perform a definedness test of |addr|, (2)
5478 perform a validity test of |addr|, and (3) return the Vbits for the
5479 location indicated by |addr|. All of this only happens when
5480 |guard| is NULL or |guard| evaluates to True at run time.
5482 If |guard| evaluates to False at run time, the returned value is
5483 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
5486 The definedness of |guard| itself is not checked. That is assumed
5487 to have been done before this point, by the caller. */
5489 IRAtom
* expr2vbits_Load_WRK ( MCEnv
* mce
,
5490 IREndness end
, IRType ty
,
5491 IRAtom
* addr
, UInt bias
, IRAtom
* guard
)
5493 tl_assert(isOriginalAtom(mce
,addr
));
5494 tl_assert(end
== Iend_LE
|| end
== Iend_BE
);
5496 /* First, emit a definedness test for the address. This also sets
5497 the address (shadow) to 'defined' following the test. */
5498 complainIfUndefined( mce
, addr
, guard
);
5500 /* Now cook up a call to the relevant helper function, to read the data V
5501 bits from shadow memory. Note that I128 loads are done by pretending
5502 we're doing a V128 load, and then converting the resulting V128 vbits
5503 word to an I128, right at the end of this function -- see `castedToI128`
5504 below. (It's only a minor hack :-) This pertains to bug 444399. */
5505 ty
= shadowTypeV(ty
);
5507 void* helper
= NULL
;
5508 const HChar
* hname
= NULL
;
5509 Bool ret_via_outparam
= False
;
5511 if (end
== Iend_LE
) {
5513 case Ity_V256
: helper
= &MC_(helperc_LOADV256le
);
5514 hname
= "MC_(helperc_LOADV256le)";
5515 ret_via_outparam
= True
;
5517 case Ity_I128
: // fallthrough. See comment above.
5518 case Ity_V128
: helper
= &MC_(helperc_LOADV128le
);
5519 hname
= "MC_(helperc_LOADV128le)";
5520 ret_via_outparam
= True
;
5522 case Ity_I64
: helper
= &MC_(helperc_LOADV64le
);
5523 hname
= "MC_(helperc_LOADV64le)";
5525 case Ity_I32
: helper
= &MC_(helperc_LOADV32le
);
5526 hname
= "MC_(helperc_LOADV32le)";
5528 case Ity_I16
: helper
= &MC_(helperc_LOADV16le
);
5529 hname
= "MC_(helperc_LOADV16le)";
5531 case Ity_I8
: helper
= &MC_(helperc_LOADV8
);
5532 hname
= "MC_(helperc_LOADV8)";
5534 default: ppIRType(ty
);
5535 VG_(tool_panic
)("memcheck:expr2vbits_Load_WRK(LE)");
5539 case Ity_V256
: helper
= &MC_(helperc_LOADV256be
);
5540 hname
= "MC_(helperc_LOADV256be)";
5541 ret_via_outparam
= True
;
5543 case Ity_V128
: helper
= &MC_(helperc_LOADV128be
);
5544 hname
= "MC_(helperc_LOADV128be)";
5545 ret_via_outparam
= True
;
5547 case Ity_I64
: helper
= &MC_(helperc_LOADV64be
);
5548 hname
= "MC_(helperc_LOADV64be)";
5550 case Ity_I32
: helper
= &MC_(helperc_LOADV32be
);
5551 hname
= "MC_(helperc_LOADV32be)";
5553 case Ity_I16
: helper
= &MC_(helperc_LOADV16be
);
5554 hname
= "MC_(helperc_LOADV16be)";
5556 case Ity_I8
: helper
= &MC_(helperc_LOADV8
);
5557 hname
= "MC_(helperc_LOADV8)";
5559 default: ppIRType(ty
);
5560 VG_(tool_panic
)("memcheck:expr2vbits_Load_WRK(BE)");
5567 /* Generate the actual address into addrAct. */
5574 IRType tyAddr
= mce
->hWordTy
;
5575 tl_assert( tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
5576 mkAdd
= tyAddr
==Ity_I32
? Iop_Add32
: Iop_Add64
;
5577 eBias
= tyAddr
==Ity_I32
? mkU32(bias
) : mkU64(bias
);
5578 addrAct
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBias
) );
5581 /* We need to have a place to park the V bits we're just about to
5583 IRTemp datavbits
= newTemp(mce
, ty
== Ity_I128
? Ity_V128
: ty
, VSh
);
5585 /* Here's the call. */
5587 if (ret_via_outparam
) {
5588 di
= unsafeIRDirty_1_N( datavbits
,
5590 hname
, VG_(fnptr_to_fnentry
)( helper
),
5591 mkIRExprVec_2( IRExpr_VECRET(), addrAct
) );
5593 di
= unsafeIRDirty_1_N( datavbits
,
5595 hname
, VG_(fnptr_to_fnentry
)( helper
),
5596 mkIRExprVec_1( addrAct
) );
5599 setHelperAnns( mce
, di
);
5602 /* Ideally the didn't-happen return value here would be all-ones
5603 (all-undefined), so it'd be obvious if it got used
5604 inadvertently. We can get by with the IR-mandated default
5605 value (0b01 repeating, 0x55 etc) as that'll still look pretty
5606 undefined if it ever leaks out. */
5608 stmt( 'V', mce
, IRStmt_Dirty(di
) );
5610 if (ty
== Ity_I128
) {
5611 IRAtom
* castedToI128
5612 = assignNew('V', mce
, Ity_I128
,
5613 unop(Iop_ReinterpV128asI128
, mkexpr(datavbits
)));
5614 return castedToI128
;
5616 return mkexpr(datavbits
);
5621 /* Generate IR to do a shadow load. The helper is expected to check
5622 the validity of the address and return the V bits for that address.
5623 This can optionally be controlled by a guard, which is assumed to
5624 be True if NULL. In the case where the guard is False at runtime,
5625 the helper will return the didn't-do-the-call value of 0x55..55.
5626 Since that means "completely undefined result", the caller of
5627 this function will need to fix up the result somehow in that
5630 Caller of this function is also expected to have checked the
5631 definedness of |guard| before this point.
5634 IRAtom
* expr2vbits_Load ( MCEnv
* mce
,
5635 IREndness end
, IRType ty
,
5636 IRAtom
* addr
, UInt bias
,
5639 tl_assert(end
== Iend_LE
|| end
== Iend_BE
);
5640 switch (shadowTypeV(ty
)) {
5648 return expr2vbits_Load_WRK(mce
, end
, ty
, addr
, bias
, guard
);
5650 VG_(tool_panic
)("expr2vbits_Load");
5655 /* The most general handler for guarded loads. Assumes the
5656 definedness of GUARD has already been checked by the caller. A
5657 GUARD of NULL is assumed to mean "always True". Generates code to
5658 check the definedness and validity of ADDR.
5660 Generate IR to do a shadow load from ADDR and return the V bits.
5661 The loaded type is TY. The loaded data is then (shadow) widened by
5662 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
5663 evaluates to False at run time then the returned Vbits are simply
5664 VALT instead. Note therefore that the argument type of VWIDEN must
5665 be TY and the result type of VWIDEN must equal the type of VALT.
5668 IRAtom
* expr2vbits_Load_guarded_General ( MCEnv
* mce
,
5669 IREndness end
, IRType ty
,
5670 IRAtom
* addr
, UInt bias
,
5672 IROp vwiden
, IRAtom
* valt
)
5674 /* Sanity check the conversion operation, and also set TYWIDE. */
5675 IRType tyWide
= Ity_INVALID
;
5680 case Iop_16Uto32
: case Iop_16Sto32
: case Iop_8Uto32
: case Iop_8Sto32
:
5684 VG_(tool_panic
)("memcheck:expr2vbits_Load_guarded_General");
5687 /* If the guard evaluates to True, this will hold the loaded V bits
5688 at TY. If the guard evaluates to False, this will be all
5689 ones, meaning "all undefined", in which case we will have to
5690 replace it using an ITE below. */
5692 = assignNew('V', mce
, ty
,
5693 expr2vbits_Load(mce
, end
, ty
, addr
, bias
, guard
));
5694 /* Now (shadow-) widen the loaded V bits to the desired width. In
5695 the guard-is-False case, the allowable widening operators will
5696 in the worst case (unsigned widening) at least leave the
5697 pre-widened part as being marked all-undefined, and in the best
5698 case (signed widening) mark the whole widened result as
5699 undefined. Anyway, it doesn't matter really, since in this case
5700 we will replace said value with the default value |valt| using an
5703 = vwiden
== Iop_INVALID
5705 : assignNew('V', mce
, tyWide
, unop(vwiden
, iftrue1
));
5706 /* These are the V bits we will return if the load doesn't take
5710 /* Prepare the cond for the ITE. Convert a NULL cond into
5711 something that iropt knows how to fold out later. */
5713 = guard
== NULL
? mkU1(1) : guard
;
5714 /* And assemble the final result. */
5715 return assignNew('V', mce
, tyWide
, IRExpr_ITE(cond
, iftrue2
, iffalse
));
5719 /* A simpler handler for guarded loads, in which there is no
5720 conversion operation, and the default V bit return (when the guard
5721 evaluates to False at runtime) is "all defined". If there is no
5722 guard expression or the guard is always TRUE this function behaves
5723 like expr2vbits_Load. It is assumed that definedness of GUARD has
5724 already been checked at the call site. */
5726 IRAtom
* expr2vbits_Load_guarded_Simple ( MCEnv
* mce
,
5727 IREndness end
, IRType ty
,
5728 IRAtom
* addr
, UInt bias
,
5731 return expr2vbits_Load_guarded_General(
5732 mce
, end
, ty
, addr
, bias
, guard
, Iop_INVALID
, definedOfType(ty
)
5738 IRAtom
* expr2vbits_ITE ( MCEnv
* mce
,
5739 IRAtom
* cond
, IRAtom
* iftrue
, IRAtom
* iffalse
)
5741 IRAtom
*vbitsC
, *vbits0
, *vbits1
;
5743 /* Given ITE(cond, iftrue, iffalse), generate
5744 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
5745 That is, steer the V bits like the originals, but trash the
5746 result if the steering value is undefined. This gives
5747 lazy propagation. */
5748 tl_assert(isOriginalAtom(mce
, cond
));
5749 tl_assert(isOriginalAtom(mce
, iftrue
));
5750 tl_assert(isOriginalAtom(mce
, iffalse
));
5752 vbitsC
= expr2vbits(mce
, cond
, HuOth
); // could we use HuPCa here?
5753 vbits1
= expr2vbits(mce
, iftrue
, HuOth
);
5754 vbits0
= expr2vbits(mce
, iffalse
, HuOth
);
5755 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vbits0
);
5758 mkUifU(mce
, ty
, assignNew('V', mce
, ty
,
5759 IRExpr_ITE(cond
, vbits1
, vbits0
)),
5760 mkPCastTo(mce
, ty
, vbitsC
) );
5763 /* --------- This is the main expression-handling function. --------- */
5766 IRExpr
* expr2vbits ( MCEnv
* mce
, IRExpr
* e
,
5767 HowUsed hu
/*use HuOth if unknown*/ )
5772 return shadow_GET( mce
, e
->Iex
.Get
.offset
, e
->Iex
.Get
.ty
);
5775 return shadow_GETI( mce
, e
->Iex
.GetI
.descr
,
5776 e
->Iex
.GetI
.ix
, e
->Iex
.GetI
.bias
);
5779 return IRExpr_RdTmp( findShadowTmpV(mce
, e
->Iex
.RdTmp
.tmp
) );
5782 return definedOfType(shadowTypeV(typeOfIRExpr(mce
->sb
->tyenv
, e
)));
5785 return expr2vbits_Qop(
5787 e
->Iex
.Qop
.details
->op
,
5788 e
->Iex
.Qop
.details
->arg1
, e
->Iex
.Qop
.details
->arg2
,
5789 e
->Iex
.Qop
.details
->arg3
, e
->Iex
.Qop
.details
->arg4
5793 return expr2vbits_Triop(
5795 e
->Iex
.Triop
.details
->op
,
5796 e
->Iex
.Triop
.details
->arg1
, e
->Iex
.Triop
.details
->arg2
,
5797 e
->Iex
.Triop
.details
->arg3
5801 return expr2vbits_Binop(
5804 e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
,
5809 return expr2vbits_Unop( mce
, e
->Iex
.Unop
.op
, e
->Iex
.Unop
.arg
);
5812 return expr2vbits_Load( mce
, e
->Iex
.Load
.end
,
5814 e
->Iex
.Load
.addr
, 0/*addr bias*/,
5815 NULL
/* guard == "always True"*/ );
5818 return mkLazyN( mce
, e
->Iex
.CCall
.args
,
5823 return expr2vbits_ITE( mce
, e
->Iex
.ITE
.cond
, e
->Iex
.ITE
.iftrue
,
5824 e
->Iex
.ITE
.iffalse
);
5830 VG_(tool_panic
)("memcheck: expr2vbits");
5835 /*------------------------------------------------------------*/
5836 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
5837 /*------------------------------------------------------------*/
5839 /* Widen a value to the host word size. */
5842 IRExpr
* zwidenToHostWord ( MCEnv
* mce
, IRAtom
* vatom
)
5846 /* vatom is vbits-value and as such can only have a shadow type. */
5847 tl_assert(isShadowAtom(mce
,vatom
));
5849 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vatom
);
5852 if (tyH
== Ity_I32
) {
5857 return assignNew('V', mce
, tyH
, unop(Iop_16Uto32
, vatom
));
5859 return assignNew('V', mce
, tyH
, unop(Iop_8Uto32
, vatom
));
5864 if (tyH
== Ity_I64
) {
5867 return assignNew('V', mce
, tyH
, unop(Iop_32Uto64
, vatom
));
5869 return assignNew('V', mce
, tyH
, unop(Iop_32Uto64
,
5870 assignNew('V', mce
, Ity_I32
, unop(Iop_16Uto32
, vatom
))));
5872 return assignNew('V', mce
, tyH
, unop(Iop_32Uto64
,
5873 assignNew('V', mce
, Ity_I32
, unop(Iop_8Uto32
, vatom
))));
5881 VG_(printf
)("\nty = "); ppIRType(ty
); VG_(printf
)("\n");
5882 VG_(tool_panic
)("zwidenToHostWord");
5886 /* Generate a shadow store. |addr| is always the original address
5887 atom. You can pass in either originals or V-bits for the data
5888 atom, but obviously not both. This function generates a check for
5889 the definedness and (indirectly) the validity of |addr|, but only
5890 when |guard| evaluates to True at run time (or is NULL).
5892 |guard| :: Ity_I1 controls whether the store really happens; NULL
5893 means it unconditionally does. Note that |guard| itself is not
5894 checked for definedness; the caller of this function must do that
5898 void do_shadow_Store ( MCEnv
* mce
,
5900 IRAtom
* addr
, UInt bias
,
5901 IRAtom
* data
, IRAtom
* vdata
,
5906 void* helper
= NULL
;
5907 const HChar
* hname
= NULL
;
5910 tyAddr
= mce
->hWordTy
;
5911 mkAdd
= tyAddr
==Ity_I32
? Iop_Add32
: Iop_Add64
;
5912 tl_assert( tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
5913 tl_assert( end
== Iend_LE
|| end
== Iend_BE
);
5917 tl_assert(isOriginalAtom(mce
, data
));
5918 tl_assert(bias
== 0);
5919 vdata
= expr2vbits( mce
, data
, HuOth
);
5924 tl_assert(isOriginalAtom(mce
,addr
));
5925 tl_assert(isShadowAtom(mce
,vdata
));
5928 tl_assert(isOriginalAtom(mce
, guard
));
5929 tl_assert(typeOfIRExpr(mce
->sb
->tyenv
, guard
) == Ity_I1
);
5932 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vdata
);
5934 // If we're not doing undefined value checking, pretend that this value
5935 // is "all valid". That lets Vex's optimiser remove some of the V bit
5936 // shadow computation ops that precede it.
5937 if (MC_(clo_mc_level
) == 1) {
5939 case Ity_V256
: // V256 weirdness -- used four times
5940 c
= IRConst_V256(V_BITS32_DEFINED
); break;
5941 case Ity_V128
: // V128 weirdness -- used twice
5942 c
= IRConst_V128(V_BITS16_DEFINED
); break;
5943 case Ity_I128
: c
= IRConst_U128(V_BITS16_DEFINED
); break;
5944 case Ity_I64
: c
= IRConst_U64 (V_BITS64_DEFINED
); break;
5945 case Ity_I32
: c
= IRConst_U32 (V_BITS32_DEFINED
); break;
5946 case Ity_I16
: c
= IRConst_U16 (V_BITS16_DEFINED
); break;
5947 case Ity_I8
: c
= IRConst_U8 (V_BITS8_DEFINED
); break;
5948 default: VG_(tool_panic
)("memcheck:do_shadow_Store(LE)");
5950 vdata
= IRExpr_Const( c
);
5953 /* First, emit a definedness test for the address. This also sets
5954 the address (shadow) to 'defined' following the test. Both of
5955 those actions are gated on |guard|. */
5956 complainIfUndefined( mce
, addr
, guard
);
5958 /* Now decide which helper function to call to write the data V
5959 bits into shadow memory. */
5960 if (end
== Iend_LE
) {
5962 case Ity_V256
: /* we'll use the helper four times */
5963 case Ity_V128
: /* we'll use the helper twice */
5964 case Ity_I128
: /* we'll use the helper twice */
5965 case Ity_I64
: helper
= &MC_(helperc_STOREV64le
);
5966 hname
= "MC_(helperc_STOREV64le)";
5968 case Ity_I32
: helper
= &MC_(helperc_STOREV32le
);
5969 hname
= "MC_(helperc_STOREV32le)";
5971 case Ity_I16
: helper
= &MC_(helperc_STOREV16le
);
5972 hname
= "MC_(helperc_STOREV16le)";
5974 case Ity_I8
: helper
= &MC_(helperc_STOREV8
);
5975 hname
= "MC_(helperc_STOREV8)";
5977 default: VG_(tool_panic
)("memcheck:do_shadow_Store(LE)");
5981 case Ity_V128
: /* we'll use the helper twice */
5982 case Ity_I64
: helper
= &MC_(helperc_STOREV64be
);
5983 hname
= "MC_(helperc_STOREV64be)";
5985 case Ity_I32
: helper
= &MC_(helperc_STOREV32be
);
5986 hname
= "MC_(helperc_STOREV32be)";
5988 case Ity_I16
: helper
= &MC_(helperc_STOREV16be
);
5989 hname
= "MC_(helperc_STOREV16be)";
5991 case Ity_I8
: helper
= &MC_(helperc_STOREV8
);
5992 hname
= "MC_(helperc_STOREV8)";
5994 /* Note, no V256 case here, because no big-endian target that
5995 we support, has 256 vectors. */
5996 default: VG_(tool_panic
)("memcheck:do_shadow_Store(BE)");
6000 if (UNLIKELY(ty
== Ity_V256
)) {
6002 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
6003 Q3 being the most significant lane. */
6004 /* These are the offsets of the Qs in memory. */
6005 Int offQ0
, offQ1
, offQ2
, offQ3
;
6007 /* Various bits for constructing the 4 lane helper calls */
6008 IRDirty
*diQ0
, *diQ1
, *diQ2
, *diQ3
;
6009 IRAtom
*addrQ0
, *addrQ1
, *addrQ2
, *addrQ3
;
6010 IRAtom
*vdataQ0
, *vdataQ1
, *vdataQ2
, *vdataQ3
;
6011 IRAtom
*eBiasQ0
, *eBiasQ1
, *eBiasQ2
, *eBiasQ3
;
6013 if (end
== Iend_LE
) {
6014 offQ0
= 0; offQ1
= 8; offQ2
= 16; offQ3
= 24;
6016 offQ3
= 0; offQ2
= 8; offQ1
= 16; offQ0
= 24;
6019 eBiasQ0
= tyAddr
==Ity_I32
? mkU32(bias
+offQ0
) : mkU64(bias
+offQ0
);
6020 addrQ0
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasQ0
) );
6021 vdataQ0
= assignNew('V', mce
, Ity_I64
, unop(Iop_V256to64_0
, vdata
));
6022 diQ0
= unsafeIRDirty_0_N(
6024 hname
, VG_(fnptr_to_fnentry
)( helper
),
6025 mkIRExprVec_2( addrQ0
, vdataQ0
)
6028 eBiasQ1
= tyAddr
==Ity_I32
? mkU32(bias
+offQ1
) : mkU64(bias
+offQ1
);
6029 addrQ1
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasQ1
) );
6030 vdataQ1
= assignNew('V', mce
, Ity_I64
, unop(Iop_V256to64_1
, vdata
));
6031 diQ1
= unsafeIRDirty_0_N(
6033 hname
, VG_(fnptr_to_fnentry
)( helper
),
6034 mkIRExprVec_2( addrQ1
, vdataQ1
)
6037 eBiasQ2
= tyAddr
==Ity_I32
? mkU32(bias
+offQ2
) : mkU64(bias
+offQ2
);
6038 addrQ2
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasQ2
) );
6039 vdataQ2
= assignNew('V', mce
, Ity_I64
, unop(Iop_V256to64_2
, vdata
));
6040 diQ2
= unsafeIRDirty_0_N(
6042 hname
, VG_(fnptr_to_fnentry
)( helper
),
6043 mkIRExprVec_2( addrQ2
, vdataQ2
)
6046 eBiasQ3
= tyAddr
==Ity_I32
? mkU32(bias
+offQ3
) : mkU64(bias
+offQ3
);
6047 addrQ3
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasQ3
) );
6048 vdataQ3
= assignNew('V', mce
, Ity_I64
, unop(Iop_V256to64_3
, vdata
));
6049 diQ3
= unsafeIRDirty_0_N(
6051 hname
, VG_(fnptr_to_fnentry
)( helper
),
6052 mkIRExprVec_2( addrQ3
, vdataQ3
)
6056 diQ0
->guard
= diQ1
->guard
= diQ2
->guard
= diQ3
->guard
= guard
;
6058 setHelperAnns( mce
, diQ0
);
6059 setHelperAnns( mce
, diQ1
);
6060 setHelperAnns( mce
, diQ2
);
6061 setHelperAnns( mce
, diQ3
);
6062 stmt( 'V', mce
, IRStmt_Dirty(diQ0
) );
6063 stmt( 'V', mce
, IRStmt_Dirty(diQ1
) );
6064 stmt( 'V', mce
, IRStmt_Dirty(diQ2
) );
6065 stmt( 'V', mce
, IRStmt_Dirty(diQ3
) );
6068 else if (UNLIKELY(ty
== Ity_V128
|| ty
== Ity_I128
)) {
6070 /* V128/I128-bit case */
6071 /* See comment in next clause re 64-bit regparms */
6072 /* also, need to be careful about endianness */
6074 Int offLo64
, offHi64
;
6075 IRDirty
*diLo64
, *diHi64
;
6076 IRAtom
*addrLo64
, *addrHi64
;
6077 IRAtom
*vdataLo64
, *vdataHi64
;
6078 IRAtom
*eBiasLo64
, *eBiasHi64
;
6079 IROp opGetLO64
, opGetHI64
;
6081 if (end
== Iend_LE
) {
6089 if (ty
== Ity_V128
) {
6090 opGetLO64
= Iop_V128to64
;
6091 opGetHI64
= Iop_V128HIto64
;
6093 opGetLO64
= Iop_128to64
;
6094 opGetHI64
= Iop_128HIto64
;
6097 eBiasLo64
= tyAddr
==Ity_I32
? mkU32(bias
+offLo64
) : mkU64(bias
+offLo64
);
6098 addrLo64
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasLo64
) );
6099 vdataLo64
= assignNew('V', mce
, Ity_I64
, unop(opGetLO64
, vdata
));
6100 diLo64
= unsafeIRDirty_0_N(
6102 hname
, VG_(fnptr_to_fnentry
)( helper
),
6103 mkIRExprVec_2( addrLo64
, vdataLo64
)
6105 eBiasHi64
= tyAddr
==Ity_I32
? mkU32(bias
+offHi64
) : mkU64(bias
+offHi64
);
6106 addrHi64
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasHi64
) );
6107 vdataHi64
= assignNew('V', mce
, Ity_I64
, unop(opGetHI64
, vdata
));
6108 diHi64
= unsafeIRDirty_0_N(
6110 hname
, VG_(fnptr_to_fnentry
)( helper
),
6111 mkIRExprVec_2( addrHi64
, vdataHi64
)
6113 if (guard
) diLo64
->guard
= guard
;
6114 if (guard
) diHi64
->guard
= guard
;
6115 setHelperAnns( mce
, diLo64
);
6116 setHelperAnns( mce
, diHi64
);
6117 stmt( 'V', mce
, IRStmt_Dirty(diLo64
) );
6118 stmt( 'V', mce
, IRStmt_Dirty(diHi64
) );
6125 /* 8/16/32/64-bit cases */
6126 /* Generate the actual address into addrAct. */
6130 IRAtom
* eBias
= tyAddr
==Ity_I32
? mkU32(bias
) : mkU64(bias
);
6131 addrAct
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBias
));
6134 if (ty
== Ity_I64
) {
6135 /* We can't do this with regparm 2 on 32-bit platforms, since
6136 the back ends aren't clever enough to handle 64-bit
6137 regparm args. Therefore be different. */
6138 di
= unsafeIRDirty_0_N(
6140 hname
, VG_(fnptr_to_fnentry
)( helper
),
6141 mkIRExprVec_2( addrAct
, vdata
)
6144 di
= unsafeIRDirty_0_N(
6146 hname
, VG_(fnptr_to_fnentry
)( helper
),
6147 mkIRExprVec_2( addrAct
,
6148 zwidenToHostWord( mce
, vdata
))
6151 if (guard
) di
->guard
= guard
;
6152 setHelperAnns( mce
, di
);
6153 stmt( 'V', mce
, IRStmt_Dirty(di
) );
6159 /* Do lazy pessimistic propagation through a dirty helper call, by
6160 looking at the annotations on it. This is the most complex part of
6163 static IRType
szToITy ( Int n
)
6166 case 1: return Ity_I8
;
6167 case 2: return Ity_I16
;
6168 case 4: return Ity_I32
;
6169 case 8: return Ity_I64
;
6170 default: VG_(tool_panic
)("szToITy(memcheck)");
6175 void do_shadow_Dirty ( MCEnv
* mce
, IRDirty
* d
)
6177 Int i
, k
, n
, toDo
, gSz
, gOff
;
6178 IRAtom
*src
, *here
, *curr
;
6179 IRType tySrc
, tyDst
;
6183 /* What's the native endianness? We need to know this. */
6184 # if defined(VG_BIGENDIAN)
6186 # elif defined(VG_LITTLEENDIAN)
6189 # error "Unknown endianness"
6192 /* First check the guard. */
6193 complainIfUndefined(mce
, d
->guard
, NULL
);
6195 /* Now round up all inputs and PCast over them. */
6196 curr
= definedOfType(Ity_I32
);
6198 /* Inputs: unmasked args
6199 Note: arguments are evaluated REGARDLESS of the guard expression */
6200 for (i
= 0; d
->args
[i
]; i
++) {
6201 IRAtom
* arg
= d
->args
[i
];
6202 if ( (d
->cee
->mcx_mask
& (1<<i
))
6203 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg
)) ) {
6204 /* ignore this arg */
6206 here
= mkPCastTo( mce
, Ity_I32
, expr2vbits(mce
, arg
, HuOth
) );
6207 curr
= mkUifU32(mce
, here
, curr
);
6211 /* Inputs: guest state that we read. */
6212 for (i
= 0; i
< d
->nFxState
; i
++) {
6213 tl_assert(d
->fxState
[i
].fx
!= Ifx_None
);
6214 if (d
->fxState
[i
].fx
== Ifx_Write
)
6217 /* Enumerate the described state segments */
6218 for (k
= 0; k
< 1 + d
->fxState
[i
].nRepeats
; k
++) {
6219 gOff
= d
->fxState
[i
].offset
+ k
* d
->fxState
[i
].repeatLen
;
6220 gSz
= d
->fxState
[i
].size
;
6222 /* Ignore any sections marked as 'always defined'. */
6223 if (isAlwaysDefd(mce
, gOff
, gSz
)) {
6225 VG_(printf
)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6230 /* This state element is read or modified. So we need to
6231 consider it. If larger than 8 bytes, deal with it in
6234 tl_assert(gSz
>= 0);
6235 if (gSz
== 0) break;
6236 n
= gSz
<= 8 ? gSz
: 8;
6237 /* update 'curr' with UifU of the state slice
6239 tySrc
= szToITy( n
);
6241 /* Observe the guard expression. If it is false use an
6242 all-bits-defined bit pattern */
6243 IRAtom
*cond
, *iffalse
, *iftrue
;
6245 cond
= assignNew('V', mce
, Ity_I1
, d
->guard
);
6246 iftrue
= assignNew('V', mce
, tySrc
, shadow_GET(mce
, gOff
, tySrc
));
6247 iffalse
= assignNew('V', mce
, tySrc
, definedOfType(tySrc
));
6248 src
= assignNew('V', mce
, tySrc
,
6249 IRExpr_ITE(cond
, iftrue
, iffalse
));
6251 here
= mkPCastTo( mce
, Ity_I32
, src
);
6252 curr
= mkUifU32(mce
, here
, curr
);
6259 /* Inputs: memory. First set up some info needed regardless of
6260 whether we're doing reads or writes. */
6262 if (d
->mFx
!= Ifx_None
) {
6263 /* Because we may do multiple shadow loads/stores from the same
6264 base address, it's best to do a single test of its
6265 definedness right now. Post-instrumentation optimisation
6266 should remove all but this test. */
6268 tl_assert(d
->mAddr
);
6269 complainIfUndefined(mce
, d
->mAddr
, d
->guard
);
6271 tyAddr
= typeOfIRExpr(mce
->sb
->tyenv
, d
->mAddr
);
6272 tl_assert(tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
6273 tl_assert(tyAddr
== mce
->hWordTy
); /* not really right */
6276 /* Deal with memory inputs (reads or modifies) */
6277 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
) {
6279 /* chew off 32-bit chunks. We don't care about the endianness
6280 since it's all going to be condensed down to a single bit,
6281 but nevertheless choose an endianness which is hopefully
6282 native to the platform. */
6286 expr2vbits_Load_guarded_Simple(
6287 mce
, end
, Ity_I32
, d
->mAddr
, d
->mSize
- toDo
, d
->guard
)
6289 curr
= mkUifU32(mce
, here
, curr
);
6292 /* chew off 16-bit chunks */
6296 expr2vbits_Load_guarded_Simple(
6297 mce
, end
, Ity_I16
, d
->mAddr
, d
->mSize
- toDo
, d
->guard
)
6299 curr
= mkUifU32(mce
, here
, curr
);
6302 /* chew off the remaining 8-bit chunk, if any */
6306 expr2vbits_Load_guarded_Simple(
6307 mce
, end
, Ity_I8
, d
->mAddr
, d
->mSize
- toDo
, d
->guard
)
6309 curr
= mkUifU32(mce
, here
, curr
);
6312 tl_assert(toDo
== 0);
6315 /* Whew! So curr is a 32-bit V-value summarising pessimistically
6316 all the inputs to the helper. Now we need to re-distribute the
6317 results to all destinations. */
6319 /* Outputs: the destination temporary, if there is one. */
6320 if (d
->tmp
!= IRTemp_INVALID
) {
6321 dst
= findShadowTmpV(mce
, d
->tmp
);
6322 tyDst
= typeOfIRTemp(mce
->sb
->tyenv
, d
->tmp
);
6323 assign( 'V', mce
, dst
, mkPCastTo( mce
, tyDst
, curr
) );
6326 /* Outputs: guest state that we write or modify. */
6327 for (i
= 0; i
< d
->nFxState
; i
++) {
6328 tl_assert(d
->fxState
[i
].fx
!= Ifx_None
);
6329 if (d
->fxState
[i
].fx
== Ifx_Read
)
6332 /* Enumerate the described state segments */
6333 for (k
= 0; k
< 1 + d
->fxState
[i
].nRepeats
; k
++) {
6334 gOff
= d
->fxState
[i
].offset
+ k
* d
->fxState
[i
].repeatLen
;
6335 gSz
= d
->fxState
[i
].size
;
6337 /* Ignore any sections marked as 'always defined'. */
6338 if (isAlwaysDefd(mce
, gOff
, gSz
))
6341 /* This state element is written or modified. So we need to
6342 consider it. If larger than 8 bytes, deal with it in
6345 tl_assert(gSz
>= 0);
6346 if (gSz
== 0) break;
6347 n
= gSz
<= 8 ? gSz
: 8;
6348 /* Write suitably-casted 'curr' to the state slice
6350 tyDst
= szToITy( n
);
6351 do_shadow_PUT( mce
, gOff
,
6352 NULL
, /* original atom */
6353 mkPCastTo( mce
, tyDst
, curr
), d
->guard
);
6360 /* Outputs: memory that we write or modify. Same comments about
6361 endianness as above apply. */
6362 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
) {
6364 /* chew off 32-bit chunks */
6366 do_shadow_Store( mce
, end
, d
->mAddr
, d
->mSize
- toDo
,
6367 NULL
, /* original data */
6368 mkPCastTo( mce
, Ity_I32
, curr
),
6372 /* chew off 16-bit chunks */
6374 do_shadow_Store( mce
, end
, d
->mAddr
, d
->mSize
- toDo
,
6375 NULL
, /* original data */
6376 mkPCastTo( mce
, Ity_I16
, curr
),
6380 /* chew off the remaining 8-bit chunk, if any */
6382 do_shadow_Store( mce
, end
, d
->mAddr
, d
->mSize
- toDo
,
6383 NULL
, /* original data */
6384 mkPCastTo( mce
, Ity_I8
, curr
),
6388 tl_assert(toDo
== 0);
6394 /* We have an ABI hint telling us that [base .. base+len-1] is to
6395 become undefined ("writable"). Generate code to call a helper to
6396 notify the A/V bit machinery of this fact.
6399 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
6403 void do_AbiHint ( MCEnv
* mce
, IRExpr
* base
, Int len
, IRExpr
* nia
)
6407 if (MC_(clo_mc_level
) == 3) {
6408 di
= unsafeIRDirty_0_N(
6410 "MC_(helperc_MAKE_STACK_UNINIT_w_o)",
6411 VG_(fnptr_to_fnentry
)( &MC_(helperc_MAKE_STACK_UNINIT_w_o
) ),
6412 mkIRExprVec_3( base
, mkIRExpr_HWord( (UInt
)len
), nia
)
6415 /* We ignore the supplied nia, since it is irrelevant. */
6416 tl_assert(MC_(clo_mc_level
) == 2 || MC_(clo_mc_level
) == 1);
6417 /* Special-case the len==128 case, since that is for amd64-ELF,
6418 which is a very common target. */
6420 di
= unsafeIRDirty_0_N(
6422 "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)",
6423 VG_(fnptr_to_fnentry
)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o
)),
6424 mkIRExprVec_1( base
)
6427 di
= unsafeIRDirty_0_N(
6429 "MC_(helperc_MAKE_STACK_UNINIT_no_o)",
6430 VG_(fnptr_to_fnentry
)( &MC_(helperc_MAKE_STACK_UNINIT_no_o
) ),
6431 mkIRExprVec_2( base
, mkIRExpr_HWord( (UInt
)len
) )
6436 stmt( 'V', mce
, IRStmt_Dirty(di
) );
6440 /* ------ Dealing with IRCAS (big and complex) ------ */
6443 static IRAtom
* gen_load_b ( MCEnv
* mce
, Int szB
,
6444 IRAtom
* baseaddr
, Int offset
);
6445 static IRAtom
* gen_maxU32 ( MCEnv
* mce
, IRAtom
* b1
, IRAtom
* b2
);
6446 static void gen_store_b ( MCEnv
* mce
, Int szB
,
6447 IRAtom
* baseaddr
, Int offset
, IRAtom
* dataB
,
6450 static void do_shadow_CAS_single ( MCEnv
* mce
, IRCAS
* cas
);
6451 static void do_shadow_CAS_double ( MCEnv
* mce
, IRCAS
* cas
);
6454 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
6455 IRExpr.Consts, else this asserts. If they are both Consts, it
6456 doesn't do anything. So that just leaves the RdTmp case.
6458 In which case: this assigns the shadow value SHADOW to the IR
6459 shadow temporary associated with ORIG. That is, ORIG, being an
6460 original temporary, will have a shadow temporary associated with
6461 it. However, in the case envisaged here, there will so far have
6462 been no IR emitted to actually write a shadow value into that
6463 temporary. What this routine does is to (emit IR to) copy the
6464 value in SHADOW into said temporary, so that after this call,
6465 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
6468 Point is to allow callers to compute "by hand" a shadow value for
6469 ORIG, and force it to be associated with ORIG.
6471 How do we know that that shadow associated with ORIG has not so far
6472 been assigned to? Well, we don't per se know that, but supposing
6473 it had. Then this routine would create a second assignment to it,
6474 and later the IR sanity checker would barf. But that never
6477 static void bind_shadow_tmp_to_orig ( UChar how
,
6479 IRAtom
* orig
, IRAtom
* shadow
)
6481 tl_assert(isOriginalAtom(mce
, orig
));
6482 tl_assert(isShadowAtom(mce
, shadow
));
6483 switch (orig
->tag
) {
6485 tl_assert(shadow
->tag
== Iex_Const
);
6488 tl_assert(shadow
->tag
== Iex_RdTmp
);
6490 assign('V', mce
, findShadowTmpV(mce
,orig
->Iex
.RdTmp
.tmp
),
6493 tl_assert(how
== 'B');
6494 assign('B', mce
, findShadowTmpB(mce
,orig
->Iex
.RdTmp
.tmp
),
6505 void do_shadow_CAS ( MCEnv
* mce
, IRCAS
* cas
)
6507 /* Scheme is (both single- and double- cases):
6509 1. fetch data#,dataB (the proposed new value)
6511 2. fetch expd#,expdB (what we expect to see at the address)
6513 3. check definedness of address
6515 4. load old#,oldB from shadow memory; this also checks
6516 addressibility of the address
6520 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
6522 7. if "expected == old" (as computed by (6))
6523 store data#,dataB to shadow memory
6525 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
6526 'data' but 7 stores 'data#'. Hence it is possible for the
6527 shadow data to be incorrectly checked and/or updated:
6529 * 7 is at least gated correctly, since the 'expected == old'
6530 condition is derived from outputs of 5. However, the shadow
6531 write could happen too late: imagine after 5 we are
6532 descheduled, a different thread runs, writes a different
6533 (shadow) value at the address, and then we resume, hence
6534 overwriting the shadow value written by the other thread.
6536 Because the original memory access is atomic, there's no way to
6537 make both the original and shadow accesses into a single atomic
6538 thing, hence this is unavoidable.
6540 At least as Valgrind stands, I don't think it's a problem, since
6541 we're single threaded *and* we guarantee that there are no
6542 context switches during the execution of any specific superblock
6543 -- context switches can only happen at superblock boundaries.
6545 If Valgrind ever becomes MT in the future, then it might be more
6546 of a problem. A possible kludge would be to artificially
6547 associate with the location, a lock, which we must acquire and
6548 release around the transaction as a whole. Hmm, that probably
6549 would't work properly since it only guards us against other
6550 threads doing CASs on the same location, not against other
6551 threads doing normal reads and writes.
6553 ------------------------------------------------------------
6555 COMMENT_ON_CasCmpEQ:
6557 Note two things. Firstly, in the sequence above, we compute
6558 "expected == old", but we don't check definedness of it. Why
6559 not? Also, the x86 and amd64 front ends use
6560 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
6561 determination (expected == old ?) for themselves, and we also
6562 don't check definedness for those primops; we just say that the
6563 result is defined. Why? Details follow.
6565 x86/amd64 contains various forms of locked insns:
6566 * lock prefix before all basic arithmetic insn;
6567 eg lock xorl %reg1,(%reg2)
6568 * atomic exchange reg-mem
6571 Rather than attempt to represent them all, which would be a
6572 royal PITA, I used a result from Maurice Herlihy
6573 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
6574 demonstrates that compare-and-swap is a primitive more general
6575 than the other two, and so can be used to represent all of them.
6576 So the translation scheme for (eg) lock incl (%reg) is as
6582 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
6584 The "atomically" is the CAS bit. The scheme is always the same:
6585 get old value from memory, compute new value, atomically stuff
6586 new value back in memory iff the old value has not changed (iow,
6587 no other thread modified it in the meantime). If it has changed
6588 then we've been out-raced and we have to start over.
6590 Now that's all very neat, but it has the bad side effect of
6591 introducing an explicit equality test into the translation.
6592 Consider the behaviour of said code on a memory location which
6593 is uninitialised. We will wind up doing a comparison on
6594 uninitialised data, and mc duly complains.
6596 What's difficult about this is, the common case is that the
6597 location is uncontended, and so we're usually comparing the same
6598 value (* %reg) with itself. So we shouldn't complain even if it
6599 is undefined. But mc doesn't know that.
6601 My solution is to mark the == in the IR specially, so as to tell
6602 mc that it almost certainly compares a value with itself, and we
6603 should just regard the result as always defined. Rather than
6604 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
6605 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
6607 So there's always the question of, can this give a false
6608 negative? eg, imagine that initially, * %reg is defined; and we
6609 read that; but then in the gap between the read and the CAS, a
6610 different thread writes an undefined (and different) value at
6611 the location. Then the CAS in this thread will fail and we will
6612 go back to "again:", but without knowing that the trip back
6613 there was based on an undefined comparison. No matter; at least
6614 the other thread won the race and the location is correctly
6615 marked as undefined. What if it wrote an uninitialised version
6616 of the same value that was there originally, though?
6618 etc etc. Seems like there's a small corner case in which we
6619 might lose the fact that something's defined -- we're out-raced
6620 in between the "old = * reg" and the "atomically {", _and_ the
6621 other thread is writing in an undefined version of what's
6622 already there. Well, that seems pretty unlikely.
6626 If we ever need to reinstate it .. code which generates a
6627 definedness test for "expected == old" was removed at r10432 of
6630 if (cas
->oldHi
== IRTemp_INVALID
) {
6631 do_shadow_CAS_single( mce
, cas
);
6633 do_shadow_CAS_double( mce
, cas
);
6638 static void do_shadow_CAS_single ( MCEnv
* mce
, IRCAS
* cas
)
6640 IRAtom
*vdataLo
= NULL
, *bdataLo
= NULL
;
6641 IRAtom
*vexpdLo
= NULL
, *bexpdLo
= NULL
;
6642 IRAtom
*voldLo
= NULL
, *boldLo
= NULL
;
6643 IRAtom
*expd_eq_old
= NULL
;
6647 Bool otrak
= MC_(clo_mc_level
) >= 3; /* a shorthand */
6650 tl_assert(cas
->oldHi
== IRTemp_INVALID
);
6651 tl_assert(cas
->expdHi
== NULL
);
6652 tl_assert(cas
->dataHi
== NULL
);
6654 elemTy
= typeOfIRExpr(mce
->sb
->tyenv
, cas
->expdLo
);
6656 case Ity_I8
: elemSzB
= 1; opCasCmpEQ
= Iop_CasCmpEQ8
; break;
6657 case Ity_I16
: elemSzB
= 2; opCasCmpEQ
= Iop_CasCmpEQ16
; break;
6658 case Ity_I32
: elemSzB
= 4; opCasCmpEQ
= Iop_CasCmpEQ32
; break;
6659 case Ity_I64
: elemSzB
= 8; opCasCmpEQ
= Iop_CasCmpEQ64
; break;
6660 default: tl_assert(0); /* IR defn disallows any other types */
6663 /* 1. fetch data# (the proposed new value) */
6664 tl_assert(isOriginalAtom(mce
, cas
->dataLo
));
6666 = assignNew('V', mce
, elemTy
, expr2vbits(mce
, cas
->dataLo
, HuOth
));
6667 tl_assert(isShadowAtom(mce
, vdataLo
));
6670 = assignNew('B', mce
, Ity_I32
, schemeE(mce
, cas
->dataLo
));
6671 tl_assert(isShadowAtom(mce
, bdataLo
));
6674 /* 2. fetch expected# (what we expect to see at the address) */
6675 tl_assert(isOriginalAtom(mce
, cas
->expdLo
));
6677 = assignNew('V', mce
, elemTy
, expr2vbits(mce
, cas
->expdLo
, HuOth
));
6678 tl_assert(isShadowAtom(mce
, vexpdLo
));
6681 = assignNew('B', mce
, Ity_I32
, schemeE(mce
, cas
->expdLo
));
6682 tl_assert(isShadowAtom(mce
, bexpdLo
));
6685 /* 3. check definedness of address */
6686 /* 4. fetch old# from shadow memory; this also checks
6687 addressibility of the address */
6693 cas
->end
, elemTy
, cas
->addr
, 0/*Addr bias*/,
6694 NULL
/*always happens*/
6696 bind_shadow_tmp_to_orig('V', mce
, mkexpr(cas
->oldLo
), voldLo
);
6699 = assignNew('B', mce
, Ity_I32
,
6700 gen_load_b(mce
, elemSzB
, cas
->addr
, 0/*addr bias*/));
6701 bind_shadow_tmp_to_orig('B', mce
, mkexpr(cas
->oldLo
), boldLo
);
6704 /* 5. the CAS itself */
6705 stmt( 'C', mce
, IRStmt_CAS(cas
) );
6707 /* 6. compute "expected == old" */
6708 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
6709 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6710 tree, but it's not copied from the input block. */
6712 = assignNew('C', mce
, Ity_I1
,
6713 binop(opCasCmpEQ
, cas
->expdLo
, mkexpr(cas
->oldLo
)));
6715 /* 7. if "expected == old"
6716 store data# to shadow memory */
6717 do_shadow_Store( mce
, cas
->end
, cas
->addr
, 0/*bias*/,
6718 NULL
/*data*/, vdataLo
/*vdata*/,
6719 expd_eq_old
/*guard for store*/ );
6721 gen_store_b( mce
, elemSzB
, cas
->addr
, 0/*offset*/,
6723 expd_eq_old
/*guard for store*/ );
6728 static void do_shadow_CAS_double ( MCEnv
* mce
, IRCAS
* cas
)
6730 IRAtom
*vdataHi
= NULL
, *bdataHi
= NULL
;
6731 IRAtom
*vdataLo
= NULL
, *bdataLo
= NULL
;
6732 IRAtom
*vexpdHi
= NULL
, *bexpdHi
= NULL
;
6733 IRAtom
*vexpdLo
= NULL
, *bexpdLo
= NULL
;
6734 IRAtom
*voldHi
= NULL
, *boldHi
= NULL
;
6735 IRAtom
*voldLo
= NULL
, *boldLo
= NULL
;
6736 IRAtom
*xHi
= NULL
, *xLo
= NULL
, *xHL
= NULL
;
6737 IRAtom
*expd_eq_old
= NULL
, *zero
= NULL
;
6738 IROp opCasCmpEQ
, opOr
, opXor
;
6739 Int elemSzB
, memOffsLo
, memOffsHi
;
6741 Bool otrak
= MC_(clo_mc_level
) >= 3; /* a shorthand */
6744 tl_assert(cas
->oldHi
!= IRTemp_INVALID
);
6745 tl_assert(cas
->expdHi
!= NULL
);
6746 tl_assert(cas
->dataHi
!= NULL
);
6748 elemTy
= typeOfIRExpr(mce
->sb
->tyenv
, cas
->expdLo
);
6751 opCasCmpEQ
= Iop_CasCmpEQ8
; opOr
= Iop_Or8
; opXor
= Iop_Xor8
;
6752 elemSzB
= 1; zero
= mkU8(0);
6755 opCasCmpEQ
= Iop_CasCmpEQ16
; opOr
= Iop_Or16
; opXor
= Iop_Xor16
;
6756 elemSzB
= 2; zero
= mkU16(0);
6759 opCasCmpEQ
= Iop_CasCmpEQ32
; opOr
= Iop_Or32
; opXor
= Iop_Xor32
;
6760 elemSzB
= 4; zero
= mkU32(0);
6763 opCasCmpEQ
= Iop_CasCmpEQ64
; opOr
= Iop_Or64
; opXor
= Iop_Xor64
;
6764 elemSzB
= 8; zero
= mkU64(0);
6767 tl_assert(0); /* IR defn disallows any other types */
6770 /* 1. fetch data# (the proposed new value) */
6771 tl_assert(isOriginalAtom(mce
, cas
->dataHi
));
6772 tl_assert(isOriginalAtom(mce
, cas
->dataLo
));
6774 = assignNew('V', mce
, elemTy
, expr2vbits(mce
, cas
->dataHi
, HuOth
));
6776 = assignNew('V', mce
, elemTy
, expr2vbits(mce
, cas
->dataLo
, HuOth
));
6777 tl_assert(isShadowAtom(mce
, vdataHi
));
6778 tl_assert(isShadowAtom(mce
, vdataLo
));
6781 = assignNew('B', mce
, Ity_I32
, schemeE(mce
, cas
->dataHi
));
6783 = assignNew('B', mce
, Ity_I32
, schemeE(mce
, cas
->dataLo
));
6784 tl_assert(isShadowAtom(mce
, bdataHi
));
6785 tl_assert(isShadowAtom(mce
, bdataLo
));
6788 /* 2. fetch expected# (what we expect to see at the address) */
6789 tl_assert(isOriginalAtom(mce
, cas
->expdHi
));
6790 tl_assert(isOriginalAtom(mce
, cas
->expdLo
));
6792 = assignNew('V', mce
, elemTy
, expr2vbits(mce
, cas
->expdHi
, HuOth
));
6794 = assignNew('V', mce
, elemTy
, expr2vbits(mce
, cas
->expdLo
, HuOth
));
6795 tl_assert(isShadowAtom(mce
, vexpdHi
));
6796 tl_assert(isShadowAtom(mce
, vexpdLo
));
6799 = assignNew('B', mce
, Ity_I32
, schemeE(mce
, cas
->expdHi
));
6801 = assignNew('B', mce
, Ity_I32
, schemeE(mce
, cas
->expdLo
));
6802 tl_assert(isShadowAtom(mce
, bexpdHi
));
6803 tl_assert(isShadowAtom(mce
, bexpdLo
));
6806 /* 3. check definedness of address */
6807 /* 4. fetch old# from shadow memory; this also checks
6808 addressibility of the address */
6809 if (cas
->end
== Iend_LE
) {
6811 memOffsHi
= elemSzB
;
6813 tl_assert(cas
->end
== Iend_BE
);
6814 memOffsLo
= elemSzB
;
6822 cas
->end
, elemTy
, cas
->addr
, memOffsHi
/*Addr bias*/,
6823 NULL
/*always happens*/
6830 cas
->end
, elemTy
, cas
->addr
, memOffsLo
/*Addr bias*/,
6831 NULL
/*always happens*/
6833 bind_shadow_tmp_to_orig('V', mce
, mkexpr(cas
->oldHi
), voldHi
);
6834 bind_shadow_tmp_to_orig('V', mce
, mkexpr(cas
->oldLo
), voldLo
);
6837 = assignNew('B', mce
, Ity_I32
,
6838 gen_load_b(mce
, elemSzB
, cas
->addr
,
6839 memOffsHi
/*addr bias*/));
6841 = assignNew('B', mce
, Ity_I32
,
6842 gen_load_b(mce
, elemSzB
, cas
->addr
,
6843 memOffsLo
/*addr bias*/));
6844 bind_shadow_tmp_to_orig('B', mce
, mkexpr(cas
->oldHi
), boldHi
);
6845 bind_shadow_tmp_to_orig('B', mce
, mkexpr(cas
->oldLo
), boldLo
);
6848 /* 5. the CAS itself */
6849 stmt( 'C', mce
, IRStmt_CAS(cas
) );
6851 /* 6. compute "expected == old" */
6852 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
6853 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6854 tree, but it's not copied from the input block. */
6856 xHi = oldHi ^ expdHi;
6857 xLo = oldLo ^ expdLo;
6859 expd_eq_old = xHL == 0;
6861 xHi
= assignNew('C', mce
, elemTy
,
6862 binop(opXor
, cas
->expdHi
, mkexpr(cas
->oldHi
)));
6863 xLo
= assignNew('C', mce
, elemTy
,
6864 binop(opXor
, cas
->expdLo
, mkexpr(cas
->oldLo
)));
6865 xHL
= assignNew('C', mce
, elemTy
,
6866 binop(opOr
, xHi
, xLo
));
6868 = assignNew('C', mce
, Ity_I1
,
6869 binop(opCasCmpEQ
, xHL
, zero
));
6871 /* 7. if "expected == old"
6872 store data# to shadow memory */
6873 do_shadow_Store( mce
, cas
->end
, cas
->addr
, memOffsHi
/*bias*/,
6874 NULL
/*data*/, vdataHi
/*vdata*/,
6875 expd_eq_old
/*guard for store*/ );
6876 do_shadow_Store( mce
, cas
->end
, cas
->addr
, memOffsLo
/*bias*/,
6877 NULL
/*data*/, vdataLo
/*vdata*/,
6878 expd_eq_old
/*guard for store*/ );
6880 gen_store_b( mce
, elemSzB
, cas
->addr
, memOffsHi
/*offset*/,
6882 expd_eq_old
/*guard for store*/ );
6883 gen_store_b( mce
, elemSzB
, cas
->addr
, memOffsLo
/*offset*/,
6885 expd_eq_old
/*guard for store*/ );
6890 /* ------ Dealing with LL/SC (not difficult) ------ */
6892 static void do_shadow_LLSC ( MCEnv
* mce
,
6896 IRExpr
* stStoredata
)
6898 /* In short: treat a load-linked like a normal load followed by an
6899 assignment of the loaded (shadow) data to the result temporary.
6900 Treat a store-conditional like a normal store, and mark the
6901 result temporary as defined. */
6902 IRType resTy
= typeOfIRTemp(mce
->sb
->tyenv
, stResult
);
6903 IRTemp resTmp
= findShadowTmpV(mce
, stResult
);
6905 tl_assert(isIRAtom(stAddr
));
6907 tl_assert(isIRAtom(stStoredata
));
6909 if (stStoredata
== NULL
) {
6911 /* Just treat this as a normal load, followed by an assignment of
6912 the value to .result. */
6914 tl_assert(resTy
== Ity_I128
|| resTy
== Ity_I64
|| resTy
== Ity_I32
6915 || resTy
== Ity_I16
|| resTy
== Ity_I8
);
6916 assign( 'V', mce
, resTmp
,
6918 mce
, stEnd
, resTy
, stAddr
, 0/*addr bias*/,
6919 NULL
/*always happens*/) );
6921 /* Store Conditional */
6923 IRType dataTy
= typeOfIRExpr(mce
->sb
->tyenv
,
6925 tl_assert(dataTy
== Ity_I128
|| dataTy
== Ity_I64
|| dataTy
== Ity_I32
6926 || dataTy
== Ity_I16
|| dataTy
== Ity_I8
);
6927 do_shadow_Store( mce
, stEnd
,
6928 stAddr
, 0/* addr bias */,
6930 NULL
/* shadow data */,
6932 /* This is a store conditional, so it writes to .result a value
6933 indicating whether or not the store succeeded. Just claim
6934 this value is always defined. In the PowerPC interpretation
6935 of store-conditional, definedness of the success indication
6936 depends on whether the address of the store matches the
6937 reservation address. But we can't tell that here (and
6938 anyway, we're not being PowerPC-specific). At least we are
6939 guaranteed that the definedness of the store address, and its
6940 addressibility, will be checked as per normal. So it seems
6941 pretty safe to just say that the success indication is always
6944 In schemeS, for origin tracking, we must correspondingly set
6945 a no-origin value for the origin shadow of .result.
6947 tl_assert(resTy
== Ity_I1
);
6948 assign( 'V', mce
, resTmp
, definedOfType(resTy
) );
6953 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6955 static void do_shadow_StoreG ( MCEnv
* mce
, IRStoreG
* sg
)
6957 complainIfUndefined(mce
, sg
->guard
, NULL
);
6958 /* do_shadow_Store will generate code to check the definedness and
6959 validity of sg->addr, in the case where sg->guard evaluates to
6960 True at run-time. */
6961 do_shadow_Store( mce
, sg
->end
,
6962 sg
->addr
, 0/* addr bias */,
6964 NULL
/* shadow data */,
6968 static void do_shadow_LoadG ( MCEnv
* mce
, IRLoadG
* lg
)
6970 complainIfUndefined(mce
, lg
->guard
, NULL
);
6971 /* expr2vbits_Load_guarded_General will generate code to check the
6972 definedness and validity of lg->addr, in the case where
6973 lg->guard evaluates to True at run-time. */
6975 /* Look at the LoadG's built-in conversion operation, to determine
6976 the source (actual loaded data) type, and the equivalent IROp.
6977 NOTE that implicitly we are taking a widening operation to be
6978 applied to original atoms and producing one that applies to V
6979 bits. Since signed and unsigned widening are self-shadowing,
6980 this is a straight copy of the op (modulo swapping from the
6981 IRLoadGOp form to the IROp form). Note also therefore that this
6982 implicitly duplicates the logic to do with said widening ops in
6983 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
6984 IROp vwiden
= Iop_INVALID
;
6985 IRType loadedTy
= Ity_INVALID
;
6987 case ILGop_IdentV128
: loadedTy
= Ity_V128
; vwiden
= Iop_INVALID
; break;
6988 case ILGop_Ident64
: loadedTy
= Ity_I64
; vwiden
= Iop_INVALID
; break;
6989 case ILGop_Ident32
: loadedTy
= Ity_I32
; vwiden
= Iop_INVALID
; break;
6990 case ILGop_16Uto32
: loadedTy
= Ity_I16
; vwiden
= Iop_16Uto32
; break;
6991 case ILGop_16Sto32
: loadedTy
= Ity_I16
; vwiden
= Iop_16Sto32
; break;
6992 case ILGop_8Uto32
: loadedTy
= Ity_I8
; vwiden
= Iop_8Uto32
; break;
6993 case ILGop_8Sto32
: loadedTy
= Ity_I8
; vwiden
= Iop_8Sto32
; break;
6994 default: VG_(tool_panic
)("do_shadow_LoadG");
6998 = expr2vbits( mce
, lg
->alt
, HuOth
);
7000 = expr2vbits_Load_guarded_General(mce
, lg
->end
, loadedTy
,
7001 lg
->addr
, 0/*addr bias*/,
7002 lg
->guard
, vwiden
, vbits_alt
);
7003 /* And finally, bind the V bits to the destination temporary. */
7004 assign( 'V', mce
, findShadowTmpV(mce
, lg
->dst
), vbits_final
);
7008 /*------------------------------------------------------------*/
7009 /*--- Origin tracking stuff ---*/
7010 /*------------------------------------------------------------*/
7012 /* Almost identical to findShadowTmpV. */
7013 static IRTemp
findShadowTmpB ( MCEnv
* mce
, IRTemp orig
)
7016 /* VG_(indexXA) range-checks 'orig', hence no need to check
7018 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
7019 tl_assert(ent
->kind
== Orig
);
7020 if (ent
->shadowB
== IRTemp_INVALID
) {
7022 = newTemp( mce
, Ity_I32
, BSh
);
7023 /* newTemp may cause mce->tmpMap to resize, hence previous results
7024 from VG_(indexXA) are invalid. */
7025 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
7026 tl_assert(ent
->kind
== Orig
);
7027 tl_assert(ent
->shadowB
== IRTemp_INVALID
);
7028 ent
->shadowB
= tmpB
;
7030 return ent
->shadowB
;
7033 static IRAtom
* gen_maxU32 ( MCEnv
* mce
, IRAtom
* b1
, IRAtom
* b2
)
7035 return assignNew( 'B', mce
, Ity_I32
, binop(Iop_Max32U
, b1
, b2
) );
7039 /* Make a guarded origin load, with no special handling in the
7040 didn't-happen case. A GUARD of NULL is assumed to mean "always
7043 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
7044 return the otag. The loaded size is SZB. If GUARD evaluates to
7045 False at run time then the returned otag is zero.
7047 static IRAtom
* gen_guarded_load_b ( MCEnv
* mce
, Int szB
,
7049 Int offset
, IRExpr
* guard
)
7055 IRType aTy
= typeOfIRExpr( mce
->sb
->tyenv
, baseaddr
);
7056 IROp opAdd
= aTy
== Ity_I32
? Iop_Add32
: Iop_Add64
;
7057 IRAtom
* ea
= baseaddr
;
7059 IRAtom
* off
= aTy
== Ity_I32
? mkU32( offset
)
7060 : mkU64( (Long
)(Int
)offset
);
7061 ea
= assignNew( 'B', mce
, aTy
, binop(opAdd
, ea
, off
));
7063 bTmp
= newTemp(mce
, mce
->hWordTy
, BSh
);
7066 case 1: hFun
= (void*)&MC_(helperc_b_load1
);
7067 hName
= "MC_(helperc_b_load1)";
7069 case 2: hFun
= (void*)&MC_(helperc_b_load2
);
7070 hName
= "MC_(helperc_b_load2)";
7072 case 4: hFun
= (void*)&MC_(helperc_b_load4
);
7073 hName
= "MC_(helperc_b_load4)";
7075 case 8: hFun
= (void*)&MC_(helperc_b_load8
);
7076 hName
= "MC_(helperc_b_load8)";
7078 case 16: hFun
= (void*)&MC_(helperc_b_load16
);
7079 hName
= "MC_(helperc_b_load16)";
7081 case 32: hFun
= (void*)&MC_(helperc_b_load32
);
7082 hName
= "MC_(helperc_b_load32)";
7085 VG_(printf
)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB
);
7088 di
= unsafeIRDirty_1_N(
7089 bTmp
, 1/*regparms*/, hName
, VG_(fnptr_to_fnentry
)( hFun
),
7094 /* Ideally the didn't-happen return value here would be
7095 all-zeroes (unknown-origin), so it'd be harmless if it got
7096 used inadvertently. We slum it out with the IR-mandated
7097 default value (0b01 repeating, 0x55 etc) as that'll probably
7098 trump all legitimate otags via Max32, and it's pretty
7101 /* no need to mess with any annotations. This call accesses
7102 neither guest state nor guest memory. */
7103 stmt( 'B', mce
, IRStmt_Dirty(di
) );
7104 if (mce
->hWordTy
== Ity_I64
) {
7106 IRTemp bTmp32
= newTemp(mce
, Ity_I32
, BSh
);
7107 assign( 'B', mce
, bTmp32
, unop(Iop_64to32
, mkexpr(bTmp
)) );
7108 return mkexpr(bTmp32
);
7111 return mkexpr(bTmp
);
7116 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
7117 loaded size is SZB. The load is regarded as unconditional (always
7120 static IRAtom
* gen_load_b ( MCEnv
* mce
, Int szB
, IRAtom
* baseaddr
,
7123 return gen_guarded_load_b(mce
, szB
, baseaddr
, offset
, NULL
/*guard*/);
7127 /* The most general handler for guarded origin loads. A GUARD of NULL
7128 is assumed to mean "always True".
7130 Generate IR to do a shadow origin load from ADDR+BIAS and return
7131 the B bits. The loaded type is TY. If GUARD evaluates to False at
7132 run time then the returned B bits are simply BALT instead.
7135 IRAtom
* expr2ori_Load_guarded_General ( MCEnv
* mce
,
7137 IRAtom
* addr
, UInt bias
,
7138 IRAtom
* guard
, IRAtom
* balt
)
7140 /* If the guard evaluates to True, this will hold the loaded
7141 origin. If the guard evaluates to False, this will be zero,
7142 meaning "unknown origin", in which case we will have to replace
7143 it using an ITE below. */
7145 = assignNew('B', mce
, Ity_I32
,
7146 gen_guarded_load_b(mce
, sizeofIRType(ty
),
7147 addr
, bias
, guard
));
7148 /* These are the bits we will return if the load doesn't take
7152 /* Prepare the cond for the ITE. Convert a NULL cond into
7153 something that iropt knows how to fold out later. */
7155 = guard
== NULL
? mkU1(1) : guard
;
7156 /* And assemble the final result. */
7157 return assignNew('B', mce
, Ity_I32
, IRExpr_ITE(cond
, iftrue
, iffalse
));
7161 /* Generate a shadow origins store. guard :: Ity_I1 controls whether
7162 the store really happens; NULL means it unconditionally does. */
7163 static void gen_store_b ( MCEnv
* mce
, Int szB
,
7164 IRAtom
* baseaddr
, Int offset
, IRAtom
* dataB
,
7170 IRType aTy
= typeOfIRExpr( mce
->sb
->tyenv
, baseaddr
);
7171 IROp opAdd
= aTy
== Ity_I32
? Iop_Add32
: Iop_Add64
;
7172 IRAtom
* ea
= baseaddr
;
7174 tl_assert(isOriginalAtom(mce
, guard
));
7175 tl_assert(typeOfIRExpr(mce
->sb
->tyenv
, guard
) == Ity_I1
);
7178 IRAtom
* off
= aTy
== Ity_I32
? mkU32( offset
)
7179 : mkU64( (Long
)(Int
)offset
);
7180 ea
= assignNew( 'B', mce
, aTy
, binop(opAdd
, ea
, off
));
7182 if (mce
->hWordTy
== Ity_I64
)
7183 dataB
= assignNew( 'B', mce
, Ity_I64
, unop(Iop_32Uto64
, dataB
));
7186 case 1: hFun
= (void*)&MC_(helperc_b_store1
);
7187 hName
= "MC_(helperc_b_store1)";
7189 case 2: hFun
= (void*)&MC_(helperc_b_store2
);
7190 hName
= "MC_(helperc_b_store2)";
7192 case 4: hFun
= (void*)&MC_(helperc_b_store4
);
7193 hName
= "MC_(helperc_b_store4)";
7195 case 8: hFun
= (void*)&MC_(helperc_b_store8
);
7196 hName
= "MC_(helperc_b_store8)";
7198 case 16: hFun
= (void*)&MC_(helperc_b_store16
);
7199 hName
= "MC_(helperc_b_store16)";
7201 case 32: hFun
= (void*)&MC_(helperc_b_store32
);
7202 hName
= "MC_(helperc_b_store32)";
7207 di
= unsafeIRDirty_0_N( 2/*regparms*/,
7208 hName
, VG_(fnptr_to_fnentry
)( hFun
),
7209 mkIRExprVec_2( ea
, dataB
)
7211 /* no need to mess with any annotations. This call accesses
7212 neither guest state nor guest memory. */
7213 if (guard
) di
->guard
= guard
;
7214 stmt( 'B', mce
, IRStmt_Dirty(di
) );
7217 static IRAtom
* narrowTo32 ( MCEnv
* mce
, IRAtom
* e
) {
7218 IRType eTy
= typeOfIRExpr(mce
->sb
->tyenv
, e
);
7220 return assignNew( 'B', mce
, Ity_I32
, unop(Iop_64to32
, e
) );
7226 static IRAtom
* zWidenFrom32 ( MCEnv
* mce
, IRType dstTy
, IRAtom
* e
) {
7227 IRType eTy
= typeOfIRExpr(mce
->sb
->tyenv
, e
);
7228 tl_assert(eTy
== Ity_I32
);
7229 if (dstTy
== Ity_I64
)
7230 return assignNew( 'B', mce
, Ity_I64
, unop(Iop_32Uto64
, e
) );
7235 static IRAtom
* schemeE ( MCEnv
* mce
, IRExpr
* e
)
7237 tl_assert(MC_(clo_mc_level
) == 3);
7242 IRRegArray
* descr_b
;
7243 IRAtom
*t1
, *t2
, *t3
, *t4
;
7244 IRRegArray
* descr
= e
->Iex
.GetI
.descr
;
7246 = MC_(get_otrack_reg_array_equiv_int_type
)(descr
);
7247 /* If this array is unshadowable for whatever reason, use the
7248 usual approximation. */
7249 if (equivIntTy
== Ity_INVALID
)
7251 tl_assert(sizeofIRType(equivIntTy
) >= 4);
7252 tl_assert(sizeofIRType(equivIntTy
) == sizeofIRType(descr
->elemTy
));
7253 descr_b
= mkIRRegArray( descr
->base
+ 2*mce
->layout
->total_sizeB
,
7254 equivIntTy
, descr
->nElems
);
7255 /* Do a shadow indexed get of the same size, giving t1. Take
7256 the bottom 32 bits of it, giving t2. Compute into t3 the
7257 origin for the index (almost certainly zero, but there's
7258 no harm in being completely general here, since iropt will
7259 remove any useless code), and fold it in, giving a final
7261 t1
= assignNew( 'B', mce
, equivIntTy
,
7262 IRExpr_GetI( descr_b
, e
->Iex
.GetI
.ix
,
7263 e
->Iex
.GetI
.bias
));
7264 t2
= narrowTo32( mce
, t1
);
7265 t3
= schemeE( mce
, e
->Iex
.GetI
.ix
);
7266 t4
= gen_maxU32( mce
, t2
, t3
);
7272 IRExpr
** args
= e
->Iex
.CCall
.args
;
7273 IRAtom
* curr
= mkU32(0);
7274 for (i
= 0; args
[i
]; i
++) {
7276 tl_assert(isOriginalAtom(mce
, args
[i
]));
7277 /* Only take notice of this arg if the callee's
7278 mc-exclusion mask does not say it is to be excluded. */
7279 if (e
->Iex
.CCall
.cee
->mcx_mask
& (1<<i
)) {
7280 /* the arg is to be excluded from definedness checking.
7282 if (0) VG_(printf
)("excluding %s(%d)\n",
7283 e
->Iex
.CCall
.cee
->name
, i
);
7285 /* calculate the arg's definedness, and pessimistically
7287 here
= schemeE( mce
, args
[i
] );
7288 curr
= gen_maxU32( mce
, curr
, here
);
7295 dszB
= sizeofIRType(e
->Iex
.Load
.ty
);
7296 /* assert that the B value for the address is already
7297 available (somewhere) */
7298 tl_assert(isIRAtom(e
->Iex
.Load
.addr
));
7299 tl_assert(mce
->hWordTy
== Ity_I32
|| mce
->hWordTy
== Ity_I64
);
7300 return gen_load_b( mce
, dszB
, e
->Iex
.Load
.addr
, 0 );
7303 IRAtom
* b1
= schemeE( mce
, e
->Iex
.ITE
.cond
);
7304 IRAtom
* b3
= schemeE( mce
, e
->Iex
.ITE
.iftrue
);
7305 IRAtom
* b2
= schemeE( mce
, e
->Iex
.ITE
.iffalse
);
7306 return gen_maxU32( mce
, b1
, gen_maxU32( mce
, b2
, b3
));
7309 IRAtom
* b1
= schemeE( mce
, e
->Iex
.Qop
.details
->arg1
);
7310 IRAtom
* b2
= schemeE( mce
, e
->Iex
.Qop
.details
->arg2
);
7311 IRAtom
* b3
= schemeE( mce
, e
->Iex
.Qop
.details
->arg3
);
7312 IRAtom
* b4
= schemeE( mce
, e
->Iex
.Qop
.details
->arg4
);
7313 return gen_maxU32( mce
, gen_maxU32( mce
, b1
, b2
),
7314 gen_maxU32( mce
, b3
, b4
) );
7317 IRAtom
* b1
= schemeE( mce
, e
->Iex
.Triop
.details
->arg1
);
7318 IRAtom
* b2
= schemeE( mce
, e
->Iex
.Triop
.details
->arg2
);
7319 IRAtom
* b3
= schemeE( mce
, e
->Iex
.Triop
.details
->arg3
);
7320 return gen_maxU32( mce
, b1
, gen_maxU32( mce
, b2
, b3
) );
7323 switch (e
->Iex
.Binop
.op
) {
7324 case Iop_CasCmpEQ8
: case Iop_CasCmpNE8
:
7325 case Iop_CasCmpEQ16
: case Iop_CasCmpNE16
:
7326 case Iop_CasCmpEQ32
: case Iop_CasCmpNE32
:
7327 case Iop_CasCmpEQ64
: case Iop_CasCmpNE64
:
7328 /* Just say these all produce a defined result,
7329 regardless of their arguments. See
7330 COMMENT_ON_CasCmpEQ in this file. */
7333 IRAtom
* b1
= schemeE( mce
, e
->Iex
.Binop
.arg1
);
7334 IRAtom
* b2
= schemeE( mce
, e
->Iex
.Binop
.arg2
);
7335 return gen_maxU32( mce
, b1
, b2
);
7342 IRAtom
* b1
= schemeE( mce
, e
->Iex
.Unop
.arg
);
7348 return mkexpr( findShadowTmpB( mce
, e
->Iex
.RdTmp
.tmp
));
7350 Int b_offset
= MC_(get_otrack_shadow_offset
)(
7352 sizeofIRType(e
->Iex
.Get
.ty
)
7354 tl_assert(b_offset
>= -1
7355 && b_offset
<= mce
->layout
->total_sizeB
-4);
7356 if (b_offset
>= 0) {
7357 /* FIXME: this isn't an atom! */
7358 return IRExpr_Get( b_offset
+ 2*mce
->layout
->total_sizeB
,
7364 VG_(printf
)("mc_translate.c: schemeE: unhandled: ");
7366 VG_(tool_panic
)("memcheck:schemeE");
7371 static void do_origins_Dirty ( MCEnv
* mce
, IRDirty
* d
)
7373 // This is a hacked version of do_shadow_Dirty
7374 Int i
, k
, n
, toDo
, gSz
, gOff
;
7375 IRAtom
*here
, *curr
;
7378 /* First check the guard. */
7379 curr
= schemeE( mce
, d
->guard
);
7381 /* Now round up all inputs and maxU32 over them. */
7383 /* Inputs: unmasked args
7384 Note: arguments are evaluated REGARDLESS of the guard expression */
7385 for (i
= 0; d
->args
[i
]; i
++) {
7386 IRAtom
* arg
= d
->args
[i
];
7387 if ( (d
->cee
->mcx_mask
& (1<<i
))
7388 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg
)) ) {
7389 /* ignore this arg */
7391 here
= schemeE( mce
, arg
);
7392 curr
= gen_maxU32( mce
, curr
, here
);
7396 /* Inputs: guest state that we read. */
7397 for (i
= 0; i
< d
->nFxState
; i
++) {
7398 tl_assert(d
->fxState
[i
].fx
!= Ifx_None
);
7399 if (d
->fxState
[i
].fx
== Ifx_Write
)
7402 /* Enumerate the described state segments */
7403 for (k
= 0; k
< 1 + d
->fxState
[i
].nRepeats
; k
++) {
7404 gOff
= d
->fxState
[i
].offset
+ k
* d
->fxState
[i
].repeatLen
;
7405 gSz
= d
->fxState
[i
].size
;
7407 /* Ignore any sections marked as 'always defined'. */
7408 if (isAlwaysDefd(mce
, gOff
, gSz
)) {
7410 VG_(printf
)("memcheck: Dirty gst: ignored off %d, sz %d\n",
7415 /* This state element is read or modified. So we need to
7416 consider it. If larger than 4 bytes, deal with it in
7420 tl_assert(gSz
>= 0);
7421 if (gSz
== 0) break;
7422 n
= gSz
<= 4 ? gSz
: 4;
7423 /* update 'curr' with maxU32 of the state slice
7425 b_offset
= MC_(get_otrack_shadow_offset
)(gOff
, 4);
7426 if (b_offset
!= -1) {
7427 /* Observe the guard expression. If it is false use 0, i.e.
7428 nothing is known about the origin */
7429 IRAtom
*cond
, *iffalse
, *iftrue
;
7431 cond
= assignNew( 'B', mce
, Ity_I1
, d
->guard
);
7433 iftrue
= assignNew( 'B', mce
, Ity_I32
,
7435 + 2*mce
->layout
->total_sizeB
,
7437 here
= assignNew( 'B', mce
, Ity_I32
,
7438 IRExpr_ITE(cond
, iftrue
, iffalse
));
7439 curr
= gen_maxU32( mce
, curr
, here
);
7447 /* Inputs: memory */
7449 if (d
->mFx
!= Ifx_None
) {
7450 /* Because we may do multiple shadow loads/stores from the same
7451 base address, it's best to do a single test of its
7452 definedness right now. Post-instrumentation optimisation
7453 should remove all but this test. */
7454 tl_assert(d
->mAddr
);
7455 here
= schemeE( mce
, d
->mAddr
);
7456 curr
= gen_maxU32( mce
, curr
, here
);
7459 /* Deal with memory inputs (reads or modifies) */
7460 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
) {
7462 /* chew off 32-bit chunks. We don't care about the endianness
7463 since it's all going to be condensed down to a single bit,
7464 but nevertheless choose an endianness which is hopefully
7465 native to the platform. */
7467 here
= gen_guarded_load_b( mce
, 4, d
->mAddr
, d
->mSize
- toDo
,
7469 curr
= gen_maxU32( mce
, curr
, here
);
7472 /* handle possible 16-bit excess */
7474 here
= gen_guarded_load_b( mce
, 2, d
->mAddr
, d
->mSize
- toDo
,
7476 curr
= gen_maxU32( mce
, curr
, here
);
7479 /* chew off the remaining 8-bit chunk, if any */
7481 here
= gen_guarded_load_b( mce
, 1, d
->mAddr
, d
->mSize
- toDo
,
7483 curr
= gen_maxU32( mce
, curr
, here
);
7486 tl_assert(toDo
== 0);
7489 /* Whew! So curr is a 32-bit B-value which should give an origin
7490 of some use if any of the inputs to the helper are undefined.
7491 Now we need to re-distribute the results to all destinations. */
7493 /* Outputs: the destination temporary, if there is one. */
7494 if (d
->tmp
!= IRTemp_INVALID
) {
7495 dst
= findShadowTmpB(mce
, d
->tmp
);
7496 assign( 'V', mce
, dst
, curr
);
7499 /* Outputs: guest state that we write or modify. */
7500 for (i
= 0; i
< d
->nFxState
; i
++) {
7501 tl_assert(d
->fxState
[i
].fx
!= Ifx_None
);
7502 if (d
->fxState
[i
].fx
== Ifx_Read
)
7505 /* Enumerate the described state segments */
7506 for (k
= 0; k
< 1 + d
->fxState
[i
].nRepeats
; k
++) {
7507 gOff
= d
->fxState
[i
].offset
+ k
* d
->fxState
[i
].repeatLen
;
7508 gSz
= d
->fxState
[i
].size
;
7510 /* Ignore any sections marked as 'always defined'. */
7511 if (isAlwaysDefd(mce
, gOff
, gSz
))
7514 /* This state element is written or modified. So we need to
7515 consider it. If larger than 4 bytes, deal with it in
7519 tl_assert(gSz
>= 0);
7520 if (gSz
== 0) break;
7521 n
= gSz
<= 4 ? gSz
: 4;
7522 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
7523 b_offset
= MC_(get_otrack_shadow_offset
)(gOff
, 4);
7524 if (b_offset
!= -1) {
7526 /* If the guard expression evaluates to false we simply Put
7527 the value that is already stored in the guest state slot */
7528 IRAtom
*cond
, *iffalse
;
7530 cond
= assignNew('B', mce
, Ity_I1
,
7532 iffalse
= assignNew('B', mce
, Ity_I32
,
7533 IRExpr_Get(b_offset
+
7534 2*mce
->layout
->total_sizeB
,
7536 curr
= assignNew('V', mce
, Ity_I32
,
7537 IRExpr_ITE(cond
, curr
, iffalse
));
7539 stmt( 'B', mce
, IRStmt_Put(b_offset
7540 + 2*mce
->layout
->total_sizeB
,
7549 /* Outputs: memory that we write or modify. Same comments about
7550 endianness as above apply. */
7551 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
) {
7553 /* chew off 32-bit chunks */
7555 gen_store_b( mce
, 4, d
->mAddr
, d
->mSize
- toDo
, curr
,
7559 /* handle possible 16-bit excess */
7561 gen_store_b( mce
, 2, d
->mAddr
, d
->mSize
- toDo
, curr
,
7565 /* chew off the remaining 8-bit chunk, if any */
7567 gen_store_b( mce
, 1, d
->mAddr
, d
->mSize
- toDo
, curr
,
7571 tl_assert(toDo
== 0);
7576 /* Generate IR for origin shadowing for a general guarded store. */
7577 static void do_origins_Store_guarded ( MCEnv
* mce
,
7585 /* assert that the B value for the address is already available
7586 (somewhere), since the call to schemeE will want to see it.
7587 XXXX how does this actually ensure that?? */
7588 tl_assert(isIRAtom(stAddr
));
7589 tl_assert(isIRAtom(stData
));
7590 dszB
= sizeofIRType( typeOfIRExpr(mce
->sb
->tyenv
, stData
) );
7591 dataB
= schemeE( mce
, stData
);
7592 gen_store_b( mce
, dszB
, stAddr
, 0/*offset*/, dataB
, guard
);
7596 /* Generate IR for origin shadowing for a plain store. */
7597 static void do_origins_Store_plain ( MCEnv
* mce
,
7602 do_origins_Store_guarded ( mce
, stEnd
, stAddr
, stData
,
7607 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7609 static void do_origins_StoreG ( MCEnv
* mce
, IRStoreG
* sg
)
7611 do_origins_Store_guarded( mce
, sg
->end
, sg
->addr
,
7612 sg
->data
, sg
->guard
);
7615 static void do_origins_LoadG ( MCEnv
* mce
, IRLoadG
* lg
)
7617 IRType loadedTy
= Ity_INVALID
;
7619 case ILGop_IdentV128
: loadedTy
= Ity_V128
; break;
7620 case ILGop_Ident64
: loadedTy
= Ity_I64
; break;
7621 case ILGop_Ident32
: loadedTy
= Ity_I32
; break;
7622 case ILGop_16Uto32
: loadedTy
= Ity_I16
; break;
7623 case ILGop_16Sto32
: loadedTy
= Ity_I16
; break;
7624 case ILGop_8Uto32
: loadedTy
= Ity_I8
; break;
7625 case ILGop_8Sto32
: loadedTy
= Ity_I8
; break;
7626 default: VG_(tool_panic
)("schemeS.IRLoadG");
7629 = schemeE( mce
,lg
->alt
);
7631 = expr2ori_Load_guarded_General(mce
, loadedTy
,
7632 lg
->addr
, 0/*addr bias*/,
7633 lg
->guard
, ori_alt
);
7634 /* And finally, bind the origin to the destination temporary. */
7635 assign( 'B', mce
, findShadowTmpB(mce
, lg
->dst
), ori_final
);
7639 static void schemeS ( MCEnv
* mce
, IRStmt
* st
)
7641 tl_assert(MC_(clo_mc_level
) == 3);
7646 /* The value-check instrumenter handles this - by arranging
7647 to pass the address of the next instruction to
7648 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
7649 happen for origin tracking w.r.t. AbiHints. So there is
7650 nothing to do here. */
7654 IRPutI
*puti
= st
->Ist
.PutI
.details
;
7655 IRRegArray
* descr_b
;
7656 IRAtom
*t1
, *t2
, *t3
, *t4
;
7657 IRRegArray
* descr
= puti
->descr
;
7659 = MC_(get_otrack_reg_array_equiv_int_type
)(descr
);
7660 /* If this array is unshadowable for whatever reason,
7661 generate no code. */
7662 if (equivIntTy
== Ity_INVALID
)
7664 tl_assert(sizeofIRType(equivIntTy
) >= 4);
7665 tl_assert(sizeofIRType(equivIntTy
) == sizeofIRType(descr
->elemTy
));
7667 = mkIRRegArray( descr
->base
+ 2*mce
->layout
->total_sizeB
,
7668 equivIntTy
, descr
->nElems
);
7669 /* Compute a value to Put - the conjoinment of the origin for
7670 the data to be Put-ted (obviously) and of the index value
7671 (not so obviously). */
7672 t1
= schemeE( mce
, puti
->data
);
7673 t2
= schemeE( mce
, puti
->ix
);
7674 t3
= gen_maxU32( mce
, t1
, t2
);
7675 t4
= zWidenFrom32( mce
, equivIntTy
, t3
);
7676 stmt( 'B', mce
, IRStmt_PutI( mkIRPutI(descr_b
, puti
->ix
,
7682 do_origins_Dirty( mce
, st
->Ist
.Dirty
.details
);
7686 do_origins_Store_plain( mce
, st
->Ist
.Store
.end
,
7688 st
->Ist
.Store
.data
);
7692 do_origins_StoreG( mce
, st
->Ist
.StoreG
.details
);
7696 do_origins_LoadG( mce
, st
->Ist
.LoadG
.details
);
7700 /* In short: treat a load-linked like a normal load followed
7701 by an assignment of the loaded (shadow) data the result
7702 temporary. Treat a store-conditional like a normal store,
7703 and mark the result temporary as defined. */
7704 if (st
->Ist
.LLSC
.storedata
== NULL
) {
7707 = typeOfIRTemp(mce
->sb
->tyenv
, st
->Ist
.LLSC
.result
);
7709 = IRExpr_Load(st
->Ist
.LLSC
.end
, resTy
, st
->Ist
.LLSC
.addr
);
7710 tl_assert(resTy
== Ity_I128
|| resTy
== Ity_I64
|| resTy
== Ity_I32
7711 || resTy
== Ity_I16
|| resTy
== Ity_I8
);
7712 assign( 'B', mce
, findShadowTmpB(mce
, st
->Ist
.LLSC
.result
),
7713 schemeE(mce
, vanillaLoad
));
7715 /* Store conditional */
7716 do_origins_Store_plain( mce
, st
->Ist
.LLSC
.end
,
7718 st
->Ist
.LLSC
.storedata
);
7719 /* For the rationale behind this, see comments at the
7720 place where the V-shadow for .result is constructed, in
7721 do_shadow_LLSC. In short, we regard .result as
7723 assign( 'B', mce
, findShadowTmpB(mce
, st
->Ist
.LLSC
.result
),
7731 = MC_(get_otrack_shadow_offset
)(
7733 sizeofIRType(typeOfIRExpr(mce
->sb
->tyenv
, st
->Ist
.Put
.data
))
7735 if (b_offset
>= 0) {
7736 /* FIXME: this isn't an atom! */
7737 stmt( 'B', mce
, IRStmt_Put(b_offset
+ 2*mce
->layout
->total_sizeB
,
7738 schemeE( mce
, st
->Ist
.Put
.data
)) );
7744 assign( 'B', mce
, findShadowTmpB(mce
, st
->Ist
.WrTmp
.tmp
),
7745 schemeE(mce
, st
->Ist
.WrTmp
.data
) );
7755 VG_(printf
)("mc_translate.c: schemeS: unhandled: ");
7757 VG_(tool_panic
)("memcheck:schemeS");
7762 /*------------------------------------------------------------*/
7763 /*--- Post-tree-build final tidying ---*/
7764 /*------------------------------------------------------------*/
7766 /* This exploits the observation that Memcheck often produces
7767 repeated conditional calls of the form
7769 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
7771 with the same guard expression G guarding the same helper call.
7772 The second and subsequent calls are redundant. This usually
7773 results from instrumentation of guest code containing multiple
7774 memory references at different constant offsets from the same base
7775 register. After optimisation of the instrumentation, you get a
7776 test for the definedness of the base register for each memory
7777 reference, which is kinda pointless. MC_(final_tidy) therefore
7778 looks for such repeated calls and removes all but the first. */
7781 /* With some testing on perf/bz2.c, on amd64 and x86, compiled with
7782 gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to
7783 get almost all the benefits of this transformation whilst causing
7784 the slide-back case to just often enough to be verifiably
7785 correct. For posterity, the numbers are:
7789 1 4,336 (112,212 -> 1,709,473; ratio 15.2)
7790 2 4,336 (112,194 -> 1,669,895; ratio 14.9)
7791 3 4,336 (112,194 -> 1,660,713; ratio 14.8)
7792 4 4,336 (112,194 -> 1,658,555; ratio 14.8)
7793 5 4,336 (112,194 -> 1,655,447; ratio 14.8)
7794 6 4,336 (112,194 -> 1,655,101; ratio 14.8)
7795 7 4,336 (112,194 -> 1,654,858; ratio 14.7)
7796 8 4,336 (112,194 -> 1,654,810; ratio 14.7)
7797 10 4,336 (112,194 -> 1,654,621; ratio 14.7)
7798 12 4,336 (112,194 -> 1,654,678; ratio 14.7)
7799 16 4,336 (112,194 -> 1,654,494; ratio 14.7)
7800 32 4,336 (112,194 -> 1,654,602; ratio 14.7)
7801 inf 4,336 (112,194 -> 1,654,602; ratio 14.7)
7805 1 4,113 (107,329 -> 1,822,171; ratio 17.0)
7806 2 4,113 (107,329 -> 1,806,443; ratio 16.8)
7807 3 4,113 (107,329 -> 1,803,967; ratio 16.8)
7808 4 4,113 (107,329 -> 1,802,785; ratio 16.8)
7809 5 4,113 (107,329 -> 1,802,412; ratio 16.8)
7810 6 4,113 (107,329 -> 1,802,062; ratio 16.8)
7811 7 4,113 (107,329 -> 1,801,976; ratio 16.8)
7812 8 4,113 (107,329 -> 1,801,886; ratio 16.8)
7813 10 4,113 (107,329 -> 1,801,653; ratio 16.8)
7814 12 4,113 (107,329 -> 1,801,526; ratio 16.8)
7815 16 4,113 (107,329 -> 1,801,298; ratio 16.8)
7816 32 4,113 (107,329 -> 1,800,827; ratio 16.8)
7817 inf 4,113 (107,329 -> 1,800,827; ratio 16.8)
7820 /* Structs for recording which (helper, guard) pairs we have already
7823 #define N_TIDYING_PAIRS 16
7826 struct { void* entry
; IRExpr
* guard
; }
7831 Pair pairs
[N_TIDYING_PAIRS
+1/*for bounds checking*/];
7837 /* Return True if e1 and e2 definitely denote the same value (used to
7838 compare guards). Return False if unknown; False is the safe
7839 answer. Since guest registers and guest memory do not have the
7840 SSA property we must return False if any Gets or Loads appear in
7841 the expression. This implicitly assumes that e1 and e2 have the
7842 same IR type, which is always true here -- the type is Ity_I1. */
7844 static Bool
sameIRValue ( IRExpr
* e1
, IRExpr
* e2
)
7846 if (e1
->tag
!= e2
->tag
)
7850 return eqIRConst( e1
->Iex
.Const
.con
, e2
->Iex
.Const
.con
);
7852 return e1
->Iex
.Binop
.op
== e2
->Iex
.Binop
.op
7853 && sameIRValue(e1
->Iex
.Binop
.arg1
, e2
->Iex
.Binop
.arg1
)
7854 && sameIRValue(e1
->Iex
.Binop
.arg2
, e2
->Iex
.Binop
.arg2
);
7856 return e1
->Iex
.Unop
.op
== e2
->Iex
.Unop
.op
7857 && sameIRValue(e1
->Iex
.Unop
.arg
, e2
->Iex
.Unop
.arg
);
7859 return e1
->Iex
.RdTmp
.tmp
== e2
->Iex
.RdTmp
.tmp
;
7861 return sameIRValue( e1
->Iex
.ITE
.cond
, e2
->Iex
.ITE
.cond
)
7862 && sameIRValue( e1
->Iex
.ITE
.iftrue
, e2
->Iex
.ITE
.iftrue
)
7863 && sameIRValue( e1
->Iex
.ITE
.iffalse
, e2
->Iex
.ITE
.iffalse
);
7867 /* be lazy. Could define equality for these, but they never
7868 appear to be used. */
7873 /* be conservative - these may not give the same value each
7877 /* should never see this */
7880 VG_(printf
)("mc_translate.c: sameIRValue: unhandled: ");
7882 VG_(tool_panic
)("memcheck:sameIRValue");
7887 /* See if 'pairs' already has an entry for (entry, guard). Return
7888 True if so. If not, add an entry. */
7891 Bool
check_or_add ( Pairs
* tidyingEnv
, IRExpr
* guard
, void* entry
)
7893 UInt i
, n
= tidyingEnv
->pairsUsed
;
7894 tl_assert(n
<= N_TIDYING_PAIRS
);
7895 for (i
= 0; i
< n
; i
++) {
7896 if (tidyingEnv
->pairs
[i
].entry
== entry
7897 && sameIRValue(tidyingEnv
->pairs
[i
].guard
, guard
))
7900 /* (guard, entry) wasn't found in the array. Add it at the end.
7901 If the array is already full, slide the entries one slot
7902 backwards. This means we will lose to ability to detect
7903 duplicates from the pair in slot zero, but that happens so
7904 rarely that it's unlikely to have much effect on overall code
7905 quality. Also, this strategy loses the check for the oldest
7906 tracked exit (memory reference, basically) and so that is (I'd
7907 guess) least likely to be re-used after this point. */
7909 if (n
== N_TIDYING_PAIRS
) {
7910 for (i
= 1; i
< N_TIDYING_PAIRS
; i
++) {
7911 tidyingEnv
->pairs
[i
-1] = tidyingEnv
->pairs
[i
];
7913 tidyingEnv
->pairs
[N_TIDYING_PAIRS
-1].entry
= entry
;
7914 tidyingEnv
->pairs
[N_TIDYING_PAIRS
-1].guard
= guard
;
7916 tl_assert(n
< N_TIDYING_PAIRS
);
7917 tidyingEnv
->pairs
[n
].entry
= entry
;
7918 tidyingEnv
->pairs
[n
].guard
= guard
;
7920 tidyingEnv
->pairsUsed
= n
;
7925 static Bool
is_helperc_value_checkN_fail ( const HChar
* name
)
7927 /* This is expensive because it happens a lot. We are checking to
7928 see whether |name| is one of the following 8 strings:
7930 MC_(helperc_value_check8_fail_no_o)
7931 MC_(helperc_value_check4_fail_no_o)
7932 MC_(helperc_value_check0_fail_no_o)
7933 MC_(helperc_value_check1_fail_no_o)
7934 MC_(helperc_value_check8_fail_w_o)
7935 MC_(helperc_value_check0_fail_w_o)
7936 MC_(helperc_value_check1_fail_w_o)
7937 MC_(helperc_value_check4_fail_w_o)
7939 To speed it up, check the common prefix just once, rather than
7942 const HChar
* prefix
= "MC_(helperc_value_check";
7948 if (p
== 0) break; /* ran off the end of the prefix */
7949 /* We still have some prefix to use */
7950 if (n
== 0) return False
; /* have prefix, but name ran out */
7951 if (n
!= p
) return False
; /* have both pfx and name, but no match */
7956 /* Check the part after the prefix. */
7957 tl_assert(*prefix
== 0 && *name
!= 0);
7958 return 0==VG_(strcmp
)(name
, "8_fail_no_o)")
7959 || 0==VG_(strcmp
)(name
, "4_fail_no_o)")
7960 || 0==VG_(strcmp
)(name
, "0_fail_no_o)")
7961 || 0==VG_(strcmp
)(name
, "1_fail_no_o)")
7962 || 0==VG_(strcmp
)(name
, "8_fail_w_o)")
7963 || 0==VG_(strcmp
)(name
, "4_fail_w_o)")
7964 || 0==VG_(strcmp
)(name
, "0_fail_w_o)")
7965 || 0==VG_(strcmp
)(name
, "1_fail_w_o)");
7968 IRSB
* MC_(final_tidy
) ( IRSB
* sb_in
)
7975 Bool alreadyPresent
;
7978 pairs
.pairsUsed
= 0;
7980 pairs
.pairs
[N_TIDYING_PAIRS
].entry
= (void*)0x123;
7981 pairs
.pairs
[N_TIDYING_PAIRS
].guard
= (IRExpr
*)0x456;
7983 /* Scan forwards through the statements. Each time a call to one
7984 of the relevant helpers is seen, check if we have made a
7985 previous call to the same helper using the same guard
7986 expression, and if so, delete the call. */
7987 for (i
= 0; i
< sb_in
->stmts_used
; i
++) {
7988 st
= sb_in
->stmts
[i
];
7990 if (st
->tag
!= Ist_Dirty
)
7992 di
= st
->Ist
.Dirty
.details
;
7995 if (0) { ppIRExpr(guard
); VG_(printf
)("\n"); }
7997 if (!is_helperc_value_checkN_fail( cee
->name
))
7999 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
8000 guard 'guard'. Check if we have already seen a call to this
8001 function with the same guard. If so, delete it. If not,
8002 add it to the set of calls we do know about. */
8003 alreadyPresent
= check_or_add( &pairs
, guard
, cee
->addr
);
8004 if (alreadyPresent
) {
8005 sb_in
->stmts
[i
] = IRStmt_NoOp();
8006 if (0) VG_(printf
)("XX\n");
8010 tl_assert(pairs
.pairs
[N_TIDYING_PAIRS
].entry
== (void*)0x123);
8011 tl_assert(pairs
.pairs
[N_TIDYING_PAIRS
].guard
== (IRExpr
*)0x456);
8016 #undef N_TIDYING_PAIRS
8019 /*------------------------------------------------------------*/
8020 /*--- Startup assertion checking ---*/
8021 /*------------------------------------------------------------*/
8023 void MC_(do_instrumentation_startup_checks
)( void )
8025 /* Make a best-effort check to see that is_helperc_value_checkN_fail
8026 is working as we expect. */
8028 # define CHECK(_expected, _string) \
8029 tl_assert((_expected) == is_helperc_value_checkN_fail(_string))
8031 /* It should identify these 8, and no others, as targets. */
8032 CHECK(True
, "MC_(helperc_value_check8_fail_no_o)");
8033 CHECK(True
, "MC_(helperc_value_check4_fail_no_o)");
8034 CHECK(True
, "MC_(helperc_value_check0_fail_no_o)");
8035 CHECK(True
, "MC_(helperc_value_check1_fail_no_o)");
8036 CHECK(True
, "MC_(helperc_value_check8_fail_w_o)");
8037 CHECK(True
, "MC_(helperc_value_check0_fail_w_o)");
8038 CHECK(True
, "MC_(helperc_value_check1_fail_w_o)");
8039 CHECK(True
, "MC_(helperc_value_check4_fail_w_o)");
8041 /* Ad-hoc selection of other strings gathered via a quick test. */
8042 CHECK(False
, "amd64g_dirtyhelper_CPUID_avx2");
8043 CHECK(False
, "amd64g_dirtyhelper_RDTSC");
8044 CHECK(False
, "MC_(helperc_b_load1)");
8045 CHECK(False
, "MC_(helperc_b_load2)");
8046 CHECK(False
, "MC_(helperc_b_load4)");
8047 CHECK(False
, "MC_(helperc_b_load8)");
8048 CHECK(False
, "MC_(helperc_b_load16)");
8049 CHECK(False
, "MC_(helperc_b_load32)");
8050 CHECK(False
, "MC_(helperc_b_store1)");
8051 CHECK(False
, "MC_(helperc_b_store2)");
8052 CHECK(False
, "MC_(helperc_b_store4)");
8053 CHECK(False
, "MC_(helperc_b_store8)");
8054 CHECK(False
, "MC_(helperc_b_store16)");
8055 CHECK(False
, "MC_(helperc_b_store32)");
8056 CHECK(False
, "MC_(helperc_LOADV8)");
8057 CHECK(False
, "MC_(helperc_LOADV16le)");
8058 CHECK(False
, "MC_(helperc_LOADV32le)");
8059 CHECK(False
, "MC_(helperc_LOADV64le)");
8060 CHECK(False
, "MC_(helperc_LOADV128le)");
8061 CHECK(False
, "MC_(helperc_LOADV256le)");
8062 CHECK(False
, "MC_(helperc_STOREV16le)");
8063 CHECK(False
, "MC_(helperc_STOREV32le)");
8064 CHECK(False
, "MC_(helperc_STOREV64le)");
8065 CHECK(False
, "MC_(helperc_STOREV8)");
8066 CHECK(False
, "track_die_mem_stack_8");
8067 CHECK(False
, "track_new_mem_stack_8_w_ECU");
8068 CHECK(False
, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
8069 CHECK(False
, "VG_(unknown_SP_update_w_ECU)");
8075 /*------------------------------------------------------------*/
8076 /*--- Memcheck main ---*/
8077 /*------------------------------------------------------------*/
8079 static Bool
isBogusAtom ( IRAtom
* at
)
8081 if (at
->tag
== Iex_RdTmp
)
8083 tl_assert(at
->tag
== Iex_Const
);
8086 IRConst
* con
= at
->Iex
.Const
.con
;
8088 case Ico_U1
: return False
;
8089 case Ico_U8
: n
= (ULong
)con
->Ico
.U8
; break;
8090 case Ico_U16
: n
= (ULong
)con
->Ico
.U16
; break;
8091 case Ico_U32
: n
= (ULong
)con
->Ico
.U32
; break;
8092 case Ico_U64
: n
= (ULong
)con
->Ico
.U64
; break;
8093 case Ico_F32
: return False
;
8094 case Ico_F64
: return False
;
8095 case Ico_F32i
: return False
;
8096 case Ico_F64i
: return False
;
8097 case Ico_V128
: return False
;
8098 case Ico_V256
: return False
;
8099 default: ppIRExpr(at
); tl_assert(0);
8101 /* VG_(printf)("%llx\n", n); */
8103 if (LIKELY(n
<= 0x0000000000001000ULL
)) return False
;
8104 if (LIKELY(n
>= 0xFFFFFFFFFFFFF000ULL
)) return False
;
8105 /* The list of bogus atoms is: */
8106 return (/*32*/ n
== 0xFEFEFEFFULL
8107 /*32*/ || n
== 0x80808080ULL
8108 /*32*/ || n
== 0x7F7F7F7FULL
8109 /*32*/ || n
== 0x7EFEFEFFULL
8110 /*32*/ || n
== 0x81010100ULL
8111 /*64*/ || n
== 0xFFFFFFFFFEFEFEFFULL
8112 /*64*/ || n
== 0xFEFEFEFEFEFEFEFFULL
8113 /*64*/ || n
== 0x0000000000008080ULL
8114 /*64*/ || n
== 0x8080808080808080ULL
8115 /*64*/ || n
== 0x0101010101010101ULL
8120 /* Does 'st' mention any of the literals identified/listed in
8122 static inline Bool
containsBogusLiterals ( /*FLAT*/ IRStmt
* st
)
8130 e
= st
->Ist
.WrTmp
.data
;
8136 return isBogusAtom(e
);
8138 return isBogusAtom(e
->Iex
.Unop
.arg
)
8139 || e
->Iex
.Unop
.op
== Iop_GetMSBs8x16
;
8141 return isBogusAtom(e
->Iex
.GetI
.ix
);
8143 return isBogusAtom(e
->Iex
.Binop
.arg1
)
8144 || isBogusAtom(e
->Iex
.Binop
.arg2
);
8146 return isBogusAtom(e
->Iex
.Triop
.details
->arg1
)
8147 || isBogusAtom(e
->Iex
.Triop
.details
->arg2
)
8148 || isBogusAtom(e
->Iex
.Triop
.details
->arg3
);
8150 return isBogusAtom(e
->Iex
.Qop
.details
->arg1
)
8151 || isBogusAtom(e
->Iex
.Qop
.details
->arg2
)
8152 || isBogusAtom(e
->Iex
.Qop
.details
->arg3
)
8153 || isBogusAtom(e
->Iex
.Qop
.details
->arg4
);
8155 return isBogusAtom(e
->Iex
.ITE
.cond
)
8156 || isBogusAtom(e
->Iex
.ITE
.iftrue
)
8157 || isBogusAtom(e
->Iex
.ITE
.iffalse
);
8159 return isBogusAtom(e
->Iex
.Load
.addr
);
8161 for (i
= 0; e
->Iex
.CCall
.args
[i
]; i
++)
8162 if (isBogusAtom(e
->Iex
.CCall
.args
[i
]))
8169 d
= st
->Ist
.Dirty
.details
;
8170 for (i
= 0; d
->args
[i
]; i
++) {
8171 IRAtom
* atom
= d
->args
[i
];
8172 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(atom
))) {
8173 if (isBogusAtom(atom
))
8177 if (isBogusAtom(d
->guard
))
8179 if (d
->mAddr
&& isBogusAtom(d
->mAddr
))
8183 return isBogusAtom(st
->Ist
.Put
.data
);
8185 return isBogusAtom(st
->Ist
.PutI
.details
->ix
)
8186 || isBogusAtom(st
->Ist
.PutI
.details
->data
);
8188 return isBogusAtom(st
->Ist
.Store
.addr
)
8189 || isBogusAtom(st
->Ist
.Store
.data
);
8191 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
8192 return isBogusAtom(sg
->addr
) || isBogusAtom(sg
->data
)
8193 || isBogusAtom(sg
->guard
);
8196 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
8197 return isBogusAtom(lg
->addr
) || isBogusAtom(lg
->alt
)
8198 || isBogusAtom(lg
->guard
);
8201 return isBogusAtom(st
->Ist
.Exit
.guard
);
8203 return isBogusAtom(st
->Ist
.AbiHint
.base
)
8204 || isBogusAtom(st
->Ist
.AbiHint
.nia
);
8210 cas
= st
->Ist
.CAS
.details
;
8211 return isBogusAtom(cas
->addr
)
8212 || (cas
->expdHi
? isBogusAtom(cas
->expdHi
) : False
)
8213 || isBogusAtom(cas
->expdLo
)
8214 || (cas
->dataHi
? isBogusAtom(cas
->dataHi
) : False
)
8215 || isBogusAtom(cas
->dataLo
);
8217 return isBogusAtom(st
->Ist
.LLSC
.addr
)
8218 || (st
->Ist
.LLSC
.storedata
8219 ? isBogusAtom(st
->Ist
.LLSC
.storedata
)
8224 VG_(tool_panic
)("hasBogusLiterals");
8229 /* This is the pre-instrumentation analysis. It does a backwards pass over
8230 the stmts in |sb_in| to determine a HowUsed value for each tmp defined in
8233 Unrelatedly, it also checks all literals in the block with |isBogusAtom|,
8234 as a positive result from that is a strong indication that we need to
8235 expensively instrument add/sub in the block. We do both analyses in one
8236 pass, even though they are independent, so as to avoid the overhead of
8237 having to traverse the whole block twice.
8239 The usage pass proceeds as follows. Let max= be the max operation in the
8240 HowUsed lattice, hence
8242 X max= Y means X = max(X, Y)
8246 for t in original tmps . useEnv[t] = HuUnU
8248 for t used in the block's . next field
8249 useEnv[t] max= HuPCa // because jmp targets are PCast-tested
8251 for st iterating *backwards* in the block
8255 case "t1 = load(t2)" // case 1
8256 useEnv[t2] max= HuPCa
8258 case "t1 = add(t2, t3)" // case 2
8259 useEnv[t2] max= useEnv[t1]
8260 useEnv[t3] max= useEnv[t1]
8263 for t in st.usedTmps // case 3
8264 useEnv[t] max= HuOth
8265 // same as useEnv[t] = HuOth
8267 The general idea is that we accumulate, in useEnv[], information about
8268 how each tmp is used. That can be updated as we work further back
8269 through the block and find more uses of it, but its HowUsed value can
8270 only ascend the lattice, not descend.
8272 Initially we mark all tmps as unused. In case (1), if a tmp is seen to
8273 be used as a memory address, then its use is at least HuPCa. The point
8274 is that for a memory address we will add instrumentation to check if any
8275 bit of the address is undefined, which means that we won't need expensive
8276 V-bit propagation through an add expression that computed the address --
8277 cheap add instrumentation will be equivalent.
8279 Note in case (1) that if we have previously seen a non-memory-address use
8280 of the tmp, then its use will already be HuOth and will be unchanged by
8281 the max= operation. And if it turns out that the source of the tmp was
8282 an add, then we'll have to expensively instrument the add, because we
8283 can't prove that, for the previous non-memory-address use of the tmp,
8284 cheap and expensive instrumentation will be equivalent.
8286 In case 2, we propagate the usage-mode of the result of an add back
8287 through to its operands. Again, we use max= so as to take account of the
8288 fact that t2 or t3 might later in the block (viz, earlier in the
8289 iteration) have been used in a way that requires expensive add
8292 In case 3, we deal with all other tmp uses. We assume that we'll need a
8293 result that is as accurate as possible, so we max= HuOth into its use
8294 mode. Since HuOth is the top of the lattice, that's equivalent to just
8295 setting its use to HuOth.
8297 The net result of all this is that:
8299 tmps that are used either
8300 - only as a memory address, or
8301 - only as part of a tree of adds that computes a memory address,
8302 and has no other use
8303 are marked as HuPCa, and so we can instrument their generating Add
8304 nodes cheaply, which is the whole point of this analysis
8306 tmps that are used any other way at all are marked as HuOth
8308 tmps that are unused are marked as HuUnU. We don't expect to see any
8309 since we expect that the incoming IR has had all dead assignments
8310 removed by previous optimisation passes. Nevertheless the analysis is
8311 correct even in the presence of dead tmps.
8313 A final comment on dead tmps. In case 1 and case 2, we could actually
8314 conditionalise the updates thusly:
8316 if (useEnv[t1] > HuUnU) { useEnv[t2] max= HuPCa } // case 1
8318 if (useEnv[t1] > HuUnU) { useEnv[t2] max= useEnv[t1] } // case 2
8319 if (useEnv[t1] > HuUnU) { useEnv[t3] max= useEnv[t1] } // case 2
8321 In other words, if the assigned-to tmp |t1| is never used, then there's
8322 no point in propagating any use through to its operands. That won't
8323 change the final HuPCa-vs-HuOth results, which is what we care about.
8324 Given that we expect to get dead-code-free inputs, there's no point in
8325 adding this extra refinement.
8328 /* Helper for |preInstrumentationAnalysis|. */
8329 static inline void noteTmpUsesIn ( /*MOD*/HowUsed
* useEnv
,
8331 HowUsed newUse
, IRAtom
* at
)
8333 /* For the atom |at|, declare that for any tmp |t| in |at|, we will have
8334 seen a use of |newUse|. So, merge that info into |t|'s accumulated
8342 IRTemp t
= at
->Iex
.RdTmp
.tmp
;
8343 tl_assert(t
< tyenvUsed
); // "is an original tmp"
8344 // The "max" operation in the lattice
8345 if (newUse
> useEnv
[t
]) useEnv
[t
] = newUse
;
8349 // We should never get here -- it implies non-flat IR
8351 VG_(tool_panic
)("noteTmpUsesIn");
8358 static void preInstrumentationAnalysis ( /*OUT*/HowUsed
** useEnvP
,
8359 /*OUT*/Bool
* hasBogusLiteralsP
,
8362 const UInt nOrigTmps
= (UInt
)sb_in
->tyenv
->types_used
;
8364 // We've seen no bogus literals so far.
8367 // This is calloc'd, so implicitly all entries are initialised to HuUnU.
8368 HowUsed
* useEnv
= VG_(calloc
)("mc.preInstrumentationAnalysis.1",
8369 nOrigTmps
, sizeof(HowUsed
));
8371 // Firstly, roll in contributions from the final dst address.
8372 bogus
= isBogusAtom(sb_in
->next
);
8373 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, sb_in
->next
);
8375 // Now work backwards through the stmts.
8376 for (Int i
= sb_in
->stmts_used
-1; i
>= 0; i
--) {
8377 IRStmt
* st
= sb_in
->stmts
[i
];
8379 // Deal with literals.
8380 if (LIKELY(!bogus
)) {
8381 bogus
= containsBogusLiterals(st
);
8384 // Deal with tmp uses.
8387 IRTemp dst
= st
->Ist
.WrTmp
.tmp
;
8388 IRExpr
* rhs
= st
->Ist
.WrTmp
.data
;
8389 // This is the one place where we have to consider all possible
8390 // tags for |rhs|, and can't just assume it is a tmp or a const.
8393 // just propagate demand for |dst| into this tmp use.
8394 noteTmpUsesIn(useEnv
, nOrigTmps
, useEnv
[dst
], rhs
);
8397 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.Unop
.arg
);
8400 if (rhs
->Iex
.Binop
.op
== Iop_Add64
8401 || rhs
->Iex
.Binop
.op
== Iop_Add32
) {
8402 // propagate demand for |dst| through to the operands.
8403 noteTmpUsesIn(useEnv
, nOrigTmps
,
8404 useEnv
[dst
], rhs
->Iex
.Binop
.arg1
);
8405 noteTmpUsesIn(useEnv
, nOrigTmps
,
8406 useEnv
[dst
], rhs
->Iex
.Binop
.arg2
);
8408 // just say that the operands are used in some unknown way.
8409 noteTmpUsesIn(useEnv
, nOrigTmps
,
8410 HuOth
, rhs
->Iex
.Binop
.arg1
);
8411 noteTmpUsesIn(useEnv
, nOrigTmps
,
8412 HuOth
, rhs
->Iex
.Binop
.arg2
);
8416 // All operands are used in some unknown way.
8417 IRTriop
* tri
= rhs
->Iex
.Triop
.details
;
8418 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg1
);
8419 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg2
);
8420 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg3
);
8424 // All operands are used in some unknown way.
8425 IRQop
* qop
= rhs
->Iex
.Qop
.details
;
8426 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg1
);
8427 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg2
);
8428 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg3
);
8429 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg4
);
8433 // The address will be checked (== PCasted).
8434 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.Load
.addr
);
8437 // The condition is PCasted, the then- and else-values
8439 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.ITE
.cond
);
8440 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.ITE
.iftrue
);
8441 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.ITE
.iffalse
);
8444 // The args are used in unknown ways.
8445 for (IRExpr
** args
= rhs
->Iex
.CCall
.args
; *args
; args
++) {
8446 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, *args
);
8450 // The index will be checked/PCasted (see do_shadow_GETI)
8451 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.GetI
.ix
);
8459 VG_(tool_panic
)("preInstrumentationAnalysis:"
8460 " unhandled IRExpr");
8465 // The address will be checked (== PCasted). The data will be
8466 // used in some unknown way.
8467 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, st
->Ist
.Store
.addr
);
8468 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, st
->Ist
.Store
.data
);
8471 // The guard will be checked (== PCasted)
8472 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, st
->Ist
.Exit
.guard
);
8475 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, st
->Ist
.Put
.data
);
8478 IRPutI
* putI
= st
->Ist
.PutI
.details
;
8479 // The index will be checked/PCasted (see do_shadow_PUTI). The
8480 // data will be used in an unknown way.
8481 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, putI
->ix
);
8482 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, putI
->data
);
8486 IRDirty
* d
= st
->Ist
.Dirty
.details
;
8487 // The guard will be checked (== PCasted)
8488 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, d
->guard
);
8489 // The args will be used in unknown ways.
8490 for (IRExpr
** args
= d
->args
; *args
; args
++) {
8491 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, *args
);
8496 IRCAS
* cas
= st
->Ist
.CAS
.details
;
8497 // Address will be pcasted, everything else used as unknown
8498 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, cas
->addr
);
8499 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, cas
->expdLo
);
8500 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, cas
->dataLo
);
8502 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, cas
->expdHi
);
8504 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, cas
->dataHi
);
8508 // Both exprs are used in unknown ways. TODO: can we safely
8509 // just ignore AbiHints?
8510 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, st
->Ist
.AbiHint
.base
);
8511 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, st
->Ist
.AbiHint
.nia
);
8514 // We might be able to do better, and use HuPCa for the addr.
8515 // It's not immediately obvious that we can, because the address
8516 // is regarded as "used" only when the guard is true.
8517 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
8518 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, sg
->addr
);
8519 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, sg
->data
);
8520 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, sg
->guard
);
8524 // Per similar comments to Ist_StoreG .. not sure whether this
8525 // is really optimal.
8526 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
8527 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, lg
->addr
);
8528 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, lg
->alt
);
8529 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, lg
->guard
);
8533 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, st
->Ist
.LLSC
.addr
);
8534 if (st
->Ist
.LLSC
.storedata
)
8535 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, st
->Ist
.LLSC
.storedata
);
8544 VG_(tool_panic
)("preInstrumentationAnalysis: unhandled IRStmt");
8547 } // Now work backwards through the stmts.
8549 // Return the computed use env and the bogus-atom flag.
8550 tl_assert(*useEnvP
== NULL
);
8553 tl_assert(*hasBogusLiteralsP
== False
);
8554 *hasBogusLiteralsP
= bogus
;
8558 IRSB
* MC_(instrument
) ( VgCallbackClosure
* closure
,
8560 const VexGuestLayout
* layout
,
8561 const VexGuestExtents
* vge
,
8562 const VexArchInfo
* archinfo_host
,
8563 IRType gWordTy
, IRType hWordTy
)
8565 Bool verboze
= 0||False
;
8566 Int i
, j
, first_stmt
;
8571 if (gWordTy
!= hWordTy
) {
8572 /* We don't currently support this case. */
8573 VG_(tool_panic
)("host/guest word size mismatch");
8576 /* Check we're not completely nuts */
8577 tl_assert(sizeof(UWord
) == sizeof(void*));
8578 tl_assert(sizeof(Word
) == sizeof(void*));
8579 tl_assert(sizeof(Addr
) == sizeof(void*));
8580 tl_assert(sizeof(ULong
) == 8);
8581 tl_assert(sizeof(Long
) == 8);
8582 tl_assert(sizeof(UInt
) == 4);
8583 tl_assert(sizeof(Int
) == 4);
8585 tl_assert(MC_(clo_mc_level
) >= 1 && MC_(clo_mc_level
) <= 3);
8588 sb_out
= deepCopyIRSBExceptStmts(sb_in
);
8590 /* Set up the running environment. Both .sb and .tmpMap are
8591 modified as we go along. Note that tmps are added to both
8592 .sb->tyenv and .tmpMap together, so the valid index-set for
8593 those two arrays should always be identical. */
8594 VG_(memset
)(&mce
, 0, sizeof(mce
));
8596 mce
.trace
= verboze
;
8597 mce
.layout
= layout
;
8598 mce
.hWordTy
= hWordTy
;
8599 mce
.tmpHowUsed
= NULL
;
8601 /* BEGIN decide on expense levels for instrumentation. */
8603 /* Initially, select the cheap version of everything for which we have an
8605 DetailLevelByOp__set_all( &mce
.dlbo
, DLcheap
);
8607 /* Take account of the --expensive-definedness-checks= flag. */
8608 if (MC_(clo_expensive_definedness_checks
) == EdcNO
) {
8609 /* We just selected 'cheap for everything', so we don't need to do
8610 anything here. mce.tmpHowUsed remains NULL. */
8612 else if (MC_(clo_expensive_definedness_checks
) == EdcYES
) {
8613 /* Select 'expensive for everything'. mce.tmpHowUsed remains NULL. */
8614 DetailLevelByOp__set_all( &mce
.dlbo
, DLexpensive
);
8617 tl_assert(MC_(clo_expensive_definedness_checks
) == EdcAUTO
);
8618 /* We'll make our own selection, based on known per-target constraints
8619 and also on analysis of the block to be instrumented. First, set
8620 up default values for detail levels.
8622 On x86 and amd64, we'll routinely encounter code optimised by LLVM
8623 5 and above. Enable accurate interpretation of the following.
8624 LLVM uses adds for some bitfield inserts, and we get a lot of false
8625 errors if the cheap interpretation is used, alas. Could solve this
8626 much better if we knew which of such adds came from x86/amd64 LEA
8627 instructions, since these are the only ones really needing the
8628 expensive interpretation, but that would require some way to tag
8629 them in the _toIR.c front ends, which is a lot of faffing around.
8630 So for now we use preInstrumentationAnalysis() to detect adds which
8631 are used only to construct memory addresses, which is an
8632 approximation to the above, and is self-contained.*/
8633 # if defined(VGA_x86)
8634 mce
.dlbo
.dl_Add32
= DLauto
;
8635 mce
.dlbo
.dl_CmpEQ16_CmpNE16
= DLexpensive
;
8636 mce
.dlbo
.dl_CmpEQ32_CmpNE32
= DLexpensive
;
8637 # elif defined(VGA_amd64)
8638 mce
.dlbo
.dl_Add32
= DLexpensive
;
8639 mce
.dlbo
.dl_Add64
= DLauto
;
8640 mce
.dlbo
.dl_CmpEQ16_CmpNE16
= DLexpensive
;
8641 mce
.dlbo
.dl_CmpEQ32_CmpNE32
= DLexpensive
;
8642 mce
.dlbo
.dl_CmpEQ64_CmpNE64
= DLexpensive
;
8643 # elif defined(VGA_ppc64le)
8644 // Needed by (at least) set_AV_CR6() in the front end.
8645 mce
.dlbo
.dl_CmpEQ64_CmpNE64
= DLexpensive
;
8646 # elif defined(VGA_arm64)
8647 mce
.dlbo
.dl_CmpEQ32_CmpNE32
= DLexpensive
;
8648 mce
.dlbo
.dl_CmpEQ64_CmpNE64
= DLexpensive
;
8649 # elif defined(VGA_arm)
8650 mce
.dlbo
.dl_CmpEQ32_CmpNE32
= DLexpensive
;
8653 /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
8655 Bool hasBogusLiterals
= False
;
8656 preInstrumentationAnalysis( &mce
.tmpHowUsed
, &hasBogusLiterals
, sb_in
);
8658 if (hasBogusLiterals
) {
8659 /* This happens very rarely. In this case just select expensive
8660 for everything, and throw away the tmp-use analysis results. */
8661 DetailLevelByOp__set_all( &mce
.dlbo
, DLexpensive
);
8662 VG_(free
)( mce
.tmpHowUsed
);
8663 mce
.tmpHowUsed
= NULL
;
8665 /* Nothing. mce.tmpHowUsed contains tmp-use analysis results,
8666 which will be used for some subset of Iop_{Add,Sub}{32,64},
8667 based on which ones are set to DLauto for this target. */
8671 DetailLevelByOp__check_sanity( &mce
.dlbo
);
8674 // Debug printing: which tmps have been identified as PCast-only use
8675 if (mce
.tmpHowUsed
) {
8676 VG_(printf
)("Cheapies: ");
8677 for (UInt q
= 0; q
< sb_in
->tyenv
->types_used
; q
++) {
8678 if (mce
.tmpHowUsed
[q
] == HuPCa
) {
8679 VG_(printf
)("t%u ", q
);
8685 // Debug printing: number of ops by detail level
8686 UChar nCheap
= DetailLevelByOp__count( &mce
.dlbo
, DLcheap
);
8687 UChar nAuto
= DetailLevelByOp__count( &mce
.dlbo
, DLauto
);
8688 UChar nExpensive
= DetailLevelByOp__count( &mce
.dlbo
, DLexpensive
);
8689 tl_assert(nCheap
+ nAuto
+ nExpensive
== 8);
8691 VG_(printf
)("%u,%u,%u ", nCheap
, nAuto
, nExpensive
);
8693 /* END decide on expense levels for instrumentation. */
8695 /* Initialise the running the tmp environment. */
8697 mce
.tmpMap
= VG_(newXA
)( VG_(malloc
), "mc.MC_(instrument).1", VG_(free
),
8698 sizeof(TempMapEnt
));
8699 VG_(hintSizeXA
) (mce
.tmpMap
, sb_in
->tyenv
->types_used
);
8700 for (i
= 0; i
< sb_in
->tyenv
->types_used
; i
++) {
8703 ent
.shadowV
= IRTemp_INVALID
;
8704 ent
.shadowB
= IRTemp_INVALID
;
8705 VG_(addToXA
)( mce
.tmpMap
, &ent
);
8707 tl_assert( VG_(sizeXA
)( mce
.tmpMap
) == sb_in
->tyenv
->types_used
);
8709 /* Finally, begin instrumentation. */
8710 /* Copy verbatim any IR preamble preceding the first IMark */
8712 tl_assert(mce
.sb
== sb_out
);
8713 tl_assert(mce
.sb
!= sb_in
);
8716 while (i
< sb_in
->stmts_used
&& sb_in
->stmts
[i
]->tag
!= Ist_IMark
) {
8718 st
= sb_in
->stmts
[i
];
8720 tl_assert(isFlatIRStmt(st
));
8722 stmt( 'C', &mce
, sb_in
->stmts
[i
] );
8726 /* Nasty problem. IR optimisation of the pre-instrumented IR may
8727 cause the IR following the preamble to contain references to IR
8728 temporaries defined in the preamble. Because the preamble isn't
8729 instrumented, these temporaries don't have any shadows.
8730 Nevertheless uses of them following the preamble will cause
8731 memcheck to generate references to their shadows. End effect is
8732 to cause IR sanity check failures, due to references to
8733 non-existent shadows. This is only evident for the complex
8734 preambles used for function wrapping on TOC-afflicted platforms
8737 The following loop therefore scans the preamble looking for
8738 assignments to temporaries. For each one found it creates an
8739 assignment to the corresponding (V) shadow temp, marking it as
8740 'defined'. This is the same resulting IR as if the main
8741 instrumentation loop before had been applied to the statement
8744 Similarly, if origin tracking is enabled, we must generate an
8745 assignment for the corresponding origin (B) shadow, claiming
8746 no-origin, as appropriate for a defined value.
8748 for (j
= 0; j
< i
; j
++) {
8749 if (sb_in
->stmts
[j
]->tag
== Ist_WrTmp
) {
8750 /* findShadowTmpV checks its arg is an original tmp;
8751 no need to assert that here. */
8752 IRTemp tmp_o
= sb_in
->stmts
[j
]->Ist
.WrTmp
.tmp
;
8753 IRTemp tmp_v
= findShadowTmpV(&mce
, tmp_o
);
8754 IRType ty_v
= typeOfIRTemp(sb_out
->tyenv
, tmp_v
);
8755 assign( 'V', &mce
, tmp_v
, definedOfType( ty_v
) );
8756 if (MC_(clo_mc_level
) == 3) {
8757 IRTemp tmp_b
= findShadowTmpB(&mce
, tmp_o
);
8758 tl_assert(typeOfIRTemp(sb_out
->tyenv
, tmp_b
) == Ity_I32
);
8759 assign( 'B', &mce
, tmp_b
, mkU32(0)/* UNKNOWN ORIGIN */);
8762 VG_(printf
)("create shadow tmp(s) for preamble tmp [%d] ty ", j
);
8769 /* Iterate over the remaining stmts to generate instrumentation. */
8771 tl_assert(sb_in
->stmts_used
> 0);
8773 tl_assert(i
< sb_in
->stmts_used
);
8774 tl_assert(sb_in
->stmts
[i
]->tag
== Ist_IMark
);
8776 for (/* use current i*/; i
< sb_in
->stmts_used
; i
++) {
8778 st
= sb_in
->stmts
[i
];
8779 first_stmt
= sb_out
->stmts_used
;
8787 if (MC_(clo_mc_level
) == 3) {
8788 /* See comments on case Ist_CAS below. */
8789 if (st
->tag
!= Ist_CAS
)
8790 schemeS( &mce
, st
);
8793 /* Generate instrumentation code for each stmt ... */
8798 IRTemp dst
= st
->Ist
.WrTmp
.tmp
;
8799 tl_assert(dst
< (UInt
)sb_in
->tyenv
->types_used
);
8800 HowUsed hu
= mce
.tmpHowUsed
? mce
.tmpHowUsed
[dst
]
8801 : HuOth
/*we don't know, so play safe*/;
8802 assign( 'V', &mce
, findShadowTmpV(&mce
, st
->Ist
.WrTmp
.tmp
),
8803 expr2vbits( &mce
, st
->Ist
.WrTmp
.data
, hu
));
8808 do_shadow_PUT( &mce
,
8811 NULL
/* shadow atom */, NULL
/* guard */ );
8815 do_shadow_PUTI( &mce
, st
->Ist
.PutI
.details
);
8819 do_shadow_Store( &mce
, st
->Ist
.Store
.end
,
8820 st
->Ist
.Store
.addr
, 0/* addr bias */,
8822 NULL
/* shadow data */,
8827 do_shadow_StoreG( &mce
, st
->Ist
.StoreG
.details
);
8831 do_shadow_LoadG( &mce
, st
->Ist
.LoadG
.details
);
8835 complainIfUndefined( &mce
, st
->Ist
.Exit
.guard
, NULL
);
8846 do_shadow_Dirty( &mce
, st
->Ist
.Dirty
.details
);
8850 do_AbiHint( &mce
, st
->Ist
.AbiHint
.base
,
8851 st
->Ist
.AbiHint
.len
,
8852 st
->Ist
.AbiHint
.nia
);
8856 do_shadow_CAS( &mce
, st
->Ist
.CAS
.details
);
8857 /* Note, do_shadow_CAS copies the CAS itself to the output
8858 block, because it needs to add instrumentation both
8859 before and after it. Hence skip the copy below. Also
8860 skip the origin-tracking stuff (call to schemeS) above,
8861 since that's all tangled up with it too; do_shadow_CAS
8866 do_shadow_LLSC( &mce
,
8868 st
->Ist
.LLSC
.result
,
8870 st
->Ist
.LLSC
.storedata
);
8877 VG_(tool_panic
)("memcheck: unhandled IRStmt");
8879 } /* switch (st->tag) */
8882 for (j
= first_stmt
; j
< sb_out
->stmts_used
; j
++) {
8884 ppIRStmt(sb_out
->stmts
[j
]);
8890 /* ... and finally copy the stmt itself to the output. Except,
8891 skip the copy of IRCASs; see comments on case Ist_CAS
8893 if (st
->tag
!= Ist_CAS
)
8894 stmt('C', &mce
, st
);
8897 /* Now we need to complain if the jump target is undefined. */
8898 first_stmt
= sb_out
->stmts_used
;
8901 VG_(printf
)("sb_in->next = ");
8902 ppIRExpr(sb_in
->next
);
8903 VG_(printf
)("\n\n");
8906 complainIfUndefined( &mce
, sb_in
->next
, NULL
);
8909 for (j
= first_stmt
; j
< sb_out
->stmts_used
; j
++) {
8911 ppIRStmt(sb_out
->stmts
[j
]);
8917 /* If this fails, there's been some serious snafu with tmp management,
8918 that should be investigated. */
8919 tl_assert( VG_(sizeXA
)( mce
.tmpMap
) == mce
.sb
->tyenv
->types_used
);
8920 VG_(deleteXA
)( mce
.tmpMap
);
8922 if (mce
.tmpHowUsed
) {
8923 VG_(free
)( mce
.tmpHowUsed
);
8926 tl_assert(mce
.sb
== sb_out
);
8931 /*--------------------------------------------------------------------*/
8932 /*--- end mc_translate.c ---*/
8933 /*--------------------------------------------------------------------*/