Update bug status.
[valgrind.git] / memcheck / mc_translate.c
blob87b8ac6886cec5b4902128c3d01d3f8104bddf40
2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations. ---*/
4 /*--- mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of MemCheck, a heavyweight Valgrind tool for
9 detecting memory errors.
11 Copyright (C) 2000-2017 Julian Seward
12 jseward@acm.org
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 #include "pub_tool_basics.h"
31 #include "pub_tool_poolalloc.h" // For mc_include.h
32 #include "pub_tool_hashtable.h" // For mc_include.h
33 #include "pub_tool_libcassert.h"
34 #include "pub_tool_libcprint.h"
35 #include "pub_tool_tooliface.h"
36 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
37 #include "pub_tool_xarray.h"
38 #include "pub_tool_mallocfree.h"
39 #include "pub_tool_libcbase.h"
41 #include "mc_include.h"
44 /* FIXMEs JRS 2011-June-16.
46 Check the interpretation for vector narrowing and widening ops,
47 particularly the saturating ones. I suspect they are either overly
48 pessimistic and/or wrong.
50 Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
51 saturating shifts): the interpretation is overly pessimistic.
52 See comments on the relevant cases below for details.
54 Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
55 both rounding and non-rounding variants): ditto
58 /* This file implements the Memcheck instrumentation, and in
59 particular contains the core of its undefined value detection
60 machinery. For a comprehensive background of the terminology,
61 algorithms and rationale used herein, read:
63 Using Valgrind to detect undefined value errors with
64 bit-precision
66 Julian Seward and Nicholas Nethercote
68 2005 USENIX Annual Technical Conference (General Track),
69 Anaheim, CA, USA, April 10-15, 2005.
71 ----
73 Here is as good a place as any to record exactly when V bits are and
74 should be checked, why, and what function is responsible.
77 Memcheck complains when an undefined value is used:
79 1. In the condition of a conditional branch. Because it could cause
80 incorrect control flow, and thus cause incorrect externally-visible
81 behaviour. [mc_translate.c:complainIfUndefined]
83 2. As an argument to a system call, or as the value that specifies
84 the system call number. Because it could cause an incorrect
85 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
87 3. As the address in a load or store. Because it could cause an
88 incorrect value to be used later, which could cause externally-visible
89 behaviour (eg. via incorrect control flow or an incorrect system call
90 argument) [complainIfUndefined]
92 4. As the target address of a branch. Because it could cause incorrect
93 control flow. [complainIfUndefined]
95 5. As an argument to setenv, unsetenv, or putenv. Because it could put
96 an incorrect value into the external environment.
97 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
99 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
100 [complainIfUndefined]
102 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
103 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
104 requested it. [in memcheck.h]
107 Memcheck also complains, but should not, when an undefined value is used:
109 8. As the shift value in certain SIMD shift operations (but not in the
110 standard integer shift operations). This inconsistency is due to
111 historical reasons.) [complainIfUndefined]
114 Memcheck does not complain, but should, when an undefined value is used:
116 9. As an input to a client request. Because the client request may
117 affect the visible behaviour -- see bug #144362 for an example
118 involving the malloc replacements in vg_replace_malloc.c and
119 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
120 isn't identified. That bug report also has some info on how to solve
121 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
124 In practice, 1 and 2 account for the vast majority of cases.
127 /* Generation of addr-definedness, addr-validity and
128 guard-definedness checks pertaining to loads and stores (Iex_Load,
129 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
130 loads/stores) was re-checked 11 May 2013. */
133 /*------------------------------------------------------------*/
134 /*--- Forward decls ---*/
135 /*------------------------------------------------------------*/
137 struct _MCEnv;
139 // See below for comments explaining what this is for.
140 typedef
141 enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
142 HowUsed;
144 static IRType shadowTypeV ( IRType ty );
145 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e,
146 HowUsed hu/*use HuOth if unknown*/ );
147 static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
149 static IRExpr *i128_const_zero(void);
152 /*------------------------------------------------------------*/
153 /*--- Memcheck running state, and tmp management. ---*/
154 /*------------------------------------------------------------*/
156 /* For a few (maybe 1%) IROps, we have both a cheaper, less exact vbit
157 propagation scheme, and a more expensive, more precise vbit propagation
158 scheme. This enum describes, for such an IROp, which scheme to use. */
159 typedef
160 enum {
161 // Use the cheaper, less-exact variant.
162 DLcheap=4,
163 // Choose between cheap and expensive based on analysis of the block
164 // to be instrumented. Note that the choice may be done on a
165 // per-instance basis of the IROp that this DetailLevel describes.
166 DLauto,
167 // Use the more expensive, more-exact variant.
168 DLexpensive
170 DetailLevel;
173 /* A readonly part of the running state. For IROps that have both a
174 less-exact and more-exact interpretation, records which interpretation is
175 to be used. */
176 typedef
177 struct {
178 // For Add32/64 and Sub32/64, all 3 settings are allowed. For the
179 // DLauto case, a per-instance decision is to be made by inspecting
180 // the associated tmp's entry in MCEnv.tmpHowUsed.
181 DetailLevel dl_Add32;
182 DetailLevel dl_Add64;
183 DetailLevel dl_Sub32;
184 DetailLevel dl_Sub64;
185 // For Cmp{EQ,NE}{64,32,16,8}, only DLcheap and DLexpensive are
186 // allowed.
187 DetailLevel dl_CmpEQ64_CmpNE64;
188 DetailLevel dl_CmpEQ32_CmpNE32;
189 DetailLevel dl_CmpEQ16_CmpNE16;
190 DetailLevel dl_CmpEQ8_CmpNE8;
192 DetailLevelByOp;
194 static void DetailLevelByOp__set_all ( /*OUT*/DetailLevelByOp* dlbo,
195 DetailLevel dl )
197 dlbo->dl_Add32 = dl;
198 dlbo->dl_Add64 = dl;
199 dlbo->dl_Sub32 = dl;
200 dlbo->dl_Sub64 = dl;
201 dlbo->dl_CmpEQ64_CmpNE64 = dl;
202 dlbo->dl_CmpEQ32_CmpNE32 = dl;
203 dlbo->dl_CmpEQ16_CmpNE16 = dl;
204 dlbo->dl_CmpEQ8_CmpNE8 = dl;
207 static void DetailLevelByOp__check_sanity ( const DetailLevelByOp* dlbo )
209 tl_assert(dlbo->dl_Add32 >= DLcheap && dlbo->dl_Add32 <= DLexpensive);
210 tl_assert(dlbo->dl_Add64 >= DLcheap && dlbo->dl_Add64 <= DLexpensive);
211 tl_assert(dlbo->dl_Sub32 >= DLcheap && dlbo->dl_Sub32 <= DLexpensive);
212 tl_assert(dlbo->dl_Sub64 >= DLcheap && dlbo->dl_Sub64 <= DLexpensive);
213 tl_assert(dlbo->dl_CmpEQ64_CmpNE64 == DLcheap
214 || dlbo->dl_CmpEQ64_CmpNE64 == DLexpensive);
215 tl_assert(dlbo->dl_CmpEQ32_CmpNE32 == DLcheap
216 || dlbo->dl_CmpEQ32_CmpNE32 == DLexpensive);
217 tl_assert(dlbo->dl_CmpEQ16_CmpNE16 == DLcheap
218 || dlbo->dl_CmpEQ16_CmpNE16 == DLexpensive);
219 tl_assert(dlbo->dl_CmpEQ8_CmpNE8 == DLcheap
220 || dlbo->dl_CmpEQ8_CmpNE8 == DLexpensive);
223 static UInt DetailLevelByOp__count ( const DetailLevelByOp* dlbo,
224 DetailLevel dl )
226 UInt n = 0;
227 n += (dlbo->dl_Add32 == dl ? 1 : 0);
228 n += (dlbo->dl_Add64 == dl ? 1 : 0);
229 n += (dlbo->dl_Sub32 == dl ? 1 : 0);
230 n += (dlbo->dl_Sub64 == dl ? 1 : 0);
231 n += (dlbo->dl_CmpEQ64_CmpNE64 == dl ? 1 : 0);
232 n += (dlbo->dl_CmpEQ32_CmpNE32 == dl ? 1 : 0);
233 n += (dlbo->dl_CmpEQ16_CmpNE16 == dl ? 1 : 0);
234 n += (dlbo->dl_CmpEQ8_CmpNE8 == dl ? 1 : 0);
235 return n;
239 /* Carries info about a particular tmp. The tmp's number is not
240 recorded, as this is implied by (equal to) its index in the tmpMap
241 in MCEnv. The tmp's type is also not recorded, as this is present
242 in MCEnv.sb->tyenv.
244 When .kind is Orig, .shadowV and .shadowB may give the identities
245 of the temps currently holding the associated definedness (shadowV)
246 and origin (shadowB) values, or these may be IRTemp_INVALID if code
247 to compute such values has not yet been emitted.
249 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
250 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
251 illogical for a shadow tmp itself to be shadowed.
253 typedef
254 enum { Orig=1, VSh=2, BSh=3 }
255 TempKind;
257 typedef
258 struct {
259 TempKind kind;
260 IRTemp shadowV;
261 IRTemp shadowB;
263 TempMapEnt;
266 /* A |HowUsed| value carries analysis results about how values are used,
267 pertaining to whether we need to instrument integer adds expensively or
268 not. The running state carries a (readonly) mapping from original tmp to
269 a HowUsed value for it. A usage value can be one of three values,
270 forming a 3-point chain lattice.
272 HuOth ("Other") used in some arbitrary way
274 HuPCa ("PCast") used *only* in effectively a PCast, in which all
275 | we care about is the all-defined vs not-all-defined distinction
277 HuUnU ("Unused") not used at all.
279 The "safe" (don't-know) end of the lattice is "HuOth". See comments
280 below in |preInstrumentationAnalysis| for further details.
282 /* DECLARED ABOVE:
283 typedef
284 enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
285 HowUsed;
288 // Not actually necessary, but we don't want to waste D1 space.
289 STATIC_ASSERT(sizeof(HowUsed) == 1);
292 /* Carries around state during memcheck instrumentation. */
293 typedef
294 struct _MCEnv {
295 /* MODIFIED: the superblock being constructed. IRStmts are
296 added. */
297 IRSB* sb;
298 Bool trace;
300 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
301 current kind and possibly shadow temps for each temp in the
302 IRSB being constructed. Note that it does not contain the
303 type of each tmp. If you want to know the type, look at the
304 relevant entry in sb->tyenv. It follows that at all times
305 during the instrumentation process, the valid indices for
306 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
307 total number of Orig, V- and B- temps allocated so far.
309 The reason for this strange split (types in one place, all
310 other info in another) is that we need the types to be
311 attached to sb so as to make it possible to do
312 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
313 instrumentation process. */
314 XArray* /* of TempMapEnt */ tmpMap;
316 /* READONLY: contains details of which ops should be expensively
317 instrumented. */
318 DetailLevelByOp dlbo;
320 /* READONLY: for each original tmp, how the tmp is used. This is
321 computed by |preInstrumentationAnalysis|. Valid indices are
322 0 .. #temps_in_sb-1 (same as for tmpMap). */
323 HowUsed* tmpHowUsed;
325 /* READONLY: the guest layout. This indicates which parts of
326 the guest state should be regarded as 'always defined'. */
327 const VexGuestLayout* layout;
329 /* READONLY: the host word type. Needed for constructing
330 arguments of type 'HWord' to be passed to helper functions.
331 Ity_I32 or Ity_I64 only. */
332 IRType hWordTy;
334 MCEnv;
337 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
338 demand), as they are encountered. This is for two reasons.
340 (1) (less important reason): Many original tmps are unused due to
341 initial IR optimisation, and we do not want to spaces in tables
342 tracking them.
344 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
345 table indexed [0 .. n_types-1], which gives the current shadow for
346 each original tmp, or INVALID_IRTEMP if none is so far assigned.
347 It is necessary to support making multiple assignments to a shadow
348 -- specifically, after testing a shadow for definedness, it needs
349 to be made defined. But IR's SSA property disallows this.
351 (2) (more important reason): Therefore, when a shadow needs to get
352 a new value, a new temporary is created, the value is assigned to
353 that, and the tmpMap is updated to reflect the new binding.
355 A corollary is that if the tmpMap maps a given tmp to
356 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
357 there's a read-before-write error in the original tmps. The IR
358 sanity checker should catch all such anomalies, however.
361 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
362 both the table in mce->sb and to our auxiliary mapping. Note that
363 newTemp may cause mce->tmpMap to resize, hence previous results
364 from VG_(indexXA)(mce->tmpMap) are invalidated. */
365 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
367 Word newIx;
368 TempMapEnt ent;
369 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
370 ent.kind = kind;
371 ent.shadowV = IRTemp_INVALID;
372 ent.shadowB = IRTemp_INVALID;
373 newIx = VG_(addToXA)( mce->tmpMap, &ent );
374 tl_assert(newIx == (Word)tmp);
375 return tmp;
379 /* Find the tmp currently shadowing the given original tmp. If none
380 so far exists, allocate one. */
381 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
383 TempMapEnt* ent;
384 /* VG_(indexXA) range-checks 'orig', hence no need to check
385 here. */
386 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
387 tl_assert(ent->kind == Orig);
388 if (ent->shadowV == IRTemp_INVALID) {
389 IRTemp tmpV
390 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
391 /* newTemp may cause mce->tmpMap to resize, hence previous results
392 from VG_(indexXA) are invalid. */
393 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
394 tl_assert(ent->kind == Orig);
395 tl_assert(ent->shadowV == IRTemp_INVALID);
396 ent->shadowV = tmpV;
398 return ent->shadowV;
401 /* Allocate a new shadow for the given original tmp. This means any
402 previous shadow is abandoned. This is needed because it is
403 necessary to give a new value to a shadow once it has been tested
404 for undefinedness, but unfortunately IR's SSA property disallows
405 this. Instead we must abandon the old shadow, allocate a new one
406 and use that instead.
408 This is the same as findShadowTmpV, except we don't bother to see
409 if a shadow temp already existed -- we simply allocate a new one
410 regardless. */
411 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
413 TempMapEnt* ent;
414 /* VG_(indexXA) range-checks 'orig', hence no need to check
415 here. */
416 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
417 tl_assert(ent->kind == Orig);
418 if (1) {
419 IRTemp tmpV
420 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
421 /* newTemp may cause mce->tmpMap to resize, hence previous results
422 from VG_(indexXA) are invalid. */
423 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
424 tl_assert(ent->kind == Orig);
425 ent->shadowV = tmpV;
430 /*------------------------------------------------------------*/
431 /*--- IRAtoms -- a subset of IRExprs ---*/
432 /*------------------------------------------------------------*/
434 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
435 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
436 input, most of this code deals in atoms. Usefully, a value atom
437 always has a V-value which is also an atom: constants are shadowed
438 by constants, and temps are shadowed by the corresponding shadow
439 temporary. */
441 typedef IRExpr IRAtom;
443 /* (used for sanity checks only): is this an atom which looks
444 like it's from original code? */
445 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
447 if (a1->tag == Iex_Const)
448 return True;
449 if (a1->tag == Iex_RdTmp) {
450 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
451 return ent->kind == Orig;
453 return False;
456 /* (used for sanity checks only): is this an atom which looks
457 like it's from shadow code? */
458 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
460 if (a1->tag == Iex_Const)
461 return True;
462 if (a1->tag == Iex_RdTmp) {
463 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
464 return ent->kind == VSh || ent->kind == BSh;
466 return False;
469 /* (used for sanity checks only): check that both args are atoms and
470 are identically-kinded. */
471 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
473 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
474 return True;
475 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
476 return True;
477 return False;
481 /*------------------------------------------------------------*/
482 /*--- Type management ---*/
483 /*------------------------------------------------------------*/
485 /* Shadow state is always accessed using integer types. This returns
486 an integer type with the same size (as per sizeofIRType) as the
487 given type. The only valid shadow types are Bit, I8, I16, I32,
488 I64, I128, V128, V256. */
490 static IRType shadowTypeV ( IRType ty )
492 switch (ty) {
493 case Ity_I1:
494 case Ity_I8:
495 case Ity_I16:
496 case Ity_I32:
497 case Ity_I64:
498 case Ity_I128: return ty;
499 case Ity_F16: return Ity_I16;
500 case Ity_F32: return Ity_I32;
501 case Ity_D32: return Ity_I32;
502 case Ity_F64: return Ity_I64;
503 case Ity_D64: return Ity_I64;
504 case Ity_F128: return Ity_I128;
505 case Ity_D128: return Ity_I128;
506 case Ity_V128: return Ity_V128;
507 case Ity_V256: return Ity_V256;
508 default: ppIRType(ty);
509 VG_(tool_panic)("memcheck:shadowTypeV");
513 /* Produce a 'defined' value of the given shadow type. Should only be
514 supplied shadow types (Bit/I8/I16/I32/UI64). */
515 static IRExpr* definedOfType ( IRType ty ) {
516 switch (ty) {
517 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
518 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
519 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
520 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
521 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
522 case Ity_I128: return i128_const_zero();
523 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
524 case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
525 default: VG_(tool_panic)("memcheck:definedOfType");
530 /*------------------------------------------------------------*/
531 /*--- Constructing IR fragments ---*/
532 /*------------------------------------------------------------*/
534 /* add stmt to a bb */
535 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
536 if (mce->trace) {
537 VG_(printf)(" %c: ", cat);
538 ppIRStmt(st);
539 VG_(printf)("\n");
541 addStmtToIRSB(mce->sb, st);
544 /* assign value to tmp */
545 static inline
546 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
547 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
550 /* build various kinds of expressions */
551 #define triop(_op, _arg1, _arg2, _arg3) \
552 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
553 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
554 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
555 #define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
556 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
557 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
558 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
559 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
560 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
561 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
563 /* Bind the given expression to a new temporary, and return the
564 temporary. This effectively converts an arbitrary expression into
565 an atom.
567 'ty' is the type of 'e' and hence the type that the new temporary
568 needs to be. But passing it in is redundant, since we can deduce
569 the type merely by inspecting 'e'. So at least use that fact to
570 assert that the two types agree. */
571 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
573 TempKind k;
574 IRTemp t;
575 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
577 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
578 switch (cat) {
579 case 'V': k = VSh; break;
580 case 'B': k = BSh; break;
581 case 'C': k = Orig; break;
582 /* happens when we are making up new "orig"
583 expressions, for IRCAS handling */
584 default: tl_assert(0);
586 t = newTemp(mce, ty, k);
587 assign(cat, mce, t, e);
588 return mkexpr(t);
592 /*------------------------------------------------------------*/
593 /*--- Helper functions for 128-bit ops ---*/
594 /*------------------------------------------------------------*/
596 static IRExpr *i128_const_zero(void)
598 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
599 return binop(Iop_64HLto128, z64, z64);
602 /* There are no I128-bit loads and/or stores [as generated by any
603 current front ends]. So we do not need to worry about that in
604 expr2vbits_Load */
607 /*------------------------------------------------------------*/
608 /*--- Constructing definedness primitive ops ---*/
609 /*------------------------------------------------------------*/
611 /* --------- Defined-if-either-defined --------- */
613 static IRAtom* mkDifD1 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
614 tl_assert(isShadowAtom(mce,a1));
615 tl_assert(isShadowAtom(mce,a2));
616 return assignNew('V', mce, Ity_I1, binop(Iop_And1, a1, a2));
619 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
620 tl_assert(isShadowAtom(mce,a1));
621 tl_assert(isShadowAtom(mce,a2));
622 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
625 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
626 tl_assert(isShadowAtom(mce,a1));
627 tl_assert(isShadowAtom(mce,a2));
628 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
631 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
632 tl_assert(isShadowAtom(mce,a1));
633 tl_assert(isShadowAtom(mce,a2));
634 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
637 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
638 tl_assert(isShadowAtom(mce,a1));
639 tl_assert(isShadowAtom(mce,a2));
640 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
643 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
644 tl_assert(isShadowAtom(mce,a1));
645 tl_assert(isShadowAtom(mce,a2));
646 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
649 static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
650 tl_assert(isShadowAtom(mce,a1));
651 tl_assert(isShadowAtom(mce,a2));
652 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
655 /* --------- Undefined-if-either-undefined --------- */
657 static IRAtom* mkUifU1 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
658 tl_assert(isShadowAtom(mce,a1));
659 tl_assert(isShadowAtom(mce,a2));
660 return assignNew('V', mce, Ity_I1, binop(Iop_Or1, a1, a2));
663 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
664 tl_assert(isShadowAtom(mce,a1));
665 tl_assert(isShadowAtom(mce,a2));
666 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
669 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
670 tl_assert(isShadowAtom(mce,a1));
671 tl_assert(isShadowAtom(mce,a2));
672 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
675 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
676 tl_assert(isShadowAtom(mce,a1));
677 tl_assert(isShadowAtom(mce,a2));
678 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
681 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
682 tl_assert(isShadowAtom(mce,a1));
683 tl_assert(isShadowAtom(mce,a2));
684 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
687 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
688 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
689 tl_assert(isShadowAtom(mce,a1));
690 tl_assert(isShadowAtom(mce,a2));
691 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
692 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
693 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
694 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
695 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
696 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
698 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
701 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
702 tl_assert(isShadowAtom(mce,a1));
703 tl_assert(isShadowAtom(mce,a2));
704 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
707 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
708 tl_assert(isShadowAtom(mce,a1));
709 tl_assert(isShadowAtom(mce,a2));
710 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
713 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
714 switch (vty) {
715 case Ity_I8: return mkUifU8(mce, a1, a2);
716 case Ity_I16: return mkUifU16(mce, a1, a2);
717 case Ity_I32: return mkUifU32(mce, a1, a2);
718 case Ity_I64: return mkUifU64(mce, a1, a2);
719 case Ity_I128: return mkUifU128(mce, a1, a2);
720 case Ity_V128: return mkUifUV128(mce, a1, a2);
721 case Ity_V256: return mkUifUV256(mce, a1, a2);
722 default:
723 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
724 VG_(tool_panic)("memcheck:mkUifU");
728 /* --------- The Left-family of operations. --------- */
730 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
731 tl_assert(isShadowAtom(mce,a1));
732 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
735 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
736 tl_assert(isShadowAtom(mce,a1));
737 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
740 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
741 tl_assert(isShadowAtom(mce,a1));
742 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
745 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
746 tl_assert(isShadowAtom(mce,a1));
747 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
750 /* --------- The Right-family of operations. --------- */
752 /* Unfortunately these are a lot more expensive then their Left
753 counterparts. Fortunately they are only very rarely used -- only for
754 count-leading-zeroes instrumentation. */
756 static IRAtom* mkRight32 ( MCEnv* mce, IRAtom* a1 )
758 for (Int i = 1; i <= 16; i *= 2) {
759 // a1 |= (a1 >>u i)
760 IRAtom* tmp
761 = assignNew('V', mce, Ity_I32, binop(Iop_Shr32, a1, mkU8(i)));
762 a1 = assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, tmp));
764 return a1;
767 static IRAtom* mkRight64 ( MCEnv* mce, IRAtom* a1 )
769 for (Int i = 1; i <= 32; i *= 2) {
770 // a1 |= (a1 >>u i)
771 IRAtom* tmp
772 = assignNew('V', mce, Ity_I64, binop(Iop_Shr64, a1, mkU8(i)));
773 a1 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, tmp));
775 return a1;
778 /* --------- 'Improvement' functions for AND/OR. --------- */
780 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
781 defined (0); all other -> undefined (1).
783 static IRAtom* mkImproveAND1 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
785 tl_assert(isOriginalAtom(mce, data));
786 tl_assert(isShadowAtom(mce, vbits));
787 tl_assert(sameKindedAtoms(data, vbits));
788 return assignNew('V', mce, Ity_I1, binop(Iop_Or1, data, vbits));
791 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
793 tl_assert(isOriginalAtom(mce, data));
794 tl_assert(isShadowAtom(mce, vbits));
795 tl_assert(sameKindedAtoms(data, vbits));
796 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
799 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
801 tl_assert(isOriginalAtom(mce, data));
802 tl_assert(isShadowAtom(mce, vbits));
803 tl_assert(sameKindedAtoms(data, vbits));
804 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
807 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
809 tl_assert(isOriginalAtom(mce, data));
810 tl_assert(isShadowAtom(mce, vbits));
811 tl_assert(sameKindedAtoms(data, vbits));
812 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
815 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
817 tl_assert(isOriginalAtom(mce, data));
818 tl_assert(isShadowAtom(mce, vbits));
819 tl_assert(sameKindedAtoms(data, vbits));
820 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
823 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
825 tl_assert(isOriginalAtom(mce, data));
826 tl_assert(isShadowAtom(mce, vbits));
827 tl_assert(sameKindedAtoms(data, vbits));
828 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
831 static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
833 tl_assert(isOriginalAtom(mce, data));
834 tl_assert(isShadowAtom(mce, vbits));
835 tl_assert(sameKindedAtoms(data, vbits));
836 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
839 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
840 defined (0); all other -> undefined (1).
842 static IRAtom* mkImproveOR1 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
844 tl_assert(isOriginalAtom(mce, data));
845 tl_assert(isShadowAtom(mce, vbits));
846 tl_assert(sameKindedAtoms(data, vbits));
847 return assignNew(
848 'V', mce, Ity_I1,
849 binop(Iop_Or1,
850 assignNew('V', mce, Ity_I1, unop(Iop_Not1, data)),
851 vbits) );
854 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
856 tl_assert(isOriginalAtom(mce, data));
857 tl_assert(isShadowAtom(mce, vbits));
858 tl_assert(sameKindedAtoms(data, vbits));
859 return assignNew(
860 'V', mce, Ity_I8,
861 binop(Iop_Or8,
862 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
863 vbits) );
866 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
868 tl_assert(isOriginalAtom(mce, data));
869 tl_assert(isShadowAtom(mce, vbits));
870 tl_assert(sameKindedAtoms(data, vbits));
871 return assignNew(
872 'V', mce, Ity_I16,
873 binop(Iop_Or16,
874 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
875 vbits) );
878 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
880 tl_assert(isOriginalAtom(mce, data));
881 tl_assert(isShadowAtom(mce, vbits));
882 tl_assert(sameKindedAtoms(data, vbits));
883 return assignNew(
884 'V', mce, Ity_I32,
885 binop(Iop_Or32,
886 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
887 vbits) );
890 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
892 tl_assert(isOriginalAtom(mce, data));
893 tl_assert(isShadowAtom(mce, vbits));
894 tl_assert(sameKindedAtoms(data, vbits));
895 return assignNew(
896 'V', mce, Ity_I64,
897 binop(Iop_Or64,
898 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
899 vbits) );
902 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
904 tl_assert(isOriginalAtom(mce, data));
905 tl_assert(isShadowAtom(mce, vbits));
906 tl_assert(sameKindedAtoms(data, vbits));
907 return assignNew(
908 'V', mce, Ity_V128,
909 binop(Iop_OrV128,
910 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
911 vbits) );
914 static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
916 tl_assert(isOriginalAtom(mce, data));
917 tl_assert(isShadowAtom(mce, vbits));
918 tl_assert(sameKindedAtoms(data, vbits));
919 return assignNew(
920 'V', mce, Ity_V256,
921 binop(Iop_OrV256,
922 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
923 vbits) );
926 /* --------- Pessimising casts. --------- */
928 /* The function returns an expression of type DST_TY. If any of the VBITS
929 is undefined (value == 1) the resulting expression has all bits set to
930 1. Otherwise, all bits are 0. */
932 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
934 IRType src_ty;
935 IRAtom* tmp1;
937 /* Note, dst_ty is a shadow type, not an original type. */
938 tl_assert(isShadowAtom(mce,vbits));
939 src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
941 /* Fast-track some common cases */
942 if (src_ty == Ity_I32 && dst_ty == Ity_I32)
943 return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
945 if (src_ty == Ity_I64 && dst_ty == Ity_I64)
946 return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
948 if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
949 /* PCast the arg, then clone it. */
950 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
951 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
954 if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
955 /* PCast the arg, then clone it 4 times. */
956 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
957 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
958 return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
961 if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
962 /* PCast the arg, then clone it 8 times. */
963 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
964 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
965 tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
966 return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
969 if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
970 /* PCast the arg. This gives all 0s or all 1s. Then throw away
971 the top half. */
972 IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
973 return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
976 if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
977 /* Use InterleaveHI64x2 to copy the top half of the vector into
978 the bottom half. Then we can UifU it with the original, throw
979 away the upper half of the result, and PCast-I64-to-I64
980 the lower half. */
981 // Generates vbits[127:64] : vbits[127:64]
982 IRAtom* hi64hi64
983 = assignNew('V', mce, Ity_V128,
984 binop(Iop_InterleaveHI64x2, vbits, vbits));
985 // Generates
986 // UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
987 // == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
988 IRAtom* lohi64
989 = mkUifUV128(mce, hi64hi64, vbits);
990 // Generates UifU(vbits[127:64],vbits[63:0])
991 IRAtom* lo64
992 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
993 // Generates
994 // PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
995 // == PCast-to-I64( vbits[127:0] )
996 IRAtom* res
997 = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
998 return res;
1001 /* Else do it the slow way .. */
1002 /* First of all, collapse vbits down to a single bit. */
1003 tmp1 = NULL;
1004 switch (src_ty) {
1005 case Ity_I1:
1006 tmp1 = vbits;
1007 break;
1008 case Ity_I8:
1009 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
1010 break;
1011 case Ity_I16:
1012 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
1013 break;
1014 case Ity_I32:
1015 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
1016 break;
1017 case Ity_I64:
1018 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
1019 break;
1020 case Ity_I128: {
1021 /* Gah. Chop it in half, OR the halves together, and compare
1022 that with zero. */
1023 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
1024 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
1025 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
1026 tmp1 = assignNew('V', mce, Ity_I1,
1027 unop(Iop_CmpNEZ64, tmp4));
1028 break;
1030 case Ity_V128: {
1031 /* Chop it in half, OR the halves together, and compare that
1032 * with zero.
1034 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vbits));
1035 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vbits));
1036 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
1037 tmp1 = assignNew('V', mce, Ity_I1,
1038 unop(Iop_CmpNEZ64, tmp4));
1039 break;
1041 default:
1042 ppIRType(src_ty);
1043 VG_(tool_panic)("mkPCastTo(1)");
1045 tl_assert(tmp1);
1046 /* Now widen up to the dst type. */
1047 switch (dst_ty) {
1048 case Ity_I1:
1049 return tmp1;
1050 case Ity_I8:
1051 return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
1052 case Ity_I16:
1053 return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
1054 case Ity_I32:
1055 return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
1056 case Ity_I64:
1057 return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1058 case Ity_V128:
1059 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1060 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
1061 return tmp1;
1062 case Ity_I128:
1063 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1064 tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
1065 return tmp1;
1066 case Ity_V256:
1067 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1068 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
1069 tmp1, tmp1));
1070 tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
1071 tmp1, tmp1));
1072 return tmp1;
1073 default:
1074 ppIRType(dst_ty);
1075 VG_(tool_panic)("mkPCastTo(2)");
1079 /* This is a minor variant. It takes an arg of some type and returns
1080 a value of the same type. The result consists entirely of Defined
1081 (zero) bits except its least significant bit, which is a PCast of
1082 the entire argument down to a single bit. */
1083 static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
1085 if (ty == Ity_V128) {
1086 /* --- Case for V128 --- */
1087 IRAtom* varg128 = varg;
1088 // generates: PCast-to-I64(varg128)
1089 IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128);
1090 // Now introduce zeros (defined bits) in the top 63 places
1091 // generates: Def--(63)--Def PCast-to-I1(varg128)
1092 IRAtom* d63pc
1093 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1)));
1094 // generates: Def--(64)--Def
1095 IRAtom* d64
1096 = definedOfType(Ity_I64);
1097 // generates: Def--(127)--Def PCast-to-I1(varg128)
1098 IRAtom* res
1099 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc));
1100 return res;
1102 if (ty == Ity_I64) {
1103 /* --- Case for I64 --- */
1104 // PCast to 64
1105 IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg);
1106 // Zero (Def) out the top 63 bits
1107 IRAtom* res
1108 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1)));
1109 return res;
1111 /*NOTREACHED*/
1112 tl_assert(0);
1115 /* --------- Optimistic casts. --------- */
1117 /* The function takes and returns an expression of type TY. If any of the
1118 VBITS indicate defined (value == 0) the resulting expression has all bits
1119 set to 0. Otherwise, all bits are 1. In words, if any bits are defined
1120 then all bits are made to be defined.
1122 In short we compute (vbits - (vbits >>u 1)) >>s (bitsize(vbits)-1).
1124 static IRAtom* mkOCastAt( MCEnv* mce, IRType ty, IRAtom* vbits )
1126 IROp opSUB, opSHR, opSAR;
1127 UInt sh;
1129 switch (ty) {
1130 case Ity_I64:
1131 opSUB = Iop_Sub64; opSHR = Iop_Shr64; opSAR = Iop_Sar64; sh = 63;
1132 break;
1133 case Ity_I32:
1134 opSUB = Iop_Sub32; opSHR = Iop_Shr32; opSAR = Iop_Sar32; sh = 31;
1135 break;
1136 case Ity_I16:
1137 opSUB = Iop_Sub16; opSHR = Iop_Shr16; opSAR = Iop_Sar16; sh = 15;
1138 break;
1139 case Ity_I8:
1140 opSUB = Iop_Sub8; opSHR = Iop_Shr8; opSAR = Iop_Sar8; sh = 7;
1141 break;
1142 default:
1143 ppIRType(ty);
1144 VG_(tool_panic)("mkOCastTo");
1147 IRAtom *shr1, *at;
1148 shr1 = assignNew('V', mce,ty, binop(opSHR, vbits, mkU8(1)));
1149 at = assignNew('V', mce,ty, binop(opSUB, vbits, shr1));
1150 at = assignNew('V', mce,ty, binop(opSAR, at, mkU8(sh)));
1151 return at;
1155 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
1157 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
1158 PCasting to Ity_U1. However, sometimes it is necessary to be more
1159 accurate. The insight is that the result is defined if two
1160 corresponding bits can be found, one from each argument, so that
1161 both bits are defined but are different -- that makes EQ say "No"
1162 and NE say "Yes". Hence, we compute an improvement term and DifD
1163 it onto the "normal" (UifU) result.
1165 The result is:
1167 PCastTo<1> (
1168 -- naive version
1169 UifU<sz>(vxx, vyy)
1171 `DifD<sz>`
1173 -- improvement term
1174 OCast<sz>(vec)
1177 where
1178 vec contains 0 (defined) bits where the corresponding arg bits
1179 are defined but different, and 1 bits otherwise.
1181 vec = Or<sz>( vxx, // 0 iff bit defined
1182 vyy, // 0 iff bit defined
1183 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
1186 If any bit of vec is 0, the result is defined and so the
1187 improvement term should produce 0...0, else it should produce
1188 1...1.
1190 Hence require for the improvement term:
1192 OCast(vec) = if vec == 1...1 then 1...1 else 0...0
1194 which you can think of as an "optimistic cast" (OCast, the opposite of
1195 the normal "pessimistic cast" (PCast) family. An OCast says all bits
1196 are defined if any bit is defined.
1198 It is possible to show that
1200 if vec == 1...1 then 1...1 else 0...0
1202 can be implemented in straight-line code as
1204 (vec - (vec >>u 1)) >>s (word-size-in-bits - 1)
1206 We note that vec contains the sub-term Or<sz>(vxx, vyy). Since UifU is
1207 implemented with Or (since 1 signifies undefinedness), this is a
1208 duplicate of the UifU<sz>(vxx, vyy) term and so we can CSE it out, giving
1209 a final version of:
1211 let naive = UifU<sz>(vxx, vyy)
1212 vec = Or<sz>(naive, Not<sz>(Xor<sz)(xx, yy))
1214 PCastTo<1>( DifD<sz>(naive, OCast<sz>(vec)) )
1216 This was extensively re-analysed and checked on 6 July 05 and again
1217 in July 2017.
1219 static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
1220 IRType ty,
1221 IRAtom* vxx, IRAtom* vyy,
1222 IRAtom* xx, IRAtom* yy )
1224 IRAtom *naive, *vec, *improved, *final_cast;
1225 IROp opDIFD, opUIFU, opOR, opXOR, opNOT;
1227 tl_assert(isShadowAtom(mce,vxx));
1228 tl_assert(isShadowAtom(mce,vyy));
1229 tl_assert(isOriginalAtom(mce,xx));
1230 tl_assert(isOriginalAtom(mce,yy));
1231 tl_assert(sameKindedAtoms(vxx,xx));
1232 tl_assert(sameKindedAtoms(vyy,yy));
1234 switch (ty) {
1235 case Ity_I8:
1236 opDIFD = Iop_And8;
1237 opUIFU = Iop_Or8;
1238 opOR = Iop_Or8;
1239 opXOR = Iop_Xor8;
1240 opNOT = Iop_Not8;
1241 break;
1242 case Ity_I16:
1243 opDIFD = Iop_And16;
1244 opUIFU = Iop_Or16;
1245 opOR = Iop_Or16;
1246 opXOR = Iop_Xor16;
1247 opNOT = Iop_Not16;
1248 break;
1249 case Ity_I32:
1250 opDIFD = Iop_And32;
1251 opUIFU = Iop_Or32;
1252 opOR = Iop_Or32;
1253 opXOR = Iop_Xor32;
1254 opNOT = Iop_Not32;
1255 break;
1256 case Ity_I64:
1257 opDIFD = Iop_And64;
1258 opUIFU = Iop_Or64;
1259 opOR = Iop_Or64;
1260 opXOR = Iop_Xor64;
1261 opNOT = Iop_Not64;
1262 break;
1263 default:
1264 VG_(tool_panic)("expensiveCmpEQorNE");
1267 naive
1268 = assignNew('V', mce, ty, binop(opUIFU, vxx, vyy));
1270 vec
1271 = assignNew(
1272 'V', mce,ty,
1273 binop( opOR,
1274 naive,
1275 assignNew(
1276 'V', mce,ty,
1277 unop(opNOT,
1278 assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
1280 improved
1281 = assignNew( 'V', mce,ty,
1282 binop(opDIFD, naive, mkOCastAt(mce, ty, vec)));
1284 final_cast
1285 = mkPCastTo( mce, Ity_I1, improved );
1287 return final_cast;
1291 /* --------- Semi-accurate interpretation of CmpORD. --------- */
1293 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
1295 CmpORD32S(x,y) = 1<<3 if x <s y
1296 = 1<<2 if x >s y
1297 = 1<<1 if x == y
1299 and similarly the unsigned variant. The default interpretation is:
1301 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
1302 & (7<<1)
1304 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
1305 are zero and therefore defined (viz, zero).
1307 Also deal with a special case better:
1309 CmpORD32S(x,0)
1311 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1312 will be defined even if the rest of x isn't. In which case we do:
1314 CmpORD32S#(x,x#,0,{impliedly 0}#)
1315 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1316 | (x# >>u 31) << 3 -- LT# = x#[31]
1318 Analogous handling for CmpORD64{S,U}.
1320 static Bool isZeroU32 ( IRAtom* e )
1322 return
1323 toBool( e->tag == Iex_Const
1324 && e->Iex.Const.con->tag == Ico_U32
1325 && e->Iex.Const.con->Ico.U32 == 0 );
1328 static Bool isZeroU64 ( IRAtom* e )
1330 return
1331 toBool( e->tag == Iex_Const
1332 && e->Iex.Const.con->tag == Ico_U64
1333 && e->Iex.Const.con->Ico.U64 == 0 );
1336 static IRAtom* doCmpORD ( MCEnv* mce,
1337 IROp cmp_op,
1338 IRAtom* xxhash, IRAtom* yyhash,
1339 IRAtom* xx, IRAtom* yy )
1341 Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
1342 Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
1343 IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
1344 IROp opAND = m64 ? Iop_And64 : Iop_And32;
1345 IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
1346 IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
1347 IROp op1UtoWS = m64 ? Iop_1Uto64 : Iop_1Uto32;
1348 IRType ty = m64 ? Ity_I64 : Ity_I32;
1349 Int width = m64 ? 64 : 32;
1351 Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
1353 tl_assert(isShadowAtom(mce,xxhash));
1354 tl_assert(isShadowAtom(mce,yyhash));
1355 tl_assert(isOriginalAtom(mce,xx));
1356 tl_assert(isOriginalAtom(mce,yy));
1357 tl_assert(sameKindedAtoms(xxhash,xx));
1358 tl_assert(sameKindedAtoms(yyhash,yy));
1359 tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
1360 || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
1362 if (0) {
1363 ppIROp(cmp_op); VG_(printf)(" ");
1364 ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
1367 if (syned && isZero(yy)) {
1368 /* fancy interpretation */
1369 /* if yy is zero, then it must be fully defined (zero#). */
1370 tl_assert(isZero(yyhash));
1371 // This is still inaccurate, but I don't think it matters, since
1372 // nobody writes code of the form
1373 // "is <partially-undefined-value> signedly greater than zero?".
1374 // We therefore simply declare "x >s 0" to be undefined if any bit in
1375 // x is undefined. That's clearly suboptimal in some cases. Eg, if
1376 // the highest order bit is a defined 1 then x is negative so it
1377 // doesn't matter whether the remaining bits are defined or not.
1378 IRAtom* t_0_gt_0_0
1379 = assignNew(
1380 'V', mce,ty,
1381 binop(
1382 opAND,
1383 mkPCastTo(mce,ty, xxhash),
1384 m64 ? mkU64(1<<2) : mkU32(1<<2)
1386 // For "x <s 0", we can just copy the definedness of the top bit of x
1387 // and we have a precise result.
1388 IRAtom* t_lt_0_0_0
1389 = assignNew(
1390 'V', mce,ty,
1391 binop(
1392 opSHL,
1393 assignNew(
1394 'V', mce,ty,
1395 binop(opSHR, xxhash, mkU8(width-1))),
1396 mkU8(3)
1398 // For "x == 0" we can hand the problem off to expensiveCmpEQorNE.
1399 IRAtom* t_0_0_eq_0
1400 = assignNew(
1401 'V', mce,ty,
1402 binop(
1403 opSHL,
1404 assignNew('V', mce,ty,
1405 unop(
1406 op1UtoWS,
1407 expensiveCmpEQorNE(mce, ty, xxhash, yyhash, xx, yy))
1409 mkU8(1)
1411 return
1412 binop(
1413 opOR,
1414 assignNew('V', mce,ty, binop(opOR, t_lt_0_0_0, t_0_gt_0_0)),
1415 t_0_0_eq_0
1417 } else {
1418 /* standard interpretation */
1419 IRAtom* sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
1420 return
1421 binop(
1422 opAND,
1423 mkPCastTo( mce,ty,
1424 mkUifU(mce,ty, xxhash,yyhash)),
1425 sevenLeft1
1431 /*------------------------------------------------------------*/
1432 /*--- Emit a test and complaint if something is undefined. ---*/
1433 /*------------------------------------------------------------*/
1435 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1438 /* Set the annotations on a dirty helper to indicate that the stack
1439 pointer and instruction pointers might be read. This is the
1440 behaviour of all 'emit-a-complaint' style functions we might
1441 call. */
1443 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1444 di->nFxState = 2;
1445 di->fxState[0].fx = Ifx_Read;
1446 di->fxState[0].offset = mce->layout->offset_SP;
1447 di->fxState[0].size = mce->layout->sizeof_SP;
1448 di->fxState[0].nRepeats = 0;
1449 di->fxState[0].repeatLen = 0;
1450 di->fxState[1].fx = Ifx_Read;
1451 di->fxState[1].offset = mce->layout->offset_IP;
1452 di->fxState[1].size = mce->layout->sizeof_IP;
1453 di->fxState[1].nRepeats = 0;
1454 di->fxState[1].repeatLen = 0;
1458 /* Check the supplied *original* |atom| for undefinedness, and emit a
1459 complaint if so. Once that happens, mark it as defined. This is
1460 possible because the atom is either a tmp or literal. If it's a
1461 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1462 be defined. In fact as mentioned above, we will have to allocate a
1463 new tmp to carry the new 'defined' shadow value, and update the
1464 original->tmp mapping accordingly; we cannot simply assign a new
1465 value to an existing shadow tmp as this breaks SSAness.
1467 The checks are performed, any resulting complaint emitted, and
1468 |atom|'s shadow temp set to 'defined', ONLY in the case that
1469 |guard| evaluates to True at run-time. If it evaluates to False
1470 then no action is performed. If |guard| is NULL (the usual case)
1471 then it is assumed to be always-true, and hence these actions are
1472 performed unconditionally.
1474 This routine does not generate code to check the definedness of
1475 |guard|. The caller is assumed to have taken care of that already.
1477 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
1479 IRAtom* vatom;
1480 IRType ty;
1481 Int sz;
1482 IRDirty* di;
1483 IRAtom* cond;
1484 IRAtom* origin;
1485 void* fn;
1486 const HChar* nm;
1487 IRExpr** args;
1488 Int nargs;
1490 // Don't do V bit tests if we're not reporting undefined value errors.
1491 if (MC_(clo_mc_level) == 1)
1492 return;
1494 if (guard)
1495 tl_assert(isOriginalAtom(mce, guard));
1497 /* Since the original expression is atomic, there's no duplicated
1498 work generated by making multiple V-expressions for it. So we
1499 don't really care about the possibility that someone else may
1500 also create a V-interpretion for it. */
1501 tl_assert(isOriginalAtom(mce, atom));
1502 vatom = expr2vbits( mce, atom, HuOth );
1503 tl_assert(isShadowAtom(mce, vatom));
1504 tl_assert(sameKindedAtoms(atom, vatom));
1506 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1508 /* sz is only used for constructing the error message */
1509 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1511 cond = mkPCastTo( mce, Ity_I1, vatom );
1512 /* cond will be 0 if all defined, and 1 if any not defined. */
1514 /* Get the origin info for the value we are about to check. At
1515 least, if we are doing origin tracking. If not, use a dummy
1516 zero origin. */
1517 if (MC_(clo_mc_level) == 3) {
1518 origin = schemeE( mce, atom );
1519 if (mce->hWordTy == Ity_I64) {
1520 origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1522 } else {
1523 origin = NULL;
1526 fn = NULL;
1527 nm = NULL;
1528 args = NULL;
1529 nargs = -1;
1531 switch (sz) {
1532 case 0:
1533 if (origin) {
1534 fn = &MC_(helperc_value_check0_fail_w_o);
1535 nm = "MC_(helperc_value_check0_fail_w_o)";
1536 args = mkIRExprVec_1(origin);
1537 nargs = 1;
1538 } else {
1539 fn = &MC_(helperc_value_check0_fail_no_o);
1540 nm = "MC_(helperc_value_check0_fail_no_o)";
1541 args = mkIRExprVec_0();
1542 nargs = 0;
1544 break;
1545 case 1:
1546 if (origin) {
1547 fn = &MC_(helperc_value_check1_fail_w_o);
1548 nm = "MC_(helperc_value_check1_fail_w_o)";
1549 args = mkIRExprVec_1(origin);
1550 nargs = 1;
1551 } else {
1552 fn = &MC_(helperc_value_check1_fail_no_o);
1553 nm = "MC_(helperc_value_check1_fail_no_o)";
1554 args = mkIRExprVec_0();
1555 nargs = 0;
1557 break;
1558 case 4:
1559 if (origin) {
1560 fn = &MC_(helperc_value_check4_fail_w_o);
1561 nm = "MC_(helperc_value_check4_fail_w_o)";
1562 args = mkIRExprVec_1(origin);
1563 nargs = 1;
1564 } else {
1565 fn = &MC_(helperc_value_check4_fail_no_o);
1566 nm = "MC_(helperc_value_check4_fail_no_o)";
1567 args = mkIRExprVec_0();
1568 nargs = 0;
1570 break;
1571 case 8:
1572 if (origin) {
1573 fn = &MC_(helperc_value_check8_fail_w_o);
1574 nm = "MC_(helperc_value_check8_fail_w_o)";
1575 args = mkIRExprVec_1(origin);
1576 nargs = 1;
1577 } else {
1578 fn = &MC_(helperc_value_check8_fail_no_o);
1579 nm = "MC_(helperc_value_check8_fail_no_o)";
1580 args = mkIRExprVec_0();
1581 nargs = 0;
1583 break;
1584 case 2:
1585 case 16:
1586 if (origin) {
1587 fn = &MC_(helperc_value_checkN_fail_w_o);
1588 nm = "MC_(helperc_value_checkN_fail_w_o)";
1589 args = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1590 nargs = 2;
1591 } else {
1592 fn = &MC_(helperc_value_checkN_fail_no_o);
1593 nm = "MC_(helperc_value_checkN_fail_no_o)";
1594 args = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1595 nargs = 1;
1597 break;
1598 default:
1599 VG_(tool_panic)("unexpected szB");
1602 tl_assert(fn);
1603 tl_assert(nm);
1604 tl_assert(args);
1605 tl_assert(nargs >= 0 && nargs <= 2);
1606 tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1607 || (MC_(clo_mc_level) == 2 && origin == NULL) );
1609 di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1610 VG_(fnptr_to_fnentry)( fn ), args );
1611 di->guard = cond; // and cond is PCast-to-1(atom#)
1613 /* If the complaint is to be issued under a guard condition, AND
1614 that into the guard condition for the helper call. */
1615 if (guard) {
1616 IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
1617 IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
1618 IRAtom *e = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
1619 di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
1622 setHelperAnns( mce, di );
1623 stmt( 'V', mce, IRStmt_Dirty(di));
1625 /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
1626 defined -- but only in the case where the guard evaluates to
1627 True at run-time. Do the update by setting the orig->shadow
1628 mapping for tmp to reflect the fact that this shadow is getting
1629 a new value. */
1630 tl_assert(isIRAtom(vatom));
1631 /* sameKindedAtoms ... */
1632 if (vatom->tag == Iex_RdTmp) {
1633 tl_assert(atom->tag == Iex_RdTmp);
1634 if (guard == NULL) {
1635 // guard is 'always True', hence update unconditionally
1636 newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1637 assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1638 definedOfType(ty));
1639 } else {
1640 // update the temp only conditionally. Do this by copying
1641 // its old value when the guard is False.
1642 // The old value ..
1643 IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1644 newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1645 IRAtom* new_tmpV
1646 = assignNew('V', mce, shadowTypeV(ty),
1647 IRExpr_ITE(guard, definedOfType(ty),
1648 mkexpr(old_tmpV)));
1649 assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
1655 /*------------------------------------------------------------*/
1656 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1657 /*------------------------------------------------------------*/
1659 /* Examine the always-defined sections declared in layout to see if
1660 the (offset,size) section is within one. Note, is is an error to
1661 partially fall into such a region: (offset,size) should either be
1662 completely in such a region or completely not-in such a region.
1664 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1666 Int minoffD, maxoffD, i;
1667 Int minoff = offset;
1668 Int maxoff = minoff + size - 1;
1669 tl_assert((minoff & ~0xFFFF) == 0);
1670 tl_assert((maxoff & ~0xFFFF) == 0);
1672 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1673 minoffD = mce->layout->alwaysDefd[i].offset;
1674 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1675 tl_assert((minoffD & ~0xFFFF) == 0);
1676 tl_assert((maxoffD & ~0xFFFF) == 0);
1678 if (maxoff < minoffD || maxoffD < minoff)
1679 continue; /* no overlap */
1680 if (minoff >= minoffD && maxoff <= maxoffD)
1681 return True; /* completely contained in an always-defd section */
1683 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1685 return False; /* could not find any containing section */
1689 /* Generate into bb suitable actions to shadow this Put. If the state
1690 slice is marked 'always defined', do nothing. Otherwise, write the
1691 supplied V bits to the shadow state. We can pass in either an
1692 original atom or a V-atom, but not both. In the former case the
1693 relevant V-bits are then generated from the original.
1694 We assume here, that the definedness of GUARD has already been checked.
1696 static
1697 void do_shadow_PUT ( MCEnv* mce, Int offset,
1698 IRAtom* atom, IRAtom* vatom, IRExpr *guard )
1700 IRType ty;
1702 // Don't do shadow PUTs if we're not doing undefined value checking.
1703 // Their absence lets Vex's optimiser remove all the shadow computation
1704 // that they depend on, which includes GETs of the shadow registers.
1705 if (MC_(clo_mc_level) == 1)
1706 return;
1708 if (atom) {
1709 tl_assert(!vatom);
1710 tl_assert(isOriginalAtom(mce, atom));
1711 vatom = expr2vbits( mce, atom, HuOth );
1712 } else {
1713 tl_assert(vatom);
1714 tl_assert(isShadowAtom(mce, vatom));
1717 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1718 tl_assert(ty != Ity_I1);
1719 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1720 /* later: no ... */
1721 /* emit code to emit a complaint if any of the vbits are 1. */
1722 /* complainIfUndefined(mce, atom); */
1723 } else {
1724 /* Do a plain shadow Put. */
1725 if (guard) {
1726 /* If the guard expression evaluates to false we simply Put the value
1727 that is already stored in the guest state slot */
1728 IRAtom *cond, *iffalse;
1730 cond = assignNew('V', mce, Ity_I1, guard);
1731 iffalse = assignNew('V', mce, ty,
1732 IRExpr_Get(offset + mce->layout->total_sizeB, ty));
1733 vatom = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
1735 stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
1740 /* Return an expression which contains the V bits corresponding to the
1741 given GETI (passed in in pieces).
1743 static
1744 void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1746 IRAtom* vatom;
1747 IRType ty, tyS;
1748 Int arrSize;;
1749 IRRegArray* descr = puti->descr;
1750 IRAtom* ix = puti->ix;
1751 Int bias = puti->bias;
1752 IRAtom* atom = puti->data;
1754 // Don't do shadow PUTIs if we're not doing undefined value checking.
1755 // Their absence lets Vex's optimiser remove all the shadow computation
1756 // that they depend on, which includes GETIs of the shadow registers.
1757 if (MC_(clo_mc_level) == 1)
1758 return;
1760 tl_assert(isOriginalAtom(mce,atom));
1761 vatom = expr2vbits( mce, atom, HuOth );
1762 tl_assert(sameKindedAtoms(atom, vatom));
1763 ty = descr->elemTy;
1764 tyS = shadowTypeV(ty);
1765 arrSize = descr->nElems * sizeofIRType(ty);
1766 tl_assert(ty != Ity_I1);
1767 tl_assert(isOriginalAtom(mce,ix));
1768 complainIfUndefined(mce, ix, NULL);
1769 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1770 /* later: no ... */
1771 /* emit code to emit a complaint if any of the vbits are 1. */
1772 /* complainIfUndefined(mce, atom); */
1773 } else {
1774 /* Do a cloned version of the Put that refers to the shadow
1775 area. */
1776 IRRegArray* new_descr
1777 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1778 tyS, descr->nElems);
1779 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1784 /* Return an expression which contains the V bits corresponding to the
1785 given GET (passed in in pieces).
1787 static
1788 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1790 IRType tyS = shadowTypeV(ty);
1791 tl_assert(ty != Ity_I1);
1792 tl_assert(ty != Ity_I128);
1793 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1794 /* Always defined, return all zeroes of the relevant type */
1795 return definedOfType(tyS);
1796 } else {
1797 /* return a cloned version of the Get that refers to the shadow
1798 area. */
1799 /* FIXME: this isn't an atom! */
1800 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1805 /* Return an expression which contains the V bits corresponding to the
1806 given GETI (passed in in pieces).
1808 static
1809 IRExpr* shadow_GETI ( MCEnv* mce,
1810 IRRegArray* descr, IRAtom* ix, Int bias )
1812 IRType ty = descr->elemTy;
1813 IRType tyS = shadowTypeV(ty);
1814 Int arrSize = descr->nElems * sizeofIRType(ty);
1815 tl_assert(ty != Ity_I1);
1816 tl_assert(isOriginalAtom(mce,ix));
1817 complainIfUndefined(mce, ix, NULL);
1818 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1819 /* Always defined, return all zeroes of the relevant type */
1820 return definedOfType(tyS);
1821 } else {
1822 /* return a cloned version of the Get that refers to the shadow
1823 area. */
1824 IRRegArray* new_descr
1825 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1826 tyS, descr->nElems);
1827 return IRExpr_GetI( new_descr, ix, bias );
1832 /*------------------------------------------------------------*/
1833 /*--- Generating approximations for unknown operations, ---*/
1834 /*--- using lazy-propagate semantics ---*/
1835 /*------------------------------------------------------------*/
1837 /* Lazy propagation of undefinedness from two values, resulting in the
1838 specified shadow type.
1840 static
1841 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1843 IRAtom* at;
1844 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1845 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1846 tl_assert(isShadowAtom(mce,va1));
1847 tl_assert(isShadowAtom(mce,va2));
1849 /* The general case is inefficient because PCast is an expensive
1850 operation. Here are some special cases which use PCast only
1851 once rather than twice. */
1853 /* I64 x I64 -> I64 */
1854 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1855 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1856 at = mkUifU(mce, Ity_I64, va1, va2);
1857 at = mkPCastTo(mce, Ity_I64, at);
1858 return at;
1861 /* I64 x I64 -> I32 */
1862 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1863 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1864 at = mkUifU(mce, Ity_I64, va1, va2);
1865 at = mkPCastTo(mce, Ity_I32, at);
1866 return at;
1869 /* I32 x I32 -> I32 */
1870 if (t1 == Ity_I32 && t2 == Ity_I32 && finalVty == Ity_I32) {
1871 if (0) VG_(printf)("mkLazy2: I32 x I32 -> I32\n");
1872 at = mkUifU(mce, Ity_I32, va1, va2);
1873 at = mkPCastTo(mce, Ity_I32, at);
1874 return at;
1877 if (0) {
1878 VG_(printf)("mkLazy2 ");
1879 ppIRType(t1);
1880 VG_(printf)("_");
1881 ppIRType(t2);
1882 VG_(printf)("_");
1883 ppIRType(finalVty);
1884 VG_(printf)("\n");
1887 /* General case: force everything via 32-bit intermediaries. */
1888 at = mkPCastTo(mce, Ity_I32, va1);
1889 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1890 at = mkPCastTo(mce, finalVty, at);
1891 return at;
1895 /* 3-arg version of the above. */
1896 static
1897 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1898 IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1900 IRAtom* at;
1901 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1902 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1903 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1904 tl_assert(isShadowAtom(mce,va1));
1905 tl_assert(isShadowAtom(mce,va2));
1906 tl_assert(isShadowAtom(mce,va3));
1908 /* The general case is inefficient because PCast is an expensive
1909 operation. Here are some special cases which use PCast only
1910 twice rather than three times. */
1912 /* I32 x I64 x I64 -> I64 */
1913 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1914 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1915 && finalVty == Ity_I64) {
1916 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1917 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1918 mode indication which is fully defined, this should get
1919 folded out later. */
1920 at = mkPCastTo(mce, Ity_I64, va1);
1921 /* Now fold in 2nd and 3rd args. */
1922 at = mkUifU(mce, Ity_I64, at, va2);
1923 at = mkUifU(mce, Ity_I64, at, va3);
1924 /* and PCast once again. */
1925 at = mkPCastTo(mce, Ity_I64, at);
1926 return at;
1929 /* I32 x I8 x I64 -> I64 */
1930 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
1931 && finalVty == Ity_I64) {
1932 if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
1933 /* Widen 1st and 2nd args to I64. Since 1st arg is typically a
1934 * rounding mode indication which is fully defined, this should
1935 * get folded out later.
1937 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1938 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1939 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1940 at = mkUifU(mce, Ity_I64, at, va3);
1941 /* and PCast once again. */
1942 at = mkPCastTo(mce, Ity_I64, at);
1943 return at;
1946 /* I32 x I64 x I64 -> I32 */
1947 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1948 && finalVty == Ity_I32) {
1949 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1950 at = mkPCastTo(mce, Ity_I64, va1);
1951 at = mkUifU(mce, Ity_I64, at, va2);
1952 at = mkUifU(mce, Ity_I64, at, va3);
1953 at = mkPCastTo(mce, Ity_I32, at);
1954 return at;
1957 /* I32 x I32 x I32 -> I32 */
1958 /* 32-bit FP idiom, as (eg) happens on ARM */
1959 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1960 && finalVty == Ity_I32) {
1961 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1962 at = va1;
1963 at = mkUifU(mce, Ity_I32, at, va2);
1964 at = mkUifU(mce, Ity_I32, at, va3);
1965 at = mkPCastTo(mce, Ity_I32, at);
1966 return at;
1969 /* I32 x I128 x I128 -> I128 */
1970 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1971 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1972 && finalVty == Ity_I128) {
1973 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1974 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1975 mode indication which is fully defined, this should get
1976 folded out later. */
1977 at = mkPCastTo(mce, Ity_I128, va1);
1978 /* Now fold in 2nd and 3rd args. */
1979 at = mkUifU(mce, Ity_I128, at, va2);
1980 at = mkUifU(mce, Ity_I128, at, va3);
1981 /* and PCast once again. */
1982 at = mkPCastTo(mce, Ity_I128, at);
1983 return at;
1986 /* I32 x I8 x I128 -> I128 */
1987 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1988 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
1989 && finalVty == Ity_I128) {
1990 if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
1991 /* Use I64 as an intermediate type, which means PCasting all 3
1992 args to I64 to start with. 1st arg is typically a rounding
1993 mode indication which is fully defined, so we hope that it
1994 will get folded out later. */
1995 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1996 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1997 IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
1998 /* Now UifU all three together. */
1999 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
2000 at = mkUifU(mce, Ity_I64, at, at3); // ... `UifU` PCast(va3)
2001 /* and PCast once again. */
2002 at = mkPCastTo(mce, Ity_I128, at);
2003 return at;
2005 if (1) {
2006 VG_(printf)("mkLazy3: ");
2007 ppIRType(t1);
2008 VG_(printf)(" x ");
2009 ppIRType(t2);
2010 VG_(printf)(" x ");
2011 ppIRType(t3);
2012 VG_(printf)(" -> ");
2013 ppIRType(finalVty);
2014 VG_(printf)("\n");
2017 tl_assert(0);
2018 /* General case: force everything via 32-bit intermediaries. */
2020 at = mkPCastTo(mce, Ity_I32, va1);
2021 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
2022 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
2023 at = mkPCastTo(mce, finalVty, at);
2024 return at;
2029 /* 4-arg version of the above. */
2030 static
2031 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
2032 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
2034 IRAtom* at;
2035 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
2036 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
2037 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
2038 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
2039 tl_assert(isShadowAtom(mce,va1));
2040 tl_assert(isShadowAtom(mce,va2));
2041 tl_assert(isShadowAtom(mce,va3));
2042 tl_assert(isShadowAtom(mce,va4));
2044 /* The general case is inefficient because PCast is an expensive
2045 operation. Here are some special cases which use PCast only
2046 twice rather than three times. */
2048 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
2050 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 && t4 == Ity_I128
2051 && finalVty == Ity_I128) {
2052 if (0) VG_(printf)("mkLazy4: I32 x I128 x I128 x I128 -> I128\n");
2053 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
2054 mode indication which is fully defined, this should get
2055 folded out later. */
2056 at = mkPCastTo(mce, Ity_I128, va1);
2057 /* Now fold in 2nd, 3rd, 4th args. */
2058 at = mkUifU(mce, Ity_I128, at, va2);
2059 at = mkUifU(mce, Ity_I128, at, va3);
2060 at = mkUifU(mce, Ity_I128, at, va4);
2061 /* and PCast once again. */
2062 at = mkPCastTo(mce, Ity_I128, at);
2063 return at;
2066 /* I32 x I64 x I64 x I64 -> I64 */
2067 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
2068 && finalVty == Ity_I64) {
2069 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
2070 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
2071 mode indication which is fully defined, this should get
2072 folded out later. */
2073 at = mkPCastTo(mce, Ity_I64, va1);
2074 /* Now fold in 2nd, 3rd, 4th args. */
2075 at = mkUifU(mce, Ity_I64, at, va2);
2076 at = mkUifU(mce, Ity_I64, at, va3);
2077 at = mkUifU(mce, Ity_I64, at, va4);
2078 /* and PCast once again. */
2079 at = mkPCastTo(mce, Ity_I64, at);
2080 return at;
2082 /* I32 x I32 x I32 x I32 -> I32 */
2083 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
2084 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
2085 && finalVty == Ity_I32) {
2086 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
2087 at = va1;
2088 /* Now fold in 2nd, 3rd, 4th args. */
2089 at = mkUifU(mce, Ity_I32, at, va2);
2090 at = mkUifU(mce, Ity_I32, at, va3);
2091 at = mkUifU(mce, Ity_I32, at, va4);
2092 at = mkPCastTo(mce, Ity_I32, at);
2093 return at;
2096 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
2097 && finalVty == Ity_I32) {
2098 if (0) VG_(printf)("mkLazy4: I32 x I8 x I8 x I8 -> I32\n");
2099 at = mkPCastTo(mce, Ity_I8, va1);
2100 /* Now fold in 2nd, 3rd, 4th args. */
2101 at = mkUifU(mce, Ity_I8, at, va2);
2102 at = mkUifU(mce, Ity_I8, at, va3);
2103 at = mkUifU(mce, Ity_I8, at, va4);
2104 at = mkPCastTo(mce, Ity_I32, at);
2105 return at;
2108 if (t1 == Ity_I64 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
2109 && finalVty == Ity_I64) {
2110 if (0) VG_(printf)("mkLazy4: I64 x I8 x I8 x I8 -> I64\n");
2111 at = mkPCastTo(mce, Ity_I8, va1);
2112 /* Now fold in 2nd, 3rd, 4th args. */
2113 at = mkUifU(mce, Ity_I8, at, va2);
2114 at = mkUifU(mce, Ity_I8, at, va3);
2115 at = mkUifU(mce, Ity_I8, at, va4);
2116 at = mkPCastTo(mce, Ity_I64, at);
2117 return at;
2120 if (1) {
2121 VG_(printf)("mkLazy4: ");
2122 ppIRType(t1);
2123 VG_(printf)(" x ");
2124 ppIRType(t2);
2125 VG_(printf)(" x ");
2126 ppIRType(t3);
2127 VG_(printf)(" x ");
2128 ppIRType(t4);
2129 VG_(printf)(" -> ");
2130 ppIRType(finalVty);
2131 VG_(printf)("\n");
2134 tl_assert(0);
2138 /* Do the lazy propagation game from a null-terminated vector of
2139 atoms. This is presumably the arguments to a helper call, so the
2140 IRCallee info is also supplied in order that we can know which
2141 arguments should be ignored (via the .mcx_mask field).
2143 static
2144 IRAtom* mkLazyN ( MCEnv* mce,
2145 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
2147 Int i;
2148 IRAtom* here;
2149 IRAtom* curr;
2150 IRType mergeTy;
2151 Bool mergeTy64 = True;
2153 /* Decide on the type of the merge intermediary. If all relevant
2154 args are I64, then it's I64. In all other circumstances, use
2155 I32. */
2156 for (i = 0; exprvec[i]; i++) {
2157 tl_assert(i < 32);
2158 tl_assert(isOriginalAtom(mce, exprvec[i]));
2159 if (cee->mcx_mask & (1<<i))
2160 continue;
2161 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
2162 mergeTy64 = False;
2165 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
2166 curr = definedOfType(mergeTy);
2168 for (i = 0; exprvec[i]; i++) {
2169 tl_assert(i < 32);
2170 tl_assert(isOriginalAtom(mce, exprvec[i]));
2171 /* Only take notice of this arg if the callee's mc-exclusion
2172 mask does not say it is to be excluded. */
2173 if (cee->mcx_mask & (1<<i)) {
2174 /* the arg is to be excluded from definedness checking. Do
2175 nothing. */
2176 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
2177 } else {
2178 /* calculate the arg's definedness, and pessimistically merge
2179 it in. */
2180 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i], HuOth) );
2181 curr = mergeTy64
2182 ? mkUifU64(mce, here, curr)
2183 : mkUifU32(mce, here, curr);
2186 return mkPCastTo(mce, finalVtype, curr );
2190 /*------------------------------------------------------------*/
2191 /*--- Generating expensive sequences for exact carry-chain ---*/
2192 /*--- propagation in add/sub and related operations. ---*/
2193 /*------------------------------------------------------------*/
2195 static
2196 IRAtom* expensiveAddSub ( MCEnv* mce,
2197 Bool add,
2198 IRType ty,
2199 IRAtom* qaa, IRAtom* qbb,
2200 IRAtom* aa, IRAtom* bb )
2202 IRAtom *a_min, *b_min, *a_max, *b_max;
2203 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;
2205 tl_assert(isShadowAtom(mce,qaa));
2206 tl_assert(isShadowAtom(mce,qbb));
2207 tl_assert(isOriginalAtom(mce,aa));
2208 tl_assert(isOriginalAtom(mce,bb));
2209 tl_assert(sameKindedAtoms(qaa,aa));
2210 tl_assert(sameKindedAtoms(qbb,bb));
2212 switch (ty) {
2213 case Ity_I32:
2214 opAND = Iop_And32;
2215 opOR = Iop_Or32;
2216 opXOR = Iop_Xor32;
2217 opNOT = Iop_Not32;
2218 opADD = Iop_Add32;
2219 opSUB = Iop_Sub32;
2220 break;
2221 case Ity_I64:
2222 opAND = Iop_And64;
2223 opOR = Iop_Or64;
2224 opXOR = Iop_Xor64;
2225 opNOT = Iop_Not64;
2226 opADD = Iop_Add64;
2227 opSUB = Iop_Sub64;
2228 break;
2229 default:
2230 VG_(tool_panic)("expensiveAddSub");
2233 // a_min = aa & ~qaa
2234 a_min = assignNew('V', mce,ty,
2235 binop(opAND, aa,
2236 assignNew('V', mce,ty, unop(opNOT, qaa))));
2238 // b_min = bb & ~qbb
2239 b_min = assignNew('V', mce,ty,
2240 binop(opAND, bb,
2241 assignNew('V', mce,ty, unop(opNOT, qbb))));
2243 // a_max = aa | qaa
2244 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
2246 // b_max = bb | qbb
2247 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
2249 if (add) {
2250 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
2251 return
2252 assignNew('V', mce,ty,
2253 binop( opOR,
2254 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
2255 assignNew('V', mce,ty,
2256 binop( opXOR,
2257 assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
2258 assignNew('V', mce,ty, binop(opADD, a_max, b_max))
2263 } else {
2264 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
2265 return
2266 assignNew('V', mce,ty,
2267 binop( opOR,
2268 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
2269 assignNew('V', mce,ty,
2270 binop( opXOR,
2271 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
2272 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
2282 static
2283 IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
2284 IRAtom* atom, IRAtom* vatom )
2286 IRType ty;
2287 IROp xorOp, subOp, andOp;
2288 IRExpr *one;
2289 IRAtom *improver, *improved;
2290 tl_assert(isShadowAtom(mce,vatom));
2291 tl_assert(isOriginalAtom(mce,atom));
2292 tl_assert(sameKindedAtoms(atom,vatom));
2294 switch (czop) {
2295 case Iop_Ctz32: case Iop_CtzNat32:
2296 ty = Ity_I32;
2297 xorOp = Iop_Xor32;
2298 subOp = Iop_Sub32;
2299 andOp = Iop_And32;
2300 one = mkU32(1);
2301 break;
2302 case Iop_Ctz64: case Iop_CtzNat64:
2303 ty = Ity_I64;
2304 xorOp = Iop_Xor64;
2305 subOp = Iop_Sub64;
2306 andOp = Iop_And64;
2307 one = mkU64(1);
2308 break;
2309 default:
2310 ppIROp(czop);
2311 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
2314 // improver = atom ^ (atom - 1)
2316 // That is, improver has its low ctz(atom)+1 bits equal to one;
2317 // higher bits (if any) equal to zero. So it's exactly the right
2318 // mask to use to remove the irrelevant undefined input bits.
2319 /* Here are some examples:
2320 atom = U...U 1 0...0
2321 atom-1 = U...U 0 1...1
2322 ^ed = 0...0 1 11111, which correctly describes which bits of |atom|
2323 actually influence the result
2324 A boundary case
2325 atom = 0...0
2326 atom-1 = 1...1
2327 ^ed = 11111, also a correct mask for the input: all input bits
2328 are relevant
2329 Another boundary case
2330 atom = 1..1 1
2331 atom-1 = 1..1 0
2332 ^ed = 0..0 1, also a correct mask: only the rightmost input bit
2333 is relevant
2334 Now with misc U bits interspersed:
2335 atom = U...U 1 0 U...U 0 1 0...0
2336 atom-1 = U...U 1 0 U...U 0 0 1...1
2337 ^ed = 0...0 0 0 0...0 0 1 1...1, also correct
2338 (Per re-check/analysis of 14 Nov 2018)
2340 improver = assignNew('V', mce,ty,
2341 binop(xorOp,
2342 atom,
2343 assignNew('V', mce, ty,
2344 binop(subOp, atom, one))));
2346 // improved = vatom & improver
2348 // That is, treat any V bits to the left of the rightmost ctz(atom)+1
2349 // bits as "defined".
2350 improved = assignNew('V', mce, ty,
2351 binop(andOp, vatom, improver));
2353 // Return pessimizing cast of improved.
2354 return mkPCastTo(mce, ty, improved);
2357 static
2358 IRAtom* expensiveCountLeadingZeroes ( MCEnv* mce, IROp czop,
2359 IRAtom* atom, IRAtom* vatom )
2361 IRType ty;
2362 IROp shrOp, notOp, andOp;
2363 IRAtom* (*mkRight)(MCEnv*, IRAtom*);
2364 IRAtom *improver, *improved;
2365 tl_assert(isShadowAtom(mce,vatom));
2366 tl_assert(isOriginalAtom(mce,atom));
2367 tl_assert(sameKindedAtoms(atom,vatom));
2369 switch (czop) {
2370 case Iop_Clz32: case Iop_ClzNat32:
2371 ty = Ity_I32;
2372 shrOp = Iop_Shr32;
2373 notOp = Iop_Not32;
2374 andOp = Iop_And32;
2375 mkRight = mkRight32;
2376 break;
2377 case Iop_Clz64: case Iop_ClzNat64:
2378 ty = Ity_I64;
2379 shrOp = Iop_Shr64;
2380 notOp = Iop_Not64;
2381 andOp = Iop_And64;
2382 mkRight = mkRight64;
2383 break;
2384 default:
2385 ppIROp(czop);
2386 VG_(tool_panic)("memcheck:expensiveCountLeadingZeroes");
2389 // This is in principle very similar to how expensiveCountTrailingZeroes
2390 // works. That function computed an "improver", which it used to mask
2391 // off all but the rightmost 1-bit and the zeroes to the right of it,
2392 // hence removing irrelevant bits from the input. Here, we play the
2393 // exact same game but with the left-vs-right roles interchanged.
2394 // Unfortunately calculation of the improver in this case is
2395 // significantly more expensive.
2397 // improver = ~(RIGHT(atom) >>u 1)
2399 // That is, improver has its upper clz(atom)+1 bits equal to one;
2400 // lower bits (if any) equal to zero. So it's exactly the right
2401 // mask to use to remove the irrelevant undefined input bits.
2402 /* Here are some examples:
2403 atom = 0...0 1 U...U
2404 R(atom) = 0...0 1 1...1
2405 R(atom) >>u 1 = 0...0 0 1...1
2406 ~(R(atom) >>u 1) = 1...1 1 0...0
2407 which correctly describes which bits of |atom|
2408 actually influence the result
2409 A boundary case
2410 atom = 0...0
2411 R(atom) = 0...0
2412 R(atom) >>u 1 = 0...0
2413 ~(R(atom) >>u 1) = 1...1
2414 also a correct mask for the input: all input bits
2415 are relevant
2416 Another boundary case
2417 atom = 1 1..1
2418 R(atom) = 1 1..1
2419 R(atom) >>u 1 = 0 1..1
2420 ~(R(atom) >>u 1) = 1 0..0
2421 also a correct mask: only the leftmost input bit
2422 is relevant
2423 Now with misc U bits interspersed:
2424 atom = 0...0 1 U...U 0 1 U...U
2425 R(atom) = 0...0 1 1...1 1 1 1...1
2426 R(atom) >>u 1 = 0...0 0 1...1 1 1 1...1
2427 ~(R(atom) >>u 1) = 1...1 1 0...0 0 0 0...0, also correct
2428 (Per initial implementation of 15 Nov 2018)
2430 improver = mkRight(mce, atom);
2431 improver = assignNew('V', mce, ty, binop(shrOp, improver, mkU8(1)));
2432 improver = assignNew('V', mce, ty, unop(notOp, improver));
2434 // improved = vatom & improver
2436 // That is, treat any V bits to the right of the leftmost clz(atom)+1
2437 // bits as "defined".
2438 improved = assignNew('V', mce, ty,
2439 binop(andOp, vatom, improver));
2441 // Return pessimizing cast of improved.
2442 return mkPCastTo(mce, ty, improved);
2446 /*------------------------------------------------------------*/
2447 /*--- Scalar shifts. ---*/
2448 /*------------------------------------------------------------*/
2450 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
2451 idea is to shift the definedness bits by the original shift amount.
2452 This introduces 0s ("defined") in new positions for left shifts and
2453 unsigned right shifts, and copies the top definedness bit for
2454 signed right shifts. So, conveniently, applying the original shift
2455 operator to the definedness bits for the left arg is exactly the
2456 right thing to do:
2458 (qaa << bb)
2460 However if the shift amount is undefined then the whole result
2461 is undefined. Hence need:
2463 (qaa << bb) `UifU` PCast(qbb)
2465 If the shift amount bb is a literal than qbb will say 'all defined'
2466 and the UifU and PCast will get folded out by post-instrumentation
2467 optimisation.
2469 static IRAtom* scalarShift ( MCEnv* mce,
2470 IRType ty,
2471 IROp original_op,
2472 IRAtom* qaa, IRAtom* qbb,
2473 IRAtom* aa, IRAtom* bb )
2475 tl_assert(isShadowAtom(mce,qaa));
2476 tl_assert(isShadowAtom(mce,qbb));
2477 tl_assert(isOriginalAtom(mce,aa));
2478 tl_assert(isOriginalAtom(mce,bb));
2479 tl_assert(sameKindedAtoms(qaa,aa));
2480 tl_assert(sameKindedAtoms(qbb,bb));
2481 return
2482 assignNew(
2483 'V', mce, ty,
2484 mkUifU( mce, ty,
2485 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
2486 mkPCastTo(mce, ty, qbb)
2492 /*------------------------------------------------------------*/
2493 /*--- Helpers for dealing with vector primops. ---*/
2494 /*------------------------------------------------------------*/
2496 /* Vector pessimisation -- pessimise within each lane individually. */
2498 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
2500 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
2503 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
2505 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
2508 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2510 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
2513 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2515 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
2518 static IRAtom* mkPCast128x1 ( MCEnv* mce, IRAtom* at )
2520 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ128x1, at));
2523 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2525 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2528 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2530 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2533 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2535 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
2538 static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2540 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2543 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2545 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
2548 static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2550 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2553 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2555 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
2558 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2560 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2563 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2565 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2569 /* Here's a simple scheme capable of handling ops derived from SSE1
2570 code and while only generating ops that can be efficiently
2571 implemented in SSE1. */
2573 /* All-lanes versions are straightforward:
2575 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
2577 unary32Fx4(x,y) ==> PCast32x4(x#)
2579 Lowest-lane-only versions are more complex:
2581 binary32F0x4(x,y) ==> SetV128lo32(
2582 x#,
2583 PCast32(V128to32(UifUV128(x#,y#)))
2586 This is perhaps not so obvious. In particular, it's faster to
2587 do a V128-bit UifU and then take the bottom 32 bits than the more
2588 obvious scheme of taking the bottom 32 bits of each operand
2589 and doing a 32-bit UifU. Basically since UifU is fast and
2590 chopping lanes off vector values is slow.
2592 Finally:
2594 unary32F0x4(x) ==> SetV128lo32(
2595 x#,
2596 PCast32(V128to32(x#))
2599 Where:
2601 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2602 PCast32x4(v#) = CmpNEZ32x4(v#)
2605 static
2606 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2608 IRAtom* at;
2609 tl_assert(isShadowAtom(mce, vatomX));
2610 tl_assert(isShadowAtom(mce, vatomY));
2611 at = mkUifUV128(mce, vatomX, vatomY);
2612 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
2613 return at;
2616 static
2617 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2619 IRAtom* at;
2620 tl_assert(isShadowAtom(mce, vatomX));
2621 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
2622 return at;
2625 static
2626 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2628 IRAtom* at;
2629 tl_assert(isShadowAtom(mce, vatomX));
2630 tl_assert(isShadowAtom(mce, vatomY));
2631 at = mkUifUV128(mce, vatomX, vatomY);
2632 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
2633 at = mkPCastTo(mce, Ity_I32, at);
2634 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2635 return at;
2638 static
2639 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2641 IRAtom* at;
2642 tl_assert(isShadowAtom(mce, vatomX));
2643 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
2644 at = mkPCastTo(mce, Ity_I32, at);
2645 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2646 return at;
2649 /* --- ... and ... 64Fx2 versions of the same ... --- */
2651 static
2652 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2654 IRAtom* at;
2655 tl_assert(isShadowAtom(mce, vatomX));
2656 tl_assert(isShadowAtom(mce, vatomY));
2657 at = mkUifUV128(mce, vatomX, vatomY);
2658 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
2659 return at;
2662 static
2663 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2665 IRAtom* at;
2666 tl_assert(isShadowAtom(mce, vatomX));
2667 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2668 return at;
2671 static
2672 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2674 IRAtom* at;
2675 tl_assert(isShadowAtom(mce, vatomX));
2676 tl_assert(isShadowAtom(mce, vatomY));
2677 at = mkUifUV128(mce, vatomX, vatomY);
2678 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2679 at = mkPCastTo(mce, Ity_I64, at);
2680 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2681 return at;
2684 static
2685 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2687 IRAtom* at;
2688 tl_assert(isShadowAtom(mce, vatomX));
2689 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2690 at = mkPCastTo(mce, Ity_I64, at);
2691 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2692 return at;
2695 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2697 static
2698 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2700 IRAtom* at;
2701 tl_assert(isShadowAtom(mce, vatomX));
2702 tl_assert(isShadowAtom(mce, vatomY));
2703 at = mkUifU64(mce, vatomX, vatomY);
2704 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2705 return at;
2708 static
2709 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2711 IRAtom* at;
2712 tl_assert(isShadowAtom(mce, vatomX));
2713 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2714 return at;
2717 /* --- ... and ... 64Fx4 versions of the same ... --- */
2719 static
2720 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2722 IRAtom* at;
2723 tl_assert(isShadowAtom(mce, vatomX));
2724 tl_assert(isShadowAtom(mce, vatomY));
2725 at = mkUifUV256(mce, vatomX, vatomY);
2726 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2727 return at;
2730 static
2731 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2733 IRAtom* at;
2734 tl_assert(isShadowAtom(mce, vatomX));
2735 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2736 return at;
2739 /* --- ... and ... 32Fx8 versions of the same ... --- */
2741 static
2742 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2744 IRAtom* at;
2745 tl_assert(isShadowAtom(mce, vatomX));
2746 tl_assert(isShadowAtom(mce, vatomY));
2747 at = mkUifUV256(mce, vatomX, vatomY);
2748 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2749 return at;
2752 static
2753 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2755 IRAtom* at;
2756 tl_assert(isShadowAtom(mce, vatomX));
2757 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2758 return at;
2761 /* --- 64Fx2 binary FP ops, with rounding mode --- */
2763 static
2764 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2765 IRAtom* vatomX, IRAtom* vatomY )
2767 /* This is the same as binary64Fx2, except that we subsequently
2768 pessimise vRM (definedness of the rounding mode), widen to 128
2769 bits and UifU it into the result. As with the scalar cases, if
2770 the RM is a constant then it is defined and so this extra bit
2771 will get constant-folded out later. */
2772 // "do" the vector args
2773 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2774 // PCast the RM, and widen it to 128 bits
2775 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2776 // Roll it into the result
2777 t1 = mkUifUV128(mce, t1, t2);
2778 return t1;
2781 /* --- ... and ... 32Fx4 versions of the same --- */
2783 static
2784 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2785 IRAtom* vatomX, IRAtom* vatomY )
2787 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2788 // PCast the RM, and widen it to 128 bits
2789 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2790 // Roll it into the result
2791 t1 = mkUifUV128(mce, t1, t2);
2792 return t1;
2795 /* --- ... and ... 64Fx4 versions of the same --- */
2797 static
2798 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2799 IRAtom* vatomX, IRAtom* vatomY )
2801 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2802 // PCast the RM, and widen it to 256 bits
2803 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2804 // Roll it into the result
2805 t1 = mkUifUV256(mce, t1, t2);
2806 return t1;
2809 /* --- ... and ... 32Fx8 versions of the same --- */
2811 static
2812 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2813 IRAtom* vatomX, IRAtom* vatomY )
2815 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2816 // PCast the RM, and widen it to 256 bits
2817 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2818 // Roll it into the result
2819 t1 = mkUifUV256(mce, t1, t2);
2820 return t1;
2823 /* --- 64Fx2 unary FP ops, with rounding mode --- */
2825 static
2826 IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2828 /* Same scheme as binary64Fx2_w_rm. */
2829 // "do" the vector arg
2830 IRAtom* t1 = unary64Fx2(mce, vatomX);
2831 // PCast the RM, and widen it to 128 bits
2832 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2833 // Roll it into the result
2834 t1 = mkUifUV128(mce, t1, t2);
2835 return t1;
2838 /* --- ... and ... 32Fx4 versions of the same --- */
2840 static
2841 IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2843 /* Same scheme as binaryFx4_w_rm. */
2844 IRAtom* t1 = unary32Fx4(mce, vatomX);
2845 // PCast the RM, and widen it to 128 bits
2846 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2847 // Roll it into the result
2848 t1 = mkUifUV128(mce, t1, t2);
2849 return t1;
2852 /* --- ... and ... 32Fx8 versions of the same --- */
2854 static
2855 IRAtom* unary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2857 /* Same scheme as unary32Fx8_w_rm. */
2858 IRAtom* t1 = unary32Fx8(mce, vatomX);
2859 // PCast the RM, and widen it to 256 bits
2860 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2861 // Roll it into the result
2862 t1 = mkUifUV256(mce, t1, t2);
2863 return t1;
2867 /* --- --- Vector saturated narrowing --- --- */
2869 /* We used to do something very clever here, but on closer inspection
2870 (2011-Jun-15), and in particular bug #279698, it turns out to be
2871 wrong. Part of the problem came from the fact that for a long
2872 time, the IR primops to do with saturated narrowing were
2873 underspecified and managed to confuse multiple cases which needed
2874 to be separate: the op names had a signedness qualifier, but in
2875 fact the source and destination signednesses needed to be specified
2876 independently, so the op names really need two independent
2877 signedness specifiers.
2879 As of 2011-Jun-15 (ish) the underspecification was sorted out
2880 properly. The incorrect instrumentation remained, though. That
2881 has now (2011-Oct-22) been fixed.
2883 What we now do is simple:
2885 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2886 number of lanes, X is the source lane width and signedness, and Y
2887 is the destination lane width and signedness. In all cases the
2888 destination lane width is half the source lane width, so the names
2889 have a bit of redundancy, but are at least easy to read.
2891 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2892 to unsigned 16s.
2894 Let Vanilla(OP) be a function that takes OP, one of these
2895 saturating narrowing ops, and produces the same "shaped" narrowing
2896 op which is not saturating, but merely dumps the most significant
2897 bits. "same shape" means that the lane numbers and widths are the
2898 same as with OP.
2900 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2901 = Iop_NarrowBin32to16x8,
2902 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2903 dumping the top half of each lane.
2905 So, with that in place, the scheme is simple, and it is simple to
2906 pessimise each lane individually and then apply Vanilla(OP) so as
2907 to get the result in the right "shape". If the original OP is
2908 QNarrowBinXtoYxZ then we produce
2910 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2912 or for the case when OP is unary (Iop_QNarrowUn*)
2914 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2916 static
2917 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2919 switch (qnarrowOp) {
2920 /* Binary: (128, 128) -> 128 */
2921 case Iop_QNarrowBin16Sto8Ux16:
2922 case Iop_QNarrowBin16Sto8Sx16:
2923 case Iop_QNarrowBin16Uto8Ux16:
2924 case Iop_QNarrowBin64Sto32Sx4:
2925 case Iop_QNarrowBin64Uto32Ux4:
2926 return Iop_NarrowBin16to8x16;
2927 case Iop_QNarrowBin32Sto16Ux8:
2928 case Iop_QNarrowBin32Sto16Sx8:
2929 case Iop_QNarrowBin32Uto16Ux8:
2930 return Iop_NarrowBin32to16x8;
2931 /* Binary: (64, 64) -> 64 */
2932 case Iop_QNarrowBin32Sto16Sx4:
2933 return Iop_NarrowBin32to16x4;
2934 case Iop_QNarrowBin16Sto8Ux8:
2935 case Iop_QNarrowBin16Sto8Sx8:
2936 return Iop_NarrowBin16to8x8;
2937 /* Unary: 128 -> 64 */
2938 case Iop_QNarrowUn64Uto32Ux2:
2939 case Iop_QNarrowUn64Sto32Sx2:
2940 case Iop_QNarrowUn64Sto32Ux2:
2941 return Iop_NarrowUn64to32x2;
2942 case Iop_QNarrowUn32Uto16Ux4:
2943 case Iop_QNarrowUn32Sto16Sx4:
2944 case Iop_QNarrowUn32Sto16Ux4:
2945 case Iop_F32toF16x4_DEP:
2946 return Iop_NarrowUn32to16x4;
2947 case Iop_QNarrowUn16Uto8Ux8:
2948 case Iop_QNarrowUn16Sto8Sx8:
2949 case Iop_QNarrowUn16Sto8Ux8:
2950 return Iop_NarrowUn16to8x8;
2951 default:
2952 ppIROp(qnarrowOp);
2953 VG_(tool_panic)("vanillaNarrowOpOfShape");
2957 static
2958 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2959 IRAtom* vatom1, IRAtom* vatom2)
2961 IRAtom *at1, *at2, *at3;
2962 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2963 switch (narrow_op) {
2964 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
2965 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
2966 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2967 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2968 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2969 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2970 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2971 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2972 default: VG_(tool_panic)("vectorNarrowBinV128");
2974 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2975 tl_assert(isShadowAtom(mce,vatom1));
2976 tl_assert(isShadowAtom(mce,vatom2));
2977 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2978 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2979 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2980 return at3;
2983 static
2984 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2985 IRAtom* vatom1, IRAtom* vatom2)
2987 IRAtom *at1, *at2, *at3;
2988 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2989 switch (narrow_op) {
2990 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2991 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2992 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2993 default: VG_(tool_panic)("vectorNarrowBin64");
2995 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2996 tl_assert(isShadowAtom(mce,vatom1));
2997 tl_assert(isShadowAtom(mce,vatom2));
2998 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2999 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
3000 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
3001 return at3;
3004 static
3005 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
3006 IRAtom* vatom1)
3008 IRAtom *at1, *at2;
3009 IRAtom* (*pcast)( MCEnv*, IRAtom* );
3010 tl_assert(isShadowAtom(mce,vatom1));
3011 /* For vanilla narrowing (non-saturating), we can just apply
3012 the op directly to the V bits. */
3013 switch (narrow_op) {
3014 case Iop_NarrowUn16to8x8:
3015 case Iop_NarrowUn32to16x4:
3016 case Iop_NarrowUn64to32x2:
3017 case Iop_F32toF16x4_DEP:
3018 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
3019 return at1;
3020 default:
3021 break; /* Do Plan B */
3023 /* Plan B: for ops that involve a saturation operation on the args,
3024 we must PCast before the vanilla narrow. */
3025 switch (narrow_op) {
3026 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
3027 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
3028 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
3029 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
3030 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
3031 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
3032 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
3033 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
3034 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
3035 default: VG_(tool_panic)("vectorNarrowUnV128");
3037 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
3038 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
3039 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
3040 return at2;
3043 static
3044 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
3045 IRAtom* vatom1)
3047 IRAtom *at1, *at2;
3048 IRAtom* (*pcast)( MCEnv*, IRAtom* );
3049 switch (longen_op) {
3050 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
3051 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
3052 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
3053 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
3054 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
3055 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
3056 case Iop_F16toF32x4: pcast = mkPCast32x4; break;
3057 default: VG_(tool_panic)("vectorWidenI64");
3059 tl_assert(isShadowAtom(mce,vatom1));
3060 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
3061 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
3062 return at2;
3066 /* --- --- Vector integer arithmetic --- --- */
3068 /* Simple ... UifU the args and per-lane pessimise the results. */
3070 /* --- V256-bit versions --- */
3072 static
3073 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3075 IRAtom* at;
3076 at = mkUifUV256(mce, vatom1, vatom2);
3077 at = mkPCast8x32(mce, at);
3078 return at;
3081 static
3082 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3084 IRAtom* at;
3085 at = mkUifUV256(mce, vatom1, vatom2);
3086 at = mkPCast16x16(mce, at);
3087 return at;
3090 static
3091 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3093 IRAtom* at;
3094 at = mkUifUV256(mce, vatom1, vatom2);
3095 at = mkPCast32x8(mce, at);
3096 return at;
3099 static
3100 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3102 IRAtom* at;
3103 at = mkUifUV256(mce, vatom1, vatom2);
3104 at = mkPCast64x4(mce, at);
3105 return at;
3108 /* --- V128-bit versions --- */
3110 static
3111 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3113 IRAtom* at;
3114 at = mkUifUV128(mce, vatom1, vatom2);
3115 at = mkPCast8x16(mce, at);
3116 return at;
3119 static
3120 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3122 IRAtom* at;
3123 at = mkUifUV128(mce, vatom1, vatom2);
3124 at = mkPCast16x8(mce, at);
3125 return at;
3128 static
3129 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3131 IRAtom* at;
3132 at = mkUifUV128(mce, vatom1, vatom2);
3133 at = mkPCast32x4(mce, at);
3134 return at;
3137 static
3138 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3140 IRAtom* at;
3141 at = mkUifUV128(mce, vatom1, vatom2);
3142 at = mkPCast64x2(mce, at);
3143 return at;
3146 static
3147 IRAtom* binary128Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3149 IRAtom* at;
3150 at = mkUifUV128(mce, vatom1, vatom2);
3151 at = mkPCast128x1(mce, at);
3152 return at;
3155 /* --- 64-bit versions --- */
3157 static
3158 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3160 IRAtom* at;
3161 at = mkUifU64(mce, vatom1, vatom2);
3162 at = mkPCast8x8(mce, at);
3163 return at;
3166 static
3167 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3169 IRAtom* at;
3170 at = mkUifU64(mce, vatom1, vatom2);
3171 at = mkPCast16x4(mce, at);
3172 return at;
3175 static
3176 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3178 IRAtom* at;
3179 at = mkUifU64(mce, vatom1, vatom2);
3180 at = mkPCast32x2(mce, at);
3181 return at;
3184 static
3185 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3187 IRAtom* at;
3188 at = mkUifU64(mce, vatom1, vatom2);
3189 at = mkPCastTo(mce, Ity_I64, at);
3190 return at;
3193 /* --- 32-bit versions --- */
3195 static
3196 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3198 IRAtom* at;
3199 at = mkUifU32(mce, vatom1, vatom2);
3200 at = mkPCast8x4(mce, at);
3201 return at;
3204 static
3205 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3207 IRAtom* at;
3208 at = mkUifU32(mce, vatom1, vatom2);
3209 at = mkPCast16x2(mce, at);
3210 return at;
3214 /*------------------------------------------------------------*/
3215 /*--- Generate shadow values from all kinds of IRExprs. ---*/
3216 /*------------------------------------------------------------*/
3218 static
3219 IRAtom* expr2vbits_Qop ( MCEnv* mce,
3220 IROp op,
3221 IRAtom* atom1, IRAtom* atom2,
3222 IRAtom* atom3, IRAtom* atom4 )
3224 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3225 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3226 IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );
3227 IRAtom* vatom4 = expr2vbits( mce, atom4, HuOth );
3229 tl_assert(isOriginalAtom(mce,atom1));
3230 tl_assert(isOriginalAtom(mce,atom2));
3231 tl_assert(isOriginalAtom(mce,atom3));
3232 tl_assert(isOriginalAtom(mce,atom4));
3233 tl_assert(isShadowAtom(mce,vatom1));
3234 tl_assert(isShadowAtom(mce,vatom2));
3235 tl_assert(isShadowAtom(mce,vatom3));
3236 tl_assert(isShadowAtom(mce,vatom4));
3237 tl_assert(sameKindedAtoms(atom1,vatom1));
3238 tl_assert(sameKindedAtoms(atom2,vatom2));
3239 tl_assert(sameKindedAtoms(atom3,vatom3));
3240 tl_assert(sameKindedAtoms(atom4,vatom4));
3241 switch (op) {
3242 case Iop_MAddF64:
3243 case Iop_MAddF64r32:
3244 case Iop_MSubF64:
3245 case Iop_MSubF64r32:
3246 /* I32(rm) x F64 x F64 x F64 -> F64 */
3247 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
3249 case Iop_MAddF32:
3250 case Iop_MSubF32:
3251 /* I32(rm) x F32 x F32 x F32 -> F32 */
3252 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
3254 case Iop_MAddF128:
3255 case Iop_MSubF128:
3256 case Iop_NegMAddF128:
3257 case Iop_NegMSubF128:
3258 /* I32(rm) x F128 x F128 x F128 -> F128 */
3259 return mkLazy4(mce, Ity_I128, vatom1, vatom2, vatom3, vatom4);
3261 /* V256-bit data-steering */
3262 case Iop_64x4toV256:
3263 return assignNew('V', mce, Ity_V256,
3264 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
3266 /* I32/I64 x I8 x I8 x I8 -> I32/I64 */
3267 case Iop_Rotx32:
3268 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
3269 case Iop_Rotx64:
3270 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
3271 default:
3272 ppIROp(op);
3273 VG_(tool_panic)("memcheck:expr2vbits_Qop");
3278 static
3279 IRAtom* expr2vbits_Triop ( MCEnv* mce,
3280 IROp op,
3281 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
3283 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3284 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3285 IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );
3287 tl_assert(isOriginalAtom(mce,atom1));
3288 tl_assert(isOriginalAtom(mce,atom2));
3289 tl_assert(isOriginalAtom(mce,atom3));
3290 tl_assert(isShadowAtom(mce,vatom1));
3291 tl_assert(isShadowAtom(mce,vatom2));
3292 tl_assert(isShadowAtom(mce,vatom3));
3293 tl_assert(sameKindedAtoms(atom1,vatom1));
3294 tl_assert(sameKindedAtoms(atom2,vatom2));
3295 tl_assert(sameKindedAtoms(atom3,vatom3));
3296 switch (op) {
3297 case Iop_AddF128:
3298 case Iop_SubF128:
3299 case Iop_MulF128:
3300 case Iop_DivF128:
3301 case Iop_AddD128:
3302 case Iop_SubD128:
3303 case Iop_MulD128:
3304 case Iop_DivD128:
3305 case Iop_QuantizeD128:
3306 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
3307 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
3308 case Iop_AddF64:
3309 case Iop_AddD64:
3310 case Iop_AddF64r32:
3311 case Iop_SubF64:
3312 case Iop_SubD64:
3313 case Iop_SubF64r32:
3314 case Iop_MulF64:
3315 case Iop_MulD64:
3316 case Iop_MulF64r32:
3317 case Iop_DivF64:
3318 case Iop_DivD64:
3319 case Iop_DivF64r32:
3320 case Iop_ScaleF64:
3321 case Iop_Yl2xF64:
3322 case Iop_Yl2xp1F64:
3323 case Iop_AtanF64:
3324 case Iop_PRemF64:
3325 case Iop_PRem1F64:
3326 case Iop_QuantizeD64:
3327 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
3328 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
3329 case Iop_PRemC3210F64:
3330 case Iop_PRem1C3210F64:
3331 /* I32(rm) x F64 x F64 -> I32 */
3332 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
3333 case Iop_AddF32:
3334 case Iop_SubF32:
3335 case Iop_MulF32:
3336 case Iop_DivF32:
3337 /* I32(rm) x F32 x F32 -> I32 */
3338 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
3339 case Iop_SignificanceRoundD64:
3340 /* IRRoundingMode(I32) x I8 x D64 -> D64 */
3341 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
3342 case Iop_SignificanceRoundD128:
3343 /* IRRoundingMode(I32) x I8 x D128 -> D128 */
3344 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
3345 case Iop_SliceV128:
3346 /* (V128, V128, I8) -> V128 */
3347 complainIfUndefined(mce, atom3, NULL);
3348 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
3349 case Iop_Slice64:
3350 /* (I64, I64, I8) -> I64 */
3351 complainIfUndefined(mce, atom3, NULL);
3352 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
3353 case Iop_SetElem8x8:
3354 case Iop_SetElem16x4:
3355 case Iop_SetElem32x2:
3356 complainIfUndefined(mce, atom2, NULL);
3357 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
3359 case Iop_SetElem8x16:
3360 case Iop_SetElem16x8:
3361 case Iop_SetElem32x4:
3362 case Iop_SetElem64x2:
3363 complainIfUndefined(mce, atom2, NULL);
3364 return assignNew('V', mce, Ity_V128, triop(op, vatom1, atom2, vatom3));
3366 case Iop_Perm8x16x2:
3367 /* (V128, V128, V128) -> V128 */
3368 complainIfUndefined(mce, atom3, NULL);
3369 return mkUifUV128(
3370 mce,
3371 assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)),
3372 mkPCast8x16(mce, vatom3)
3375 /* Vector FP with rounding mode as the first arg */
3376 case Iop_Add64Fx2:
3377 case Iop_Sub64Fx2:
3378 case Iop_Mul64Fx2:
3379 case Iop_Div64Fx2:
3380 case Iop_Scale2_64Fx2:
3381 return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);
3383 case Iop_Add32Fx4:
3384 case Iop_Sub32Fx4:
3385 case Iop_Mul32Fx4:
3386 case Iop_Div32Fx4:
3387 case Iop_Scale2_32Fx4:
3388 return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);
3390 case Iop_Add64Fx4:
3391 case Iop_Sub64Fx4:
3392 case Iop_Mul64Fx4:
3393 case Iop_Div64Fx4:
3394 return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);
3396 case Iop_Add32Fx8:
3397 case Iop_Sub32Fx8:
3398 case Iop_Mul32Fx8:
3399 case Iop_Div32Fx8:
3400 return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);
3402 case Iop_F32x4_2toQ16x8:
3403 return assignNew('V', mce, Ity_V128,
3404 binop(Iop_PackEvenLanes16x8,
3405 unary32Fx4_w_rm(mce, vatom1, vatom2),
3406 unary32Fx4_w_rm(mce, vatom1, vatom3)));
3407 case Iop_F64x2_2toQ32x4:
3408 return assignNew('V', mce, Ity_V128,
3409 binop(Iop_PackEvenLanes32x4,
3410 unary64Fx2_w_rm(mce, vatom1, vatom2),
3411 unary64Fx2_w_rm(mce, vatom1, vatom3)));
3414 default:
3415 ppIROp(op);
3416 VG_(tool_panic)("memcheck:expr2vbits_Triop");
3421 static
3422 IRAtom* expr2vbits_Binop ( MCEnv* mce,
3423 IROp op,
3424 IRAtom* atom1, IRAtom* atom2,
3425 HowUsed hu/*use HuOth if unknown*/ )
3427 IRType and_or_ty = Ity_INVALID;
3428 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*) = NULL;
3429 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*) = NULL;
3430 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*) = NULL;
3432 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3433 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3435 tl_assert(isOriginalAtom(mce,atom1));
3436 tl_assert(isOriginalAtom(mce,atom2));
3437 tl_assert(isShadowAtom(mce,vatom1));
3438 tl_assert(isShadowAtom(mce,vatom2));
3439 tl_assert(sameKindedAtoms(atom1,vatom1));
3440 tl_assert(sameKindedAtoms(atom2,vatom2));
3441 switch (op) {
3443 /* 32-bit SIMD */
3445 case Iop_Add16x2:
3446 case Iop_HAdd16Ux2:
3447 case Iop_HAdd16Sx2:
3448 case Iop_Sub16x2:
3449 case Iop_HSub16Ux2:
3450 case Iop_HSub16Sx2:
3451 case Iop_QAdd16Sx2:
3452 case Iop_QSub16Sx2:
3453 case Iop_QSub16Ux2:
3454 case Iop_QAdd16Ux2:
3455 return binary16Ix2(mce, vatom1, vatom2);
3457 case Iop_Add8x4:
3458 case Iop_HAdd8Ux4:
3459 case Iop_HAdd8Sx4:
3460 case Iop_Sub8x4:
3461 case Iop_HSub8Ux4:
3462 case Iop_HSub8Sx4:
3463 case Iop_QSub8Ux4:
3464 case Iop_QAdd8Ux4:
3465 case Iop_QSub8Sx4:
3466 case Iop_QAdd8Sx4:
3467 return binary8Ix4(mce, vatom1, vatom2);
3469 /* 64-bit SIMD */
3471 case Iop_ShrN8x8:
3472 case Iop_ShrN16x4:
3473 case Iop_ShrN32x2:
3474 case Iop_SarN8x8:
3475 case Iop_SarN16x4:
3476 case Iop_SarN32x2:
3477 case Iop_ShlN16x4:
3478 case Iop_ShlN32x2:
3479 case Iop_ShlN8x8:
3480 /* Same scheme as with all other shifts. */
3481 complainIfUndefined(mce, atom2, NULL);
3482 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3484 case Iop_QNarrowBin32Sto16Sx4:
3485 case Iop_QNarrowBin16Sto8Sx8:
3486 case Iop_QNarrowBin16Sto8Ux8:
3487 return vectorNarrowBin64(mce, op, vatom1, vatom2);
3489 case Iop_Min8Ux8:
3490 case Iop_Min8Sx8:
3491 case Iop_Max8Ux8:
3492 case Iop_Max8Sx8:
3493 case Iop_Avg8Ux8:
3494 case Iop_QSub8Sx8:
3495 case Iop_QSub8Ux8:
3496 case Iop_Sub8x8:
3497 case Iop_CmpGT8Sx8:
3498 case Iop_CmpGT8Ux8:
3499 case Iop_CmpEQ8x8:
3500 case Iop_QAdd8Sx8:
3501 case Iop_QAdd8Ux8:
3502 case Iop_QSal8x8:
3503 case Iop_QShl8x8:
3504 case Iop_Add8x8:
3505 case Iop_Mul8x8:
3506 case Iop_PolynomialMul8x8:
3507 return binary8Ix8(mce, vatom1, vatom2);
3509 case Iop_Min16Sx4:
3510 case Iop_Min16Ux4:
3511 case Iop_Max16Sx4:
3512 case Iop_Max16Ux4:
3513 case Iop_Avg16Ux4:
3514 case Iop_QSub16Ux4:
3515 case Iop_QSub16Sx4:
3516 case Iop_Sub16x4:
3517 case Iop_Mul16x4:
3518 case Iop_MulHi16Sx4:
3519 case Iop_MulHi16Ux4:
3520 case Iop_CmpGT16Sx4:
3521 case Iop_CmpGT16Ux4:
3522 case Iop_CmpEQ16x4:
3523 case Iop_QAdd16Sx4:
3524 case Iop_QAdd16Ux4:
3525 case Iop_QSal16x4:
3526 case Iop_QShl16x4:
3527 case Iop_Add16x4:
3528 case Iop_QDMulHi16Sx4:
3529 case Iop_QRDMulHi16Sx4:
3530 return binary16Ix4(mce, vatom1, vatom2);
3532 case Iop_Sub32x2:
3533 case Iop_Mul32x2:
3534 case Iop_Max32Sx2:
3535 case Iop_Max32Ux2:
3536 case Iop_Min32Sx2:
3537 case Iop_Min32Ux2:
3538 case Iop_CmpGT32Sx2:
3539 case Iop_CmpGT32Ux2:
3540 case Iop_CmpEQ32x2:
3541 case Iop_Add32x2:
3542 case Iop_QAdd32Ux2:
3543 case Iop_QAdd32Sx2:
3544 case Iop_QSub32Ux2:
3545 case Iop_QSub32Sx2:
3546 case Iop_QSal32x2:
3547 case Iop_QShl32x2:
3548 case Iop_QDMulHi32Sx2:
3549 case Iop_QRDMulHi32Sx2:
3550 return binary32Ix2(mce, vatom1, vatom2);
3552 case Iop_QSub64Ux1:
3553 case Iop_QSub64Sx1:
3554 case Iop_QAdd64Ux1:
3555 case Iop_QAdd64Sx1:
3556 case Iop_QSal64x1:
3557 case Iop_QShl64x1:
3558 case Iop_Sal64x1:
3559 return binary64Ix1(mce, vatom1, vatom2);
3561 case Iop_QShlNsatSU8x8:
3562 case Iop_QShlNsatUU8x8:
3563 case Iop_QShlNsatSS8x8:
3564 complainIfUndefined(mce, atom2, NULL);
3565 return mkPCast8x8(mce, vatom1);
3567 case Iop_QShlNsatSU16x4:
3568 case Iop_QShlNsatUU16x4:
3569 case Iop_QShlNsatSS16x4:
3570 complainIfUndefined(mce, atom2, NULL);
3571 return mkPCast16x4(mce, vatom1);
3573 case Iop_QShlNsatSU32x2:
3574 case Iop_QShlNsatUU32x2:
3575 case Iop_QShlNsatSS32x2:
3576 complainIfUndefined(mce, atom2, NULL);
3577 return mkPCast32x2(mce, vatom1);
3579 case Iop_QShlNsatSU64x1:
3580 case Iop_QShlNsatUU64x1:
3581 case Iop_QShlNsatSS64x1:
3582 complainIfUndefined(mce, atom2, NULL);
3583 return mkPCast32x2(mce, vatom1);
3585 case Iop_PwMax32Sx2:
3586 case Iop_PwMax32Ux2:
3587 case Iop_PwMin32Sx2:
3588 case Iop_PwMin32Ux2:
3589 case Iop_PwMax32Fx2:
3590 case Iop_PwMin32Fx2:
3591 return assignNew('V', mce, Ity_I64,
3592 binop(Iop_PwMax32Ux2,
3593 mkPCast32x2(mce, vatom1),
3594 mkPCast32x2(mce, vatom2)));
3596 case Iop_PwMax16Sx4:
3597 case Iop_PwMax16Ux4:
3598 case Iop_PwMin16Sx4:
3599 case Iop_PwMin16Ux4:
3600 return assignNew('V', mce, Ity_I64,
3601 binop(Iop_PwMax16Ux4,
3602 mkPCast16x4(mce, vatom1),
3603 mkPCast16x4(mce, vatom2)));
3605 case Iop_PwMax8Sx8:
3606 case Iop_PwMax8Ux8:
3607 case Iop_PwMin8Sx8:
3608 case Iop_PwMin8Ux8:
3609 return assignNew('V', mce, Ity_I64,
3610 binop(Iop_PwMax8Ux8,
3611 mkPCast8x8(mce, vatom1),
3612 mkPCast8x8(mce, vatom2)));
3614 case Iop_PwAdd32x2:
3615 case Iop_PwAdd32Fx2:
3616 return mkPCast32x2(mce,
3617 assignNew('V', mce, Ity_I64,
3618 binop(Iop_PwAdd32x2,
3619 mkPCast32x2(mce, vatom1),
3620 mkPCast32x2(mce, vatom2))));
3622 case Iop_PwAdd16x4:
3623 return mkPCast16x4(mce,
3624 assignNew('V', mce, Ity_I64,
3625 binop(op, mkPCast16x4(mce, vatom1),
3626 mkPCast16x4(mce, vatom2))));
3628 case Iop_PwAdd8x8:
3629 return mkPCast8x8(mce,
3630 assignNew('V', mce, Ity_I64,
3631 binop(op, mkPCast8x8(mce, vatom1),
3632 mkPCast8x8(mce, vatom2))));
3634 case Iop_Shl8x8:
3635 case Iop_Shr8x8:
3636 case Iop_Sar8x8:
3637 case Iop_Sal8x8:
3638 return mkUifU64(mce,
3639 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3640 mkPCast8x8(mce,vatom2)
3643 case Iop_Shl16x4:
3644 case Iop_Shr16x4:
3645 case Iop_Sar16x4:
3646 case Iop_Sal16x4:
3647 return mkUifU64(mce,
3648 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3649 mkPCast16x4(mce,vatom2)
3652 case Iop_Shl32x2:
3653 case Iop_Shr32x2:
3654 case Iop_Sar32x2:
3655 case Iop_Sal32x2:
3656 return mkUifU64(mce,
3657 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3658 mkPCast32x2(mce,vatom2)
3661 /* 64-bit data-steering */
3662 case Iop_InterleaveLO32x2:
3663 case Iop_InterleaveLO16x4:
3664 case Iop_InterleaveLO8x8:
3665 case Iop_InterleaveHI32x2:
3666 case Iop_InterleaveHI16x4:
3667 case Iop_InterleaveHI8x8:
3668 case Iop_CatOddLanes8x8:
3669 case Iop_CatEvenLanes8x8:
3670 case Iop_CatOddLanes16x4:
3671 case Iop_CatEvenLanes16x4:
3672 case Iop_InterleaveOddLanes8x8:
3673 case Iop_InterleaveEvenLanes8x8:
3674 case Iop_InterleaveOddLanes16x4:
3675 case Iop_InterleaveEvenLanes16x4:
3676 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3678 case Iop_GetElem8x8:
3679 complainIfUndefined(mce, atom2, NULL);
3680 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3681 case Iop_GetElem16x4:
3682 complainIfUndefined(mce, atom2, NULL);
3683 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3684 case Iop_GetElem32x2:
3685 complainIfUndefined(mce, atom2, NULL);
3686 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3688 /* Perm8x8: rearrange values in left arg using steering values from
3689 right arg. So rearrange the vbits in the same way but pessimise wrt
3690 steering values. We assume that unused bits in the steering value
3691 are defined zeros, so we can safely PCast within each lane of the the
3692 steering value without having to take precautions to avoid a
3693 dependency on those unused bits.
3695 This is also correct for PermOrZero8x8, but it is a bit subtle. For
3696 each lane, if bit 7 of the steering value is zero, then we'll steer
3697 the shadow value exactly as per Perm8x8. If that bit is one, then
3698 the operation will set the resulting (concrete) value to zero. That
3699 means it is defined, and should have a shadow value of zero. Hence
3700 in both cases (bit 7 is 0 or 1) we can self-shadow (in the same way
3701 as Perm8x8) and then pessimise against the steering values. */
3702 case Iop_Perm8x8:
3703 case Iop_PermOrZero8x8:
3704 return mkUifU64(
3705 mce,
3706 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3707 mkPCast8x8(mce, vatom2)
3710 /* V128-bit SIMD */
3712 case Iop_I32StoF32x4:
3713 case Iop_F32toI32Sx4:
3714 case Iop_Sqrt32Fx4:
3715 return unary32Fx4_w_rm(mce, vatom1, vatom2);
3716 case Iop_Sqrt64Fx2:
3717 return unary64Fx2_w_rm(mce, vatom1, vatom2);
3719 case Iop_ShrN8x16:
3720 case Iop_ShrN16x8:
3721 case Iop_ShrN32x4:
3722 case Iop_ShrN64x2:
3723 case Iop_SarN8x16:
3724 case Iop_SarN16x8:
3725 case Iop_SarN32x4:
3726 case Iop_SarN64x2:
3727 case Iop_ShlN8x16:
3728 case Iop_ShlN16x8:
3729 case Iop_ShlN32x4:
3730 case Iop_ShlN64x2:
3731 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3732 this is wrong now, scalar shifts are done properly lazily.
3733 Vector shifts should be fixed too. */
3734 complainIfUndefined(mce, atom2, NULL);
3735 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3737 /* V x V shifts/rotates are done using the standard lazy scheme. */
3738 /* For the non-rounding variants of bi-di vector x vector
3739 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
3740 But note that this is overly pessimistic, because in fact only
3741 the bottom 8 bits of each lane of the second argument are taken
3742 into account when shifting. So really we ought to ignore
3743 undefinedness in bits 8 and above of each lane in the
3744 second argument. */
3745 case Iop_Shl8x16:
3746 case Iop_Shr8x16:
3747 case Iop_Sar8x16:
3748 case Iop_Sal8x16:
3749 case Iop_Rol8x16:
3750 case Iop_Sh8Sx16:
3751 case Iop_Sh8Ux16:
3752 return mkUifUV128(mce,
3753 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3754 mkPCast8x16(mce,vatom2)
3757 case Iop_Shl16x8:
3758 case Iop_Shr16x8:
3759 case Iop_Sar16x8:
3760 case Iop_Sal16x8:
3761 case Iop_Rol16x8:
3762 case Iop_Sh16Sx8:
3763 case Iop_Sh16Ux8:
3764 return mkUifUV128(mce,
3765 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3766 mkPCast16x8(mce,vatom2)
3769 case Iop_Shl32x4:
3770 case Iop_Shr32x4:
3771 case Iop_Sar32x4:
3772 case Iop_Sal32x4:
3773 case Iop_Rol32x4:
3774 case Iop_Sh32Sx4:
3775 case Iop_Sh32Ux4:
3776 return mkUifUV128(mce,
3777 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3778 mkPCast32x4(mce,vatom2)
3781 case Iop_Shl64x2:
3782 case Iop_Shr64x2:
3783 case Iop_Sar64x2:
3784 case Iop_Sal64x2:
3785 case Iop_Rol64x2:
3786 case Iop_Sh64Sx2:
3787 case Iop_Sh64Ux2:
3788 return mkUifUV128(mce,
3789 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3790 mkPCast64x2(mce,vatom2)
3793 /* For the rounding variants of bi-di vector x vector shifts, the
3794 rounding adjustment can cause undefinedness to propagate through
3795 the entire lane, in the worst case. Too complex to handle
3796 properly .. just UifU the arguments and then PCast them.
3797 Suboptimal but safe. */
3798 case Iop_Rsh8Sx16:
3799 case Iop_Rsh8Ux16:
3800 return binary8Ix16(mce, vatom1, vatom2);
3801 case Iop_Rsh16Sx8:
3802 case Iop_Rsh16Ux8:
3803 return binary16Ix8(mce, vatom1, vatom2);
3804 case Iop_Rsh32Sx4:
3805 case Iop_Rsh32Ux4:
3806 return binary32Ix4(mce, vatom1, vatom2);
3807 case Iop_Rsh64Sx2:
3808 case Iop_Rsh64Ux2:
3809 return binary64Ix2(mce, vatom1, vatom2);
3811 case Iop_F32ToFixed32Ux4_RZ:
3812 case Iop_F32ToFixed32Sx4_RZ:
3813 case Iop_Fixed32UToF32x4_RN:
3814 case Iop_Fixed32SToF32x4_RN:
3815 complainIfUndefined(mce, atom2, NULL);
3816 return mkPCast32x4(mce, vatom1);
3818 case Iop_F32ToFixed32Ux2_RZ:
3819 case Iop_F32ToFixed32Sx2_RZ:
3820 case Iop_Fixed32UToF32x2_RN:
3821 case Iop_Fixed32SToF32x2_RN:
3822 complainIfUndefined(mce, atom2, NULL);
3823 return mkPCast32x2(mce, vatom1);
3825 case Iop_QSub8Ux16:
3826 case Iop_QSub8Sx16:
3827 case Iop_Sub8x16:
3828 case Iop_Min8Ux16:
3829 case Iop_Min8Sx16:
3830 case Iop_Max8Ux16:
3831 case Iop_Max8Sx16:
3832 case Iop_CmpGT8Sx16:
3833 case Iop_CmpGT8Ux16:
3834 case Iop_CmpEQ8x16:
3835 case Iop_Avg8Ux16:
3836 case Iop_Avg8Sx16:
3837 case Iop_QAdd8Ux16:
3838 case Iop_QAdd8Sx16:
3839 case Iop_QAddExtUSsatSS8x16:
3840 case Iop_QAddExtSUsatUU8x16:
3841 case Iop_QSal8x16:
3842 case Iop_QShl8x16:
3843 case Iop_Add8x16:
3844 case Iop_Mul8x16:
3845 case Iop_MulHi8Sx16:
3846 case Iop_MulHi8Ux16:
3847 case Iop_PolynomialMul8x16:
3848 case Iop_PolynomialMulAdd8x16:
3849 return binary8Ix16(mce, vatom1, vatom2);
3851 case Iop_QSub16Ux8:
3852 case Iop_QSub16Sx8:
3853 case Iop_Sub16x8:
3854 case Iop_Mul16x8:
3855 case Iop_MulHi16Sx8:
3856 case Iop_MulHi16Ux8:
3857 case Iop_Min16Sx8:
3858 case Iop_Min16Ux8:
3859 case Iop_Max16Sx8:
3860 case Iop_Max16Ux8:
3861 case Iop_CmpGT16Sx8:
3862 case Iop_CmpGT16Ux8:
3863 case Iop_CmpEQ16x8:
3864 case Iop_Avg16Ux8:
3865 case Iop_Avg16Sx8:
3866 case Iop_QAdd16Ux8:
3867 case Iop_QAdd16Sx8:
3868 case Iop_QAddExtUSsatSS16x8:
3869 case Iop_QAddExtSUsatUU16x8:
3870 case Iop_QSal16x8:
3871 case Iop_QShl16x8:
3872 case Iop_Add16x8:
3873 case Iop_QDMulHi16Sx8:
3874 case Iop_QRDMulHi16Sx8:
3875 case Iop_PolynomialMulAdd16x8:
3876 /* PwExtUSMulQAdd8x16 is a bit subtle. The effect of it is that each
3877 16-bit chunk of the output is formed from corresponding 16-bit chunks
3878 of the input args, so we can treat it like an other binary 16x8
3879 operation. That's despite it having '8x16' in its name. */
3880 case Iop_PwExtUSMulQAdd8x16:
3881 return binary16Ix8(mce, vatom1, vatom2);
3883 case Iop_Sub32x4:
3884 case Iop_CmpGT32Sx4:
3885 case Iop_CmpGT32Ux4:
3886 case Iop_CmpEQ32x4:
3887 case Iop_QAdd32Sx4:
3888 case Iop_QAdd32Ux4:
3889 case Iop_QSub32Sx4:
3890 case Iop_QSub32Ux4:
3891 case Iop_QAddExtUSsatSS32x4:
3892 case Iop_QAddExtSUsatUU32x4:
3893 case Iop_QSal32x4:
3894 case Iop_QShl32x4:
3895 case Iop_Avg32Ux4:
3896 case Iop_Avg32Sx4:
3897 case Iop_Add32x4:
3898 case Iop_Max32Ux4:
3899 case Iop_Max32Sx4:
3900 case Iop_Min32Ux4:
3901 case Iop_Min32Sx4:
3902 case Iop_Mul32x4:
3903 case Iop_MulHi32Sx4:
3904 case Iop_MulHi32Ux4:
3905 case Iop_QDMulHi32Sx4:
3906 case Iop_QRDMulHi32Sx4:
3907 case Iop_PolynomialMulAdd32x4:
3908 return binary32Ix4(mce, vatom1, vatom2);
3910 case Iop_Sub64x2:
3911 case Iop_Add64x2:
3912 case Iop_Avg64Ux2:
3913 case Iop_Avg64Sx2:
3914 case Iop_Max64Sx2:
3915 case Iop_Max64Ux2:
3916 case Iop_Min64Sx2:
3917 case Iop_Min64Ux2:
3918 case Iop_CmpEQ64x2:
3919 case Iop_CmpGT64Sx2:
3920 case Iop_CmpGT64Ux2:
3921 case Iop_QSal64x2:
3922 case Iop_QShl64x2:
3923 case Iop_QAdd64Ux2:
3924 case Iop_QAdd64Sx2:
3925 case Iop_QSub64Ux2:
3926 case Iop_QSub64Sx2:
3927 case Iop_QAddExtUSsatSS64x2:
3928 case Iop_QAddExtSUsatUU64x2:
3929 case Iop_PolynomialMulAdd64x2:
3930 case Iop_CipherV128:
3931 case Iop_CipherLV128:
3932 case Iop_NCipherV128:
3933 case Iop_NCipherLV128:
3934 case Iop_MulI128by10E:
3935 case Iop_MulI128by10ECarry:
3936 return binary64Ix2(mce, vatom1, vatom2);
3938 case Iop_Add128x1:
3939 case Iop_Sub128x1:
3940 case Iop_CmpNEZ128x1:
3941 return binary128Ix1(mce, vatom1, vatom2);
3943 case Iop_QNarrowBin64Sto32Sx4:
3944 case Iop_QNarrowBin64Uto32Ux4:
3945 case Iop_QNarrowBin32Sto16Sx8:
3946 case Iop_QNarrowBin32Uto16Ux8:
3947 case Iop_QNarrowBin32Sto16Ux8:
3948 case Iop_QNarrowBin16Sto8Sx16:
3949 case Iop_QNarrowBin16Uto8Ux16:
3950 case Iop_QNarrowBin16Sto8Ux16:
3951 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
3953 case Iop_Min64Fx2:
3954 case Iop_Max64Fx2:
3955 case Iop_CmpLT64Fx2:
3956 case Iop_CmpLE64Fx2:
3957 case Iop_CmpEQ64Fx2:
3958 case Iop_CmpUN64Fx2:
3959 case Iop_RecipStep64Fx2:
3960 case Iop_RSqrtStep64Fx2:
3961 return binary64Fx2(mce, vatom1, vatom2);
3963 case Iop_Sub64F0x2:
3964 case Iop_Mul64F0x2:
3965 case Iop_Min64F0x2:
3966 case Iop_Max64F0x2:
3967 case Iop_Div64F0x2:
3968 case Iop_CmpLT64F0x2:
3969 case Iop_CmpLE64F0x2:
3970 case Iop_CmpEQ64F0x2:
3971 case Iop_CmpUN64F0x2:
3972 case Iop_Add64F0x2:
3973 return binary64F0x2(mce, vatom1, vatom2);
3975 case Iop_Min32Fx4:
3976 case Iop_Max32Fx4:
3977 case Iop_CmpLT32Fx4:
3978 case Iop_CmpLE32Fx4:
3979 case Iop_CmpEQ32Fx4:
3980 case Iop_CmpUN32Fx4:
3981 case Iop_CmpGT32Fx4:
3982 case Iop_CmpGE32Fx4:
3983 case Iop_RecipStep32Fx4:
3984 case Iop_RSqrtStep32Fx4:
3985 return binary32Fx4(mce, vatom1, vatom2);
3987 case Iop_Sub32Fx2:
3988 case Iop_Mul32Fx2:
3989 case Iop_Min32Fx2:
3990 case Iop_Max32Fx2:
3991 case Iop_CmpEQ32Fx2:
3992 case Iop_CmpGT32Fx2:
3993 case Iop_CmpGE32Fx2:
3994 case Iop_Add32Fx2:
3995 case Iop_RecipStep32Fx2:
3996 case Iop_RSqrtStep32Fx2:
3997 return binary32Fx2(mce, vatom1, vatom2);
3999 case Iop_Sub32F0x4:
4000 case Iop_Mul32F0x4:
4001 case Iop_Min32F0x4:
4002 case Iop_Max32F0x4:
4003 case Iop_Div32F0x4:
4004 case Iop_CmpLT32F0x4:
4005 case Iop_CmpLE32F0x4:
4006 case Iop_CmpEQ32F0x4:
4007 case Iop_CmpUN32F0x4:
4008 case Iop_Add32F0x4:
4009 return binary32F0x4(mce, vatom1, vatom2);
4011 case Iop_QShlNsatSU8x16:
4012 case Iop_QShlNsatUU8x16:
4013 case Iop_QShlNsatSS8x16:
4014 complainIfUndefined(mce, atom2, NULL);
4015 return mkPCast8x16(mce, vatom1);
4017 case Iop_QShlNsatSU16x8:
4018 case Iop_QShlNsatUU16x8:
4019 case Iop_QShlNsatSS16x8:
4020 complainIfUndefined(mce, atom2, NULL);
4021 return mkPCast16x8(mce, vatom1);
4023 case Iop_QShlNsatSU32x4:
4024 case Iop_QShlNsatUU32x4:
4025 case Iop_QShlNsatSS32x4:
4026 complainIfUndefined(mce, atom2, NULL);
4027 return mkPCast32x4(mce, vatom1);
4029 case Iop_QShlNsatSU64x2:
4030 case Iop_QShlNsatUU64x2:
4031 case Iop_QShlNsatSS64x2:
4032 complainIfUndefined(mce, atom2, NULL);
4033 return mkPCast32x4(mce, vatom1);
4035 /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
4036 To make this simpler, do the following:
4037 * complain if the shift amount (the I8) is undefined
4038 * pcast each lane at the wide width
4039 * truncate each lane to half width
4040 * pcast the resulting 64-bit value to a single bit and use
4041 that as the least significant bit of the upper half of the
4042 result. */
4043 case Iop_QandQShrNnarrow64Uto32Ux2:
4044 case Iop_QandQSarNnarrow64Sto32Sx2:
4045 case Iop_QandQSarNnarrow64Sto32Ux2:
4046 case Iop_QandQRShrNnarrow64Uto32Ux2:
4047 case Iop_QandQRSarNnarrow64Sto32Sx2:
4048 case Iop_QandQRSarNnarrow64Sto32Ux2:
4049 case Iop_QandQShrNnarrow32Uto16Ux4:
4050 case Iop_QandQSarNnarrow32Sto16Sx4:
4051 case Iop_QandQSarNnarrow32Sto16Ux4:
4052 case Iop_QandQRShrNnarrow32Uto16Ux4:
4053 case Iop_QandQRSarNnarrow32Sto16Sx4:
4054 case Iop_QandQRSarNnarrow32Sto16Ux4:
4055 case Iop_QandQShrNnarrow16Uto8Ux8:
4056 case Iop_QandQSarNnarrow16Sto8Sx8:
4057 case Iop_QandQSarNnarrow16Sto8Ux8:
4058 case Iop_QandQRShrNnarrow16Uto8Ux8:
4059 case Iop_QandQRSarNnarrow16Sto8Sx8:
4060 case Iop_QandQRSarNnarrow16Sto8Ux8:
4062 IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
4063 IROp opNarrow = Iop_INVALID;
4064 switch (op) {
4065 case Iop_QandQShrNnarrow64Uto32Ux2:
4066 case Iop_QandQSarNnarrow64Sto32Sx2:
4067 case Iop_QandQSarNnarrow64Sto32Ux2:
4068 case Iop_QandQRShrNnarrow64Uto32Ux2:
4069 case Iop_QandQRSarNnarrow64Sto32Sx2:
4070 case Iop_QandQRSarNnarrow64Sto32Ux2:
4071 fnPessim = mkPCast64x2;
4072 opNarrow = Iop_NarrowUn64to32x2;
4073 break;
4074 case Iop_QandQShrNnarrow32Uto16Ux4:
4075 case Iop_QandQSarNnarrow32Sto16Sx4:
4076 case Iop_QandQSarNnarrow32Sto16Ux4:
4077 case Iop_QandQRShrNnarrow32Uto16Ux4:
4078 case Iop_QandQRSarNnarrow32Sto16Sx4:
4079 case Iop_QandQRSarNnarrow32Sto16Ux4:
4080 fnPessim = mkPCast32x4;
4081 opNarrow = Iop_NarrowUn32to16x4;
4082 break;
4083 case Iop_QandQShrNnarrow16Uto8Ux8:
4084 case Iop_QandQSarNnarrow16Sto8Sx8:
4085 case Iop_QandQSarNnarrow16Sto8Ux8:
4086 case Iop_QandQRShrNnarrow16Uto8Ux8:
4087 case Iop_QandQRSarNnarrow16Sto8Sx8:
4088 case Iop_QandQRSarNnarrow16Sto8Ux8:
4089 fnPessim = mkPCast16x8;
4090 opNarrow = Iop_NarrowUn16to8x8;
4091 break;
4092 default:
4093 tl_assert(0);
4095 complainIfUndefined(mce, atom2, NULL);
4096 // Pessimised shift result
4097 IRAtom* shV
4098 = fnPessim(mce, vatom1);
4099 // Narrowed, pessimised shift result
4100 IRAtom* shVnarrowed
4101 = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
4102 // Generates: Def--(63)--Def PCast-to-I1(narrowed)
4103 IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
4104 // and assemble the result
4105 return assignNew('V', mce, Ity_V128,
4106 binop(Iop_64HLtoV128, qV, shVnarrowed));
4109 case Iop_Mull32Sx2:
4110 case Iop_Mull32Ux2:
4111 case Iop_QDMull32Sx2:
4112 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
4113 mkUifU64(mce, vatom1, vatom2));
4115 case Iop_Mull16Sx4:
4116 case Iop_Mull16Ux4:
4117 case Iop_QDMull16Sx4:
4118 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
4119 mkUifU64(mce, vatom1, vatom2));
4121 case Iop_Mull8Sx8:
4122 case Iop_Mull8Ux8:
4123 case Iop_PolynomialMull8x8:
4124 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
4125 mkUifU64(mce, vatom1, vatom2));
4127 case Iop_PwAdd32x4:
4128 return mkPCast32x4(mce,
4129 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
4130 mkPCast32x4(mce, vatom2))));
4132 case Iop_PwAdd16x8:
4133 return mkPCast16x8(mce,
4134 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
4135 mkPCast16x8(mce, vatom2))));
4137 case Iop_PwAdd8x16:
4138 return mkPCast8x16(mce,
4139 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
4140 mkPCast8x16(mce, vatom2))));
4142 /* V128-bit data-steering */
4143 case Iop_SetV128lo32:
4144 case Iop_SetV128lo64:
4145 case Iop_64HLtoV128:
4146 case Iop_InterleaveLO64x2:
4147 case Iop_InterleaveLO32x4:
4148 case Iop_InterleaveLO16x8:
4149 case Iop_InterleaveLO8x16:
4150 case Iop_InterleaveHI64x2:
4151 case Iop_InterleaveHI32x4:
4152 case Iop_InterleaveHI16x8:
4153 case Iop_InterleaveHI8x16:
4154 case Iop_CatOddLanes8x16:
4155 case Iop_CatOddLanes16x8:
4156 case Iop_CatOddLanes32x4:
4157 case Iop_CatEvenLanes8x16:
4158 case Iop_CatEvenLanes16x8:
4159 case Iop_CatEvenLanes32x4:
4160 case Iop_InterleaveOddLanes8x16:
4161 case Iop_InterleaveOddLanes16x8:
4162 case Iop_InterleaveOddLanes32x4:
4163 case Iop_InterleaveEvenLanes8x16:
4164 case Iop_InterleaveEvenLanes16x8:
4165 case Iop_InterleaveEvenLanes32x4:
4166 case Iop_PackOddLanes8x16:
4167 case Iop_PackOddLanes16x8:
4168 case Iop_PackOddLanes32x4:
4169 case Iop_PackEvenLanes8x16:
4170 case Iop_PackEvenLanes16x8:
4171 case Iop_PackEvenLanes32x4:
4172 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
4174 case Iop_GetElem8x16:
4175 complainIfUndefined(mce, atom2, NULL);
4176 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
4177 case Iop_GetElem16x8:
4178 complainIfUndefined(mce, atom2, NULL);
4179 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
4180 case Iop_GetElem32x4:
4181 complainIfUndefined(mce, atom2, NULL);
4182 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
4183 case Iop_GetElem64x2:
4184 complainIfUndefined(mce, atom2, NULL);
4185 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
4187 /* Perm8x16: rearrange values in left arg using steering values
4188 from right arg. So rearrange the vbits in the same way but
4189 pessimise wrt steering values. Perm32x4 ditto. */
4190 /* PermOrZero8x16: see comments above for PermOrZero8x8. */
4191 case Iop_Perm8x16:
4192 case Iop_PermOrZero8x16:
4193 return mkUifUV128(
4194 mce,
4195 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
4196 mkPCast8x16(mce, vatom2)
4198 case Iop_Perm32x4:
4199 return mkUifUV128(
4200 mce,
4201 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
4202 mkPCast32x4(mce, vatom2)
4205 /* These two take the lower half of each 16-bit lane, sign/zero
4206 extend it to 32, and multiply together, producing a 32x4
4207 result (and implicitly ignoring half the operand bits). So
4208 treat it as a bunch of independent 16x8 operations, but then
4209 do 32-bit shifts left-right to copy the lower half results
4210 (which are all 0s or all 1s due to PCasting in binary16Ix8)
4211 into the upper half of each result lane. */
4212 case Iop_MullEven16Ux8:
4213 case Iop_MullEven16Sx8: {
4214 IRAtom* at;
4215 at = binary16Ix8(mce,vatom1,vatom2);
4216 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
4217 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
4218 return at;
4221 /* Same deal as Iop_MullEven16{S,U}x8 */
4222 case Iop_MullEven8Ux16:
4223 case Iop_MullEven8Sx16: {
4224 IRAtom* at;
4225 at = binary8Ix16(mce,vatom1,vatom2);
4226 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
4227 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
4228 return at;
4231 /* Same deal as Iop_MullEven16{S,U}x8 */
4232 case Iop_MullEven32Ux4:
4233 case Iop_MullEven32Sx4: {
4234 IRAtom* at;
4235 at = binary32Ix4(mce,vatom1,vatom2);
4236 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
4237 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
4238 return at;
4241 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
4242 32x4 -> 16x8 laneage, discarding the upper half of each lane.
4243 Simply apply same op to the V bits, since this really no more
4244 than a data steering operation. */
4245 case Iop_NarrowBin32to16x8:
4246 case Iop_NarrowBin16to8x16:
4247 case Iop_NarrowBin64to32x4:
4248 return assignNew('V', mce, Ity_V128,
4249 binop(op, vatom1, vatom2));
4251 case Iop_ShrV128:
4252 case Iop_SarV128:
4253 case Iop_ShlV128:
4254 case Iop_I128StoBCD128:
4255 /* Same scheme as with all other shifts. Note: 10 Nov 05:
4256 this is wrong now, scalar shifts are done properly lazily.
4257 Vector shifts should be fixed too. */
4258 complainIfUndefined(mce, atom2, NULL);
4259 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
4261 case Iop_BCDAdd:
4262 case Iop_BCDSub:
4263 return mkLazy2(mce, Ity_V128, vatom1, vatom2);
4265 /* SHA Iops */
4266 case Iop_SHA256:
4267 case Iop_SHA512:
4268 complainIfUndefined(mce, atom2, NULL);
4269 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
4271 /* I128-bit data-steering */
4272 case Iop_64HLto128:
4273 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
4275 /* V256-bit SIMD */
4277 case Iop_Max64Fx4:
4278 case Iop_Min64Fx4:
4279 return binary64Fx4(mce, vatom1, vatom2);
4281 case Iop_Max32Fx8:
4282 case Iop_Min32Fx8:
4283 return binary32Fx8(mce, vatom1, vatom2);
4285 /* V256-bit data-steering */
4286 case Iop_V128HLtoV256:
4287 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
4289 /* Scalar floating point */
4291 case Iop_F32toI64S:
4292 case Iop_F32toI64U:
4293 /* I32(rm) x F32 -> I64 */
4294 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4296 case Iop_I64StoF32:
4297 /* I32(rm) x I64 -> F32 */
4298 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4300 case Iop_RoundF64toInt:
4301 case Iop_RoundF64toF32:
4302 case Iop_F64toI64S:
4303 case Iop_F64toI64U:
4304 case Iop_I64StoF64:
4305 case Iop_I64UtoF64:
4306 case Iop_SinF64:
4307 case Iop_CosF64:
4308 case Iop_TanF64:
4309 case Iop_2xm1F64:
4310 case Iop_SqrtF64:
4311 case Iop_RecpExpF64:
4312 /* I32(rm) x I64/F64 -> I64/F64 */
4313 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4315 case Iop_ShlD64:
4316 case Iop_ShrD64:
4317 case Iop_RoundD64toInt:
4318 /* I32(rm) x D64 -> D64 */
4319 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4321 case Iop_ShlD128:
4322 case Iop_ShrD128:
4323 case Iop_RoundD128toInt:
4324 /* I32(rm) x D128 -> D128 */
4325 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4327 case Iop_RoundF128toInt:
4328 /* I32(rm) x F128 -> F128 */
4329 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4331 case Iop_D64toI64S:
4332 case Iop_D64toI64U:
4333 case Iop_I64StoD64:
4334 case Iop_I64UtoD64:
4335 /* I32(rm) x I64/D64 -> D64/I64 */
4336 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4338 case Iop_F32toD32:
4339 case Iop_F64toD32:
4340 case Iop_F128toD32:
4341 case Iop_D32toF32:
4342 case Iop_D64toF32:
4343 case Iop_D128toF32:
4344 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
4345 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4347 case Iop_F32toD64:
4348 case Iop_F64toD64:
4349 case Iop_F128toD64:
4350 case Iop_D32toF64:
4351 case Iop_D64toF64:
4352 case Iop_D128toF64:
4353 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
4354 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4356 case Iop_F32toD128:
4357 case Iop_F64toD128:
4358 case Iop_F128toD128:
4359 case Iop_D32toF128:
4360 case Iop_D64toF128:
4361 case Iop_D128toF128:
4362 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
4363 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4365 case Iop_RoundF32toInt:
4366 case Iop_SqrtF32:
4367 case Iop_RecpExpF32:
4368 /* I32(rm) x I32/F32 -> I32/F32 */
4369 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4371 case Iop_SqrtF128:
4372 /* I32(rm) x F128 -> F128 */
4373 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4375 case Iop_I32StoF32:
4376 case Iop_I32UtoF32:
4377 case Iop_F32toI32S:
4378 case Iop_F32toI32U:
4379 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
4380 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4382 case Iop_F64toF16:
4383 case Iop_F32toF16:
4384 /* First arg is I32 (rounding mode), second is F64/F32 (data). */
4385 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
4387 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
4388 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
4389 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
4390 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
4391 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
4392 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4394 case Iop_F128toI128S: /* IRRoundingMode(I32) x F128 -> signed I128 */
4395 case Iop_RndF128: /* IRRoundingMode(I32) x F128 -> F128 */
4396 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4398 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
4399 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
4400 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
4401 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
4402 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
4403 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
4404 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4406 case Iop_F64HLtoF128:
4407 case Iop_D64HLtoD128:
4408 return assignNew('V', mce, Ity_I128,
4409 binop(Iop_64HLto128, vatom1, vatom2));
4411 case Iop_F64toI32U:
4412 case Iop_F64toI32S:
4413 case Iop_F64toF32:
4414 case Iop_I64UtoF32:
4415 case Iop_D64toI32U:
4416 case Iop_D64toI32S:
4417 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
4418 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4420 case Iop_D64toD32:
4421 /* First arg is I32 (rounding mode), second is D64 (data). */
4422 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4424 case Iop_F64toI16S:
4425 /* First arg is I32 (rounding mode), second is F64 (data). */
4426 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
4428 case Iop_InsertExpD64:
4429 /* I64 x I64 -> D64 */
4430 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4432 case Iop_InsertExpD128:
4433 /* I64 x I128 -> D128 */
4434 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4436 case Iop_CmpF32:
4437 case Iop_CmpF64:
4438 case Iop_CmpF128:
4439 case Iop_CmpD64:
4440 case Iop_CmpD128:
4441 case Iop_CmpExpD64:
4442 case Iop_CmpExpD128:
4443 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4445 case Iop_MaxNumF32:
4446 case Iop_MinNumF32:
4447 /* F32 x F32 -> F32 */
4448 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4450 case Iop_MaxNumF64:
4451 case Iop_MinNumF64:
4452 /* F64 x F64 -> F64 */
4453 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4455 /* non-FP after here */
4457 case Iop_DivModU64to32:
4458 case Iop_DivModS64to32:
4459 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4461 case Iop_DivModU128to64:
4462 case Iop_DivModS128to64:
4463 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4465 case Iop_8HLto16:
4466 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
4467 case Iop_16HLto32:
4468 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
4469 case Iop_32HLto64:
4470 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
4472 case Iop_DivModU64to64:
4473 case Iop_DivModS64to64: {
4474 IRAtom* vTmp64 = mkLazy2(mce, Ity_I64, vatom1, vatom2);
4475 return assignNew('V', mce, Ity_I128,
4476 binop(Iop_64HLto128, vTmp64, vTmp64));
4479 case Iop_MullS64:
4480 case Iop_MullU64: {
4481 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4482 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
4483 return assignNew('V', mce, Ity_I128,
4484 binop(Iop_64HLto128, vHi64, vLo64));
4487 case Iop_DivModU32to32:
4488 case Iop_DivModS32to32: {
4489 IRAtom* vTmp32 = mkLazy2(mce, Ity_I32, vatom1, vatom2);
4490 return assignNew('V', mce, Ity_I64,
4491 binop(Iop_32HLto64, vTmp32, vTmp32));
4494 case Iop_MullS32:
4495 case Iop_MullU32: {
4496 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
4497 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
4498 return assignNew('V', mce, Ity_I64,
4499 binop(Iop_32HLto64, vHi32, vLo32));
4502 case Iop_MullS16:
4503 case Iop_MullU16: {
4504 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4505 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
4506 return assignNew('V', mce, Ity_I32,
4507 binop(Iop_16HLto32, vHi16, vLo16));
4510 case Iop_MullS8:
4511 case Iop_MullU8: {
4512 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4513 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
4514 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
4517 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
4518 case Iop_DivS32:
4519 case Iop_DivU32:
4520 case Iop_DivU32E:
4521 case Iop_DivS32E:
4522 case Iop_QAdd32S: /* could probably do better */
4523 case Iop_QSub32S: /* could probably do better */
4524 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4526 case Iop_DivS64:
4527 case Iop_DivU64:
4528 case Iop_DivS64E:
4529 case Iop_DivU64E:
4530 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4532 case Iop_Add32:
4533 if (mce->dlbo.dl_Add32 == DLexpensive
4534 || (mce->dlbo.dl_Add32 == DLauto && hu == HuOth)) {
4535 return expensiveAddSub(mce,True,Ity_I32,
4536 vatom1,vatom2, atom1,atom2);
4537 } else {
4538 goto cheap_AddSub32;
4540 case Iop_Sub32:
4541 if (mce->dlbo.dl_Sub32 == DLexpensive
4542 || (mce->dlbo.dl_Sub32 == DLauto && hu == HuOth)) {
4543 return expensiveAddSub(mce,False,Ity_I32,
4544 vatom1,vatom2, atom1,atom2);
4545 } else {
4546 goto cheap_AddSub32;
4549 cheap_AddSub32:
4550 case Iop_Mul32:
4551 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
4553 case Iop_CmpORD32S:
4554 case Iop_CmpORD32U:
4555 case Iop_CmpORD64S:
4556 case Iop_CmpORD64U:
4557 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
4559 case Iop_Add64:
4560 if (mce->dlbo.dl_Add64 == DLexpensive
4561 || (mce->dlbo.dl_Add64 == DLauto && hu == HuOth)) {
4562 return expensiveAddSub(mce,True,Ity_I64,
4563 vatom1,vatom2, atom1,atom2);
4564 } else {
4565 goto cheap_AddSub64;
4567 case Iop_Sub64:
4568 if (mce->dlbo.dl_Sub64 == DLexpensive
4569 || (mce->dlbo.dl_Sub64 == DLauto && hu == HuOth)) {
4570 return expensiveAddSub(mce,False,Ity_I64,
4571 vatom1,vatom2, atom1,atom2);
4572 } else {
4573 goto cheap_AddSub64;
4576 cheap_AddSub64:
4577 case Iop_Mul64:
4578 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4580 case Iop_Mul16:
4581 case Iop_Add16:
4582 case Iop_Sub16:
4583 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4585 case Iop_Mul8:
4586 case Iop_Sub8:
4587 case Iop_Add8:
4588 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4590 ////---- CmpXX64
4591 case Iop_CmpEQ64: case Iop_CmpNE64:
4592 if (mce->dlbo.dl_CmpEQ64_CmpNE64 == DLexpensive)
4593 goto expensive_cmp64;
4594 else
4595 goto cheap_cmp64;
4597 expensive_cmp64:
4598 case Iop_ExpCmpNE64:
4599 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
4601 cheap_cmp64:
4602 case Iop_CmpLE64S: case Iop_CmpLE64U:
4603 case Iop_CmpLT64U: case Iop_CmpLT64S:
4604 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
4606 ////---- CmpXX32
4607 case Iop_CmpEQ32: case Iop_CmpNE32:
4608 if (mce->dlbo.dl_CmpEQ32_CmpNE32 == DLexpensive)
4609 goto expensive_cmp32;
4610 else
4611 goto cheap_cmp32;
4613 expensive_cmp32:
4614 case Iop_ExpCmpNE32:
4615 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
4617 cheap_cmp32:
4618 case Iop_CmpLE32S: case Iop_CmpLE32U:
4619 case Iop_CmpLT32U: case Iop_CmpLT32S:
4620 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
4622 ////---- CmpXX16
4623 case Iop_CmpEQ16: case Iop_CmpNE16:
4624 if (mce->dlbo.dl_CmpEQ16_CmpNE16 == DLexpensive)
4625 goto expensive_cmp16;
4626 else
4627 goto cheap_cmp16;
4629 expensive_cmp16:
4630 case Iop_ExpCmpNE16:
4631 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
4633 cheap_cmp16:
4634 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
4636 ////---- CmpXX8
4637 case Iop_CmpEQ8: case Iop_CmpNE8:
4638 if (mce->dlbo.dl_CmpEQ8_CmpNE8 == DLexpensive)
4639 goto expensive_cmp8;
4640 else
4641 goto cheap_cmp8;
4643 expensive_cmp8:
4644 return expensiveCmpEQorNE(mce,Ity_I8, vatom1,vatom2, atom1,atom2 );
4646 cheap_cmp8:
4647 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
4649 ////---- end CmpXX{64,32,16,8}
4651 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
4652 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
4653 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
4654 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
4655 /* Just say these all produce a defined result, regardless
4656 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
4657 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
4659 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
4660 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
4662 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
4663 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
4665 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
4666 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
4668 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
4669 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
4671 case Iop_AndV256:
4672 uifu = mkUifUV256; difd = mkDifDV256;
4673 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
4674 case Iop_AndV128:
4675 uifu = mkUifUV128; difd = mkDifDV128;
4676 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
4677 case Iop_And64:
4678 uifu = mkUifU64; difd = mkDifD64;
4679 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
4680 case Iop_And32:
4681 uifu = mkUifU32; difd = mkDifD32;
4682 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
4683 case Iop_And16:
4684 uifu = mkUifU16; difd = mkDifD16;
4685 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
4686 case Iop_And8:
4687 uifu = mkUifU8; difd = mkDifD8;
4688 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
4689 case Iop_And1:
4690 uifu = mkUifU1; difd = mkDifD1;
4691 and_or_ty = Ity_I1; improve = mkImproveAND1; goto do_And_Or;
4693 case Iop_OrV256:
4694 uifu = mkUifUV256; difd = mkDifDV256;
4695 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
4696 case Iop_OrV128:
4697 uifu = mkUifUV128; difd = mkDifDV128;
4698 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
4699 case Iop_Or64:
4700 uifu = mkUifU64; difd = mkDifD64;
4701 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
4702 case Iop_Or32:
4703 uifu = mkUifU32; difd = mkDifD32;
4704 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
4705 case Iop_Or16:
4706 uifu = mkUifU16; difd = mkDifD16;
4707 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
4708 case Iop_Or8:
4709 uifu = mkUifU8; difd = mkDifD8;
4710 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
4711 case Iop_Or1:
4712 uifu = mkUifU1; difd = mkDifD1;
4713 and_or_ty = Ity_I1; improve = mkImproveOR1; goto do_And_Or;
4715 do_And_Or:
4716 return
4717 assignNew(
4718 'V', mce,
4719 and_or_ty,
4720 difd(mce, uifu(mce, vatom1, vatom2),
4721 difd(mce, improve(mce, atom1, vatom1),
4722 improve(mce, atom2, vatom2) ) ) );
4724 case Iop_Xor8:
4725 return mkUifU8(mce, vatom1, vatom2);
4726 case Iop_Xor16:
4727 return mkUifU16(mce, vatom1, vatom2);
4728 case Iop_Xor32:
4729 return mkUifU32(mce, vatom1, vatom2);
4730 case Iop_Xor64:
4731 return mkUifU64(mce, vatom1, vatom2);
4732 case Iop_XorV128:
4733 return mkUifUV128(mce, vatom1, vatom2);
4734 case Iop_XorV256:
4735 return mkUifUV256(mce, vatom1, vatom2);
4737 /* V256-bit SIMD */
4739 case Iop_ShrN16x16:
4740 case Iop_ShrN32x8:
4741 case Iop_ShrN64x4:
4742 case Iop_SarN16x16:
4743 case Iop_SarN32x8:
4744 case Iop_ShlN16x16:
4745 case Iop_ShlN32x8:
4746 case Iop_ShlN64x4:
4747 /* Same scheme as with all other shifts. Note: 22 Oct 05:
4748 this is wrong now, scalar shifts are done properly lazily.
4749 Vector shifts should be fixed too. */
4750 complainIfUndefined(mce, atom2, NULL);
4751 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
4753 case Iop_QSub8Ux32:
4754 case Iop_QSub8Sx32:
4755 case Iop_Sub8x32:
4756 case Iop_Min8Ux32:
4757 case Iop_Min8Sx32:
4758 case Iop_Max8Ux32:
4759 case Iop_Max8Sx32:
4760 case Iop_CmpGT8Sx32:
4761 case Iop_CmpEQ8x32:
4762 case Iop_Avg8Ux32:
4763 case Iop_QAdd8Ux32:
4764 case Iop_QAdd8Sx32:
4765 case Iop_Add8x32:
4766 return binary8Ix32(mce, vatom1, vatom2);
4768 case Iop_QSub16Ux16:
4769 case Iop_QSub16Sx16:
4770 case Iop_Sub16x16:
4771 case Iop_Mul16x16:
4772 case Iop_MulHi16Sx16:
4773 case Iop_MulHi16Ux16:
4774 case Iop_Min16Sx16:
4775 case Iop_Min16Ux16:
4776 case Iop_Max16Sx16:
4777 case Iop_Max16Ux16:
4778 case Iop_CmpGT16Sx16:
4779 case Iop_CmpEQ16x16:
4780 case Iop_Avg16Ux16:
4781 case Iop_QAdd16Ux16:
4782 case Iop_QAdd16Sx16:
4783 case Iop_Add16x16:
4784 return binary16Ix16(mce, vatom1, vatom2);
4786 case Iop_Sub32x8:
4787 case Iop_CmpGT32Sx8:
4788 case Iop_CmpEQ32x8:
4789 case Iop_Add32x8:
4790 case Iop_Max32Ux8:
4791 case Iop_Max32Sx8:
4792 case Iop_Min32Ux8:
4793 case Iop_Min32Sx8:
4794 case Iop_Mul32x8:
4795 return binary32Ix8(mce, vatom1, vatom2);
4797 case Iop_Sub64x4:
4798 case Iop_Add64x4:
4799 case Iop_CmpEQ64x4:
4800 case Iop_CmpGT64Sx4:
4801 return binary64Ix4(mce, vatom1, vatom2);
4803 case Iop_I32StoF32x8:
4804 case Iop_F32toI32Sx8:
4805 return unary32Fx8_w_rm(mce, vatom1, vatom2);
4807 /* Perm32x8: rearrange values in left arg using steering values
4808 from right arg. So rearrange the vbits in the same way but
4809 pessimise wrt steering values. */
4810 case Iop_Perm32x8:
4811 return mkUifUV256(
4812 mce,
4813 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
4814 mkPCast32x8(mce, vatom2)
4817 /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
4818 Handle the shifted results in the same way that other
4819 binary Q ops are handled, eg QSub: UifU the two args,
4820 then pessimise -- which is binaryNIxM. But for the upper
4821 V128, we require to generate just 1 bit which is the
4822 pessimised shift result, with 127 defined zeroes above it.
4824 Note that this overly pessimistic in that in fact only the
4825 bottom 8 bits of each lane of the second arg determine the shift
4826 amount. Really we ought to ignore any undefinedness in the
4827 rest of the lanes of the second arg. */
4828 case Iop_QandSQsh64x2: case Iop_QandUQsh64x2:
4829 case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2:
4830 case Iop_QandSQsh32x4: case Iop_QandUQsh32x4:
4831 case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4:
4832 case Iop_QandSQsh16x8: case Iop_QandUQsh16x8:
4833 case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8:
4834 case Iop_QandSQsh8x16: case Iop_QandUQsh8x16:
4835 case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16:
4837 // The function to generate the pessimised shift result
4838 IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL;
4839 switch (op) {
4840 case Iop_QandSQsh64x2:
4841 case Iop_QandUQsh64x2:
4842 case Iop_QandSQRsh64x2:
4843 case Iop_QandUQRsh64x2:
4844 binaryNIxM = binary64Ix2;
4845 break;
4846 case Iop_QandSQsh32x4:
4847 case Iop_QandUQsh32x4:
4848 case Iop_QandSQRsh32x4:
4849 case Iop_QandUQRsh32x4:
4850 binaryNIxM = binary32Ix4;
4851 break;
4852 case Iop_QandSQsh16x8:
4853 case Iop_QandUQsh16x8:
4854 case Iop_QandSQRsh16x8:
4855 case Iop_QandUQRsh16x8:
4856 binaryNIxM = binary16Ix8;
4857 break;
4858 case Iop_QandSQsh8x16:
4859 case Iop_QandUQsh8x16:
4860 case Iop_QandSQRsh8x16:
4861 case Iop_QandUQRsh8x16:
4862 binaryNIxM = binary8Ix16;
4863 break;
4864 default:
4865 tl_assert(0);
4867 tl_assert(binaryNIxM);
4868 // Pessimised shift result, shV[127:0]
4869 IRAtom* shV = binaryNIxM(mce, vatom1, vatom2);
4870 // Generates: Def--(127)--Def PCast-to-I1(shV)
4871 IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128);
4872 // and assemble the result
4873 return assignNew('V', mce, Ity_V256,
4874 binop(Iop_V128HLtoV256, qV, shV));
4877 case Iop_F32toF16x4: {
4878 // First, PCast the input vector, retaining the 32x4 format.
4879 IRAtom* pcasted = mkPCast32x4(mce, vatom2); // :: 32x4
4880 // Now truncate each 32 bit lane to 16 bits. Since we already PCasted
4881 // the input, we're not going to lose any information.
4882 IRAtom* pcHI64
4883 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, pcasted));//32x2
4884 IRAtom* pcLO64
4885 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, pcasted)); // 32x2
4886 IRAtom* narrowed
4887 = assignNew('V', mce, Ity_I64, binop(Iop_NarrowBin32to16x4,
4888 pcHI64, pcLO64)); // 16x4
4889 // Finally, roll in any badness from the rounding mode.
4890 IRAtom* rmPCasted = mkPCastTo(mce, Ity_I64, vatom1);
4891 return mkUifU64(mce, narrowed, rmPCasted);
4894 case Iop_F32toF16x8: {
4895 // Same scheme as for Iop_F32toF16x4.
4896 IRAtom* pcasted = mkPCast32x8(mce, vatom2); // :: 32x8
4897 IRAtom* pcHI128
4898 = assignNew('V', mce, Ity_V128, unop(Iop_V256toV128_1,
4899 pcasted)); // 32x4
4900 IRAtom* pcLO128
4901 = assignNew('V', mce, Ity_V128, unop(Iop_V256toV128_0,
4902 pcasted)); // 32x4
4903 IRAtom* narrowed
4904 = assignNew('V', mce, Ity_V128, binop(Iop_NarrowBin32to16x8,
4905 pcHI128, pcLO128)); // 16x8
4906 // Finally, roll in any badness from the rounding mode.
4907 IRAtom* rmPCasted = mkPCastTo(mce, Ity_V128, vatom1);
4908 return mkUifUV128(mce, narrowed, rmPCasted);
4911 default:
4912 ppIROp(op);
4913 VG_(tool_panic)("memcheck:expr2vbits_Binop");
4918 static
4919 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
4921 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
4922 selection of shadow operation implicitly duplicates the logic in
4923 do_shadow_LoadG and should be kept in sync (in the very unlikely
4924 event that the interpretation of such widening ops changes in
4925 future). See comment in do_shadow_LoadG. */
4926 IRAtom* vatom = expr2vbits( mce, atom, HuOth );
4927 tl_assert(isOriginalAtom(mce,atom));
4928 switch (op) {
4930 case Iop_Abs64Fx2:
4931 case Iop_Neg64Fx2:
4932 case Iop_RSqrtEst64Fx2:
4933 case Iop_RecipEst64Fx2:
4934 case Iop_Log2_64Fx2:
4935 return unary64Fx2(mce, vatom);
4937 case Iop_Sqrt64F0x2:
4938 return unary64F0x2(mce, vatom);
4940 case Iop_Sqrt32Fx8:
4941 case Iop_RSqrtEst32Fx8:
4942 case Iop_RecipEst32Fx8:
4943 return unary32Fx8(mce, vatom);
4945 case Iop_Sqrt64Fx4:
4946 return unary64Fx4(mce, vatom);
4948 case Iop_RecipEst32Fx4:
4949 case Iop_I32UtoF32x4_DEP:
4950 case Iop_I32StoF32x4_DEP:
4951 case Iop_QF32toI32Ux4_RZ:
4952 case Iop_QF32toI32Sx4_RZ:
4953 case Iop_RoundF32x4_RM:
4954 case Iop_RoundF32x4_RP:
4955 case Iop_RoundF32x4_RN:
4956 case Iop_RoundF32x4_RZ:
4957 case Iop_RecipEst32Ux4:
4958 case Iop_Abs32Fx4:
4959 case Iop_Neg32Fx4:
4960 case Iop_RSqrtEst32Fx4:
4961 case Iop_Log2_32Fx4:
4962 case Iop_Exp2_32Fx4:
4963 return unary32Fx4(mce, vatom);
4965 case Iop_I32UtoF32x2_DEP:
4966 case Iop_I32StoF32x2_DEP:
4967 case Iop_RecipEst32Fx2:
4968 case Iop_RecipEst32Ux2:
4969 case Iop_Abs32Fx2:
4970 case Iop_Neg32Fx2:
4971 case Iop_RSqrtEst32Fx2:
4972 return unary32Fx2(mce, vatom);
4974 case Iop_Sqrt32F0x4:
4975 case Iop_RSqrtEst32F0x4:
4976 case Iop_RecipEst32F0x4:
4977 return unary32F0x4(mce, vatom);
4979 // These are self-shadowing.
4980 case Iop_32UtoV128:
4981 case Iop_64UtoV128:
4982 case Iop_Dup8x16:
4983 case Iop_Dup16x8:
4984 case Iop_Dup32x4:
4985 case Iop_Reverse1sIn8_x16:
4986 case Iop_Reverse8sIn16_x8:
4987 case Iop_Reverse8sIn32_x4:
4988 case Iop_Reverse16sIn32_x4:
4989 case Iop_Reverse8sIn64_x2:
4990 case Iop_Reverse16sIn64_x2:
4991 case Iop_Reverse32sIn64_x2:
4992 case Iop_V256toV128_1: case Iop_V256toV128_0:
4993 case Iop_ZeroHI64ofV128:
4994 case Iop_ZeroHI96ofV128:
4995 case Iop_ZeroHI112ofV128:
4996 case Iop_ZeroHI120ofV128:
4997 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4999 case Iop_F128HItoF64: /* F128 -> high half of F128 */
5000 case Iop_D128HItoD64: /* D128 -> high half of D128 */
5001 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
5002 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
5003 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
5004 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
5006 case Iop_NegF128:
5007 case Iop_AbsF128:
5008 case Iop_RndF128:
5009 case Iop_TruncF128toI64S: /* F128 -> I64S */
5010 case Iop_TruncF128toI32S: /* F128 -> I32S (result stored in 64-bits) */
5011 case Iop_TruncF128toI64U: /* F128 -> I64U */
5012 case Iop_TruncF128toI32U: /* F128 -> I32U (result stored in 64-bits) */
5013 return mkPCastTo(mce, Ity_I128, vatom);
5015 case Iop_BCD128toI128S:
5016 case Iop_MulI128by10:
5017 case Iop_MulI128by10Carry:
5018 case Iop_F16toF64x2:
5019 case Iop_F64toF16x2_DEP:
5020 // FIXME JRS 2018-Nov-15. This is surely not correct!
5021 return vatom;
5023 case Iop_I32StoF128: /* signed I32 -> F128 */
5024 case Iop_I64StoF128: /* signed I64 -> F128 */
5025 case Iop_I32UtoF128: /* unsigned I32 -> F128 */
5026 case Iop_I64UtoF128: /* unsigned I64 -> F128 */
5027 case Iop_F32toF128: /* F32 -> F128 */
5028 case Iop_F64toF128: /* F64 -> F128 */
5029 case Iop_I32StoD128: /* signed I64 -> D128 */
5030 case Iop_I64StoD128: /* signed I64 -> D128 */
5031 case Iop_I32UtoD128: /* unsigned I32 -> D128 */
5032 case Iop_I64UtoD128: /* unsigned I64 -> D128 */
5033 return mkPCastTo(mce, Ity_I128, vatom);
5035 case Iop_F16toF64:
5036 case Iop_F32toF64:
5037 case Iop_I32StoF64:
5038 case Iop_I32UtoF64:
5039 case Iop_NegF64:
5040 case Iop_AbsF64:
5041 case Iop_RSqrtEst5GoodF64:
5042 case Iop_RoundF64toF64_NEAREST:
5043 case Iop_RoundF64toF64_NegINF:
5044 case Iop_RoundF64toF64_PosINF:
5045 case Iop_RoundF64toF64_ZERO:
5046 case Iop_D32toD64:
5047 case Iop_I32StoD64:
5048 case Iop_I32UtoD64:
5049 case Iop_ExtractExpD64: /* D64 -> I64 */
5050 case Iop_ExtractExpD128: /* D128 -> I64 */
5051 case Iop_ExtractSigD64: /* D64 -> I64 */
5052 case Iop_ExtractSigD128: /* D128 -> I64 */
5053 case Iop_DPBtoBCD:
5054 case Iop_BCDtoDPB:
5055 return mkPCastTo(mce, Ity_I64, vatom);
5057 case Iop_D64toD128:
5058 return mkPCastTo(mce, Ity_I128, vatom);
5060 case Iop_TruncF64asF32:
5061 case Iop_NegF32:
5062 case Iop_AbsF32:
5063 case Iop_F16toF32:
5064 return mkPCastTo(mce, Ity_I32, vatom);
5066 case Iop_Ctz32: case Iop_CtzNat32:
5067 case Iop_Ctz64: case Iop_CtzNat64:
5068 return expensiveCountTrailingZeroes(mce, op, atom, vatom);
5070 case Iop_Clz32: case Iop_ClzNat32:
5071 case Iop_Clz64: case Iop_ClzNat64:
5072 return expensiveCountLeadingZeroes(mce, op, atom, vatom);
5074 // PopCount32: this is slightly pessimistic. It is true that the
5075 // result depends on all input bits, so that aspect of the PCast is
5076 // correct. However, regardless of the input, only the lowest 5 bits
5077 // out of the output can ever be undefined. So we could actually
5078 // "improve" the results here by marking the top 27 bits of output as
5079 // defined. A similar comment applies for PopCount64.
5080 case Iop_PopCount32:
5081 return mkPCastTo(mce, Ity_I32, vatom);
5082 case Iop_PopCount64:
5083 return mkPCastTo(mce, Ity_I64, vatom);
5085 // These are self-shadowing.
5086 case Iop_1Uto64:
5087 case Iop_1Sto64:
5088 case Iop_8Uto64:
5089 case Iop_8Sto64:
5090 case Iop_16Uto64:
5091 case Iop_16Sto64:
5092 case Iop_32Sto64:
5093 case Iop_32Uto64:
5094 case Iop_V128to64:
5095 case Iop_V128HIto64:
5096 case Iop_128HIto64:
5097 case Iop_128to64:
5098 case Iop_Dup8x8:
5099 case Iop_Dup16x4:
5100 case Iop_Dup32x2:
5101 case Iop_Reverse8sIn16_x4:
5102 case Iop_Reverse8sIn32_x2:
5103 case Iop_Reverse16sIn32_x2:
5104 case Iop_Reverse8sIn64_x1:
5105 case Iop_Reverse16sIn64_x1:
5106 case Iop_Reverse32sIn64_x1:
5107 case Iop_V256to64_0: case Iop_V256to64_1:
5108 case Iop_V256to64_2: case Iop_V256to64_3:
5109 return assignNew('V', mce, Ity_I64, unop(op, vatom));
5111 // These are self-shadowing.
5112 case Iop_64to32:
5113 case Iop_64HIto32:
5114 case Iop_1Uto32:
5115 case Iop_1Sto32:
5116 case Iop_8Uto32:
5117 case Iop_16Uto32:
5118 case Iop_16Sto32:
5119 case Iop_8Sto32:
5120 case Iop_V128to32:
5121 case Iop_Reverse8sIn32_x1:
5122 return assignNew('V', mce, Ity_I32, unop(op, vatom));
5124 // These are self-shadowing.
5125 case Iop_8Sto16:
5126 case Iop_8Uto16:
5127 case Iop_32to16:
5128 case Iop_32HIto16:
5129 case Iop_64to16:
5130 case Iop_GetMSBs8x16:
5131 return assignNew('V', mce, Ity_I16, unop(op, vatom));
5133 // These are self-shadowing.
5134 case Iop_1Uto8:
5135 case Iop_1Sto8:
5136 case Iop_16to8:
5137 case Iop_16HIto8:
5138 case Iop_32to8:
5139 case Iop_64to8:
5140 case Iop_GetMSBs8x8:
5141 return assignNew('V', mce, Ity_I8, unop(op, vatom));
5143 case Iop_32to1:
5144 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
5146 case Iop_64to1:
5147 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
5149 case Iop_ReinterpF64asI64:
5150 case Iop_ReinterpI64asF64:
5151 case Iop_ReinterpI32asF32:
5152 case Iop_ReinterpF32asI32:
5153 case Iop_ReinterpI64asD64:
5154 case Iop_ReinterpD64asI64:
5155 case Iop_NotV256:
5156 case Iop_NotV128:
5157 case Iop_Not64:
5158 case Iop_Not32:
5159 case Iop_Not16:
5160 case Iop_Not8:
5161 case Iop_Not1:
5162 // FIXME JRS 2018-Nov-15. This is surely not correct!
5163 return vatom;
5165 case Iop_CmpNEZ8x8:
5166 case Iop_Cnt8x8:
5167 case Iop_Clz8x8:
5168 case Iop_Cls8x8:
5169 case Iop_Abs8x8:
5170 return mkPCast8x8(mce, vatom);
5172 case Iop_CmpNEZ8x16:
5173 case Iop_Cnt8x16:
5174 case Iop_Clz8x16:
5175 case Iop_Cls8x16:
5176 case Iop_Abs8x16:
5177 case Iop_Ctz8x16:
5178 return mkPCast8x16(mce, vatom);
5180 case Iop_CmpNEZ16x4:
5181 case Iop_Clz16x4:
5182 case Iop_Cls16x4:
5183 case Iop_Abs16x4:
5184 return mkPCast16x4(mce, vatom);
5186 case Iop_CmpNEZ16x8:
5187 case Iop_Clz16x8:
5188 case Iop_Cls16x8:
5189 case Iop_Abs16x8:
5190 case Iop_Ctz16x8:
5191 return mkPCast16x8(mce, vatom);
5193 case Iop_CmpNEZ32x2:
5194 case Iop_Clz32x2:
5195 case Iop_Cls32x2:
5196 case Iop_F32toI32Ux2_RZ:
5197 case Iop_F32toI32Sx2_RZ:
5198 case Iop_Abs32x2:
5199 return mkPCast32x2(mce, vatom);
5201 case Iop_CmpNEZ32x4:
5202 case Iop_Clz32x4:
5203 case Iop_Cls32x4:
5204 case Iop_F32toI32Ux4_RZ:
5205 case Iop_F32toI32Sx4_RZ:
5206 case Iop_Abs32x4:
5207 case Iop_RSqrtEst32Ux4:
5208 case Iop_Ctz32x4:
5209 return mkPCast32x4(mce, vatom);
5211 case Iop_CmpwNEZ32:
5212 return mkPCastTo(mce, Ity_I32, vatom);
5214 case Iop_CmpwNEZ64:
5215 return mkPCastTo(mce, Ity_I64, vatom);
5217 case Iop_CmpNEZ64x2:
5218 case Iop_CipherSV128:
5219 case Iop_Clz64x2:
5220 case Iop_Abs64x2:
5221 case Iop_Ctz64x2:
5222 return mkPCast64x2(mce, vatom);
5224 // This is self-shadowing.
5225 case Iop_PwBitMtxXpose64x2:
5226 return assignNew('V', mce, Ity_V128, unop(op, vatom));
5228 case Iop_NarrowUn16to8x8:
5229 case Iop_NarrowUn32to16x4:
5230 case Iop_NarrowUn64to32x2:
5231 case Iop_QNarrowUn16Sto8Sx8:
5232 case Iop_QNarrowUn16Sto8Ux8:
5233 case Iop_QNarrowUn16Uto8Ux8:
5234 case Iop_QNarrowUn32Sto16Sx4:
5235 case Iop_QNarrowUn32Sto16Ux4:
5236 case Iop_QNarrowUn32Uto16Ux4:
5237 case Iop_QNarrowUn64Sto32Sx2:
5238 case Iop_QNarrowUn64Sto32Ux2:
5239 case Iop_QNarrowUn64Uto32Ux2:
5240 return vectorNarrowUnV128(mce, op, vatom);
5242 // JRS FIXME 2019 Mar 17: per comments on F16toF32x4, this is probably not
5243 // right.
5244 case Iop_F32toF16x4_DEP:
5245 return vectorNarrowUnV128(mce, op, vatom);
5247 case Iop_Widen8Sto16x8:
5248 case Iop_Widen8Uto16x8:
5249 case Iop_Widen16Sto32x4:
5250 case Iop_Widen16Uto32x4:
5251 case Iop_Widen32Sto64x2:
5252 case Iop_Widen32Uto64x2:
5253 return vectorWidenI64(mce, op, vatom);
5255 case Iop_F16toF32x4:
5256 // JRS 2019 Mar 17: this definitely isn't right, but it probably works
5257 // OK by accident if -- as seems likely -- the F16 to F32 conversion
5258 // preserves will generate an output 32 bits with at least one 1 bit
5259 // set if there's one or more 1 bits set in the input 16 bits. More
5260 // correct code for this is just below, but commented out, so as to
5261 // avoid short-term backend failures on targets that can't do
5262 // Iop_Interleave{LO,HI}16x4.
5263 return vectorWidenI64(mce, op, vatom);
5265 case Iop_F16toF32x8: {
5266 // PCast the input at 16x8. This makes each lane hold either all
5267 // zeroes or all ones.
5268 IRAtom* pcasted = mkPCast16x8(mce, vatom); // :: I16x8
5269 // Now double the width of each lane to 32 bits. Because the lanes are
5270 // all zeroes or all ones, we can just copy the each lane twice into
5271 // the result. Here's the low half:
5272 IRAtom* widenedLO // :: I32x4
5273 = assignNew('V', mce, Ity_V128, binop(Iop_InterleaveLO16x8,
5274 pcasted, pcasted));
5275 // And the high half:
5276 IRAtom* widenedHI // :: I32x4
5277 = assignNew('V', mce, Ity_V128, binop(Iop_InterleaveHI16x8,
5278 pcasted, pcasted));
5279 // Glue them back together:
5280 return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
5281 widenedHI, widenedLO));
5284 // See comment just above, for Iop_F16toF32x4
5285 //case Iop_F16toF32x4: {
5286 // // Same scheme as F16toF32x4
5287 // IRAtom* pcasted = mkPCast16x4(mce, vatom); // :: I16x4
5288 // IRAtom* widenedLO // :: I32x2
5289 // = assignNew('V', mce, Ity_I64, binop(Iop_InterleaveLO16x4,
5290 // pcasted, pcasted));
5291 // IRAtom* widenedHI // :: I32x4
5292 // = assignNew('V', mce, Ity_I64, binop(Iop_InterleaveHI16x4,
5293 // pcasted, pcasted));
5294 // // Glue them back together:
5295 // return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
5296 // widenedHI, widenedLO));
5299 case Iop_PwAddL32Ux2:
5300 case Iop_PwAddL32Sx2:
5301 return mkPCastTo(mce, Ity_I64,
5302 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
5304 case Iop_PwAddL16Ux4:
5305 case Iop_PwAddL16Sx4:
5306 return mkPCast32x2(mce,
5307 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
5309 case Iop_PwAddL8Ux8:
5310 case Iop_PwAddL8Sx8:
5311 return mkPCast16x4(mce,
5312 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
5314 case Iop_PwAddL32Ux4:
5315 case Iop_PwAddL32Sx4:
5316 return mkPCast64x2(mce,
5317 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
5319 case Iop_PwAddL64Ux2:
5320 return mkPCast128x1(mce,
5321 assignNew('V', mce, Ity_V128, unop(op, mkPCast64x2(mce, vatom))));
5323 case Iop_PwAddL16Ux8:
5324 case Iop_PwAddL16Sx8:
5325 return mkPCast32x4(mce,
5326 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
5328 case Iop_PwAddL8Ux16:
5329 case Iop_PwAddL8Sx16:
5330 return mkPCast16x8(mce,
5331 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
5333 case Iop_I64UtoF32:
5334 default:
5335 ppIROp(op);
5336 VG_(tool_panic)("memcheck:expr2vbits_Unop");
5341 /* Worker function -- do not call directly. See comments on
5342 expr2vbits_Load for the meaning of |guard|.
5344 Generates IR to (1) perform a definedness test of |addr|, (2)
5345 perform a validity test of |addr|, and (3) return the Vbits for the
5346 location indicated by |addr|. All of this only happens when
5347 |guard| is NULL or |guard| evaluates to True at run time.
5349 If |guard| evaluates to False at run time, the returned value is
5350 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
5351 performed.
5353 The definedness of |guard| itself is not checked. That is assumed
5354 to have been done before this point, by the caller. */
5355 static
5356 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
5357 IREndness end, IRType ty,
5358 IRAtom* addr, UInt bias, IRAtom* guard )
5360 tl_assert(isOriginalAtom(mce,addr));
5361 tl_assert(end == Iend_LE || end == Iend_BE);
5363 /* First, emit a definedness test for the address. This also sets
5364 the address (shadow) to 'defined' following the test. */
5365 complainIfUndefined( mce, addr, guard );
5367 /* Now cook up a call to the relevant helper function, to read the
5368 data V bits from shadow memory. */
5369 ty = shadowTypeV(ty);
5371 void* helper = NULL;
5372 const HChar* hname = NULL;
5373 Bool ret_via_outparam = False;
5375 if (end == Iend_LE) {
5376 switch (ty) {
5377 case Ity_V256: helper = &MC_(helperc_LOADV256le);
5378 hname = "MC_(helperc_LOADV256le)";
5379 ret_via_outparam = True;
5380 break;
5381 case Ity_V128: helper = &MC_(helperc_LOADV128le);
5382 hname = "MC_(helperc_LOADV128le)";
5383 ret_via_outparam = True;
5384 break;
5385 case Ity_I64: helper = &MC_(helperc_LOADV64le);
5386 hname = "MC_(helperc_LOADV64le)";
5387 break;
5388 case Ity_I32: helper = &MC_(helperc_LOADV32le);
5389 hname = "MC_(helperc_LOADV32le)";
5390 break;
5391 case Ity_I16: helper = &MC_(helperc_LOADV16le);
5392 hname = "MC_(helperc_LOADV16le)";
5393 break;
5394 case Ity_I8: helper = &MC_(helperc_LOADV8);
5395 hname = "MC_(helperc_LOADV8)";
5396 break;
5397 default: ppIRType(ty);
5398 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
5400 } else {
5401 switch (ty) {
5402 case Ity_V256: helper = &MC_(helperc_LOADV256be);
5403 hname = "MC_(helperc_LOADV256be)";
5404 ret_via_outparam = True;
5405 break;
5406 case Ity_V128: helper = &MC_(helperc_LOADV128be);
5407 hname = "MC_(helperc_LOADV128be)";
5408 ret_via_outparam = True;
5409 break;
5410 case Ity_I64: helper = &MC_(helperc_LOADV64be);
5411 hname = "MC_(helperc_LOADV64be)";
5412 break;
5413 case Ity_I32: helper = &MC_(helperc_LOADV32be);
5414 hname = "MC_(helperc_LOADV32be)";
5415 break;
5416 case Ity_I16: helper = &MC_(helperc_LOADV16be);
5417 hname = "MC_(helperc_LOADV16be)";
5418 break;
5419 case Ity_I8: helper = &MC_(helperc_LOADV8);
5420 hname = "MC_(helperc_LOADV8)";
5421 break;
5422 default: ppIRType(ty);
5423 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
5427 tl_assert(helper);
5428 tl_assert(hname);
5430 /* Generate the actual address into addrAct. */
5431 IRAtom* addrAct;
5432 if (bias == 0) {
5433 addrAct = addr;
5434 } else {
5435 IROp mkAdd;
5436 IRAtom* eBias;
5437 IRType tyAddr = mce->hWordTy;
5438 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
5439 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
5440 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
5441 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
5444 /* We need to have a place to park the V bits we're just about to
5445 read. */
5446 IRTemp datavbits = newTemp(mce, ty, VSh);
5448 /* Here's the call. */
5449 IRDirty* di;
5450 if (ret_via_outparam) {
5451 di = unsafeIRDirty_1_N( datavbits,
5452 2/*regparms*/,
5453 hname, VG_(fnptr_to_fnentry)( helper ),
5454 mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
5455 } else {
5456 di = unsafeIRDirty_1_N( datavbits,
5457 1/*regparms*/,
5458 hname, VG_(fnptr_to_fnentry)( helper ),
5459 mkIRExprVec_1( addrAct ) );
5462 setHelperAnns( mce, di );
5463 if (guard) {
5464 di->guard = guard;
5465 /* Ideally the didn't-happen return value here would be all-ones
5466 (all-undefined), so it'd be obvious if it got used
5467 inadvertently. We can get by with the IR-mandated default
5468 value (0b01 repeating, 0x55 etc) as that'll still look pretty
5469 undefined if it ever leaks out. */
5471 stmt( 'V', mce, IRStmt_Dirty(di) );
5473 return mkexpr(datavbits);
5477 /* Generate IR to do a shadow load. The helper is expected to check
5478 the validity of the address and return the V bits for that address.
5479 This can optionally be controlled by a guard, which is assumed to
5480 be True if NULL. In the case where the guard is False at runtime,
5481 the helper will return the didn't-do-the-call value of 0x55..55.
5482 Since that means "completely undefined result", the caller of
5483 this function will need to fix up the result somehow in that
5484 case.
5486 Caller of this function is also expected to have checked the
5487 definedness of |guard| before this point.
5489 static
5490 IRAtom* expr2vbits_Load ( MCEnv* mce,
5491 IREndness end, IRType ty,
5492 IRAtom* addr, UInt bias,
5493 IRAtom* guard )
5495 tl_assert(end == Iend_LE || end == Iend_BE);
5496 switch (shadowTypeV(ty)) {
5497 case Ity_I8:
5498 case Ity_I16:
5499 case Ity_I32:
5500 case Ity_I64:
5501 case Ity_V128:
5502 case Ity_V256:
5503 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
5504 default:
5505 VG_(tool_panic)("expr2vbits_Load");
5510 /* The most general handler for guarded loads. Assumes the
5511 definedness of GUARD has already been checked by the caller. A
5512 GUARD of NULL is assumed to mean "always True". Generates code to
5513 check the definedness and validity of ADDR.
5515 Generate IR to do a shadow load from ADDR and return the V bits.
5516 The loaded type is TY. The loaded data is then (shadow) widened by
5517 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
5518 evaluates to False at run time then the returned Vbits are simply
5519 VALT instead. Note therefore that the argument type of VWIDEN must
5520 be TY and the result type of VWIDEN must equal the type of VALT.
5522 static
5523 IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
5524 IREndness end, IRType ty,
5525 IRAtom* addr, UInt bias,
5526 IRAtom* guard,
5527 IROp vwiden, IRAtom* valt )
5529 /* Sanity check the conversion operation, and also set TYWIDE. */
5530 IRType tyWide = Ity_INVALID;
5531 switch (vwiden) {
5532 case Iop_INVALID:
5533 tyWide = ty;
5534 break;
5535 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
5536 tyWide = Ity_I32;
5537 break;
5538 default:
5539 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
5542 /* If the guard evaluates to True, this will hold the loaded V bits
5543 at TY. If the guard evaluates to False, this will be all
5544 ones, meaning "all undefined", in which case we will have to
5545 replace it using an ITE below. */
5546 IRAtom* iftrue1
5547 = assignNew('V', mce, ty,
5548 expr2vbits_Load(mce, end, ty, addr, bias, guard));
5549 /* Now (shadow-) widen the loaded V bits to the desired width. In
5550 the guard-is-False case, the allowable widening operators will
5551 in the worst case (unsigned widening) at least leave the
5552 pre-widened part as being marked all-undefined, and in the best
5553 case (signed widening) mark the whole widened result as
5554 undefined. Anyway, it doesn't matter really, since in this case
5555 we will replace said value with the default value |valt| using an
5556 ITE. */
5557 IRAtom* iftrue2
5558 = vwiden == Iop_INVALID
5559 ? iftrue1
5560 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
5561 /* These are the V bits we will return if the load doesn't take
5562 place. */
5563 IRAtom* iffalse
5564 = valt;
5565 /* Prepare the cond for the ITE. Convert a NULL cond into
5566 something that iropt knows how to fold out later. */
5567 IRAtom* cond
5568 = guard == NULL ? mkU1(1) : guard;
5569 /* And assemble the final result. */
5570 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
5574 /* A simpler handler for guarded loads, in which there is no
5575 conversion operation, and the default V bit return (when the guard
5576 evaluates to False at runtime) is "all defined". If there is no
5577 guard expression or the guard is always TRUE this function behaves
5578 like expr2vbits_Load. It is assumed that definedness of GUARD has
5579 already been checked at the call site. */
5580 static
5581 IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
5582 IREndness end, IRType ty,
5583 IRAtom* addr, UInt bias,
5584 IRAtom *guard )
5586 return expr2vbits_Load_guarded_General(
5587 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
5592 static
5593 IRAtom* expr2vbits_ITE ( MCEnv* mce,
5594 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
5596 IRAtom *vbitsC, *vbits0, *vbits1;
5597 IRType ty;
5598 /* Given ITE(cond, iftrue, iffalse), generate
5599 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
5600 That is, steer the V bits like the originals, but trash the
5601 result if the steering value is undefined. This gives
5602 lazy propagation. */
5603 tl_assert(isOriginalAtom(mce, cond));
5604 tl_assert(isOriginalAtom(mce, iftrue));
5605 tl_assert(isOriginalAtom(mce, iffalse));
5607 vbitsC = expr2vbits(mce, cond, HuOth); // could we use HuPCa here?
5608 vbits1 = expr2vbits(mce, iftrue, HuOth);
5609 vbits0 = expr2vbits(mce, iffalse, HuOth);
5610 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
5612 return
5613 mkUifU(mce, ty, assignNew('V', mce, ty,
5614 IRExpr_ITE(cond, vbits1, vbits0)),
5615 mkPCastTo(mce, ty, vbitsC) );
5618 /* --------- This is the main expression-handling function. --------- */
5620 static
5621 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e,
5622 HowUsed hu/*use HuOth if unknown*/ )
5624 switch (e->tag) {
5626 case Iex_Get:
5627 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
5629 case Iex_GetI:
5630 return shadow_GETI( mce, e->Iex.GetI.descr,
5631 e->Iex.GetI.ix, e->Iex.GetI.bias );
5633 case Iex_RdTmp:
5634 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
5636 case Iex_Const:
5637 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
5639 case Iex_Qop:
5640 return expr2vbits_Qop(
5641 mce,
5642 e->Iex.Qop.details->op,
5643 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
5644 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
5647 case Iex_Triop:
5648 return expr2vbits_Triop(
5649 mce,
5650 e->Iex.Triop.details->op,
5651 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
5652 e->Iex.Triop.details->arg3
5655 case Iex_Binop:
5656 return expr2vbits_Binop(
5657 mce,
5658 e->Iex.Binop.op,
5659 e->Iex.Binop.arg1, e->Iex.Binop.arg2,
5663 case Iex_Unop:
5664 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
5666 case Iex_Load:
5667 return expr2vbits_Load( mce, e->Iex.Load.end,
5668 e->Iex.Load.ty,
5669 e->Iex.Load.addr, 0/*addr bias*/,
5670 NULL/* guard == "always True"*/ );
5672 case Iex_CCall:
5673 return mkLazyN( mce, e->Iex.CCall.args,
5674 e->Iex.CCall.retty,
5675 e->Iex.CCall.cee );
5677 case Iex_ITE:
5678 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
5679 e->Iex.ITE.iffalse);
5681 default:
5682 VG_(printf)("\n");
5683 ppIRExpr(e);
5684 VG_(printf)("\n");
5685 VG_(tool_panic)("memcheck: expr2vbits");
5690 /*------------------------------------------------------------*/
5691 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
5692 /*------------------------------------------------------------*/
5694 /* Widen a value to the host word size. */
5696 static
5697 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
5699 IRType ty, tyH;
5701 /* vatom is vbits-value and as such can only have a shadow type. */
5702 tl_assert(isShadowAtom(mce,vatom));
5704 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
5705 tyH = mce->hWordTy;
5707 if (tyH == Ity_I32) {
5708 switch (ty) {
5709 case Ity_I32:
5710 return vatom;
5711 case Ity_I16:
5712 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
5713 case Ity_I8:
5714 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
5715 default:
5716 goto unhandled;
5718 } else
5719 if (tyH == Ity_I64) {
5720 switch (ty) {
5721 case Ity_I32:
5722 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
5723 case Ity_I16:
5724 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
5725 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
5726 case Ity_I8:
5727 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
5728 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
5729 default:
5730 goto unhandled;
5732 } else {
5733 goto unhandled;
5735 unhandled:
5736 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
5737 VG_(tool_panic)("zwidenToHostWord");
5741 /* Generate a shadow store. |addr| is always the original address
5742 atom. You can pass in either originals or V-bits for the data
5743 atom, but obviously not both. This function generates a check for
5744 the definedness and (indirectly) the validity of |addr|, but only
5745 when |guard| evaluates to True at run time (or is NULL).
5747 |guard| :: Ity_I1 controls whether the store really happens; NULL
5748 means it unconditionally does. Note that |guard| itself is not
5749 checked for definedness; the caller of this function must do that
5750 if necessary.
5752 static
5753 void do_shadow_Store ( MCEnv* mce,
5754 IREndness end,
5755 IRAtom* addr, UInt bias,
5756 IRAtom* data, IRAtom* vdata,
5757 IRAtom* guard )
5759 IROp mkAdd;
5760 IRType ty, tyAddr;
5761 void* helper = NULL;
5762 const HChar* hname = NULL;
5763 IRConst* c;
5765 tyAddr = mce->hWordTy;
5766 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
5767 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
5768 tl_assert( end == Iend_LE || end == Iend_BE );
5770 if (data) {
5771 tl_assert(!vdata);
5772 tl_assert(isOriginalAtom(mce, data));
5773 tl_assert(bias == 0);
5774 vdata = expr2vbits( mce, data, HuOth );
5775 } else {
5776 tl_assert(vdata);
5779 tl_assert(isOriginalAtom(mce,addr));
5780 tl_assert(isShadowAtom(mce,vdata));
5782 if (guard) {
5783 tl_assert(isOriginalAtom(mce, guard));
5784 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5787 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
5789 // If we're not doing undefined value checking, pretend that this value
5790 // is "all valid". That lets Vex's optimiser remove some of the V bit
5791 // shadow computation ops that precede it.
5792 if (MC_(clo_mc_level) == 1) {
5793 switch (ty) {
5794 case Ity_V256: // V256 weirdness -- used four times
5795 c = IRConst_V256(V_BITS32_DEFINED); break;
5796 case Ity_V128: // V128 weirdness -- used twice
5797 c = IRConst_V128(V_BITS16_DEFINED); break;
5798 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
5799 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
5800 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
5801 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
5802 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5804 vdata = IRExpr_Const( c );
5807 /* First, emit a definedness test for the address. This also sets
5808 the address (shadow) to 'defined' following the test. Both of
5809 those actions are gated on |guard|. */
5810 complainIfUndefined( mce, addr, guard );
5812 /* Now decide which helper function to call to write the data V
5813 bits into shadow memory. */
5814 if (end == Iend_LE) {
5815 switch (ty) {
5816 case Ity_V256: /* we'll use the helper four times */
5817 case Ity_V128: /* we'll use the helper twice */
5818 case Ity_I64: helper = &MC_(helperc_STOREV64le);
5819 hname = "MC_(helperc_STOREV64le)";
5820 break;
5821 case Ity_I32: helper = &MC_(helperc_STOREV32le);
5822 hname = "MC_(helperc_STOREV32le)";
5823 break;
5824 case Ity_I16: helper = &MC_(helperc_STOREV16le);
5825 hname = "MC_(helperc_STOREV16le)";
5826 break;
5827 case Ity_I8: helper = &MC_(helperc_STOREV8);
5828 hname = "MC_(helperc_STOREV8)";
5829 break;
5830 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5832 } else {
5833 switch (ty) {
5834 case Ity_V128: /* we'll use the helper twice */
5835 case Ity_I64: helper = &MC_(helperc_STOREV64be);
5836 hname = "MC_(helperc_STOREV64be)";
5837 break;
5838 case Ity_I32: helper = &MC_(helperc_STOREV32be);
5839 hname = "MC_(helperc_STOREV32be)";
5840 break;
5841 case Ity_I16: helper = &MC_(helperc_STOREV16be);
5842 hname = "MC_(helperc_STOREV16be)";
5843 break;
5844 case Ity_I8: helper = &MC_(helperc_STOREV8);
5845 hname = "MC_(helperc_STOREV8)";
5846 break;
5847 /* Note, no V256 case here, because no big-endian target that
5848 we support, has 256 vectors. */
5849 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
5853 if (UNLIKELY(ty == Ity_V256)) {
5855 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
5856 Q3 being the most significant lane. */
5857 /* These are the offsets of the Qs in memory. */
5858 Int offQ0, offQ1, offQ2, offQ3;
5860 /* Various bits for constructing the 4 lane helper calls */
5861 IRDirty *diQ0, *diQ1, *diQ2, *diQ3;
5862 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3;
5863 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
5864 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
5866 if (end == Iend_LE) {
5867 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
5868 } else {
5869 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
5872 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
5873 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
5874 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
5875 diQ0 = unsafeIRDirty_0_N(
5876 1/*regparms*/,
5877 hname, VG_(fnptr_to_fnentry)( helper ),
5878 mkIRExprVec_2( addrQ0, vdataQ0 )
5881 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
5882 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
5883 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
5884 diQ1 = unsafeIRDirty_0_N(
5885 1/*regparms*/,
5886 hname, VG_(fnptr_to_fnentry)( helper ),
5887 mkIRExprVec_2( addrQ1, vdataQ1 )
5890 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
5891 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
5892 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
5893 diQ2 = unsafeIRDirty_0_N(
5894 1/*regparms*/,
5895 hname, VG_(fnptr_to_fnentry)( helper ),
5896 mkIRExprVec_2( addrQ2, vdataQ2 )
5899 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
5900 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
5901 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
5902 diQ3 = unsafeIRDirty_0_N(
5903 1/*regparms*/,
5904 hname, VG_(fnptr_to_fnentry)( helper ),
5905 mkIRExprVec_2( addrQ3, vdataQ3 )
5908 if (guard)
5909 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
5911 setHelperAnns( mce, diQ0 );
5912 setHelperAnns( mce, diQ1 );
5913 setHelperAnns( mce, diQ2 );
5914 setHelperAnns( mce, diQ3 );
5915 stmt( 'V', mce, IRStmt_Dirty(diQ0) );
5916 stmt( 'V', mce, IRStmt_Dirty(diQ1) );
5917 stmt( 'V', mce, IRStmt_Dirty(diQ2) );
5918 stmt( 'V', mce, IRStmt_Dirty(diQ3) );
5921 else if (UNLIKELY(ty == Ity_V128)) {
5923 /* V128-bit case */
5924 /* See comment in next clause re 64-bit regparms */
5925 /* also, need to be careful about endianness */
5927 Int offLo64, offHi64;
5928 IRDirty *diLo64, *diHi64;
5929 IRAtom *addrLo64, *addrHi64;
5930 IRAtom *vdataLo64, *vdataHi64;
5931 IRAtom *eBiasLo64, *eBiasHi64;
5933 if (end == Iend_LE) {
5934 offLo64 = 0;
5935 offHi64 = 8;
5936 } else {
5937 offLo64 = 8;
5938 offHi64 = 0;
5941 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
5942 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
5943 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
5944 diLo64 = unsafeIRDirty_0_N(
5945 1/*regparms*/,
5946 hname, VG_(fnptr_to_fnentry)( helper ),
5947 mkIRExprVec_2( addrLo64, vdataLo64 )
5949 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
5950 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
5951 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
5952 diHi64 = unsafeIRDirty_0_N(
5953 1/*regparms*/,
5954 hname, VG_(fnptr_to_fnentry)( helper ),
5955 mkIRExprVec_2( addrHi64, vdataHi64 )
5957 if (guard) diLo64->guard = guard;
5958 if (guard) diHi64->guard = guard;
5959 setHelperAnns( mce, diLo64 );
5960 setHelperAnns( mce, diHi64 );
5961 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
5962 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
5964 } else {
5966 IRDirty *di;
5967 IRAtom *addrAct;
5969 /* 8/16/32/64-bit cases */
5970 /* Generate the actual address into addrAct. */
5971 if (bias == 0) {
5972 addrAct = addr;
5973 } else {
5974 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
5975 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
5978 if (ty == Ity_I64) {
5979 /* We can't do this with regparm 2 on 32-bit platforms, since
5980 the back ends aren't clever enough to handle 64-bit
5981 regparm args. Therefore be different. */
5982 di = unsafeIRDirty_0_N(
5983 1/*regparms*/,
5984 hname, VG_(fnptr_to_fnentry)( helper ),
5985 mkIRExprVec_2( addrAct, vdata )
5987 } else {
5988 di = unsafeIRDirty_0_N(
5989 2/*regparms*/,
5990 hname, VG_(fnptr_to_fnentry)( helper ),
5991 mkIRExprVec_2( addrAct,
5992 zwidenToHostWord( mce, vdata ))
5995 if (guard) di->guard = guard;
5996 setHelperAnns( mce, di );
5997 stmt( 'V', mce, IRStmt_Dirty(di) );
6003 /* Do lazy pessimistic propagation through a dirty helper call, by
6004 looking at the annotations on it. This is the most complex part of
6005 Memcheck. */
6007 static IRType szToITy ( Int n )
6009 switch (n) {
6010 case 1: return Ity_I8;
6011 case 2: return Ity_I16;
6012 case 4: return Ity_I32;
6013 case 8: return Ity_I64;
6014 default: VG_(tool_panic)("szToITy(memcheck)");
6018 static
6019 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
6021 Int i, k, n, toDo, gSz, gOff;
6022 IRAtom *src, *here, *curr;
6023 IRType tySrc, tyDst;
6024 IRTemp dst;
6025 IREndness end;
6027 /* What's the native endianness? We need to know this. */
6028 # if defined(VG_BIGENDIAN)
6029 end = Iend_BE;
6030 # elif defined(VG_LITTLEENDIAN)
6031 end = Iend_LE;
6032 # else
6033 # error "Unknown endianness"
6034 # endif
6036 /* First check the guard. */
6037 complainIfUndefined(mce, d->guard, NULL);
6039 /* Now round up all inputs and PCast over them. */
6040 curr = definedOfType(Ity_I32);
6042 /* Inputs: unmasked args
6043 Note: arguments are evaluated REGARDLESS of the guard expression */
6044 for (i = 0; d->args[i]; i++) {
6045 IRAtom* arg = d->args[i];
6046 if ( (d->cee->mcx_mask & (1<<i))
6047 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
6048 /* ignore this arg */
6049 } else {
6050 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg, HuOth) );
6051 curr = mkUifU32(mce, here, curr);
6055 /* Inputs: guest state that we read. */
6056 for (i = 0; i < d->nFxState; i++) {
6057 tl_assert(d->fxState[i].fx != Ifx_None);
6058 if (d->fxState[i].fx == Ifx_Write)
6059 continue;
6061 /* Enumerate the described state segments */
6062 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6063 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6064 gSz = d->fxState[i].size;
6066 /* Ignore any sections marked as 'always defined'. */
6067 if (isAlwaysDefd(mce, gOff, gSz)) {
6068 if (0)
6069 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6070 gOff, gSz);
6071 continue;
6074 /* This state element is read or modified. So we need to
6075 consider it. If larger than 8 bytes, deal with it in
6076 8-byte chunks. */
6077 while (True) {
6078 tl_assert(gSz >= 0);
6079 if (gSz == 0) break;
6080 n = gSz <= 8 ? gSz : 8;
6081 /* update 'curr' with UifU of the state slice
6082 gOff .. gOff+n-1 */
6083 tySrc = szToITy( n );
6085 /* Observe the guard expression. If it is false use an
6086 all-bits-defined bit pattern */
6087 IRAtom *cond, *iffalse, *iftrue;
6089 cond = assignNew('V', mce, Ity_I1, d->guard);
6090 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
6091 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
6092 src = assignNew('V', mce, tySrc,
6093 IRExpr_ITE(cond, iftrue, iffalse));
6095 here = mkPCastTo( mce, Ity_I32, src );
6096 curr = mkUifU32(mce, here, curr);
6097 gSz -= n;
6098 gOff += n;
6103 /* Inputs: memory. First set up some info needed regardless of
6104 whether we're doing reads or writes. */
6106 if (d->mFx != Ifx_None) {
6107 /* Because we may do multiple shadow loads/stores from the same
6108 base address, it's best to do a single test of its
6109 definedness right now. Post-instrumentation optimisation
6110 should remove all but this test. */
6111 IRType tyAddr;
6112 tl_assert(d->mAddr);
6113 complainIfUndefined(mce, d->mAddr, d->guard);
6115 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
6116 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
6117 tl_assert(tyAddr == mce->hWordTy); /* not really right */
6120 /* Deal with memory inputs (reads or modifies) */
6121 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
6122 toDo = d->mSize;
6123 /* chew off 32-bit chunks. We don't care about the endianness
6124 since it's all going to be condensed down to a single bit,
6125 but nevertheless choose an endianness which is hopefully
6126 native to the platform. */
6127 while (toDo >= 4) {
6128 here = mkPCastTo(
6129 mce, Ity_I32,
6130 expr2vbits_Load_guarded_Simple(
6131 mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
6133 curr = mkUifU32(mce, here, curr);
6134 toDo -= 4;
6136 /* chew off 16-bit chunks */
6137 while (toDo >= 2) {
6138 here = mkPCastTo(
6139 mce, Ity_I32,
6140 expr2vbits_Load_guarded_Simple(
6141 mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
6143 curr = mkUifU32(mce, here, curr);
6144 toDo -= 2;
6146 /* chew off the remaining 8-bit chunk, if any */
6147 if (toDo == 1) {
6148 here = mkPCastTo(
6149 mce, Ity_I32,
6150 expr2vbits_Load_guarded_Simple(
6151 mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
6153 curr = mkUifU32(mce, here, curr);
6154 toDo -= 1;
6156 tl_assert(toDo == 0);
6159 /* Whew! So curr is a 32-bit V-value summarising pessimistically
6160 all the inputs to the helper. Now we need to re-distribute the
6161 results to all destinations. */
6163 /* Outputs: the destination temporary, if there is one. */
6164 if (d->tmp != IRTemp_INVALID) {
6165 dst = findShadowTmpV(mce, d->tmp);
6166 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
6167 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
6170 /* Outputs: guest state that we write or modify. */
6171 for (i = 0; i < d->nFxState; i++) {
6172 tl_assert(d->fxState[i].fx != Ifx_None);
6173 if (d->fxState[i].fx == Ifx_Read)
6174 continue;
6176 /* Enumerate the described state segments */
6177 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6178 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6179 gSz = d->fxState[i].size;
6181 /* Ignore any sections marked as 'always defined'. */
6182 if (isAlwaysDefd(mce, gOff, gSz))
6183 continue;
6185 /* This state element is written or modified. So we need to
6186 consider it. If larger than 8 bytes, deal with it in
6187 8-byte chunks. */
6188 while (True) {
6189 tl_assert(gSz >= 0);
6190 if (gSz == 0) break;
6191 n = gSz <= 8 ? gSz : 8;
6192 /* Write suitably-casted 'curr' to the state slice
6193 gOff .. gOff+n-1 */
6194 tyDst = szToITy( n );
6195 do_shadow_PUT( mce, gOff,
6196 NULL, /* original atom */
6197 mkPCastTo( mce, tyDst, curr ), d->guard );
6198 gSz -= n;
6199 gOff += n;
6204 /* Outputs: memory that we write or modify. Same comments about
6205 endianness as above apply. */
6206 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
6207 toDo = d->mSize;
6208 /* chew off 32-bit chunks */
6209 while (toDo >= 4) {
6210 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
6211 NULL, /* original data */
6212 mkPCastTo( mce, Ity_I32, curr ),
6213 d->guard );
6214 toDo -= 4;
6216 /* chew off 16-bit chunks */
6217 while (toDo >= 2) {
6218 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
6219 NULL, /* original data */
6220 mkPCastTo( mce, Ity_I16, curr ),
6221 d->guard );
6222 toDo -= 2;
6224 /* chew off the remaining 8-bit chunk, if any */
6225 if (toDo == 1) {
6226 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
6227 NULL, /* original data */
6228 mkPCastTo( mce, Ity_I8, curr ),
6229 d->guard );
6230 toDo -= 1;
6232 tl_assert(toDo == 0);
6238 /* We have an ABI hint telling us that [base .. base+len-1] is to
6239 become undefined ("writable"). Generate code to call a helper to
6240 notify the A/V bit machinery of this fact.
6242 We call
6243 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
6244 Addr nia );
6246 static
6247 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
6249 IRDirty* di;
6251 if (MC_(clo_mc_level) == 3) {
6252 di = unsafeIRDirty_0_N(
6253 3/*regparms*/,
6254 "MC_(helperc_MAKE_STACK_UNINIT_w_o)",
6255 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_w_o) ),
6256 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
6258 } else {
6259 /* We ignore the supplied nia, since it is irrelevant. */
6260 tl_assert(MC_(clo_mc_level) == 2 || MC_(clo_mc_level) == 1);
6261 /* Special-case the len==128 case, since that is for amd64-ELF,
6262 which is a very common target. */
6263 if (len == 128) {
6264 di = unsafeIRDirty_0_N(
6265 1/*regparms*/,
6266 "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)",
6267 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o)),
6268 mkIRExprVec_1( base )
6270 } else {
6271 di = unsafeIRDirty_0_N(
6272 2/*regparms*/,
6273 "MC_(helperc_MAKE_STACK_UNINIT_no_o)",
6274 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_no_o) ),
6275 mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
6280 stmt( 'V', mce, IRStmt_Dirty(di) );
6284 /* ------ Dealing with IRCAS (big and complex) ------ */
6286 /* FWDS */
6287 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
6288 IRAtom* baseaddr, Int offset );
6289 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
6290 static void gen_store_b ( MCEnv* mce, Int szB,
6291 IRAtom* baseaddr, Int offset, IRAtom* dataB,
6292 IRAtom* guard );
6294 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
6295 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
6298 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
6299 IRExpr.Consts, else this asserts. If they are both Consts, it
6300 doesn't do anything. So that just leaves the RdTmp case.
6302 In which case: this assigns the shadow value SHADOW to the IR
6303 shadow temporary associated with ORIG. That is, ORIG, being an
6304 original temporary, will have a shadow temporary associated with
6305 it. However, in the case envisaged here, there will so far have
6306 been no IR emitted to actually write a shadow value into that
6307 temporary. What this routine does is to (emit IR to) copy the
6308 value in SHADOW into said temporary, so that after this call,
6309 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
6310 value in SHADOW.
6312 Point is to allow callers to compute "by hand" a shadow value for
6313 ORIG, and force it to be associated with ORIG.
6315 How do we know that that shadow associated with ORIG has not so far
6316 been assigned to? Well, we don't per se know that, but supposing
6317 it had. Then this routine would create a second assignment to it,
6318 and later the IR sanity checker would barf. But that never
6319 happens. QED.
6321 static void bind_shadow_tmp_to_orig ( UChar how,
6322 MCEnv* mce,
6323 IRAtom* orig, IRAtom* shadow )
6325 tl_assert(isOriginalAtom(mce, orig));
6326 tl_assert(isShadowAtom(mce, shadow));
6327 switch (orig->tag) {
6328 case Iex_Const:
6329 tl_assert(shadow->tag == Iex_Const);
6330 break;
6331 case Iex_RdTmp:
6332 tl_assert(shadow->tag == Iex_RdTmp);
6333 if (how == 'V') {
6334 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
6335 shadow);
6336 } else {
6337 tl_assert(how == 'B');
6338 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
6339 shadow);
6341 break;
6342 default:
6343 tl_assert(0);
6348 static
6349 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
6351 /* Scheme is (both single- and double- cases):
6353 1. fetch data#,dataB (the proposed new value)
6355 2. fetch expd#,expdB (what we expect to see at the address)
6357 3. check definedness of address
6359 4. load old#,oldB from shadow memory; this also checks
6360 addressibility of the address
6362 5. the CAS itself
6364 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
6366 7. if "expected == old" (as computed by (6))
6367 store data#,dataB to shadow memory
6369 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
6370 'data' but 7 stores 'data#'. Hence it is possible for the
6371 shadow data to be incorrectly checked and/or updated:
6373 * 7 is at least gated correctly, since the 'expected == old'
6374 condition is derived from outputs of 5. However, the shadow
6375 write could happen too late: imagine after 5 we are
6376 descheduled, a different thread runs, writes a different
6377 (shadow) value at the address, and then we resume, hence
6378 overwriting the shadow value written by the other thread.
6380 Because the original memory access is atomic, there's no way to
6381 make both the original and shadow accesses into a single atomic
6382 thing, hence this is unavoidable.
6384 At least as Valgrind stands, I don't think it's a problem, since
6385 we're single threaded *and* we guarantee that there are no
6386 context switches during the execution of any specific superblock
6387 -- context switches can only happen at superblock boundaries.
6389 If Valgrind ever becomes MT in the future, then it might be more
6390 of a problem. A possible kludge would be to artificially
6391 associate with the location, a lock, which we must acquire and
6392 release around the transaction as a whole. Hmm, that probably
6393 would't work properly since it only guards us against other
6394 threads doing CASs on the same location, not against other
6395 threads doing normal reads and writes.
6397 ------------------------------------------------------------
6399 COMMENT_ON_CasCmpEQ:
6401 Note two things. Firstly, in the sequence above, we compute
6402 "expected == old", but we don't check definedness of it. Why
6403 not? Also, the x86 and amd64 front ends use
6404 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
6405 determination (expected == old ?) for themselves, and we also
6406 don't check definedness for those primops; we just say that the
6407 result is defined. Why? Details follow.
6409 x86/amd64 contains various forms of locked insns:
6410 * lock prefix before all basic arithmetic insn;
6411 eg lock xorl %reg1,(%reg2)
6412 * atomic exchange reg-mem
6413 * compare-and-swaps
6415 Rather than attempt to represent them all, which would be a
6416 royal PITA, I used a result from Maurice Herlihy
6417 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
6418 demonstrates that compare-and-swap is a primitive more general
6419 than the other two, and so can be used to represent all of them.
6420 So the translation scheme for (eg) lock incl (%reg) is as
6421 follows:
6423 again:
6424 old = * %reg
6425 new = old + 1
6426 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
6428 The "atomically" is the CAS bit. The scheme is always the same:
6429 get old value from memory, compute new value, atomically stuff
6430 new value back in memory iff the old value has not changed (iow,
6431 no other thread modified it in the meantime). If it has changed
6432 then we've been out-raced and we have to start over.
6434 Now that's all very neat, but it has the bad side effect of
6435 introducing an explicit equality test into the translation.
6436 Consider the behaviour of said code on a memory location which
6437 is uninitialised. We will wind up doing a comparison on
6438 uninitialised data, and mc duly complains.
6440 What's difficult about this is, the common case is that the
6441 location is uncontended, and so we're usually comparing the same
6442 value (* %reg) with itself. So we shouldn't complain even if it
6443 is undefined. But mc doesn't know that.
6445 My solution is to mark the == in the IR specially, so as to tell
6446 mc that it almost certainly compares a value with itself, and we
6447 should just regard the result as always defined. Rather than
6448 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
6449 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
6451 So there's always the question of, can this give a false
6452 negative? eg, imagine that initially, * %reg is defined; and we
6453 read that; but then in the gap between the read and the CAS, a
6454 different thread writes an undefined (and different) value at
6455 the location. Then the CAS in this thread will fail and we will
6456 go back to "again:", but without knowing that the trip back
6457 there was based on an undefined comparison. No matter; at least
6458 the other thread won the race and the location is correctly
6459 marked as undefined. What if it wrote an uninitialised version
6460 of the same value that was there originally, though?
6462 etc etc. Seems like there's a small corner case in which we
6463 might lose the fact that something's defined -- we're out-raced
6464 in between the "old = * reg" and the "atomically {", _and_ the
6465 other thread is writing in an undefined version of what's
6466 already there. Well, that seems pretty unlikely.
6470 If we ever need to reinstate it .. code which generates a
6471 definedness test for "expected == old" was removed at r10432 of
6472 this file.
6474 if (cas->oldHi == IRTemp_INVALID) {
6475 do_shadow_CAS_single( mce, cas );
6476 } else {
6477 do_shadow_CAS_double( mce, cas );
6482 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
6484 IRAtom *vdataLo = NULL, *bdataLo = NULL;
6485 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
6486 IRAtom *voldLo = NULL, *boldLo = NULL;
6487 IRAtom *expd_eq_old = NULL;
6488 IROp opCasCmpEQ;
6489 Int elemSzB;
6490 IRType elemTy;
6491 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
6493 /* single CAS */
6494 tl_assert(cas->oldHi == IRTemp_INVALID);
6495 tl_assert(cas->expdHi == NULL);
6496 tl_assert(cas->dataHi == NULL);
6498 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
6499 switch (elemTy) {
6500 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break;
6501 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
6502 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
6503 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
6504 default: tl_assert(0); /* IR defn disallows any other types */
6507 /* 1. fetch data# (the proposed new value) */
6508 tl_assert(isOriginalAtom(mce, cas->dataLo));
6509 vdataLo
6510 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
6511 tl_assert(isShadowAtom(mce, vdataLo));
6512 if (otrak) {
6513 bdataLo
6514 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
6515 tl_assert(isShadowAtom(mce, bdataLo));
6518 /* 2. fetch expected# (what we expect to see at the address) */
6519 tl_assert(isOriginalAtom(mce, cas->expdLo));
6520 vexpdLo
6521 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
6522 tl_assert(isShadowAtom(mce, vexpdLo));
6523 if (otrak) {
6524 bexpdLo
6525 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
6526 tl_assert(isShadowAtom(mce, bexpdLo));
6529 /* 3. check definedness of address */
6530 /* 4. fetch old# from shadow memory; this also checks
6531 addressibility of the address */
6532 voldLo
6533 = assignNew(
6534 'V', mce, elemTy,
6535 expr2vbits_Load(
6536 mce,
6537 cas->end, elemTy, cas->addr, 0/*Addr bias*/,
6538 NULL/*always happens*/
6540 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
6541 if (otrak) {
6542 boldLo
6543 = assignNew('B', mce, Ity_I32,
6544 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
6545 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
6548 /* 5. the CAS itself */
6549 stmt( 'C', mce, IRStmt_CAS(cas) );
6551 /* 6. compute "expected == old" */
6552 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
6553 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6554 tree, but it's not copied from the input block. */
6555 expd_eq_old
6556 = assignNew('C', mce, Ity_I1,
6557 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
6559 /* 7. if "expected == old"
6560 store data# to shadow memory */
6561 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
6562 NULL/*data*/, vdataLo/*vdata*/,
6563 expd_eq_old/*guard for store*/ );
6564 if (otrak) {
6565 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
6566 bdataLo/*bdata*/,
6567 expd_eq_old/*guard for store*/ );
6572 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
6574 IRAtom *vdataHi = NULL, *bdataHi = NULL;
6575 IRAtom *vdataLo = NULL, *bdataLo = NULL;
6576 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
6577 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
6578 IRAtom *voldHi = NULL, *boldHi = NULL;
6579 IRAtom *voldLo = NULL, *boldLo = NULL;
6580 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
6581 IRAtom *expd_eq_old = NULL, *zero = NULL;
6582 IROp opCasCmpEQ, opOr, opXor;
6583 Int elemSzB, memOffsLo, memOffsHi;
6584 IRType elemTy;
6585 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
6587 /* double CAS */
6588 tl_assert(cas->oldHi != IRTemp_INVALID);
6589 tl_assert(cas->expdHi != NULL);
6590 tl_assert(cas->dataHi != NULL);
6592 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
6593 switch (elemTy) {
6594 case Ity_I8:
6595 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
6596 elemSzB = 1; zero = mkU8(0);
6597 break;
6598 case Ity_I16:
6599 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
6600 elemSzB = 2; zero = mkU16(0);
6601 break;
6602 case Ity_I32:
6603 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
6604 elemSzB = 4; zero = mkU32(0);
6605 break;
6606 case Ity_I64:
6607 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
6608 elemSzB = 8; zero = mkU64(0);
6609 break;
6610 default:
6611 tl_assert(0); /* IR defn disallows any other types */
6614 /* 1. fetch data# (the proposed new value) */
6615 tl_assert(isOriginalAtom(mce, cas->dataHi));
6616 tl_assert(isOriginalAtom(mce, cas->dataLo));
6617 vdataHi
6618 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi, HuOth));
6619 vdataLo
6620 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
6621 tl_assert(isShadowAtom(mce, vdataHi));
6622 tl_assert(isShadowAtom(mce, vdataLo));
6623 if (otrak) {
6624 bdataHi
6625 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
6626 bdataLo
6627 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
6628 tl_assert(isShadowAtom(mce, bdataHi));
6629 tl_assert(isShadowAtom(mce, bdataLo));
6632 /* 2. fetch expected# (what we expect to see at the address) */
6633 tl_assert(isOriginalAtom(mce, cas->expdHi));
6634 tl_assert(isOriginalAtom(mce, cas->expdLo));
6635 vexpdHi
6636 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi, HuOth));
6637 vexpdLo
6638 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
6639 tl_assert(isShadowAtom(mce, vexpdHi));
6640 tl_assert(isShadowAtom(mce, vexpdLo));
6641 if (otrak) {
6642 bexpdHi
6643 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
6644 bexpdLo
6645 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
6646 tl_assert(isShadowAtom(mce, bexpdHi));
6647 tl_assert(isShadowAtom(mce, bexpdLo));
6650 /* 3. check definedness of address */
6651 /* 4. fetch old# from shadow memory; this also checks
6652 addressibility of the address */
6653 if (cas->end == Iend_LE) {
6654 memOffsLo = 0;
6655 memOffsHi = elemSzB;
6656 } else {
6657 tl_assert(cas->end == Iend_BE);
6658 memOffsLo = elemSzB;
6659 memOffsHi = 0;
6661 voldHi
6662 = assignNew(
6663 'V', mce, elemTy,
6664 expr2vbits_Load(
6665 mce,
6666 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
6667 NULL/*always happens*/
6669 voldLo
6670 = assignNew(
6671 'V', mce, elemTy,
6672 expr2vbits_Load(
6673 mce,
6674 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
6675 NULL/*always happens*/
6677 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
6678 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
6679 if (otrak) {
6680 boldHi
6681 = assignNew('B', mce, Ity_I32,
6682 gen_load_b(mce, elemSzB, cas->addr,
6683 memOffsHi/*addr bias*/));
6684 boldLo
6685 = assignNew('B', mce, Ity_I32,
6686 gen_load_b(mce, elemSzB, cas->addr,
6687 memOffsLo/*addr bias*/));
6688 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
6689 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
6692 /* 5. the CAS itself */
6693 stmt( 'C', mce, IRStmt_CAS(cas) );
6695 /* 6. compute "expected == old" */
6696 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
6697 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6698 tree, but it's not copied from the input block. */
6700 xHi = oldHi ^ expdHi;
6701 xLo = oldLo ^ expdLo;
6702 xHL = xHi | xLo;
6703 expd_eq_old = xHL == 0;
6705 xHi = assignNew('C', mce, elemTy,
6706 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
6707 xLo = assignNew('C', mce, elemTy,
6708 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
6709 xHL = assignNew('C', mce, elemTy,
6710 binop(opOr, xHi, xLo));
6711 expd_eq_old
6712 = assignNew('C', mce, Ity_I1,
6713 binop(opCasCmpEQ, xHL, zero));
6715 /* 7. if "expected == old"
6716 store data# to shadow memory */
6717 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
6718 NULL/*data*/, vdataHi/*vdata*/,
6719 expd_eq_old/*guard for store*/ );
6720 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
6721 NULL/*data*/, vdataLo/*vdata*/,
6722 expd_eq_old/*guard for store*/ );
6723 if (otrak) {
6724 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
6725 bdataHi/*bdata*/,
6726 expd_eq_old/*guard for store*/ );
6727 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
6728 bdataLo/*bdata*/,
6729 expd_eq_old/*guard for store*/ );
6734 /* ------ Dealing with LL/SC (not difficult) ------ */
6736 static void do_shadow_LLSC ( MCEnv* mce,
6737 IREndness stEnd,
6738 IRTemp stResult,
6739 IRExpr* stAddr,
6740 IRExpr* stStoredata )
6742 /* In short: treat a load-linked like a normal load followed by an
6743 assignment of the loaded (shadow) data to the result temporary.
6744 Treat a store-conditional like a normal store, and mark the
6745 result temporary as defined. */
6746 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
6747 IRTemp resTmp = findShadowTmpV(mce, stResult);
6749 tl_assert(isIRAtom(stAddr));
6750 if (stStoredata)
6751 tl_assert(isIRAtom(stStoredata));
6753 if (stStoredata == NULL) {
6754 /* Load Linked */
6755 /* Just treat this as a normal load, followed by an assignment of
6756 the value to .result. */
6757 /* Stay sane */
6758 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6759 || resTy == Ity_I16 || resTy == Ity_I8);
6760 assign( 'V', mce, resTmp,
6761 expr2vbits_Load(
6762 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
6763 NULL/*always happens*/) );
6764 } else {
6765 /* Store Conditional */
6766 /* Stay sane */
6767 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
6768 stStoredata);
6769 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
6770 || dataTy == Ity_I16 || dataTy == Ity_I8);
6771 do_shadow_Store( mce, stEnd,
6772 stAddr, 0/* addr bias */,
6773 stStoredata,
6774 NULL /* shadow data */,
6775 NULL/*guard*/ );
6776 /* This is a store conditional, so it writes to .result a value
6777 indicating whether or not the store succeeded. Just claim
6778 this value is always defined. In the PowerPC interpretation
6779 of store-conditional, definedness of the success indication
6780 depends on whether the address of the store matches the
6781 reservation address. But we can't tell that here (and
6782 anyway, we're not being PowerPC-specific). At least we are
6783 guaranteed that the definedness of the store address, and its
6784 addressibility, will be checked as per normal. So it seems
6785 pretty safe to just say that the success indication is always
6786 defined.
6788 In schemeS, for origin tracking, we must correspondingly set
6789 a no-origin value for the origin shadow of .result.
6791 tl_assert(resTy == Ity_I1);
6792 assign( 'V', mce, resTmp, definedOfType(resTy) );
6797 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6799 static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
6801 complainIfUndefined(mce, sg->guard, NULL);
6802 /* do_shadow_Store will generate code to check the definedness and
6803 validity of sg->addr, in the case where sg->guard evaluates to
6804 True at run-time. */
6805 do_shadow_Store( mce, sg->end,
6806 sg->addr, 0/* addr bias */,
6807 sg->data,
6808 NULL /* shadow data */,
6809 sg->guard );
6812 static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
6814 complainIfUndefined(mce, lg->guard, NULL);
6815 /* expr2vbits_Load_guarded_General will generate code to check the
6816 definedness and validity of lg->addr, in the case where
6817 lg->guard evaluates to True at run-time. */
6819 /* Look at the LoadG's built-in conversion operation, to determine
6820 the source (actual loaded data) type, and the equivalent IROp.
6821 NOTE that implicitly we are taking a widening operation to be
6822 applied to original atoms and producing one that applies to V
6823 bits. Since signed and unsigned widening are self-shadowing,
6824 this is a straight copy of the op (modulo swapping from the
6825 IRLoadGOp form to the IROp form). Note also therefore that this
6826 implicitly duplicates the logic to do with said widening ops in
6827 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
6828 IROp vwiden = Iop_INVALID;
6829 IRType loadedTy = Ity_INVALID;
6830 switch (lg->cvt) {
6831 case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break;
6832 case ILGop_Ident64: loadedTy = Ity_I64; vwiden = Iop_INVALID; break;
6833 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
6834 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
6835 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
6836 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
6837 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
6838 default: VG_(tool_panic)("do_shadow_LoadG");
6841 IRAtom* vbits_alt
6842 = expr2vbits( mce, lg->alt, HuOth );
6843 IRAtom* vbits_final
6844 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
6845 lg->addr, 0/*addr bias*/,
6846 lg->guard, vwiden, vbits_alt );
6847 /* And finally, bind the V bits to the destination temporary. */
6848 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
6852 /*------------------------------------------------------------*/
6853 /*--- Origin tracking stuff ---*/
6854 /*------------------------------------------------------------*/
6856 /* Almost identical to findShadowTmpV. */
6857 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
6859 TempMapEnt* ent;
6860 /* VG_(indexXA) range-checks 'orig', hence no need to check
6861 here. */
6862 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6863 tl_assert(ent->kind == Orig);
6864 if (ent->shadowB == IRTemp_INVALID) {
6865 IRTemp tmpB
6866 = newTemp( mce, Ity_I32, BSh );
6867 /* newTemp may cause mce->tmpMap to resize, hence previous results
6868 from VG_(indexXA) are invalid. */
6869 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6870 tl_assert(ent->kind == Orig);
6871 tl_assert(ent->shadowB == IRTemp_INVALID);
6872 ent->shadowB = tmpB;
6874 return ent->shadowB;
6877 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6879 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6883 /* Make a guarded origin load, with no special handling in the
6884 didn't-happen case. A GUARD of NULL is assumed to mean "always
6885 True".
6887 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6888 return the otag. The loaded size is SZB. If GUARD evaluates to
6889 False at run time then the returned otag is zero.
6891 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
6892 IRAtom* baseaddr,
6893 Int offset, IRExpr* guard )
6895 void* hFun;
6896 const HChar* hName;
6897 IRTemp bTmp;
6898 IRDirty* di;
6899 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6900 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6901 IRAtom* ea = baseaddr;
6902 if (offset != 0) {
6903 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6904 : mkU64( (Long)(Int)offset );
6905 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6907 bTmp = newTemp(mce, mce->hWordTy, BSh);
6909 switch (szB) {
6910 case 1: hFun = (void*)&MC_(helperc_b_load1);
6911 hName = "MC_(helperc_b_load1)";
6912 break;
6913 case 2: hFun = (void*)&MC_(helperc_b_load2);
6914 hName = "MC_(helperc_b_load2)";
6915 break;
6916 case 4: hFun = (void*)&MC_(helperc_b_load4);
6917 hName = "MC_(helperc_b_load4)";
6918 break;
6919 case 8: hFun = (void*)&MC_(helperc_b_load8);
6920 hName = "MC_(helperc_b_load8)";
6921 break;
6922 case 16: hFun = (void*)&MC_(helperc_b_load16);
6923 hName = "MC_(helperc_b_load16)";
6924 break;
6925 case 32: hFun = (void*)&MC_(helperc_b_load32);
6926 hName = "MC_(helperc_b_load32)";
6927 break;
6928 default:
6929 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
6930 tl_assert(0);
6932 di = unsafeIRDirty_1_N(
6933 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
6934 mkIRExprVec_1( ea )
6936 if (guard) {
6937 di->guard = guard;
6938 /* Ideally the didn't-happen return value here would be
6939 all-zeroes (unknown-origin), so it'd be harmless if it got
6940 used inadvertently. We slum it out with the IR-mandated
6941 default value (0b01 repeating, 0x55 etc) as that'll probably
6942 trump all legitimate otags via Max32, and it's pretty
6943 obviously bogus. */
6945 /* no need to mess with any annotations. This call accesses
6946 neither guest state nor guest memory. */
6947 stmt( 'B', mce, IRStmt_Dirty(di) );
6948 if (mce->hWordTy == Ity_I64) {
6949 /* 64-bit host */
6950 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
6951 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
6952 return mkexpr(bTmp32);
6953 } else {
6954 /* 32-bit host */
6955 return mkexpr(bTmp);
6960 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
6961 loaded size is SZB. The load is regarded as unconditional (always
6962 happens).
6964 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6965 Int offset )
6967 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
6971 /* The most general handler for guarded origin loads. A GUARD of NULL
6972 is assumed to mean "always True".
6974 Generate IR to do a shadow origin load from ADDR+BIAS and return
6975 the B bits. The loaded type is TY. If GUARD evaluates to False at
6976 run time then the returned B bits are simply BALT instead.
6978 static
6979 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6980 IRType ty,
6981 IRAtom* addr, UInt bias,
6982 IRAtom* guard, IRAtom* balt )
6984 /* If the guard evaluates to True, this will hold the loaded
6985 origin. If the guard evaluates to False, this will be zero,
6986 meaning "unknown origin", in which case we will have to replace
6987 it using an ITE below. */
6988 IRAtom* iftrue
6989 = assignNew('B', mce, Ity_I32,
6990 gen_guarded_load_b(mce, sizeofIRType(ty),
6991 addr, bias, guard));
6992 /* These are the bits we will return if the load doesn't take
6993 place. */
6994 IRAtom* iffalse
6995 = balt;
6996 /* Prepare the cond for the ITE. Convert a NULL cond into
6997 something that iropt knows how to fold out later. */
6998 IRAtom* cond
6999 = guard == NULL ? mkU1(1) : guard;
7000 /* And assemble the final result. */
7001 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
7005 /* Generate a shadow origins store. guard :: Ity_I1 controls whether
7006 the store really happens; NULL means it unconditionally does. */
7007 static void gen_store_b ( MCEnv* mce, Int szB,
7008 IRAtom* baseaddr, Int offset, IRAtom* dataB,
7009 IRAtom* guard )
7011 void* hFun;
7012 const HChar* hName;
7013 IRDirty* di;
7014 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
7015 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
7016 IRAtom* ea = baseaddr;
7017 if (guard) {
7018 tl_assert(isOriginalAtom(mce, guard));
7019 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
7021 if (offset != 0) {
7022 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
7023 : mkU64( (Long)(Int)offset );
7024 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
7026 if (mce->hWordTy == Ity_I64)
7027 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
7029 switch (szB) {
7030 case 1: hFun = (void*)&MC_(helperc_b_store1);
7031 hName = "MC_(helperc_b_store1)";
7032 break;
7033 case 2: hFun = (void*)&MC_(helperc_b_store2);
7034 hName = "MC_(helperc_b_store2)";
7035 break;
7036 case 4: hFun = (void*)&MC_(helperc_b_store4);
7037 hName = "MC_(helperc_b_store4)";
7038 break;
7039 case 8: hFun = (void*)&MC_(helperc_b_store8);
7040 hName = "MC_(helperc_b_store8)";
7041 break;
7042 case 16: hFun = (void*)&MC_(helperc_b_store16);
7043 hName = "MC_(helperc_b_store16)";
7044 break;
7045 case 32: hFun = (void*)&MC_(helperc_b_store32);
7046 hName = "MC_(helperc_b_store32)";
7047 break;
7048 default:
7049 tl_assert(0);
7051 di = unsafeIRDirty_0_N( 2/*regparms*/,
7052 hName, VG_(fnptr_to_fnentry)( hFun ),
7053 mkIRExprVec_2( ea, dataB )
7055 /* no need to mess with any annotations. This call accesses
7056 neither guest state nor guest memory. */
7057 if (guard) di->guard = guard;
7058 stmt( 'B', mce, IRStmt_Dirty(di) );
7061 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
7062 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
7063 if (eTy == Ity_I64)
7064 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
7065 if (eTy == Ity_I32)
7066 return e;
7067 tl_assert(0);
7070 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
7071 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
7072 tl_assert(eTy == Ity_I32);
7073 if (dstTy == Ity_I64)
7074 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
7075 tl_assert(0);
7079 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
7081 tl_assert(MC_(clo_mc_level) == 3);
7083 switch (e->tag) {
7085 case Iex_GetI: {
7086 IRRegArray* descr_b;
7087 IRAtom *t1, *t2, *t3, *t4;
7088 IRRegArray* descr = e->Iex.GetI.descr;
7089 IRType equivIntTy
7090 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
7091 /* If this array is unshadowable for whatever reason, use the
7092 usual approximation. */
7093 if (equivIntTy == Ity_INVALID)
7094 return mkU32(0);
7095 tl_assert(sizeofIRType(equivIntTy) >= 4);
7096 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
7097 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
7098 equivIntTy, descr->nElems );
7099 /* Do a shadow indexed get of the same size, giving t1. Take
7100 the bottom 32 bits of it, giving t2. Compute into t3 the
7101 origin for the index (almost certainly zero, but there's
7102 no harm in being completely general here, since iropt will
7103 remove any useless code), and fold it in, giving a final
7104 value t4. */
7105 t1 = assignNew( 'B', mce, equivIntTy,
7106 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
7107 e->Iex.GetI.bias ));
7108 t2 = narrowTo32( mce, t1 );
7109 t3 = schemeE( mce, e->Iex.GetI.ix );
7110 t4 = gen_maxU32( mce, t2, t3 );
7111 return t4;
7113 case Iex_CCall: {
7114 Int i;
7115 IRAtom* here;
7116 IRExpr** args = e->Iex.CCall.args;
7117 IRAtom* curr = mkU32(0);
7118 for (i = 0; args[i]; i++) {
7119 tl_assert(i < 32);
7120 tl_assert(isOriginalAtom(mce, args[i]));
7121 /* Only take notice of this arg if the callee's
7122 mc-exclusion mask does not say it is to be excluded. */
7123 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
7124 /* the arg is to be excluded from definedness checking.
7125 Do nothing. */
7126 if (0) VG_(printf)("excluding %s(%d)\n",
7127 e->Iex.CCall.cee->name, i);
7128 } else {
7129 /* calculate the arg's definedness, and pessimistically
7130 merge it in. */
7131 here = schemeE( mce, args[i] );
7132 curr = gen_maxU32( mce, curr, here );
7135 return curr;
7137 case Iex_Load: {
7138 Int dszB;
7139 dszB = sizeofIRType(e->Iex.Load.ty);
7140 /* assert that the B value for the address is already
7141 available (somewhere) */
7142 tl_assert(isIRAtom(e->Iex.Load.addr));
7143 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
7144 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
7146 case Iex_ITE: {
7147 IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
7148 IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
7149 IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
7150 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
7152 case Iex_Qop: {
7153 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
7154 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
7155 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
7156 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
7157 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
7158 gen_maxU32( mce, b3, b4 ) );
7160 case Iex_Triop: {
7161 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
7162 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
7163 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
7164 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
7166 case Iex_Binop: {
7167 switch (e->Iex.Binop.op) {
7168 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
7169 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
7170 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
7171 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
7172 /* Just say these all produce a defined result,
7173 regardless of their arguments. See
7174 COMMENT_ON_CasCmpEQ in this file. */
7175 return mkU32(0);
7176 default: {
7177 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
7178 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
7179 return gen_maxU32( mce, b1, b2 );
7182 tl_assert(0);
7183 /*NOTREACHED*/
7185 case Iex_Unop: {
7186 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
7187 return b1;
7189 case Iex_Const:
7190 return mkU32(0);
7191 case Iex_RdTmp:
7192 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
7193 case Iex_Get: {
7194 Int b_offset = MC_(get_otrack_shadow_offset)(
7195 e->Iex.Get.offset,
7196 sizeofIRType(e->Iex.Get.ty)
7198 tl_assert(b_offset >= -1
7199 && b_offset <= mce->layout->total_sizeB -4);
7200 if (b_offset >= 0) {
7201 /* FIXME: this isn't an atom! */
7202 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
7203 Ity_I32 );
7205 return mkU32(0);
7207 default:
7208 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
7209 ppIRExpr(e);
7210 VG_(tool_panic)("memcheck:schemeE");
7215 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
7217 // This is a hacked version of do_shadow_Dirty
7218 Int i, k, n, toDo, gSz, gOff;
7219 IRAtom *here, *curr;
7220 IRTemp dst;
7222 /* First check the guard. */
7223 curr = schemeE( mce, d->guard );
7225 /* Now round up all inputs and maxU32 over them. */
7227 /* Inputs: unmasked args
7228 Note: arguments are evaluated REGARDLESS of the guard expression */
7229 for (i = 0; d->args[i]; i++) {
7230 IRAtom* arg = d->args[i];
7231 if ( (d->cee->mcx_mask & (1<<i))
7232 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
7233 /* ignore this arg */
7234 } else {
7235 here = schemeE( mce, arg );
7236 curr = gen_maxU32( mce, curr, here );
7240 /* Inputs: guest state that we read. */
7241 for (i = 0; i < d->nFxState; i++) {
7242 tl_assert(d->fxState[i].fx != Ifx_None);
7243 if (d->fxState[i].fx == Ifx_Write)
7244 continue;
7246 /* Enumerate the described state segments */
7247 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
7248 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
7249 gSz = d->fxState[i].size;
7251 /* Ignore any sections marked as 'always defined'. */
7252 if (isAlwaysDefd(mce, gOff, gSz)) {
7253 if (0)
7254 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
7255 gOff, gSz);
7256 continue;
7259 /* This state element is read or modified. So we need to
7260 consider it. If larger than 4 bytes, deal with it in
7261 4-byte chunks. */
7262 while (True) {
7263 Int b_offset;
7264 tl_assert(gSz >= 0);
7265 if (gSz == 0) break;
7266 n = gSz <= 4 ? gSz : 4;
7267 /* update 'curr' with maxU32 of the state slice
7268 gOff .. gOff+n-1 */
7269 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
7270 if (b_offset != -1) {
7271 /* Observe the guard expression. If it is false use 0, i.e.
7272 nothing is known about the origin */
7273 IRAtom *cond, *iffalse, *iftrue;
7275 cond = assignNew( 'B', mce, Ity_I1, d->guard);
7276 iffalse = mkU32(0);
7277 iftrue = assignNew( 'B', mce, Ity_I32,
7278 IRExpr_Get(b_offset
7279 + 2*mce->layout->total_sizeB,
7280 Ity_I32));
7281 here = assignNew( 'B', mce, Ity_I32,
7282 IRExpr_ITE(cond, iftrue, iffalse));
7283 curr = gen_maxU32( mce, curr, here );
7285 gSz -= n;
7286 gOff += n;
7291 /* Inputs: memory */
7293 if (d->mFx != Ifx_None) {
7294 /* Because we may do multiple shadow loads/stores from the same
7295 base address, it's best to do a single test of its
7296 definedness right now. Post-instrumentation optimisation
7297 should remove all but this test. */
7298 tl_assert(d->mAddr);
7299 here = schemeE( mce, d->mAddr );
7300 curr = gen_maxU32( mce, curr, here );
7303 /* Deal with memory inputs (reads or modifies) */
7304 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
7305 toDo = d->mSize;
7306 /* chew off 32-bit chunks. We don't care about the endianness
7307 since it's all going to be condensed down to a single bit,
7308 but nevertheless choose an endianness which is hopefully
7309 native to the platform. */
7310 while (toDo >= 4) {
7311 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
7312 d->guard );
7313 curr = gen_maxU32( mce, curr, here );
7314 toDo -= 4;
7316 /* handle possible 16-bit excess */
7317 while (toDo >= 2) {
7318 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
7319 d->guard );
7320 curr = gen_maxU32( mce, curr, here );
7321 toDo -= 2;
7323 /* chew off the remaining 8-bit chunk, if any */
7324 if (toDo == 1) {
7325 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
7326 d->guard );
7327 curr = gen_maxU32( mce, curr, here );
7328 toDo -= 1;
7330 tl_assert(toDo == 0);
7333 /* Whew! So curr is a 32-bit B-value which should give an origin
7334 of some use if any of the inputs to the helper are undefined.
7335 Now we need to re-distribute the results to all destinations. */
7337 /* Outputs: the destination temporary, if there is one. */
7338 if (d->tmp != IRTemp_INVALID) {
7339 dst = findShadowTmpB(mce, d->tmp);
7340 assign( 'V', mce, dst, curr );
7343 /* Outputs: guest state that we write or modify. */
7344 for (i = 0; i < d->nFxState; i++) {
7345 tl_assert(d->fxState[i].fx != Ifx_None);
7346 if (d->fxState[i].fx == Ifx_Read)
7347 continue;
7349 /* Enumerate the described state segments */
7350 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
7351 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
7352 gSz = d->fxState[i].size;
7354 /* Ignore any sections marked as 'always defined'. */
7355 if (isAlwaysDefd(mce, gOff, gSz))
7356 continue;
7358 /* This state element is written or modified. So we need to
7359 consider it. If larger than 4 bytes, deal with it in
7360 4-byte chunks. */
7361 while (True) {
7362 Int b_offset;
7363 tl_assert(gSz >= 0);
7364 if (gSz == 0) break;
7365 n = gSz <= 4 ? gSz : 4;
7366 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
7367 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
7368 if (b_offset != -1) {
7370 /* If the guard expression evaluates to false we simply Put
7371 the value that is already stored in the guest state slot */
7372 IRAtom *cond, *iffalse;
7374 cond = assignNew('B', mce, Ity_I1,
7375 d->guard);
7376 iffalse = assignNew('B', mce, Ity_I32,
7377 IRExpr_Get(b_offset +
7378 2*mce->layout->total_sizeB,
7379 Ity_I32));
7380 curr = assignNew('V', mce, Ity_I32,
7381 IRExpr_ITE(cond, curr, iffalse));
7383 stmt( 'B', mce, IRStmt_Put(b_offset
7384 + 2*mce->layout->total_sizeB,
7385 curr ));
7387 gSz -= n;
7388 gOff += n;
7393 /* Outputs: memory that we write or modify. Same comments about
7394 endianness as above apply. */
7395 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
7396 toDo = d->mSize;
7397 /* chew off 32-bit chunks */
7398 while (toDo >= 4) {
7399 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
7400 d->guard );
7401 toDo -= 4;
7403 /* handle possible 16-bit excess */
7404 while (toDo >= 2) {
7405 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
7406 d->guard );
7407 toDo -= 2;
7409 /* chew off the remaining 8-bit chunk, if any */
7410 if (toDo == 1) {
7411 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
7412 d->guard );
7413 toDo -= 1;
7415 tl_assert(toDo == 0);
7420 /* Generate IR for origin shadowing for a general guarded store. */
7421 static void do_origins_Store_guarded ( MCEnv* mce,
7422 IREndness stEnd,
7423 IRExpr* stAddr,
7424 IRExpr* stData,
7425 IRExpr* guard )
7427 Int dszB;
7428 IRAtom* dataB;
7429 /* assert that the B value for the address is already available
7430 (somewhere), since the call to schemeE will want to see it.
7431 XXXX how does this actually ensure that?? */
7432 tl_assert(isIRAtom(stAddr));
7433 tl_assert(isIRAtom(stData));
7434 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
7435 dataB = schemeE( mce, stData );
7436 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
7440 /* Generate IR for origin shadowing for a plain store. */
7441 static void do_origins_Store_plain ( MCEnv* mce,
7442 IREndness stEnd,
7443 IRExpr* stAddr,
7444 IRExpr* stData )
7446 do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
7447 NULL/*guard*/ );
7451 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7453 static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
7455 do_origins_Store_guarded( mce, sg->end, sg->addr,
7456 sg->data, sg->guard );
7459 static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
7461 IRType loadedTy = Ity_INVALID;
7462 switch (lg->cvt) {
7463 case ILGop_IdentV128: loadedTy = Ity_V128; break;
7464 case ILGop_Ident64: loadedTy = Ity_I64; break;
7465 case ILGop_Ident32: loadedTy = Ity_I32; break;
7466 case ILGop_16Uto32: loadedTy = Ity_I16; break;
7467 case ILGop_16Sto32: loadedTy = Ity_I16; break;
7468 case ILGop_8Uto32: loadedTy = Ity_I8; break;
7469 case ILGop_8Sto32: loadedTy = Ity_I8; break;
7470 default: VG_(tool_panic)("schemeS.IRLoadG");
7472 IRAtom* ori_alt
7473 = schemeE( mce,lg->alt );
7474 IRAtom* ori_final
7475 = expr2ori_Load_guarded_General(mce, loadedTy,
7476 lg->addr, 0/*addr bias*/,
7477 lg->guard, ori_alt );
7478 /* And finally, bind the origin to the destination temporary. */
7479 assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
7483 static void schemeS ( MCEnv* mce, IRStmt* st )
7485 tl_assert(MC_(clo_mc_level) == 3);
7487 switch (st->tag) {
7489 case Ist_AbiHint:
7490 /* The value-check instrumenter handles this - by arranging
7491 to pass the address of the next instruction to
7492 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
7493 happen for origin tracking w.r.t. AbiHints. So there is
7494 nothing to do here. */
7495 break;
7497 case Ist_PutI: {
7498 IRPutI *puti = st->Ist.PutI.details;
7499 IRRegArray* descr_b;
7500 IRAtom *t1, *t2, *t3, *t4;
7501 IRRegArray* descr = puti->descr;
7502 IRType equivIntTy
7503 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
7504 /* If this array is unshadowable for whatever reason,
7505 generate no code. */
7506 if (equivIntTy == Ity_INVALID)
7507 break;
7508 tl_assert(sizeofIRType(equivIntTy) >= 4);
7509 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
7510 descr_b
7511 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
7512 equivIntTy, descr->nElems );
7513 /* Compute a value to Put - the conjoinment of the origin for
7514 the data to be Put-ted (obviously) and of the index value
7515 (not so obviously). */
7516 t1 = schemeE( mce, puti->data );
7517 t2 = schemeE( mce, puti->ix );
7518 t3 = gen_maxU32( mce, t1, t2 );
7519 t4 = zWidenFrom32( mce, equivIntTy, t3 );
7520 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
7521 puti->bias, t4) ));
7522 break;
7525 case Ist_Dirty:
7526 do_origins_Dirty( mce, st->Ist.Dirty.details );
7527 break;
7529 case Ist_Store:
7530 do_origins_Store_plain( mce, st->Ist.Store.end,
7531 st->Ist.Store.addr,
7532 st->Ist.Store.data );
7533 break;
7535 case Ist_StoreG:
7536 do_origins_StoreG( mce, st->Ist.StoreG.details );
7537 break;
7539 case Ist_LoadG:
7540 do_origins_LoadG( mce, st->Ist.LoadG.details );
7541 break;
7543 case Ist_LLSC: {
7544 /* In short: treat a load-linked like a normal load followed
7545 by an assignment of the loaded (shadow) data the result
7546 temporary. Treat a store-conditional like a normal store,
7547 and mark the result temporary as defined. */
7548 if (st->Ist.LLSC.storedata == NULL) {
7549 /* Load Linked */
7550 IRType resTy
7551 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
7552 IRExpr* vanillaLoad
7553 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
7554 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
7555 || resTy == Ity_I16 || resTy == Ity_I8);
7556 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7557 schemeE(mce, vanillaLoad));
7558 } else {
7559 /* Store conditional */
7560 do_origins_Store_plain( mce, st->Ist.LLSC.end,
7561 st->Ist.LLSC.addr,
7562 st->Ist.LLSC.storedata );
7563 /* For the rationale behind this, see comments at the
7564 place where the V-shadow for .result is constructed, in
7565 do_shadow_LLSC. In short, we regard .result as
7566 always-defined. */
7567 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7568 mkU32(0) );
7570 break;
7573 case Ist_Put: {
7574 Int b_offset
7575 = MC_(get_otrack_shadow_offset)(
7576 st->Ist.Put.offset,
7577 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
7579 if (b_offset >= 0) {
7580 /* FIXME: this isn't an atom! */
7581 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
7582 schemeE( mce, st->Ist.Put.data )) );
7584 break;
7587 case Ist_WrTmp:
7588 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
7589 schemeE(mce, st->Ist.WrTmp.data) );
7590 break;
7592 case Ist_MBE:
7593 case Ist_NoOp:
7594 case Ist_Exit:
7595 case Ist_IMark:
7596 break;
7598 default:
7599 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
7600 ppIRStmt(st);
7601 VG_(tool_panic)("memcheck:schemeS");
7606 /*------------------------------------------------------------*/
7607 /*--- Post-tree-build final tidying ---*/
7608 /*------------------------------------------------------------*/
7610 /* This exploits the observation that Memcheck often produces
7611 repeated conditional calls of the form
7613 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
7615 with the same guard expression G guarding the same helper call.
7616 The second and subsequent calls are redundant. This usually
7617 results from instrumentation of guest code containing multiple
7618 memory references at different constant offsets from the same base
7619 register. After optimisation of the instrumentation, you get a
7620 test for the definedness of the base register for each memory
7621 reference, which is kinda pointless. MC_(final_tidy) therefore
7622 looks for such repeated calls and removes all but the first. */
7625 /* With some testing on perf/bz2.c, on amd64 and x86, compiled with
7626 gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to
7627 get almost all the benefits of this transformation whilst causing
7628 the slide-back case to just often enough to be verifiably
7629 correct. For posterity, the numbers are:
7631 bz2-32
7633 1 4,336 (112,212 -> 1,709,473; ratio 15.2)
7634 2 4,336 (112,194 -> 1,669,895; ratio 14.9)
7635 3 4,336 (112,194 -> 1,660,713; ratio 14.8)
7636 4 4,336 (112,194 -> 1,658,555; ratio 14.8)
7637 5 4,336 (112,194 -> 1,655,447; ratio 14.8)
7638 6 4,336 (112,194 -> 1,655,101; ratio 14.8)
7639 7 4,336 (112,194 -> 1,654,858; ratio 14.7)
7640 8 4,336 (112,194 -> 1,654,810; ratio 14.7)
7641 10 4,336 (112,194 -> 1,654,621; ratio 14.7)
7642 12 4,336 (112,194 -> 1,654,678; ratio 14.7)
7643 16 4,336 (112,194 -> 1,654,494; ratio 14.7)
7644 32 4,336 (112,194 -> 1,654,602; ratio 14.7)
7645 inf 4,336 (112,194 -> 1,654,602; ratio 14.7)
7647 bz2-64
7649 1 4,113 (107,329 -> 1,822,171; ratio 17.0)
7650 2 4,113 (107,329 -> 1,806,443; ratio 16.8)
7651 3 4,113 (107,329 -> 1,803,967; ratio 16.8)
7652 4 4,113 (107,329 -> 1,802,785; ratio 16.8)
7653 5 4,113 (107,329 -> 1,802,412; ratio 16.8)
7654 6 4,113 (107,329 -> 1,802,062; ratio 16.8)
7655 7 4,113 (107,329 -> 1,801,976; ratio 16.8)
7656 8 4,113 (107,329 -> 1,801,886; ratio 16.8)
7657 10 4,113 (107,329 -> 1,801,653; ratio 16.8)
7658 12 4,113 (107,329 -> 1,801,526; ratio 16.8)
7659 16 4,113 (107,329 -> 1,801,298; ratio 16.8)
7660 32 4,113 (107,329 -> 1,800,827; ratio 16.8)
7661 inf 4,113 (107,329 -> 1,800,827; ratio 16.8)
7664 /* Structs for recording which (helper, guard) pairs we have already
7665 seen. */
7667 #define N_TIDYING_PAIRS 16
7669 typedef
7670 struct { void* entry; IRExpr* guard; }
7671 Pair;
7673 typedef
7674 struct {
7675 Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/];
7676 UInt pairsUsed;
7678 Pairs;
7681 /* Return True if e1 and e2 definitely denote the same value (used to
7682 compare guards). Return False if unknown; False is the safe
7683 answer. Since guest registers and guest memory do not have the
7684 SSA property we must return False if any Gets or Loads appear in
7685 the expression. This implicitly assumes that e1 and e2 have the
7686 same IR type, which is always true here -- the type is Ity_I1. */
7688 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
7690 if (e1->tag != e2->tag)
7691 return False;
7692 switch (e1->tag) {
7693 case Iex_Const:
7694 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
7695 case Iex_Binop:
7696 return e1->Iex.Binop.op == e2->Iex.Binop.op
7697 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
7698 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
7699 case Iex_Unop:
7700 return e1->Iex.Unop.op == e2->Iex.Unop.op
7701 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
7702 case Iex_RdTmp:
7703 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
7704 case Iex_ITE:
7705 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
7706 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
7707 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
7708 case Iex_Qop:
7709 case Iex_Triop:
7710 case Iex_CCall:
7711 /* be lazy. Could define equality for these, but they never
7712 appear to be used. */
7713 return False;
7714 case Iex_Get:
7715 case Iex_GetI:
7716 case Iex_Load:
7717 /* be conservative - these may not give the same value each
7718 time */
7719 return False;
7720 case Iex_Binder:
7721 /* should never see this */
7722 /* fallthrough */
7723 default:
7724 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
7725 ppIRExpr(e1);
7726 VG_(tool_panic)("memcheck:sameIRValue");
7727 return False;
7731 /* See if 'pairs' already has an entry for (entry, guard). Return
7732 True if so. If not, add an entry. */
7734 static
7735 Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry )
7737 UInt i, n = tidyingEnv->pairsUsed;
7738 tl_assert(n <= N_TIDYING_PAIRS);
7739 for (i = 0; i < n; i++) {
7740 if (tidyingEnv->pairs[i].entry == entry
7741 && sameIRValue(tidyingEnv->pairs[i].guard, guard))
7742 return True;
7744 /* (guard, entry) wasn't found in the array. Add it at the end.
7745 If the array is already full, slide the entries one slot
7746 backwards. This means we will lose to ability to detect
7747 duplicates from the pair in slot zero, but that happens so
7748 rarely that it's unlikely to have much effect on overall code
7749 quality. Also, this strategy loses the check for the oldest
7750 tracked exit (memory reference, basically) and so that is (I'd
7751 guess) least likely to be re-used after this point. */
7752 tl_assert(i == n);
7753 if (n == N_TIDYING_PAIRS) {
7754 for (i = 1; i < N_TIDYING_PAIRS; i++) {
7755 tidyingEnv->pairs[i-1] = tidyingEnv->pairs[i];
7757 tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry;
7758 tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard;
7759 } else {
7760 tl_assert(n < N_TIDYING_PAIRS);
7761 tidyingEnv->pairs[n].entry = entry;
7762 tidyingEnv->pairs[n].guard = guard;
7763 n++;
7764 tidyingEnv->pairsUsed = n;
7766 return False;
7769 static Bool is_helperc_value_checkN_fail ( const HChar* name )
7771 /* This is expensive because it happens a lot. We are checking to
7772 see whether |name| is one of the following 8 strings:
7774 MC_(helperc_value_check8_fail_no_o)
7775 MC_(helperc_value_check4_fail_no_o)
7776 MC_(helperc_value_check0_fail_no_o)
7777 MC_(helperc_value_check1_fail_no_o)
7778 MC_(helperc_value_check8_fail_w_o)
7779 MC_(helperc_value_check0_fail_w_o)
7780 MC_(helperc_value_check1_fail_w_o)
7781 MC_(helperc_value_check4_fail_w_o)
7783 To speed it up, check the common prefix just once, rather than
7784 all 8 times.
7786 const HChar* prefix = "MC_(helperc_value_check";
7788 HChar n, p;
7789 while (True) {
7790 n = *name;
7791 p = *prefix;
7792 if (p == 0) break; /* ran off the end of the prefix */
7793 /* We still have some prefix to use */
7794 if (n == 0) return False; /* have prefix, but name ran out */
7795 if (n != p) return False; /* have both pfx and name, but no match */
7796 name++;
7797 prefix++;
7800 /* Check the part after the prefix. */
7801 tl_assert(*prefix == 0 && *name != 0);
7802 return 0==VG_(strcmp)(name, "8_fail_no_o)")
7803 || 0==VG_(strcmp)(name, "4_fail_no_o)")
7804 || 0==VG_(strcmp)(name, "0_fail_no_o)")
7805 || 0==VG_(strcmp)(name, "1_fail_no_o)")
7806 || 0==VG_(strcmp)(name, "8_fail_w_o)")
7807 || 0==VG_(strcmp)(name, "4_fail_w_o)")
7808 || 0==VG_(strcmp)(name, "0_fail_w_o)")
7809 || 0==VG_(strcmp)(name, "1_fail_w_o)");
7812 IRSB* MC_(final_tidy) ( IRSB* sb_in )
7814 Int i;
7815 IRStmt* st;
7816 IRDirty* di;
7817 IRExpr* guard;
7818 IRCallee* cee;
7819 Bool alreadyPresent;
7820 Pairs pairs;
7822 pairs.pairsUsed = 0;
7824 pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123;
7825 pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456;
7827 /* Scan forwards through the statements. Each time a call to one
7828 of the relevant helpers is seen, check if we have made a
7829 previous call to the same helper using the same guard
7830 expression, and if so, delete the call. */
7831 for (i = 0; i < sb_in->stmts_used; i++) {
7832 st = sb_in->stmts[i];
7833 tl_assert(st);
7834 if (st->tag != Ist_Dirty)
7835 continue;
7836 di = st->Ist.Dirty.details;
7837 guard = di->guard;
7838 tl_assert(guard);
7839 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
7840 cee = di->cee;
7841 if (!is_helperc_value_checkN_fail( cee->name ))
7842 continue;
7843 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
7844 guard 'guard'. Check if we have already seen a call to this
7845 function with the same guard. If so, delete it. If not,
7846 add it to the set of calls we do know about. */
7847 alreadyPresent = check_or_add( &pairs, guard, cee->addr );
7848 if (alreadyPresent) {
7849 sb_in->stmts[i] = IRStmt_NoOp();
7850 if (0) VG_(printf)("XX\n");
7854 tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123);
7855 tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456);
7857 return sb_in;
7860 #undef N_TIDYING_PAIRS
7863 /*------------------------------------------------------------*/
7864 /*--- Startup assertion checking ---*/
7865 /*------------------------------------------------------------*/
7867 void MC_(do_instrumentation_startup_checks)( void )
7869 /* Make a best-effort check to see that is_helperc_value_checkN_fail
7870 is working as we expect. */
7872 # define CHECK(_expected, _string) \
7873 tl_assert((_expected) == is_helperc_value_checkN_fail(_string))
7875 /* It should identify these 8, and no others, as targets. */
7876 CHECK(True, "MC_(helperc_value_check8_fail_no_o)");
7877 CHECK(True, "MC_(helperc_value_check4_fail_no_o)");
7878 CHECK(True, "MC_(helperc_value_check0_fail_no_o)");
7879 CHECK(True, "MC_(helperc_value_check1_fail_no_o)");
7880 CHECK(True, "MC_(helperc_value_check8_fail_w_o)");
7881 CHECK(True, "MC_(helperc_value_check0_fail_w_o)");
7882 CHECK(True, "MC_(helperc_value_check1_fail_w_o)");
7883 CHECK(True, "MC_(helperc_value_check4_fail_w_o)");
7885 /* Ad-hoc selection of other strings gathered via a quick test. */
7886 CHECK(False, "amd64g_dirtyhelper_CPUID_avx2");
7887 CHECK(False, "amd64g_dirtyhelper_RDTSC");
7888 CHECK(False, "MC_(helperc_b_load1)");
7889 CHECK(False, "MC_(helperc_b_load2)");
7890 CHECK(False, "MC_(helperc_b_load4)");
7891 CHECK(False, "MC_(helperc_b_load8)");
7892 CHECK(False, "MC_(helperc_b_load16)");
7893 CHECK(False, "MC_(helperc_b_load32)");
7894 CHECK(False, "MC_(helperc_b_store1)");
7895 CHECK(False, "MC_(helperc_b_store2)");
7896 CHECK(False, "MC_(helperc_b_store4)");
7897 CHECK(False, "MC_(helperc_b_store8)");
7898 CHECK(False, "MC_(helperc_b_store16)");
7899 CHECK(False, "MC_(helperc_b_store32)");
7900 CHECK(False, "MC_(helperc_LOADV8)");
7901 CHECK(False, "MC_(helperc_LOADV16le)");
7902 CHECK(False, "MC_(helperc_LOADV32le)");
7903 CHECK(False, "MC_(helperc_LOADV64le)");
7904 CHECK(False, "MC_(helperc_LOADV128le)");
7905 CHECK(False, "MC_(helperc_LOADV256le)");
7906 CHECK(False, "MC_(helperc_STOREV16le)");
7907 CHECK(False, "MC_(helperc_STOREV32le)");
7908 CHECK(False, "MC_(helperc_STOREV64le)");
7909 CHECK(False, "MC_(helperc_STOREV8)");
7910 CHECK(False, "track_die_mem_stack_8");
7911 CHECK(False, "track_new_mem_stack_8_w_ECU");
7912 CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
7913 CHECK(False, "VG_(unknown_SP_update_w_ECU)");
7915 # undef CHECK
7919 /*------------------------------------------------------------*/
7920 /*--- Memcheck main ---*/
7921 /*------------------------------------------------------------*/
7923 static Bool isBogusAtom ( IRAtom* at )
7925 if (at->tag == Iex_RdTmp)
7926 return False;
7927 tl_assert(at->tag == Iex_Const);
7929 ULong n = 0;
7930 IRConst* con = at->Iex.Const.con;
7931 switch (con->tag) {
7932 case Ico_U1: return False;
7933 case Ico_U8: n = (ULong)con->Ico.U8; break;
7934 case Ico_U16: n = (ULong)con->Ico.U16; break;
7935 case Ico_U32: n = (ULong)con->Ico.U32; break;
7936 case Ico_U64: n = (ULong)con->Ico.U64; break;
7937 case Ico_F32: return False;
7938 case Ico_F64: return False;
7939 case Ico_F32i: return False;
7940 case Ico_F64i: return False;
7941 case Ico_V128: return False;
7942 case Ico_V256: return False;
7943 default: ppIRExpr(at); tl_assert(0);
7945 /* VG_(printf)("%llx\n", n); */
7946 /* Shortcuts */
7947 if (LIKELY(n <= 0x0000000000001000ULL)) return False;
7948 if (LIKELY(n >= 0xFFFFFFFFFFFFF000ULL)) return False;
7949 /* The list of bogus atoms is: */
7950 return (/*32*/ n == 0xFEFEFEFFULL
7951 /*32*/ || n == 0x80808080ULL
7952 /*32*/ || n == 0x7F7F7F7FULL
7953 /*32*/ || n == 0x7EFEFEFFULL
7954 /*32*/ || n == 0x81010100ULL
7955 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
7956 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
7957 /*64*/ || n == 0x0000000000008080ULL
7958 /*64*/ || n == 0x8080808080808080ULL
7959 /*64*/ || n == 0x0101010101010101ULL
7964 /* Does 'st' mention any of the literals identified/listed in
7965 isBogusAtom()? */
7966 static inline Bool containsBogusLiterals ( /*FLAT*/ IRStmt* st )
7968 Int i;
7969 IRExpr* e;
7970 IRDirty* d;
7971 IRCAS* cas;
7972 switch (st->tag) {
7973 case Ist_WrTmp:
7974 e = st->Ist.WrTmp.data;
7975 switch (e->tag) {
7976 case Iex_Get:
7977 case Iex_RdTmp:
7978 return False;
7979 case Iex_Const:
7980 return isBogusAtom(e);
7981 case Iex_Unop:
7982 return isBogusAtom(e->Iex.Unop.arg)
7983 || e->Iex.Unop.op == Iop_GetMSBs8x16;
7984 case Iex_GetI:
7985 return isBogusAtom(e->Iex.GetI.ix);
7986 case Iex_Binop:
7987 return isBogusAtom(e->Iex.Binop.arg1)
7988 || isBogusAtom(e->Iex.Binop.arg2);
7989 case Iex_Triop:
7990 return isBogusAtom(e->Iex.Triop.details->arg1)
7991 || isBogusAtom(e->Iex.Triop.details->arg2)
7992 || isBogusAtom(e->Iex.Triop.details->arg3);
7993 case Iex_Qop:
7994 return isBogusAtom(e->Iex.Qop.details->arg1)
7995 || isBogusAtom(e->Iex.Qop.details->arg2)
7996 || isBogusAtom(e->Iex.Qop.details->arg3)
7997 || isBogusAtom(e->Iex.Qop.details->arg4);
7998 case Iex_ITE:
7999 return isBogusAtom(e->Iex.ITE.cond)
8000 || isBogusAtom(e->Iex.ITE.iftrue)
8001 || isBogusAtom(e->Iex.ITE.iffalse);
8002 case Iex_Load:
8003 return isBogusAtom(e->Iex.Load.addr);
8004 case Iex_CCall:
8005 for (i = 0; e->Iex.CCall.args[i]; i++)
8006 if (isBogusAtom(e->Iex.CCall.args[i]))
8007 return True;
8008 return False;
8009 default:
8010 goto unhandled;
8012 case Ist_Dirty:
8013 d = st->Ist.Dirty.details;
8014 for (i = 0; d->args[i]; i++) {
8015 IRAtom* atom = d->args[i];
8016 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(atom))) {
8017 if (isBogusAtom(atom))
8018 return True;
8021 if (isBogusAtom(d->guard))
8022 return True;
8023 if (d->mAddr && isBogusAtom(d->mAddr))
8024 return True;
8025 return False;
8026 case Ist_Put:
8027 return isBogusAtom(st->Ist.Put.data);
8028 case Ist_PutI:
8029 return isBogusAtom(st->Ist.PutI.details->ix)
8030 || isBogusAtom(st->Ist.PutI.details->data);
8031 case Ist_Store:
8032 return isBogusAtom(st->Ist.Store.addr)
8033 || isBogusAtom(st->Ist.Store.data);
8034 case Ist_StoreG: {
8035 IRStoreG* sg = st->Ist.StoreG.details;
8036 return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
8037 || isBogusAtom(sg->guard);
8039 case Ist_LoadG: {
8040 IRLoadG* lg = st->Ist.LoadG.details;
8041 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
8042 || isBogusAtom(lg->guard);
8044 case Ist_Exit:
8045 return isBogusAtom(st->Ist.Exit.guard);
8046 case Ist_AbiHint:
8047 return isBogusAtom(st->Ist.AbiHint.base)
8048 || isBogusAtom(st->Ist.AbiHint.nia);
8049 case Ist_NoOp:
8050 case Ist_IMark:
8051 case Ist_MBE:
8052 return False;
8053 case Ist_CAS:
8054 cas = st->Ist.CAS.details;
8055 return isBogusAtom(cas->addr)
8056 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
8057 || isBogusAtom(cas->expdLo)
8058 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
8059 || isBogusAtom(cas->dataLo);
8060 case Ist_LLSC:
8061 return isBogusAtom(st->Ist.LLSC.addr)
8062 || (st->Ist.LLSC.storedata
8063 ? isBogusAtom(st->Ist.LLSC.storedata)
8064 : False);
8065 default:
8066 unhandled:
8067 ppIRStmt(st);
8068 VG_(tool_panic)("hasBogusLiterals");
8073 /* This is the pre-instrumentation analysis. It does a backwards pass over
8074 the stmts in |sb_in| to determine a HowUsed value for each tmp defined in
8075 the block.
8077 Unrelatedly, it also checks all literals in the block with |isBogusAtom|,
8078 as a positive result from that is a strong indication that we need to
8079 expensively instrument add/sub in the block. We do both analyses in one
8080 pass, even though they are independent, so as to avoid the overhead of
8081 having to traverse the whole block twice.
8083 The usage pass proceeds as follows. Let max= be the max operation in the
8084 HowUsed lattice, hence
8086 X max= Y means X = max(X, Y)
8088 then
8090 for t in original tmps . useEnv[t] = HuUnU
8092 for t used in the block's . next field
8093 useEnv[t] max= HuPCa // because jmp targets are PCast-tested
8095 for st iterating *backwards* in the block
8097 match st
8099 case "t1 = load(t2)" // case 1
8100 useEnv[t2] max= HuPCa
8102 case "t1 = add(t2, t3)" // case 2
8103 useEnv[t2] max= useEnv[t1]
8104 useEnv[t3] max= useEnv[t1]
8106 other
8107 for t in st.usedTmps // case 3
8108 useEnv[t] max= HuOth
8109 // same as useEnv[t] = HuOth
8111 The general idea is that we accumulate, in useEnv[], information about
8112 how each tmp is used. That can be updated as we work further back
8113 through the block and find more uses of it, but its HowUsed value can
8114 only ascend the lattice, not descend.
8116 Initially we mark all tmps as unused. In case (1), if a tmp is seen to
8117 be used as a memory address, then its use is at least HuPCa. The point
8118 is that for a memory address we will add instrumentation to check if any
8119 bit of the address is undefined, which means that we won't need expensive
8120 V-bit propagation through an add expression that computed the address --
8121 cheap add instrumentation will be equivalent.
8123 Note in case (1) that if we have previously seen a non-memory-address use
8124 of the tmp, then its use will already be HuOth and will be unchanged by
8125 the max= operation. And if it turns out that the source of the tmp was
8126 an add, then we'll have to expensively instrument the add, because we
8127 can't prove that, for the previous non-memory-address use of the tmp,
8128 cheap and expensive instrumentation will be equivalent.
8130 In case 2, we propagate the usage-mode of the result of an add back
8131 through to its operands. Again, we use max= so as to take account of the
8132 fact that t2 or t3 might later in the block (viz, earlier in the
8133 iteration) have been used in a way that requires expensive add
8134 instrumentation.
8136 In case 3, we deal with all other tmp uses. We assume that we'll need a
8137 result that is as accurate as possible, so we max= HuOth into its use
8138 mode. Since HuOth is the top of the lattice, that's equivalent to just
8139 setting its use to HuOth.
8141 The net result of all this is that:
8143 tmps that are used either
8144 - only as a memory address, or
8145 - only as part of a tree of adds that computes a memory address,
8146 and has no other use
8147 are marked as HuPCa, and so we can instrument their generating Add
8148 nodes cheaply, which is the whole point of this analysis
8150 tmps that are used any other way at all are marked as HuOth
8152 tmps that are unused are marked as HuUnU. We don't expect to see any
8153 since we expect that the incoming IR has had all dead assignments
8154 removed by previous optimisation passes. Nevertheless the analysis is
8155 correct even in the presence of dead tmps.
8157 A final comment on dead tmps. In case 1 and case 2, we could actually
8158 conditionalise the updates thusly:
8160 if (useEnv[t1] > HuUnU) { useEnv[t2] max= HuPCa } // case 1
8162 if (useEnv[t1] > HuUnU) { useEnv[t2] max= useEnv[t1] } // case 2
8163 if (useEnv[t1] > HuUnU) { useEnv[t3] max= useEnv[t1] } // case 2
8165 In other words, if the assigned-to tmp |t1| is never used, then there's
8166 no point in propagating any use through to its operands. That won't
8167 change the final HuPCa-vs-HuOth results, which is what we care about.
8168 Given that we expect to get dead-code-free inputs, there's no point in
8169 adding this extra refinement.
8172 /* Helper for |preInstrumentationAnalysis|. */
8173 static inline void noteTmpUsesIn ( /*MOD*/HowUsed* useEnv,
8174 UInt tyenvUsed,
8175 HowUsed newUse, IRAtom* at )
8177 /* For the atom |at|, declare that for any tmp |t| in |at|, we will have
8178 seen a use of |newUse|. So, merge that info into |t|'s accumulated
8179 use info. */
8180 switch (at->tag) {
8181 case Iex_GSPTR:
8182 case Iex_VECRET:
8183 case Iex_Const:
8184 return;
8185 case Iex_RdTmp: {
8186 IRTemp t = at->Iex.RdTmp.tmp;
8187 tl_assert(t < tyenvUsed); // "is an original tmp"
8188 // The "max" operation in the lattice
8189 if (newUse > useEnv[t]) useEnv[t] = newUse;
8190 return;
8192 default:
8193 // We should never get here -- it implies non-flat IR
8194 ppIRExpr(at);
8195 VG_(tool_panic)("noteTmpUsesIn");
8197 /*NOTREACHED*/
8198 tl_assert(0);
8202 static void preInstrumentationAnalysis ( /*OUT*/HowUsed** useEnvP,
8203 /*OUT*/Bool* hasBogusLiteralsP,
8204 const IRSB* sb_in )
8206 const UInt nOrigTmps = (UInt)sb_in->tyenv->types_used;
8208 // We've seen no bogus literals so far.
8209 Bool bogus = False;
8211 // This is calloc'd, so implicitly all entries are initialised to HuUnU.
8212 HowUsed* useEnv = VG_(calloc)("mc.preInstrumentationAnalysis.1",
8213 nOrigTmps, sizeof(HowUsed));
8215 // Firstly, roll in contributions from the final dst address.
8216 bogus = isBogusAtom(sb_in->next);
8217 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, sb_in->next);
8219 // Now work backwards through the stmts.
8220 for (Int i = sb_in->stmts_used-1; i >= 0; i--) {
8221 IRStmt* st = sb_in->stmts[i];
8223 // Deal with literals.
8224 if (LIKELY(!bogus)) {
8225 bogus = containsBogusLiterals(st);
8228 // Deal with tmp uses.
8229 switch (st->tag) {
8230 case Ist_WrTmp: {
8231 IRTemp dst = st->Ist.WrTmp.tmp;
8232 IRExpr* rhs = st->Ist.WrTmp.data;
8233 // This is the one place where we have to consider all possible
8234 // tags for |rhs|, and can't just assume it is a tmp or a const.
8235 switch (rhs->tag) {
8236 case Iex_RdTmp:
8237 // just propagate demand for |dst| into this tmp use.
8238 noteTmpUsesIn(useEnv, nOrigTmps, useEnv[dst], rhs);
8239 break;
8240 case Iex_Unop:
8241 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.Unop.arg);
8242 break;
8243 case Iex_Binop:
8244 if (rhs->Iex.Binop.op == Iop_Add64
8245 || rhs->Iex.Binop.op == Iop_Add32) {
8246 // propagate demand for |dst| through to the operands.
8247 noteTmpUsesIn(useEnv, nOrigTmps,
8248 useEnv[dst], rhs->Iex.Binop.arg1);
8249 noteTmpUsesIn(useEnv, nOrigTmps,
8250 useEnv[dst], rhs->Iex.Binop.arg2);
8251 } else {
8252 // just say that the operands are used in some unknown way.
8253 noteTmpUsesIn(useEnv, nOrigTmps,
8254 HuOth, rhs->Iex.Binop.arg1);
8255 noteTmpUsesIn(useEnv, nOrigTmps,
8256 HuOth, rhs->Iex.Binop.arg2);
8258 break;
8259 case Iex_Triop: {
8260 // All operands are used in some unknown way.
8261 IRTriop* tri = rhs->Iex.Triop.details;
8262 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg1);
8263 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg2);
8264 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg3);
8265 break;
8267 case Iex_Qop: {
8268 // All operands are used in some unknown way.
8269 IRQop* qop = rhs->Iex.Qop.details;
8270 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg1);
8271 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg2);
8272 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg3);
8273 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg4);
8274 break;
8276 case Iex_Load:
8277 // The address will be checked (== PCasted).
8278 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.Load.addr);
8279 break;
8280 case Iex_ITE:
8281 // The condition is PCasted, the then- and else-values
8282 // aren't.
8283 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.ITE.cond);
8284 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.ITE.iftrue);
8285 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.ITE.iffalse);
8286 break;
8287 case Iex_CCall:
8288 // The args are used in unknown ways.
8289 for (IRExpr** args = rhs->Iex.CCall.args; *args; args++) {
8290 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, *args);
8292 break;
8293 case Iex_GetI: {
8294 // The index will be checked/PCasted (see do_shadow_GETI)
8295 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.GetI.ix);
8296 break;
8298 case Iex_Const:
8299 case Iex_Get:
8300 break;
8301 default:
8302 ppIRExpr(rhs);
8303 VG_(tool_panic)("preInstrumentationAnalysis:"
8304 " unhandled IRExpr");
8306 break;
8308 case Ist_Store:
8309 // The address will be checked (== PCasted). The data will be
8310 // used in some unknown way.
8311 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.Store.addr);
8312 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.Store.data);
8313 break;
8314 case Ist_Exit:
8315 // The guard will be checked (== PCasted)
8316 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.Exit.guard);
8317 break;
8318 case Ist_Put:
8319 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.Put.data);
8320 break;
8321 case Ist_PutI: {
8322 IRPutI* putI = st->Ist.PutI.details;
8323 // The index will be checked/PCasted (see do_shadow_PUTI). The
8324 // data will be used in an unknown way.
8325 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, putI->ix);
8326 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, putI->data);
8327 break;
8329 case Ist_Dirty: {
8330 IRDirty* d = st->Ist.Dirty.details;
8331 // The guard will be checked (== PCasted)
8332 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, d->guard);
8333 // The args will be used in unknown ways.
8334 for (IRExpr** args = d->args; *args; args++) {
8335 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, *args);
8337 break;
8339 case Ist_CAS: {
8340 IRCAS* cas = st->Ist.CAS.details;
8341 // Address will be pcasted, everything else used as unknown
8342 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, cas->addr);
8343 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->expdLo);
8344 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->dataLo);
8345 if (cas->expdHi)
8346 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->expdHi);
8347 if (cas->dataHi)
8348 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->dataHi);
8349 break;
8351 case Ist_AbiHint:
8352 // Both exprs are used in unknown ways. TODO: can we safely
8353 // just ignore AbiHints?
8354 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.base);
8355 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.nia);
8356 break;
8357 case Ist_StoreG: {
8358 // We might be able to do better, and use HuPCa for the addr.
8359 // It's not immediately obvious that we can, because the address
8360 // is regarded as "used" only when the guard is true.
8361 IRStoreG* sg = st->Ist.StoreG.details;
8362 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->addr);
8363 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->data);
8364 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->guard);
8365 break;
8367 case Ist_LoadG: {
8368 // Per similar comments to Ist_StoreG .. not sure whether this
8369 // is really optimal.
8370 IRLoadG* lg = st->Ist.LoadG.details;
8371 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->addr);
8372 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->alt);
8373 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->guard);
8374 break;
8376 case Ist_LLSC: {
8377 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.LLSC.addr);
8378 if (st->Ist.LLSC.storedata)
8379 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.LLSC.storedata);
8380 break;
8382 case Ist_MBE:
8383 case Ist_IMark:
8384 case Ist_NoOp:
8385 break;
8386 default: {
8387 ppIRStmt(st);
8388 VG_(tool_panic)("preInstrumentationAnalysis: unhandled IRStmt");
8391 } // Now work backwards through the stmts.
8393 // Return the computed use env and the bogus-atom flag.
8394 tl_assert(*useEnvP == NULL);
8395 *useEnvP = useEnv;
8397 tl_assert(*hasBogusLiteralsP == False);
8398 *hasBogusLiteralsP = bogus;
8402 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
8403 IRSB* sb_in,
8404 const VexGuestLayout* layout,
8405 const VexGuestExtents* vge,
8406 const VexArchInfo* archinfo_host,
8407 IRType gWordTy, IRType hWordTy )
8409 Bool verboze = 0||False;
8410 Int i, j, first_stmt;
8411 IRStmt* st;
8412 MCEnv mce;
8413 IRSB* sb_out;
8415 if (gWordTy != hWordTy) {
8416 /* We don't currently support this case. */
8417 VG_(tool_panic)("host/guest word size mismatch");
8420 /* Check we're not completely nuts */
8421 tl_assert(sizeof(UWord) == sizeof(void*));
8422 tl_assert(sizeof(Word) == sizeof(void*));
8423 tl_assert(sizeof(Addr) == sizeof(void*));
8424 tl_assert(sizeof(ULong) == 8);
8425 tl_assert(sizeof(Long) == 8);
8426 tl_assert(sizeof(UInt) == 4);
8427 tl_assert(sizeof(Int) == 4);
8429 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
8431 /* Set up SB */
8432 sb_out = deepCopyIRSBExceptStmts(sb_in);
8434 /* Set up the running environment. Both .sb and .tmpMap are
8435 modified as we go along. Note that tmps are added to both
8436 .sb->tyenv and .tmpMap together, so the valid index-set for
8437 those two arrays should always be identical. */
8438 VG_(memset)(&mce, 0, sizeof(mce));
8439 mce.sb = sb_out;
8440 mce.trace = verboze;
8441 mce.layout = layout;
8442 mce.hWordTy = hWordTy;
8443 mce.tmpHowUsed = NULL;
8445 /* BEGIN decide on expense levels for instrumentation. */
8447 /* Initially, select the cheap version of everything for which we have an
8448 option. */
8449 DetailLevelByOp__set_all( &mce.dlbo, DLcheap );
8451 /* Take account of the --expensive-definedness-checks= flag. */
8452 if (MC_(clo_expensive_definedness_checks) == EdcNO) {
8453 /* We just selected 'cheap for everything', so we don't need to do
8454 anything here. mce.tmpHowUsed remains NULL. */
8456 else if (MC_(clo_expensive_definedness_checks) == EdcYES) {
8457 /* Select 'expensive for everything'. mce.tmpHowUsed remains NULL. */
8458 DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
8460 else {
8461 tl_assert(MC_(clo_expensive_definedness_checks) == EdcAUTO);
8462 /* We'll make our own selection, based on known per-target constraints
8463 and also on analysis of the block to be instrumented. First, set
8464 up default values for detail levels.
8466 On x86 and amd64, we'll routinely encounter code optimised by LLVM
8467 5 and above. Enable accurate interpretation of the following.
8468 LLVM uses adds for some bitfield inserts, and we get a lot of false
8469 errors if the cheap interpretation is used, alas. Could solve this
8470 much better if we knew which of such adds came from x86/amd64 LEA
8471 instructions, since these are the only ones really needing the
8472 expensive interpretation, but that would require some way to tag
8473 them in the _toIR.c front ends, which is a lot of faffing around.
8474 So for now we use preInstrumentationAnalysis() to detect adds which
8475 are used only to construct memory addresses, which is an
8476 approximation to the above, and is self-contained.*/
8477 # if defined(VGA_x86)
8478 mce.dlbo.dl_Add32 = DLauto;
8479 mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
8480 # elif defined(VGA_amd64)
8481 mce.dlbo.dl_Add64 = DLauto;
8482 mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
8483 mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive;
8484 # elif defined(VGA_ppc64le)
8485 // Needed by (at least) set_AV_CR6() in the front end.
8486 mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive;
8487 # endif
8489 /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
8490 fill it in. */
8491 Bool hasBogusLiterals = False;
8492 preInstrumentationAnalysis( &mce.tmpHowUsed, &hasBogusLiterals, sb_in );
8494 if (hasBogusLiterals) {
8495 /* This happens very rarely. In this case just select expensive
8496 for everything, and throw away the tmp-use analysis results. */
8497 DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
8498 VG_(free)( mce.tmpHowUsed );
8499 mce.tmpHowUsed = NULL;
8500 } else {
8501 /* Nothing. mce.tmpHowUsed contains tmp-use analysis results,
8502 which will be used for some subset of Iop_{Add,Sub}{32,64},
8503 based on which ones are set to DLauto for this target. */
8507 DetailLevelByOp__check_sanity( &mce.dlbo );
8509 if (0) {
8510 // Debug printing: which tmps have been identified as PCast-only use
8511 if (mce.tmpHowUsed) {
8512 VG_(printf)("Cheapies: ");
8513 for (UInt q = 0; q < sb_in->tyenv->types_used; q++) {
8514 if (mce.tmpHowUsed[q] == HuPCa) {
8515 VG_(printf)("t%u ", q);
8518 VG_(printf)("\n");
8521 // Debug printing: number of ops by detail level
8522 UChar nCheap = DetailLevelByOp__count( &mce.dlbo, DLcheap );
8523 UChar nAuto = DetailLevelByOp__count( &mce.dlbo, DLauto );
8524 UChar nExpensive = DetailLevelByOp__count( &mce.dlbo, DLexpensive );
8525 tl_assert(nCheap + nAuto + nExpensive == 8);
8527 VG_(printf)("%u,%u,%u ", nCheap, nAuto, nExpensive);
8529 /* END decide on expense levels for instrumentation. */
8531 /* Initialise the running the tmp environment. */
8533 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
8534 sizeof(TempMapEnt));
8535 VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used);
8536 for (i = 0; i < sb_in->tyenv->types_used; i++) {
8537 TempMapEnt ent;
8538 ent.kind = Orig;
8539 ent.shadowV = IRTemp_INVALID;
8540 ent.shadowB = IRTemp_INVALID;
8541 VG_(addToXA)( mce.tmpMap, &ent );
8543 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
8545 /* Finally, begin instrumentation. */
8546 /* Copy verbatim any IR preamble preceding the first IMark */
8548 tl_assert(mce.sb == sb_out);
8549 tl_assert(mce.sb != sb_in);
8551 i = 0;
8552 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
8554 st = sb_in->stmts[i];
8555 tl_assert(st);
8556 tl_assert(isFlatIRStmt(st));
8558 stmt( 'C', &mce, sb_in->stmts[i] );
8559 i++;
8562 /* Nasty problem. IR optimisation of the pre-instrumented IR may
8563 cause the IR following the preamble to contain references to IR
8564 temporaries defined in the preamble. Because the preamble isn't
8565 instrumented, these temporaries don't have any shadows.
8566 Nevertheless uses of them following the preamble will cause
8567 memcheck to generate references to their shadows. End effect is
8568 to cause IR sanity check failures, due to references to
8569 non-existent shadows. This is only evident for the complex
8570 preambles used for function wrapping on TOC-afflicted platforms
8571 (ppc64-linux).
8573 The following loop therefore scans the preamble looking for
8574 assignments to temporaries. For each one found it creates an
8575 assignment to the corresponding (V) shadow temp, marking it as
8576 'defined'. This is the same resulting IR as if the main
8577 instrumentation loop before had been applied to the statement
8578 'tmp = CONSTANT'.
8580 Similarly, if origin tracking is enabled, we must generate an
8581 assignment for the corresponding origin (B) shadow, claiming
8582 no-origin, as appropriate for a defined value.
8584 for (j = 0; j < i; j++) {
8585 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
8586 /* findShadowTmpV checks its arg is an original tmp;
8587 no need to assert that here. */
8588 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
8589 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
8590 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
8591 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
8592 if (MC_(clo_mc_level) == 3) {
8593 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
8594 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
8595 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
8597 if (0) {
8598 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
8599 ppIRType( ty_v );
8600 VG_(printf)("\n");
8605 /* Iterate over the remaining stmts to generate instrumentation. */
8607 tl_assert(sb_in->stmts_used > 0);
8608 tl_assert(i >= 0);
8609 tl_assert(i < sb_in->stmts_used);
8610 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
8612 for (/* use current i*/; i < sb_in->stmts_used; i++) {
8614 st = sb_in->stmts[i];
8615 first_stmt = sb_out->stmts_used;
8617 if (verboze) {
8618 VG_(printf)("\n");
8619 ppIRStmt(st);
8620 VG_(printf)("\n");
8623 if (MC_(clo_mc_level) == 3) {
8624 /* See comments on case Ist_CAS below. */
8625 if (st->tag != Ist_CAS)
8626 schemeS( &mce, st );
8629 /* Generate instrumentation code for each stmt ... */
8631 switch (st->tag) {
8633 case Ist_WrTmp: {
8634 IRTemp dst = st->Ist.WrTmp.tmp;
8635 tl_assert(dst < (UInt)sb_in->tyenv->types_used);
8636 HowUsed hu = mce.tmpHowUsed ? mce.tmpHowUsed[dst]
8637 : HuOth/*we don't know, so play safe*/;
8638 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
8639 expr2vbits( &mce, st->Ist.WrTmp.data, hu ));
8640 break;
8643 case Ist_Put:
8644 do_shadow_PUT( &mce,
8645 st->Ist.Put.offset,
8646 st->Ist.Put.data,
8647 NULL /* shadow atom */, NULL /* guard */ );
8648 break;
8650 case Ist_PutI:
8651 do_shadow_PUTI( &mce, st->Ist.PutI.details);
8652 break;
8654 case Ist_Store:
8655 do_shadow_Store( &mce, st->Ist.Store.end,
8656 st->Ist.Store.addr, 0/* addr bias */,
8657 st->Ist.Store.data,
8658 NULL /* shadow data */,
8659 NULL/*guard*/ );
8660 break;
8662 case Ist_StoreG:
8663 do_shadow_StoreG( &mce, st->Ist.StoreG.details );
8664 break;
8666 case Ist_LoadG:
8667 do_shadow_LoadG( &mce, st->Ist.LoadG.details );
8668 break;
8670 case Ist_Exit:
8671 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
8672 break;
8674 case Ist_IMark:
8675 break;
8677 case Ist_NoOp:
8678 case Ist_MBE:
8679 break;
8681 case Ist_Dirty:
8682 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
8683 break;
8685 case Ist_AbiHint:
8686 do_AbiHint( &mce, st->Ist.AbiHint.base,
8687 st->Ist.AbiHint.len,
8688 st->Ist.AbiHint.nia );
8689 break;
8691 case Ist_CAS:
8692 do_shadow_CAS( &mce, st->Ist.CAS.details );
8693 /* Note, do_shadow_CAS copies the CAS itself to the output
8694 block, because it needs to add instrumentation both
8695 before and after it. Hence skip the copy below. Also
8696 skip the origin-tracking stuff (call to schemeS) above,
8697 since that's all tangled up with it too; do_shadow_CAS
8698 does it all. */
8699 break;
8701 case Ist_LLSC:
8702 do_shadow_LLSC( &mce,
8703 st->Ist.LLSC.end,
8704 st->Ist.LLSC.result,
8705 st->Ist.LLSC.addr,
8706 st->Ist.LLSC.storedata );
8707 break;
8709 default:
8710 VG_(printf)("\n");
8711 ppIRStmt(st);
8712 VG_(printf)("\n");
8713 VG_(tool_panic)("memcheck: unhandled IRStmt");
8715 } /* switch (st->tag) */
8717 if (0 && verboze) {
8718 for (j = first_stmt; j < sb_out->stmts_used; j++) {
8719 VG_(printf)(" ");
8720 ppIRStmt(sb_out->stmts[j]);
8721 VG_(printf)("\n");
8723 VG_(printf)("\n");
8726 /* ... and finally copy the stmt itself to the output. Except,
8727 skip the copy of IRCASs; see comments on case Ist_CAS
8728 above. */
8729 if (st->tag != Ist_CAS)
8730 stmt('C', &mce, st);
8733 /* Now we need to complain if the jump target is undefined. */
8734 first_stmt = sb_out->stmts_used;
8736 if (verboze) {
8737 VG_(printf)("sb_in->next = ");
8738 ppIRExpr(sb_in->next);
8739 VG_(printf)("\n\n");
8742 complainIfUndefined( &mce, sb_in->next, NULL );
8744 if (0 && verboze) {
8745 for (j = first_stmt; j < sb_out->stmts_used; j++) {
8746 VG_(printf)(" ");
8747 ppIRStmt(sb_out->stmts[j]);
8748 VG_(printf)("\n");
8750 VG_(printf)("\n");
8753 /* If this fails, there's been some serious snafu with tmp management,
8754 that should be investigated. */
8755 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
8756 VG_(deleteXA)( mce.tmpMap );
8758 if (mce.tmpHowUsed) {
8759 VG_(free)( mce.tmpHowUsed );
8762 tl_assert(mce.sb == sb_out);
8763 return sb_out;
8767 /*--------------------------------------------------------------------*/
8768 /*--- end mc_translate.c ---*/
8769 /*--------------------------------------------------------------------*/