1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*---------------------------------------------------------------*/
4 /*--- begin ir_opt.c ---*/
5 /*---------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2004-2017 OpenWorks LLP
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
31 Neither the names of the U.S. Department of Energy nor the
32 University of California nor the names of its contributors may be
33 used to endorse or promote products derived from this software
34 without prior written permission.
37 #include "libvex_basictypes.h"
38 #include "libvex_ir.h"
41 #include "main_util.h"
42 #include "main_globals.h"
46 /* Set to 1 for lots of debugging output. */
49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
53 /* What iropt does, 29 Dec 04.
55 It takes an IRSB and produces a new one with the same meaning,
58 After execution of the new BB, all guest state and guest memory is
59 the same as after execution of the original. This is true
60 regardless of how the block was exited (at the end vs side exit).
62 In addition, parts of the guest state will be identical to that
63 created by execution of the original at the following observation
66 * In a dirty helper call, any parts of the guest state that the
67 helper states that it reads or modifies will be up to date.
68 Also, guest memory will be up to date. Parts of the guest state
69 not marked as being read or modified by the helper cannot be
70 assumed to be up-to-date at the point where the helper is called.
72 * If iropt_register_updates == VexRegUpdSpAtMemAccess :
73 The guest state is only up to date only as explained above
74 (i.e. at SB exits and as specified by dirty helper call).
75 Also, the stack pointer register is up to date at memory
76 exception points (as this is needed for the stack extension
77 logic in m_signals.c).
79 * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
80 Immediately prior to any load or store, those parts of the guest
81 state marked as requiring precise exceptions will be up to date.
82 Also, guest memory will be up to date. Parts of the guest state
83 not marked as requiring precise exceptions cannot be assumed to
84 be up-to-date at the point of the load/store.
86 * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
87 Same as minimal, but all the guest state is up to date at memory
90 * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
91 Guest state is up to date at each instruction.
93 The relative order of loads and stores (including loads/stores of
94 guest memory done by dirty helpers annotated as such) is not
95 changed. However, the relative order of loads with no intervening
96 stores/modifies may be changed.
101 There are three levels of optimisation, controlled by
102 vex_control.iropt_level. Define first:
104 "Cheap transformations" are the following sequence:
105 * Redundant-Get removal
106 * Redundant-Put removal
107 * Constant propagation/folding
109 * Specialisation of clean helper functions
112 "Expensive transformations" are the following sequence:
114 * Folding of add/sub chains
115 * Redundant-GetI removal
116 * Redundant-PutI removal
119 Then the transformations are as follows, as defined by
120 vex_control.iropt_level:
123 * Flatten into atomic form.
125 Level 1: the following sequence:
126 * Flatten into atomic form.
127 * Cheap transformations.
129 Level 2: the following sequence
130 * Flatten into atomic form.
131 * Cheap transformations.
132 * If block contains any floating or vector types, CSE.
133 * If block contains GetI or PutI, Expensive transformations.
134 * Try unrolling loops. Three possible outcomes:
135 - No effect: do nothing more.
136 - Unrolled a loop, and block does not contain GetI or PutI:
139 - Unrolled a loop, and block contains GetI or PutI:
140 Do: * Expensive transformations
141 * Cheap transformations
144 /* Implementation notes, 29 Dec 04.
146 TODO (important): I think rPutI removal ignores precise exceptions
147 and is therefore in a sense, wrong. In the sense that PutIs are
148 assumed not to write parts of the guest state that we need to have
149 up-to-date at loads/stores. So far on x86 guest that has not
150 mattered since indeed only the x87 FP registers and tags are
151 accessed using GetI/PutI, and there is no need so far for them to
152 be up to date at mem exception points. The rPutI pass should be
155 TODO: improve pessimistic handling of precise exceptions
158 TODO: check interaction of rGetI and dirty helpers.
160 F64i constants are treated differently from other constants.
161 They are not regarded as atoms, and instead lifted off and
162 bound to temps. This allows them to participate in CSE, which
163 is important for getting good performance for x86 guest code.
165 CSE up F64 literals (already doing F64is)
167 CSE: consider carefully the requirement for precise exns
168 prior to making CSE any more aggressive. */
171 /*---------------------------------------------------------------*/
172 /*--- Finite mappery, of a sort ---*/
173 /*---------------------------------------------------------------*/
175 /* General map from HWord-sized thing HWord-sized thing. Could be by
176 hashing, but it's not clear whether or not this would really be any
189 static HashHW
* newHHW ( void )
191 HashHW
* h
= LibVEX_Alloc_inline(sizeof(HashHW
));
194 h
->inuse
= LibVEX_Alloc_inline(h
->size
* sizeof(Bool
));
195 h
->key
= LibVEX_Alloc_inline(h
->size
* sizeof(HWord
));
196 h
->val
= LibVEX_Alloc_inline(h
->size
* sizeof(HWord
));
201 /* Look up key in the map. */
203 static Bool
lookupHHW ( HashHW
* h
, /*OUT*/HWord
* val
, HWord key
)
206 /* vex_printf("lookupHHW(%llx)\n", key ); */
207 for (i
= 0; i
< h
->used
; i
++) {
208 if (h
->inuse
[i
] && h
->key
[i
] == key
) {
218 /* Add key->val to the map. Replaces any existing binding for key. */
220 static void addToHHW ( HashHW
* h
, HWord key
, HWord val
)
223 /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
225 /* Find and replace existing binding, if any. */
226 for (i
= 0; i
< h
->used
; i
++) {
227 if (h
->inuse
[i
] && h
->key
[i
] == key
) {
233 /* Ensure a space is available. */
234 if (h
->used
== h
->size
) {
235 /* Copy into arrays twice the size. */
236 Bool
* inuse2
= LibVEX_Alloc_inline(2 * h
->size
* sizeof(Bool
));
237 HWord
* key2
= LibVEX_Alloc_inline(2 * h
->size
* sizeof(HWord
));
238 HWord
* val2
= LibVEX_Alloc_inline(2 * h
->size
* sizeof(HWord
));
239 for (i
= j
= 0; i
< h
->size
; i
++) {
240 if (!h
->inuse
[i
]) continue;
253 /* Finally, add it. */
254 vassert(h
->used
< h
->size
);
255 h
->inuse
[h
->used
] = True
;
256 h
->key
[h
->used
] = key
;
257 h
->val
[h
->used
] = val
;
262 /*---------------------------------------------------------------*/
263 /*--- Flattening out a BB into atomic SSA form ---*/
264 /*---------------------------------------------------------------*/
266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
267 copies made by flattening. If in doubt return False. */
269 static Bool
isFlat ( IRExpr
* e
)
271 if (e
->tag
== Iex_Get
)
273 if (e
->tag
== Iex_Binop
)
274 return toBool( isIRAtom(e
->Iex
.Binop
.arg1
)
275 && isIRAtom(e
->Iex
.Binop
.arg2
) );
276 if (e
->tag
== Iex_Load
)
277 return isIRAtom(e
->Iex
.Load
.addr
);
281 /* Flatten out 'ex' so it is atomic, returning a new expression with
282 the same value, after having appended extra IRTemp assignments to
285 static IRExpr
* flatten_Expr ( IRSB
* bb
, IRExpr
* ex
)
289 IRType ty
= typeOfIRExpr(bb
->tyenv
, ex
);
295 t1
= newIRTemp(bb
->tyenv
, ty
);
296 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
297 IRExpr_GetI(ex
->Iex
.GetI
.descr
,
298 flatten_Expr(bb
, ex
->Iex
.GetI
.ix
),
299 ex
->Iex
.GetI
.bias
)));
300 return IRExpr_RdTmp(t1
);
303 t1
= newIRTemp(bb
->tyenv
, ty
);
305 IRStmt_WrTmp(t1
, ex
));
306 return IRExpr_RdTmp(t1
);
309 IRQop
* qop
= ex
->Iex
.Qop
.details
;
310 t1
= newIRTemp(bb
->tyenv
, ty
);
311 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
313 flatten_Expr(bb
, qop
->arg1
),
314 flatten_Expr(bb
, qop
->arg2
),
315 flatten_Expr(bb
, qop
->arg3
),
316 flatten_Expr(bb
, qop
->arg4
))));
317 return IRExpr_RdTmp(t1
);
321 IRTriop
* triop
= ex
->Iex
.Triop
.details
;
322 t1
= newIRTemp(bb
->tyenv
, ty
);
323 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
324 IRExpr_Triop(triop
->op
,
325 flatten_Expr(bb
, triop
->arg1
),
326 flatten_Expr(bb
, triop
->arg2
),
327 flatten_Expr(bb
, triop
->arg3
))));
328 return IRExpr_RdTmp(t1
);
332 t1
= newIRTemp(bb
->tyenv
, ty
);
333 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
334 IRExpr_Binop(ex
->Iex
.Binop
.op
,
335 flatten_Expr(bb
, ex
->Iex
.Binop
.arg1
),
336 flatten_Expr(bb
, ex
->Iex
.Binop
.arg2
))));
337 return IRExpr_RdTmp(t1
);
340 t1
= newIRTemp(bb
->tyenv
, ty
);
341 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
342 IRExpr_Unop(ex
->Iex
.Unop
.op
,
343 flatten_Expr(bb
, ex
->Iex
.Unop
.arg
))));
344 return IRExpr_RdTmp(t1
);
347 t1
= newIRTemp(bb
->tyenv
, ty
);
348 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
349 IRExpr_Load(ex
->Iex
.Load
.end
,
351 flatten_Expr(bb
, ex
->Iex
.Load
.addr
))));
352 return IRExpr_RdTmp(t1
);
355 newargs
= shallowCopyIRExprVec(ex
->Iex
.CCall
.args
);
356 for (i
= 0; newargs
[i
]; i
++)
357 newargs
[i
] = flatten_Expr(bb
, newargs
[i
]);
358 t1
= newIRTemp(bb
->tyenv
, ty
);
359 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
360 IRExpr_CCall(ex
->Iex
.CCall
.cee
,
363 return IRExpr_RdTmp(t1
);
366 t1
= newIRTemp(bb
->tyenv
, ty
);
367 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
368 IRExpr_ITE(flatten_Expr(bb
, ex
->Iex
.ITE
.cond
),
369 flatten_Expr(bb
, ex
->Iex
.ITE
.iftrue
),
370 flatten_Expr(bb
, ex
->Iex
.ITE
.iffalse
))));
371 return IRExpr_RdTmp(t1
);
374 /* Lift F64i constants out onto temps so they can be CSEd
376 if (ex
->Iex
.Const
.con
->tag
== Ico_F64i
) {
377 t1
= newIRTemp(bb
->tyenv
, ty
);
378 addStmtToIRSB(bb
, IRStmt_WrTmp(t1
,
379 IRExpr_Const(ex
->Iex
.Const
.con
)));
380 return IRExpr_RdTmp(t1
);
382 /* Leave all other constants alone. */
393 vpanic("flatten_Expr");
398 /* Append a completely flattened form of 'st' to the end of 'bb'. */
400 static void flatten_Stmt ( IRSB
* bb
, IRStmt
* st
)
403 IRExpr
*e1
, *e2
, *e3
, *e4
, *e5
;
406 IRPutI
*puti
, *puti2
;
411 if (isIRAtom(st
->Ist
.Put
.data
)) {
412 /* optimisation to reduce the amount of heap wasted
414 addStmtToIRSB(bb
, st
);
416 /* general case, always correct */
417 e1
= flatten_Expr(bb
, st
->Ist
.Put
.data
);
418 addStmtToIRSB(bb
, IRStmt_Put(st
->Ist
.Put
.offset
, e1
));
422 puti
= st
->Ist
.PutI
.details
;
423 e1
= flatten_Expr(bb
, puti
->ix
);
424 e2
= flatten_Expr(bb
, puti
->data
);
425 puti2
= mkIRPutI(puti
->descr
, e1
, puti
->bias
, e2
);
426 addStmtToIRSB(bb
, IRStmt_PutI(puti2
));
429 if (isFlat(st
->Ist
.WrTmp
.data
)) {
430 /* optimisation, to reduce the number of tmp-tmp
432 addStmtToIRSB(bb
, st
);
434 /* general case, always correct */
435 e1
= flatten_Expr(bb
, st
->Ist
.WrTmp
.data
);
436 addStmtToIRSB(bb
, IRStmt_WrTmp(st
->Ist
.WrTmp
.tmp
, e1
));
440 e1
= flatten_Expr(bb
, st
->Ist
.Store
.addr
);
441 e2
= flatten_Expr(bb
, st
->Ist
.Store
.data
);
442 addStmtToIRSB(bb
, IRStmt_Store(st
->Ist
.Store
.end
, e1
,e2
));
445 sg
= st
->Ist
.StoreG
.details
;
446 e1
= flatten_Expr(bb
, sg
->addr
);
447 e2
= flatten_Expr(bb
, sg
->data
);
448 e3
= flatten_Expr(bb
, sg
->guard
);
449 addStmtToIRSB(bb
, IRStmt_StoreG(sg
->end
, e1
, e2
, e3
));
452 lg
= st
->Ist
.LoadG
.details
;
453 e1
= flatten_Expr(bb
, lg
->addr
);
454 e2
= flatten_Expr(bb
, lg
->alt
);
455 e3
= flatten_Expr(bb
, lg
->guard
);
456 addStmtToIRSB(bb
, IRStmt_LoadG(lg
->end
, lg
->cvt
, lg
->dst
,
460 cas
= st
->Ist
.CAS
.details
;
461 e1
= flatten_Expr(bb
, cas
->addr
);
462 e2
= cas
->expdHi
? flatten_Expr(bb
, cas
->expdHi
) : NULL
;
463 e3
= flatten_Expr(bb
, cas
->expdLo
);
464 e4
= cas
->dataHi
? flatten_Expr(bb
, cas
->dataHi
) : NULL
;
465 e5
= flatten_Expr(bb
, cas
->dataLo
);
466 cas2
= mkIRCAS( cas
->oldHi
, cas
->oldLo
, cas
->end
,
467 e1
, e2
, e3
, e4
, e5
);
468 addStmtToIRSB(bb
, IRStmt_CAS(cas2
));
471 e1
= flatten_Expr(bb
, st
->Ist
.LLSC
.addr
);
472 e2
= st
->Ist
.LLSC
.storedata
473 ? flatten_Expr(bb
, st
->Ist
.LLSC
.storedata
)
475 addStmtToIRSB(bb
, IRStmt_LLSC(st
->Ist
.LLSC
.end
,
476 st
->Ist
.LLSC
.result
, e1
, e2
));
479 d
= st
->Ist
.Dirty
.details
;
482 d2
->args
= shallowCopyIRExprVec(d2
->args
);
483 if (d2
->mFx
!= Ifx_None
) {
484 d2
->mAddr
= flatten_Expr(bb
, d2
->mAddr
);
486 vassert(d2
->mAddr
== NULL
);
488 d2
->guard
= flatten_Expr(bb
, d2
->guard
);
489 for (i
= 0; d2
->args
[i
]; i
++) {
490 IRExpr
* arg
= d2
->args
[i
];
491 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
)))
492 d2
->args
[i
] = flatten_Expr(bb
, arg
);
494 addStmtToIRSB(bb
, IRStmt_Dirty(d2
));
499 addStmtToIRSB(bb
, st
);
502 e1
= flatten_Expr(bb
, st
->Ist
.AbiHint
.base
);
503 e2
= flatten_Expr(bb
, st
->Ist
.AbiHint
.nia
);
504 addStmtToIRSB(bb
, IRStmt_AbiHint(e1
, st
->Ist
.AbiHint
.len
, e2
));
507 e1
= flatten_Expr(bb
, st
->Ist
.Exit
.guard
);
508 addStmtToIRSB(bb
, IRStmt_Exit(e1
, st
->Ist
.Exit
.jk
,
510 st
->Ist
.Exit
.offsIP
));
516 vpanic("flatten_Stmt");
521 static IRSB
* flatten_BB ( IRSB
* in
)
526 out
->tyenv
= deepCopyIRTypeEnv( in
->tyenv
);
527 for (i
= 0; i
< in
->stmts_used
; i
++)
529 flatten_Stmt( out
, in
->stmts
[i
] );
530 out
->next
= flatten_Expr( out
, in
->next
);
531 out
->jumpkind
= in
->jumpkind
;
532 out
->offsIP
= in
->offsIP
;
537 /*---------------------------------------------------------------*/
538 /*--- In-place removal of redundant GETs ---*/
539 /*---------------------------------------------------------------*/
541 /* Scan forwards, building up an environment binding (min offset, max
542 offset) pairs to values, which will either be temps or constants.
544 On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
545 env and if it matches, replace the Get with the stored value. If
546 there is no match, add a (minoff,maxoff) :-> t binding.
548 On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
549 any binding which fully or partially overlaps with (minoff,maxoff).
550 Then add a new (minoff,maxoff) :-> t or c binding. */
552 /* Extract the min/max offsets from a guest state array descriptor. */
555 static void getArrayBounds ( IRRegArray
* descr
,
556 UInt
* minoff
, UInt
* maxoff
)
558 *minoff
= descr
->base
;
559 *maxoff
= *minoff
+ descr
->nElems
*sizeofIRType(descr
->elemTy
) - 1;
560 vassert((*minoff
& ~0xFFFF) == 0);
561 vassert((*maxoff
& ~0xFFFF) == 0);
562 vassert(*minoff
<= *maxoff
);
565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
567 static UInt
mk_key_GetPut ( Int offset
, IRType ty
)
569 /* offset should fit in 16 bits. */
570 UInt minoff
= offset
;
571 UInt maxoff
= minoff
+ sizeofIRType(ty
) - 1;
572 vassert((minoff
& ~0xFFFF) == 0);
573 vassert((maxoff
& ~0xFFFF) == 0);
574 return (minoff
<< 16) | maxoff
;
577 static UInt
mk_key_GetIPutI ( IRRegArray
* descr
)
580 getArrayBounds( descr
, &minoff
, &maxoff
);
581 vassert((minoff
& ~0xFFFF) == 0);
582 vassert((maxoff
& ~0xFFFF) == 0);
583 return (minoff
<< 16) | maxoff
;
586 /* Supposing h has keys of the form generated by mk_key_GetPut and
587 mk_key_GetIPutI, invalidate any key which overlaps (k_lo
590 static void invalidateOverlaps ( HashHW
* h
, UInt k_lo
, UInt k_hi
)
594 vassert(k_lo
<= k_hi
);
595 /* invalidate any env entries which in any way overlap (k_lo
597 /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
599 for (j
= 0; j
< h
->used
; j
++) {
602 e_lo
= (((UInt
)h
->key
[j
]) >> 16) & 0xFFFF;
603 e_hi
= ((UInt
)h
->key
[j
]) & 0xFFFF;
604 vassert(e_lo
<= e_hi
);
605 if (e_hi
< k_lo
|| k_hi
< e_lo
)
606 continue; /* no overlap possible */
608 /* overlap; invalidate */
614 static void redundant_get_removal_BB ( IRSB
* bb
)
616 HashHW
* env
= newHHW();
617 UInt key
= 0; /* keep gcc -O happy */
621 for (i
= 0; i
< bb
->stmts_used
; i
++) {
622 IRStmt
* st
= bb
->stmts
[i
];
624 if (st
->tag
== Ist_NoOp
)
628 if (st
->tag
== Ist_WrTmp
629 && st
->Ist
.WrTmp
.data
->tag
== Iex_Get
) {
630 /* st is 't = Get(...)'. Look up in the environment and see
631 if the Get can be replaced. */
632 IRExpr
* get
= st
->Ist
.WrTmp
.data
;
633 key
= (HWord
)mk_key_GetPut( get
->Iex
.Get
.offset
,
635 if (lookupHHW(env
, &val
, (HWord
)key
)) {
637 /* Note, we could do better here. If the types are
638 different we don't do the substitution, since doing so
639 could lead to invalidly-typed IR. An improvement would
640 be to stick in a reinterpret-style cast, although that
641 would make maintaining flatness more difficult. */
642 IRExpr
* valE
= (IRExpr
*)val
;
643 Bool typesOK
= toBool( typeOfIRExpr(bb
->tyenv
,valE
)
644 == st
->Ist
.WrTmp
.data
->Iex
.Get
.ty
);
645 if (typesOK
&& DEBUG_IROPT
) {
646 vex_printf("rGET: "); ppIRExpr(get
);
647 vex_printf(" -> "); ppIRExpr(valE
);
651 bb
->stmts
[i
] = IRStmt_WrTmp(st
->Ist
.WrTmp
.tmp
, valE
);
653 /* Not found, but at least we know that t and the Get(...)
654 are now associated. So add a binding to reflect that
656 addToHHW( env
, (HWord
)key
,
657 (HWord
)(void*)(IRExpr_RdTmp(st
->Ist
.WrTmp
.tmp
)) );
661 /* Deal with Puts: invalidate any env entries overlapped by this
663 if (st
->tag
== Ist_Put
|| st
->tag
== Ist_PutI
) {
665 if (st
->tag
== Ist_Put
) {
666 key
= mk_key_GetPut( st
->Ist
.Put
.offset
,
667 typeOfIRExpr(bb
->tyenv
,st
->Ist
.Put
.data
) );
669 vassert(st
->tag
== Ist_PutI
);
670 key
= mk_key_GetIPutI( st
->Ist
.PutI
.details
->descr
);
673 k_lo
= (key
>> 16) & 0xFFFF;
675 invalidateOverlaps(env
, k_lo
, k_hi
);
678 if (st
->tag
== Ist_Dirty
) {
679 /* Deal with dirty helpers which write or modify guest state.
680 Invalidate the entire env. We could do a lot better
682 IRDirty
* d
= st
->Ist
.Dirty
.details
;
684 for (j
= 0; j
< d
->nFxState
; j
++) {
685 if (d
->fxState
[j
].fx
== Ifx_Modify
686 || d
->fxState
[j
].fx
== Ifx_Write
)
690 /* dump the entire env (not clever, but correct ...) */
691 for (j
= 0; j
< env
->used
; j
++)
692 env
->inuse
[j
] = False
;
693 if (0) vex_printf("rGET: trash env due to dirty helper\n");
697 /* add this one to the env, if appropriate */
698 if (st
->tag
== Ist_Put
) {
699 vassert(isIRAtom(st
->Ist
.Put
.data
));
700 addToHHW( env
, (HWord
)key
, (HWord
)(st
->Ist
.Put
.data
));
703 } /* for (i = 0; i < bb->stmts_used; i++) */
708 /*---------------------------------------------------------------*/
709 /*--- In-place removal of redundant PUTs ---*/
710 /*---------------------------------------------------------------*/
712 /* Find any Get uses in st and invalidate any partially or fully
713 overlapping ranges listed in env. Due to the flattening phase, the
714 only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
716 static void handle_gets_Stmt (
719 Bool (*preciseMemExnsFn
)(Int
,Int
,VexRegisterUpdates
),
720 VexRegisterUpdates pxControl
724 UInt key
= 0; /* keep gcc -O happy */
731 /* This is the only interesting case. Deal with Gets in the RHS
734 e
= st
->Ist
.WrTmp
.data
;
738 key
= mk_key_GetPut ( e
->Iex
.Get
.offset
, e
->Iex
.Get
.ty
);
742 key
= mk_key_GetIPutI ( e
->Iex
.GetI
.descr
);
753 k_lo
= (key
>> 16) & 0xFFFF;
755 invalidateOverlaps(env
, k_lo
, k_hi
);
759 /* Be very conservative for dirty helper calls; dump the entire
760 environment. The helper might read guest state, in which
761 case it needs to be flushed first. Also, the helper might
762 access guest memory, in which case all parts of the guest
763 state requiring precise exceptions needs to be flushed. The
764 crude solution is just to flush everything; we could easily
765 enough do a lot better if needed. */
766 /* Probably also overly-conservative, but also dump everything
767 if we hit a memory bus event (fence, lock, unlock). Ditto
768 AbiHints, CASs, LLs and SCs. */
770 vassert(isIRAtom(st
->Ist
.AbiHint
.base
));
771 vassert(isIRAtom(st
->Ist
.AbiHint
.nia
));
777 for (j
= 0; j
< env
->used
; j
++)
778 env
->inuse
[j
] = False
;
781 /* all other cases are boring. */
783 vassert(isIRAtom(st
->Ist
.Store
.addr
));
784 vassert(isIRAtom(st
->Ist
.Store
.data
));
788 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
789 vassert(isIRAtom(sg
->addr
));
790 vassert(isIRAtom(sg
->data
));
791 vassert(isIRAtom(sg
->guard
));
796 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
797 vassert(isIRAtom(lg
->addr
));
798 vassert(isIRAtom(lg
->alt
));
799 vassert(isIRAtom(lg
->guard
));
804 vassert(isIRAtom(st
->Ist
.Exit
.guard
));
808 vassert(isIRAtom(st
->Ist
.Put
.data
));
812 vassert(isIRAtom(st
->Ist
.PutI
.details
->ix
));
813 vassert(isIRAtom(st
->Ist
.PutI
.details
->data
));
824 vpanic("handle_gets_Stmt");
828 /* This statement accesses memory. So we might need to dump all parts
829 of the environment corresponding to guest state that may not
830 be reordered with respect to memory references. That means
831 at least the stack pointer. */
833 case VexRegUpdAllregsAtMemAccess
:
834 /* Precise exceptions required at mem access.
835 Flush all guest state. */
836 for (j
= 0; j
< env
->used
; j
++)
837 env
->inuse
[j
] = False
;
839 case VexRegUpdSpAtMemAccess
:
840 /* We need to dump the stack pointer
841 (needed for stack extension in m_signals.c).
842 preciseMemExnsFn will use vex_control.iropt_register_updates
843 to verify only the sp is to be checked. */
845 case VexRegUpdUnwindregsAtMemAccess
:
846 for (j
= 0; j
< env
->used
; j
++) {
849 /* Just flush the minimal amount required, as computed by
851 HWord k_lo
= (env
->key
[j
] >> 16) & 0xFFFF;
852 HWord k_hi
= env
->key
[j
] & 0xFFFF;
853 if (preciseMemExnsFn( k_lo
, k_hi
, pxControl
))
854 env
->inuse
[j
] = False
;
857 case VexRegUpdAllregsAtEachInsn
:
858 // VexRegUpdAllregsAtEachInsn cannot happen here.
860 case VexRegUpd_INVALID
:
869 /* Scan backwards, building up a set of (min offset, max
870 offset) pairs, indicating those parts of the guest state
871 for which the next event is a write.
873 On seeing a conditional exit, empty the set.
875 On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
876 completely within the set, remove the Put. Otherwise, add
877 (minoff,maxoff) to the set.
879 On seeing 'Get (minoff,maxoff)', remove any part of the set
880 overlapping (minoff,maxoff). The same has to happen for any events
881 which implicitly read parts of the guest state: dirty helper calls
885 static void redundant_put_removal_BB (
887 Bool (*preciseMemExnsFn
)(Int
,Int
,VexRegisterUpdates
),
888 VexRegisterUpdates pxControl
894 UInt key
= 0; /* keep gcc -O happy */
896 vassert(pxControl
< VexRegUpdAllregsAtEachInsn
);
898 HashHW
* env
= newHHW();
900 /* Initialise the running env with the fact that the final exit
901 writes the IP (or, whatever it claims to write. We don't
903 key
= mk_key_GetPut(bb
->offsIP
, typeOfIRExpr(bb
->tyenv
, bb
->next
));
904 addToHHW(env
, (HWord
)key
, 0);
906 /* And now scan backwards through the statements. */
907 for (i
= bb
->stmts_used
-1; i
>= 0; i
--) {
910 if (st
->tag
== Ist_NoOp
)
913 /* Deal with conditional exits. */
914 if (st
->tag
== Ist_Exit
) {
916 /* Need to throw out from the env, any part of it which
917 doesn't overlap with the guest state written by this exit.
918 Since the exit only writes one section, it's simplest to
919 do this: (1) check whether env contains a write that
920 completely overlaps the write done by this exit; (2) empty
921 out env; and (3) if (1) was true, add the write done by
924 To make (1) a bit simpler, merely search for a write that
925 exactly matches the one done by this exit. That's safe
926 because it will fail as often or more often than a full
927 overlap check, and failure to find an overlapping write in
928 env is the safe case (we just nuke env if that
930 //vassert(isIRAtom(st->Ist.Exit.guard));
932 //key = mk_key_GetPut(st->Ist.Exit.offsIP,
933 // typeOfIRConst(st->Ist.Exit.dst));
934 //re_add = lookupHHW(env, NULL, key);
936 for (j
= 0; j
< env
->used
; j
++)
937 env
->inuse
[j
] = False
;
940 // addToHHW(env, (HWord)key, 0);
948 key
= mk_key_GetPut( st
->Ist
.Put
.offset
,
949 typeOfIRExpr(bb
->tyenv
,st
->Ist
.Put
.data
) );
950 vassert(isIRAtom(st
->Ist
.Put
.data
));
954 key
= mk_key_GetIPutI( st
->Ist
.PutI
.details
->descr
);
955 vassert(isIRAtom(st
->Ist
.PutI
.details
->ix
));
956 vassert(isIRAtom(st
->Ist
.PutI
.details
->data
));
961 if (isPut
&& st
->tag
!= Ist_PutI
) {
962 /* See if any single entry in env overlaps this Put. This is
963 simplistic in that the transformation is valid if, say, two
964 or more entries in the env overlap this Put, but the use of
965 lookupHHW will only find a single entry which exactly
966 overlaps this Put. This is suboptimal but safe. */
967 if (lookupHHW(env
, NULL
, (HWord
)key
)) {
968 /* This Put is redundant because a later one will overwrite
969 it. So NULL (nop) it out. */
971 vex_printf("rPUT: "); ppIRStmt(st
);
974 bb
->stmts
[i
] = IRStmt_NoOp();
976 /* We can't demonstrate that this Put is redundant, so add it
977 to the running collection. */
978 addToHHW(env
, (HWord
)key
, 0);
983 /* Deal with Gets. These remove bits of the environment since
984 appearance of a Get means that the next event for that slice
985 of the guest state is no longer a write, but a read. Also
986 deals with implicit reads of guest state needed to maintain
987 precise exceptions. */
988 handle_gets_Stmt( env
, st
, preciseMemExnsFn
, pxControl
);
993 /*---------------------------------------------------------------*/
994 /*--- Constant propagation and folding ---*/
995 /*---------------------------------------------------------------*/
998 /* How often sameIRExprs was invoked */
999 static UInt invocation_count
;
1000 /* How often sameIRExprs recursed through IRTemp assignments */
1001 static UInt recursion_count
;
1002 /* How often sameIRExprs found identical IRExprs */
1003 static UInt success_count
;
1004 /* How often recursing through assignments to IRTemps helped
1005 establishing equality. */
1006 static UInt recursion_success_count
;
1007 /* Whether or not recursing through an IRTemp assignment helped
1008 establishing IRExpr equality for a given sameIRExprs invocation. */
1009 static Bool recursion_helped
;
1010 /* Whether or not a given sameIRExprs invocation recursed through an
1011 IRTemp assignment */
1012 static Bool recursed
;
1013 /* Maximum number of nodes ever visited when comparing two IRExprs. */
1014 static UInt max_nodes_visited
;
1015 #endif /* STATS_IROPT */
1017 /* Count the number of nodes visited for a given sameIRExprs invocation. */
1018 static UInt num_nodes_visited
;
1020 /* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
1021 This is to guard against performance degradation by visiting large
1022 trees without success. */
1023 #define NODE_LIMIT 30
1026 /* The env in this section is a map from IRTemp to IRExpr*,
1027 that is, an array indexed by IRTemp. */
1029 /* Do both expressions compute the same value? The answer is generally
1030 conservative, i.e. it will report that the expressions do not compute
1031 the same value when in fact they do. The reason is that we do not
1032 keep track of changes in the guest state and memory. Thusly, two
1033 Get's, GetI's or Load's, even when accessing the same location, will be
1034 assumed to compute different values. After all the accesses may happen
1035 at different times and the guest state / memory can have changed in
1038 XXX IMPORTANT XXX the two expressions must have the same IR type.
1039 DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
1041 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
1042 wrapper that deals with the common tags-don't-match case, and a
1043 slower out of line general case. Saves a few insns. */
1045 __attribute__((noinline
))
1046 static Bool
sameIRExprs_aux2 ( IRExpr
** env
, IRExpr
* e1
, IRExpr
* e2
);
1049 static Bool
sameIRExprs_aux ( IRExpr
** env
, IRExpr
* e1
, IRExpr
* e2
)
1051 if (e1
->tag
!= e2
->tag
) return False
;
1052 return sameIRExprs_aux2(env
, e1
, e2
);
1055 __attribute__((noinline
))
1056 static Bool
sameIRExprs_aux2 ( IRExpr
** env
, IRExpr
* e1
, IRExpr
* e2
)
1058 if (num_nodes_visited
++ > NODE_LIMIT
) return False
;
1062 if (e1
->Iex
.RdTmp
.tmp
== e2
->Iex
.RdTmp
.tmp
) return True
;
1064 if (env
[e1
->Iex
.RdTmp
.tmp
] && env
[e2
->Iex
.RdTmp
.tmp
]) {
1065 Bool same
= sameIRExprs_aux(env
, env
[e1
->Iex
.RdTmp
.tmp
],
1066 env
[e2
->Iex
.RdTmp
.tmp
]);
1069 if (same
) recursion_helped
= True
;
1078 /* Guest state / memory could have changed in the meantime. */
1082 return toBool( e1
->Iex
.Binop
.op
== e2
->Iex
.Binop
.op
1083 && sameIRExprs_aux( env
, e1
->Iex
.Binop
.arg1
,
1084 e2
->Iex
.Binop
.arg1
)
1085 && sameIRExprs_aux( env
, e1
->Iex
.Binop
.arg2
,
1086 e2
->Iex
.Binop
.arg2
));
1089 return toBool( e1
->Iex
.Unop
.op
== e2
->Iex
.Unop
.op
1090 && sameIRExprs_aux( env
, e1
->Iex
.Unop
.arg
,
1091 e2
->Iex
.Unop
.arg
));
1094 IRConst
*c1
= e1
->Iex
.Const
.con
;
1095 IRConst
*c2
= e2
->Iex
.Const
.con
;
1096 vassert(c1
->tag
== c2
->tag
);
1098 case Ico_U1
: return toBool( c1
->Ico
.U1
== c2
->Ico
.U1
);
1099 case Ico_U8
: return toBool( c1
->Ico
.U8
== c2
->Ico
.U8
);
1100 case Ico_U16
: return toBool( c1
->Ico
.U16
== c2
->Ico
.U16
);
1101 case Ico_U32
: return toBool( c1
->Ico
.U32
== c2
->Ico
.U32
);
1102 case Ico_U64
: return toBool( c1
->Ico
.U64
== c2
->Ico
.U64
);
1109 IRTriop
*tri1
= e1
->Iex
.Triop
.details
;
1110 IRTriop
*tri2
= e2
->Iex
.Triop
.details
;
1111 return toBool( tri1
->op
== tri2
->op
1112 && sameIRExprs_aux( env
, tri1
->arg1
, tri2
->arg1
)
1113 && sameIRExprs_aux( env
, tri1
->arg2
, tri2
->arg2
)
1114 && sameIRExprs_aux( env
, tri1
->arg3
, tri2
->arg3
));
1118 return toBool( sameIRExprs_aux( env
, e1
->Iex
.ITE
.cond
,
1120 && sameIRExprs_aux( env
, e1
->Iex
.ITE
.iftrue
,
1121 e2
->Iex
.ITE
.iftrue
)
1122 && sameIRExprs_aux( env
, e1
->Iex
.ITE
.iffalse
,
1123 e2
->Iex
.ITE
.iffalse
));
1126 /* Not very likely to be "same". */
1134 static Bool
sameIRExprs ( IRExpr
** env
, IRExpr
* e1
, IRExpr
* e2
)
1138 num_nodes_visited
= 0;
1139 same
= sameIRExprs_aux(env
, e1
, e2
);
1143 if (recursed
) ++recursion_count
;
1144 success_count
+= same
;
1145 if (same
&& recursion_helped
)
1146 ++recursion_success_count
;
1147 if (num_nodes_visited
> max_nodes_visited
)
1148 max_nodes_visited
= num_nodes_visited
;
1149 recursed
= False
; /* reset */
1150 recursion_helped
= False
; /* reset */
1151 #endif /* STATS_IROPT */
1157 /* Debugging-only hack (not used in production runs): make a guess
1158 whether sameIRExprs might assert due to the two args being of
1159 different types. If in doubt return False. Is only used when
1160 --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
1161 Bad because it duplicates functionality from typeOfIRExpr. See
1162 comment on the single use point below for rationale. */
1164 Bool
debug_only_hack_sameIRExprs_might_assert ( IRExpr
* e1
, IRExpr
* e2
)
1166 if (e1
->tag
!= e2
->tag
) return False
;
1169 /* The only interesting case */
1170 IRConst
*c1
= e1
->Iex
.Const
.con
;
1171 IRConst
*c2
= e2
->Iex
.Const
.con
;
1172 return c1
->tag
!= c2
->tag
;
1181 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
1182 static Bool
isZeroU32 ( IRExpr
* e
)
1184 return toBool( e
->tag
== Iex_Const
1185 && e
->Iex
.Const
.con
->tag
== Ico_U32
1186 && e
->Iex
.Const
.con
->Ico
.U32
== 0);
1189 /* Is this literally IRExpr_Const(IRConst_U64(0)) ?
1190 Currently unused; commented out to avoid compiler warning */
1192 static Bool
isZeroU64 ( IRExpr
* e
)
1194 return toBool( e
->tag
== Iex_Const
1195 && e
->Iex
.Const
.con
->tag
== Ico_U64
1196 && e
->Iex
.Const
.con
->Ico
.U64
== 0);
1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
1201 static Bool
isZeroV128 ( IRExpr
* e
)
1203 return toBool( e
->tag
== Iex_Const
1204 && e
->Iex
.Const
.con
->tag
== Ico_V128
1205 && e
->Iex
.Const
.con
->Ico
.V128
== 0x0000);
1208 /* Is this literally IRExpr_Const(IRConst_V128(1...1)) ? */
1209 static Bool
isOnesV128 ( IRExpr
* e
)
1211 return toBool( e
->tag
== Iex_Const
1212 && e
->Iex
.Const
.con
->tag
== Ico_V128
1213 && e
->Iex
.Const
.con
->Ico
.V128
== 0xFFFF);
1216 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
1217 static Bool
isZeroV256 ( IRExpr
* e
)
1219 return toBool( e
->tag
== Iex_Const
1220 && e
->Iex
.Const
.con
->tag
== Ico_V256
1221 && e
->Iex
.Const
.con
->Ico
.V256
== 0x00000000);
1224 /* Is this an integer constant with value 0 ? */
1225 static Bool
isZeroU ( IRExpr
* e
)
1227 if (e
->tag
!= Iex_Const
) return False
;
1228 switch (e
->Iex
.Const
.con
->tag
) {
1229 case Ico_U1
: return toBool( e
->Iex
.Const
.con
->Ico
.U1
== 0);
1230 case Ico_U8
: return toBool( e
->Iex
.Const
.con
->Ico
.U8
== 0);
1231 case Ico_U16
: return toBool( e
->Iex
.Const
.con
->Ico
.U16
== 0);
1232 case Ico_U32
: return toBool( e
->Iex
.Const
.con
->Ico
.U32
== 0);
1233 case Ico_U64
: return toBool( e
->Iex
.Const
.con
->Ico
.U64
== 0);
1234 case Ico_V256
: return toBool( e
->Iex
.Const
.con
->Ico
.V256
== 0x00000000);
1235 default: vpanic("isZeroU");
1239 /* Is this an integer constant with value 1---1b ? */
1240 static Bool
isOnesU ( IRExpr
* e
)
1242 if (e
->tag
!= Iex_Const
) return False
;
1243 switch (e
->Iex
.Const
.con
->tag
) {
1244 case Ico_U8
: return toBool( e
->Iex
.Const
.con
->Ico
.U8
== 0xFF);
1245 case Ico_U16
: return toBool( e
->Iex
.Const
.con
->Ico
.U16
== 0xFFFF);
1246 case Ico_U32
: return toBool( e
->Iex
.Const
.con
->Ico
.U32
1248 case Ico_U64
: return toBool( e
->Iex
.Const
.con
->Ico
.U64
1249 == 0xFFFFFFFFFFFFFFFFULL
);
1250 default: ppIRExpr(e
); vpanic("isOnesU");
1254 static Bool
notBool ( Bool b
)
1256 if (b
== True
) return False
;
1257 if (b
== False
) return True
;
1261 /* Make a zero which has the same type as the result of the given
1263 static IRExpr
* mkZeroOfPrimopResultType ( IROp op
)
1266 case Iop_CmpNE32
: return IRExpr_Const(IRConst_U1(toBool(0)));
1267 case Iop_Xor8
: return IRExpr_Const(IRConst_U8(0));
1268 case Iop_Xor16
: return IRExpr_Const(IRConst_U16(0));
1270 case Iop_Xor32
: return IRExpr_Const(IRConst_U32(0));
1273 case Iop_Xor64
: return IRExpr_Const(IRConst_U64(0));
1275 case Iop_AndV128
: return IRExpr_Const(IRConst_V128(0));
1277 case Iop_AndV256
: return IRExpr_Const(IRConst_V256(0));
1278 default: vpanic("mkZeroOfPrimopResultType: bad primop");
1282 /* Make a value containing all 1-bits, which has the same type as the
1283 result of the given primop. */
1284 static IRExpr
* mkOnesOfPrimopResultType ( IROp op
)
1289 return IRExpr_Const(IRConst_U1(toBool(1)));
1291 return IRExpr_Const(IRConst_U8(0xFF));
1293 return IRExpr_Const(IRConst_U16(0xFFFF));
1295 return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
1298 return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL
));
1302 return IRExpr_Const(IRConst_V128(0xFFFF));
1305 vpanic("mkOnesOfPrimopResultType: bad primop");
1309 /* Helpers for folding Clz32/64. */
1310 static UInt
fold_Clz64 ( ULong value
)
1313 vassert(value
!= 0ULL); /* no defined semantics for arg==0 */
1314 for (i
= 0; i
< 64; ++i
) {
1315 if (0ULL != (value
& (((ULong
)1) << (63 - i
)))) return i
;
1322 static UInt
fold_Clz32 ( UInt value
)
1325 vassert(value
!= 0); /* no defined semantics for arg==0 */
1326 for (i
= 0; i
< 32; ++i
) {
1327 if (0 != (value
& (((UInt
)1) << (31 - i
)))) return i
;
1334 /* V64 holds 8 summary-constant bits in V128/V256 style. Convert to
1335 the corresponding real constant. */
1336 //XXX re-check this before use
1337 //static ULong de_summarise_V64 ( UChar v64 )
1340 // if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
1341 // if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
1342 // if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
1343 // if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
1344 // if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
1345 // if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
1346 // if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
1347 // if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
1351 /* Helper for arbitrary expression pattern matching in flat IR. If
1352 'e' is a reference to a tmp, look it up in env -- repeatedly, if
1353 necessary -- until it resolves to a non-tmp. Note that this can
1354 return NULL if it can't resolve 'e' to a new expression, which will
1355 be the case if 'e' is instead defined by an IRStmt (IRDirty or
1357 static IRExpr
* chase ( IRExpr
** env
, IRExpr
* e
)
1359 /* Why is this loop guaranteed to terminate? Because all tmps must
1360 have definitions before use, hence a tmp cannot be bound
1361 (directly or indirectly) to itself. */
1362 while (e
->tag
== Iex_RdTmp
) {
1363 if (0) { vex_printf("chase "); ppIRExpr(e
); vex_printf("\n"); }
1364 e
= env
[(Int
)e
->Iex
.RdTmp
.tmp
];
1365 if (e
== NULL
) break;
1370 /* Similar to |chase|, but follows at most one level of tmp reference. */
1371 static IRExpr
* chase1 ( IRExpr
** env
, IRExpr
* e
)
1373 if (e
== NULL
|| e
->tag
!= Iex_RdTmp
)
1376 return env
[(Int
)e
->Iex
.RdTmp
.tmp
];
1379 static IRExpr
* fold_Expr ( IRExpr
** env
, IRExpr
* e
)
1382 IRExpr
* e2
= e
; /* e2 is the result of folding e, if possible */
1387 if (e
->Iex
.Unop
.arg
->tag
== Iex_Const
) {
1389 /* cases where the arg is a const */
1390 switch (e
->Iex
.Unop
.op
) {
1392 e2
= IRExpr_Const(IRConst_U8(toUChar(
1393 e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U1
1397 e2
= IRExpr_Const(IRConst_U32(
1398 e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U1
1402 e2
= IRExpr_Const(IRConst_U64(
1403 e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U1
1408 e2
= IRExpr_Const(IRConst_U8(toUChar(
1409 e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U1
1413 e2
= IRExpr_Const(IRConst_U16(toUShort(
1414 e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U1
1418 e2
= IRExpr_Const(IRConst_U32(
1419 e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U1
1423 e2
= IRExpr_Const(IRConst_U64(
1424 e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U1
1425 ? 0xFFFFFFFFFFFFFFFFULL
: 0));
1429 UInt u32
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U8
;
1431 u32
= (Int
)u32
>> 24; /* signed shift */
1432 e2
= IRExpr_Const(IRConst_U32(u32
));
1436 UInt u32
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U16
;
1438 u32
= (Int
)u32
>> 16; /* signed shift */
1439 e2
= IRExpr_Const(IRConst_U32(u32
));
1443 e2
= IRExpr_Const(IRConst_U64(
1444 0xFFULL
& e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U8
));
1447 e2
= IRExpr_Const(IRConst_U64(
1448 0xFFFFULL
& e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U16
));
1451 e2
= IRExpr_Const(IRConst_U32(
1452 0xFF & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U8
));
1455 UShort u16
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U8
;
1457 u16
= (Short
)u16
>> 8; /* signed shift */
1458 e2
= IRExpr_Const(IRConst_U16(u16
));
1462 e2
= IRExpr_Const(IRConst_U16(
1463 0xFF & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U8
));
1466 e2
= IRExpr_Const(IRConst_U32(
1467 0xFFFF & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U16
));
1470 e2
= IRExpr_Const(IRConst_U16(toUShort(
1471 0xFFFF & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
)));
1474 e2
= IRExpr_Const(IRConst_U8(toUChar(
1475 0xFF & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
)));
1478 e2
= IRExpr_Const(IRConst_U1(toBool(
1479 1 == (1 & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
)
1483 e2
= IRExpr_Const(IRConst_U1(toBool(
1484 1 == (1 & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
)
1489 e2
= IRExpr_Const(IRConst_V128(
1490 ~ (e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.V128
)));
1493 e2
= IRExpr_Const(IRConst_U64(
1494 ~ (e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
)));
1497 e2
= IRExpr_Const(IRConst_U32(
1498 ~ (e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
)));
1501 e2
= IRExpr_Const(IRConst_U16(toUShort(
1502 ~ (e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U16
))));
1505 e2
= IRExpr_Const(IRConst_U8(toUChar(
1506 ~ (e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U8
))));
1510 e2
= IRExpr_Const(IRConst_U1(
1511 notBool(e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U1
)));
1515 ULong w64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
;
1517 e2
= IRExpr_Const(IRConst_U8( (UChar
)w64
));
1521 ULong w64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
;
1523 e2
= IRExpr_Const(IRConst_U16( (UShort
)w64
));
1527 ULong w64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
;
1528 w64
&= 0x00000000FFFFFFFFULL
;
1529 e2
= IRExpr_Const(IRConst_U32( (UInt
)w64
));
1532 case Iop_64HIto32
: {
1533 ULong w64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
;
1535 e2
= IRExpr_Const(IRConst_U32( (UInt
)w64
));
1539 e2
= IRExpr_Const(IRConst_U64(
1541 & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
));
1544 ULong u64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U16
;
1546 u64
= (Long
)u64
>> 48; /* signed shift */
1547 e2
= IRExpr_Const(IRConst_U64(u64
));
1551 ULong u64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
;
1553 u64
= (Long
)u64
>> 32; /* signed shift */
1554 e2
= IRExpr_Const(IRConst_U64(u64
));
1559 UShort w16
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U16
;
1561 e2
= IRExpr_Const(IRConst_U8( (UChar
)w16
));
1565 UShort w16
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U16
;
1568 e2
= IRExpr_Const(IRConst_U8( (UChar
)w16
));
1573 e2
= IRExpr_Const(IRConst_U1(toBool(
1575 (0xFF & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U8
)
1579 e2
= IRExpr_Const(IRConst_U1(toBool(
1581 (0xFFFFFFFF & e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
)
1585 e2
= IRExpr_Const(IRConst_U1(toBool(
1586 0ULL != e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
1590 case Iop_CmpwNEZ32
: {
1591 UInt w32
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
;
1593 e2
= IRExpr_Const(IRConst_U32( 0 ));
1595 e2
= IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1598 case Iop_CmpwNEZ64
: {
1599 ULong w64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
;
1601 e2
= IRExpr_Const(IRConst_U64( 0 ));
1603 e2
= IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL
));
1608 UInt u32
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
;
1609 Int s32
= (Int
)(u32
& 0xFFFFFFFF);
1610 s32
= (s32
| (-s32
));
1611 e2
= IRExpr_Const( IRConst_U32( (UInt
)s32
));
1616 ULong u64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
;
1617 Long s64
= (Long
)u64
;
1618 s64
= (s64
| (-s64
));
1619 e2
= IRExpr_Const( IRConst_U64( (ULong
)s64
));
1624 UInt u32
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
;
1626 e2
= IRExpr_Const(IRConst_U32(fold_Clz32(u32
)));
1630 ULong u64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
;
1632 e2
= IRExpr_Const(IRConst_U64(fold_Clz64(u64
)));
1636 /* For these vector ones, can't fold all cases, but at least
1637 do the most obvious one. Could do better here using
1638 summarise/desummarise of vector constants, but too
1639 difficult to verify; hence just handle the zero cases. */
1640 case Iop_32UtoV128
: {
1641 UInt u32
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U32
;
1643 e2
= IRExpr_Const(IRConst_V128(0x0000));
1649 case Iop_V128to64
: {
1650 UShort v128
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.V128
;
1651 if (0 == ((v128
>> 0) & 0xFF)) {
1652 e2
= IRExpr_Const(IRConst_U64(0));
1658 case Iop_V128HIto64
: {
1659 UShort v128
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.V128
;
1660 if (0 == ((v128
>> 8) & 0xFF)) {
1661 e2
= IRExpr_Const(IRConst_U64(0));
1667 case Iop_64UtoV128
: {
1668 ULong u64
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.U64
;
1670 e2
= IRExpr_Const(IRConst_V128(0x0000));
1677 /* Even stupider (although still correct ..) */
1678 case Iop_V256to64_0
: case Iop_V256to64_1
:
1679 case Iop_V256to64_2
: case Iop_V256to64_3
: {
1680 UInt v256
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.V256
;
1681 if (v256
== 0x00000000) {
1682 e2
= IRExpr_Const(IRConst_U64(0));
1689 case Iop_ZeroHI64ofV128
: {
1690 /* Could do better here -- only need to look at the bottom 64 bits
1691 of the argument, really. */
1692 UShort v128
= e
->Iex
.Unop
.arg
->Iex
.Const
.con
->Ico
.V128
;
1693 if (v128
== 0x0000) {
1694 e2
= IRExpr_Const(IRConst_V128(0x0000));
1703 } // switch (e->Iex.Unop.op)
1707 /* other cases (identities, etc) */
1708 switch (e
->Iex
.Unop
.op
) {
1709 case Iop_PopCount64
: {
1710 // PopCount64( And64( Add64(x,-1), Not64(x) ) ) ==> CtzNat64(x)
1712 // a1:And64( a11:Add64(a111:x,a112:-1), a12:Not64(a121:x) )
1713 IRExpr
* a1
= chase(env
, e
->Iex
.Unop
.arg
);
1716 if (a1
->tag
!= Iex_Binop
|| a1
->Iex
.Binop
.op
!= Iop_And64
)
1718 // a1 is established
1719 IRExpr
* a11
= chase(env
, a1
->Iex
.Binop
.arg1
);
1722 if (a11
->tag
!= Iex_Binop
|| a11
->Iex
.Binop
.op
!= Iop_Add64
)
1724 // a11 is established
1725 IRExpr
* a12
= chase(env
, a1
->Iex
.Binop
.arg2
);
1728 if (a12
->tag
!= Iex_Unop
|| a12
->Iex
.Unop
.op
!= Iop_Not64
)
1730 // a12 is established
1731 IRExpr
* a111
= a11
->Iex
.Binop
.arg1
;
1732 IRExpr
* a112
= chase(env
, a11
->Iex
.Binop
.arg2
);
1733 IRExpr
* a121
= a12
->Iex
.Unop
.arg
;
1734 if (!a111
|| !a112
|| !a121
)
1736 // a111 and a121 need to be the same temp.
1737 if (!eqIRAtom(a111
, a121
))
1739 // Finally, a112 must be a 64-bit version of -1.
1742 // Match established. Transform.
1743 e2
= IRExpr_Unop(Iop_CtzNat64
, a111
);
1750 } // switch (e->Iex.Unop.op)
1752 } // if (e->Iex.Unop.arg->tag == Iex_Const)
1757 if (e
->Iex
.Binop
.arg1
->tag
== Iex_Const
1758 && e
->Iex
.Binop
.arg2
->tag
== Iex_Const
) {
1759 /* cases where both args are consts */
1760 switch (e
->Iex
.Binop
.op
) {
1764 e2
= IRExpr_Const(IRConst_U8(toUChar(
1765 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U8
1766 | e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
))));
1769 e2
= IRExpr_Const(IRConst_U16(toUShort(
1770 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U16
1771 | e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U16
))));
1774 e2
= IRExpr_Const(IRConst_U32(
1775 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
1776 | e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)));
1779 e2
= IRExpr_Const(IRConst_U64(
1780 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
1781 | e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)));
1784 e2
= IRExpr_Const(IRConst_V128(
1785 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.V128
1786 | e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.V128
)));
1791 e2
= IRExpr_Const(IRConst_U8(toUChar(
1792 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U8
1793 ^ e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
))));
1796 e2
= IRExpr_Const(IRConst_U16(toUShort(
1797 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U16
1798 ^ e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U16
))));
1801 e2
= IRExpr_Const(IRConst_U32(
1802 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
1803 ^ e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)));
1806 e2
= IRExpr_Const(IRConst_U64(
1807 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
1808 ^ e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)));
1811 e2
= IRExpr_Const(IRConst_V128(
1812 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.V128
1813 ^ e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.V128
)));
1818 e2
= IRExpr_Const(IRConst_U8(toUChar(
1819 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U8
1820 & e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
))));
1823 e2
= IRExpr_Const(IRConst_U16(toUShort(
1824 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U16
1825 & e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U16
))));
1828 e2
= IRExpr_Const(IRConst_U32(
1829 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
1830 & e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)));
1833 e2
= IRExpr_Const(IRConst_U64(
1834 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
1835 & e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)));
1838 e2
= IRExpr_Const(IRConst_V128(
1839 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.V128
1840 & e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.V128
)));
1845 e2
= IRExpr_Const(IRConst_U8(toUChar(
1846 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U8
1847 + e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
))));
1850 e2
= IRExpr_Const(IRConst_U32(
1851 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
1852 + e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)));
1855 e2
= IRExpr_Const(IRConst_U64(
1856 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
1857 + e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)));
1862 e2
= IRExpr_Const(IRConst_U8(toUChar(
1863 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U8
1864 - e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
))));
1867 e2
= IRExpr_Const(IRConst_U32(
1868 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
1869 - e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)));
1872 e2
= IRExpr_Const(IRConst_U64(
1873 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
1874 - e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)));
1879 UInt u32a
= e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
;
1880 UInt u32b
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
;
1881 UInt res
= u32a
> u32b
? u32a
: u32b
;
1882 e2
= IRExpr_Const(IRConst_U32(res
));
1888 e2
= IRExpr_Const(IRConst_U32(
1889 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
1890 * e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)));
1893 e2
= IRExpr_Const(IRConst_U64(
1894 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
1895 * e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)));
1900 UInt u32a
= e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
;
1901 UInt u32b
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
;
1902 Int s32a
= (Int
)u32a
;
1903 Int s32b
= (Int
)u32b
;
1904 Long s64a
= (Long
)s32a
;
1905 Long s64b
= (Long
)s32b
;
1906 Long sres
= s64a
* s64b
;
1907 ULong ures
= (ULong
)sres
;
1908 e2
= IRExpr_Const(IRConst_U64(ures
));
1914 vassert(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
);
1915 shift
= (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
);
1916 if (shift
>= 0 && shift
<= 31)
1917 e2
= IRExpr_Const(IRConst_U32(
1918 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
1922 vassert(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
);
1923 shift
= (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
);
1924 if (shift
>= 0 && shift
<= 63)
1925 e2
= IRExpr_Const(IRConst_U64(
1926 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
1934 vassert(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
);
1935 s32
= (Int
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
);
1936 shift
= (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
);
1937 if (shift
>= 0 && shift
<= 31) {
1938 s32
>>=/*signed*/ shift
;
1939 e2
= IRExpr_Const(IRConst_U32((UInt
)s32
));
1945 /*signed*/ Long s64
;
1946 vassert(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
);
1947 s64
= (Long
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
);
1948 shift
= (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
);
1949 if (shift
>= 0 && shift
<= 63) {
1950 s64
>>=/*signed*/ shift
;
1951 e2
= IRExpr_Const(IRConst_U64((ULong
)s64
));
1959 /*unsigned*/ UInt u32
;
1960 vassert(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
);
1961 u32
= (UInt
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
);
1962 shift
= (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
);
1963 if (shift
>= 0 && shift
<= 31) {
1964 u32
>>=/*unsigned*/ shift
;
1965 e2
= IRExpr_Const(IRConst_U32(u32
));
1971 /*unsigned*/ ULong u64
;
1972 vassert(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->tag
== Ico_U8
);
1973 u64
= (ULong
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
);
1974 shift
= (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
);
1975 if (shift
>= 0 && shift
<= 63) {
1976 u64
>>=/*unsigned*/ shift
;
1977 e2
= IRExpr_Const(IRConst_U64(u64
));
1984 e2
= IRExpr_Const(IRConst_U1(toBool(
1985 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
1986 == e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
))));
1989 e2
= IRExpr_Const(IRConst_U1(toBool(
1990 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
1991 == e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
))));
1998 e2
= IRExpr_Const(IRConst_U1(toBool(
1999 ((0xFF & e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U8
)
2000 != (0xFF & e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U8
)))));
2003 case Iop_CasCmpNE32
:
2004 case Iop_ExpCmpNE32
:
2005 e2
= IRExpr_Const(IRConst_U1(toBool(
2006 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
2007 != e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
))));
2010 case Iop_CasCmpNE64
:
2011 case Iop_ExpCmpNE64
:
2012 e2
= IRExpr_Const(IRConst_U1(toBool(
2013 (e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
2014 != e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
))));
2019 e2
= IRExpr_Const(IRConst_U1(toBool(
2020 ((UInt
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
)
2021 <= (UInt
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)))));
2024 e2
= IRExpr_Const(IRConst_U1(toBool(
2025 ((ULong
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
)
2026 <= (ULong
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)))));
2031 e2
= IRExpr_Const(IRConst_U1(toBool(
2032 ((Int
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
)
2033 <= (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)))));
2036 e2
= IRExpr_Const(IRConst_U1(toBool(
2037 ((Long
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
)
2038 <= (Long
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)))));
2043 e2
= IRExpr_Const(IRConst_U1(toBool(
2044 ((Int
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
)
2045 < (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)))));
2048 e2
= IRExpr_Const(IRConst_U1(toBool(
2049 ((Long
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
)
2050 < (Long
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)))));
2055 e2
= IRExpr_Const(IRConst_U1(toBool(
2056 ((UInt
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
)
2057 < (UInt
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
)))));
2060 e2
= IRExpr_Const(IRConst_U1(toBool(
2061 ((ULong
)(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
)
2062 < (ULong
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
)))));
2066 case Iop_CmpORD32S
: {
2068 UInt u32a
= e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U32
;
2069 UInt u32b
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
;
2070 Int s32a
= (Int
)u32a
;
2071 Int s32b
= (Int
)u32b
;
2072 Int r
= 0x2; /* EQ */
2076 else if (s32a
> s32b
) {
2079 e2
= IRExpr_Const(IRConst_U32(r
));
2085 e2
= IRExpr_Const(IRConst_U64(
2086 (((ULong
)(e
->Iex
.Binop
.arg1
2087 ->Iex
.Const
.con
->Ico
.U32
)) << 32)
2088 | ((ULong
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
))
2092 /* We can't fold this, because there is no way to
2093 express the result in IR, but at least pretend to
2094 handle it, so as to stop getting blasted with
2095 no-rule-for-this-primop messages. */
2097 /* For this vector one, can't fold all cases, but at
2098 least do the most obvious one. Could do better here
2099 using summarise/desummarise of vector constants, but
2100 too difficult to verify; hence just handle the zero
2102 case Iop_64HLtoV128
: {
2103 ULong argHi
= e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.U64
;
2104 ULong argLo
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U64
;
2105 if (0 == argHi
&& 0 == argLo
) {
2106 e2
= IRExpr_Const(IRConst_V128(0));
2112 /* Same reasoning for the 256-bit version. */
2113 case Iop_V128HLtoV256
: {
2114 IRExpr
* argHi
= e
->Iex
.Binop
.arg1
;
2115 IRExpr
* argLo
= e
->Iex
.Binop
.arg2
;
2116 if (isZeroV128(argHi
) && isZeroV128(argLo
)) {
2117 e2
= IRExpr_Const(IRConst_V256(0));
2124 /* -- V128 stuff -- */
2125 case Iop_InterleaveLO8x16
: {
2126 /* This turns up a lot in Memcheck instrumentation of
2127 Icc generated code. I don't know why. */
2128 UShort arg1
= e
->Iex
.Binop
.arg1
->Iex
.Const
.con
->Ico
.V128
;
2129 UShort arg2
= e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.V128
;
2130 if (0 == arg1
&& 0 == arg2
) {
2131 e2
= IRExpr_Const(IRConst_V128(0));
2144 /* other cases (identities, etc) */
2145 switch (e
->Iex
.Binop
.op
) {
2151 /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2152 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2153 e2
= e
->Iex
.Binop
.arg1
;
2156 /* Shl32/Shl64/Shr64(0,x) ==> 0 */
2157 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2158 e2
= e
->Iex
.Binop
.arg1
;
2165 /* Shr32/Sar32(x,0) ==> x */
2166 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2167 e2
= e
->Iex
.Binop
.arg1
;
2177 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2178 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2179 e2
= e
->Iex
.Binop
.arg1
;
2182 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2183 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2184 e2
= e
->Iex
.Binop
.arg2
;
2187 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2188 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2189 if (isOnesU(e
->Iex
.Binop
.arg1
) || isOnesU(e
->Iex
.Binop
.arg2
)) {
2190 e2
= mkOnesOfPrimopResultType(e
->Iex
.Binop
.op
);
2193 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2194 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2195 e2
= e
->Iex
.Binop
.arg1
;
2201 /* Add8(t,t) ==> t << 1.
2202 Memcheck doesn't understand that
2203 x+x produces a defined least significant bit, and it seems
2204 simplest just to get rid of the problem by rewriting it
2205 out, since the opportunity to do so exists. */
2206 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2207 e2
= IRExpr_Binop(Iop_Shl8
, e
->Iex
.Binop
.arg1
,
2208 IRExpr_Const(IRConst_U8(1)));
2213 /* NB no Add16(t,t) case yet as no known test case exists */
2217 /* Add32/Add64(x,0) ==> x */
2218 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2219 e2
= e
->Iex
.Binop
.arg1
;
2222 /* Add32/Add64(0,x) ==> x */
2223 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2224 e2
= e
->Iex
.Binop
.arg2
;
2227 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2228 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2230 e
->Iex
.Binop
.op
== Iop_Add32
? Iop_Shl32
: Iop_Shl64
,
2231 e
->Iex
.Binop
.arg1
, IRExpr_Const(IRConst_U8(1)));
2238 /* Sub32/Sub64(x,0) ==> x */
2239 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2240 e2
= e
->Iex
.Binop
.arg1
;
2243 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2244 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2245 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2250 /* Sub8x16(x,0) ==> x */
2251 if (isZeroV128(e
->Iex
.Binop
.arg2
)) {
2252 e2
= e
->Iex
.Binop
.arg1
;
2261 /* And8/And16/And32/And64(x,1---1b) ==> x */
2262 if (isOnesU(e
->Iex
.Binop
.arg2
)) {
2263 e2
= e
->Iex
.Binop
.arg1
;
2266 /* And8/And16/And32/And64(1---1b,x) ==> x */
2267 if (isOnesU(e
->Iex
.Binop
.arg1
)) {
2268 e2
= e
->Iex
.Binop
.arg2
;
2271 /* And8/And16/And32/And64(x,0) ==> 0 */
2272 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2273 e2
= e
->Iex
.Binop
.arg2
;
2276 /* And8/And16/And32/And64(0,x) ==> 0 */
2277 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2278 e2
= e
->Iex
.Binop
.arg1
;
2281 /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2282 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2283 e2
= e
->Iex
.Binop
.arg1
;
2290 /* And8/And16/AndV128/AndV256(t,t)
2291 ==> t, for some IRTemp t */
2292 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2293 e2
= e
->Iex
.Binop
.arg1
;
2296 /* Deal with either arg zero. Could handle other And
2298 if (e
->Iex
.Binop
.op
== Iop_AndV256
2299 && (isZeroV256(e
->Iex
.Binop
.arg1
)
2300 || isZeroV256(e
->Iex
.Binop
.arg2
))) {
2301 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2303 } else if (e
->Iex
.Binop
.op
== Iop_AndV128
2304 && (isZeroV128(e
->Iex
.Binop
.arg1
)
2305 || isZeroV128(e
->Iex
.Binop
.arg2
))) {
2306 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2309 /* AndV128(t,1...1) ==> t. The amd64 front end generates these
2310 for *CMP{P,S}{S,D} etc. */
2311 if (e
->Iex
.Binop
.op
== Iop_AndV128
) {
2312 if (isOnesV128(e
->Iex
.Binop
.arg2
)) {
2313 e2
= e
->Iex
.Binop
.arg1
;
2321 /* V128/V256(t,t) ==> t, for some IRTemp t */
2322 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2323 e2
= e
->Iex
.Binop
.arg1
;
2326 /* OrV128(t,0) ==> t */
2327 if (e
->Iex
.Binop
.op
== Iop_OrV128
) {
2328 if (isZeroV128(e
->Iex
.Binop
.arg2
)) {
2329 e2
= e
->Iex
.Binop
.arg1
;
2332 if (isZeroV128(e
->Iex
.Binop
.arg1
)) {
2333 e2
= e
->Iex
.Binop
.arg2
;
2337 /* OrV256(t,0) ==> t */
2338 if (e
->Iex
.Binop
.op
== Iop_OrV256
) {
2339 if (isZeroV256(e
->Iex
.Binop
.arg2
)) {
2340 e2
= e
->Iex
.Binop
.arg1
;
2343 //Disabled because there's no known test case right now.
2344 //if (isZeroV256(e->Iex.Binop.arg1)) {
2345 // e2 = e->Iex.Binop.arg2;
2357 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
2358 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2359 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2362 /* XorV128(t,0) ==> t */
2363 if (e
->Iex
.Binop
.op
== Iop_XorV128
) {
2364 if (isZeroV128(e
->Iex
.Binop
.arg2
)) {
2365 e2
= e
->Iex
.Binop
.arg1
;
2368 //Disabled because there's no known test case right now.
2369 //if (isZeroV128(e->Iex.Binop.arg1)) {
2370 // e2 = e->Iex.Binop.arg2;
2374 /* Xor8/16/32/64(0,t) ==> t */
2375 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2376 e2
= e
->Iex
.Binop
.arg2
;
2379 /* Xor8/16/32/64(t,0) ==> t */
2380 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2381 e2
= e
->Iex
.Binop
.arg1
;
2388 /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2389 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2390 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2393 /* CmpNE32(1Uto32(b), 0) ==> b */
2394 if (isZeroU32(e
->Iex
.Binop
.arg2
)) {
2395 IRExpr
* a1
= chase(env
, e
->Iex
.Binop
.arg1
);
2396 if (a1
&& a1
->tag
== Iex_Unop
2397 && a1
->Iex
.Unop
.op
== Iop_1Uto32
) {
2398 e2
= a1
->Iex
.Unop
.arg
;
2410 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2411 e2
= mkOnesOfPrimopResultType(e
->Iex
.Binop
.op
);
2424 /* is the discriminant a constant? */
2425 if (e
->Iex
.ITE
.cond
->tag
== Iex_Const
) {
2426 /* assured us by the IR type rules */
2427 vassert(e
->Iex
.ITE
.cond
->Iex
.Const
.con
->tag
== Ico_U1
);
2428 e2
= e
->Iex
.ITE
.cond
->Iex
.Const
.con
->Ico
.U1
2429 ? e
->Iex
.ITE
.iftrue
: e
->Iex
.ITE
.iffalse
;
2432 /* are the arms identical? (pretty weedy test) */
2433 if (sameIRExprs(env
, e
->Iex
.ITE
.iftrue
,
2434 e
->Iex
.ITE
.iffalse
)) {
2435 e2
= e
->Iex
.ITE
.iffalse
;
2440 /* not considered */
2444 /* Show cases where we've found but not folded 'op(t,t)'. Be
2445 careful not to call sameIRExprs with values of different types,
2446 though, else it will assert (and so it should!). We can't
2447 conveniently call typeOfIRExpr on the two args without a whole
2448 bunch of extra plumbing to pass in a type env, so just use a
2449 hacky test to check the arguments are not anything that might
2450 sameIRExprs to assert. This is only OK because this kludge is
2451 only used for debug printing, not for "real" operation. For
2452 "real" operation (ie, all other calls to sameIRExprs), it is
2453 essential that the two args have the same type.
2455 The "right" solution is to plumb the containing block's
2456 IRTypeEnv through to here and use typeOfIRExpr to be sure. But
2457 that's a bunch of extra parameter passing which will just slow
2458 down the normal case, for no purpose. */
2459 if (vex_control
.iropt_verbosity
> 0
2461 && e
->tag
== Iex_Binop
2462 && !debug_only_hack_sameIRExprs_might_assert(e
->Iex
.Binop
.arg1
,
2464 && sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2465 vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2470 /* Show the overall results of folding. */
2471 if (DEBUG_IROPT
&& e2
!= e
) {
2472 vex_printf("FOLD: ");
2473 ppIRExpr(e
); vex_printf(" -> ");
2474 ppIRExpr(e2
); vex_printf("\n");
2483 vpanic("fold_Expr: no rule for the above");
2485 if (vex_control
.iropt_verbosity
> 0) {
2486 vex_printf("vex iropt: fold_Expr: no const rule for: ");
2495 /* Apply the subst to a simple 1-level expression -- guaranteed to be
2496 1-level due to previous flattening pass. */
2498 static IRExpr
* subst_Expr ( IRExpr
** env
, IRExpr
* ex
)
2502 if (env
[(Int
)ex
->Iex
.RdTmp
.tmp
] != NULL
) {
2503 IRExpr
*rhs
= env
[(Int
)ex
->Iex
.RdTmp
.tmp
];
2504 if (rhs
->tag
== Iex_RdTmp
)
2506 if (rhs
->tag
== Iex_Const
2507 && rhs
->Iex
.Const
.con
->tag
!= Ico_F64i
)
2510 /* not bound in env */
2518 vassert(isIRAtom(ex
->Iex
.GetI
.ix
));
2521 subst_Expr(env
, ex
->Iex
.GetI
.ix
),
2526 IRQop
* qop
= ex
->Iex
.Qop
.details
;
2527 vassert(isIRAtom(qop
->arg1
));
2528 vassert(isIRAtom(qop
->arg2
));
2529 vassert(isIRAtom(qop
->arg3
));
2530 vassert(isIRAtom(qop
->arg4
));
2533 subst_Expr(env
, qop
->arg1
),
2534 subst_Expr(env
, qop
->arg2
),
2535 subst_Expr(env
, qop
->arg3
),
2536 subst_Expr(env
, qop
->arg4
)
2541 IRTriop
* triop
= ex
->Iex
.Triop
.details
;
2542 vassert(isIRAtom(triop
->arg1
));
2543 vassert(isIRAtom(triop
->arg2
));
2544 vassert(isIRAtom(triop
->arg3
));
2545 return IRExpr_Triop(
2547 subst_Expr(env
, triop
->arg1
),
2548 subst_Expr(env
, triop
->arg2
),
2549 subst_Expr(env
, triop
->arg3
)
2554 vassert(isIRAtom(ex
->Iex
.Binop
.arg1
));
2555 vassert(isIRAtom(ex
->Iex
.Binop
.arg2
));
2556 return IRExpr_Binop(
2558 subst_Expr(env
, ex
->Iex
.Binop
.arg1
),
2559 subst_Expr(env
, ex
->Iex
.Binop
.arg2
)
2563 vassert(isIRAtom(ex
->Iex
.Unop
.arg
));
2566 subst_Expr(env
, ex
->Iex
.Unop
.arg
)
2570 vassert(isIRAtom(ex
->Iex
.Load
.addr
));
2574 subst_Expr(env
, ex
->Iex
.Load
.addr
)
2579 IRExpr
** args2
= shallowCopyIRExprVec(ex
->Iex
.CCall
.args
);
2580 for (i
= 0; args2
[i
]; i
++) {
2581 vassert(isIRAtom(args2
[i
]));
2582 args2
[i
] = subst_Expr(env
, args2
[i
]);
2584 return IRExpr_CCall(
2586 ex
->Iex
.CCall
.retty
,
2592 vassert(isIRAtom(ex
->Iex
.ITE
.cond
));
2593 vassert(isIRAtom(ex
->Iex
.ITE
.iftrue
));
2594 vassert(isIRAtom(ex
->Iex
.ITE
.iffalse
));
2596 subst_Expr(env
, ex
->Iex
.ITE
.cond
),
2597 subst_Expr(env
, ex
->Iex
.ITE
.iftrue
),
2598 subst_Expr(env
, ex
->Iex
.ITE
.iffalse
)
2602 vex_printf("\n\n"); ppIRExpr(ex
);
2603 vpanic("subst_Expr");
2609 /* Apply the subst to stmt, then fold the result as much as possible.
2610 Much simplified due to stmt being previously flattened. As a
2611 result of this, the stmt may wind up being turned into a no-op.
2613 static IRStmt
* subst_and_fold_Stmt ( IRExpr
** env
, IRStmt
* st
)
2616 vex_printf("\nsubst and fold stmt\n");
2623 vassert(isIRAtom(st
->Ist
.AbiHint
.base
));
2624 vassert(isIRAtom(st
->Ist
.AbiHint
.nia
));
2625 return IRStmt_AbiHint(
2626 fold_Expr(env
, subst_Expr(env
, st
->Ist
.AbiHint
.base
)),
2627 st
->Ist
.AbiHint
.len
,
2628 fold_Expr(env
, subst_Expr(env
, st
->Ist
.AbiHint
.nia
))
2631 vassert(isIRAtom(st
->Ist
.Put
.data
));
2634 fold_Expr(env
, subst_Expr(env
, st
->Ist
.Put
.data
))
2638 IRPutI
*puti
, *puti2
;
2639 puti
= st
->Ist
.PutI
.details
;
2640 vassert(isIRAtom(puti
->ix
));
2641 vassert(isIRAtom(puti
->data
));
2642 puti2
= mkIRPutI(puti
->descr
,
2643 fold_Expr(env
, subst_Expr(env
, puti
->ix
)),
2645 fold_Expr(env
, subst_Expr(env
, puti
->data
)));
2646 return IRStmt_PutI(puti2
);
2650 /* This is the one place where an expr (st->Ist.WrTmp.data) is
2651 allowed to be more than just a constant or a tmp. */
2652 return IRStmt_WrTmp(
2654 fold_Expr(env
, subst_Expr(env
, st
->Ist
.WrTmp
.data
))
2658 vassert(isIRAtom(st
->Ist
.Store
.addr
));
2659 vassert(isIRAtom(st
->Ist
.Store
.data
));
2660 return IRStmt_Store(
2662 fold_Expr(env
, subst_Expr(env
, st
->Ist
.Store
.addr
)),
2663 fold_Expr(env
, subst_Expr(env
, st
->Ist
.Store
.data
))
2667 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
2668 vassert(isIRAtom(sg
->addr
));
2669 vassert(isIRAtom(sg
->data
));
2670 vassert(isIRAtom(sg
->guard
));
2671 IRExpr
* faddr
= fold_Expr(env
, subst_Expr(env
, sg
->addr
));
2672 IRExpr
* fdata
= fold_Expr(env
, subst_Expr(env
, sg
->data
));
2673 IRExpr
* fguard
= fold_Expr(env
, subst_Expr(env
, sg
->guard
));
2674 if (fguard
->tag
== Iex_Const
) {
2675 /* The condition on this store has folded down to a constant. */
2676 vassert(fguard
->Iex
.Const
.con
->tag
== Ico_U1
);
2677 if (fguard
->Iex
.Const
.con
->Ico
.U1
== False
) {
2678 return IRStmt_NoOp();
2680 vassert(fguard
->Iex
.Const
.con
->Ico
.U1
== True
);
2681 return IRStmt_Store(sg
->end
, faddr
, fdata
);
2684 return IRStmt_StoreG(sg
->end
, faddr
, fdata
, fguard
);
2688 /* This is complicated. If the guard folds down to 'false',
2689 we can replace it with an assignment 'dst := alt', but if
2690 the guard folds down to 'true', we can't conveniently
2691 replace it with an unconditional load, because doing so
2692 requires generating a new temporary, and that is not easy
2693 to do at this point. */
2694 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
2695 vassert(isIRAtom(lg
->addr
));
2696 vassert(isIRAtom(lg
->alt
));
2697 vassert(isIRAtom(lg
->guard
));
2698 IRExpr
* faddr
= fold_Expr(env
, subst_Expr(env
, lg
->addr
));
2699 IRExpr
* falt
= fold_Expr(env
, subst_Expr(env
, lg
->alt
));
2700 IRExpr
* fguard
= fold_Expr(env
, subst_Expr(env
, lg
->guard
));
2701 if (fguard
->tag
== Iex_Const
) {
2702 /* The condition on this load has folded down to a constant. */
2703 vassert(fguard
->Iex
.Const
.con
->tag
== Ico_U1
);
2704 if (fguard
->Iex
.Const
.con
->Ico
.U1
== False
) {
2705 /* The load is not going to happen -- instead 'alt' is
2706 assigned to 'dst'. */
2707 return IRStmt_WrTmp(lg
->dst
, falt
);
2709 vassert(fguard
->Iex
.Const
.con
->Ico
.U1
== True
);
2710 /* The load is always going to happen. We want to
2711 convert to an unconditional load and assign to 'dst'
2712 (IRStmt_WrTmp). Problem is we need an extra temp to
2713 hold the loaded value, but none is available.
2714 Instead, reconstitute the conditional load (with
2715 folded args, of course) and let the caller of this
2716 routine deal with the problem. */
2719 return IRStmt_LoadG(lg
->end
, lg
->cvt
, lg
->dst
, faddr
, falt
, fguard
);
2724 cas
= st
->Ist
.CAS
.details
;
2725 vassert(isIRAtom(cas
->addr
));
2726 vassert(cas
->expdHi
== NULL
|| isIRAtom(cas
->expdHi
));
2727 vassert(isIRAtom(cas
->expdLo
));
2728 vassert(cas
->dataHi
== NULL
|| isIRAtom(cas
->dataHi
));
2729 vassert(isIRAtom(cas
->dataLo
));
2731 cas
->oldHi
, cas
->oldLo
, cas
->end
,
2732 fold_Expr(env
, subst_Expr(env
, cas
->addr
)),
2733 cas
->expdHi
? fold_Expr(env
, subst_Expr(env
, cas
->expdHi
))
2735 fold_Expr(env
, subst_Expr(env
, cas
->expdLo
)),
2736 cas
->dataHi
? fold_Expr(env
, subst_Expr(env
, cas
->dataHi
))
2738 fold_Expr(env
, subst_Expr(env
, cas
->dataLo
))
2740 return IRStmt_CAS(cas2
);
2744 vassert(isIRAtom(st
->Ist
.LLSC
.addr
));
2745 if (st
->Ist
.LLSC
.storedata
)
2746 vassert(isIRAtom(st
->Ist
.LLSC
.storedata
));
2749 st
->Ist
.LLSC
.result
,
2750 fold_Expr(env
, subst_Expr(env
, st
->Ist
.LLSC
.addr
)),
2751 st
->Ist
.LLSC
.storedata
2752 ? fold_Expr(env
, subst_Expr(env
, st
->Ist
.LLSC
.storedata
))
2759 d
= st
->Ist
.Dirty
.details
;
2760 d2
= emptyIRDirty();
2762 d2
->args
= shallowCopyIRExprVec(d2
->args
);
2763 if (d2
->mFx
!= Ifx_None
) {
2764 vassert(isIRAtom(d2
->mAddr
));
2765 d2
->mAddr
= fold_Expr(env
, subst_Expr(env
, d2
->mAddr
));
2767 vassert(isIRAtom(d2
->guard
));
2768 d2
->guard
= fold_Expr(env
, subst_Expr(env
, d2
->guard
));
2769 for (i
= 0; d2
->args
[i
]; i
++) {
2770 IRExpr
* arg
= d2
->args
[i
];
2771 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
))) {
2772 vassert(isIRAtom(arg
));
2773 d2
->args
[i
] = fold_Expr(env
, subst_Expr(env
, arg
));
2776 return IRStmt_Dirty(d2
);
2780 return IRStmt_IMark(st
->Ist
.IMark
.addr
,
2782 st
->Ist
.IMark
.delta
);
2785 return IRStmt_NoOp();
2788 return IRStmt_MBE(st
->Ist
.MBE
.event
);
2792 vassert(isIRAtom(st
->Ist
.Exit
.guard
));
2793 fcond
= fold_Expr(env
, subst_Expr(env
, st
->Ist
.Exit
.guard
));
2794 if (fcond
->tag
== Iex_Const
) {
2795 /* Interesting. The condition on this exit has folded down to
2797 vassert(fcond
->Iex
.Const
.con
->tag
== Ico_U1
);
2798 if (fcond
->Iex
.Const
.con
->Ico
.U1
== False
) {
2799 /* exit is never going to happen, so dump the statement. */
2800 return IRStmt_NoOp();
2802 vassert(fcond
->Iex
.Const
.con
->Ico
.U1
== True
);
2803 /* Hmmm. The exit has become unconditional. Leave it
2804 as it is for now, since we'd have to truncate the BB
2805 at this point, which is tricky. Such truncation is
2806 done later by the dead-code elimination pass. */
2807 /* fall out into the reconstruct-the-exit code. */
2808 if (vex_control
.iropt_verbosity
> 0)
2809 /* really a misuse of vex_control.iropt_verbosity */
2810 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2813 return IRStmt_Exit(fcond
, st
->Ist
.Exit
.jk
,
2814 st
->Ist
.Exit
.dst
, st
->Ist
.Exit
.offsIP
);
2818 vex_printf("\n"); ppIRStmt(st
);
2819 vpanic("subst_and_fold_Stmt");
2824 IRSB
* cprop_BB ( IRSB
* in
)
2829 Int n_tmps
= in
->tyenv
->types_used
;
2830 IRExpr
** env
= LibVEX_Alloc_inline(n_tmps
* sizeof(IRExpr
*));
2831 /* Keep track of IRStmt_LoadGs that we need to revisit after
2832 processing all the other statements. */
2833 const Int N_FIXUPS
= 16;
2834 Int fixups
[N_FIXUPS
]; /* indices in the stmt array of 'out' */
2838 out
->tyenv
= deepCopyIRTypeEnv( in
->tyenv
);
2840 /* Set up the env with which travels forward. This holds a
2841 substitution, mapping IRTemps to IRExprs. The environment
2842 is to be applied as we move along. Keys are IRTemps.
2843 Values are IRExpr*s.
2845 for (i
= 0; i
< n_tmps
; i
++)
2848 /* For each original SSA-form stmt ... */
2849 for (i
= 0; i
< in
->stmts_used
; i
++) {
2851 /* First apply the substitution to the current stmt. This
2852 propagates in any constants and tmp-tmp assignments
2853 accumulated prior to this point. As part of the subst_Stmt
2854 call, also then fold any constant expressions resulting. */
2858 /* perhaps st2 is already a no-op? */
2859 if (st2
->tag
== Ist_NoOp
) continue;
2861 st2
= subst_and_fold_Stmt( env
, st2
);
2863 /* Deal with some post-folding special cases. */
2866 /* If the statement has been folded into a no-op, forget
2871 /* If the statement assigns to an IRTemp add it to the
2872 running environment. This is for the benefit of copy
2873 propagation and to allow sameIRExpr look through
2876 vassert(env
[(Int
)(st2
->Ist
.WrTmp
.tmp
)] == NULL
);
2877 env
[(Int
)(st2
->Ist
.WrTmp
.tmp
)] = st2
->Ist
.WrTmp
.data
;
2879 /* 't1 = t2' -- don't add to BB; will be optimized out */
2880 if (st2
->Ist
.WrTmp
.data
->tag
== Iex_RdTmp
)
2883 /* 't = const' && 'const != F64i' -- don't add to BB
2884 Note, we choose not to propagate const when const is an
2885 F64i, so that F64i literals can be CSE'd later. This
2886 helps x86 floating point code generation. */
2887 if (st2
->Ist
.WrTmp
.data
->tag
== Iex_Const
2888 && st2
->Ist
.WrTmp
.data
->Iex
.Const
.con
->tag
!= Ico_F64i
) {
2891 /* else add it to the output, as normal */
2896 IRLoadG
* lg
= st2
->Ist
.LoadG
.details
;
2897 IRExpr
* guard
= lg
->guard
;
2898 if (guard
->tag
== Iex_Const
) {
2899 /* The guard has folded to a constant, and that
2900 constant must be 1:I1, since subst_and_fold_Stmt
2901 folds out the case 0:I1 by itself. */
2902 vassert(guard
->Iex
.Const
.con
->tag
== Ico_U1
);
2903 vassert(guard
->Iex
.Const
.con
->Ico
.U1
== True
);
2904 /* Add a NoOp here as a placeholder, and make a note of
2905 where it is in the output block. Afterwards we'll
2906 come back here and transform the NoOp and the LoadG
2907 into a load-convert pair. The fixups[] entry
2908 refers to the inserted NoOp, and we expect to find
2909 the relevant LoadG immediately after it. */
2910 vassert(n_fixups
>= 0 && n_fixups
<= N_FIXUPS
);
2911 if (n_fixups
< N_FIXUPS
) {
2912 fixups
[n_fixups
++] = out
->stmts_used
;
2913 addStmtToIRSB( out
, IRStmt_NoOp() );
2916 /* And always add the LoadG to the output, regardless. */
2924 /* Not interesting, copy st2 into the output block. */
2925 addStmtToIRSB( out
, st2
);
2929 vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
2930 invocation_count
, recursion_count
, success_count
,
2931 recursion_success_count
, max_nodes_visited
);
2934 out
->next
= subst_Expr( env
, in
->next
);
2935 out
->jumpkind
= in
->jumpkind
;
2936 out
->offsIP
= in
->offsIP
;
2938 /* Process any leftover unconditional LoadGs that we noticed
2939 in the main pass. */
2940 vassert(n_fixups
>= 0 && n_fixups
<= N_FIXUPS
);
2941 for (i
= 0; i
< n_fixups
; i
++) {
2943 /* Carefully verify that the LoadG has the expected form. */
2944 vassert(ix
>= 0 && ix
+1 < out
->stmts_used
);
2945 IRStmt
* nop
= out
->stmts
[ix
];
2946 IRStmt
* lgu
= out
->stmts
[ix
+1];
2947 vassert(nop
->tag
== Ist_NoOp
);
2948 vassert(lgu
->tag
== Ist_LoadG
);
2949 IRLoadG
* lg
= lgu
->Ist
.LoadG
.details
;
2950 IRExpr
* guard
= lg
->guard
;
2951 vassert(guard
->Iex
.Const
.con
->tag
== Ico_U1
);
2952 vassert(guard
->Iex
.Const
.con
->Ico
.U1
== True
);
2953 /* Figure out the load and result types, and the implied
2954 conversion operation. */
2955 IRType cvtRes
= Ity_INVALID
, cvtArg
= Ity_INVALID
;
2956 typeOfIRLoadGOp(lg
->cvt
, &cvtRes
, &cvtArg
);
2957 IROp cvtOp
= Iop_INVALID
;
2959 case ILGop_IdentV128
:
2961 case ILGop_Ident32
: break;
2962 case ILGop_8Uto32
: cvtOp
= Iop_8Uto32
; break;
2963 case ILGop_8Sto32
: cvtOp
= Iop_8Sto32
; break;
2964 case ILGop_16Uto32
: cvtOp
= Iop_16Uto32
; break;
2965 case ILGop_16Sto32
: cvtOp
= Iop_16Sto32
; break;
2966 default: vpanic("cprop_BB: unhandled ILGOp");
2968 /* Replace the placeholder NoOp by the required unconditional
2970 IRTemp tLoaded
= newIRTemp(out
->tyenv
, cvtArg
);
2972 = IRStmt_WrTmp(tLoaded
,
2973 IRExpr_Load(lg
->end
, cvtArg
, lg
->addr
));
2974 /* Replace the LoadG by a conversion from the loaded value's
2975 type to the required result type. */
2978 lg
->dst
, cvtOp
== Iop_INVALID
2979 ? IRExpr_RdTmp(tLoaded
)
2980 : IRExpr_Unop(cvtOp
, IRExpr_RdTmp(tLoaded
)));
2987 /*---------------------------------------------------------------*/
2988 /*--- Dead code (t = E) removal ---*/
2989 /*---------------------------------------------------------------*/
2991 /* As a side effect, also removes all code following an unconditional
2994 /* The type of the HashHW map is: a map from IRTemp to nothing
2995 -- really just operating a set or IRTemps.
2999 static void addUses_Temp ( Bool
* set
, IRTemp tmp
)
3001 set
[(Int
)tmp
] = True
;
3004 static void addUses_Expr ( Bool
* set
, IRExpr
* e
)
3009 addUses_Expr(set
, e
->Iex
.GetI
.ix
);
3012 addUses_Expr(set
, e
->Iex
.ITE
.cond
);
3013 addUses_Expr(set
, e
->Iex
.ITE
.iftrue
);
3014 addUses_Expr(set
, e
->Iex
.ITE
.iffalse
);
3017 for (i
= 0; e
->Iex
.CCall
.args
[i
]; i
++)
3018 addUses_Expr(set
, e
->Iex
.CCall
.args
[i
]);
3021 addUses_Expr(set
, e
->Iex
.Load
.addr
);
3024 addUses_Expr(set
, e
->Iex
.Qop
.details
->arg1
);
3025 addUses_Expr(set
, e
->Iex
.Qop
.details
->arg2
);
3026 addUses_Expr(set
, e
->Iex
.Qop
.details
->arg3
);
3027 addUses_Expr(set
, e
->Iex
.Qop
.details
->arg4
);
3030 addUses_Expr(set
, e
->Iex
.Triop
.details
->arg1
);
3031 addUses_Expr(set
, e
->Iex
.Triop
.details
->arg2
);
3032 addUses_Expr(set
, e
->Iex
.Triop
.details
->arg3
);
3035 addUses_Expr(set
, e
->Iex
.Binop
.arg1
);
3036 addUses_Expr(set
, e
->Iex
.Binop
.arg2
);
3039 addUses_Expr(set
, e
->Iex
.Unop
.arg
);
3042 addUses_Temp(set
, e
->Iex
.RdTmp
.tmp
);
3050 vpanic("addUses_Expr");
3054 static void addUses_Stmt ( Bool
* set
, IRStmt
* st
)
3061 addUses_Expr(set
, st
->Ist
.AbiHint
.base
);
3062 addUses_Expr(set
, st
->Ist
.AbiHint
.nia
);
3065 addUses_Expr(set
, st
->Ist
.PutI
.details
->ix
);
3066 addUses_Expr(set
, st
->Ist
.PutI
.details
->data
);
3069 addUses_Expr(set
, st
->Ist
.WrTmp
.data
);
3072 addUses_Expr(set
, st
->Ist
.Put
.data
);
3075 addUses_Expr(set
, st
->Ist
.Store
.addr
);
3076 addUses_Expr(set
, st
->Ist
.Store
.data
);
3079 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
3080 addUses_Expr(set
, sg
->addr
);
3081 addUses_Expr(set
, sg
->data
);
3082 addUses_Expr(set
, sg
->guard
);
3086 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
3087 addUses_Expr(set
, lg
->addr
);
3088 addUses_Expr(set
, lg
->alt
);
3089 addUses_Expr(set
, lg
->guard
);
3093 cas
= st
->Ist
.CAS
.details
;
3094 addUses_Expr(set
, cas
->addr
);
3096 addUses_Expr(set
, cas
->expdHi
);
3097 addUses_Expr(set
, cas
->expdLo
);
3099 addUses_Expr(set
, cas
->dataHi
);
3100 addUses_Expr(set
, cas
->dataLo
);
3103 addUses_Expr(set
, st
->Ist
.LLSC
.addr
);
3104 if (st
->Ist
.LLSC
.storedata
)
3105 addUses_Expr(set
, st
->Ist
.LLSC
.storedata
);
3108 d
= st
->Ist
.Dirty
.details
;
3109 if (d
->mFx
!= Ifx_None
)
3110 addUses_Expr(set
, d
->mAddr
);
3111 addUses_Expr(set
, d
->guard
);
3112 for (i
= 0; d
->args
[i
] != NULL
; i
++) {
3113 IRExpr
* arg
= d
->args
[i
];
3114 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
)))
3115 addUses_Expr(set
, arg
);
3123 addUses_Expr(set
, st
->Ist
.Exit
.guard
);
3128 vpanic("addUses_Stmt");
3133 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
3134 static Bool
isZeroU1 ( IRExpr
* e
)
3136 return toBool( e
->tag
== Iex_Const
3137 && e
->Iex
.Const
.con
->tag
== Ico_U1
3138 && e
->Iex
.Const
.con
->Ico
.U1
== False
);
3141 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
3142 static Bool
isOneU1 ( IRExpr
* e
)
3144 return toBool( e
->tag
== Iex_Const
3145 && e
->Iex
.Const
.con
->tag
== Ico_U1
3146 && e
->Iex
.Const
.con
->Ico
.U1
== True
);
3150 /* Note, this destructively modifies the given IRSB. */
3152 /* Scan backwards through statements, carrying a set of IRTemps which
3153 are known to be used after the current point. On encountering 't =
3154 E', delete the binding if it is not used. Otherwise, add any temp
3155 uses to the set and keep on moving backwards.
3157 As an enhancement, the first (backwards) pass searches for IR exits
3158 with always-taken conditions and notes the location of the earliest
3159 one in the block. If any such are found, a second pass copies the
3160 exit destination and jump kind to the bb-end. Then, the exit and
3161 all statements following it are turned into no-ops.
3164 /* notstatic */ void do_deadcode_BB ( IRSB
* bb
)
3166 Int i
, i_unconditional_exit
;
3167 Int n_tmps
= bb
->tyenv
->types_used
;
3168 Bool
* set
= LibVEX_Alloc_inline(n_tmps
* sizeof(Bool
));
3171 for (i
= 0; i
< n_tmps
; i
++)
3174 /* start off by recording IRTemp uses in the next field. */
3175 addUses_Expr(set
, bb
->next
);
3179 /* Work backwards through the stmts */
3180 i_unconditional_exit
= -1;
3181 for (i
= bb
->stmts_used
-1; i
>= 0; i
--) {
3183 if (st
->tag
== Ist_NoOp
)
3185 /* take note of any unconditional exits */
3186 if (st
->tag
== Ist_Exit
3187 && isOneU1(st
->Ist
.Exit
.guard
))
3188 i_unconditional_exit
= i
;
3189 if (st
->tag
== Ist_WrTmp
3190 && set
[(Int
)(st
->Ist
.WrTmp
.tmp
)] == False
) {
3191 /* it's an IRTemp which never got used. Delete it. */
3193 vex_printf("DEAD: ");
3197 bb
->stmts
[i
] = IRStmt_NoOp();
3200 if (st
->tag
== Ist_Dirty
3201 && st
->Ist
.Dirty
.details
->guard
3202 && isZeroU1(st
->Ist
.Dirty
.details
->guard
)) {
3203 /* This is a dirty helper which will never get called.
3205 bb
->stmts
[i
] = IRStmt_NoOp();
3208 /* Note any IRTemp uses made by the current statement. */
3209 addUses_Stmt(set
, st
);
3213 /* Optional second pass: if any unconditional exits were found,
3214 delete them and all following statements. */
3216 if (i_unconditional_exit
!= -1) {
3217 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
3218 i_unconditional_exit
);
3219 vassert(i_unconditional_exit
>= 0
3220 && i_unconditional_exit
< bb
->stmts_used
);
3222 = IRExpr_Const( bb
->stmts
[i_unconditional_exit
]->Ist
.Exit
.dst
);
3224 = bb
->stmts
[i_unconditional_exit
]->Ist
.Exit
.jk
;
3226 = bb
->stmts
[i_unconditional_exit
]->Ist
.Exit
.offsIP
;
3227 for (i
= i_unconditional_exit
; i
< bb
->stmts_used
; i
++)
3228 bb
->stmts
[i
] = IRStmt_NoOp();
3233 /*---------------------------------------------------------------*/
3234 /*--- Specialisation of helper function calls, in ---*/
3235 /*--- collaboration with the front end ---*/
3236 /*---------------------------------------------------------------*/
3239 IRSB
* spec_helpers_BB(
3241 IRExpr
* (*specHelper
) (const HChar
*, IRExpr
**, IRStmt
**, Int
)
3249 for (i
= bb
->stmts_used
-1; i
>= 0; i
--) {
3252 if (st
->tag
!= Ist_WrTmp
3253 || st
->Ist
.WrTmp
.data
->tag
!= Iex_CCall
)
3256 ex
= (*specHelper
)( st
->Ist
.WrTmp
.data
->Iex
.CCall
.cee
->name
,
3257 st
->Ist
.WrTmp
.data
->Iex
.CCall
.args
,
3260 /* the front end can't think of a suitable replacement */
3263 /* We got something better. Install it in the bb. */
3266 = IRStmt_WrTmp(st
->Ist
.WrTmp
.tmp
, ex
);
3269 vex_printf("SPEC: ");
3270 ppIRExpr(st
->Ist
.WrTmp
.data
);
3271 vex_printf(" --> ");
3278 bb
= flatten_BB(bb
);
3283 /*---------------------------------------------------------------*/
3284 /*--- Determination of guest state aliasing relationships ---*/
3285 /*---------------------------------------------------------------*/
3287 /* These are helper functions for CSE and GetI/PutI transformations.
3289 Determine, to the extent possible, the relationship between two
3290 guest state accesses. The possible outcomes are:
3292 * Exact alias. These two accesses denote precisely the same
3293 piece of the guest state.
3295 * Definitely no alias. These two accesses are guaranteed not to
3296 overlap any part of the guest state.
3298 * Unknown -- if neither of the above can be established.
3300 If in doubt, return Unknown. */
3303 enum { ExactAlias
, NoAlias
, UnknownAlias
}
3307 /* Produces the alias relation between an indexed guest
3308 state access and a non-indexed access. */
3311 GSAliasing
getAliasingRelation_IC ( IRRegArray
* descr1
, IRExpr
* ix1
,
3312 Int offset2
, IRType ty2
)
3314 UInt minoff1
, maxoff1
, minoff2
, maxoff2
;
3316 getArrayBounds( descr1
, &minoff1
, &maxoff1
);
3318 maxoff2
= minoff2
+ sizeofIRType(ty2
) - 1;
3320 if (maxoff1
< minoff2
|| maxoff2
< minoff1
)
3323 /* Could probably do better here if required. For the moment
3324 however just claim not to know anything more. */
3325 return UnknownAlias
;
3329 /* Produces the alias relation between two indexed guest state
3333 GSAliasing
getAliasingRelation_II (
3334 IRRegArray
* descr1
, IRExpr
* ix1
, Int bias1
,
3335 IRRegArray
* descr2
, IRExpr
* ix2
, Int bias2
3338 UInt minoff1
, maxoff1
, minoff2
, maxoff2
;
3341 /* First try hard to show they don't alias. */
3342 getArrayBounds( descr1
, &minoff1
, &maxoff1
);
3343 getArrayBounds( descr2
, &minoff2
, &maxoff2
);
3344 if (maxoff1
< minoff2
|| maxoff2
< minoff1
)
3347 /* So the two arrays at least partially overlap. To get any
3348 further we'll have to be sure that the descriptors are
3350 if (!eqIRRegArray(descr1
, descr2
))
3351 return UnknownAlias
;
3353 /* The descriptors are identical. Now the only difference can be
3354 in the index expressions. If they cannot be shown to be
3355 identical, we have to say we don't know what the aliasing
3356 relation will be. Now, since the IR is flattened, the index
3357 expressions should be atoms -- either consts or tmps. So that
3358 makes the comparison simple. */
3359 vassert(isIRAtom(ix1
));
3360 vassert(isIRAtom(ix2
));
3361 if (!eqIRAtom(ix1
,ix2
))
3362 return UnknownAlias
;
3364 /* Ok, the index expressions are identical. So now the only way
3365 they can be different is in the bias. Normalise this
3366 paranoidly, to reliably establish equality/non-equality. */
3368 /* So now we know that the GetI and PutI index the same array
3369 with the same base. Are the offsets the same, modulo the
3370 array size? Do this paranoidly. */
3371 vassert(descr1
->nElems
== descr2
->nElems
);
3372 vassert(descr1
->elemTy
== descr2
->elemTy
);
3373 vassert(descr1
->base
== descr2
->base
);
3375 while (bias1
< 0 || bias2
< 0) {
3376 bias1
+= descr1
->nElems
;
3377 bias2
+= descr1
->nElems
;
3380 vpanic("getAliasingRelation: iters");
3382 vassert(bias1
>= 0 && bias2
>= 0);
3383 bias1
%= descr1
->nElems
;
3384 bias2
%= descr1
->nElems
;
3385 vassert(bias1
>= 0 && bias1
< descr1
->nElems
);
3386 vassert(bias2
>= 0 && bias2
< descr1
->nElems
);
3388 /* Finally, biasP and biasG are normalised into the range
3389 0 .. descrP/G->nElems - 1. And so we can establish
3390 equality/non-equality. */
3392 return bias1
==bias2
? ExactAlias
: NoAlias
;
3396 /*---------------------------------------------------------------*/
3397 /*--- Common Subexpression Elimination ---*/
3398 /*---------------------------------------------------------------*/
3400 /* Expensive in time and space. */
3402 /* Uses two environments:
3403 a IRTemp -> IRTemp mapping
3404 a mapping from AvailExpr* to IRTemp
3409 enum { TCc
, TCt
} tag
;
3410 union { IRTemp tmp
; IRConst
* con
; } u
;
3414 static Bool
eqTmpOrConst ( TmpOrConst
* tc1
, TmpOrConst
* tc2
)
3416 if (tc1
->tag
!= tc2
->tag
)
3420 return eqIRConst(tc1
->u
.con
, tc2
->u
.con
);
3422 return tc1
->u
.tmp
== tc2
->u
.tmp
;
3424 vpanic("eqTmpOrConst");
3428 static Bool
eqIRCallee ( IRCallee
* cee1
, IRCallee
* cee2
)
3430 Bool eq
= cee1
->addr
== cee2
->addr
;
3432 vassert(cee1
->regparms
== cee2
->regparms
);
3433 vassert(cee1
->mcx_mask
== cee2
->mcx_mask
);
3434 /* Names should be the same too, but we don't bother to
3440 /* Convert an atomic IRExpr* to a TmpOrConst. */
3441 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst
* tc
, IRExpr
* e
)
3446 tc
->u
.tmp
= e
->Iex
.RdTmp
.tmp
;
3450 tc
->u
.con
= e
->Iex
.Const
.con
;
3453 /* Getting here is a serious error. It means that the
3454 presented arg isn't an IR atom, as it should be. */
3455 vpanic("irExpr_to_TmpOrConst");
3459 /* Convert a TmpOrConst to an atomic IRExpr*. */
3460 static IRExpr
* tmpOrConst_to_IRExpr ( TmpOrConst
* tc
)
3463 case TCc
: return IRExpr_Const(tc
->u
.con
);
3464 case TCt
: return IRExpr_RdTmp(tc
->u
.tmp
);
3465 default: vpanic("tmpOrConst_to_IRExpr");
3469 /* Convert a NULL terminated IRExpr* vector to an array of
3470 TmpOrConsts, and a length. */
3471 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst
** outs
,
3476 /* We have to make two passes, one to count, one to copy. */
3477 for (n
= 0; ins
[n
]; n
++)
3479 *outs
= LibVEX_Alloc_inline(n
* sizeof(TmpOrConst
));
3481 /* and now copy .. */
3482 for (i
= 0; i
< n
; i
++) {
3483 IRExpr
* arg
= ins
[i
];
3484 TmpOrConst
* dst
= &(*outs
)[i
];
3485 irExpr_to_TmpOrConst(dst
, arg
);
3491 enum { Ut
, Btt
, Btc
, Bct
, Cf64i
, Ittt
, Itct
, Ittc
, Itcc
, GetIt
,
3500 /* binop(tmp,tmp) */
3506 /* binop(tmp,const) */
3512 /* binop(const,tmp) */
3518 /* F64i-style const */
3522 /* ITE(tmp,tmp,tmp) */
3528 /* ITE(tmp,tmp,const) */
3534 /* ITE(tmp,const,tmp) */
3540 /* ITE(tmp,const,const) */
3546 /* GetI(descr,tmp,bias)*/
3552 /* Clean helper call */
3559 /* Load(end,ty,addr) */
3569 static Bool
eq_AvailExpr ( AvailExpr
* a1
, AvailExpr
* a2
)
3571 if (LIKELY(a1
->tag
!= a2
->tag
))
3576 a1
->u
.Ut
.op
== a2
->u
.Ut
.op
3577 && a1
->u
.Ut
.arg
== a2
->u
.Ut
.arg
);
3580 a1
->u
.Btt
.op
== a2
->u
.Btt
.op
3581 && a1
->u
.Btt
.arg1
== a2
->u
.Btt
.arg1
3582 && a1
->u
.Btt
.arg2
== a2
->u
.Btt
.arg2
);
3585 a1
->u
.Btc
.op
== a2
->u
.Btc
.op
3586 && a1
->u
.Btc
.arg1
== a2
->u
.Btc
.arg1
3587 && eqIRConst(&a1
->u
.Btc
.con2
, &a2
->u
.Btc
.con2
));
3590 a1
->u
.Bct
.op
== a2
->u
.Bct
.op
3591 && a1
->u
.Bct
.arg2
== a2
->u
.Bct
.arg2
3592 && eqIRConst(&a1
->u
.Bct
.con1
, &a2
->u
.Bct
.con1
));
3594 return toBool(a1
->u
.Cf64i
.f64i
== a2
->u
.Cf64i
.f64i
);
3596 return toBool(a1
->u
.Ittt
.co
== a2
->u
.Ittt
.co
3597 && a1
->u
.Ittt
.e1
== a2
->u
.Ittt
.e1
3598 && a1
->u
.Ittt
.e0
== a2
->u
.Ittt
.e0
);
3600 return toBool(a1
->u
.Ittc
.co
== a2
->u
.Ittc
.co
3601 && a1
->u
.Ittc
.e1
== a2
->u
.Ittc
.e1
3602 && eqIRConst(&a1
->u
.Ittc
.con0
, &a2
->u
.Ittc
.con0
));
3604 return toBool(a1
->u
.Itct
.co
== a2
->u
.Itct
.co
3605 && eqIRConst(&a1
->u
.Itct
.con1
, &a2
->u
.Itct
.con1
)
3606 && a1
->u
.Itct
.e0
== a2
->u
.Itct
.e0
);
3608 return toBool(a1
->u
.Itcc
.co
== a2
->u
.Itcc
.co
3609 && eqIRConst(&a1
->u
.Itcc
.con1
, &a2
->u
.Itcc
.con1
)
3610 && eqIRConst(&a1
->u
.Itcc
.con0
, &a2
->u
.Itcc
.con0
));
3612 return toBool(eqIRRegArray(a1
->u
.GetIt
.descr
, a2
->u
.GetIt
.descr
)
3613 && a1
->u
.GetIt
.ix
== a2
->u
.GetIt
.ix
3614 && a1
->u
.GetIt
.bias
== a2
->u
.GetIt
.bias
);
3617 Bool eq
= a1
->u
.CCall
.nArgs
== a2
->u
.CCall
.nArgs
3618 && eqIRCallee(a1
->u
.CCall
.cee
, a2
->u
.CCall
.cee
);
3620 n
= a1
->u
.CCall
.nArgs
;
3621 for (i
= 0; i
< n
; i
++) {
3622 if (!eqTmpOrConst( &a1
->u
.CCall
.args
[i
],
3623 &a2
->u
.CCall
.args
[i
] )) {
3629 if (eq
) vassert(a1
->u
.CCall
.retty
== a2
->u
.CCall
.retty
);
3633 Bool eq
= toBool(a1
->u
.Load
.end
== a2
->u
.Load
.end
3634 && a1
->u
.Load
.ty
== a2
->u
.Load
.ty
3635 && eqTmpOrConst(&a1
->u
.Load
.addr
, &a2
->u
.Load
.addr
));
3639 vpanic("eq_AvailExpr");
3643 static IRExpr
* availExpr_to_IRExpr ( AvailExpr
* ae
)
3645 IRConst
*con
, *con0
, *con1
;
3648 return IRExpr_Unop( ae
->u
.Ut
.op
, IRExpr_RdTmp(ae
->u
.Ut
.arg
) );
3650 return IRExpr_Binop( ae
->u
.Btt
.op
,
3651 IRExpr_RdTmp(ae
->u
.Btt
.arg1
),
3652 IRExpr_RdTmp(ae
->u
.Btt
.arg2
) );
3654 con
= LibVEX_Alloc_inline(sizeof(IRConst
));
3655 *con
= ae
->u
.Btc
.con2
;
3656 return IRExpr_Binop( ae
->u
.Btc
.op
,
3657 IRExpr_RdTmp(ae
->u
.Btc
.arg1
),
3658 IRExpr_Const(con
) );
3660 con
= LibVEX_Alloc_inline(sizeof(IRConst
));
3661 *con
= ae
->u
.Bct
.con1
;
3662 return IRExpr_Binop( ae
->u
.Bct
.op
,
3664 IRExpr_RdTmp(ae
->u
.Bct
.arg2
) );
3666 return IRExpr_Const(IRConst_F64i(ae
->u
.Cf64i
.f64i
));
3668 return IRExpr_ITE(IRExpr_RdTmp(ae
->u
.Ittt
.co
),
3669 IRExpr_RdTmp(ae
->u
.Ittt
.e1
),
3670 IRExpr_RdTmp(ae
->u
.Ittt
.e0
));
3672 con0
= LibVEX_Alloc_inline(sizeof(IRConst
));
3673 *con0
= ae
->u
.Ittc
.con0
;
3674 return IRExpr_ITE(IRExpr_RdTmp(ae
->u
.Ittc
.co
),
3675 IRExpr_RdTmp(ae
->u
.Ittc
.e1
),
3676 IRExpr_Const(con0
));
3678 con1
= LibVEX_Alloc_inline(sizeof(IRConst
));
3679 *con1
= ae
->u
.Itct
.con1
;
3680 return IRExpr_ITE(IRExpr_RdTmp(ae
->u
.Itct
.co
),
3682 IRExpr_RdTmp(ae
->u
.Itct
.e0
));
3685 con0
= LibVEX_Alloc_inline(sizeof(IRConst
));
3686 con1
= LibVEX_Alloc_inline(sizeof(IRConst
));
3687 *con0
= ae
->u
.Itcc
.con0
;
3688 *con1
= ae
->u
.Itcc
.con1
;
3689 return IRExpr_ITE(IRExpr_RdTmp(ae
->u
.Itcc
.co
),
3691 IRExpr_Const(con0
));
3693 return IRExpr_GetI(ae
->u
.GetIt
.descr
,
3694 IRExpr_RdTmp(ae
->u
.GetIt
.ix
),
3697 Int i
, n
= ae
->u
.CCall
.nArgs
;
3699 IRExpr
** vec
= LibVEX_Alloc_inline((n
+1) * sizeof(IRExpr
*));
3701 for (i
= 0; i
< n
; i
++) {
3702 vec
[i
] = tmpOrConst_to_IRExpr(&ae
->u
.CCall
.args
[i
]);
3704 return IRExpr_CCall(ae
->u
.CCall
.cee
,
3709 return IRExpr_Load(ae
->u
.Load
.end
, ae
->u
.Load
.ty
,
3710 tmpOrConst_to_IRExpr(&ae
->u
.Load
.addr
));
3712 vpanic("availExpr_to_IRExpr");
3717 static IRTemp
subst_AvailExpr_Temp ( HashHW
* env
, IRTemp tmp
)
3720 /* env :: IRTemp -> IRTemp */
3721 if (lookupHHW( env
, &res
, (HWord
)tmp
))
3728 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst
* tc
,
3731 /* env :: IRTemp -> IRTemp */
3732 if (tc
->tag
== TCt
) {
3733 tc
->u
.tmp
= subst_AvailExpr_Temp( env
, tc
->u
.tmp
);
3737 static void subst_AvailExpr ( HashHW
* env
, AvailExpr
* ae
)
3739 /* env :: IRTemp -> IRTemp */
3742 ae
->u
.Ut
.arg
= subst_AvailExpr_Temp( env
, ae
->u
.Ut
.arg
);
3745 ae
->u
.Btt
.arg1
= subst_AvailExpr_Temp( env
, ae
->u
.Btt
.arg1
);
3746 ae
->u
.Btt
.arg2
= subst_AvailExpr_Temp( env
, ae
->u
.Btt
.arg2
);
3749 ae
->u
.Btc
.arg1
= subst_AvailExpr_Temp( env
, ae
->u
.Btc
.arg1
);
3752 ae
->u
.Bct
.arg2
= subst_AvailExpr_Temp( env
, ae
->u
.Bct
.arg2
);
3757 ae
->u
.Ittt
.co
= subst_AvailExpr_Temp( env
, ae
->u
.Ittt
.co
);
3758 ae
->u
.Ittt
.e1
= subst_AvailExpr_Temp( env
, ae
->u
.Ittt
.e1
);
3759 ae
->u
.Ittt
.e0
= subst_AvailExpr_Temp( env
, ae
->u
.Ittt
.e0
);
3762 ae
->u
.Ittc
.co
= subst_AvailExpr_Temp( env
, ae
->u
.Ittc
.co
);
3763 ae
->u
.Ittc
.e1
= subst_AvailExpr_Temp( env
, ae
->u
.Ittc
.e1
);
3766 ae
->u
.Itct
.co
= subst_AvailExpr_Temp( env
, ae
->u
.Itct
.co
);
3767 ae
->u
.Itct
.e0
= subst_AvailExpr_Temp( env
, ae
->u
.Itct
.e0
);
3770 ae
->u
.Itcc
.co
= subst_AvailExpr_Temp( env
, ae
->u
.Itcc
.co
);
3773 ae
->u
.GetIt
.ix
= subst_AvailExpr_Temp( env
, ae
->u
.GetIt
.ix
);
3776 Int i
, n
= ae
->u
.CCall
.nArgs
;;
3777 for (i
= 0; i
< n
; i
++) {
3778 subst_AvailExpr_TmpOrConst(&ae
->u
.CCall
.args
[i
], env
);
3783 subst_AvailExpr_TmpOrConst(&ae
->u
.Load
.addr
, env
);
3786 vpanic("subst_AvailExpr");
3790 static AvailExpr
* irExpr_to_AvailExpr ( IRExpr
* e
, Bool allowLoadsToBeCSEd
)
3796 if (e
->Iex
.Unop
.arg
->tag
== Iex_RdTmp
) {
3797 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3799 ae
->u
.Ut
.op
= e
->Iex
.Unop
.op
;
3800 ae
->u
.Ut
.arg
= e
->Iex
.Unop
.arg
->Iex
.RdTmp
.tmp
;
3806 if (e
->Iex
.Binop
.arg1
->tag
== Iex_RdTmp
) {
3807 if (e
->Iex
.Binop
.arg2
->tag
== Iex_RdTmp
) {
3808 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3810 ae
->u
.Btt
.op
= e
->Iex
.Binop
.op
;
3811 ae
->u
.Btt
.arg1
= e
->Iex
.Binop
.arg1
->Iex
.RdTmp
.tmp
;
3812 ae
->u
.Btt
.arg2
= e
->Iex
.Binop
.arg2
->Iex
.RdTmp
.tmp
;
3815 if (e
->Iex
.Binop
.arg2
->tag
== Iex_Const
) {
3816 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3818 ae
->u
.Btc
.op
= e
->Iex
.Binop
.op
;
3819 ae
->u
.Btc
.arg1
= e
->Iex
.Binop
.arg1
->Iex
.RdTmp
.tmp
;
3820 ae
->u
.Btc
.con2
= *(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
);
3823 } else if (e
->Iex
.Binop
.arg1
->tag
== Iex_Const
3824 && e
->Iex
.Binop
.arg2
->tag
== Iex_RdTmp
) {
3825 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3827 ae
->u
.Bct
.op
= e
->Iex
.Binop
.op
;
3828 ae
->u
.Bct
.arg2
= e
->Iex
.Binop
.arg2
->Iex
.RdTmp
.tmp
;
3829 ae
->u
.Bct
.con1
= *(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
);
3835 if (e
->Iex
.Const
.con
->tag
== Ico_F64i
) {
3836 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3838 ae
->u
.Cf64i
.f64i
= e
->Iex
.Const
.con
->Ico
.F64i
;
3844 if (e
->Iex
.ITE
.cond
->tag
== Iex_RdTmp
) {
3845 if (e
->Iex
.ITE
.iffalse
->tag
== Iex_RdTmp
) {
3846 if (e
->Iex
.ITE
.iftrue
->tag
== Iex_RdTmp
) {
3847 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3849 ae
->u
.Ittt
.co
= e
->Iex
.ITE
.cond
->Iex
.RdTmp
.tmp
;
3850 ae
->u
.Ittt
.e1
= e
->Iex
.ITE
.iftrue
->Iex
.RdTmp
.tmp
;
3851 ae
->u
.Ittt
.e0
= e
->Iex
.ITE
.iffalse
->Iex
.RdTmp
.tmp
;
3854 if (e
->Iex
.ITE
.iftrue
->tag
== Iex_Const
) {
3855 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3857 ae
->u
.Itct
.co
= e
->Iex
.ITE
.cond
->Iex
.RdTmp
.tmp
;
3858 ae
->u
.Itct
.con1
= *(e
->Iex
.ITE
.iftrue
->Iex
.Const
.con
);
3859 ae
->u
.Itct
.e0
= e
->Iex
.ITE
.iffalse
->Iex
.RdTmp
.tmp
;
3862 } else if (e
->Iex
.ITE
.iffalse
->tag
== Iex_Const
) {
3863 if (e
->Iex
.ITE
.iftrue
->tag
== Iex_RdTmp
) {
3864 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3866 ae
->u
.Ittc
.co
= e
->Iex
.ITE
.cond
->Iex
.RdTmp
.tmp
;
3867 ae
->u
.Ittc
.e1
= e
->Iex
.ITE
.iftrue
->Iex
.RdTmp
.tmp
;
3868 ae
->u
.Ittc
.con0
= *(e
->Iex
.ITE
.iffalse
->Iex
.Const
.con
);
3871 if (e
->Iex
.ITE
.iftrue
->tag
== Iex_Const
) {
3872 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3874 ae
->u
.Itcc
.co
= e
->Iex
.ITE
.cond
->Iex
.RdTmp
.tmp
;
3875 ae
->u
.Itcc
.con1
= *(e
->Iex
.ITE
.iftrue
->Iex
.Const
.con
);
3876 ae
->u
.Itcc
.con0
= *(e
->Iex
.ITE
.iffalse
->Iex
.Const
.con
);
3884 if (e
->Iex
.GetI
.ix
->tag
== Iex_RdTmp
) {
3885 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3887 ae
->u
.GetIt
.descr
= e
->Iex
.GetI
.descr
;
3888 ae
->u
.GetIt
.ix
= e
->Iex
.GetI
.ix
->Iex
.RdTmp
.tmp
;
3889 ae
->u
.GetIt
.bias
= e
->Iex
.GetI
.bias
;
3895 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3897 /* Ok to share only the cee, since it is immutable. */
3898 ae
->u
.CCall
.cee
= e
->Iex
.CCall
.cee
;
3899 ae
->u
.CCall
.retty
= e
->Iex
.CCall
.retty
;
3900 /* irExprVec_to_TmpOrConsts will assert if the args are
3901 neither tmps nor constants, but that's ok .. that's all they
3903 irExprVec_to_TmpOrConsts(
3904 &ae
->u
.CCall
.args
, &ae
->u
.CCall
.nArgs
,
3910 /* If the caller of do_cse_BB has requested that loads also
3911 be CSEd, convert them into AvailExprs. If not, we'll just
3912 return NULL here, and the load never becomes considered
3913 "available", which effectively disables CSEing of them, as
3915 if (allowLoadsToBeCSEd
) {
3916 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3918 ae
->u
.Load
.end
= e
->Iex
.Load
.end
;
3919 ae
->u
.Load
.ty
= e
->Iex
.Load
.ty
;
3920 irExpr_to_TmpOrConst(&ae
->u
.Load
.addr
, e
->Iex
.Load
.addr
);
3933 /* The BB is modified in-place. Returns True if any changes were
3934 made. The caller can choose whether or not loads should be CSEd.
3935 In the normal course of things we don't do that, since CSEing loads
3936 is something of a dodgy proposition if the guest program is doing
3937 some screwy stuff to do with races and spinloops. */
3939 static Bool
do_cse_BB ( IRSB
* bb
, Bool allowLoadsToBeCSEd
)
3947 Bool anyDone
= False
;
3949 HashHW
* tenv
= newHHW(); /* :: IRTemp -> IRTemp */
3950 HashHW
* aenv
= newHHW(); /* :: AvailExpr* -> IRTemp */
3952 vassert(sizeof(IRTemp
) <= sizeof(HWord
));
3954 if (0) { ppIRSB(bb
); vex_printf("\n\n"); }
3956 /* Iterate forwards over the stmts.
3957 On seeing "t = E", where E is one of the AvailExpr forms:
3958 let E' = apply tenv substitution to E
3960 if a mapping E' -> q is found,
3961 replace this stmt by "t = q"
3962 and add binding t -> q to tenv
3964 add binding E' -> t to aenv
3965 replace this stmt by "t = E'"
3967 Other statements are only interesting to the extent that they
3968 might invalidate some of the expressions in aenv. So there is
3969 an invalidate-bindings check for each statement seen.
3971 for (i
= 0; i
< bb
->stmts_used
; i
++) {
3974 /* ------ BEGIN invalidate aenv bindings ------ */
3975 /* This is critical: remove from aenv any E' -> .. bindings
3976 which might be invalidated by this statement. The only
3977 vulnerable kind of bindings are the GetI and Load kinds.
3978 Dirty call - dump (paranoia level -> 2)
3979 Store - dump (ditto)
3980 Put, PutI - dump unless no-overlap is proven (.. -> 1)
3981 Uses getAliasingRelation_IC and getAliasingRelation_II
3982 to do the no-overlap assessments needed for Put/PutI.
3985 case Ist_Dirty
: case Ist_Store
: case Ist_MBE
:
3986 case Ist_CAS
: case Ist_LLSC
:
3988 paranoia
= 2; break;
3989 case Ist_Put
: case Ist_PutI
:
3990 paranoia
= 1; break;
3991 case Ist_NoOp
: case Ist_IMark
: case Ist_AbiHint
:
3992 case Ist_WrTmp
: case Ist_Exit
: case Ist_LoadG
:
3993 paranoia
= 0; break;
3995 vpanic("do_cse_BB(1)");
3999 for (j
= 0; j
< aenv
->used
; j
++) {
4000 if (!aenv
->inuse
[j
])
4002 ae
= (AvailExpr
*)aenv
->key
[j
];
4003 if (ae
->tag
!= GetIt
&& ae
->tag
!= Load
)
4006 if (paranoia
>= 2) {
4009 vassert(paranoia
== 1);
4010 if (ae
->tag
== Load
) {
4011 /* Loads can be invalidated by anything that could
4012 possibly touch memory. But in that case we
4013 should have |paranoia| == 2 and we won't get
4014 here. So there's nothing to do; we don't have to
4015 invalidate the load. */
4018 if (st
->tag
== Ist_Put
) {
4019 if (getAliasingRelation_IC(
4021 IRExpr_RdTmp(ae
->u
.GetIt
.ix
),
4023 typeOfIRExpr(bb
->tyenv
,st
->Ist
.Put
.data
)
4028 if (st
->tag
== Ist_PutI
) {
4029 IRPutI
*puti
= st
->Ist
.PutI
.details
;
4030 if (getAliasingRelation_II(
4032 IRExpr_RdTmp(ae
->u
.GetIt
.ix
),
4041 vpanic("do_cse_BB(2)");
4045 aenv
->inuse
[j
] = False
;
4046 aenv
->key
[j
] = (HWord
)NULL
; /* be sure */
4049 } /* paranoia > 0 */
4051 /* ------ ENV invalidate aenv bindings ------ */
4053 /* ignore not-interestings */
4054 if (st
->tag
!= Ist_WrTmp
)
4057 t
= st
->Ist
.WrTmp
.tmp
;
4058 eprime
= irExpr_to_AvailExpr(st
->Ist
.WrTmp
.data
, allowLoadsToBeCSEd
);
4059 /* ignore if not of AvailExpr form */
4063 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
4066 subst_AvailExpr( tenv
, eprime
);
4068 /* search aenv for eprime, unfortunately the hard way */
4069 for (j
= 0; j
< aenv
->used
; j
++)
4070 if (aenv
->inuse
[j
] && eq_AvailExpr(eprime
, (AvailExpr
*)aenv
->key
[j
]))
4073 if (j
< aenv
->used
) {
4074 /* A binding E' -> q was found. Replace stmt by "t = q" and
4075 note the t->q binding in tenv. */
4076 /* (this is the core of the CSE action) */
4077 q
= (IRTemp
)aenv
->val
[j
];
4078 bb
->stmts
[i
] = IRStmt_WrTmp( t
, IRExpr_RdTmp(q
) );
4079 addToHHW( tenv
, (HWord
)t
, (HWord
)q
);
4082 /* No binding was found, so instead we add E' -> t to our
4083 collection of available expressions, replace this stmt
4084 with "t = E'", and move on. */
4085 bb
->stmts
[i
] = IRStmt_WrTmp( t
, availExpr_to_IRExpr(eprime
) );
4086 addToHHW( aenv
, (HWord
)eprime
, (HWord
)t
);
4092 sanityCheckIRSB(bb, Ity_I32);
4099 /*---------------------------------------------------------------*/
4100 /*--- Add32/Sub32 chain collapsing ---*/
4101 /*---------------------------------------------------------------*/
4103 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4105 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
4106 yes, set *tmp and *i32 appropriately. *i32 is set as if the
4107 root node is Add32, not Sub32. */
4109 static Bool
isAdd32OrSub32 ( IRExpr
* e
, IRTemp
* tmp
, Int
* i32
)
4111 if (e
->tag
!= Iex_Binop
)
4113 if (e
->Iex
.Binop
.op
!= Iop_Add32
&& e
->Iex
.Binop
.op
!= Iop_Sub32
)
4115 if (e
->Iex
.Binop
.arg1
->tag
!= Iex_RdTmp
)
4117 if (e
->Iex
.Binop
.arg2
->tag
!= Iex_Const
)
4119 *tmp
= e
->Iex
.Binop
.arg1
->Iex
.RdTmp
.tmp
;
4120 *i32
= (Int
)(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
->Ico
.U32
);
4121 if (e
->Iex
.Binop
.op
== Iop_Sub32
)
4127 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
4128 other tmp2. Scan backwards from the specified start point -- an
4131 static Bool
collapseChain ( IRSB
* bb
, Int startHere
,
4133 IRTemp
* tmp2
, Int
* i32
)
4140 /* the (var, con) pair contain the current 'representation' for
4141 'tmp'. We start with 'tmp + 0'. */
4145 /* Scan backwards to see if tmp can be replaced by some other tmp
4147 for (j
= startHere
; j
>= 0; j
--) {
4149 if (st
->tag
!= Ist_WrTmp
)
4151 if (st
->Ist
.WrTmp
.tmp
!= var
)
4153 e
= st
->Ist
.WrTmp
.data
;
4154 if (!isAdd32OrSub32(e
, &vv
, &ii
))
4160 /* no earlier binding for var .. ill-formed IR */
4161 vpanic("collapseChain");
4163 /* so, did we find anything interesting? */
4165 return False
; /* no .. */
4173 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
4175 static void collapse_AddSub_chains_BB ( IRSB
* bb
)
4181 for (i
= bb
->stmts_used
-1; i
>= 0; i
--) {
4183 if (st
->tag
== Ist_NoOp
)
4186 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
4188 if (st
->tag
== Ist_WrTmp
4189 && isAdd32OrSub32(st
->Ist
.WrTmp
.data
, &var
, &con
)) {
4191 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
4192 Find out if var can be expressed as var2 + con2. */
4193 if (collapseChain(bb
, i
-1, var
, &var2
, &con2
)) {
4195 vex_printf("replacing1 ");
4197 vex_printf(" with ");
4204 ? IRExpr_Binop(Iop_Add32
,
4206 IRExpr_Const(IRConst_U32(con2
)))
4207 : IRExpr_Binop(Iop_Sub32
,
4209 IRExpr_Const(IRConst_U32(-con2
)))
4212 ppIRStmt(bb
->stmts
[i
]);
4220 /* Try to collapse 't1 = GetI[t2, con]'. */
4222 if (st
->tag
== Ist_WrTmp
4223 && st
->Ist
.WrTmp
.data
->tag
== Iex_GetI
4224 && st
->Ist
.WrTmp
.data
->Iex
.GetI
.ix
->tag
== Iex_RdTmp
4225 && collapseChain(bb
, i
-1, st
->Ist
.WrTmp
.data
->Iex
.GetI
.ix
4226 ->Iex
.RdTmp
.tmp
, &var2
, &con2
)) {
4228 vex_printf("replacing3 ");
4230 vex_printf(" with ");
4232 con2
+= st
->Ist
.WrTmp
.data
->Iex
.GetI
.bias
;
4236 IRExpr_GetI(st
->Ist
.WrTmp
.data
->Iex
.GetI
.descr
,
4240 ppIRStmt(bb
->stmts
[i
]);
4246 /* Perhaps st is PutI[t, con] ? */
4247 IRPutI
*puti
= st
->Ist
.PutI
.details
;
4248 if (st
->tag
== Ist_PutI
4249 && puti
->ix
->tag
== Iex_RdTmp
4250 && collapseChain(bb
, i
-1, puti
->ix
->Iex
.RdTmp
.tmp
,
4253 vex_printf("replacing2 ");
4255 vex_printf(" with ");
4259 = IRStmt_PutI(mkIRPutI(puti
->descr
,
4264 ppIRStmt(bb
->stmts
[i
]);
4274 /*---------------------------------------------------------------*/
4275 /*--- PutI/GetI transformations ---*/
4276 /*---------------------------------------------------------------*/
4278 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4279 the given starting point to find, if any, a PutI which writes
4280 exactly the same piece of guest state, and so return the expression
4281 that the PutI writes. This is the core of PutI-GetI forwarding. */
4284 IRExpr
* findPutI ( IRSB
* bb
, Int startHere
,
4285 IRRegArray
* descrG
, IRExpr
* ixG
, Int biasG
)
4289 GSAliasing relation
;
4292 vex_printf("\nfindPutI ");
4293 ppIRRegArray(descrG
);
4296 vex_printf(" %d\n", biasG
);
4299 /* Scan backwards in bb from startHere to find a suitable PutI
4300 binding for (descrG, ixG, biasG), if any. */
4302 for (j
= startHere
; j
>= 0; j
--) {
4304 if (st
->tag
== Ist_NoOp
)
4307 if (st
->tag
== Ist_Put
) {
4308 /* Non-indexed Put. This can't give a binding, but we do
4309 need to check it doesn't invalidate the search by
4310 overlapping any part of the indexed guest state. */
4313 = getAliasingRelation_IC(
4316 typeOfIRExpr(bb
->tyenv
,st
->Ist
.Put
.data
) );
4318 if (relation
== NoAlias
) {
4319 /* we're OK; keep going */
4322 /* relation == UnknownAlias || relation == ExactAlias */
4323 /* If this assertion fails, we've found a Put which writes
4324 an area of guest state which is read by a GetI. Which
4325 is unlikely (although not per se wrong). */
4326 vassert(relation
!= ExactAlias
);
4327 /* This Put potentially writes guest state that the GetI
4328 reads; we must fail. */
4333 if (st
->tag
== Ist_PutI
) {
4334 IRPutI
*puti
= st
->Ist
.PutI
.details
;
4336 relation
= getAliasingRelation_II(
4343 if (relation
== NoAlias
) {
4344 /* This PutI definitely doesn't overlap. Ignore it and
4346 continue; /* the for j loop */
4349 if (relation
== UnknownAlias
) {
4350 /* We don't know if this PutI writes to the same guest
4351 state that the GetI, or not. So we have to give up. */
4355 /* Otherwise, we've found what we're looking for. */
4356 vassert(relation
== ExactAlias
);
4359 } /* if (st->tag == Ist_PutI) */
4361 if (st
->tag
== Ist_Dirty
) {
4362 /* Be conservative. If the dirty call has any guest effects at
4363 all, give up. We could do better -- only give up if there
4364 are any guest writes/modifies. */
4365 if (st
->Ist
.Dirty
.details
->nFxState
> 0)
4371 /* No valid replacement was found. */
4377 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4378 that it writes exactly the same piece of guest state) ? Safe
4381 static Bool
identicalPutIs ( IRStmt
* pi
, IRStmt
* s2
)
4383 vassert(pi
->tag
== Ist_PutI
);
4384 if (s2
->tag
!= Ist_PutI
)
4387 IRPutI
*p1
= pi
->Ist
.PutI
.details
;
4388 IRPutI
*p2
= s2
->Ist
.PutI
.details
;
4391 getAliasingRelation_II(
4392 p1
->descr
, p1
->ix
, p1
->bias
,
4393 p2
->descr
, p2
->ix
, p2
->bias
4400 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4401 overlap it? Safe answer: True. Note, we could do a lot better
4402 than this if needed. */
4405 Bool
guestAccessWhichMightOverlapPutI (
4406 IRTypeEnv
* tyenv
, IRStmt
* pi
, IRStmt
* s2
4409 GSAliasing relation
;
4410 UInt minoffP
, maxoffP
;
4412 vassert(pi
->tag
== Ist_PutI
);
4414 IRPutI
*p1
= pi
->Ist
.PutI
.details
;
4416 getArrayBounds(p1
->descr
, &minoffP
, &maxoffP
);
4425 /* just be paranoid ... these should be rare. */
4429 /* This is unbelievably lame, but it's probably not
4430 significant from a performance point of view. Really, a
4431 CAS is a load-store op, so it should be safe to say False.
4436 /* If the dirty call has any guest effects at all, give up.
4437 Probably could do better. */
4438 if (s2
->Ist
.Dirty
.details
->nFxState
> 0)
4443 vassert(isIRAtom(s2
->Ist
.Put
.data
));
4445 = getAliasingRelation_IC(
4448 typeOfIRExpr(tyenv
,s2
->Ist
.Put
.data
)
4453 IRPutI
*p2
= s2
->Ist
.PutI
.details
;
4455 vassert(isIRAtom(p2
->ix
));
4456 vassert(isIRAtom(p2
->data
));
4458 = getAliasingRelation_II(
4459 p1
->descr
, p1
->ix
, p1
->bias
,
4460 p2
->descr
, p2
->ix
, p2
->bias
4466 if (s2
->Ist
.WrTmp
.data
->tag
== Iex_GetI
) {
4468 = getAliasingRelation_II(
4469 p1
->descr
, p1
->ix
, p1
->bias
,
4470 s2
->Ist
.WrTmp
.data
->Iex
.GetI
.descr
,
4471 s2
->Ist
.WrTmp
.data
->Iex
.GetI
.ix
,
4472 s2
->Ist
.WrTmp
.data
->Iex
.GetI
.bias
4476 if (s2
->Ist
.WrTmp
.data
->tag
== Iex_Get
) {
4478 = getAliasingRelation_IC(
4480 s2
->Ist
.WrTmp
.data
->Iex
.Get
.offset
,
4481 s2
->Ist
.WrTmp
.data
->Iex
.Get
.ty
4488 vassert(isIRAtom(s2
->Ist
.Store
.addr
));
4489 vassert(isIRAtom(s2
->Ist
.Store
.data
));
4493 vex_printf("\n"); ppIRStmt(s2
); vex_printf("\n");
4494 vpanic("guestAccessWhichMightOverlapPutI");
4498 if (relation
== NoAlias
)
4501 return True
; /* ExactAlias or UnknownAlias */
4506 /* ---------- PutI/GetI transformations main functions --------- */
4508 /* Remove redundant GetIs, to the extent that they can be detected.
4509 bb is modified in-place. */
4512 void do_redundant_GetI_elimination ( IRSB
* bb
)
4517 for (i
= bb
->stmts_used
-1; i
>= 0; i
--) {
4519 if (st
->tag
== Ist_NoOp
)
4522 if (st
->tag
== Ist_WrTmp
4523 && st
->Ist
.WrTmp
.data
->tag
== Iex_GetI
4524 && st
->Ist
.WrTmp
.data
->Iex
.GetI
.ix
->tag
== Iex_RdTmp
) {
4525 IRRegArray
* descr
= st
->Ist
.WrTmp
.data
->Iex
.GetI
.descr
;
4526 IRExpr
* ix
= st
->Ist
.WrTmp
.data
->Iex
.GetI
.ix
;
4527 Int bias
= st
->Ist
.WrTmp
.data
->Iex
.GetI
.bias
;
4528 IRExpr
* replacement
= findPutI(bb
, i
-1, descr
, ix
, bias
);
4530 && isIRAtom(replacement
)
4531 /* Make sure we're doing a type-safe transformation! */
4532 && typeOfIRExpr(bb
->tyenv
, replacement
) == descr
->elemTy
) {
4534 vex_printf("rGI: ");
4535 ppIRExpr(st
->Ist
.WrTmp
.data
);
4537 ppIRExpr(replacement
);
4540 bb
->stmts
[i
] = IRStmt_WrTmp(st
->Ist
.WrTmp
.tmp
, replacement
);
4548 /* Remove redundant PutIs, to the extent which they can be detected.
4549 bb is modified in-place. */
4552 void do_redundant_PutI_elimination ( IRSB
* bb
, VexRegisterUpdates pxControl
)
4558 vassert(pxControl
< VexRegUpdAllregsAtEachInsn
);
4560 for (i
= 0; i
< bb
->stmts_used
; i
++) {
4562 if (st
->tag
!= Ist_PutI
)
4564 /* Ok, search forwards from here to see if we can find another
4565 PutI which makes this one redundant, and dodging various
4566 hazards. Search forwards:
4567 * If conditional exit, give up (because anything after that
4568 does not postdominate this put).
4569 * If a Get which might overlap, give up (because this PutI
4570 not necessarily dead).
4571 * If a Put which is identical, stop with success.
4572 * If a Put which might overlap, but is not identical, give up.
4573 * If a dirty helper call which might write guest state, give up.
4574 * If a Put which definitely doesn't overlap, or any other
4575 kind of stmt, continue.
4578 for (j
= i
+1; j
< bb
->stmts_used
; j
++) {
4580 if (stj
->tag
== Ist_NoOp
)
4582 if (identicalPutIs(st
, stj
)) {
4587 if (stj
->tag
== Ist_Exit
)
4590 if (st
->tag
== Ist_Dirty
)
4591 /* give up; could do better here */
4593 if (guestAccessWhichMightOverlapPutI(bb
->tyenv
, st
, stj
))
4600 vex_printf("rPI: ");
4604 bb
->stmts
[i
] = IRStmt_NoOp();
4611 /*---------------------------------------------------------------*/
4612 /*--- Loop unrolling ---*/
4613 /*---------------------------------------------------------------*/
4615 /* Adjust all tmp values (names) in e by delta. e is destructively
4618 static void deltaIRExpr ( IRExpr
* e
, Int delta
)
4623 e
->Iex
.RdTmp
.tmp
+= delta
;
4629 deltaIRExpr(e
->Iex
.GetI
.ix
, delta
);
4632 deltaIRExpr(e
->Iex
.Qop
.details
->arg1
, delta
);
4633 deltaIRExpr(e
->Iex
.Qop
.details
->arg2
, delta
);
4634 deltaIRExpr(e
->Iex
.Qop
.details
->arg3
, delta
);
4635 deltaIRExpr(e
->Iex
.Qop
.details
->arg4
, delta
);
4638 deltaIRExpr(e
->Iex
.Triop
.details
->arg1
, delta
);
4639 deltaIRExpr(e
->Iex
.Triop
.details
->arg2
, delta
);
4640 deltaIRExpr(e
->Iex
.Triop
.details
->arg3
, delta
);
4643 deltaIRExpr(e
->Iex
.Binop
.arg1
, delta
);
4644 deltaIRExpr(e
->Iex
.Binop
.arg2
, delta
);
4647 deltaIRExpr(e
->Iex
.Unop
.arg
, delta
);
4650 deltaIRExpr(e
->Iex
.Load
.addr
, delta
);
4653 for (i
= 0; e
->Iex
.CCall
.args
[i
]; i
++)
4654 deltaIRExpr(e
->Iex
.CCall
.args
[i
], delta
);
4657 deltaIRExpr(e
->Iex
.ITE
.cond
, delta
);
4658 deltaIRExpr(e
->Iex
.ITE
.iftrue
, delta
);
4659 deltaIRExpr(e
->Iex
.ITE
.iffalse
, delta
);
4662 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
4663 vpanic("deltaIRExpr");
4667 /* Adjust all tmp values (names) in st by delta. st is destructively
4670 static void deltaIRStmt ( IRStmt
* st
, Int delta
)
4680 deltaIRExpr(st
->Ist
.AbiHint
.base
, delta
);
4681 deltaIRExpr(st
->Ist
.AbiHint
.nia
, delta
);
4684 deltaIRExpr(st
->Ist
.Put
.data
, delta
);
4687 deltaIRExpr(st
->Ist
.PutI
.details
->ix
, delta
);
4688 deltaIRExpr(st
->Ist
.PutI
.details
->data
, delta
);
4691 st
->Ist
.WrTmp
.tmp
+= delta
;
4692 deltaIRExpr(st
->Ist
.WrTmp
.data
, delta
);
4695 deltaIRExpr(st
->Ist
.Exit
.guard
, delta
);
4698 deltaIRExpr(st
->Ist
.Store
.addr
, delta
);
4699 deltaIRExpr(st
->Ist
.Store
.data
, delta
);
4702 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
4703 deltaIRExpr(sg
->addr
, delta
);
4704 deltaIRExpr(sg
->data
, delta
);
4705 deltaIRExpr(sg
->guard
, delta
);
4709 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
4711 deltaIRExpr(lg
->addr
, delta
);
4712 deltaIRExpr(lg
->alt
, delta
);
4713 deltaIRExpr(lg
->guard
, delta
);
4717 if (st
->Ist
.CAS
.details
->oldHi
!= IRTemp_INVALID
)
4718 st
->Ist
.CAS
.details
->oldHi
+= delta
;
4719 st
->Ist
.CAS
.details
->oldLo
+= delta
;
4720 deltaIRExpr(st
->Ist
.CAS
.details
->addr
, delta
);
4721 if (st
->Ist
.CAS
.details
->expdHi
)
4722 deltaIRExpr(st
->Ist
.CAS
.details
->expdHi
, delta
);
4723 deltaIRExpr(st
->Ist
.CAS
.details
->expdLo
, delta
);
4724 if (st
->Ist
.CAS
.details
->dataHi
)
4725 deltaIRExpr(st
->Ist
.CAS
.details
->dataHi
, delta
);
4726 deltaIRExpr(st
->Ist
.CAS
.details
->dataLo
, delta
);
4729 st
->Ist
.LLSC
.result
+= delta
;
4730 deltaIRExpr(st
->Ist
.LLSC
.addr
, delta
);
4731 if (st
->Ist
.LLSC
.storedata
)
4732 deltaIRExpr(st
->Ist
.LLSC
.storedata
, delta
);
4735 d
= st
->Ist
.Dirty
.details
;
4736 deltaIRExpr(d
->guard
, delta
);
4737 for (i
= 0; d
->args
[i
]; i
++) {
4738 IRExpr
* arg
= d
->args
[i
];
4739 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
)))
4740 deltaIRExpr(arg
, delta
);
4742 if (d
->tmp
!= IRTemp_INVALID
)
4745 deltaIRExpr(d
->mAddr
, delta
);
4748 vex_printf("\n"); ppIRStmt(st
); vex_printf("\n");
4749 vpanic("deltaIRStmt");
4754 /* If possible, return a loop-unrolled version of bb0. The original
4755 is changed. If not possible, return NULL. */
4757 /* The two schemas considered are:
4761 which unrolls to (eg) X: BODY;BODY; goto X
4765 X: BODY; if (c) goto X; goto Y
4766 which trivially transforms to
4767 X: BODY; if (!c) goto Y; goto X;
4768 so it falls in the scope of the first case.
4770 X and Y must be literal (guest) addresses.
4773 static Int
calc_unroll_factor( IRSB
* bb
)
4778 for (i
= 0; i
< bb
->stmts_used
; i
++) {
4779 if (bb
->stmts
[i
]->tag
!= Ist_NoOp
)
4783 if (n_stmts
<= vex_control
.iropt_unroll_thresh
/8) {
4784 if (vex_control
.iropt_verbosity
> 0)
4785 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4786 n_stmts
, 8* n_stmts
);
4789 if (n_stmts
<= vex_control
.iropt_unroll_thresh
/4) {
4790 if (vex_control
.iropt_verbosity
> 0)
4791 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4792 n_stmts
, 4* n_stmts
);
4796 if (n_stmts
<= vex_control
.iropt_unroll_thresh
/2) {
4797 if (vex_control
.iropt_verbosity
> 0)
4798 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4799 n_stmts
, 2* n_stmts
);
4803 if (vex_control
.iropt_verbosity
> 0)
4804 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts
);
4810 static IRSB
* maybe_loop_unroll_BB ( IRSB
* bb0
, Addr my_addr
)
4812 Int i
, j
, jmax
, n_vars
;
4814 Addr64 xxx_value
, yyy_value
;
4821 if (vex_control
.iropt_unroll_thresh
<= 0)
4824 /* First off, figure out if we can unroll this loop. Do this
4825 without modifying bb0. */
4827 if (bb0
->jumpkind
!= Ijk_Boring
)
4833 /* Extract the next-guest address. If it isn't a literal, we
4837 if (udst
->tag
== Iex_Const
4838 && (udst
->Iex
.Const
.con
->tag
== Ico_U32
4839 || udst
->Iex
.Const
.con
->tag
== Ico_U64
)) {
4840 /* The BB ends in a jump to a literal location. */
4842 xxx_value
= udst
->Iex
.Const
.con
->tag
== Ico_U64
4843 ? udst
->Iex
.Const
.con
->Ico
.U64
4844 : (Addr64
)(udst
->Iex
.Const
.con
->Ico
.U32
);
4850 /* Now we know the BB ends to a jump to a literal location. If
4851 it's a jump to itself (viz, idiom #1), move directly to the
4852 unrolling stage, first cloning the bb so the original isn't
4854 if (xxx_value
== my_addr
) {
4855 unroll_factor
= calc_unroll_factor( bb0
);
4856 if (unroll_factor
< 2)
4858 bb1
= deepCopyIRSB( bb0
);
4860 udst
= NULL
; /* is now invalid */
4864 /* Search for the second idiomatic form:
4865 X: BODY; if (c) goto X; goto Y
4866 We know Y, but need to establish that the last stmt
4869 yyy_value
= xxx_value
;
4870 for (i
= bb0
->stmts_used
-1; i
>= 0; i
--)
4875 return NULL
; /* block with no stmts. Strange. */
4878 if (st
->tag
!= Ist_Exit
)
4880 if (st
->Ist
.Exit
.jk
!= Ijk_Boring
)
4883 con
= st
->Ist
.Exit
.dst
;
4884 vassert(con
->tag
== Ico_U32
|| con
->tag
== Ico_U64
);
4886 xxx_value
= con
->tag
== Ico_U64
4887 ? st
->Ist
.Exit
.dst
->Ico
.U64
4888 : (Addr64
)(st
->Ist
.Exit
.dst
->Ico
.U32
);
4890 /* If this assertion fails, we have some kind of type error. */
4891 vassert(con
->tag
== udst
->Iex
.Const
.con
->tag
);
4893 if (xxx_value
!= my_addr
)
4894 /* We didn't find either idiom. Give up. */
4897 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
4898 yyy values (which makes it look like idiom #1), and go into
4899 unrolling proper. This means finding (again) the last stmt, in
4902 unroll_factor
= calc_unroll_factor( bb0
);
4903 if (unroll_factor
< 2)
4906 bb1
= deepCopyIRSB( bb0
);
4908 udst
= NULL
; /* is now invalid */
4909 for (i
= bb1
->stmts_used
-1; i
>= 0; i
--)
4913 /* The next bunch of assertions should be true since we already
4914 found and checked the last stmt in the original bb. */
4919 vassert(st
->tag
== Ist_Exit
);
4921 con
= st
->Ist
.Exit
.dst
;
4922 vassert(con
->tag
== Ico_U32
|| con
->tag
== Ico_U64
);
4925 vassert(udst
->tag
== Iex_Const
);
4926 vassert(udst
->Iex
.Const
.con
->tag
== Ico_U32
4927 || udst
->Iex
.Const
.con
->tag
== Ico_U64
);
4928 vassert(con
->tag
== udst
->Iex
.Const
.con
->tag
);
4930 /* switch the xxx and yyy fields around */
4931 if (con
->tag
== Ico_U64
) {
4932 udst
->Iex
.Const
.con
->Ico
.U64
= xxx_value
;
4933 con
->Ico
.U64
= yyy_value
;
4935 udst
->Iex
.Const
.con
->Ico
.U32
= (UInt
)xxx_value
;
4936 con
->Ico
.U32
= (UInt
)yyy_value
;
4939 /* negate the test condition */
4941 = IRExpr_Unop(Iop_Not1
,deepCopyIRExpr(st
->Ist
.Exit
.guard
));
4943 /* --- The unroller proper. Both idioms are by now --- */
4944 /* --- now converted to idiom 1. --- */
4948 vassert(unroll_factor
== 2
4949 || unroll_factor
== 4
4950 || unroll_factor
== 8);
4952 jmax
= unroll_factor
==8 ? 3 : (unroll_factor
==4 ? 2 : 1);
4953 for (j
= 1; j
<= jmax
; j
++) {
4955 n_vars
= bb1
->tyenv
->types_used
;
4957 bb2
= deepCopyIRSB(bb1
);
4958 for (i
= 0; i
< n_vars
; i
++)
4959 (void)newIRTemp(bb1
->tyenv
, bb2
->tyenv
->types
[i
]);
4961 for (i
= 0; i
< bb2
->stmts_used
; i
++) {
4962 /* deltaIRStmt destructively modifies the stmt, but
4963 that's OK since bb2 is a complete fresh copy of bb1. */
4964 deltaIRStmt(bb2
->stmts
[i
], n_vars
);
4965 addStmtToIRSB(bb1
, bb2
->stmts
[i
]);
4970 vex_printf("\nUNROLLED (%lx)\n", my_addr
);
4975 /* Flattening; sigh. The unroller succeeds in breaking flatness
4976 by negating the test condition. This should be fixed properly.
4977 For the moment use this shotgun approach. */
4978 return flatten_BB(bb1
);
4982 /*---------------------------------------------------------------*/
4983 /*--- The tree builder ---*/
4984 /*---------------------------------------------------------------*/
4986 /* This isn't part of IR optimisation. Really it's a pass done prior
4987 to instruction selection, which improves the code that the
4988 instruction selector can produce. */
4990 /* --- The 'tmp' environment is the central data structure here --- */
4992 /* The number of outstanding bindings we're prepared to track.
4993 The number of times the env becomes full and we have to dump
4994 the oldest binding (hence reducing code quality) falls very
4995 rapidly as the env size increases. 8 gives reasonable performance
4996 under most circumstances. */
4999 /* An interval. Used to record the bytes in the guest state accessed
5000 by a Put[I] statement or by (one or more) Get[I] expression(s). In
5001 case of several Get[I] expressions, the lower/upper bounds are recorded.
5002 This is conservative but cheap.
5003 E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
5004 E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
5005 offset 200 would be recorded as [100,203] */
5015 intervals_overlap(Interval i1
, Interval i2
)
5017 return (i1
.low
>= i2
.low
&& i1
.low
<= i2
.high
) ||
5018 (i2
.low
>= i1
.low
&& i2
.low
<= i1
.high
);
5022 update_interval(Interval
*i
, Int low
, Int high
)
5024 vassert(low
<= high
);
5027 if (low
< i
->low
) i
->low
= low
;
5028 if (high
> i
->high
) i
->high
= high
;
5037 /* bindee == NULL === slot is not in use
5038 bindee != NULL === slot is in use
5045 /* Record the bytes of the guest state BINDEE reads from. */
5046 Interval getInterval
;
5050 __attribute__((unused
))
5051 static void ppAEnv ( ATmpInfo
* env
)
5054 for (i
= 0; i
< A_NENV
; i
++) {
5055 vex_printf("%d tmp %d val ", i
, (Int
)env
[i
].binder
);
5057 ppIRExpr(env
[i
].bindee
);
5059 vex_printf("(null)");
5064 /* --- Tree-traversal fns --- */
5066 /* Traverse an expr, and detect if any part of it reads memory or does
5067 a Get. Be careful ... this really controls how much the
5068 tree-builder can reorder the code, so getting it right is critical.
5070 static void setHints_Expr (Bool
* doesLoad
, Interval
* getInterval
, IRExpr
* e
)
5075 for (i
= 0; e
->Iex
.CCall
.args
[i
]; i
++)
5076 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.CCall
.args
[i
]);
5079 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.ITE
.cond
);
5080 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.ITE
.iftrue
);
5081 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.ITE
.iffalse
);
5084 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Qop
.details
->arg1
);
5085 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Qop
.details
->arg2
);
5086 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Qop
.details
->arg3
);
5087 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Qop
.details
->arg4
);
5090 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Triop
.details
->arg1
);
5091 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Triop
.details
->arg2
);
5092 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Triop
.details
->arg3
);
5095 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Binop
.arg1
);
5096 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Binop
.arg2
);
5099 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Unop
.arg
);
5103 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.Load
.addr
);
5106 Int low
= e
->Iex
.Get
.offset
;
5107 Int high
= low
+ sizeofIRType(e
->Iex
.Get
.ty
) - 1;
5108 update_interval(getInterval
, low
, high
);
5112 IRRegArray
*descr
= e
->Iex
.GetI
.descr
;
5113 Int size
= sizeofIRType(descr
->elemTy
);
5114 Int low
= descr
->base
;
5115 Int high
= low
+ descr
->nElems
* size
- 1;
5116 update_interval(getInterval
, low
, high
);
5117 setHints_Expr(doesLoad
, getInterval
, e
->Iex
.GetI
.ix
);
5124 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
5125 vpanic("setHints_Expr");
5130 /* Add a binding to the front of the env and slide all the rest
5131 backwards. It should be the case that the last slot is free. */
5132 static void addToEnvFront ( ATmpInfo
* env
, IRTemp binder
, IRExpr
* bindee
)
5135 vassert(env
[A_NENV
-1].bindee
== NULL
);
5136 for (i
= A_NENV
-1; i
>= 1; i
--)
5138 env
[0].binder
= binder
;
5139 env
[0].bindee
= bindee
;
5140 env
[0].doesLoad
= False
; /* filled in later */
5141 env
[0].getInterval
.present
= False
; /* filled in later */
5142 env
[0].getInterval
.low
= -1; /* filled in later */
5143 env
[0].getInterval
.high
= -1; /* filled in later */
5146 /* Given uses :: array of UShort, indexed by IRTemp
5147 Add the use-occurrences of temps in this expression
5150 static void aoccCount_Expr ( UShort
* uses
, IRExpr
* e
)
5156 case Iex_RdTmp
: /* the only interesting case */
5157 uses
[e
->Iex
.RdTmp
.tmp
]++;
5161 aoccCount_Expr(uses
, e
->Iex
.ITE
.cond
);
5162 aoccCount_Expr(uses
, e
->Iex
.ITE
.iftrue
);
5163 aoccCount_Expr(uses
, e
->Iex
.ITE
.iffalse
);
5167 aoccCount_Expr(uses
, e
->Iex
.Qop
.details
->arg1
);
5168 aoccCount_Expr(uses
, e
->Iex
.Qop
.details
->arg2
);
5169 aoccCount_Expr(uses
, e
->Iex
.Qop
.details
->arg3
);
5170 aoccCount_Expr(uses
, e
->Iex
.Qop
.details
->arg4
);
5174 aoccCount_Expr(uses
, e
->Iex
.Triop
.details
->arg1
);
5175 aoccCount_Expr(uses
, e
->Iex
.Triop
.details
->arg2
);
5176 aoccCount_Expr(uses
, e
->Iex
.Triop
.details
->arg3
);
5180 aoccCount_Expr(uses
, e
->Iex
.Binop
.arg1
);
5181 aoccCount_Expr(uses
, e
->Iex
.Binop
.arg2
);
5185 aoccCount_Expr(uses
, e
->Iex
.Unop
.arg
);
5189 aoccCount_Expr(uses
, e
->Iex
.Load
.addr
);
5193 for (i
= 0; e
->Iex
.CCall
.args
[i
]; i
++)
5194 aoccCount_Expr(uses
, e
->Iex
.CCall
.args
[i
]);
5198 aoccCount_Expr(uses
, e
->Iex
.GetI
.ix
);
5206 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
5207 vpanic("aoccCount_Expr");
5212 /* Given uses :: array of UShort, indexed by IRTemp
5213 Add the use-occurrences of temps in this statement
5216 static void aoccCount_Stmt ( UShort
* uses
, IRStmt
* st
)
5223 aoccCount_Expr(uses
, st
->Ist
.AbiHint
.base
);
5224 aoccCount_Expr(uses
, st
->Ist
.AbiHint
.nia
);
5227 aoccCount_Expr(uses
, st
->Ist
.WrTmp
.data
);
5230 aoccCount_Expr(uses
, st
->Ist
.Put
.data
);
5233 aoccCount_Expr(uses
, st
->Ist
.PutI
.details
->ix
);
5234 aoccCount_Expr(uses
, st
->Ist
.PutI
.details
->data
);
5237 aoccCount_Expr(uses
, st
->Ist
.Store
.addr
);
5238 aoccCount_Expr(uses
, st
->Ist
.Store
.data
);
5241 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
5242 aoccCount_Expr(uses
, sg
->addr
);
5243 aoccCount_Expr(uses
, sg
->data
);
5244 aoccCount_Expr(uses
, sg
->guard
);
5248 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
5249 aoccCount_Expr(uses
, lg
->addr
);
5250 aoccCount_Expr(uses
, lg
->alt
);
5251 aoccCount_Expr(uses
, lg
->guard
);
5255 cas
= st
->Ist
.CAS
.details
;
5256 aoccCount_Expr(uses
, cas
->addr
);
5258 aoccCount_Expr(uses
, cas
->expdHi
);
5259 aoccCount_Expr(uses
, cas
->expdLo
);
5261 aoccCount_Expr(uses
, cas
->dataHi
);
5262 aoccCount_Expr(uses
, cas
->dataLo
);
5265 aoccCount_Expr(uses
, st
->Ist
.LLSC
.addr
);
5266 if (st
->Ist
.LLSC
.storedata
)
5267 aoccCount_Expr(uses
, st
->Ist
.LLSC
.storedata
);
5270 d
= st
->Ist
.Dirty
.details
;
5271 if (d
->mFx
!= Ifx_None
)
5272 aoccCount_Expr(uses
, d
->mAddr
);
5273 aoccCount_Expr(uses
, d
->guard
);
5274 for (i
= 0; d
->args
[i
]; i
++) {
5275 IRExpr
* arg
= d
->args
[i
];
5276 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
)))
5277 aoccCount_Expr(uses
, arg
);
5285 aoccCount_Expr(uses
, st
->Ist
.Exit
.guard
);
5288 vex_printf("\n"); ppIRStmt(st
); vex_printf("\n");
5289 vpanic("aoccCount_Stmt");
5293 /* Look up a binding for tmp in the env. If found, return the bound
5294 expression, and set the env's binding to NULL so it is marked as
5295 used. If not found, return NULL. */
5297 static IRExpr
* atbSubst_Temp ( ATmpInfo
* env
, IRTemp tmp
)
5300 for (i
= 0; i
< A_NENV
; i
++) {
5301 if (env
[i
].binder
== tmp
&& env
[i
].bindee
!= NULL
) {
5302 IRExpr
* bindee
= env
[i
].bindee
;
5303 env
[i
].bindee
= NULL
;
5310 /* Traverse e, looking for temps. For each observed temp, see if env
5311 contains a binding for the temp, and if so return the bound value.
5312 The env has the property that any binding it holds is
5313 'single-shot', so once a binding is used, it is marked as no longer
5314 available, by setting its .bindee field to NULL. */
5316 static inline Bool
is_Unop ( IRExpr
* e
, IROp op
) {
5317 return e
->tag
== Iex_Unop
&& e
->Iex
.Unop
.op
== op
;
5319 static inline Bool
is_Binop ( IRExpr
* e
, IROp op
) {
5320 return e
->tag
== Iex_Binop
&& e
->Iex
.Binop
.op
== op
;
5323 static IRExpr
* fold_IRExpr_Binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
5327 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
5328 if (is_Unop(a1
, Iop_CmpwNEZ32
) && is_Unop(a2
, Iop_CmpwNEZ32
))
5329 return IRExpr_Unop( Iop_CmpwNEZ32
,
5330 IRExpr_Binop( Iop_Or32
, a1
->Iex
.Unop
.arg
,
5331 a2
->Iex
.Unop
.arg
) );
5335 /* Since X has type Ity_I1 we can simplify:
5336 CmpNE32(1Uto32(X),0)) ==> X */
5337 if (is_Unop(a1
, Iop_1Uto32
) && isZeroU32(a2
))
5338 return a1
->Iex
.Unop
.arg
;
5344 /* no reduction rule applies */
5345 return IRExpr_Binop( op
, a1
, a2
);
5348 static IRExpr
* fold_IRExpr_Unop ( IROp op
, IRExpr
* aa
)
5352 /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
5353 if (is_Unop(aa
, Iop_CmpwNEZ64
))
5354 return IRExpr_Unop( Iop_CmpwNEZ64
, aa
->Iex
.Unop
.arg
);
5355 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
5356 if (is_Binop(aa
, Iop_Or64
)
5357 && is_Unop(aa
->Iex
.Binop
.arg1
, Iop_CmpwNEZ64
))
5358 return fold_IRExpr_Unop(
5360 IRExpr_Binop(Iop_Or64
,
5361 aa
->Iex
.Binop
.arg1
->Iex
.Unop
.arg
,
5362 aa
->Iex
.Binop
.arg2
));
5363 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
5364 if (is_Binop(aa
, Iop_Or64
)
5365 && is_Unop(aa
->Iex
.Binop
.arg2
, Iop_CmpwNEZ64
))
5366 return fold_IRExpr_Unop(
5368 IRExpr_Binop(Iop_Or64
,
5370 aa
->Iex
.Binop
.arg2
->Iex
.Unop
.arg
));
5373 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
5374 if (is_Unop(aa
, Iop_Left64
))
5375 return IRExpr_Unop(Iop_CmpNEZ64
, aa
->Iex
.Unop
.arg
);
5376 /* CmpNEZ64( 1Uto64(X) ) --> X */
5377 if (is_Unop(aa
, Iop_1Uto64
))
5378 return aa
->Iex
.Unop
.arg
;
5381 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
5382 if (is_Unop(aa
, Iop_CmpwNEZ32
))
5383 return IRExpr_Unop( Iop_CmpwNEZ32
, aa
->Iex
.Unop
.arg
);
5386 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
5387 if (is_Unop(aa
, Iop_Left32
))
5388 return IRExpr_Unop(Iop_CmpNEZ32
, aa
->Iex
.Unop
.arg
);
5389 /* CmpNEZ32( 1Uto32(X) ) --> X */
5390 if (is_Unop(aa
, Iop_1Uto32
))
5391 return aa
->Iex
.Unop
.arg
;
5392 /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
5393 if (is_Unop(aa
, Iop_64to32
) && is_Unop(aa
->Iex
.Unop
.arg
, Iop_CmpwNEZ64
))
5394 return IRExpr_Unop(Iop_CmpNEZ64
, aa
->Iex
.Unop
.arg
->Iex
.Unop
.arg
);
5397 /* CmpNEZ8( 1Uto8(X) ) --> X */
5398 if (is_Unop(aa
, Iop_1Uto8
))
5399 return aa
->Iex
.Unop
.arg
;
5402 /* Left32( Left32(x) ) --> Left32(x) */
5403 if (is_Unop(aa
, Iop_Left32
))
5404 return IRExpr_Unop( Iop_Left32
, aa
->Iex
.Unop
.arg
);
5407 /* Left64( Left64(x) ) --> Left64(x) */
5408 if (is_Unop(aa
, Iop_Left64
))
5409 return IRExpr_Unop( Iop_Left64
, aa
->Iex
.Unop
.arg
);
5411 case Iop_ZeroHI64ofV128
:
5412 /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
5413 if (is_Unop(aa
, Iop_ZeroHI64ofV128
))
5414 return IRExpr_Unop( Iop_ZeroHI64ofV128
, aa
->Iex
.Unop
.arg
);
5417 /* 32to1( 1Uto32 ( x ) ) --> x */
5418 if (is_Unop(aa
, Iop_1Uto32
))
5419 return aa
->Iex
.Unop
.arg
;
5420 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
5421 if (is_Unop(aa
, Iop_CmpwNEZ32
))
5422 return IRExpr_Unop( Iop_CmpNEZ32
, aa
->Iex
.Unop
.arg
);
5425 /* 64to1( 1Uto64 ( x ) ) --> x */
5426 if (is_Unop(aa
, Iop_1Uto64
))
5427 return aa
->Iex
.Unop
.arg
;
5428 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
5429 if (is_Unop(aa
, Iop_CmpwNEZ64
))
5430 return IRExpr_Unop( Iop_CmpNEZ64
, aa
->Iex
.Unop
.arg
);
5433 /* 64to32( 32Uto64 ( x )) --> x */
5434 if (is_Unop(aa
, Iop_32Uto64
))
5435 return aa
->Iex
.Unop
.arg
;
5436 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
5437 if (is_Unop(aa
, Iop_8Uto64
))
5438 return IRExpr_Unop(Iop_8Uto32
, aa
->Iex
.Unop
.arg
);
5442 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
5443 if (is_Unop(aa
, Iop_8Uto32
))
5444 return IRExpr_Unop(Iop_8Uto64
, aa
->Iex
.Unop
.arg
);
5445 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
5446 if (is_Unop(aa
, Iop_16Uto32
))
5447 return IRExpr_Unop(Iop_16Uto64
, aa
->Iex
.Unop
.arg
);
5448 /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
5449 --> Shr64( 32Uto64(64to32(x)), sh )) */
5450 if (is_Unop(aa
, Iop_64to32
)
5451 && is_Binop(aa
->Iex
.Unop
.arg
, Iop_Shr64
)
5452 && is_Unop(aa
->Iex
.Unop
.arg
->Iex
.Binop
.arg1
, Iop_32Uto64
)
5453 && is_Unop(aa
->Iex
.Unop
.arg
->Iex
.Binop
.arg1
->Iex
.Unop
.arg
,
5455 return aa
->Iex
.Unop
.arg
;
5457 /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
5458 --> 32Uto64(64to32( Shl64( x, sh )) */
5459 if (is_Unop(aa
, Iop_64to32
)
5460 && is_Binop(aa
->Iex
.Unop
.arg
, Iop_Shl64
)
5461 && is_Unop(aa
->Iex
.Unop
.arg
->Iex
.Binop
.arg1
, Iop_32Uto64
)
5462 && is_Unop(aa
->Iex
.Unop
.arg
->Iex
.Binop
.arg1
->Iex
.Unop
.arg
,
5471 aa
->Iex
.Unop
.arg
->Iex
.Binop
.arg1
->Iex
.Unop
.arg
->Iex
.Unop
.arg
,
5472 aa
->Iex
.Unop
.arg
->Iex
.Binop
.arg2
5478 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
5479 if (is_Unop(aa
, Iop_CmpNEZ8
)
5480 && is_Unop(aa
->Iex
.Unop
.arg
, Iop_32to8
)
5481 && is_Unop(aa
->Iex
.Unop
.arg
->Iex
.Unop
.arg
, Iop_1Uto32
)
5482 && is_Unop(aa
->Iex
.Unop
.arg
->Iex
.Unop
.arg
->Iex
.Unop
.arg
,
5484 return IRExpr_Unop( Iop_CmpwNEZ32
,
5485 aa
->Iex
.Unop
.arg
->Iex
.Unop
.arg
5486 ->Iex
.Unop
.arg
->Iex
.Unop
.arg
);
5493 /* no reduction rule applies */
5494 return IRExpr_Unop( op
, aa
);
5497 static IRExpr
* atbSubst_Expr ( ATmpInfo
* env
, IRExpr
* e
)
5506 args2
= shallowCopyIRExprVec(e
->Iex
.CCall
.args
);
5507 for (i
= 0; args2
[i
]; i
++)
5508 args2
[i
] = atbSubst_Expr(env
,args2
[i
]);
5509 return IRExpr_CCall(
5515 e2
= atbSubst_Temp(env
, e
->Iex
.RdTmp
.tmp
);
5519 atbSubst_Expr(env
, e
->Iex
.ITE
.cond
),
5520 atbSubst_Expr(env
, e
->Iex
.ITE
.iftrue
),
5521 atbSubst_Expr(env
, e
->Iex
.ITE
.iffalse
)
5525 e
->Iex
.Qop
.details
->op
,
5526 atbSubst_Expr(env
, e
->Iex
.Qop
.details
->arg1
),
5527 atbSubst_Expr(env
, e
->Iex
.Qop
.details
->arg2
),
5528 atbSubst_Expr(env
, e
->Iex
.Qop
.details
->arg3
),
5529 atbSubst_Expr(env
, e
->Iex
.Qop
.details
->arg4
)
5532 return IRExpr_Triop(
5533 e
->Iex
.Triop
.details
->op
,
5534 atbSubst_Expr(env
, e
->Iex
.Triop
.details
->arg1
),
5535 atbSubst_Expr(env
, e
->Iex
.Triop
.details
->arg2
),
5536 atbSubst_Expr(env
, e
->Iex
.Triop
.details
->arg3
)
5539 return fold_IRExpr_Binop(
5541 atbSubst_Expr(env
, e
->Iex
.Binop
.arg1
),
5542 atbSubst_Expr(env
, e
->Iex
.Binop
.arg2
)
5545 return fold_IRExpr_Unop(
5547 atbSubst_Expr(env
, e
->Iex
.Unop
.arg
)
5553 atbSubst_Expr(env
, e
->Iex
.Load
.addr
)
5558 atbSubst_Expr(env
, e
->Iex
.GetI
.ix
),
5565 vex_printf("\n"); ppIRExpr(e
); vex_printf("\n");
5566 vpanic("atbSubst_Expr");
5570 /* Same deal as atbSubst_Expr, except for stmts. */
5572 static IRStmt
* atbSubst_Stmt ( ATmpInfo
* env
, IRStmt
* st
)
5577 IRPutI
*puti
, *puti2
;
5581 return IRStmt_AbiHint(
5582 atbSubst_Expr(env
, st
->Ist
.AbiHint
.base
),
5583 st
->Ist
.AbiHint
.len
,
5584 atbSubst_Expr(env
, st
->Ist
.AbiHint
.nia
)
5587 return IRStmt_Store(
5589 atbSubst_Expr(env
, st
->Ist
.Store
.addr
),
5590 atbSubst_Expr(env
, st
->Ist
.Store
.data
)
5593 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
5594 return IRStmt_StoreG(sg
->end
,
5595 atbSubst_Expr(env
, sg
->addr
),
5596 atbSubst_Expr(env
, sg
->data
),
5597 atbSubst_Expr(env
, sg
->guard
));
5600 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
5601 return IRStmt_LoadG(lg
->end
, lg
->cvt
, lg
->dst
,
5602 atbSubst_Expr(env
, lg
->addr
),
5603 atbSubst_Expr(env
, lg
->alt
),
5604 atbSubst_Expr(env
, lg
->guard
));
5607 return IRStmt_WrTmp(
5609 atbSubst_Expr(env
, st
->Ist
.WrTmp
.data
)
5614 atbSubst_Expr(env
, st
->Ist
.Put
.data
)
5617 puti
= st
->Ist
.PutI
.details
;
5618 puti2
= mkIRPutI(puti
->descr
,
5619 atbSubst_Expr(env
, puti
->ix
),
5621 atbSubst_Expr(env
, puti
->data
));
5622 return IRStmt_PutI(puti2
);
5626 atbSubst_Expr(env
, st
->Ist
.Exit
.guard
),
5632 return IRStmt_IMark(st
->Ist
.IMark
.addr
,
5634 st
->Ist
.IMark
.delta
);
5636 return IRStmt_NoOp();
5638 return IRStmt_MBE(st
->Ist
.MBE
.event
);
5640 cas
= st
->Ist
.CAS
.details
;
5642 cas
->oldHi
, cas
->oldLo
, cas
->end
,
5643 atbSubst_Expr(env
, cas
->addr
),
5644 cas
->expdHi
? atbSubst_Expr(env
, cas
->expdHi
) : NULL
,
5645 atbSubst_Expr(env
, cas
->expdLo
),
5646 cas
->dataHi
? atbSubst_Expr(env
, cas
->dataHi
) : NULL
,
5647 atbSubst_Expr(env
, cas
->dataLo
)
5649 return IRStmt_CAS(cas2
);
5653 st
->Ist
.LLSC
.result
,
5654 atbSubst_Expr(env
, st
->Ist
.LLSC
.addr
),
5655 st
->Ist
.LLSC
.storedata
5656 ? atbSubst_Expr(env
, st
->Ist
.LLSC
.storedata
) : NULL
5659 d
= st
->Ist
.Dirty
.details
;
5660 d2
= emptyIRDirty();
5662 if (d2
->mFx
!= Ifx_None
)
5663 d2
->mAddr
= atbSubst_Expr(env
, d2
->mAddr
);
5664 d2
->guard
= atbSubst_Expr(env
, d2
->guard
);
5665 for (i
= 0; d2
->args
[i
]; i
++) {
5666 IRExpr
* arg
= d2
->args
[i
];
5667 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
)))
5668 d2
->args
[i
] = atbSubst_Expr(env
, arg
);
5670 return IRStmt_Dirty(d2
);
5672 vex_printf("\n"); ppIRStmt(st
); vex_printf("\n");
5673 vpanic("atbSubst_Stmt");
5678 static Bool
dirty_helper_stores ( const IRDirty
*d
)
5680 return d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
;
5684 static Interval
dirty_helper_puts (
5686 Bool (*preciseMemExnsFn
)(Int
,Int
,VexRegisterUpdates
),
5687 VexRegisterUpdates pxControl
,
5688 /*OUT*/Bool
*requiresPreciseMemExns
5694 /* Passing the guest state pointer opens the door to modifying the
5695 guest state under the covers. It's not allowed, but let's be
5696 extra conservative and assume the worst. */
5697 for (i
= 0; d
->args
[i
]; i
++) {
5698 if (UNLIKELY(d
->args
[i
]->tag
== Iex_GSPTR
)) {
5699 *requiresPreciseMemExns
= True
;
5700 /* Assume all guest state is written. */
5701 interval
.present
= True
;
5703 interval
.high
= 0x7FFFFFFF;
5708 /* Check the side effects on the guest state */
5709 interval
.present
= False
;
5710 interval
.low
= interval
.high
= -1;
5711 *requiresPreciseMemExns
= False
;
5713 for (i
= 0; i
< d
->nFxState
; ++i
) {
5714 if (d
->fxState
[i
].fx
!= Ifx_Read
) {
5715 Int offset
= d
->fxState
[i
].offset
;
5716 Int size
= d
->fxState
[i
].size
;
5717 Int nRepeats
= d
->fxState
[i
].nRepeats
;
5718 Int repeatLen
= d
->fxState
[i
].repeatLen
;
5720 if (preciseMemExnsFn(offset
,
5721 offset
+ nRepeats
* repeatLen
+ size
- 1,
5723 *requiresPreciseMemExns
= True
;
5725 update_interval(&interval
, offset
,
5726 offset
+ nRepeats
* repeatLen
+ size
- 1);
5733 /* Return an interval if st modifies the guest state. Via
5734 requiresPreciseMemExns return whether or not that modification
5735 requires precise exceptions. */
5736 static Interval
stmt_modifies_guest_state (
5737 IRSB
*bb
, const IRStmt
*st
,
5738 Bool (*preciseMemExnsFn
)(Int
,Int
,VexRegisterUpdates
),
5739 VexRegisterUpdates pxControl
,
5740 /*OUT*/Bool
*requiresPreciseMemExns
5747 Int offset
= st
->Ist
.Put
.offset
;
5748 Int size
= sizeofIRType(typeOfIRExpr(bb
->tyenv
, st
->Ist
.Put
.data
));
5750 *requiresPreciseMemExns
5751 = preciseMemExnsFn(offset
, offset
+ size
- 1, pxControl
);
5752 interval
.present
= True
;
5753 interval
.low
= offset
;
5754 interval
.high
= offset
+ size
- 1;
5759 IRRegArray
*descr
= st
->Ist
.PutI
.details
->descr
;
5760 Int offset
= descr
->base
;
5761 Int size
= sizeofIRType(descr
->elemTy
);
5763 /* We quietly assume here that all segments are contiguous and there
5764 are no holes. This is to avoid a loop. The assumption is conservative
5765 in the sense that we might report that precise memory exceptions are
5766 needed when in fact they are not. */
5767 *requiresPreciseMemExns
5768 = preciseMemExnsFn(offset
, offset
+ descr
->nElems
* size
- 1,
5770 interval
.present
= True
;
5771 interval
.low
= offset
;
5772 interval
.high
= offset
+ descr
->nElems
* size
- 1;
5777 return dirty_helper_puts(st
->Ist
.Dirty
.details
,
5778 preciseMemExnsFn
, pxControl
,
5779 requiresPreciseMemExns
);
5782 *requiresPreciseMemExns
= False
;
5783 interval
.present
= False
;
5790 /* notstatic */ Addr
ado_treebuild_BB (
5792 Bool (*preciseMemExnsFn
)(Int
,Int
,VexRegisterUpdates
),
5793 VexRegisterUpdates pxControl
5797 Bool stmtStores
, invalidateMe
;
5798 Interval putInterval
;
5801 ATmpInfo env
[A_NENV
];
5803 Bool max_ga_known
= False
;
5806 Int n_tmps
= bb
->tyenv
->types_used
;
5807 UShort
* uses
= LibVEX_Alloc_inline(n_tmps
* sizeof(UShort
));
5809 /* Phase 1. Scan forwards in bb, counting use occurrences of each
5810 temp. Also count occurrences in the bb->next field. Take the
5811 opportunity to also find the maximum guest address in the block,
5812 since that will be needed later for deciding when we can safely
5813 elide event checks. */
5815 for (i
= 0; i
< n_tmps
; i
++)
5818 for (i
= 0; i
< bb
->stmts_used
; i
++) {
5824 UInt len
= st
->Ist
.IMark
.len
;
5825 Addr mga
= st
->Ist
.IMark
.addr
+ (len
< 1 ? 1 : len
) - 1;
5826 max_ga_known
= True
;
5834 aoccCount_Stmt( uses
, st
);
5836 aoccCount_Expr(uses
, bb
->next
);
5839 for (i
= 0; i
< n_tmps
; i
++) {
5842 ppIRTemp( (IRTemp
)i
);
5843 vex_printf(" used %d\n", (Int
)uses
[i
] );
5847 /* Phase 2. Scan forwards in bb. For each statement in turn:
5849 If the env is full, emit the end element. This guarantees
5850 there is at least one free slot in the following.
5852 On seeing 't = E', occ(t)==1,
5855 add t -> E' to the front of the env
5856 Examine E' and set the hints for E' appropriately
5857 (doesLoad? doesGet?)
5859 On seeing any other stmt,
5860 let stmt' = env(stmt)
5861 remove from env any 't=E' binds invalidated by stmt
5862 emit the invalidated stmts
5864 compact any holes in env
5865 by sliding entries towards the front
5867 Finally, apply env to bb->next.
5870 for (i
= 0; i
< A_NENV
; i
++) {
5871 env
[i
].bindee
= NULL
;
5872 env
[i
].binder
= IRTemp_INVALID
;
5875 /* The stmts in bb are being reordered, and we are guaranteed to
5876 end up with no more than the number we started with. Use i to
5877 be the cursor of the current stmt examined and j <= i to be that
5878 for the current stmt being written.
5881 for (i
= 0; i
< bb
->stmts_used
; i
++) {
5884 if (st
->tag
== Ist_NoOp
)
5887 /* Ensure there's at least one space in the env, by emitting
5888 the oldest binding if necessary. */
5889 if (env
[A_NENV
-1].bindee
!= NULL
) {
5890 bb
->stmts
[j
] = IRStmt_WrTmp( env
[A_NENV
-1].binder
,
5891 env
[A_NENV
-1].bindee
);
5894 env
[A_NENV
-1].bindee
= NULL
;
5897 /* Consider current stmt. */
5898 if (st
->tag
== Ist_WrTmp
&& uses
[st
->Ist
.WrTmp
.tmp
] <= 1) {
5901 /* optional extra: dump dead bindings as we find them.
5902 Removes the need for a prior dead-code removal pass. */
5903 if (uses
[st
->Ist
.WrTmp
.tmp
] == 0) {
5904 if (0) vex_printf("DEAD binding\n");
5905 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5907 vassert(uses
[st
->Ist
.WrTmp
.tmp
] == 1);
5909 /* ok, we have 't = E', occ(t)==1. Do the abovementioned
5911 e
= st
->Ist
.WrTmp
.data
;
5912 e2
= atbSubst_Expr(env
, e
);
5913 addToEnvFront(env
, st
->Ist
.WrTmp
.tmp
, e2
);
5914 setHints_Expr(&env
[0].doesLoad
, &env
[0].getInterval
, e2
);
5915 /* don't advance j, as we are deleting this stmt and instead
5916 holding it temporarily in the env. */
5917 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5920 /* we get here for any other kind of statement. */
5921 /* 'use up' any bindings required by the current statement. */
5922 st2
= atbSubst_Stmt(env
, st
);
5924 /* Now, before this stmt, dump any bindings in env that it
5925 invalidates. These need to be dumped in the order in which
5926 they originally entered env -- that means from oldest to
5929 /* putInterval/stmtStores characterise what the stmt under
5930 consideration does, or might do (sidely safe @ True). */
5932 Bool putRequiresPreciseMemExns
;
5933 putInterval
= stmt_modifies_guest_state(
5934 bb
, st
, preciseMemExnsFn
, pxControl
,
5935 &putRequiresPreciseMemExns
5938 /* be True if this stmt writes memory or might do (==> we don't
5939 want to reorder other loads or stores relative to it). Also,
5940 both LL and SC fall under this classification, since we
5941 really ought to be conservative and not reorder any other
5942 memory transactions relative to them. */
5944 = toBool( st
->tag
== Ist_Store
5945 || (st
->tag
== Ist_Dirty
5946 && dirty_helper_stores(st
->Ist
.Dirty
.details
))
5947 || st
->tag
== Ist_LLSC
5948 || st
->tag
== Ist_CAS
);
5950 for (k
= A_NENV
-1; k
>= 0; k
--) {
5951 if (env
[k
].bindee
== NULL
)
5953 /* Compare the actions of this stmt with the actions of
5954 binding 'k', to see if they invalidate the binding. */
5957 /* a store invalidates loaded data */
5958 (env
[k
].doesLoad
&& stmtStores
)
5959 /* a put invalidates get'd data, if they overlap */
5960 || ((env
[k
].getInterval
.present
&& putInterval
.present
) &&
5961 intervals_overlap(env
[k
].getInterval
, putInterval
))
5962 /* a put invalidates loaded data. That means, in essense, that
5963 a load expression cannot be substituted into a statement
5964 that follows the put. But there is nothing wrong doing so
5965 except when the put statement requries precise exceptions.
5966 Think of a load that is moved past a put where the put
5967 updates the IP in the guest state. If the load generates
5968 a segfault, the wrong address (line number) would be
5970 || (env
[k
].doesLoad
&& putInterval
.present
&&
5971 putRequiresPreciseMemExns
)
5972 /* probably overly conservative: a memory bus event
5973 invalidates absolutely everything, so that all
5974 computation prior to it is forced to complete before
5975 proceeding with the event (fence,lock,unlock). */
5976 || st
->tag
== Ist_MBE
5977 /* also be (probably overly) paranoid re AbiHints */
5978 || st
->tag
== Ist_AbiHint
5981 bb
->stmts
[j
] = IRStmt_WrTmp( env
[k
].binder
, env
[k
].bindee
);
5984 env
[k
].bindee
= NULL
;
5988 /* Slide in-use entries in env up to the front */
5990 for (k
= 0; k
< A_NENV
; k
++) {
5991 if (env
[k
].bindee
!= NULL
) {
5996 for (m
= m
; m
< A_NENV
; m
++) {
5997 env
[m
].bindee
= NULL
;
6000 /* finally, emit the substituted statement */
6002 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
6006 } /* for each stmt in the original bb ... */
6008 /* Finally ... substitute the ->next field as much as possible, and
6009 dump any left-over bindings. Hmm. Perhaps there should be no
6010 left over bindings? Or any left-over bindings are
6011 by definition dead? */
6012 bb
->next
= atbSubst_Expr(env
, bb
->next
);
6015 return max_ga_known
? max_ga
: ~(Addr
)0;
6019 /*---------------------------------------------------------------*/
6020 /*--- MSVC specific transformation hacks ---*/
6021 /*---------------------------------------------------------------*/
6023 /* The purpose of all this is to find MSVC's idiom for non-constant
6024 bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
6025 gcc's idiom "(a & ~c) | (b & c)". Motivation is that Memcheck has
6026 generates a lot of false positives from the MSVC version because it
6027 doesn't understand that XORing an undefined bit with itself gives a
6030 This isn't a problem for the simple case "x ^ x", because iropt
6031 folds it to a constant zero before Memcheck ever sees it. But in
6032 this case we have an intervening "& c" which defeats the simple
6033 case. So we have to carefully inspect all expressions rooted at an
6034 XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
6035 7 other variants resulting from swapping the order of arguments to
6036 the three binary operations. If we get a match, we then replace
6037 the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
6039 The key difficulty is to spot the two uses of "a". To normalise
6040 the IR to maximise the chances of success, we first do a CSE pass,
6041 with CSEing of loads enabled, since the two "a" expressions may be
6042 loads, which need to be commoned up. Then we do a constant folding
6043 pass, so as to remove any tmp-to-tmp assignment chains that would
6044 make matching the original expression more difficult.
6048 /* Helper function for debug printing */
6049 __attribute__((unused
))
6050 static void print_flat_expr ( IRExpr
** env
, IRExpr
* e
)
6058 ppIROp(e
->Iex
.Binop
.op
);
6060 print_flat_expr(env
, e
->Iex
.Binop
.arg1
);
6062 print_flat_expr(env
, e
->Iex
.Binop
.arg2
);
6067 ppIROp(e
->Iex
.Unop
.op
);
6069 print_flat_expr(env
, e
->Iex
.Unop
.arg
);
6074 ppIRTemp(e
->Iex
.RdTmp
.tmp
);
6076 print_flat_expr(env
, chase1(env
, e
));
6086 vex_printf("FAIL: "); ppIRExpr(e
); vex_printf("\n");
6091 /* Spot a ^ ((a ^ b) & c) for a,b and c tmp-or-const (atoms)
6092 or any of the other 7 variants generated by switching the order
6093 of arguments to the outer ^, the inner ^ and the &.
6095 static UInt
spotBitfieldAssignment ( /*OUT*/IRExpr
** aa
, /*OUT*/IRExpr
** bb
,
6097 IRExpr
** env
, IRExpr
* e
,
6098 IROp opAND
, IROp opXOR
)
6100 # define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
6101 && (_e)->Iex.Binop.op == (_op))
6102 # define ISATOM(_e) isIRAtom(_e)
6103 # define STEP(_e) chase1(env, (_e))
6104 # define LL(_e) ((_e)->Iex.Binop.arg1)
6105 # define RR(_e) ((_e)->Iex.Binop.arg2)
6107 IRExpr
*a1
, *and, *xor, *c
, *a2bL
, *a2bR
;
6109 /* This is common to all 8 cases */
6110 if (!ISBIN(e
, opXOR
)) goto fail
;
6112 /* -----and------ */
6114 /* find variant 1: a1 ^ ((a2 ^ b) & c) */
6115 /* find variant 2: a1 ^ ((b ^ a2) & c) */
6116 a1
= and = xor = c
= a2bL
= a2bR
= NULL
;
6120 if (!ISBIN(and, opAND
)) goto v34
;
6121 xor = STEP(LL(and));
6123 if (!ISBIN(xor, opXOR
)) goto v34
;
6127 if (eqIRAtom(a1
, a2bL
) && !eqIRAtom(a1
, a2bR
)) {
6133 if (eqIRAtom(a1
, a2bR
) && !eqIRAtom(a1
, a2bL
)) {
6141 /* -----and------ */
6143 /* find variant 3: ((a2 ^ b) & c) ^ a1 */
6144 /* find variant 4: ((b ^ a2) & c) ^ a1 */
6145 a1
= and = xor = c
= a2bL
= a2bR
= NULL
;
6149 if (!ISBIN(and, opAND
)) goto v56
;
6150 xor = STEP(LL(and));
6152 if (!ISBIN(xor, opXOR
)) goto v56
;
6156 if (eqIRAtom(a1
, a2bL
) && !eqIRAtom(a1
, a2bR
)) {
6162 if (eqIRAtom(a1
, a2bR
) && !eqIRAtom(a1
, a2bL
)) {
6170 /* -----and------ */
6172 /* find variant 5: a1 ^ (c & (a2 ^ b)) */
6173 /* find variant 6: a1 ^ (c & (b ^ a2)) */
6174 a1
= and = xor = c
= a2bL
= a2bR
= NULL
;
6178 if (!ISBIN(and, opAND
)) goto v78
;
6179 xor = STEP(RR(and));
6181 if (!ISBIN(xor, opXOR
)) goto v78
;
6185 if (eqIRAtom(a1
, a2bL
) && !eqIRAtom(a1
, a2bR
)) {
6189 vassert(5-5); // ATC
6192 if (eqIRAtom(a1
, a2bR
) && !eqIRAtom(a1
, a2bL
)) {
6196 vassert(6-6); // ATC
6201 /* -----and------ */
6203 /* find variant 7: (c & (a2 ^ b)) ^ a1 */
6204 /* find variant 8: (c & (b ^ a2)) ^ a1 */
6205 a1
= and = xor = c
= a2bL
= a2bR
= NULL
;
6209 if (!ISBIN(and, opAND
)) goto fail
;
6210 xor = STEP(RR(and));
6212 if (!ISBIN(xor, opXOR
)) goto fail
;
6216 if (eqIRAtom(a1
, a2bL
) && !eqIRAtom(a1
, a2bR
)) {
6222 if (eqIRAtom(a1
, a2bR
) && !eqIRAtom(a1
, a2bL
)) {
6239 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
6240 variants thereof generated by switching arguments around), return
6241 the IRExpr* for (a & ~c) | (b & c). Else return NULL. */
6242 static IRExpr
* do_XOR_TRANSFORMS_IRExpr ( IRExpr
** env
, IRExpr
* e
)
6244 if (e
->tag
!= Iex_Binop
)
6247 const HChar
* tyNm
= NULL
;
6248 IROp opOR
= Iop_INVALID
;
6249 IROp opAND
= Iop_INVALID
;
6250 IROp opNOT
= Iop_INVALID
;
6251 IROp opXOR
= Iop_INVALID
;
6252 switch (e
->Iex
.Binop
.op
) {
6255 opOR
= Iop_Or32
; opAND
= Iop_And32
;
6256 opNOT
= Iop_Not32
; opXOR
= Iop_Xor32
;
6260 opOR
= Iop_Or16
; opAND
= Iop_And16
;
6261 opNOT
= Iop_Not16
; opXOR
= Iop_Xor16
;
6265 opOR
= Iop_Or8
; opAND
= Iop_And8
;
6266 opNOT
= Iop_Not8
; opXOR
= Iop_Xor8
;
6275 UInt variant
= spotBitfieldAssignment(&aa
, &bb
, &cc
, env
, e
, opAND
, opXOR
);
6277 static UInt ctr
= 0;
6279 vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
6280 "type %s, variant %u\n",
6281 ++ctr
, tyNm
, variant
);
6282 /* It's vitally important that the returned aa, bb and cc are
6283 atoms -- either constants or tmps. If it's anything else
6284 (eg, a GET) then incorporating them in a tree at this point
6285 in the SB may erroneously pull them forwards (eg of a PUT
6286 that originally was after the GET) and so transform the IR
6287 wrongly. spotBitfieldAssignment should guarantee only to
6288 give us atoms, but we check here anyway. */
6289 vassert(aa
&& isIRAtom(aa
));
6290 vassert(bb
&& isIRAtom(bb
));
6291 vassert(cc
&& isIRAtom(cc
));
6292 return IRExpr_Binop(
6294 IRExpr_Binop(opAND
, aa
, IRExpr_Unop(opNOT
, cc
)),
6295 IRExpr_Binop(opAND
, bb
, cc
)
6302 /* SB is modified in-place. Visit all the IRExprs and, for those
6303 which are allowed to be non-atomic, perform the XOR transform if
6304 possible. This makes |sb| be non-flat, but that's ok, the caller
6305 can re-flatten it. Returns True iff any changes were made. */
6306 static Bool
do_XOR_TRANSFORM_IRSB ( IRSB
* sb
)
6309 Bool changed
= False
;
6311 /* Make the tmp->expr environment, so we can use it for
6312 chasing expressions. */
6313 Int n_tmps
= sb
->tyenv
->types_used
;
6314 IRExpr
** env
= LibVEX_Alloc_inline(n_tmps
* sizeof(IRExpr
*));
6315 for (i
= 0; i
< n_tmps
; i
++)
6318 for (i
= 0; i
< sb
->stmts_used
; i
++) {
6319 IRStmt
* st
= sb
->stmts
[i
];
6320 if (st
->tag
!= Ist_WrTmp
)
6322 IRTemp t
= st
->Ist
.WrTmp
.tmp
;
6323 vassert(t
>= 0 && t
< n_tmps
);
6324 env
[t
] = st
->Ist
.WrTmp
.data
;
6327 for (i
= 0; i
< sb
->stmts_used
; i
++) {
6328 IRStmt
* st
= sb
->stmts
[i
];
6332 vassert(isIRAtom(st
->Ist
.AbiHint
.base
));
6333 vassert(isIRAtom(st
->Ist
.AbiHint
.nia
));
6336 vassert(isIRAtom(st
->Ist
.Put
.data
));
6339 IRPutI
* puti
= st
->Ist
.PutI
.details
;
6340 vassert(isIRAtom(puti
->ix
));
6341 vassert(isIRAtom(puti
->data
));
6345 /* This is the one place where an expr (st->Ist.WrTmp.data) is
6346 allowed to be more than just a constant or a tmp. */
6348 = do_XOR_TRANSFORMS_IRExpr(env
, st
->Ist
.WrTmp
.data
);
6351 st
->Ist
.WrTmp
.data
= mb_new_data
;
6358 vassert(isIRAtom(st
->Ist
.Store
.addr
));
6359 vassert(isIRAtom(st
->Ist
.Store
.data
));
6362 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
6363 vassert(isIRAtom(sg
->addr
));
6364 vassert(isIRAtom(sg
->data
));
6365 vassert(isIRAtom(sg
->guard
));
6369 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
6370 vassert(isIRAtom(lg
->addr
));
6371 vassert(isIRAtom(lg
->alt
));
6372 vassert(isIRAtom(lg
->guard
));
6376 IRCAS
* cas
= st
->Ist
.CAS
.details
;
6377 vassert(isIRAtom(cas
->addr
));
6378 vassert(cas
->expdHi
== NULL
|| isIRAtom(cas
->expdHi
));
6379 vassert(isIRAtom(cas
->expdLo
));
6380 vassert(cas
->dataHi
== NULL
|| isIRAtom(cas
->dataHi
));
6381 vassert(isIRAtom(cas
->dataLo
));
6385 vassert(isIRAtom(st
->Ist
.LLSC
.addr
));
6386 if (st
->Ist
.LLSC
.storedata
)
6387 vassert(isIRAtom(st
->Ist
.LLSC
.storedata
));
6390 IRDirty
* d
= st
->Ist
.Dirty
.details
;
6391 if (d
->mFx
!= Ifx_None
) {
6392 vassert(isIRAtom(d
->mAddr
));
6394 vassert(isIRAtom(d
->guard
));
6395 for (Int j
= 0; d
->args
[j
]; j
++) {
6396 IRExpr
* arg
= d
->args
[j
];
6397 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
))) {
6398 vassert(isIRAtom(arg
));
6408 vassert(isIRAtom(st
->Ist
.Exit
.guard
));
6411 vex_printf("\n"); ppIRStmt(st
);
6412 vpanic("do_XOR_TRANSFORMS_IRSB");
6416 vassert(isIRAtom(sb
->next
));
/* Normalise |sb| and then apply the MSVC optimised-code hacks (the
   transformation implemented by do_XOR_TRANSFORM_IRSB) to it.
   NOTE(review): the extraction this chunk came from dropped several
   source lines of this function (at least its opening/closing braces,
   any statements following cprop_BB in the 'any_cse_changes' arm, a
   guard on the re-flatten, and the final 'return') -- verify the full
   text against the upstream file before editing.  Surviving tokens are
   kept below in their original order. */
static IRSB* do_MSVC_HACKS ( IRSB* sb )
   // Normalise as much as we can.  This is the one-and-only place
   // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   if (any_cse_changes) {
      // CSEing might have created dead code.  Remove it.
      sb = cprop_BB ( sb );

   // Visit all atoms, do the transformation proper.  bb is modified
   // (the Bool result presumably reports whether anything changed --
   // the rest of this comment was lost in extraction).
   Bool changed = do_XOR_TRANSFORM_IRSB(sb);

      // The transformation generates non-flat expressions, so we now
      // need to re-flatten the block.
      sb = flatten_BB(sb);
6446 /*---------------------------------------------------------------*/
6447 /*--- iropt main ---*/
6448 /*---------------------------------------------------------------*/
/* Compile-time trace switch: when flipped to True, the passes below
   print a progress banner after each stage. */
static Bool iropt_verbose = False; /* True; */
6453 /* Do a simple cleanup pass on bb. This is: redundant Get removal,
6454 redundant Put removal, constant propagation, dead code removal,
6455 clean helper specialisation, and dead code removal (again).
/* Cheap cleanup pass over a block: redundant Get removal,
   (conditionally) redundant Put removal, constant propagation, dead
   code removal, and clean-helper specialisation followed by another
   dead-code pass, with optional tracing after each stage.
   NOTE(review): extraction dropped lines of this function -- at least
   the 'IRSB* bb' parameter, braces, any per-stage trace statements
   after the vex_printf banners, and the final 'return'.  Surviving
   tokens are kept in order; verify against the upstream file. */
IRSB* cheap_transformations (
   IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   VexRegisterUpdates pxControl

   /* Remove redundant reads of the guest state. */
   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );

   /* Redundant Put removal is only attempted when the register-update
      policy is weaker than "all registers at each insn". */
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );

   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );

   /* Constant propagation / folding; returns a (possibly new) block. */
   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );

   /* Remove dead code exposed by the passes above. */
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );

   /* Specialise calls to clean helpers, then clean up again. */
   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd \n\n" );
6504 /* Do some more expensive transformations on bb, which are aimed at
6505 optimising as much as possible in the presence of GetI and PutI. */
/* More expensive transformations, aimed at optimising blocks that
   contain GetI and PutI: non-load CSE, Add/Sub chain collapsing,
   redundant GetI elimination, (conditionally) redundant PutI
   elimination, then dead code removal.
   NOTE(review): extraction dropped lines here -- at least braces and
   the function's final statement/return.  Verify against the upstream
   file.  Surviving tokens are kept in order. */
IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
   /* CSE, with loads explicitly excluded from CSEing. */
   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   /* PutI elimination is only safe under a relaxed register-update
      policy. */
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb, pxControl );
   /* Clean up anything the above passes made dead. */
   do_deadcode_BB( bb );
6521 /* Scan a flattened BB to look for signs that more expensive
6522 optimisations might be useful:
6523 - find out if there are any GetIs and PutIs
6524 - find out if there are any floating or vector-typed temporaries
/* Scan a flattened block to decide whether the expensive passes might
   pay off: reports (via the OUT parameters) whether any GetI/PutI
   appears, and whether any floating-point / vector temporaries are
   written.  Also sanity-checks (vassert) that statement operands are
   already atoms, i.e. that the block really is flat.
   NOTE(review): extraction dropped lines throughout -- the trailing
   'IRSB* bb' parameter, local declarations (i, j, st, d, cas), the
   per-statement 'switch (st->tag)' scaffolding with its case labels
   and breaks, and most closing braces.  Surviving tokens are kept in
   their original order; verify against the upstream file. */
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   /* Walk every statement in the block. */
   for (i = 0; i < bb->stmts_used; i++) {

         /* AbiHint: both operands must be atoms. */
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));

         /* Indexed guest-state write (its case label was dropped by
            extraction). */
         *hasGetIorPutI = True;

         /* WrTmp: note indexed guest-state reads, and classify the
            written temp's type. */
         if (st->Ist.WrTmp.data->tag == Iex_GetI)
            *hasGetIorPutI = True;
         switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
            /* Integer temps: no flag set (the case body/breaks were
               dropped by extraction). */
            case Ity_I1: case Ity_I8: case Ity_I16:
            case Ity_I32: case Ity_I64: case Ity_I128:
            /* FP and vector temps. */
            case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
            case Ity_V128: case Ity_V256:
               *hasVorFtemps = True;
            /* Decimal FP temps also count. */
            case Ity_D32: case Ity_D64: case Ity_D128:
               *hasVorFtemps = True;

         /* Put: data must be an atom. */
         vassert(isIRAtom(st->Ist.Put.data));

         /* Store: address and data must be atoms. */
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));

         /* Guarded store. */
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));

         /* Guarded load. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));

         /* Compare-and-swap: the Hi halves may legitimately be NULL. */
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));

         /* Load-linked / store-conditional: storedata is optional
            (absent for the load-linked form). */
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));

         /* Dirty helper call: guard and ordinary args must be atoms;
            VECRET/GSPTR marker args are exempt. */
         d = st->Ist.Dirty.details;
         vassert(isIRAtom(d->guard));
         for (j = 0; d->args[j]; j++) {
            IRExpr* arg = d->args[j];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               vassert(isIRAtom(arg));
         if (d->mFx != Ifx_None)
            vassert(isIRAtom(d->mAddr));

         /* Side exit: guard must be an atom. */
         vassert(isIRAtom(st->Ist.Exit.guard));

         /* Presumably the switch's default: unknown statement tag. */
         vpanic("considerExpensives");
6628 /* ---------------- The main iropt entry point. ---------------- */
6630 /* exported from this file */
6631 /* Rules of the game:
6633 - IRExpr/IRStmt trees should be treated as immutable, as they
6634 may get shared. So never change a field of such a tree node;
6635 instead construct and return a new one if needed.
/* The main iropt entry point (see the banner comment above; exported
   from this file -- 'do_iropt_BB' in the upstream tree).
   NOTE(review): the extraction dropped the line carrying the function
   name and its first parameter (the input block, flattened below as
   'bb0'); judging by the uses below it also takes 'guest_addr' and
   'guest_arch' parameters.  Braces, local declarations ('bb', 'bb2',
   'cses'), some guards/else-arms and the final return were dropped
   too.  Surviving tokens are kept in their original order; verify
   against the upstream file. */
   IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   VexRegisterUpdates pxControl,

   /* Counters feeding the "***** EXPENSIVE" trace below. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;

   /* First flatten the block out, since all other
      phases assume flat code. */
   bb = flatten_BB ( bb0 );
   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */
   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it (rest of this comment lost in extraction). */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );

      if (hasGetIorPutI) {
         /* NOTE(review): a debug guard around this trace appears to
            have been dropped by extraction. */
         vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive );
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      bb = do_MSVC_HACKS(bb);
      // END MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */
      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      /* NOTE(review): a guard on the unroll result appears to have
         been dropped here by extraction. */
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         if (0) vex_printf("vex iropt: unrolled a loop\n");
6752 /*---------------------------------------------------------------*/
6753 /*--- end ir_opt.c ---*/
6754 /*---------------------------------------------------------------*/