Tidy up ir_opt.c aspects relating to the 'grail' work. In particular:
[valgrind.git] / VEX / priv / guest_generic_bb_to_IR.c
blobf890c3338b7f04134c7e2bc4eaab07f2e91638f2
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_generic_bb_to_IR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
36 #include "libvex.h"
37 #include "main_util.h"
38 #include "main_globals.h"
39 #include "guest_generic_bb_to_IR.h"
40 #include "ir_opt.h"
43 /*--------------------------------------------------------------*/
44 /*--- Forwards for fns called by self-checking translations ---*/
45 /*--------------------------------------------------------------*/
47 /* Forwards .. */
48 VEX_REGPARM(2) static UInt genericg_compute_checksum_4al ( HWord first_w32,
49 HWord n_w32s );
50 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
51 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
52 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
53 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
54 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
55 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
56 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
57 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
58 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
59 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
60 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
61 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
63 VEX_REGPARM(2) static ULong genericg_compute_checksum_8al ( HWord first_w64,
64 HWord n_w64s );
65 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 );
66 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 );
67 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 );
68 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 );
69 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 );
70 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 );
71 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 );
72 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 );
73 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 );
74 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 );
75 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 );
76 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 );
79 /*--------------------------------------------------------------*/
80 /*--- Creation of self-check IR ---*/
81 /*--------------------------------------------------------------*/
83 static void create_self_checks_as_needed(
84 /*MOD*/ IRSB* irsb,
85 /*OUT*/ UInt* n_sc_extents,
86 /*MOD*/ VexRegisterUpdates* pxControl,
87 /*MOD*/ void* callback_opaque,
88 /*IN*/ UInt (*needs_self_check)
89 (void*, /*MB_MOD*/VexRegisterUpdates*,
90 const VexGuestExtents*),
91 const VexGuestExtents* vge,
92 const VexAbiInfo* abiinfo_both,
93 const IRType guest_word_type,
94 const Int selfcheck_idx,
95 /*IN*/ Int offB_GUEST_CMSTART,
96 /*IN*/ Int offB_GUEST_CMLEN,
97 /*IN*/ Int offB_GUEST_IP,
98 const Addr guest_IP_sbstart
101 /* The scheme is to compute a rather crude checksum of the code
102 we're making a translation of, and add to the IR a call to a
103 helper routine which recomputes the checksum every time the
104 translation is run, and requests a retranslation if it doesn't
105 match. This is obviously very expensive and considerable
106 efforts are made to speed it up:
108 * the checksum is computed from all the naturally aligned
109 host-sized words that overlap the translated code. That means
110 it could depend on up to 7 bytes before and 7 bytes after
111 which aren't part of the translated area, and so if those
112 change then we'll unnecessarily have to discard and
113 retranslate. This seems like a pretty remote possibility and
114 it seems as if the benefit of not having to deal with the ends
115 of the range at byte precision far outweigh any possible extra
116 translations needed.
118 * there's a generic routine and 12 specialised cases, which
119 handle the cases of 1 through 12-word lengths respectively.
120 They seem to cover about 90% of the cases that occur in
121 practice.
123 We ask the caller, via needs_self_check, which of the 3 vge
124 extents needs a check, and only generate check code for those
125 that do.
128 Addr base2check;
129 UInt len2check;
130 HWord expectedhW;
131 IRTemp tistart_tmp, tilen_tmp, callresult_tmp, exitguard_tmp;
132 HWord VEX_REGPARM(2) (*fn_generic)(HWord, HWord);
133 HWord VEX_REGPARM(1) (*fn_spec)(HWord);
134 const HChar* nm_generic;
135 const HChar* nm_spec;
136 HWord fn_generic_entry = 0;
137 HWord fn_spec_entry = 0;
138 UInt host_word_szB = sizeof(HWord);
139 IRType host_word_type = Ity_INVALID;
141 UInt extents_needing_check
142 = needs_self_check(callback_opaque, pxControl, vge);
144 if (host_word_szB == 4) host_word_type = Ity_I32;
145 if (host_word_szB == 8) host_word_type = Ity_I64;
146 vassert(host_word_type != Ity_INVALID);
148 vassert(vge->n_used >= 1 && vge->n_used <= 3);
150 /* Caller shouldn't claim that nonexistent extents need a
151 check. */
152 vassert((extents_needing_check >> vge->n_used) == 0);
154 /* Guest addresses as IRConsts. Used in self-checks to specify the
155 restart-after-discard point. */
156 IRConst* guest_IP_sbstart_IRConst
157 = guest_word_type==Ity_I32
158 ? IRConst_U32(toUInt(guest_IP_sbstart))
159 : IRConst_U64(guest_IP_sbstart);
161 const Int n_extent_slots = sizeof(vge->base) / sizeof(vge->base[0]);
162 vassert(n_extent_slots == 3);
164 vassert(selfcheck_idx + (n_extent_slots - 1) * 7 + 6 < irsb->stmts_used);
166 for (Int i = 0; i < vge->n_used; i++) {
167 /* Do we need to generate a check for this extent? */
168 if ((extents_needing_check & (1 << i)) == 0)
169 continue;
171 /* Tell the caller */
172 (*n_sc_extents)++;
174 /* the extent we're generating a check for */
175 base2check = vge->base[i];
176 len2check = vge->len[i];
178 /* stay sane */
179 vassert(len2check >= 0 && len2check < 2000/*arbitrary*/);
181 /* Skip the check if the translation involved zero bytes */
182 if (len2check == 0)
183 continue;
185 HWord first_hW = ((HWord)base2check)
186 & ~(HWord)(host_word_szB-1);
187 HWord last_hW = (((HWord)base2check) + len2check - 1)
188 & ~(HWord)(host_word_szB-1);
189 vassert(first_hW <= last_hW);
190 HWord hW_diff = last_hW - first_hW;
191 vassert(0 == (hW_diff & (host_word_szB-1)));
192 HWord hWs_to_check = (hW_diff + host_word_szB) / host_word_szB;
193 vassert(hWs_to_check > 0
194 && hWs_to_check < 2004/*arbitrary*/ / host_word_szB);
196 /* vex_printf("%lx %lx %ld\n", first_hW, last_hW, hWs_to_check); */
198 if (host_word_szB == 8) {
199 fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
200 genericg_compute_checksum_8al;
201 nm_generic = "genericg_compute_checksum_8al";
202 } else {
203 fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
204 genericg_compute_checksum_4al;
205 nm_generic = "genericg_compute_checksum_4al";
208 fn_spec = NULL;
209 nm_spec = NULL;
211 if (host_word_szB == 8) {
212 const HChar* nm = NULL;
213 ULong VEX_REGPARM(1) (*fn)(HWord) = NULL;
214 switch (hWs_to_check) {
215 case 1: fn = genericg_compute_checksum_8al_1;
216 nm = "genericg_compute_checksum_8al_1"; break;
217 case 2: fn = genericg_compute_checksum_8al_2;
218 nm = "genericg_compute_checksum_8al_2"; break;
219 case 3: fn = genericg_compute_checksum_8al_3;
220 nm = "genericg_compute_checksum_8al_3"; break;
221 case 4: fn = genericg_compute_checksum_8al_4;
222 nm = "genericg_compute_checksum_8al_4"; break;
223 case 5: fn = genericg_compute_checksum_8al_5;
224 nm = "genericg_compute_checksum_8al_5"; break;
225 case 6: fn = genericg_compute_checksum_8al_6;
226 nm = "genericg_compute_checksum_8al_6"; break;
227 case 7: fn = genericg_compute_checksum_8al_7;
228 nm = "genericg_compute_checksum_8al_7"; break;
229 case 8: fn = genericg_compute_checksum_8al_8;
230 nm = "genericg_compute_checksum_8al_8"; break;
231 case 9: fn = genericg_compute_checksum_8al_9;
232 nm = "genericg_compute_checksum_8al_9"; break;
233 case 10: fn = genericg_compute_checksum_8al_10;
234 nm = "genericg_compute_checksum_8al_10"; break;
235 case 11: fn = genericg_compute_checksum_8al_11;
236 nm = "genericg_compute_checksum_8al_11"; break;
237 case 12: fn = genericg_compute_checksum_8al_12;
238 nm = "genericg_compute_checksum_8al_12"; break;
239 default: break;
241 fn_spec = (VEX_REGPARM(1) HWord(*)(HWord)) fn;
242 nm_spec = nm;
243 } else {
244 const HChar* nm = NULL;
245 UInt VEX_REGPARM(1) (*fn)(HWord) = NULL;
246 switch (hWs_to_check) {
247 case 1: fn = genericg_compute_checksum_4al_1;
248 nm = "genericg_compute_checksum_4al_1"; break;
249 case 2: fn = genericg_compute_checksum_4al_2;
250 nm = "genericg_compute_checksum_4al_2"; break;
251 case 3: fn = genericg_compute_checksum_4al_3;
252 nm = "genericg_compute_checksum_4al_3"; break;
253 case 4: fn = genericg_compute_checksum_4al_4;
254 nm = "genericg_compute_checksum_4al_4"; break;
255 case 5: fn = genericg_compute_checksum_4al_5;
256 nm = "genericg_compute_checksum_4al_5"; break;
257 case 6: fn = genericg_compute_checksum_4al_6;
258 nm = "genericg_compute_checksum_4al_6"; break;
259 case 7: fn = genericg_compute_checksum_4al_7;
260 nm = "genericg_compute_checksum_4al_7"; break;
261 case 8: fn = genericg_compute_checksum_4al_8;
262 nm = "genericg_compute_checksum_4al_8"; break;
263 case 9: fn = genericg_compute_checksum_4al_9;
264 nm = "genericg_compute_checksum_4al_9"; break;
265 case 10: fn = genericg_compute_checksum_4al_10;
266 nm = "genericg_compute_checksum_4al_10"; break;
267 case 11: fn = genericg_compute_checksum_4al_11;
268 nm = "genericg_compute_checksum_4al_11"; break;
269 case 12: fn = genericg_compute_checksum_4al_12;
270 nm = "genericg_compute_checksum_4al_12"; break;
271 default: break;
273 fn_spec = (VEX_REGPARM(1) HWord(*)(HWord))fn;
274 nm_spec = nm;
277 expectedhW = fn_generic( first_hW, hWs_to_check );
278 /* If we got a specialised version, check it produces the same
279 result as the generic version! */
280 if (fn_spec) {
281 vassert(nm_spec);
282 vassert(expectedhW == fn_spec( first_hW ));
283 } else {
284 vassert(!nm_spec);
287 /* Set CMSTART and CMLEN. These will describe to the despatcher
288 the area of guest code to invalidate should we exit with a
289 self-check failure. */
290 tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
291 tilen_tmp = newIRTemp(irsb->tyenv, guest_word_type);
293 IRConst* base2check_IRConst
294 = guest_word_type==Ity_I32 ? IRConst_U32(toUInt(base2check))
295 : IRConst_U64(base2check);
296 IRConst* len2check_IRConst
297 = guest_word_type==Ity_I32 ? IRConst_U32(len2check)
298 : IRConst_U64(len2check);
300 IRStmt** stmt0 = &irsb->stmts[selfcheck_idx + i * 7 + 0];
301 IRStmt** stmt1 = &irsb->stmts[selfcheck_idx + i * 7 + 1];
302 IRStmt** stmt2 = &irsb->stmts[selfcheck_idx + i * 7 + 2];
303 IRStmt** stmt3 = &irsb->stmts[selfcheck_idx + i * 7 + 3];
304 IRStmt** stmt4 = &irsb->stmts[selfcheck_idx + i * 7 + 4];
305 IRStmt** stmt5 = &irsb->stmts[selfcheck_idx + i * 7 + 5];
306 IRStmt** stmt6 = &irsb->stmts[selfcheck_idx + i * 7 + 6];
307 vassert((*stmt0)->tag == Ist_NoOp);
308 vassert((*stmt1)->tag == Ist_NoOp);
309 vassert((*stmt2)->tag == Ist_NoOp);
310 vassert((*stmt3)->tag == Ist_NoOp);
311 vassert((*stmt4)->tag == Ist_NoOp);
312 vassert((*stmt5)->tag == Ist_NoOp);
313 vassert((*stmt6)->tag == Ist_NoOp);
315 *stmt0 = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(base2check_IRConst) );
316 *stmt1 = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
317 *stmt2 = IRStmt_Put( offB_GUEST_CMSTART, IRExpr_RdTmp(tistart_tmp) );
318 *stmt3 = IRStmt_Put( offB_GUEST_CMLEN, IRExpr_RdTmp(tilen_tmp) );
320 /* Generate the entry point descriptors */
321 if (abiinfo_both->host_ppc_calls_use_fndescrs) {
322 HWord* descr = (HWord*)fn_generic;
323 fn_generic_entry = descr[0];
324 if (fn_spec) {
325 descr = (HWord*)fn_spec;
326 fn_spec_entry = descr[0];
327 } else {
328 fn_spec_entry = (HWord)NULL;
330 } else {
331 fn_generic_entry = (HWord)fn_generic;
332 if (fn_spec) {
333 fn_spec_entry = (HWord)fn_spec;
334 } else {
335 fn_spec_entry = (HWord)NULL;
339 /* Generate the call to the relevant function, and the comparison of
340 the result against the expected value. */
341 IRExpr* callexpr = NULL;
342 if (fn_spec) {
343 callexpr = mkIRExprCCall(
344 host_word_type, 1/*regparms*/,
345 nm_spec, (void*)fn_spec_entry,
346 mkIRExprVec_1(
347 mkIRExpr_HWord( (HWord)first_hW )
350 } else {
351 callexpr = mkIRExprCCall(
352 host_word_type, 2/*regparms*/,
353 nm_generic, (void*)fn_generic_entry,
354 mkIRExprVec_2(
355 mkIRExpr_HWord( (HWord)first_hW ),
356 mkIRExpr_HWord( (HWord)hWs_to_check )
361 callresult_tmp = newIRTemp(irsb->tyenv, host_word_type);
362 *stmt4 = IRStmt_WrTmp(callresult_tmp, callexpr);
364 exitguard_tmp = newIRTemp(irsb->tyenv, Ity_I1);
365 *stmt5 = IRStmt_WrTmp(
366 exitguard_tmp,
367 IRExpr_Binop(
368 host_word_type==Ity_I64 ? Iop_CmpNE64 : Iop_CmpNE32,
369 IRExpr_RdTmp(callresult_tmp),
370 host_word_type==Ity_I64
371 ? IRExpr_Const(IRConst_U64(expectedhW))
372 : IRExpr_Const(IRConst_U32(expectedhW))));
374 *stmt6 = IRStmt_Exit(
375 IRExpr_RdTmp(exitguard_tmp),
376 Ijk_InvalICache,
377 /* Where we must restart if there's a failure: at the
378 first extent, regardless of which extent the failure
379 actually happened in. */
380 guest_IP_sbstart_IRConst,
381 offB_GUEST_IP
383 } /* for (i = 0; i < vge->n_used; i++) */
385 for (Int i = vge->n_used;
386 i < sizeof(vge->base) / sizeof(vge->base[0]); i++) {
387 IRStmt* stmt0 = irsb->stmts[selfcheck_idx + i * 7 + 0];
388 IRStmt* stmt1 = irsb->stmts[selfcheck_idx + i * 7 + 1];
389 IRStmt* stmt2 = irsb->stmts[selfcheck_idx + i * 7 + 2];
390 IRStmt* stmt3 = irsb->stmts[selfcheck_idx + i * 7 + 3];
391 IRStmt* stmt4 = irsb->stmts[selfcheck_idx + i * 7 + 4];
392 IRStmt* stmt5 = irsb->stmts[selfcheck_idx + i * 7 + 5];
393 IRStmt* stmt6 = irsb->stmts[selfcheck_idx + i * 7 + 6];
394 vassert(stmt0->tag == Ist_NoOp);
395 vassert(stmt1->tag == Ist_NoOp);
396 vassert(stmt2->tag == Ist_NoOp);
397 vassert(stmt3->tag == Ist_NoOp);
398 vassert(stmt4->tag == Ist_NoOp);
399 vassert(stmt5->tag == Ist_NoOp);
400 vassert(stmt6->tag == Ist_NoOp);
406 /*--------------------------------------------------------------*/
407 /*--- To do with guarding (conditionalisation) of IRStmts ---*/
408 /*--------------------------------------------------------------*/
410 // Is it possible to guard |e|? Meaning, is it safe (exception-free) to compute
411 // |e| and ignore the result? Since |e| is by definition otherwise
412 // side-effect-free, we don't have to ask about any other effects caused by
413 // first computing |e| and then ignoring the result.
414 static Bool expr_is_guardable ( const IRExpr* e )
416 switch (e->tag) {
417 case Iex_Load:
418 return False;
419 case Iex_Unop:
420 return !primopMightTrap(e->Iex.Unop.op);
421 case Iex_Binop:
422 return !primopMightTrap(e->Iex.Binop.op);
423 case Iex_ITE:
424 case Iex_CCall:
425 case Iex_Get:
426 return True;
427 default:
428 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
429 vpanic("expr_is_guardable: unhandled expr");
433 // Is it possible to guard |st|? Meaning, is it possible to replace |st| by
434 // some other sequence of IRStmts which have the same effect on the architected
435 // state when the guard is true, but when it is false, have no effect on the
436 // architected state and are guaranteed not to cause any exceptions?
438 // Note that this isn't as aggressive as it could be: it sometimes returns False
439 // in cases where |st| is actually guardable. This routine must coordinate
440 // closely with add_guarded_stmt_to_end_of below, in the sense that that routine
441 // must be able to handle any |st| for which this routine returns True.
442 static Bool stmt_is_guardable ( const IRStmt* st )
444 switch (st->tag) {
445 case Ist_IMark:
446 case Ist_Put:
447 return True;
448 case Ist_Store: // definitely not
449 case Ist_CAS: // definitely not
450 case Ist_Exit: // We could in fact spec this, if required
451 return False;
452 case Ist_WrTmp:
453 return expr_is_guardable(st->Ist.WrTmp.data);
454 default:
455 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
456 vpanic("stmt_is_guardable: unhandled stmt");
460 // Are all stmts (but not the end dst value) in |bb| guardable, per
461 // stmt_is_guardable?
462 static Bool block_is_guardable ( const IRSB* bb )
464 Int i = bb->stmts_used;
465 vassert(i >= 2); // Must have at least: IMark, side Exit (at the end)
466 i--;
467 vassert(bb->stmts[i]->tag == Ist_Exit);
468 i--;
469 for (; i >= 0; i--) {
470 if (!stmt_is_guardable(bb->stmts[i]))
471 return False;
473 return True;
476 // Guard |st| with |guard| and add it to |bb|. This must be able to handle any
477 // |st| for which stmt_is_guardable returns True.
478 static void add_guarded_stmt_to_end_of ( /*MOD*/IRSB* bb,
479 /*IN*/ IRStmt* st, IRTemp guard )
481 switch (st->tag) {
482 case Ist_IMark:
483 case Ist_WrTmp:
484 addStmtToIRSB(bb, st);
485 break;
486 case Ist_Put: {
487 // Put(offs, e) ==> Put(offs, ITE(guard, e, Get(offs, sizeof(e))))
488 // Which when flattened out is:
489 // t1 = Get(offs, sizeof(e))
490 // t2 = ITE(guard, e, t2)
491 // Put(offs, t2)
492 Int offset = st->Ist.Put.offset;
493 IRExpr* e = st->Ist.Put.data;
494 IRType ty = typeOfIRExpr(bb->tyenv, e);
495 IRTemp t1 = newIRTemp(bb->tyenv, ty);
496 IRTemp t2 = newIRTemp(bb->tyenv, ty);
497 addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Get(offset, ty)));
498 addStmtToIRSB(bb, IRStmt_WrTmp(t2, IRExpr_ITE(IRExpr_RdTmp(guard),
499 e, IRExpr_RdTmp(t1))));
500 addStmtToIRSB(bb, IRStmt_Put(offset, IRExpr_RdTmp(t2)));
501 break;
503 case Ist_Exit: {
504 // Exit(xguard, dst, jk, offsIP)
505 // ==> t1 = And1(xguard, guard)
506 // Exit(t1, dst, jk, offsIP)
507 IRExpr* xguard = st->Ist.Exit.guard;
508 IRTemp t1 = newIRTemp(bb->tyenv, Ity_I1);
509 addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Binop(Iop_And1, xguard,
510 IRExpr_RdTmp(guard))));
511 addStmtToIRSB(bb, IRStmt_Exit(IRExpr_RdTmp(t1), st->Ist.Exit.jk,
512 st->Ist.Exit.dst, st->Ist.Exit.offsIP));
513 break;
515 default:
516 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
517 vpanic("add_guarded_stmt_to_end_of: unhandled stmt");
522 /*--------------------------------------------------------------*/
523 /*--- Analysis of block ends ---*/
524 /*--------------------------------------------------------------*/
526 typedef
527 enum {
528 Be_Other=1, // Block end isn't of interest to us
529 Be_Uncond, // Unconditional branch to known destination, unassisted
530 Be_Cond // Conditional branch to known destinations, unassisted
532 BlockEndTag;
534 typedef
535 struct {
536 BlockEndTag tag;
537 union {
538 struct {
539 } Other;
540 struct {
541 Long delta;
542 } Uncond;
543 struct {
544 IRTemp condSX;
545 Long deltaSX;
546 Long deltaFT;
547 } Cond;
548 } Be;
550 BlockEnd;
552 static void ppBlockEnd ( const BlockEnd* be )
554 switch (be->tag) {
555 case Be_Other:
556 vex_printf("Other");
557 break;
558 case Be_Uncond:
559 vex_printf("Uncond{delta=%lld}", be->Be.Uncond.delta);
560 break;
561 case Be_Cond:
562 vex_printf("Cond{condSX=");
563 ppIRTemp(be->Be.Cond.condSX);
564 vex_printf(", deltaSX=%lld, deltaFT=%lld}",
565 be->Be.Cond.deltaSX, be->Be.Cond.deltaFT);
566 break;
567 default:
568 vassert(0);
572 // Return True if |be| definitely does not jump to |delta|. In case of
573 // doubt, returns False.
574 static Bool definitely_does_not_jump_to_delta ( const BlockEnd* be, Long delta )
576 switch (be->tag) {
577 case Be_Other:
578 return False;
579 case Be_Uncond:
580 return be->Be.Uncond.delta != delta;
581 case Be_Cond:
582 return be->Be.Cond.deltaSX != delta && be->Be.Cond.deltaFT != delta;
583 default:
584 vassert(0);
588 static Addr irconst_to_Addr ( const IRConst* con, const IRType guest_word_type )
590 switch (con->tag) {
591 case Ico_U32:
592 vassert(guest_word_type == Ity_I32);
593 return con->Ico.U32;
594 case Ico_U64:
595 vassert(guest_word_type == Ity_I64);
596 return con->Ico.U64;
597 default:
598 vassert(0);
602 static Bool irconst_to_maybe_delta ( /*OUT*/Long* delta,
603 const IRConst* known_dst,
604 const Addr guest_IP_sbstart,
605 const IRType guest_word_type,
606 Bool (*chase_into_ok)(void*,Addr),
607 void* callback_opaque )
609 vassert(typeOfIRConst(known_dst) == guest_word_type);
611 *delta = 0;
613 // Extract the destination guest address.
614 Addr dst_ga = irconst_to_Addr(known_dst, guest_word_type);
616 // Check we're allowed to chase into it.
617 if (!chase_into_ok(callback_opaque, dst_ga))
618 return False;
620 Addr delta_as_Addr = dst_ga - guest_IP_sbstart;
621 // Either |delta_as_Addr| is a 64-bit value, in which case copy it directly
622 // to |delta|, or it's a 32 bit value, in which case sign extend it.
623 *delta = sizeof(Addr) == 8 ? (Long)delta_as_Addr : (Long)(Int)delta_as_Addr;
624 return True;
627 static Bool any_overlap ( Int start1, Int len1, Int start2, Int len2 )
629 vassert(len1 > 0 && len2 > 0);
630 vassert(start1 >= 0 && start2 >= 0);
631 if (start1 + len1 <= start2) return False;
632 if (start2 + len2 <= start1) return False;
633 return True;
636 /* Scan |stmts|, starting at |scan_start| and working backwards, to detect the
637 case where there are no IRStmt_Exits before we find the IMark. In other
638 words, it scans backwards through some prefix of an instruction's IR to see
639 if there is an exit there.
641 It also checks for explicit PUTs to the PC, via Ist_Put, Ist_PutI or
642 Ist_Dirty. I suspect this is ridiculous overkill, but is here for safety. */
643 static Bool insn_has_no_other_exits_or_PUTs_to_PC (
644 IRStmt** const stmts, Int scan_start,
645 Int offB_GUEST_IP, Int szB_GUEST_IP,
646 const IRTypeEnv* tyenv
649 Bool found_exit = False;
650 Bool found_PUT_to_PC = False;
651 Int i = scan_start;
652 while (True) {
653 if (i < 0)
654 break;
655 const IRStmt* st = stmts[i];
656 if (st->tag == Ist_IMark) {
657 // We're back at the start of the insn. Stop searching.
658 break;
660 if (st->tag == Ist_Exit) {
661 found_exit = True;
662 break;
664 if (st->tag == Ist_Put) {
665 Int offB = st->Ist.Put.offset;
666 Int szB = sizeofIRType(typeOfIRExpr(tyenv, st->Ist.Put.data));
667 if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
668 found_PUT_to_PC = True;
669 break;
672 if (st->tag == Ist_PutI) {
673 const IRPutI* details = st->Ist.PutI.details;
674 const IRRegArray* descr = details->descr;
675 Int offB = descr->base;
676 Int szB = descr->nElems * sizeofIRType(descr->elemTy);
677 if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
678 found_PUT_to_PC = True;
679 break;
682 if (st->tag == Ist_Dirty) {
683 vassert(!found_PUT_to_PC);
684 const IRDirty* details = st->Ist.Dirty.details;
685 for (Int j = 0; j < details->nFxState; j++) {
686 const IREffect fx = details->fxState[j].fx;
687 const Int offset = details->fxState[j].offset;
688 const Int size = details->fxState[j].size;
689 const Int nRepeats = details->fxState[j].nRepeats;
690 const Int repeatLen = details->fxState[j].repeatLen;
691 if (fx == Ifx_Write || fx == Ifx_Modify) {
692 for (Int k = 0; k < nRepeats; k++) {
693 Int offB = offset + k * repeatLen;
694 Int szB = size;
695 if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
696 found_PUT_to_PC = True;
701 if (found_PUT_to_PC) {
702 break;
705 i--;
707 // We expect IR for all instructions to start with an IMark.
708 vassert(i >= 0);
709 return !found_exit && !found_PUT_to_PC;
712 static void analyse_block_end ( /*OUT*/BlockEnd* be, const IRSB* irsb,
713 const Addr guest_IP_sbstart,
714 const IRType guest_word_type,
715 Bool (*chase_into_ok)(void*,Addr),
716 void* callback_opaque,
717 Int offB_GUEST_IP,
718 Int szB_GUEST_IP,
719 Bool debug_print )
721 vex_bzero(be, sizeof(*be));
723 // -- Conditional branch to known destination
724 /* In short, detect the following end form:
725 ------ IMark(0x4002009, 2, 0) ------
726 // Zero or more non-exit statements
727 if (t14) { PUT(184) = 0x4002040:I64; exit-Boring }
728 PUT(184) = 0x400200B:I64; exit-Boring
729 Checks:
730 - Both transfers are 'boring'
731 - Both dsts are constants
732 - The cond is non-constant (an IRExpr_Tmp)
733 - There are no other exits in this instruction
734 - The client allows chasing into both destinations
736 if (irsb->jumpkind == Ijk_Boring && irsb->stmts_used >= 2) {
737 const IRStmt* maybe_exit = irsb->stmts[irsb->stmts_used - 1];
738 if (maybe_exit->tag == Ist_Exit
739 && maybe_exit->Ist.Exit.guard->tag == Iex_RdTmp
740 && maybe_exit->Ist.Exit.jk == Ijk_Boring
741 && irsb->next->tag == Iex_Const
742 && insn_has_no_other_exits_or_PUTs_to_PC(
743 irsb->stmts, irsb->stmts_used - 2,
744 offB_GUEST_IP, szB_GUEST_IP, irsb->tyenv)) {
745 vassert(maybe_exit->Ist.Exit.offsIP == irsb->offsIP);
746 IRConst* dst_SX = maybe_exit->Ist.Exit.dst;
747 IRConst* dst_FT = irsb->next->Iex.Const.con;
748 IRTemp cond_SX = maybe_exit->Ist.Exit.guard->Iex.RdTmp.tmp;
749 Long delta_SX = 0;
750 Long delta_FT = 0;
751 Bool ok_SX
752 = irconst_to_maybe_delta(&delta_SX, dst_SX,
753 guest_IP_sbstart, guest_word_type,
754 chase_into_ok, callback_opaque);
755 Bool ok_FT
756 = irconst_to_maybe_delta(&delta_FT, dst_FT,
757 guest_IP_sbstart, guest_word_type,
758 chase_into_ok, callback_opaque);
759 if (ok_SX && ok_FT) {
760 be->tag = Be_Cond;
761 be->Be.Cond.condSX = cond_SX;
762 be->Be.Cond.deltaSX = delta_SX;
763 be->Be.Cond.deltaFT = delta_FT;
764 goto out;
769 // -- Unconditional branch/call to known destination
770 /* Four checks:
771 - The transfer is 'boring' or 'call', so that no assistance is needed
772 - The dst is a constant (known at jit time)
773 - There are no other exits in this instruction. In other words, the
774 transfer is unconditional.
775 - The client allows chasing into the destination.
777 if ((irsb->jumpkind == Ijk_Boring || irsb->jumpkind == Ijk_Call)
778 && irsb->next->tag == Iex_Const) {
779 if (insn_has_no_other_exits_or_PUTs_to_PC(
780 irsb->stmts, irsb->stmts_used - 1,
781 offB_GUEST_IP, szB_GUEST_IP, irsb->tyenv)) {
782 // We've got the right pattern. Check whether we can chase into the
783 // destination, and if so convert that to a delta value.
784 const IRConst* known_dst = irsb->next->Iex.Const.con;
785 Long delta = 0;
786 // This call also checks the type of the dst addr, and that the client
787 // allows chasing into it.
788 Bool ok = irconst_to_maybe_delta(&delta, known_dst,
789 guest_IP_sbstart, guest_word_type,
790 chase_into_ok, callback_opaque);
791 if (ok) {
792 be->tag = Be_Uncond;
793 be->Be.Uncond.delta = delta;
794 goto out;
799 // Not identified as anything of interest to us.
800 be->tag = Be_Other;
802 out:
803 if (debug_print) {
804 vex_printf("\nBlockEnd: ");
805 ppBlockEnd(be);
806 vex_printf("\n");
811 /*--------------------------------------------------------------*/
812 /*--- Disassembly of basic (not super) blocks ---*/
813 /*--------------------------------------------------------------*/
815 /* Disassemble instructions, starting at |&guest_code[delta_IN]|, into |irbb|,
816 and terminate the block properly. At most |n_instrs_allowed_IN| may be
817 disassembled, and this function may choose to disassemble fewer.
819 Also do minimal simplifications on the resulting block, so as to convert the
820 end of the block into something that |analyse_block_end| can reliably
821 recognise.
823 |irbb| will both be modified, and replaced by a new, simplified version,
824 which is returned.
826 static IRSB* disassemble_basic_block_till_stop(
827 /*OUT*/ Int* n_instrs, // #instrs actually used
828 /*OUT*/ Bool* is_verbose_seen, // did we get a 'verbose' hint?
829 /*OUT*/ Addr* extent_base, // VexGuestExtents[..].base
830 /*OUT*/ UShort* extent_len, // VexGuestExtents[..].len
831 /*MOD*/ IRSB* irbb,
832 const Long delta_IN,
833 const Int n_instrs_allowed_IN,
834 const Addr guest_IP_sbstart,
835 const VexEndness host_endness,
836 const Bool sigill_diag,
837 const VexArch arch_guest,
838 const VexArchInfo* archinfo_guest,
839 const VexAbiInfo* abiinfo_both,
840 const IRType guest_word_type,
841 const Bool debug_print,
842 const DisOneInstrFn dis_instr_fn,
843 const UChar* guest_code,
844 const Int offB_GUEST_IP
847 /* This is the max instrs we allow in the block. It starts off at
848 |n_instrs_allowed_IN| but we may choose to reduce it in the case where the
849 instruction disassembler returns an 'is verbose' hint. This is so as to
850 ensure that the JIT doesn't run out of space. See bug 375839 for a
851 motivating example. */
853 /* Process instructions. */
854 Long delta = delta_IN;
855 Int n_instrs_allowed = n_instrs_allowed_IN;
857 *n_instrs = 0;
858 *is_verbose_seen = False;
859 *extent_base = guest_IP_sbstart + delta;
860 *extent_len = 0;
862 while (True) {
863 vassert(*n_instrs < n_instrs_allowed);
865 /* This is the IP of the instruction we're just about to deal
866 with. */
867 Addr guest_IP_curr_instr = guest_IP_sbstart + delta;
869 /* This is the irbb statement array index of the first stmt in
870 this insn. That will always be the instruction-mark
871 descriptor. */
872 Int first_stmt_idx = irbb->stmts_used;
874 /* Add an instruction-mark statement. We won't know until after
875 disassembling the instruction how long it instruction is, so
876 just put in a zero length and we'll fix it up later.
878 On ARM, the least significant bit of the instr address
879 distinguishes ARM vs Thumb instructions. All instructions
880 actually start on at least 2-aligned addresses. So we need
881 to ignore the bottom bit of the insn address when forming the
882 IMark's address field, but put that bottom bit in the delta
883 field, so that comparisons against guest_R15T for Thumb can
884 be done correctly. By inspecting the delta field,
885 instruction processors can determine whether the instruction
886 was originally Thumb or ARM. For more details of this
887 convention, see comments on definition of guest_R15T in
888 libvex_guest_arm.h. */
889 if (arch_guest == VexArchARM && (guest_IP_curr_instr & 1)) {
890 /* Thumb insn => mask out the T bit, but put it in delta */
891 addStmtToIRSB( irbb,
892 IRStmt_IMark(guest_IP_curr_instr & ~(Addr)1,
893 0, /* len */
894 1 /* delta */
897 } else {
898 /* All other targets: store IP as-is, and set delta to zero. */
899 addStmtToIRSB( irbb,
900 IRStmt_IMark(guest_IP_curr_instr,
901 0, /* len */
902 0 /* delta */
907 if (debug_print && *n_instrs > 0)
908 vex_printf("\n");
910 /* Finally, actually disassemble an instruction. */
911 vassert(irbb->next == NULL);
912 DisResult dres
913 = dis_instr_fn ( irbb, guest_code, delta, guest_IP_curr_instr,
914 arch_guest, archinfo_guest, abiinfo_both,
915 host_endness, sigill_diag );
917 /* stay sane ... */
918 vassert(dres.whatNext == Dis_StopHere || dres.whatNext == Dis_Continue);
919 /* ... disassembled insn length is sane ... */
920 vassert(dres.len >= 0 && dres.len <= 24);
922 /* If the disassembly function passed us a hint, take note of it. */
923 if (LIKELY(dres.hint == Dis_HintNone)) {
924 /* Do nothing */
925 } else {
926 vassert(dres.hint == Dis_HintVerbose);
927 /* The current insn is known to be verbose. Lower the max insns limit
928 if necessary so as to avoid running the JIT out of space in the
929 event that we've encountered the start of a long sequence of them.
930 This is expected to be a very rare event. In any case the remaining
931 limit (in the default setting, 30 insns) is still so high that most
932 blocks will terminate anyway before then. So this is very unlikely
933 to give a perf hit in practice. See bug 375839 for the motivating
934 example. */
935 if (!(*is_verbose_seen)) {
936 *is_verbose_seen = True;
937 // Halve the number of allowed insns, but only above 2
938 if (n_instrs_allowed > 2) {
939 n_instrs_allowed = ((n_instrs_allowed - 2) / 2) + 2;
940 //vassert(*n_instrs <= n_instrs_allowed);
945 /* Fill in the insn-mark length field. */
946 vassert(first_stmt_idx >= 0 && first_stmt_idx < irbb->stmts_used);
947 IRStmt* imark = irbb->stmts[first_stmt_idx];
948 vassert(imark);
949 vassert(imark->tag == Ist_IMark);
950 vassert(imark->Ist.IMark.len == 0);
951 imark->Ist.IMark.len = dres.len;
953 /* Print the resulting IR, if needed. */
954 if (vex_traceflags & VEX_TRACE_FE) {
955 for (Int i = first_stmt_idx; i < irbb->stmts_used; i++) {
956 vex_printf(" ");
957 ppIRStmt(irbb->stmts[i]);
958 vex_printf("\n");
962 /* Individual insn disassembly may not mess with irbb->next.
963 This function is the only place where it can be set. */
964 vassert(irbb->next == NULL);
965 vassert(irbb->jumpkind == Ijk_Boring);
966 vassert(irbb->offsIP == 0);
968 /* Individual insn disassembly must finish the IR for each
969 instruction with an assignment to the guest PC. */
970 vassert(first_stmt_idx < irbb->stmts_used);
971 /* it follows that irbb->stmts_used must be > 0 */
972 { IRStmt* st = irbb->stmts[irbb->stmts_used-1];
973 vassert(st);
974 vassert(st->tag == Ist_Put);
975 vassert(st->Ist.Put.offset == offB_GUEST_IP);
976 /* Really we should also check that the type of the Put'd data
977 == guest_word_type, but that's a bit expensive. */
980 /* Update the extents entry that we are constructing. */
981 /* If vex_control.guest_max_insns is required to be < 100 and
982 each insn is at max 20 bytes long, this limit of 5000 then
983 seems reasonable since the max possible extent length will be
984 100 * 20 == 2000. */
985 vassert(*extent_len < 5000);
986 (*extent_len) += dres.len;
987 (*n_instrs)++;
989 /* Advance delta (inconspicuous but very important :-) */
990 delta += (Long)dres.len;
992 Bool stopNow = False;
993 switch (dres.whatNext) {
994 case Dis_Continue:
995 vassert(dres.jk_StopHere == Ijk_INVALID);
996 if (*n_instrs >= n_instrs_allowed) {
997 /* We have to stop. See comment above re irbb field
998 settings here. */
999 irbb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
1000 /* irbb->jumpkind must already by Ijk_Boring */
1001 irbb->offsIP = offB_GUEST_IP;
1002 stopNow = True;
1004 break;
1005 case Dis_StopHere:
1006 vassert(dres.jk_StopHere != Ijk_INVALID);
1007 /* See comment above re irbb field settings here. */
1008 irbb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
1009 irbb->jumpkind = dres.jk_StopHere;
1010 irbb->offsIP = offB_GUEST_IP;
1011 stopNow = True;
1012 break;
1013 default:
1014 vpanic("bb_to_IR");
1017 if (stopNow)
1018 break;
1019 } /* while (True) */
1021 /* irbb->next must now be set, since we've finished the block.
1022 Print it if necessary.*/
1023 vassert(irbb->next != NULL);
1024 if (debug_print) {
1025 vex_printf(" ");
1026 vex_printf( "PUT(%d) = ", irbb->offsIP);
1027 ppIRExpr( irbb->next );
1028 vex_printf( "; exit-");
1029 ppIRJumpKind(irbb->jumpkind);
1030 vex_printf( "\n");
1031 vex_printf( "\n");
1034 /* And clean it up. */
1035 irbb = do_minimal_initial_iropt_BB ( irbb );
1036 if (debug_print) {
1037 ppIRSB(irbb);
1040 return irbb;
1044 /*--------------------------------------------------------------*/
1045 /*--- Disassembly of traces: helper functions ---*/
1046 /*--------------------------------------------------------------*/
1048 // Swap the side exit and fall through exit for |bb|. Update |be| so as to be
1049 // consistent.
1050 static void swap_sx_and_ft ( /*MOD*/IRSB* bb, /*MOD*/BlockEnd* be )
1052 vassert(be->tag == Be_Cond);
1053 vassert(bb->stmts_used >= 2); // Must have at least: IMark, Exit
1054 IRStmt* exit = bb->stmts[bb->stmts_used - 1];
1055 vassert(exit->tag == Ist_Exit);
1056 vassert(exit->Ist.Exit.guard->tag == Iex_RdTmp);
1057 vassert(exit->Ist.Exit.guard->Iex.RdTmp.tmp == be->Be.Cond.condSX);
1058 vassert(bb->next->tag == Iex_Const);
1059 vassert(bb->jumpkind == Ijk_Boring);
1060 // We need to insert a new stmt, just before the exit, that computes 'Not1'
1061 // of the guard condition. Replace |bb->stmts[bb->stmts_used - 1]| by the
1062 // new stmt, and then place |exit| immediately after it.
1063 IRTemp invertedGuard = newIRTemp(bb->tyenv, Ity_I1);
1064 bb->stmts[bb->stmts_used - 1]
1065 = IRStmt_WrTmp(invertedGuard,
1066 IRExpr_Unop(Iop_Not1, IRExpr_RdTmp(exit->Ist.Exit.guard
1067 ->Iex.RdTmp.tmp)));
1068 exit->Ist.Exit.guard->Iex.RdTmp.tmp = invertedGuard;
1069 addStmtToIRSB(bb, exit);
1071 // Swap the actual destination constants.
1072 { IRConst* tmp = exit->Ist.Exit.dst;
1073 exit->Ist.Exit.dst = bb->next->Iex.Const.con;
1074 bb->next->Iex.Const.con = tmp;
1077 // And update |be|.
1078 { be->Be.Cond.condSX = invertedGuard;
1079 Long tmp = be->Be.Cond.deltaSX;
1080 be->Be.Cond.deltaSX = be->Be.Cond.deltaFT;
1081 be->Be.Cond.deltaFT = tmp;
1086 static void update_instr_budget( /*MOD*/Int* instrs_avail,
1087 /*MOD*/Bool* verbose_mode,
1088 const Int bb_instrs_used,
1089 const Bool bb_verbose_seen )
1091 if (0)
1092 vex_printf("UIB: verbose_mode %d, instrs_avail %d, "
1093 "bb_instrs_used %d, bb_verbose_seen %d\n",
1094 *verbose_mode ? 1 : 0, *instrs_avail,
1095 bb_instrs_used, bb_verbose_seen ? 1 : 0);
1097 vassert(bb_instrs_used <= *instrs_avail);
1099 if (bb_verbose_seen && !(*verbose_mode)) {
1100 *verbose_mode = True;
1101 // Adjust *instrs_avail so that, when it becomes zero, we haven't used
1102 // more than 50% of vex_control.guest_max_instrs.
1103 if (bb_instrs_used > vex_control.guest_max_insns / 2) {
1104 *instrs_avail = 0;
1105 } else {
1106 *instrs_avail = vex_control.guest_max_insns / 2;
1108 vassert(*instrs_avail >= 0);
1111 // Subtract bb_instrs_used from *instrs_avail, clamping at 0 if necessary.
1112 if (bb_instrs_used > *instrs_avail) {
1113 *instrs_avail = 0;
1114 } else {
1115 *instrs_avail -= bb_instrs_used;
1118 vassert(*instrs_avail >= 0);
1121 // Add the extent [base, +len) to |vge|. Asserts if |vge| is already full.
1122 // As an optimisation only, tries to also merge the new extent with the
1123 // previous one, if possible.
1124 static void add_extent ( /*MOD*/VexGuestExtents* vge, Addr base, UShort len )
1126 const UInt limit = sizeof(vge->base) / sizeof(vge->base[0]);
1127 vassert(limit == 3);
1128 const UInt i = vge->n_used;
1129 vassert(i < limit);
1130 vge->n_used++;
1131 vge->base[i] = base;
1132 vge->len[i] = len;
1133 // Try to merge with the previous extent
1134 if (i > 0
1135 && (((UInt)vge->len[i-1]) + ((UInt)len))
1136 < 200*25 /* say, 200 insns of size 25 bytes, absolute worst case */
1137 && vge->base[i-1] + vge->len[i-1] == base) {
1138 vge->len[i-1] += len;
1139 vge->n_used--;
1140 //vex_printf("MERGE\n");
1145 /*--------------------------------------------------------------*/
1146 /*--- Disassembly of traces: main function ---*/
1147 /*--------------------------------------------------------------*/
1149 /* Disassemble a complete basic block, starting at guest_IP_start,
1150 returning a new IRSB. The disassembler may chase across basic
1151 block boundaries if it wishes and if chase_into_ok allows it.
1152 The precise guest address ranges from which code has been taken
1153 are written into vge. guest_IP_sbstart is taken to be the IP in
1154 the guest's address space corresponding to the instruction at
1155 &guest_code[0].
1157 dis_instr_fn is the arch-specific fn to disassemble one instruction; it
1158 is this that does the real work.
1160 needs_self_check is a callback used to ask the caller which of the
1161 extents, if any, a self check is required for. The returned value
1162 is a bitmask with a 1 in position i indicating that the i'th extent
1163 needs a check. Since there can be at most 3 extents, the returned
1164 values must be between 0 and 7.
1166 The number of extents which did get a self check (0 to 3) is put in
1167 n_sc_extents. The caller already knows this because it told us
1168 which extents to add checks for, via the needs_self_check callback,
1169 but we ship the number back out here for the caller's convenience.
1171 preamble_function is a callback which allows the caller to add
1172 its own IR preamble (following the self-check, if any). May be
1173 NULL. If non-NULL, the IRSB under construction is handed to
1174 this function, which presumably adds IR statements to it. The
1175 callback may optionally complete the block and direct bb_to_IR
1176 not to disassemble any instructions into it; this is indicated
1177 by the callback returning True.
1179 offB_CMADDR and offB_CMLEN are the offsets of guest_CMADDR and
1180 guest_CMLEN. Since this routine has to work for any guest state,
1181 without knowing what it is, those offsets have to passed in.
1183 callback_opaque is a caller-supplied pointer to data which the
1184 callbacks may want to see. Vex has no idea what it is.
1185 (In fact it's a VgInstrumentClosure.)
1188 /* Regarding IP updating. dis_instr_fn (that does the guest specific
1189 work of disassembling an individual instruction) must finish the
1190 resulting IR with "PUT(guest_IP) = ". Hence in all cases it must
1191 state the next instruction address.
1193 If the block is to be ended at that point, then this routine
1194 (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to
1195 make a transfer (of the right kind) to "GET(guest_IP)". Hence if
1196 dis_instr_fn generates incorrect IP updates we will see it
1197 immediately (due to jumping to the wrong next guest address).
1199 However it is also necessary to set this up so it can be optimised
1200 nicely. The IRSB exit is defined to update the guest IP, so that
1201 chaining works -- since the chain_me stubs expect the chain-to
1202 address to be in the guest state. Hence what the IRSB next fields
1203 will contain initially is (implicitly)
1205 PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next]
1207 which looks pretty strange at first. Eg so unconditional branch
1208 to some address 0x123456 looks like this:
1210 PUT(guest_IP) = 0x123456; // dis_instr_fn generates this
1211 // the exit
1212 PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring
1214 after redundant-GET and -PUT removal by iropt, we get what we want:
1216 // the exit
1217 PUT(guest_IP) [implicitly] = 0x123456; exit-Boring
1219 This makes the IRSB-end case the same as the side-exit case: update
1220 IP, then transfer. There is no redundancy of representation for
1221 the destination, and we use the destination specified by
1222 dis_instr_fn, so any errors it makes show up sooner.
1224 IRSB* bb_to_IR (
1225 /*OUT*/VexGuestExtents* vge,
1226 /*OUT*/UInt* n_sc_extents,
1227 /*OUT*/UInt* n_guest_instrs, /* stats only */
1228 /*OUT*/UShort* n_uncond_in_trace, /* stats only */
1229 /*OUT*/UShort* n_cond_in_trace, /* stats only */
1230 /*MOD*/VexRegisterUpdates* pxControl,
1231 /*IN*/ void* callback_opaque,
1232 /*IN*/ DisOneInstrFn dis_instr_fn,
1233 /*IN*/ const UChar* guest_code,
1234 /*IN*/ Addr guest_IP_sbstart,
1235 /*IN*/ Bool (*chase_into_ok)(void*,Addr),
1236 /*IN*/ VexEndness host_endness,
1237 /*IN*/ Bool sigill_diag,
1238 /*IN*/ VexArch arch_guest,
1239 /*IN*/ const VexArchInfo* archinfo_guest,
1240 /*IN*/ const VexAbiInfo* abiinfo_both,
1241 /*IN*/ IRType guest_word_type,
1242 /*IN*/ UInt (*needs_self_check)
1243 (void*, /*MB_MOD*/VexRegisterUpdates*,
1244 const VexGuestExtents*),
1245 /*IN*/ Bool (*preamble_function)(void*,IRSB*),
1246 /*IN*/ Int offB_GUEST_CMSTART,
1247 /*IN*/ Int offB_GUEST_CMLEN,
1248 /*IN*/ Int offB_GUEST_IP,
1249 /*IN*/ Int szB_GUEST_IP
1252 Bool debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
1254 /* check sanity .. */
1255 vassert(sizeof(HWord) == sizeof(void*));
1256 vassert(vex_control.guest_max_insns >= 1);
1257 vassert(vex_control.guest_max_insns <= 100);
1258 vassert(vex_control.guest_chase == False || vex_control.guest_chase == True);
1259 vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
1261 if (guest_word_type == Ity_I32) {
1262 vassert(szB_GUEST_IP == 4);
1263 vassert((offB_GUEST_IP % 4) == 0);
1264 } else {
1265 vassert(szB_GUEST_IP == 8);
1266 vassert((offB_GUEST_IP % 8) == 0);
1269 /* Initialise all return-by-ref state. */
1270 vge->n_used = 0;
1271 *n_sc_extents = 0;
1272 *n_guest_instrs = 0;
1273 *n_uncond_in_trace = 0;
1274 *n_cond_in_trace = 0;
1276 /* And a new IR superblock to dump the result into. */
1277 IRSB* irsb = emptyIRSB();
1279 /* Leave 21 spaces in which to put the check statements for a self
1280 checking translation (up to 3 extents, and 7 stmts required for
1281 each). We won't know until later the extents and checksums of
1282 the areas, if any, that need to be checked. */
1283 IRStmt* nop = IRStmt_NoOp();
1284 Int selfcheck_idx = irsb->stmts_used;
1285 for (Int i = 0; i < 3 * 7; i++)
1286 addStmtToIRSB( irsb, nop );
1288 /* If the caller supplied a function to add its own preamble, use
1289 it now. */
1290 if (preamble_function) {
1291 Bool stopNow = preamble_function( callback_opaque, irsb );
1292 if (stopNow) {
1293 /* The callback has completed the IR block without any guest
1294 insns being disassembled into it, so just return it at
1295 this point, even if a self-check was requested - as there
1296 is nothing to self-check. The 21 self-check no-ops will
1297 still be in place, but they are harmless. */
1298 vge->n_used = 1;
1299 vge->base[0] = guest_IP_sbstart;
1300 vge->len[0] = 0;
1301 return irsb;
1305 /* Running state:
1306 irsb the SB we are incrementally constructing
1307 vge associated extents for irsb
1308 instrs_used instrs incorporated in irsb so far
1309 instrs_avail number of instrs we have space for
1310 verbose_mode did we see an 'is verbose' hint at some point?
1312 Int instrs_used = 0;
1313 Int instrs_avail = vex_control.guest_max_insns;
1314 Bool verbose_mode = False;
1316 /* Disassemble the initial block until we have to stop. */
1318 Int ib_instrs_used = 0;
1319 Bool ib_verbose_seen = False;
1320 Addr ib_base = 0;
1321 UShort ib_len = 0;
1322 irsb = disassemble_basic_block_till_stop(
1323 /*OUT*/ &ib_instrs_used, &ib_verbose_seen, &ib_base, &ib_len,
1324 /*MOD*/ irsb,
1325 /*IN*/ 0/*delta for the first block in the trace*/,
1326 instrs_avail, guest_IP_sbstart, host_endness, sigill_diag,
1327 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
1328 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
1330 vassert(ib_instrs_used <= instrs_avail);
1332 // Update instrs_used, extents, budget.
1333 instrs_used += ib_instrs_used;
1334 add_extent(vge, ib_base, ib_len);
1335 update_instr_budget(&instrs_avail, &verbose_mode,
1336 ib_instrs_used, ib_verbose_seen);
1339 /* Now, see if we can extend the initial block. */
1340 while (True) {
1341 const Int n_extent_slots = sizeof(vge->base) / sizeof(vge->base[0]);
1342 vassert(n_extent_slots == 3);
1344 // Reasons to give up immediately:
1345 // User or tool asked us not to chase
1346 if (!vex_control.guest_chase)
1347 break;
1349 // Out of extent slots
1350 vassert(vge->n_used <= n_extent_slots);
1351 if (vge->n_used == n_extent_slots)
1352 break;
1354 // Almost out of available instructions
1355 vassert(instrs_avail >= 0);
1356 if (instrs_avail < 3)
1357 break;
1359 // Try for an extend. What kind we do depends on how the current trace
1360 // ends.
1361 BlockEnd irsb_be;
1362 analyse_block_end(&irsb_be, irsb, guest_IP_sbstart, guest_word_type,
1363 chase_into_ok, callback_opaque,
1364 offB_GUEST_IP, szB_GUEST_IP, debug_print);
1366 // Try for an extend based on an unconditional branch or call to a known
1367 // destination.
1368 if (irsb_be.tag == Be_Uncond) {
1369 if (debug_print) {
1370 vex_printf("\n-+-+ Unconditional follow (ext# %d) to 0x%llx "
1371 "-+-+\n\n",
1372 (Int)vge->n_used,
1373 (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.Uncond.delta));
1375 Int bb_instrs_used = 0;
1376 Bool bb_verbose_seen = False;
1377 Addr bb_base = 0;
1378 UShort bb_len = 0;
1379 IRSB* bb
1380 = disassemble_basic_block_till_stop(
1381 /*OUT*/ &bb_instrs_used, &bb_verbose_seen, &bb_base, &bb_len,
1382 /*MOD*/ emptyIRSB(),
1383 /*IN*/ irsb_be.Be.Uncond.delta,
1384 instrs_avail, guest_IP_sbstart, host_endness, sigill_diag,
1385 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
1386 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
1388 vassert(bb_instrs_used <= instrs_avail);
1390 /* Now we have to append 'bb' to 'irsb'. */
1391 concatenate_irsbs(irsb, bb);
1393 // Update instrs_used, extents, budget.
1394 instrs_used += bb_instrs_used;
1395 add_extent(vge, bb_base, bb_len);
1396 update_instr_budget(&instrs_avail, &verbose_mode,
1397 bb_instrs_used, bb_verbose_seen);
1398 *n_uncond_in_trace += 1;
1399 } // if (be.tag == Be_Uncond)
1401 // Try for an extend based on a conditional branch, specifically in the
1402 // hope of identifying and recovering an "A && B" condition spread across
1403 // two basic blocks.
1404 if (irsb_be.tag == Be_Cond) {
1405 if (debug_print) {
1406 vex_printf("\n-+-+ (ext# %d) Considering cbranch to"
1407 " SX=0x%llx FT=0x%llx -+-+\n\n",
1408 (Int)vge->n_used,
1409 (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.Cond.deltaSX),
1410 (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.Cond.deltaFT));
   /* Speculative disassembly budget: chase at most 3 insns down each
      arm of the conditional before deciding whether to transform. */
1412 const Int instrs_avail_spec = 3;
1414 if (debug_print) {
1415 vex_printf("-+-+ SPEC side exit -+-+\n\n");
1417 Int sx_instrs_used = 0;
1418 Bool sx_verbose_seen = False;
1419 Addr sx_base = 0;
1420 UShort sx_len = 0;
1421 IRSB* sx_bb
1422 = disassemble_basic_block_till_stop(
1423 /*OUT*/ &sx_instrs_used, &sx_verbose_seen, &sx_base, &sx_len,
1424 /*MOD*/ emptyIRSB(),
1425 /*IN*/ irsb_be.Be.Cond.deltaSX,
1426 instrs_avail_spec, guest_IP_sbstart, host_endness, sigill_diag,
1427 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
1428 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
1430 vassert(sx_instrs_used <= instrs_avail_spec);
1431 BlockEnd sx_be;
1432 analyse_block_end(&sx_be, sx_bb, guest_IP_sbstart, guest_word_type,
1433 chase_into_ok, callback_opaque,
1434 offB_GUEST_IP, szB_GUEST_IP, debug_print);
1436 if (debug_print) {
1437 vex_printf("\n-+-+ SPEC fall through -+-+\n\n");
1439 Int ft_instrs_used = 0;
1440 Bool ft_verbose_seen = False;
1441 Addr ft_base = 0;
1442 UShort ft_len = 0;
1443 IRSB* ft_bb
1444 = disassemble_basic_block_till_stop(
1445 /*OUT*/ &ft_instrs_used, &ft_verbose_seen, &ft_base, &ft_len,
1446 /*MOD*/ emptyIRSB(),
1447 /*IN*/ irsb_be.Be.Cond.deltaFT,
1448 instrs_avail_spec, guest_IP_sbstart, host_endness, sigill_diag,
1449 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
1450 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
1452 vassert(ft_instrs_used <= instrs_avail_spec);
1453 BlockEnd ft_be;
1454 analyse_block_end(&ft_be, ft_bb, guest_IP_sbstart, guest_word_type,
1455 chase_into_ok, callback_opaque,
1456 offB_GUEST_IP, szB_GUEST_IP, debug_print);
1458 /* In order for the transformation to be remotely valid, we need:
1459 - At least one of the sx_bb or ft_bb to have a Be_Cond end.
1460 - sx_bb and ft_bb definitely don't form a loop.
1462 Bool ok = sx_be.tag == Be_Cond || ft_be.tag == Be_Cond;
1463 if (ok) {
1464 ok = definitely_does_not_jump_to_delta(&sx_be,
1465 irsb_be.Be.Cond.deltaFT)
1466 || definitely_does_not_jump_to_delta(&ft_be,
1467 irsb_be.Be.Cond.deltaSX);
1470 // Check for other mutancy:
1471 // irsb ft == sx, or the same for ft itself or sx itself
1472 if (ok) {
1473 if (irsb_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaFT
1474 || (sx_be.tag == Be_Cond
1475 && sx_be.Be.Cond.deltaSX == sx_be.Be.Cond.deltaFT)
1476 || (ft_be.tag == Be_Cond
1477 && ft_be.Be.Cond.deltaSX == ft_be.Be.Cond.deltaFT)) {
1478 ok = False;
1482 /* Now let's see if any of our four cases actually holds (viz, is this
1483 really an && idiom?) */
1484 UInt idiom = 4;
1485 if (ok) {
1486 vassert(irsb_be.tag == Be_Cond);
1487 UInt iom1 = 4/*invalid*/;
1488 if (sx_be.tag == Be_Cond) {
1489 /**/ if (sx_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaFT)
1490 iom1 = 0;
1491 else if (sx_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaFT)
1492 iom1 = 1;
1494 UInt iom2 = 4/*invalid*/;
1495 if (ft_be.tag == Be_Cond) {
1496 /**/ if (ft_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaSX)
1497 iom2 = 2;
1498 else if (ft_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaSX)
1499 iom2 = 3;
1502 /* We should only have identified at most one of the four idioms. */
1503 vassert(iom1 == 4 || iom2 == 4);
1504 idiom = (iom1 < 4) ? iom1 : (iom2 < 4 ? iom2 : 4);
1505 if (idiom == 4) {
1506 ok = False;
1507 if (debug_print) {
1508 vex_printf("\n-+-+ &&-idiom not recognised, "
1509 "giving up. -+-+\n\n");
1514 if (ok) {
1515 vassert(idiom < 4);
1516 // "Normalise" the data so as to ensure we only have one of the four
1517 // idioms to transform.
1518 if (idiom == 2 || idiom == 3) {
1519 swap_sx_and_ft(irsb, &irsb_be);
1520 # define SWAP(_ty, _aa, _bb) \
1521 do { _ty _tmp = _aa; _aa = _bb; _bb = _tmp; } while (0)
1522 SWAP(Int, sx_instrs_used, ft_instrs_used);
1523 SWAP(Bool, sx_verbose_seen, ft_verbose_seen);
1524 SWAP(Addr, sx_base, ft_base);
1525 SWAP(UShort, sx_len, ft_len);
1526 SWAP(IRSB*, sx_bb, ft_bb);
1527 SWAP(BlockEnd, sx_be, ft_be);
1528 # undef SWAP
1530 if (idiom == 1 || idiom == 3) {
1531 swap_sx_and_ft(sx_bb, &sx_be);
1533 vassert(sx_be.tag == Be_Cond);
1534 vassert(sx_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaFT);
1536 if (debug_print) {
1537 vex_printf("\n-+-+ After normalisation (idiom=%u) -+-+\n", idiom);
1538 vex_printf("\n-+-+ IRSB -+-+\n");
1539 ppIRSB(irsb);
1540 ppBlockEnd(&irsb_be);
1541 vex_printf("\n\n-+-+ SX -+-+\n");
1542 ppIRSB(sx_bb);
1543 ppBlockEnd(&sx_be);
1544 vex_printf("\n");
1546 // Finally, check the sx block actually is guardable.
1547 ok = block_is_guardable(sx_bb);
1548 if (!ok && debug_print) {
1549 vex_printf("\n-+-+ SX not guardable, giving up. -+-+\n\n");
1553 if (ok) {
1554 if (0 || debug_print) {
1555 vex_printf("\n-+-+ DOING &&-TRANSFORM -+-+\n");
1557 // Finally really actually do the transformation.
1558 // 0. remove the last Exit on irsb.
1559 // 1. Add irsb->tyenv->types_used to all the tmps in sx_bb,
1560 // by calling deltaIRStmt on all stmts.
1561 // 2. Guard all stmts in sx_bb on irsb_be.Be.Cond.condSX,
1562 // **including** the last stmt (which must be an Exit). It's
1563 // here that the And1 is generated.
1564 // 3. Copy all guarded stmts to the end of irsb.
1565 vassert(irsb->stmts_used >= 2);
1566 irsb->stmts_used--;
1567 Int delta = irsb->tyenv->types_used;
1569 // Append sx_bb's tyenv to irsb's
1570 for (Int i = 0; i < sx_bb->tyenv->types_used; i++) {
1571 (void)newIRTemp(irsb->tyenv, sx_bb->tyenv->types[i]);
1574 for (Int i = 0; i < sx_bb->stmts_used; i++) {
1575 IRStmt* st = deepCopyIRStmt(sx_bb->stmts[i]);
1576 deltaIRStmt(st, delta);
1577 add_guarded_stmt_to_end_of(irsb, st, irsb_be.Be.Cond.condSX);
1580 if (debug_print) {
1581 vex_printf("\n-+-+ FINAL RESULT -+-+\n\n");
1582 ppIRSB(irsb);
1583 vex_printf("\n");
1586 // Update instrs_used, extents, budget.
1587 instrs_used += sx_instrs_used;
1588 add_extent(vge, sx_base, sx_len);
1589 update_instr_budget(&instrs_avail, &verbose_mode,
1590 sx_instrs_used, sx_verbose_seen);
1591 *n_cond_in_trace += 1;
1593 break;
1594 } // if (be.tag == Be_Cond)
1596 // We don't know any other way to extend the block. Give up.
1597 else {
1598 break;
1601 } // while (True)
1603 /* We're almost done. The only thing that might need attending to is that
1604 a self-checking preamble may need to be created. If so it gets placed
1605 in the 21 slots reserved above. */
1606 create_self_checks_as_needed(
1607 irsb, n_sc_extents, pxControl, callback_opaque, needs_self_check,
1608 vge, abiinfo_both, guest_word_type, selfcheck_idx, offB_GUEST_CMSTART,
1609 offB_GUEST_CMLEN, offB_GUEST_IP, guest_IP_sbstart
1612 *n_guest_instrs = instrs_used;
1613 return irsb;
1617 /*--------------------------------------------------------------*/
1618 /*--- Functions called by self-checking translations ---*/
1619 /*--------------------------------------------------------------*/
1621 /* All of these are CLEAN HELPERs */
1622 /* All of these are CALLED FROM GENERATED CODE */
1624 /* Compute a checksum of host memory at [addr .. addr+len-1], as fast
1625 as possible. All _4al versions assume that the supplied address is
1626 4 aligned. All length values are in 4-byte chunks. These fns
1627 are called once for every use of a self-checking translation, so
1628 they need to be as fast as possible. */
1630 /* --- 32-bit versions, used only on 32-bit hosts --- */
1632 static inline UInt ROL32 ( UInt w, Int n ) {
1633 w = (w << n) | (w >> (32-n));
1634 return w;
1637 VEX_REGPARM(2)
1638 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
1640 UInt sum1 = 0, sum2 = 0;
1641 UInt* p = (UInt*)first_w32;
1642 /* unrolled */
1643 while (n_w32s >= 4) {
1644 UInt w;
1645 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1646 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1647 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1648 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1649 p += 4;
1650 n_w32s -= 4;
1651 sum1 ^= sum2;
1653 while (n_w32s >= 1) {
1654 UInt w;
1655 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1656 p += 1;
1657 n_w32s -= 1;
1658 sum1 ^= sum2;
1660 return sum1 + sum2;
1663 /* Specialised versions of the above function */
1665 VEX_REGPARM(1)
1666 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
1668 UInt sum1 = 0, sum2 = 0;
1669 UInt* p = (UInt*)first_w32;
1670 UInt w;
1671 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1672 sum1 ^= sum2;
1673 return sum1 + sum2;
1676 VEX_REGPARM(1)
1677 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
1679 UInt sum1 = 0, sum2 = 0;
1680 UInt* p = (UInt*)first_w32;
1681 UInt w;
1682 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1683 sum1 ^= sum2;
1684 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1685 sum1 ^= sum2;
1686 return sum1 + sum2;
1689 VEX_REGPARM(1)
1690 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
1692 UInt sum1 = 0, sum2 = 0;
1693 UInt* p = (UInt*)first_w32;
1694 UInt w;
1695 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1696 sum1 ^= sum2;
1697 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1698 sum1 ^= sum2;
1699 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1700 sum1 ^= sum2;
1701 return sum1 + sum2;
1704 VEX_REGPARM(1)
1705 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
1707 UInt sum1 = 0, sum2 = 0;
1708 UInt* p = (UInt*)first_w32;
1709 UInt w;
1710 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1711 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1712 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1713 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1714 sum1 ^= sum2;
1715 return sum1 + sum2;
1718 VEX_REGPARM(1)
1719 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
1721 UInt sum1 = 0, sum2 = 0;
1722 UInt* p = (UInt*)first_w32;
1723 UInt w;
1724 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1725 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1726 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1727 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1728 sum1 ^= sum2;
1729 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1730 sum1 ^= sum2;
1731 return sum1 + sum2;
1734 VEX_REGPARM(1)
1735 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
1737 UInt sum1 = 0, sum2 = 0;
1738 UInt* p = (UInt*)first_w32;
1739 UInt w;
1740 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1741 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1742 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1743 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1744 sum1 ^= sum2;
1745 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1746 sum1 ^= sum2;
1747 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1748 sum1 ^= sum2;
1749 return sum1 + sum2;
1752 VEX_REGPARM(1)
1753 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
1755 UInt sum1 = 0, sum2 = 0;
1756 UInt* p = (UInt*)first_w32;
1757 UInt w;
1758 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1759 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1760 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1761 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1762 sum1 ^= sum2;
1763 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1764 sum1 ^= sum2;
1765 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1766 sum1 ^= sum2;
1767 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1768 sum1 ^= sum2;
1769 return sum1 + sum2;
1772 VEX_REGPARM(1)
1773 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
1775 UInt sum1 = 0, sum2 = 0;
1776 UInt* p = (UInt*)first_w32;
1777 UInt w;
1778 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1779 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1780 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1781 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1782 sum1 ^= sum2;
1783 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1784 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1785 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1786 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1787 sum1 ^= sum2;
1788 return sum1 + sum2;
1791 VEX_REGPARM(1)
1792 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
1794 UInt sum1 = 0, sum2 = 0;
1795 UInt* p = (UInt*)first_w32;
1796 UInt w;
1797 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1798 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1799 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1800 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1801 sum1 ^= sum2;
1802 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1803 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1804 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1805 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1806 sum1 ^= sum2;
1807 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1808 sum1 ^= sum2;
1809 return sum1 + sum2;
1812 VEX_REGPARM(1)
1813 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
1815 UInt sum1 = 0, sum2 = 0;
1816 UInt* p = (UInt*)first_w32;
1817 UInt w;
1818 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1819 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1820 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1821 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1822 sum1 ^= sum2;
1823 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1824 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1825 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1826 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1827 sum1 ^= sum2;
1828 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1829 sum1 ^= sum2;
1830 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1831 sum1 ^= sum2;
1832 return sum1 + sum2;
1835 VEX_REGPARM(1)
1836 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
1838 UInt sum1 = 0, sum2 = 0;
1839 UInt* p = (UInt*)first_w32;
1840 UInt w;
1841 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1842 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1843 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1844 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1845 sum1 ^= sum2;
1846 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1847 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1848 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1849 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1850 sum1 ^= sum2;
1851 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1852 sum1 ^= sum2;
1853 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1854 sum1 ^= sum2;
1855 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1856 sum1 ^= sum2;
1857 return sum1 + sum2;
1860 VEX_REGPARM(1)
1861 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
1863 UInt sum1 = 0, sum2 = 0;
1864 UInt* p = (UInt*)first_w32;
1865 UInt w;
1866 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1867 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1868 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1869 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1870 sum1 ^= sum2;
1871 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1872 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1873 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1874 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1875 sum1 ^= sum2;
1876 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1877 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1878 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1879 w = p[11]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1880 sum1 ^= sum2;
1881 return sum1 + sum2;
/* --- 64-bit versions, used only on 64-bit hosts --- */
1887 static inline ULong ROL64 ( ULong w, Int n ) {
1888 w = (w << n) | (w >> (64-n));
1889 return w;
1892 VEX_REGPARM(2)
1893 static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s )
1895 ULong sum1 = 0, sum2 = 0;
1896 ULong* p = (ULong*)first_w64;
1897 /* unrolled */
1898 while (n_w64s >= 4) {
1899 ULong w;
1900 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1901 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1902 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1903 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1904 p += 4;
1905 n_w64s -= 4;
1906 sum1 ^= sum2;
1908 while (n_w64s >= 1) {
1909 ULong w;
1910 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1911 p += 1;
1912 n_w64s -= 1;
1913 sum1 ^= sum2;
1915 return sum1 + sum2;
/* Specialised versions of the above function */
1920 VEX_REGPARM(1)
1921 static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 )
1923 ULong sum1 = 0, sum2 = 0;
1924 ULong* p = (ULong*)first_w64;
1925 ULong w;
1926 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1927 sum1 ^= sum2;
1928 return sum1 + sum2;
1931 VEX_REGPARM(1)
1932 static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 )
1934 ULong sum1 = 0, sum2 = 0;
1935 ULong* p = (ULong*)first_w64;
1936 ULong w;
1937 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1938 sum1 ^= sum2;
1939 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1940 sum1 ^= sum2;
1941 return sum1 + sum2;
1944 VEX_REGPARM(1)
1945 static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 )
1947 ULong sum1 = 0, sum2 = 0;
1948 ULong* p = (ULong*)first_w64;
1949 ULong w;
1950 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1951 sum1 ^= sum2;
1952 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1953 sum1 ^= sum2;
1954 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1955 sum1 ^= sum2;
1956 return sum1 + sum2;
1959 VEX_REGPARM(1)
1960 static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 )
1962 ULong sum1 = 0, sum2 = 0;
1963 ULong* p = (ULong*)first_w64;
1964 ULong w;
1965 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1966 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1967 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1968 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1969 sum1 ^= sum2;
1970 return sum1 + sum2;
1973 VEX_REGPARM(1)
1974 static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 )
1976 ULong sum1 = 0, sum2 = 0;
1977 ULong* p = (ULong*)first_w64;
1978 ULong w;
1979 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1980 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1981 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1982 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1983 sum1 ^= sum2;
1984 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1985 sum1 ^= sum2;
1986 return sum1 + sum2;
1989 VEX_REGPARM(1)
1990 static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 )
1992 ULong sum1 = 0, sum2 = 0;
1993 ULong* p = (ULong*)first_w64;
1994 ULong w;
1995 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1996 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1997 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1998 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1999 sum1 ^= sum2;
2000 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2001 sum1 ^= sum2;
2002 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2003 sum1 ^= sum2;
2004 return sum1 + sum2;
2007 VEX_REGPARM(1)
2008 static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 )
2010 ULong sum1 = 0, sum2 = 0;
2011 ULong* p = (ULong*)first_w64;
2012 ULong w;
2013 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2014 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2015 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2016 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2017 sum1 ^= sum2;
2018 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2019 sum1 ^= sum2;
2020 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2021 sum1 ^= sum2;
2022 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2023 sum1 ^= sum2;
2024 return sum1 + sum2;
2027 VEX_REGPARM(1)
2028 static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 )
2030 ULong sum1 = 0, sum2 = 0;
2031 ULong* p = (ULong*)first_w64;
2032 ULong w;
2033 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2034 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2035 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2036 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2037 sum1 ^= sum2;
2038 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2039 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2040 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2041 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2042 sum1 ^= sum2;
2043 return sum1 + sum2;
2046 VEX_REGPARM(1)
2047 static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 )
2049 ULong sum1 = 0, sum2 = 0;
2050 ULong* p = (ULong*)first_w64;
2051 ULong w;
2052 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2053 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2054 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2055 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2056 sum1 ^= sum2;
2057 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2058 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2059 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2060 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2061 sum1 ^= sum2;
2062 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2063 sum1 ^= sum2;
2064 return sum1 + sum2;
2067 VEX_REGPARM(1)
2068 static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 )
2070 ULong sum1 = 0, sum2 = 0;
2071 ULong* p = (ULong*)first_w64;
2072 ULong w;
2073 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2074 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2075 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2076 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2077 sum1 ^= sum2;
2078 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2079 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2080 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2081 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2082 sum1 ^= sum2;
2083 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2084 sum1 ^= sum2;
2085 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2086 sum1 ^= sum2;
2087 return sum1 + sum2;
2090 VEX_REGPARM(1)
2091 static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 )
2093 ULong sum1 = 0, sum2 = 0;
2094 ULong* p = (ULong*)first_w64;
2095 ULong w;
2096 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2097 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2098 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2099 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2100 sum1 ^= sum2;
2101 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2102 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2103 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2104 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2105 sum1 ^= sum2;
2106 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2107 sum1 ^= sum2;
2108 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2109 sum1 ^= sum2;
2110 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2111 sum1 ^= sum2;
2112 return sum1 + sum2;
2115 VEX_REGPARM(1)
2116 static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 )
2118 ULong sum1 = 0, sum2 = 0;
2119 ULong* p = (ULong*)first_w64;
2120 ULong w;
2121 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2122 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2123 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2124 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2125 sum1 ^= sum2;
2126 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2127 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2128 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2129 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2130 sum1 ^= sum2;
2131 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2132 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2133 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2134 w = p[11]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2135 sum1 ^= sum2;
2136 return sum1 + sum2;
/*--------------------------------------------------------------------*/
/*--- end                                 guest_generic_bb_to_IR.c ---*/
/*--------------------------------------------------------------------*/