Clean up machinery to do with conditionalising IRStmts:
[valgrind.git] / VEX / priv / guest_generic_bb_to_IR.c
blob7782bcf96f8b38373d954131666cfc9c988fa5b0
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_generic_bb_to_IR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
36 #include "libvex.h"
37 #include "main_util.h"
38 #include "main_globals.h"
39 #include "guest_generic_bb_to_IR.h"
40 #include "ir_opt.h"
43 /*--------------------------------------------------------------*/
44 /*--- Forwards for fns called by self-checking translations ---*/
45 /*--------------------------------------------------------------*/
47 /* Forwards .. */
48 VEX_REGPARM(2) static UInt genericg_compute_checksum_4al ( HWord first_w32,
49 HWord n_w32s );
50 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
51 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
52 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
53 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
54 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
55 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
56 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
57 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
58 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
59 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
60 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
61 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
63 VEX_REGPARM(2) static ULong genericg_compute_checksum_8al ( HWord first_w64,
64 HWord n_w64s );
65 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 );
66 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 );
67 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 );
68 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 );
69 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 );
70 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 );
71 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 );
72 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 );
73 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 );
74 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 );
75 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 );
76 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 );
79 /*--------------------------------------------------------------*/
80 /*--- Creation of self-check IR ---*/
81 /*--------------------------------------------------------------*/
/* Generate self-checking code for the (up to 3) extents of guest code
   described by |vge|, overwriting the groups of 5 reserved IRStmt_NoOp
   slots in |irsb| that begin at statement index |selfcheck_idx|.
   |*n_sc_extents| is incremented once per extent for which a check is
   actually generated.  The caller's |needs_self_check| callback decides,
   per extent, whether a check is wanted at all. */
static void create_self_checks_as_needed(
           /*MOD*/ IRSB*                irsb,
           /*OUT*/ UInt*                n_sc_extents,
           /*MOD*/ VexRegisterUpdates*  pxControl,
           /*MOD*/ void*                callback_opaque,
           /*IN*/  UInt (*needs_self_check)
                      (void*, /*MB_MOD*/VexRegisterUpdates*,
                       const VexGuestExtents*),
           const VexGuestExtents*       vge,
           const VexAbiInfo*            abiinfo_both,
           const IRType                 guest_word_type,
           const Int                    selfcheck_idx,
           /*IN*/  Int                  offB_GUEST_CMSTART,
           /*IN*/  Int                  offB_GUEST_CMLEN,
           /*IN*/  Int                  offB_GUEST_IP,
           const Addr                   guest_IP_sbstart
        )
{
   /* The scheme is to compute a rather crude checksum of the code
      we're making a translation of, and add to the IR a call to a
      helper routine which recomputes the checksum every time the
      translation is run, and requests a retranslation if it doesn't
      match.  This is obviously very expensive and considerable
      efforts are made to speed it up:

      * the checksum is computed from all the naturally aligned
        host-sized words that overlap the translated code.  That means
        it could depend on up to 7 bytes before and 7 bytes after
        which aren't part of the translated area, and so if those
        change then we'll unnecessarily have to discard and
        retranslate.  This seems like a pretty remote possibility and
        it seems as if the benefit of not having to deal with the ends
        of the range at byte precision far outweigh any possible extra
        translations needed.

      * there's a generic routine and 12 specialised cases, which
        handle the cases of 1 through 12-word lengths respectively.
        They seem to cover about 90% of the cases that occur in
        practice.

      We ask the caller, via needs_self_check, which of the 3 vge
      extents needs a check, and only generate check code for those
      that do.
   */
   Addr     base2check;
   UInt     len2check;
   HWord    expectedhW;
   IRTemp   tistart_tmp, tilen_tmp;
   HWord    VEX_REGPARM(2) (*fn_generic)(HWord, HWord);
   HWord    VEX_REGPARM(1) (*fn_spec)(HWord);
   const HChar* nm_generic;
   const HChar* nm_spec;
   HWord    fn_generic_entry = 0;
   HWord    fn_spec_entry = 0;
   UInt     host_word_szB = sizeof(HWord);
   IRType   host_word_type = Ity_INVALID;

   /* Ask the caller which extents (bitmask, bit i == extent i) need
      checking; the callback may also adjust *pxControl. */
   UInt extents_needing_check
      = needs_self_check(callback_opaque, pxControl, vge);

   if (host_word_szB == 4) host_word_type = Ity_I32;
   if (host_word_szB == 8) host_word_type = Ity_I64;
   vassert(host_word_type != Ity_INVALID);

   vassert(vge->n_used >= 1 && vge->n_used <= 3);

   /* Caller shouldn't claim that nonexistent extents need a
      check. */
   vassert((extents_needing_check >> vge->n_used) == 0);

   /* Guest addresses as IRConsts.  Used in self-checks to specify the
      restart-after-discard point. */
   IRConst* guest_IP_sbstart_IRConst
      = guest_word_type==Ity_I32
           ? IRConst_U32(toUInt(guest_IP_sbstart))
           : IRConst_U64(guest_IP_sbstart);

   const Int n_extent_slots = sizeof(vge->base) / sizeof(vge->base[0]);
   vassert(n_extent_slots == 3);

   /* All 5 reserved statements for every extent slot must lie inside
      the statement array. */
   vassert(selfcheck_idx + (n_extent_slots - 1) * 5 + 4 < irsb->stmts_used);

   for (Int i = 0; i < vge->n_used; i++) {
      /* Do we need to generate a check for this extent? */
      if ((extents_needing_check & (1 << i)) == 0)
         continue;

      /* Tell the caller */
      (*n_sc_extents)++;

      /* the extent we're generating a check for */
      base2check = vge->base[i];
      len2check  = vge->len[i];

      /* stay sane */
      /* NOTE(review): |len2check| is a UInt, so the >= 0 half of this
         test is trivially true; the upper bound is the real check. */
      vassert(len2check >= 0 && len2check < 2000/*arbitrary*/);

      /* Skip the check if the translation involved zero bytes */
      if (len2check == 0)
         continue;

      /* Round the extent out to naturally-aligned host words:
         |first_hW| and |last_hW| are the addresses of the first and
         last words that overlap the extent. */
      HWord first_hW = ((HWord)base2check)
                       & ~(HWord)(host_word_szB-1);
      HWord last_hW  = (((HWord)base2check) + len2check - 1)
                       & ~(HWord)(host_word_szB-1);
      vassert(first_hW <= last_hW);
      HWord hW_diff = last_hW - first_hW;
      vassert(0 == (hW_diff & (host_word_szB-1)));
      HWord hWs_to_check = (hW_diff + host_word_szB) / host_word_szB;
      vassert(hWs_to_check > 0
              && hWs_to_check < 2004/*arbitrary*/ / host_word_szB);

      /* vex_printf("%lx %lx %ld\n", first_hW, last_hW, hWs_to_check); */

      /* Select the generic (any-length) checksum routine for this host
         word size. */
      if (host_word_szB == 8) {
         fn_generic =  (VEX_REGPARM(2) HWord(*)(HWord, HWord))
                       genericg_compute_checksum_8al;
         nm_generic = "genericg_compute_checksum_8al";
      } else {
         fn_generic =  (VEX_REGPARM(2) HWord(*)(HWord, HWord))
                       genericg_compute_checksum_4al;
         nm_generic = "genericg_compute_checksum_4al";
      }

      fn_spec = NULL;
      nm_spec = NULL;

      /* If the extent is 1 .. 12 words long, also pick the faster
         specialised routine for exactly that length. */
      if (host_word_szB == 8) {
         const HChar* nm = NULL;
         ULong  VEX_REGPARM(1) (*fn)(HWord)  = NULL;
         switch (hWs_to_check) {
            case 1:  fn =  genericg_compute_checksum_8al_1;
                     nm = "genericg_compute_checksum_8al_1"; break;
            case 2:  fn =  genericg_compute_checksum_8al_2;
                     nm = "genericg_compute_checksum_8al_2"; break;
            case 3:  fn =  genericg_compute_checksum_8al_3;
                     nm = "genericg_compute_checksum_8al_3"; break;
            case 4:  fn =  genericg_compute_checksum_8al_4;
                     nm = "genericg_compute_checksum_8al_4"; break;
            case 5:  fn =  genericg_compute_checksum_8al_5;
                     nm = "genericg_compute_checksum_8al_5"; break;
            case 6:  fn =  genericg_compute_checksum_8al_6;
                     nm = "genericg_compute_checksum_8al_6"; break;
            case 7:  fn =  genericg_compute_checksum_8al_7;
                     nm = "genericg_compute_checksum_8al_7"; break;
            case 8:  fn =  genericg_compute_checksum_8al_8;
                     nm = "genericg_compute_checksum_8al_8"; break;
            case 9:  fn =  genericg_compute_checksum_8al_9;
                     nm = "genericg_compute_checksum_8al_9"; break;
            case 10: fn =  genericg_compute_checksum_8al_10;
                     nm = "genericg_compute_checksum_8al_10"; break;
            case 11: fn =  genericg_compute_checksum_8al_11;
                     nm = "genericg_compute_checksum_8al_11"; break;
            case 12: fn =  genericg_compute_checksum_8al_12;
                     nm = "genericg_compute_checksum_8al_12"; break;
            default: break;
         }
         fn_spec = (VEX_REGPARM(1) HWord(*)(HWord)) fn;
         nm_spec = nm;
      } else {
         const HChar* nm = NULL;
         UInt   VEX_REGPARM(1) (*fn)(HWord) = NULL;
         switch (hWs_to_check) {
            case 1:  fn =  genericg_compute_checksum_4al_1;
                     nm = "genericg_compute_checksum_4al_1"; break;
            case 2:  fn =  genericg_compute_checksum_4al_2;
                     nm = "genericg_compute_checksum_4al_2"; break;
            case 3:  fn =  genericg_compute_checksum_4al_3;
                     nm = "genericg_compute_checksum_4al_3"; break;
            case 4:  fn =  genericg_compute_checksum_4al_4;
                     nm = "genericg_compute_checksum_4al_4"; break;
            case 5:  fn =  genericg_compute_checksum_4al_5;
                     nm = "genericg_compute_checksum_4al_5"; break;
            case 6:  fn =  genericg_compute_checksum_4al_6;
                     nm = "genericg_compute_checksum_4al_6"; break;
            case 7:  fn =  genericg_compute_checksum_4al_7;
                     nm = "genericg_compute_checksum_4al_7"; break;
            case 8:  fn =  genericg_compute_checksum_4al_8;
                     nm = "genericg_compute_checksum_4al_8"; break;
            case 9:  fn =  genericg_compute_checksum_4al_9;
                     nm = "genericg_compute_checksum_4al_9"; break;
            case 10: fn =  genericg_compute_checksum_4al_10;
                     nm = "genericg_compute_checksum_4al_10"; break;
            case 11: fn =  genericg_compute_checksum_4al_11;
                     nm = "genericg_compute_checksum_4al_11"; break;
            case 12: fn =  genericg_compute_checksum_4al_12;
                     nm = "genericg_compute_checksum_4al_12"; break;
            default: break;
         }
         fn_spec = (VEX_REGPARM(1) HWord(*)(HWord))fn;
         nm_spec = nm;
      }

      /* Compute the expected checksum now, at translation time. */
      expectedhW = fn_generic( first_hW, hWs_to_check );
      /* If we got a specialised version, check it produces the same
         result as the generic version! */
      if (fn_spec) {
         vassert(nm_spec);
         vassert(expectedhW == fn_spec( first_hW ));
      } else {
         vassert(!nm_spec);
      }

      /* Set CMSTART and CMLEN.  These will describe to the despatcher
         the area of guest code to invalidate should we exit with a
         self-check failure. */
      tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
      tilen_tmp   = newIRTemp(irsb->tyenv, guest_word_type);

      IRConst* base2check_IRConst
         = guest_word_type==Ity_I32 ? IRConst_U32(toUInt(base2check))
                                    : IRConst_U64(base2check);
      IRConst* len2check_IRConst
         = guest_word_type==Ity_I32 ? IRConst_U32(len2check)
                                    : IRConst_U64(len2check);

      /* Locate the 5 reserved slots for this extent; they must still
         hold the NoOps planted by our caller. */
      IRStmt** stmt0 = &irsb->stmts[selfcheck_idx + i * 5 + 0];
      IRStmt** stmt1 = &irsb->stmts[selfcheck_idx + i * 5 + 1];
      IRStmt** stmt2 = &irsb->stmts[selfcheck_idx + i * 5 + 2];
      IRStmt** stmt3 = &irsb->stmts[selfcheck_idx + i * 5 + 3];
      IRStmt** stmt4 = &irsb->stmts[selfcheck_idx + i * 5 + 4];
      vassert((*stmt0)->tag == Ist_NoOp);
      vassert((*stmt1)->tag == Ist_NoOp);
      vassert((*stmt2)->tag == Ist_NoOp);
      vassert((*stmt3)->tag == Ist_NoOp);
      vassert((*stmt4)->tag == Ist_NoOp);

      *stmt0 = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(base2check_IRConst) );
      *stmt1 = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
      *stmt2 = IRStmt_Put( offB_GUEST_CMSTART, IRExpr_RdTmp(tistart_tmp) );
      *stmt3 = IRStmt_Put( offB_GUEST_CMLEN, IRExpr_RdTmp(tilen_tmp) );

      /* Generate the entry point descriptors */
      if (abiinfo_both->host_ppc_calls_use_fndescrs) {
         /* Here a function pointer points at a descriptor whose first
            word is the real entry point. */
         HWord* descr = (HWord*)fn_generic;
         fn_generic_entry = descr[0];
         if (fn_spec) {
            descr = (HWord*)fn_spec;
            fn_spec_entry = descr[0];
         } else {
            fn_spec_entry = (HWord)NULL;
         }
      } else {
         fn_generic_entry = (HWord)fn_generic;
         if (fn_spec) {
            fn_spec_entry = (HWord)fn_spec;
         } else {
            fn_spec_entry = (HWord)NULL;
         }
      }

      /* Build the runtime call: prefer the 1-arg specialised routine,
         else the 2-arg generic one. */
      IRExpr* callexpr = NULL;
      if (fn_spec) {
         callexpr = mkIRExprCCall(
                       host_word_type, 1/*regparms*/,
                       nm_spec, (void*)fn_spec_entry,
                       mkIRExprVec_1(
                          mkIRExpr_HWord( (HWord)first_hW )
                       )
                    );
      } else {
         callexpr = mkIRExprCCall(
                       host_word_type, 2/*regparms*/,
                       nm_generic, (void*)fn_generic_entry,
                       mkIRExprVec_2(
                          mkIRExpr_HWord( (HWord)first_hW ),
                          mkIRExpr_HWord( (HWord)hWs_to_check )
                       )
                    );
      }

      /* Exit the translation (requesting invalidation and retranslation)
         if the checksum computed at run time differs from |expectedhW|. */
      *stmt4
         = IRStmt_Exit(
              IRExpr_Binop(
                 host_word_type==Ity_I64 ? Iop_CmpNE64 : Iop_CmpNE32,
                 callexpr,
                    host_word_type==Ity_I64
                       ? IRExpr_Const(IRConst_U64(expectedhW))
                       : IRExpr_Const(IRConst_U32(expectedhW))
              ),
              Ijk_InvalICache,
              /* Where we must restart if there's a failure: at the
                 first extent, regardless of which extent the
                 failure actually happened in. */
              guest_IP_sbstart_IRConst,
              offB_GUEST_IP
           );
   } /* for (i = 0; i < vge->n_used; i++) */

   /* Unused extent slots must still contain their reserved NoOps. */
   for (Int i = vge->n_used;
        i < sizeof(vge->base) / sizeof(vge->base[0]); i++) {
      IRStmt* stmt0 = irsb->stmts[selfcheck_idx + i * 5 + 0];
      IRStmt* stmt1 = irsb->stmts[selfcheck_idx + i * 5 + 1];
      IRStmt* stmt2 = irsb->stmts[selfcheck_idx + i * 5 + 2];
      IRStmt* stmt3 = irsb->stmts[selfcheck_idx + i * 5 + 3];
      IRStmt* stmt4 = irsb->stmts[selfcheck_idx + i * 5 + 4];
      vassert(stmt0->tag == Ist_NoOp);
      vassert(stmt1->tag == Ist_NoOp);
      vassert(stmt2->tag == Ist_NoOp);
      vassert(stmt3->tag == Ist_NoOp);
      vassert(stmt4->tag == Ist_NoOp);
   }
}
390 /*--------------------------------------------------------------*/
391 /*--- To do with guarding (conditionalisation) of IRStmts ---*/
392 /*--------------------------------------------------------------*/
394 // Is it possible to guard |e|? Meaning, is it safe (exception-free) to compute
395 // |e| and ignore the result? Since |e| is by definition otherwise
396 // side-effect-free, we don't have to ask about any other effects caused by
397 // first computing |e| and then ignoring the result.
398 static Bool expr_is_guardable ( const IRExpr* e )
400 switch (e->tag) {
401 case Iex_Load:
402 return False;
403 case Iex_Unop:
404 return !primopMightTrap(e->Iex.Unop.op);
405 case Iex_Binop:
406 return !primopMightTrap(e->Iex.Binop.op);
407 case Iex_ITE:
408 case Iex_CCall:
409 case Iex_Get:
410 return True;
411 default:
412 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
413 vpanic("expr_is_guardable: unhandled expr");
417 // Is it possible to guard |st|? Meaning, is it possible to replace |st| by
418 // some other sequence of IRStmts which have the same effect on the architected
419 // state when the guard is true, but when it is false, have no effect on the
420 // architected state and are guaranteed not to cause any exceptions?
422 // Note that this isn't as aggressive as it could be: it sometimes returns False
423 // in cases where |st| is actually guardable. This routine must coordinate
424 // closely with add_guarded_stmt_to_end_of below, in the sense that that routine
425 // must be able to handle any |st| for which this routine returns True.
426 static Bool stmt_is_guardable ( const IRStmt* st )
428 switch (st->tag) {
429 case Ist_IMark:
430 case Ist_Put:
431 return True;
432 case Ist_Store: // definitely not
433 case Ist_CAS: // definitely not
434 case Ist_Exit: // We could in fact spec this, if required
435 return False;
436 case Ist_WrTmp:
437 return expr_is_guardable(st->Ist.WrTmp.data);
438 default:
439 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
440 vpanic("stmt_is_guardable: unhandled stmt");
444 // Are all stmts (but not the end dst value) in |bb| guardable, per
445 // stmt_is_guardable?
446 static Bool block_is_guardable ( const IRSB* bb )
448 Int i = bb->stmts_used;
449 vassert(i >= 2); // Must have at least: IMark, side Exit (at the end)
450 i--;
451 vassert(bb->stmts[i]->tag == Ist_Exit);
452 i--;
453 for (; i >= 0; i--) {
454 if (!stmt_is_guardable(bb->stmts[i]))
455 return False;
457 return True;
// Guard |st| with |guard| and add it to |bb|.  This must be able to handle any
// |st| for which stmt_is_guardable returns True.  |guard| is an Ity_I1 temp;
// when it evaluates to 0 at run time, the added statements must leave the
// architected state unchanged and raise no exception.
static void add_guarded_stmt_to_end_of ( /*MOD*/IRSB* bb,
                                         /*IN*/ IRStmt* st, IRTemp guard )
{
   switch (st->tag) {
      case Ist_IMark:
      case Ist_WrTmp:
         // Harmless when the guard is false: an IMark changes no state, and
         // a WrTmp's RHS is already known (via expr_is_guardable) to be
         // exception-free, so it may be computed unconditionally.
         addStmtToIRSB(bb, st);
         break;
      case Ist_Put: {
         // Put(offs, e) ==> Put(offs, ITE(guard, e, Get(offs, sizeof(e))))
         // Which when flattened out is:
         //    t1 = Get(offs, sizeof(e))
         //    t2 = ITE(guard, e, t1)
         //    Put(offs, t2)
         // i.e. when the guard is false, write back the old value.
         Int     offset = st->Ist.Put.offset;
         IRExpr* e      = st->Ist.Put.data;
         IRType  ty     = typeOfIRExpr(bb->tyenv, e);
         IRTemp  t1     = newIRTemp(bb->tyenv, ty);
         IRTemp  t2     = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Get(offset, ty)));
         addStmtToIRSB(bb, IRStmt_WrTmp(t2, IRExpr_ITE(IRExpr_RdTmp(guard),
                                                       e, IRExpr_RdTmp(t1))));
         addStmtToIRSB(bb, IRStmt_Put(offset, IRExpr_RdTmp(t2)));
         break;
      }
      case Ist_Exit: {
         // Exit(xguard, dst, jk, offsIP)
         // ==> t1 = And1(xguard, guard)
         //     Exit(t1, dst, jk, offsIP)
         // i.e. the exit may only be taken when the outer guard also holds.
         IRExpr* xguard = st->Ist.Exit.guard;
         IRTemp  t1     = newIRTemp(bb->tyenv, Ity_I1);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Binop(Iop_And1, xguard,
                                                         IRExpr_RdTmp(guard))));
         addStmtToIRSB(bb, IRStmt_Exit(IRExpr_RdTmp(t1), st->Ist.Exit.jk,
                                       st->Ist.Exit.dst, st->Ist.Exit.offsIP));
         break;
      }
      default:
         // Must be kept in sync with stmt_is_guardable above.
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("add_guarded_stmt_to_end_of: unhandled stmt");
   }
}
506 /*--------------------------------------------------------------*/
507 /*--- Analysis of block ends ---*/
508 /*--------------------------------------------------------------*/
// Classification of how an analysed basic block ends.
typedef
   enum {
      Be_Unknown=1, // Unknown end
      Be_UnCond,    // Unconditional branch to known destination, unassisted
      Be_Cond       // Conditional branch to known destinations, unassisted
   }
   BlockEndTag;

// Decoded description of a block end; which union arm is valid is
// selected by |tag|.  The delta fields are branch-target offsets
// relative to the start of the superblock (see irconst_to_maybe_delta).
typedef
   struct {
      BlockEndTag tag;
      union {
         struct {
         } Unknown;
         struct {
            Long delta;     // target offset, relative to sb start
         } UnCond;
         struct {
            IRTemp condSX;  // IR temp holding the side-exit guard
            Long   deltaSX; // taken (side-exit) target offset
            Long   deltaFT; // fall-through target offset
         } Cond;
      } Be;
   }
   BlockEnd;
536 static void ppBlockEnd ( const BlockEnd* be )
538 switch (be->tag) {
539 case Be_Unknown:
540 vex_printf("!!Unknown!!");
541 break;
542 case Be_UnCond:
543 vex_printf("UnCond{delta=%lld}", be->Be.UnCond.delta);
544 break;
545 case Be_Cond:
546 vex_printf("Cond{condSX=");
547 ppIRTemp(be->Be.Cond.condSX);
548 vex_printf(", deltaSX=%lld, deltaFT=%lld}",
549 be->Be.Cond.deltaSX, be->Be.Cond.deltaFT);
550 break;
551 default:
552 vassert(0);
556 // Return True if |be| definitely does not jump to |delta|. In case of
557 // doubt, returns False.
558 static Bool definitely_does_not_jump_to_delta ( const BlockEnd* be, Long delta )
560 switch (be->tag) {
561 case Be_Unknown: return False;
562 case Be_UnCond: return be->Be.UnCond.delta != delta;
563 case Be_Cond: return be->Be.Cond.deltaSX != delta
564 && be->Be.Cond.deltaFT != delta;
565 default: vassert(0);
/* Try to convert the constant branch target |known_dst| into an offset
   (|*delta|) relative to |guest_IP_sbstart|.  Returns True on success.
   Returns False -- with |*delta| zeroed -- if the client's
   |chase_into_ok| callback refuses the destination. */
static Bool irconst_to_maybe_delta ( /*OUT*/Long* delta,
                                     const IRConst* known_dst,
                                     const Addr guest_IP_sbstart,
                                     const IRType guest_word_type,
                                     Bool (*chase_into_ok)(void*,Addr),
                                     void* callback_opaque )
{
   vassert(typeOfIRConst(known_dst) == guest_word_type);

   *delta = 0;

   // Extract the destination guest address.
   Addr dst_ga = 0;
   switch (known_dst->tag) {
      case Ico_U32:
         vassert(guest_word_type == Ity_I32);
         dst_ga = known_dst->Ico.U32;
         break;
      case Ico_U64:
         vassert(guest_word_type == Ity_I64);
         dst_ga = known_dst->Ico.U64;
         break;
      default:
         // Unreachable: typeOfIRConst check above restricts to U32/U64.
         vassert(0);
   }

   // Check we're allowed to chase into it.
   if (!chase_into_ok(callback_opaque, dst_ga))
      return False;

   Addr delta_as_Addr = dst_ga - guest_IP_sbstart;
   // Either |delta_as_Addr| is a 64-bit value, in which case copy it directly
   // to |delta|, or it's a 32 bit value, in which case sign extend it.
   *delta = sizeof(Addr) == 8 ? (Long)delta_as_Addr : (Long)(Int)delta_as_Addr;
   return True;
}
606 /* Scan |stmts|, starting at |scan_start| and working backwards, to detect the
607 case where there are no IRStmt_Exits before we find the IMark. In other
608 words, it scans backwards through some prefix of an instruction's IR to see
609 if there is an exit there. */
610 static Bool insn_has_no_other_exits ( IRStmt** const stmts, Int scan_start )
612 Bool found_exit = False;
613 Int i = scan_start;
614 while (True) {
615 if (i < 0)
616 break;
617 const IRStmt* st = stmts[i];
618 if (st->tag == Ist_IMark)
619 break;
620 if (st->tag == Ist_Exit) {
621 found_exit = True;
622 break;
624 i--;
626 // We expect IR for all instructions to start with an IMark.
627 vassert(i >= 0);
628 return !found_exit;
// Classify how |irsb| ends, writing the result to |be|.  Recognises an
// unassisted conditional branch to two known destinations, and an
// unassisted unconditional branch/call to one known destination; anything
// else becomes Be_Unknown.  Destinations are reported as deltas relative
// to |guest_IP_sbstart| and only if |chase_into_ok| permits them.
// FIXME make this able to recognise all block ends
static void analyse_block_end ( /*OUT*/BlockEnd* be, const IRSB* irsb,
                                const Addr guest_IP_sbstart,
                                const IRType guest_word_type,
                                Bool (*chase_into_ok)(void*,Addr),
                                void* callback_opaque,
                                Bool debug_print )
{
   vex_bzero(be, sizeof(*be));

   // -- Conditional branch to known destination
   /* In short, detect the following end form:
        ------ IMark(0x4002009, 2, 0) ------
        // Zero or more non-exit statements
        if (t14) { PUT(184) = 0x4002040:I64; exit-Boring }
        PUT(184) = 0x400200B:I64; exit-Boring
      Checks:
      - Both transfers are 'boring'
      - Both dsts are constants
      - The cond is non-constant (an IRExpr_Tmp)
      - There are no other exits in this instruction
      - The client allows chasing into both destinations
   */
   if (irsb->jumpkind == Ijk_Boring && irsb->stmts_used >= 2) {
      const IRStmt* maybe_exit = irsb->stmts[irsb->stmts_used - 1];
      if (maybe_exit->tag == Ist_Exit
          && maybe_exit->Ist.Exit.guard->tag == Iex_RdTmp
          && maybe_exit->Ist.Exit.jk == Ijk_Boring
          && irsb->next->tag == Iex_Const
          && insn_has_no_other_exits(irsb->stmts, irsb->stmts_used - 2)) {
         vassert(maybe_exit->Ist.Exit.offsIP == irsb->offsIP);
         IRConst* dst_SX  = maybe_exit->Ist.Exit.dst;
         IRConst* dst_FT  = irsb->next->Iex.Const.con;
         IRTemp   cond_SX = maybe_exit->Ist.Exit.guard->Iex.RdTmp.tmp;
         Long     delta_SX = 0;
         Long     delta_FT = 0;
         Bool ok_SX
            = irconst_to_maybe_delta(&delta_SX, dst_SX,
                                     guest_IP_sbstart, guest_word_type,
                                     chase_into_ok, callback_opaque);
         Bool ok_FT
            = irconst_to_maybe_delta(&delta_FT, dst_FT,
                                     guest_IP_sbstart, guest_word_type,
                                     chase_into_ok, callback_opaque);
         if (ok_SX && ok_FT) {
            be->tag = Be_Cond;
            be->Be.Cond.condSX  = cond_SX;
            be->Be.Cond.deltaSX = delta_SX;
            be->Be.Cond.deltaFT = delta_FT;
            goto out;
         }
      }
   }

   // -- Unconditional branch/call to known destination
   /* Four checks:
      - The transfer is 'boring' or 'call', so that no assistance is needed
      - The dst is a constant (known at jit time)
      - There are no other exits in this instruction.  In other words, the
        transfer is unconditional.
      - The client allows chasing into the destination.
   */
   if ((irsb->jumpkind == Ijk_Boring || irsb->jumpkind == Ijk_Call)
       && irsb->next->tag == Iex_Const) {
      if (insn_has_no_other_exits(irsb->stmts, irsb->stmts_used - 1)) {
         // We've got the right pattern.  Check whether we can chase into the
         // destination, and if so convert that to a delta value.
         const IRConst* known_dst = irsb->next->Iex.Const.con;
         Long delta = 0;
         // This call also checks the type of the dst addr, and that the client
         // allows chasing into it.
         Bool ok = irconst_to_maybe_delta(&delta, known_dst,
                                          guest_IP_sbstart, guest_word_type,
                                          chase_into_ok, callback_opaque);
         if (ok) {
            be->tag = Be_UnCond;
            be->Be.UnCond.delta = delta;
            goto out;
         }
      }
   }

   be->tag = Be_Unknown;
   // Not identified as anything in particular.

  out:
   if (debug_print) {
      vex_printf("\nBlockEnd: ");
      ppBlockEnd(be);
      vex_printf("\n");
   }
}
725 /*--------------------------------------------------------------*/
726 /*--- Disassembly of basic (not super) blocks ---*/
727 /*--------------------------------------------------------------*/
/* Disassemble instructions, starting at |&guest_code[delta_IN]|, into |irbb|,
   and terminate the block properly.  At most |n_instrs_allowed_IN| may be
   disassembled, and this function may choose to disassemble fewer.

   Also do minimal simplifications on the resulting block, so as to convert the
   end of the block into something that |analyse_block_end| can reliably
   recognise.

   |irbb| will both be modified, and replaced by a new, simplified version,
   which is returned.
*/
static IRSB* disassemble_basic_block_till_stop(
                /*OUT*/ Int*    n_instrs,        // #instrs actually used
                /*OUT*/ Bool*   is_verbose_seen, // did we get a 'verbose' hint?
                /*OUT*/ Addr*   extent_base,     // VexGuestExtents[..].base
                /*OUT*/ UShort* extent_len,      // VexGuestExtents[..].len
                /*MOD*/ IRSB*   irbb,
                const   Long    delta_IN,
                const   Int     n_instrs_allowed_IN,
                const   Addr    guest_IP_sbstart,
                const   VexEndness host_endness,
                const   Bool    sigill_diag,
                const   VexArch arch_guest,
                const   VexArchInfo* archinfo_guest,
                const   VexAbiInfo*  abiinfo_both,
                const   IRType  guest_word_type,
                const   Bool    debug_print,
                const   DisOneInstrFn dis_instr_fn,
                const   UChar*  guest_code,
                const   Int     offB_GUEST_IP
             )
{
   /* This is the max instrs we allow in the block.  It starts off at
      |n_instrs_allowed_IN| but we may choose to reduce it in the case where
      the instruction disassembler returns an 'is verbose' hint.  This is so
      as to ensure that the JIT doesn't run out of space.  See bug 375839 for
      a motivating example. */

   /* Process instructions. */
   Long delta            = delta_IN;
   Int  n_instrs_allowed = n_instrs_allowed_IN;

   *n_instrs        = 0;
   *is_verbose_seen = False;
   *extent_base     = guest_IP_sbstart + delta;
   *extent_len      = 0;

   while (True) {
      vassert(*n_instrs < n_instrs_allowed);

      /* This is the IP of the instruction we're just about to deal
         with. */
      Addr guest_IP_curr_instr = guest_IP_sbstart + delta;

      /* This is the irbb statement array index of the first stmt in
         this insn.  That will always be the instruction-mark
         descriptor. */
      Int first_stmt_idx = irbb->stmts_used;

      /* Add an instruction-mark statement.  We won't know until after
         disassembling the instruction how long it instruction is, so
         just put in a zero length and we'll fix it up later.

         On ARM, the least significant bit of the instr address
         distinguishes ARM vs Thumb instructions.  All instructions
         actually start on at least 2-aligned addresses.  So we need
         to ignore the bottom bit of the insn address when forming the
         IMark's address field, but put that bottom bit in the delta
         field, so that comparisons against guest_R15T for Thumb can
         be done correctly.  By inspecting the delta field,
         instruction processors can determine whether the instruction
         was originally Thumb or ARM.  For more details of this
         convention, see comments on definition of guest_R15T in
         libvex_guest_arm.h. */
      if (arch_guest == VexArchARM && (guest_IP_curr_instr & 1)) {
         /* Thumb insn => mask out the T bit, but put it in delta */
         addStmtToIRSB( irbb,
                        IRStmt_IMark(guest_IP_curr_instr & ~(Addr)1,
                                     0, /* len */
                                     1  /* delta */
                        )
         );
      } else {
         /* All other targets: store IP as-is, and set delta to zero. */
         addStmtToIRSB( irbb,
                        IRStmt_IMark(guest_IP_curr_instr,
                                     0, /* len */
                                     0  /* delta */
                        )
         );
      }

      if (debug_print && *n_instrs > 0)
         vex_printf("\n");

      /* Finally, actually disassemble an instruction. */
      vassert(irbb->next == NULL);
      DisResult dres
         = dis_instr_fn ( irbb, guest_code, delta, guest_IP_curr_instr,
                          arch_guest, archinfo_guest, abiinfo_both,
                          host_endness, sigill_diag );

      /* stay sane ... */
      vassert(dres.whatNext == Dis_StopHere || dres.whatNext == Dis_Continue);
      /* ... disassembled insn length is sane ... */
      vassert(dres.len >= 0 && dres.len <= 24);

      /* If the disassembly function passed us a hint, take note of it. */
      if (LIKELY(dres.hint == Dis_HintNone)) {
         /* Do nothing */
      } else {
         vassert(dres.hint == Dis_HintVerbose);
         /* The current insn is known to be verbose.  Lower the max insns limit
            if necessary so as to avoid running the JIT out of space in the
            event that we've encountered the start of a long sequence of them.
            This is expected to be a very rare event.  In any case the remaining
            limit (in the default setting, 30 insns) is still so high that most
            blocks will terminate anyway before then.  So this is very unlikely
            to give a perf hit in practice.  See bug 375839 for the motivating
            example. */
         if (!(*is_verbose_seen)) {
            *is_verbose_seen = True;
            // Halve the number of allowed insns, but only above 2
            if (n_instrs_allowed > 2) {
               n_instrs_allowed = ((n_instrs_allowed - 2) / 2) + 2;
               //vassert(*n_instrs <= n_instrs_allowed);
            }
         }
      }

      /* Fill in the insn-mark length field. */
      vassert(first_stmt_idx >= 0 && first_stmt_idx < irbb->stmts_used);
      IRStmt* imark = irbb->stmts[first_stmt_idx];
      vassert(imark);
      vassert(imark->tag == Ist_IMark);
      vassert(imark->Ist.IMark.len == 0);
      imark->Ist.IMark.len = dres.len;

      /* Print the resulting IR, if needed. */
      if (vex_traceflags & VEX_TRACE_FE) {
         for (Int i = first_stmt_idx; i < irbb->stmts_used; i++) {
            vex_printf(" ");
            ppIRStmt(irbb->stmts[i]);
            vex_printf("\n");
         }
      }

      /* Individual insn disassembly may not mess with irbb->next.
         This function is the only place where it can be set. */
      vassert(irbb->next == NULL);
      vassert(irbb->jumpkind == Ijk_Boring);
      vassert(irbb->offsIP == 0);

      /* Individual insn disassembly must finish the IR for each
         instruction with an assignment to the guest PC. */
      vassert(first_stmt_idx < irbb->stmts_used);
      /* it follows that irbb->stmts_used must be > 0 */
      { IRStmt* st = irbb->stmts[irbb->stmts_used-1];
        vassert(st);
        vassert(st->tag == Ist_Put);
        vassert(st->Ist.Put.offset == offB_GUEST_IP);
        /* Really we should also check that the type of the Put'd data
           == guest_word_type, but that's a bit expensive. */
      }

      /* Update the extents entry that we are constructing. */
      /* If vex_control.guest_max_insns is required to be < 100 and
         each insn is at max 20 bytes long, this limit of 5000 then
         seems reasonable since the max possible extent length will be
         100 * 20 == 2000. */
      vassert(*extent_len < 5000);
      (*extent_len) += dres.len;
      (*n_instrs)++;

      /* Advance delta (inconspicuous but very important :-) */
      delta += (Long)dres.len;

      Bool stopNow = False;
      switch (dres.whatNext) {
         case Dis_Continue:
            vassert(dres.jk_StopHere == Ijk_INVALID);
            if (*n_instrs >= n_instrs_allowed) {
               /* We have to stop.  See comment above re irbb field
                  settings here. */
               irbb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
               /* irbb->jumpkind must already by Ijk_Boring */
               irbb->offsIP = offB_GUEST_IP;
               stopNow = True;
            }
            break;
         case Dis_StopHere:
            vassert(dres.jk_StopHere != Ijk_INVALID);
            /* See comment above re irbb field settings here. */
            irbb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
            irbb->jumpkind = dres.jk_StopHere;
            irbb->offsIP = offB_GUEST_IP;
            stopNow = True;
            break;
         default:
            vpanic("bb_to_IR");
      }

      if (stopNow)
         break;
   } /* while (True) */

   /* irbb->next must now be set, since we've finished the block.
      Print it if necessary.*/
   vassert(irbb->next != NULL);
   if (debug_print) {
      vex_printf(" ");
      vex_printf( "PUT(%d) = ", irbb->offsIP);
      ppIRExpr( irbb->next );
      vex_printf( "; exit-");
      ppIRJumpKind(irbb->jumpkind);
      vex_printf( "\n");
      vex_printf( "\n");
   }

   /* And clean it up. */
   irbb = do_minimal_initial_iropt_BB ( irbb );
   if (debug_print) {
      ppIRSB(irbb);
   }

   return irbb;
}
958 /*--------------------------------------------------------------*/
959 /*--- Disassembly of traces: helper functions ---*/
960 /*--------------------------------------------------------------*/
962 // Swap the side exit and fall through exit for |bb|. Update |be| so as to be
963 // consistent.
964 static void swap_sx_and_ft ( /*MOD*/IRSB* bb, /*MOD*/BlockEnd* be )
966 vassert(be->tag == Be_Cond);
967 vassert(bb->stmts_used >= 2); // Must have at least: IMark, Exit
968 IRStmt* exit = bb->stmts[bb->stmts_used - 1];
969 vassert(exit->tag == Ist_Exit);
970 vassert(exit->Ist.Exit.guard->tag == Iex_RdTmp);
971 vassert(exit->Ist.Exit.guard->Iex.RdTmp.tmp == be->Be.Cond.condSX);
972 vassert(bb->next->tag == Iex_Const);
973 vassert(bb->jumpkind == Ijk_Boring);
974 // We need to insert a new stmt, just before the exit, that computes 'Not1'
975 // of the guard condition. Replace |bb->stmts[bb->stmts_used - 1]| by the
976 // new stmt, and then place |exit| immediately after it.
977 IRTemp invertedGuard = newIRTemp(bb->tyenv, Ity_I1);
978 bb->stmts[bb->stmts_used - 1]
979 = IRStmt_WrTmp(invertedGuard,
980 IRExpr_Unop(Iop_Not1, IRExpr_RdTmp(exit->Ist.Exit.guard
981 ->Iex.RdTmp.tmp)));
982 exit->Ist.Exit.guard->Iex.RdTmp.tmp = invertedGuard;
983 addStmtToIRSB(bb, exit);
985 // Swap the actual destination constants.
986 { IRConst* tmp = exit->Ist.Exit.dst;
987 exit->Ist.Exit.dst = bb->next->Iex.Const.con;
988 bb->next->Iex.Const.con = tmp;
991 // And update |be|.
992 { be->Be.Cond.condSX = invertedGuard;
993 Long tmp = be->Be.Cond.deltaSX;
994 be->Be.Cond.deltaSX = be->Be.Cond.deltaFT;
995 be->Be.Cond.deltaFT = tmp;
1000 static void update_instr_budget( /*MOD*/Int* instrs_avail,
1001 /*MOD*/Bool* verbose_mode,
1002 const Int bb_instrs_used,
1003 const Bool bb_verbose_seen )
1005 if (0)
1006 vex_printf("UIB: verbose_mode %d, instrs_avail %d, "
1007 "bb_instrs_used %d, bb_verbose_seen %d\n",
1008 *verbose_mode ? 1 : 0, *instrs_avail,
1009 bb_instrs_used, bb_verbose_seen ? 1 : 0);
1011 vassert(bb_instrs_used <= *instrs_avail);
1013 if (bb_verbose_seen && !(*verbose_mode)) {
1014 *verbose_mode = True;
1015 // Adjust *instrs_avail so that, when it becomes zero, we haven't used
1016 // more than 50% of vex_control.guest_max_instrs.
1017 if (bb_instrs_used > vex_control.guest_max_insns / 2) {
1018 *instrs_avail = 0;
1019 } else {
1020 *instrs_avail = vex_control.guest_max_insns / 2;
1022 vassert(*instrs_avail >= 0);
1025 // Subtract bb_instrs_used from *instrs_avail, clamping at 0 if necessary.
1026 if (bb_instrs_used > *instrs_avail) {
1027 *instrs_avail = 0;
1028 } else {
1029 *instrs_avail -= bb_instrs_used;
1032 vassert(*instrs_avail >= 0);
1035 // Add the extent [base, +len) to |vge|. Asserts if |vge| is already full.
1036 // As an optimisation only, tries to also merge the new extent with the
1037 // previous one, if possible.
1038 static void add_extent ( /*MOD*/VexGuestExtents* vge, Addr base, UShort len )
1040 const UInt limit = sizeof(vge->base) / sizeof(vge->base[0]);
1041 vassert(limit == 3);
1042 const UInt i = vge->n_used;
1043 vassert(i < limit);
1044 vge->n_used++;
1045 vge->base[i] = base;
1046 vge->len[i] = len;
1047 // Try to merge with the previous extent
1048 if (i > 0
1049 && (((UInt)vge->len[i-1]) + ((UInt)len))
1050 < 200*25 /* say, 200 insns of size 25 bytes, absolute worst case */
1051 && vge->base[i-1] + vge->len[i-1] == base) {
1052 vge->len[i-1] += len;
1053 vge->n_used--;
1054 //vex_printf("MERGE\n");
1059 /*--------------------------------------------------------------*/
1060 /*--- Disassembly of traces: main function ---*/
1061 /*--------------------------------------------------------------*/
1063 /* Disassemble a complete basic block, starting at guest_IP_start,
1064 returning a new IRSB. The disassembler may chase across basic
1065 block boundaries if it wishes and if chase_into_ok allows it.
1066 The precise guest address ranges from which code has been taken
1067 are written into vge. guest_IP_sbstart is taken to be the IP in
1068 the guest's address space corresponding to the instruction at
1069 &guest_code[0].
1071 dis_instr_fn is the arch-specific fn to disassemble one instruction; it
1072 is this that does the real work.
1074 needs_self_check is a callback used to ask the caller which of the
1075 extents, if any, a self check is required for. The returned value
1076 is a bitmask with a 1 in position i indicating that the i'th extent
1077 needs a check. Since there can be at most 3 extents, the returned
1078 values must be between 0 and 7.
1080 The number of extents which did get a self check (0 to 3) is put in
1081 n_sc_extents. The caller already knows this because it told us
1082 which extents to add checks for, via the needs_self_check callback,
1083 but we ship the number back out here for the caller's convenience.
1085 preamble_function is a callback which allows the caller to add
1086 its own IR preamble (following the self-check, if any). May be
1087 NULL. If non-NULL, the IRSB under construction is handed to
1088 this function, which presumably adds IR statements to it. The
1089 callback may optionally complete the block and direct bb_to_IR
1090 not to disassemble any instructions into it; this is indicated
1091 by the callback returning True.
1093 offB_CMSTART and offB_CMLEN are the offsets of guest_CMSTART and
1094 guest_CMLEN. Since this routine has to work for any guest state,
1095 without knowing what it is, those offsets have to passed in.
1097 callback_opaque is a caller-supplied pointer to data which the
1098 callbacks may want to see. Vex has no idea what it is.
1099 (In fact it's a VgInstrumentClosure.)
1102 /* Regarding IP updating. dis_instr_fn (that does the guest specific
1103 work of disassembling an individual instruction) must finish the
1104 resulting IR with "PUT(guest_IP) = ". Hence in all cases it must
1105 state the next instruction address.
1107 If the block is to be ended at that point, then this routine
1108 (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to
1109 make a transfer (of the right kind) to "GET(guest_IP)". Hence if
1110 dis_instr_fn generates incorrect IP updates we will see it
1111 immediately (due to jumping to the wrong next guest address).
1113 However it is also necessary to set this up so it can be optimised
1114 nicely. The IRSB exit is defined to update the guest IP, so that
1115 chaining works -- since the chain_me stubs expect the chain-to
1116 address to be in the guest state. Hence what the IRSB next fields
1117 will contain initially is (implicitly)
1119 PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next]
1121 which looks pretty strange at first. Eg so unconditional branch
1122 to some address 0x123456 looks like this:
1124 PUT(guest_IP) = 0x123456; // dis_instr_fn generates this
1125 // the exit
1126 PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring
1128 after redundant-GET and -PUT removal by iropt, we get what we want:
1130 // the exit
1131 PUT(guest_IP) [implicitly] = 0x123456; exit-Boring
1133 This makes the IRSB-end case the same as the side-exit case: update
1134 IP, then transfer. There is no redundancy of representation for
1135 the destination, and we use the destination specified by
1136 dis_instr_fn, so any errors it makes show up sooner.
IRSB* bb_to_IR (
   /*OUT*/VexGuestExtents* vge,
   /*OUT*/UInt*            n_sc_extents,
   /*OUT*/UInt*            n_guest_instrs, /* stats only */
   /*MOD*/VexRegisterUpdates* pxControl,
   /*IN*/ void*            callback_opaque,
   /*IN*/ DisOneInstrFn    dis_instr_fn,
   /*IN*/ const UChar*     guest_code,
   /*IN*/ Addr             guest_IP_sbstart,
   /*IN*/ Bool             (*chase_into_ok)(void*,Addr),
   /*IN*/ VexEndness       host_endness,
   /*IN*/ Bool             sigill_diag,
   /*IN*/ VexArch          arch_guest,
   /*IN*/ const VexArchInfo* archinfo_guest,
   /*IN*/ const VexAbiInfo*  abiinfo_both,
   /*IN*/ IRType           guest_word_type,
   /*IN*/ UInt             (*needs_self_check)
                              (void*, /*MB_MOD*/VexRegisterUpdates*,
                               const VexGuestExtents*),
   /*IN*/ Bool             (*preamble_function)(void*,IRSB*),
   /*IN*/ Int              offB_GUEST_CMSTART,
   /*IN*/ Int              offB_GUEST_CMLEN,
   /*IN*/ Int              offB_GUEST_IP,
   /*IN*/ Int              szB_GUEST_IP
)
{
   Bool debug_print = toBool(vex_traceflags & VEX_TRACE_FE);

   /* check sanity .. */
   vassert(sizeof(HWord) == sizeof(void*));
   vassert(vex_control.guest_max_insns >= 1);
   vassert(vex_control.guest_max_insns <= 100);
   vassert(vex_control.guest_chase_thresh >= 0);
   vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
   vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);

   /* The guest IP slot must be naturally aligned for its width. */
   if (guest_word_type == Ity_I32) {
      vassert(szB_GUEST_IP == 4);
      vassert((offB_GUEST_IP % 4) == 0);
   } else {
      vassert(szB_GUEST_IP == 8);
      vassert((offB_GUEST_IP % 8) == 0);
   }

   /* Initialise all return-by-ref state. */
   vge->n_used = 0;
   *n_sc_extents = 0;
   *n_guest_instrs = 0;

   /* And a new IR superblock to dump the result into. */
   IRSB* irsb = emptyIRSB();

   /* Leave 15 spaces in which to put the check statements for a self
      checking translation (up to 3 extents, and 5 stmts required for
      each).  We won't know until later the extents and checksums of
      the areas, if any, that need to be checked. */
   IRStmt* nop = IRStmt_NoOp();
   Int selfcheck_idx = irsb->stmts_used;  // where the 15 slots begin
   for (Int i = 0; i < 3 * 5; i++)
      addStmtToIRSB( irsb, nop );

   /* If the caller supplied a function to add its own preamble, use
      it now. */
   if (preamble_function) {
      Bool stopNow = preamble_function( callback_opaque, irsb );
      if (stopNow) {
         /* The callback has completed the IR block without any guest
            insns being disassembled into it, so just return it at
            this point, even if a self-check was requested - as there
            is nothing to self-check.  The 15 self-check no-ops will
            still be in place, but they are harmless. */
         vge->n_used  = 1;
         vge->base[0] = guest_IP_sbstart;
         vge->len[0]  = 0;
         return irsb;
      }
   }

   /* Running state:
         irsb          the SB we are incrementally constructing
         vge           associated extents for irsb
         instrs_used   instrs incorporated in irsb so far
         instrs_avail  number of instrs we have space for
         verbose_mode  did we see an 'is verbose' hint at some point?
   */
   Int  instrs_used  = 0;
   Int  instrs_avail = vex_control.guest_max_insns;
   Bool verbose_mode = False;

   /* Disassemble the initial block until we have to stop. */
   {
      Int    ib_instrs_used  = 0;
      Bool   ib_verbose_seen = False;
      Addr   ib_base         = 0;
      UShort ib_len          = 0;
      irsb = disassemble_basic_block_till_stop(
                /*OUT*/ &ib_instrs_used, &ib_verbose_seen, &ib_base, &ib_len,
                /*MOD*/ irsb,
                /*IN*/  0/*delta for the first block in the trace*/,
                instrs_avail, guest_IP_sbstart, host_endness, sigill_diag,
                arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
                debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
             );
      vassert(ib_instrs_used <= instrs_avail);

      // Update instrs_used, extents, budget.
      instrs_used += ib_instrs_used;
      add_extent(vge, ib_base, ib_len);
      update_instr_budget(&instrs_avail, &verbose_mode,
                          ib_instrs_used, ib_verbose_seen);
   }

   /* Now, see if we can extend the initial block.  Note that only the
      unconditional-chase arm below allows this loop to iterate; the
      conditional (&&-idiom) arm and the catch-all arm both break out. */
   while (True) {
      const Int n_extent_slots = sizeof(vge->base) / sizeof(vge->base[0]);
      vassert(n_extent_slots == 3);

      // Reasons to give up immediately:
      // User or tool asked us not to chase
      if (vex_control.guest_chase_thresh == 0)
         break;

      // Out of extent slots
      vassert(vge->n_used <= n_extent_slots);
      if (vge->n_used == n_extent_slots)
         break;

      // Almost out of available instructions
      vassert(instrs_avail >= 0);
      if (instrs_avail < 3)
         break;

      // Try for an extend.  What kind we do depends on how the current trace
      // ends.
      BlockEnd irsb_be;
      analyse_block_end(&irsb_be, irsb, guest_IP_sbstart, guest_word_type,
                        chase_into_ok, callback_opaque, debug_print);

      // Try for an extend based on an unconditional branch or call to a known
      // destination.
      if (irsb_be.tag == Be_UnCond) {
         if (debug_print) {
            vex_printf("\n-+-+ Unconditional follow (ext# %d) to 0x%llx "
                       "-+-+\n\n",
                       (Int)vge->n_used,
                       (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.UnCond.delta));
         }
         Int    bb_instrs_used  = 0;
         Bool   bb_verbose_seen = False;
         Addr   bb_base         = 0;
         UShort bb_len          = 0;
         // Disassemble the target block into a fresh SB; it is glued onto
         // |irsb| below.
         IRSB* bb
            = disassemble_basic_block_till_stop(
                 /*OUT*/ &bb_instrs_used, &bb_verbose_seen, &bb_base, &bb_len,
                 /*MOD*/ emptyIRSB(),
                 /*IN*/  irsb_be.Be.UnCond.delta,
                 instrs_avail, guest_IP_sbstart, host_endness, sigill_diag,
                 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
                 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
              );
         vassert(bb_instrs_used <= instrs_avail);

         /* Now we have to append 'bb' to 'irsb'. */
         concatenate_irsbs(irsb, bb);

         // Update instrs_used, extents, budget.
         instrs_used += bb_instrs_used;
         add_extent(vge, bb_base, bb_len);
         update_instr_budget(&instrs_avail, &verbose_mode,
                             bb_instrs_used, bb_verbose_seen);
      } // if (be.tag == Be_UnCond)

      // Try for an extend based on a conditional branch, specifically in the
      // hope of identifying and recovering an "A && B" condition spread across
      // two basic blocks.
      else if (irsb_be.tag == Be_Cond) {
         if (debug_print) {
            vex_printf("\n-+-+ (ext# %d) Considering cbranch to"
                       " SX=0x%llx FT=0x%llx -+-+\n\n",
                       (Int)vge->n_used,
                       (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.Cond.deltaSX),
                       (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.Cond.deltaFT));
         }
         // Speculatively disassemble only a few insns down each arm.
         const Int instrs_avail_spec = 3;

         if (debug_print) {
            vex_printf("-+-+ SPEC side exit -+-+\n\n");
         }
         Int    sx_instrs_used  = 0;
         Bool   sx_verbose_seen = False;
         Addr   sx_base         = 0;
         UShort sx_len          = 0;
         IRSB* sx_bb
            = disassemble_basic_block_till_stop(
                 /*OUT*/ &sx_instrs_used, &sx_verbose_seen, &sx_base, &sx_len,
                 /*MOD*/ emptyIRSB(),
                 /*IN*/  irsb_be.Be.Cond.deltaSX,
                 instrs_avail_spec, guest_IP_sbstart, host_endness, sigill_diag,
                 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
                 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
              );
         vassert(sx_instrs_used <= instrs_avail_spec);
         BlockEnd sx_be;
         analyse_block_end(&sx_be, sx_bb, guest_IP_sbstart, guest_word_type,
                           chase_into_ok, callback_opaque, debug_print);

         if (debug_print) {
            vex_printf("\n-+-+ SPEC fall through -+-+\n\n");
         }
         Int    ft_instrs_used  = 0;
         Bool   ft_verbose_seen = False;
         Addr   ft_base         = 0;
         UShort ft_len          = 0;
         IRSB* ft_bb
            = disassemble_basic_block_till_stop(
                 /*OUT*/ &ft_instrs_used, &ft_verbose_seen, &ft_base, &ft_len,
                 /*MOD*/ emptyIRSB(),
                 /*IN*/  irsb_be.Be.Cond.deltaFT,
                 instrs_avail_spec, guest_IP_sbstart, host_endness, sigill_diag,
                 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
                 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
              );
         vassert(ft_instrs_used <= instrs_avail_spec);
         BlockEnd ft_be;
         analyse_block_end(&ft_be, ft_bb, guest_IP_sbstart, guest_word_type,
                           chase_into_ok, callback_opaque, debug_print);

         /* In order for the transformation to be remotely valid, we need:
            - At least one of the sx_bb or ft_bb to be have a Be_Cond end.
            - sx_bb and ft_bb definitely don't form a loop.
         */
         Bool ok = sx_be.tag == Be_Cond || ft_be.tag == Be_Cond;
         if (ok) {
            ok = definitely_does_not_jump_to_delta(&sx_be,
                                                   irsb_be.Be.Cond.deltaFT)
                 || definitely_does_not_jump_to_delta(&ft_be,
                                                      irsb_be.Be.Cond.deltaSX);
         }

         // Check for other mutancy:
         //    irsb ft == sx, or the same for ft itself or sx itself
         if (ok) {
            if (irsb_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaFT
                || (sx_be.tag == Be_Cond
                    && sx_be.Be.Cond.deltaSX == sx_be.Be.Cond.deltaFT)
                || (ft_be.tag == Be_Cond
                    && ft_be.Be.Cond.deltaSX == ft_be.Be.Cond.deltaFT)) {
               ok = False;
            }
         }

         /* Now let's see if any of our four cases actually holds (viz, is this
            really an && idiom?) */
         UInt idiom = 4;
         if (ok) {
            vassert(irsb_be.tag == Be_Cond);
            UInt iom1 = 4/*invalid*/;
            if (sx_be.tag == Be_Cond) {
               /**/ if (sx_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaFT)
                  iom1 = 0;
               else if (sx_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaFT)
                  iom1 = 1;
            }
            UInt iom2 = 4/*invalid*/;
            if (ft_be.tag == Be_Cond) {
               /**/ if (ft_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaSX)
                  iom2 = 2;
               else if (ft_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaSX)
                  iom2 = 3;
            }
            /* We should only have identified at most one of the four idioms. */
            vassert(iom1 == 4 || iom2 == 4);
            idiom = (iom1 < 4) ? iom1 : (iom2 < 4 ? iom2 : 4);
            if (idiom == 4) {
               ok = False;
               if (debug_print) {
                  vex_printf("\n-+-+ &&-idiom not recognised, "
                             "giving up. -+-+\n\n");
               }
            }
         }

         if (ok) {
            vassert(idiom < 4);
            // "Normalise" the data so as to ensure we only have one of the four
            // idioms to transform.
            if (idiom == 2 || idiom == 3) {
               swap_sx_and_ft(irsb, &irsb_be);
#              define SWAP(_ty, _aa, _bb) \
                  do { _ty _tmp = _aa; _aa = _bb; _bb = _tmp; } while (0)
               SWAP(Int,      sx_instrs_used, ft_instrs_used);
               SWAP(Bool,     sx_verbose_seen, ft_verbose_seen);
               SWAP(Addr,     sx_base, ft_base);
               SWAP(UShort,   sx_len, ft_len);
               SWAP(IRSB*,    sx_bb, ft_bb);
               SWAP(BlockEnd, sx_be, ft_be);
#              undef SWAP
            }
            if (idiom == 1 || idiom == 3) {
               swap_sx_and_ft(sx_bb, &sx_be);
            }
            // After normalisation, only idiom 0 remains.
            vassert(sx_be.tag == Be_Cond);
            vassert(sx_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaFT);

            if (debug_print) {
               vex_printf("\n-+-+ After normalisation (idiom=%u) -+-+\n", idiom);
               vex_printf("\n-+-+ IRSB -+-+\n");
               ppIRSB(irsb);
               ppBlockEnd(&irsb_be);
               vex_printf("\n\n-+-+ SX -+-+\n");
               ppIRSB(sx_bb);
               ppBlockEnd(&sx_be);
               vex_printf("\n");
            }

            // Finally, check the sx block actually is guardable.
            ok = block_is_guardable(sx_bb);
            if (!ok && debug_print) {
               vex_printf("\n-+-+ SX not guardable, giving up. -+-+\n\n");
            }
         }

         if (ok) {
            if (0 || debug_print) {
               vex_printf("\n-+-+ DOING &&-TRANSFORM -+-+\n");
            }
            // Finally really actually do the transformation.
            // 0. remove the last Exit on irsb.
            // 1. Add irsb->tyenv->types_used to all the tmps in sx_bb,
            //    by calling deltaIRStmt on all stmts.
            // 2. Guard all stmts in sx_bb on irsb_be.Be.Cond.condSX,
            //    **including** the last stmt (which must be an Exit).  It's
            //    here that the And1 is generated.
            // 3. Copy all guarded stmts to the end of irsb.
            vassert(irsb->stmts_used >= 2);
            irsb->stmts_used--;
            Int delta = irsb->tyenv->types_used;

            // Append sx_bb's tyenv to irsb's
            for (Int i = 0; i < sx_bb->tyenv->types_used; i++) {
               (void)newIRTemp(irsb->tyenv, sx_bb->tyenv->types[i]);
            }

            for (Int i = 0; i < sx_bb->stmts_used; i++) {
               IRStmt* st = deepCopyIRStmt(sx_bb->stmts[i]);
               deltaIRStmt(st, delta);
               add_guarded_stmt_to_end_of(irsb, st, irsb_be.Be.Cond.condSX);
            }

            if (debug_print) {
               vex_printf("\n-+-+ FINAL RESULT -+-+\n\n");
               ppIRSB(irsb);
               vex_printf("\n");
            }

            // Update instrs_used, extents, budget.
            instrs_used += sx_instrs_used;
            add_extent(vge, sx_base, sx_len);
            update_instr_budget(&instrs_avail, &verbose_mode,
                                sx_instrs_used, sx_verbose_seen);
         }
         // Whether or not the transform happened, a conditional end stops
         // the extension loop.
         break;
      } // if (be.tag == Be_Cond)

      // We don't know any other way to extend the block.  Give up.
      else {
         break;
      }

   } // while (True)

   /* We're almost done.  The only thing that might need attending to is that
      a self-checking preamble may need to be created.  If so it gets placed
      in the 15 slots reserved above. */
   create_self_checks_as_needed(
      irsb, n_sc_extents, pxControl, callback_opaque, needs_self_check,
      vge, abiinfo_both, guest_word_type, selfcheck_idx, offB_GUEST_CMSTART,
      offB_GUEST_CMLEN, offB_GUEST_IP, guest_IP_sbstart
   );

   *n_guest_instrs = instrs_used;
   return irsb;
}
1523 /*--------------------------------------------------------------*/
1524 /*--- Functions called by self-checking translations ---*/
1525 /*--------------------------------------------------------------*/
1527 /* All of these are CLEAN HELPERs */
1528 /* All of these are CALLED FROM GENERATED CODE */
1530 /* Compute a checksum of host memory at [addr .. addr+len-1], as fast
1531 as possible. All _4al versions assume that the supplied address is
1532 4 aligned. All length values are in 4-byte chunks. These fns
1533 are called once for every use of a self-checking translation, so
1534 they need to be as fast as possible. */
1536 /* --- 32-bit versions, used only on 32-bit hosts --- */
1538 static inline UInt ROL32 ( UInt w, Int n ) {
1539 w = (w << n) | (w >> (32-n));
1540 return w;
/* Checksum n_w32s 32-bit words starting at first_w32 (which must be
   4-aligned).  Two accumulators are combined: sum1 is an xor/rotate
   mix, sum2 a plain sum; sum1 is folded with sum2 after each unrolled
   group of 4 words and after each trailing single word.  NB the exact
   fold points are part of the checksum definition -- the specialised
   _1 .. _12 variants below must reproduce them precisely. */
VEX_REGPARM(2)
static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   /* unrolled */
   while (n_w32s >= 4) {
      UInt w;
      w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      p += 4;
      n_w32s -= 4;
      sum1 ^= sum2;
   }
   while (n_w32s >= 1) {
      UInt w;
      w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      p += 1;
      n_w32s -= 1;
      sum1 ^= sum2;
   }
   return sum1 + sum2;
}
1569 /* Specialised versions of the above function */
/* As genericg_compute_checksum_4al with n_w32s == 1: one word, one fold.
   Must produce bit-identical results to the generic version. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 2: two single words,
   each followed by a fold, exactly as in the generic trailing loop. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 3: three single words,
   each followed by a fold. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 4: one unrolled group
   of 4 words, then a single fold, as in the generic main loop. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 5: one group of 4,
   then one single word; fold points match the generic version. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 6: one group of 4,
   then two single words; fold points match the generic version. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 7: one group of 4,
   then three single words; fold points match the generic version. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 8: two unrolled groups
   of 4, one fold after each group. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 9: two groups of 4,
   then one single word; fold points match the generic version. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 10: two groups of 4,
   then two single words; fold points match the generic version. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 11: two groups of 4,
   then three single words; fold points match the generic version. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[10]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_4al with n_w32s == 12: three unrolled
   groups of 4, one fold after each group. */
VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[10]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[11]; sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
1791 /* --- 64-bit versions, used only on 64-bit hosts --- */
1793 static inline ULong ROL64 ( ULong w, Int n ) {
1794 w = (w << n) | (w >> (64-n));
1795 return w;
/* 64-bit analogue of genericg_compute_checksum_4al: checksum n_w64s
   64-bit words starting at first_w64.  sum1 is an xor/rotate mix, sum2
   a plain sum; sum1 is folded with sum2 after each unrolled group of 4
   words and after each trailing single word.  The fold points are part
   of the checksum definition and the _1 .. _12 specialisations must
   reproduce them exactly. */
VEX_REGPARM(2)
static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   /* unrolled */
   while (n_w64s >= 4) {
      ULong w;
      w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      p += 4;
      n_w64s -= 4;
      sum1 ^= sum2;
   }
   while (n_w64s >= 1) {
      ULong w;
      w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      p += 1;
      n_w64s -= 1;
      sum1 ^= sum2;
   }
   return sum1 + sum2;
}
1824 /* Specialised versions of the above function */
/* As genericg_compute_checksum_8al with n_w64s == 1: one word, one fold.
   Must produce bit-identical results to the generic version. */
VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_8al with n_w64s == 2: two single words,
   each followed by a fold, as in the generic trailing loop. */
VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
/* As genericg_compute_checksum_8al with n_w64s == 3: three single words,
   each followed by a fold. */
VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}
1865 VEX_REGPARM(1)
1866 static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 )
1868 ULong sum1 = 0, sum2 = 0;
1869 ULong* p = (ULong*)first_w64;
1870 ULong w;
1871 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1872 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1873 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1874 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1875 sum1 ^= sum2;
1876 return sum1 + sum2;
1879 VEX_REGPARM(1)
1880 static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 )
1882 ULong sum1 = 0, sum2 = 0;
1883 ULong* p = (ULong*)first_w64;
1884 ULong w;
1885 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1886 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1887 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1888 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1889 sum1 ^= sum2;
1890 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1891 sum1 ^= sum2;
1892 return sum1 + sum2;
1895 VEX_REGPARM(1)
1896 static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 )
1898 ULong sum1 = 0, sum2 = 0;
1899 ULong* p = (ULong*)first_w64;
1900 ULong w;
1901 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1902 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1903 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1904 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1905 sum1 ^= sum2;
1906 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1907 sum1 ^= sum2;
1908 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1909 sum1 ^= sum2;
1910 return sum1 + sum2;
1913 VEX_REGPARM(1)
1914 static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 )
1916 ULong sum1 = 0, sum2 = 0;
1917 ULong* p = (ULong*)first_w64;
1918 ULong w;
1919 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1920 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1921 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1922 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1923 sum1 ^= sum2;
1924 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1925 sum1 ^= sum2;
1926 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1927 sum1 ^= sum2;
1928 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1929 sum1 ^= sum2;
1930 return sum1 + sum2;
1933 VEX_REGPARM(1)
1934 static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 )
1936 ULong sum1 = 0, sum2 = 0;
1937 ULong* p = (ULong*)first_w64;
1938 ULong w;
1939 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1940 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1941 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1942 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1943 sum1 ^= sum2;
1944 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1945 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1946 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1947 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1948 sum1 ^= sum2;
1949 return sum1 + sum2;
1952 VEX_REGPARM(1)
1953 static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 )
1955 ULong sum1 = 0, sum2 = 0;
1956 ULong* p = (ULong*)first_w64;
1957 ULong w;
1958 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1959 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1960 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1961 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1962 sum1 ^= sum2;
1963 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1964 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1965 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1966 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1967 sum1 ^= sum2;
1968 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1969 sum1 ^= sum2;
1970 return sum1 + sum2;
1973 VEX_REGPARM(1)
1974 static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 )
1976 ULong sum1 = 0, sum2 = 0;
1977 ULong* p = (ULong*)first_w64;
1978 ULong w;
1979 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1980 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1981 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1982 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1983 sum1 ^= sum2;
1984 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1985 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1986 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1987 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1988 sum1 ^= sum2;
1989 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1990 sum1 ^= sum2;
1991 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1992 sum1 ^= sum2;
1993 return sum1 + sum2;
1996 VEX_REGPARM(1)
1997 static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 )
1999 ULong sum1 = 0, sum2 = 0;
2000 ULong* p = (ULong*)first_w64;
2001 ULong w;
2002 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2003 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2004 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2005 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2006 sum1 ^= sum2;
2007 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2008 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2009 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2010 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2011 sum1 ^= sum2;
2012 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2013 sum1 ^= sum2;
2014 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2015 sum1 ^= sum2;
2016 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2017 sum1 ^= sum2;
2018 return sum1 + sum2;
2021 VEX_REGPARM(1)
2022 static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 )
2024 ULong sum1 = 0, sum2 = 0;
2025 ULong* p = (ULong*)first_w64;
2026 ULong w;
2027 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2028 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2029 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2030 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2031 sum1 ^= sum2;
2032 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2033 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2034 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2035 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2036 sum1 ^= sum2;
2037 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2038 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2039 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2040 w = p[11]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2041 sum1 ^= sum2;
2042 return sum1 + sum2;
2045 /*--------------------------------------------------------------------*/
2046 /*--- end guest_generic_bb_to_IR.c ---*/
2047 /*--------------------------------------------------------------------*/