'grail' fixes for s390x:
[valgrind.git] / VEX / priv / guest_generic_bb_to_IR.c
blob0b8f852ec0195887eda27502386cc2fb01163e72

/*--------------------------------------------------------------------*/
/*--- begin                               guest_generic_bb_to_IR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
34 #include "libvex_basictypes.h"
35 #include "libvex_ir.h"
36 #include "libvex.h"
37 #include "main_util.h"
38 #include "main_globals.h"
39 #include "guest_generic_bb_to_IR.h"
40 #include "ir_opt.h"
43 /*--------------------------------------------------------------*/
44 /*--- Forwards for fns called by self-checking translations ---*/
45 /*--------------------------------------------------------------*/
47 /* Forwards .. */
48 VEX_REGPARM(2) static UInt genericg_compute_checksum_4al ( HWord first_w32,
49 HWord n_w32s );
50 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
51 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
52 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
53 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
54 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
55 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
56 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
57 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
58 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
59 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
60 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
61 VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
63 VEX_REGPARM(2) static ULong genericg_compute_checksum_8al ( HWord first_w64,
64 HWord n_w64s );
65 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 );
66 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 );
67 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 );
68 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 );
69 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 );
70 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 );
71 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 );
72 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 );
73 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 );
74 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 );
75 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 );
76 VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 );
79 /*--------------------------------------------------------------*/
80 /*--- Creation of self-check IR ---*/
81 /*--------------------------------------------------------------*/
83 static void create_self_checks_as_needed(
84 /*MOD*/ IRSB* irsb,
85 /*OUT*/ UInt* n_sc_extents,
86 /*MOD*/ VexRegisterUpdates* pxControl,
87 /*MOD*/ void* callback_opaque,
88 /*IN*/ UInt (*needs_self_check)
89 (void*, /*MB_MOD*/VexRegisterUpdates*,
90 const VexGuestExtents*),
91 const VexGuestExtents* vge,
92 const VexAbiInfo* abiinfo_both,
93 const IRType guest_word_type,
94 const Int selfcheck_idx,
95 /*IN*/ Int offB_GUEST_CMSTART,
96 /*IN*/ Int offB_GUEST_CMLEN,
97 /*IN*/ Int offB_GUEST_IP,
98 const Addr guest_IP_sbstart
101 /* The scheme is to compute a rather crude checksum of the code
102 we're making a translation of, and add to the IR a call to a
103 helper routine which recomputes the checksum every time the
104 translation is run, and requests a retranslation if it doesn't
105 match. This is obviously very expensive and considerable
106 efforts are made to speed it up:
108 * the checksum is computed from all the naturally aligned
109 host-sized words that overlap the translated code. That means
110 it could depend on up to 7 bytes before and 7 bytes after
111 which aren't part of the translated area, and so if those
112 change then we'll unnecessarily have to discard and
113 retranslate. This seems like a pretty remote possibility and
114 it seems as if the benefit of not having to deal with the ends
115 of the range at byte precision far outweigh any possible extra
116 translations needed.
118 * there's a generic routine and 12 specialised cases, which
119 handle the cases of 1 through 12-word lengths respectively.
120 They seem to cover about 90% of the cases that occur in
121 practice.
123 We ask the caller, via needs_self_check, which of the 3 vge
124 extents needs a check, and only generate check code for those
125 that do.
128 Addr base2check;
129 UInt len2check;
130 HWord expectedhW;
131 IRTemp tistart_tmp, tilen_tmp, callresult_tmp, exitguard_tmp;
132 HWord VEX_REGPARM(2) (*fn_generic)(HWord, HWord);
133 HWord VEX_REGPARM(1) (*fn_spec)(HWord);
134 const HChar* nm_generic;
135 const HChar* nm_spec;
136 HWord fn_generic_entry = 0;
137 HWord fn_spec_entry = 0;
138 UInt host_word_szB = sizeof(HWord);
139 IRType host_word_type = Ity_INVALID;
141 UInt extents_needing_check
142 = needs_self_check(callback_opaque, pxControl, vge);
144 if (host_word_szB == 4) host_word_type = Ity_I32;
145 if (host_word_szB == 8) host_word_type = Ity_I64;
146 vassert(host_word_type != Ity_INVALID);
148 vassert(vge->n_used >= 1 && vge->n_used <= 3);
150 /* Caller shouldn't claim that nonexistent extents need a
151 check. */
152 vassert((extents_needing_check >> vge->n_used) == 0);
154 /* Guest addresses as IRConsts. Used in self-checks to specify the
155 restart-after-discard point. */
156 IRConst* guest_IP_sbstart_IRConst
157 = guest_word_type==Ity_I32
158 ? IRConst_U32(toUInt(guest_IP_sbstart))
159 : IRConst_U64(guest_IP_sbstart);
161 const Int n_extent_slots = sizeof(vge->base) / sizeof(vge->base[0]);
162 vassert(n_extent_slots == 3);
164 vassert(selfcheck_idx + (n_extent_slots - 1) * 7 + 6 < irsb->stmts_used);
166 for (Int i = 0; i < vge->n_used; i++) {
167 /* Do we need to generate a check for this extent? */
168 if ((extents_needing_check & (1 << i)) == 0)
169 continue;
171 /* Tell the caller */
172 (*n_sc_extents)++;
174 /* the extent we're generating a check for */
175 base2check = vge->base[i];
176 len2check = vge->len[i];
178 /* stay sane */
179 vassert(len2check >= 0 && len2check < 2000/*arbitrary*/);
181 /* Skip the check if the translation involved zero bytes */
182 if (len2check == 0)
183 continue;
185 HWord first_hW = ((HWord)base2check)
186 & ~(HWord)(host_word_szB-1);
187 HWord last_hW = (((HWord)base2check) + len2check - 1)
188 & ~(HWord)(host_word_szB-1);
189 vassert(first_hW <= last_hW);
190 HWord hW_diff = last_hW - first_hW;
191 vassert(0 == (hW_diff & (host_word_szB-1)));
192 HWord hWs_to_check = (hW_diff + host_word_szB) / host_word_szB;
193 vassert(hWs_to_check > 0
194 && hWs_to_check < 2004/*arbitrary*/ / host_word_szB);
196 /* vex_printf("%lx %lx %ld\n", first_hW, last_hW, hWs_to_check); */
198 if (host_word_szB == 8) {
199 fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
200 genericg_compute_checksum_8al;
201 nm_generic = "genericg_compute_checksum_8al";
202 } else {
203 fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
204 genericg_compute_checksum_4al;
205 nm_generic = "genericg_compute_checksum_4al";
208 fn_spec = NULL;
209 nm_spec = NULL;
211 if (host_word_szB == 8) {
212 const HChar* nm = NULL;
213 ULong VEX_REGPARM(1) (*fn)(HWord) = NULL;
214 switch (hWs_to_check) {
215 case 1: fn = genericg_compute_checksum_8al_1;
216 nm = "genericg_compute_checksum_8al_1"; break;
217 case 2: fn = genericg_compute_checksum_8al_2;
218 nm = "genericg_compute_checksum_8al_2"; break;
219 case 3: fn = genericg_compute_checksum_8al_3;
220 nm = "genericg_compute_checksum_8al_3"; break;
221 case 4: fn = genericg_compute_checksum_8al_4;
222 nm = "genericg_compute_checksum_8al_4"; break;
223 case 5: fn = genericg_compute_checksum_8al_5;
224 nm = "genericg_compute_checksum_8al_5"; break;
225 case 6: fn = genericg_compute_checksum_8al_6;
226 nm = "genericg_compute_checksum_8al_6"; break;
227 case 7: fn = genericg_compute_checksum_8al_7;
228 nm = "genericg_compute_checksum_8al_7"; break;
229 case 8: fn = genericg_compute_checksum_8al_8;
230 nm = "genericg_compute_checksum_8al_8"; break;
231 case 9: fn = genericg_compute_checksum_8al_9;
232 nm = "genericg_compute_checksum_8al_9"; break;
233 case 10: fn = genericg_compute_checksum_8al_10;
234 nm = "genericg_compute_checksum_8al_10"; break;
235 case 11: fn = genericg_compute_checksum_8al_11;
236 nm = "genericg_compute_checksum_8al_11"; break;
237 case 12: fn = genericg_compute_checksum_8al_12;
238 nm = "genericg_compute_checksum_8al_12"; break;
239 default: break;
241 fn_spec = (VEX_REGPARM(1) HWord(*)(HWord)) fn;
242 nm_spec = nm;
243 } else {
244 const HChar* nm = NULL;
245 UInt VEX_REGPARM(1) (*fn)(HWord) = NULL;
246 switch (hWs_to_check) {
247 case 1: fn = genericg_compute_checksum_4al_1;
248 nm = "genericg_compute_checksum_4al_1"; break;
249 case 2: fn = genericg_compute_checksum_4al_2;
250 nm = "genericg_compute_checksum_4al_2"; break;
251 case 3: fn = genericg_compute_checksum_4al_3;
252 nm = "genericg_compute_checksum_4al_3"; break;
253 case 4: fn = genericg_compute_checksum_4al_4;
254 nm = "genericg_compute_checksum_4al_4"; break;
255 case 5: fn = genericg_compute_checksum_4al_5;
256 nm = "genericg_compute_checksum_4al_5"; break;
257 case 6: fn = genericg_compute_checksum_4al_6;
258 nm = "genericg_compute_checksum_4al_6"; break;
259 case 7: fn = genericg_compute_checksum_4al_7;
260 nm = "genericg_compute_checksum_4al_7"; break;
261 case 8: fn = genericg_compute_checksum_4al_8;
262 nm = "genericg_compute_checksum_4al_8"; break;
263 case 9: fn = genericg_compute_checksum_4al_9;
264 nm = "genericg_compute_checksum_4al_9"; break;
265 case 10: fn = genericg_compute_checksum_4al_10;
266 nm = "genericg_compute_checksum_4al_10"; break;
267 case 11: fn = genericg_compute_checksum_4al_11;
268 nm = "genericg_compute_checksum_4al_11"; break;
269 case 12: fn = genericg_compute_checksum_4al_12;
270 nm = "genericg_compute_checksum_4al_12"; break;
271 default: break;
273 fn_spec = (VEX_REGPARM(1) HWord(*)(HWord))fn;
274 nm_spec = nm;
277 expectedhW = fn_generic( first_hW, hWs_to_check );
278 /* If we got a specialised version, check it produces the same
279 result as the generic version! */
280 if (fn_spec) {
281 vassert(nm_spec);
282 vassert(expectedhW == fn_spec( first_hW ));
283 } else {
284 vassert(!nm_spec);
287 /* Set CMSTART and CMLEN. These will describe to the despatcher
288 the area of guest code to invalidate should we exit with a
289 self-check failure. */
290 tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
291 tilen_tmp = newIRTemp(irsb->tyenv, guest_word_type);
293 IRConst* base2check_IRConst
294 = guest_word_type==Ity_I32 ? IRConst_U32(toUInt(base2check))
295 : IRConst_U64(base2check);
296 IRConst* len2check_IRConst
297 = guest_word_type==Ity_I32 ? IRConst_U32(len2check)
298 : IRConst_U64(len2check);
300 IRStmt** stmt0 = &irsb->stmts[selfcheck_idx + i * 7 + 0];
301 IRStmt** stmt1 = &irsb->stmts[selfcheck_idx + i * 7 + 1];
302 IRStmt** stmt2 = &irsb->stmts[selfcheck_idx + i * 7 + 2];
303 IRStmt** stmt3 = &irsb->stmts[selfcheck_idx + i * 7 + 3];
304 IRStmt** stmt4 = &irsb->stmts[selfcheck_idx + i * 7 + 4];
305 IRStmt** stmt5 = &irsb->stmts[selfcheck_idx + i * 7 + 5];
306 IRStmt** stmt6 = &irsb->stmts[selfcheck_idx + i * 7 + 6];
307 vassert((*stmt0)->tag == Ist_NoOp);
308 vassert((*stmt1)->tag == Ist_NoOp);
309 vassert((*stmt2)->tag == Ist_NoOp);
310 vassert((*stmt3)->tag == Ist_NoOp);
311 vassert((*stmt4)->tag == Ist_NoOp);
312 vassert((*stmt5)->tag == Ist_NoOp);
313 vassert((*stmt6)->tag == Ist_NoOp);
315 *stmt0 = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(base2check_IRConst) );
316 *stmt1 = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
317 *stmt2 = IRStmt_Put( offB_GUEST_CMSTART, IRExpr_RdTmp(tistart_tmp) );
318 *stmt3 = IRStmt_Put( offB_GUEST_CMLEN, IRExpr_RdTmp(tilen_tmp) );
320 /* Generate the entry point descriptors */
321 if (abiinfo_both->host_ppc_calls_use_fndescrs) {
322 HWord* descr = (HWord*)fn_generic;
323 fn_generic_entry = descr[0];
324 if (fn_spec) {
325 descr = (HWord*)fn_spec;
326 fn_spec_entry = descr[0];
327 } else {
328 fn_spec_entry = (HWord)NULL;
330 } else {
331 fn_generic_entry = (HWord)fn_generic;
332 if (fn_spec) {
333 fn_spec_entry = (HWord)fn_spec;
334 } else {
335 fn_spec_entry = (HWord)NULL;
339 /* Generate the call to the relevant function, and the comparison of
340 the result against the expected value. */
341 IRExpr* callexpr = NULL;
342 if (fn_spec) {
343 callexpr = mkIRExprCCall(
344 host_word_type, 1/*regparms*/,
345 nm_spec, (void*)fn_spec_entry,
346 mkIRExprVec_1(
347 mkIRExpr_HWord( (HWord)first_hW )
350 } else {
351 callexpr = mkIRExprCCall(
352 host_word_type, 2/*regparms*/,
353 nm_generic, (void*)fn_generic_entry,
354 mkIRExprVec_2(
355 mkIRExpr_HWord( (HWord)first_hW ),
356 mkIRExpr_HWord( (HWord)hWs_to_check )
361 callresult_tmp = newIRTemp(irsb->tyenv, host_word_type);
362 *stmt4 = IRStmt_WrTmp(callresult_tmp, callexpr);
364 exitguard_tmp = newIRTemp(irsb->tyenv, Ity_I1);
365 *stmt5 = IRStmt_WrTmp(
366 exitguard_tmp,
367 IRExpr_Binop(
368 host_word_type==Ity_I64 ? Iop_CmpNE64 : Iop_CmpNE32,
369 IRExpr_RdTmp(callresult_tmp),
370 host_word_type==Ity_I64
371 ? IRExpr_Const(IRConst_U64(expectedhW))
372 : IRExpr_Const(IRConst_U32(expectedhW))));
374 *stmt6 = IRStmt_Exit(
375 IRExpr_RdTmp(exitguard_tmp),
376 Ijk_InvalICache,
377 /* Where we must restart if there's a failure: at the
378 first extent, regardless of which extent the failure
379 actually happened in. */
380 guest_IP_sbstart_IRConst,
381 offB_GUEST_IP
383 } /* for (i = 0; i < vge->n_used; i++) */
385 for (Int i = vge->n_used;
386 i < sizeof(vge->base) / sizeof(vge->base[0]); i++) {
387 IRStmt* stmt0 = irsb->stmts[selfcheck_idx + i * 7 + 0];
388 IRStmt* stmt1 = irsb->stmts[selfcheck_idx + i * 7 + 1];
389 IRStmt* stmt2 = irsb->stmts[selfcheck_idx + i * 7 + 2];
390 IRStmt* stmt3 = irsb->stmts[selfcheck_idx + i * 7 + 3];
391 IRStmt* stmt4 = irsb->stmts[selfcheck_idx + i * 7 + 4];
392 IRStmt* stmt5 = irsb->stmts[selfcheck_idx + i * 7 + 5];
393 IRStmt* stmt6 = irsb->stmts[selfcheck_idx + i * 7 + 6];
394 vassert(stmt0->tag == Ist_NoOp);
395 vassert(stmt1->tag == Ist_NoOp);
396 vassert(stmt2->tag == Ist_NoOp);
397 vassert(stmt3->tag == Ist_NoOp);
398 vassert(stmt4->tag == Ist_NoOp);
399 vassert(stmt5->tag == Ist_NoOp);
400 vassert(stmt6->tag == Ist_NoOp);
406 /*--------------------------------------------------------------*/
407 /*--- To do with guarding (conditionalisation) of IRStmts ---*/
408 /*--------------------------------------------------------------*/
410 // Is it possible to guard |e|? Meaning, is it safe (exception-free) to compute
411 // |e| and ignore the result? Since |e| is by definition otherwise
412 // side-effect-free, we don't have to ask about any other effects caused by
413 // first computing |e| and then ignoring the result.
414 static Bool expr_is_guardable ( const IRExpr* e )
416 switch (e->tag) {
417 case Iex_Load:
418 return False;
419 case Iex_Unop:
420 return !primopMightTrap(e->Iex.Unop.op);
421 case Iex_Binop:
422 return !primopMightTrap(e->Iex.Binop.op);
423 case Iex_Triop:
424 return !primopMightTrap(e->Iex.Triop.details->op);
425 case Iex_ITE:
426 case Iex_CCall:
427 case Iex_Get:
428 case Iex_Const:
429 case Iex_RdTmp:
430 return True;
431 default:
432 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
433 vpanic("expr_is_guardable: unhandled expr");
437 // Is it possible to guard |st|? Meaning, is it possible to replace |st| by
438 // some other sequence of IRStmts which have the same effect on the architected
439 // state when the guard is true, but when it is false, have no effect on the
440 // architected state and are guaranteed not to cause any exceptions?
442 // Note that this isn't as aggressive as it could be: it sometimes returns False
443 // in cases where |st| is actually guardable. This routine must coordinate
444 // closely with add_guarded_stmt_to_end_of below, in the sense that that routine
445 // must be able to handle any |st| for which this routine returns True.
446 static Bool stmt_is_guardable ( const IRStmt* st )
448 switch (st->tag) {
449 // These are easily guarded.
450 case Ist_NoOp:
451 case Ist_IMark:
452 case Ist_Put:
453 return True;
454 // These are definitely not guardable, or at least it's way too much
455 // hassle to do so.
456 case Ist_CAS:
457 case Ist_LLSC:
458 case Ist_MBE:
459 return False;
460 // These could be guarded, with some effort, if really needed, but
461 // currently aren't guardable.
462 case Ist_Store:
463 case Ist_StoreG:
464 case Ist_Exit:
465 return False;
466 // This is probably guardable, but it depends on the RHS of the
467 // assignment.
468 case Ist_WrTmp:
469 return expr_is_guardable(st->Ist.WrTmp.data);
470 default:
471 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
472 vpanic("stmt_is_guardable: unhandled stmt");
476 // Are all stmts (but not the end dst value) in |bb| guardable, per
477 // stmt_is_guardable?
478 static Bool block_is_guardable ( const IRSB* bb )
480 Int i = bb->stmts_used;
481 vassert(i >= 2); // Must have at least: IMark, side Exit (at the end)
482 i--;
483 vassert(bb->stmts[i]->tag == Ist_Exit);
484 i--;
485 for (; i >= 0; i--) {
486 if (!stmt_is_guardable(bb->stmts[i]))
487 return False;
489 return True;
492 // Guard |st| with |guard| and add it to |bb|. This must be able to handle any
493 // |st| for which stmt_is_guardable returns True.
494 static void add_guarded_stmt_to_end_of ( /*MOD*/IRSB* bb,
495 /*IN*/ IRStmt* st, IRTemp guard )
497 switch (st->tag) {
498 case Ist_NoOp:
499 case Ist_IMark:
500 case Ist_WrTmp:
501 addStmtToIRSB(bb, st);
502 break;
503 case Ist_Put: {
504 // Put(offs, e) ==> Put(offs, ITE(guard, e, Get(offs, sizeof(e))))
505 // Which when flattened out is:
506 // t1 = Get(offs, sizeof(e))
507 // t2 = ITE(guard, e, t2)
508 // Put(offs, t2)
509 Int offset = st->Ist.Put.offset;
510 IRExpr* e = st->Ist.Put.data;
511 IRType ty = typeOfIRExpr(bb->tyenv, e);
512 IRTemp t1 = newIRTemp(bb->tyenv, ty);
513 IRTemp t2 = newIRTemp(bb->tyenv, ty);
514 addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Get(offset, ty)));
515 addStmtToIRSB(bb, IRStmt_WrTmp(t2, IRExpr_ITE(IRExpr_RdTmp(guard),
516 e, IRExpr_RdTmp(t1))));
517 addStmtToIRSB(bb, IRStmt_Put(offset, IRExpr_RdTmp(t2)));
518 break;
520 case Ist_Exit: {
521 // Exit(xguard, dst, jk, offsIP)
522 // ==> t1 = And1(xguard, guard)
523 // Exit(t1, dst, jk, offsIP)
524 IRExpr* xguard = st->Ist.Exit.guard;
525 IRTemp t1 = newIRTemp(bb->tyenv, Ity_I1);
526 addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Binop(Iop_And1, xguard,
527 IRExpr_RdTmp(guard))));
528 addStmtToIRSB(bb, IRStmt_Exit(IRExpr_RdTmp(t1), st->Ist.Exit.jk,
529 st->Ist.Exit.dst, st->Ist.Exit.offsIP));
530 break;
532 default:
533 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
534 vpanic("add_guarded_stmt_to_end_of: unhandled stmt");
539 /*--------------------------------------------------------------*/
540 /*--- Analysis of block ends ---*/
541 /*--------------------------------------------------------------*/
/* How a block ends, as far as this file is concerned.  Numbering starts at
   1, so a zero-filled BlockEnd carries no valid tag. */
typedef enum {
   Be_Other = 1, /* Block end isn't of interest to us */
   Be_Uncond,    /* Unconditional branch to known destination, unassisted */
   Be_Cond       /* Conditional branch to known destinations, unassisted */
} BlockEndTag;
551 typedef
552 struct {
553 BlockEndTag tag;
554 union {
555 struct {
556 } Other;
557 struct {
558 Long delta;
559 } Uncond;
560 struct {
561 IRTemp condSX;
562 Long deltaSX;
563 Long deltaFT;
564 } Cond;
565 } Be;
567 BlockEnd;
569 static void ppBlockEnd ( const BlockEnd* be )
571 switch (be->tag) {
572 case Be_Other:
573 vex_printf("Other");
574 break;
575 case Be_Uncond:
576 vex_printf("Uncond{delta=%lld}", be->Be.Uncond.delta);
577 break;
578 case Be_Cond:
579 vex_printf("Cond{condSX=");
580 ppIRTemp(be->Be.Cond.condSX);
581 vex_printf(", deltaSX=%lld, deltaFT=%lld}",
582 be->Be.Cond.deltaSX, be->Be.Cond.deltaFT);
583 break;
584 default:
585 vassert(0);
589 // Return True if |be| definitely does not jump to |delta|. In case of
590 // doubt, returns False.
591 static Bool definitely_does_not_jump_to_delta ( const BlockEnd* be, Long delta )
593 switch (be->tag) {
594 case Be_Other:
595 return False;
596 case Be_Uncond:
597 return be->Be.Uncond.delta != delta;
598 case Be_Cond:
599 return be->Be.Cond.deltaSX != delta && be->Be.Cond.deltaFT != delta;
600 default:
601 vassert(0);
605 static Addr irconst_to_Addr ( const IRConst* con, const IRType guest_word_type )
607 switch (con->tag) {
608 case Ico_U32:
609 vassert(guest_word_type == Ity_I32);
610 return con->Ico.U32;
611 case Ico_U64:
612 vassert(guest_word_type == Ity_I64);
613 return con->Ico.U64;
614 default:
615 vassert(0);
619 static Bool irconst_to_maybe_delta ( /*OUT*/Long* delta,
620 const IRConst* known_dst,
621 const Addr guest_IP_sbstart,
622 const IRType guest_word_type,
623 Bool (*chase_into_ok)(void*,Addr),
624 void* callback_opaque )
626 vassert(typeOfIRConst(known_dst) == guest_word_type);
628 *delta = 0;
630 // Extract the destination guest address.
631 Addr dst_ga = irconst_to_Addr(known_dst, guest_word_type);
633 // Check we're allowed to chase into it.
634 if (!chase_into_ok(callback_opaque, dst_ga))
635 return False;
637 Addr delta_as_Addr = dst_ga - guest_IP_sbstart;
638 // Either |delta_as_Addr| is a 64-bit value, in which case copy it directly
639 // to |delta|, or it's a 32 bit value, in which case sign extend it.
640 *delta = sizeof(Addr) == 8 ? (Long)delta_as_Addr : (Long)(Int)delta_as_Addr;
641 return True;
644 static Bool any_overlap ( Int start1, Int len1, Int start2, Int len2 )
646 vassert(len1 > 0 && len2 > 0);
647 vassert(start1 >= 0 && start2 >= 0);
648 if (start1 + len1 <= start2) return False;
649 if (start2 + len2 <= start1) return False;
650 return True;
653 /* Scan |stmts|, starting at |scan_start| and working backwards, to detect the
654 case where there are no IRStmt_Exits before we find the IMark. In other
655 words, it scans backwards through some prefix of an instruction's IR to see
656 if there is an exit there.
658 It also checks for explicit PUTs to the PC, via Ist_Put, Ist_PutI or
659 Ist_Dirty. I suspect this is ridiculous overkill, but is here for safety. */
660 static Bool insn_has_no_other_exits_or_PUTs_to_PC (
661 IRStmt** const stmts, Int scan_start,
662 Int offB_GUEST_IP, Int szB_GUEST_IP,
663 const IRTypeEnv* tyenv
666 Bool found_exit = False;
667 Bool found_PUT_to_PC = False;
668 Int i = scan_start;
669 while (True) {
670 if (i < 0)
671 break;
672 const IRStmt* st = stmts[i];
673 if (st->tag == Ist_IMark) {
674 // We're back at the start of the insn. Stop searching.
675 break;
677 if (st->tag == Ist_Exit) {
678 found_exit = True;
679 break;
681 if (st->tag == Ist_Put) {
682 Int offB = st->Ist.Put.offset;
683 Int szB = sizeofIRType(typeOfIRExpr(tyenv, st->Ist.Put.data));
684 if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
685 found_PUT_to_PC = True;
686 break;
689 if (st->tag == Ist_PutI) {
690 const IRPutI* details = st->Ist.PutI.details;
691 const IRRegArray* descr = details->descr;
692 Int offB = descr->base;
693 Int szB = descr->nElems * sizeofIRType(descr->elemTy);
694 if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
695 found_PUT_to_PC = True;
696 break;
699 if (st->tag == Ist_Dirty) {
700 vassert(!found_PUT_to_PC);
701 const IRDirty* details = st->Ist.Dirty.details;
702 for (Int j = 0; j < details->nFxState; j++) {
703 const IREffect fx = details->fxState[j].fx;
704 const Int offset = details->fxState[j].offset;
705 const Int size = details->fxState[j].size;
706 const Int nRepeats = details->fxState[j].nRepeats;
707 const Int repeatLen = details->fxState[j].repeatLen;
708 if (fx == Ifx_Write || fx == Ifx_Modify) {
709 for (Int k = 0; k < nRepeats; k++) {
710 Int offB = offset + k * repeatLen;
711 Int szB = size;
712 if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
713 found_PUT_to_PC = True;
718 if (found_PUT_to_PC) {
719 break;
722 i--;
724 // We expect IR for all instructions to start with an IMark.
725 vassert(i >= 0);
726 return !found_exit && !found_PUT_to_PC;
729 static void analyse_block_end ( /*OUT*/BlockEnd* be, const IRSB* irsb,
730 const Addr guest_IP_sbstart,
731 const IRType guest_word_type,
732 Bool (*chase_into_ok)(void*,Addr),
733 void* callback_opaque,
734 Int offB_GUEST_IP,
735 Int szB_GUEST_IP,
736 Bool debug_print )
738 vex_bzero(be, sizeof(*be));
740 // -- Conditional branch to known destination
741 /* In short, detect the following end form:
742 ------ IMark(0x4002009, 2, 0) ------
743 // Zero or more non-exit statements
744 if (t14) { PUT(184) = 0x4002040:I64; exit-Boring }
745 PUT(184) = 0x400200B:I64; exit-Boring
746 Checks:
747 - Both transfers are 'boring'
748 - Both dsts are constants
749 - The cond is non-constant (an IRExpr_Tmp)
750 - There are no other exits in this instruction
751 - The client allows chasing into both destinations
753 if (irsb->jumpkind == Ijk_Boring && irsb->stmts_used >= 2) {
754 const IRStmt* maybe_exit = irsb->stmts[irsb->stmts_used - 1];
755 if (maybe_exit->tag == Ist_Exit
756 && maybe_exit->Ist.Exit.guard->tag == Iex_RdTmp
757 && maybe_exit->Ist.Exit.jk == Ijk_Boring
758 && irsb->next->tag == Iex_Const
759 && insn_has_no_other_exits_or_PUTs_to_PC(
760 irsb->stmts, irsb->stmts_used - 2,
761 offB_GUEST_IP, szB_GUEST_IP, irsb->tyenv)) {
762 vassert(maybe_exit->Ist.Exit.offsIP == irsb->offsIP);
763 IRConst* dst_SX = maybe_exit->Ist.Exit.dst;
764 IRConst* dst_FT = irsb->next->Iex.Const.con;
765 IRTemp cond_SX = maybe_exit->Ist.Exit.guard->Iex.RdTmp.tmp;
766 Long delta_SX = 0;
767 Long delta_FT = 0;
768 Bool ok_SX
769 = irconst_to_maybe_delta(&delta_SX, dst_SX,
770 guest_IP_sbstart, guest_word_type,
771 chase_into_ok, callback_opaque);
772 Bool ok_FT
773 = irconst_to_maybe_delta(&delta_FT, dst_FT,
774 guest_IP_sbstart, guest_word_type,
775 chase_into_ok, callback_opaque);
776 if (ok_SX && ok_FT) {
777 be->tag = Be_Cond;
778 be->Be.Cond.condSX = cond_SX;
779 be->Be.Cond.deltaSX = delta_SX;
780 be->Be.Cond.deltaFT = delta_FT;
781 goto out;
786 // -- Unconditional branch/call to known destination
787 /* Four checks:
788 - The transfer is 'boring' or 'call', so that no assistance is needed
789 - The dst is a constant (known at jit time)
790 - There are no other exits in this instruction. In other words, the
791 transfer is unconditional.
792 - The client allows chasing into the destination.
794 if ((irsb->jumpkind == Ijk_Boring || irsb->jumpkind == Ijk_Call)
795 && irsb->next->tag == Iex_Const) {
796 if (insn_has_no_other_exits_or_PUTs_to_PC(
797 irsb->stmts, irsb->stmts_used - 1,
798 offB_GUEST_IP, szB_GUEST_IP, irsb->tyenv)) {
799 // We've got the right pattern. Check whether we can chase into the
800 // destination, and if so convert that to a delta value.
801 const IRConst* known_dst = irsb->next->Iex.Const.con;
802 Long delta = 0;
803 // This call also checks the type of the dst addr, and that the client
804 // allows chasing into it.
805 Bool ok = irconst_to_maybe_delta(&delta, known_dst,
806 guest_IP_sbstart, guest_word_type,
807 chase_into_ok, callback_opaque);
808 if (ok) {
809 be->tag = Be_Uncond;
810 be->Be.Uncond.delta = delta;
811 goto out;
816 // Not identified as anything of interest to us.
817 be->tag = Be_Other;
819 out:
820 if (debug_print) {
821 vex_printf("\nBlockEnd: ");
822 ppBlockEnd(be);
823 vex_printf("\n");
828 /*--------------------------------------------------------------*/
829 /*--- Disassembly of basic (not super) blocks ---*/
830 /*--------------------------------------------------------------*/
832 /* Disassemble instructions, starting at |&guest_code[delta_IN]|, into |irbb|,
833 and terminate the block properly. At most |n_instrs_allowed_IN| may be
834 disassembled, and this function may choose to disassemble fewer.
836 Also do minimal simplifications on the resulting block, so as to convert the
837 end of the block into something that |analyse_block_end| can reliably
838 recognise.
840 |irbb| will both be modified, and replaced by a new, simplified version,
841 which is returned.
843 static IRSB* disassemble_basic_block_till_stop(
844 /*OUT*/ Int* n_instrs, // #instrs actually used
845 /*OUT*/ Bool* is_verbose_seen, // did we get a 'verbose' hint?
846 /*OUT*/ Addr* extent_base, // VexGuestExtents[..].base
847 /*OUT*/ UShort* extent_len, // VexGuestExtents[..].len
848 /*MOD*/ IRSB* irbb,
849 const Long delta_IN,
850 const Int n_instrs_allowed_IN,
851 const Addr guest_IP_sbstart,
852 const VexEndness host_endness,
853 const Bool sigill_diag,
854 const VexArch arch_guest,
855 const VexArchInfo* archinfo_guest,
856 const VexAbiInfo* abiinfo_both,
857 const IRType guest_word_type,
858 const Bool debug_print,
859 const DisOneInstrFn dis_instr_fn,
860 const UChar* guest_code,
861 const Int offB_GUEST_IP
864 /* This is the max instrs we allow in the block. It starts off at
865 |n_instrs_allowed_IN| but we may choose to reduce it in the case where the
866 instruction disassembler returns an 'is verbose' hint. This is so as to
867 ensure that the JIT doesn't run out of space. See bug 375839 for a
868 motivating example. */
870 /* Process instructions. */
871 Long delta = delta_IN;
872 Int n_instrs_allowed = n_instrs_allowed_IN;
874 *n_instrs = 0;
875 *is_verbose_seen = False;
876 *extent_base = guest_IP_sbstart + delta;
877 *extent_len = 0;
879 while (True) {
880 vassert(*n_instrs < n_instrs_allowed);
882 /* This is the IP of the instruction we're just about to deal
883 with. */
884 Addr guest_IP_curr_instr = guest_IP_sbstart + delta;
886 /* This is the irbb statement array index of the first stmt in
887 this insn. That will always be the instruction-mark
888 descriptor. */
889 Int first_stmt_idx = irbb->stmts_used;
891 /* Add an instruction-mark statement. We won't know until after
892 disassembling the instruction how long it instruction is, so
893 just put in a zero length and we'll fix it up later.
895 On ARM, the least significant bit of the instr address
896 distinguishes ARM vs Thumb instructions. All instructions
897 actually start on at least 2-aligned addresses. So we need
898 to ignore the bottom bit of the insn address when forming the
899 IMark's address field, but put that bottom bit in the delta
900 field, so that comparisons against guest_R15T for Thumb can
901 be done correctly. By inspecting the delta field,
902 instruction processors can determine whether the instruction
903 was originally Thumb or ARM. For more details of this
904 convention, see comments on definition of guest_R15T in
905 libvex_guest_arm.h. */
906 if (arch_guest == VexArchARM && (guest_IP_curr_instr & 1)) {
907 /* Thumb insn => mask out the T bit, but put it in delta */
908 addStmtToIRSB( irbb,
909 IRStmt_IMark(guest_IP_curr_instr & ~(Addr)1,
910 0, /* len */
911 1 /* delta */
914 } else {
915 /* All other targets: store IP as-is, and set delta to zero. */
916 addStmtToIRSB( irbb,
917 IRStmt_IMark(guest_IP_curr_instr,
918 0, /* len */
919 0 /* delta */
924 if (debug_print && *n_instrs > 0)
925 vex_printf("\n");
927 /* Finally, actually disassemble an instruction. */
928 vassert(irbb->next == NULL);
929 DisResult dres
930 = dis_instr_fn ( irbb, guest_code, delta, guest_IP_curr_instr,
931 arch_guest, archinfo_guest, abiinfo_both,
932 host_endness, sigill_diag );
934 /* stay sane ... */
935 vassert(dres.whatNext == Dis_StopHere || dres.whatNext == Dis_Continue);
936 /* ... disassembled insn length is sane ... */
937 vassert(dres.len >= 0 && dres.len <= 24);
939 /* If the disassembly function passed us a hint, take note of it. */
940 if (LIKELY(dres.hint == Dis_HintNone)) {
941 /* Do nothing */
942 } else {
943 vassert(dres.hint == Dis_HintVerbose);
944 /* The current insn is known to be verbose. Lower the max insns limit
945 if necessary so as to avoid running the JIT out of space in the
946 event that we've encountered the start of a long sequence of them.
947 This is expected to be a very rare event. In any case the remaining
948 limit (in the default setting, 30 insns) is still so high that most
949 blocks will terminate anyway before then. So this is very unlikely
950 to give a perf hit in practice. See bug 375839 for the motivating
951 example. */
952 if (!(*is_verbose_seen)) {
953 *is_verbose_seen = True;
954 // Halve the number of allowed insns, but only above 2
955 if (n_instrs_allowed > 2) {
956 n_instrs_allowed = ((n_instrs_allowed - 2) / 2) + 2;
957 //vassert(*n_instrs <= n_instrs_allowed);
962 /* Fill in the insn-mark length field. */
963 vassert(first_stmt_idx >= 0 && first_stmt_idx < irbb->stmts_used);
964 IRStmt* imark = irbb->stmts[first_stmt_idx];
965 vassert(imark);
966 vassert(imark->tag == Ist_IMark);
967 vassert(imark->Ist.IMark.len == 0);
968 imark->Ist.IMark.len = dres.len;
970 /* Print the resulting IR, if needed. */
971 if (vex_traceflags & VEX_TRACE_FE) {
972 for (Int i = first_stmt_idx; i < irbb->stmts_used; i++) {
973 vex_printf(" ");
974 ppIRStmt(irbb->stmts[i]);
975 vex_printf("\n");
979 /* Individual insn disassembly may not mess with irbb->next.
980 This function is the only place where it can be set. */
981 vassert(irbb->next == NULL);
982 vassert(irbb->jumpkind == Ijk_Boring);
983 vassert(irbb->offsIP == 0);
985 /* Individual insn disassembly must finish the IR for each
986 instruction with an assignment to the guest PC. */
987 vassert(first_stmt_idx < irbb->stmts_used);
988 /* it follows that irbb->stmts_used must be > 0 */
989 { IRStmt* st = irbb->stmts[irbb->stmts_used-1];
990 vassert(st);
991 vassert(st->tag == Ist_Put);
992 vassert(st->Ist.Put.offset == offB_GUEST_IP);
993 /* Really we should also check that the type of the Put'd data
994 == guest_word_type, but that's a bit expensive. */
997 /* Update the extents entry that we are constructing. */
998 /* If vex_control.guest_max_insns is required to be < 100 and
999 each insn is at max 20 bytes long, this limit of 5000 then
1000 seems reasonable since the max possible extent length will be
1001 100 * 20 == 2000. */
1002 vassert(*extent_len < 5000);
1003 (*extent_len) += dres.len;
1004 (*n_instrs)++;
1006 /* Advance delta (inconspicuous but very important :-) */
1007 delta += (Long)dres.len;
1009 Bool stopNow = False;
1010 switch (dres.whatNext) {
1011 case Dis_Continue:
1012 vassert(dres.jk_StopHere == Ijk_INVALID);
1013 if (*n_instrs >= n_instrs_allowed) {
1014 /* We have to stop. See comment above re irbb field
1015 settings here. */
1016 irbb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
1017 /* irbb->jumpkind must already be Ijk_Boring */
1018 irbb->offsIP = offB_GUEST_IP;
1019 stopNow = True;
1021 break;
1022 case Dis_StopHere:
1023 vassert(dres.jk_StopHere != Ijk_INVALID);
1024 /* See comment above re irbb field settings here. */
1025 irbb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
1026 irbb->jumpkind = dres.jk_StopHere;
1027 irbb->offsIP = offB_GUEST_IP;
1028 stopNow = True;
1029 break;
1030 default:
1031 vpanic("bb_to_IR");
1034 if (stopNow)
1035 break;
1036 } /* while (True) */
1038 /* irbb->next must now be set, since we've finished the block.
1039 Print it if necessary.*/
1040 vassert(irbb->next != NULL);
1041 if (debug_print) {
1042 vex_printf(" ");
1043 vex_printf( "PUT(%d) = ", irbb->offsIP);
1044 ppIRExpr( irbb->next );
1045 vex_printf( "; exit-");
1046 ppIRJumpKind(irbb->jumpkind);
1047 vex_printf( "\n");
1048 vex_printf( "\n");
1051 /* And clean it up. */
1052 irbb = do_minimal_initial_iropt_BB ( irbb );
1053 if (debug_print) {
1054 ppIRSB(irbb);
1057 return irbb;
1061 /*--------------------------------------------------------------*/
1062 /*--- Disassembly of traces: helper functions ---*/
1063 /*--------------------------------------------------------------*/
1065 // Swap the side exit and fall through exit for |bb|. Update |be| so as to be
1066 // consistent.
1067 static void swap_sx_and_ft ( /*MOD*/IRSB* bb, /*MOD*/BlockEnd* be )
1069 vassert(be->tag == Be_Cond);
1070 vassert(bb->stmts_used >= 2); // Must have at least: IMark, Exit
1071 IRStmt* exit = bb->stmts[bb->stmts_used - 1];
1072 vassert(exit->tag == Ist_Exit);
1073 vassert(exit->Ist.Exit.guard->tag == Iex_RdTmp);
1074 vassert(exit->Ist.Exit.guard->Iex.RdTmp.tmp == be->Be.Cond.condSX);
1075 vassert(bb->next->tag == Iex_Const);
1076 vassert(bb->jumpkind == Ijk_Boring);
1077 // We need to insert a new stmt, just before the exit, that computes 'Not1'
1078 // of the guard condition. Replace |bb->stmts[bb->stmts_used - 1]| by the
1079 // new stmt, and then place |exit| immediately after it.
1080 IRTemp invertedGuard = newIRTemp(bb->tyenv, Ity_I1);
1081 bb->stmts[bb->stmts_used - 1]
1082 = IRStmt_WrTmp(invertedGuard,
1083 IRExpr_Unop(Iop_Not1, IRExpr_RdTmp(exit->Ist.Exit.guard
1084 ->Iex.RdTmp.tmp)));
1085 exit->Ist.Exit.guard->Iex.RdTmp.tmp = invertedGuard;
1086 addStmtToIRSB(bb, exit);
1088 // Swap the actual destination constants.
1089 { IRConst* tmp = exit->Ist.Exit.dst;
1090 exit->Ist.Exit.dst = bb->next->Iex.Const.con;
1091 bb->next->Iex.Const.con = tmp;
1094 // And update |be|.
1095 { be->Be.Cond.condSX = invertedGuard;
1096 Long tmp = be->Be.Cond.deltaSX;
1097 be->Be.Cond.deltaSX = be->Be.Cond.deltaFT;
1098 be->Be.Cond.deltaFT = tmp;
1103 static void update_instr_budget( /*MOD*/Int* instrs_avail,
1104 /*MOD*/Bool* verbose_mode,
1105 const Int bb_instrs_used,
1106 const Bool bb_verbose_seen )
1108 if (0)
1109 vex_printf("UIB: verbose_mode %d, instrs_avail %d, "
1110 "bb_instrs_used %d, bb_verbose_seen %d\n",
1111 *verbose_mode ? 1 : 0, *instrs_avail,
1112 bb_instrs_used, bb_verbose_seen ? 1 : 0);
1114 vassert(bb_instrs_used <= *instrs_avail);
1116 if (bb_verbose_seen && !(*verbose_mode)) {
1117 *verbose_mode = True;
1118 // Adjust *instrs_avail so that, when it becomes zero, we haven't used
1119 // more than 50% of vex_control.guest_max_instrs.
1120 if (bb_instrs_used > vex_control.guest_max_insns / 2) {
1121 *instrs_avail = 0;
1122 } else {
1123 *instrs_avail = vex_control.guest_max_insns / 2;
1125 vassert(*instrs_avail >= 0);
1128 // Subtract bb_instrs_used from *instrs_avail, clamping at 0 if necessary.
1129 if (bb_instrs_used > *instrs_avail) {
1130 *instrs_avail = 0;
1131 } else {
1132 *instrs_avail -= bb_instrs_used;
1135 vassert(*instrs_avail >= 0);
1138 // Add the extent [base, +len) to |vge|. Asserts if |vge| is already full.
1139 // As an optimisation only, tries to also merge the new extent with the
1140 // previous one, if possible.
1141 static void add_extent ( /*MOD*/VexGuestExtents* vge, Addr base, UShort len )
1143 const UInt limit = sizeof(vge->base) / sizeof(vge->base[0]);
1144 vassert(limit == 3);
1145 const UInt i = vge->n_used;
1146 vassert(i < limit);
1147 vge->n_used++;
1148 vge->base[i] = base;
1149 vge->len[i] = len;
1150 // Try to merge with the previous extent
1151 if (i > 0
1152 && (((UInt)vge->len[i-1]) + ((UInt)len))
1153 < 200*25 /* say, 200 insns of size 25 bytes, absolute worst case */
1154 && vge->base[i-1] + vge->len[i-1] == base) {
1155 vge->len[i-1] += len;
1156 vge->n_used--;
1157 //vex_printf("MERGE\n");
1162 /*--------------------------------------------------------------*/
1163 /*--- Disassembly of traces: main function ---*/
1164 /*--------------------------------------------------------------*/
1166 /* Disassemble a complete basic block, starting at guest_IP_sbstart,
1167 returning a new IRSB. The disassembler may chase across basic
1168 block boundaries if it wishes and if chase_into_ok allows it.
1169 The precise guest address ranges from which code has been taken
1170 are written into vge. guest_IP_sbstart is taken to be the IP in
1171 the guest's address space corresponding to the instruction at
1172 &guest_code[0].
1174 dis_instr_fn is the arch-specific fn to disassemble one instruction; it
1175 is this that does the real work.
1177 needs_self_check is a callback used to ask the caller which of the
1178 extents, if any, a self check is required for. The returned value
1179 is a bitmask with a 1 in position i indicating that the i'th extent
1180 needs a check. Since there can be at most 3 extents, the returned
1181 values must be between 0 and 7.
1183 The number of extents which did get a self check (0 to 3) is put in
1184 n_sc_extents. The caller already knows this because it told us
1185 which extents to add checks for, via the needs_self_check callback,
1186 but we ship the number back out here for the caller's convenience.
1188 preamble_function is a callback which allows the caller to add
1189 its own IR preamble (following the self-check, if any). May be
1190 NULL. If non-NULL, the IRSB under construction is handed to
1191 this function, which presumably adds IR statements to it. The
1192 callback may optionally complete the block and direct bb_to_IR
1193 not to disassemble any instructions into it; this is indicated
1194 by the callback returning True.
1196 offB_GUEST_CMSTART and offB_GUEST_CMLEN are the offsets of guest_CMSTART
1197 and guest_CMLEN. Since this routine has to work for any guest state,
1198 without knowing what it is, those offsets have to be passed in.
1200 callback_opaque is a caller-supplied pointer to data which the
1201 callbacks may want to see. Vex has no idea what it is.
1202 (In fact it's a VgInstrumentClosure.)
1205 /* Regarding IP updating. dis_instr_fn (that does the guest specific
1206 work of disassembling an individual instruction) must finish the
1207 resulting IR with "PUT(guest_IP) = ". Hence in all cases it must
1208 state the next instruction address.
1210 If the block is to be ended at that point, then this routine
1211 (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to
1212 make a transfer (of the right kind) to "GET(guest_IP)". Hence if
1213 dis_instr_fn generates incorrect IP updates we will see it
1214 immediately (due to jumping to the wrong next guest address).
1216 However it is also necessary to set this up so it can be optimised
1217 nicely. The IRSB exit is defined to update the guest IP, so that
1218 chaining works -- since the chain_me stubs expect the chain-to
1219 address to be in the guest state. Hence what the IRSB next fields
1220 will contain initially is (implicitly)
1222 PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next]
1224 which looks pretty strange at first. E.g. an unconditional branch
1225 to some address 0x123456 looks like this:
1227 PUT(guest_IP) = 0x123456; // dis_instr_fn generates this
1228 // the exit
1229 PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring
1231 after redundant-GET and -PUT removal by iropt, we get what we want:
1233 // the exit
1234 PUT(guest_IP) [implicitly] = 0x123456; exit-Boring
1236 This makes the IRSB-end case the same as the side-exit case: update
1237 IP, then transfer. There is no redundancy of representation for
1238 the destination, and we use the destination specified by
1239 dis_instr_fn, so any errors it makes show up sooner.
1241 IRSB* bb_to_IR (
1242 /*OUT*/VexGuestExtents* vge,
1243 /*OUT*/UInt* n_sc_extents,
1244 /*OUT*/UInt* n_guest_instrs, /* stats only */
1245 /*OUT*/UShort* n_uncond_in_trace, /* stats only */
1246 /*OUT*/UShort* n_cond_in_trace, /* stats only */
1247 /*MOD*/VexRegisterUpdates* pxControl,
1248 /*IN*/ void* callback_opaque,
1249 /*IN*/ DisOneInstrFn dis_instr_fn,
1250 /*IN*/ const UChar* guest_code,
1251 /*IN*/ Addr guest_IP_sbstart,
1252 /*IN*/ Bool (*chase_into_ok)(void*,Addr),
1253 /*IN*/ VexEndness host_endness,
1254 /*IN*/ Bool sigill_diag,
1255 /*IN*/ VexArch arch_guest,
1256 /*IN*/ const VexArchInfo* archinfo_guest,
1257 /*IN*/ const VexAbiInfo* abiinfo_both,
1258 /*IN*/ IRType guest_word_type,
1259 /*IN*/ UInt (*needs_self_check)
1260 (void*, /*MB_MOD*/VexRegisterUpdates*,
1261 const VexGuestExtents*),
1262 /*IN*/ Bool (*preamble_function)(void*,IRSB*),
1263 /*IN*/ Int offB_GUEST_CMSTART,
1264 /*IN*/ Int offB_GUEST_CMLEN,
1265 /*IN*/ Int offB_GUEST_IP,
1266 /*IN*/ Int szB_GUEST_IP
// Front-end tracing output is produced only when VEX_TRACE_FE is set.
1269 Bool debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
1271 /* check sanity .. */
1272 vassert(sizeof(HWord) == sizeof(void*));
1273 vassert(vex_control.guest_max_insns >= 1);
1274 vassert(vex_control.guest_max_insns <= 100);
1275 vassert(vex_control.guest_chase == False || vex_control.guest_chase == True);
1276 vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
// The guest IP slot must have the size of, and be aligned to, the guest word.
1278 if (guest_word_type == Ity_I32) {
1279 vassert(szB_GUEST_IP == 4);
1280 vassert((offB_GUEST_IP % 4) == 0);
1281 } else {
1282 vassert(szB_GUEST_IP == 8);
1283 vassert((offB_GUEST_IP % 8) == 0);
1286 /* Initialise all return-by-ref state. */
1287 vge->n_used = 0;
1288 *n_sc_extents = 0;
1289 *n_guest_instrs = 0;
1290 *n_uncond_in_trace = 0;
1291 *n_cond_in_trace = 0;
1293 /* And a new IR superblock to dump the result into. */
1294 IRSB* irsb = emptyIRSB();
1296 /* Leave 21 spaces in which to put the check statements for a self
1297 checking translation (up to 3 extents, and 7 stmts required for
1298 each). We won't know until later the extents and checksums of
1299 the areas, if any, that need to be checked. */
1300 IRStmt* nop = IRStmt_NoOp();
1301 Int selfcheck_idx = irsb->stmts_used;
1302 for (Int i = 0; i < 3 * 7; i++)
1303 addStmtToIRSB( irsb, nop );
1305 /* If the caller supplied a function to add its own preamble, use
1306 it now. */
1307 if (preamble_function) {
1308 Bool stopNow = preamble_function( callback_opaque, irsb );
1309 if (stopNow) {
1310 /* The callback has completed the IR block without any guest
1311 insns being disassembled into it, so just return it at
1312 this point, even if a self-check was requested - as there
1313 is nothing to self-check. The 21 self-check no-ops will
1314 still be in place, but they are harmless. */
// Report a single zero-length extent located at the block start.
1315 vge->n_used = 1;
1316 vge->base[0] = guest_IP_sbstart;
1317 vge->len[0] = 0;
1318 return irsb;
1322 /* Running state:
1323 irsb the SB we are incrementally constructing
1324 vge associated extents for irsb
1325 instrs_used instrs incorporated in irsb so far
1326 instrs_avail number of instrs we have space for
1327 verbose_mode did we see an 'is verbose' hint at some point?
1329 Int instrs_used = 0;
1330 Int instrs_avail = vex_control.guest_max_insns;
1331 Bool verbose_mode = False;
1333 /* Disassemble the initial block until we have to stop. */
1335 Int ib_instrs_used = 0;
1336 Bool ib_verbose_seen = False;
1337 Addr ib_base = 0;
1338 UShort ib_len = 0;
1339 irsb = disassemble_basic_block_till_stop(
1340 /*OUT*/ &ib_instrs_used, &ib_verbose_seen, &ib_base, &ib_len,
1341 /*MOD*/ irsb,
1342 /*IN*/ 0/*delta for the first block in the trace*/,
1343 instrs_avail, guest_IP_sbstart, host_endness, sigill_diag,
1344 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
1345 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
1347 vassert(ib_instrs_used <= instrs_avail);
1349 // Update instrs_used, extents, budget.
1350 instrs_used += ib_instrs_used;
1351 add_extent(vge, ib_base, ib_len);
1352 update_instr_budget(&instrs_avail, &verbose_mode,
1353 ib_instrs_used, ib_verbose_seen);
1356 /* Now, see if we can extend the initial block. */
1357 while (True) {
1358 const Int n_extent_slots = sizeof(vge->base) / sizeof(vge->base[0]);
1359 vassert(n_extent_slots == 3);
1361 // Reasons to give up immediately:
1362 // User or tool asked us not to chase
1363 if (!vex_control.guest_chase)
1364 break;
1366 // Out of extent slots
1367 vassert(vge->n_used <= n_extent_slots);
1368 if (vge->n_used == n_extent_slots)
1369 break;
1371 // Almost out of available instructions
// NOTE(review): the threshold 3 equals instrs_avail_spec, the per-arm budget
// used for speculative disassembly further down; presumably related -- confirm.
1372 vassert(instrs_avail >= 0);
1373 if (instrs_avail < 3)
1374 break;
1376 // Try for an extend. What kind we do depends on how the current trace
1377 // ends.
1378 /* Regarding the use of |sigill_diag| in the extension logic below. This
1379 is a Bool which controls whether or not the individual insn
1380 disassemblers print an error message in the case where they don't
1381 recognise an instruction. Generally speaking this is set to True, but
1382 VEX's client can set it to False if it wants.
1384 Now that we are speculatively chasing both arms of a conditional
1385 branch, this can lead to the following problem: one of those arms
1386 contains an undecodable instruction. That insn is not reached at run
1387 time, because the branch itself tests some CPU hwcaps info (or
1388 whatever) and execution goes down the other path. However, it has the
1389 bad side effect that the speculative disassembly will nevertheless
1390 produce an error message when |sigill_diag| is True.
1392 To avoid this, in calls to |disassemble_basic_block_till_stop| for
1393 speculative code, we pass False instead of |sigill_diag|. Note that
1394 any (unconditional-chase) call to |disassemble_basic_block_till_stop|
1395 that happens after a conditional chase that results in recovery of an
1396 &&-idiom, is still really non-speculative, because the &&-idiom
1397 translation can only happen when both paths lead to the same
1398 continuation point. The result is that we know that the initial BB,
1399 and BBs recovered via chasing an unconditional branch, are sure to be
1400 executed, even if that unconditional branch follows a conditional
1401 branch which got folded into an &&-idiom. So we don't need to change
1402 the |sigill_diag| value used for them. It's only for the
1403 conditional-branch SX and FT disassembly that it must be set to
1404 |False|.
1406 BlockEnd irsb_be;
1407 analyse_block_end(&irsb_be, irsb, guest_IP_sbstart, guest_word_type,
1408 chase_into_ok, callback_opaque,
1409 offB_GUEST_IP, szB_GUEST_IP, debug_print);
1411 // Try for an extend based on an unconditional branch or call to a known
1412 // destination.
1413 if (irsb_be.tag == Be_Uncond) {
1414 if (debug_print) {
1415 vex_printf("\n-+-+ Unconditional follow (ext# %d) to 0x%llx "
1416 "-+-+\n\n",
1417 (Int)vge->n_used,
1418 (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.Uncond.delta));
1420 Int bb_instrs_used = 0;
1421 Bool bb_verbose_seen = False;
1422 Addr bb_base = 0;
1423 UShort bb_len = 0;
// The branch target is disassembled into a fresh IRSB, with the real
// |sigill_diag| setting, and then appended to |irsb|.
1424 IRSB* bb
1425 = disassemble_basic_block_till_stop(
1426 /*OUT*/ &bb_instrs_used, &bb_verbose_seen, &bb_base, &bb_len,
1427 /*MOD*/ emptyIRSB(),
1428 /*IN*/ irsb_be.Be.Uncond.delta,
1429 instrs_avail, guest_IP_sbstart, host_endness, sigill_diag,
1430 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
1431 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
1433 vassert(bb_instrs_used <= instrs_avail);
1435 /* Now we have to append 'bb' to 'irsb'. */
1436 concatenate_irsbs(irsb, bb);
1438 // Update instrs_used, extents, budget.
1439 instrs_used += bb_instrs_used;
1440 add_extent(vge, bb_base, bb_len);
1441 update_instr_budget(&instrs_avail, &verbose_mode,
1442 bb_instrs_used, bb_verbose_seen);
1443 *n_uncond_in_trace += 1;
1444 } // if (be.tag == Be_Uncond)
1446 // Try for an extend based on a conditional branch, specifically in the
1447 // hope of identifying and recovering, an "A && B" condition spread across
1448 // two basic blocks.
// NOTE(review): as visible here this test is not chained to the Be_Uncond
// test above with an 'else'.  If that is really the case, a Be_Uncond block
// also reaches the final 'else { break; }' and only one unconditional extend
// is ever done per call -- confirm against the complete file.
1449 if (irsb_be.tag == Be_Cond
1450 /* sewardj 2019Nov30: Alas, chasing cond branches on s390 causes
1451 Memcheck to crash, for as-yet unknown reasons. It also exposes
1452 some unhandled Iex_ITE cases in the s390x instruction selector.
1453 For now, disable. */
1454 && arch_guest != VexArchS390X)
1456 if (debug_print) {
1457 vex_printf("\n-+-+ (ext# %d) Considering cbranch to"
1458 " SX=0x%llx FT=0x%llx -+-+\n\n",
1459 (Int)vge->n_used,
1460 (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.Cond.deltaSX),
1461 (ULong)((Long)guest_IP_sbstart+ irsb_be.Be.Cond.deltaFT));
// Instruction budget handed to each of the two speculative disassembly calls
// (side exit and fall through) below.
1463 const Int instrs_avail_spec = 3;
1465 if (debug_print) {
1466 vex_printf("-+-+ SPEC side exit -+-+\n\n");
1468 Int sx_instrs_used = 0;
1469 Bool sx_verbose_seen = False;
1470 Addr sx_base = 0;
1471 UShort sx_len = 0;
1472 IRSB* sx_bb
1473 = disassemble_basic_block_till_stop(
1474 /*OUT*/ &sx_instrs_used, &sx_verbose_seen, &sx_base, &sx_len,
1475 /*MOD*/ emptyIRSB(),
1476 /*IN*/ irsb_be.Be.Cond.deltaSX,
1477 instrs_avail_spec, guest_IP_sbstart, host_endness,
1478 /*sigill_diag=*/False, // See comment above
1479 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
1480 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
1482 vassert(sx_instrs_used <= instrs_avail_spec);
1483 BlockEnd sx_be;
1484 analyse_block_end(&sx_be, sx_bb, guest_IP_sbstart, guest_word_type,
1485 chase_into_ok, callback_opaque,
1486 offB_GUEST_IP, szB_GUEST_IP, debug_print);
1488 if (debug_print) {
1489 vex_printf("\n-+-+ SPEC fall through -+-+\n\n");
1491 Int ft_instrs_used = 0;
1492 Bool ft_verbose_seen = False;
1493 Addr ft_base = 0;
1494 UShort ft_len = 0;
1495 IRSB* ft_bb
1496 = disassemble_basic_block_till_stop(
1497 /*OUT*/ &ft_instrs_used, &ft_verbose_seen, &ft_base, &ft_len,
1498 /*MOD*/ emptyIRSB(),
1499 /*IN*/ irsb_be.Be.Cond.deltaFT,
1500 instrs_avail_spec, guest_IP_sbstart, host_endness,
1501 /*sigill_diag=*/False, // See comment above
1502 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
1503 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
1505 vassert(ft_instrs_used <= instrs_avail_spec);
1506 BlockEnd ft_be;
1507 analyse_block_end(&ft_be, ft_bb, guest_IP_sbstart, guest_word_type,
1508 chase_into_ok, callback_opaque,
1509 offB_GUEST_IP, szB_GUEST_IP, debug_print);
1511 /* In order for the transformation to be remotely valid, we need:
1512 - At least one of the sx_bb or ft_bb to have a Be_Cond end.
1513 - sx_bb and ft_bb definitely don't form a loop.
1515 Bool ok = sx_be.tag == Be_Cond || ft_be.tag == Be_Cond;
1516 if (ok) {
1517 ok = definitely_does_not_jump_to_delta(&sx_be,
1518 irsb_be.Be.Cond.deltaFT)
1519 || definitely_does_not_jump_to_delta(&ft_be,
1520 irsb_be.Be.Cond.deltaSX);
1523 // Check for other mutancy:
1524 // irsb ft == sx, or the same for ft itself or sx itself
1525 if (ok) {
1526 if (irsb_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaFT
1527 || (sx_be.tag == Be_Cond
1528 && sx_be.Be.Cond.deltaSX == sx_be.Be.Cond.deltaFT)
1529 || (ft_be.tag == Be_Cond
1530 && ft_be.Be.Cond.deltaSX == ft_be.Be.Cond.deltaFT)) {
1531 ok = False;
1535 /* Now let's see if any of our four cases actually holds (viz, is this
1536 really an && idiom?) */
// Idioms 0 and 1 are identified from the SX block, 2 and 3 from the FT block;
// the value 4 means "none".
1537 UInt idiom = 4;
1538 if (ok) {
1539 vassert(irsb_be.tag == Be_Cond);
1540 UInt iom1 = 4/*invalid*/;
1541 if (sx_be.tag == Be_Cond) {
1542 /**/ if (sx_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaFT)
1543 iom1 = 0;
1544 else if (sx_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaFT)
1545 iom1 = 1;
1547 UInt iom2 = 4/*invalid*/;
1548 if (ft_be.tag == Be_Cond) {
1549 /**/ if (ft_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaSX)
1550 iom2 = 2;
1551 else if (ft_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaSX)
1552 iom2 = 3;
1555 /* We should only have identified at most one of the four idioms. */
1556 vassert(iom1 == 4 || iom2 == 4);
1557 idiom = (iom1 < 4) ? iom1 : (iom2 < 4 ? iom2 : 4);
1558 if (idiom == 4) {
1559 ok = False;
1560 if (debug_print) {
1561 vex_printf("\n-+-+ &&-idiom not recognised, "
1562 "giving up. -+-+\n\n");
1567 if (ok) {
1568 vassert(idiom < 4);
1569 // "Normalise" the data so as to ensure we only have one of the four
1570 // idioms to transform.
1571 if (idiom == 2 || idiom == 3) {
1572 swap_sx_and_ft(irsb, &irsb_be);
1573 # define SWAP(_ty, _aa, _bb) \
1574 do { _ty _tmp = _aa; _aa = _bb; _bb = _tmp; } while (0)
1575 SWAP(Int, sx_instrs_used, ft_instrs_used);
1576 SWAP(Bool, sx_verbose_seen, ft_verbose_seen);
1577 SWAP(Addr, sx_base, ft_base);
1578 SWAP(UShort, sx_len, ft_len);
1579 SWAP(IRSB*, sx_bb, ft_bb);
1580 SWAP(BlockEnd, sx_be, ft_be);
1581 # undef SWAP
1583 if (idiom == 1 || idiom == 3) {
1584 swap_sx_and_ft(sx_bb, &sx_be);
1586 vassert(sx_be.tag == Be_Cond);
1587 vassert(sx_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaFT);
1589 if (debug_print) {
1590 vex_printf("\n-+-+ After normalisation (idiom=%u) -+-+\n", idiom);
1591 vex_printf("\n-+-+ IRSB -+-+\n");
1592 ppIRSB(irsb);
1593 ppBlockEnd(&irsb_be);
1594 vex_printf("\n\n-+-+ SX -+-+\n");
1595 ppIRSB(sx_bb);
1596 ppBlockEnd(&sx_be);
1597 vex_printf("\n");
1599 // Finally, check the sx block actually is guardable.
1600 ok = block_is_guardable(sx_bb);
1601 if (!ok && debug_print) {
1602 vex_printf("\n-+-+ SX not guardable, giving up. -+-+\n\n");
1606 if (ok) {
1607 if (0 || debug_print) {
1608 vex_printf("\n-+-+ DOING &&-TRANSFORM -+-+\n");
1610 // Finally really actually do the transformation.
1611 // 0. remove the last Exit on irsb.
1612 // 1. Add irsb->tyenv->types_used to all the tmps in sx_bb,
1613 // by calling deltaIRStmt on all stmts.
1614 // 2. Guard all stmts in sx_bb on irsb_be.Be.Cond.condSX,
1615 // **including** the last stmt (which must be an Exit). It's
1616 // here that the And1 is generated.
1617 // 3. Copy all guarded stmts to the end of irsb.
1618 vassert(irsb->stmts_used >= 2);
1619 irsb->stmts_used--;
// |delta| is captured before sx_bb's temporaries are appended to irsb's type
// environment, so it is the index at which the first of them lands.
1620 Int delta = irsb->tyenv->types_used;
1622 // Append sx_bb's tyenv to irsb's
1623 for (Int i = 0; i < sx_bb->tyenv->types_used; i++) {
1624 (void)newIRTemp(irsb->tyenv, sx_bb->tyenv->types[i]);
1627 for (Int i = 0; i < sx_bb->stmts_used; i++) {
1628 IRStmt* st = deepCopyIRStmt(sx_bb->stmts[i]);
1629 deltaIRStmt(st, delta);
1630 add_guarded_stmt_to_end_of(irsb, st, irsb_be.Be.Cond.condSX);
1633 if (debug_print) {
1634 vex_printf("\n-+-+ FINAL RESULT -+-+\n\n");
1635 ppIRSB(irsb);
1636 vex_printf("\n");
1639 // Update instrs_used, extents, budget.
1640 instrs_used += sx_instrs_used;
1641 add_extent(vge, sx_base, sx_len);
1642 update_instr_budget(&instrs_avail, &verbose_mode,
1643 sx_instrs_used, sx_verbose_seen);
1644 *n_cond_in_trace += 1;
// NOTE(review): this break appears to sit outside the 'if (ok)' above, i.e. the
// extension loop ends after one conditional-branch attempt whether or not the
// transform was applied -- confirm against the complete file.
1646 break;
1647 } // if (be.tag == Be_Cond)
1649 // We don't know any other way to extend the block. Give up.
1650 else {
1651 break;
1654 } // while (True)
1656 /* We're almost done. The only thing that might need attending to is that
1657 a self-checking preamble may need to be created. If so it gets placed
1658 in the 21 slots reserved above. */
1659 create_self_checks_as_needed(
1660 irsb, n_sc_extents, pxControl, callback_opaque, needs_self_check,
1661 vge, abiinfo_both, guest_word_type, selfcheck_idx, offB_GUEST_CMSTART,
1662 offB_GUEST_CMLEN, offB_GUEST_IP, guest_IP_sbstart
// Hand back the total number of guest instructions in the trace (stats only).
1665 *n_guest_instrs = instrs_used;
1666 return irsb;
1670 /*--------------------------------------------------------------*/
1671 /*--- Functions called by self-checking translations ---*/
1672 /*--------------------------------------------------------------*/
1674 /* All of these are CLEAN HELPERs */
1675 /* All of these are CALLED FROM GENERATED CODE */
1677 /* Compute a checksum of host memory at [addr .. addr+len-1], as fast
1678 as possible. All _4al versions assume that the supplied address is
1679 4 aligned. All length values are in 4-byte chunks. These fns
1680 are called once for every use of a self-checking translation, so
1681 they need to be as fast as possible. */
1683 /* --- 32-bit versions, used only on 32-bit hosts --- */
1685 static inline UInt ROL32 ( UInt w, Int n ) {
1686 w = (w << n) | (w >> (32-n));
1687 return w;
1690 VEX_REGPARM(2)
1691 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
1693 UInt sum1 = 0, sum2 = 0;
1694 UInt* p = (UInt*)first_w32;
1695 /* unrolled */
1696 while (n_w32s >= 4) {
1697 UInt w;
1698 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1699 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1700 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1701 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1702 p += 4;
1703 n_w32s -= 4;
1704 sum1 ^= sum2;
1706 while (n_w32s >= 1) {
1707 UInt w;
1708 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1709 p += 1;
1710 n_w32s -= 1;
1711 sum1 ^= sum2;
1713 return sum1 + sum2;
1716 /* Specialised versions of the above function */
1718 VEX_REGPARM(1)
1719 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
1721 UInt sum1 = 0, sum2 = 0;
1722 UInt* p = (UInt*)first_w32;
1723 UInt w;
1724 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1725 sum1 ^= sum2;
1726 return sum1 + sum2;
1729 VEX_REGPARM(1)
1730 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
1732 UInt sum1 = 0, sum2 = 0;
1733 UInt* p = (UInt*)first_w32;
1734 UInt w;
1735 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1736 sum1 ^= sum2;
1737 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1738 sum1 ^= sum2;
1739 return sum1 + sum2;
1742 VEX_REGPARM(1)
1743 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
1745 UInt sum1 = 0, sum2 = 0;
1746 UInt* p = (UInt*)first_w32;
1747 UInt w;
1748 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1749 sum1 ^= sum2;
1750 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1751 sum1 ^= sum2;
1752 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1753 sum1 ^= sum2;
1754 return sum1 + sum2;
1757 VEX_REGPARM(1)
1758 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
1760 UInt sum1 = 0, sum2 = 0;
1761 UInt* p = (UInt*)first_w32;
1762 UInt w;
1763 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1764 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1765 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1766 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1767 sum1 ^= sum2;
1768 return sum1 + sum2;
1771 VEX_REGPARM(1)
1772 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
1774 UInt sum1 = 0, sum2 = 0;
1775 UInt* p = (UInt*)first_w32;
1776 UInt w;
1777 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1778 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1779 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1780 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1781 sum1 ^= sum2;
1782 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1783 sum1 ^= sum2;
1784 return sum1 + sum2;
1787 VEX_REGPARM(1)
1788 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
1790 UInt sum1 = 0, sum2 = 0;
1791 UInt* p = (UInt*)first_w32;
1792 UInt w;
1793 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1794 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1795 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1796 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1797 sum1 ^= sum2;
1798 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1799 sum1 ^= sum2;
1800 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1801 sum1 ^= sum2;
1802 return sum1 + sum2;
1805 VEX_REGPARM(1)
1806 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
1808 UInt sum1 = 0, sum2 = 0;
1809 UInt* p = (UInt*)first_w32;
1810 UInt w;
1811 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1812 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1813 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1814 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1815 sum1 ^= sum2;
1816 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1817 sum1 ^= sum2;
1818 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1819 sum1 ^= sum2;
1820 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1821 sum1 ^= sum2;
1822 return sum1 + sum2;
1825 VEX_REGPARM(1)
1826 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
1828 UInt sum1 = 0, sum2 = 0;
1829 UInt* p = (UInt*)first_w32;
1830 UInt w;
1831 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1832 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1833 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1834 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1835 sum1 ^= sum2;
1836 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1837 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1838 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1839 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1840 sum1 ^= sum2;
1841 return sum1 + sum2;
1844 VEX_REGPARM(1)
1845 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
1847 UInt sum1 = 0, sum2 = 0;
1848 UInt* p = (UInt*)first_w32;
1849 UInt w;
1850 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1851 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1852 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1853 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1854 sum1 ^= sum2;
1855 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1856 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1857 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1858 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1859 sum1 ^= sum2;
1860 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1861 sum1 ^= sum2;
1862 return sum1 + sum2;
1865 VEX_REGPARM(1)
1866 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
1868 UInt sum1 = 0, sum2 = 0;
1869 UInt* p = (UInt*)first_w32;
1870 UInt w;
1871 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1872 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1873 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1874 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1875 sum1 ^= sum2;
1876 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1877 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1878 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1879 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1880 sum1 ^= sum2;
1881 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1882 sum1 ^= sum2;
1883 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1884 sum1 ^= sum2;
1885 return sum1 + sum2;
1888 VEX_REGPARM(1)
1889 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
1891 UInt sum1 = 0, sum2 = 0;
1892 UInt* p = (UInt*)first_w32;
1893 UInt w;
1894 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1895 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1896 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1897 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1898 sum1 ^= sum2;
1899 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1900 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1901 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1902 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1903 sum1 ^= sum2;
1904 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1905 sum1 ^= sum2;
1906 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1907 sum1 ^= sum2;
1908 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1909 sum1 ^= sum2;
1910 return sum1 + sum2;
1913 VEX_REGPARM(1)
1914 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
1916 UInt sum1 = 0, sum2 = 0;
1917 UInt* p = (UInt*)first_w32;
1918 UInt w;
1919 w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1920 w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1921 w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1922 w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1923 sum1 ^= sum2;
1924 w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1925 w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1926 w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1927 w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1928 sum1 ^= sum2;
1929 w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1930 w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1931 w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1932 w = p[11]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
1933 sum1 ^= sum2;
1934 return sum1 + sum2;
/* --- 64-bit versions, used only on 64-bit hosts --- */
1940 static inline ULong ROL64 ( ULong w, Int n ) {
1941 w = (w << n) | (w >> (64-n));
1942 return w;
1945 VEX_REGPARM(2)
1946 static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s )
1948 ULong sum1 = 0, sum2 = 0;
1949 ULong* p = (ULong*)first_w64;
1950 /* unrolled */
1951 while (n_w64s >= 4) {
1952 ULong w;
1953 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1954 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1955 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1956 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1957 p += 4;
1958 n_w64s -= 4;
1959 sum1 ^= sum2;
1961 while (n_w64s >= 1) {
1962 ULong w;
1963 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1964 p += 1;
1965 n_w64s -= 1;
1966 sum1 ^= sum2;
1968 return sum1 + sum2;
/* Specialised versions of genericg_compute_checksum_8al, one for each
   fixed word count from 1 to 12. */
1973 VEX_REGPARM(1)
1974 static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 )
1976 ULong sum1 = 0, sum2 = 0;
1977 ULong* p = (ULong*)first_w64;
1978 ULong w;
1979 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1980 sum1 ^= sum2;
1981 return sum1 + sum2;
1984 VEX_REGPARM(1)
1985 static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 )
1987 ULong sum1 = 0, sum2 = 0;
1988 ULong* p = (ULong*)first_w64;
1989 ULong w;
1990 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1991 sum1 ^= sum2;
1992 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
1993 sum1 ^= sum2;
1994 return sum1 + sum2;
1997 VEX_REGPARM(1)
1998 static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 )
2000 ULong sum1 = 0, sum2 = 0;
2001 ULong* p = (ULong*)first_w64;
2002 ULong w;
2003 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2004 sum1 ^= sum2;
2005 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2006 sum1 ^= sum2;
2007 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2008 sum1 ^= sum2;
2009 return sum1 + sum2;
2012 VEX_REGPARM(1)
2013 static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 )
2015 ULong sum1 = 0, sum2 = 0;
2016 ULong* p = (ULong*)first_w64;
2017 ULong w;
2018 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2019 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2020 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2021 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2022 sum1 ^= sum2;
2023 return sum1 + sum2;
2026 VEX_REGPARM(1)
2027 static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 )
2029 ULong sum1 = 0, sum2 = 0;
2030 ULong* p = (ULong*)first_w64;
2031 ULong w;
2032 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2033 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2034 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2035 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2036 sum1 ^= sum2;
2037 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2038 sum1 ^= sum2;
2039 return sum1 + sum2;
2042 VEX_REGPARM(1)
2043 static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 )
2045 ULong sum1 = 0, sum2 = 0;
2046 ULong* p = (ULong*)first_w64;
2047 ULong w;
2048 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2049 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2050 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2051 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2052 sum1 ^= sum2;
2053 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2054 sum1 ^= sum2;
2055 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2056 sum1 ^= sum2;
2057 return sum1 + sum2;
2060 VEX_REGPARM(1)
2061 static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 )
2063 ULong sum1 = 0, sum2 = 0;
2064 ULong* p = (ULong*)first_w64;
2065 ULong w;
2066 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2067 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2068 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2069 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2070 sum1 ^= sum2;
2071 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2072 sum1 ^= sum2;
2073 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2074 sum1 ^= sum2;
2075 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2076 sum1 ^= sum2;
2077 return sum1 + sum2;
2080 VEX_REGPARM(1)
2081 static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 )
2083 ULong sum1 = 0, sum2 = 0;
2084 ULong* p = (ULong*)first_w64;
2085 ULong w;
2086 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2087 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2088 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2089 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2090 sum1 ^= sum2;
2091 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2092 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2093 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2094 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2095 sum1 ^= sum2;
2096 return sum1 + sum2;
2099 VEX_REGPARM(1)
2100 static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 )
2102 ULong sum1 = 0, sum2 = 0;
2103 ULong* p = (ULong*)first_w64;
2104 ULong w;
2105 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2106 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2107 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2108 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2109 sum1 ^= sum2;
2110 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2111 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2112 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2113 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2114 sum1 ^= sum2;
2115 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2116 sum1 ^= sum2;
2117 return sum1 + sum2;
2120 VEX_REGPARM(1)
2121 static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 )
2123 ULong sum1 = 0, sum2 = 0;
2124 ULong* p = (ULong*)first_w64;
2125 ULong w;
2126 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2127 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2128 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2129 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2130 sum1 ^= sum2;
2131 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2132 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2133 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2134 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2135 sum1 ^= sum2;
2136 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2137 sum1 ^= sum2;
2138 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2139 sum1 ^= sum2;
2140 return sum1 + sum2;
2143 VEX_REGPARM(1)
2144 static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 )
2146 ULong sum1 = 0, sum2 = 0;
2147 ULong* p = (ULong*)first_w64;
2148 ULong w;
2149 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2150 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2151 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2152 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2153 sum1 ^= sum2;
2154 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2155 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2156 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2157 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2158 sum1 ^= sum2;
2159 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2160 sum1 ^= sum2;
2161 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2162 sum1 ^= sum2;
2163 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2164 sum1 ^= sum2;
2165 return sum1 + sum2;
2168 VEX_REGPARM(1)
2169 static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 )
2171 ULong sum1 = 0, sum2 = 0;
2172 ULong* p = (ULong*)first_w64;
2173 ULong w;
2174 w = p[0]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2175 w = p[1]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2176 w = p[2]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2177 w = p[3]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2178 sum1 ^= sum2;
2179 w = p[4]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2180 w = p[5]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2181 w = p[6]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2182 w = p[7]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2183 sum1 ^= sum2;
2184 w = p[8]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2185 w = p[9]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2186 w = p[10]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2187 w = p[11]; sum1 = ROL64(sum1 ^ w, 63); sum2 += w;
2188 sum1 ^= sum2;
2189 return sum1 + sum2;
/*--------------------------------------------------------------------*/
/*--- end                                     guest_generic_bb_to_IR.c ---*/
/*--------------------------------------------------------------------*/