/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static SECno n_sectors = 0;

/* Average size of a transtab code entry.  0 means to use the tool
   provided default. */
UInt VG_(clo_avg_transtab_entry_size) = 0;

/*------------------ CONSTANTS ------------------*/

/* Number of entries in hash table of each sector.  This needs to be a
   prime number to work properly, and it must be <= 65535 so that a TTE
   index fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED,
   HTT_DELETED) to denote 'deleted' and 0xFFFE (HTT_EMPTY) to denote
   'empty' in the hash table.
   It is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
#define HTT_DELETED     EC2TTE_DELETED
#define HTT_EMPTY       0xFFFE

// HTTno is the Sector->htt hash table index.  Must be the same type as TTEno.
typedef UShort HTTno;

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR \
           ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
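
/* With the default N_HTTES_PER_SECTOR of 65521 this evaluates to
   42588, i.e. a sector's tt is declared full once its hash table
   reaches roughly 65% load. */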

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
   ECLASS_N must fit in an EClassNo. */
#define ECLASS_SHIFT 13
#define ECLASS_WIDTH 9
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);
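
/* Worked example: with ECLASS_SHIFT == 13 and ECLASS_WIDTH == 9, each
   bin covers one 8KB-aligned chunk of address space, and the 512 bins
   recycle every 4MB.  A guest range starting at 0x401234 of length 10
   has loBits == hiBits == ((0x401234 >> 13) & 511) == 0, so it files
   under eclass 0; a range straddling an 8KB boundary would touch two
   different bins and so files under ECLASS_MISC instead.  See
   range_to_eclass() below. */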

typedef UShort EClassNo;

/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      SECno from_sNo;   /* sector number */
      TTEno from_tteNo; /* TTE number in given sector */
      UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
                                               where the patch is */
      Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      SECno to_sNo;    /* sector number */
      TTEno to_tteNo;  /* TTE number in given sector */
      UInt  from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
      union {
         InEdge   fixed[N_FIXED_IN_EDGE_ARR]; /* if !has_var */
         XArray*  var; /* XArray* of InEdge */ /* if has_var */
      } edges;
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      union {
         OutEdge  fixed[N_FIXED_OUT_EDGE_ARR]; /* if !has_var */
         XArray*  var; /* XArray* of OutEdge */ /* if has_var */
      } edges;
   }
   OutEdgeArr;
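
/* Note: InEdgeArr/OutEdgeArr are a small-array optimisation.  Most
   blocks have only a few predecessors/successors, so up to 3 in-edges
   (respectively 2 out-edges) are stored inline in |fixed|; only when
   that overflows does InEdgeArr__add/OutEdgeArr__add (below) switch
   the representation to a heap-allocated XArray. */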


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  These are split into hot and cold parts,
   TTEntryH and TTEntryC, so as to be more cache-friendly
   (a.k.a. "faster") when searching for translations that intersect
   with a certain guest code address range, for deletion.  In the
   worst case this can involve a sequential scan through all the hot
   parts, so they are packed as compactly as possible -- 32 bytes on a
   64-bit platform, 20 bytes on a 32-bit platform.

   Each TTEntry is logically a matching cold-and-hot pair, and in fact
   it was originally one structure.  First, the cold part.
*/
typedef
   struct {
      union {
         struct {
            /* Profiling only: the count and weight (arbitrary meaning) for
               this translation.  Weight is a property of the translation
               itself and computed once when the translation is created.
               Count is an entry count for the translation and is
               incremented by 1 every time the translation is used, if we
               are profiling. */
            ULong    count;
            UShort   weight;
         } prof; // if status == InUse
         TTEno next_empty_tte; // if status != InUse
      } usage;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr entry;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort   n_tte2ec;      // # tte2ec pointers (1 to 3)
      EClassNo tte2ec_ec[3];  // for each, the eclass #
      UInt     tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
         1. erased (as part of the oldest sector cleanup) when the
            youngest sector is full.
         2. discarded due to calls to VG_(discard_translations).
            VG_(discard_translations) sets the status of the
            translation to 'Deleted'.
            Amongst others, the gdbserver discards one or more translations
            when a breakpoint is inserted or removed at an Addr,
            or when single stepping mode is enabled/disabled,
            or when a translation is instrumented for gdbserver
            (all the target jumps of this translation are
            invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
         check that the 'from' translation still exists before
         doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not yet patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a not-found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not yet translated
         jump is to be executed.  The execution of the Deleted translation
         E will continue till a non-patched jump is encountered.
         This situation is then similar to the 'erasing' case above:
         the current translation E can be erased or overwritten, as the
         execution will continue at the new translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntryC;

/* And this is the hot part. */
typedef
   struct {
      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated.  Originally this was a VexGuestExtents, but that
         itself is 32 bytes on a 64-bit target, and we really want to
         squeeze in an 8-bit |status| field into the 32 byte field, so
         as to get 2 of them in a 64 byte LLC line.  Hence the
         VexGuestExtents fields are inlined, the _n_used field is
         changed to a UChar (it only ever has the values 1, 2 or 3)
         and the 8-bit status field is placed in byte 31 of the
         structure. */
      /* ORIGINALLY: VexGuestExtents vge; */
      Addr   vge_base[3];
      UShort vge_len[3];
      UChar  vge_n_used;  /* BEWARE: is UShort in VexGuestExtents */

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status : 8;
   }
   TTEntryH;


/* Impedance matchers, that convert between a VexGuestExtents and a
   TTEntryH, ignoring TTEntryH::status, which doesn't exist in a
   VexGuestExtents -- it is entirely unrelated. */

/* Takes a VexGuestExtents and pushes it into a TTEntryH.  The
   TTEntryH.status field is left unchanged. */
static
inline void TTEntryH__from_VexGuestExtents ( /*MOD*/TTEntryH* tteH,
                                             const VexGuestExtents* vge )
{
   tteH->vge_base[0] = vge->base[0];
   tteH->vge_base[1] = vge->base[1];
   tteH->vge_base[2] = vge->base[2];
   tteH->vge_len[0]  = vge->len[0];
   tteH->vge_len[1]  = vge->len[1];
   tteH->vge_len[2]  = vge->len[2];
   tteH->vge_n_used  = (UChar)vge->n_used; /* BEWARE: no range check. */
}

/* Takes a TTEntryH and pushes the vge_ components into a VexGuestExtents. */
static
inline void TTEntryH__to_VexGuestExtents ( /*MOD*/VexGuestExtents* vge,
                                           const TTEntryH* tteH )
{
   vge->base[0] = tteH->vge_base[0];
   vge->base[1] = tteH->vge_base[1];
   vge->base[2] = tteH->vge_base[2];
   vge->len[0]  = tteH->vge_len[0];
   vge->len[1]  = tteH->vge_len[1];
   vge->len[2]  = tteH->vge_len[2];
   vge->n_used  = (UShort)tteH->vge_n_used;
}


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      TTEno  tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry usage.prof.count and usage.prof.weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* A hash table, mapping guest address to an index in the tt array.
         htt is a fixed size, always containing
         exactly N_HTTES_PER_SECTOR entries. */
      TTEno* htt;

      /* The TTEntry{C,H} arrays.  These are a fixed size, always
         containing exactly N_TTES_PER_SECTOR entries. */
      TTEntryC* ttC;
      TTEntryH* ttH;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
      TTEno empty_tt_list;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int    ec2tte_size[ECLASS_N];
      Int    ec2tte_used[ECLASS_N];
      TTEno* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_TC_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, the youngest sector should be >= 0 and <
   N_TC_SECTORS.  The initial value indicates the TT/TC system is
   not yet initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = INV_SNO;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  INV_SNO denotes "no entry". */
static SECno sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A 4-way set-associative cache, with
   more-or-less LRU replacement.  It holds a set of recently used
   (guest address, host address) pairs.  This array is referred to
   directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value of 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
typedef
   struct {
      Addr guest0;
      Addr host0;
      Addr guest1;
      Addr host1;
      Addr guest2;
      Addr host2;
      Addr guest3;
      Addr host3;
   }
   FastCacheSet;

/*global*/ __attribute__((aligned(64)))
FastCacheSet VG_(tt_fast)[VG_TT_FAST_SETS];
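
/* A sketch (not the real code -- the actual lookup is hand-written
   assembly in m_dispatch/dispatch-<platform>.S) of what the
   dispatcher's fast path does with this table:

      FastCacheSet* set = &VG_(tt_fast)[VG_TT_FAST_HASH(guest_pc)];
      if      (set->guest0 == guest_pc) jump_to(set->host0);
      else if (set->guest1 == guest_pc) jump_to(set->host1);
      else if (set->guest2 == guest_pc) jump_to(set->host2);
      else if (set->guest3 == guest_pc) jump_to(set->host3);
      else fall back to the full lookup (VG_(search_transtab))

   setFastCacheEntry() below keeps each set in MRU-first order, so a
   hit on guest0 is the expected common case. */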

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups  = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;
static ULong n_sectors_recycled = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntryC* index_tteC ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->ttC && s->ttH);
   TTEntryC* tteC = &s->ttC[tteNo];
   TTEntryH* tteH = &s->ttH[tteNo];
   vg_assert(tteH->status == InUse);
   return tteC;
}

static inline TTEntryH* index_tteH ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->ttH);
   TTEntryH* tteH = &s->ttH[tteNo];
   vg_assert(tteH->status == InUse);
   return tteH;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = INV_SNO; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = INV_SNO; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntryC__init ( TTEntryC* tteC )
{
   VG_(bzero_inline)(tteC, sizeof(*tteC));
}

static void TTEntryH__init ( TTEntryH* tteH )
{
   VG_(bzero_inline)(tteH, sizeof(*tteH));
}

static UWord InEdgeArr__size ( const InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->edges.var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->edges.var);
      iea->edges.var = NULL;
      iea->has_var = False;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->edges.fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->edges.fixed[i] = iea->edges.fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->edges.var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                                  ttaux_free,
                                  sizeof(InEdge));
         VG_(hintSizeXA) (var, iea->n_fixed + 1);
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(var, &iea->edges.fixed[i]);
         }
         VG_(addToXA)(var, ie);
         iea->n_fixed = 0;
         iea->has_var = True;
         iea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         iea->edges.fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->edges.var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->edges.var);
      oea->edges.var = NULL;
      oea->has_var = False;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->edges.fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->edges.fixed[i] = oea->edges.fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->edges.var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                                  ttaux_free,
                                  sizeof(OutEdge));
         VG_(hintSizeXA) (var, oea->n_fixed+1);
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(var, &oea->edges.fixed[i]);
         }
         VG_(addToXA)(var, oe);
         oea->n_fixed = 0;
         oea->has_var = True;
         oea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         oea->edges.fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
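
/* Note: this comparator deliberately reports any two overlapping
   extents as "equal".  Because the extents in a sector's host_extents
   are constructed in strictly non-overlapping ascending order, it
   yields a total order over that array, so find_TTEntry_from_hcode()
   below can binary-search it (via VG_(lookupXA_UNSAFE)) with a
   length-1 key to find the extent, if any, containing a given host
   address. */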

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const TTEno tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%lu tt.tcptr 0x%p\n",           \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->ttC[tteNo].entry, sec->ttC[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->ttH[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents which was
      inserted for an entry which was changed to Deleted then
      re-used after.  If this entry was re-used, then its tcptr
      is >= the host_extents start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      entry or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->ttC[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
                              /*OUT*/TTEno* from_tteNo,
                              void* hcode )
{
   SECno i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      SECno sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      const Sector* sec = &sectors[sno];
      const XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         TTEno tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot be
            patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->ttH[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->ttC[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( const void* hcode )
{
   SECno i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == INV_SNO)
         break; /* run out of sectors to search */
      if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
          && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
                                    + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              SECno to_sNo,
                              TTEno to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntryC* to_tteC = index_tteC(to_sNo, to_tteNo);
   void* host_code = ((UChar*)to_tteC->tcptr)
                     + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   SECno from_sNo   = INV_SNO;
   TTEno from_tteNo = INV_TTE;
   Bool  from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntryC* from_tteC = index_tteC(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tteC->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tteC->in_edges, &ie);
   OutEdgeArr__add(&from_tteC->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntryC* tteC
      = index_tteC(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tteC->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           SECno here_sNo, TTEno here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(arch_host);
   TTEntryC* here_tteC = index_tteC(here_sNo, here_tteNo);
   TTEntryH* here_tteH = index_tteH(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%lu tt.tcptr 0x%p\n",
                  here_tteC->entry, here_tteC->tcptr);
   vg_assert(here_tteH->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tteC->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tteC->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tteC->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntryC* from_tteC = index_tteC(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tteC->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tteC->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tteC->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tteC->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tteC->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntryC* to_tteC = index_tteC(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tteC->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tteC->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tteC->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tteC->in_edges);
   OutEdgeArr__makeEmpty(&here_tteC->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static EClassNo range_to_eclass ( Addr start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
                                  const TTEntryH* tteH )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   UInt i, j, n_ec;
   EClassNo r;

   vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);

   n_ec = 0;
   for (i = 0; i < tteH->vge_n_used; i++) {
      r = range_to_eclass( tteH->vge_base[i], tteH->vge_len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
{
   Int    old_sz, new_sz, i, r;
   TTEno  *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
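      /* Growth sequence: 8, 16, 32, 64, 96, 144, 216, ... -- double
         while small, then grow by 1.5x, to bound wasted slack in
         large arrays. */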
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(TTEno));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
{
   Int i, r;
   EClassNo eclasses[3];
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   TTEntryH* tteH = &sec->ttH[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, tteH );
   vg_assert(r >= 1 && r <= 3);

   TTEntryC* tteC = &sec->ttC[tteno];
   tteC->n_tte2ec = r;
   for (i = 0; i < r; i++) {
      tteC->tte2ec_ec[i] = eclasses[i];
      tteC->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      j, k, n, ec_idx;
   EClassNo i;
   EClassNo ec_num;
   TTEno    tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         TTEntryC* tteC = &sec->ttC[tteno];
         TTEntryH* tteH = &sec->ttH[tteno];
         if (tteH->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
            BAD("tteC->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tteC->n_tte2ec; k++) {
            if (k < tteC->n_tte2ec-1
                && tteC->tte2ec_ec[k] >= tteC->tte2ec_ec[k+1])
               BAD("tteC->tte2ec_ec[..] out of order");
            ec_num = tteC->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tteC->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tteC->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tteC->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntries
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {

      TTEntryC* tteC = &sec->ttC[tteno];
      TTEntryH* tteH = &sec->ttH[tteno];
      if (tteH->status == Empty || tteH->status == Deleted) {
         if (tteC->n_tte2ec != 0)
            BAD("tteC->n_tte2ec nonzero for unused tte");
         continue;
      }

      vg_assert(tteH->status == InUse);

      if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
         BAD("tteC->n_tte2ec out of range(2)");

      for (j = 0; j < tteC->n_tte2ec; j++) {
         ec_num = tteC->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tteC->tte2ec_ec[..] out of range");
         ec_idx = tteC->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tteC->tte2ec_ix[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != tteno)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   SECno i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO
          || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != INV_SNO)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   SECno   sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail "
                       "(expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}


/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( const VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static UInt TTEntryH__osize ( const TTEntryH* tteH )
{
   UInt i, n = 0;
   for (i = 0; i < tteH->vge_n_used; i++)
      n += (UInt)tteH->vge_len[i];
   return n;
}

static Bool isValidSector ( SECno sector )
{
   if (sector == INV_SNO || sector >= n_sectors)
      return False;
   return True;
}

static inline HTTno HASH_TT ( Addr key )
{
   UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return (HTTno)(k32 % N_HTTES_PER_SECTOR);
}
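
/* The hash folds the two 32-bit halves of the address together, then
   rotates right by 7 so that the fast-varying low-order bits of code
   addresses influence the whole word, and finally reduces mod the
   prime table size (N_HTTES_PER_SECTOR == 65521), spreading nearby
   guest addresses across the table. */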

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   for (UWord j = 0; j < VG_TT_FAST_SETS; j++) {
      FastCacheSet* set = &VG_(tt_fast)[j];
      set->guest0 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest1 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest2 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest3 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   n_fast_flushes++;
}

/* Invalidate a single fast cache entry. */
static void invalidateFastCacheEntry ( Addr guest )
{
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(guest != TRANSTAB_BOGUS_GUEST_ADDR);
   /* If any entry in the line is the right one, just set it to
      TRANSTAB_BOGUS_GUEST_ADDR.  Doing so ensures that the entry will
      never be used in future, so it will eventually fall off the end
      of the line, due to LRU replacement, and be replaced with
      something that's actually useful. */
   UWord setNo = (UInt)VG_TT_FAST_HASH(guest);
   FastCacheSet* set = &VG_(tt_fast)[setNo];
   if (set->guest0 == guest) {
      set->guest0 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest1 == guest) {
      set->guest1 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest2 == guest) {
      set->guest2 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest3 == guest) {
      set->guest3 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
}

static void setFastCacheEntry ( Addr guest, ULong* tcptr )
{
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(guest != TRANSTAB_BOGUS_GUEST_ADDR);
   /* Shift all entries along one, so that the LRU one disappears, and put
      the new entry at the MRU position. */
   UWord setNo = (UInt)VG_TT_FAST_HASH(guest);
   FastCacheSet* set = &VG_(tt_fast)[setNo];
   set->host3  = set->host2;
   set->guest3 = set->guest2;
   set->host2  = set->host1;
   set->guest2 = set->guest1;
   set->host1  = set->host0;
   set->guest1 = set->guest0;
   set->host0  = (Addr)tcptr;
   set->guest0 = guest;
   n_fast_updates++;
}


static TTEno get_empty_tt_slot(SECno sNo)
{
   TTEno i;

   i = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = sectors[sNo].ttC[i].usage.next_empty_tte;

   vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);

   return i;
}

static void add_to_empty_tt_list (SECno sNo, TTEno tteno)
{
   sectors[sNo].ttC[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = tteno;
}

static void initialiseSector ( SECno sno )
{
   UInt i;
   SysRes sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->ttC == NULL);
      vg_assert(sec->ttH == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         vg_assert(sec->ec2tte_size[e] == 0);
         vg_assert(sec->ec2tte_used[e] == 0);
         vg_assert(sec->ec2tte[e] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntryC) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TTC)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntryC) );
         /*NOTREACHED*/
      }
      sec->ttC = (TTEntryC*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntryH) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TTH)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntryH) );
         /*NOTREACHED*/
      }
      sec->ttH = (TTEntryH*)(Addr)sr_Res(sres);

      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         sec->ttH[ei].status   = Empty;
         sec->ttC[ei].n_tte2ec = 0;
         add_to_empty_tt_list(sno, ei);
      }

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
                                     N_HTTES_PER_SECTOR * sizeof(TTEno) );
         /*NOTREACHED*/
      }
      sec->htt = (TTEno*)(Addr)sr_Res(sres);
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == INV_SNO)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
      n_sectors_recycled++;

      vg_assert(sec->ttC != NULL);
      vg_assert(sec->ttH != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         if (sec->ttH[ei].status == InUse) {
            vg_assert(sec->ttC[ei].n_tte2ec >= 1);
            vg_assert(sec->ttC[ei].n_tte2ec <= 3);
            n_dump_osize += TTEntryH__osize(&sec->ttH[ei]);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VexGuestExtents vge_tmp;
               TTEntryH__to_VexGuestExtents( &vge_tmp, &sec->ttH[ei] );
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->ttC[ei].entry, vge_tmp );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, ei);
         } else {
            vg_assert(sec->ttC[ei].n_tte2ec == 0);
         }
         sec->ttH[ei].status   = Empty;
         sec->ttC[ei].n_tte2ec = 0;
         add_to_empty_tt_list(sno, ei);
      }
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         if (sec->ec2tte_size[e] == 0) {
            vg_assert(sec->ec2tte_used[e] == 0);
            vg_assert(sec->ec2tte[e] == NULL);
         } else {
            vg_assert(sec->ec2tte[e] != NULL);
            ttaux_free(sec->ec2tte[e]);
            sec->ec2tte[e] = NULL;
            sec->ec2tte_size[e] = 0;
            sec->ec2tte_used[e] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      SECno ix;
      for (ix = 0; ix < n_sectors; ix++) {
         if (sector_search_order[ix] == sno)
            break;
      }
      vg_assert(ix >= 0 && ix < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( const VexGuestExtents* vge,
                           Addr             entry,
                           Addr             code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs )
{
   Int    tcAvailQ, reqdQ, y;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;
1775 /* Will it fit in tc? */
1776 tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1777 - ((ULong*)(sectors[y].tc_next));
1778 vg_assert(tcAvailQ >= 0);
1779 vg_assert(tcAvailQ <= tc_sector_szQ);
1781 if (tcAvailQ < reqdQ
1782 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
1783 /* No. So move on to the next sector. Either it's never been
1784 used before, in which case it will get its tt/tc allocated
1785 now, or it has been used before, in which case it is set to be
1786 empty, hence throwing out the oldest sector. */
1787 vg_assert(tc_sector_szQ > 0);
1788 Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
1789 / N_HTTES_PER_SECTOR;
1790 Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
1791 / tc_sector_szQ;
1792 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
1793 VG_(dmsg)("transtab: "
1794 "declare sector %d full "
1795 "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
1796 y, tt_loading_pct, tc_loading_pct,
1797 8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
1799 youngest_sector++;
1800 if (youngest_sector >= n_sectors)
1801 youngest_sector = 0;
1802 y = youngest_sector;
1803 initialiseSector(y);
1806 /* Be sure ... */
1807 tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1808 - ((ULong*)(sectors[y].tc_next));
1809 vg_assert(tcAvailQ >= 0);
1810 vg_assert(tcAvailQ <= tc_sector_szQ);
1811 vg_assert(tcAvailQ >= reqdQ);
1812 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
1813 vg_assert(sectors[y].tt_n_inuse >= 0);
1815 /* Copy into tc. */
1816 tcptr = sectors[y].tc_next;
1817 vg_assert(tcptr >= &sectors[y].tc[0]);
1818 vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
1820 dstP = (UChar*)tcptr;
1821 srcP = (UChar*)code;
1822 VG_(memcpy)(dstP, srcP, code_len);
1823 sectors[y].tc_next += reqdQ;
1824 sectors[y].tt_n_inuse++;
1826 /* more paranoia */
1827 tcptr2 = sectors[y].tc_next;
1828 vg_assert(tcptr2 >= &sectors[y].tc[0]);
1829 vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1831 /* Find an empty tt slot, and use it. There must be such a slot
1832 since tt is never allowed to get completely full. */
1833 TTEno tteix = get_empty_tt_slot(y);
1834 TTEntryC__init(&sectors[y].ttC[tteix]);
1835 TTEntryH__init(&sectors[y].ttH[tteix]);
1836 sectors[y].ttC[tteix].tcptr = tcptr;
1837 sectors[y].ttC[tteix].usage.prof.count = 0;
1838 sectors[y].ttC[tteix].usage.prof.weight =
1839 n_guest_instrs == 0 ? 1 : n_guest_instrs;
1840 sectors[y].ttC[tteix].entry = entry;
1841 TTEntryH__from_VexGuestExtents( &sectors[y].ttH[tteix], vge );
1842 sectors[y].ttH[tteix].status = InUse;
1844 // Point an htt entry to the tt slot
1845 HTTno htti = HASH_TT(entry);
1846 vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
1847 while (True) {
1848 if (sectors[y].htt[htti] == HTT_EMPTY
1849 || sectors[y].htt[htti] == HTT_DELETED)
1850 break;
1851 htti++;
1852 if (htti >= N_HTTES_PER_SECTOR)
1853 htti = 0;
1855 sectors[y].htt[htti] = tteix;
1857 /* Patch in the profile counter location, if necessary. */
1858 if (offs_profInc != -1) {
1859 vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
1860 VexArch arch_host = VexArch_INVALID;
1861 VexArchInfo archinfo_host;
1862 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
1863 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
1864 VexEndness endness_host = archinfo_host.endness;
1865 VexInvalRange vir
1866 = LibVEX_PatchProfInc( arch_host, endness_host,
1867 dstP + offs_profInc,
1868 &sectors[y].ttC[tteix].usage.prof.count );
1869 VG_(invalidate_icache)( (void*)vir.start, vir.len );
1872 VG_(invalidate_icache)( dstP, code_len );
1874 /* Add this entry to the host_extents map, checking that we're
1875 adding in order. */
1876 { HostExtent hx;
1877 hx.start = (UChar*)tcptr;
1878 hx.len = code_len;
1879 hx.tteNo = tteix;
1880 vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
1881 XArray* hx_array = sectors[y].host_extents;
1882 vg_assert(hx_array);
1883 Word n = VG_(sizeXA)(hx_array);
1884 if (n > 0) {
1885 HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
1886 vg_assert(hx_prev->start + hx_prev->len <= hx.start);
1888 VG_(addToXA)(hx_array, &hx);
1889 if (DEBUG_TRANSTAB)
1890 VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
1891 hx.start, hx.len, y, tteix);
1894 /* Update the fast-cache. */
1895 setFastCacheEntry( entry, tcptr );
1897 /* Note the eclass numbers for this translation. */
1898 upd_eclasses_after_add( &sectors[y], tteix );
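
/* A minimal sketch (not compiled in) of the open-addressing discipline
   used for htt[] just above.  The names toy_* are hypothetical and the
   table is made tiny for readability; the real table has
   N_HTTES_PER_SECTOR (prime) slots, and because a sector is declared
   full well before its tt fills, a free slot always exists and so the
   probe loop terminates. */
#if 0
#define TOY_N_SLOTS 7         /* prime, like N_HTTES_PER_SECTOR */
#define TOY_EMPTY   0xFFFE    /* plays the role of HTT_EMPTY */
#define TOY_DELETED 0xFFFF    /* plays the role of HTT_DELETED */

static UShort toy_htt[TOY_N_SLOTS];

static void toy_init ( void )
{
   /* Mirrors initialiseSector filling htt[] with HTT_EMPTY. */
   for (UInt k = 0; k < TOY_N_SLOTS; k++)
      toy_htt[k] = TOY_EMPTY;
}

static void toy_insert ( Addr guest_addr, UShort tteix )
{
   UInt k = (UInt)(guest_addr % TOY_N_SLOTS);  /* stands in for HASH_TT */
   /* Linear probing: advance, with wraparound, until a reusable slot. */
   while (toy_htt[k] != TOY_EMPTY && toy_htt[k] != TOY_DELETED) {
      k++;
      if (k == TOY_N_SLOTS)
         k = 0;
   }
   toy_htt[k] = tteix;
}
#endif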
/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/Addr*  res_hcode,
                            /*OUT*/SECno* res_sNo,
                            /*OUT*/TTEno* res_tteNo,
                            Addr          guest_addr,
                            Bool          upd_cache )
{
   SECno i, sno;
   HTTno j, k, kstart;
   TTEno tti;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         tti = sectors[sno].htt[k];
         if (tti < N_TTES_PER_SECTOR
             && sectors[sno].ttC[tti].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].ttC[tti].tcptr );
            if (res_hcode)
               *res_hcode = (Addr)sectors[sno].ttC[tti].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = tti;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
         if (sectors[sno].htt[k] == HTT_EMPTY)
            break; /* not found in this sector */
         k++;
         if (k == N_HTTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
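
/* The "pull one step closer to the front" trick above is the classic
   transposition heuristic for self-organising search lists.  A
   standalone sketch (not compiled in; 'order', 'n' and 'wanted' are
   hypothetical names) showing just the reordering step: */
#if 0
static Bool transpose_find ( Int* order, Int n, Int wanted )
{
   for (Int i = 0; i < n; i++) {
      if (order[i] == wanted) {
         /* Hit: swap with the predecessor, so repeatedly-sought
            values migrate one slot per lookup towards index 0,
            without the churn of a full move-to-front. */
         if (i > 0) {
            Int tmp = order[i-1];
            order[i-1] = order[i];
            order[i] = tmp;
         }
         return True;
      }
   }
   return False;
}
#endif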
/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
{
   Addr e1 = s1 + r1 - 1;
   Addr e2 = s2 + r2 - 1;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

static inline
Bool overlaps ( Addr start, ULong range, const TTEntryH* tteH )
{
   if (overlap1(start, range, tteH->vge_base[0], tteH->vge_len[0]))
      return True;
   if (tteH->vge_n_used < 2)
      return False;
   if (overlap1(start, range, tteH->vge_base[1], tteH->vge_len[1]))
      return True;
   if (tteH->vge_n_used < 3)
      return False;
   if (overlap1(start, range, tteH->vge_base[2], tteH->vge_len[2]))
      return True;
   return False;
}
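
/* Worked example for overlap1: the ranges are treated as closed
   intervals [s, s+r-1], so two ranges overlap exactly when neither
   ends before the other starts.  Hypothetical self-checks (not
   compiled in) making the boundary cases explicit.  Note the
   arithmetic assumes s + r - 1 does not wrap the top of the address
   space. */
#if 0
static void overlap1_examples ( void )
{
   /* [0x1000, 0x100F] vs various one- and two-byte neighbours: */
   vg_assert(  overlap1(0x1000, 0x10, 0x100F, 1) ); /* touches last byte */
   vg_assert( !overlap1(0x1000, 0x10, 0x1010, 1) ); /* one past the end  */
   vg_assert(  overlap1(0x1000, 0x10, 0x0FFF, 2) ); /* straddles start   */
}
#endif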
/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*OUT*/Addr* ga_deleted,
                         /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int      i, ec_idx;
   EClassNo ec_num;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   TTEntryC* tteC = &sec->ttC[tteno];
   TTEntryH* tteH = &sec->ttH[tteno];
   vg_assert(tteH->status == InUse);
   vg_assert(tteC->n_tte2ec >= 1 && tteC->n_tte2ec <= 3);

   vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);
   vg_assert(tteH->vge_base[0] != TRANSTAB_BOGUS_GUEST_ADDR);
   *ga_deleted = tteH->vge_base[0];

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tteC->n_tte2ec; i++) {
      ec_num = tteC->tte2ec_ec[i];
      ec_idx = tteC->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   /* Mark the entry as deleted in htt.
      Note: we could avoid the below hash table search by
      adding a reference from tte to its hash position in htt. */
   HTTno j;
   HTTno k = HASH_TT(tteC->entry);
   vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
   for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
      if (sec->htt[k] == tteno)
         break;
      k++;
      if (k == N_HTTES_PER_SECTOR)
         k = 0;
   }
   vg_assert(j < N_HTTES_PER_SECTOR);
   sec->htt[k] = HTT_DELETED;
   tteH->status = Deleted;
   tteC->n_tte2ec = 0;
   add_to_empty_tt_list(secNo, tteno);

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += TTEntryH__osize(tteH);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VexGuestExtents vge_tmp;
      TTEntryH__to_VexGuestExtents( &vge_tmp, tteH );
      VG_TDICT_CALL( tool_discard_superblock_info, tteC->entry, vge_tmp );
   }
}
/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
SizeT delete_translations_in_sector_eclass ( /*OUT*/Addr* ga_deleted,
                                             /*MOD*/Sector* sec, SECno secNo,
                                             Addr guest_start, ULong range,
                                             EClassNo ec,
                                             VexArch arch_host,
                                             VexEndness endness_host )
{
   Int   i;
   TTEno tteno;
   SizeT numDeld = 0;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      TTEntryH* tteH = &sec->ttH[tteno];
      vg_assert(tteH->status == InUse);

      if (overlaps( guest_start, range, tteH )) {
         numDeld++;
         delete_tte( ga_deleted, sec, secNo, tteno, arch_host, endness_host );
      }
   }

   return numDeld;
}
/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
SizeT delete_translations_in_sector ( /*OUT*/Addr* ga_deleted,
                                      /*MOD*/Sector* sec, SECno secNo,
                                      Addr guest_start, ULong range,
                                      VexArch arch_host,
                                      VexEndness endness_host )
{
   TTEno i;
   SizeT numDeld = 0;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      /* The entire and only purpose of splitting TTEntry into cold
         and hot parts (TTEntryC and TTEntryH) is to make this search
         loop run as fast as possible, by avoiding having to pull all
         of the cold data up the memory hierarchy. */
      if (UNLIKELY(sec->ttH[i].status == InUse
                   && overlaps( guest_start, range, &sec->ttH[i] ))) {
         numDeld++;
         delete_tte( ga_deleted, sec, secNo, i, arch_host, endness_host );
      }
   }

   return numDeld;
}
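
/* To make the hot/cold split mentioned above concrete: a hypothetical
   sketch (not compiled in) of the same layout idea, with invented
   types Hot/Cold.  Scanning an array of small hot records streams far
   fewer cache lines than scanning combined records, which is exactly
   why the loop above reads only sec->ttH[]. */
#if 0
typedef struct { UChar status; Addr base; UShort len; } Hot;  /* scanned */
typedef struct { Addr entry; ULong* tcptr; ULong other[8]; } Cold; /* cold */

static Hot  hot [1000];
static Cold cold[1000];

static Int count_hits ( Addr lo, Addr hi )
{
   Int hits = 0;
   for (Int i = 0; i < 1000; i++) {
      /* Only the small 'hot' records pass through the cache here;
         cold[i] would be touched only on a hit. */
      if (hot[i].status != 0 && hot[i].base >= lo && hot[i].base < hi)
         hits++;
   }
   return hits;
}
#endif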
void VG_(discard_translations) ( Addr guest_start, ULong range,
                                 const HChar* who )
{
   Sector*  sec;
   SECno    sno;
   EClassNo ec;

   /* It is very commonly the case that a call here results in discarding of
      exactly one superblock.  As an optimisation only, use ga_deleted and
      numDeleted to detect this situation and to record the guest addr
      involved.  That is then used to avoid calling invalidateFastCache in
      this case: instead the individual entry in the fast cache is removed.
      This can reduce the overall VG_(fast_cache) miss rate significantly in
      applications that do a lot of short code discards (basically
      jit-generated code that is subsequently patched).

      ga_deleted is made to hold the guest address of the last superblock
      deleted (in this call to VG_(discard_translations)).  If more than one
      superblock is deleted (or none), then we ignore what is written to
      ga_deleted.  If exactly one superblock is deleted then ga_deleted holds
      exactly what we want and will be used.
   */
   Addr  ga_deleted = TRANSTAB_BOGUS_GUEST_ADDR;
   SizeT numDeleted = 0;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%lx, %llu) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range <= (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                       "  FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         numDeleted += delete_translations_in_sector_eclass(
                          &ga_deleted, sec, sno, guest_start, range,
                          ec, arch_host, endness_host
                       );
         numDeleted += delete_translations_in_sector_eclass(
                          &ga_deleted, sec, sno, guest_start, range,
                          ECLASS_MISC, arch_host, endness_host
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                       "  SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         numDeleted += delete_translations_in_sector(
                          &ga_deleted, sec, sno, guest_start, range,
                          arch_host, endness_host
                       );
      }
   }

   if (numDeleted == 0) {
      // "ga_deleted was never set"
      vg_assert(ga_deleted == TRANSTAB_BOGUS_GUEST_ADDR);
   } else
   if (numDeleted == 1) {
      // "ga_deleted was set to something valid"
      vg_assert(ga_deleted != TRANSTAB_BOGUS_GUEST_ADDR);
      // Just invalidate the individual VG_(tt_fast) cache entry \o/
      invalidateFastCacheEntry(ga_deleted);
      Addr fake_host = 0;
      vg_assert(! VG_(lookupInFastCache)(&fake_host, ga_deleted));
   } else {
      // "ga_deleted was set to something valid"
      vg_assert(ga_deleted != TRANSTAB_BOGUS_GUEST_ADDR);
      // Nuke the entire VG_(tt_fast) cache.  Sigh.
      invalidateFastCache();
   }

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      TTEno i;
      Bool  sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            TTEntryH* tteH = &sec->ttH[i];
            if (tteH->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, tteH ));
         }
      }
   }
}
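
/* Two worked examples of the fast/slow choice above, assuming purely
   for illustration an eclass granularity of 1 << ECLASS_SHIFT = 8192
   bytes (the real ECLASS_SHIFT is defined earlier in this file):

   - an icache flush of one 64-byte line: the range fits well inside
     the class granularity, so provided it does not straddle a class
     boundary, only that class's list plus ECLASS_MISC is scanned; and
     if exactly one superblock dies, only its single VG_(tt_fast)
     entry is invalidated;

   - an munmap of a 1 MB region: the range exceeds the granularity,
     ec stays ECLASS_MISC, every InUse entry of every sector is
     inspected, and (assuming more than one superblock dies) the
     whole fast cache is flushed. */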
/* Whether or not tools may discard translations. */
Bool VG_(ok_to_discard_translations) = False;

/* This function is exported to tools which can use it to discard
   translations, provided it is safe to do so. */
void VG_(discard_translations_safely) ( Addr start, SizeT len,
                                        const HChar* who )
{
   vg_assert(VG_(ok_to_discard_translations));
   VG_(discard_translations)(start, len, who);
}


/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
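
/* Capacity arithmetic following from the defines above: the backing
   store is N_UNREDIR_TT * UNREDIR_SZB = 500 * 1000 = 500000 bytes,
   which is N_UNREDIR_TCQ = 500000 / sizeof(ULong) = 62500 quadword
   slots.  A self-check sketch (not compiled in): */
#if 0
static void unredir_capacity_check ( void )
{
   vg_assert(N_UNREDIR_TCQ == 62500); /* holds wherever ULong is 8 bytes */
}
#endif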
typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(Addr)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}
/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
                                   Addr entry,
                                   Addr code,
                                   UInt code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc quadwords are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}
Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result,
                                    Addr         guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse) {
         /* Fake up a TTEntryH just so we can pass it to overlaps()
            without having to make a new version of overlaps() just for
            this rare case. */
         TTEntryH tmp_tteH;
         TTEntryH__from_VexGuestExtents( &tmp_tteH, &unredir_tt[i].vge );
         tmp_tteH.status = Empty; /* Completely irrelevant; pure paranoia. */
         if (overlaps( guest_start, range, &tmp_tteH )) {
            unredir_tt[i].inUse = False;
         }
      }
   }
}
/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(TTEno) == sizeof(HTTno));
   vg_assert(sizeof(TTEno) == 2);
   vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
   vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
   vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
   vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);

   /* check fast cache entries really are 8 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheSet) == 8 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SETS * sizeof(FastCacheSet));
   /* check fast cache entries have the layout that the handwritten assembly
      fragments assume. */
   vg_assert(sizeof(FastCacheSet) == (1 << VG_FAST_CACHE_SET_BITS));
   vg_assert(offsetof(FastCacheSet,guest0) == FCS_g0);
   vg_assert(offsetof(FastCacheSet,host0)  == FCS_h0);
   vg_assert(offsetof(FastCacheSet,guest1) == FCS_g1);
   vg_assert(offsetof(FastCacheSet,host1)  == FCS_h1);
   vg_assert(offsetof(FastCacheSet,guest2) == FCS_g2);
   vg_assert(offsetof(FastCacheSet,host2)  == FCS_h2);
   vg_assert(offsetof(FastCacheSet,guest3) == FCS_g3);
   vg_assert(offsetof(FastCacheSet,host3)  == FCS_h3);
   vg_assert(offsetof(FastCacheSet,guest0) == 0 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,host0)  == 1 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,guest1) == 2 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,host1)  == 3 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,guest2) == 4 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,host2)  == 5 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,guest3) == 6 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,host3)  == 7 * sizeof(Addr));

   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_64_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   /* The TTEntryH size is critical for keeping the LLC miss rate down
      when doing a lot of discarding.  Hence check it here.  We also
      have a lot of TTEntryCs so let's check that too. */
   if (sizeof(HWord) == 8) {
      vg_assert(sizeof(TTEntryH) <= 32);
      vg_assert(sizeof(TTEntryC) <= 112);
   }
   else if (sizeof(HWord) == 4) {
      vg_assert(sizeof(TTEntryH) <= 20);
#     if defined(VGP_ppc32_linux) || defined(VGP_mips32_linux) \
         || (defined(VGP_mips64_linux) && defined(VGABI_N32)) \
         || defined(VGP_arm_linux)
      /* On PPC32, MIPS32, ARM32 platforms, alignof(ULong) == 8, so the
         structure is larger than on other 32 bit targets. */
      vg_assert(sizeof(TTEntryC) <= 96);
#     else
      vg_assert(sizeof(TTEntryC) <= 88);
#     endif
   }
   else {
      vg_assert(0);
   }

   /* All the hassle about converting between TTEntryH and
      VexGuestExtents was so as to ensure the following. */
   vg_assert(sizeof(TTEntryH) == sizeof(VexGuestExtents));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be. */
   if (VG_(clo_avg_transtab_entry_size) == 0)
      avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8;
   else
      avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);

   n_sectors = VG_(clo_num_transtab_sectors);
   vg_assert(n_sectors >= MIN_N_SECTORS);
   vg_assert(n_sectors <= MAX_N_SECTORS);

   /* Initialise the sectors, even the ones we aren't going to use.
      Set all fields to zero. */
   youngest_sector = 0;
   for (i = 0; i < MAX_N_SECTORS; i++)
      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));

   /* Initialise the sector_search_order hint table, including the
      entries we aren't going to use. */
   for (i = 0; i < MAX_N_SECTORS; i++)
      sector_search_order[i] = INV_SNO;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
       || VG_(debugLog_getLevel) () >= 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
         "%stool provided default %u\n",
         VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
         VG_(clo_avg_transtab_entry_size),
         VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
         VG_(details).avg_translation_sizeB);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %'d bytes each = %'d total TC\n",
         n_sectors, 8 * tc_sector_szQ,
         n_sectors * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %'d tables[%d] of C %'d + H %'d bytes each "
         "= %'d total TT\n",
         n_sectors, N_TTES_PER_SECTOR,
         (int)(N_TTES_PER_SECTOR * sizeof(TTEntryC)),
         (int)(N_TTES_PER_SECTOR * sizeof(TTEntryH)),
         (int)(n_sectors * N_TTES_PER_SECTOR
               * (sizeof(TTEntryC) + sizeof(TTEntryH))));
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tt entries each = %'d total tt entries\n",
         N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d htt[%d] of %'d bytes each = %'d total HTT"
         " (htt[%d] %d%% max occup)\n",
         n_sectors, N_HTTES_PER_SECTOR,
         (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
         (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
         N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
   }

   if (0) {
      VG_(printf)("XXXX sizeof(VexGuestExtents) = %d\n",
                  (Int)sizeof(VexGuestExtents));
      VG_(printf)("XXXX sizeof(InEdge)     = %d\n", (Int)sizeof(InEdge));
      VG_(printf)("XXXX sizeof(OutEdge)    = %d\n", (Int)sizeof(OutEdge));
      VG_(printf)("XXXX sizeof(InEdgeArr)  = %d\n", (Int)sizeof(InEdgeArr));
      VG_(printf)("XXXX sizeof(OutEdgeArr) = %d\n", (Int)sizeof(OutEdgeArr));
      VG_(printf)("XXXX sizeof(TTEntryC)   = %d\n", (Int)sizeof(TTEntryC));
      VG_(printf)("XXXX sizeof(TTEntryH)   = %d\n", (Int)sizeof(TTEntryH));
   }
}
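
/* Worked example of the tc sizing arithmetic above: if the effective
   average entry size were 172 bytes (a made-up figure; real tools set
   VG_(details).avg_translation_sizeB), then
   avg_codeszQ = (172 + 7) / 8 = 22, so each sector gets
   tc_sector_szQ = N_TTES_PER_SECTOR * (1 + 22) quadwords of code
   space, i.e. 23 quadwords (184 bytes) per tt entry, safely inside
   the asserted [2 .. 100] * N_TTES_PER_SECTOR window. */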
/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static Double safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : (Double)a / (Double)b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

UInt VG_(get_bbs_discarded_or_dumped) ( void )
{
   return n_disc_count + n_dump_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'llu "
                "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
                "avg tce size %llu\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(n_in_tsize, n_in_osize),
                n_in_sc_count,
                n_in_tsize / (n_in_count ? n_in_count : 1));
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?) "
                "(sectors recycled %'llu)\n",
                n_dump_count, n_dump_osize, n_sectors_recycled );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (DEBUG_TRANSTAB) {
      VG_(printf)("\n");
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
         if (e % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}
/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( const TTEntryC* tteC )
{
   return ((ULong)tteC->usage.prof.weight) * ((ULong)tteC->usage.prof.count);
}

ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   SECno sno;
   Int   r, s;
   ULong score_total;
   TTEno i;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (s = 0; s < n_tops; s++) {
      tops[s].addr  = 0;
      tops[s].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].ttH[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].ttC[i]);
         /* Find the rank for sectors[sno].tt{C,H}[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].ttC[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].ttC[i].entry;
            tops[r].score = score( &sectors[sno].ttC[i] );
         }
      }
   }

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].ttH[i].status != InUse)
            continue;
         sectors[sno].ttC[i].usage.prof.count = 0;
      }
   }

   return score_total;
}
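
/* Hypothetical usage sketch (not compiled in): a caller can harvest
   the ten hottest superblocks seen since the previous harvest,
   because the counters are zeroed on every call: */
#if 0
static void show_hottest_superblocks ( void )
{
   SBProfEntry tops[10];
   ULong total = VG_(get_SB_profile)( tops, 10 );
   if (total > 0 && tops[0].addr != 0)
      VG_(printf)("hottest SB: guest 0x%lx, score %llu of %llu total\n",
                  tops[0].addr, tops[0].score, total);
}
#endif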
/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/