/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0
/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static SECno n_sectors = 0;

/* Average size of a transtab code entry. 0 means to use the tool
   provided default. */
UInt VG_(clo_avg_transtab_entry_size) = 0;
/*------------------ CONSTANTS ------------------*/

/* Number of entries in the hash table of each sector.  This needs to
   be a prime number to work properly; it must be <= 65535 so that a
   TTE index fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED,
   HTT_DELETED) to denote 'deleted' and 0xFFFE (HTT_EMPTY) to denote
   'empty' in the hash table.
   It is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
#define HTT_DELETED     EC2TTE_DELETED
#define HTT_EMPTY       0xFFFE
// HTTno is the Sector->htt hash table index.  Must be the same type as TTEno.
typedef UShort HTTno;

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR \
           ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
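
/* Worked example, using the default constants above:
      (65521 * 65) / 100 == 42588
   so each sector's tt is declared full once 42588 of the 65521 hash
   slots are in use, keeping the hash table's load factor at about
   0.65. */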
/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
   ECLASS_N must fit in an EClassNo. */
#define ECLASS_SHIFT 13
#define ECLASS_WIDTH 9
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);
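
/* Illustrative example of the binning, using the constants above:
   ECLASS_SHIFT == 13 means each bin covers an 8192-byte (2^13)
   stretch of address space, and ECLASS_WIDTH == 9 gives 512 such
   bins (plus ECLASS_MISC).  So, for instance, a guest range
   [0x4000, 0x4FFF] lies entirely within bin (0x4000 >> 13) & 511
   == 2, whereas a range straddling an 8192-byte bin boundary gets
   filed under ECLASS_MISC.  See range_to_eclass() below. */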
typedef UShort EClassNo;


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      SECno from_sNo;   /* sector number */
      TTEno from_tteNo; /* TTE number in given sector */
      UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
                                               where the patch is */
      Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      SECno to_sNo;    /* sector number */
      TTEno to_tteNo;  /* TTE number in given sector */
      UInt  from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;
#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
      union {
         InEdge   fixed[N_FIXED_IN_EDGE_ARR];   /* if !has_var */
         XArray*  var; /* XArray* of InEdge */  /* if  has_var */
      } edges;
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      Bool      has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt      n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      union {
         OutEdge   fixed[N_FIXED_OUT_EDGE_ARR];   /* if !has_var */
         XArray*   var; /* XArray* of OutEdge */  /* if  has_var */
      } edges;
   }
   OutEdgeArr;
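
/* Both edge-array types implement the same small-vector scheme: up
   to N_FIXED_*_EDGE_ARR edges live inline in .edges.fixed, and only
   when that overflows is an XArray allocated and the fixed entries
   copied across (see InEdgeArr__add / OutEdgeArr__add below).  For
   example, the first three in-edges of a block cost no heap
   allocation at all; adding a fourth flips has_var to True. */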
/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  These are split into hot and cold parts,
   TTEntryH and TTEntryC, so as to be more cache-friendly
   (a.k.a. "faster") when searching for translations that intersect
   with a certain guest code address range, for deletion.  In the
   worst case this can involve a sequential scan through all the hot
   parts, so they are packed as compactly as possible -- 32 bytes on a
   64-bit platform, 20 bytes on a 32-bit platform.

   Each TTEntry is logically a matching cold-and-hot pair, and in fact
   it was originally one structure.  First, the cold part.
*/
typedef
   struct {
      union {
         struct {
            /* Profiling only: the count and weight (arbitrary meaning)
               for this translation.  Weight is a property of the
               translation itself and computed once when the translation
               is created.  Count is an entry count for the translation
               and is incremented by 1 every time the translation is
               used, if we are profiling. */
            ULong    count;
            UShort   weight;
         } prof; // if status == InUse
         TTEno next_empty_tte; // if status != InUse
      } usage;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr entry;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort   n_tte2ec;      // # tte2ec pointers (1 to 3)
      EClassNo tte2ec_ec[3];  // for each, the eclass #
      UInt     tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
         1. erased (as part of the oldest sector cleanup) when the
            youngest sector is full.
         2. discarded due to calls to VG_(discard_translations).
            VG_(discard_translations) sets the status of the
            translation to 'Deleted'.
            Amongst others, the gdbserver discards one or more
            translations when a breakpoint is inserted or removed at
            an Addr, or when single stepping mode is enabled/disabled,
            or when a translation is instrumented for gdbserver
            (all the target jumps of this translation are
            invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode check that the 'from' translation
         still exists before doing the patching.

         Is it safe to erase or discard the current translation E
         being executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a no-longer-found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not-yet-translated
         jump is to be executed.  The execution of the Deleted
         translation E will continue till a non-patched jump is
         encountered.  This situation is then similar to the 'erasing'
         case above: the current translation E can be erased or
         overwritten, as the execution will continue at the new
         translation N. */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntryC;
/* And this is the hot part. */
typedef
   struct {
      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated.  Originally this was a VexGuestExtents, but that
         itself is 32 bytes on a 64-bit target, and we really want to
         squeeze in an 8-bit |status| field into the 32 byte field, so
         as to get 2 of them in a 64 byte LLC line.  Hence the
         VexGuestExtents fields are inlined, the _n_used field is
         changed to a UChar (it only ever has the values 1, 2 or 3)
         and the 8-bit status field is placed in byte 31 of the
         structure. */
      /* ORIGINALLY: VexGuestExtents vge; */
      Addr   vge_base[3];
      UShort vge_len[3];
      UChar  vge_n_used;  /* BEWARE: is UShort in VexGuestExtents */

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status : 8;
   }
   TTEntryH;
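
/* Sanity arithmetic for the packing described above, on a 64-bit
   platform: 3 * sizeof(Addr) == 24 bytes for vge_base[], plus
   3 * sizeof(UShort) == 6 bytes for vge_len[], plus 1 byte each for
   vge_n_used and status, gives exactly 32 bytes -- two TTEntryH per
   64-byte cache line, as intended. */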
/* Impedance matchers, that convert between a VexGuestExtents and a
   TTEntryH, ignoring TTEntryH::status, which doesn't exist in a
   VexGuestExtents -- it is entirely unrelated. */

/* Takes a VexGuestExtents and pushes it into a TTEntryH.  The
   TTEntryH.status field is left unchanged. */
static
inline void TTEntryH__from_VexGuestExtents ( /*MOD*/TTEntryH* tteH,
                                             const VexGuestExtents* vge )
{
   tteH->vge_base[0] = vge->base[0];
   tteH->vge_base[1] = vge->base[1];
   tteH->vge_base[2] = vge->base[2];
   tteH->vge_len[0]  = vge->len[0];
   tteH->vge_len[1]  = vge->len[1];
   tteH->vge_len[2]  = vge->len[2];
   tteH->vge_n_used  = (UChar)vge->n_used; /* BEWARE: no range check. */
}

/* Takes a TTEntryH and pushes the vge_ components into a VexGuestExtents. */
static
inline void TTEntryH__to_VexGuestExtents ( /*MOD*/VexGuestExtents* vge,
                                           const TTEntryH* tteH )
{
   vge->base[0] = tteH->vge_base[0];
   vge->base[1] = tteH->vge_base[1];
   vge->base[2] = tteH->vge_base[2];
   vge->len[0]  = tteH->vge_len[0];
   vge->len[1]  = tteH->vge_len[1];
   vge->len[2]  = tteH->vge_len[2];
   vge->n_used  = (UShort)tteH->vge_n_used;
}
/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      TTEno  tteNo;
   }
   HostExtent;
/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry usage.prof.count and usage.prof.weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* A hash table, mapping guest address to an index in the tt array.
         htt is a fixed size, always containing
         exactly N_HTTES_PER_SECTOR entries. */
      TTEno* htt;

      /* The TTEntry{C,H} arrays.  These are a fixed size, always
         containing exactly N_TTES_PER_SECTOR entries. */
      TTEntryC* ttC;
      TTEntryH* ttH;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
      TTEno empty_tt_list;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int    ec2tte_size[ECLASS_N];
      Int    ec2tte_used[ECLASS_N];
      TTEno* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;
/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_TC_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, the youngest sector should be >= 0 and <
   N_TC_SECTORS.  The initial value indicates the TT/TC system is
   not yet initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = INV_SNO;
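/* For example (illustrative), with n_sectors == 6 the fill order is
   0,1,2,3,4,5,0,1,..; when the count wraps back to sector 0,
   initialiseSector(0) throws away that sector's entire old contents
   before refilling begins, which is what makes the scheme FIFO. */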
/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  INV_SNO denotes "no entry". */
static SECno sector_search_order[MAX_N_SECTORS];
/* Fast helper for the TC.  A 4-way set-associative cache, with
   more-or-less LRU replacement.  It holds a set of recently used
   (guest address, host address) pairs.  This array is referred to
   directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
typedef
   struct {
      Addr guest0;
      Addr host0;
      Addr guest1;
      Addr host1;
      Addr guest2;
      Addr host2;
      Addr guest3;
      Addr host3;
   }
   FastCacheSet;

/*global*/ __attribute__((aligned(64)))
FastCacheSet VG_(tt_fast)[VG_TT_FAST_SETS];
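
/* Layout note: on a 64-bit host a FastCacheSet is 8 Addrs == 64
   bytes, so with the aligned(64) attribute above each set occupies
   exactly one 64-byte cache line; a single line fill therefore
   brings in all four ways of the set. */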
/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups  = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;
static ULong n_sectors_recycled = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;
/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}
/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntryC* index_tteC ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->ttC && s->ttH);
   TTEntryC* tteC = &s->ttC[tteNo];
   TTEntryH* tteH = &s->ttH[tteNo];
   vg_assert(tteH->status == InUse);
   return tteC;
}

static inline TTEntryH* index_tteH ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->ttH);
   TTEntryH* tteH = &s->ttH[tteNo];
   vg_assert(tteH->status == InUse);
   return tteH;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = INV_SNO; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = INV_SNO; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntryC__init ( TTEntryC* tteC )
{
   VG_(bzero_inline)(tteC, sizeof(*tteC));
}

static void TTEntryH__init ( TTEntryH* tteH )
{
   VG_(bzero_inline)(tteH, sizeof(*tteH));
}
static UWord InEdgeArr__size ( const InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->edges.var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->edges.var);
      iea->edges.var = NULL;
      iea->has_var = False;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->edges.fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->edges.fixed[i] = iea->edges.fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->edges.var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray* var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                                  ttaux_free,
                                  sizeof(InEdge));
         VG_(hintSizeXA) (var, iea->n_fixed + 1);
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(var, &iea->edges.fixed[i]);
         }
         VG_(addToXA)(var, ie);
         iea->n_fixed = 0;
         iea->has_var = True;
         iea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         iea->edges.fixed[iea->n_fixed++] = *ie;
      }
   }
}
static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->edges.var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->edges.var);
      oea->edges.var = NULL;
      oea->has_var = False;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->edges.fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->edges.fixed[i] = oea->edges.fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->edges.var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray* var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                                  ttaux_free,
                                  sizeof(OutEdge));
         VG_(hintSizeXA) (var, oea->n_fixed+1);
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(var, &oea->edges.fixed[i]);
         }
         VG_(addToXA)(var, oe);
         oea->n_fixed = 0;
         oea->has_var = True;
         oea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         oea->edges.fixed[oea->n_fixed++] = *oe;
      }
   }
}
static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
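
/* For example: [0x100, +0x20) vs [0x200, +0x10) gives -1 (wholly
   below), and the reverse comparison gives 1; any pair of ranges
   which overlap at all compare equal (0).  Since the host_extents
   array is kept in non-overlapping order, this is a valid ordering
   for VG_(lookupXA_UNSAFE) below. */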
/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const TTEno tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%lx tt.tcptr 0x%p\n",           \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->ttC[tteNo].entry, sec->ttC[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->ttH[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents which was
      inserted for an entry which was changed to Deleted then
      re-used after.  If this entry was re-used, then its tcptr
      is >= the host_extent's start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      entry or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->ttC[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}
static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
                              /*OUT*/TTEno* from_tteNo,
                              void* hcode )
{
   SECno i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      SECno sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      const Sector* sec = &sectors[sno];
      const XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         TTEno tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot
            be patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->ttH[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->ttC[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = tteNo;
         return True;
      }
   }
   return False;
}
/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( const void* hcode )
{
   SECno i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == INV_SNO)
         break; /* run out of sectors to search */
      if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
          && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
                                    + sizeof(ULong) - 1)
         return True;
   }
   return False;
}
/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              SECno to_sNo,
                              TTEno to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntryC* to_tteC = index_tteC(to_sNo, to_tteNo);
   void* host_code = ((UChar*)to_tteC->tcptr)
                     + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   SECno from_sNo   = INV_SNO;
   TTEno from_tteNo = INV_TTE;
   Bool  from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntryC* from_tteC = index_tteC(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tteC->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tteC->in_edges, &ie);
   OutEdgeArr__add(&from_tteC->out_edges, &oe);
}
/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntryC* tteC
      = index_tteC(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tteC->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}
/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           SECno here_sNo, TTEno here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(arch_host);
   TTEntryC* here_tteC = index_tteC(here_sNo, here_tteNo);
   TTEntryH* here_tteH = index_tteH(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%lx tt.tcptr 0x%p\n",
                  here_tteC->entry, here_tteC->tcptr);
   vg_assert(here_tteH->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tteC->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tteC->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tteC->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntryC* from_tteC = index_tteC(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tteC->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tteC->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tteC->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tteC->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tteC->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntryC* to_tteC = index_tteC(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tteC->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tteC->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tteC->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tteC->in_edges);
   OutEdgeArr__makeEmpty(&here_tteC->out_edges);
}
/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static EClassNo range_to_eclass ( Addr start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
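
/* Worked example, with ECLASS_SHIFT == 13 (8192-byte bins): for
   start == 0x12000 and len == 0x100, lo == 0x12000 and hi == 0x120FF,
   so loBits == hiBits == (0x12000 >> 13) & 0x1FF == 9 and the range
   is filed in eclass 9.  A 0x200-byte range starting at 0x13F00
   crosses the 0x14000 bin boundary, so loBits != hiBits and it lands
   in ECLASS_MISC. */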
/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/
static
Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
                                  const TTEntryH* tteH )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   UInt i, j, n_ec;
   EClassNo r;

   vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);

   n_ec = 0;
   for (i = 0; i < tteH->vge_n_used; i++) {
      r = range_to_eclass( tteH->vge_base[i], tteH->vge_len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}
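
/* For example: a translation whose vge covers two guest ranges, one
   falling in bin 5 and one in bin 3, yields eclasses == {3, 5} and a
   return value of 2; if any constituent range straddles a bin
   boundary, the whole translation is classified as just
   { ECLASS_MISC }. */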
/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
{
   Int    old_sz, new_sz, i, r;
   TTEno  *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(TTEno));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}
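
/* The growth rule above gives the capacity sequence 8, 16, 32, 64,
   96, 144, 216, ..: doubling while small, then 1.5x once at or past
   64 entries, which keeps the slack bounded for the (rare) very
   popular eclasses. */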
/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
{
   Int i, r;
   EClassNo eclasses[3];
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   TTEntryH* tteH = &sec->ttH[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, tteH );
   vg_assert(r >= 1 && r <= 3);

   TTEntryC* tteC = &sec->ttC[tteno];
   tteC->n_tte2ec = r;
   for (i = 0; i < r; i++) {
      tteC->tte2ec_ec[i] = eclasses[i];
      tteC->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
   }
}
/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      j, k, n, ec_idx;
   EClassNo i;
   EClassNo ec_num;
   TTEno    tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         TTEntryC* tteC = &sec->ttC[tteno];
         TTEntryH* tteH = &sec->ttH[tteno];
         if (tteH->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
            BAD("tteC->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tteC->n_tte2ec; k++) {
            if (k < tteC->n_tte2ec-1
                && tteC->tte2ec_ec[k] >= tteC->tte2ec_ec[k+1])
               BAD("tteC->tte2ec_ec[..] out of order");
            ec_num = tteC->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tteC->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tteC->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tteC->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {

      TTEntryC* tteC = &sec->ttC[tteno];
      TTEntryH* tteH = &sec->ttH[tteno];
      if (tteH->status == Empty || tteH->status == Deleted) {
         if (tteC->n_tte2ec != 0)
            BAD("tteC->n_tte2ec nonzero for unused tte");
         continue;
      }

      vg_assert(tteH->status == InUse);

      if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
         BAD("tteC->n_tte2ec out of range(2)");

      for (j = 0; j < tteC->n_tte2ec; j++) {
         ec_num = tteC->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tteC->tte2ec_ec[..] out of range");
         ec_idx = tteC->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tteC->tte2ec_ix[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != tteno)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   SECno i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO
          || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != INV_SNO)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}
static Bool sanity_check_all_sectors ( void )
{
   SECno   sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail "
                       "(expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}
/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( const VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static UInt TTEntryH__osize ( const TTEntryH* tteH )
{
   UInt i, n = 0;
   for (i = 0; i < tteH->vge_n_used; i++)
      n += (UInt)tteH->vge_len[i];
   return n;
}

static Bool isValidSector ( SECno sector )
{
   if (sector == INV_SNO || sector >= n_sectors)
      return False;
   return True;
}

static inline HTTno HASH_TT ( Addr key )
{
   UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return (HTTno)(k32 % N_HTTES_PER_SECTOR);
}
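
/* Worked example of the hash: for guest address 0x8000 on a 64-bit
   host, kHi == 0 and kLo == 0x8000; rotating right by 7 gives
   k32 == 0x100 (the low 7 bits rotated out are all zero), so the
   entry hashes to slot 0x100 % 65521 == 256. */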
/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   for (UWord j = 0; j < VG_TT_FAST_SETS; j++) {
      FastCacheSet* set = &VG_(tt_fast)[j];
      set->guest0 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest1 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest2 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest3 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   n_fast_flushes++;
}

/* Invalidate a single fast cache entry. */
static void invalidateFastCacheEntry ( Addr guest )
{
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(guest != TRANSTAB_BOGUS_GUEST_ADDR);
   /* If any entry in the line is the right one, just set it to
      TRANSTAB_BOGUS_GUEST_ADDR.  Doing so ensures that the entry will
      never be used in future, so it will eventually fall off the end
      of the line, due to LRU replacement, and be replaced with
      something that's actually useful. */
   UWord setNo = (UInt)VG_TT_FAST_HASH(guest);
   FastCacheSet* set = &VG_(tt_fast)[setNo];
   if (set->guest0 == guest) {
      set->guest0 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest1 == guest) {
      set->guest1 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest2 == guest) {
      set->guest2 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest3 == guest) {
      set->guest3 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
}
static void setFastCacheEntry ( Addr guest, ULong* tcptr )
{
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(guest != TRANSTAB_BOGUS_GUEST_ADDR);
   /* Shift all entries along one, so that the LRU one disappears, and put the
      new entry at the MRU position. */
   UWord setNo = (UInt)VG_TT_FAST_HASH(guest);
   FastCacheSet* set = &VG_(tt_fast)[setNo];
   set->host3  = set->host2;
   set->guest3 = set->guest2;
   set->host2  = set->host1;
   set->guest2 = set->guest1;
   set->host1  = set->host0;
   set->guest1 = set->guest0;
   set->host0  = (Addr)tcptr;
   set->guest0 = guest;
   n_fast_updates++;
}
static TTEno get_empty_tt_slot(SECno sNo)
{
   TTEno i;

   i = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = sectors[sNo].ttC[i].usage.next_empty_tte;

   vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);

   return i;
}

static void add_to_empty_tt_list (SECno sNo, TTEno tteno)
{
   sectors[sNo].ttC[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = tteno;
}
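
/* The empty_tt_list therefore behaves as a LIFO free list threaded
   through the ttC entries themselves.  E.g. after
      add_to_empty_tt_list(s, 3); add_to_empty_tt_list(s, 7);
   the head is 7 and 7's next_empty_tte is 3, so get_empty_tt_slot(s)
   returns 7 first, then 3. */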
static void initialiseSector ( SECno sno )
{
   UInt i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->ttC == NULL);
      vg_assert(sec->ttH == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         vg_assert(sec->ec2tte_size[e] == 0);
         vg_assert(sec->ec2tte_used[e] == 0);
         vg_assert(sec->ec2tte[e] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ, sr_Err(sres) );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntryC) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TTC)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntryC),
                                     sr_Err(sres));
         /*NOTREACHED*/
      }
      sec->ttC = (TTEntryC*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntryH) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TTH)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntryH),
                                     sr_Err(sres));
         /*NOTREACHED*/
      }
      sec->ttH = (TTEntryH*)(Addr)sr_Res(sres);

      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         sec->ttH[ei].status   = Empty;
         sec->ttC[ei].n_tte2ec = 0;
         add_to_empty_tt_list(sno, ei);
      }

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
                                     N_HTTES_PER_SECTOR * sizeof(TTEno),
                                     sr_Err(sres));
         /*NOTREACHED*/
      }
      sec->htt = (TTEno*)(Addr)sr_Res(sres);
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == INV_SNO)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
      n_sectors_recycled++;

      vg_assert(sec->ttC != NULL);
      vg_assert(sec->ttH != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         if (sec->ttH[ei].status == InUse) {
            vg_assert(sec->ttC[ei].n_tte2ec >= 1);
            vg_assert(sec->ttC[ei].n_tte2ec <= 3);
            n_dump_osize += TTEntryH__osize(&sec->ttH[ei]);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VexGuestExtents vge_tmp;
               TTEntryH__to_VexGuestExtents( &vge_tmp, &sec->ttH[ei] );
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->ttC[ei].entry, vge_tmp );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, ei);
         } else {
            vg_assert(sec->ttC[ei].n_tte2ec == 0);
         }
         sec->ttH[ei].status   = Empty;
         sec->ttC[ei].n_tte2ec = 0;
         add_to_empty_tt_list(sno, ei);
      }
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         if (sec->ec2tte_size[e] == 0) {
            vg_assert(sec->ec2tte_used[e] == 0);
            vg_assert(sec->ec2tte[e] == NULL);
         } else {
            vg_assert(sec->ec2tte[e] != NULL);
            ttaux_free(sec->ec2tte[e]);
            sec->ec2tte[e] = NULL;
            sec->ec2tte_size[e] = 0;
            sec->ec2tte_used[e] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      SECno ix;
      for (ix = 0; ix < n_sectors; ix++) {
         if (sector_search_order[ix] == sno)
            break;
      }
      vg_assert(ix >= 0 && ix < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( const VexGuestExtents* vge,
                           Addr             entry,
                           Addr             code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs )
{
   Int    tcAvailQ, reqdQ, y;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;
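   /* Worked example of the rounding: code_len == 20 bytes gives
      reqdQ == (20 + 7) >> 3 == 3 quadwords, i.e. 24 bytes of TC space,
      since the TC is allocated in 8-byte (ULong) units. */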

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_HTTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
                   y, tt_loading_pct, tc_loading_pct,
                   8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   TTEno tteix = get_empty_tt_slot(y);
   TTEntryC__init(&sectors[y].ttC[tteix]);
   TTEntryH__init(&sectors[y].ttH[tteix]);
   sectors[y].ttC[tteix].tcptr = tcptr;
   sectors[y].ttC[tteix].usage.prof.count = 0;

   sectors[y].ttC[tteix].usage.prof.weight
      = False
        ? // Count guest instrs (assumes all side exits are untaken)
          (n_guest_instrs == 0 ? 1 : n_guest_instrs)
        : // Counts some (not very good) approximation to host instructions
          (code_len == 0 ? 1 : (code_len / 4));
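   /* Note: the constant False above permanently selects the second
      arm, the host-instruction approximation.  Flipping it to True
      would weight profiles by guest instruction count instead. */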
1847 sectors[y].ttC[tteix].entry = entry;
1848 TTEntryH__from_VexGuestExtents( &sectors[y].ttH[tteix], vge );
1849 sectors[y].ttH[tteix].status = InUse;
1851 // Point an htt entry to the tt slot
1852 HTTno htti = HASH_TT(entry);
1853 vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
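   /* Linear probing: scan forwards (wrapping at the end of the table)
      until a reusable slot -- empty, or a deletion tombstone -- turns
      up.  Termination is guaranteed because the table is never filled
      beyond SECTOR_TT_LIMIT_PERCENT of its capacity. */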
   while (True) {
      if (sectors[y].htt[htti] == HTT_EMPTY
          || sectors[y].htt[htti] == HTT_DELETED)
         break;
      htti++;
      if (htti >= N_HTTES_PER_SECTOR)
         htti = 0;
   }
   sectors[y].htt[htti] = tteix;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexArch arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host, endness_host,
                                dstP + offs_profInc,
                                &sectors[y].ttC[tteix].usage.prof.count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = tteix;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, tteix);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], tteix );
}


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/Addr*  res_hcode,
                            /*OUT*/SECno* res_sNo,
                            /*OUT*/TTEno* res_tteNo,
                            Addr          guest_addr,
                            Bool          upd_cache )
{
   SECno i, sno;
   HTTno j, k, kstart;
   TTEno tti;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         tti = sectors[sno].htt[k];
         if (tti < N_TTES_PER_SECTOR
             && sectors[sno].ttC[tti].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].ttC[tti].tcptr );
            if (res_hcode)
               *res_hcode = (Addr)sectors[sno].ttC[tti].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = tti;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
         if (sectors[sno].htt[k] == HTT_EMPTY)
            break; /* not found in this sector */
         k++;
         if (k == N_HTTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
{
   Addr e1 = s1 + r1 - 1;
   Addr e2 = s2 + r2 - 1;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}
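/* This is a closed-interval test: ranges [s1, s1+r1-1] and
   [s2, s2+r2-1] are disjoint iff one ends before the other starts.
   For example, overlap1(0x1000, 0x100, 0x10FF, 1) is True (the ranges
   share byte 0x10FF), whereas overlap1(0x1000, 0x100, 0x1100, 1) is
   False. */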

static inline
Bool overlaps ( Addr start, ULong range, const TTEntryH* tteH )
{
   if (overlap1(start, range, tteH->vge_base[0], tteH->vge_len[0]))
      return True;
   if (tteH->vge_n_used < 2)
      return False;
   if (overlap1(start, range, tteH->vge_base[1], tteH->vge_len[1]))
      return True;
   if (tteH->vge_n_used < 3)
      return False;
   if (overlap1(start, range, tteH->vge_base[2], tteH->vge_len[2]))
      return True;
   return False;
}


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*OUT*/Addr* ga_deleted,
                         /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int      i, ec_idx;
   EClassNo ec_num;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   TTEntryC* tteC = &sec->ttC[tteno];
   TTEntryH* tteH = &sec->ttH[tteno];
   vg_assert(tteH->status == InUse);
   vg_assert(tteC->n_tte2ec >= 1 && tteC->n_tte2ec <= 3);

   vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);
   vg_assert(tteH->vge_base[0] != TRANSTAB_BOGUS_GUEST_ADDR);
   *ga_deleted = tteH->vge_base[0];

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tteC->n_tte2ec; i++) {
      ec_num = tteC->tte2ec_ec[i];
      ec_idx = tteC->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   /* Mark the entry as deleted in htt.
      Note: we could avoid the below hash table search by
      adding a reference from tte to its hash position in tt. */
   HTTno j;
   HTTno k = HASH_TT(tteC->entry);
   vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
   for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
      if (sec->htt[k] == tteno)
         break;
      k++;
      if (k == N_HTTES_PER_SECTOR)
         k = 0;
   }
   vg_assert(j < N_HTTES_PER_SECTOR);
   sec->htt[k] = HTT_DELETED;
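   /* HTT_DELETED is a tombstone rather than HTT_EMPTY: lookups must be
      able to probe past this slot to reach entries inserted later in
      the same probe chain, while subsequent insertions are free to
      reuse it. */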
   tteH->status = Deleted;
   tteC->n_tte2ec = 0;
   add_to_empty_tt_list(secNo, tteno);

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += TTEntryH__osize(tteH);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VexGuestExtents vge_tmp;
      TTEntryH__to_VexGuestExtents( &vge_tmp, tteH );
      VG_TDICT_CALL( tool_discard_superblock_info, tteC->entry, vge_tmp );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
SizeT delete_translations_in_sector_eclass ( /*OUT*/Addr* ga_deleted,
                                             /*MOD*/Sector* sec, SECno secNo,
                                             Addr guest_start, ULong range,
                                             EClassNo ec,
                                             VexArch arch_host,
                                             VexEndness endness_host )
{
   Int   i;
   TTEno tteno;
   SizeT numDeld = 0;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      TTEntryH* tteH = &sec->ttH[tteno];
      vg_assert(tteH->status == InUse);

      if (overlaps( guest_start, range, tteH )) {
         numDeld++;
         delete_tte( ga_deleted, sec, secNo, tteno, arch_host, endness_host );
      }
   }

   return numDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
SizeT delete_translations_in_sector ( /*OUT*/Addr* ga_deleted,
                                      /*MOD*/Sector* sec, SECno secNo,
                                      Addr guest_start, ULong range,
                                      VexArch arch_host,
                                      VexEndness endness_host )
{
   TTEno i;
   SizeT numDeld = 0;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      /* The entire and only purpose of splitting TTEntry into cold
         and hot parts (TTEntryC and TTEntryH) is to make this search
         loop run as fast as possible, by avoiding having to pull all
         of the cold data up the memory hierarchy. */
      if (UNLIKELY(sec->ttH[i].status == InUse
                   && overlaps( guest_start, range, &sec->ttH[i] ))) {
         numDeld++;
         delete_tte( ga_deleted, sec, secNo, i, arch_host, endness_host );
      }
   }

   return numDeld;
}


void VG_(discard_translations) ( Addr guest_start, ULong range,
                                 const HChar* who )
{
   Sector*  sec;
   SECno    sno;
   EClassNo ec;

   /* It is very commonly the case that a call here results in discarding of
      exactly one superblock.  As an optimisation only, use ga_deleted and
      numDeleted to detect this situation and to record the guest addr
      involved.  That is then used to avoid calling invalidateFastCache in
      this case.  Instead the individual entry in the fast cache is removed.
      This can reduce the overall VG_(fast_cache) miss rate significantly in
      applications that do a lot of short code discards (basically jit
      generated code that is subsequently patched).

      ga_deleted is made to hold the guest address of the last superblock
      deleted (in this call to VG_(discard_translations)).  If more than one
      superblock is deleted (or none), then we ignore what is written to
      ga_deleted.  If exactly one superblock is deleted then ga_deleted holds
      exactly what we want and will be used.
   */
   Addr  ga_deleted = TRANSTAB_BOGUS_GUEST_ADDR;
   SizeT numDeleted = 0;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%lx, %llu) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range <= (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );
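   /* Even a range no bigger than an eclass window can still straddle
      an eclass boundary; in that case range_to_eclass itself reports
      ECLASS_MISC and we fall through to the slow scheme below. */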

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                       "  FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         numDeleted += delete_translations_in_sector_eclass(
                          &ga_deleted, sec, sno, guest_start, range,
                          ec, arch_host, endness_host
                       );
         numDeleted += delete_translations_in_sector_eclass(
                          &ga_deleted, sec, sno, guest_start, range,
                          ECLASS_MISC, arch_host, endness_host
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                       "  SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         numDeleted += delete_translations_in_sector(
                          &ga_deleted, sec, sno, guest_start, range,
                          arch_host, endness_host
                       );
      }
   }

   if (numDeleted == 0) {
      // "ga_deleted was never set"
      vg_assert(ga_deleted == TRANSTAB_BOGUS_GUEST_ADDR);
   } else
   if (numDeleted == 1) {
      // "ga_deleted was set to something valid"
      vg_assert(ga_deleted != TRANSTAB_BOGUS_GUEST_ADDR);
      // Just invalidate the individual VG_(tt_fast) cache entry \o/
      invalidateFastCacheEntry(ga_deleted);
      Addr fake_host = 0;
      vg_assert(! VG_(lookupInFastCache)(&fake_host, ga_deleted));
   } else {
      // "ga_deleted was set to something valid"
      vg_assert(ga_deleted != TRANSTAB_BOGUS_GUEST_ADDR);
      // Nuke the entire VG_(tt_fast) cache.  Sigh.
      invalidateFastCache();
   }

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      TTEno i;
      Bool  sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            TTEntryH* tteH = &sec->ttH[i];
            if (tteH->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, tteH ));
         }
      }
   }
}

/* Whether or not tools may discard translations. */
Bool VG_(ok_to_discard_translations) = False;

/* This function is exported to tools which can use it to discard
   translations, provided it is safe to do so. */
void VG_(discard_translations_safely) ( Addr start, SizeT len,
                                        const HChar* who )
{
   vg_assert(VG_(ok_to_discard_translations));
   VG_(discard_translations)(start, len, who);
}


/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
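/* That is, 500 * 1000 / 8 == 62500 ULong quadwords, roughly 488KB of
   unredirected translation cache in total. */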

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB,
                                     sr_Err(sres));
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(Addr)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
                                   Addr entry,
                                   Addr code,
                                   UInt code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc slots are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result,
                                    Addr guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse) {
         /* Fake up a TTEntryH just so we can pass it to overlaps()
            without having to make a new version of overlaps() just for
            this rare case. */
         TTEntryH tmp_tteH;
         TTEntryH__from_VexGuestExtents( &tmp_tteH, &unredir_tt[i].vge );
         tmp_tteH.status = Empty; /* Completely irrelevant; pure paranoia. */
         if (overlaps( guest_start, range, &tmp_tteH )) {
            unredir_tt[i].inUse = False;
         }
      }
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(TTEno) == sizeof(HTTno));
   vg_assert(sizeof(TTEno) == 2);
   vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
   vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
   vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
   vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);

   /* check fast cache entries really are 8 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheSet) == 8 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SETS * sizeof(FastCacheSet));
   /* check fast cache entries have the layout that the handwritten assembly
      fragments assume. */
   vg_assert(sizeof(FastCacheSet) == (1 << VG_FAST_CACHE_SET_BITS));
   vg_assert(offsetof(FastCacheSet,guest0) == FCS_g0);
   vg_assert(offsetof(FastCacheSet,host0)  == FCS_h0);
   vg_assert(offsetof(FastCacheSet,guest1) == FCS_g1);
   vg_assert(offsetof(FastCacheSet,host1)  == FCS_h1);
   vg_assert(offsetof(FastCacheSet,guest2) == FCS_g2);
   vg_assert(offsetof(FastCacheSet,host2)  == FCS_h2);
   vg_assert(offsetof(FastCacheSet,guest3) == FCS_g3);
   vg_assert(offsetof(FastCacheSet,host3)  == FCS_h3);
   vg_assert(offsetof(FastCacheSet,guest0) == 0 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,host0)  == 1 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,guest1) == 2 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,host1)  == 3 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,guest2) == 4 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,host2)  == 5 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,guest3) == 6 * sizeof(Addr));
   vg_assert(offsetof(FastCacheSet,host3)  == 7 * sizeof(Addr));
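   /* In other words, each FastCacheSet is four (guest, host) address
      pairs laid out back-to-back, so a set behaves like a 4-way
      associative cache line for translation lookups. */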

   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_64_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   /* The TTEntryH size is critical for keeping the LLC miss rate down
      when doing a lot of discarding.  Hence check it here.  We also
      have a lot of TTEntryCs so let's check that too. */
   if (sizeof(HWord) == 8) {
      vg_assert(sizeof(TTEntryH) <= 32);
      vg_assert(sizeof(TTEntryC) <= 112);
   }
   else if (sizeof(HWord) == 4) {
      vg_assert(sizeof(TTEntryH) <= 20);
#     if defined(VGP_ppc32_linux) || defined(VGP_mips32_linux) \
         || (defined(VGP_mips64_linux) && defined(VGABI_N32)) \
         || defined(VGP_nanomips_linux) || defined(VGP_arm_linux)
      /* On PPC32, MIPS32, ARM32 platforms, alignof(ULong) == 8, so the
         structure is larger than on other 32 bit targets. */
      vg_assert(sizeof(TTEntryC) <= 96);
#     else
      vg_assert(sizeof(TTEntryC) <= 88);
#     endif
   }
   else {
      vg_assert(0);
   }

   /* All the hassle about converting between TTEntryH and
      VexGuestExtents was so as to ensure the following. */
   vg_assert(sizeof(TTEntryH) == sizeof(VexGuestExtents));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be. */
   if (VG_(clo_avg_transtab_entry_size) == 0)
      avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8;
   else
      avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);
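   /* Example sizing: with --avg-transtab-entry-size=72, avg_codeszQ is
      (72 + 7) / 8 == 9 quadwords, so each sector reserves 10 quadwords
      (80 bytes) of TC space per TT entry; the "+ 1" gives a little
      slack over the expected average. */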

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);

   n_sectors = VG_(clo_num_transtab_sectors);
   vg_assert(n_sectors >= MIN_N_SECTORS);
   vg_assert(n_sectors <= MAX_N_SECTORS);

   /* Initialise the sectors, even the ones we aren't going to use.
      Set all fields to zero. */
   youngest_sector = 0;
   for (i = 0; i < MAX_N_SECTORS; i++)
      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));

   /* Initialise the sector_search_order hint table, including the
      entries we aren't going to use. */
   for (i = 0; i < MAX_N_SECTORS; i++)
      sector_search_order[i] = INV_SNO;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
       || VG_(debugLog_getLevel) () >= 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
         "%stool provided default %u\n",
         VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
         VG_(clo_avg_transtab_entry_size),
         VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
         VG_(details).avg_translation_sizeB);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %'d bytes each = %'d total TC\n",
         n_sectors, 8 * tc_sector_szQ,
         n_sectors * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %'d tables[%d] of C %'d + H %'d bytes each "
         "= %'d total TT\n",
         n_sectors, N_TTES_PER_SECTOR,
         (int)(N_TTES_PER_SECTOR * sizeof(TTEntryC)),
         (int)(N_TTES_PER_SECTOR * sizeof(TTEntryH)),
         (int)(n_sectors * N_TTES_PER_SECTOR
               * (sizeof(TTEntryC) + sizeof(TTEntryH))));
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tt entries each = %'d total tt entries\n",
         N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d htt[%d] of %'d bytes each = %'d total HTT"
         " (htt[%d] %d%% max occup)\n",
         n_sectors, N_HTTES_PER_SECTOR,
         (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
         (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
         N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
   }

   if (0) {
      VG_(printf)("XXXX sizeof(VexGuestExtents) = %d\n",
                  (Int)sizeof(VexGuestExtents));
      VG_(printf)("XXXX sizeof(InEdge)     = %d\n", (Int)sizeof(InEdge));
      VG_(printf)("XXXX sizeof(OutEdge)    = %d\n", (Int)sizeof(OutEdge));
      VG_(printf)("XXXX sizeof(InEdgeArr)  = %d\n", (Int)sizeof(InEdgeArr));
      VG_(printf)("XXXX sizeof(OutEdgeArr) = %d\n", (Int)sizeof(OutEdgeArr));
      VG_(printf)("XXXX sizeof(TTEntryC)   = %d\n", (Int)sizeof(TTEntryC));
      VG_(printf)("XXXX sizeof(TTEntryH)   = %d\n", (Int)sizeof(TTEntryH));
   }
}


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static Double safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : (Double)a / (Double)b);
}

ULong VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

ULong VG_(get_bbs_discarded_or_dumped) ( void )
{
   return n_disc_count + n_dump_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'llu "
                "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
                "avg tce size %llu\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(n_in_tsize, n_in_osize),
                n_in_sc_count,
                n_in_tsize / (n_in_count ? n_in_count : 1));
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?) "
                "(sectors recycled %'llu)\n",
                n_dump_count, n_dump_osize, n_sectors_recycled );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (DEBUG_TRANSTAB) {
      VG_(printf)("\n");
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
         if (e % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}


/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( const TTEntryC* tteC )
{
   return ((ULong)tteC->usage.prof.weight) * ((ULong)tteC->usage.prof.count);
}

ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   SECno sno;
   Int   r, s;
   ULong score_total;
   TTEno i;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (s = 0; s < n_tops; s++) {
      tops[s].addr  = 0;
      tops[s].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].ttH[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].ttC[i]);
         /* Find the rank for sectors[sno].tt{C,H}[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].ttC[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].ttC[i].entry;
            tops[r].score = score( &sectors[sno].ttC[i] );
         }
      }
   }
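   /* In effect this is a partial insertion sort: each in-use entry is
      sifted up from the bottom of tops[] to its rank, lower-scoring
      entries are shifted down one slot, and whatever falls off the end
      of the array is dropped. */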

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].ttH[i].status != InUse)
            continue;
         sectors[sno].ttC[i].usage.prof.count = 0;
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/