2 /*--------------------------------------------------------------------*/
3 /*--- LibHB: a library for implementing and checking ---*/
4 /*--- the happens-before relationship in concurrent programs. ---*/
5 /*--- libhb_core.c ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
12 Copyright (C) 2008-2017 OpenWorks Ltd
13 info@open-works.co.uk
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING.
31 #include "pub_tool_basics.h"
32 #include "pub_tool_poolalloc.h"
33 #include "pub_tool_libcassert.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcprint.h"
36 #include "pub_tool_machine.h"
37 #include "pub_tool_mallocfree.h"
38 #include "pub_tool_wordfm.h"
39 #include "pub_tool_hashtable.h"
40 #include "pub_tool_xarray.h"
41 #include "pub_tool_oset.h"
42 #include "pub_tool_threadstate.h"
43 #include "pub_tool_aspacemgr.h"
44 #include "pub_tool_stacktrace.h"
45 #include "pub_tool_execontext.h"
46 #include "pub_tool_errormgr.h"
47 #include "pub_tool_debuginfo.h"
48 #include "pub_tool_gdbserver.h"
49 #include "pub_tool_options.h" // VG_(clo_stats)
50 #include "hg_basics.h"
51 #include "hg_wordset.h"
52 #include "hg_lock_n_thread.h"
53 #include "hg_errors.h"
55 #include "libhb.h"
58 /////////////////////////////////////////////////////////////////
59 /////////////////////////////////////////////////////////////////
60 // //
61 // Debugging #defines //
62 // //
63 /////////////////////////////////////////////////////////////////
64 /////////////////////////////////////////////////////////////////
66 /* Check the sanity of shadow values in the core memory state
67 machine. Change #if 0 to #if 1 to enable this. */
68 #if 0
69 # define CHECK_MSM 1
70 #else
71 # define CHECK_MSM 0
72 #endif
75 /* Check sanity (reference counts, etc) in the conflicting access
76 machinery. Change #if 0 to #if 1 to enable this. */
77 #if 0
78 # define CHECK_CEM 1
79 #else
80 # define CHECK_CEM 0
81 #endif
84 /* Check sanity in the compressed shadow memory machinery,
85 particularly in its caching innards. Unfortunately there's no
86 almost-zero-cost way to make them selectable at run time. Hence
87 set the #if 0 to #if 1 and rebuild if you want them. */
88 #if 0
89 # define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */
90 # define inline __attribute__((noinline))
91 /* probably want to ditch -fomit-frame-pointer too */
92 #else
93 # define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
94 #endif
96 /* Define to 1 to activate tracing cached rcec. */
97 #define DEBUG_CACHED_RCEC 0
99 /////////////////////////////////////////////////////////////////
100 /////////////////////////////////////////////////////////////////
101 // //
102 // data decls: VtsID //
103 // //
104 /////////////////////////////////////////////////////////////////
105 /////////////////////////////////////////////////////////////////
107 /* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
108 bits, since they have to be packed into the lowest 30 bits of an
109 SVal. */
110 typedef UInt VtsID;
111 #define VtsID_INVALID 0xFFFFFFFF
115 /////////////////////////////////////////////////////////////////
116 /////////////////////////////////////////////////////////////////
117 // //
118 // data decls: SVal //
119 // //
120 /////////////////////////////////////////////////////////////////
121 /////////////////////////////////////////////////////////////////
123 typedef ULong SVal;
125 /* This value has special significance to the implementation, and callers
126 may not store it in the shadow memory. */
127 #define SVal_INVALID (3ULL << 62)
129 /* This is the default value for shadow memory. Initially the shadow
130 memory contains no accessible areas and so all reads produce this
131 value. TODO: make this caller-defineable. */
132 #define SVal_NOACCESS (2ULL << 62)
136 /////////////////////////////////////////////////////////////////
137 /////////////////////////////////////////////////////////////////
138 // //
139 // data decls: ScalarTS //
140 // //
141 /////////////////////////////////////////////////////////////////
142 /////////////////////////////////////////////////////////////////
144 /* Scalar Timestamp. We have to store a lot of these, so there is
145 some effort to make them as small as possible. Logically they are
146 a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
147 We pack it into 64 bits by representing the Thr* using a ThrID, a
148 small integer (18 bits), and a 46 bit integer for the timestamp
149 number. The 46/18 split is arbitrary, but has the effect that
150 Helgrind can only handle programs that create 2^18 or fewer threads
151 over their entire lifetime, and have no more than 2^46 timestamp
152 ticks (synchronisation operations on the same thread).
154 This doesn't seem like much of a limitation. 2^46 ticks is
155 7.06e+13, and if each tick (optimistically) takes the machine 1000
156 cycles to process, then the minimum time to process that many ticks
157 at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
158 but VTS ticks, which isn't realistic.
160 NB1: SCALARTS_N_THRBITS must be 27 or lower. The obvious limit is
161 32 since a ThrID is a UInt. 27 comes from the fact that
162 'Thr_n_RCEC', which records information about old accesses, packs
163 in tsw not only a ThrID but also minimum 4+1 other bits (access size
164 and writeness) in a UInt, hence limiting size to 32-(4+1) == 27.
166 NB2: thrid values are issued upwards from 1024, and values less
167 than that aren't valid. This isn't per se necessary (any order
168 will do, so long as they are unique), but it does help ensure they
169 are less likely to get confused with the various other kinds of
170 small-integer thread ids drifting around (eg, TId).
171 So, SCALARTS_N_THRBITS must be 11 or more.
172 See also NB5.
174 NB3: this probably also relies on the fact that Thr's are never
175 deallocated -- they exist forever. Hence the 1-1 mapping from
176 Thr's to thrid values (set up in Thr__new) persists forever.
178 NB4: temp_max_sized_VTS is allocated at startup and never freed.
179 It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
180 ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
181 making the memory use for this go sky-high. With
182 SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
183 like an OK tradeoff. If more than 256k threads need to be
184 supported, we could change SCALARTS_N_THRBITS to 20, which would
185 facilitate supporting 1 million threads at the cost of 8MB storage
186 for temp_max_sized_VTS.
188 NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
189 ThrID == 0 to denote an empty Thr_n_RCEC record. So ThrID == 0
190 must never be a valid ThrID. Given NB2 that's OK.
192 #define SCALARTS_N_THRBITS 18 /* valid range: 11 to 27 inclusive,
193 See NB1 and NB2 above. */
195 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
196 typedef
197 struct {
198 ThrID thrid : SCALARTS_N_THRBITS;
199 ULong tym : SCALARTS_N_TYMBITS;
201 ScalarTS;
203 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
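/* Illustrative sketch (not part of the library): packing a (thrid, tym)
   pair into a single 64-bit ScalarTS, as described above.  With
   SCALARTS_N_THRBITS == 18, ThrID_MAX_VALID is 262143 and valid thrids
   are issued from 1024 upwards (see NB2), leaving 46 bits for the
   timestamp. */
__attribute__((unused))
static ScalarTS example_mk_ScalarTS ( ThrID thrid, ULong tym )
{
   ScalarTS ts;
   tl_assert(thrid >= 1024 && thrid <= ThrID_MAX_VALID);
   ts.thrid = thrid;
   ts.tym   = tym;   /* silently truncated to SCALARTS_N_TYMBITS bits */
   return ts;
}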
207 /////////////////////////////////////////////////////////////////
208 /////////////////////////////////////////////////////////////////
209 // //
210 // data decls: Filter //
211 // //
212 /////////////////////////////////////////////////////////////////
213 /////////////////////////////////////////////////////////////////
215 // baseline: 5, 9
216 #define FI_LINE_SZB_LOG2 5
217 #define FI_NUM_LINES_LOG2 10
219 #define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
220 #define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
222 #define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
223 #define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
225 #define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
226 & (Addr)(FI_NUM_LINES-1) )
229 /* In the lines, each 8 bytes are treated individually, and are mapped
230 to a UShort. Regardless of endianness of the underlying machine,
231 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
232 the highest address.
234 Of each bit pair, the higher numbered bit is set if an R has been
235 seen, so the actual layout is:
237 15 14 ... 01 00
239 R W for addr+7 ... R W for addr+0
241 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
244 /* tags are separated from lines. tags are Addrs and are
245 the base address of the line. */
246 typedef
247 struct {
248 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
250 FiLine;
252 typedef
253 struct {
254 Addr tags[FI_NUM_LINES];
255 FiLine lines[FI_NUM_LINES];
257 Filter;
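/* Illustrative sketch (not part of the library): how an address maps to
   its filter line and to the R/W bit pair covering that byte.  'fi' is
   a hypothetical filter.  Returns (R<<1)|W for the byte at 'a', or 0 if
   the selected line does not currently cover 'a'. */
__attribute__((unused))
static UWord example_Filter_bits_for_byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);      /* base address of the line */
   UWord   lineno = FI_GET_LINENO(a);   /* index into tags[]/lines[] */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;     /* which UShort covers 'a' */
   UWord   bitpos = 2 * (a & 7);        /* W bit; the R bit is bitpos+1 */
   if (fi->tags[lineno] != atag)
      return 0; /* line holds some other tag; no information for 'a' */
   return (line->u16s[loff] >> bitpos) & 3;
}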
261 /////////////////////////////////////////////////////////////////
262 /////////////////////////////////////////////////////////////////
263 // //
264 // data decls: Thr, ULong_n_EC //
265 // //
266 /////////////////////////////////////////////////////////////////
267 /////////////////////////////////////////////////////////////////
269 // Records stacks for H1 history mechanism (DRD-style)
270 typedef
271 struct { ULong ull; ExeContext* ec; }
272 ULong_n_EC;
275 /* How many of the above records to collect for each thread? Older
276 ones are dumped when we run out of space. 62.5k requires 1MB per
277 thread, since each ULong_n_EC record is 16 bytes long. When more
278 than N_KWs_N_STACKs_PER_THREAD are present, the older half are
279 deleted to make space. Hence in the worst case we will be able to
280 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
281 Kw transitions (segments in this thread). For the current setting
282 that gives a guaranteed stack for at least the last 31.25k
283 segments. */
284 #define N_KWs_N_STACKs_PER_THREAD 62500
287 UInt HG_(clo_history_backtrace_size) = 8;
289 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
290 #define RCEC_MAGIC 0xab88abb2UL
292 /* RCEC usage is commented in more detail in the section 'Change-event map2'
293 later in this file */
294 typedef
295 struct _RCEC {
296 UWord magic; /* sanity check only */
297 struct _RCEC* next;
298 UWord rc;
299 UWord rcX; /* used for crosschecking */
300 UWord frames_hash; /* hash of all the frames */
301 UWord frames[0];
302 /* Variable-length array.
303 The size depends on HG_(clo_history_backtrace_size). */
305 RCEC;
307 struct _Thr {
308 /* Current VTSs for this thread. They change as we go along. viR
309 is the VTS to be used for reads, viW for writes. Usually they
310 are the same, but can differ when we deal with reader-writer
311 locks. It is always the case that
312 VtsID__cmpLEQ(viW,viR) == True
313 that is, viW must be the same, or lagging behind, viR. */
314 VtsID viR;
315 VtsID viW;
317 /* Is initially False, and is set to True after the thread really
318 has done a low-level exit. When True, we expect to never see
319 any more memory references done by this thread. */
320 Bool llexit_done;
322 /* Is initially False, and is set to True after the thread has been
323 joined with (reaped by some other thread). After this point, we
324 do not expect to see any uses of .viR or .viW, so it is safe to
325 set them to VtsID_INVALID. */
326 Bool joinedwith_done;
328 /* A small integer giving a unique identity to this Thr. See
329 comments on the definition of ScalarTS for details. */
330 ThrID thrid : SCALARTS_N_THRBITS;
332 /* A filter that removes references for which we believe that
333 msmcread/msmcwrite will not change the state, nor report a
334 race. */
335 Filter* filter;
337 /* A pointer back to the top level Thread structure. There is a
338 1-1 mapping between Thread and Thr structures -- each Thr points
339 at its corresponding Thread, and vice versa. Really, Thr and
340 Thread should be merged into a single structure. */
341 Thread* hgthread;
343 /* The ULongs (scalar Kws) in this accumulate in strictly
344 increasing order, without duplicates. This is important because
345 we need to be able to find a given scalar Kw in this array
346 later, by binary search. */
347 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
349 /* cached_rcec maintains the last RCEC that was retrieved for this thread. */
350 RCEC cached_rcec;
351 // cached_rcec value, not ref-counted.
352 // As the last member of an RCEC is a variable length array, this must be
353 // the last element of the _Thr struct.
355 /* The shadow register vex_shadow1 SP register (SP_s1) is used to maintain
356 the validity of the cached rcec.
357 If SP_s1 is 0, then the cached rcec is invalid (cannot be used).
358 If SP_s1 is != 0, then the cached rcec is valid. The valid cached rcec
359 can be used to generate a new RCEC by changing just the last frame. */
365 /////////////////////////////////////////////////////////////////
366 /////////////////////////////////////////////////////////////////
367 // //
368 // data decls: SO //
369 // //
370 /////////////////////////////////////////////////////////////////
371 /////////////////////////////////////////////////////////////////
373 // (UInt) `echo "Synchronisation object" | md5sum`
374 #define SO_MAGIC 0x56b3c5b0U
376 struct _SO {
377 struct _SO* admin_prev;
378 struct _SO* admin_next;
379 VtsID viR; /* r-clock of sender */
380 VtsID viW; /* w-clock of sender */
381 UInt magic;
386 /////////////////////////////////////////////////////////////////
387 /////////////////////////////////////////////////////////////////
388 // //
389 // Forward declarations //
390 // //
391 /////////////////////////////////////////////////////////////////
392 /////////////////////////////////////////////////////////////////
394 /* fwds for
395 Globals needed by other parts of the library. These are set
396 once at startup and then never changed. */
397 static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
398 static ExeContext* (*main_get_EC)( Thr* ) = NULL;
400 /* misc fn and data fwdses */
401 static void VtsID__rcinc ( VtsID ii );
402 static void VtsID__rcdec ( VtsID ii );
404 static inline Bool SVal__isC ( SVal s );
405 static inline VtsID SVal__unC_Rmin ( SVal s );
406 static inline VtsID SVal__unC_Wmin ( SVal s );
407 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
408 static inline void SVal__rcinc ( SVal s );
409 static inline void SVal__rcdec ( SVal s );
410 /* SVal in LineZ are used to store various pointers. */
411 static inline void *SVal2Ptr (SVal s);
412 static inline SVal Ptr2SVal (void* ptr);
414 /* A double linked list of all the SO's. */
415 SO* admin_SO;
419 /////////////////////////////////////////////////////////////////
420 /////////////////////////////////////////////////////////////////
421 // //
422 // SECTION BEGIN compressed shadow memory //
423 // //
424 /////////////////////////////////////////////////////////////////
425 /////////////////////////////////////////////////////////////////
427 #ifndef __HB_ZSM_H
428 #define __HB_ZSM_H
430 /* Initialise the library. Once initialised, it will (or may) call
431 SVal__rcinc and SVal__rcdec in response to all the calls below, in order to
432 allow the user to do reference counting on the SVals stored herein.
433 It is important to understand, however, that due to internal
434 caching, the reference counts are in general inaccurate, and can be
435 both above or below the true reference count for an item. In
436 particular, the library may indicate that the reference count for
437 an item is zero, when in fact it is not.
439 To make the reference counting exact and therefore non-pointless,
440 call zsm_flush_cache. Immediately after it returns, the reference
441 counts for all items, as deduced by the caller by observing calls
442 to SVal__rcinc and SVal__rcdec, will be correct, and so any items with a
443 zero reference count may be freed (or at least considered to be
444 unreferenced by this library).
446 static void zsm_init ( void );
448 static void zsm_sset_range ( Addr, SizeT, SVal );
449 static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
450 static void zsm_scopy_range ( Addr, Addr, SizeT );
451 static void zsm_flush_cache ( void );
453 #endif /* ! __HB_ZSM_H */
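/* Illustrative usage note (not part of the library): a caller that
   wants to reclaim SVals based on the reference counts it observes via
   SVal__rcinc/SVal__rcdec must call zsm_flush_cache() first; only
   immediately after that call are the observed counts exact, because of
   the internal CacheLine caching described above. */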
456 /* Round a up to the next multiple of N. N must be a power of 2 */
457 #define ROUNDUP(a, N) ((a + N - 1) & ~(N-1))
458 /* Round a down to the next multiple of N. N must be a power of 2 */
459 #define ROUNDDN(a, N) ((a) & ~(N-1))
461 /* True if a belongs in range [start, start + szB[
462 (i.e. start + szB is excluded). */
463 static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
465 /* Checking start <= a && a < start + szB.
466 As start and a are unsigned addresses, the condition can
467 be simplified. */
468 if (CHECK_ZSM)
469 tl_assert ((a - start < szB)
470 == (start <= a
471 && a < start + szB));
472 return a - start < szB;
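/* Worked example of the single-comparison trick above (illustrative
   numbers): with start == 0x1000 and szB == 0x100, a == 0x0FFF gives
   a - start == (Addr)-1, a huge unsigned value >= szB, so the check
   correctly fails; a == 0x10FF gives a - start == 0xFF < 0x100, so it
   correctly succeeds. */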
475 /* ------ CacheLine ------ */
477 #define N_LINE_BITS 6 /* must be >= 3 */
478 #define N_LINE_ARANGE (1 << N_LINE_BITS)
479 #define N_LINE_TREES (N_LINE_ARANGE >> 3)
481 typedef
482 struct {
483 UShort descrs[N_LINE_TREES];
484 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
486 CacheLine;
488 #define TREE_DESCR_16_0 (1<<0)
489 #define TREE_DESCR_32_0 (1<<1)
490 #define TREE_DESCR_16_1 (1<<2)
491 #define TREE_DESCR_64 (1<<3)
492 #define TREE_DESCR_16_2 (1<<4)
493 #define TREE_DESCR_32_1 (1<<5)
494 #define TREE_DESCR_16_3 (1<<6)
495 #define TREE_DESCR_8_0 (1<<7)
496 #define TREE_DESCR_8_1 (1<<8)
497 #define TREE_DESCR_8_2 (1<<9)
498 #define TREE_DESCR_8_3 (1<<10)
499 #define TREE_DESCR_8_4 (1<<11)
500 #define TREE_DESCR_8_5 (1<<12)
501 #define TREE_DESCR_8_6 (1<<13)
502 #define TREE_DESCR_8_7 (1<<14)
503 #define TREE_DESCR_DTY (1<<15)
505 typedef
506 struct {
507 SVal dict[4]; /* can represent up to 4 diff values in the line */
508 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
509 dict indexes */
510 /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
511 LineF to use, and dict[2..] are also SVal_INVALID. */
513 LineZ; /* compressed rep for a cache line */
515 /* LineZ.dict[1] is used to store various pointers:
516 * In the first lineZ of a free SecMap, it points to the next free SecMap.
517 * In a lineZ for which we need to use a lineF, it points to the lineF. */
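/* Illustrative sketch (not part of the library): decoding the SVal for
   byte 'i' of a line held in compressed (Z) form.  Each of the
   N_LINE_ARANGE bytes is represented by a 2-bit index into dict[]; four
   such indexes are packed per ix2s[] byte, lowest-addressed byte in the
   least significant bits.  Assumes the line really is in Z form, i.e.
   dict[0] != SVal_INVALID. */
__attribute__((unused))
static SVal example_LineZ_get_sval ( LineZ* lineZ, UWord i )
{
   UWord ix = (lineZ->ix2s[i >> 2] >> (2 * (i & 3))) & 3;
   return lineZ->dict[ix];
}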
520 typedef
521 struct {
522 SVal w64s[N_LINE_ARANGE];
524 LineF; /* full rep for a cache line */
526 /* We use a pool allocator for LineF, as LineF is relatively small,
527 and we will often alloc/release such lines. */
528 static PoolAlloc* LineF_pool_allocator;
530 /* SVal in a lineZ are used to store various pointers.
531 Below are conversion functions to support that. */
532 static inline LineF *LineF_Ptr (LineZ *lineZ)
534 tl_assert(lineZ->dict[0] == SVal_INVALID);
535 return SVal2Ptr (lineZ->dict[1]);
538 /* Shadow memory.
539 Primary map is a WordFM Addr SecMap*.
540 SecMaps cover some page-size-ish section of address space and hold
541 a compressed representation.
542 CacheLine-sized chunks of SecMaps are copied into a Cache, being
543 decompressed when moved into the cache and recompressed on the
544 way out. Because of this, the cache must operate as a writeback
545 cache, not a writethrough one.
547 Each SecMap must hold a power-of-2 number of CacheLines. Hence
548 N_SECMAP_BITS must >= N_LINE_BITS.
550 #define N_SECMAP_BITS 13
551 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
553 // # CacheLines held by a SecMap
554 #define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
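/* Worked numbers (illustrative): with N_SECMAP_BITS == 13 and
   N_LINE_BITS == 6, each SecMap covers 8192 bytes of address space and
   holds 8192/64 == 128 LineZs. */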
556 /* The data in the SecMap is held in the array of LineZs. Each LineZ
557 either carries the required data directly, in a compressed
558 representation, or it holds (in .dict[1]) a pointer to a LineF
559 that holds the full representation.
561 As each in-use LineF is referred to by exactly one LineZ,
562 the number of .linesZ[] that refer to a lineF should equal
563 the number of used lineF.
565 RC obligations: the RCs presented to the user include exactly
566 the values in:
567 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
568 * F reps that are in use
570 Hence the following actions at the following transitions are required:
572 F rep: alloc'd -> freed -- rcdec_LineF
573 F rep: -> alloc'd -- rcinc_LineF
574 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
575 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
578 typedef
579 struct {
580 UInt magic;
581 LineZ linesZ[N_SECMAP_ZLINES];
583 SecMap;
585 #define SecMap_MAGIC 0x571e58cbU
587 // (UInt) `echo "Free SecMap" | md5sum`
588 #define SecMap_free_MAGIC 0x5a977f30U
590 __attribute__((unused))
591 static inline Bool is_sane_SecMap ( SecMap* sm ) {
592 return sm != NULL && sm->magic == SecMap_MAGIC;
595 /* ------ Cache ------ */
597 #define N_WAY_BITS 16
598 #define N_WAY_NENT (1 << N_WAY_BITS)
600 /* Each tag is the address of the associated CacheLine, rounded down
601 to a CacheLine address boundary. A CacheLine size must be a power
602 of 2 and must be 8 or more. Hence an easy way to initialise the
603 cache so it is empty is to set all the tag values to any value % 8
604 != 0, eg 1. This means all queries in the cache initially miss.
605 It does however require us to detect and not writeback, any line
606 with a bogus tag. */
607 typedef
608 struct {
609 CacheLine lyns0[N_WAY_NENT];
610 Addr tags0[N_WAY_NENT];
612 Cache;
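/* Worked numbers (illustrative): with N_WAY_BITS == 16 the cache is
   direct-mapped with 65536 entries.  Each CacheLine holds 8 tree
   descriptors plus 64 shadow values (8*2 + 64*8 == 528 bytes), so
   cache_shmem occupies roughly 65536 * 528 bytes (~33MB) of lines plus
   65536 tags. */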
614 static inline Bool is_valid_scache_tag ( Addr tag ) {
615 /* a valid tag should be naturally aligned to the start of
616 a CacheLine. */
617 return 0 == (tag & (N_LINE_ARANGE - 1));
621 /* --------- Primary data structures --------- */
623 /* Shadow memory primary map */
624 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
625 static Cache cache_shmem;
628 static UWord stats__secmaps_search = 0; // # SM finds
629 static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
630 static UWord stats__secmaps_allocd = 0; // # SecMaps issued
631 static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
632 static UWord stats__secmaps_scanGC = 0; // # nr of scan GC done.
633 static UWord stats__secmaps_scanGCed = 0; // # SecMaps GC-ed via scan
634 static UWord stats__secmaps_ssetGCed = 0; // # SecMaps GC-ed via setnoaccess
635 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
636 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
637 static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
638 static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
639 static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
640 static UWord stats__cache_F_fetches = 0; // # F lines fetched
641 static UWord stats__cache_F_wbacks = 0; // # F lines written back
642 static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
643 static UWord stats__cache_totrefs = 0; // # total accesses
644 static UWord stats__cache_totmisses = 0; // # misses
645 static ULong stats__cache_make_New_arange = 0; // total arange made New
646 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
647 static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
648 static UWord stats__cline_cread64s = 0; // # calls to s_m_read64
649 static UWord stats__cline_cread32s = 0; // # calls to s_m_read32
650 static UWord stats__cline_cread16s = 0; // # calls to s_m_read16
651 static UWord stats__cline_cread08s = 0; // # calls to s_m_read8
652 static UWord stats__cline_cwrite64s = 0; // # calls to s_m_write64
653 static UWord stats__cline_cwrite32s = 0; // # calls to s_m_write32
654 static UWord stats__cline_cwrite16s = 0; // # calls to s_m_write16
655 static UWord stats__cline_cwrite08s = 0; // # calls to s_m_write8
656 static UWord stats__cline_sread08s = 0; // # calls to s_m_get8
657 static UWord stats__cline_swrite08s = 0; // # calls to s_m_set8
658 static UWord stats__cline_swrite16s = 0; // # calls to s_m_set16
659 static UWord stats__cline_swrite32s = 0; // # calls to s_m_set32
660 static UWord stats__cline_swrite64s = 0; // # calls to s_m_set64
661 static UWord stats__cline_scopy08s = 0; // # calls to s_m_copy8
662 static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
663 static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
664 static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
665 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
666 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
667 static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
668 static UWord stats__vts__tick = 0; // # calls to VTS__tick
669 static UWord stats__vts__join = 0; // # calls to VTS__join
670 static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
671 static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
672 static UWord stats__vts_tab_GC = 0; // # nr of vts_tab GC
673 static UWord stats__vts_pruning = 0; // # nr of vts pruning
675 // # calls to VTS__cmp_structural w/ slow case
676 static UWord stats__vts__cmp_structural_slow = 0;
678 // # calls to VTS__indexAt_SLOW
679 static UWord stats__vts__indexat_slow = 0;
681 // # calls to vts_set__find__or__clone_and_add
682 static UWord stats__vts_set__focaa = 0;
684 // # calls to vts_set__find__or__clone_and_add that lead to an
685 // allocation
686 static UWord stats__vts_set__focaa_a = 0;
689 static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
690 return a & ~(N_SECMAP_ARANGE - 1);
692 static inline UWord shmem__get_SecMap_offset ( Addr a ) {
693 return a & (N_SECMAP_ARANGE - 1);
697 /*----------------------------------------------------------------*/
698 /*--- map_shmem :: WordFM Addr SecMap ---*/
699 /*--- shadow memory (low level handlers) (shmem__* fns) ---*/
700 /*----------------------------------------------------------------*/
702 /*--------------- SecMap allocation --------------- */
704 static HChar* shmem__bigchunk_next = NULL;
705 static HChar* shmem__bigchunk_end1 = NULL;
707 static void* shmem__bigchunk_alloc ( SizeT n )
709 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
710 tl_assert(n > 0);
711 n = VG_ROUNDUP(n, 16);
712 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
713 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
714 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
715 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
716 if (0)
717 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
718 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
719 SysRes sres = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
720 if (sr_isError(sres)) {
721 VG_(out_of_memory_NORETURN)(
722 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE,
723 sr_Err(sres));
725 shmem__bigchunk_next = (void*)(Addr)sr_Res(sres);
726 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
728 tl_assert(shmem__bigchunk_next);
729 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
730 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
731 shmem__bigchunk_next += n;
732 return shmem__bigchunk_next - n;
735 /* SecMaps changed to be fully SVal_NOACCESS are inserted in a list of
736 recycled SecMaps. When a new SecMap is needed, a recycled SecMap
737 will be used in preference to allocating a new SecMap. */
738 /* We make a linked list of SecMap. The first LineZ is re-used to
739 implement the linked list. */
740 /* Returns the SecMap following sm in the free list.
741 NULL if sm is the last SecMap. sm must be on the free list. */
742 static inline SecMap *SecMap_freelist_next ( SecMap* sm )
744 tl_assert (sm);
745 tl_assert (sm->magic == SecMap_free_MAGIC);
746 return SVal2Ptr (sm->linesZ[0].dict[1]);
748 static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
750 tl_assert (sm);
751 tl_assert (sm->magic == SecMap_free_MAGIC);
752 tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
753 sm->linesZ[0].dict[1] = Ptr2SVal (next);
756 static SecMap *SecMap_freelist = NULL;
757 static UWord SecMap_freelist_length(void)
759 SecMap *sm;
760 UWord n = 0;
762 sm = SecMap_freelist;
763 while (sm) {
764 n++;
765 sm = SecMap_freelist_next (sm);
767 return n;
770 static void push_SecMap_on_freelist(SecMap* sm)
772 if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
773 sm->magic = SecMap_free_MAGIC;
774 set_SecMap_freelist_next(sm, SecMap_freelist);
775 SecMap_freelist = sm;
777 /* Returns a free SecMap if there is one.
778 Otherwise, returns NULL. */
779 static SecMap *pop_SecMap_from_freelist(void)
781 SecMap *sm;
783 sm = SecMap_freelist;
784 if (sm) {
785 tl_assert (sm->magic == SecMap_free_MAGIC);
786 SecMap_freelist = SecMap_freelist_next (sm);
787 if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
789 return sm;
792 static SecMap* shmem__alloc_or_recycle_SecMap ( void )
794 Word i, j;
795 SecMap* sm = pop_SecMap_from_freelist();
797 if (!sm) {
798 sm = shmem__bigchunk_alloc( sizeof(SecMap) );
799 stats__secmaps_allocd++;
800 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
801 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
802 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
804 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
805 tl_assert(sm);
806 sm->magic = SecMap_MAGIC;
807 for (i = 0; i < N_SECMAP_ZLINES; i++) {
808 sm->linesZ[i].dict[0] = SVal_NOACCESS;
809 sm->linesZ[i].dict[1] = SVal_INVALID;
810 sm->linesZ[i].dict[2] = SVal_INVALID;
811 sm->linesZ[i].dict[3] = SVal_INVALID;
812 for (j = 0; j < N_LINE_ARANGE/4; j++)
813 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
815 return sm;
818 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
819 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
821 static SecMap* shmem__find_SecMap ( Addr ga )
823 SecMap* sm = NULL;
824 Addr gaKey = shmem__round_to_SecMap_base(ga);
825 // Cache
826 stats__secmaps_search++;
827 if (LIKELY(gaKey == smCache[0].gaKey))
828 return smCache[0].sm;
829 if (LIKELY(gaKey == smCache[1].gaKey)) {
830 SMCacheEnt tmp = smCache[0];
831 smCache[0] = smCache[1];
832 smCache[1] = tmp;
833 return smCache[0].sm;
835 if (gaKey == smCache[2].gaKey) {
836 SMCacheEnt tmp = smCache[1];
837 smCache[1] = smCache[2];
838 smCache[2] = tmp;
839 return smCache[1].sm;
841 // end Cache
842 stats__secmaps_search_slow++;
843 if (VG_(lookupFM)( map_shmem,
844 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
845 tl_assert(sm != NULL);
846 smCache[2] = smCache[1];
847 smCache[1] = smCache[0];
848 smCache[0].gaKey = gaKey;
849 smCache[0].sm = sm;
850 } else {
851 tl_assert(sm == NULL);
853 return sm;
856 /* Scan the SecMaps and count how many of them can be GC-ed.
857 If 'really' is True, actually does the GC of those SecMaps. */
858 /* NOT TO BE CALLED FROM WITHIN libzsm. */
859 static UWord next_SecMap_GC_at = 1000;
860 __attribute__((noinline))
861 static UWord shmem__SecMap_do_GC(Bool really)
863 UWord secmapW = 0;
864 Addr gaKey;
865 UWord examined = 0;
866 UWord ok_GCed = 0;
868 /* First invalidate the smCache */
869 smCache[0].gaKey = 1;
870 smCache[1].gaKey = 1;
871 smCache[2].gaKey = 1;
872 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));
874 VG_(initIterFM)( map_shmem );
875 while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
876 UWord i;
877 UWord j;
878 UWord n_linesF = 0;
879 SecMap* sm = (SecMap*)secmapW;
880 tl_assert(sm->magic == SecMap_MAGIC);
881 Bool ok_to_GC = True;
883 examined++;
885 /* Deal with the LineZs and the possible LineF of a LineZ. */
886 for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
887 LineZ* lineZ = &sm->linesZ[i];
888 if (lineZ->dict[0] != SVal_INVALID) {
889 ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
890 && !SVal__isC (lineZ->dict[1])
891 && !SVal__isC (lineZ->dict[2])
892 && !SVal__isC (lineZ->dict[3]);
893 } else {
894 LineF *lineF = LineF_Ptr(lineZ);
895 n_linesF++;
896 for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
897 ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
900 if (ok_to_GC)
901 ok_GCed++;
902 if (ok_to_GC && really) {
903 SecMap *fm_sm;
904 Addr fm_gaKey;
905 /* We cannot remove a SecMap from map_shmem while iterating.
906 So, stop iteration, remove from map_shmem, recreate the iteration
907 on the next SecMap. */
908 VG_(doneIterFM) ( map_shmem );
909 /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
910 We just need to free the lineF referenced by the linesZ. */
911 if (n_linesF > 0) {
912 for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
913 LineZ* lineZ = &sm->linesZ[i];
914 if (lineZ->dict[0] == SVal_INVALID) {
915 VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
916 n_linesF--;
920 if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
921 tl_assert (0);
922 stats__secmaps_in_map_shmem--;
923 tl_assert (gaKey == fm_gaKey);
924 tl_assert (sm == fm_sm);
925 stats__secmaps_scanGCed++;
926 push_SecMap_on_freelist (sm);
927 VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
930 VG_(doneIterFM)( map_shmem );
932 if (really) {
933 stats__secmaps_scanGC++;
934 /* Next GC when we approach the max allocated */
935 next_SecMap_GC_at = stats__secmaps_allocd - 1000;
936 /* Unless we GCed less than 10%: in that case, allow 10% more
937 allocation before the next GC. This avoids doing a lot of costly
938 GCs in the worst case: the 'growing phase' of an application
939 that allocates a lot of memory.
940 The worst case can be reproduced e.g. by
941 perf/memrw -t 30000000 -b 1000 -r 1 -l 1
942 which allocates around 30GB of memory. */
943 if (ok_GCed < stats__secmaps_allocd/10)
944 next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;
948 if (VG_(clo_stats) && really) {
949 VG_(message)(Vg_DebugMsg,
950 "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
951 " next GC at %lu\n",
952 stats__secmaps_scanGC, examined, ok_GCed,
953 next_SecMap_GC_at);
956 return ok_GCed;
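/* Worked example of the threshold update above (illustrative numbers):
   with stats__secmaps_allocd == 10000, a scan that GCed 2000 SecMaps
   (>= 10%) sets next_SecMap_GC_at to 9000; a scan that GCed only 500
   (< 10%) pushes it out to 11000 instead. */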
959 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
961 SecMap* sm = shmem__find_SecMap ( ga );
962 if (LIKELY(sm)) {
963 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
964 return sm;
965 } else {
966 /* create a new one */
967 Addr gaKey = shmem__round_to_SecMap_base(ga);
968 sm = shmem__alloc_or_recycle_SecMap();
969 tl_assert(sm);
970 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
971 stats__secmaps_in_map_shmem++;
972 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
973 return sm;
977 /* Returns the nr of linesF which are in use. Note: this is scanning
978 the secmap wordFM. So, this is to be used for statistics only. */
979 __attribute__((noinline))
980 static UWord shmem__SecMap_used_linesF(void)
982 UWord secmapW = 0;
983 Addr gaKey;
984 UWord inUse = 0;
986 VG_(initIterFM)( map_shmem );
987 while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
988 UWord i;
989 SecMap* sm = (SecMap*)secmapW;
990 tl_assert(sm->magic == SecMap_MAGIC);
992 for (i = 0; i < N_SECMAP_ZLINES; i++) {
993 LineZ* lineZ = &sm->linesZ[i];
994 if (lineZ->dict[0] == SVal_INVALID)
995 inUse++;
998 VG_(doneIterFM)( map_shmem );
1000 return inUse;
1003 /* ------------ LineF and LineZ related ------------ */
1005 static void rcinc_LineF ( LineF* lineF ) {
1006 UWord i;
1007 for (i = 0; i < N_LINE_ARANGE; i++)
1008 SVal__rcinc(lineF->w64s[i]);
1011 static void rcdec_LineF ( LineF* lineF ) {
1012 UWord i;
1013 for (i = 0; i < N_LINE_ARANGE; i++)
1014 SVal__rcdec(lineF->w64s[i]);
1017 static void rcinc_LineZ ( LineZ* lineZ ) {
1018 tl_assert(lineZ->dict[0] != SVal_INVALID);
1019 SVal__rcinc(lineZ->dict[0]);
1020 if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
1021 if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
1022 if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
1025 static void rcdec_LineZ ( LineZ* lineZ ) {
1026 tl_assert(lineZ->dict[0] != SVal_INVALID);
1027 SVal__rcdec(lineZ->dict[0]);
1028 if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
1029 if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
1030 if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
1033 inline
1034 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
1035 Word bix, shft, mask, prep;
1036 tl_assert(ix >= 0);
1037 bix = ix >> 2;
1038 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1039 mask = 3 << shft;
1040 prep = b2 << shft;
1041 arr[bix] = (arr[bix] & ~mask) | prep;
1044 inline
1045 static UWord read_twobit_array ( UChar* arr, UWord ix ) {
1046 Word bix, shft;
1047 tl_assert(ix >= 0);
1048 bix = ix >> 2;
1049 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
1050 return (arr[bix] >> shft) & 3;
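/* Worked example (illustrative): for ix == 5, bix == 1 and shft == 2,
   so write_twobit_array stores b2 in bits 3:2 of arr[1] and
   read_twobit_array returns those two bits.  Four 2-bit values are
   packed per byte, lowest ix in the least significant bits. */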
1053 /* We cache one free lineF, to avoid pool allocator calls.
1054 Measurement on firefox has shown that this avoids more than 90%
1055 of the PA calls. */
1056 static LineF *free_lineF = NULL;
1058 /* Allocates a lineF for LineZ. Sets lineZ in a state indicating
1059 lineF has to be used. */
1060 static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
1062 LineF *lineF;
1064 tl_assert(lineZ->dict[0] == SVal_INVALID);
1066 if (LIKELY(free_lineF)) {
1067 lineF = free_lineF;
1068 free_lineF = NULL;
1069 } else {
1070 lineF = VG_(allocEltPA) ( LineF_pool_allocator );
1072 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1073 lineZ->dict[1] = Ptr2SVal (lineF);
1075 return lineF;
1078 /* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
1079 back to its initial state SVal_NOACCESS (i.e. ready to be
1080 read or written just after SecMap allocation). */
1081 static inline void clear_LineF_of_Z (LineZ *lineZ)
1083 LineF *lineF = LineF_Ptr(lineZ);
1085 rcdec_LineF(lineF);
1086 if (UNLIKELY(free_lineF)) {
1087 VG_(freeEltPA)( LineF_pool_allocator, lineF );
1088 } else {
1089 free_lineF = lineF;
1091 lineZ->dict[0] = SVal_NOACCESS;
1092 lineZ->dict[1] = SVal_INVALID;
1095 /* Given address 'tag', find either the Z or F line containing relevant
1096 data, so it can be read into the cache.
1098 static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
1099 /*OUT*/LineF** fp, Addr tag ) {
1100 LineZ* lineZ;
1101 LineF* lineF;
1102 UWord zix;
1103 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
1104 UWord smoff = shmem__get_SecMap_offset(tag);
1105 /* since smoff is derived from a valid tag, it should be
1106 cacheline-aligned. */
1107 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1108 zix = smoff >> N_LINE_BITS;
1109 tl_assert(zix < N_SECMAP_ZLINES);
1110 lineZ = &sm->linesZ[zix];
1111 lineF = NULL;
1112 if (lineZ->dict[0] == SVal_INVALID) {
1113 lineF = LineF_Ptr (lineZ);
1114 lineZ = NULL;
1116 *zp = lineZ;
1117 *fp = lineF;
1120 /* Given address 'tag', return the relevant SecMap and the index of
1121 the LineZ within it, in the expectation that the line is to be
1122 overwritten. Regardless of whether 'tag' is currently associated
1123 with a Z or F representation, the current representation is
1124 rcdec'd, in recognition of the fact that the contents are
1125 just about to be overwritten. */
1126 static __attribute__((noinline))
1127 void find_Z_for_writing ( /*OUT*/SecMap** smp,
1128 /*OUT*/Word* zixp,
1129 Addr tag ) {
1130 LineZ* lineZ;
1131 UWord zix;
1132 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
1133 UWord smoff = shmem__get_SecMap_offset(tag);
1134 /* since smoff is derived from a valid tag, it should be
1135 cacheline-aligned. */
1136 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
1137 zix = smoff >> N_LINE_BITS;
1138 tl_assert(zix < N_SECMAP_ZLINES);
1139 lineZ = &sm->linesZ[zix];
1140 /* re RCs, we are rcdec_LineZ/clear_LineF_of_Z this LineZ so that new data
1141 can be parked in it. Hence have to rcdec it accordingly. */
1142 /* If lineZ has an associated lineF, free it up. */
1143 if (lineZ->dict[0] == SVal_INVALID)
1144 clear_LineF_of_Z(lineZ);
1145 else
1146 rcdec_LineZ(lineZ);
1147 *smp = sm;
1148 *zixp = zix;
1151 /* ------------ CacheLine and implicit-tree related ------------ */
1153 __attribute__((unused))
1154 static void pp_CacheLine ( CacheLine* cl ) {
1155 Word i;
1156 if (!cl) {
1157 VG_(printf)("%s","pp_CacheLine(NULL)\n");
1158 return;
1160 for (i = 0; i < N_LINE_TREES; i++)
1161 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
1162 for (i = 0; i < N_LINE_ARANGE; i++)
1163 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
1166 static UChar descr_to_validbits ( UShort descr )
1168 /* a.k.a Party Time for gcc's constant folder */
1169 # define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
1170 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
1171 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
1172 ( (b8_5) << 12) | ( (b8_4) << 11) | \
1173 ( (b8_3) << 10) | ( (b8_2) << 9) | \
1174 ( (b8_1) << 8) | ( (b8_0) << 7) | \
1175 ( (b16_3) << 6) | ( (b32_1) << 5) | \
1176 ( (b16_2) << 4) | ( (b64) << 3) | \
1177 ( (b16_1) << 2) | ( (b32_0) << 1) | \
1178 ( (b16_0) << 0) ) )
1180 # define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
1181 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
1182 ( (bit5) << 5) | ( (bit4) << 4) | \
1183 ( (bit3) << 3) | ( (bit2) << 2) | \
1184 ( (bit1) << 1) | ( (bit0) << 0) ) )
1186 /* these should all get folded out at compile time */
1187 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
1188 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
1189 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
1190 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
1191 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
1192 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
1193 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
1194 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
1195 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
1197 switch (descr) {
1199 +--------------------------------- TREE_DESCR_8_7
1200 | +------------------- TREE_DESCR_8_0
1201 | | +---------------- TREE_DESCR_16_3
1202 | | | +-------------- TREE_DESCR_32_1
1203 | | | | +------------ TREE_DESCR_16_2
1204 | | | | | +--------- TREE_DESCR_64
1205 | | | | | | +------ TREE_DESCR_16_1
1206 | | | | | | | +---- TREE_DESCR_32_0
1207 | | | | | | | | +-- TREE_DESCR_16_0
1208 | | | | | | | | |
1209 | | | | | | | | | GRANULARITY, 7 -> 0 */
1210 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
1211 return BYTE(1,1,1,1,1,1,1,1);
1212 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
1213 return BYTE(1,1,0,1,1,1,1,1);
1214 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
1215 return BYTE(0,1,1,1,1,1,1,1);
1216 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
1217 return BYTE(0,1,0,1,1,1,1,1);
1219 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
1220 return BYTE(1,1,1,1,1,1,0,1);
1221 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
1222 return BYTE(1,1,0,1,1,1,0,1);
1223 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
1224 return BYTE(0,1,1,1,1,1,0,1);
1225 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
1226 return BYTE(0,1,0,1,1,1,0,1);
1228 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
1229 return BYTE(1,1,1,1,0,1,1,1);
1230 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
1231 return BYTE(1,1,0,1,0,1,1,1);
1232 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
1233 return BYTE(0,1,1,1,0,1,1,1);
1234 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
1235 return BYTE(0,1,0,1,0,1,1,1);
1237 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
1238 return BYTE(1,1,1,1,0,1,0,1);
1239 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
1240 return BYTE(1,1,0,1,0,1,0,1);
1241 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
1242 return BYTE(0,1,1,1,0,1,0,1);
1243 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
1244 return BYTE(0,1,0,1,0,1,0,1);
1246 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
1247 return BYTE(0,0,0,1,1,1,1,1);
1248 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
1249 return BYTE(0,0,0,1,1,1,0,1);
1250 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
1251 return BYTE(0,0,0,1,0,1,1,1);
1252 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
1253 return BYTE(0,0,0,1,0,1,0,1);
1255 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
1256 return BYTE(1,1,1,1,0,0,0,1);
1257 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
1258 return BYTE(1,1,0,1,0,0,0,1);
1259 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
1260 return BYTE(0,1,1,1,0,0,0,1);
1261 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
1262 return BYTE(0,1,0,1,0,0,0,1);
1264 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
1265 return BYTE(0,0,0,1,0,0,0,1);
1267 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
1268 return BYTE(0,0,0,0,0,0,0,1);
1270 default: return BYTE(0,0,0,0,0,0,0,0);
1271 /* INVALID - any valid descr produces at least one
1272 valid bit in tree[0..7]*/
1274 /* NOTREACHED*/
1275 tl_assert(0);
1277 # undef DESCR
1278 # undef BYTE
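/* Worked example (illustrative): descr == TREE_DESCR_64 means the whole
   8-byte range is described by the single leaf tree[0], so
   descr_to_validbits returns 0x01, i.e. only tree[0] is valid. */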
1281 __attribute__((unused))
1282 static Bool is_sane_Descr ( UShort descr ) {
1283 return descr_to_validbits(descr) != 0;
1286 static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
1287 VG_(sprintf)(dst,
1288 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
1289 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
1290 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
1291 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
1292 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
1293 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
1294 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
1295 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
1296 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
1297 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
1298 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
1299 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
1300 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
1301 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
1302 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
1303 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
1306 static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
1307 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
1308 (Int)((byte & 128) ? 1 : 0),
1309 (Int)((byte & 64) ? 1 : 0),
1310 (Int)((byte & 32) ? 1 : 0),
1311 (Int)((byte & 16) ? 1 : 0),
1312 (Int)((byte & 8) ? 1 : 0),
1313 (Int)((byte & 4) ? 1 : 0),
1314 (Int)((byte & 2) ? 1 : 0),
1315 (Int)((byte & 1) ? 1 : 0)
1319 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
1320 Word i;
1321 UChar validbits = descr_to_validbits(descr);
1322 HChar buf[128], buf2[128]; // large enough
1323 if (validbits == 0)
1324 goto bad;
1325 for (i = 0; i < 8; i++) {
1326 if (validbits & (1<<i)) {
1327 if (tree[i] == SVal_INVALID)
1328 goto bad;
1329 } else {
1330 if (tree[i] != SVal_INVALID)
1331 goto bad;
1334 return True;
1335 bad:
1336 sprintf_Descr( buf, descr );
1337 sprintf_Byte( buf2, validbits );
1338 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
1339 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
1340 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
1341 for (i = 0; i < 8; i++)
1342 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
1343 VG_(printf)("%s","}\n");
1344 return 0;
1347 static Bool is_sane_CacheLine ( CacheLine* cl )
1349 Word tno, cloff;
1351 if (!cl) goto bad;
1353 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1354 UShort descr = cl->descrs[tno];
1355 SVal* tree = &cl->svals[cloff];
1356 if (!is_sane_Descr_and_Tree(descr, tree))
1357 goto bad;
1359 tl_assert(cloff == N_LINE_ARANGE);
1360 return True;
1361 bad:
1362 pp_CacheLine(cl);
1363 return False;
1366 static UShort normalise_tree ( /*MOD*/SVal* tree )
1368 UShort descr;
1369 /* pre: incoming tree[0..7] does not have any invalid shvals, in
1370 particular no zeroes. */
1371 if (CHECK_ZSM
1372 && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
1373 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
1374 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
1375 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
1376 tl_assert(0);
1378 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
1379 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
1380 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
1381 /* build 16-bit layer */
1382 if (tree[1] == tree[0]) {
1383 tree[1] = SVal_INVALID;
1384 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
1385 descr |= TREE_DESCR_16_0;
1387 if (tree[3] == tree[2]) {
1388 tree[3] = SVal_INVALID;
1389 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
1390 descr |= TREE_DESCR_16_1;
1392 if (tree[5] == tree[4]) {
1393 tree[5] = SVal_INVALID;
1394 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
1395 descr |= TREE_DESCR_16_2;
1397 if (tree[7] == tree[6]) {
1398 tree[7] = SVal_INVALID;
1399 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
1400 descr |= TREE_DESCR_16_3;
1402 /* build 32-bit layer */
1403 if (tree[2] == tree[0]
1404 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
1405 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
1406 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
1407 descr |= TREE_DESCR_32_0;
1409 if (tree[6] == tree[4]
1410 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
1411 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
1412 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
1413 descr |= TREE_DESCR_32_1;
1415 /* build 64-bit layer */
1416 if (tree[4] == tree[0]
1417 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
1418 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
1419 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
1420 descr |= TREE_DESCR_64;
1422 return descr;
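/* Illustrative sketch (not part of the library): normalising a tree
   whose 8 leaves all hold the same valid SVal collapses it to a single
   64-bit node: only tree[0] remains valid, the other leaves are set to
   SVal_INVALID, and the descriptor becomes TREE_DESCR_64. */
__attribute__((unused))
static void example_normalise_identical_leaves ( SVal sv /* != SVal_INVALID */ )
{
   SVal   tree[8];
   Word   i;
   UShort descr;
   for (i = 0; i < 8; i++)
      tree[i] = sv;
   descr = normalise_tree( tree );
   tl_assert(descr == TREE_DESCR_64);
   for (i = 1; i < 8; i++)
      tl_assert(tree[i] == SVal_INVALID);
}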
1425 /* This takes a cacheline where all the data is at the leaves
1426 (w8[..]) and builds a correctly normalised tree. */
1427 static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1429 Word tno, cloff;
1430 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1431 SVal* tree = &cl->svals[cloff];
1432 cl->descrs[tno] = normalise_tree( tree );
1434 tl_assert(cloff == N_LINE_ARANGE);
1435 if (CHECK_ZSM)
1436 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1437 stats__cline_normalises++;
1441 typedef struct { UChar count; SVal sval; } CountedSVal;
1443 static
1444 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1445 /*OUT*/Word* dstUsedP,
1446 Word nDst, CacheLine* src )
1448 Word tno, cloff, dstUsed;
1450 tl_assert(nDst == N_LINE_ARANGE);
1451 dstUsed = 0;
1453 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1454 UShort descr = src->descrs[tno];
1455 SVal* tree = &src->svals[cloff];
1457 /* sequentialise the tree described by (descr,tree). */
1458 # define PUT(_n,_v) \
1459 do { dst[dstUsed ].count = (_n); \
1460 dst[dstUsed++].sval = (_v); \
1461 } while (0)
1463 /* byte 0 */
1464 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1465 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1466 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1467 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1468 /* byte 1 */
1469 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1470 /* byte 2 */
1471 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1472 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1473 /* byte 3 */
1474 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1475 /* byte 4 */
1476 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1477 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1478 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1479 /* byte 5 */
1480 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1481 /* byte 6 */
1482 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1483 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1484 /* byte 7 */
1485 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1487 # undef PUT
1488 /* END sequentialise the tree described by (descr,tree). */
1491 tl_assert(cloff == N_LINE_ARANGE);
1492 tl_assert(dstUsed <= nDst);
1494 *dstUsedP = dstUsed;
1497 /* Write the cacheline 'wix' to backing store. Where it ends up
1498 is determined by its tag field. */
1499 static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1501 Word i, j, k, m;
1502 Addr tag;
1503 SecMap* sm;
1504 CacheLine* cl;
1505 LineZ* lineZ;
1506 LineF* lineF;
1507 Word zix, fix, csvalsUsed;
1508 CountedSVal csvals[N_LINE_ARANGE];
1509 SVal sv;
1511 if (0)
1512 VG_(printf)("scache wback line %d\n", (Int)wix);
1514 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1516 tag = cache_shmem.tags0[wix];
1517 cl = &cache_shmem.lyns0[wix];
1519 /* The cache line may have been invalidated; if so, ignore it. */
1520 if (!is_valid_scache_tag(tag))
1521 return;
1523 /* Where are we going to put it? */
1524 sm = NULL;
1525 lineZ = NULL;
1526 lineF = NULL;
1527 zix = fix = -1;
1529 /* find the Z line to write in and rcdec it or the associated F
1530 line. */
1531 find_Z_for_writing( &sm, &zix, tag );
1533 tl_assert(sm);
1534 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1535 lineZ = &sm->linesZ[zix];
1537 /* Generate the data to be stored */
1538 if (CHECK_ZSM)
1539 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1541 csvalsUsed = -1;
1542 sequentialise_CacheLine( csvals, &csvalsUsed,
1543 N_LINE_ARANGE, cl );
1544 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1545 if (0) VG_(printf)("%ld ", csvalsUsed);
1547 lineZ->dict[0] = lineZ->dict[1]
1548 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1550 /* i indexes actual shadow values, k is cursor in csvals */
1551 i = 0;
1552 for (k = 0; k < csvalsUsed; k++) {
1554 sv = csvals[k].sval;
1555 if (CHECK_ZSM)
1556 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1557 /* do we already have it? */
1558 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1559 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1560 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1561 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1562 /* no. look for a free slot. */
1563 if (CHECK_ZSM)
1564 tl_assert(sv != SVal_INVALID);
1565 if (lineZ->dict[0]
1566 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1567 if (lineZ->dict[1]
1568 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1569 if (lineZ->dict[2]
1570 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1571 if (lineZ->dict[3]
1572 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1573 break; /* we'll have to use the f rep */
1574 dict_ok:
1575 m = csvals[k].count;
1576 if (m == 8) {
1577 write_twobit_array( lineZ->ix2s, i+0, j );
1578 write_twobit_array( lineZ->ix2s, i+1, j );
1579 write_twobit_array( lineZ->ix2s, i+2, j );
1580 write_twobit_array( lineZ->ix2s, i+3, j );
1581 write_twobit_array( lineZ->ix2s, i+4, j );
1582 write_twobit_array( lineZ->ix2s, i+5, j );
1583 write_twobit_array( lineZ->ix2s, i+6, j );
1584 write_twobit_array( lineZ->ix2s, i+7, j );
1585 i += 8;
1587 else if (m == 4) {
1588 write_twobit_array( lineZ->ix2s, i+0, j );
1589 write_twobit_array( lineZ->ix2s, i+1, j );
1590 write_twobit_array( lineZ->ix2s, i+2, j );
1591 write_twobit_array( lineZ->ix2s, i+3, j );
1592 i += 4;
1594 else if (m == 1) {
1595 write_twobit_array( lineZ->ix2s, i+0, j );
1596 i += 1;
1598 else if (m == 2) {
1599 write_twobit_array( lineZ->ix2s, i+0, j );
1600 write_twobit_array( lineZ->ix2s, i+1, j );
1601 i += 2;
1603 else {
1604 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1609 if (LIKELY(i == N_LINE_ARANGE)) {
1610 /* Construction of the compressed representation was
1611 successful. */
1612 rcinc_LineZ(lineZ);
1613 stats__cache_Z_wbacks++;
1614 } else {
1615 /* Cannot use the compressed(z) representation. Use the full(f)
1616 rep instead. */
1617 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1618 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1619 lineF = alloc_LineF_for_Z (lineZ);
1620 i = 0;
1621 for (k = 0; k < csvalsUsed; k++) {
1622 if (CHECK_ZSM)
1623 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1624 sv = csvals[k].sval;
1625 if (CHECK_ZSM)
1626 tl_assert(sv != SVal_INVALID);
1627 for (m = csvals[k].count; m > 0; m--) {
1628 lineF->w64s[i] = sv;
1629 i++;
1632 tl_assert(i == N_LINE_ARANGE);
1633 rcinc_LineF(lineF);
1634 stats__cache_F_wbacks++;
1638 /* Fetch the cacheline 'wix' from the backing store. The tag
1639 associated with 'wix' is assumed to have already been filled in;
1640 hence that is used to determine where in the backing store to read
1641 from. */
1642 static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1644 Word i;
1645 Addr tag;
1646 CacheLine* cl;
1647 LineZ* lineZ;
1648 LineF* lineF;
1650 if (0)
1651 VG_(printf)("scache fetch line %d\n", (Int)wix);
1653 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1655 tag = cache_shmem.tags0[wix];
1656 cl = &cache_shmem.lyns0[wix];
1658 /* reject nonsense requests */
1659 tl_assert(is_valid_scache_tag(tag));
1661 lineZ = NULL;
1662 lineF = NULL;
1663 find_ZF_for_reading( &lineZ, &lineF, tag );
1664 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1666 /* expand the data into the bottom layer of the tree, then get
1667 normalise_CacheLine to build the descriptor array. */
1668 if (lineF) {
1669 for (i = 0; i < N_LINE_ARANGE; i++) {
1670 cl->svals[i] = lineF->w64s[i];
1672 stats__cache_F_fetches++;
1673 } else {
1674 for (i = 0; i < N_LINE_ARANGE; i++) {
1675 UWord ix = read_twobit_array( lineZ->ix2s, i );
1676 if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
1677 cl->svals[i] = lineZ->dict[ix];
1678 if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
1680 stats__cache_Z_fetches++;
1682 normalise_CacheLine( cl );
1685 /* Invalidate the cachelines corresponding to the given range, which
1686 must start and end on a cacheline boundary. */
1687 static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1689 Word wix;
1691 /* ga must be on a cacheline boundary. */
1692 tl_assert (is_valid_scache_tag (ga));
1693 /* szB must be a multiple of cacheline size. */
1694 tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
1697 Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1698 Word nwix = szB / N_LINE_ARANGE;
1700 if (nwix > N_WAY_NENT)
1701 nwix = N_WAY_NENT; // no need to check the same entry several times.
1703 for (wix = 0; wix < nwix; wix++) {
1704 if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1705 cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1706 ga_ix++;
1707 if (UNLIKELY(ga_ix == N_WAY_NENT))
1708 ga_ix = 0;
1713 static void shmem__flush_and_invalidate_scache ( void ) {
1714 Word wix;
1715 Addr tag;
1716 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1717 tl_assert(!is_valid_scache_tag(1));
1718 for (wix = 0; wix < N_WAY_NENT; wix++) {
1719 tag = cache_shmem.tags0[wix];
1720 if (tag == 1/*INVALID*/) {
1721 /* already invalid; nothing to do */
1722 } else {
1723 tl_assert(is_valid_scache_tag(tag));
1724 cacheline_wback( wix );
1726 cache_shmem.tags0[wix] = 1/*INVALID*/;
1728 stats__cache_flushes_invals++;
1732 static inline Bool aligned16 ( Addr a ) {
1733 return 0 == (a & 1);
1735 static inline Bool aligned32 ( Addr a ) {
1736 return 0 == (a & 3);
1738 static inline Bool aligned64 ( Addr a ) {
1739 return 0 == (a & 7);
1741 static inline UWord get_cacheline_offset ( Addr a ) {
1742 return (UWord)(a & (N_LINE_ARANGE - 1));
1744 static inline Addr cacheline_ROUNDUP ( Addr a ) {
1745 return ROUNDUP(a, N_LINE_ARANGE);
1747 static inline Addr cacheline_ROUNDDN ( Addr a ) {
1748 return ROUNDDN(a, N_LINE_ARANGE);
1750 static inline UWord get_treeno ( Addr a ) {
1751 return get_cacheline_offset(a) >> 3;
1753 static inline UWord get_tree_offset ( Addr a ) {
1754 return a & 7;
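   /* Worked example for the helpers above, assuming a cache line covers
      64 bytes (N_LINE_ARANGE == 64, hence N_LINE_BITS == 6): for an
      address 'a' whose low 6 bits are 0b101010 (42), the in-line offset
      is 42, the tree number is 42 >> 3 == 5 (the sixth 8-byte tree in
      the line) and the tree offset is 42 & 7 == 2 (the third byte within
      that tree).  aligned16/32/64 test the low 1, 2 or 3 address bits
      respectively, so this 'a' satisfies aligned16 but neither aligned32
      nor aligned64. */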
1757 static __attribute__((noinline))
1758 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1759 static inline CacheLine* get_cacheline ( Addr a )
1761 /* tag is 'a' with the in-line offset masked out,
1762 eg a[31]..a[4] 0000 */
1763 Addr tag = a & ~(N_LINE_ARANGE - 1);
1764 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1765 stats__cache_totrefs++;
1766 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1767 return &cache_shmem.lyns0[wix];
1768 } else {
1769 return get_cacheline_MISS( a );
1773 static __attribute__((noinline))
1774 CacheLine* get_cacheline_MISS ( Addr a )
1776 /* tag is 'a' with the in-line offset masked out,
1777 eg a[31]..a[4] 0000 */
1779 CacheLine* cl;
1780 Addr* tag_old_p;
1781 Addr tag = a & ~(N_LINE_ARANGE - 1);
1782 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1784 tl_assert(tag != cache_shmem.tags0[wix]);
1786 /* Dump the old line into the backing store. */
1787 stats__cache_totmisses++;
1789 cl = &cache_shmem.lyns0[wix];
1790 tag_old_p = &cache_shmem.tags0[wix];
1792 if (is_valid_scache_tag( *tag_old_p )) {
1793 /* EXPENSIVE and REDUNDANT: callee does it */
1794 if (CHECK_ZSM)
1795 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1796 cacheline_wback( wix );
1798 /* and reload the new one */
1799 *tag_old_p = tag;
1800 cacheline_fetch( wix );
1801 if (CHECK_ZSM)
1802 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1803 return cl;
1806 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1807 stats__cline_64to32pulldown++;
1808 switch (toff) {
1809 case 0: case 4:
1810 tl_assert(descr & TREE_DESCR_64);
1811 tree[4] = tree[0];
1812 descr &= ~TREE_DESCR_64;
1813 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1814 break;
1815 default:
1816 tl_assert(0);
1818 return descr;
1821 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1822 stats__cline_32to16pulldown++;
1823 switch (toff) {
1824 case 0: case 2:
1825 if (!(descr & TREE_DESCR_32_0)) {
1826 descr = pulldown_to_32(tree, 0, descr);
1828 tl_assert(descr & TREE_DESCR_32_0);
1829 tree[2] = tree[0];
1830 descr &= ~TREE_DESCR_32_0;
1831 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1832 break;
1833 case 4: case 6:
1834 if (!(descr & TREE_DESCR_32_1)) {
1835 descr = pulldown_to_32(tree, 4, descr);
1837 tl_assert(descr & TREE_DESCR_32_1);
1838 tree[6] = tree[4];
1839 descr &= ~TREE_DESCR_32_1;
1840 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1841 break;
1842 default:
1843 tl_assert(0);
1845 return descr;
1848 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1849 stats__cline_16to8pulldown++;
1850 switch (toff) {
1851 case 0: case 1:
1852 if (!(descr & TREE_DESCR_16_0)) {
1853 descr = pulldown_to_16(tree, 0, descr);
1855 tl_assert(descr & TREE_DESCR_16_0);
1856 tree[1] = tree[0];
1857 descr &= ~TREE_DESCR_16_0;
1858 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1859 break;
1860 case 2: case 3:
1861 if (!(descr & TREE_DESCR_16_1)) {
1862 descr = pulldown_to_16(tree, 2, descr);
1864 tl_assert(descr & TREE_DESCR_16_1);
1865 tree[3] = tree[2];
1866 descr &= ~TREE_DESCR_16_1;
1867 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1868 break;
1869 case 4: case 5:
1870 if (!(descr & TREE_DESCR_16_2)) {
1871 descr = pulldown_to_16(tree, 4, descr);
1873 tl_assert(descr & TREE_DESCR_16_2);
1874 tree[5] = tree[4];
1875 descr &= ~TREE_DESCR_16_2;
1876 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1877 break;
1878 case 6: case 7:
1879 if (!(descr & TREE_DESCR_16_3)) {
1880 descr = pulldown_to_16(tree, 6, descr);
1882 tl_assert(descr & TREE_DESCR_16_3);
1883 tree[7] = tree[6];
1884 descr &= ~TREE_DESCR_16_3;
1885 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1886 break;
1887 default:
1888 tl_assert(0);
1890 return descr;
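   /* Example pulldown trace: starting from a tree whose descriptor is
      just TREE_DESCR_64 (one valid 64-bit value in tree[0]), a byte
      access at toff 3 runs pulldown_to_8(,3,) -> pulldown_to_16(,2,)
      -> pulldown_to_32(,0,).  tree[0] gets copied to tree[4], then to
      tree[2], then to tree[3], and the descriptor ends up as
      TREE_DESCR_32_1 | TREE_DESCR_16_0 | TREE_DESCR_8_3 | TREE_DESCR_8_2:
      the 64-bit leaf has been split just far enough to expose byte 3. */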
1894 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1895 UShort mask;
1896 switch (toff) {
1897 case 0:
1898 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1899 tl_assert( (descr & mask) == mask );
1900 descr &= ~mask;
1901 descr |= TREE_DESCR_16_0;
1902 break;
1903 case 2:
1904 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1905 tl_assert( (descr & mask) == mask );
1906 descr &= ~mask;
1907 descr |= TREE_DESCR_16_1;
1908 break;
1909 case 4:
1910 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1911 tl_assert( (descr & mask) == mask );
1912 descr &= ~mask;
1913 descr |= TREE_DESCR_16_2;
1914 break;
1915 case 6:
1916 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1917 tl_assert( (descr & mask) == mask );
1918 descr &= ~mask;
1919 descr |= TREE_DESCR_16_3;
1920 break;
1921 default:
1922 tl_assert(0);
1924 return descr;
1927 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1928 UShort mask;
1929 switch (toff) {
1930 case 0:
1931 if (!(descr & TREE_DESCR_16_0))
1932 descr = pullup_descr_to_16(descr, 0);
1933 if (!(descr & TREE_DESCR_16_1))
1934 descr = pullup_descr_to_16(descr, 2);
1935 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1936 tl_assert( (descr & mask) == mask );
1937 descr &= ~mask;
1938 descr |= TREE_DESCR_32_0;
1939 break;
1940 case 4:
1941 if (!(descr & TREE_DESCR_16_2))
1942 descr = pullup_descr_to_16(descr, 4);
1943 if (!(descr & TREE_DESCR_16_3))
1944 descr = pullup_descr_to_16(descr, 6);
1945 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1946 tl_assert( (descr & mask) == mask );
1947 descr &= ~mask;
1948 descr |= TREE_DESCR_32_1;
1949 break;
1950 default:
1951 tl_assert(0);
1953 return descr;
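   /* Example pullup trace: with the four low byte leaves valid (descr
      contains TREE_DESCR_8_3 .. TREE_DESCR_8_0 and neither of
      TREE_DESCR_16_1/16_0), pullup_descr_to_32(descr, 0) first folds
      8_1|8_0 into TREE_DESCR_16_0 and 8_3|8_2 into TREE_DESCR_16_1, then
      folds those two into TREE_DESCR_32_0.  Only the descriptor changes;
      the pullup direction never touches the tree values themselves. */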
1956 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1957 switch (toff) {
1958 case 0: case 4:
1959 return 0 != (descr & TREE_DESCR_64);
1960 default:
1961 tl_assert(0);
1965 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1966 switch (toff) {
1967 case 0:
1968 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1969 case 2:
1970 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1971 case 4:
1972 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1973 case 6:
1974 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1975 default:
1976 tl_assert(0);
1980 /* ------------ Cache management ------------ */
1982 static void zsm_flush_cache ( void )
1984 shmem__flush_and_invalidate_scache();
1988 static void zsm_init ( void )
1990 tl_assert( sizeof(UWord) == sizeof(Addr) );
1992 tl_assert(map_shmem == NULL);
1993 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1994 HG_(free),
1995 NULL/*unboxed UWord cmp*/);
1996 /* Invalidate all cache entries. */
1997 tl_assert(!is_valid_scache_tag(1));
1998 for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1999 cache_shmem.tags0[wix] = 1/*INVALID*/;
2002 LineF_pool_allocator = VG_(newPA) (
2003 sizeof(LineF),
2004 /* Nr elements/pool to fill a core arena block
2005 taking some arena overhead into account. */
2006 (4 * 1024 * 1024 - 200)/sizeof(LineF),
2007 HG_(zalloc),
2008 "libhb.LineF_storage.pool",
2009 HG_(free)
2012 /* a SecMap must contain an integral number of CacheLines */
2013 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
2014 /* also ... a CacheLine holds an integral number of trees */
2015 tl_assert(0 == (N_LINE_ARANGE % 8));
2018 /////////////////////////////////////////////////////////////////
2019 /////////////////////////////////////////////////////////////////
2020 // //
2021 // SECTION END compressed shadow memory //
2022 // //
2023 /////////////////////////////////////////////////////////////////
2024 /////////////////////////////////////////////////////////////////
2028 /////////////////////////////////////////////////////////////////
2029 /////////////////////////////////////////////////////////////////
2030 // //
2031 // SECTION BEGIN vts primitives //
2032 // //
2033 /////////////////////////////////////////////////////////////////
2034 /////////////////////////////////////////////////////////////////
2037 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
2038 being compact stand-ins for Thr*'s. Use these functions to map
2039 between them. */
2040 static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
2041 static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
2043 __attribute__((noreturn))
2044 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
2046 if (due_to_nThrs) {
2047 const HChar* s =
2048 "\n"
2049 "Helgrind: cannot continue, run aborted: too many threads.\n"
2050 "Sorry. Helgrind can only handle programs that create\n"
2051 "%'llu or fewer threads over their entire lifetime.\n"
2052 "\n";
2053 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
2054 } else {
2055 const HChar* s =
2056 "\n"
2057 "Helgrind: cannot continue, run aborted: too many\n"
2058 "synchronisation events. Sorry. Helgrind can only handle\n"
2059 "programs which perform %'llu or fewer\n"
2060 "inter-thread synchronisation events (locks, unlocks, etc).\n"
2061 "\n";
2062 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
2064 VG_(exit)(1);
2065 /*NOTREACHED*/
2066 tl_assert(0); /*wtf?!*/
2070 /* The dead thread (ThrID, actually) tables. A thread may only be
2071 listed here if we have been notified thereof by libhb_async_exit.
2072 New entries are added at the end. The order isn't important, but
2073 the ThrID values must be unique.
2074 verydead_thread_table_not_pruned lists the identity of the threads
2075 that died since the previous round of pruning.
2076 Once pruning is done, these ThrID are added in verydead_thread_table.
2077 We don't actually need to keep the set of threads that have ever died --
2078 only the threads that have died since the previous round of
2079 pruning. But it's useful for sanity check purposes to keep the
2080 entire set, so we do. */
2081 static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
2082 static XArray* /* of ThrID */ verydead_thread_table = NULL;
2084 /* Arbitrary total ordering on ThrIDs. */
2085 static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2086 ThrID id1 = *(const ThrID*)v1;
2087 ThrID id2 = *(const ThrID*)v2;
2088 if (id1 < id2) return -1;
2089 if (id1 > id2) return 1;
2090 return 0;
2093 static void verydead_thread_tables_init ( void )
2095 tl_assert(!verydead_thread_table);
2096 tl_assert(!verydead_thread_table_not_pruned);
2097 verydead_thread_table
2098 = VG_(newXA)( HG_(zalloc),
2099 "libhb.verydead_thread_table_init.1",
2100 HG_(free), sizeof(ThrID) );
2101 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
2102 verydead_thread_table_not_pruned
2103 = VG_(newXA)( HG_(zalloc),
2104 "libhb.verydead_thread_table_init.2",
2105 HG_(free), sizeof(ThrID) );
2106 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
2109 static void verydead_thread_table_sort_and_check (XArray* thrids)
2111 UWord i;
2113 VG_(sortXA)( thrids );
2114 /* Sanity check: check for unique ThrID values. */
2115 UWord nBT = VG_(sizeXA)( thrids );
2116 if (nBT > 0) {
2117 ThrID thrid1, thrid2;
2118 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
2119 for (i = 1; i < nBT; i++) {
2120 thrid1 = thrid2;
2121 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
2122 tl_assert(thrid1 < thrid2);
2125 /* Ok, so the dead thread table thrids has unique and in-order keys. */
2128 /* A VTS contains .ts, its vector clock, and also .id, a field to hold
2129 a backlink for the caller's convenience. Since we have no idea
2130 what to set that to in the library, it always gets set to
2131 VtsID_INVALID. */
2132 typedef
2133 struct {
2134 VtsID id;
2135 UInt usedTS;
2136 UInt sizeTS;
2137 ScalarTS ts[0];
2139 VTS;
2141 /* Allocate a VTS capable of storing 'sizeTS' entries. */
2142 static VTS* VTS__new ( const HChar* who, UInt sizeTS );
2144 /* Make a clone of 'vts', sizing the new array to exactly match the
2145 number of ScalarTSs present. */
2146 static VTS* VTS__clone ( const HChar* who, VTS* vts );
2148 /* Make a clone of 'vts' with the thrids in 'thridsToDel' removed. The new
2149 array is sized exactly to hold the number of required elements.
2150 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2151 must be in strictly increasing order. */
2152 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
2154 /* Delete this VTS in its entirety. */
2155 static void VTS__delete ( VTS* vts );
2157 /* Create a new singleton VTS in 'out'. Caller must have
2158 pre-allocated 'out' sufficiently big to hold the result in all
2159 possible cases. */
2160 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
2162 /* Create in 'out' a VTS which is the same as 'vts' except with
2163 vts[me]++, so to speak. Caller must have pre-allocated 'out'
2164 sufficiently big to hold the result in all possible cases. */
2165 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
2167 /* Create in 'out' a VTS which is the join (max) of 'a' and
2168 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2169 the result in all possible cases. */
2170 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
2172 /* Compute the partial ordering relation of the two args. Although we
2173 could be completely general and return an enumeration value (EQ,
2174 LT, GT, UN), in fact we only need LEQ, and so we may as well
2175 hardwire that fact.
2177 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2178 invalid ThrID). In the latter case, the returned ThrID indicates
2179 a point at which the LEQ relation fails to hold. There may be more
2180 than one such point, but we only care about seeing one of them, not
2181 all of them. This rather strange convention is used because
2182 sometimes we want to know the actual index at which they first
2183 differ. */
2184 static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
2186 /* Compute an arbitrary structural (total) ordering on the two args,
2187 based on their VCs, so they can be looked up in a table, tree, etc.
2188 Returns -1, 0 or 1. */
2189 static Word VTS__cmp_structural ( VTS* a, VTS* b );
2191 /* Debugging only. Display the given VTS. */
2192 static void VTS__show ( const VTS* vts );
2194 /* Debugging only. Return vts[index], so to speak. */
2195 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
2197 /* Notify the VTS machinery that a thread has been declared
2198 comprehensively dead: that is, it has done an async exit AND it has
2199 been joined with. This should ensure that its local clocks (.viR
2200 and .viW) will never again change, and so all mentions of this
2201 thread from all VTSs in the system may be removed. */
2202 static void VTS__declare_thread_very_dead ( Thr* idx );
2204 /*--------------- to do with Vector Timestamps ---------------*/
2206 static Bool is_sane_VTS ( VTS* vts )
2208 UWord i, n;
2209 ScalarTS *st1, *st2;
2210 if (!vts) return False;
2211 if (vts->usedTS > vts->sizeTS) return False;
2212 n = vts->usedTS;
2213 if (n == 1) {
2214 st1 = &vts->ts[0];
2215 if (st1->tym == 0)
2216 return False;
2218 else
2219 if (n >= 2) {
2220 for (i = 0; i < n-1; i++) {
2221 st1 = &vts->ts[i];
2222 st2 = &vts->ts[i+1];
2223 if (st1->thrid >= st2->thrid)
2224 return False;
2225 if (st1->tym == 0 || st2->tym == 0)
2226 return False;
2229 return True;
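   /* For example, a VTS whose ts[] is { (thrid 1028, tym 3),
      (thrid 1031, tym 1) } is sane: the thrids strictly increase and no
      tym is zero.  Swapping the two entries, or letting any tym be 0,
      makes it insane.  (ThrIDs apparently start at 1024; see the
      assertion in VTS__cmpLEQ below.) */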
2233 /* Create a new, empty VTS.
2235 static VTS* VTS__new ( const HChar* who, UInt sizeTS )
2237 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
2238 tl_assert(vts->usedTS == 0);
2239 vts->sizeTS = sizeTS;
2240 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
2241 return vts;
2244 /* Clone this VTS.
2246 static VTS* VTS__clone ( const HChar* who, VTS* vts )
2248 tl_assert(vts);
2249 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2250 UInt nTS = vts->usedTS;
2251 VTS* clone = VTS__new(who, nTS);
2252 clone->id = vts->id;
2253 clone->sizeTS = nTS;
2254 clone->usedTS = nTS;
2255 UInt i;
2256 for (i = 0; i < nTS; i++) {
2257 clone->ts[i] = vts->ts[i];
2259 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2260 return clone;
2264 /* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
2265 must be in strictly increasing order. We could obviously do this
2266 much more efficiently (in linear time) if necessary.
2268 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
2270 UInt i, j;
2271 tl_assert(vts);
2272 tl_assert(thridsToDel);
2273 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2274 UInt nTS = vts->usedTS;
2275 /* Figure out how many ScalarTSs will remain in the output. */
2276 UInt nReq = nTS;
2277 for (i = 0; i < nTS; i++) {
2278 ThrID thrid = vts->ts[i].thrid;
2279 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2280 nReq--;
2282 tl_assert(nReq <= nTS);
2283 /* Copy the ones that will remain. */
2284 VTS* res = VTS__new(who, nReq);
2285 j = 0;
2286 for (i = 0; i < nTS; i++) {
2287 ThrID thrid = vts->ts[i].thrid;
2288 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2289 continue;
2290 res->ts[j++] = vts->ts[i];
2292 tl_assert(j == nReq);
2293 tl_assert(j == res->sizeTS);
2294 res->usedTS = j;
2295 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2296 return res;
2300 /* Delete this VTS in its entirety.
2302 static void VTS__delete ( VTS* vts )
2304 tl_assert(vts);
2305 tl_assert(vts->usedTS <= vts->sizeTS);
2306 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2307 HG_(free)(vts);
2311 /* Create a new singleton VTS.
2313 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2315 tl_assert(thr);
2316 tl_assert(tym >= 1);
2317 tl_assert(out);
2318 tl_assert(out->usedTS == 0);
2319 tl_assert(out->sizeTS >= 1);
2320 UInt hi = out->usedTS++;
2321 out->ts[hi].thrid = Thr__to_ThrID(thr);
2322 out->ts[hi].tym = tym;
2326 /* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
2327 not modified.
2329 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
2331 UInt i, n;
2332 ThrID me_thrid;
2333 Bool found = False;
2335 stats__vts__tick++;
2337 tl_assert(out);
2338 tl_assert(out->usedTS == 0);
2339 if (vts->usedTS >= ThrID_MAX_VALID)
2340 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2341 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2343 tl_assert(me);
2344 me_thrid = Thr__to_ThrID(me);
2345 tl_assert(is_sane_VTS(vts));
2346 n = vts->usedTS;
2348 /* Copy all entries which precede 'me'. */
2349 for (i = 0; i < n; i++) {
2350 ScalarTS* here = &vts->ts[i];
2351 if (UNLIKELY(here->thrid >= me_thrid))
2352 break;
2353 UInt hi = out->usedTS++;
2354 out->ts[hi] = *here;
2357 /* 'i' now indicates the next entry to copy, if any.
2358 There are 3 possibilities:
2359 (a) there is no next entry (we used them all up already):
2360 add (me_thrid,1) to the output, and quit
2361 (b) there is a next entry, and its thrid > me_thrid:
2362 add (me_thrid,1) to the output, then copy the remaining entries
2363 (c) there is a next entry, and its thrid == me_thrid:
2364 copy it to the output but increment its timestamp value.
2365 Then copy the remaining entries. (c) is the common case.
2367 tl_assert(i >= 0 && i <= n);
2368 if (i == n) { /* case (a) */
2369 UInt hi = out->usedTS++;
2370 out->ts[hi].thrid = me_thrid;
2371 out->ts[hi].tym = 1;
2372 } else {
2373 /* cases (b) and (c) */
2374 ScalarTS* here = &vts->ts[i];
2375 if (me_thrid == here->thrid) { /* case (c) */
2376 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
2377 /* We're hosed. We have to stop. */
2378 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2380 UInt hi = out->usedTS++;
2381 out->ts[hi].thrid = here->thrid;
2382 out->ts[hi].tym = here->tym + 1;
2383 i++;
2384 found = True;
2385 } else { /* case (b) */
2386 UInt hi = out->usedTS++;
2387 out->ts[hi].thrid = me_thrid;
2388 out->ts[hi].tym = 1;
2390 /* And copy any remaining entries. */
2391 for (/*keepgoing*/; i < n; i++) {
2392 ScalarTS* here2 = &vts->ts[i];
2393 UInt hi = out->usedTS++;
2394 out->ts[hi] = *here2;
2398 tl_assert(is_sane_VTS(out));
2399 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2400 tl_assert(out->usedTS <= out->sizeTS);
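   /* For example, with ThrID(T1) < ThrID(T2) < ThrID(T3): ticking
      [T1:3, T2:5] at T2 yields [T1:3, T2:6] (case (c), same size), while
      ticking it at T3, which is absent, yields [T1:3, T2:5, T3:1]
      (case (a), one entry bigger) -- exactly what the usedTS assertion
      just above demands. */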
2404 /* Return a new VTS constructed as the join (max) of the 2 args.
2405 Neither arg is modified.
2407 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
2409 UInt ia, ib, useda, usedb;
2410 ULong tyma, tymb, tymMax;
2411 ThrID thrid;
2412 UInt ncommon = 0;
2414 stats__vts__join++;
2416 tl_assert(a);
2417 tl_assert(b);
2418 useda = a->usedTS;
2419 usedb = b->usedTS;
2421 tl_assert(out);
2422 tl_assert(out->usedTS == 0);
2423 /* overly conservative test, but doing better involves comparing
2424 the two VTSs, which we don't want to do at this point. */
2425 if (useda + usedb >= ThrID_MAX_VALID)
2426 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2427 tl_assert(out->sizeTS >= useda + usedb);
2429 ia = ib = 0;
2431 while (1) {
2433 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2434 from a and b in order, where thrid is the next ThrID
2435 occurring in either a or b, and tyma/b are the relevant
2436 scalar timestamps, taking into account implicit zeroes. */
2437 tl_assert(ia >= 0 && ia <= useda);
2438 tl_assert(ib >= 0 && ib <= usedb);
2440 if (ia == useda && ib == usedb) {
2441 /* both empty - done */
2442 break;
2444 } else if (ia == useda && ib != usedb) {
2445 /* a empty, use up b */
2446 ScalarTS* tmpb = &b->ts[ib];
2447 thrid = tmpb->thrid;
2448 tyma = 0;
2449 tymb = tmpb->tym;
2450 ib++;
2452 } else if (ia != useda && ib == usedb) {
2453 /* b empty, use up a */
2454 ScalarTS* tmpa = &a->ts[ia];
2455 thrid = tmpa->thrid;
2456 tyma = tmpa->tym;
2457 tymb = 0;
2458 ia++;
2460 } else {
2461 /* both not empty; extract lowest-ThrID'd triple */
2462 ScalarTS* tmpa = &a->ts[ia];
2463 ScalarTS* tmpb = &b->ts[ib];
2464 if (tmpa->thrid < tmpb->thrid) {
2465 /* a has the lowest unconsidered ThrID */
2466 thrid = tmpa->thrid;
2467 tyma = tmpa->tym;
2468 tymb = 0;
2469 ia++;
2470 } else if (tmpa->thrid > tmpb->thrid) {
2471 /* b has the lowest unconsidered ThrID */
2472 thrid = tmpb->thrid;
2473 tyma = 0;
2474 tymb = tmpb->tym;
2475 ib++;
2476 } else {
2477 /* they both next mention the same ThrID */
2478 tl_assert(tmpa->thrid == tmpb->thrid);
2479 thrid = tmpa->thrid; /* == tmpb->thrid */
2480 tyma = tmpa->tym;
2481 tymb = tmpb->tym;
2482 ia++;
2483 ib++;
2484 ncommon++;
2488 /* having laboriously determined (thr, tyma, tymb), do something
2489 useful with it. */
2490 tymMax = tyma > tymb ? tyma : tymb;
2491 if (tymMax > 0) {
2492 UInt hi = out->usedTS++;
2493 out->ts[hi].thrid = thrid;
2494 out->ts[hi].tym = tymMax;
2499 tl_assert(is_sane_VTS(out));
2500 tl_assert(out->usedTS <= out->sizeTS);
2501 tl_assert(out->usedTS == useda + usedb - ncommon);
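   /* For example, with ThrID(T1) < ThrID(T2) < ThrID(T3): joining
      a = [T1:3, T2:5] with b = [T2:7, T3:1] walks both vectors in thrid
      order, taking the per-thread maximum (absent entries count as 0),
      and produces [T1:3, T2:7, T3:1].  Here ncommon == 1, so usedTS is
      2 + 2 - 1 == 3, matching the assertion above. */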
2505 /* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2506 they are, or the first ThrID for which they are not (no valid ThrID
2507 has the value zero). This rather strange convention is used
2508 because sometimes we want to know the actual index at which they
2509 first differ. */
2510 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
2512 Word ia, ib, useda, usedb;
2513 ULong tyma, tymb;
2515 stats__vts__cmpLEQ++;
2517 tl_assert(a);
2518 tl_assert(b);
2519 useda = a->usedTS;
2520 usedb = b->usedTS;
2522 ia = ib = 0;
2524 while (1) {
2526 /* This logic is to enumerate doubles (tyma, tymb) drawn
2527 from a and b in order, and tyma/b are the relevant
2528 scalar timestamps, taking into account implicit zeroes. */
2529 ThrID thrid;
2531 tl_assert(ia >= 0 && ia <= useda);
2532 tl_assert(ib >= 0 && ib <= usedb);
2534 if (ia == useda && ib == usedb) {
2535 /* both empty - done */
2536 break;
2538 } else if (ia == useda && ib != usedb) {
2539 /* a empty, use up b */
2540 ScalarTS* tmpb = &b->ts[ib];
2541 tyma = 0;
2542 tymb = tmpb->tym;
2543 thrid = tmpb->thrid;
2544 ib++;
2546 } else if (ia != useda && ib == usedb) {
2547 /* b empty, use up a */
2548 ScalarTS* tmpa = &a->ts[ia];
2549 tyma = tmpa->tym;
2550 thrid = tmpa->thrid;
2551 tymb = 0;
2552 ia++;
2554 } else {
2555 /* both not empty; extract lowest-ThrID'd triple */
2556 ScalarTS* tmpa = &a->ts[ia];
2557 ScalarTS* tmpb = &b->ts[ib];
2558 if (tmpa->thrid < tmpb->thrid) {
2559 /* a has the lowest unconsidered ThrID */
2560 tyma = tmpa->tym;
2561 thrid = tmpa->thrid;
2562 tymb = 0;
2563 ia++;
2565 else
2566 if (tmpa->thrid > tmpb->thrid) {
2567 /* b has the lowest unconsidered ThrID */
2568 tyma = 0;
2569 tymb = tmpb->tym;
2570 thrid = tmpb->thrid;
2571 ib++;
2572 } else {
2573 /* they both next mention the same ThrID */
2574 tl_assert(tmpa->thrid == tmpb->thrid);
2575 tyma = tmpa->tym;
2576 thrid = tmpa->thrid;
2577 tymb = tmpb->tym;
2578 ia++;
2579 ib++;
2583 /* having laboriously determined (tyma, tymb), do something
2584 useful with it. */
2585 if (tyma > tymb) {
2586 /* not LEQ at this index. Quit, since the answer is
2587 determined already. */
2588 tl_assert(thrid >= 1024);
2589 return thrid;
2593 return 0; /* all points are LEQ => return an invalid ThrID */
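   /* For example, a = [T1:3] and b = [T1:4, T2:1] is LEQ: 3 <= 4 at T1
      and the implicit 0 <= 1 at T2, so the result is 0.  With the roles
      swapped, a = [T1:4, T2:1] vs b = [T1:3] fails at T1 (4 > 3), so the
      first failing ThrID -- T1's, which is >= 1024 and hence cannot be
      confused with the "is LEQ" result 0 -- is returned. */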
2597 /* Compute an arbitrary structural (total) ordering on the two args,
2598 based on their VCs, so they can be looked up in a table, tree, etc.
2599 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2600 performance critical so there is some effort expended to make it as
2601 fast as possible.
2603 Word VTS__cmp_structural ( VTS* a, VTS* b )
2605 /* We just need to generate an arbitrary total ordering based on
2606 a->ts and b->ts. Preferably do it in a way which encounters likely
2607 differences relatively quickly. */
2608 Word i;
2609 Word useda = 0, usedb = 0;
2610 ScalarTS *ctsa = NULL, *ctsb = NULL;
2612 stats__vts__cmp_structural++;
2614 tl_assert(a);
2615 tl_assert(b);
2617 ctsa = &a->ts[0]; useda = a->usedTS;
2618 ctsb = &b->ts[0]; usedb = b->usedTS;
2620 if (LIKELY(useda == usedb)) {
2621 ScalarTS *tmpa = NULL, *tmpb = NULL;
2622 stats__vts__cmp_structural_slow++;
2623 /* Same length vectors. Find the first difference, if any, as
2624 fast as possible. */
2625 for (i = 0; i < useda; i++) {
2626 tmpa = &ctsa[i];
2627 tmpb = &ctsb[i];
2628 if (LIKELY(tmpa->tym == tmpb->tym
2629 && tmpa->thrid == tmpb->thrid))
2630 continue;
2631 else
2632 break;
2634 if (UNLIKELY(i == useda)) {
2635 /* They're identical. */
2636 return 0;
2637 } else {
2638 tl_assert(i >= 0 && i < useda);
2639 if (tmpa->tym < tmpb->tym) return -1;
2640 if (tmpa->tym > tmpb->tym) return 1;
2641 if (tmpa->thrid < tmpb->thrid) return -1;
2642 if (tmpa->thrid > tmpb->thrid) return 1;
2643 /* we just established them as non-identical, hence: */
2645 /*NOTREACHED*/
2646 tl_assert(0);
2649 if (useda < usedb) return -1;
2650 if (useda > usedb) return 1;
2651 /*NOTREACHED*/
2652 tl_assert(0);
2656 /* Debugging only. Display the given VTS.
2658 static void VTS__show ( const VTS* vts )
2660 Word i, n;
2661 tl_assert(vts);
2663 VG_(printf)("[");
2664 n = vts->usedTS;
2665 for (i = 0; i < n; i++) {
2666 const ScalarTS *st = &vts->ts[i];
2667 VG_(printf)(i < n-1 ? "%d:%llu " : "%d:%llu", st->thrid, (ULong)st->tym);
2669 VG_(printf)("]");
2673 /* Debugging only. Return vts[index], so to speak.
2675 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2677 UWord i, n;
2678 ThrID idx_thrid = Thr__to_ThrID(idx);
2679 stats__vts__indexat_slow++;
2680 tl_assert(vts);
2681 n = vts->usedTS;
2682 for (i = 0; i < n; i++) {
2683 ScalarTS* st = &vts->ts[i];
2684 if (st->thrid == idx_thrid)
2685 return st->tym;
2687 return 0;
2691 /* See comment on prototype above.
2693 static void VTS__declare_thread_very_dead ( Thr* thr )
2695 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2697 tl_assert(thr->llexit_done);
2698 tl_assert(thr->joinedwith_done);
2700 ThrID nyu;
2701 nyu = Thr__to_ThrID(thr);
2702 VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
2704 /* We can only get here if we're assured that we'll never again
2705 need to look at this thread's ::viR or ::viW. Set them to
2706 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2707 mostly so that we don't wind up pruning them (as that would be
2708 nonsensical: the only interesting ScalarTS entry for a dead
2709 thread is its own index, and the pruning will remove that). */
2710 VtsID__rcdec(thr->viR);
2711 VtsID__rcdec(thr->viW);
2712 thr->viR = VtsID_INVALID;
2713 thr->viW = VtsID_INVALID;
2717 /////////////////////////////////////////////////////////////////
2718 /////////////////////////////////////////////////////////////////
2719 // //
2720 // SECTION END vts primitives //
2721 // //
2722 /////////////////////////////////////////////////////////////////
2723 /////////////////////////////////////////////////////////////////
2727 /////////////////////////////////////////////////////////////////
2728 /////////////////////////////////////////////////////////////////
2729 // //
2730 // SECTION BEGIN main library //
2731 // //
2732 /////////////////////////////////////////////////////////////////
2733 /////////////////////////////////////////////////////////////////
2736 /////////////////////////////////////////////////////////
2737 // //
2738 // VTS set //
2739 // //
2740 /////////////////////////////////////////////////////////
2742 static WordFM* /* WordFM VTS* void */ vts_set = NULL;
2744 static void vts_set_init ( void )
2746 tl_assert(!vts_set);
2747 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2748 HG_(free),
2749 (Word(*)(UWord,UWord))VTS__cmp_structural );
2752 /* Given a VTS, look in vts_set to see if we already have a
2753 structurally identical one. If yes, return the pair (True, pointer
2754 to the existing one). If no, clone this one, add the clone to the
2755 set, and return (False, pointer to the clone). */
2756 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
2758 UWord keyW, valW;
2759 stats__vts_set__focaa++;
2760 tl_assert(cand->id == VtsID_INVALID);
2761 /* lookup cand (by value) */
2762 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2763 /* found it */
2764 tl_assert(valW == 0);
2765 /* if this fails, cand (by ref) was already present (!) */
2766 tl_assert(keyW != (UWord)cand);
2767 *res = (VTS*)keyW;
2768 return True;
2769 } else {
2770 /* not present. Clone, add and return address of clone. */
2771 stats__vts_set__focaa_a++;
2772 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2773 tl_assert(clone != cand);
2774 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2775 *res = clone;
2776 return False;
2781 /////////////////////////////////////////////////////////
2782 // //
2783 // VTS table //
2784 // //
2785 /////////////////////////////////////////////////////////
2787 static void VtsID__invalidate_caches ( void ); /* fwds */
2789 /* A type to hold VTS table entries. Invariants:
2790 If .vts == NULL, then this entry is not in use, so:
2791 - .rc == 0
2792 - this entry is on the freelist (unfortunately, does not imply
2793 any constraints on value for u.freelink)
2794 If .vts != NULL, then this entry is in use:
2795 - .vts is findable in vts_set
2796 - .vts->id == this entry number
2797 - no specific value for .rc (even 0 is OK)
2798 - this entry is not on freelist, so u.freelink == VtsID_INVALID
2800 typedef
2801 struct {
2802 VTS* vts; /* vts, in vts_set */
2803 UWord rc; /* reference count - enough for entire aspace */
2804 union {
2805 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2806 VtsID remap; /* used only during pruning, for used entries */
2807 } u;
2808 /* u.freelink only used when vts == NULL,
2809 u.remap only used when vts != NULL, during pruning. */
2811 VtsTE;
2813 /* The VTS table. */
2814 static XArray* /* of VtsTE */ vts_tab = NULL;
2816 /* An index into the VTS table, indicating the start of the list of
2817 free (available for use) entries. If the list is empty, this is
2818 VtsID_INVALID. */
2819 static VtsID vts_tab_freelist = VtsID_INVALID;
2821 /* Do a GC of vts_tab when the freelist becomes empty AND the size of
2822 vts_tab equals or exceeds this size. After GC, the value here is
2823 set appropriately so as to check for the next GC point. */
2824 static Word vts_next_GC_at = 1000;
2826 static void vts_tab_init ( void )
2828 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2829 HG_(free), sizeof(VtsTE) );
2830 vts_tab_freelist = VtsID_INVALID;
2833 /* Add ii to the free list, checking that it looks out-of-use. */
2834 static void add_to_free_list ( VtsID ii )
2836 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2837 tl_assert(ie->vts == NULL);
2838 tl_assert(ie->rc == 0);
2839 tl_assert(ie->u.freelink == VtsID_INVALID);
2840 ie->u.freelink = vts_tab_freelist;
2841 vts_tab_freelist = ii;
2844 /* Get an entry from the free list. This will return VtsID_INVALID if
2845 the free list is empty. */
2846 static VtsID get_from_free_list ( void )
2848 VtsID ii;
2849 VtsTE* ie;
2850 if (vts_tab_freelist == VtsID_INVALID)
2851 return VtsID_INVALID;
2852 ii = vts_tab_freelist;
2853 ie = VG_(indexXA)( vts_tab, ii );
2854 tl_assert(ie->vts == NULL);
2855 tl_assert(ie->rc == 0);
2856 vts_tab_freelist = ie->u.freelink;
2857 return ii;
2860 /* Produce a new VtsID that can be used, either by getting it from
2861 the freelist, or, if that is empty, by expanding vts_tab. */
2862 static VtsID get_new_VtsID ( void )
2864 VtsID ii;
2865 VtsTE te;
2866 ii = get_from_free_list();
2867 if (ii != VtsID_INVALID)
2868 return ii;
2869 te.vts = NULL;
2870 te.rc = 0;
2871 te.u.freelink = VtsID_INVALID;
2872 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2873 return ii;
2877 /* Indirect callback from lib_zsm. */
2878 static void VtsID__rcinc ( VtsID ii )
2880 VtsTE* ie;
2881 /* VG_(indexXA) does a range check for us */
2882 ie = VG_(indexXA)( vts_tab, ii );
2883 tl_assert(ie->vts); /* else it's not in use */
2884 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2885 tl_assert(ie->vts->id == ii);
2886 ie->rc++;
2889 /* Indirect callback from lib_zsm. */
2890 static void VtsID__rcdec ( VtsID ii )
2892 VtsTE* ie;
2893 /* VG_(indexXA) does a range check for us */
2894 ie = VG_(indexXA)( vts_tab, ii );
2895 tl_assert(ie->vts); /* else it's not in use */
2896 tl_assert(ie->rc > 0); /* else RC snafu */
2897 tl_assert(ie->vts->id == ii);
2898 ie->rc--;
2902 /* Look up 'cand' in our collection of VTSs. If present, return the
2903 VtsID for the pre-existing version. If not present, clone it, add
2904 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2905 it, and return that. */
2906 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
2908 VTS* in_tab = NULL;
2909 tl_assert(cand->id == VtsID_INVALID);
2910 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2911 tl_assert(in_tab);
2912 if (already_have) {
2913 /* We already have a copy of 'cand'. Use that. */
2914 VtsTE* ie;
2915 tl_assert(in_tab->id != VtsID_INVALID);
2916 ie = VG_(indexXA)( vts_tab, in_tab->id );
2917 tl_assert(ie->vts == in_tab);
2918 return in_tab->id;
2919 } else {
2920 VtsID ii = get_new_VtsID();
2921 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2922 ie->vts = in_tab;
2923 ie->rc = 0;
2924 ie->u.freelink = VtsID_INVALID;
2925 in_tab->id = ii;
2926 return ii;
2931 static void show_vts_stats ( const HChar* caller )
2933 UWord nSet, nTab, nLive;
2934 ULong totrc;
2935 UWord n, i;
2936 nSet = VG_(sizeFM)( vts_set );
2937 nTab = VG_(sizeXA)( vts_tab );
2938 totrc = 0;
2939 nLive = 0;
2940 n = VG_(sizeXA)( vts_tab );
2941 for (i = 0; i < n; i++) {
2942 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2943 if (ie->vts) {
2944 nLive++;
2945 totrc += (ULong)ie->rc;
2946 } else {
2947 tl_assert(ie->rc == 0);
2950 VG_(printf)(" show_vts_stats %s\n", caller);
2951 VG_(printf)(" vts_tab size %4lu\n", nTab);
2952 VG_(printf)(" vts_tab live %4lu\n", nLive);
2953 VG_(printf)(" vts_set size %4lu\n", nSet);
2954 VG_(printf)(" total rc %4llu\n", totrc);
2958 /* --- Helpers for VtsID pruning --- */
2960 static
2961 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2962 /*MOD*/XArray* /* of VtsTE */ new_tab,
2963 VtsID* ii )
2965 VtsTE *old_te, *new_te;
2966 VtsID old_id, new_id;
2967 /* We're relying here on VG_(indexXA)'s range checking to assert on
2968 any stupid values, in particular *ii == VtsID_INVALID. */
2969 old_id = *ii;
2970 old_te = VG_(indexXA)( old_tab, old_id );
2971 old_te->rc--;
2972 new_id = old_te->u.remap;
2973 new_te = VG_(indexXA)( new_tab, new_id );
2974 new_te->rc++;
2975 *ii = new_id;
2978 static
2979 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2980 /*MOD*/XArray* /* of VtsTE */ new_tab,
2981 SVal* s )
2983 SVal old_sv, new_sv;
2984 old_sv = *s;
2985 if (SVal__isC(old_sv)) {
2986 VtsID rMin, wMin;
2987 rMin = SVal__unC_Rmin(old_sv);
2988 wMin = SVal__unC_Wmin(old_sv);
2989 remap_VtsID( old_tab, new_tab, &rMin );
2990 remap_VtsID( old_tab, new_tab, &wMin );
2991 new_sv = SVal__mkC( rMin, wMin );
2992 *s = new_sv;
2997 /* NOT TO BE CALLED FROM WITHIN libzsm. */
2998 __attribute__((noinline))
2999 static void vts_tab__do_GC ( Bool show_stats )
3001 UWord i, nTab, nLive, nFreed;
3003 /* ---------- BEGIN VTS GC ---------- */
3004 /* check this is actually necessary. */
3005 tl_assert(vts_tab_freelist == VtsID_INVALID);
3007 /* empty the caches for partial order checks and binary joins. We
3008 could do better and prune out the entries to be deleted, but it
3009 ain't worth the hassle. */
3010 VtsID__invalidate_caches();
3012 /* First, make the reference counts up to date. */
3013 zsm_flush_cache();
3015 nTab = VG_(sizeXA)( vts_tab );
3017 if (show_stats) {
3018 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
3019 show_vts_stats("before GC");
3022 /* Now we can inspect the entire vts_tab. Any entries with zero
3023 .rc fields are now no longer in use and can be put back on the
3024 free list, removed from vts_set, and deleted. */
3025 nFreed = 0;
3026 for (i = 0; i < nTab; i++) {
3027 Bool present;
3028 UWord oldK = 0, oldV = 12345;
3029 VtsTE* te = VG_(indexXA)( vts_tab, i );
3030 if (te->vts == NULL) {
3031 tl_assert(te->rc == 0);
3032 continue; /* already on the free list (presumably) */
3034 if (te->rc > 0)
3035 continue; /* in use */
3036 /* Ok, we got one we can free. */
3037 tl_assert(te->vts->id == i);
3038 /* first, remove it from vts_set. */
3039 present = VG_(delFromFM)( vts_set,
3040 &oldK, &oldV, (UWord)te->vts );
3041 tl_assert(present); /* else it isn't in vts_set ?! */
3042 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3043 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
3044 /* now free the VTS itself */
3045 VTS__delete(te->vts);
3046 te->vts = NULL;
3047 /* and finally put this entry on the free list */
3048 tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
3049 add_to_free_list( i );
3050 nFreed++;
3053 /* Now figure out when the next GC should be. We'll allow the
3054 number of VTSs to double before GCing again. Except of course
3055 that since we can't (or, at least, don't) shrink vts_tab, we
3056 can't set the threshold value smaller than it. */
3057 tl_assert(nFreed <= nTab);
3058 nLive = nTab - nFreed;
3059 tl_assert(nLive >= 0 && nLive <= nTab);
3060 vts_next_GC_at = 2 * nLive;
3061 if (vts_next_GC_at < nTab)
3062 vts_next_GC_at = nTab;
3064 if (show_stats) {
3065 show_vts_stats("after GC");
3066 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
3069 stats__vts_tab_GC++;
3070 if (VG_(clo_stats)) {
3071 tl_assert(nTab > 0);
3072 VG_(message)(Vg_DebugMsg,
3073 "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
3074 stats__vts_tab_GC,
3075 nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
3077 /* ---------- END VTS GC ---------- */
3079 /* Decide whether to do VTS pruning. We have one of three
3080 settings. */
3081 static UInt pruning_auto_ctr = 0; /* do not make non-static */
3083 Bool do_pruning = False;
3084 switch (HG_(clo_vts_pruning)) {
3085 case 0: /* never */
3086 break;
3087 case 1: /* auto */
3088 do_pruning = (++pruning_auto_ctr % 5) == 0;
3089 break;
3090 case 2: /* always */
3091 do_pruning = True;
3092 break;
3093 default:
3094 tl_assert(0);
3097 /* The rest of this routine only handles pruning, so we can
3098 quit at this point if it is not to be done. */
3099 if (!do_pruning)
3100 return;
3101 /* No need to do pruning if no thread died since the last pruning, as
3102 no VtsTE can be pruned. */
3103 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3104 return;
3106 /* ---------- BEGIN VTS PRUNING ---------- */
3107 /* Sort and check the very dead threads that died since the last pruning.
3108 Sorting is used for the check and so that we can quickly look
3109 up the dead-thread entries as we work through the VTSs. */
3110 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
3112 /* We will run through the old table, and create a new table and
3113 set, at the same time setting the u.remap entries in the old
3114 table to point to the new entries. Then, visit every VtsID in
3115 the system, and replace all of them with new ones, using the
3116 u.remap entries in the old table. Finally, we can delete the old
3117 table and set. */
3119 XArray* /* of VtsTE */ new_tab
3120 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3121 HG_(free), sizeof(VtsTE) );
3123 /* WordFM VTS* void */
3124 WordFM* new_set
3125 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3126 HG_(free),
3127 (Word(*)(UWord,UWord))VTS__cmp_structural );
3129 /* Visit each old VTS. For each one:
3131 * make a pruned version
3133 * search new_set for the pruned version, yielding either
3134 Nothing (not present) or the new VtsID for it.
3136 * if not present, allocate a new VtsID for it, insert (pruned
3137 VTS, new VtsID) in the tree, and set
3138 remap_table[old VtsID] = new VtsID.
3140 * if present, set remap_table[old VtsID] = new VtsID, where
3141 new VtsID was determined by the tree lookup. Then free up
3142 the clone.
3145 UWord nBeforePruning = 0, nAfterPruning = 0;
3146 UWord nSTSsBefore = 0, nSTSsAfter = 0;
3147 VtsID new_VtsID_ctr = 0;
3149 for (i = 0; i < nTab; i++) {
3151 /* For each old VTS .. */
3152 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
3153 VTS* old_vts = old_te->vts;
3155 /* Skip it if not in use */
3156 if (old_te->rc == 0) {
3157 tl_assert(old_vts == NULL);
3158 continue;
3160 tl_assert(old_te->u.remap == VtsID_INVALID);
3161 tl_assert(old_vts != NULL);
3162 tl_assert(old_vts->id == i);
3163 tl_assert(old_vts->ts != NULL);
3165 /* It is in use. Make a pruned version. */
3166 nBeforePruning++;
3167 nSTSsBefore += old_vts->usedTS;
3168 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
3169 old_vts, verydead_thread_table_not_pruned);
3170 tl_assert(new_vts->sizeTS == new_vts->usedTS);
3171 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3172 == 0x0ddC0ffeeBadF00dULL);
3174 /* Get rid of the old VTS and the tree entry. It's a bit more
3175 complex to incrementally delete the VTSs now than to nuke
3176 them all after we're done, but the upside is that we don't
3177 wind up temporarily storing potentially two complete copies
3178 of each VTS and hence spiking memory use. */
3179 UWord oldK = 0, oldV = 12345;
3180 Bool present = VG_(delFromFM)( vts_set,
3181 &oldK, &oldV, (UWord)old_vts );
3182 tl_assert(present); /* else it isn't in vts_set ?! */
3183 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3184 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3185 /* now free the VTS itself */
3186 VTS__delete(old_vts);
3187 old_te->vts = NULL;
3188 old_vts = NULL;
3190 /* NO MENTIONS of old_vts allowed beyond this point. */
3192 /* Ok, we have the pruned copy in new_vts. See if a
3193 structurally identical version is already present in new_set.
3194 If so, delete the one we just made and move on; if not, add
3195 it. */
3196 VTS* identical_version = NULL;
3197 UWord valW = 12345;
3198 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3199 (UWord)new_vts)) {
3200 // already have it
3201 tl_assert(valW == 0);
3202 tl_assert(identical_version != NULL);
3203 tl_assert(identical_version != new_vts);
3204 VTS__delete(new_vts);
3205 new_vts = identical_version;
3206 tl_assert(new_vts->id != VtsID_INVALID);
3207 } else {
3208 tl_assert(valW == 12345);
3209 tl_assert(identical_version == NULL);
3210 new_vts->id = new_VtsID_ctr++;
3211 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3212 tl_assert(!b);
3213 VtsTE new_te;
3214 new_te.vts = new_vts;
3215 new_te.rc = 0;
3216 new_te.u.freelink = VtsID_INVALID;
3217 Word j = VG_(addToXA)( new_tab, &new_te );
3218 tl_assert(j <= i);
3219 tl_assert(j == new_VtsID_ctr - 1);
3220 // stats
3221 nAfterPruning++;
3222 nSTSsAfter += new_vts->usedTS;
3224 old_te->u.remap = new_vts->id;
3226 } /* for (i = 0; i < nTab; i++) */
3228 /* Move the very dead threads from verydead_thread_table_not_pruned to
3229 verydead_thread_table. Sort and check verydead_thread_table
3230 to verify a thread was reported very dead only once. */
3232 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3234 for (i = 0; i < nBT; i++) {
3235 ThrID thrid =
3236 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3237 VG_(addToXA)( verydead_thread_table, &thrid );
3239 verydead_thread_table_sort_and_check (verydead_thread_table);
3240 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3243 /* At this point, we have:
3244 * the old VTS table, with its u.remap entries set,
3245 and with all .vts == NULL.
3246 * the old VTS tree should be empty, since it and the old VTSs
3247 it contained have been incrementally deleted as we worked
3248 through the old table.
3249 * the new VTS table, with all .rc == 0, all u.freelink and u.remap
3250 == VtsID_INVALID.
3251 * the new VTS tree.
3253 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3255 /* Now actually apply the mapping. */
3256 /* Visit all the VtsIDs in the entire system. Where do we expect
3257 to find them?
3258 (a) in shadow memory -- the LineZs and LineFs
3259 (b) in our collection of struct _Thrs.
3260 (c) in our collection of struct _SOs.
3261 Nowhere else, AFAICS. Not in the zsm cache, because that just
3262 got invalidated.
3264 Using the u.remap fields in vts_tab, map each old VtsID to a new
3265 VtsID. For each old VtsID, dec its rc; and for each new one,
3266 inc it. This sets up the new refcounts, and it also gives a
3267 cheap sanity check of the old ones: all old refcounts should be
3268 zero after this operation.
3271 /* Do the mappings for (a) above: iterate over the Primary shadow
3272 mem map (WordFM Addr SecMap*). */
3273 UWord secmapW = 0;
3274 VG_(initIterFM)( map_shmem );
3275 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3276 UWord j;
3277 SecMap* sm = (SecMap*)secmapW;
3278 tl_assert(sm->magic == SecMap_MAGIC);
3279 /* Deal with the LineZs */
3280 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3281 LineZ* lineZ = &sm->linesZ[i];
3282 if (lineZ->dict[0] != SVal_INVALID) {
3283 for (j = 0; j < 4; j++)
3284 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3285 } else {
3286 LineF* lineF = SVal2Ptr (lineZ->dict[1]);
3287 for (j = 0; j < N_LINE_ARANGE; j++)
3288 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3292 VG_(doneIterFM)( map_shmem );
3294 /* Do the mappings for (b) above: visit our collection of struct
3295 _Thrs. */
3296 Thread* hgthread = get_admin_threads();
3297 tl_assert(hgthread);
3298 while (hgthread) {
3299 Thr* hbthr = hgthread->hbthr;
3300 tl_assert(hbthr);
3301 /* Threads that are listed in the prunable set have their viR
3302 and viW set to VtsID_INVALID, so we can't mess with them. */
3303 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3304 tl_assert(hbthr->viR == VtsID_INVALID);
3305 tl_assert(hbthr->viW == VtsID_INVALID);
3306 hgthread = hgthread->admin;
3307 continue;
3309 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3310 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3311 hgthread = hgthread->admin;
3314 /* Do the mappings for (c) above: visit the struct _SOs. */
3315 SO* so = admin_SO;
3316 while (so) {
3317 if (so->viR != VtsID_INVALID)
3318 remap_VtsID( vts_tab, new_tab, &so->viR );
3319 if (so->viW != VtsID_INVALID)
3320 remap_VtsID( vts_tab, new_tab, &so->viW );
3321 so = so->admin_next;
3324 /* So, we're nearly done (with this incredibly complex operation).
3325 Check the refcounts for the old VtsIDs all fell to zero, as
3326 expected. Any failure is serious. */
3327 for (i = 0; i < nTab; i++) {
3328 VtsTE* te = VG_(indexXA)( vts_tab, i );
3329 tl_assert(te->vts == NULL);
3330 /* This is the assert proper. Note we're also asserting
3331 zeroness for old entries which are unmapped. That's OK. */
3332 tl_assert(te->rc == 0);
3335 /* Install the new table and set. */
3336 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3337 vts_set = new_set;
3338 VG_(deleteXA)( vts_tab );
3339 vts_tab = new_tab;
3341 /* The freelist of vts_tab entries is empty now, because we've
3342 compacted all of the live entries at the low end of the
3343 table. */
3344 vts_tab_freelist = VtsID_INVALID;
3346 /* Sanity check vts_set and vts_tab. */
3348 /* Because all the live entries got slid down to the bottom of vts_tab: */
3349 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3351 /* Assert that the vts_tab and vts_set entries point at each other
3352 in the required way */
3353 UWord wordK = 0, wordV = 0;
3354 VG_(initIterFM)( vts_set );
3355 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3356 tl_assert(wordK != 0);
3357 tl_assert(wordV == 0);
3358 VTS* vts = (VTS*)wordK;
3359 tl_assert(vts->id != VtsID_INVALID);
3360 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3361 tl_assert(te->vts == vts);
3363 VG_(doneIterFM)( vts_set );
3365 /* Also iterate over the table, and check each entry is
3366 plausible. */
3367 nTab = VG_(sizeXA)( vts_tab );
3368 for (i = 0; i < nTab; i++) {
3369 VtsTE* te = VG_(indexXA)( vts_tab, i );
3370 tl_assert(te->vts);
3371 tl_assert(te->vts->id == i);
3372 tl_assert(te->rc > 0); /* 'cos we just GC'd */
3373 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3374 /* value of te->u.remap not relevant */
3377 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3378 stats__vts_pruning++;
3379 if (VG_(clo_stats)) {
3380 tl_assert(nTab > 0);
3381 VG_(message)(
3382 Vg_DebugMsg,
3383 "libhb: VTS PR: #%lu before %lu (avg sz %lu) "
3384 "after %lu (avg sz %lu)\n",
3385 stats__vts_pruning,
3386 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3387 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3390 /* ---------- END VTS PRUNING ---------- */
3394 /////////////////////////////////////////////////////////
3395 // //
3396 // Vts IDs //
3397 // //
3398 /////////////////////////////////////////////////////////
3400 //////////////////////////
3401 /* A max-sized VTS which is used as a scratch temporary (the first
3402 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3403 static VTS* temp_max_sized_VTS = NULL;
3405 //////////////////////////
3406 static ULong stats__cmpLEQ_queries = 0;
3407 static ULong stats__cmpLEQ_misses = 0;
3408 static ULong stats__join2_queries = 0;
3409 static ULong stats__join2_misses = 0;
3411 static inline UInt ROL32 ( UInt w, Int n ) {
3412 w = (w << n) | (w >> (32-n));
3413 return w;
3415 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3416 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3417 return hash % nTab;
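   /* The two caches below are direct-mapped.  For instance, for vi1 == 1,
      vi2 == 2 and nTab == 1023: ROL32(1,19) == 0x80000 and ROL32(2,13) ==
      0x4000, so hash == (0x80000 ^ 0x4000) % 1023 == 540672 % 1023 == 528,
      and that ordered pair always maps to slot 528. */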
3420 #define N_CMPLEQ_CACHE 1023
3421 static
3422 struct { VtsID vi1; VtsID vi2; Bool leq; }
3423 cmpLEQ_cache[N_CMPLEQ_CACHE];
3425 #define N_JOIN2_CACHE 1023
3426 static
3427 struct { VtsID vi1; VtsID vi2; VtsID res; }
3428 join2_cache[N_JOIN2_CACHE];
3430 static void VtsID__invalidate_caches ( void ) {
3431 Int i;
3432 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3433 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3434 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3435 cmpLEQ_cache[i].leq = False;
3437 for (i = 0; i < N_JOIN2_CACHE; i++) {
3438 join2_cache[i].vi1 = VtsID_INVALID;
3439 join2_cache[i].vi2 = VtsID_INVALID;
3440 join2_cache[i].res = VtsID_INVALID;
3443 //////////////////////////
3445 //static Bool VtsID__is_valid ( VtsID vi ) {
3446 // VtsTE* ve;
3447 // if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3448 // return False;
3449 // ve = VG_(indexXA)( vts_tab, vi );
3450 // if (!ve->vts)
3451 // return False;
3452 // tl_assert(ve->vts->id == vi);
3453 // return True;
3456 static VTS* VtsID__to_VTS ( VtsID vi ) {
3457 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3458 tl_assert(te->vts);
3459 return te->vts;
3462 static void VtsID__pp ( VtsID vi ) {
3463 VTS* vts = VtsID__to_VTS(vi);
3464 VTS__show( vts );
3467 /* compute partial ordering relation of vi1 and vi2. */
3468 __attribute__((noinline))
3469 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
3470 UInt hash;
3471 Bool leq;
3472 VTS *v1, *v2;
3473 //if (vi1 == vi2) return True;
3474 tl_assert(vi1 != vi2);
3475 ////++
3476 stats__cmpLEQ_queries++;
3477 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3478 if (cmpLEQ_cache[hash].vi1 == vi1
3479 && cmpLEQ_cache[hash].vi2 == vi2)
3480 return cmpLEQ_cache[hash].leq;
3481 stats__cmpLEQ_misses++;
3482 ////--
3483 v1 = VtsID__to_VTS(vi1);
3484 v2 = VtsID__to_VTS(vi2);
3485 leq = VTS__cmpLEQ( v1, v2 ) == 0;
3486 ////++
3487 cmpLEQ_cache[hash].vi1 = vi1;
3488 cmpLEQ_cache[hash].vi2 = vi2;
3489 cmpLEQ_cache[hash].leq = leq;
3490 ////--
3491 return leq;
3493 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3494 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
3497 /* compute binary join */
3498 __attribute__((noinline))
3499 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3500 UInt hash;
3501 VtsID res;
3502 VTS *vts1, *vts2;
3503 //if (vi1 == vi2) return vi1;
3504 tl_assert(vi1 != vi2);
3505 ////++
3506 stats__join2_queries++;
3507 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3508 if (join2_cache[hash].vi1 == vi1
3509 && join2_cache[hash].vi2 == vi2)
3510 return join2_cache[hash].res;
3511 stats__join2_misses++;
3512 ////--
3513 vts1 = VtsID__to_VTS(vi1);
3514 vts2 = VtsID__to_VTS(vi2);
3515 temp_max_sized_VTS->usedTS = 0;
3516 VTS__join(temp_max_sized_VTS, vts1,vts2);
3517 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3518 ////++
3519 join2_cache[hash].vi1 = vi1;
3520 join2_cache[hash].vi2 = vi2;
3521 join2_cache[hash].res = res;
3522 ////--
3523 return res;
3525 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
3526 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
3529 /* create a singleton VTS, namely [thr:1] */
3530 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
3531 temp_max_sized_VTS->usedTS = 0;
3532 VTS__singleton(temp_max_sized_VTS, thr,tym);
3533 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3536 /* tick operation, creates value 1 if specified index is absent */
3537 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3538 VTS* vts = VtsID__to_VTS(vi);
3539 temp_max_sized_VTS->usedTS = 0;
3540 VTS__tick(temp_max_sized_VTS, idx,vts);
3541 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3544 /* index into a VTS (only for assertions) */
3545 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3546 VTS* vts = VtsID__to_VTS(vi);
3547 return VTS__indexAt_SLOW( vts, idx );
3550 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3551 any, really) element in vi1 which is pointwise greater-than the
3552 corresponding element in vi2. If no such element exists, return
3553 NULL. This needs to be fairly quick since it is called every time
3554 a race is detected. */
3555 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3557 VTS *vts1, *vts2;
3558 Thr* diffthr;
3559 ThrID diffthrid;
3560 tl_assert(vi1 != vi2);
3561 vts1 = VtsID__to_VTS(vi1);
3562 vts2 = VtsID__to_VTS(vi2);
3563 tl_assert(vts1 != vts2);
3564 diffthrid = VTS__cmpLEQ(vts1, vts2);
3565 diffthr = Thr__from_ThrID(diffthrid);
3566 tl_assert(diffthr); /* else they are LEQ ! */
3567 return diffthr;
3571 /////////////////////////////////////////////////////////
3572 // //
3573 // Filters //
3574 // //
3575 /////////////////////////////////////////////////////////
3577 /* Forget everything we know -- clear the filter and let everything
3578 through. This needs to be as fast as possible, since it is called
3579 every time the running thread changes, and every time a thread's
3580 vector clocks change, which can be quite frequent. The obvious
3581 fast way to do this is simply to stuff in tags which we know are
3582 not going to match anything, since they're not aligned to the start
3583 of a line. */
3584 static void Filter__clear ( Filter* fi, const HChar* who )
3586 UWord i;
3587 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3588 for (i = 0; i < FI_NUM_LINES; i += 8) {
3589 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3590 fi->tags[i+1] = 1;
3591 fi->tags[i+2] = 1;
3592 fi->tags[i+3] = 1;
3593 fi->tags[i+4] = 1;
3594 fi->tags[i+5] = 1;
3595 fi->tags[i+6] = 1;
3596 fi->tags[i+7] = 1;
3598 tl_assert(i == FI_NUM_LINES);
3601 /* Clearing an arbitrary range in the filter. Unfortunately
3602 we have to do this due to core-supplied new/die-mem events. */
3604 static void Filter__clear_1byte ( Filter* fi, Addr a )
3606 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3607 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3608 FiLine* line = &fi->lines[lineno];
3609 UWord loff = (a - atag) / 8;
3610 UShort mask = 0x3 << (2 * (a & 7));
3611 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3612 if (LIKELY( fi->tags[lineno] == atag )) {
3613 /* hit. clear the bits. */
3614 UShort u16 = line->u16s[loff];
3615 line->u16s[loff] = u16 & ~mask; /* clear them */
3616 } else {
3617 /* miss. The filter doesn't hold this address, so ignore. */
3621 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3623 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3624 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3625 FiLine* line = &fi->lines[lineno];
3626 UWord loff = (a - atag) / 8;
3627 if (LIKELY( fi->tags[lineno] == atag )) {
3628 line->u16s[loff] = 0;
3629 } else {
3630 /* miss. The filter doesn't hold this address, so ignore. */
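/* An illustrative sketch (not part of the library) of how an address
   maps onto the 2-bits-per-byte filter state used by the routines
   above and below, assuming FI_LINE_SZB == 32 as asserted further
   down: each 32-byte line is described by 4 UShorts, one per aligned
   8-byte group, with two bits per byte at bit position 2*(a & 7). */
#if 0
static void example_filter_coords ( Addr a )
{
   Addr   atag   = FI_GET_TAG(a);     /* line-aligned address of 'a'    */
   UWord  lineno = FI_GET_LINENO(a);  /* index into fi->lines[]         */
   UWord  loff   = (a - atag) / 8;    /* which UShort in the line: 0..3 */
   UInt   shift  = 2 * (a & 7);       /* bit position of the 2-bit pair */
   UShort rd     = 0x2 << shift;      /* the bit checked by ..._crd08   */
   UShort rdwr   = 0x3 << shift;      /* the bits checked by ..._cwr08  */
   VG_(printf)("line %lu tag %#lx u16 %lu R-mask %04x RW-mask %04x\n",
               lineno, atag, loff, (UInt)rd, (UInt)rdwr);
}
#endif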
3634 /* Only used to verify the fast Filter__clear_range */
3635 __attribute__((unused))
3636 static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
3638 tl_assert (CHECK_ZSM);
3640 /* slowly do part preceding 8-alignment */
3641 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3642 Filter__clear_1byte( fi, a );
3643 a++;
3644 len--;
3646 /* vector loop */
3647 while (len >= 8) {
3648 Filter__clear_8bytes_aligned( fi, a );
3649 a += 8;
3650 len -= 8;
3652 /* slowly do tail */
3653 while (UNLIKELY(len > 0)) {
3654 Filter__clear_1byte( fi, a );
3655 a++;
3656 len--;
3660 static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3662 # if CHECK_ZSM > 0
3663 /* We check the more complex algorithm below against the simple one.
3664 This check is very expensive: we first do it the slow way on a
3665 copy of the data, then do it the fast way. On RETURN, we check
3666 that the two results are equal. */
3667 Filter fi_check = *fi;
3668 Filter__clear_range_SLOW(&fi_check, a, len);
3669 # define RETURN goto check_and_return
3670 # else
3671 # define RETURN return
3672 # endif
3674 Addr begtag = FI_GET_TAG(a); /* tag of range begin */
3676 Addr end = a + len - 1;
3677 Addr endtag = FI_GET_TAG(end); /* tag of range end. */
3679 UWord rlen = len; /* remaining length to clear */
3681 Addr c = a; /* Current position we are clearing. */
3682 UWord clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
3683 FiLine* cline; /* Current line we are clearing */
3684 UWord cloff; /* Current offset in line we are clearing, when clearing
3685 partial lines. */
3687 UShort u16;
3689 STATIC_ASSERT (FI_LINE_SZB == 32);
3690 // Below assumes filter lines are 32 bytes
3692 if (LIKELY(fi->tags[clineno] == begtag)) {
3693 /* LIKELY for the heavy caller VG_(unknown_SP_update). */
3694 /* First filter line matches begtag.
3695 If c is not at the filter line begin, the below will clear
3696 the filter line bytes starting from c. */
3697 cline = &fi->lines[clineno];
3698 cloff = (c - begtag) / 8;
3700 /* First the byte(s) needed to reach 8-alignment */
3701 if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
3702 /* hiB is the nr of bytes (higher addresses) from c to reach
3703 8-alignment. */
3704 UWord hiB = 8 - (c & 7);
3705 /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
3706 mask is C000 , F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
3707 range 7..7 6..7 5..7 4..7 3..7 2..7 1..7 */
3708 UShort mask = 0xFFFF << (16 - 2*hiB);
3710 u16 = cline->u16s[cloff];
3711 if (LIKELY(rlen >= hiB)) {
3712 cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
3713 rlen -= hiB;
3714 c += hiB;
3715 cloff += 1;
3716 } else {
3717 /* Only have the bits for rlen bytes. */
3718 mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
3719 cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
3720 RETURN; // We have cleared all that we can.
3723 /* c is now 8 aligned. Clear by 8 aligned bytes,
3724 till c is filter-line aligned */
3725 while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
3726 cline->u16s[cloff] = 0;
3727 c += 8;
3728 rlen -= 8;
3729 cloff += 1;
3731 } else {
3732 c = begtag + FI_LINE_SZB;
3733 if (c > end)
3734 RETURN; // We have cleared all that we can.
3735 rlen -= c - a;
3737 // We have changed c, so re-establish clineno.
3738 clineno = FI_GET_LINENO(c);
3740 if (rlen >= FI_LINE_SZB) {
3741 /* Here, c is filter line-aligned. Clear all the full filter
3742 lines that overlap with the range starting at c. */
3743 UWord nfull = rlen / FI_LINE_SZB;
3744 UWord full_len = nfull * FI_LINE_SZB;
3745 rlen -= full_len;
3746 if (nfull > FI_NUM_LINES)
3747 nfull = FI_NUM_LINES; // no need to check the same entry several times.
3749 for (UWord n = 0; n < nfull; n++) {
3750 if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
3751 cline = &fi->lines[clineno];
3752 cline->u16s[0] = 0;
3753 cline->u16s[1] = 0;
3754 cline->u16s[2] = 0;
3755 cline->u16s[3] = 0;
3756 STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
3758 clineno++;
3759 if (UNLIKELY(clineno == FI_NUM_LINES))
3760 clineno = 0;
3763 c += full_len;
3764 clineno = FI_GET_LINENO(c);
3767 if (CHECK_ZSM) {
3768 tl_assert(VG_IS_8_ALIGNED(c));
3769 tl_assert(clineno == FI_GET_LINENO(c));
3772 /* Do the last filter line, if it was not cleared as a full filter line */
3773 if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
3774 cline = &fi->lines[clineno];
3775 cloff = (c - endtag) / 8;
3776 if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);
3778 /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than
3779 8 bytes. */
3780 while (rlen >= 8) {
3781 cline->u16s[cloff] = 0;
3782 c += 8;
3783 rlen -= 8;
3784 cloff += 1;
3786 /* Then the remaining byte(s) */
3787 if (rlen > 0) {
3788 /* nr of bytes from c to reach end. */
3789 UWord loB = rlen;
3790 /* Compute mask representing loB bytes [c..c+loB[ :
3791 mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
3792 UShort mask = 0xFFFF >> (16 - 2*loB);
3794 u16 = cline->u16s[cloff];
3795 cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
3799 # if CHECK_ZSM > 0
3800 check_and_return:
3801 tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
3802 # endif
3803 # undef RETURN
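/* The shape of the CHECK_ZSM cross-check above, pulled out into a
   hypothetical standalone helper for clarity (sketch only; the real
   code inlines this via the RETURN macro so that every early exit of
   the fast path gets compared against the slow reference version): */
#if 0
static void example_clear_range_checked ( Filter* fi, Addr a, UWord len )
{
   Filter reference = *fi;                          /* copy the state    */
   Filter__clear_range_SLOW( &reference, a, len );  /* trusted version   */
   Filter__clear_range( fi, a, len );               /* optimised version */
   tl_assert( VG_(memcmp)( &reference, fi, sizeof reference ) == 0 );
}
#endif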
3806 /* ------ Read handlers for the filter. ------ */
3808 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3810 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3811 return False;
3813 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3814 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3815 FiLine* line = &fi->lines[lineno];
3816 UWord loff = (a - atag) / 8;
3817 UShort mask = 0xAAAA;
3818 if (LIKELY( fi->tags[lineno] == atag )) {
3819 /* hit. check line and update. */
3820 UShort u16 = line->u16s[loff];
3821 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3822 line->u16s[loff] = u16 | mask; /* set them */
3823 return ok;
3824 } else {
3825 /* miss. nuke existing line and re-use it. */
3826 UWord i;
3827 fi->tags[lineno] = atag;
3828 for (i = 0; i < FI_LINE_SZB / 8; i++)
3829 line->u16s[i] = 0;
3830 line->u16s[loff] = mask;
3831 return False;
3836 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3838 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3839 return False;
3841 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3842 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3843 FiLine* line = &fi->lines[lineno];
3844 UWord loff = (a - atag) / 8;
3845 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3846 if (LIKELY( fi->tags[lineno] == atag )) {
3847 /* hit. check line and update. */
3848 UShort u16 = line->u16s[loff];
3849 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3850 line->u16s[loff] = u16 | mask; /* set them */
3851 return ok;
3852 } else {
3853 /* miss. nuke existing line and re-use it. */
3854 UWord i;
3855 fi->tags[lineno] = atag;
3856 for (i = 0; i < FI_LINE_SZB / 8; i++)
3857 line->u16s[i] = 0;
3858 line->u16s[loff] = mask;
3859 return False;
3864 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3866 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3867 return False;
3869 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3870 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3871 FiLine* line = &fi->lines[lineno];
3872 UWord loff = (a - atag) / 8;
3873 UShort mask = 0xA << (2 * (a & 6));
3874 /* mask is A000, 0A00, 00A0 or 000A */
3875 if (LIKELY( fi->tags[lineno] == atag )) {
3876 /* hit. check line and update. */
3877 UShort u16 = line->u16s[loff];
3878 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3879 line->u16s[loff] = u16 | mask; /* set them */
3880 return ok;
3881 } else {
3882 /* miss. nuke existing line and re-use it. */
3883 UWord i;
3884 fi->tags[lineno] = atag;
3885 for (i = 0; i < FI_LINE_SZB / 8; i++)
3886 line->u16s[i] = 0;
3887 line->u16s[loff] = mask;
3888 return False;
3893 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3896 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3897 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3898 FiLine* line = &fi->lines[lineno];
3899 UWord loff = (a - atag) / 8;
3900 UShort mask = 0x2 << (2 * (a & 7));
3901 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3902 if (LIKELY( fi->tags[lineno] == atag )) {
3903 /* hit. check line and update. */
3904 UShort u16 = line->u16s[loff];
3905 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3906 line->u16s[loff] = u16 | mask; /* set them */
3907 return ok;
3908 } else {
3909 /* miss. nuke existing line and re-use it. */
3910 UWord i;
3911 fi->tags[lineno] = atag;
3912 for (i = 0; i < FI_LINE_SZB / 8; i++)
3913 line->u16s[i] = 0;
3914 line->u16s[loff] = mask;
3915 return False;
3921 /* ------ Write handlers for the filter. ------ */
3923 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3925 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3926 return False;
3928 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3929 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3930 FiLine* line = &fi->lines[lineno];
3931 UWord loff = (a - atag) / 8;
3932 UShort mask = 0xFFFF;
3933 if (LIKELY( fi->tags[lineno] == atag )) {
3934 /* hit. check line and update. */
3935 UShort u16 = line->u16s[loff];
3936 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3937 line->u16s[loff] = u16 | mask; /* set them */
3938 return ok;
3939 } else {
3940 /* miss. nuke existing line and re-use it. */
3941 UWord i;
3942 fi->tags[lineno] = atag;
3943 for (i = 0; i < FI_LINE_SZB / 8; i++)
3944 line->u16s[i] = 0;
3945 line->u16s[loff] = mask;
3946 return False;
3951 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3953 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3954 return False;
3956 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3957 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3958 FiLine* line = &fi->lines[lineno];
3959 UWord loff = (a - atag) / 8;
3960 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3961 if (LIKELY( fi->tags[lineno] == atag )) {
3962 /* hit. check line and update. */
3963 UShort u16 = line->u16s[loff];
3964 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3965 line->u16s[loff] = u16 | mask; /* set them */
3966 return ok;
3967 } else {
3968 /* miss. nuke existing line and re-use it. */
3969 UWord i;
3970 fi->tags[lineno] = atag;
3971 for (i = 0; i < FI_LINE_SZB / 8; i++)
3972 line->u16s[i] = 0;
3973 line->u16s[loff] = mask;
3974 return False;
3979 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3981 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3982 return False;
3984 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3985 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3986 FiLine* line = &fi->lines[lineno];
3987 UWord loff = (a - atag) / 8;
3988 UShort mask = 0xF << (2 * (a & 6));
3989 /* mask is F000, 0F00, 00F0 or 000F */
3990 if (LIKELY( fi->tags[lineno] == atag )) {
3991 /* hit. check line and update. */
3992 UShort u16 = line->u16s[loff];
3993 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3994 line->u16s[loff] = u16 | mask; /* set them */
3995 return ok;
3996 } else {
3997 /* miss. nuke existing line and re-use it. */
3998 UWord i;
3999 fi->tags[lineno] = atag;
4000 for (i = 0; i < FI_LINE_SZB / 8; i++)
4001 line->u16s[i] = 0;
4002 line->u16s[loff] = mask;
4003 return False;
4008 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
4011 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
4012 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
4013 FiLine* line = &fi->lines[lineno];
4014 UWord loff = (a - atag) / 8;
4015 UShort mask = 0x3 << (2 * (a & 7));
4016 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
4017 if (LIKELY( fi->tags[lineno] == atag )) {
4018 /* hit. check line and update. */
4019 UShort u16 = line->u16s[loff];
4020 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
4021 line->u16s[loff] = u16 | mask; /* set them */
4022 return ok;
4023 } else {
4024 /* miss. nuke existing line and re-use it. */
4025 UWord i;
4026 fi->tags[lineno] = atag;
4027 for (i = 0; i < FI_LINE_SZB / 8; i++)
4028 line->u16s[i] = 0;
4029 line->u16s[loff] = mask;
4030 return False;
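/* A usage sketch for the skip-filters (the wrapper shown here is
   hypothetical; the real fast-path callers live elsewhere in this
   file). The idea: if the current thread has already processed a
   covering access of these bytes since its filter was last cleared,
   the whole MSM update can be skipped. */
#if 0
static void example_cread32 ( Thr* thr, Addr a )
{
   if (LIKELY( Filter__ok_to_skip_crd32( thr->filter, a ) )) {
      /* nothing new to learn from this 4-byte read */
      return;
   }
   /* otherwise fall through to the full MSM read machinery */
}
#endif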
4036 /////////////////////////////////////////////////////////
4037 // //
4038 // Threads //
4039 // //
4040 /////////////////////////////////////////////////////////
4042 /* Maps ThrID values to their Thr*s (which contain ThrID values that
4043 should point back to the relevant slot in the array). Lowest
4044 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
4045 static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
4047 /* And a counter to dole out ThrID values. For rationale/background,
4048 see comments on definition of ScalarTS (far) above. */
4049 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
4051 static ThrID Thr__to_ThrID ( Thr* thr ) {
4052 return thr->thrid;
4054 static Thr* Thr__from_ThrID ( UInt thrid ) {
4055 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
4056 tl_assert(thr->thrid == thrid);
4057 return thr;
4060 /* True if the cached rcec for thr is valid and can be used to build the
4061 current stack trace just by changing its innermost frame (frames[0]) to the current IP. */
4062 static inline Bool cached_rcec_valid(Thr *thr)
4064 UWord cached_stackvalid = VG_(get_SP_s1) (thr->hgthread->coretid);
4065 return cached_stackvalid != 0;
4067 /* Set the validity of the cached rcec of thr. */
4068 static inline void set_cached_rcec_validity(Thr *thr, Bool valid)
4070 VG_(set_SP_s1) (thr->hgthread->coretid, valid);
4073 static Thr* Thr__new ( void )
4075 Thr* thr = HG_(zalloc)
4076 ( "libhb.Thr__new.1",
4077 sizeof(Thr) + HG_(clo_history_backtrace_size) * sizeof(UWord));
4078 // We need to add the size of the frames in the cached_rcec (last member of
4079 // _Thr).
4081 thr->viR = VtsID_INVALID;
4082 thr->viW = VtsID_INVALID;
4083 thr->llexit_done = False;
4084 thr->joinedwith_done = False;
4085 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
4086 if (HG_(clo_history_level) == 1)
4087 thr->local_Kws_n_stacks
4088 = VG_(newXA)( HG_(zalloc),
4089 "libhb.Thr__new.3 (local_Kws_and_stacks)",
4090 HG_(free), sizeof(ULong_n_EC) );
4091 /* Make an 'empty' cached rcec in thr. */
4092 thr->cached_rcec.magic = RCEC_MAGIC;
4093 thr->cached_rcec.rc = 0;
4094 thr->cached_rcec.rcX = 0;
4095 thr->cached_rcec.next = NULL;
4097 /* Add this Thr* <-> ThrID binding to the mapping, and
4098 cross-check */
4099 if (!thrid_to_thr_map) {
4100 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
4101 HG_(free), sizeof(Thr*) );
4104 if (thrid_counter >= ThrID_MAX_VALID) {
4105 /* We're hosed. We have to stop. */
4106 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
4109 thr->thrid = thrid_counter++;
4110 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
4111 tl_assert(ix + 1024 == thr->thrid);
4113 return thr;
4116 static void note_local_Kw_n_stack_for ( Thr* thr )
4118 Word nPresent;
4119 ULong_n_EC pair;
4120 tl_assert(thr);
4122 // We only collect this info at history level 1 (approx)
4123 if (HG_(clo_history_level) != 1)
4124 return;
4126 /* This is the scalar Kw for thr. */
4127 pair.ull = VtsID__indexAt( thr->viW, thr );
4128 pair.ec = main_get_EC( thr );
4129 tl_assert(pair.ec);
4130 tl_assert(thr->local_Kws_n_stacks);
4132 /* check that we're not adding duplicates */
4133 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
4135 /* Throw away old stacks, if necessary. We can't accumulate stuff
4136 indefinitely. */
4137 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
4138 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
4139 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
4140 if (0)
4141 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
4142 thr, pair.ull, pair.ec );
4145 if (nPresent > 0) {
4146 ULong_n_EC* prevPair
4147 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
4148 tl_assert( prevPair->ull <= pair.ull );
4151 if (nPresent == 0)
4152 pair.ec = NULL;
4154 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
4156 if (0)
4157 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
4158 thr, pair.ull, pair.ec );
4159 if (0)
4160 VG_(pp_ExeContext)(pair.ec);
4163 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
4164 const ULong_n_EC* pair2 )
4166 if (pair1->ull < pair2->ull) return -1;
4167 if (pair1->ull > pair2->ull) return 1;
4168 return 0;
4172 /////////////////////////////////////////////////////////
4173 // //
4174 // Shadow Values //
4175 // //
4176 /////////////////////////////////////////////////////////
4178 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
4179 // hb_zsm.h. We have to do everything else here.
4181 /* SVal is 64 bit unsigned int.
4183 <---------30---------> <---------30--------->
4184 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
4185 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
4186 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
4189 #define SVAL_TAGMASK (3ULL << 62)
4191 static inline Bool SVal__isC ( SVal s ) {
4192 return (0ULL << 62) == (s & SVAL_TAGMASK);
4194 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
4195 //tl_assert(VtsID__is_valid(rmini));
4196 //tl_assert(VtsID__is_valid(wmini));
4197 return (((ULong)rmini) << 32) | ((ULong)wmini);
4199 static inline VtsID SVal__unC_Rmin ( SVal s ) {
4200 tl_assert(SVal__isC(s));
4201 return (VtsID)(s >> 32);
4203 static inline VtsID SVal__unC_Wmin ( SVal s ) {
4204 tl_assert(SVal__isC(s));
4205 return (VtsID)(s & 0xFFFFFFFFULL);
4208 static inline Bool SVal__isA ( SVal s ) {
4209 return (2ULL << 62) == (s & SVAL_TAGMASK);
4211 __attribute__((unused))
4212 static inline SVal SVal__mkA ( void ) {
4213 return 2ULL << 62;
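/* A worked sketch of the encoding above (illustrative only): packing
   two VtsIDs of at most 30 bits gives tag bits 00, i.e. a 'C' value,
   while SVal__mkA() carries tag bits 10. */
#if 0
static void example_sval_roundtrip ( VtsID rmin, VtsID wmin )
{
   SVal c = SVal__mkC( rmin, wmin );
   tl_assert( SVal__isC(c) );
   tl_assert( SVal__unC_Rmin(c) == rmin );
   tl_assert( SVal__unC_Wmin(c) == wmin );
   tl_assert( SVal__isA( SVal__mkA() ) && !SVal__isC( SVal__mkA() ) );
}
#endif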
4216 /* Direct callback from lib_zsm. */
4217 static inline void SVal__rcinc ( SVal s ) {
4218 if (SVal__isC(s)) {
4219 VtsID__rcinc( SVal__unC_Rmin(s) );
4220 VtsID__rcinc( SVal__unC_Wmin(s) );
4224 /* Direct callback from lib_zsm. */
4225 static inline void SVal__rcdec ( SVal s ) {
4226 if (SVal__isC(s)) {
4227 VtsID__rcdec( SVal__unC_Rmin(s) );
4228 VtsID__rcdec( SVal__unC_Wmin(s) );
4232 static inline void *SVal2Ptr (SVal s)
4234 return (void*)(UWord)s;
4237 static inline SVal Ptr2SVal (void* ptr)
4239 return (SVal)(UWord)ptr;
4244 /////////////////////////////////////////////////////////
4245 // //
4246 // Change-event map2 //
4247 // //
4248 /////////////////////////////////////////////////////////
4250 /* This is in two parts:
4252 1. A hash table of RCECs. This is a set of reference-counted stack
4253 traces. When the reference count of a stack trace becomes zero,
4254 it is removed from the set and freed up. The intent is to have
4255 a set of stack traces which can be referred to from (2), but to
4256 only represent each one once. The set is indexed/searched by
4257 ordering on the stack trace vectors.
4259 2. A Hash table of OldRefs. These store information about each old
4260 ref that we need to record. Hash table key is the address of the
4261 location for which the information is recorded. For LRU
4262 purposes, each OldRef in the hash table is also on a doubly
4263 linked list maintaining the order in which the OldRef were most
4264 recently accessed.
4265 Each OldRef also maintains the stamp at which it was last accessed.
4266 With these stamps, we can quickly check which of 2 OldRef is the
4267 'newest', without having to scan the full list of LRU OldRef.
4269 The important part of an OldRef is, however, its acc component.
4270 This binds a TSW triple (thread, size, R/W) to an RCEC.
4272 We allocate a maximum of VG_(clo_conflict_cache_size) OldRef.
4273 Then we do exact LRU discarding. For each discarded OldRef we must
4274 of course decrement the reference count on the RCEC it
4275 refers to, in order that entries from (1) eventually get
4276 discarded too.
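/* A usage sketch tying parts (1) and (2) together, using the two
   entry points defined further down in this section (illustrative
   only; 'a' and the size are made-up values): */
#if 0
static void example_conflict_query ( Thr* thr, Addr a )
{
   /* Record a 4-byte write at 'a' by 'thr'; this creates or refreshes
      an OldRef whose acc.rcec points into the RCEC table. */
   event_map_bind( a, 4, True/*isW*/, thr );

   /* Later, when a race on 'a' is reported, ask for the most recent
      overlapping access made by some other thread. */
   ExeContext* where; Thr* other; SizeT szB; Bool isW; WordSetID locks;
   if (libhb_event_map_lookup( &where, &other, &szB, &isW, &locks,
                               thr, a, 4, True/*isW*/ ))
      VG_(pp_ExeContext)( where );
}
#endif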
4279 static UWord stats__evm__lookup_found = 0;
4280 static UWord stats__evm__lookup_notfound = 0;
4282 static UWord stats__ctxt_eq_tsw_eq_rcec = 0;
4283 static UWord stats__ctxt_eq_tsw_neq_rcec = 0;
4284 static UWord stats__ctxt_neq_tsw_neq_rcec = 0;
4285 static UWord stats__ctxt_rcdec_calls = 0;
4286 static UWord stats__ctxt_rcec_gc_discards = 0;
4288 static UWord stats__ctxt_tab_curr = 0;
4289 static UWord stats__ctxt_tab_max = 0;
4291 static UWord stats__ctxt_tab_qs = 0;
4292 static UWord stats__ctxt_tab_cmps = 0;
4295 ///////////////////////////////////////////////////////
4296 //// Part (1): A hash table of RCECs
4299 //#define N_RCEC_TAB 98317 /* prime */
4300 #define N_RCEC_TAB 196613 /* prime */
4302 //////////// BEGIN RCEC pool allocator
4303 static PoolAlloc* rcec_pool_allocator;
4304 static RCEC* alloc_RCEC ( void ) {
4305 return VG_(allocEltPA) ( rcec_pool_allocator );
4308 static void free_RCEC ( RCEC* rcec ) {
4309 tl_assert(rcec->magic == RCEC_MAGIC);
4310 VG_(freeEltPA)( rcec_pool_allocator, rcec );
4312 //////////// END RCEC pool allocator
4314 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4316 /* Count of allocated RCEC having ref count > 0 */
4317 static UWord RCEC_referenced = 0;
4319 /* True if the frames of ec1 and ec2 are different. */
4320 static Bool RCEC__differs_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4321 Word i;
4322 if (CHECK_CEM) {
4323 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4324 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
4326 if (ec1->frames_hash != ec2->frames_hash) return True;
4327 for (i = 0; i < HG_(clo_history_backtrace_size); i++) {
4328 if (ec1->frames[i] != ec2->frames[i]) return True;
4330 return False;
4333 /* Dec the ref of this RCEC. */
4334 static void ctxt__rcdec ( RCEC* ec )
4336 stats__ctxt_rcdec_calls++;
4337 if (CHECK_CEM)
4338 tl_assert(ec && ec->magic == RCEC_MAGIC);
4339 tl_assert(ec->rc > 0);
4340 ec->rc--;
4341 if (ec->rc == 0)
4342 RCEC_referenced--;
4345 static void ctxt__rcinc ( RCEC* ec )
4347 if (CHECK_CEM)
4348 tl_assert(ec && ec->magic == RCEC_MAGIC);
4349 if (ec->rc == 0)
4350 RCEC_referenced++;
4351 ec->rc++;
4355 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4356 move it one step closer to the front of the list, so as to make
4357 subsequent searches for it cheaper. */
4358 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4360 RCEC *ec0, *ec1, *ec2;
4361 if (ec == *headp)
4362 tl_assert(0); /* already at head of list */
4363 tl_assert(ec != NULL);
4364 ec0 = *headp;
4365 ec1 = NULL;
4366 ec2 = NULL;
4367 while (True) {
4368 if (ec0 == NULL || ec0 == ec) break;
4369 ec2 = ec1;
4370 ec1 = ec0;
4371 ec0 = ec0->next;
4373 tl_assert(ec0 == ec);
4374 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4375 RCEC* tmp;
4376 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4377 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4378 closer to the start of the list. */
4379 tl_assert(ec2->next == ec1);
4380 tl_assert(ec1->next == ec0);
4381 tmp = ec0->next;
4382 ec2->next = ec0;
4383 ec0->next = ec1;
4384 ec1->next = tmp;
4386 else
4387 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4388 /* it's second in the list. */
4389 tl_assert(*headp == ec1);
4390 tl_assert(ec1->next == ec0);
4391 ec1->next = ec0->next;
4392 ec0->next = ec1;
4393 *headp = ec0;
4398 /* Find the given RCEC in the tree, and return a pointer to it. Or,
4399 if not present, add the given one to the tree (by making a copy of
4400 it, so the caller can immediately deallocate the original) and
4401 return a pointer to the copy. The caller can safely have 'example'
4402 on its stack, since we will always return a pointer to a copy of
4403 it, not to the original. Note that the inserted node will have .rc
4404 of zero and so the caller must immediately increment it. */
4405 __attribute__((noinline))
4406 static RCEC* ctxt__find_or_add ( RCEC* example )
4408 UWord hent;
4409 RCEC* copy;
4411 if (CHECK_CEM) {
4412 /* Note that the single caller of ctxt__find_or_add always provides
4413 &thr->cached_rcec as argument. The sanity of thr->cached_rcec is always
4414 checked when a thread terminates. */
4415 tl_assert(example && example->magic == RCEC_MAGIC);
4416 tl_assert(example->rc == 0);
4419 /* Search the hash table to see if we already have it. */
4420 stats__ctxt_tab_qs++;
4421 hent = example->frames_hash % N_RCEC_TAB;
4422 copy = contextTab[hent];
4423 while (1) {
4424 if (!copy) break;
4425 if (CHECK_CEM)
4426 tl_assert(copy->magic == RCEC_MAGIC);
4427 stats__ctxt_tab_cmps++;
4428 if (!RCEC__differs_by_frames(copy, example)) break;
4429 copy = copy->next;
4432 if (copy) {
4433 tl_assert(copy != example);
4434 /* optimisation: if it's not at the head of its list, move 1
4435 step fwds, to make future searches cheaper */
4436 if (copy != contextTab[hent]) {
4437 move_RCEC_one_step_forward( &contextTab[hent], copy );
4439 } else {
4440 copy = alloc_RCEC();
4441 tl_assert(copy != example);
4442 *copy = *example;
4443 for (Word i = 0; i < HG_(clo_history_backtrace_size); i++)
4444 copy->frames[i] = example->frames[i];
4445 copy->next = contextTab[hent];
4446 contextTab[hent] = copy;
4447 stats__ctxt_tab_curr++;
4448 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4449 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4451 return copy;
4454 static inline UWord ROLW ( UWord w, Int n )
4456 Int bpw = 8 * sizeof(UWord);
4457 w = (w << n) | (w >> (bpw-n));
4458 return w;
4461 static UWord stats__cached_rcec_identical = 0;
4462 static UWord stats__cached_rcec_updated = 0;
4463 static UWord stats__cached_rcec_fresh = 0;
4464 static UWord stats__cached_rcec_diff = 0;
4465 static UWord stats__cached_rcec_diff_known_reason = 0;
4467 /* Check if the cached rcec in thr corresponds to the current
4468 stacktrace of the thread. Returns True if ok, False otherwise.
4469 This is just used for debugging the cached rcec logic, activated
4470 using --hg-sanity-flags=xx1xxx i.e. SCE_ACCESS flag.
4471 When this flag is activated, a call to this function will happen each time
4472 a stack trace is needed for a memory access. */
4473 __attribute__((noinline))
4474 static Bool check_cached_rcec_ok (Thr* thr, Addr previous_frame0)
4476 Bool ok = True;
4477 UInt i;
4478 UWord frames[HG_(clo_history_backtrace_size)];
4479 UWord sps[HG_(clo_history_backtrace_size)];
4480 UWord fps[HG_(clo_history_backtrace_size)];
4481 const DiEpoch cur_ep = VG_(current_DiEpoch)();
4483 for (i = 0; i < HG_(clo_history_backtrace_size); i++)
4484 frames[i] = sps[i] = fps[i] = 0;
4485 VG_(get_StackTrace)( thr->hgthread->coretid, &frames[0],
4486 HG_(clo_history_backtrace_size),
4487 &sps[0], &fps[0], 0);
4488 for (i = 0; i < HG_(clo_history_backtrace_size); i++) {
4489 if ( thr->cached_rcec.frames[i] != frames[i] ) {
4490 /* There are a bunch of "normal" reasons for which a stack
4491 derived from the cached rcec differs from frames. */
4492 const HChar *reason = NULL;
4494 /* Old linkers (e.g. RHEL5) gave no cfi unwind information in the PLT
4495 section (fix was added in binutils around June 2011).
4496 Without PLT unwind info, stacktrace in the PLT section are
4497 missing an entry. E.g. the cached stacktrace is:
4498 ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
4499 ==4463== by 0x33B7F9: __libc_thread_freeres
4500 (in /lib/libc-2.11.2.so)
4501 ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
4502 ==4463== by 0x2F107D: clone (clone.S:130)
4503 while the 'check stacktrace' is
4504 ==4463== at 0x2035C0: ___tls_get_addr (dl-tls.c:753)
4505 ==4463== by 0x33B82D: strerror_thread_freeres
4506 (in /lib/libc-2.11.2.so)
4507 ==4463== by 0x33B7F9: __libc_thread_freeres
4508 (in /lib/libc-2.11.2.so)
4509 ==4463== by 0x39BA4F: start_thread (pthread_create.c:307)
4510 ==4463== by 0x2F107D: clone (clone.S:130)
4511 No cheap/easy way to detect or fix that. */
4513 /* It seems that sometimes, the CFI unwind info looks wrong
4514 for a 'ret' instruction. E.g. here is the unwind info
4515 for a 'retq' on gcc20 (amd64, Debian 7)
4516 [0x4e3ddfe .. 0x4e3ddfe]: let cfa=oldSP+48 in RA=*(cfa+-8)
4517 SP=cfa+0 BP=*(cfa+-24)
4518 This unwind info looks doubtful, as the RA should be at oldSP.
4519 No easy way to detect this problem.
4520 This gives a difference between cached rcec and
4521 current stack trace: the cached rcec is correct. */
4523 /* When returning from main, unwind info becomes erratic.
4524 So, by default, only report errors for main and above,
4525 unless asked to show below main. */
4526 if (reason == NULL) {
4527 UInt fr_main;
4528 Vg_FnNameKind fr_kind = Vg_FnNameNormal;
4529 for (fr_main = 0;
4530 fr_main < HG_(clo_history_backtrace_size);
4531 fr_main++) {
4532 fr_kind = VG_(get_fnname_kind_from_IP)
4533 (cur_ep, frames[fr_main]);
4534 if (fr_kind == Vg_FnNameMain || fr_kind == Vg_FnNameBelowMain)
4535 break;
4537 UInt kh_main;
4538 Vg_FnNameKind kh_kind = Vg_FnNameNormal;
4539 for (kh_main = 0;
4540 kh_main < HG_(clo_history_backtrace_size);
4541 kh_main++) {
4542 kh_kind = VG_(get_fnname_kind_from_IP)
4543 (cur_ep, thr->cached_rcec.frames[kh_main]);
4544 if (kh_kind == Vg_FnNameMain || kh_kind == Vg_FnNameBelowMain)
4545 break;
4547 if (kh_main == fr_main
4548 && kh_kind == fr_kind
4549 && (kh_main < i || (kh_main == i
4550 && kh_kind == Vg_FnNameBelowMain))) {
4551 // found main or below main before the difference
4552 reason = "Below main";
4556 /* We have places where the stack is missing some internal
4557 pthread functions. For such stacktraces, GDB reports only
4558 one function, telling:
4559 #0 0xf7fa81fe in _L_unlock_669 ()
4560 from /lib/i386-linux-gnu/libpthread.so.0
4561 Backtrace stopped: previous frame identical to
4562 this frame (corrupt stack?)
4564 This is when sps and fps are identical.
4565 The cached stack trace is then
4566 ==3336== at 0x40641FE: _L_unlock_669
4567 (pthread_mutex_unlock.c:310)
4568 ==3336== by 0x40302BE: pthread_mutex_unlock
4569 (hg_intercepts.c:710)
4570 ==3336== by 0x80486AF: main (cond_timedwait_test.c:14)
4571 while the 'check stacktrace' is
4572 ==3336== at 0x40641FE: _L_unlock_669
4573 (pthread_mutex_unlock.c:310)
4574 ==3336== by 0x4064206: _L_unlock_669
4575 (pthread_mutex_unlock.c:310)
4576 ==3336== by 0x4064132: __pthread_mutex_unlock_usercnt
4577 (pthread_mutex_unlock.c:57)
4578 ==3336== by 0x40302BE: pthread_mutex_unlock
4579 (hg_intercepts.c:710)
4580 ==3336== by 0x80486AF: main (cond_timedwait_test.c:14) */
4581 if (reason == NULL) {
4582 if ((i > 0
4583 && sps[i] == sps[i-1] && fps[i] == fps[i-1])
4584 || (i < HG_(clo_history_backtrace_size)-1
4585 && sps[i] == sps[i+1] && fps[i] == fps[i+1])) {
4586 reason = "previous||next frame: identical sp and fp";
4589 if (reason == NULL) {
4590 if ((i > 0
4591 && fps[i] == fps[i-1])
4592 || (i < HG_(clo_history_backtrace_size)-1
4593 && fps[i] == fps[i+1])) {
4594 reason = "previous||next frame: identical fp";
4598 /* When we have a read or write 'in the middle of a push instruction',
4599 then the normal backtrace is not very good, while the helgrind
4600 stacktrace is better, as it undoes the not yet fully finished
4601 push instruction before getting the stacktrace. */
4602 if (reason == NULL && thr->hgthread->first_sp_delta != 0) {
4603 reason = "fixupSP probably needed for check stacktrace";
4606 /* Unwinding becomes hectic when running the exit handlers.
4607 None of GDB, cached stacktrace and check stacktrace corresponds.
4608 So, if we find __run_exit_handlers, ignore the difference. */
4609 if (reason == NULL) {
4610 const HChar *fnname;
4611 for (UInt f = 0; f < HG_(clo_history_backtrace_size); f++) {
4612 if (VG_(get_fnname)( cur_ep, frames[f], &fnname)
4613 && VG_(strcmp) ("__run_exit_handlers", fnname) == 0) {
4614 reason = "exit handlers";
4615 break;
4620 // Show what we have found for this difference
4621 if (reason == NULL) {
4622 ok = False;
4623 stats__cached_rcec_diff++;
4624 } else {
4625 ok = True;
4626 stats__cached_rcec_diff_known_reason++;
4628 if (!ok || VG_(clo_verbosity) > 2) {
4629 Bool save_show_below_main = VG_(clo_show_below_main);
4630 VG_(clo_show_below_main) = True;
4631 /* The below error msg reports an unexpected diff in 'frame %d'.
4632 The (maybe wrong) pc found in the cached stacktrace is
4633 'cached_pc %p' while an unwind gives the (maybe wrong)
4634 'check_pc %p'.
4635 After, 'previous_frame0 %p' tells where the cached stacktrace
4636 was taken.
4637 This is then followed by the full resulting cache stack trace
4638 and the full stack trace found doing unwind.
4639 Such a diff can have various origins:
4640 * a bug in the unwinder, when the cached stack trace was taken
4641 at 'previous_frame0'
4642 * a bug in the unwinder, when the check stack trace was taken
4643 (i.e. at current pc).
4644 * a missing 'invalidate cache stack trace' somewhere in the
4645 instructions between 'previous_frame0' and current_pc.
4646 To investigate the last case, typically disassemble the range of
4647 instructions where an 'invalidate cached stack trace' might be missing. */
4648 VG_(printf)("%s diff tid %u frame %u "
4649 "cached_pc %p check_pc %p\n",
4650 reason ? reason : "unexpected",
4651 thr->hgthread->coretid,
4653 (void*)thr->cached_rcec.frames[i],
4654 (void*)frames[i]);
4655 VG_(printf)("cached stack trace previous_frame0 %p\n",
4656 (void*)previous_frame0);
4657 VG_(pp_StackTrace)(cur_ep, &previous_frame0, 1);
4658 VG_(printf)("resulting cached stack trace:\n");
4659 VG_(pp_StackTrace)(cur_ep, thr->cached_rcec.frames,
4660 HG_(clo_history_backtrace_size));
4661 VG_(printf)("check stack trace:\n");
4662 VG_(pp_StackTrace)(cur_ep, frames, HG_(clo_history_backtrace_size));
4664 VG_(show_sched_status) (False, // host_stacktrace
4665 False, // stack_usage
4666 False); // exited_threads
4667 if (VG_(clo_vgdb_error) == 1234567890) // HACK TO ALLOW TO DEBUG
4668 VG_(gdbserver) ( thr->hgthread->coretid );
4669 VG_(clo_show_below_main) = save_show_below_main;
4671 break; // Stop giving more errors for this stacktrace.
4674 return ok;
4677 __attribute__((noinline))
4678 static RCEC* get_RCEC ( Thr* thr )
4680 UInt i;
4681 UWord hash;
4682 Addr previous_frame0 = 0; // Assignment needed to silence gcc
4683 RCEC *res;
4684 const Bool thr_cached_rcec_valid = cached_rcec_valid(thr);
4685 const Addr cur_ip = VG_(get_IP)(thr->hgthread->coretid);
4687 if (DEBUG_CACHED_RCEC)
4688 VG_(printf)("get rcec tid %u at IP %p SP %p"
4689 " first_sp_delta %ld cached valid %d\n",
4690 thr->hgthread->coretid,
4691 (void*)cur_ip,
4692 (void*)VG_(get_SP)(thr->hgthread->coretid),
4693 thr->hgthread->first_sp_delta, thr_cached_rcec_valid);
4695 /* If we have a valid cached rcec, derive the new rcec from the cached one
4696 and update the cached one.
4697 Otherwise, compute a fresh rcec. */
4699 if (thr_cached_rcec_valid) {
4700 /* Update the stacktrace of the cached rcec with the current IP */
4701 previous_frame0 = thr->cached_rcec.frames[0];
4702 thr->cached_rcec.frames[0] = cur_ip;
4704 # if defined(VGP_x86_linux)
4705 // See m_stacktrace.c kludge
4706 extern Addr VG_(client__dl_sysinfo_int80);
4707 /// #include pub_core_clientstate needed for the above ????
4708 /// or move the above into a pub_tool_??? tool_stacktrace.h maybe ????
4709 if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
4710 && cur_ip >= VG_(client__dl_sysinfo_int80)
4711 && cur_ip < VG_(client__dl_sysinfo_int80)+3
4713 thr->cached_rcec.frames[0]
4714 = (ULong) *(Addr*)(UWord)VG_(get_SP)(thr->hgthread->coretid);
4716 # endif
4718 if (previous_frame0 == thr->cached_rcec.frames[0])
4719 stats__cached_rcec_identical++;
4720 else
4721 stats__cached_rcec_updated++;
4722 } else {
4723 /* Compute a fresh stacktrace. */
4724 main_get_stacktrace( thr, &thr->cached_rcec.frames[0],
4725 HG_(clo_history_backtrace_size) );
4726 if (DEBUG_CACHED_RCEC) {
4727 Bool save_show_below_main = VG_(clo_show_below_main);
4728 VG_(clo_show_below_main) = True;
4729 VG_(printf)("caching stack trace:\n");
4730 VG_(pp_StackTrace)(VG_(current_DiEpoch)(),
4731 &thr->cached_rcec.frames[0],
4732 HG_(clo_history_backtrace_size));
4733 VG_(clo_show_below_main) = save_show_below_main;
4735 stats__cached_rcec_fresh++;
4738 hash = 0;
4739 for (i = 0; i < HG_(clo_history_backtrace_size); i++) {
4740 hash ^= thr->cached_rcec.frames[i];
4741 hash = ROLW(hash, 19);
4743 thr->cached_rcec.frames_hash = hash;
4744 res = ctxt__find_or_add( &thr->cached_rcec );
4746 if (UNLIKELY(HG_(clo_sanity_flags) & SCE_ACCESS)
4747 && thr_cached_rcec_valid) {
4748 /* In case the cached and check differ, invalidate the cached rcec.
4749 Fewer duplicated diffs are then reported afterwards. */
4750 if (!check_cached_rcec_ok (thr, previous_frame0))
4751 set_cached_rcec_validity(thr, False);
4752 } else {
4753 if (HG_(clo_delta_stacktrace) && !thr_cached_rcec_valid)
4754 set_cached_rcec_validity(thr, True);
4757 return res;
4760 ///////////////////////////////////////////////////////
4761 //// Part (2):
4762 /// A hashtable guest-addr -> OldRef, that refers to (1)
4763 /// Note: we use the guest address as key. This means that the entries
4764 /// for multiple threads accessing the same address will land in the same
4765 /// bucket. It might be nice to have a better distribution of the
4766 /// OldRef in the hashtable by using as key the guest address ^ tsw.
4767 /// The problem is that when a race is reported on a ga, we need to retrieve
4768 /// efficiently the accesses to ga by other threads, only using the ga.
4769 /// Measurements on firefox have shown that the chain length is reasonable.
4771 /* Records an access: a thread, a context (size & writeness) and the
4772 number of held locks. The size (1,2,4,8) is stored as is in szB.
4773 Note that szB uses more bits than needed to store a size up to 8.
4774 This allows a TSW to be used as a fully initialised UInt, e.g. in
4775 cmp_oldref_tsw. If needed, a more compact representation of szB
4776 is possible (e.g. use only 4 bits, or use only 2 bits and encode the
4777 size (1,2,4,8) as 00 = 1, 01 = 2, 10 = 4, 11 = 8). */
4778 typedef
4779 struct {
4780 UInt thrid : SCALARTS_N_THRBITS;
4781 UInt szB : 32 - SCALARTS_N_THRBITS - 1;
4782 UInt isW : 1;
4783 } TSW; // Thread+Size+Writeness
4784 typedef
4785 struct {
4786 TSW tsw;
4787 WordSetID locksHeldW;
4788 RCEC* rcec;
4790 Thr_n_RCEC;
4792 typedef
4793 struct OldRef {
4794 struct OldRef *ht_next; // to link hash table nodes together.
4795 UWord ga; // hash_table key, == address for which we record an access.
4796 struct OldRef *prev; // to refs older than this one
4797 struct OldRef *next; // to refs newer that this one
4798 UWord stamp; // allows to order (by time of access) 2 OldRef
4799 Thr_n_RCEC acc;
4801 OldRef;
4803 /* Returns the or->tsw as a UInt */
4804 static inline UInt oldref_tsw (const OldRef* or)
4806 return *(const UInt*)(&or->acc.tsw);
4809 /* Compare the tsw component for 2 OldRef.
4810 Used for OldRef hashtable (which already verifies equality of the
4811 'key' part). */
4812 static Word cmp_oldref_tsw (const void* node1, const void* node2 )
4814 const UInt tsw1 = oldref_tsw(node1);
4815 const UInt tsw2 = oldref_tsw(node2);
4817 if (tsw1 < tsw2) return -1;
4818 if (tsw1 > tsw2) return 1;
4819 return 0;
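/* Sketch: TSW is sized so that it occupies exactly one UInt
   (SCALARTS_N_THRBITS + (32 - SCALARTS_N_THRBITS - 1) + 1 == 32 bits),
   which is what lets oldref_tsw/cmp_oldref_tsw compare the whole
   (thread, size, writeness) triple with a single integer compare: */
#if 0
static void example_tsw_compare ( ThrID thrid )
{
   STATIC_ASSERT( sizeof(TSW) == sizeof(UInt) );
   TSW t1 = (TSW){ .thrid = thrid, .szB = 4, .isW = 1 };
   TSW t2 = (TSW){ .thrid = thrid, .szB = 4, .isW = 1 };
   tl_assert( *(UInt*)&t1 == *(UInt*)&t2 ); /* equal fields, equal UInt */
}
#endif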
4823 //////////// BEGIN OldRef pool allocator
4824 static PoolAlloc* oldref_pool_allocator;
4825 // Note: We only allocate elements in this pool allocator, we never free them.
4826 // We stop allocating elements at VG_(clo_conflict_cache_size).
4827 //////////// END OldRef pool allocator
4829 static OldRef mru;
4830 static OldRef lru;
4831 // A doubly linked list, chaining all OldRef in mru/lru order.
4832 // mru/lru are sentinel nodes.
4833 // Whenever an oldref is re-used, its position is changed as the most recently
4834 // used (i.e. pointed to by mru.prev).
4835 // When a new oldref is needed, it is allocated from the pool
4836 // if we have not yet reached --conflict-cache-size.
4837 // Otherwise, if all oldref have already been allocated,
4838 // the least recently used (i.e. pointed to by lru.next) is re-used.
4839 // When an OldRef is used, it is moved as the most recently used entry
4840 // (i.e. pointed to by mru.prev).
4842 // Removes r from the double linked list
4843 // Note: we do not need to test for special cases such as
4844 // NULL next or prev pointers, because we have sentinel nodes
4845 // at both sides of the list. So, a node is always forward and
4846 // backward linked.
4847 static inline void OldRef_unchain(OldRef *r)
4849 r->next->prev = r->prev;
4850 r->prev->next = r->next;
4853 // Insert new as the newest OldRef
4854 // Similarly to OldRef_unchain, no need to test for NULL
4855 // pointers, as e.g. mru.prev is always guaranteed to point
4856 // to a non NULL node (lru when the list is empty).
4857 static inline void OldRef_newest(OldRef *new)
4859 new->next = &mru;
4860 new->prev = mru.prev;
4861 mru.prev = new;
4862 new->prev->next = new;
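/* Sketch of the 'touch' operation performed by event_map_bind below
   when an existing OldRef is hit again: unchain it from wherever it
   currently sits and re-insert it at the MRU end. Thanks to the
   sentinel nodes, no NULL checks are needed. */
#if 0
static inline void example_OldRef_touch ( OldRef* r )
{
   OldRef_unchain( r );
   OldRef_newest( r );
}
#endif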
4866 static VgHashTable* oldrefHT = NULL; /* Hash table* OldRef* */
4867 static UWord oldrefHTN = 0; /* # elems in oldrefHT */
4868 /* Note: the nr of refs in the oldrefHT will always be equal to
4869 the nr of elements that were allocated from the OldRef pool allocator,
4870 as we never free an OldRef: we just re-use them. */
4873 /* Allocates a new OldRef, or re-uses the lru one if all allowed OldRefs
4874 have already been allocated. */
4875 static OldRef* alloc_or_reuse_OldRef ( void )
4877 if (oldrefHTN < HG_(clo_conflict_cache_size)) {
4878 oldrefHTN++;
4879 return VG_(allocEltPA) ( oldref_pool_allocator );
4880 } else {
4881 OldRef *oldref_ht;
4882 OldRef *oldref = lru.next;
4884 OldRef_unchain(oldref);
4885 oldref_ht = VG_(HT_gen_remove) (oldrefHT, oldref, cmp_oldref_tsw);
4886 tl_assert (oldref == oldref_ht);
4887 ctxt__rcdec( oldref->acc.rcec );
4888 return oldref;
4893 inline static UInt min_UInt ( UInt a, UInt b ) {
4894 return a < b ? a : b;
4897 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4898 first interval is lower, 1 if the first interval is higher, and 0
4899 if there is any overlap. Redundant paranoia with casting is there
4900 following what looked distinctly like a bug in gcc-4.1.2, in which
4901 some of the comparisons were done signedly instead of
4902 unsignedly. */
4903 /* Copied from exp-ptrcheck/sg_main.c */
4904 static inline Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4905 Addr a2, SizeT n2 ) {
4906 UWord a1w = (UWord)a1;
4907 UWord n1w = (UWord)n1;
4908 UWord a2w = (UWord)a2;
4909 UWord n2w = (UWord)n2;
4910 tl_assert(n1w > 0 && n2w > 0);
4911 if (a1w + n1w <= a2w) return -1L;
4912 if (a2w + n2w <= a1w) return 1L;
4913 return 0;
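/* Worked examples for cmp_nonempty_intervals (sketch only): */
#if 0
static void example_interval_cmp ( void )
{
   tl_assert( cmp_nonempty_intervals( 0x1000, 4, 0x1004, 4 ) == -1 );
   tl_assert( cmp_nonempty_intervals( 0x1004, 4, 0x1000, 4 ) ==  1 );
   /* [0x1000,0x1004) and [0x1003,0x1005) share byte 0x1003: overlap */
   tl_assert( cmp_nonempty_intervals( 0x1000, 4, 0x1003, 2 ) ==  0 );
}
#endif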
4916 static UWord event_map_stamp = 0; // Used to stamp each OldRef when touched.
4918 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
4920 OldRef example;
4921 OldRef* ref;
4922 RCEC* rcec;
4924 tl_assert(thr);
4925 ThrID thrid = thr->thrid;
4926 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4928 WordSetID locksHeldW = thr->hgthread->locksetW;
4930 rcec = get_RCEC( thr );
4932 /* Look in the oldrefHT to see if we already have a record for this
4933 address/thr/sz/isW. */
4934 example.ga = a;
4935 example.acc.tsw = (TSW) {.thrid = thrid,
4936 .szB = szB,
4937 .isW = (UInt)(isW & 1)};
4938 ref = VG_(HT_gen_lookup) (oldrefHT, &example, cmp_oldref_tsw);
4940 if (ref) {
4941 /* We already have a record for this address and this (thrid, R/W,
4942 size) triple. */
4943 tl_assert (ref->ga == a);
4945 /* thread 'thr' has an entry. Update its RCEC, if it differs. */
4946 if (rcec == ref->acc.rcec)
4947 stats__ctxt_eq_tsw_eq_rcec++;
4948 else {
4949 stats__ctxt_eq_tsw_neq_rcec++;
4950 ctxt__rcdec( ref->acc.rcec );
4951 ctxt__rcinc(rcec);
4952 ref->acc.rcec = rcec;
4954 tl_assert(ref->acc.tsw.thrid == thrid);
4955 /* Update the stamp, RCEC and the W-held lockset. */
4956 ref->stamp = event_map_stamp;
4957 ref->acc.locksHeldW = locksHeldW;
4959 OldRef_unchain(ref);
4960 OldRef_newest(ref);
4962 } else {
4963 tl_assert (szB == 4 || szB == 8 || szB == 1 || szB == 2);
4964 // We only need to check the size the first time we insert a ref.
4965 // Check for most frequent cases first
4966 // Note: we could support a szB up to 1 << (32 - SCALARTS_N_THRBITS - 1)
4968 /* We don't have a record for this address+triple. Create a new one. */
4969 stats__ctxt_neq_tsw_neq_rcec++;
4970 ref = alloc_or_reuse_OldRef();
4971 ref->ga = a;
4972 ref->acc.tsw = (TSW) {.thrid = thrid,
4973 .szB = szB,
4974 .isW = (UInt)(isW & 1)};
4975 ref->stamp = event_map_stamp;
4976 ref->acc.locksHeldW = locksHeldW;
4977 ref->acc.rcec = rcec;
4978 ctxt__rcinc(rcec);
4980 VG_(HT_add_node) ( oldrefHT, ref );
4981 OldRef_newest (ref);
4983 event_map_stamp++;
4987 /* Extract info from the conflicting-access machinery.
4988 Returns the most recent conflicting access with thr/[a, a+szB[/isW. */
4989 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
4990 /*OUT*/Thr** resThr,
4991 /*OUT*/SizeT* resSzB,
4992 /*OUT*/Bool* resIsW,
4993 /*OUT*/WordSetID* locksHeldW,
4994 Thr* thr, Addr a, SizeT szB, Bool isW )
4996 Word i, j;
4997 OldRef *ref = NULL;
4998 SizeT ref_szB = 0;
5000 OldRef *cand_ref;
5001 SizeT cand_ref_szB;
5002 Addr cand_a;
5004 Addr toCheck[15];
5005 Int nToCheck = 0;
5007 tl_assert(thr);
5008 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
5010 ThrID thrid = thr->thrid;
5012 toCheck[nToCheck++] = a;
5013 for (i = -7; i < (Word)szB; i++) {
5014 if (i != 0)
5015 toCheck[nToCheck++] = a + i;
5017 tl_assert(nToCheck <= 15);
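/* Example (illustrative numbers): for a == 0x1000 and szB == 4,
   toCheck holds 0x1000, then 0x0FF9 .. 0x0FFF and 0x1001 .. 0x1003.
   Since recorded accesses are at most 8 bytes long, any OldRef
   overlapping [a, a+szB) must have its ga in [a-7, a+szB-1], so the
   15-entry array always suffices. */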
5019 /* Now see if we can find a suitable matching event for
5020 any of the addresses in toCheck[0 .. nToCheck-1]. */
5021 for (j = 0; j < nToCheck; j++) {
5023 cand_a = toCheck[j];
5024 // VG_(printf)("test %ld %p\n", j, cand_a);
5026 /* Find the first HT element for this address.
5027 We might have several of these. They will be linked via ht_next.
5028 We however need to check various elements as the list contains
5029 all elements that map to the same bucket. */
5030 for (cand_ref = VG_(HT_lookup)( oldrefHT, cand_a );
5031 cand_ref; cand_ref = cand_ref->ht_next) {
5032 if (cand_ref->ga != cand_a)
5033 /* OldRef for another address in this HT bucket. Ignore. */
5034 continue;
5036 if (cand_ref->acc.tsw.thrid == thrid)
5037 /* This is an access by the same thread, but we're only
5038 interested in accesses from other threads. Ignore. */
5039 continue;
5041 if ((!cand_ref->acc.tsw.isW) && (!isW))
5042 /* We don't want to report a read racing against another
5043 read; that's stupid. So in this case move on. */
5044 continue;
5046 cand_ref_szB = cand_ref->acc.tsw.szB;
5047 if (cmp_nonempty_intervals(a, szB, cand_a, cand_ref_szB) != 0)
5048 /* No overlap with the access we're asking about. Ignore. */
5049 continue;
5051 /* We have a match. Keep this match if it is newer than
5052 the previous match. Note that stamps are unsigned words, and
5053 for long-running applications, event_map_stamp might have wrapped around.
5054 So 'roll' each stamp using event_map_stamp to keep the stamps
5055 in the right order even if event_map_stamp has wrapped. */
5056 if (!ref
5057 || (ref->stamp - event_map_stamp)
5058 < (cand_ref->stamp - event_map_stamp)) {
5059 ref = cand_ref;
5060 ref_szB = cand_ref_szB;
5064 if (ref) {
5065 /* return with success */
5066 Int n, maxNFrames;
5067 RCEC* ref_rcec = ref->acc.rcec;
5068 tl_assert(ref->acc.tsw.thrid);
5069 tl_assert(ref_rcec);
5070 tl_assert(ref_rcec->magic == RCEC_MAGIC);
5071 tl_assert(ref_szB >= 1);
5072 /* Count how many non-zero frames we have. */
5073 maxNFrames = min_UInt(HG_(clo_history_backtrace_size),
5074 VG_(clo_backtrace_size));
5075 for (n = 0; n < maxNFrames; n++) {
5076 if (0 == ref_rcec->frames[n]) break;
5078 *resEC = VG_(make_ExeContext_from_StackTrace)(&ref_rcec->frames[0],
5080 *resThr = Thr__from_ThrID(ref->acc.tsw.thrid);
5081 *resSzB = ref_szB;
5082 *resIsW = ref->acc.tsw.isW;
5083 *locksHeldW = ref->acc.locksHeldW;
5084 stats__evm__lookup_found++;
5085 return True;
5088 /* consider next address in toCheck[] */
5089 } /* for (j = 0; j < nToCheck; j++) */
5091 /* really didn't find anything. */
5092 stats__evm__lookup_notfound++;
5093 return False;
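/* Why the 'rolled' stamp comparison above is wrap-safe (sketch with
   made-up numbers): event_map_stamp is the value that the *next*
   access will receive, so for any live OldRef the unsigned difference
   (stamp - event_map_stamp) is effectively minus its age, modulo the
   word size. A larger rolled value therefore means a smaller age,
   i.e. a more recent access, even across a counter wrap. */
#if 0
static void example_rolled_stamps ( void )
{
   UWord now       = 5;          /* counter shortly after wrapping   */
   UWord pre_wrap  = (UWord)-3;  /* stamped 8 accesses before 'now'  */
   UWord post_wrap = 2;          /* stamped 3 accesses before 'now'  */
   tl_assert( (pre_wrap - now) < (post_wrap - now) ); /* older < newer */
}
#endif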
5097 void libhb_event_map_access_history ( Addr a, SizeT szB, Access_t fn )
5099 OldRef *ref = lru.next;
5100 SizeT ref_szB;
5101 Int n;
5103 while (ref != &mru) {
5104 ref_szB = ref->acc.tsw.szB;
5105 if (cmp_nonempty_intervals(a, szB, ref->ga, ref_szB) == 0) {
5106 RCEC* ref_rcec = ref->acc.rcec;
5107 for (n = 0; n < HG_(clo_history_backtrace_size); n++) {
5108 if (0 == ref_rcec->frames[n]) {
5109 break;
5112 (*fn)(&ref_rcec->frames[0], n,
5113 Thr__from_ThrID(ref->acc.tsw.thrid),
5114 ref->ga,
5115 ref_szB,
5116 ref->acc.tsw.isW,
5117 ref->acc.locksHeldW);
5119 tl_assert (ref->next == &mru
5120 || ((ref->stamp - event_map_stamp)
5121 < ref->next->stamp - event_map_stamp));
5122 ref = ref->next;
5126 static void event_map_init ( void )
5128 Word i;
5130 /* Context (RCEC) pool allocator */
5131 rcec_pool_allocator
5132 = VG_(newPA) (
5133 sizeof(RCEC) + 2 * HG_(clo_history_backtrace_size) * sizeof(UWord),
5134 1000 /* RCECs per pool */,
5135 HG_(zalloc),
5136 "libhb.event_map_init.1 (RCEC pools)",
5137 HG_(free)
5140 /* Context table */
5141 tl_assert(!contextTab);
5142 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
5143 N_RCEC_TAB * sizeof(RCEC*) );
5144 for (i = 0; i < N_RCEC_TAB; i++)
5145 contextTab[i] = NULL;
5147 /* Oldref pool allocator */
5148 oldref_pool_allocator = VG_(newPA)(
5149 sizeof(OldRef),
5150 1000 /* OldRefs per pool */,
5151 HG_(zalloc),
5152 "libhb.event_map_init.3 (OldRef pools)",
5153 HG_(free)
5156 /* Oldref hashtable */
5157 tl_assert(!oldrefHT);
5158 oldrefHT = VG_(HT_construct) ("libhb.event_map_init.4 (oldref hashtable)");
5160 oldrefHTN = 0;
5161 mru.prev = &lru;
5162 mru.next = NULL;
5163 lru.prev = NULL;
5164 lru.next = &mru;
5165 mru.acc = (Thr_n_RCEC) {.tsw = {.thrid = 0,
5166 .szB = 0,
5167 .isW = 0},
5168 .locksHeldW = 0,
5169 .rcec = NULL};
5170 lru.acc = mru.acc;
5173 static void event_map__check_reference_counts ( void )
5175 RCEC* rcec;
5176 OldRef* oldref;
5177 Word i;
5178 UWord nEnts = 0;
5180 /* Set the 'check' reference counts to zero. Also, optionally
5181 check that the real reference counts are non-zero. We allow
5182 these to fall to zero before a GC, but the GC must get rid of
5183 all those that are zero, hence none should be zero after a
5184 GC. */
5185 for (i = 0; i < N_RCEC_TAB; i++) {
5186 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
5187 nEnts++;
5188 tl_assert(rcec);
5189 tl_assert(rcec->magic == RCEC_MAGIC);
5190 rcec->rcX = 0;
5194 /* check that the stats are sane */
5195 tl_assert(nEnts == stats__ctxt_tab_curr);
5196 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
5198 /* visit all the referencing points, inc check ref counts */
5199 VG_(HT_ResetIter)( oldrefHT );
5200 oldref = VG_(HT_Next)( oldrefHT );
5201 while (oldref) {
5202 tl_assert (oldref->acc.tsw.thrid);
5203 tl_assert (oldref->acc.rcec);
5204 tl_assert (oldref->acc.rcec->magic == RCEC_MAGIC);
5205 oldref->acc.rcec->rcX++;
5206 oldref = VG_(HT_Next)( oldrefHT );
5209 /* compare check ref counts with actual */
5210 for (i = 0; i < N_RCEC_TAB; i++) {
5211 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
5212 tl_assert(rcec->rc == rcec->rcX);
5217 __attribute__((noinline))
5218 static void do_RCEC_GC ( void )
5220 UInt i;
5222 if (VG_(clo_stats)) {
5223 static UInt ctr = 1;
5224 VG_(message)(Vg_DebugMsg,
5225 "libhb: RCEC GC: #%u %lu slots,"
5226 " %lu cur ents(ref'd %lu),"
5227 " %lu max ents\n",
5228 ctr++,
5229 (UWord)N_RCEC_TAB,
5230 stats__ctxt_tab_curr, RCEC_referenced,
5231 stats__ctxt_tab_max );
5233 tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
5235 /* Throw away all RCECs with zero reference counts */
5236 for (i = 0; i < N_RCEC_TAB; i++) {
5237 RCEC** pp = &contextTab[i];
5238 RCEC* p = *pp;
5239 while (p) {
5240 if (p->rc == 0) {
5241 *pp = p->next;
5242 free_RCEC(p);
5243 p = *pp;
5244 tl_assert(stats__ctxt_tab_curr > 0);
5245 stats__ctxt_rcec_gc_discards++;
5246 stats__ctxt_tab_curr--;
5247 } else {
5248 pp = &p->next;
5249 p = p->next;
5254 tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
5257 /////////////////////////////////////////////////////////
5258 // //
5259 // Core MSM //
5260 // //
5261 /////////////////////////////////////////////////////////
5263 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
5264 Nov 08, and again after [...],
5265 June 09. */
5267 static ULong stats__msmcread = 0;
5268 static ULong stats__msmcread_change = 0;
5269 static ULong stats__msmcwrite = 0;
5270 static ULong stats__msmcwrite_change = 0;
5272 /* Some notes on the H1 history mechanism:
5274 Transition rules are:
5276 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
5277 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
5279 After any access by a thread T to a location L, L's constraint pair
5280 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
5282 After a race by thread T conflicting with some previous access by
5283 some other thread U, for a location with constraint (before
5284 processing the later access) (Cr,Cw), then Cw[U] is the segment in
5285 which the previous access lies.
5287 Hence in record_race_info, we pass in Cfailed and Kfailed, which
5288 are compared so as to find out which thread(s) this access
5289 conflicts with. Once that is established, we also require the
5290 pre-update Cw for the location, so we can index into it for those
5291 threads, to get the scalar clock values for the point at which the
5292 former accesses were made. (In fact we only bother to do any of
5293 this for an arbitrarily chosen one of the conflicting threads:
5294 that is simpler, it avoids flooding the user with vast amounts of
5295 mostly useless information, and the program is wrong if it
5296 contains any races at all -- so we don't really need to show every
5297 conflicting access pair up front, so long as we show at least one
5298 whenever any exist.)
5302 The claim above, that after any access by a thread T we have Cw[T] == T's Kw[T], requires the auxiliary proof that
5304 (Cr `join` Kw)[T] == Kw[T]
5306 Why should that be true? Because for any thread T, Kw[T] >= the
5307 scalar clock value for T known by any other thread. In other
5308 words, because T's value for its own scalar clock is at least as up
5309 to date as the value for it known by any other thread (that is true
5310 for both the R- and W- scalar clocks). Hence no other thread will
5311 be able to feed in a value for that element (indirectly via a
5312 constraint) which will exceed Kw[T], and hence the join cannot
5313 cause that particular element to advance.
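/* Editor's note -- a worked instance of the rules above (a sketch,
   not from the original sources).  Suppose location L has constraint
   (Cr,Cw) = ({T1:3}, {T1:3}) and thread T2 has Kr = Kw = {T1:1, T2:8}.

      read  by T2:  (Cr,Cw) := (Cr, Cr `join` Kw)
                             = ({T1:3}, {T1:3, T2:8})
      write by T2:  (Cr,Cw) := (Cr `join` Kw, Cr `join` Kw)
                             = ({T1:3, T2:8}, {T1:3, T2:8})

   In both cases Cw[T2] == 8 == Kw[T2], which is the "Cw[T] == T's
   Kw[T]" property stated above; the auxiliary proof that
   (Cr `join` Kw)[T] == Kw[T] is exactly what makes this hold in
   general. */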
5316 __attribute__((noinline))
5317 static void record_race_info ( Thr* acc_thr,
5318 Addr acc_addr, SizeT szB, Bool isWrite,
5319 VtsID Cfailed,
5320 VtsID Kfailed,
5321 VtsID Cw )
5323 /* Call here to report a race. We just hand it onwards to
5324 HG_(record_error_Race). If that in turn discovers that the
5325 error is going to be collected, then, at history_level 2, that
5326 queries the conflicting-event map. The alternative would be to
5327 query it right here. But that causes a lot of pointless queries
5328 for errors which will shortly be discarded as duplicates, and
5329 can become a performance overhead; so we defer the query until
5330 we know the error is not a duplicate. */
5332 /* Stacks for the bounds of the (or one of the) conflicting
5333 segment(s). These are only set at history_level 1. */
5334 ExeContext* hist1_seg_start = NULL;
5335 ExeContext* hist1_seg_end = NULL;
5336 Thread* hist1_conf_thr = NULL;
5338 tl_assert(acc_thr);
5339 tl_assert(acc_thr->hgthread);
5340 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
5341 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
5343 if (HG_(clo_history_level) == 1) {
5344 Bool found;
5345 Word firstIx, lastIx;
5346 ULong_n_EC key;
5348 /* At history_level 1, we must round up the relevant stack-pair
5349 for the conflicting segment right now. This is because
5350 deferring it is complex; we can't (easily) put Kfailed and
5351 Cfailed into the XError and wait for later without
5352 getting tied up in difficulties with VtsID reference
5353 counting. So just do it now. */
5354 Thr* confThr;
5355 ULong confTym = 0;
5356 /* Which thread are we in conflict with? There may be more than
5357 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
5358 (in fact it's the one with the lowest Thr* value). */
5359 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
5360 /* This must exist, since if it were NULL then there's no
5361 conflict (semantics of return value of
5362 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
5363 called us, just checked exactly this -- that there was in
5364 fact a race. */
5365 tl_assert(confThr);
5367 /* Get the scalar clock value that the conflicting thread
5368 introduced into the constraint. A careful examination of the
5369 base machine rules shows that this must be the same as the
5370 conflicting thread's scalar clock when it created this
5371 constraint. Hence we know the scalar clock of the
5372 conflicting thread when the conflicting access was made. */
5373 confTym = VtsID__indexAt( Cfailed, confThr );
5375 /* Using this scalar clock, index into the conflicting thread's
5376 collection of stack traces made each time its vector clock
5377 (hence its scalar clock) changed. This gives the stack
5378 traces at the start and end of the conflicting segment (well,
5379 as per comment just above, of one of the conflicting
5380 segments, if there are more than one). */
5381 key.ull = confTym;
5382 key.ec = NULL;
5383 /* tl_assert(confThr); -- asserted just above */
5384 tl_assert(confThr->local_Kws_n_stacks);
5385 firstIx = lastIx = 0;
5386 found = VG_(lookupXA_UNSAFE)(
5387 confThr->local_Kws_n_stacks,
5388 &key, &firstIx, &lastIx,
5389 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
5391 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
5392 "confTym %llu found %d (%ld,%ld)\n",
5393 Cfailed, Kfailed, Cw,
5394 confThr, confTym, found, firstIx, lastIx);
5395 /* We can't indefinitely collect stack traces at VTS
5396 transitions, since we'd eventually run out of memory. Hence
5397 note_local_Kw_n_stack_for will eventually throw away old
5398 ones, which in turn means we might fail to find index value
5399 confTym in the array. */
5400 if (found) {
5401 ULong_n_EC *pair_start, *pair_end;
5402 pair_start
5403 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
5404 hist1_seg_start = pair_start->ec;
5405 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
5406 pair_end
5407 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
5408 lastIx+1 );
5409 /* from properties of VG_(lookupXA) and the comparison fn used: */
5410 tl_assert(pair_start->ull < pair_end->ull);
5411 hist1_seg_end = pair_end->ec;
5412 /* Could do a bit better here. It may be that pair_end
5413 doesn't have a stack, but the following entries in the
5414 array have the same scalar Kw and do have a stack. So
5415 we should search a bit further along the array than
5416 lastIx+1 if hist1_seg_end is NULL. */
5417 } else {
5418 if (!confThr->llexit_done)
5419 hist1_seg_end = main_get_EC( confThr );
5421 // seg_start could be NULL iff this is the first stack in the thread
5422 //if (seg_start) VG_(pp_ExeContext)(seg_start);
5423 //if (seg_end) VG_(pp_ExeContext)(seg_end);
5424 hist1_conf_thr = confThr->hgthread;
5428 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
5429 szB, isWrite,
5430 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
5433 static Bool is_sane_SVal_C ( SVal sv ) {
5434 Bool leq;
5435 if (!SVal__isC(sv)) return True;
5436 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5437 return leq;
5441 /* Compute new state following a read */
5442 static inline SVal msmcread ( SVal svOld,
5443 /* The following are only needed for
5444 creating error reports. */
5445 Thr* acc_thr,
5446 Addr acc_addr, SizeT szB )
5448 SVal svNew = SVal_INVALID;
5449 stats__msmcread++;
5451 /* Redundant sanity check on the constraints */
5452 if (CHECK_MSM) {
5453 tl_assert(is_sane_SVal_C(svOld));
5456 if (LIKELY(SVal__isC(svOld))) {
5457 VtsID tviR = acc_thr->viR;
5458 VtsID tviW = acc_thr->viW;
5459 VtsID rmini = SVal__unC_Rmin(svOld);
5460 VtsID wmini = SVal__unC_Wmin(svOld);
5461 Bool leq = VtsID__cmpLEQ(rmini,tviR);
5462 if (LIKELY(leq)) {
5463 /* no race */
5464 /* Note: RWLOCK subtlety: use tviW, not tviR */
5465 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5466 goto out;
5467 } else {
5468 /* assert on sanity of constraints. */
5469 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5470 tl_assert(leqxx);
5471 // same as in non-race case
5472 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5473 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
5474 rmini, /* Cfailed */
5475 tviR, /* Kfailed */
5476 wmini /* Cw */ );
5477 goto out;
5480 if (SVal__isA(svOld)) {
5481 /* reading no-access memory (sigh); leave unchanged */
5482 /* check for no pollution */
5483 tl_assert(svOld == SVal_NOACCESS);
5484 svNew = SVal_NOACCESS;
5485 goto out;
5487 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
5488 tl_assert(0);
5490 out:
5491 if (CHECK_MSM) {
5492 tl_assert(is_sane_SVal_C(svNew));
5494 if (UNLIKELY(svNew != svOld)) {
5495 tl_assert(svNew != SVal_INVALID);
5496 if (HG_(clo_history_level) >= 2
5497 && SVal__isC(svOld) && SVal__isC(svNew)) {
5498 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
5499 stats__msmcread_change++;
5502 return svNew;
5506 /* Compute new state following a write */
5507 static inline SVal msmcwrite ( SVal svOld,
5508 /* The following are only needed for
5509 creating error reports. */
5510 Thr* acc_thr,
5511 Addr acc_addr, SizeT szB )
5513 SVal svNew = SVal_INVALID;
5514 stats__msmcwrite++;
5516 /* Redundant sanity check on the constraints */
5517 if (CHECK_MSM) {
5518 tl_assert(is_sane_SVal_C(svOld));
5521 if (LIKELY(SVal__isC(svOld))) {
5522 VtsID tviW = acc_thr->viW;
5523 VtsID wmini = SVal__unC_Wmin(svOld);
5524 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5525 if (LIKELY(leq)) {
5526 /* no race */
5527 svNew = SVal__mkC( tviW, tviW );
5528 goto out;
5529 } else {
5530 VtsID rmini = SVal__unC_Rmin(svOld);
5531 /* assert on sanity of constraints. */
5532 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5533 tl_assert(leqxx);
5534 // same as in non-race case
5535 // proof: in the non-race case, we have
5536 // rmini <= wmini (invar on constraints)
5537 // tviW <= tviR (invar on thread clocks)
5538 // wmini <= tviW (from run-time check)
5539 // hence from transitivity of <= we have
5540 // rmini <= wmini <= tviW
5541 // and so join(rmini,tviW) == tviW
5542 // and join(wmini,tviW) == tviW
5543 // qed.
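         // Editor's sketch of a concrete (non-race) instance, not from
         // the original sources: if at some index T we have rmini[T]=3,
         // wmini[T]=5 and tviW[T]=7, then
         //    join(rmini,tviW)[T] = max(3,7) = 7 = tviW[T]
         //    join(wmini,tviW)[T] = max(5,7) = 7 = tviW[T]
         // exactly as the transitivity argument above predicts.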
5544 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5545 VtsID__join2(wmini, tviW) );
5546 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
5547 wmini, /* Cfailed */
5548 tviW, /* Kfailed */
5549 wmini /* Cw */ );
5550 goto out;
5553 if (SVal__isA(svOld)) {
5554 /* writing no-access memory (sigh); leave unchanged */
5555 /* check for no pollution */
5556 tl_assert(svOld == SVal_NOACCESS);
5557 svNew = SVal_NOACCESS;
5558 goto out;
5560 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
5561 tl_assert(0);
5563 out:
5564 if (CHECK_MSM) {
5565 tl_assert(is_sane_SVal_C(svNew));
5567 if (UNLIKELY(svNew != svOld)) {
5568 tl_assert(svNew != SVal_INVALID);
5569 if (HG_(clo_history_level) >= 2
5570 && SVal__isC(svOld) && SVal__isC(svNew)) {
5571 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
5572 stats__msmcwrite_change++;
5575 return svNew;
5579 /////////////////////////////////////////////////////////
5580 // //
5581 // Apply core MSM to specific memory locations //
5582 // //
5583 /////////////////////////////////////////////////////////
5585 /*------------- ZSM accesses: 8 bit sapply ------------- */
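/* Editor's sketch (not part of the original file): how the address
   decomposition used in the functions below is assumed to work.  The
   concrete values N_LINE_ARANGE == 64 and 8-SVal trees per line are
   assumptions here; the helper is purely illustrative and kept disabled. */
#if 0
static void sketch_addr_decomposition ( Addr a )
{
   UWord cloff = a & 63;      /* assumed: offset within a 64-byte cache line */
   UWord tno   = cloff >> 3;  /* assumed: which of the 8 trees in the line   */
   UWord toff  = a & 7;       /* assumed: offset within that tree, 0 .. 7    */
   VG_(printf)("a=%#lx -> cloff=%lu tno=%lu toff=%lu\n", a, cloff, tno, toff);
}
#endif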
5587 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
5588 CacheLine* cl;
5589 UWord cloff, tno, toff;
5590 SVal svOld, svNew;
5591 UShort descr;
5592 stats__cline_cread08s++;
5593 cl = get_cacheline(a);
5594 cloff = get_cacheline_offset(a);
5595 tno = get_treeno(a);
5596 toff = get_tree_offset(a); /* == 0 .. 7 */
5597 descr = cl->descrs[tno];
5598 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5599 SVal* tree = &cl->svals[tno << 3];
5600 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5601 if (CHECK_ZSM)
5602 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5604 svOld = cl->svals[cloff];
5605 svNew = msmcread( svOld, thr,a,1 );
5606 if (CHECK_ZSM)
5607 tl_assert(svNew != SVal_INVALID);
5608 cl->svals[cloff] = svNew;
5611 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
5612 CacheLine* cl;
5613 UWord cloff, tno, toff;
5614 SVal svOld, svNew;
5615 UShort descr;
5616 stats__cline_cwrite08s++;
5617 cl = get_cacheline(a);
5618 cloff = get_cacheline_offset(a);
5619 tno = get_treeno(a);
5620 toff = get_tree_offset(a); /* == 0 .. 7 */
5621 descr = cl->descrs[tno];
5622 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5623 SVal* tree = &cl->svals[tno << 3];
5624 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5625 if (CHECK_ZSM)
5626 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5628 svOld = cl->svals[cloff];
5629 svNew = msmcwrite( svOld, thr,a,1 );
5630 if (CHECK_ZSM)
5631 tl_assert(svNew != SVal_INVALID);
5632 cl->svals[cloff] = svNew;
5635 /*------------- ZSM accesses: 16 bit sapply ------------- */
5637 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
5638 CacheLine* cl;
5639 UWord cloff, tno, toff;
5640 SVal svOld, svNew;
5641 UShort descr;
5642 stats__cline_cread16s++;
5643 if (UNLIKELY(!aligned16(a))) goto slowcase;
5644 cl = get_cacheline(a);
5645 cloff = get_cacheline_offset(a);
5646 tno = get_treeno(a);
5647 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5648 descr = cl->descrs[tno];
5649 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5650 if (valid_value_is_below_me_16(descr, toff)) {
5651 goto slowcase;
5652 } else {
5653 SVal* tree = &cl->svals[tno << 3];
5654 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5656 if (CHECK_ZSM)
5657 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5659 svOld = cl->svals[cloff];
5660 svNew = msmcread( svOld, thr,a,2 );
5661 if (CHECK_ZSM)
5662 tl_assert(svNew != SVal_INVALID);
5663 cl->svals[cloff] = svNew;
5664 return;
5665 slowcase: /* misaligned, or must go further down the tree */
5666 stats__cline_16to8splits++;
5667 zsm_sapply08__msmcread( thr, a + 0 );
5668 zsm_sapply08__msmcread( thr, a + 1 );
5671 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
5672 CacheLine* cl;
5673 UWord cloff, tno, toff;
5674 SVal svOld, svNew;
5675 UShort descr;
5676 stats__cline_cwrite16s++;
5677 if (UNLIKELY(!aligned16(a))) goto slowcase;
5678 cl = get_cacheline(a);
5679 cloff = get_cacheline_offset(a);
5680 tno = get_treeno(a);
5681 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5682 descr = cl->descrs[tno];
5683 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5684 if (valid_value_is_below_me_16(descr, toff)) {
5685 goto slowcase;
5686 } else {
5687 SVal* tree = &cl->svals[tno << 3];
5688 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5690 if (CHECK_ZSM)
5691 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5693 svOld = cl->svals[cloff];
5694 svNew = msmcwrite( svOld, thr,a,2 );
5695 if (CHECK_ZSM)
5696 tl_assert(svNew != SVal_INVALID);
5697 cl->svals[cloff] = svNew;
5698 return;
5699 slowcase: /* misaligned, or must go further down the tree */
5700 stats__cline_16to8splits++;
5701 zsm_sapply08__msmcwrite( thr, a + 0 );
5702 zsm_sapply08__msmcwrite( thr, a + 1 );
5705 /*------------- ZSM accesses: 32 bit sapply ------------- */
5707 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
5708 CacheLine* cl;
5709 UWord cloff, tno, toff;
5710 SVal svOld, svNew;
5711 UShort descr;
5712 stats__cline_cread32s++;
5713 if (UNLIKELY(!aligned32(a))) goto slowcase;
5714 cl = get_cacheline(a);
5715 cloff = get_cacheline_offset(a);
5716 tno = get_treeno(a);
5717 toff = get_tree_offset(a); /* == 0 or 4 */
5718 descr = cl->descrs[tno];
5719 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5720 if (valid_value_is_above_me_32(descr, toff)) {
5721 SVal* tree = &cl->svals[tno << 3];
5722 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5723 } else {
5724 goto slowcase;
5726 if (CHECK_ZSM)
5727 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5729 svOld = cl->svals[cloff];
5730 svNew = msmcread( svOld, thr,a,4 );
5731 if (CHECK_ZSM)
5732 tl_assert(svNew != SVal_INVALID);
5733 cl->svals[cloff] = svNew;
5734 return;
5735 slowcase: /* misaligned, or must go further down the tree */
5736 stats__cline_32to16splits++;
5737 zsm_sapply16__msmcread( thr, a + 0 );
5738 zsm_sapply16__msmcread( thr, a + 2 );
5741 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
5742 CacheLine* cl;
5743 UWord cloff, tno, toff;
5744 SVal svOld, svNew;
5745 UShort descr;
5746 stats__cline_cwrite32s++;
5747 if (UNLIKELY(!aligned32(a))) goto slowcase;
5748 cl = get_cacheline(a);
5749 cloff = get_cacheline_offset(a);
5750 tno = get_treeno(a);
5751 toff = get_tree_offset(a); /* == 0 or 4 */
5752 descr = cl->descrs[tno];
5753 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5754 if (valid_value_is_above_me_32(descr, toff)) {
5755 SVal* tree = &cl->svals[tno << 3];
5756 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5757 } else {
5758 goto slowcase;
5760 if (CHECK_ZSM)
5761 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5763 svOld = cl->svals[cloff];
5764 svNew = msmcwrite( svOld, thr,a,4 );
5765 if (CHECK_ZSM)
5766 tl_assert(svNew != SVal_INVALID);
5767 cl->svals[cloff] = svNew;
5768 return;
5769 slowcase: /* misaligned, or must go further down the tree */
5770 stats__cline_32to16splits++;
5771 zsm_sapply16__msmcwrite( thr, a + 0 );
5772 zsm_sapply16__msmcwrite( thr, a + 2 );
5775 /*------------- ZSM accesses: 64 bit sapply ------------- */
5777 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
5778 CacheLine* cl;
5779 UWord cloff, tno;
5780 //UWord toff;
5781 SVal svOld, svNew;
5782 UShort descr;
5783 stats__cline_cread64s++;
5784 if (UNLIKELY(!aligned64(a))) goto slowcase;
5785 cl = get_cacheline(a);
5786 cloff = get_cacheline_offset(a);
5787 tno = get_treeno(a);
5788 //toff = get_tree_offset(a); /* == 0, unused */
5789 descr = cl->descrs[tno];
5790 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5791 goto slowcase;
5793 svOld = cl->svals[cloff];
5794 svNew = msmcread( svOld, thr,a,8 );
5795 if (CHECK_ZSM)
5796 tl_assert(svNew != SVal_INVALID);
5797 cl->svals[cloff] = svNew;
5798 return;
5799 slowcase: /* misaligned, or must go further down the tree */
5800 stats__cline_64to32splits++;
5801 zsm_sapply32__msmcread( thr, a + 0 );
5802 zsm_sapply32__msmcread( thr, a + 4 );
5805 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
5806 CacheLine* cl;
5807 UWord cloff, tno;
5808 //UWord toff;
5809 SVal svOld, svNew;
5810 UShort descr;
5811 stats__cline_cwrite64s++;
5812 if (UNLIKELY(!aligned64(a))) goto slowcase;
5813 cl = get_cacheline(a);
5814 cloff = get_cacheline_offset(a);
5815 tno = get_treeno(a);
5816 //toff = get_tree_offset(a); /* == 0, unused */
5817 descr = cl->descrs[tno];
5818 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5819 goto slowcase;
5821 svOld = cl->svals[cloff];
5822 svNew = msmcwrite( svOld, thr,a,8 );
5823 if (CHECK_ZSM)
5824 tl_assert(svNew != SVal_INVALID);
5825 cl->svals[cloff] = svNew;
5826 return;
5827 slowcase: /* misaligned, or must go further down the tree */
5828 stats__cline_64to32splits++;
5829 zsm_sapply32__msmcwrite( thr, a + 0 );
5830 zsm_sapply32__msmcwrite( thr, a + 4 );
5833 /*--------------- ZSM accesses: 8 bit swrite --------------- */
5835 static
5836 void zsm_swrite08 ( Addr a, SVal svNew ) {
5837 CacheLine* cl;
5838 UWord cloff, tno, toff;
5839 UShort descr;
5840 stats__cline_swrite08s++;
5841 cl = get_cacheline(a);
5842 cloff = get_cacheline_offset(a);
5843 tno = get_treeno(a);
5844 toff = get_tree_offset(a); /* == 0 .. 7 */
5845 descr = cl->descrs[tno];
5846 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5847 SVal* tree = &cl->svals[tno << 3];
5848 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5849 if (CHECK_ZSM)
5850 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5852 tl_assert(svNew != SVal_INVALID);
5853 cl->svals[cloff] = svNew;
5856 /*--------------- ZSM accesses: 16 bit swrite --------------- */
5858 static
5859 void zsm_swrite16 ( Addr a, SVal svNew ) {
5860 CacheLine* cl;
5861 UWord cloff, tno, toff;
5862 UShort descr;
5863 stats__cline_swrite16s++;
5864 if (UNLIKELY(!aligned16(a))) goto slowcase;
5865 cl = get_cacheline(a);
5866 cloff = get_cacheline_offset(a);
5867 tno = get_treeno(a);
5868 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5869 descr = cl->descrs[tno];
5870 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5871 if (valid_value_is_below_me_16(descr, toff)) {
5872 /* Writing at this level. Need to fix up 'descr'. */
5873 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5874 /* At this point, the tree does not match cl->descrs[tno] any
5875 more. The assignments below will fix it up. */
5876 } else {
5877 /* We can't indiscriminately write on the w16 node as in the
5878 w64 case, as that might make the node inconsistent with
5879 its parent. So first, pull down to this level. */
5880 SVal* tree = &cl->svals[tno << 3];
5881 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5882 if (CHECK_ZSM)
5883 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5886 tl_assert(svNew != SVal_INVALID);
5887 cl->svals[cloff + 0] = svNew;
5888 cl->svals[cloff + 1] = SVal_INVALID;
5889 return;
5890 slowcase: /* misaligned */
5891 stats__cline_16to8splits++;
5892 zsm_swrite08( a + 0, svNew );
5893 zsm_swrite08( a + 1, svNew );
5896 /*--------------- ZSM accesses: 32 bit swrite --------------- */
5898 static
5899 void zsm_swrite32 ( Addr a, SVal svNew ) {
5900 CacheLine* cl;
5901 UWord cloff, tno, toff;
5902 UShort descr;
5903 stats__cline_swrite32s++;
5904 if (UNLIKELY(!aligned32(a))) goto slowcase;
5905 cl = get_cacheline(a);
5906 cloff = get_cacheline_offset(a);
5907 tno = get_treeno(a);
5908 toff = get_tree_offset(a); /* == 0 or 4 */
5909 descr = cl->descrs[tno];
5910 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5911 if (valid_value_is_above_me_32(descr, toff)) {
5912 /* We can't indiscriminately write on the w32 node as in the
5913 w64 case, as that might make the node inconsistent with
5914 its parent. So first, pull down to this level. */
5915 SVal* tree = &cl->svals[tno << 3];
5916 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5917 if (CHECK_ZSM)
5918 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5919 } else {
5920 /* Writing at this level. Need to fix up 'descr'. */
5921 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5922 /* At this point, the tree does not match cl->descrs[tno] any
5923 more. The assignments below will fix it up. */
5926 tl_assert(svNew != SVal_INVALID);
5927 cl->svals[cloff + 0] = svNew;
5928 cl->svals[cloff + 1] = SVal_INVALID;
5929 cl->svals[cloff + 2] = SVal_INVALID;
5930 cl->svals[cloff + 3] = SVal_INVALID;
5931 return;
5932 slowcase: /* misaligned */
5933 stats__cline_32to16splits++;
5934 zsm_swrite16( a + 0, svNew );
5935 zsm_swrite16( a + 2, svNew );
5938 /*--------------- ZSM accesses: 64 bit swrite --------------- */
5940 static
5941 void zsm_swrite64 ( Addr a, SVal svNew ) {
5942 CacheLine* cl;
5943 UWord cloff, tno;
5944 //UWord toff;
5945 stats__cline_swrite64s++;
5946 if (UNLIKELY(!aligned64(a))) goto slowcase;
5947 cl = get_cacheline(a);
5948 cloff = get_cacheline_offset(a);
5949 tno = get_treeno(a);
5950 //toff = get_tree_offset(a); /* == 0, unused */
5951 cl->descrs[tno] = TREE_DESCR_64;
5952 if (CHECK_ZSM)
5953 tl_assert(svNew != SVal_INVALID); /* EXPENSIVE */
5954 cl->svals[cloff + 0] = svNew;
5955 cl->svals[cloff + 1] = SVal_INVALID;
5956 cl->svals[cloff + 2] = SVal_INVALID;
5957 cl->svals[cloff + 3] = SVal_INVALID;
5958 cl->svals[cloff + 4] = SVal_INVALID;
5959 cl->svals[cloff + 5] = SVal_INVALID;
5960 cl->svals[cloff + 6] = SVal_INVALID;
5961 cl->svals[cloff + 7] = SVal_INVALID;
5962 return;
5963 slowcase: /* misaligned */
5964 stats__cline_64to32splits++;
5965 zsm_swrite32( a + 0, svNew );
5966 zsm_swrite32( a + 4, svNew );
5969 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
5971 static
5972 SVal zsm_sread08 ( Addr a ) {
5973 CacheLine* cl;
5974 UWord cloff, tno, toff;
5975 UShort descr;
5976 stats__cline_sread08s++;
5977 cl = get_cacheline(a);
5978 cloff = get_cacheline_offset(a);
5979 tno = get_treeno(a);
5980 toff = get_tree_offset(a); /* == 0 .. 7 */
5981 descr = cl->descrs[tno];
5982 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5983 SVal* tree = &cl->svals[tno << 3];
5984 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5986 return cl->svals[cloff];
5989 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
5990 SVal sv;
5991 stats__cline_scopy08s++;
5992 sv = zsm_sread08( src );
5993 zsm_swrite08( dst, sv );
5997 /* Block-copy states (needed for implementing realloc()). Note this
5998 doesn't change the filtering arrangements. The caller of
5999 zsm_scopy_range needs to attend to that. */
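/* Editor's sketch (not from the original sources): how a realloc-style
   caller might be expected to use zsm_scopy_range, defined just below.
   The names old_payload, new_payload, old_szB and new_szB are
   hypothetical; the snippet is kept disabled. */
#if 0
static void sketch_realloc_shadow ( Addr old_payload, Addr new_payload,
                                    SizeT old_szB, SizeT new_szB )
{
   /* copy the shadow state for the overlapping prefix of the two blocks */
   SizeT copy_szB = old_szB < new_szB ? old_szB : new_szB;
   /* the two payloads are assumed not to overlap, as zsm_scopy_range requires */
   zsm_scopy_range( old_payload, new_payload, copy_szB );
   /* note: the filtering arrangements still need separate attention */
}
#endif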
6001 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
6003 SizeT i;
6004 if (len == 0)
6005 return;
6007 /* assert for non-overlappingness */
6008 tl_assert(src+len <= dst || dst+len <= src);
6010 /* To be simple, just copy byte by byte. But so as not to wreck
6011 performance for later accesses to dst[0 .. len-1], normalise
6012 destination lines as we finish with them, and also normalise the
6013 line containing the first and last address. */
6014 for (i = 0; i < len; i++) {
6015 Bool normalise
6016 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
6017 || i == 0 /* first in range */
6018 || i == len-1; /* last in range */
6019 zsm_scopy08( src+i, dst+i, normalise );
6024 /* For setting address ranges to a given value. Has considerable
6025 sophistication so as to avoid generating large numbers of pointless
6026 cache loads/writebacks for large ranges. */
6028 /* Do small ranges in-cache, in the obvious way. */
6029 static
6030 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
6032 /* fast track a couple of common cases */
6033 if (len == 4 && aligned32(a)) {
6034 zsm_swrite32( a, svNew );
6035 return;
6037 if (len == 8 && aligned64(a)) {
6038 zsm_swrite64( a, svNew );
6039 return;
6042 /* be completely general (but as efficient as possible) */
6043 if (len == 0) return;
6045 if (!aligned16(a) && len >= 1) {
6046 zsm_swrite08( a, svNew );
6047 a += 1;
6048 len -= 1;
6049 tl_assert(aligned16(a));
6051 if (len == 0) return;
6053 if (!aligned32(a) && len >= 2) {
6054 zsm_swrite16( a, svNew );
6055 a += 2;
6056 len -= 2;
6057 tl_assert(aligned32(a));
6059 if (len == 0) return;
6061 if (!aligned64(a) && len >= 4) {
6062 zsm_swrite32( a, svNew );
6063 a += 4;
6064 len -= 4;
6065 tl_assert(aligned64(a));
6067 if (len == 0) return;
6069 if (len >= 8) {
6070 tl_assert(aligned64(a));
6071 while (len >= 8) {
6072 zsm_swrite64( a, svNew );
6073 a += 8;
6074 len -= 8;
6076 tl_assert(aligned64(a));
6078 if (len == 0) return;
6080 if (len >= 4)
6081 tl_assert(aligned32(a));
6082 if (len >= 4) {
6083 zsm_swrite32( a, svNew );
6084 a += 4;
6085 len -= 4;
6087 if (len == 0) return;
6089 if (len >= 2)
6090 tl_assert(aligned16(a));
6091 if (len >= 2) {
6092 zsm_swrite16( a, svNew );
6093 a += 2;
6094 len -= 2;
6096 if (len == 0) return;
6098 if (len >= 1) {
6099 zsm_swrite08( a, svNew );
6100 //a += 1;
6101 len -= 1;
6103 tl_assert(len == 0);
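/* Editor's worked example (a sketch, not from the original sources):
   for a = 0x1003 and len = 21 the peeling above issues
      zsm_swrite08( 0x1003 )     1 byte, now 16-bit (and 32-bit) aligned
      zsm_swrite32( 0x1004 )     4 bytes, now 64-bit aligned
      zsm_swrite64( 0x1008 )
      zsm_swrite64( 0x1010 )     2 x 8 bytes, leaving len == 0
   that is, 1 + 4 + 8 + 8 == 21 bytes in four shadow stores. */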
6107 /* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
6108 for larger ranges, try to operate directly on the out-of-cache
6109 representation, rather than dragging lines into the cache,
6110 overwriting them, and forcing them out. This turns out to be an
6111 important performance optimisation.
6113 Note that this doesn't change the filtering arrangements. The
6114 caller of zsm_sset_range needs to attend to that. */
6116 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
6118 tl_assert(svNew != SVal_INVALID);
6119 stats__cache_make_New_arange += (ULong)len;
6121 if (0 && len > 500)
6122 VG_(printf)("make New ( %#lx, %lu )\n", a, len );
6124 if (0) {
6125 static UWord n_New_in_cache = 0;
6126 static UWord n_New_not_in_cache = 0;
6127 /* tag is 'a' with the in-line offset masked out,
6128 eg a[31]..a[4] 0000 */
6129 Addr tag = a & ~(N_LINE_ARANGE - 1);
6130 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
6131 if (LIKELY(tag == cache_shmem.tags0[wix])) {
6132 n_New_in_cache++;
6133 } else {
6134 n_New_not_in_cache++;
6136 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
6137 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
6138 n_New_in_cache, n_New_not_in_cache );
6141 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
6142 zsm_sset_range_SMALL( a, len, svNew );
6143 } else {
6144 Addr before_start = a;
6145 Addr aligned_start = cacheline_ROUNDUP(a);
6146 Addr after_start = cacheline_ROUNDDN(a + len);
6147 UWord before_len = aligned_start - before_start;
6148 UWord aligned_len = after_start - aligned_start;
6149 UWord after_len = a + len - after_start;
6150 tl_assert(before_start <= aligned_start);
6151 tl_assert(aligned_start <= after_start);
6152 tl_assert(before_len < N_LINE_ARANGE);
6153 tl_assert(after_len < N_LINE_ARANGE);
6154 tl_assert(get_cacheline_offset(aligned_start) == 0);
6155 if (get_cacheline_offset(a) == 0) {
6156 tl_assert(before_len == 0);
6157 tl_assert(a == aligned_start);
6159 if (get_cacheline_offset(a+len) == 0) {
6160 tl_assert(after_len == 0);
6161 tl_assert(after_start == a+len);
6163 if (before_len > 0) {
6164 zsm_sset_range_SMALL( before_start, before_len, svNew );
6166 if (after_len > 0) {
6167 zsm_sset_range_SMALL( after_start, after_len, svNew );
6169 stats__cache_make_New_inZrep += (ULong)aligned_len;
6171 while (1) {
6172 Addr tag;
6173 UWord wix;
6174 if (aligned_start >= after_start)
6175 break;
6176 tl_assert(get_cacheline_offset(aligned_start) == 0);
6177 tag = aligned_start & ~(N_LINE_ARANGE - 1);
6178 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
6179 if (tag == cache_shmem.tags0[wix]) {
6180 UWord i;
6181 for (i = 0; i < N_LINE_ARANGE / 8; i++)
6182 zsm_swrite64( aligned_start + i * 8, svNew );
6183 } else {
6184 UWord i;
6185 Word zix;
6186 SecMap* sm;
6187 LineZ* lineZ;
6188 /* This line is not in the cache. Do not force it in; instead
6189 modify it in-place. */
6190 /* find the Z line to write in and rcdec it or the
6191 associated F line. */
6192 find_Z_for_writing( &sm, &zix, tag );
6193 tl_assert(sm);
6194 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
6195 lineZ = &sm->linesZ[zix];
6196 lineZ->dict[0] = svNew;
6197 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
6198 for (i = 0; i < N_LINE_ARANGE/4; i++)
6199 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6200 rcinc_LineZ(lineZ);
6202 aligned_start += N_LINE_ARANGE;
6203 aligned_len -= N_LINE_ARANGE;
6205 tl_assert(aligned_start == after_start);
6206 tl_assert(aligned_len == 0);
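/* Editor's worked example of the splitting above (a sketch; the value
   N_LINE_ARANGE == 64 is an assumption here, not taken from this
   excerpt): a = 0x10030, len = 0x200 gives
      before_start = 0x10030  aligned_start = 0x10040  after_start = 0x10200
      before_len   = 0x10     aligned_len   = 0x1C0    after_len   = 0x30
   so 0x10 bytes in front and 0x30 bytes behind go via
   zsm_sset_range_SMALL, while the 7 whole cache lines in between are
   set either in the cache or directly in their Z representation. */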
6211 /////////////////////////////////////////////////////////
6212 // //
6213 // Front-filtering accesses //
6214 // //
6215 /////////////////////////////////////////////////////////
6217 static UWord stats__f_ac = 0;
6218 static UWord stats__f_sk = 0;
6220 #if 0
6221 # define STATS__F_SHOW \
6222 do { \
6223 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
6224 VG_(printf)("filters: ac %lu sk %lu\n", \
6225 stats__f_ac, stats__f_sk); \
6226 } while (0)
6227 #else
6228 # define STATS__F_SHOW /* */
6229 #endif
6231 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
6232 stats__f_ac++;
6233 STATS__F_SHOW;
6234 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
6235 stats__f_sk++;
6236 return;
6238 zsm_sapply08__msmcwrite(thr, a);
6241 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
6242 stats__f_ac++;
6243 STATS__F_SHOW;
6244 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
6245 stats__f_sk++;
6246 return;
6248 zsm_sapply16__msmcwrite(thr, a);
6251 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
6252 stats__f_ac++;
6253 STATS__F_SHOW;
6254 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
6255 stats__f_sk++;
6256 return;
6258 zsm_sapply32__msmcwrite(thr, a);
6261 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
6262 stats__f_ac++;
6263 STATS__F_SHOW;
6264 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
6265 stats__f_sk++;
6266 return;
6268 zsm_sapply64__msmcwrite(thr, a);
6271 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
6273 /* fast track a couple of common cases */
6274 if (len == 4 && aligned32(a)) {
6275 zsm_sapply32_f__msmcwrite( thr, a );
6276 return;
6278 if (len == 8 && aligned64(a)) {
6279 zsm_sapply64_f__msmcwrite( thr, a );
6280 return;
6283 /* be completely general (but as efficient as possible) */
6284 if (len == 0) return;
6286 if (!aligned16(a) && len >= 1) {
6287 zsm_sapply08_f__msmcwrite( thr, a );
6288 a += 1;
6289 len -= 1;
6290 tl_assert(aligned16(a));
6292 if (len == 0) return;
6294 if (!aligned32(a) && len >= 2) {
6295 zsm_sapply16_f__msmcwrite( thr, a );
6296 a += 2;
6297 len -= 2;
6298 tl_assert(aligned32(a));
6300 if (len == 0) return;
6302 if (!aligned64(a) && len >= 4) {
6303 zsm_sapply32_f__msmcwrite( thr, a );
6304 a += 4;
6305 len -= 4;
6306 tl_assert(aligned64(a));
6308 if (len == 0) return;
6310 if (len >= 8) {
6311 tl_assert(aligned64(a));
6312 while (len >= 8) {
6313 zsm_sapply64_f__msmcwrite( thr, a );
6314 a += 8;
6315 len -= 8;
6317 tl_assert(aligned64(a));
6319 if (len == 0) return;
6321 if (len >= 4)
6322 tl_assert(aligned32(a));
6323 if (len >= 4) {
6324 zsm_sapply32_f__msmcwrite( thr, a );
6325 a += 4;
6326 len -= 4;
6328 if (len == 0) return;
6330 if (len >= 2)
6331 tl_assert(aligned16(a));
6332 if (len >= 2) {
6333 zsm_sapply16_f__msmcwrite( thr, a );
6334 a += 2;
6335 len -= 2;
6337 if (len == 0) return;
6339 if (len >= 1) {
6340 zsm_sapply08_f__msmcwrite( thr, a );
6341 //a += 1;
6342 len -= 1;
6344 tl_assert(len == 0);
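/* Editor's sketch (not from the original sources): the filtered entry
   points above are assumed to be what the tool calls for each guest
   write; a hypothetical instrumentation callback would simply funnel
   the access through zsm_sapplyNN_f__msmcwrite and let the per-thread
   filter decide whether the full MSM machinery needs to run. */
#if 0
static void sketch_on_guest_write ( Thr* thr, Addr a, SizeT len )
{
   /* the cheap filter check happens inside; most repeated accesses skip */
   zsm_sapplyNN_f__msmcwrite( thr, a, len );
}
#endif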
6347 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
6348 stats__f_ac++;
6349 STATS__F_SHOW;
6350 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
6351 stats__f_sk++;
6352 return;
6354 zsm_sapply08__msmcread(thr, a);
6357 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
6358 stats__f_ac++;
6359 STATS__F_SHOW;
6360 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
6361 stats__f_sk++;
6362 return;
6364 zsm_sapply16__msmcread(thr, a);
6367 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
6368 stats__f_ac++;
6369 STATS__F_SHOW;
6370 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
6371 stats__f_sk++;
6372 return;
6374 zsm_sapply32__msmcread(thr, a);
6377 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
6378 stats__f_ac++;
6379 STATS__F_SHOW;
6380 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
6381 stats__f_sk++;
6382 return;
6384 zsm_sapply64__msmcread(thr, a);
6387 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
6389 /* fast track a couple of common cases */
6390 if (len == 4 && aligned32(a)) {
6391 zsm_sapply32_f__msmcread( thr, a );
6392 return;
6394 if (len == 8 && aligned64(a)) {
6395 zsm_sapply64_f__msmcread( thr, a );
6396 return;
6399 /* be completely general (but as efficient as possible) */
6400 if (len == 0) return;
6402 if (!aligned16(a) && len >= 1) {
6403 zsm_sapply08_f__msmcread( thr, a );
6404 a += 1;
6405 len -= 1;
6406 tl_assert(aligned16(a));
6408 if (len == 0) return;
6410 if (!aligned32(a) && len >= 2) {
6411 zsm_sapply16_f__msmcread( thr, a );
6412 a += 2;
6413 len -= 2;
6414 tl_assert(aligned32(a));
6416 if (len == 0) return;
6418 if (!aligned64(a) && len >= 4) {
6419 zsm_sapply32_f__msmcread( thr, a );
6420 a += 4;
6421 len -= 4;
6422 tl_assert(aligned64(a));
6424 if (len == 0) return;
6426 if (len >= 8) {
6427 tl_assert(aligned64(a));
6428 while (len >= 8) {
6429 zsm_sapply64_f__msmcread( thr, a );
6430 a += 8;
6431 len -= 8;
6433 tl_assert(aligned64(a));
6435 if (len == 0) return;
6437 if (len >= 4)
6438 tl_assert(aligned32(a));
6439 if (len >= 4) {
6440 zsm_sapply32_f__msmcread( thr, a );
6441 a += 4;
6442 len -= 4;
6444 if (len == 0) return;
6446 if (len >= 2)
6447 tl_assert(aligned16(a));
6448 if (len >= 2) {
6449 zsm_sapply16_f__msmcread( thr, a );
6450 a += 2;
6451 len -= 2;
6453 if (len == 0) return;
6455 if (len >= 1) {
6456 zsm_sapply08_f__msmcread( thr, a );
6457 //a += 1;
6458 len -= 1;
6460 tl_assert(len == 0);
6463 void libhb_Thr_resumes ( Thr* thr )
6465 if (0) VG_(printf)("resume %p\n", thr);
6466 tl_assert(thr);
6467 tl_assert(!thr->llexit_done);
6468 Filter__clear(thr->filter, "libhb_Thr_resumes");
6469 /* A kludge, but .. if this thread doesn't have any marker stacks
6470 at all, get one right now. This is easier than figuring out
6471 exactly when at thread startup we can and can't take a stack
6472 snapshot. */
6473 if (HG_(clo_history_level) == 1) {
6474 tl_assert(thr->local_Kws_n_stacks);
6475 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
6476 note_local_Kw_n_stack_for(thr);
6481 /////////////////////////////////////////////////////////
6482 // //
6483 // Synchronisation objects //
6484 // //
6485 /////////////////////////////////////////////////////////
6487 /* A double linked list of all the SO's. */
6488 SO* admin_SO = NULL;
6490 static SO* SO__Alloc ( void )
6492 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6493 so->viR = VtsID_INVALID;
6494 so->viW = VtsID_INVALID;
6495 so->magic = SO_MAGIC;
6496 /* Add to double linked list */
6497 if (admin_SO) {
6498 tl_assert(admin_SO->admin_prev == NULL);
6499 admin_SO->admin_prev = so;
6500 so->admin_next = admin_SO;
6501 } else {
6502 so->admin_next = NULL;
6504 so->admin_prev = NULL;
6505 admin_SO = so;
6506 /* */
6507 return so;
6510 static void SO__Dealloc ( SO* so )
6512 tl_assert(so);
6513 tl_assert(so->magic == SO_MAGIC);
6514 if (so->viR == VtsID_INVALID) {
6515 tl_assert(so->viW == VtsID_INVALID);
6516 } else {
6517 tl_assert(so->viW != VtsID_INVALID);
6518 VtsID__rcdec(so->viR);
6519 VtsID__rcdec(so->viW);
6521 so->magic = 0;
6522 /* Del from double linked list */
6523 if (so->admin_prev)
6524 so->admin_prev->admin_next = so->admin_next;
6525 if (so->admin_next)
6526 so->admin_next->admin_prev = so->admin_prev;
6527 if (so == admin_SO)
6528 admin_SO = so->admin_next;
6529 /* */
6530 HG_(free)( so );
6534 /////////////////////////////////////////////////////////
6535 // //
6536 // Top Level API //
6537 // //
6538 /////////////////////////////////////////////////////////
6540 static void show_thread_state ( const HChar* str, Thr* t )
6542 if (1) return;
6543 if (t->viR == t->viW) {
6544 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6545 VtsID__pp( t->viR );
6546 VG_(printf)("%s","\n");
6547 } else {
6548 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6549 VtsID__pp( t->viR );
6550 VG_(printf)(" viW %u==", t->viW);
6551 VtsID__pp( t->viW );
6552 VG_(printf)("%s","\n");
6557 Thr* libhb_init (
6558 void (*get_stacktrace)( Thr*, Addr*, UWord ),
6559 ExeContext* (*get_EC)( Thr* )
6562 Thr* thr;
6563 VtsID vi;
6565 // We will have to store a large number of these,
6566 // so make sure they're the size we expect them to be.
6567 STATIC_ASSERT(sizeof(ScalarTS) == 8);
6569 /* because first 1024 unusable */
6570 STATIC_ASSERT(SCALARTS_N_THRBITS >= 11);
6571 /* so as to fit in a UInt w/ 5 bits to spare (see defn of
6572 Thr_n_RCEC and TSW). */
6573 STATIC_ASSERT(SCALARTS_N_THRBITS <= 27);
6575 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6576 (32-bit). It's not correctness-critical, but there are a lot of
6577 them, so it's important from a space viewpoint. Unfortunately
6578 we simply can't pack it into 2 words on a 32-bit target. */
6579 STATIC_ASSERT( (sizeof(UWord) == 8 && sizeof(Thr_n_RCEC) == 16)
6580 || (sizeof(UWord) == 4 && sizeof(Thr_n_RCEC) == 12));
6581 STATIC_ASSERT(sizeof(TSW) == sizeof(UInt));
6583 /* Word sets really are 32 bits. Even on a 64 bit target. */
6584 STATIC_ASSERT(sizeof(WordSetID) == 4);
6585 STATIC_ASSERT(sizeof(WordSet) == sizeof(WordSetID));
6587 tl_assert(get_stacktrace);
6588 tl_assert(get_EC);
6589 main_get_stacktrace = get_stacktrace;
6590 main_get_EC = get_EC;
6592 // No need to initialise hg_wordfm.
6593 // No need to initialise hg_wordset.
6595 /* Allocated once and never deallocated. Used as a temporary in
6596 VTS singleton, tick and join operations. */
6597 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6598 temp_max_sized_VTS->id = VtsID_INVALID;
6599 verydead_thread_tables_init();
6600 vts_set_init();
6601 vts_tab_init();
6602 event_map_init();
6603 VtsID__invalidate_caches();
6605 // initialise shadow memory
6606 zsm_init( );
6608 thr = Thr__new();
6609 vi = VtsID__mk_Singleton( thr, 1 );
6610 thr->viR = vi;
6611 thr->viW = vi;
6612 VtsID__rcinc(thr->viR);
6613 VtsID__rcinc(thr->viW);
6615 show_thread_state(" root", thr);
6616 return thr;
6620 Thr* libhb_create ( Thr* parent )
6622 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6623 the child's index. Since the child's index is guaranteed
6624 unique, it has never been seen before, so the implicit value
6625 before the tick is zero and after that is one. */
6626 Thr* child = Thr__new();
6628 child->viR = VtsID__tick( parent->viR, child );
6629 child->viW = VtsID__tick( parent->viW, child );
6630 Filter__clear(child->filter, "libhb_create(child)");
6631 VtsID__rcinc(child->viR);
6632 VtsID__rcinc(child->viW);
6633 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
6634 early for that - it may not have a valid TId yet. So, let
6635 libhb_Thr_resumes pick it up the first time the thread runs. */
6637 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6638 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6640 /* and the parent has to move along too */
6641 VtsID__rcdec(parent->viR);
6642 VtsID__rcdec(parent->viW);
6643 parent->viR = VtsID__tick( parent->viR, parent );
6644 parent->viW = VtsID__tick( parent->viW, parent );
6645 Filter__clear(parent->filter, "libhb_create(parent)");
6646 VtsID__rcinc(parent->viR);
6647 VtsID__rcinc(parent->viW);
6648 note_local_Kw_n_stack_for( parent );
6650 show_thread_state(" child", child);
6651 show_thread_state("parent", parent);
6653 return child;
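/* Editor's note -- an illustrative trace of the above (a sketch, not
   from the original sources).  If the parent P enters with
   viR = viW = { P:5 }, then afterwards
      child  C:  viR = viW = { P:5, C:1 }   (parent's VTSs ticked at C)
      parent P:  viR = viW = { P:6 }        (parent ticked at its own slot)
   which is why the asserts above can rely on the child's own index
   being exactly 1. */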
6656 /* Shut down the library, and print stats (in fact that's _all_
6657 this is for). */
6658 void libhb_shutdown ( Bool show_stats )
6660 if (show_stats) {
6661 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6662 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6663 stats__secmaps_allocd,
6664 stats__secmap_ga_space_covered);
6665 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6666 stats__secmap_linesZ_allocd,
6667 stats__secmap_linesZ_bytes);
6668 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)"
6669 " (%'10lu used)\n",
6670 VG_(sizePA) (LineF_pool_allocator),
6671 VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
6672 shmem__SecMap_used_linesF());
6673 VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6674 " #%lu scanGC \n",
6675 stats__secmaps_in_map_shmem,
6676 shmem__SecMap_do_GC(False /* really do GC */),
6677 stats__secmaps_scanGC);
6678 tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6679 VG_(printf)(" secmaps: %'10lu in freelist,"
6680 " total (scanGCed %'lu, ssetGCed %'lu)\n",
6681 SecMap_freelist_length(),
6682 stats__secmaps_scanGCed,
6683 stats__secmaps_ssetGCed);
6684 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6685 stats__secmaps_search, stats__secmaps_search_slow);
6687 VG_(printf)("%s","\n");
6688 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6689 stats__cache_totrefs, stats__cache_totmisses );
6690 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6691 stats__cache_Z_fetches, stats__cache_F_fetches );
6692 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6693 stats__cache_Z_wbacks, stats__cache_F_wbacks );
6694 VG_(printf)(" cache: %'14lu flushes_invals\n",
6695 stats__cache_flushes_invals );
6696 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6697 stats__cache_make_New_arange,
6698 stats__cache_make_New_inZrep);
6700 VG_(printf)("%s","\n");
6701 VG_(printf)(" cline: %'10lu normalises\n",
6702 stats__cline_normalises );
6703 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6704 stats__cline_cread64s,
6705 stats__cline_cread32s,
6706 stats__cline_cread16s,
6707 stats__cline_cread08s );
6708 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6709 stats__cline_cwrite64s,
6710 stats__cline_cwrite32s,
6711 stats__cline_cwrite16s,
6712 stats__cline_cwrite08s );
6713 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6714 stats__cline_swrite64s,
6715 stats__cline_swrite32s,
6716 stats__cline_swrite16s,
6717 stats__cline_swrite08s );
6718 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6719 stats__cline_sread08s, stats__cline_scopy08s );
6720 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
6721 " 2to1 %'12lu\n",
6722 stats__cline_64to32splits, stats__cline_32to16splits,
6723 stats__cline_16to8splits );
6724 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
6725 " 2to1 %'12lu\n",
6726 stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6727 stats__cline_16to8pulldown );
6728 if (0)
6729 VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
6730 " covers %ld bytes of arange\n",
6731 (Word)sizeof(LineZ),
6732 (Word)N_LINE_ARANGE);
6734 VG_(printf)("%s","\n");
6736 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
6737 stats__msmcread, stats__msmcread_change);
6738 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
6739 stats__msmcwrite, stats__msmcwrite_change);
6740 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6741 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
6742 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6743 stats__join2_queries, stats__join2_misses);
6745 VG_(printf)("%s","\n");
6746 VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6747 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6748 VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6749 stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6750 VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
6751 " (%'lu allocd)\n",
6752 stats__vts_set__focaa, stats__vts_set__focaa_a );
6753 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6754 stats__vts__indexat_slow );
6756 VG_(printf)("%s","\n");
6757 VG_(printf)(
6758 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6759 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6761 VG_(printf)(" libhb: #%lu vts_tab GC #%lu vts pruning\n",
6762 stats__vts_tab_GC, stats__vts_pruning);
6763 VG_(printf)( " libhb: %lu entries in vts_set\n",
6764 VG_(sizeFM)( vts_set ) );
6766 VG_(printf)("%s","\n");
6768 UInt live = 0;
6769 UInt llexit_done = 0;
6770 UInt joinedwith_done = 0;
6771 UInt llexit_and_joinedwith_done = 0;
6773 Thread* hgthread = get_admin_threads();
6774 tl_assert(hgthread);
6775 while (hgthread) {
6776 Thr* hbthr = hgthread->hbthr;
6777 tl_assert(hbthr);
6778 if (hbthr->llexit_done && hbthr->joinedwith_done)
6779 llexit_and_joinedwith_done++;
6780 else if (hbthr->llexit_done)
6781 llexit_done++;
6782 else if (hbthr->joinedwith_done)
6783 joinedwith_done++;
6784 else
6785 live++;
6786 hgthread = hgthread->admin;
6788 VG_(printf)(" libhb: threads live: %u exit_and_joinedwith %u"
6789 " exit %u joinedwith %u\n",
6790 live, llexit_and_joinedwith_done,
6791 llexit_done, joinedwith_done);
6792 VG_(printf)(" libhb: %d verydead_threads, "
6793 "%d verydead_threads_not_pruned\n",
6794 (int) VG_(sizeXA)( verydead_thread_table),
6795 (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
6796 tl_assert (VG_(sizeXA)( verydead_thread_table)
6797 + VG_(sizeXA)( verydead_thread_table_not_pruned)
6798 == llexit_and_joinedwith_done);
6801 VG_(printf)("%s","\n");
6802 VG_(printf)( " libhb: oldrefHTN %lu (%'d bytes)\n",
6803 oldrefHTN, (int)(oldrefHTN * sizeof(OldRef)));
6804 tl_assert (oldrefHTN == VG_(HT_count_nodes) (oldrefHT));
6805 VG_(printf)( " libhb: oldref lookup found=%lu notfound=%lu\n",
6806 stats__evm__lookup_found, stats__evm__lookup_notfound);
6807 if (VG_(clo_verbosity) > 1)
6808 VG_(HT_print_stats) (oldrefHT, cmp_oldref_tsw);
6809 VG_(printf)( " libhb: oldref bind tsw/rcec "
6810 "==/==:%'lu ==/!=:%'lu !=/!=:%'lu\n",
6811 stats__ctxt_eq_tsw_eq_rcec, stats__ctxt_eq_tsw_neq_rcec,
6812 stats__ctxt_neq_tsw_neq_rcec);
6813 VG_(printf)( " libhb: ctxt__rcdec calls %'lu. rcec gc discards %'lu\n",
6814 stats__ctxt_rcdec_calls, stats__ctxt_rcec_gc_discards);
6815 VG_(printf)( " libhb: contextTab: %lu slots,"
6816 " %lu cur ents(ref'd %lu),"
6817 " %lu max ents\n",
6818 (UWord)N_RCEC_TAB,
6819 stats__ctxt_tab_curr, RCEC_referenced,
6820 stats__ctxt_tab_max );
6821 VG_(printf) (" libhb: stats__cached_rcec "
6822 "identical %'lu updated %'lu fresh %'lu\n",
6823 stats__cached_rcec_identical, stats__cached_rcec_updated,
6824 stats__cached_rcec_fresh);
6825 if (stats__cached_rcec_diff > 0)
6826 VG_(printf) (" libhb: stats__cached_rcec diff unknown reason %'lu\n",
6827 stats__cached_rcec_diff);
6828 if (stats__cached_rcec_diff_known_reason > 0)
6829 VG_(printf) (" libhb: stats__cached_rcec diff known reason %'lu\n",
6830 stats__cached_rcec_diff_known_reason);
6833 # define MAXCHAIN 10
6834 UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
6835 UInt non0chain = 0;
6836 UInt n;
6837 UInt i;
6838 RCEC *p;
6840 for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
6841 for (i = 0; i < N_RCEC_TAB; i++) {
6842 n = 0;
6843 for (p = contextTab[i]; p; p = p->next)
6844 n++;
6845 if (n < MAXCHAIN)
6846 chains[n]++;
6847 else
6848 chains[MAXCHAIN]++;
6849 if (n > 0)
6850 non0chain++;
6852 VG_(printf)( " libhb: contextTab chain of [length]=nchain."
6853 " Avg chain len %3.1f\n"
6854 " ",
6855 (Double)stats__ctxt_tab_curr
6856 / (Double)(non0chain ? non0chain : 1));
6857 for (i = 0; i <= MAXCHAIN; i++) {
6858 if (chains[i] != 0)
6859 VG_(printf)( "[%u%s]=%u ",
6860 i, i == MAXCHAIN ? "+" : "",
6861 chains[i]);
6863 VG_(printf)( "\n");
6864 # undef MAXCHAIN
6866 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6867 stats__ctxt_tab_qs,
6868 stats__ctxt_tab_cmps );
6869 #if 0
6870 VG_(printf)("sizeof(CacheLine) = %zu\n", sizeof(CacheLine));
6871 VG_(printf)("sizeof(LineZ) = %zu\n", sizeof(LineZ));
6872 VG_(printf)("sizeof(LineF) = %zu\n", sizeof(LineF));
6873 VG_(printf)("sizeof(SecMap) = %zu\n", sizeof(SecMap));
6874 VG_(printf)("sizeof(Cache) = %zu\n", sizeof(Cache));
6875 VG_(printf)("sizeof(SMCacheEnt) = %zu\n", sizeof(SMCacheEnt));
6876 VG_(printf)("sizeof(CountedSVal) = %zu\n", sizeof(CountedSVal));
6877 VG_(printf)("sizeof(VTS) = %zu\n", sizeof(VTS));
6878 VG_(printf)("sizeof(ScalarTS) = %zu\n", sizeof(ScalarTS));
6879 VG_(printf)("sizeof(VtsTE) = %zu\n", sizeof(VtsTE));
6881 VG_(printf)("sizeof(struct _Thr) = %zu\n", sizeof(struct _Thr));
6882 VG_(printf)("sizeof(RCEC) = %zu\n", sizeof(RCEC));
6883 VG_(printf)("sizeof(struct _SO) = %zu\n", sizeof(struct _SO));
6884 #endif
6886 VG_(printf)("%s","<<< END libhb stats >>>\n");
6887 VG_(printf)("%s","\n");
6892 /* Receive notification that a thread has low level exited. The
6893 significance here is that we do not expect to see any more memory
6894 references from it. */
6895 void libhb_async_exit ( Thr* thr )
6897 tl_assert(thr);
6898 tl_assert(!thr->llexit_done);
6899 thr->llexit_done = True;
6901 /* Check nobody messed up with the cached_rcec */
6902 tl_assert (thr->cached_rcec.magic == RCEC_MAGIC);
6903 tl_assert (thr->cached_rcec.rc == 0);
6904 tl_assert (thr->cached_rcec.rcX == 0);
6905 tl_assert (thr->cached_rcec.next == NULL);
6907 /* Just to be sure, declare the cached stack invalid. */
6908 set_cached_rcec_validity(thr, False);
6910 /* free up Filter and local_Kws_n_stacks (well, actually not the
6911 latter ..) */
6912 tl_assert(thr->filter);
6913 HG_(free)(thr->filter);
6914 thr->filter = NULL;
6916 /* Tell the VTS mechanism this thread has exited, so it can
6917 participate in VTS pruning. Note this can only happen if the
6918 thread has both ll_exited and has been joined with. */
6919 if (thr->joinedwith_done)
6920 VTS__declare_thread_very_dead(thr);
6922 /* Another space-accuracy tradeoff. Do we want to be able to show
6923 H1 history for conflicts in threads which have since exited? If
6924 yes, then we better not free up thr->local_Kws_n_stacks. The
6925 downside is a potential per-thread leak of up to
6926 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6927 XArray average overcommit factor is (1.5 I'd guess). */
6928 // hence:
6929 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6930 // thr->local_Kws_n_stacks = NULL;
6933 /* Receive notification that a thread has been joined with. The
6934 significance here is that we do not expect to see any further
6935 references to its vector clocks (Thr::viR and Thr::viW). */
6936 void libhb_joinedwith_done ( Thr* thr )
6938 tl_assert(thr);
6939 /* Caller must ensure that this is only ever called once per Thr. */
6940 tl_assert(!thr->joinedwith_done);
6941 thr->joinedwith_done = True;
6942 if (thr->llexit_done)
6943 VTS__declare_thread_very_dead(thr);
6947 /* Both Segs and SOs point to VTSs. However, there is no sharing, so
6948 a Seg that points at a VTS is its one-and-only owner, and ditto for
6949 a SO that points at a VTS. */
6951 SO* libhb_so_alloc ( void )
6953 return SO__Alloc();
6956 void libhb_so_dealloc ( SO* so )
6958 tl_assert(so);
6959 tl_assert(so->magic == SO_MAGIC);
6960 SO__Dealloc(so);
6963 /* See comments in libhb.h for details on the meaning of
6964 strong vs weak sends and strong vs weak receives. */
6965 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6967 /* Copy the VTSs from 'thr' into the sync object, and then move
6968 the thread along one step. */
6970 tl_assert(so);
6971 tl_assert(so->magic == SO_MAGIC);
6973 /* stay sane .. a thread's read-clock must always lead or be the
6974 same as its write-clock */
6975 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6976 tl_assert(leq);
6979 /* since we're overwriting the VtsIDs in the SO, we need to drop
6980 any references made by the previous contents thereof */
6981 if (so->viR == VtsID_INVALID) {
6982 tl_assert(so->viW == VtsID_INVALID);
6983 so->viR = thr->viR;
6984 so->viW = thr->viW;
6985 VtsID__rcinc(so->viR);
6986 VtsID__rcinc(so->viW);
6987 } else {
6988 /* In a strong send, we dump any previous VC in the SO and
6989 install the sending thread's VC instead. For a weak send we
6990 must join2 with what's already there. */
6991 tl_assert(so->viW != VtsID_INVALID);
6992 VtsID__rcdec(so->viR);
6993 VtsID__rcdec(so->viW);
6994 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6995 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6996 VtsID__rcinc(so->viR);
6997 VtsID__rcinc(so->viW);
7000 /* move both parent clocks along */
7001 VtsID__rcdec(thr->viR);
7002 VtsID__rcdec(thr->viW);
7003 thr->viR = VtsID__tick( thr->viR, thr );
7004 thr->viW = VtsID__tick( thr->viW, thr );
7005 if (!thr->llexit_done) {
7006 Filter__clear(thr->filter, "libhb_so_send");
7007 note_local_Kw_n_stack_for(thr);
7009 VtsID__rcinc(thr->viR);
7010 VtsID__rcinc(thr->viW);
7012 if (strong_send)
7013 show_thread_state("s-send", thr);
7014 else
7015 show_thread_state("w-send", thr);
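/* Editor's note -- a worked instance of the send rules above (a sketch,
   not from the original sources).  With thr->viW = {T1:4, T2:7} and the
   SO already holding viW = {T1:2, T2:9}:
      strong send:  so->viW := {T1:4, T2:7}          (overwrite)
      weak send:    so->viW := {T1:4, T2:9}          (join2)
   (and likewise for viR); in both cases the sender then ticks its own
   clocks and clears its filter. */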
7018 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
7020 tl_assert(so);
7021 tl_assert(so->magic == SO_MAGIC);
7023 if (so->viR != VtsID_INVALID) {
7024 tl_assert(so->viW != VtsID_INVALID);
7026 /* Weak receive (basically, an R-acquisition of a R-W lock).
7027 This advances the read-clock of the receiver, but not the
7028 write-clock. */
7029 VtsID__rcdec(thr->viR);
7030 thr->viR = VtsID__join2( thr->viR, so->viR );
7031 VtsID__rcinc(thr->viR);
7033 /* At one point (r10589) it seemed safest to tick the clocks for
7034 the receiving thread after the join. But on reflection, I
7035 wonder if that might cause it to 'overtake' constraints,
7036 which could lead to missing races. So, back out that part of
7037 r10589. */
7038 //VtsID__rcdec(thr->viR);
7039 //thr->viR = VtsID__tick( thr->viR, thr );
7040 //VtsID__rcinc(thr->viR);
7042 /* For a strong receive, we also advance the receiver's write
7043 clock, which means the receive as a whole is essentially
7044 equivalent to a W-acquisition of a R-W lock. */
7045 if (strong_recv) {
7046 VtsID__rcdec(thr->viW);
7047 thr->viW = VtsID__join2( thr->viW, so->viW );
7048 VtsID__rcinc(thr->viW);
7050 /* See comment just above, re r10589. */
7051 //VtsID__rcdec(thr->viW);
7052 //thr->viW = VtsID__tick( thr->viW, thr );
7053 //VtsID__rcinc(thr->viW);
7056 if (thr->filter)
7057 Filter__clear(thr->filter, "libhb_so_recv");
7058 note_local_Kw_n_stack_for(thr);
7060 if (strong_recv)
7061 show_thread_state("s-recv", thr);
7062 else
7063 show_thread_state("w-recv", thr);
7065 } else {
7066 tl_assert(so->viW == VtsID_INVALID);
7067 /* Deal with degenerate case: 'so' has no vts, so there has been
7068 no message posted to it. Just ignore this case. */
7069 show_thread_state("d-recv", thr);
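/* Editor's note -- a worked instance of the receive rules above (a
   sketch, not from the original sources).  With
      thr = (viR {T1:4}, viW {T1:4})  and  so = (viR {T2:7}, viW {T2:7}),
   a weak receive gives thr = (viR {T1:4, T2:7}, viW {T1:4}) -- the
   R-acquisition case -- while a strong receive also joins viW, giving
   viW = {T1:4, T2:7}, the W-acquisition case described above. */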
7073 Bool libhb_so_everSent ( SO* so )
7075 if (so->viR == VtsID_INVALID) {
7076 tl_assert(so->viW == VtsID_INVALID);
7077 return False;
7078 } else {
7079 tl_assert(so->viW != VtsID_INVALID);
7080 return True;
#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
{
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

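/* Illustrative note: to trace the shadow state of a particular address,
   set XXX1 (and/or XXX2) above to that address and flip the
   "if (0 && TRACEME(...))" guards at the call sites below to
   "if (1 && TRACEME(...))".  Each guarded range operation then prints,
   via trace(), the shadow value at the start of the range together with
   the thread state, both before and after the update. */
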
void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

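/* Illustrative note: SVal__mkC(thr->viW, thr->viW) builds a constrained
   shadow value whose read- and write-constraints are both the allocating
   thread's current write clock, so the freshly created range behaves as
   if it had just been written by 'thr'; later accesses that do not
   happen-after this point are then candidates for race reports. */
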
void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}


/* Set the lines zix_start till zix_end to NOACCESS. */
static void zsm_secmap_line_range_noaccess (SecMap *sm,
                                            UInt zix_start, UInt zix_end)
{
   for (UInt lz = zix_start; lz <= zix_end; lz++) {
      LineZ* lineZ;
      lineZ = &sm->linesZ[lz];
      if (lineZ->dict[0] != SVal_INVALID) {
         rcdec_LineZ(lineZ);
         lineZ->dict[0] = SVal_NOACCESS;
         lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      } else {
         clear_LineF_of_Z(lineZ);
      }
      for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
         lineZ->ix2s[i] = 0; /* all refer to dict[0] */
   }
}

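/* Illustrative note: a LineZ stores one line of shadow memory in
   compressed form: up to four distinct shadow values in dict[0..3] plus
   a 2-bit dictionary index per byte, packed four to a UChar in ix2s[]
   (hence the N_LINE_ARANGE/4 loop bound above).  Setting dict[0] to
   SVal_NOACCESS, invalidating dict[1..3] and zeroing every index marks
   the whole line inaccessible without any per-byte work; when dict[0]
   was SVal_INVALID the line's data instead lived in an attached LineF,
   which clear_LineF_of_Z releases. */
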
/* Set the given range to SVal_NOACCESS in-place in the secmap.
   a must be cacheline aligned.  len must be a multiple of a cacheline
   and must be < N_SECMAP_ARANGE. */
static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
{
   tl_assert (is_valid_scache_tag (a));
   tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
   tl_assert (len < N_SECMAP_ARANGE);

   SecMap *sm1 = shmem__find_SecMap (a);
   SecMap *sm2 = shmem__find_SecMap (a + len - 1);
   UWord zix_start = shmem__get_SecMap_offset(a          ) >> N_LINE_BITS;
   UWord zix_end   = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;

   if (sm1) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
      zsm_secmap_line_range_noaccess (sm1, zix_start,
                                      sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
   }
   if (sm2 && sm1 != sm2) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
      zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
   }
}

/* Set the given address range to SVal_NOACCESS.
   The SecMaps fully set to SVal_NOACCESS will be pushed in SecMap_freelist. */
static void zsm_sset_range_noaccess (Addr addr, SizeT len)
{
   /*
      BPC = Before, Partial Cacheline, = addr
            (i.e. starting inside a cacheline/inside a SecMap)
      BFC = Before, Full Cacheline(s), but not full SecMap
            (i.e. starting inside a SecMap)
      FSM = Full SecMap(s)
            (i.e. starting a SecMap)
      AFC = After, Full Cacheline(s), but not full SecMap
            (i.e. first address after the full SecMap(s))
      APC = After, Partial Cacheline, i.e. first address after the
            full CacheLines.
      ARE = After Range End = addr+len = first address not part of the range.

      If addr     starts a Cacheline, then BPC == BFC.
      If addr     starts a SecMap,    then BPC == BFC == FSM.
      If addr+len starts a SecMap,    then APC == ARE == AFC.
      If addr+len starts a Cacheline, then APC == ARE.
   */
   Addr ARE = addr + len;
   Addr BPC = addr;
   Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
   Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
   Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
   Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
   SizeT Plen = len; // Plen will be split between the following:
   SizeT BPClen;
   SizeT BFClen;
   SizeT FSMlen;
   SizeT AFClen;
   SizeT APClen;

   /* Consumes from Plen the nr of bytes between from and to.
      from and to must be aligned on a multiple of round.
      The length consumed will be a multiple of round, with
      a maximum of Plen. */
#  define PlenCONSUME(from, to, round, consumed) \
   do {                                          \
      if (from < to) {                           \
         if (to - from < Plen)                   \
            consumed = to - from;                \
         else                                    \
            consumed = ROUNDDN(Plen, round);     \
      } else {                                   \
         consumed = 0;                           \
      }                                          \
      Plen -= consumed; } while (0)

   PlenCONSUME(BPC, BFC, 1,               BPClen);
   PlenCONSUME(BFC, FSM, N_LINE_ARANGE,   BFClen);
   PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
   PlenCONSUME(AFC, APC, N_LINE_ARANGE,   AFClen);
   PlenCONSUME(APC, ARE, 1,               APClen);
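
   /* Worked example (illustrative; assumes, purely for concreteness,
      N_LINE_ARANGE == 0x40 and N_SECMAP_ARANGE == 0x2000): for
      addr == 0x10030 and len == 0x5000 we get ARE == 0x15030,
      BPC == 0x10030, BFC == 0x10040, FSM == 0x12000, AFC == 0x14000,
      APC == 0x15000, and the consumption steps yield BPClen == 0x10,
      BFClen == 0x1fc0, FSMlen == 0x2000, AFClen == 0x1000 and
      APClen == 0x30, which sum to len == 0x5000 as the assertion below
      requires. */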

   if (0)
      VG_(printf) ("addr %p[%lu] ARE %p"
                   " BPC %p[%lu] BFC %p[%lu] FSM %p[%lu]"
                   " AFC %p[%lu] APC %p[%lu]\n",
                   (void*)addr, len, (void*)ARE,
                   (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
                   (void*)AFC, AFClen, (void*)APC, APClen);

   tl_assert (Plen == 0);

   /* Set to NOACCESS the pieces before and after that are not covered by
      entire SecMaps. */

   /* First we set the partial cachelines.  This is done through the cache. */
   if (BPClen > 0)
      zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
   if (APClen > 0)
      zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);

   /* After this, we will not use the cache anymore.  We will directly work
      in-place on the z shadow memory in SecMap(s).
      So, we invalidate the cachelines for the whole range we are setting
      to NOACCESS below. */
   shmem__invalidate_scache_range (BFC, APC - BFC);

   if (BFClen > 0)
      zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
   if (AFClen > 0)
      zsm_sset_range_noaccess_in_secmap (AFC, AFClen);

   if (FSMlen > 0) {
      /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
         free list. */
      Addr sm_start = FSM;
      while (sm_start < AFC) {
         SecMap *sm = shmem__find_SecMap (sm_start);
         if (sm) {
            Addr gaKey;
            SecMap *fm_sm;

            if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
            for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
               LineZ *lineZ = &sm->linesZ[lz];
               if (LIKELY(lineZ->dict[0] != SVal_INVALID))
                  rcdec_LineZ(lineZ);
               else
                  clear_LineF_of_Z(lineZ);
            }
            if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
               tl_assert (0);
            stats__secmaps_in_map_shmem--;
            tl_assert (gaKey == sm_start);
            tl_assert (sm == fm_sm);
            stats__secmaps_ssetGCed++;
            push_SecMap_on_freelist (sm);
         }
         sm_start += N_SECMAP_ARANGE;
      }
      tl_assert (sm_start == AFC);

      /* The above loop might have left pointers to freed SecMaps in the
         smCache, so clear any such entries. */
      if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
         smCache[0].gaKey = 1;
         smCache[0].sm = NULL;
      }
      if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
         smCache[1].gaKey = 1;
         smCache[1].sm = NULL;
      }
      if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
         smCache[2].gaKey = 1;
         smCache[2].sm = NULL;
      }
      STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
   }
}

void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   /* This really does put the requested range in NoAccess.  It's
      expensive though. */
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
}

/* Works byte at a time.  Can be optimised if needed. */
UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
{
   UWord anr = 0; // nr of bytes addressable.

   /* Get the accessibility of each byte.  Take care not to create a
      SecMap or LineZ while checking whether a byte is addressable.

      Note: this is used for client requests, so performance is not deemed
      critical and for simplicity we work byte by byte.  It could be sped
      up by handling full cachelines or full SecMaps whenever a cacheline
      or SecMap boundary is reached. */
   for (SizeT i = 0; i < len; i++) {
      SVal  sv    = SVal_INVALID;
      Addr  b     = a + i;
      Addr  tag   = b & ~(N_LINE_ARANGE - 1);
      UWord wix   = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
      UWord cloff = get_cacheline_offset(b);

      /* Note: we do not use get_cacheline(b) to avoid creating cachelines
         and/or SecMaps for non-addressable bytes. */
      if (tag == cache_shmem.tags0[wix]) {
         CacheLine copy = cache_shmem.lyns0[wix];
         /* We work on a copy of the cacheline, as we do not want to
            record the client request as a real read.
            The below is somewhat similar to zsm_sapply08__msmcread but
            avoids side effects on the cache. */
         UWord toff = get_tree_offset(b); /* == 0 .. 7 */
         UWord tno = get_treeno(b);
         UShort descr = copy.descrs[tno];
         if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
            SVal* tree = &copy.svals[tno << 3];
            copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
         }
         sv = copy.svals[cloff];
      } else {
         /* Byte not found in the cacheline.  Search for a SecMap. */
         SecMap *sm = shmem__find_SecMap(b);
         LineZ *lineZ;
         if (sm == NULL)
            sv = SVal_NOACCESS;
         else {
            UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
            lineZ = &sm->linesZ[zix];
            if (lineZ->dict[0] == SVal_INVALID) {
               LineF *lineF = SVal2Ptr(lineZ->dict[1]);
               sv = lineF->w64s[cloff];
            } else {
               UWord ix = read_twobit_array( lineZ->ix2s, cloff );
               sv = lineZ->dict[ix];
            }
         }
      }

      tl_assert (sv != SVal_INVALID);
      if (sv == SVal_NOACCESS) {
         if (abits)
            abits[i] = 0x00;
      } else {
         if (abits)
            abits[i] = 0xff;
         anr++;
      }
   }

   return anr;
}

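/* Usage sketch (illustrative only): a caller wanting to know how much of
   a buffer is addressable might do
      UChar abits[100];
      UWord n_ok = libhb_srange_get_abits(buf, abits, 100);
   after which abits[i] is 0xff for each addressable byte and 0x00
   otherwise, with n_ok giving the count of addressable bytes.  Passing
   NULL for abits returns just the count. */
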
void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   tl_assert(thr);
   return thr->hgthread;
}

void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   tl_assert(thr);
   thr->hgthread = hgthread;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   /* GC the unreferenced (zero rc) RCECs when
         (1) a significant nr of RCECs has been reached (to avoid scanning
             a contextTab consisting mostly of NULL ptrs)
     and (2) the max nr of RCECs is being approached (since the pool
             allocator in any case holds at least that many RCECs).
             Note: the margin of 1000 avoids a small but steady increase
             of the max nr of RCECs, which would otherwise occur because
             libhb_maybe_GC is not called at the exact moment the current
             nr of RCECs reaches the max.
     and (3) fewer than 75% of the RCECs are referenced.
     Keeping the nr of RCECs from growing too much keeps memory use low
     and avoids having too many elements in the (fixed-size) contextTab
     hashtable. */
   if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
                && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
                && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
      do_RCEC_GC();

   /* If there are still no entries available (all the table entries are
      full), and we hit the threshold point, then do a GC. */
   Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
      && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
   if (UNLIKELY (vts_tab_GC))
      vts_tab__do_GC( False/*don't show stats*/ );

   /* Scan GC the SecMaps when
         (1) no SecMap is in the freelist
     and (2) the current nr of live SecMaps exceeds the threshold. */
   if (UNLIKELY(SecMap_freelist == NULL
                && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
      // If we did a vts tab GC, then no need to flush the cache again.
      if (!vts_tab_GC)
         zsm_flush_cache();
      shmem__SecMap_do_GC(True);
   }

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts();
}

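/* Illustrative reading of the RCEC trigger above (the concrete table
   size is whatever N_RCEC_TAB is defined to elsewhere in this file):
   a GC only fires once the number of live RCECs exceeds half the number
   of contextTab buckets, the current count is within 1000 of the highest
   count seen so far (so the table really is about to grow), and more
   than a quarter of the live RCECs are unreferenced and can therefore be
   reclaimed by do_RCEC_GC(). */
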
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                               //
// SECTION END main library                                      //
//                                                               //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/