//===-- tsan_rtl.cpp ------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Main file (entry points) for the TSan run-time.
//===----------------------------------------------------------------------===//
#include "tsan_rtl.h"

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_file.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
#include "sanitizer_common/sanitizer_symbolizer.h"
#include "tsan_defs.h"
#include "tsan_interface.h"
#include "tsan_mman.h"
#include "tsan_platform.h"
#include "tsan_suppressions.h"
#include "tsan_symbolize.h"
#include "ubsan/ubsan_init.h"
volatile int __tsan_resumed = 0;

extern "C" void __tsan_resume() {
  __tsan_resumed = 1;
}

void (*on_initialize)(void);
int (*on_finalize)(int);
namespace __tsan {

#if !SANITIZER_GO && !SANITIZER_MAC
__attribute__((tls_model("initial-exec")))
THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(
    SANITIZER_CACHE_LINE_SIZE);
#endif
static char ctx_placeholder[sizeof(Context)] ALIGNED(SANITIZER_CACHE_LINE_SIZE);
Context *ctx;
// Can be overridden by a front-end.
#ifdef TSAN_EXTERNAL_HOOKS
bool OnFinalize(bool failed);
void OnInitialize();
#else
SANITIZER_WEAK_CXX_DEFAULT_IMPL
bool OnFinalize(bool failed) {
#if !SANITIZER_GO
  if (on_finalize)
    return on_finalize(failed);
#endif
  return failed;
}
SANITIZER_WEAK_CXX_DEFAULT_IMPL
void OnInitialize() {
#if !SANITIZER_GO
  if (on_initialize)
    on_initialize();
#endif
}
#endif
static ThreadContextBase *CreateThreadContext(Tid tid) {
  // Map thread trace when context is created.
  char name[50];
  internal_snprintf(name, sizeof(name), "trace %u", tid);
  MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name);
  const uptr hdr = GetThreadTraceHeader(tid);
  internal_snprintf(name, sizeof(name), "trace header %u", tid);
  MapThreadTrace(hdr, sizeof(Trace), name);
  new((void*)hdr) Trace();
  // We are going to use only a small part of the trace with the default
  // value of history_size. However, the constructor writes to the whole trace.
  // Release the unused part.
  uptr hdr_end = hdr + sizeof(Trace);
  hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts());
  hdr_end = RoundUp(hdr_end, GetPageSizeCached());
  if (hdr_end < hdr + sizeof(Trace)) {
    ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace));
    uptr unused = hdr + sizeof(Trace) - hdr_end;
    if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) {
      Report("ThreadSanitizer: failed to mprotect [0x%zx-0x%zx)\n", hdr_end,
             hdr_end + unused);
      CHECK("unable to mprotect" && 0);
    }
  }
  return New<ThreadContext>(tid);
}
#if !SANITIZER_GO
static const u32 kThreadQuarantineSize = 16;
#else
static const u32 kThreadQuarantineSize = 64;
#endif

Context::Context()
    : initialized(),
      report_mtx(MutexTypeReport),
      thread_registry(CreateThreadContext, kMaxTid, kThreadQuarantineSize,
                      kMaxTidReuse),
      racy_mtx(MutexTypeRacy),
      fired_suppressions_mtx(MutexTypeFired),
      clock_alloc(LINKER_INITIALIZED, "clock allocator") {
  fired_suppressions.reserve(8);
}
// The objects are allocated in TLS, so one may rely on zero-initialization.
ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
                         unsigned reuse_count, uptr stk_addr, uptr stk_size,
                         uptr tls_addr, uptr tls_size)
    : fast_state(tid, epoch)
      // Do not touch these, rely on zero initialization,
      // they may be accessed before the ctor.
      // , ignore_reads_and_writes()
      // , ignore_interceptors()
      ,
      clock(tid, reuse_count),
      tid(tid),
      unique_id(unique_id),
      stk_addr(stk_addr),
      stk_size(stk_size),
      tls_addr(tls_addr),
      tls_size(tls_size)
#if !SANITIZER_GO
      ,
      last_sleep_clock(tid)
#endif
{
  CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
#if !SANITIZER_GO
  shadow_stack_pos = shadow_stack;
  shadow_stack_end = shadow_stack + kShadowStackSize;
#else
  // Setup dynamic shadow stack.
  const int kInitStackSize = 8;
  shadow_stack = (uptr *)Alloc(kInitStackSize * sizeof(uptr));
  shadow_stack_pos = shadow_stack;
  shadow_stack_end = shadow_stack + kInitStackSize;
#endif
}
void MemoryProfiler(u64 uptime) {
  if (ctx->memprof_fd == kInvalidFd)
    return;
  InternalMmapVector<char> buf(4096);
  WriteMemoryProfile(buf.data(), buf.size(), uptime);
  WriteToFile(ctx->memprof_fd, buf.data(), internal_strlen(buf.data()));
}
void InitializeMemoryProfiler() {
  ctx->memprof_fd = kInvalidFd;
  const char *fname = flags()->profile_memory;
  if (!fname || !fname[0])
    return;
  if (internal_strcmp(fname, "stdout") == 0) {
    ctx->memprof_fd = 1;
  } else if (internal_strcmp(fname, "stderr") == 0) {
    ctx->memprof_fd = 2;
  } else {
    InternalScopedString filename;
    filename.append("%s.%d", fname, (int)internal_getpid());
    ctx->memprof_fd = OpenFile(filename.data(), WrOnly);
    if (ctx->memprof_fd == kInvalidFd) {
      Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
             filename.data());
      return;
    }
  }
  MaybeSpawnBackgroundThread();
}
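// The memory profile destination comes from the profile_memory flag: "stdout"
// and "stderr" select those streams, any other value is used as a file name
// prefix with ".<pid>" appended, so e.g. TSAN_OPTIONS=profile_memory=tsan.mem
// would write to tsan.mem.<pid> (illustrative invocation; the flag name and
// naming scheme follow the code above).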
static void *BackgroundThread(void *arg) {
  // This is a non-initialized non-user thread, nothing to see here.
  // We don't use ScopedIgnoreInterceptors, because we want ignores to be
  // enabled even when the thread function exits (e.g. during pthread thread
  // shutdown code).
  cur_thread_init()->ignore_interceptors++;
  const u64 kMs2Ns = 1000 * 1000;
  const u64 start = NanoTime();

  u64 last_flush = NanoTime();
  uptr last_rss = 0;
  for (int i = 0;
       atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0;
       i++) {
    SleepForMillis(100);
    u64 now = NanoTime();

    // Flush memory if requested.
    if (flags()->flush_memory_ms > 0) {
      if (last_flush + flags()->flush_memory_ms * kMs2Ns < now) {
        VPrintf(1, "ThreadSanitizer: periodic memory flush\n");
        FlushShadowMemory();
        last_flush = NanoTime();
      }
    }
    if (flags()->memory_limit_mb > 0) {
      uptr rss = GetRSS();
      uptr limit = uptr(flags()->memory_limit_mb) << 20;
      VPrintf(1, "ThreadSanitizer: memory flush check"
                 " RSS=%llu LAST=%llu LIMIT=%llu\n",
              (u64)rss >> 20, (u64)last_rss >> 20, (u64)limit >> 20);
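      // Flush when RSS growth since the last check exceeds the remaining
      // headroom to the limit: 2 * rss > limit + last_rss is equivalent to
      // (rss - last_rss) > (limit - rss).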
      if (2 * rss > limit + last_rss) {
        VPrintf(1, "ThreadSanitizer: flushing memory due to RSS\n");
        FlushShadowMemory();
        rss = GetRSS();
        VPrintf(1, "ThreadSanitizer: memory flushed RSS=%llu\n", (u64)rss>>20);
      }
      last_rss = rss;
    }

    MemoryProfiler(now - start);

    // Flush symbolizer cache if requested.
    if (flags()->flush_symbolizer_ms > 0) {
      u64 last = atomic_load(&ctx->last_symbolize_time_ns,
                             memory_order_relaxed);
      if (last != 0 && last + flags()->flush_symbolizer_ms * kMs2Ns < now) {
        Lock l(&ctx->report_mtx);
        ScopedErrorReportLock l2;
        SymbolizeFlush();
        atomic_store(&ctx->last_symbolize_time_ns, 0, memory_order_relaxed);
      }
    }
  }
  return nullptr;
}
static void StartBackgroundThread() {
  ctx->background_thread = internal_start_thread(&BackgroundThread, 0);
}

static void StopBackgroundThread() {
  atomic_store(&ctx->stop_background_thread, 1, memory_order_relaxed);
  internal_join_thread(ctx->background_thread);
  ctx->background_thread = 0;
}
void DontNeedShadowFor(uptr addr, uptr size) {
  ReleaseMemoryPagesToOS(reinterpret_cast<uptr>(MemToShadow(addr)),
                         reinterpret_cast<uptr>(MemToShadow(addr + size)));
}

#if !SANITIZER_GO
void UnmapShadow(ThreadState *thr, uptr addr, uptr size) {
  if (size == 0) return;
  DontNeedShadowFor(addr, size);
  ScopedGlobalProcessor sgp;
  ctx->metamap.ResetRange(thr->proc(), addr, size);
}
#endif
void MapShadow(uptr addr, uptr size) {
  // Global data is not 64K aligned, but there are no adjacent mappings,
  // so we can get away with unaligned mapping.
  // CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
  const uptr kPageSize = GetPageSizeCached();
  uptr shadow_begin = RoundDownTo((uptr)MemToShadow(addr), kPageSize);
  uptr shadow_end = RoundUpTo((uptr)MemToShadow(addr + size), kPageSize);
  if (!MmapFixedSuperNoReserve(shadow_begin, shadow_end - shadow_begin,
                               "shadow"))
    Die();

  // Meta shadow is 2:1, so tread carefully.
  static bool data_mapped = false;
  static uptr mapped_meta_end = 0;
  uptr meta_begin = (uptr)MemToMeta(addr);
  uptr meta_end = (uptr)MemToMeta(addr + size);
  meta_begin = RoundDownTo(meta_begin, 64 << 10);
  meta_end = RoundUpTo(meta_end, 64 << 10);
  if (!data_mapped) {
    // First call maps data+bss.
    data_mapped = true;
    if (!MmapFixedSuperNoReserve(meta_begin, meta_end - meta_begin,
                                 "meta shadow"))
      Die();
  } else {
    // Mapping continuous heap.
    // Windows wants 64K alignment.
    meta_begin = RoundDownTo(meta_begin, 64 << 10);
    meta_end = RoundUpTo(meta_end, 64 << 10);
    if (meta_end <= mapped_meta_end)
      return;
    if (meta_begin < mapped_meta_end)
      meta_begin = mapped_meta_end;
    if (!MmapFixedSuperNoReserve(meta_begin, meta_end - meta_begin,
                                 "meta shadow"))
      Die();
    mapped_meta_end = meta_end;
  }
  VPrintf(2, "mapped meta shadow for (0x%zx-0x%zx) at (0x%zx-0x%zx)\n", addr,
          addr + size, meta_begin, meta_end);
}
void MapThreadTrace(uptr addr, uptr size, const char *name) {
  DPrintf("#0: Mapping trace at 0x%zx-0x%zx(0x%zx)\n", addr, addr + size, size);
  CHECK_GE(addr, TraceMemBeg());
  CHECK_LE(addr + size, TraceMemEnd());
  CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
  if (!MmapFixedSuperNoReserve(addr, size, name)) {
    Printf("FATAL: ThreadSanitizer can not mmap thread trace (0x%zx/0x%zx)\n",
           addr, size);
    Die();
  }
}
#if !SANITIZER_GO
static void OnStackUnwind(const SignalContext &sig, const void *,
                          BufferedStackTrace *stack) {
  stack->Unwind(StackTrace::GetNextInstructionPc(sig.pc), sig.bp, sig.context,
                common_flags()->fast_unwind_on_fatal);
}

static void TsanOnDeadlySignal(int signo, void *siginfo, void *context) {
  HandleDeadlySignal(siginfo, context, GetTid(), &OnStackUnwind, nullptr);
}
#endif

void CheckUnwind() {
  // There is high probability that interceptors will check-fail as well,
  // on the other hand there is no sense in processing interceptors
  // since we are going to die soon.
  ScopedIgnoreInterceptors ignore;
#if !SANITIZER_GO
  cur_thread()->ignore_sync++;
  cur_thread()->ignore_reads_and_writes++;
#endif
  PrintCurrentStackSlow(StackTrace::GetCurrentPc());
}
void Initialize(ThreadState *thr) {
  // Thread safe because done before all threads exist.
  static bool is_initialized = false;
  if (is_initialized)
    return;
  is_initialized = true;
  // We are not ready to handle interceptors yet.
  ScopedIgnoreInterceptors ignore;
  SanitizerToolName = "ThreadSanitizer";
  // Install tool-specific callbacks in sanitizer_common.
  SetCheckUnwindCallback(CheckUnwind);

  ctx = new(ctx_placeholder) Context;
  const char *env_name = SANITIZER_GO ? "GORACE" : "TSAN_OPTIONS";
  const char *options = GetEnv(env_name);
  CacheBinaryName();
  CheckASLR();
  InitializeFlags(&ctx->flags, options, env_name);
  AvoidCVE_2016_2143();
  __sanitizer::InitializePlatformEarly();
  __tsan::InitializePlatformEarly();

#if !SANITIZER_GO
  // Re-exec ourselves if we need to set additional env or command line args.
  MaybeReexec();

  InitializeAllocator();
  ReplaceSystemMalloc();
#endif
  if (common_flags()->detect_deadlocks)
    ctx->dd = DDetector::Create(flags());
  Processor *proc = ProcCreate();
  ProcWire(proc, thr);
  InitializeInterceptors();
  InitializePlatform();
  InitializeDynamicAnnotations();
#if !SANITIZER_GO
  InitializeShadowMemory();
  InitializeAllocatorLate();
  InstallDeadlySignalHandlers(TsanOnDeadlySignal);
#endif
  // Setup correct file descriptor for error reports.
  __sanitizer_set_report_path(common_flags()->log_path);
  InitializeSuppressions();
#if !SANITIZER_GO
  InitializeLibIgnore();
  Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
#endif

  VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n",
          (int)internal_getpid());

  // Initialize thread 0.
  Tid tid = ThreadCreate(thr, 0, 0, true);
  CHECK_EQ(tid, kMainTid);
  ThreadStart(thr, tid, GetTid(), ThreadType::Regular);
#if TSAN_CONTAINS_UBSAN
  __ubsan::InitAsPlugin();
#endif
  ctx->initialized = true;

#if !SANITIZER_GO
  Symbolizer::LateInitialize();
  InitializeMemoryProfiler();
#endif

  if (flags()->stop_on_start) {
    Printf("ThreadSanitizer is suspended at startup (pid %d)."
           " Call __tsan_resume().\n",
           (int)internal_getpid());
    while (__tsan_resumed == 0) {}
  }

  OnInitialize();
}
void MaybeSpawnBackgroundThread() {
  // On MIPS, TSan initialization is run before
  // __pthread_initialize_minimal_internal() is finished, so we can not spawn
  // new threads.
#if !SANITIZER_GO && !defined(__mips__)
  static atomic_uint32_t bg_thread = {};
  if (atomic_load(&bg_thread, memory_order_relaxed) == 0 &&
      atomic_exchange(&bg_thread, 1, memory_order_relaxed) == 0) {
    StartBackgroundThread();
    SetSandboxingCallback(StopBackgroundThread);
  }
#endif
}
int Finalize(ThreadState *thr) {
  bool failed = false;

  if (common_flags()->print_module_map == 1)
    DumpProcessMap();

  if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
    SleepForMillis(flags()->atexit_sleep_ms);

  // Wait for pending reports.
  ctx->report_mtx.Lock();
  { ScopedErrorReportLock l; }
  ctx->report_mtx.Unlock();

#if !SANITIZER_GO
  if (Verbosity()) AllocatorPrintStats();
#endif

  ThreadFinalize(thr);

  if (ctx->nreported) {
    failed = true;
#if !SANITIZER_GO
    Printf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
#else
    Printf("Found %d data race(s)\n", ctx->nreported);
#endif
  }

  if (common_flags()->print_suppressions)
    PrintMatchedSuppressions();

  failed = OnFinalize(failed);

  return failed ? common_flags()->exitcode : 0;
}
void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
  ctx->thread_registry.Lock();
  ctx->report_mtx.Lock();
  ScopedErrorReportLock::Lock();
  // Suppress all reports in the pthread_atfork callbacks.
  // Reports will deadlock on the report_mtx.
  // We could ignore sync operations as well,
  // but so far it's unclear if it will do more good or harm.
  // Unnecessarily ignoring things can lead to false positives later.
  thr->suppress_reports++;
  // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
  // we'll assert in CheckNoLocks() unless we ignore interceptors.
  thr->ignore_interceptors++;
}
void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
  thr->suppress_reports--;  // Enabled in ForkBefore.
  thr->ignore_interceptors--;
  ScopedErrorReportLock::Unlock();
  ctx->report_mtx.Unlock();
  ctx->thread_registry.Unlock();
}
void ForkChildAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
  thr->suppress_reports--;  // Enabled in ForkBefore.
  thr->ignore_interceptors--;
  ScopedErrorReportLock::Unlock();
  ctx->report_mtx.Unlock();
  ctx->thread_registry.Unlock();

  uptr nthread = 0;
  ctx->thread_registry.GetNumberOfThreads(0, 0, &nthread /* alive threads */);
  VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
      " parent had %d threads\n", (int)internal_getpid(), (int)nthread);
  if (nthread == 1) {
    StartBackgroundThread();
  } else {
    // We've just forked a multi-threaded process. We cannot reasonably function
    // after that (some mutexes may be locked before fork). So just enable
    // ignores for everything in the hope that we will exec soon.
    ctx->after_multithreaded_fork = true;
    thr->ignore_interceptors++;
    ThreadIgnoreBegin(thr, pc);
    ThreadIgnoreSyncBegin(thr, pc);
  }
}
void GrowShadowStack(ThreadState *thr) {
  const int sz = thr->shadow_stack_end - thr->shadow_stack;
  const int newsz = 2 * sz;
  auto *newstack = (uptr *)Alloc(newsz * sizeof(uptr));
  internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
  Free(thr->shadow_stack);
  thr->shadow_stack = newstack;
  thr->shadow_stack_pos = newstack + sz;
  thr->shadow_stack_end = newstack + newsz;
}
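// Returns the id of the current call stack in the stack depot. A non-zero
// caller pc is temporarily pushed on the shadow stack so that the stored
// trace includes the call site, and is popped again after the depot lookup.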
StackID CurrentStackId(ThreadState *thr, uptr pc) {
  if (!thr->is_inited)  // May happen during bootstrap.
    return kInvalidStackID;
  if (pc != 0) {
#if !SANITIZER_GO
    DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
    if (thr->shadow_stack_pos == thr->shadow_stack_end)
      GrowShadowStack(thr);
#endif
    thr->shadow_stack_pos[0] = pc;
    thr->shadow_stack_pos++;
  }
  StackID id = StackDepotPut(
      StackTrace(thr->shadow_stack, thr->shadow_stack_pos - thr->shadow_stack));
  if (pc != 0)
    thr->shadow_stack_pos--;
  return id;
}
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess *ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto *evex = reinterpret_cast<EventAccessExt *>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}
ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc,
                                                  uptr addr, uptr size,
                                                  AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange *ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}
void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}
void TraceFunc(ThreadState *thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}
void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}
void TraceMutexUnlock(ThreadState *thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}
void TraceTime(ThreadState *thr) {
  if (!kCollectHistory)
    return;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(thr->sid);
  ev.epoch = static_cast<u64>(thr->epoch);
  ev._ = 0;
  TraceEvent(thr, ev);
}
void TraceSwitchPart(ThreadState *thr) {
  Trace *trace = &thr->tctx->trace;
  Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
  DCHECK_EQ(reinterpret_cast<uptr>(pos + 1) & TracePart::kAlignment, 0);
  auto *part = trace->parts.Back();
  DPrintf("TraceSwitchPart part=%p pos=%p\n", part, pos);
  if (part) {
    // We can get here when we still have space in the current trace part.
    // The fast-path check in TraceAcquire has false positives in the middle of
    // the part. Check if we are indeed at the end of the current part or not,
    // and fill any gaps with NopEvent's.
    Event *end = &part->events[TracePart::kSize];
    DCHECK_GE(pos, &part->events[0]);
    DCHECK_LE(pos, end);
    if (pos + 1 < end) {
      if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) ==
          TracePart::kAlignment)
        *pos++ = NopEvent;
      *pos++ = NopEvent;
      DCHECK_LE(pos + 2, end);
      atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos));
      // Ensure we setup trace so that the next TraceAcquire
      // won't detect trace part end.
      Event *ev;
      CHECK(TraceAcquire(thr, &ev));
      return;
    }
    // We are indeed at the end.
    for (; pos < end; pos++) *pos = NopEvent;
  }
#if !SANITIZER_GO
  if (ctx->after_multithreaded_fork) {
    // We just need to survive till exec.
    CHECK(part);
    atomic_store_relaxed(&thr->trace_pos,
                         reinterpret_cast<uptr>(&part->events[0]));
    return;
  }
#endif
  part = new (MmapOrDie(sizeof(TracePart), "TracePart")) TracePart();
  thr->trace_prev_pc = 0;
  {
    Lock lock(&trace->mtx);
    trace->parts.PushBack(part);
    atomic_store_relaxed(&thr->trace_pos,
                         reinterpret_cast<uptr>(&part->events[0]));
  }
  // Make this part self-sufficient by restoring the current stack
  // and mutex set in the beginning of the trace.
  for (uptr *pos = &thr->shadow_stack[0]; pos < thr->shadow_stack_pos; pos++)
    CHECK(TryTraceFunc(thr, *pos));
  for (uptr i = 0; i < thr->mset.Size(); i++) {
    MutexSet::Desc d = thr->mset.Get(i);
    TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0,
                   d.addr, d.stack_id);
  }
}
void TraceSwitch(ThreadState *thr) {
#if !SANITIZER_GO
  if (ctx->after_multithreaded_fork)
    return;
#endif
  thr->nomalloc++;
  Trace *thr_trace = ThreadTrace(thr->tid);
  Lock l(&thr_trace->mtx);
  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
  TraceHeader *hdr = &thr_trace->headers[trace];
  hdr->epoch0 = thr->fast_state.epoch();
  ObtainCurrentStack(thr, 0, &hdr->stack0);
  hdr->mset0 = thr->mset;
  thr->nomalloc--;
}
Trace *ThreadTrace(Tid tid) { return (Trace *)GetThreadTraceHeader(tid); }

uptr TraceTopPC(ThreadState *thr) {
  Event *events = (Event *)GetThreadTrace(thr->tid);
  uptr pc = events[thr->fast_state.GetTracePos()];
  return pc;
}

uptr TraceSize() {
  return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
}

uptr TraceParts() {
  return TraceSize() / kTracePartSize;
}
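// TraceSize() is the per-thread trace length in events:
// 2^(kTracePartSizeBits + history_size + 1), so each increment of the
// history_size flag doubles the trace (and its memory consumption).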
#if !SANITIZER_GO
extern "C" void __tsan_trace_switch() {
  TraceSwitch(cur_thread());
}

extern "C" void __tsan_report_race() {
  ReportRace(cur_thread());
}
#endif
ALWAYS_INLINE
Shadow LoadShadow(u64 *p) {
  u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed);
  return Shadow(raw);
}

ALWAYS_INLINE
void StoreShadow(u64 *sp, u64 s) {
  atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed);
}

ALWAYS_INLINE
void StoreIfNotYetStored(u64 *sp, u64 *s) {
  StoreShadow(sp, *s);
  *s = 0;
}
void HandleRace(ThreadState *thr, u64 *shadow_mem,
                Shadow cur, Shadow old) {
  thr->racy_state[0] = cur.raw();
  thr->racy_state[1] = old.raw();
  thr->racy_shadow_addr = shadow_mem;
#if !SANITIZER_GO
  HACKY_CALL(__tsan_report_race);
#else
  ReportRace(thr);
#endif
}
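// A previous access (encoded in the shadow value `old`) happened-before the
// current thread iff the thread's vector clock entry for the old access's
// thread id is at least the old access's epoch, i.e. the thread has already
// acquired that epoch via synchronization.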
static inline bool HappensBefore(Shadow old, ThreadState *thr) {
  return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
}
ALWAYS_INLINE
void MemoryAccessImpl1(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur) {
  // This potentially can live in an MMX/SSE scratch register.
  // The required intrinsics are:
  // __m128i _mm_move_epi64(__m128i*);
  // _mm_storel_epi64(u64*, __m128i);
  u64 store_word = cur.raw();
  bool stored = false;

  // scan all the shadow values and dispatch to 4 categories:
  // same, replace, candidate and race (see comments below).
  // we consider only 3 cases regarding access sizes:
  // equal, intersect and not intersect. initially I considered
  // larger and smaller as well, it allowed to replace some
  // 'candidates' with 'same' or 'replace', but I think
  // it's just not worth it (performance- and complexity-wise).

  Shadow old(0);

  // In release mode we manually unroll the loop,
  // because empirically gcc generates better code this way.
  // However, we can't afford unrolling in debug mode, because the function
  // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
  // threads, which is not enough for the unrolled loop.
#if SANITIZER_DEBUG
  for (int idx = 0; idx < 4; idx++) {
# include "tsan_update_shadow_word.inc"
  }
#else
  int idx = 0;
# include "tsan_update_shadow_word.inc"
  idx = 1;
  if (stored) {
# include "tsan_update_shadow_word.inc"
  } else {
# include "tsan_update_shadow_word.inc"
  }
  idx = 2;
  if (stored) {
# include "tsan_update_shadow_word.inc"
  } else {
# include "tsan_update_shadow_word.inc"
  }
  idx = 3;
  if (stored) {
# include "tsan_update_shadow_word.inc"
  } else {
# include "tsan_update_shadow_word.inc"
  }
#endif

  // we did not find any races and had already stored
  // the current access info, so we are done
  if (LIKELY(stored))
    return;
  // choose a random candidate slot and replace it
  StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
  return;
RACE:
  HandleRace(thr, shadow_mem, cur, old);
  return;
}
void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
                           AccessType typ) {
  DCHECK(!(typ & kAccessAtomic));
  const bool kAccessIsWrite = !(typ & kAccessRead);
  const bool kIsAtomic = false;
  while (size) {
    int size1 = 1;
    int kAccessSizeLog = kSizeLog1;
    if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
      size1 = 8;
      kAccessSizeLog = kSizeLog8;
    } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
      size1 = 4;
      kAccessSizeLog = kSizeLog4;
    } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
      size1 = 2;
      kAccessSizeLog = kSizeLog2;
    }
    MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
    addr += size1;
    size -= size1;
  }
}
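// Example: a 7-byte access starting 6 bytes into an 8-byte shadow cell is
// recorded as a 2-byte, then a 4-byte, then a 1-byte access, so that no
// recorded chunk spans two shadow cells.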
ALWAYS_INLINE
bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
  Shadow cur(a);
  for (uptr i = 0; i < kShadowCnt; i++) {
    Shadow old(LoadShadow(&s[i]));
    if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
        old.TidWithIgnore() == cur.TidWithIgnore() &&
        old.epoch() > sync_epoch &&
        old.IsAtomic() == cur.IsAtomic() &&
        old.IsRead() <= cur.IsRead())
      return true;
  }
  return false;
}
#if TSAN_VECTORIZE
# define SHUF(v0, v1, i0, i1, i2, i3) \
  _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
                                  _mm_castsi128_ps(v1), \
                                  (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
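// SHUF(v0, v1, i0, i1, i2, i3) builds a vector whose two low 32-bit lanes are
// lanes i0 and i1 of v0 and whose two high lanes are lanes i2 and i3 of v1
// (via _mm_shuffle_ps on the bit-cast vectors).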
ALWAYS_INLINE
bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
  // This is an optimized version of ContainsSameAccessSlow.
  // load current access into access[0:63]
  const m128 access = _mm_cvtsi64_si128(a);
  // duplicate high part of access in addr0:
  // addr0[0:31]        = access[32:63]
  // addr0[32:63]       = access[32:63]
  // addr0[64:95]       = access[32:63]
  // addr0[96:127]      = access[32:63]
  const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
  // load 4 shadow slots
  const m128 shadow0 = _mm_load_si128((__m128i *)s);
  const m128 shadow1 = _mm_load_si128((__m128i *)s + 1);
  // load high parts of 4 shadow slots into addr_vect:
  // addr_vect[0:31]    = shadow0[32:63]
  // addr_vect[32:63]   = shadow0[96:127]
  // addr_vect[64:95]   = shadow1[32:63]
  // addr_vect[96:127]  = shadow1[96:127]
  m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
  if (!is_write) {
    // set IsRead bit in addr_vect
    const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
    const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
    addr_vect = _mm_or_si128(addr_vect, rw_mask);
  }
  // addr0 == addr_vect?
  const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
  // epoch1[0:63]       = sync_epoch
  const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
  // epoch[0:31]        = sync_epoch[0:31]
  // epoch[32:63]       = sync_epoch[0:31]
  // epoch[64:95]       = sync_epoch[0:31]
  // epoch[96:127]      = sync_epoch[0:31]
  const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
  // load low parts of shadow cell epochs into epoch_vect:
  // epoch_vect[0:31]   = shadow0[0:31]
  // epoch_vect[32:63]  = shadow0[64:95]
  // epoch_vect[64:95]  = shadow1[0:31]
  // epoch_vect[96:127] = shadow1[64:95]
  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
  // epoch_vect > sync_epoch?
  const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
  // addr_res & epoch_res
  const m128 res = _mm_and_si128(addr_res, epoch_res);
  // mask[0] = res[7]
  // mask[1] = res[15]
  // ...
  // mask[15] = res[127]
  const int mask = _mm_movemask_epi8(res);
  return mask != 0;
}
#endif
ALWAYS_INLINE
bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
#if TSAN_VECTORIZE
  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
  // NOTE: this check can fail if the shadow is concurrently mutated
  // by other threads. But it still can be useful if you modify
  // ContainsSameAccessFast and want to ensure that it's not completely broken.
  // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
  return res;
#else
  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
#endif
}
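// ContainsSameAccess() is the fast-path filter used by MemoryAccess() below:
// if one of the four shadow slots already holds an access with the same
// offset/size by the same thread, newer than the last synchronization epoch,
// with equal atomicity and at least as strong an access type (a write covers
// a later read), the new access is redundant and the shadow update is skipped.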
ALWAYS_INLINE USED
void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
                  int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
  RawShadow *shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
           " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
           (int)thr->fast_state.tid(), (void*)pc, (void*)addr,
           (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
           (uptr)shadow_mem[0], (uptr)shadow_mem[1],
           (uptr)shadow_mem[2], (uptr)shadow_mem[3]);
#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
    DCHECK(IsShadowMem(shadow_mem));
  }
#endif

  if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
    // Access to .rodata section, no races here.
    // Measurements show that it can be 10-20% of all memory accesses.
    return;
  }

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit())) {
    return;
  }

  Shadow cur(fast_state);
  cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
  cur.SetWrite(kAccessIsWrite);
  cur.SetAtomic(kIsAtomic);

  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
                                thr->fast_synch_epoch, kAccessIsWrite))) {
    return;
  }

  if (kCollectHistory) {
    fast_state.IncrementEpoch();
    thr->fast_state = fast_state;
    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
    cur.IncrementEpoch();
  }

  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
                    shadow_mem, cur);
}
// Called by MemoryAccessRange in tsan_rtl_thread.cpp
ALWAYS_INLINE USED
void MemoryAccessImpl(ThreadState *thr, uptr addr,
                      int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
                      u64 *shadow_mem, Shadow cur) {
  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
                                thr->fast_synch_epoch, kAccessIsWrite))) {
    return;
  }

  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
                    shadow_mem, cur);
}
static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
                           u64 val) {
  (void)thr;
  (void)pc;
  if (size == 0)
    return;
  uptr offset = addr % kShadowCell;
  if (offset) {
    offset = kShadowCell - offset;
    if (size <= offset)
      return;
    addr += offset;
    size -= offset;
  }
  DCHECK_EQ(addr % 8, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  // Don't want to touch lots of shadow memory.
  // If a program maps 10MB stack, there is no need reset the whole range.
  size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
    RawShadow *p = MemToShadow(addr);
    CHECK(IsShadowMem(p));
    CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1));
    // FIXME: may overwrite a part outside the region
    for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
      p[i++] = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        p[i++] = 0;
    }
  } else {
    // The region is big, reset only beginning and end.
    const uptr kPageSize = GetPageSizeCached();
    RawShadow *begin = MemToShadow(addr);
    RawShadow *end = begin + size / kShadowCell * kShadowCnt;
    RawShadow *p = begin;
    // Set at least first kPageSize/2 to page boundary.
    while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
      *p++ = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        *p++ = 0;
    }
    // Reset middle part.
    RawShadow *p1 = p;
    p = RoundDown(end, kPageSize);
    if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
      Die();
    // Set the ending.
    while (p < end) {
      *p++ = val;
      for (uptr j = 1; j < kShadowCnt; j++)
        *p++ = 0;
    }
  }
}
void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  MemoryRangeSet(thr, pc, addr, size, 0);
}
void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  // Processing more than 1k (4k of shadow) is expensive,
  // can cause excessive memory consumption (user does not necessary touch
  // the whole range) and most likely unnecessary.
  if (size > 1024)
    size = 1024;
  CHECK_EQ(thr->is_freeing, false);
  thr->is_freeing = true;
  MemoryAccessRange(thr, pc, addr, size, true);
  thr->is_freeing = false;
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
  }
  Shadow s(thr->fast_state);
  s.ClearIgnoreBit();
  s.MarkAsFreed();
  s.SetWrite(true);
  s.SetAddr0AndSizeLog(0, 3);
  MemoryRangeSet(thr, pc, addr, size, s.raw());
}
void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
  }
  Shadow s(thr->fast_state);
  s.ClearIgnoreBit();
  s.SetWrite(true);
  s.SetAddr0AndSizeLog(0, 3);
  MemoryRangeSet(thr, pc, addr, size, s.raw());
}
void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}
ALWAYS_INLINE USED
void FuncEntry(ThreadState *thr, uptr pc) {
  DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
  }

  // Shadow stack maintenance can be replaced with
  // stack unwinding during trace switch (which presumably must be faster).
  DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
  if (thr->shadow_stack_pos == thr->shadow_stack_end)
    GrowShadowStack(thr);
#endif
  thr->shadow_stack_pos[0] = pc;
  thr->shadow_stack_pos++;
}
ALWAYS_INLINE USED
void FuncExit(ThreadState *thr) {
  DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
  if (kCollectHistory) {
    thr->fast_state.IncrementEpoch();
    TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
  }

  DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#endif
  thr->shadow_stack_pos--;
}
void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
  thr->ignore_reads_and_writes++;
  CHECK_GT(thr->ignore_reads_and_writes, 0);
  thr->fast_state.SetIgnoreBit();
#if !SANITIZER_GO
  if (pc && !ctx->after_multithreaded_fork)
    thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}
void ThreadIgnoreEnd(ThreadState *thr) {
  DPrintf("#%d: ThreadIgnoreEnd\n", thr->tid);
  CHECK_GT(thr->ignore_reads_and_writes, 0);
  thr->ignore_reads_and_writes--;
  if (thr->ignore_reads_and_writes == 0) {
    thr->fast_state.ClearIgnoreBit();
#if !SANITIZER_GO
    thr->mop_ignore_set.Reset();
#endif
  }
}
#if !SANITIZER_GO
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
uptr __tsan_testonly_shadow_stack_current_size() {
  ThreadState *thr = cur_thread();
  return thr->shadow_stack_pos - thr->shadow_stack;
}
#endif
void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc) {
  DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
  thr->ignore_sync++;
  CHECK_GT(thr->ignore_sync, 0);
#if !SANITIZER_GO
  if (pc && !ctx->after_multithreaded_fork)
    thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}
void ThreadIgnoreSyncEnd(ThreadState *thr) {
  DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
  CHECK_GT(thr->ignore_sync, 0);
  thr->ignore_sync--;
#if !SANITIZER_GO
  if (thr->ignore_sync == 0)
    thr->sync_ignore_set.Reset();
#endif
}
bool MD5Hash::operator==(const MD5Hash &other) const {
  return hash[0] == other.hash[0] && hash[1] == other.hash[1];
}
#if SANITIZER_DEBUG
void build_consistency_debug() {}
#else
void build_consistency_release() {}
#endif

}  // namespace __tsan
#if SANITIZER_CHECK_DEADLOCKS
namespace __sanitizer {
using namespace __tsan;
MutexMeta mutex_meta[] = {
    {MutexInvalid, "Invalid", {}},
    {MutexThreadRegistry, "ThreadRegistry", {}},
    {MutexTypeTrace, "Trace", {MutexLeaf}},
    {MutexTypeReport, "Report", {MutexTypeSyncVar}},
    {MutexTypeSyncVar, "SyncVar", {}},
    {MutexTypeAnnotations, "Annotations", {}},
    {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}},
    {MutexTypeFired, "Fired", {MutexLeaf}},
    {MutexTypeRacy, "Racy", {MutexLeaf}},
    {MutexTypeGlobalProc, "GlobalProc", {}},
    {},
};

void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); }
}  // namespace __sanitizer
#endif

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif