//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

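// Note: the TryTrace* helpers above return false when the current trace part
// has no space left; TraceMemoryAccessRange() below is the non-failing
// wrapper that switches to a new trace part and retries in that case.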
void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

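// Shared slow path of the race check: reports the race and then overwrites
// the shadow cell so that subsequent accesses to the same address do not
// trap again.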
NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, take it.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

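// Two implementations of the shadow check follow, selected by TSAN_VECTORIZE:
// a scalar one and an SSE-vectorized one with the same semantics.
// ContainsSameAccess() is the fast path: it returns true if one of the four
// shadow slots already matches the current access, so the shadow does not
// need to be updated. CheckRaces() updates the shadow and returns true iff
// a race was reported.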
#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

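// In the non-vectorized build LOAD_CURRENT_SHADOW only declares dummy
// access/shadow variables so that the generic call sites of
// ContainsSameAccess()/CheckRaces() compile unchanged.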
#  define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
  // and now do smaller reads/writes, these can also be considered as "same
  // access". However, it will make the check more expensive, so it's unclear
  // if it's worth it. But this would conserve trace space, so it's useful
  // besides potential speed up.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset read bit in the shadow,
  // because we need to match read with both reads and writes.
  // Shadow::kRodata has only read bit set, so it does what we want.
  // We also abuse it for rodata check to save few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0 which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory access
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask points which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, take it.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

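// The constant masks below rely on the 32-bit shadow word layout used by
// Shadow: access bits in the low byte, sid in the second byte, epoch in
// bits 16..29, and the is-read/is-atomic flags in the top two bits.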
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if access is the same,
  // rw weaker and happens before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
#  define LOAD_EPOCH(idx)                                                     \
    if (LIKELY(race_mask & (1 << (idx * 4)))) {                               \
      u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                         \
      u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));    \
      thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
    }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
#  undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

#  define LOAD_CURRENT_SHADOW(cur, shadow_mem)                         \
    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw())); \
    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions allow turning the memory access and
// func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks do only 2 slow path calls:
// report a race and trace part switching. Race reporting is easy to turn into
// a tail call, we just always return from the runtime after reporting a race.
// But trace part switching is harder because it needs to happen in the middle
// of callbacks. To turn it into a tail call we immediately return after
// TraceRestart* functions, but TraceRestart* functions themselves recurse into
// the callback after switching the trace part. As a result the hottest
// callbacks contain only tail calls, which effectively makes them leaf
// functions (can use all registers, no frame setup, etc).
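// Illustrative call chain (sketch, not additional runtime logic): an
// instrumented callback such as __tsan_read4(addr) inlines MemoryAccess();
// when the trace part is full, MemoryAccess() tail-calls
// TraceRestartMemoryAccess() and returns, and TraceRestartMemoryAccess()
// performs the expensive TraceSwitchPart() before re-invoking MemoryAccess().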
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

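// A 16-byte access spans two 8-byte shadow cells: each half is checked
// separately, but the range is traced only once (see the traced flag below).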
ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

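// Fills each shadow cell in [p, end) with v in the first slot and kEmpty in
// the remaining slots.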
void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only beginning and end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least first kPageSize/2 to page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. It's not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

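// Checks one shadow cell of a range access; returns true iff a race was
// reported for it.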
ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);

#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem %zx\n", addr + size - 1);
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
    DCHECK(IsShadowMem(shadow_mem));
  }
  if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
    Printf("Bad shadow addr %p (%zx)\n",
           static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
           addr + size - 1);
    DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
  }
#endif

  // Access to .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#  include "tsan_interface.inc"
#endif