2 /*--------------------------------------------------------------------*/
3 /*--- Thread scheduling. scheduler.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2000-2017 Julian Seward
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
32 Valgrind tries to emulate the kernel's threading as closely as
33 possible. The client does all threading via the normal syscalls
34 (on Linux: clone, etc). Valgrind emulates this by creating exactly
35 the same process structure as would be created without Valgrind.
36 There are no extra threads.
38 The main difference is that Valgrind only allows one client thread
39 to run at once. This is controlled with the CPU Big Lock,
40 "the_BigLock". Any time a thread wants to run client code or
41 manipulate any shared state (which is anything other than its own
42 ThreadState entry), it must hold the_BigLock.
44 When a thread is about to block in a blocking syscall, it releases
45 the_BigLock, and re-takes it when it becomes runnable again (either
46 because the syscall finished, or we took a signal).
48 VG_(scheduler) therefore runs in each thread. It returns only when
49 the thread is exiting, either because it exited itself, or it was
50 told to exit by another thread.
52 This file is almost entirely OS-independent. The details of how
53 the OS handles threading and signalling are abstracted away and
54 implemented elsewhere. [Some of the functions have worked their
55 way back for the moment, until we do an OS port in earnest...]
59 #include "pub_core_basics.h"
60 #include "pub_core_debuglog.h"
61 #include "pub_core_vki.h"
62 #include "pub_core_vkiscnums.h" // __NR_sched_yield
63 #include "pub_core_threadstate.h"
64 #include "pub_core_clientstate.h"
65 #include "pub_core_aspacemgr.h"
66 #include "pub_core_clreq.h" // for VG_USERREQ__*
67 #include "pub_core_dispatch.h"
68 #include "pub_core_errormgr.h" // For VG_(get_n_errs_found)()
69 #include "pub_core_gdbserver.h" // for VG_(gdbserver)/VG_(gdbserver_activity)
70 #include "pub_core_libcbase.h"
71 #include "pub_core_libcassert.h"
72 #include "pub_core_libcprint.h"
73 #include "pub_core_libcproc.h"
74 #include "pub_core_libcsignal.h"
75 #if defined(VGO_darwin)
76 #include "pub_core_mach.h"
78 #include "pub_core_machine.h"
79 #include "pub_core_mallocfree.h"
80 #include "pub_core_options.h"
81 #include "pub_core_replacemalloc.h"
82 #include "pub_core_sbprofile.h"
83 #include "pub_core_signals.h"
84 #include "pub_core_stacks.h"
85 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
86 #include "pub_core_syscall.h"
87 #include "pub_core_syswrap.h"
88 #include "pub_core_tooliface.h"
89 #include "pub_core_translate.h" // For VG_(translate)()
90 #include "pub_core_transtab.h"
91 #include "pub_core_debuginfo.h" // VG_(di_notify_pdb_debuginfo)
92 #include "priv_sched-lock.h"
93 #include "pub_core_scheduler.h" // self
94 #include "pub_core_redir.h"
95 #include "libvex_emnote.h" // VexEmNote
98 /* ---------------------------------------------------------------------
99 Types and globals for the scheduler.
100 ------------------------------------------------------------------ */
102 /* ThreadId and ThreadState are defined elsewhere*/
104 /* If False, a fault is Valgrind-internal (ie, a bug) */
105 Bool
VG_(in_generated_code
) = False
;
107 /* 64-bit counter for the number of basic blocks done. */
108 static ULong bbs_done
= 0;
110 /* Counter to see if vgdb activity is to be verified.
111 When nr of bbs done reaches vgdb_next_poll, scheduler will
112 poll for gdbserver activity. VG_(force_vgdb_poll) and
113 VG_(disable_vgdb_poll) allows the valgrind core (e.g. m_gdbserver)
114 to control when the next poll will be done. */
115 static ULong vgdb_next_poll
;
118 static void do_client_request ( ThreadId tid
);
119 static void scheduler_sanity ( ThreadId tid
);
120 static void mostly_clear_thread_record ( ThreadId tid
);
123 static ULong n_scheduling_events_MINOR
= 0;
124 static ULong n_scheduling_events_MAJOR
= 0;
126 /* Stats: number of XIndirs looked up in the fast cache, the number of hits in
127 ways 1, 2 and 3, and the number of misses. The number of hits in way 0 isn't
128 recorded because it can be computed from these five numbers. */
129 static ULong stats__n_xIndirs
= 0;
130 static ULong stats__n_xIndir_hits1
= 0;
131 static ULong stats__n_xIndir_hits2
= 0;
132 static ULong stats__n_xIndir_hits3
= 0;
133 static ULong stats__n_xIndir_misses
= 0;
135 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
136 have to do 64 bit incs on the hot path through
137 VG_(disp_cp_xindir). */
138 /*global*/ UInt
VG_(stats__n_xIndirs_32
) = 0;
139 /*global*/ UInt
VG_(stats__n_xIndir_hits1_32
) = 0;
140 /*global*/ UInt
VG_(stats__n_xIndir_hits2_32
) = 0;
141 /*global*/ UInt
VG_(stats__n_xIndir_hits3_32
) = 0;
142 /*global*/ UInt
VG_(stats__n_xIndir_misses_32
) = 0;
144 /* Sanity checking counts. */
145 static UInt sanity_fast_count
= 0;
146 static UInt sanity_slow_count
= 0;
148 void VG_(print_scheduler_stats
)(void)
150 VG_(message
)(Vg_DebugMsg
,
151 "scheduler: %'llu event checks.\n", bbs_done
);
154 = stats__n_xIndirs
- stats__n_xIndir_hits1
- stats__n_xIndir_hits2
155 - stats__n_xIndir_hits3
- stats__n_xIndir_misses
;
156 VG_(message
)(Vg_DebugMsg
,
157 "scheduler: %'llu indir transfers, "
158 "%'llu misses (1 in %llu) ..\n",
159 stats__n_xIndirs
, stats__n_xIndir_misses
,
160 stats__n_xIndirs
/ (stats__n_xIndir_misses
161 ? stats__n_xIndir_misses
: 1));
162 VG_(message
)(Vg_DebugMsg
,
163 "scheduler: .. of which: %'llu hit0, %'llu hit1, "
164 "%'llu hit2, %'llu hit3, %'llu missed\n",
166 stats__n_xIndir_hits1
,
167 stats__n_xIndir_hits2
,
168 stats__n_xIndir_hits3
,
169 stats__n_xIndir_misses
);
171 VG_(message
)(Vg_DebugMsg
,
172 "scheduler: %'llu/%'llu major/minor sched events.\n",
173 n_scheduling_events_MAJOR
, n_scheduling_events_MINOR
);
174 VG_(message
)(Vg_DebugMsg
,
175 " sanity: %u cheap, %u expensive checks.\n",
176 sanity_fast_count
, sanity_slow_count
);
180 * Mutual exclusion object used to serialize threads.
182 static struct sched_lock
*the_BigLock
;
185 /* ---------------------------------------------------------------------
186 Helper functions for the scheduler.
187 ------------------------------------------------------------------ */
189 static void maybe_progress_report ( UInt reporting_interval_seconds
)
191 /* This is when the next report is due, in user cpu milliseconds since
192 process start. This is a global variable so this won't be thread-safe
193 if Valgrind is ever made multithreaded. For now it's fine. */
194 static UInt next_report_due_at
= 0;
196 /* First of all, figure out whether another report is due. It
198 UInt user_ms
= VG_(get_user_milliseconds
)();
199 if (LIKELY(user_ms
< next_report_due_at
))
202 Bool first_ever_call
= next_report_due_at
== 0;
204 /* A report is due. First, though, set the time for the next report. */
205 next_report_due_at
+= 1000 * reporting_interval_seconds
;
207 /* If it's been an excessively long time since the last check, we
208 might have gone more than one reporting interval forward. Guard
210 while (next_report_due_at
<= user_ms
)
211 next_report_due_at
+= 1000 * reporting_interval_seconds
;
213 /* Also we don't want to report anything on the first call, but we
214 have to wait till this point to leave, so that we set up the
215 next-call time correctly. */
219 /* Print the report. */
220 UInt user_cpu_seconds
= user_ms
/ 1000;
221 UInt wallclock_seconds
= VG_(read_millisecond_timer
)() / 1000;
222 Double millionEvCs
= ((Double
)bbs_done
) / 1000000.0;
223 Double thousandTIns
= ((Double
)VG_(get_bbs_translated
)()) / 1000.0;
224 Double thousandTOuts
= ((Double
)VG_(get_bbs_discarded_or_dumped
)()) / 1000.0;
225 UInt nThreads
= VG_(count_living_threads
)();
227 if (VG_(clo_verbosity
) > 0) {
228 VG_(dmsg
)("PROGRESS: U %'us, W %'us, %.1f%% CPU, EvC %.2fM, "
229 "TIn %.1fk, TOut %.1fk, #thr %u\n",
230 user_cpu_seconds
, wallclock_seconds
,
232 * (Double
)(user_cpu_seconds
)
233 / (Double
)(wallclock_seconds
== 0 ? 1 : wallclock_seconds
),
235 thousandTIns
, thousandTOuts
, nThreads
);
240 void print_sched_event ( ThreadId tid
, const HChar
* what
)
242 VG_(message
)(Vg_DebugMsg
, " SCHED[%u]: %s\n", tid
, what
);
245 /* For showing SB profiles, if the user asks to see them. */
247 void maybe_show_sb_profile ( void )
249 /* DO NOT MAKE NON-STATIC */
250 static ULong bbs_done_lastcheck
= 0;
252 vg_assert(VG_(clo_profyle_interval
) > 0);
253 Long delta
= (Long
)(bbs_done
- bbs_done_lastcheck
);
254 vg_assert(delta
>= 0);
255 if ((ULong
)delta
>= VG_(clo_profyle_interval
)) {
256 bbs_done_lastcheck
= bbs_done
;
257 VG_(get_and_show_SB_profile
)(bbs_done
);
262 const HChar
* name_of_sched_event ( UInt event
)
265 case VEX_TRC_JMP_INVALICACHE
: return "INVALICACHE";
266 case VEX_TRC_JMP_FLUSHDCACHE
: return "FLUSHDCACHE";
267 case VEX_TRC_JMP_NOREDIR
: return "NOREDIR";
268 case VEX_TRC_JMP_SIGILL
: return "SIGILL";
269 case VEX_TRC_JMP_SIGTRAP
: return "SIGTRAP";
270 case VEX_TRC_JMP_SIGSEGV
: return "SIGSEGV";
271 case VEX_TRC_JMP_SIGBUS
: return "SIGBUS";
272 case VEX_TRC_JMP_SIGFPE_INTOVF
:
273 case VEX_TRC_JMP_SIGFPE_INTDIV
: return "SIGFPE";
274 case VEX_TRC_JMP_EMWARN
: return "EMWARN";
275 case VEX_TRC_JMP_EMFAIL
: return "EMFAIL";
276 case VEX_TRC_JMP_CLIENTREQ
: return "CLIENTREQ";
277 case VEX_TRC_JMP_YIELD
: return "YIELD";
278 case VEX_TRC_JMP_NODECODE
: return "NODECODE";
279 case VEX_TRC_JMP_MAPFAIL
: return "MAPFAIL";
280 case VEX_TRC_JMP_SYS_SYSCALL
: return "SYSCALL";
281 case VEX_TRC_JMP_SYS_INT32
: return "INT32";
282 case VEX_TRC_JMP_SYS_INT128
: return "INT128";
283 case VEX_TRC_JMP_SYS_INT129
: return "INT129";
284 case VEX_TRC_JMP_SYS_INT130
: return "INT130";
285 case VEX_TRC_JMP_SYS_INT145
: return "INT145";
286 case VEX_TRC_JMP_SYS_INT210
: return "INT210";
287 case VEX_TRC_JMP_SYS_SYSENTER
: return "SYSENTER";
288 case VEX_TRC_JMP_BORING
: return "VEX_BORING";
290 case VG_TRC_BORING
: return "VG_BORING";
291 case VG_TRC_INNER_FASTMISS
: return "FASTMISS";
292 case VG_TRC_INNER_COUNTERZERO
: return "COUNTERZERO";
293 case VG_TRC_FAULT_SIGNAL
: return "FAULTSIGNAL";
294 case VG_TRC_INVARIANT_FAILED
: return "INVFAILED";
295 case VG_TRC_CHAIN_ME_TO_SLOW_EP
: return "CHAIN_ME_SLOW";
296 case VG_TRC_CHAIN_ME_TO_FAST_EP
: return "CHAIN_ME_FAST";
297 default: return "??UNKNOWN??";
301 /* Allocate a completely empty ThreadState record. */
302 ThreadId
VG_(alloc_ThreadState
) ( void )
305 for (i
= 1; i
< VG_N_THREADS
; i
++) {
306 if (VG_(threads
)[i
].status
== VgTs_Empty
) {
307 VG_(threads
)[i
].status
= VgTs_Init
;
308 VG_(threads
)[i
].exitreason
= VgSrc_None
;
309 if (VG_(threads
)[i
].thread_name
)
310 VG_(free
)(VG_(threads
)[i
].thread_name
);
311 VG_(threads
)[i
].thread_name
= NULL
;
315 VG_(printf
)("Use --max-threads=INT to specify a larger number of threads\n"
316 "and rerun valgrind\n");
317 VG_(core_panic
)("Max number of threads is too low");
322 Mark a thread as Runnable. This will block until the_BigLock is
323 available, so that we get exclusive access to all the shared
324 structures and the CPU. Up until we get the_BigLock, we must not
325 touch any shared state.
327 When this returns, we'll actually be running.
329 void VG_(acquire_BigLock
)(ThreadId tid
, const HChar
* who
)
334 if (VG_(clo_trace_sched
)) {
335 HChar buf
[VG_(strlen
)(who
) + 30];
336 VG_(sprintf
)(buf
, "waiting for lock (%s)", who
);
337 print_sched_event(tid
, buf
);
341 /* First, acquire the_BigLock. We can't do anything else safely
342 prior to this point. Even doing debug printing prior to this
343 point is, technically, wrong. */
344 VG_(acquire_BigLock_LL
)(NULL
);
346 tst
= VG_(get_ThreadState
)(tid
);
348 vg_assert(tst
->status
!= VgTs_Runnable
);
350 tst
->status
= VgTs_Runnable
;
352 if (VG_(running_tid
) != VG_INVALID_THREADID
)
353 VG_(printf
)("tid %u found %u running\n", tid
, VG_(running_tid
));
354 vg_assert(VG_(running_tid
) == VG_INVALID_THREADID
);
355 VG_(running_tid
) = tid
;
357 { Addr gsp
= VG_(get_SP
)(tid
);
358 if (NULL
!= VG_(tdict
).track_new_mem_stack_w_ECU
)
359 VG_(unknown_SP_update_w_ECU
)(gsp
, gsp
, 0/*unknown origin*/);
361 VG_(unknown_SP_update
)(gsp
, gsp
);
364 if (VG_(clo_trace_sched
)) {
365 HChar buf
[VG_(strlen
)(who
) + 30];
366 VG_(sprintf
)(buf
, " acquired lock (%s)", who
);
367 print_sched_event(tid
, buf
);
372 Set a thread into a sleeping state, and give up exclusive access to
373 the CPU. On return, the thread must be prepared to block until it
374 is ready to run again (generally this means blocking in a syscall,
375 but it may mean that we remain in a Runnable state and we're just
376 yielding the CPU to another thread).
378 void VG_(release_BigLock
)(ThreadId tid
, ThreadStatus sleepstate
,
381 ThreadState
*tst
= VG_(get_ThreadState
)(tid
);
383 vg_assert(tst
->status
== VgTs_Runnable
);
385 vg_assert(sleepstate
== VgTs_WaitSys
||
386 sleepstate
== VgTs_Yielding
);
388 tst
->status
= sleepstate
;
390 vg_assert(VG_(running_tid
) == tid
);
391 VG_(running_tid
) = VG_INVALID_THREADID
;
393 if (VG_(clo_trace_sched
)) {
394 const HChar
*status
= VG_(name_of_ThreadStatus
)(sleepstate
);
395 HChar buf
[VG_(strlen
)(who
) + VG_(strlen
)(status
) + 30];
396 VG_(sprintf
)(buf
, "releasing lock (%s) -> %s", who
, status
);
397 print_sched_event(tid
, buf
);
400 /* Release the_BigLock; this will reschedule any runnable
402 VG_(release_BigLock_LL
)(NULL
);
405 static void init_BigLock(void)
407 vg_assert(!the_BigLock
);
408 the_BigLock
= ML_(create_sched_lock
)();
411 static void deinit_BigLock(void)
413 ML_(destroy_sched_lock
)(the_BigLock
);
417 /* See pub_core_scheduler.h for description */
418 void VG_(acquire_BigLock_LL
) ( const HChar
* who
)
420 ML_(acquire_sched_lock
)(the_BigLock
);
423 /* See pub_core_scheduler.h for description */
424 void VG_(release_BigLock_LL
) ( const HChar
* who
)
426 ML_(release_sched_lock
)(the_BigLock
);
429 Bool
VG_(owns_BigLock_LL
) ( ThreadId tid
)
431 return (ML_(get_sched_lock_owner
)(the_BigLock
)
432 == VG_(threads
)[tid
].os_state
.lwpid
);
436 /* Clear out the ThreadState and release the semaphore. Leaves the
437 ThreadState in VgTs_Zombie state, so that it doesn't get
438 reallocated until the caller is really ready. */
439 void VG_(exit_thread
)(ThreadId tid
)
441 vg_assert(VG_(is_valid_tid
)(tid
));
442 vg_assert(VG_(is_running_thread
)(tid
));
443 vg_assert(VG_(is_exiting
)(tid
));
445 mostly_clear_thread_record(tid
);
446 VG_(running_tid
) = VG_INVALID_THREADID
;
448 /* There should still be a valid exitreason for this thread */
449 vg_assert(VG_(threads
)[tid
].exitreason
!= VgSrc_None
);
451 if (VG_(clo_trace_sched
))
452 print_sched_event(tid
, "release lock in VG_(exit_thread)");
454 VG_(release_BigLock_LL
)(NULL
);
457 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
458 out of the syscall and onto doing the next thing, whatever that is.
459 If it isn't blocked in a syscall, has no effect on the thread. */
460 void VG_(get_thread_out_of_syscall
)(ThreadId tid
)
462 vg_assert(VG_(is_valid_tid
)(tid
));
463 vg_assert(!VG_(is_running_thread
)(tid
));
465 if (VG_(threads
)[tid
].status
== VgTs_WaitSys
) {
466 if (VG_(clo_trace_signals
)) {
467 VG_(message
)(Vg_DebugMsg
,
468 "get_thread_out_of_syscall zaps tid %u lwp %d\n",
469 tid
, VG_(threads
)[tid
].os_state
.lwpid
);
471 # if defined(VGO_darwin)
473 // GrP fixme use mach primitives on darwin?
474 // GrP fixme thread_abort_safely?
475 // GrP fixme race for thread with WaitSys set but not in syscall yet?
476 extern kern_return_t
thread_abort(mach_port_t
);
477 thread_abort(VG_(threads
)[tid
].os_state
.lwpid
);
481 __attribute__((unused
))
482 Int r
= VG_(tkill
)(VG_(threads
)[tid
].os_state
.lwpid
, VG_SIGVGKILL
);
483 /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
484 I'm really not sure. Here's a race scenario which argues
485 that we shoudn't; but equally I'm not sure the scenario is
486 even possible, because of constraints caused by the question
487 of who holds the BigLock when.
489 Target thread tid does sys_read on a socket and blocks. This
490 function gets called, and we observe correctly that tid's
491 status is WaitSys but then for whatever reason this function
492 goes very slowly for a while. Then data arrives from
493 wherever, tid's sys_read returns, tid exits. Then we do
494 tkill on tid, but tid no longer exists; tkill returns an
495 error code and the assert fails. */
496 /* vg_assert(r == 0); */
503 Yield the CPU for a short time to let some other thread run.
505 void VG_(vg_yield
)(void)
507 ThreadId tid
= VG_(running_tid
);
509 vg_assert(tid
!= VG_INVALID_THREADID
);
510 vg_assert(VG_(threads
)[tid
].os_state
.lwpid
== VG_(gettid
)());
512 VG_(release_BigLock
)(tid
, VgTs_Yielding
, "VG_(vg_yield)");
515 Tell the kernel we're yielding.
517 # if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
518 VG_(do_syscall0
)(__NR_sched_yield
);
519 # elif defined(VGO_solaris)
520 VG_(do_syscall0
)(__NR_yield
);
525 VG_(acquire_BigLock
)(tid
, "VG_(vg_yield)");
529 /* Set the standard set of blocked signals, used whenever we're not
530 running a client syscall. */
531 static void block_signals(void)
535 VG_(sigfillset
)(&mask
);
537 /* Don't block these because they're synchronous */
538 VG_(sigdelset
)(&mask
, VKI_SIGSEGV
);
539 VG_(sigdelset
)(&mask
, VKI_SIGBUS
);
540 VG_(sigdelset
)(&mask
, VKI_SIGFPE
);
541 VG_(sigdelset
)(&mask
, VKI_SIGILL
);
542 VG_(sigdelset
)(&mask
, VKI_SIGTRAP
);
543 VG_(sigdelset
)(&mask
, VKI_SIGSYS
);
545 /* Can't block these anyway */
546 VG_(sigdelset
)(&mask
, VKI_SIGSTOP
);
547 VG_(sigdelset
)(&mask
, VKI_SIGKILL
);
549 VG_(sigprocmask
)(VKI_SIG_SETMASK
, &mask
, NULL
);
552 static void os_state_clear(ThreadState
*tst
)
554 tst
->os_state
.lwpid
= 0;
555 tst
->os_state
.threadgroup
= 0;
556 tst
->os_state
.stk_id
= NULL_STK_ID
;
557 # if defined(VGO_linux)
558 /* no other fields to clear */
559 # elif defined(VGO_freebsd)
560 /* no other fields to clear */
561 # elif defined(VGO_darwin)
562 tst
->os_state
.post_mach_trap_fn
= NULL
;
563 tst
->os_state
.pthread
= 0;
564 tst
->os_state
.func_arg
= 0;
565 VG_(memset
)(&tst
->os_state
.child_go
, 0, sizeof(tst
->os_state
.child_go
));
566 VG_(memset
)(&tst
->os_state
.child_done
, 0, sizeof(tst
->os_state
.child_done
));
567 tst
->os_state
.wq_jmpbuf_valid
= False
;
568 tst
->os_state
.remote_port
= 0;
569 tst
->os_state
.msgh_id
= 0;
570 VG_(memset
)(&tst
->os_state
.mach_args
, 0, sizeof(tst
->os_state
.mach_args
));
571 # elif defined(VGO_solaris)
572 # if defined(VGP_x86_solaris)
573 tst
->os_state
.thrptr
= 0;
575 tst
->os_state
.ustack
= NULL
;
576 tst
->os_state
.in_door_return
= False
;
577 tst
->os_state
.door_return_procedure
= 0;
578 tst
->os_state
.oldcontext
= NULL
;
579 tst
->os_state
.schedctl_data
= 0;
580 tst
->os_state
.daemon_thread
= False
;
586 static void os_state_init(ThreadState
*tst
)
588 tst
->os_state
.valgrind_stack_base
= 0;
589 tst
->os_state
.valgrind_stack_init_SP
= 0;
594 void mostly_clear_thread_record ( ThreadId tid
)
596 vki_sigset_t savedmask
;
598 vg_assert(tid
>= 0 && tid
< VG_N_THREADS
);
599 VG_(cleanup_thread
)(&VG_(threads
)[tid
].arch
);
600 VG_(threads
)[tid
].tid
= tid
;
602 /* Leave the thread in Zombie, so that it doesn't get reallocated
603 until the caller is finally done with the thread stack. */
604 VG_(threads
)[tid
].status
= VgTs_Zombie
;
606 VG_(sigemptyset
)(&VG_(threads
)[tid
].sig_mask
);
607 VG_(sigemptyset
)(&VG_(threads
)[tid
].tmp_sig_mask
);
609 os_state_clear(&VG_(threads
)[tid
]);
611 /* start with no altstack */
612 VG_(threads
)[tid
].altstack
.ss_sp
= (void *)0xdeadbeef;
613 VG_(threads
)[tid
].altstack
.ss_size
= 0;
614 VG_(threads
)[tid
].altstack
.ss_flags
= VKI_SS_DISABLE
;
616 VG_(clear_out_queued_signals
)(tid
, &savedmask
);
618 VG_(threads
)[tid
].sched_jmpbuf_valid
= False
;
622 Called in the child after fork. If the parent has multiple
623 threads, then we've inherited a VG_(threads) array describing them,
624 but only the thread which called fork() is actually alive in the
625 child. This functions needs to clean up all those other thread
628 Whichever tid in the parent which called fork() becomes the
629 master_tid in the child. That's because the only living slot in
630 VG_(threads) in the child after fork is VG_(threads)[tid], and it
631 would be too hard to try to re-number the thread and relocate the
632 thread state down to VG_(threads)[1].
634 This function also needs to reinitialize the_BigLock, since
635 otherwise we may end up sharing its state with the parent, which
636 would be deeply confusing.
638 static void sched_fork_cleanup(ThreadId me
)
641 vg_assert(VG_(running_tid
) == me
);
643 # if defined(VGO_darwin)
644 // GrP fixme hack reset Mach ports
648 VG_(threads
)[me
].os_state
.lwpid
= VG_(gettid
)();
649 VG_(threads
)[me
].os_state
.threadgroup
= VG_(getpid
)();
651 /* clear out all the unused thread slots */
652 for (tid
= 1; tid
< VG_N_THREADS
; tid
++) {
654 mostly_clear_thread_record(tid
);
655 VG_(threads
)[tid
].status
= VgTs_Empty
;
656 VG_(clear_syscallInfo
)(tid
);
660 /* re-init and take the sema */
663 VG_(acquire_BigLock_LL
)(NULL
);
667 /* First phase of initialisation of the scheduler. Initialise the
668 bigLock, zeroise the VG_(threads) structure and decide on the
669 ThreadId of the root thread.
671 ThreadId
VG_(scheduler_init_phase1
) ( void )
676 VG_(debugLog
)(1,"sched","sched_init_phase1\n");
678 if (VG_(clo_fair_sched
) != disable_fair_sched
679 && !ML_(set_sched_lock_impl
)(sched_lock_ticket
)
680 && VG_(clo_fair_sched
) == enable_fair_sched
)
682 VG_(printf
)("Error: fair scheduling is not supported on this system.\n");
686 if (VG_(clo_verbosity
) > 1) {
687 VG_(message
)(Vg_DebugMsg
,
688 "Scheduler: using %s scheduler lock implementation.\n",
689 ML_(get_sched_lock_name
)());
694 for (i
= 0 /* NB; not 1 */; i
< VG_N_THREADS
; i
++) {
695 /* Paranoia .. completely zero it out. */
696 VG_(memset
)( & VG_(threads
)[i
], 0, sizeof( VG_(threads
)[i
] ) );
698 VG_(threads
)[i
].sig_queue
= NULL
;
700 os_state_init(&VG_(threads
)[i
]);
701 mostly_clear_thread_record(i
);
703 VG_(threads
)[i
].status
= VgTs_Empty
;
704 VG_(threads
)[i
].client_stack_szB
= 0;
705 VG_(threads
)[i
].client_stack_highest_byte
= (Addr
)NULL
;
706 VG_(threads
)[i
].err_disablement_level
= 0;
707 VG_(threads
)[i
].thread_name
= NULL
;
710 tid_main
= VG_(alloc_ThreadState
)();
712 /* Bleh. Unfortunately there are various places in the system that
713 assume that the main thread has a ThreadId of 1.
714 - Helgrind (possibly)
715 - stack overflow message in default_action() in m_signals.c
716 - definitely a lot more places
718 vg_assert(tid_main
== 1);
724 /* Second phase of initialisation of the scheduler. Given the root
725 ThreadId computed by first phase of initialisation, fill in stack
726 details and acquire bigLock. Initialise the scheduler. This is
727 called at startup. The caller subsequently initialises the guest
728 state components of this main thread.
730 void VG_(scheduler_init_phase2
) ( ThreadId tid_main
,
734 VG_(debugLog
)(1,"sched","sched_init_phase2: tid_main=%u, "
735 "cls_end=0x%lx, cls_sz=%lu\n",
736 tid_main
, clstack_end
, clstack_size
);
738 vg_assert(VG_IS_PAGE_ALIGNED(clstack_end
+1));
739 vg_assert(VG_IS_PAGE_ALIGNED(clstack_size
));
741 VG_(threads
)[tid_main
].client_stack_highest_byte
743 VG_(threads
)[tid_main
].client_stack_szB
746 VG_(atfork
)(NULL
, NULL
, sched_fork_cleanup
);
750 /* ---------------------------------------------------------------------
751 Helpers for running translations.
752 ------------------------------------------------------------------ */
/* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
   mask state, but does need to pass "val" through.  jumped must be a
   volatile UWord. */
#define SCHEDSETJMP(tid, jumped, stmt)                                  \
   do {                                                                 \
      ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);       \
                                                                        \
      (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
      if ((jumped) == ((UWord)0)) {                                     \
         vg_assert(!_qq_tst->sched_jmpbuf_valid);                       \
         _qq_tst->sched_jmpbuf_valid = True;                            \
         stmt;                                                          \
      } else if (VG_(clo_trace_sched))                                  \
         VG_(printf)("SCHEDSETJMP(line %d) tid %u, jumped=%lu\n",       \
                     __LINE__, tid, jumped);                            \
      vg_assert(_qq_tst->sched_jmpbuf_valid);                           \
      _qq_tst->sched_jmpbuf_valid = False;                              \
   } while(0)
774 /* Do various guest state alignment checks prior to running a thread.
775 Specifically, check that what we have matches Vex's guest state
776 layout requirements. See libvex.h for details, but in short the
777 requirements are: There must be no holes in between the primary
778 guest state, its two copies, and the spill area. In short, all 4
779 areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
780 be placed back-to-back without holes in between. */
781 static void do_pre_run_checks ( volatile ThreadState
* tst
)
783 Addr a_vex
= (Addr
) & tst
->arch
.vex
;
784 Addr a_vexsh1
= (Addr
) & tst
->arch
.vex_shadow1
;
785 Addr a_vexsh2
= (Addr
) & tst
->arch
.vex_shadow2
;
786 Addr a_spill
= (Addr
) & tst
->arch
.vex_spill
;
787 UInt sz_vex
= (UInt
) sizeof tst
->arch
.vex
;
788 UInt sz_vexsh1
= (UInt
) sizeof tst
->arch
.vex_shadow1
;
789 UInt sz_vexsh2
= (UInt
) sizeof tst
->arch
.vex_shadow2
;
790 UInt sz_spill
= (UInt
) sizeof tst
->arch
.vex_spill
;
793 VG_(printf
)("gst %p %u, sh1 %p %u, "
794 "sh2 %p %u, spill %p %u\n",
795 (void*)a_vex
, sz_vex
,
796 (void*)a_vexsh1
, sz_vexsh1
,
797 (void*)a_vexsh2
, sz_vexsh2
,
798 (void*)a_spill
, sz_spill
);
800 vg_assert(sz_vex
% LibVEX_GUEST_STATE_ALIGN
== 0);
801 vg_assert(sz_vexsh1
% LibVEX_GUEST_STATE_ALIGN
== 0);
802 vg_assert(sz_vexsh2
% LibVEX_GUEST_STATE_ALIGN
== 0);
803 vg_assert(sz_spill
% LibVEX_GUEST_STATE_ALIGN
== 0);
805 vg_assert(a_vex
% LibVEX_GUEST_STATE_ALIGN
== 0);
806 vg_assert(a_vexsh1
% LibVEX_GUEST_STATE_ALIGN
== 0);
807 vg_assert(a_vexsh2
% LibVEX_GUEST_STATE_ALIGN
== 0);
808 vg_assert(a_spill
% LibVEX_GUEST_STATE_ALIGN
== 0);
810 /* Check that the guest state and its two shadows have the same
811 size, and that there are no holes in between. The latter is
812 important because Memcheck assumes that it can reliably access
813 the shadows by indexing off a pointer to the start of the
814 primary guest state area. */
815 vg_assert(sz_vex
== sz_vexsh1
);
816 vg_assert(sz_vex
== sz_vexsh2
);
817 vg_assert(a_vex
+ 1 * sz_vex
== a_vexsh1
);
818 vg_assert(a_vex
+ 2 * sz_vex
== a_vexsh2
);
819 /* Also check there's no hole between the second shadow area and
821 vg_assert(sz_spill
== LibVEX_N_SPILL_BYTES
);
822 vg_assert(a_vex
+ 3 * sz_vex
== a_spill
);
824 # if defined(VGA_x86)
825 /* x86 XMM regs must form an array, ie, have no holes in
828 (offsetof(VexGuestX86State
,guest_XMM7
)
829 - offsetof(VexGuestX86State
,guest_XMM0
))
830 == (8/*#regs*/-1) * 16/*bytes per reg*/
832 vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State
,guest_XMM0
)));
833 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State
,guest_FPREG
)));
834 vg_assert(8 == offsetof(VexGuestX86State
,guest_EAX
));
835 vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State
,guest_EAX
)));
836 vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State
,guest_EIP
)));
839 # if defined(VGA_amd64)
840 /* amd64 YMM regs must form an array, ie, have no holes in
843 (offsetof(VexGuestAMD64State
,guest_YMM16
)
844 - offsetof(VexGuestAMD64State
,guest_YMM0
))
845 == (17/*#regs*/-1) * 32/*bytes per reg*/
847 vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State
,guest_YMM0
)));
848 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State
,guest_FPREG
)));
849 vg_assert(16 == offsetof(VexGuestAMD64State
,guest_RAX
));
850 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State
,guest_RAX
)));
851 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State
,guest_RIP
)));
854 # if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
855 /* ppc guest_state vector regs must be 16 byte aligned for
856 loads/stores. This is important! */
857 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex
.guest_VSR0
));
858 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow1
.guest_VSR0
));
859 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow2
.guest_VSR0
));
860 /* be extra paranoid .. */
861 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex
.guest_VSR1
));
862 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow1
.guest_VSR1
));
863 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow2
.guest_VSR1
));
866 # if defined(VGA_arm)
867 /* arm guest_state VFP regs must be 8 byte aligned for
868 loads/stores. Let's use 16 just to be on the safe side. */
869 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex
.guest_D0
));
870 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow1
.guest_D0
));
871 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow2
.guest_D0
));
872 /* be extra paranoid .. */
873 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex
.guest_D1
));
874 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex_shadow1
.guest_D1
));
875 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex_shadow2
.guest_D1
));
878 # if defined(VGA_arm64)
879 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex
.guest_X0
));
880 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex_shadow1
.guest_X0
));
881 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex_shadow2
.guest_X0
));
882 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex
.guest_Q0
));
883 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow1
.guest_Q0
));
884 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow2
.guest_Q0
));
887 # if defined(VGA_s390x)
888 /* no special requirements */
891 # if defined(VGA_mips32) || defined(VGA_mips64)
892 /* no special requirements */
896 // NO_VGDB_POLL value ensures vgdb is not polled, while
897 // VGDB_POLL_ASAP ensures that the next scheduler call
898 // will cause a poll.
899 #define NO_VGDB_POLL 0xffffffffffffffffULL
900 #define VGDB_POLL_ASAP 0x0ULL
902 void VG_(disable_vgdb_poll
) (void )
904 vgdb_next_poll
= NO_VGDB_POLL
;
906 void VG_(force_vgdb_poll
) ( void )
908 vgdb_next_poll
= VGDB_POLL_ASAP
;
911 /* Run the thread tid for a while, and return a VG_TRC_* value
912 indicating why VG_(disp_run_translations) stopped, and possibly an
913 auxiliary word. Also, only allow the thread to run for at most
914 *dispatchCtrP events. If (as is the normal case) use_alt_host_addr
915 is False, we are running ordinary redir'd translations, and we
916 should therefore start by looking up the guest next IP in TT. If
917 it is True then we ignore the guest next IP and just run from
918 alt_host_addr, which presumably points at host code for a no-redir
921 Return results are placed in two_words. two_words[0] is set to the
922 TRC. In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
923 the address to patch is placed in two_words[1].
926 void run_thread_for_a_while ( /*OUT*/HWord
* two_words
,
927 /*MOD*/Int
* dispatchCtrP
,
930 Bool use_alt_host_addr
)
932 volatile HWord jumped
= 0;
933 volatile ThreadState
* tst
= NULL
; /* stop gcc complaining */
934 volatile Int done_this_time
= 0;
935 volatile HWord host_code_addr
= 0;
938 vg_assert(VG_(is_valid_tid
)(tid
));
939 vg_assert(VG_(is_running_thread
)(tid
));
940 vg_assert(!VG_(is_exiting
)(tid
));
941 vg_assert(*dispatchCtrP
> 0);
943 tst
= VG_(get_ThreadState
)(tid
);
944 do_pre_run_checks( tst
);
947 /* Futz with the XIndir stats counters. */
948 vg_assert(VG_(stats__n_xIndirs_32
) == 0);
949 vg_assert(VG_(stats__n_xIndir_hits1_32
) == 0);
950 vg_assert(VG_(stats__n_xIndir_hits2_32
) == 0);
951 vg_assert(VG_(stats__n_xIndir_hits3_32
) == 0);
952 vg_assert(VG_(stats__n_xIndir_misses_32
) == 0);
954 /* Clear return area. */
955 two_words
[0] = two_words
[1] = 0;
957 /* Figure out where we're starting from. */
958 if (use_alt_host_addr
) {
959 /* unusual case -- no-redir translation */
960 host_code_addr
= alt_host_addr
;
962 /* normal case -- redir translation */
963 Addr host_from_fast_cache
= 0;
964 Bool found_in_fast_cache
965 = VG_(lookupInFastCache
)( &host_from_fast_cache
,
966 (Addr
)tst
->arch
.vex
.VG_INSTR_PTR
);
967 if (found_in_fast_cache
) {
968 host_code_addr
= host_from_fast_cache
;
971 /* not found in VG_(tt_fast). Searching here the transtab
972 improves the performance compared to returning directly
974 Bool found
= VG_(search_transtab
)(&res
, NULL
, NULL
,
975 (Addr
)tst
->arch
.vex
.VG_INSTR_PTR
,
979 host_code_addr
= res
;
981 /* At this point, we know that we intended to start at a
982 normal redir translation, but it was not found. In
983 which case we can return now claiming it's not
985 two_words
[0] = VG_TRC_INNER_FASTMISS
; /* hmm, is that right? */
990 /* We have either a no-redir or a redir translation. */
991 vg_assert(host_code_addr
!= 0); /* implausible */
993 /* there should be no undealt-with signals */
994 //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
996 /* Set up event counter stuff for the run. */
997 tst
->arch
.vex
.host_EvC_COUNTER
= *dispatchCtrP
;
998 tst
->arch
.vex
.host_EvC_FAILADDR
999 = (HWord
)VG_(fnptr_to_fnentry
)( &VG_(disp_cp_evcheck_fail
) );
1001 /* Invalidate any in-flight LL/SC transactions, in the case that we're
1002 using the fallback LL/SC implementation. See bugs 344524 and 369459. */
1003 # if defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
1004 || defined(VGP_nanomips_linux)
1005 tst
->arch
.vex
.guest_LLaddr
= (RegWord
)(-1);
1006 # elif defined(VGP_arm64_linux)
1007 tst
->arch
.vex
.guest_LLSC_SIZE
= 0;
1012 Int i
, err
= VG_(sigprocmask
)(VKI_SIG_SETMASK
, NULL
, &m
);
1013 vg_assert(err
== 0);
1014 VG_(printf
)("tid %u: entering code with unblocked signals: ", tid
);
1015 for (i
= 1; i
<= _VKI_NSIG
; i
++)
1016 if (!VG_(sigismember
)(&m
, i
))
1017 VG_(printf
)("%d ", i
);
1021 /* Set up return-value area. */
1023 // Tell the tool this thread is about to run client code
1024 VG_TRACK( start_client_code
, tid
, bbs_done
);
1026 vg_assert(VG_(in_generated_code
) == False
);
1027 VG_(in_generated_code
) = True
;
1032 VG_(disp_run_translations
)(
1034 (volatile void*)&tst
->arch
.vex
,
1039 vg_assert(VG_(in_generated_code
) == True
);
1040 VG_(in_generated_code
) = False
;
1042 if (jumped
!= (HWord
)0) {
1043 /* We get here if the client took a fault that caused our signal
1044 handler to longjmp. */
1045 vg_assert(two_words
[0] == 0 && two_words
[1] == 0); // correct?
1046 two_words
[0] = VG_TRC_FAULT_SIGNAL
;
1051 /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
1052 and zero out the 32-bit ones in preparation for the next run of
1054 stats__n_xIndirs
+= (ULong
)VG_(stats__n_xIndirs_32
);
1055 VG_(stats__n_xIndirs_32
) = 0;
1056 stats__n_xIndir_hits1
+= (ULong
)VG_(stats__n_xIndir_hits1_32
);
1057 VG_(stats__n_xIndir_hits1_32
) = 0;
1058 stats__n_xIndir_hits2
+= (ULong
)VG_(stats__n_xIndir_hits2_32
);
1059 VG_(stats__n_xIndir_hits2_32
) = 0;
1060 stats__n_xIndir_hits3
+= (ULong
)VG_(stats__n_xIndir_hits3_32
);
1061 VG_(stats__n_xIndir_hits3_32
) = 0;
1062 stats__n_xIndir_misses
+= (ULong
)VG_(stats__n_xIndir_misses_32
);
1063 VG_(stats__n_xIndir_misses_32
) = 0;
1065 /* Inspect the event counter. */
1066 vg_assert((Int
)tst
->arch
.vex
.host_EvC_COUNTER
>= -1);
1067 vg_assert(tst
->arch
.vex
.host_EvC_FAILADDR
1068 == (HWord
)VG_(fnptr_to_fnentry
)( &VG_(disp_cp_evcheck_fail
)) );
1070 /* The number of events done this time is the difference between
1071 the event counter originally and what it is now. Except -- if
1072 it has gone negative (to -1) then the transition 0 to -1 doesn't
1073 correspond to a real executed block, so back it out. It's like
1074 this because the event checks decrement the counter first and
1075 check it for negativeness second, hence the 0 to -1 transition
1076 causes a bailout and the block it happens in isn't executed. */
1078 Int dispatchCtrAfterwards
= (Int
)tst
->arch
.vex
.host_EvC_COUNTER
;
1079 done_this_time
= *dispatchCtrP
- dispatchCtrAfterwards
;
1080 if (dispatchCtrAfterwards
== -1) {
1083 /* If the generated code drives the counter below -1, something
1084 is seriously wrong. */
1085 vg_assert(dispatchCtrAfterwards
>= 0);
1089 vg_assert(done_this_time
>= 0);
1090 bbs_done
+= (ULong
)done_this_time
;
1092 *dispatchCtrP
-= done_this_time
;
1093 vg_assert(*dispatchCtrP
>= 0);
1095 // Tell the tool this thread has stopped running client code
1096 VG_TRACK( stop_client_code
, tid
, bbs_done
);
1098 if (bbs_done
>= vgdb_next_poll
) {
1099 if (VG_(clo_vgdb_poll
))
1100 vgdb_next_poll
= bbs_done
+ (ULong
)VG_(clo_vgdb_poll
);
1102 /* value was changed due to gdbserver invocation via ptrace */
1103 vgdb_next_poll
= NO_VGDB_POLL
;
1104 if (VG_(gdbserver_activity
) (tid
))
1105 VG_(gdbserver
) (tid
);
1108 /* TRC value and possible auxiliary patch-address word are already
1109 in two_words[0] and [1] respectively, as a result of the call to
1110 VG_(run_innerloop). */
1112 if (two_words
[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
1113 || two_words
[0] == VG_TRC_CHAIN_ME_TO_FAST_EP
) {
1114 vg_assert(two_words
[1] != 0); /* we have a legit patch addr */
1116 vg_assert(two_words
[1] == 0); /* nobody messed with it */
1121 /* ---------------------------------------------------------------------
1122 The scheduler proper.
1123 ------------------------------------------------------------------ */
1125 static void handle_tt_miss ( ThreadId tid
)
1128 Addr ip
= VG_(get_IP
)(tid
);
1130 /* Trivial event. Miss in the fast-cache. Do a full
1132 found
= VG_(search_transtab
)( NULL
, NULL
, NULL
,
1133 ip
, True
/*upd_fast_cache*/ );
1134 if (UNLIKELY(!found
)) {
1135 /* Not found; we need to request a translation. */
1136 if (VG_(translate
)( tid
, ip
, /*debug*/False
, 0/*not verbose*/,
1137 bbs_done
, True
/*allow redirection*/ )) {
1138 found
= VG_(search_transtab
)( NULL
, NULL
, NULL
,
1140 vg_assert2(found
, "handle_tt_miss: missing tt_fast entry");
1143 // If VG_(translate)() fails, it's because it had to throw a
1144 // signal because the client jumped to a bad address. That
1145 // means that either a signal has been set up for delivery,
1146 // or the thread has been marked for termination. Either
1147 // way, we just need to go back into the scheduler loop.
1153 void handle_chain_me ( ThreadId tid
, void* place_to_chain
, Bool toFastEP
)
1156 Addr ip
= VG_(get_IP
)(tid
);
1157 SECno to_sNo
= INV_SNO
;
1158 TTEno to_tteNo
= INV_TTE
;
1160 found
= VG_(search_transtab
)( NULL
, &to_sNo
, &to_tteNo
,
1161 ip
, False
/*dont_upd_fast_cache*/ );
1163 /* Not found; we need to request a translation. */
1164 if (VG_(translate
)( tid
, ip
, /*debug*/False
, 0/*not verbose*/,
1165 bbs_done
, True
/*allow redirection*/ )) {
1166 found
= VG_(search_transtab
)( NULL
, &to_sNo
, &to_tteNo
,
1168 vg_assert2(found
, "handle_chain_me: missing tt_fast entry");
1170 // If VG_(translate)() fails, it's because it had to throw a
1171 // signal because the client jumped to a bad address. That
1172 // means that either a signal has been set up for delivery,
1173 // or the thread has been marked for termination. Either
1174 // way, we just need to go back into the scheduler loop.
1179 vg_assert(to_sNo
!= INV_SNO
);
1180 vg_assert(to_tteNo
!= INV_TTE
);
1182 /* So, finally we know where to patch through to. Do the patching
1183 and update the various admin tables that allow it to be undone
1184 in the case that the destination block gets deleted. */
1185 VG_(tt_tc_do_chaining
)( place_to_chain
,
1186 to_sNo
, to_tteNo
, toFastEP
);
1189 static void handle_syscall(ThreadId tid
, UInt trc
)
1191 ThreadState
* volatile tst
= VG_(get_ThreadState
)(tid
);
1192 volatile UWord jumped
;
1194 /* Syscall may or may not block; either way, it will be
1195 complete by the time this call returns, and we'll be
1196 runnable again. We could take a signal while the
1199 if (VG_(clo_sanity_level
) >= 3) {
1200 HChar buf
[50]; // large enough
1201 VG_(sprintf
)(buf
, "(BEFORE SYSCALL, tid %u)", tid
);
1202 Bool ok
= VG_(am_do_sync_check
)(buf
, __FILE__
, __LINE__
);
1206 SCHEDSETJMP(tid
, jumped
, VG_(client_syscall
)(tid
, trc
));
1208 if (VG_(clo_sanity_level
) >= 3) {
1209 HChar buf
[50]; // large enough
1210 VG_(sprintf
)(buf
, "(AFTER SYSCALL, tid %u)", tid
);
1211 Bool ok
= VG_(am_do_sync_check
)(buf
, __FILE__
, __LINE__
);
1215 if (!VG_(is_running_thread
)(tid
))
1216 VG_(printf
)("tid %u not running; VG_(running_tid)=%u, tid %u status %u\n",
1217 tid
, VG_(running_tid
), tid
, tst
->status
);
1218 vg_assert(VG_(is_running_thread
)(tid
));
1220 if (jumped
!= (UWord
)0) {
1222 VG_(poll_signals
)(tid
);
1226 /* tid just requested a jump to the noredir version of its current
1227 program counter. So make up that translation if needed, run it,
1228 and return the resulting thread return code in two_words[]. */
1230 void handle_noredir_jump ( /*OUT*/HWord
* two_words
,
1231 /*MOD*/Int
* dispatchCtrP
,
1234 /* Clear return area. */
1235 two_words
[0] = two_words
[1] = 0;
1238 Addr ip
= VG_(get_IP
)(tid
);
1240 Bool found
= VG_(search_unredir_transtab
)( &hcode
, ip
);
1242 /* Not found; we need to request a translation. */
1243 if (VG_(translate
)( tid
, ip
, /*debug*/False
, 0/*not verbose*/, bbs_done
,
1244 False
/*NO REDIRECTION*/ )) {
1246 found
= VG_(search_unredir_transtab
)( &hcode
, ip
);
1247 vg_assert2(found
, "unredir translation missing after creation?!");
1249 // If VG_(translate)() fails, it's because it had to throw a
1250 // signal because the client jumped to a bad address. That
1251 // means that either a signal has been set up for delivery,
1252 // or the thread has been marked for termination. Either
1253 // way, we just need to go back into the scheduler loop.
1254 two_words
[0] = VG_TRC_BORING
;
1261 vg_assert(hcode
!= 0);
1263 /* Otherwise run it and return the resulting VG_TRC_* value. */
1264 vg_assert(*dispatchCtrP
> 0); /* so as to guarantee progress */
1265 run_thread_for_a_while( two_words
, dispatchCtrP
, tid
,
1266 hcode
, True
/*use hcode*/ );
1271 Run a thread until it wants to exit.
1273 We assume that the caller has already called VG_(acquire_BigLock) for
1274 us, so we own the VCPU. Also, all signals are blocked.
1276 VgSchedReturnCode
VG_(scheduler
) ( ThreadId tid
)
1278 /* Holds the remaining size of this thread's "timeslice". */
1279 Int dispatch_ctr
= 0;
1281 ThreadState
*tst
= VG_(get_ThreadState
)(tid
);
1282 static Bool vgdb_startup_action_done
= False
;
1284 if (VG_(clo_trace_sched
))
1285 print_sched_event(tid
, "entering VG_(scheduler)");
1287 /* Do vgdb initialization (but once). Only the first (main) task
1288 starting up will do the below.
1289 Initialize gdbserver earlier than at the first
1290 thread VG_(scheduler) is causing problems:
1291 * at the end of VG_(scheduler_init_phase2) :
1292 The main thread is in VgTs_Init state, but in a not yet
1293 consistent state => the thread cannot be reported to gdb
1294 (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1295 back the guest registers to gdb).
1296 * at end of valgrind_main, just
1297 before VG_(main_thread_wrapper_NORETURN)(1) :
1298 The main thread is still in VgTs_Init state but in a
1299 more advanced state. However, the thread state is not yet
1300 completely initialized : a.o., the os_state is not yet fully
1301 set => the thread is then not properly reported to gdb,
1302 which is then confused (causing e.g. a duplicate thread be
1303 shown, without thread id).
1304 * it would be possible to initialize gdbserver "lower" in the
1305 call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1306 these are platform dependent and the place at which
1307 the thread state is completely initialized is not
1308 specific anymore to the main thread (so a similar "do it only
1309 once" would be needed).
1311 => a "once only" initialization here is the best compromise. */
1312 if (!vgdb_startup_action_done
) {
1313 vg_assert(tid
== 1); // it must be the main thread.
1314 vgdb_startup_action_done
= True
;
1315 if (VG_(clo_vgdb
) != Vg_VgdbNo
) {
1316 /* If we have to poll, ensures we do an initial poll at first
1317 scheduler call. Otherwise, ensure no poll (unless interrupted
1319 if (VG_(clo_vgdb_poll
))
1320 VG_(force_vgdb_poll
) ();
1322 VG_(disable_vgdb_poll
) ();
1324 VG_(gdbserver_prerun_action
) (1);
1326 VG_(disable_vgdb_poll
) ();
1330 if (SimHintiS(SimHint_no_nptl_pthread_stackcache
, VG_(clo_sim_hints
))
1332 /* We disable the stack cache the first time we see a thread other
1333 than the main thread appearing. At this moment, we are sure the pthread
1334 lib loading is done/variable was initialised by pthread lib/... */
1335 if (VG_(client__stack_cache_actsize__addr
)) {
1336 if (*VG_(client__stack_cache_actsize__addr
) == 0) {
1337 VG_(debugLog
)(1,"sched",
1338 "pthread stack cache size disable done"
1340 *VG_(client__stack_cache_actsize__addr
) = 1000 * 1000 * 1000;
1341 /* Set a value big enough to be above the hardcoded maximum stack
1342 cache size in glibc, small enough to allow a pthread stack size
1343 to be added without risk of overflow. */
1347 * glibc 2.34 no longer has stack_cache_actsize as a visible variable
1348 * so we switch to using the GLIBC_TUNABLES env var. Processing for that
1349 * is done in initimg-linux.c / setup_client_env for all glibc
1351 * If we don't detect stack_cache_actsize we want to be able to tell
1352 * whether it is an unexpected error or if it is no longer there.
1353 * In the latter case we don't print a warning.
1355 Bool print_warning
= True
;
1356 if (VG_(client__gnu_get_libc_version_addr
) != NULL
) {
1357 const HChar
* gnu_libc_version
= VG_(client__gnu_get_libc_version_addr
)();
1358 if (gnu_libc_version
!= NULL
) {
1359 HChar
* glibc_version_tok
= VG_(strdup
)("scheduler.1", gnu_libc_version
);
1360 const HChar
* str_major
= VG_(strtok
)(glibc_version_tok
, ".");
1361 Long major
= VG_(strtoll10
)(str_major
, NULL
);
1362 const HChar
* str_minor
= VG_(strtok
)(NULL
, ".");
1363 Long minor
= VG_(strtoll10
)(str_minor
, NULL
);
1364 if (major
>= 2 && minor
>= 34) {
1365 print_warning
= False
;
1367 VG_(free
)(glibc_version_tok
);
1372 if (print_warning
) {
1373 VG_(debugLog
)(0,"sched",
1374 "WARNING: pthread stack cache cannot be disabled!\n");
1376 VG_(clo_sim_hints
) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache
);
1377 /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
1378 to avoid having a msg for all following threads. */
1382 /* set the proper running signal mask */
1385 vg_assert(VG_(is_running_thread
)(tid
));
1387 dispatch_ctr
= VG_(clo_scheduling_quantum
);
1389 while (!VG_(is_exiting
)(tid
)) {
1391 vg_assert(dispatch_ctr
>= 0);
1392 if (dispatch_ctr
== 0) {
1394 /* Our slice is done, so yield the CPU to another thread. On
1395 Linux, this doesn't sleep between sleeping and running,
1396 since that would take too much time. */
1398 /* 4 July 06: it seems that a zero-length nsleep is needed to
1399 cause async thread cancellation (canceller.c) to terminate
1400 in finite time; else it is in some kind of race/starvation
1401 situation and completion is arbitrarily delayed (although
1402 this is not a deadlock).
1404 Unfortunately these sleeps cause MPI jobs not to terminate
1405 sometimes (some kind of livelock). So sleeping once
1406 every N opportunities appears to work. */
1408 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1409 sys_yield also helps the problem, whilst not crashing apps. */
1411 VG_(release_BigLock
)(tid
, VgTs_Yielding
,
1412 "VG_(scheduler):timeslice");
1413 /* ------------ now we don't have The Lock ------------ */
1415 VG_(acquire_BigLock
)(tid
, "VG_(scheduler):timeslice");
1416 /* ------------ now we do have The Lock ------------ */
1418 /* OK, do some relatively expensive housekeeping stuff */
1419 scheduler_sanity(tid
);
1420 VG_(sanity_check_general
)(False
);
1422 /* Possibly make a progress report */
1423 if (UNLIKELY(VG_(clo_progress_interval
) > 0)) {
1424 maybe_progress_report( VG_(clo_progress_interval
) );
1427 /* Look for any pending signals for this thread, and set them up
1429 VG_(poll_signals
)(tid
);
1431 if (VG_(is_exiting
)(tid
))
1432 break; /* poll_signals picked up a fatal signal */
1434 /* For stats purposes only. */
1435 n_scheduling_events_MAJOR
++;
1437 /* Figure out how many bbs to ask vg_run_innerloop to do. */
1438 dispatch_ctr
= VG_(clo_scheduling_quantum
);
1441 vg_assert(tst
->tid
== tid
);
1442 vg_assert(tst
->os_state
.lwpid
== VG_(gettid
)());
1445 /* For stats purposes only. */
1446 n_scheduling_events_MINOR
++;
1449 VG_(message
)(Vg_DebugMsg
, "thread %u: running for %d bbs\n",
1450 tid
, dispatch_ctr
- 1 );
1452 HWord trc
[2]; /* "two_words" */
1453 run_thread_for_a_while( &trc
[0],
1455 tid
, 0/*ignored*/, False
);
1457 if (VG_(clo_trace_sched
) && VG_(clo_verbosity
) > 2) {
1458 const HChar
*name
= name_of_sched_event(trc
[0]);
1459 HChar buf
[VG_(strlen
)(name
) + 10]; // large enough
1460 VG_(sprintf
)(buf
, "TRC: %s", name
);
1461 print_sched_event(tid
, buf
);
1464 if (trc
[0] == VEX_TRC_JMP_NOREDIR
) {
1465 /* If we got a request to run a no-redir version of
1466 something, do so now -- handle_noredir_jump just (creates
1467 and) runs that one translation. The flip side is that the
1468 noredir translation can't itself return another noredir
1469 request -- that would be nonsensical. It can, however,
1470 return VG_TRC_BORING, which just means keep going as
1472 /* Note that the fact that we need to continue with a
1473 no-redir jump is not recorded anywhere else in this
1474 thread's state. So we *must* execute the block right now
1475 -- we can't fail to execute it and later resume with it,
1476 because by then we'll have forgotten the fact that it
1477 should be run as no-redir, but will get run as a normal
1478 potentially-redir'd, hence screwing up. This really ought
1479 to be cleaned up, by noting in the guest state that the
1480 next block to be executed should be no-redir. Then we can
1481 suspend and resume at any point, which isn't the case at
1483 /* We can't enter a no-redir translation with the dispatch
1484 ctr set to zero, for the reasons commented just above --
1485 we need to force it to execute right now. So, if the
1486 dispatch ctr is zero, set it to one. Note that this would
1487 have the bad side effect of holding the Big Lock arbitrary
1488 long should there be an arbitrarily long sequence of
1489 back-to-back no-redir translations to run. But we assert
1490 just below that this translation cannot request another
1491 no-redir jump, so we should be safe against that. */
1492 if (dispatch_ctr
== 0) {
1495 handle_noredir_jump( &trc
[0],
1498 vg_assert(trc
[0] != VEX_TRC_JMP_NOREDIR
);
1500 /* This can't be allowed to happen, since it means the block
1501 didn't execute, and we have no way to resume-as-noredir
1502 after we get more timeslice. But I don't think it ever
1503 can, since handle_noredir_jump will assert if the counter
1504 is zero on entry. */
1505 vg_assert(trc
[0] != VG_TRC_INNER_COUNTERZERO
);
1506 /* This asserts the same thing. */
1507 vg_assert(dispatch_ctr
>= 0);
1509 /* A no-redir translation can't return with a chain-me
1510 request, since chaining in the no-redir cache is too
1512 vg_assert(trc
[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1513 && trc
[0] != VG_TRC_CHAIN_ME_TO_FAST_EP
);
1517 case VEX_TRC_JMP_BORING
:
1518 /* assisted dispatch, no event. Used by no-redir
1519 translations to force return to the scheduler. */
1521 /* no special event, just keep going. */
1524 case VG_TRC_INNER_FASTMISS
:
1525 vg_assert(dispatch_ctr
>= 0);
1526 handle_tt_miss(tid
);
1529 case VG_TRC_CHAIN_ME_TO_SLOW_EP
: {
1530 if (0) VG_(printf
)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc
[1] );
1531 handle_chain_me(tid
, (void*)trc
[1], False
);
1535 case VG_TRC_CHAIN_ME_TO_FAST_EP
: {
1536 if (0) VG_(printf
)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc
[1] );
1537 handle_chain_me(tid
, (void*)trc
[1], True
);
1541 case VEX_TRC_JMP_CLIENTREQ
:
1542 do_client_request(tid
);
1545 case VEX_TRC_JMP_SYS_INT128
: /* x86-linux */
1546 case VEX_TRC_JMP_SYS_INT129
: /* x86-darwin */
1547 case VEX_TRC_JMP_SYS_INT130
: /* x86-darwin */
1548 case VEX_TRC_JMP_SYS_INT145
: /* x86-solaris */
1549 case VEX_TRC_JMP_SYS_INT210
: /* x86-solaris */
1550 /* amd64-linux, ppc32-linux, amd64-darwin, amd64-solaris */
1551 case VEX_TRC_JMP_SYS_SYSCALL
:
1552 handle_syscall(tid
, trc
[0]);
1553 if (VG_(clo_sanity_level
) > 2)
1554 VG_(sanity_check_general
)(True
); /* sanity-check every syscall */
1557 case VEX_TRC_JMP_YIELD
:
1558 /* Explicit yield, because this thread is in a spin-lock
1559 or something. Only let the thread run for a short while
1560 longer. Because swapping to another thread is expensive,
1561 we're prepared to let this thread eat a little more CPU
1562 before swapping to another. That means that short term
1563 spins waiting for hardware to poke memory won't cause a
1565 if (dispatch_ctr
> 300)
1569 case VG_TRC_INNER_COUNTERZERO
:
1570 /* Timeslice is out. Let a new thread be scheduled. */
1571 vg_assert(dispatch_ctr
== 0);
1574 case VG_TRC_FAULT_SIGNAL
:
1575 /* Everything should be set up (either we're exiting, or
1576 about to start in a signal handler). */
1579 case VEX_TRC_JMP_MAPFAIL
:
1580 /* Failure of arch-specific address translation (x86/amd64
1581 segment override use) */
1582 /* jrs 2005 03 11: is this correct? */
1583 VG_(synth_fault
)(tid
);
1586 case VEX_TRC_JMP_EMWARN
: {
1587 static Int counts
[EmNote_NUMBER
];
1588 static Bool counts_initted
= False
;
1593 if (!counts_initted
) {
1594 counts_initted
= True
;
1595 for (q
= 0; q
< EmNote_NUMBER
; q
++)
1598 ew
= (VexEmNote
)VG_(threads
)[tid
].arch
.vex
.guest_EMNOTE
;
1599 what
= (ew
< 0 || ew
>= EmNote_NUMBER
)
1601 : LibVEX_EmNote_string(ew
);
1602 show
= (ew
< 0 || ew
>= EmNote_NUMBER
)
1605 if (show
&& VG_(clo_show_emwarns
) && !VG_(clo_xml
)) {
1606 VG_(message
)( Vg_UserMsg
,
1607 "Emulation warning: unsupported action:\n");
1608 VG_(message
)( Vg_UserMsg
, " %s\n", what
);
1609 VG_(get_and_pp_StackTrace
)( tid
, VG_(clo_backtrace_size
) );
1614 case VEX_TRC_JMP_EMFAIL
: {
1617 ew
= (VexEmNote
)VG_(threads
)[tid
].arch
.vex
.guest_EMNOTE
;
1618 what
= (ew
< 0 || ew
>= EmNote_NUMBER
)
1620 : LibVEX_EmNote_string(ew
);
1621 VG_(message
)( Vg_UserMsg
,
1622 "Emulation fatal error -- Valgrind cannot continue:\n");
1623 VG_(message
)( Vg_UserMsg
, " %s\n", what
);
1624 VG_(get_and_pp_StackTrace
)( tid
, VG_(clo_backtrace_size
) );
1625 VG_(message
)(Vg_UserMsg
, "\n");
1626 VG_(message
)(Vg_UserMsg
, "Valgrind has to exit now. Sorry.\n");
1627 VG_(message
)(Vg_UserMsg
, "\n");
1632 case VEX_TRC_JMP_SIGILL
:
1633 VG_(synth_sigill
)(tid
, VG_(get_IP
)(tid
));
1636 case VEX_TRC_JMP_SIGTRAP
:
1637 VG_(synth_sigtrap
)(tid
);
1640 case VEX_TRC_JMP_SIGSEGV
:
1641 VG_(synth_fault
)(tid
);
1644 case VEX_TRC_JMP_SIGBUS
:
1645 VG_(synth_sigbus
)(tid
);
1648 case VEX_TRC_JMP_SIGFPE
:
1649 VG_(synth_sigfpe
)(tid
, 0);
1652 case VEX_TRC_JMP_SIGFPE_INTDIV
:
1653 VG_(synth_sigfpe
)(tid
, VKI_FPE_INTDIV
);
1656 case VEX_TRC_JMP_SIGFPE_INTOVF
:
1657 VG_(synth_sigfpe
)(tid
, VKI_FPE_INTOVF
);
1660 case VEX_TRC_JMP_NODECODE
: {
1661 Addr addr
= VG_(get_IP
)(tid
);
1663 if (VG_(clo_sigill_diag
)) {
1665 "valgrind: Unrecognised instruction at address %#lx.\n", addr
);
1666 VG_(get_and_pp_StackTrace
)(tid
, VG_(clo_backtrace_size
));
1667 # define M(a) VG_(umsg)(a "\n");
1668 M("Your program just tried to execute an instruction that Valgrind" );
1669 M("did not recognise. There are two possible reasons for this." );
1670 M("1. Your program has a bug and erroneously jumped to a non-code" );
1671 M(" location. If you are running Memcheck and you just saw a" );
1672 M(" warning about a bad jump, it's probably your program's fault.");
1673 M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1674 M(" i.e. it's Valgrind's fault. If you think this is the case or");
1675 M(" you are not sure, please let us know and we'll try to fix it.");
1676 M("Either way, Valgrind will now raise a SIGILL signal which will" );
1677 M("probably kill your program." );
1680 # if defined(VGA_s390x)
1681 /* Now that the complaint is out we need to adjust the guest_IA. The
1682 reason is that -- after raising the exception -- execution will
1683 continue with the insn that follows the invalid insn. As the first
1684 2 bits of the invalid insn determine its length in the usual way,
1685 we can compute the address of the next insn here and adjust the
1686 guest_IA accordingly. This adjustment is essential and tested by
1687 none/tests/s390x/op_exception.c (which would loop forever
1689 UChar byte
= ((UChar
*)addr
)[0];
1690 UInt insn_length
= ((((byte
>> 6) + 1) >> 1) + 1) << 1;
1691 Addr next_insn_addr
= addr
+ insn_length
;
1692 VG_(set_IP
)(tid
, next_insn_addr
);
1694 VG_(synth_sigill
)(tid
, addr
);
1698 case VEX_TRC_JMP_INVALICACHE
:
1699 VG_(discard_translations
)(
1700 (Addr
)VG_(threads
)[tid
].arch
.vex
.guest_CMSTART
,
1701 VG_(threads
)[tid
].arch
.vex
.guest_CMLEN
,
1702 "scheduler(VEX_TRC_JMP_INVALICACHE)"
1705 VG_(printf
)("dump translations done.\n");
1708 case VEX_TRC_JMP_FLUSHDCACHE
: {
1709 void* start
= (void*)(Addr
)VG_(threads
)[tid
].arch
.vex
.guest_CMSTART
;
1710 SizeT len
= VG_(threads
)[tid
].arch
.vex
.guest_CMLEN
;
1711 VG_(debugLog
)(2, "sched", "flush_dcache(%p, %lu)\n", start
, len
);
1712 VG_(flush_dcache
)(start
, len
);
1716 case VG_TRC_INVARIANT_FAILED
:
1717 /* This typically happens if, after running generated code,
1718 it is detected that host CPU settings (eg, FPU/Vector
1719 control words) are not as they should be. Vex's code
1720 generation specifies the state such control words should
1721 be in on entry to Vex-generated code, and they should be
1722 unchanged on exit from it. Failure of this assertion
1723 usually means a bug in Vex's code generation. */
1725 // __asm__ __volatile__ (
1726 // "\t.word 0xEEF12A10\n" // fmrx r2,fpscr
1727 // "\tmov %0, r2" : "=r"(xx) : : "r2" );
1728 // VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1730 vg_assert2(0, "VG_(scheduler), phase 3: "
1731 "run_innerloop detected host "
1732 "state invariant failure", trc
);
1734 case VEX_TRC_JMP_SYS_SYSENTER
:
1735 /* Do whatever simulation is appropriate for an x86 sysenter
1736 instruction. Note that it is critical to set this thread's
1737 guest_EIP to point at the code to execute after the
1738 sysenter, since Vex-generated code will not have set it --
1739 vex does not know what it should be. Vex sets the next
1740 address to zero, so if you don't set guest_EIP, the thread
1741 will jump to zero afterwards and probably die as a result. */
1742 # if defined(VGP_x86_linux)
1743 vg_assert2(0, "VG_(scheduler), phase 3: "
1744 "sysenter_x86 on x86-linux is not supported");
1745 # elif defined(VGP_x86_darwin) || defined(VGP_x86_solaris)
1746 /* return address in client edx */
1747 VG_(threads
)[tid
].arch
.vex
.guest_EIP
1748 = VG_(threads
)[tid
].arch
.vex
.guest_EDX
;
1749 handle_syscall(tid
, trc
[0]);
1751 vg_assert2(0, "VG_(scheduler), phase 3: "
1752 "sysenter_x86 on non-x86 platform?!?!");
1757 vg_assert2(0, "VG_(scheduler), phase 3: "
1758 "unexpected thread return code (%u)", trc
[0]);
1762 } /* switch (trc) */
1764 if (UNLIKELY(VG_(clo_profyle_sbs
)) && VG_(clo_profyle_interval
) > 0)
1765 maybe_show_sb_profile();
1768 if (VG_(clo_trace_sched
))
1769 print_sched_event(tid
, "exiting VG_(scheduler)");
1771 vg_assert(VG_(is_exiting
)(tid
));
1773 return tst
->exitreason
;
1777 void VG_(nuke_all_threads_except
) ( ThreadId me
, VgSchedReturnCode src
)
1781 vg_assert(VG_(is_running_thread
)(me
));
1783 for (tid
= 1; tid
< VG_N_THREADS
; tid
++) {
1785 || VG_(threads
)[tid
].status
== VgTs_Empty
)
1789 "VG_(nuke_all_threads_except): nuking tid %u\n", tid
);
1791 VG_(threads
)[tid
].exitreason
= src
;
1792 if (src
== VgSrc_FatalSig
)
1793 VG_(threads
)[tid
].os_state
.fatalsig
= VKI_SIGKILL
;
1794 VG_(get_thread_out_of_syscall
)(tid
);
1799 /* ---------------------------------------------------------------------
1800 Specifying shadow register values
1801 ------------------------------------------------------------------ */
1803 #if defined(VGA_x86)
1804 # define VG_CLREQ_ARGS guest_EAX
1805 # define VG_CLREQ_RET guest_EDX
1806 #elif defined(VGA_amd64)
1807 # define VG_CLREQ_ARGS guest_RAX
1808 # define VG_CLREQ_RET guest_RDX
1809 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
1810 # define VG_CLREQ_ARGS guest_GPR4
1811 # define VG_CLREQ_RET guest_GPR3
1812 #elif defined(VGA_arm)
1813 # define VG_CLREQ_ARGS guest_R4
1814 # define VG_CLREQ_RET guest_R3
1815 #elif defined(VGA_arm64)
1816 # define VG_CLREQ_ARGS guest_X4
1817 # define VG_CLREQ_RET guest_X3
1818 #elif defined (VGA_s390x)
1819 # define VG_CLREQ_ARGS guest_r2
1820 # define VG_CLREQ_RET guest_r3
1821 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
1822 # define VG_CLREQ_ARGS guest_r12
1823 # define VG_CLREQ_RET guest_r11
1825 # error Unknown arch
1828 #define CLREQ_ARGS(regs) ((regs).vex.VG_CLREQ_ARGS)
1829 #define CLREQ_RET(regs) ((regs).vex.VG_CLREQ_RET)
1830 #define O_CLREQ_RET (offsetof(VexGuestArchState, VG_CLREQ_RET))
1832 // These macros write a value to a client's thread register, and tell the
1833 // tool that it's happened (if necessary).
1835 #define SET_CLREQ_RETVAL(zztid, zzval) \
1836 do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1837 VG_TRACK( post_reg_write, \
1838 Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1841 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1842 do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1843 VG_TRACK( post_reg_write_clientcall_return, \
1844 zztid, O_CLREQ_RET, sizeof(UWord), f); \
1848 /* ---------------------------------------------------------------------
1849 Handle client requests.
1850 ------------------------------------------------------------------ */
1852 // OS-specific(?) client requests
1853 static Bool
os_client_request(ThreadId tid
, UWord
*args
)
1855 Bool handled
= True
;
1857 vg_assert(VG_(is_running_thread
)(tid
));
1860 case VG_USERREQ__FREERES_DONE
:
1861 /* This is equivalent to an exit() syscall, but we don't set the
1862 exitcode (since it might already be set) */
1863 if (0 || VG_(clo_trace_syscalls
) || VG_(clo_trace_sched
))
1864 VG_(message
)(Vg_DebugMsg
,
1865 "__gnu_cxx::__freeres() and __libc_freeres() wrapper "
1866 "done; really quitting!\n");
1867 VG_(threads
)[tid
].exitreason
= VgSrc_ExitThread
;
1879 /* Write out a client message, possibly including a back trace. Return
1880 the number of characters written. In case of XML output, the format
1881 string as well as any arguments it requires will be XML'ified.
1882 I.e. special characters such as the angle brackets will be translated
1883 into proper escape sequences. */
1885 Int
print_client_message( ThreadId tid
, const HChar
*format
,
1886 va_list *vargsp
, Bool include_backtrace
)
1891 /* Translate the format string as follows:
1896 Yes, yes, it's simplified but in synch with
1897 myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
1900 /* Allocate a buffer that is for sure large enough. */
1901 HChar xml_format
[VG_(strlen
)(format
) * 5 + 1];
1904 HChar
*q
= xml_format
;
1906 for (p
= format
; *p
; ++p
) {
1908 case '<': VG_(strcpy
)(q
, "<"); q
+= 4; break;
1909 case '>': VG_(strcpy
)(q
, ">"); q
+= 4; break;
1910 case '&': VG_(strcpy
)(q
, "&"); q
+= 5; break;
1912 /* Careful: make sure %%s stays %%s */
1929 VG_(printf_xml
)( "<clientmsg>\n" );
1930 VG_(printf_xml
)( " <tid>%u</tid>\n", tid
);
1931 const ThreadState
*tst
= VG_(get_ThreadState
)(tid
);
1932 if (tst
->thread_name
)
1933 VG_(printf_xml
)(" <threadname>%s</threadname>\n", tst
->thread_name
);
1934 VG_(printf_xml
)( " <text>" );
1935 count
= VG_(vprintf_xml
)( xml_format
, *vargsp
);
1936 VG_(printf_xml
)( " </text>\n" );
1938 count
= VG_(vmessage
)( Vg_ClientMsg
, format
, *vargsp
);
1939 VG_(message_flush
)();
1942 if (include_backtrace
)
1943 VG_(get_and_pp_StackTrace
)( tid
, VG_(clo_backtrace_size
) );
1946 VG_(printf_xml
)( "</clientmsg>\n" );
1952 /* Do a client request for the thread tid. After the request, tid may
1953 or may not still be runnable; if not, the scheduler will have to
1954 choose a new thread to run.
1957 void do_client_request ( ThreadId tid
)
1959 UWord
* arg
= (UWord
*)(Addr
)(CLREQ_ARGS(VG_(threads
)[tid
].arch
));
1960 UWord req_no
= arg
[0];
1963 VG_(printf
)("req no = 0x%lx, arg = %p\n", req_no
, arg
);
1966 case VG_USERREQ__CLIENT_CALL0
: {
1967 UWord (*f
)(ThreadId
) = (__typeof__(f
))arg
[1];
1969 VG_(message
)(Vg_DebugMsg
, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f
);
1971 SET_CLCALL_RETVAL(tid
, f ( tid
), (Addr
)f
);
1974 case VG_USERREQ__CLIENT_CALL1
: {
1975 UWord (*f
)(ThreadId
, UWord
) = (__typeof__(f
))arg
[1];
1977 VG_(message
)(Vg_DebugMsg
, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f
);
1979 SET_CLCALL_RETVAL(tid
, f ( tid
, arg
[2] ), (Addr
)f
);
1982 case VG_USERREQ__CLIENT_CALL2
: {
1983 UWord (*f
)(ThreadId
, UWord
, UWord
) = (__typeof__(f
))arg
[1];
1985 VG_(message
)(Vg_DebugMsg
, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f
);
1987 SET_CLCALL_RETVAL(tid
, f ( tid
, arg
[2], arg
[3] ), (Addr
)f
);
1990 case VG_USERREQ__CLIENT_CALL3
: {
1991 UWord (*f
)(ThreadId
, UWord
, UWord
, UWord
) = (__typeof__(f
))arg
[1];
1993 VG_(message
)(Vg_DebugMsg
, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f
);
1995 SET_CLCALL_RETVAL(tid
, f ( tid
, arg
[2], arg
[3], arg
[4] ), (Addr
)f
);
1999 // Nb: this looks like a circular definition, because it kind of is.
2000 // See comment in valgrind.h to understand what's going on.
2001 case VG_USERREQ__RUNNING_ON_VALGRIND
:
2002 SET_CLREQ_RETVAL(tid
, RUNNING_ON_VALGRIND
+1);
2005 case VG_USERREQ__PRINTF
: {
2006 const HChar
* format
= (HChar
*)arg
[1];
2007 /* JRS 2010-Jan-28: this is DEPRECATED; use the
2008 _VALIST_BY_REF version instead */
2009 if (sizeof(va_list) != sizeof(UWord
))
2010 goto va_list_casting_error_NORETURN
;
2015 u
.uw
= (unsigned long)arg
[2];
2017 print_client_message( tid
, format
, &u
.vargs
,
2018 /* include_backtrace */ False
);
2019 SET_CLREQ_RETVAL( tid
, count
);
2023 case VG_USERREQ__PRINTF_BACKTRACE
: {
2024 const HChar
* format
= (HChar
*)arg
[1];
2025 /* JRS 2010-Jan-28: this is DEPRECATED; use the
2026 _VALIST_BY_REF version instead */
2027 if (sizeof(va_list) != sizeof(UWord
))
2028 goto va_list_casting_error_NORETURN
;
2033 u
.uw
= (unsigned long)arg
[2];
2035 print_client_message( tid
, format
, &u
.vargs
,
2036 /* include_backtrace */ True
);
2037 SET_CLREQ_RETVAL( tid
, count
);
2041 case VG_USERREQ__PRINTF_VALIST_BY_REF
: {
2042 const HChar
* format
= (HChar
*)arg
[1];
2043 va_list* vargsp
= (va_list*)arg
[2];
2045 print_client_message( tid
, format
, vargsp
,
2046 /* include_backtrace */ False
);
2048 SET_CLREQ_RETVAL( tid
, count
);
2052 case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF
: {
2053 const HChar
* format
= (HChar
*)arg
[1];
2054 va_list* vargsp
= (va_list*)arg
[2];
2056 print_client_message( tid
, format
, vargsp
,
2057 /* include_backtrace */ True
);
2058 SET_CLREQ_RETVAL( tid
, count
);
2062 case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF
: {
2063 va_list* vargsp
= (va_list*)arg
[2];
2065 VG_(vmessage
)( Vg_DebugMsg
, (HChar
*)arg
[1], *vargsp
);
2066 VG_(message_flush
)();
2067 SET_CLREQ_RETVAL( tid
, count
);
2071 case VG_USERREQ__ADD_IFUNC_TARGET
: {
2072 VG_(redir_add_ifunc_target
)( arg
[1], arg
[2] );
2073 SET_CLREQ_RETVAL( tid
, 0);
2076 case VG_USERREQ__STACK_REGISTER
: {
2077 UWord sid
= VG_(register_stack
)((Addr
)arg
[1], (Addr
)arg
[2]);
2078 SET_CLREQ_RETVAL( tid
, sid
);
2079 VG_TRACK(register_stack
, (Addr
)arg
[1], (Addr
)arg
[2]);
2082 case VG_USERREQ__STACK_DEREGISTER
: {
2083 VG_(deregister_stack
)(arg
[1]);
2084 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2087 case VG_USERREQ__STACK_CHANGE
: {
2088 VG_(change_stack
)(arg
[1], (Addr
)arg
[2], (Addr
)arg
[3]);
2089 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2092 case VG_USERREQ__GET_MALLOCFUNCS
: {
2093 struct vg_mallocfunc_info
*info
= (struct vg_mallocfunc_info
*)arg
[1];
2095 info
->tl_malloc
= VG_(tdict
).tool_malloc
;
2096 info
->tl_calloc
= VG_(tdict
).tool_calloc
;
2097 info
->tl_realloc
= VG_(tdict
).tool_realloc
;
2098 info
->tl_memalign
= VG_(tdict
).tool_memalign
;
2099 info
->tl___builtin_new
= VG_(tdict
).tool___builtin_new
;
2100 info
->tl___builtin_new_aligned
= VG_(tdict
).tool___builtin_new_aligned
;
2101 info
->tl___builtin_vec_new
= VG_(tdict
).tool___builtin_vec_new
;
2102 info
->tl___builtin_vec_new_aligned
= VG_(tdict
).tool___builtin_vec_new_aligned
;
2103 info
->tl_free
= VG_(tdict
).tool_free
;
2104 info
->tl___builtin_delete
= VG_(tdict
).tool___builtin_delete
;
2105 info
->tl___builtin_delete_aligned
= VG_(tdict
).tool___builtin_delete_aligned
;
2106 info
->tl___builtin_vec_delete
= VG_(tdict
).tool___builtin_vec_delete
;
2107 info
->tl___builtin_vec_delete_aligned
= VG_(tdict
).tool___builtin_vec_delete_aligned
;
2108 info
->tl_malloc_usable_size
= VG_(tdict
).tool_malloc_usable_size
;
2110 info
->mallinfo
= VG_(mallinfo
);
2111 info
->clo_trace_malloc
= VG_(clo_trace_malloc
);
2112 info
->clo_realloc_zero_bytes_frees
= VG_(clo_realloc_zero_bytes_frees
);
2114 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2119 /* Requests from the client program */
2121 case VG_USERREQ__DISCARD_TRANSLATIONS
:
2122 if (VG_(clo_verbosity
) > 2)
2123 VG_(printf
)( "client request: DISCARD_TRANSLATIONS,"
2124 " addr %p, len %lu\n",
2125 (void*)arg
[1], arg
[2] );
2127 VG_(discard_translations
)(
2128 arg
[1], arg
[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
2131 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2134 case VG_USERREQ__INNER_THREADS
:
2135 if (VG_(clo_verbosity
) > 2)
2136 VG_(printf
)( "client request: INNER_THREADS,"
2139 VG_(inner_threads
) = (ThreadState
*)arg
[1];
2140 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2143 case VG_USERREQ__COUNT_ERRORS
:
2144 SET_CLREQ_RETVAL( tid
, VG_(get_n_errs_found
)() );
2147 case VG_USERREQ__CLO_CHANGE
:
2148 VG_(process_dynamic_option
) (cloD
, (HChar
*)arg
[1]);
2149 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2152 case VG_USERREQ__LOAD_PDB_DEBUGINFO
:
2153 VG_(di_notify_pdb_debuginfo
)( arg
[1], arg
[2], arg
[3], arg
[4] );
2154 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2157 case VG_USERREQ__MAP_IP_TO_SRCLOC
: {
2159 HChar
* buf64
= (HChar
*)arg
[2]; // points to a HChar [64] array
2160 const HChar
*buf
; // points to a string of unknown size
2162 VG_(memset
)(buf64
, 0, 64);
2165 // Unless the guest would become epoch aware (and would need to
2166 // describe IP addresses of dlclosed libs), using cur_ep is a
2167 // reasonable choice.
2168 const DiEpoch cur_ep
= VG_(current_DiEpoch
)();
2170 Bool ok
= VG_(get_filename_linenum
)(
2171 cur_ep
, ip
, &buf
, NULL
, &linenum
2174 /* For backward compatibility truncate the filename to
2176 VG_(strncpy
)(buf64
, buf
, 50);
2179 for (i
= 0; i
< 50; i
++) {
2183 VG_(sprintf
)(buf64
+i
, ":%u", linenum
); // safe
2188 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2192 case VG_USERREQ__CHANGE_ERR_DISABLEMENT
: {
2193 Word delta
= arg
[1];
2194 vg_assert(delta
== 1 || delta
== -1);
2195 ThreadState
* tst
= VG_(get_ThreadState
)(tid
);
2197 if (delta
== 1 && tst
->err_disablement_level
< 0xFFFFFFFF) {
2198 tst
->err_disablement_level
++;
2201 if (delta
== -1 && tst
->err_disablement_level
> 0) {
2202 tst
->err_disablement_level
--;
2204 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2208 case VG_USERREQ__GDB_MONITOR_COMMAND
: {
2210 ret
= (UWord
) VG_(client_monitor_command
) ((HChar
*)arg
[1]);
2211 SET_CLREQ_RETVAL(tid
, ret
);
2215 case VG_USERREQ__MALLOCLIKE_BLOCK
:
2216 case VG_USERREQ__RESIZEINPLACE_BLOCK
:
2217 case VG_USERREQ__FREELIKE_BLOCK
:
2218 // Ignore them if the addr is NULL; otherwise pass onto the tool.
2220 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2226 case VG_USERREQ__VEX_INIT_FOR_IRI
:
2227 LibVEX_InitIRI ( (IRICB
*)arg
[1] );
2232 if (os_client_request(tid
, arg
)) {
2233 // do nothing, os_client_request() handled it
2234 } else if (VG_(needs
).client_requests
) {
2237 if (VG_(clo_verbosity
) > 2)
2238 VG_(printf
)("client request: code %lx, addr %p, len %lu\n",
2239 arg
[0], (void*)arg
[1], arg
[2] );
2241 if ( VG_TDICT_CALL(tool_handle_client_request
, tid
, arg
, &ret
) )
2242 SET_CLREQ_RETVAL(tid
, ret
);
2244 static Bool whined
= False
;
2246 if (!whined
&& VG_(clo_verbosity
) > 2) {
2247 // Allow for requests in core, but defined by tools, which
2248 // have 0 and 0 in their two high bytes.
2249 HChar c1
= (arg
[0] >> 24) & 0xff;
2250 HChar c2
= (arg
[0] >> 16) & 0xff;
2251 if (c1
== 0) c1
= '_';
2252 if (c2
== 0) c2
= '_';
2253 VG_(message
)(Vg_UserMsg
, "Warning:\n"
2254 " unhandled client request: 0x%lx (%c%c+0x%lx). Perhaps\n"
2255 " VG_(needs).client_requests should be set?\n",
2256 arg
[0], c1
, c2
, arg
[0] & 0xffff);
2265 va_list_casting_error_NORETURN
:
2267 "Valgrind: fatal error - cannot continue: use of the deprecated\n"
2268 "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
2269 "on a platform where they cannot be supported. Please use the\n"
2270 "equivalent _VALIST_BY_REF versions instead.\n"
2272 "This is a binary-incompatible change in Valgrind's client request\n"
2273 "mechanism. It is unfortunate, but difficult to avoid. End-users\n"
2274 "are expected to almost never see this message. The only case in\n"
2275 "which you might see this message is if your code uses the macros\n"
2276 "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE. If so, you will need\n"
2277 "to recompile such code, using the header files from this version of\n"
2278 "Valgrind, and not any previous version.\n"
2280 "If you see this message in any other circumstances, it is probably\n"
2281 "a bug in Valgrind. In this case, please file a bug report at\n"
2283 " http://www.valgrind.org/support/bug_reports.html\n"
2291 /* ---------------------------------------------------------------------
2292 Sanity checking (permanently engaged)
2293 ------------------------------------------------------------------ */
2295 /* Internal consistency checks on the sched structures. */
2297 void scheduler_sanity ( ThreadId tid
)
2300 Int lwpid
= VG_(gettid
)();
2302 if (!VG_(is_running_thread
)(tid
)) {
2303 VG_(message
)(Vg_DebugMsg
,
2304 "Thread %u is supposed to be running, "
2305 "but doesn't own the_BigLock (owned by %u)\n",
2306 tid
, VG_(running_tid
));
2310 if (lwpid
!= VG_(threads
)[tid
].os_state
.lwpid
) {
2311 VG_(message
)(Vg_DebugMsg
,
2312 "Thread %u supposed to be in LWP %d, but we're actually %d\n",
2313 tid
, VG_(threads
)[tid
].os_state
.lwpid
, VG_(gettid
)());
2317 if (lwpid
!= ML_(get_sched_lock_owner
)(the_BigLock
)) {
2318 VG_(message
)(Vg_DebugMsg
,
2319 "Thread (LWPID) %u doesn't own the_BigLock\n",
2325 /* Periodically show the state of all threads, for debugging
2327 static UInt lasttime
= 0;
2329 now
= VG_(read_millisecond_timer
)();
2330 if ((!bad
) && (lasttime
+ 4000/*ms*/ <= now
)) {
2332 VG_(printf
)("\n------------ Sched State at %d ms ------------\n",
2334 VG_(show_sched_status
)(True
, // host_stacktrace
2335 True
, // stack_usage
2336 True
); // exited_threads);
2340 /* core_panic also shows the sched status, which is why we don't
2341 show it above if bad==True. */
2343 VG_(core_panic
)("scheduler_sanity: failed");
2346 void VG_(sanity_check_general
) ( Bool force_expensive
)
2350 static UInt next_slow_check_at
= 1;
2351 static UInt slow_check_interval
= 25;
2353 if (VG_(clo_sanity_level
) < 1) return;
2355 /* --- First do all the tests that we can do quickly. ---*/
2357 sanity_fast_count
++;
2359 /* Check stuff pertaining to the memory check system. */
2361 /* Check that nobody has spuriously claimed that the first or
2362 last 16 pages of memory have become accessible [...] */
2363 if (VG_(needs
).sanity_checks
) {
2364 vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check
));
2367 /* --- Now some more expensive checks. ---*/
2369 /* Once every now and again, check some more expensive stuff.
2370 Gradually increase the interval between such checks so as not to
2371 burden long-running programs too much. */
2372 if ( force_expensive
2373 || VG_(clo_sanity_level
) > 1
2374 || (VG_(clo_sanity_level
) == 1
2375 && sanity_fast_count
== next_slow_check_at
)) {
2377 if (0) VG_(printf
)("SLOW at %u\n", sanity_fast_count
-1);
2379 next_slow_check_at
= sanity_fast_count
- 1 + slow_check_interval
;
2380 slow_check_interval
++;
2381 sanity_slow_count
++;
2383 if (VG_(needs
).sanity_checks
) {
2384 vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check
));
2387 /* Look for stack overruns. Visit all threads. */
2388 for (tid
= 1; tid
< VG_N_THREADS
; tid
++) {
2392 if (VG_(threads
)[tid
].status
== VgTs_Empty
||
2393 VG_(threads
)[tid
].status
== VgTs_Zombie
)
2398 VG_(get_ThreadState
)(tid
)->os_state
.valgrind_stack_base
;
2400 = 4096; // Let's say. Checking more causes lots of L2 misses.
2402 = VG_(am_get_VgStack_unused_szB
)(stack
, limit
);
2403 if (remains
< limit
)
2404 VG_(message
)(Vg_DebugMsg
,
2405 "WARNING: Thread %u is within %lu bytes "
2406 "of running out of valgrind stack!\n"
2407 "Valgrind stack size can be increased "
2408 "using --valgrind-stacksize=....\n",
2413 if (VG_(clo_sanity_level
) > 1) {
2414 /* Check sanity of the low-level memory manager. Note that bugs
2415 in the client's code can cause this to fail, so we don't do
2416 this check unless specially asked for. And because it's
2417 potentially very expensive. */
2418 VG_(sanity_check_malloc_all
)();
2422 /*--------------------------------------------------------------------*/
2424 /*--------------------------------------------------------------------*/