2 /*--------------------------------------------------------------------*/
3 /*--- Thread scheduling. scheduler.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2000-2017 Julian Seward
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
32 Valgrind tries to emulate the kernel's threading as closely as
33 possible. The client does all threading via the normal syscalls
34 (on Linux: clone, etc). Valgrind emulates this by creating exactly
35 the same process structure as would be created without Valgrind.
36 There are no extra threads.
38 The main difference is that Valgrind only allows one client thread
39 to run at once. This is controlled with the CPU Big Lock,
40 "the_BigLock". Any time a thread wants to run client code or
41 manipulate any shared state (which is anything other than its own
42 ThreadState entry), it must hold the_BigLock.
44 When a thread is about to block in a blocking syscall, it releases
45 the_BigLock, and re-takes it when it becomes runnable again (either
46 because the syscall finished, or we took a signal).
48 VG_(scheduler) therefore runs in each thread. It returns only when
49 the thread is exiting, either because it exited itself, or it was
50 told to exit by another thread.
52 This file is almost entirely OS-independent. The details of how
53 the OS handles threading and signalling are abstracted away and
54 implemented elsewhere. [Some of the functions have worked their
55 way back for the moment, until we do an OS port in earnest...]
59 #include "pub_core_basics.h"
60 #include "pub_core_debuglog.h"
61 #include "pub_core_vki.h"
62 #include "pub_core_vkiscnums.h" // __NR_sched_yield
63 #include "pub_core_threadstate.h"
64 #include "pub_core_clientstate.h"
65 #include "pub_core_aspacemgr.h"
66 #include "pub_core_clreq.h" // for VG_USERREQ__*
67 #include "pub_core_dispatch.h"
68 #include "pub_core_errormgr.h" // For VG_(get_n_errs_found)()
69 #include "pub_core_gdbserver.h" // for VG_(gdbserver)/VG_(gdbserver_activity)
70 #include "pub_core_libcbase.h"
71 #include "pub_core_libcassert.h"
72 #include "pub_core_libcprint.h"
73 #include "pub_core_libcproc.h"
74 #include "pub_core_libcsignal.h"
75 #if defined(VGO_darwin)
76 #include "pub_core_mach.h"
78 #include "pub_core_machine.h"
79 #include "pub_core_mallocfree.h"
80 #include "pub_core_options.h"
81 #include "pub_core_replacemalloc.h"
82 #include "pub_core_sbprofile.h"
83 #include "pub_core_signals.h"
84 #include "pub_core_stacks.h"
85 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
86 #include "pub_core_syscall.h"
87 #include "pub_core_syswrap.h"
88 #include "pub_core_tooliface.h"
89 #include "pub_core_translate.h" // For VG_(translate)()
90 #include "pub_core_transtab.h"
91 #include "pub_core_debuginfo.h" // VG_(di_notify_pdb_debuginfo)
92 #include "priv_sched-lock.h"
93 #include "pub_core_scheduler.h" // self
94 #include "pub_core_redir.h"
95 #include "libvex_emnote.h" // VexEmNote
98 /* ---------------------------------------------------------------------
99 Types and globals for the scheduler.
100 ------------------------------------------------------------------ */
102 /* ThreadId and ThreadState are defined elsewhere*/
104 /* If False, a fault is Valgrind-internal (ie, a bug) */
105 Bool
VG_(in_generated_code
) = False
;
107 /* 64-bit counter for the number of basic blocks done. */
108 static ULong bbs_done
= 0;
110 /* Counter to see if vgdb activity is to be verified.
111 When nr of bbs done reaches vgdb_next_poll, scheduler will
112 poll for gdbserver activity. VG_(force_vgdb_poll) and
113 VG_(disable_vgdb_poll) allows the valgrind core (e.g. m_gdbserver)
114 to control when the next poll will be done. */
115 static ULong vgdb_next_poll
;
118 static void do_client_request ( ThreadId tid
);
119 static void scheduler_sanity ( ThreadId tid
);
120 static void mostly_clear_thread_record ( ThreadId tid
);
123 static ULong n_scheduling_events_MINOR
= 0;
124 static ULong n_scheduling_events_MAJOR
= 0;
126 /* Stats: number of XIndirs looked up in the fast cache, the number of hits in
127 ways 1, 2 and 3, and the number of misses. The number of hits in way 0 isn't
128 recorded because it can be computed from these five numbers. */
129 static ULong stats__n_xIndirs
= 0;
130 static ULong stats__n_xIndir_hits1
= 0;
131 static ULong stats__n_xIndir_hits2
= 0;
132 static ULong stats__n_xIndir_hits3
= 0;
133 static ULong stats__n_xIndir_misses
= 0;
135 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
136 have to do 64 bit incs on the hot path through
137 VG_(disp_cp_xindir). */
138 /*global*/ UInt
VG_(stats__n_xIndirs_32
) = 0;
139 /*global*/ UInt
VG_(stats__n_xIndir_hits1_32
) = 0;
140 /*global*/ UInt
VG_(stats__n_xIndir_hits2_32
) = 0;
141 /*global*/ UInt
VG_(stats__n_xIndir_hits3_32
) = 0;
142 /*global*/ UInt
VG_(stats__n_xIndir_misses_32
) = 0;
144 /* Sanity checking counts. */
145 static UInt sanity_fast_count
= 0;
146 static UInt sanity_slow_count
= 0;
148 void VG_(print_scheduler_stats
)(void)
150 VG_(message
)(Vg_DebugMsg
,
151 "scheduler: %'llu event checks.\n", bbs_done
);
154 = stats__n_xIndirs
- stats__n_xIndir_hits1
- stats__n_xIndir_hits2
155 - stats__n_xIndir_hits3
- stats__n_xIndir_misses
;
156 VG_(message
)(Vg_DebugMsg
,
157 "scheduler: %'llu indir transfers, "
158 "%'llu misses (1 in %llu) ..\n",
159 stats__n_xIndirs
, stats__n_xIndir_misses
,
160 stats__n_xIndirs
/ (stats__n_xIndir_misses
161 ? stats__n_xIndir_misses
: 1));
162 VG_(message
)(Vg_DebugMsg
,
163 "scheduler: .. of which: %'llu hit0, %'llu hit1, "
164 "%'llu hit2, %'llu hit3, %'llu missed\n",
166 stats__n_xIndir_hits1
,
167 stats__n_xIndir_hits2
,
168 stats__n_xIndir_hits3
,
169 stats__n_xIndir_misses
);
171 VG_(message
)(Vg_DebugMsg
,
172 "scheduler: %'llu/%'llu major/minor sched events.\n",
173 n_scheduling_events_MAJOR
, n_scheduling_events_MINOR
);
174 VG_(message
)(Vg_DebugMsg
,
175 " sanity: %u cheap, %u expensive checks.\n",
176 sanity_fast_count
, sanity_slow_count
);
180 * Mutual exclusion object used to serialize threads.
182 static struct sched_lock
*the_BigLock
;
185 /* ---------------------------------------------------------------------
186 Helper functions for the scheduler.
187 ------------------------------------------------------------------ */
189 static void maybe_progress_report ( UInt reporting_interval_seconds
)
191 /* This is when the next report is due, in user cpu milliseconds since
192 process start. This is a global variable so this won't be thread-safe
193 if Valgrind is ever made multithreaded. For now it's fine. */
194 static UInt next_report_due_at
= 0;
196 /* First of all, figure out whether another report is due. It
198 UInt user_ms
= VG_(get_user_milliseconds
)();
199 if (LIKELY(user_ms
< next_report_due_at
))
202 Bool first_ever_call
= next_report_due_at
== 0;
204 /* A report is due. First, though, set the time for the next report. */
205 next_report_due_at
+= 1000 * reporting_interval_seconds
;
207 /* If it's been an excessively long time since the last check, we
208 might have gone more than one reporting interval forward. Guard
210 while (next_report_due_at
<= user_ms
)
211 next_report_due_at
+= 1000 * reporting_interval_seconds
;
213 /* Also we don't want to report anything on the first call, but we
214 have to wait till this point to leave, so that we set up the
215 next-call time correctly. */
219 /* Print the report. */
220 UInt user_cpu_seconds
= user_ms
/ 1000;
221 UInt wallclock_seconds
= VG_(read_millisecond_timer
)() / 1000;
222 Double millionEvCs
= ((Double
)bbs_done
) / 1000000.0;
223 Double thousandTIns
= ((Double
)VG_(get_bbs_translated
)()) / 1000.0;
224 Double thousandTOuts
= ((Double
)VG_(get_bbs_discarded_or_dumped
)()) / 1000.0;
225 UInt nThreads
= VG_(count_living_threads
)();
227 if (VG_(clo_verbosity
) > 0) {
228 VG_(dmsg
)("PROGRESS: U %'us, W %'us, %.1f%% CPU, EvC %.2fM, "
229 "TIn %.1fk, TOut %.1fk, #thr %u\n",
230 user_cpu_seconds
, wallclock_seconds
,
232 * (Double
)(user_cpu_seconds
)
233 / (Double
)(wallclock_seconds
== 0 ? 1 : wallclock_seconds
),
235 thousandTIns
, thousandTOuts
, nThreads
);
240 void print_sched_event ( ThreadId tid
, const HChar
* what
)
242 VG_(message
)(Vg_DebugMsg
, " SCHED[%u]: %s\n", tid
, what
);
245 /* For showing SB profiles, if the user asks to see them. */
247 void maybe_show_sb_profile ( void )
249 /* DO NOT MAKE NON-STATIC */
250 static ULong bbs_done_lastcheck
= 0;
252 vg_assert(VG_(clo_profyle_interval
) > 0);
253 Long delta
= (Long
)(bbs_done
- bbs_done_lastcheck
);
254 vg_assert(delta
>= 0);
255 if ((ULong
)delta
>= VG_(clo_profyle_interval
)) {
256 bbs_done_lastcheck
= bbs_done
;
257 VG_(get_and_show_SB_profile
)(bbs_done
);
262 const HChar
* name_of_sched_event ( UInt event
)
265 case VEX_TRC_JMP_INVALICACHE
: return "INVALICACHE";
266 case VEX_TRC_JMP_FLUSHDCACHE
: return "FLUSHDCACHE";
267 case VEX_TRC_JMP_NOREDIR
: return "NOREDIR";
268 case VEX_TRC_JMP_SIGILL
: return "SIGILL";
269 case VEX_TRC_JMP_SIGTRAP
: return "SIGTRAP";
270 case VEX_TRC_JMP_SIGSEGV
: return "SIGSEGV";
271 case VEX_TRC_JMP_SIGBUS
: return "SIGBUS";
272 case VEX_TRC_JMP_SIGFPE_INTOVF
:
273 case VEX_TRC_JMP_SIGFPE_INTDIV
: return "SIGFPE";
274 case VEX_TRC_JMP_EMWARN
: return "EMWARN";
275 case VEX_TRC_JMP_EMFAIL
: return "EMFAIL";
276 case VEX_TRC_JMP_CLIENTREQ
: return "CLIENTREQ";
277 case VEX_TRC_JMP_YIELD
: return "YIELD";
278 case VEX_TRC_JMP_NODECODE
: return "NODECODE";
279 case VEX_TRC_JMP_MAPFAIL
: return "MAPFAIL";
280 case VEX_TRC_JMP_SYS_SYSCALL
: return "SYSCALL";
281 case VEX_TRC_JMP_SYS_INT32
: return "INT32";
282 case VEX_TRC_JMP_SYS_INT128
: return "INT128";
283 case VEX_TRC_JMP_SYS_INT129
: return "INT129";
284 case VEX_TRC_JMP_SYS_INT130
: return "INT130";
285 case VEX_TRC_JMP_SYS_INT145
: return "INT145";
286 case VEX_TRC_JMP_SYS_INT210
: return "INT210";
287 case VEX_TRC_JMP_SYS_SYSENTER
: return "SYSENTER";
288 case VEX_TRC_JMP_BORING
: return "VEX_BORING";
290 case VG_TRC_BORING
: return "VG_BORING";
291 case VG_TRC_INNER_FASTMISS
: return "FASTMISS";
292 case VG_TRC_INNER_COUNTERZERO
: return "COUNTERZERO";
293 case VG_TRC_FAULT_SIGNAL
: return "FAULTSIGNAL";
294 case VG_TRC_INVARIANT_FAILED
: return "INVFAILED";
295 case VG_TRC_CHAIN_ME_TO_SLOW_EP
: return "CHAIN_ME_SLOW";
296 case VG_TRC_CHAIN_ME_TO_FAST_EP
: return "CHAIN_ME_FAST";
297 default: return "??UNKNOWN??";
301 /* Allocate a completely empty ThreadState record. */
302 ThreadId
VG_(alloc_ThreadState
) ( void )
305 for (i
= 1; i
< VG_N_THREADS
; i
++) {
306 if (VG_(threads
)[i
].status
== VgTs_Empty
) {
307 VG_(threads
)[i
].status
= VgTs_Init
;
308 VG_(threads
)[i
].exitreason
= VgSrc_None
;
309 if (VG_(threads
)[i
].thread_name
)
310 VG_(free
)(VG_(threads
)[i
].thread_name
);
311 VG_(threads
)[i
].thread_name
= NULL
;
315 VG_(printf
)("Use --max-threads=INT to specify a larger number of threads\n"
316 "and rerun valgrind\n");
317 VG_(core_panic
)("Max number of threads is too low");
322 Mark a thread as Runnable. This will block until the_BigLock is
323 available, so that we get exclusive access to all the shared
324 structures and the CPU. Up until we get the_BigLock, we must not
325 touch any shared state.
327 When this returns, we'll actually be running.
329 void VG_(acquire_BigLock
)(ThreadId tid
, const HChar
* who
)
334 if (VG_(clo_trace_sched
)) {
335 HChar buf
[VG_(strlen
)(who
) + 30];
336 VG_(sprintf
)(buf
, "waiting for lock (%s)", who
);
337 print_sched_event(tid
, buf
);
341 /* First, acquire the_BigLock. We can't do anything else safely
342 prior to this point. Even doing debug printing prior to this
343 point is, technically, wrong. */
344 VG_(acquire_BigLock_LL
)(NULL
);
346 tst
= VG_(get_ThreadState
)(tid
);
348 vg_assert(tst
->status
!= VgTs_Runnable
);
350 tst
->status
= VgTs_Runnable
;
352 if (VG_(running_tid
) != VG_INVALID_THREADID
)
353 VG_(printf
)("tid %u found %u running\n", tid
, VG_(running_tid
));
354 vg_assert(VG_(running_tid
) == VG_INVALID_THREADID
);
355 VG_(running_tid
) = tid
;
357 { Addr gsp
= VG_(get_SP
)(tid
);
358 if (NULL
!= VG_(tdict
).track_new_mem_stack_w_ECU
)
359 VG_(unknown_SP_update_w_ECU
)(gsp
, gsp
, 0/*unknown origin*/);
361 VG_(unknown_SP_update
)(gsp
, gsp
);
364 if (VG_(clo_trace_sched
)) {
365 HChar buf
[VG_(strlen
)(who
) + 30];
366 VG_(sprintf
)(buf
, " acquired lock (%s)", who
);
367 print_sched_event(tid
, buf
);
372 Set a thread into a sleeping state, and give up exclusive access to
373 the CPU. On return, the thread must be prepared to block until it
374 is ready to run again (generally this means blocking in a syscall,
375 but it may mean that we remain in a Runnable state and we're just
376 yielding the CPU to another thread).
378 void VG_(release_BigLock
)(ThreadId tid
, ThreadStatus sleepstate
,
381 ThreadState
*tst
= VG_(get_ThreadState
)(tid
);
383 vg_assert(tst
->status
== VgTs_Runnable
);
385 vg_assert(sleepstate
== VgTs_WaitSys
||
386 sleepstate
== VgTs_Yielding
);
388 tst
->status
= sleepstate
;
390 vg_assert(VG_(running_tid
) == tid
);
391 VG_(running_tid
) = VG_INVALID_THREADID
;
393 if (VG_(clo_trace_sched
)) {
394 const HChar
*status
= VG_(name_of_ThreadStatus
)(sleepstate
);
395 HChar buf
[VG_(strlen
)(who
) + VG_(strlen
)(status
) + 30];
396 VG_(sprintf
)(buf
, "releasing lock (%s) -> %s", who
, status
);
397 print_sched_event(tid
, buf
);
400 /* Release the_BigLock; this will reschedule any runnable
402 VG_(release_BigLock_LL
)(NULL
);
405 static void init_BigLock(void)
407 vg_assert(!the_BigLock
);
408 the_BigLock
= ML_(create_sched_lock
)();
411 static void deinit_BigLock(void)
413 ML_(destroy_sched_lock
)(the_BigLock
);
417 /* See pub_core_scheduler.h for description */
418 void VG_(acquire_BigLock_LL
) ( const HChar
* who
)
420 ML_(acquire_sched_lock
)(the_BigLock
);
423 /* See pub_core_scheduler.h for description */
424 void VG_(release_BigLock_LL
) ( const HChar
* who
)
426 ML_(release_sched_lock
)(the_BigLock
);
429 Bool
VG_(owns_BigLock_LL
) ( ThreadId tid
)
431 return (ML_(get_sched_lock_owner
)(the_BigLock
)
432 == VG_(threads
)[tid
].os_state
.lwpid
);
436 /* Clear out the ThreadState and release the semaphore. Leaves the
437 ThreadState in VgTs_Zombie state, so that it doesn't get
438 reallocated until the caller is really ready. */
439 void VG_(exit_thread
)(ThreadId tid
)
441 vg_assert(VG_(is_valid_tid
)(tid
));
442 vg_assert(VG_(is_running_thread
)(tid
));
443 vg_assert(VG_(is_exiting
)(tid
));
445 mostly_clear_thread_record(tid
);
446 VG_(running_tid
) = VG_INVALID_THREADID
;
448 /* There should still be a valid exitreason for this thread */
449 vg_assert(VG_(threads
)[tid
].exitreason
!= VgSrc_None
);
451 if (VG_(clo_trace_sched
))
452 print_sched_event(tid
, "release lock in VG_(exit_thread)");
454 VG_(release_BigLock_LL
)(NULL
);
457 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
458 out of the syscall and onto doing the next thing, whatever that is.
459 If it isn't blocked in a syscall, has no effect on the thread. */
460 void VG_(get_thread_out_of_syscall
)(ThreadId tid
)
462 vg_assert(VG_(is_valid_tid
)(tid
));
463 vg_assert(!VG_(is_running_thread
)(tid
));
465 if (VG_(threads
)[tid
].status
== VgTs_WaitSys
) {
466 if (VG_(clo_trace_signals
)) {
467 VG_(message
)(Vg_DebugMsg
,
468 "get_thread_out_of_syscall zaps tid %u lwp %d\n",
469 tid
, VG_(threads
)[tid
].os_state
.lwpid
);
471 # if defined(VGO_darwin)
473 // GrP fixme use mach primitives on darwin?
474 // GrP fixme thread_abort_safely?
475 // GrP fixme race for thread with WaitSys set but not in syscall yet?
476 extern kern_return_t
thread_abort(mach_port_t
);
477 thread_abort(VG_(threads
)[tid
].os_state
.lwpid
);
481 __attribute__((unused
))
482 Int r
= VG_(tkill
)(VG_(threads
)[tid
].os_state
.lwpid
, VG_SIGVGKILL
);
483 /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
484 I'm really not sure. Here's a race scenario which argues
485 that we shoudn't; but equally I'm not sure the scenario is
486 even possible, because of constraints caused by the question
487 of who holds the BigLock when.
489 Target thread tid does sys_read on a socket and blocks. This
490 function gets called, and we observe correctly that tid's
491 status is WaitSys but then for whatever reason this function
492 goes very slowly for a while. Then data arrives from
493 wherever, tid's sys_read returns, tid exits. Then we do
494 tkill on tid, but tid no longer exists; tkill returns an
495 error code and the assert fails. */
496 /* vg_assert(r == 0); */
503 Yield the CPU for a short time to let some other thread run.
505 void VG_(vg_yield
)(void)
507 ThreadId tid
= VG_(running_tid
);
509 vg_assert(tid
!= VG_INVALID_THREADID
);
510 vg_assert(VG_(threads
)[tid
].os_state
.lwpid
== VG_(gettid
)());
512 VG_(release_BigLock
)(tid
, VgTs_Yielding
, "VG_(vg_yield)");
515 Tell the kernel we're yielding.
517 # if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
518 VG_(do_syscall0
)(__NR_sched_yield
);
519 # elif defined(VGO_solaris)
520 VG_(do_syscall0
)(__NR_yield
);
525 VG_(acquire_BigLock
)(tid
, "VG_(vg_yield)");
529 /* Set the standard set of blocked signals, used whenever we're not
530 running a client syscall. */
531 static void block_signals(void)
535 VG_(sigfillset
)(&mask
);
537 /* Don't block these because they're synchronous */
538 VG_(sigdelset
)(&mask
, VKI_SIGSEGV
);
539 VG_(sigdelset
)(&mask
, VKI_SIGBUS
);
540 VG_(sigdelset
)(&mask
, VKI_SIGFPE
);
541 VG_(sigdelset
)(&mask
, VKI_SIGILL
);
542 VG_(sigdelset
)(&mask
, VKI_SIGTRAP
);
543 VG_(sigdelset
)(&mask
, VKI_SIGSYS
);
545 /* Can't block these anyway */
546 VG_(sigdelset
)(&mask
, VKI_SIGSTOP
);
547 VG_(sigdelset
)(&mask
, VKI_SIGKILL
);
549 VG_(sigprocmask
)(VKI_SIG_SETMASK
, &mask
, NULL
);
552 static void os_state_clear(ThreadState
*tst
)
554 tst
->os_state
.lwpid
= 0;
555 tst
->os_state
.threadgroup
= 0;
556 tst
->os_state
.stk_id
= NULL_STK_ID
;
557 # if defined(VGO_linux)
558 /* no other fields to clear */
559 # elif defined(VGO_freebsd)
560 /* no other fields to clear */
561 # elif defined(VGO_darwin)
562 tst
->os_state
.post_mach_trap_fn
= NULL
;
563 tst
->os_state
.pthread
= 0;
564 tst
->os_state
.func_arg
= 0;
565 VG_(memset
)(&tst
->os_state
.child_go
, 0, sizeof(tst
->os_state
.child_go
));
566 VG_(memset
)(&tst
->os_state
.child_done
, 0, sizeof(tst
->os_state
.child_done
));
567 tst
->os_state
.wq_jmpbuf_valid
= False
;
568 tst
->os_state
.remote_port
= 0;
569 tst
->os_state
.msgh_id
= 0;
570 VG_(memset
)(&tst
->os_state
.mach_args
, 0, sizeof(tst
->os_state
.mach_args
));
571 # elif defined(VGO_solaris)
572 # if defined(VGP_x86_solaris)
573 tst
->os_state
.thrptr
= 0;
575 tst
->os_state
.ustack
= NULL
;
576 tst
->os_state
.in_door_return
= False
;
577 tst
->os_state
.door_return_procedure
= 0;
578 tst
->os_state
.oldcontext
= NULL
;
579 tst
->os_state
.schedctl_data
= 0;
580 tst
->os_state
.daemon_thread
= False
;
586 static void os_state_init(ThreadState
*tst
)
588 tst
->os_state
.valgrind_stack_base
= 0;
589 tst
->os_state
.valgrind_stack_init_SP
= 0;
594 void mostly_clear_thread_record ( ThreadId tid
)
596 vki_sigset_t savedmask
;
598 vg_assert(tid
>= 0 && tid
< VG_N_THREADS
);
599 VG_(cleanup_thread
)(&VG_(threads
)[tid
].arch
);
600 VG_(threads
)[tid
].tid
= tid
;
602 /* Leave the thread in Zombie, so that it doesn't get reallocated
603 until the caller is finally done with the thread stack. */
604 VG_(threads
)[tid
].status
= VgTs_Zombie
;
606 VG_(sigemptyset
)(&VG_(threads
)[tid
].sig_mask
);
607 VG_(sigemptyset
)(&VG_(threads
)[tid
].tmp_sig_mask
);
609 os_state_clear(&VG_(threads
)[tid
]);
611 /* start with no altstack */
612 VG_(threads
)[tid
].altstack
.ss_sp
= (void *)0xdeadbeef;
613 VG_(threads
)[tid
].altstack
.ss_size
= 0;
614 VG_(threads
)[tid
].altstack
.ss_flags
= VKI_SS_DISABLE
;
616 VG_(clear_out_queued_signals
)(tid
, &savedmask
);
618 VG_(threads
)[tid
].sched_jmpbuf_valid
= False
;
622 Called in the child after fork. If the parent has multiple
623 threads, then we've inherited a VG_(threads) array describing them,
624 but only the thread which called fork() is actually alive in the
625 child. This functions needs to clean up all those other thread
628 Whichever tid in the parent which called fork() becomes the
629 master_tid in the child. That's because the only living slot in
630 VG_(threads) in the child after fork is VG_(threads)[tid], and it
631 would be too hard to try to re-number the thread and relocate the
632 thread state down to VG_(threads)[1].
634 This function also needs to reinitialize the_BigLock, since
635 otherwise we may end up sharing its state with the parent, which
636 would be deeply confusing.
638 static void sched_fork_cleanup(ThreadId me
)
641 vg_assert(VG_(running_tid
) == me
);
643 # if defined(VGO_darwin)
644 // GrP fixme hack reset Mach ports
648 VG_(threads
)[me
].os_state
.lwpid
= VG_(gettid
)();
649 VG_(threads
)[me
].os_state
.threadgroup
= VG_(getpid
)();
651 /* clear out all the unused thread slots */
652 for (tid
= 1; tid
< VG_N_THREADS
; tid
++) {
654 mostly_clear_thread_record(tid
);
655 VG_(threads
)[tid
].status
= VgTs_Empty
;
656 VG_(clear_syscallInfo
)(tid
);
660 /* re-init and take the sema */
663 VG_(acquire_BigLock_LL
)(NULL
);
667 /* First phase of initialisation of the scheduler. Initialise the
668 bigLock, zeroise the VG_(threads) structure and decide on the
669 ThreadId of the root thread.
671 ThreadId
VG_(scheduler_init_phase1
) ( void )
676 VG_(debugLog
)(1,"sched","sched_init_phase1\n");
678 if (VG_(clo_fair_sched
) != disable_fair_sched
679 && !ML_(set_sched_lock_impl
)(sched_lock_ticket
)
680 && VG_(clo_fair_sched
) == enable_fair_sched
)
682 VG_(printf
)("Error: fair scheduling is not supported on this system.\n");
686 if (VG_(clo_verbosity
) > 1) {
687 VG_(message
)(Vg_DebugMsg
,
688 "Scheduler: using %s scheduler lock implementation.\n",
689 ML_(get_sched_lock_name
)());
694 for (i
= 0 /* NB; not 1 */; i
< VG_N_THREADS
; i
++) {
695 /* Paranoia .. completely zero it out. */
696 VG_(memset
)( & VG_(threads
)[i
], 0, sizeof( VG_(threads
)[i
] ) );
698 VG_(threads
)[i
].sig_queue
= NULL
;
700 os_state_init(&VG_(threads
)[i
]);
701 mostly_clear_thread_record(i
);
703 VG_(threads
)[i
].status
= VgTs_Empty
;
704 VG_(threads
)[i
].client_stack_szB
= 0;
705 VG_(threads
)[i
].client_stack_highest_byte
= (Addr
)NULL
;
706 VG_(threads
)[i
].err_disablement_level
= 0;
707 VG_(threads
)[i
].thread_name
= NULL
;
710 tid_main
= VG_(alloc_ThreadState
)();
712 /* Bleh. Unfortunately there are various places in the system that
713 assume that the main thread has a ThreadId of 1.
714 - Helgrind (possibly)
715 - stack overflow message in default_action() in m_signals.c
716 - definitely a lot more places
718 vg_assert(tid_main
== 1);
724 /* Second phase of initialisation of the scheduler. Given the root
725 ThreadId computed by first phase of initialisation, fill in stack
726 details and acquire bigLock. Initialise the scheduler. This is
727 called at startup. The caller subsequently initialises the guest
728 state components of this main thread.
730 void VG_(scheduler_init_phase2
) ( ThreadId tid_main
,
734 VG_(debugLog
)(1,"sched","sched_init_phase2: tid_main=%u, "
735 "cls_end=0x%lx, cls_sz=%lu\n",
736 tid_main
, clstack_end
, clstack_size
);
738 vg_assert(VG_IS_PAGE_ALIGNED(clstack_end
+1));
739 vg_assert(VG_IS_PAGE_ALIGNED(clstack_size
));
741 VG_(threads
)[tid_main
].client_stack_highest_byte
743 VG_(threads
)[tid_main
].client_stack_szB
746 VG_(atfork
)(NULL
, NULL
, sched_fork_cleanup
);
750 /* ---------------------------------------------------------------------
751 Helpers for running translations.
752 ------------------------------------------------------------------ */
/* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
   mask state, but does need to pass "val" through.  jumped must be a
   volatile UWord. */
#define SCHEDSETJMP(tid, jumped, stmt)                                  \
   do {                                                                 \
      ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);       \
                                                                        \
      (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
      if ((jumped) == ((UWord)0)) {                                     \
         vg_assert(!_qq_tst->sched_jmpbuf_valid);                       \
         _qq_tst->sched_jmpbuf_valid = True;                            \
         stmt;                                                          \
      } else if (VG_(clo_trace_sched))                                  \
         VG_(printf)("SCHEDSETJMP(line %d) tid %u, jumped=%lu\n",       \
                     __LINE__, tid, jumped);                            \
      vg_assert(_qq_tst->sched_jmpbuf_valid);                           \
      _qq_tst->sched_jmpbuf_valid = False;                              \
   } while(0)
774 /* Do various guest state alignment checks prior to running a thread.
775 Specifically, check that what we have matches Vex's guest state
776 layout requirements. See libvex.h for details, but in short the
777 requirements are: There must be no holes in between the primary
778 guest state, its two copies, and the spill area. In short, all 4
779 areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
780 be placed back-to-back without holes in between. */
781 static void do_pre_run_checks ( volatile ThreadState
* tst
)
783 Addr a_vex
= (Addr
) & tst
->arch
.vex
;
784 Addr a_vexsh1
= (Addr
) & tst
->arch
.vex_shadow1
;
785 Addr a_vexsh2
= (Addr
) & tst
->arch
.vex_shadow2
;
786 Addr a_spill
= (Addr
) & tst
->arch
.vex_spill
;
787 UInt sz_vex
= (UInt
) sizeof tst
->arch
.vex
;
788 UInt sz_vexsh1
= (UInt
) sizeof tst
->arch
.vex_shadow1
;
789 UInt sz_vexsh2
= (UInt
) sizeof tst
->arch
.vex_shadow2
;
790 UInt sz_spill
= (UInt
) sizeof tst
->arch
.vex_spill
;
793 VG_(printf
)("gst %p %u, sh1 %p %u, "
794 "sh2 %p %u, spill %p %u\n",
795 (void*)a_vex
, sz_vex
,
796 (void*)a_vexsh1
, sz_vexsh1
,
797 (void*)a_vexsh2
, sz_vexsh2
,
798 (void*)a_spill
, sz_spill
);
800 vg_assert(sz_vex
% LibVEX_GUEST_STATE_ALIGN
== 0);
801 vg_assert(sz_vexsh1
% LibVEX_GUEST_STATE_ALIGN
== 0);
802 vg_assert(sz_vexsh2
% LibVEX_GUEST_STATE_ALIGN
== 0);
803 vg_assert(sz_spill
% LibVEX_GUEST_STATE_ALIGN
== 0);
805 vg_assert(a_vex
% LibVEX_GUEST_STATE_ALIGN
== 0);
806 vg_assert(a_vexsh1
% LibVEX_GUEST_STATE_ALIGN
== 0);
807 vg_assert(a_vexsh2
% LibVEX_GUEST_STATE_ALIGN
== 0);
808 vg_assert(a_spill
% LibVEX_GUEST_STATE_ALIGN
== 0);
810 /* Check that the guest state and its two shadows have the same
811 size, and that there are no holes in between. The latter is
812 important because Memcheck assumes that it can reliably access
813 the shadows by indexing off a pointer to the start of the
814 primary guest state area. */
815 vg_assert(sz_vex
== sz_vexsh1
);
816 vg_assert(sz_vex
== sz_vexsh2
);
817 vg_assert(a_vex
+ 1 * sz_vex
== a_vexsh1
);
818 vg_assert(a_vex
+ 2 * sz_vex
== a_vexsh2
);
819 /* Also check there's no hole between the second shadow area and
821 vg_assert(sz_spill
== LibVEX_N_SPILL_BYTES
);
822 vg_assert(a_vex
+ 3 * sz_vex
== a_spill
);
824 # if defined(VGA_x86)
825 /* x86 XMM regs must form an array, ie, have no holes in
828 (offsetof(VexGuestX86State
,guest_XMM7
)
829 - offsetof(VexGuestX86State
,guest_XMM0
))
830 == (8/*#regs*/-1) * 16/*bytes per reg*/
832 vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State
,guest_XMM0
)));
833 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State
,guest_FPREG
)));
834 vg_assert(8 == offsetof(VexGuestX86State
,guest_EAX
));
835 vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State
,guest_EAX
)));
836 vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State
,guest_EIP
)));
839 # if defined(VGA_amd64)
840 /* amd64 YMM regs must form an array, ie, have no holes in
843 (offsetof(VexGuestAMD64State
,guest_YMM16
)
844 - offsetof(VexGuestAMD64State
,guest_YMM0
))
845 == (17/*#regs*/-1) * 32/*bytes per reg*/
847 vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State
,guest_YMM0
)));
848 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State
,guest_FPREG
)));
849 vg_assert(16 == offsetof(VexGuestAMD64State
,guest_RAX
));
850 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State
,guest_RAX
)));
851 vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State
,guest_RIP
)));
854 # if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
855 /* ppc guest_state vector regs must be 16 byte aligned for
856 loads/stores. This is important! */
857 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex
.guest_VSR0
));
858 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow1
.guest_VSR0
));
859 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow2
.guest_VSR0
));
860 /* be extra paranoid .. */
861 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex
.guest_VSR1
));
862 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow1
.guest_VSR1
));
863 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow2
.guest_VSR1
));
866 # if defined(VGA_arm)
867 /* arm guest_state VFP regs must be 8 byte aligned for
868 loads/stores. Let's use 16 just to be on the safe side. */
869 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex
.guest_D0
));
870 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow1
.guest_D0
));
871 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow2
.guest_D0
));
872 /* be extra paranoid .. */
873 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex
.guest_D1
));
874 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex_shadow1
.guest_D1
));
875 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex_shadow2
.guest_D1
));
878 # if defined(VGA_arm64)
879 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex
.guest_X0
));
880 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex_shadow1
.guest_X0
));
881 vg_assert(VG_IS_8_ALIGNED(& tst
->arch
.vex_shadow2
.guest_X0
));
882 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex
.guest_Q0
));
883 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow1
.guest_Q0
));
884 vg_assert(VG_IS_16_ALIGNED(& tst
->arch
.vex_shadow2
.guest_Q0
));
887 # if defined(VGA_s390x)
888 /* no special requirements */
891 # if defined(VGA_mips32) || defined(VGA_mips64)
892 /* no special requirements */
896 // NO_VGDB_POLL value ensures vgdb is not polled, while
897 // VGDB_POLL_ASAP ensures that the next scheduler call
898 // will cause a poll.
899 #define NO_VGDB_POLL 0xffffffffffffffffULL
900 #define VGDB_POLL_ASAP 0x0ULL
902 void VG_(disable_vgdb_poll
) (void )
904 vgdb_next_poll
= NO_VGDB_POLL
;
906 void VG_(force_vgdb_poll
) ( void )
908 vgdb_next_poll
= VGDB_POLL_ASAP
;
911 /* Run the thread tid for a while, and return a VG_TRC_* value
912 indicating why VG_(disp_run_translations) stopped, and possibly an
913 auxiliary word. Also, only allow the thread to run for at most
914 *dispatchCtrP events. If (as is the normal case) use_alt_host_addr
915 is False, we are running ordinary redir'd translations, and we
916 should therefore start by looking up the guest next IP in TT. If
917 it is True then we ignore the guest next IP and just run from
918 alt_host_addr, which presumably points at host code for a no-redir
921 Return results are placed in two_words. two_words[0] is set to the
922 TRC. In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
923 the address to patch is placed in two_words[1].
926 void run_thread_for_a_while ( /*OUT*/HWord
* two_words
,
927 /*MOD*/Int
* dispatchCtrP
,
930 Bool use_alt_host_addr
)
932 volatile HWord jumped
= 0;
933 volatile ThreadState
* tst
= NULL
; /* stop gcc complaining */
934 volatile Int done_this_time
= 0;
935 volatile HWord host_code_addr
= 0;
938 vg_assert(VG_(is_valid_tid
)(tid
));
939 vg_assert(VG_(is_running_thread
)(tid
));
940 vg_assert(!VG_(is_exiting
)(tid
));
941 vg_assert(*dispatchCtrP
> 0);
943 tst
= VG_(get_ThreadState
)(tid
);
944 do_pre_run_checks( tst
);
947 /* Futz with the XIndir stats counters. */
948 vg_assert(VG_(stats__n_xIndirs_32
) == 0);
949 vg_assert(VG_(stats__n_xIndir_hits1_32
) == 0);
950 vg_assert(VG_(stats__n_xIndir_hits2_32
) == 0);
951 vg_assert(VG_(stats__n_xIndir_hits3_32
) == 0);
952 vg_assert(VG_(stats__n_xIndir_misses_32
) == 0);
954 /* Clear return area. */
955 two_words
[0] = two_words
[1] = 0;
957 /* Figure out where we're starting from. */
958 if (use_alt_host_addr
) {
959 /* unusual case -- no-redir translation */
960 host_code_addr
= alt_host_addr
;
962 /* normal case -- redir translation */
963 Addr host_from_fast_cache
= 0;
964 Bool found_in_fast_cache
965 = VG_(lookupInFastCache
)( &host_from_fast_cache
,
966 (Addr
)tst
->arch
.vex
.VG_INSTR_PTR
);
967 if (found_in_fast_cache
) {
968 host_code_addr
= host_from_fast_cache
;
971 /* not found in VG_(tt_fast). Searching here the transtab
972 improves the performance compared to returning directly
974 Bool found
= VG_(search_transtab
)(&res
, NULL
, NULL
,
975 (Addr
)tst
->arch
.vex
.VG_INSTR_PTR
,
979 host_code_addr
= res
;
981 /* At this point, we know that we intended to start at a
982 normal redir translation, but it was not found. In
983 which case we can return now claiming it's not
985 two_words
[0] = VG_TRC_INNER_FASTMISS
; /* hmm, is that right? */
990 /* We have either a no-redir or a redir translation. */
991 vg_assert(host_code_addr
!= 0); /* implausible */
993 /* there should be no undealt-with signals */
994 //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
996 /* Set up event counter stuff for the run. */
997 tst
->arch
.vex
.host_EvC_COUNTER
= *dispatchCtrP
;
998 tst
->arch
.vex
.host_EvC_FAILADDR
999 = (HWord
)VG_(fnptr_to_fnentry
)( &VG_(disp_cp_evcheck_fail
) );
1001 /* Invalidate any in-flight LL/SC transactions, in the case that we're
1002 using the fallback LL/SC implementation. See bugs 344524 and 369459. */
1003 # if defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
1004 || defined(VGP_nanomips_linux)
1005 tst
->arch
.vex
.guest_LLaddr
= (RegWord
)(-1);
1006 # elif defined(VGP_arm64_linux)
1007 tst
->arch
.vex
.guest_LLSC_SIZE
= 0;
1012 Int i
, err
= VG_(sigprocmask
)(VKI_SIG_SETMASK
, NULL
, &m
);
1013 vg_assert(err
== 0);
1014 VG_(printf
)("tid %u: entering code with unblocked signals: ", tid
);
1015 for (i
= 1; i
<= _VKI_NSIG
; i
++)
1016 if (!VG_(sigismember
)(&m
, i
))
1017 VG_(printf
)("%d ", i
);
1021 /* Set up return-value area. */
1023 // Tell the tool this thread is about to run client code
1024 VG_TRACK( start_client_code
, tid
, bbs_done
);
1026 vg_assert(VG_(in_generated_code
) == False
);
1027 VG_(in_generated_code
) = True
;
1032 VG_(disp_run_translations
)(
1034 (volatile void*)&tst
->arch
.vex
,
1039 vg_assert(VG_(in_generated_code
) == True
);
1040 VG_(in_generated_code
) = False
;
1042 if (jumped
!= (HWord
)0) {
1043 /* We get here if the client took a fault that caused our signal
1044 handler to longjmp. */
1045 vg_assert(two_words
[0] == 0 && two_words
[1] == 0); // correct?
1046 two_words
[0] = VG_TRC_FAULT_SIGNAL
;
1051 /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
1052 and zero out the 32-bit ones in preparation for the next run of
1054 stats__n_xIndirs
+= (ULong
)VG_(stats__n_xIndirs_32
);
1055 VG_(stats__n_xIndirs_32
) = 0;
1056 stats__n_xIndir_hits1
+= (ULong
)VG_(stats__n_xIndir_hits1_32
);
1057 VG_(stats__n_xIndir_hits1_32
) = 0;
1058 stats__n_xIndir_hits2
+= (ULong
)VG_(stats__n_xIndir_hits2_32
);
1059 VG_(stats__n_xIndir_hits2_32
) = 0;
1060 stats__n_xIndir_hits3
+= (ULong
)VG_(stats__n_xIndir_hits3_32
);
1061 VG_(stats__n_xIndir_hits3_32
) = 0;
1062 stats__n_xIndir_misses
+= (ULong
)VG_(stats__n_xIndir_misses_32
);
1063 VG_(stats__n_xIndir_misses_32
) = 0;
1065 /* Inspect the event counter. */
1066 vg_assert((Int
)tst
->arch
.vex
.host_EvC_COUNTER
>= -1);
1067 vg_assert(tst
->arch
.vex
.host_EvC_FAILADDR
1068 == (HWord
)VG_(fnptr_to_fnentry
)( &VG_(disp_cp_evcheck_fail
)) );
1070 /* The number of events done this time is the difference between
1071 the event counter originally and what it is now. Except -- if
1072 it has gone negative (to -1) then the transition 0 to -1 doesn't
1073 correspond to a real executed block, so back it out. It's like
1074 this because the event checks decrement the counter first and
1075 check it for negativeness second, hence the 0 to -1 transition
1076 causes a bailout and the block it happens in isn't executed. */
1078 Int dispatchCtrAfterwards
= (Int
)tst
->arch
.vex
.host_EvC_COUNTER
;
1079 done_this_time
= *dispatchCtrP
- dispatchCtrAfterwards
;
1080 if (dispatchCtrAfterwards
== -1) {
1083 /* If the generated code drives the counter below -1, something
1084 is seriously wrong. */
1085 vg_assert(dispatchCtrAfterwards
>= 0);
1089 vg_assert(done_this_time
>= 0);
1090 bbs_done
+= (ULong
)done_this_time
;
1092 *dispatchCtrP
-= done_this_time
;
1093 vg_assert(*dispatchCtrP
>= 0);
1095 // Tell the tool this thread has stopped running client code
1096 VG_TRACK( stop_client_code
, tid
, bbs_done
);
1098 if (bbs_done
>= vgdb_next_poll
) {
1099 if (VG_(clo_vgdb_poll
))
1100 vgdb_next_poll
= bbs_done
+ (ULong
)VG_(clo_vgdb_poll
);
1102 /* value was changed due to gdbserver invocation via ptrace */
1103 vgdb_next_poll
= NO_VGDB_POLL
;
1104 if (VG_(gdbserver_activity
) (tid
))
1105 VG_(gdbserver
) (tid
);
1108 /* TRC value and possible auxiliary patch-address word are already
1109 in two_words[0] and [1] respectively, as a result of the call to
1110 VG_(run_innerloop). */
1112 if (two_words
[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
1113 || two_words
[0] == VG_TRC_CHAIN_ME_TO_FAST_EP
) {
1114 vg_assert(two_words
[1] != 0); /* we have a legit patch addr */
1116 vg_assert(two_words
[1] == 0); /* nobody messed with it */
1121 /* ---------------------------------------------------------------------
1122 The scheduler proper.
1123 ------------------------------------------------------------------ */
1125 static void handle_tt_miss ( ThreadId tid
)
1128 Addr ip
= VG_(get_IP
)(tid
);
1130 /* Trivial event. Miss in the fast-cache. Do a full
1132 found
= VG_(search_transtab
)( NULL
, NULL
, NULL
,
1133 ip
, True
/*upd_fast_cache*/ );
1134 if (UNLIKELY(!found
)) {
1135 /* Not found; we need to request a translation. */
1136 if (VG_(translate
)( tid
, ip
, /*debug*/False
, 0/*not verbose*/,
1137 bbs_done
, True
/*allow redirection*/ )) {
1138 found
= VG_(search_transtab
)( NULL
, NULL
, NULL
,
1140 vg_assert2(found
, "handle_tt_miss: missing tt_fast entry");
1143 // If VG_(translate)() fails, it's because it had to throw a
1144 // signal because the client jumped to a bad address. That
1145 // means that either a signal has been set up for delivery,
1146 // or the thread has been marked for termination. Either
1147 // way, we just need to go back into the scheduler loop.
1153 void handle_chain_me ( ThreadId tid
, void* place_to_chain
, Bool toFastEP
)
1156 Addr ip
= VG_(get_IP
)(tid
);
1157 SECno to_sNo
= INV_SNO
;
1158 TTEno to_tteNo
= INV_TTE
;
1160 found
= VG_(search_transtab
)( NULL
, &to_sNo
, &to_tteNo
,
1161 ip
, False
/*dont_upd_fast_cache*/ );
1163 /* Not found; we need to request a translation. */
1164 if (VG_(translate
)( tid
, ip
, /*debug*/False
, 0/*not verbose*/,
1165 bbs_done
, True
/*allow redirection*/ )) {
1166 found
= VG_(search_transtab
)( NULL
, &to_sNo
, &to_tteNo
,
1168 vg_assert2(found
, "handle_chain_me: missing tt_fast entry");
1170 // If VG_(translate)() fails, it's because it had to throw a
1171 // signal because the client jumped to a bad address. That
1172 // means that either a signal has been set up for delivery,
1173 // or the thread has been marked for termination. Either
1174 // way, we just need to go back into the scheduler loop.
1179 vg_assert(to_sNo
!= INV_SNO
);
1180 vg_assert(to_tteNo
!= INV_TTE
);
1182 /* So, finally we know where to patch through to. Do the patching
1183 and update the various admin tables that allow it to be undone
1184 in the case that the destination block gets deleted. */
1185 VG_(tt_tc_do_chaining
)( place_to_chain
,
1186 to_sNo
, to_tteNo
, toFastEP
);
1189 static void handle_syscall(ThreadId tid
, UInt trc
)
1191 ThreadState
* volatile tst
= VG_(get_ThreadState
)(tid
);
1192 volatile UWord jumped
;
1194 /* Syscall may or may not block; either way, it will be
1195 complete by the time this call returns, and we'll be
1196 runnable again. We could take a signal while the
1199 if (VG_(clo_sanity_level
) >= 3) {
1200 HChar buf
[50]; // large enough
1201 VG_(sprintf
)(buf
, "(BEFORE SYSCALL, tid %u)", tid
);
1202 Bool ok
= VG_(am_do_sync_check
)(buf
, __FILE__
, __LINE__
);
1206 SCHEDSETJMP(tid
, jumped
, VG_(client_syscall
)(tid
, trc
));
1208 if (VG_(clo_sanity_level
) >= 3) {
1209 HChar buf
[50]; // large enough
1210 VG_(sprintf
)(buf
, "(AFTER SYSCALL, tid %u)", tid
);
1211 Bool ok
= VG_(am_do_sync_check
)(buf
, __FILE__
, __LINE__
);
1215 if (!VG_(is_running_thread
)(tid
))
1216 VG_(printf
)("tid %u not running; VG_(running_tid)=%u, tid %u status %u\n",
1217 tid
, VG_(running_tid
), tid
, tst
->status
);
1218 vg_assert(VG_(is_running_thread
)(tid
));
1220 if (jumped
!= (UWord
)0) {
1222 VG_(poll_signals
)(tid
);
1226 /* tid just requested a jump to the noredir version of its current
1227 program counter. So make up that translation if needed, run it,
1228 and return the resulting thread return code in two_words[]. */
1230 void handle_noredir_jump ( /*OUT*/HWord
* two_words
,
1231 /*MOD*/Int
* dispatchCtrP
,
1234 /* Clear return area. */
1235 two_words
[0] = two_words
[1] = 0;
1238 Addr ip
= VG_(get_IP
)(tid
);
1240 Bool found
= VG_(search_unredir_transtab
)( &hcode
, ip
);
1242 /* Not found; we need to request a translation. */
1243 if (VG_(translate
)( tid
, ip
, /*debug*/False
, 0/*not verbose*/, bbs_done
,
1244 False
/*NO REDIRECTION*/ )) {
1246 found
= VG_(search_unredir_transtab
)( &hcode
, ip
);
1247 vg_assert2(found
, "unredir translation missing after creation?!");
1249 // If VG_(translate)() fails, it's because it had to throw a
1250 // signal because the client jumped to a bad address. That
1251 // means that either a signal has been set up for delivery,
1252 // or the thread has been marked for termination. Either
1253 // way, we just need to go back into the scheduler loop.
1254 two_words
[0] = VG_TRC_BORING
;
1261 vg_assert(hcode
!= 0);
1263 /* Otherwise run it and return the resulting VG_TRC_* value. */
1264 vg_assert(*dispatchCtrP
> 0); /* so as to guarantee progress */
1265 run_thread_for_a_while( two_words
, dispatchCtrP
, tid
,
1266 hcode
, True
/*use hcode*/ );
1271 Run a thread until it wants to exit.
1273 We assume that the caller has already called VG_(acquire_BigLock) for
1274 us, so we own the VCPU. Also, all signals are blocked.
1276 VgSchedReturnCode
VG_(scheduler
) ( ThreadId tid
)
1278 /* Holds the remaining size of this thread's "timeslice". */
1279 Int dispatch_ctr
= 0;
1281 ThreadState
*tst
= VG_(get_ThreadState
)(tid
);
1282 static Bool vgdb_startup_action_done
= False
;
1284 if (VG_(clo_trace_sched
))
1285 print_sched_event(tid
, "entering VG_(scheduler)");
1287 /* Do vgdb initialization (but once). Only the first (main) task
1288 starting up will do the below.
1289 Initialize gdbserver earlier than at the first
1290 thread VG_(scheduler) is causing problems:
1291 * at the end of VG_(scheduler_init_phase2) :
1292 The main thread is in VgTs_Init state, but in a not yet
1293 consistent state => the thread cannot be reported to gdb
1294 (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1295 back the guest registers to gdb).
1296 * at end of valgrind_main, just
1297 before VG_(main_thread_wrapper_NORETURN)(1) :
1298 The main thread is still in VgTs_Init state but in a
1299 more advanced state. However, the thread state is not yet
1300 completely initialized : a.o., the os_state is not yet fully
1301 set => the thread is then not properly reported to gdb,
1302 which is then confused (causing e.g. a duplicate thread be
1303 shown, without thread id).
1304 * it would be possible to initialize gdbserver "lower" in the
1305 call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1306 these are platform dependent and the place at which
1307 the thread state is completely initialized is not
1308 specific anymore to the main thread (so a similar "do it only
1309 once" would be needed).
1311 => a "once only" initialization here is the best compromise. */
1312 if (!vgdb_startup_action_done
) {
1313 vg_assert(tid
== 1); // it must be the main thread.
1314 vgdb_startup_action_done
= True
;
1315 if (VG_(clo_vgdb
) != Vg_VgdbNo
) {
1316 /* If we have to poll, ensures we do an initial poll at first
1317 scheduler call. Otherwise, ensure no poll (unless interrupted
1319 if (VG_(clo_vgdb_poll
))
1320 VG_(force_vgdb_poll
) ();
1322 VG_(disable_vgdb_poll
) ();
1324 VG_(gdbserver_prerun_action
) (1);
1326 VG_(disable_vgdb_poll
) ();
1330 if (SimHintiS(SimHint_no_nptl_pthread_stackcache
, VG_(clo_sim_hints
))
1332 /* We disable the stack cache the first time we see a thread other
1333 than the main thread appearing. At this moment, we are sure the pthread
1334 lib loading is done/variable was initialised by pthread lib/... */
1335 if (VG_(client__stack_cache_actsize__addr
)) {
1336 if (*VG_(client__stack_cache_actsize__addr
) == 0) {
1337 VG_(debugLog
)(1,"sched",
1338 "pthread stack cache size disable done"
1340 *VG_(client__stack_cache_actsize__addr
) = 1000 * 1000 * 1000;
1341 /* Set a value big enough to be above the hardcoded maximum stack
1342 cache size in glibc, small enough to allow a pthread stack size
1343 to be added without risk of overflow. */
1347 * glibc 2.34 no longer has stack_cache_actsize as a visible variable
1348 * so we switch to using the GLIBC_TUNABLES env var. Processing for that
1349 * is done in initimg-linux.c / setup_client_env for all glibc
1351 * If we don't detect stack_cache_actsize we want to be able to tell
1352 * whether it is an unexpected error or if it is no longer there.
1353 * In the latter case we don't print a warning.
1355 Bool print_warning
= True
;
1356 if (VG_(client__gnu_get_libc_version_addr
) != NULL
) {
1357 const HChar
* gnu_libc_version
= VG_(client__gnu_get_libc_version_addr
)();
1358 if (gnu_libc_version
!= NULL
) {
1359 HChar
* glibc_version_tok
= VG_(strdup
)("scheduler.1", gnu_libc_version
);
1360 const HChar
* str_major
= VG_(strtok
)(glibc_version_tok
, ".");
1361 Long major
= VG_(strtoll10
)(str_major
, NULL
);
1362 const HChar
* str_minor
= VG_(strtok
)(NULL
, ".");
1363 Long minor
= VG_(strtoll10
)(str_minor
, NULL
);
1364 if (major
>= 2 && minor
>= 34) {
1365 print_warning
= False
;
1367 VG_(free
)(glibc_version_tok
);
1372 if (print_warning
) {
1373 VG_(debugLog
)(0,"sched",
1374 "WARNING: pthread stack cache cannot be disabled!\n");
1376 VG_(clo_sim_hints
) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache
);
1377 /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
1378 to avoid having a msg for all following threads. */
1382 /* set the proper running signal mask */
1385 vg_assert(VG_(is_running_thread
)(tid
));
1387 dispatch_ctr
= VG_(clo_scheduling_quantum
);
1389 while (!VG_(is_exiting
)(tid
)) {
1391 vg_assert(dispatch_ctr
>= 0);
1392 if (dispatch_ctr
== 0) {
1394 /* Our slice is done, so yield the CPU to another thread. On
1395 Linux, this doesn't sleep between sleeping and running,
1396 since that would take too much time. */
1398 /* 4 July 06: it seems that a zero-length nsleep is needed to
1399 cause async thread cancellation (canceller.c) to terminate
1400 in finite time; else it is in some kind of race/starvation
1401 situation and completion is arbitrarily delayed (although
1402 this is not a deadlock).
1404 Unfortunately these sleeps cause MPI jobs not to terminate
1405 sometimes (some kind of livelock). So sleeping once
1406 every N opportunities appears to work. */
1408 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1409 sys_yield also helps the problem, whilst not crashing apps. */
1411 VG_(release_BigLock
)(tid
, VgTs_Yielding
,
1412 "VG_(scheduler):timeslice");
1413 /* ------------ now we don't have The Lock ------------ */
1415 VG_(acquire_BigLock
)(tid
, "VG_(scheduler):timeslice");
1416 /* ------------ now we do have The Lock ------------ */
1418 /* OK, do some relatively expensive housekeeping stuff */
1419 scheduler_sanity(tid
);
1420 VG_(sanity_check_general
)(False
);
1422 /* Possibly make a progress report */
1423 if (UNLIKELY(VG_(clo_progress_interval
) > 0)) {
1424 maybe_progress_report( VG_(clo_progress_interval
) );
1427 /* Look for any pending signals for this thread, and set them up
1429 VG_(poll_signals
)(tid
);
1431 if (VG_(is_exiting
)(tid
))
1432 break; /* poll_signals picked up a fatal signal */
1434 /* For stats purposes only. */
1435 n_scheduling_events_MAJOR
++;
1437 /* Figure out how many bbs to ask vg_run_innerloop to do. */
1438 dispatch_ctr
= VG_(clo_scheduling_quantum
);
1441 vg_assert(tst
->tid
== tid
);
1442 vg_assert(tst
->os_state
.lwpid
== VG_(gettid
)());
1445 /* For stats purposes only. */
1446 n_scheduling_events_MINOR
++;
1449 VG_(message
)(Vg_DebugMsg
, "thread %u: running for %d bbs\n",
1450 tid
, dispatch_ctr
- 1 );
1452 HWord trc
[2]; /* "two_words" */
1453 run_thread_for_a_while( &trc
[0],
1455 tid
, 0/*ignored*/, False
);
1457 if (VG_(clo_trace_sched
) && VG_(clo_verbosity
) > 2) {
1458 const HChar
*name
= name_of_sched_event(trc
[0]);
1459 HChar buf
[VG_(strlen
)(name
) + 10]; // large enough
1460 VG_(sprintf
)(buf
, "TRC: %s", name
);
1461 print_sched_event(tid
, buf
);
1464 if (trc
[0] == VEX_TRC_JMP_NOREDIR
) {
1465 /* If we got a request to run a no-redir version of
1466 something, do so now -- handle_noredir_jump just (creates
1467 and) runs that one translation. The flip side is that the
1468 noredir translation can't itself return another noredir
1469 request -- that would be nonsensical. It can, however,
1470 return VG_TRC_BORING, which just means keep going as
1472 /* Note that the fact that we need to continue with a
1473 no-redir jump is not recorded anywhere else in this
1474 thread's state. So we *must* execute the block right now
1475 -- we can't fail to execute it and later resume with it,
1476 because by then we'll have forgotten the fact that it
1477 should be run as no-redir, but will get run as a normal
1478 potentially-redir'd, hence screwing up. This really ought
1479 to be cleaned up, by noting in the guest state that the
1480 next block to be executed should be no-redir. Then we can
1481 suspend and resume at any point, which isn't the case at
1483 /* We can't enter a no-redir translation with the dispatch
1484 ctr set to zero, for the reasons commented just above --
1485 we need to force it to execute right now. So, if the
1486 dispatch ctr is zero, set it to one. Note that this would
1487 have the bad side effect of holding the Big Lock arbitrary
1488 long should there be an arbitrarily long sequence of
1489 back-to-back no-redir translations to run. But we assert
1490 just below that this translation cannot request another
1491 no-redir jump, so we should be safe against that. */
1492 if (dispatch_ctr
== 0) {
1495 handle_noredir_jump( &trc
[0],
1498 vg_assert(trc
[0] != VEX_TRC_JMP_NOREDIR
);
1500 /* This can't be allowed to happen, since it means the block
1501 didn't execute, and we have no way to resume-as-noredir
1502 after we get more timeslice. But I don't think it ever
1503 can, since handle_noredir_jump will assert if the counter
1504 is zero on entry. */
1505 vg_assert(trc
[0] != VG_TRC_INNER_COUNTERZERO
);
1506 /* This asserts the same thing. */
1507 vg_assert(dispatch_ctr
>= 0);
1509 /* A no-redir translation can't return with a chain-me
1510 request, since chaining in the no-redir cache is too
1512 vg_assert(trc
[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1513 && trc
[0] != VG_TRC_CHAIN_ME_TO_FAST_EP
);
1517 case VEX_TRC_JMP_BORING
:
1518 /* assisted dispatch, no event. Used by no-redir
1519 translations to force return to the scheduler. */
1521 /* no special event, just keep going. */
1524 case VG_TRC_INNER_FASTMISS
:
1525 vg_assert(dispatch_ctr
>= 0);
1526 handle_tt_miss(tid
);
1529 case VG_TRC_CHAIN_ME_TO_SLOW_EP
: {
1530 if (0) VG_(printf
)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc
[1] );
1531 handle_chain_me(tid
, (void*)trc
[1], False
);
1535 case VG_TRC_CHAIN_ME_TO_FAST_EP
: {
1536 if (0) VG_(printf
)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc
[1] );
1537 handle_chain_me(tid
, (void*)trc
[1], True
);
1541 case VEX_TRC_JMP_CLIENTREQ
:
1542 do_client_request(tid
);
1545 case VEX_TRC_JMP_SYS_INT128
: /* x86-linux */
1546 case VEX_TRC_JMP_SYS_INT129
: /* x86-darwin */
1547 case VEX_TRC_JMP_SYS_INT130
: /* x86-darwin */
1548 case VEX_TRC_JMP_SYS_INT145
: /* x86-solaris */
1549 case VEX_TRC_JMP_SYS_INT210
: /* x86-solaris */
1550 /* amd64-linux, ppc32-linux, amd64-darwin, amd64-solaris */
1551 case VEX_TRC_JMP_SYS_SYSCALL
:
1552 handle_syscall(tid
, trc
[0]);
1553 if (VG_(clo_sanity_level
) > 2)
1554 VG_(sanity_check_general
)(True
); /* sanity-check every syscall */
1557 case VEX_TRC_JMP_YIELD
:
1558 /* Explicit yield, because this thread is in a spin-lock
1559 or something. Only let the thread run for a short while
1560 longer. Because swapping to another thread is expensive,
1561 we're prepared to let this thread eat a little more CPU
1562 before swapping to another. That means that short term
1563 spins waiting for hardware to poke memory won't cause a
1565 if (dispatch_ctr
> 300)
1569 case VG_TRC_INNER_COUNTERZERO
:
1570 /* Timeslice is out. Let a new thread be scheduled. */
1571 vg_assert(dispatch_ctr
== 0);
1574 case VG_TRC_FAULT_SIGNAL
:
1575 /* Everything should be set up (either we're exiting, or
1576 about to start in a signal handler). */
1579 case VEX_TRC_JMP_MAPFAIL
:
1580 /* Failure of arch-specific address translation (x86/amd64
1581 segment override use) */
1582 /* jrs 2005 03 11: is this correct? */
1583 VG_(synth_fault
)(tid
);
1586 case VEX_TRC_JMP_EMWARN
: {
1587 static Int counts
[EmNote_NUMBER
];
1588 static Bool counts_initted
= False
;
1593 if (!counts_initted
) {
1594 counts_initted
= True
;
1595 for (q
= 0; q
< EmNote_NUMBER
; q
++)
1598 ew
= (VexEmNote
)VG_(threads
)[tid
].arch
.vex
.guest_EMNOTE
;
1599 what
= (ew
< 0 || ew
>= EmNote_NUMBER
)
1601 : LibVEX_EmNote_string(ew
);
1602 show
= (ew
< 0 || ew
>= EmNote_NUMBER
)
1605 if (show
&& VG_(clo_show_emwarns
) && !VG_(clo_xml
)) {
1606 VG_(message
)( Vg_UserMsg
,
1607 "Emulation warning: unsupported action:\n");
1608 VG_(message
)( Vg_UserMsg
, " %s\n", what
);
1609 VG_(get_and_pp_StackTrace
)( tid
, VG_(clo_backtrace_size
) );
1614 case VEX_TRC_JMP_EMFAIL
: {
1617 ew
= (VexEmNote
)VG_(threads
)[tid
].arch
.vex
.guest_EMNOTE
;
1618 what
= (ew
< 0 || ew
>= EmNote_NUMBER
)
1620 : LibVEX_EmNote_string(ew
);
1621 VG_(message
)( Vg_UserMsg
,
1622 "Emulation fatal error -- Valgrind cannot continue:\n");
1623 VG_(message
)( Vg_UserMsg
, " %s\n", what
);
1624 VG_(get_and_pp_StackTrace
)( tid
, VG_(clo_backtrace_size
) );
1625 VG_(message
)(Vg_UserMsg
, "\n");
1626 VG_(message
)(Vg_UserMsg
, "Valgrind has to exit now. Sorry.\n");
1627 VG_(message
)(Vg_UserMsg
, "\n");
1632 case VEX_TRC_JMP_SIGILL
:
1633 VG_(synth_sigill
)(tid
, VG_(get_IP
)(tid
));
1636 case VEX_TRC_JMP_SIGTRAP
:
1637 VG_(synth_sigtrap
)(tid
);
1640 case VEX_TRC_JMP_SIGSEGV
:
1641 VG_(synth_fault
)(tid
);
1644 case VEX_TRC_JMP_SIGBUS
:
1645 VG_(synth_sigbus
)(tid
);
1648 case VEX_TRC_JMP_SIGFPE
:
1649 VG_(synth_sigfpe
)(tid
, 0);
1652 case VEX_TRC_JMP_SIGFPE_INTDIV
:
1653 VG_(synth_sigfpe
)(tid
, VKI_FPE_INTDIV
);
1656 case VEX_TRC_JMP_SIGFPE_INTOVF
:
1657 VG_(synth_sigfpe
)(tid
, VKI_FPE_INTOVF
);
1660 case VEX_TRC_JMP_NODECODE
: {
1661 Addr addr
= VG_(get_IP
)(tid
);
1663 if (VG_(clo_sigill_diag
)) {
1665 "valgrind: Unrecognised instruction at address %#lx.\n", addr
);
1666 VG_(get_and_pp_StackTrace
)(tid
, VG_(clo_backtrace_size
));
1667 # define M(a) VG_(umsg)(a "\n");
1668 M("Your program just tried to execute an instruction that Valgrind" );
1669 M("did not recognise. There are two possible reasons for this." );
1670 M("1. Your program has a bug and erroneously jumped to a non-code" );
1671 M(" location. If you are running Memcheck and you just saw a" );
1672 M(" warning about a bad jump, it's probably your program's fault.");
1673 M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1674 M(" i.e. it's Valgrind's fault. If you think this is the case or");
1675 M(" you are not sure, please let us know and we'll try to fix it.");
1676 M("Either way, Valgrind will now raise a SIGILL signal which will" );
1677 M("probably kill your program." );
1680 # if defined(VGA_s390x)
1681 /* Now that the complaint is out we need to adjust the guest_IA. The
1682 reason is that -- after raising the exception -- execution will
1683 continue with the insn that follows the invalid insn. As the first
1684 2 bits of the invalid insn determine its length in the usual way,
1685 we can compute the address of the next insn here and adjust the
1686 guest_IA accordingly. This adjustment is essential and tested by
1687 none/tests/s390x/op_exception.c (which would loop forever
1689 UChar byte
= ((UChar
*)addr
)[0];
1690 UInt insn_length
= ((((byte
>> 6) + 1) >> 1) + 1) << 1;
1691 Addr next_insn_addr
= addr
+ insn_length
;
1692 VG_(set_IP
)(tid
, next_insn_addr
);
1694 VG_(synth_sigill
)(tid
, addr
);
1698 case VEX_TRC_JMP_INVALICACHE
:
1699 VG_(discard_translations
)(
1700 (Addr
)VG_(threads
)[tid
].arch
.vex
.guest_CMSTART
,
1701 VG_(threads
)[tid
].arch
.vex
.guest_CMLEN
,
1702 "scheduler(VEX_TRC_JMP_INVALICACHE)"
1705 VG_(printf
)("dump translations done.\n");
1708 case VEX_TRC_JMP_FLUSHDCACHE
: {
1709 void* start
= (void*)(Addr
)VG_(threads
)[tid
].arch
.vex
.guest_CMSTART
;
1710 SizeT len
= VG_(threads
)[tid
].arch
.vex
.guest_CMLEN
;
1711 VG_(debugLog
)(2, "sched", "flush_dcache(%p, %lu)\n", start
, len
);
1712 VG_(flush_dcache
)(start
, len
);
1716 case VG_TRC_INVARIANT_FAILED
:
1717 /* This typically happens if, after running generated code,
1718 it is detected that host CPU settings (eg, FPU/Vector
1719 control words) are not as they should be. Vex's code
1720 generation specifies the state such control words should
1721 be in on entry to Vex-generated code, and they should be
1722 unchanged on exit from it. Failure of this assertion
1723 usually means a bug in Vex's code generation. */
1725 // __asm__ __volatile__ (
1726 // "\t.word 0xEEF12A10\n" // fmrx r2,fpscr
1727 // "\tmov %0, r2" : "=r"(xx) : : "r2" );
1728 // VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1730 vg_assert2(0, "VG_(scheduler), phase 3: "
1731 "run_innerloop detected host "
1732 "state invariant failure", trc
);
1734 case VEX_TRC_JMP_SYS_SYSENTER
:
1735 /* Do whatever simulation is appropriate for an x86 sysenter
1736 instruction. Note that it is critical to set this thread's
1737 guest_EIP to point at the code to execute after the
1738 sysenter, since Vex-generated code will not have set it --
1739 vex does not know what it should be. Vex sets the next
1740 address to zero, so if you don't set guest_EIP, the thread
1741 will jump to zero afterwards and probably die as a result. */
1742 # if defined(VGP_x86_linux)
1743 vg_assert2(0, "VG_(scheduler), phase 3: "
1744 "sysenter_x86 on x86-linux is not supported");
1745 # elif defined(VGP_x86_darwin) || defined(VGP_x86_solaris)
1746 /* return address in client edx */
1747 VG_(threads
)[tid
].arch
.vex
.guest_EIP
1748 = VG_(threads
)[tid
].arch
.vex
.guest_EDX
;
1749 handle_syscall(tid
, trc
[0]);
1751 vg_assert2(0, "VG_(scheduler), phase 3: "
1752 "sysenter_x86 on non-x86 platform?!?!");
1757 vg_assert2(0, "VG_(scheduler), phase 3: "
1758 "unexpected thread return code (%u)", trc
[0]);
1762 } /* switch (trc) */
1764 if (UNLIKELY(VG_(clo_profyle_sbs
)) && VG_(clo_profyle_interval
) > 0)
1765 maybe_show_sb_profile();
1768 if (VG_(clo_trace_sched
))
1769 print_sched_event(tid
, "exiting VG_(scheduler)");
1771 vg_assert(VG_(is_exiting
)(tid
));
1773 return tst
->exitreason
;
1777 void VG_(nuke_all_threads_except
) ( ThreadId me
, VgSchedReturnCode src
)
1781 vg_assert(VG_(is_running_thread
)(me
));
1783 for (tid
= 1; tid
< VG_N_THREADS
; tid
++) {
1785 || VG_(threads
)[tid
].status
== VgTs_Empty
)
1789 "VG_(nuke_all_threads_except): nuking tid %u\n", tid
);
1791 VG_(threads
)[tid
].exitreason
= src
;
1792 if (src
== VgSrc_FatalSig
)
1793 VG_(threads
)[tid
].os_state
.fatalsig
= VKI_SIGKILL
;
1794 VG_(get_thread_out_of_syscall
)(tid
);
1799 /* ---------------------------------------------------------------------
1800 Specifying shadow register values
1801 ------------------------------------------------------------------ */
1803 #if defined(VGA_x86)
1804 # define VG_CLREQ_ARGS guest_EAX
1805 # define VG_CLREQ_RET guest_EDX
1806 #elif defined(VGA_amd64)
1807 # define VG_CLREQ_ARGS guest_RAX
1808 # define VG_CLREQ_RET guest_RDX
1809 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
1810 # define VG_CLREQ_ARGS guest_GPR4
1811 # define VG_CLREQ_RET guest_GPR3
1812 #elif defined(VGA_arm)
1813 # define VG_CLREQ_ARGS guest_R4
1814 # define VG_CLREQ_RET guest_R3
1815 #elif defined(VGA_arm64)
1816 # define VG_CLREQ_ARGS guest_X4
1817 # define VG_CLREQ_RET guest_X3
1818 #elif defined (VGA_s390x)
1819 # define VG_CLREQ_ARGS guest_r2
1820 # define VG_CLREQ_RET guest_r3
1821 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
1822 # define VG_CLREQ_ARGS guest_r12
1823 # define VG_CLREQ_RET guest_r11
1825 # error Unknown arch
1828 #define CLREQ_ARGS(regs) ((regs).vex.VG_CLREQ_ARGS)
1829 #define CLREQ_RET(regs) ((regs).vex.VG_CLREQ_RET)
1830 #define O_CLREQ_RET (offsetof(VexGuestArchState, VG_CLREQ_RET))
1832 // These macros write a value to a client's thread register, and tell the
1833 // tool that it's happened (if necessary).
1835 #define SET_CLREQ_RETVAL(zztid, zzval) \
1836 do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1837 VG_TRACK( post_reg_write, \
1838 Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1841 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1842 do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1843 VG_TRACK( post_reg_write_clientcall_return, \
1844 zztid, O_CLREQ_RET, sizeof(UWord), f); \
1848 /* ---------------------------------------------------------------------
1849 Handle client requests.
1850 ------------------------------------------------------------------ */
1852 // OS-specific(?) client requests
1853 static Bool
os_client_request(ThreadId tid
, UWord
*args
)
1855 Bool handled
= True
;
1857 vg_assert(VG_(is_running_thread
)(tid
));
1860 case VG_USERREQ__FREERES_DONE
:
1861 /* This is equivalent to an exit() syscall, but we don't set the
1862 exitcode (since it might already be set) */
1863 if (0 || VG_(clo_trace_syscalls
) || VG_(clo_trace_sched
))
1864 VG_(message
)(Vg_DebugMsg
,
1865 "__gnu_cxx::__freeres() and __libc_freeres() wrapper "
1866 "done; really quitting!\n");
1867 VG_(threads
)[tid
].exitreason
= VgSrc_ExitThread
;
1879 /* Write out a client message, possibly including a back trace. Return
1880 the number of characters written. In case of XML output, the format
1881 string as well as any arguments it requires will be XML'ified.
1882 I.e. special characters such as the angle brackets will be translated
1883 into proper escape sequences. */
1885 Int
print_client_message( ThreadId tid
, const HChar
*format
,
1886 va_list *vargsp
, Bool include_backtrace
)
1891 /* Translate the format string as follows:
1896 Yes, yes, it's simplified but in synch with
1897 myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
1900 /* Allocate a buffer that is for sure large enough. */
1901 HChar xml_format
[VG_(strlen
)(format
) * 5 + 1];
1904 HChar
*q
= xml_format
;
1906 for (p
= format
; *p
; ++p
) {
1908 case '<': VG_(strcpy
)(q
, "<"); q
+= 4; break;
1909 case '>': VG_(strcpy
)(q
, ">"); q
+= 4; break;
1910 case '&': VG_(strcpy
)(q
, "&"); q
+= 5; break;
1912 /* Careful: make sure %%s stays %%s */
1929 VG_(printf_xml
)( "<clientmsg>\n" );
1930 VG_(printf_xml
)( " <tid>%u</tid>\n", tid
);
1931 const ThreadState
*tst
= VG_(get_ThreadState
)(tid
);
1932 if (tst
->thread_name
)
1933 VG_(printf_xml
)(" <threadname>%s</threadname>\n", tst
->thread_name
);
1934 VG_(printf_xml
)( " <text>" );
1935 count
= VG_(vprintf_xml
)( xml_format
, *vargsp
);
1936 VG_(printf_xml
)( " </text>\n" );
1938 count
= VG_(vmessage
)( Vg_ClientMsg
, format
, *vargsp
);
1939 VG_(message_flush
)();
1942 if (include_backtrace
)
1943 VG_(get_and_pp_StackTrace
)( tid
, VG_(clo_backtrace_size
) );
1946 VG_(printf_xml
)( "</clientmsg>\n" );
1952 /* Do a client request for the thread tid. After the request, tid may
1953 or may not still be runnable; if not, the scheduler will have to
1954 choose a new thread to run.
1957 void do_client_request ( ThreadId tid
)
1959 UWord
* arg
= (UWord
*)(Addr
)(CLREQ_ARGS(VG_(threads
)[tid
].arch
));
1960 UWord req_no
= arg
[0];
1963 VG_(printf
)("req no = 0x%lx, arg = %p\n", req_no
, arg
);
1966 case VG_USERREQ__CLIENT_CALL0
: {
1967 UWord (*f
)(ThreadId
) = (__typeof__(f
))arg
[1];
1969 VG_(message
)(Vg_DebugMsg
, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f
);
1971 SET_CLCALL_RETVAL(tid
, f ( tid
), (Addr
)f
);
1974 case VG_USERREQ__CLIENT_CALL1
: {
1975 UWord (*f
)(ThreadId
, UWord
) = (__typeof__(f
))arg
[1];
1977 VG_(message
)(Vg_DebugMsg
, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f
);
1979 SET_CLCALL_RETVAL(tid
, f ( tid
, arg
[2] ), (Addr
)f
);
1982 case VG_USERREQ__CLIENT_CALL2
: {
1983 UWord (*f
)(ThreadId
, UWord
, UWord
) = (__typeof__(f
))arg
[1];
1985 VG_(message
)(Vg_DebugMsg
, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f
);
1987 SET_CLCALL_RETVAL(tid
, f ( tid
, arg
[2], arg
[3] ), (Addr
)f
);
1990 case VG_USERREQ__CLIENT_CALL3
: {
1991 UWord (*f
)(ThreadId
, UWord
, UWord
, UWord
) = (__typeof__(f
))arg
[1];
1993 VG_(message
)(Vg_DebugMsg
, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f
);
1995 SET_CLCALL_RETVAL(tid
, f ( tid
, arg
[2], arg
[3], arg
[4] ), (Addr
)f
);
1999 // Nb: this looks like a circular definition, because it kind of is.
2000 // See comment in valgrind.h to understand what's going on.
2001 case VG_USERREQ__RUNNING_ON_VALGRIND
:
2002 SET_CLREQ_RETVAL(tid
, RUNNING_ON_VALGRIND
+1);
2005 case VG_USERREQ__PRINTF
: {
2006 const HChar
* format
= (HChar
*)arg
[1];
2007 /* JRS 2010-Jan-28: this is DEPRECATED; use the
2008 _VALIST_BY_REF version instead */
2009 if (sizeof(va_list) != sizeof(UWord
))
2010 goto va_list_casting_error_NORETURN
;
2015 u
.uw
= (unsigned long)arg
[2];
2017 print_client_message( tid
, format
, &u
.vargs
,
2018 /* include_backtrace */ False
);
2019 SET_CLREQ_RETVAL( tid
, count
);
2023 case VG_USERREQ__PRINTF_BACKTRACE
: {
2024 const HChar
* format
= (HChar
*)arg
[1];
2025 /* JRS 2010-Jan-28: this is DEPRECATED; use the
2026 _VALIST_BY_REF version instead */
2027 if (sizeof(va_list) != sizeof(UWord
))
2028 goto va_list_casting_error_NORETURN
;
2033 u
.uw
= (unsigned long)arg
[2];
2035 print_client_message( tid
, format
, &u
.vargs
,
2036 /* include_backtrace */ True
);
2037 SET_CLREQ_RETVAL( tid
, count
);
2041 case VG_USERREQ__PRINTF_VALIST_BY_REF
: {
2042 const HChar
* format
= (HChar
*)arg
[1];
2043 va_list* vargsp
= (va_list*)arg
[2];
2045 print_client_message( tid
, format
, vargsp
,
2046 /* include_backtrace */ False
);
2048 SET_CLREQ_RETVAL( tid
, count
);
2052 case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF
: {
2053 const HChar
* format
= (HChar
*)arg
[1];
2054 va_list* vargsp
= (va_list*)arg
[2];
2056 print_client_message( tid
, format
, vargsp
,
2057 /* include_backtrace */ True
);
2058 SET_CLREQ_RETVAL( tid
, count
);
2062 case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF
: {
2063 va_list* vargsp
= (va_list*)arg
[2];
2065 VG_(vmessage
)( Vg_DebugMsg
, (HChar
*)arg
[1], *vargsp
);
2066 VG_(message_flush
)();
2067 SET_CLREQ_RETVAL( tid
, count
);
2071 case VG_USERREQ__ADD_IFUNC_TARGET
: {
2072 VG_(redir_add_ifunc_target
)( arg
[1], arg
[2] );
2073 SET_CLREQ_RETVAL( tid
, 0);
2076 case VG_USERREQ__STACK_REGISTER
: {
2077 UWord sid
= VG_(register_stack
)((Addr
)arg
[1], (Addr
)arg
[2]);
2078 SET_CLREQ_RETVAL( tid
, sid
);
2079 VG_TRACK(register_stack
, (Addr
)arg
[1], (Addr
)arg
[2]);
2082 case VG_USERREQ__STACK_DEREGISTER
: {
2083 VG_(deregister_stack
)(arg
[1]);
2084 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2087 case VG_USERREQ__STACK_CHANGE
: {
2088 VG_(change_stack
)(arg
[1], (Addr
)arg
[2], (Addr
)arg
[3]);
2089 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2092 case VG_USERREQ__GET_MALLOCFUNCS
: {
2093 struct vg_mallocfunc_info
*info
= (struct vg_mallocfunc_info
*)arg
[1];
2095 info
->tl_malloc
= VG_(tdict
).tool_malloc
;
2096 info
->tl_calloc
= VG_(tdict
).tool_calloc
;
2097 info
->tl_realloc
= VG_(tdict
).tool_realloc
;
2098 info
->tl_memalign
= VG_(tdict
).tool_memalign
;
2099 info
->tl___builtin_new
= VG_(tdict
).tool___builtin_new
;
2100 info
->tl___builtin_new_aligned
= VG_(tdict
).tool___builtin_new_aligned
;
2101 info
->tl___builtin_vec_new
= VG_(tdict
).tool___builtin_vec_new
;
2102 info
->tl___builtin_vec_new_aligned
= VG_(tdict
).tool___builtin_vec_new_aligned
;
2103 info
->tl_free
= VG_(tdict
).tool_free
;
2104 info
->tl___builtin_delete
= VG_(tdict
).tool___builtin_delete
;
2105 info
->tl___builtin_delete_aligned
= VG_(tdict
).tool___builtin_delete_aligned
;
2106 info
->tl___builtin_vec_delete
= VG_(tdict
).tool___builtin_vec_delete
;
2107 info
->tl___builtin_vec_delete_aligned
= VG_(tdict
).tool___builtin_vec_delete_aligned
;
2108 info
->tl_malloc_usable_size
= VG_(tdict
).tool_malloc_usable_size
;
2110 info
->mallinfo
= VG_(mallinfo
);
2111 info
->clo_trace_malloc
= VG_(clo_trace_malloc
);
2112 info
->clo_realloc_zero_bytes_frees
= VG_(clo_realloc_zero_bytes_frees
);
2114 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2119 /* Requests from the client program */
2121 case VG_USERREQ__DISCARD_TRANSLATIONS
:
2122 if (VG_(clo_verbosity
) > 2)
2123 VG_(printf
)( "client request: DISCARD_TRANSLATIONS,"
2124 " addr %p, len %lu\n",
2125 (void*)arg
[1], arg
[2] );
2127 VG_(discard_translations
)(
2128 arg
[1], arg
[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
2131 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2134 case VG_USERREQ__INNER_THREADS
:
2135 if (VG_(clo_verbosity
) > 2)
2136 VG_(printf
)( "client request: INNER_THREADS,"
2139 VG_(inner_threads
) = (ThreadState
*)arg
[1];
2140 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2143 case VG_USERREQ__COUNT_ERRORS
:
2144 SET_CLREQ_RETVAL( tid
, VG_(get_n_errs_found
)() );
2147 case VG_USERREQ__CLO_CHANGE
:
2148 VG_(process_dynamic_option
) (cloD
, (HChar
*)arg
[1]);
2149 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2152 case VG_USERREQ__LOAD_PDB_DEBUGINFO
:
2153 VG_(di_notify_pdb_debuginfo
)( arg
[1], arg
[2], arg
[3], arg
[4] );
2154 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2157 case VG_USERREQ__MAP_IP_TO_SRCLOC
: {
2159 HChar
* buf64
= (HChar
*)arg
[2]; // points to a HChar [64] array
2160 const HChar
*buf
; // points to a string of unknown size
2162 VG_(memset
)(buf64
, 0, 64);
2165 // Unless the guest would become epoch aware (and would need to
2166 // describe IP addresses of dlclosed libs), using cur_ep is a
2167 // reasonable choice.
2168 const DiEpoch cur_ep
= VG_(current_DiEpoch
)();
2170 Bool ok
= VG_(get_filename_linenum
)(
2171 cur_ep
, ip
, &buf
, NULL
, &linenum
2174 /* For backward compatibility truncate the filename to
2176 VG_(strncpy
)(buf64
, buf
, 50);
2179 for (i
= 0; i
< 50; i
++) {
2183 VG_(sprintf
)(buf64
+i
, ":%u", linenum
); // safe
2188 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2192 case VG_USERREQ__CHANGE_ERR_DISABLEMENT
: {
2193 Word delta
= arg
[1];
2194 vg_assert(delta
== 1 || delta
== -1);
2195 ThreadState
* tst
= VG_(get_ThreadState
)(tid
);
2197 if (delta
== 1 && tst
->err_disablement_level
< 0xFFFFFFFF) {
2198 tst
->err_disablement_level
++;
2201 if (delta
== -1 && tst
->err_disablement_level
> 0) {
2202 tst
->err_disablement_level
--;
2204 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2208 case VG_USERREQ__GDB_MONITOR_COMMAND
: {
2210 ret
= (UWord
) VG_(client_monitor_command
) ((HChar
*)arg
[1]);
2211 SET_CLREQ_RETVAL(tid
, ret
);
2215 case VG_USERREQ__MALLOCLIKE_BLOCK
:
2216 case VG_USERREQ__RESIZEINPLACE_BLOCK
:
2217 case VG_USERREQ__FREELIKE_BLOCK
:
2218 // Ignore them if the addr is NULL; otherwise pass onto the tool.
2220 SET_CLREQ_RETVAL( tid
, 0 ); /* return value is meaningless */
2226 case VG_USERREQ__VEX_INIT_FOR_IRI
:
2227 LibVEX_InitIRI ( (IRICB
*)arg
[1] );
2232 if (os_client_request(tid
, arg
)) {
2233 // do nothing, os_client_request() handled it
2234 } else if (VG_(needs
).client_requests
) {
2237 if (VG_(clo_verbosity
) > 2)
2238 VG_(printf
)("client request: code %lx, addr %p, len %lu\n",
2239 arg
[0], (void*)arg
[1], arg
[2] );
2241 if ( VG_TDICT_CALL(tool_handle_client_request
, tid
, arg
, &ret
) )
2242 SET_CLREQ_RETVAL(tid
, ret
);
2244 static Bool whined
= False
;
2246 if (!whined
&& VG_(clo_verbosity
) > 2) {
2247 // Allow for requests in core, but defined by tools, which
2248 // have 0 and 0 in their two high bytes.
2249 HChar c1
= (arg
[0] >> 24) & 0xff;
2250 HChar c2
= (arg
[0] >> 16) & 0xff;
2251 if (c1
== 0) c1
= '_';
2252 if (c2
== 0) c2
= '_';
2253 VG_(message
)(Vg_UserMsg
, "Warning:\n"
2254 " unhandled client request: 0x%lx (%c%c+0x%lx). Perhaps\n"
2255 " VG_(needs).client_requests should be set?\n",
2256 arg
[0], c1
, c2
, arg
[0] & 0xffff);
2265 va_list_casting_error_NORETURN
:
2267 "Valgrind: fatal error - cannot continue: use of the deprecated\n"
2268 "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
2269 "on a platform where they cannot be supported. Please use the\n"
2270 "equivalent _VALIST_BY_REF versions instead.\n"
2272 "This is a binary-incompatible change in Valgrind's client request\n"
2273 "mechanism. It is unfortunate, but difficult to avoid. End-users\n"
2274 "are expected to almost never see this message. The only case in\n"
2275 "which you might see this message is if your code uses the macros\n"
2276 "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE. If so, you will need\n"
2277 "to recompile such code, using the header files from this version of\n"
2278 "Valgrind, and not any previous version.\n"
2280 "If you see this message in any other circumstances, it is probably\n"
2281 "a bug in Valgrind. In this case, please file a bug report at\n"
2283 " http://www.valgrind.org/support/bug_reports.html\n"
2291 /* ---------------------------------------------------------------------
2292 Sanity checking (permanently engaged)
2293 ------------------------------------------------------------------ */
2295 /* Internal consistency checks on the sched structures. */
2297 void scheduler_sanity ( ThreadId tid
)
2300 Int lwpid
= VG_(gettid
)();
2302 if (!VG_(is_running_thread
)(tid
)) {
2303 VG_(message
)(Vg_DebugMsg
,
2304 "Thread %u is supposed to be running, "
2305 "but doesn't own the_BigLock (owned by %u)\n",
2306 tid
, VG_(running_tid
));
2310 if (lwpid
!= VG_(threads
)[tid
].os_state
.lwpid
) {
2311 VG_(message
)(Vg_DebugMsg
,
2312 "Thread %u supposed to be in LWP %d, but we're actually %d\n",
2313 tid
, VG_(threads
)[tid
].os_state
.lwpid
, VG_(gettid
)());
2317 if (lwpid
!= ML_(get_sched_lock_owner
)(the_BigLock
)) {
2318 VG_(message
)(Vg_DebugMsg
,
2319 "Thread (LWPID) %u doesn't own the_BigLock\n",
2325 /* Periodically show the state of all threads, for debugging
2327 static UInt lasttime
= 0;
2329 now
= VG_(read_millisecond_timer
)();
2330 if ((!bad
) && (lasttime
+ 4000/*ms*/ <= now
)) {
2332 VG_(printf
)("\n------------ Sched State at %d ms ------------\n",
2334 VG_(show_sched_status
)(True
, // host_stacktrace
2335 True
, // stack_usage
2336 True
); // exited_threads);
2340 /* core_panic also shows the sched status, which is why we don't
2341 show it above if bad==True. */
2343 VG_(core_panic
)("scheduler_sanity: failed");
2346 void VG_(sanity_check_general
) ( Bool force_expensive
)
2350 static UInt next_slow_check_at
= 1;
2351 static UInt slow_check_interval
= 25;
2353 if (VG_(clo_sanity_level
) < 1) return;
2355 /* --- First do all the tests that we can do quickly. ---*/
2357 sanity_fast_count
++;
2359 /* Check stuff pertaining to the memory check system. */
2361 /* Check that nobody has spuriously claimed that the first or
2362 last 16 pages of memory have become accessible [...] */
2363 if (VG_(needs
).sanity_checks
) {
2364 vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check
));
2367 /* --- Now some more expensive checks. ---*/
2369 /* Once every now and again, check some more expensive stuff.
2370 Gradually increase the interval between such checks so as not to
2371 burden long-running programs too much. */
2372 if ( force_expensive
2373 || VG_(clo_sanity_level
) > 1
2374 || (VG_(clo_sanity_level
) == 1
2375 && sanity_fast_count
== next_slow_check_at
)) {
2377 if (0) VG_(printf
)("SLOW at %u\n", sanity_fast_count
-1);
2379 next_slow_check_at
= sanity_fast_count
- 1 + slow_check_interval
;
2380 slow_check_interval
++;
2381 sanity_slow_count
++;
2383 if (VG_(needs
).sanity_checks
) {
2384 vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check
));
2387 /* Look for stack overruns. Visit all threads. */
2388 for (tid
= 1; tid
< VG_N_THREADS
; tid
++) {
2392 if (VG_(threads
)[tid
].status
== VgTs_Empty
||
2393 VG_(threads
)[tid
].status
== VgTs_Zombie
)
2398 VG_(get_ThreadState
)(tid
)->os_state
.valgrind_stack_base
;
2400 = 4096; // Let's say. Checking more causes lots of L2 misses.
2402 = VG_(am_get_VgStack_unused_szB
)(stack
, limit
);
2403 if (remains
< limit
)
2404 VG_(message
)(Vg_DebugMsg
,
2405 "WARNING: Thread %u is within %lu bytes "
2406 "of running out of valgrind stack!\n"
2407 "Valgrind stack size can be increased "
2408 "using --valgrind-stacksize=....\n",
2413 if (VG_(clo_sanity_level
) > 1) {
2414 /* Check sanity of the low-level memory manager. Note that bugs
2415 in the client's code can cause this to fail, so we don't do
2416 this check unless specially asked for. And because it's
2417 potentially very expensive. */
2418 VG_(sanity_check_malloc_all
)();
2422 /*--------------------------------------------------------------------*/
2424 /*--------------------------------------------------------------------*/