From 0e151d4aecf43cc4b8623e60435f997c9f6e2bb7 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 23 Jul 2016 18:59:33 -0700 Subject: [PATCH] kernel - Fix TDF_EXITING bug, instrument potential live loops * Fix a TDF_EXITING bug. lwkt_switch_return() is called to fixup the 'previous' thread, meaning turning off TDF_RUNNING and handling TDF_EXITING. However, if TDF_EXITING is not set, the old thread can be used or acted upon / exited on by some other cpu the instant we clear TDF_RUNNING. In this situation it is possible that the other cpu will set TDF_EXITING in the small window of opportunity just before we check ourselves, leading to serious thread management corruption. * The new pmap_inval*() code runs on Xinvltlb instead of as a IPIQ and can easily create significant latency between the two tests, whereas the old code ran as an IPIQ and could not due to the critical section. --- sys/kern/lwkt_thread.c | 51 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/sys/kern/lwkt_thread.c b/sys/kern/lwkt_thread.c index 9fca7ec5ba..57388b5801 100644 --- a/sys/kern/lwkt_thread.c +++ b/sys/kern/lwkt_thread.c @@ -70,11 +70,14 @@ #include #include +#include #ifdef _KERNEL_VIRTUAL #include #endif +#define LOOPMASK + #if !defined(KTR_CTXSW) #define KTR_CTXSW KTR_ALL #endif @@ -534,6 +537,9 @@ lwkt_switch(void) thread_t td = gd->gd_curthread; thread_t ntd; int upri; +#ifdef LOOPMASK + uint64_t tsc_base = rdtsc(); +#endif KKASSERT(gd->gd_processing_ipiq == 0); KKASSERT(td->td_flags & TDF_RUNNING); @@ -668,6 +674,13 @@ lwkt_switch(void) } ++gd->gd_cnt.v_lock_colls; ++ntd->td_contended; /* overflow ok */ +#ifdef LOOPMASK + if (tsc_frequency && rdtsc() - tsc_base > tsc_frequency) { + kprintf("lwkt_switch: excessive contended %d " + "thread %p\n", ntd->td_contended, ntd); + tsc_base = rdtsc(); + } +#endif } while (ntd->td_contended < (lwkt_spin_loops >> 1)); upri = ntd->td_upri; @@ -769,6 +782,13 @@ void lwkt_switch_return(thread_t otd) { globaldata_t rgd; +#ifdef LOOPMASK + uint64_t tsc_base = rdtsc(); +#endif + int exiting; + + exiting = otd->td_flags & TDF_EXITING; + cpu_ccfence(); /* * Check if otd was migrating. Now that we are on ntd we can finish @@ -796,8 +816,13 @@ lwkt_switch_return(thread_t otd) /* * Final exit validations (see lwp_wait()). Note that otd becomes * invalid the *instant* we set TDF_MP_EXITSIG. + * + * Use the EXITING status loaded from before we clear TDF_RUNNING, + * because if it is not set otd becomes invalid the instant we clear + * TDF_RUNNING on it (otherwise, if the system is fast enough, we + * might 'steal' TDF_EXITING from another switch-return!). */ - while (otd->td_flags & TDF_EXITING) { + while (exiting) { u_int mpflags; mpflags = otd->td_mpflags; @@ -816,6 +841,14 @@ lwkt_switch_return(thread_t otd) break; } } + +#ifdef LOOPMASK + if (tsc_frequency && rdtsc() - tsc_base > tsc_frequency) { + kprintf("lwkt_switch_return: excessive TDF_EXITING " + "thread %p\n", otd); + tsc_base = rdtsc(); + } +#endif } } @@ -1236,12 +1269,14 @@ lwkt_acquire(thread_t td) { globaldata_t gd; globaldata_t mygd; - int retry = 10000000; KKASSERT(td->td_flags & TDF_MIGRATING); gd = td->td_gd; mygd = mycpu; if (gd != mycpu) { +#ifdef LOOPMASK + uint64_t tsc_base = rdtsc(); +#endif cpu_lfence(); KKASSERT((td->td_flags & TDF_RUNQ) == 0); crit_enter_gd(mygd); @@ -1249,14 +1284,16 @@ lwkt_acquire(thread_t td) while (td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK)) { lwkt_process_ipiq(); cpu_lfence(); - if (--retry == 0) { - kprintf("lwkt_acquire: stuck: td %p td->td_flags %08x\n", - td, td->td_flags); - retry = 10000000; - } #ifdef _KERNEL_VIRTUAL pthread_yield(); #endif +#ifdef LOOPMASK + if (tsc_frequency && rdtsc() - tsc_base > tsc_frequency) { + kprintf("lwkt_acquire: stuck td %p td->td_flags %08x\n", + td, td->td_flags); + tsc_base = rdtsc(); + } +#endif } DEBUG_POP_INFO(); cpu_mfence(); -- 2.11.4.GIT