From 46a1529a2a2fba0012bc0cf9099fb3d3d269b10a Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Wed, 30 Mar 2022 21:55:47 +0000
Subject: [PATCH] WIP: debug l_biglocks

Instrument blocking paths to catch places where an LWP's big-lock
count (l_blcnt) slips unexpectedly:

- Initialize l_biglocks to -1 ("unclaimed") in lwp0 and lwp_create().
- Add a BIGUNLOCKED() marker, called before each
  KERNEL_UNLOCK_ALL(..., &l->l_biglocks), that asserts l_biglocks is
  unclaimed and records the caller in a new l_bigunlocked field.
- Make sleepq_block() claim l_biglocks (reset it to -1) for the
  duration of the sleep, so nested sleeps can stash their own count.
- Assert that l_blcnt is unchanged across mutex_vector_enter(),
  turnstile_block(), and the biodone2() callout paths.
---
 sys/kern/kern_exec.c      |  1 +
 sys/kern/kern_exit.c      |  1 +
 sys/kern/kern_lwp.c       |  2 ++
 sys/kern/kern_mutex.c     | 44 +++++++++++++++++++++++++++++++++++
 sys/kern/kern_sleepq.c    | 15 ++++++++++++
 sys/kern/kern_synch.c     |  5 ++++
 sys/kern/kern_turnstile.c |  9 +++++++
 sys/kern/sys_lwp.c        |  1 +
 sys/kern/vfs_bio.c        | 49 ++++++++++++++++++++++++++++++++++++++-
 sys/sys/lwp.h             | 15 ++++++++++++
 sys/sys/sleepq.h          |  1 +
 11 files changed, 142 insertions(+), 1 deletion(-)
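Note, not part of the change: every KASSERTMSG added below checks one
invariant, that the number of big locks an LWP holds (l_blcnt) must not
change across an operation that may block.  The stash/claim/restore
protocol that sleepq_block() follows can be sketched standalone.  In
the sketch below, struct lwp is pared down to two fields, and
stash_biglocks()/restore_biglocks() are invented stand-ins for what
KERNEL_UNLOCK_ALL() and KERNEL_LOCK() actually do in the kernel:

	#include <assert.h>

	struct lwp {
		int l_blcnt;	/* big locks currently held by this LWP */
		int l_biglocks;	/* stashed count; -1 means "unclaimed" */
	};

	/* Before sleeping: drop all big locks and stash the count. */
	static void
	stash_biglocks(struct lwp *l)
	{
		assert(l->l_biglocks == -1);	/* what BIGUNLOCKED() checks */
		l->l_biglocks = l->l_blcnt;
		l->l_blcnt = 0;
	}

	/* After waking: claim the stash, then reacquire the big locks. */
	static void
	restore_biglocks(struct lwp *l)
	{
		int biglocks = l->l_biglocks;

		l->l_biglocks = -1;	/* claim, so nested sleeps can stash 0 */
		assert(l->l_blcnt == 0);	/* nothing acquired while asleep */
		l->l_blcnt = biglocks;
	}

	int
	main(void)
	{
		struct lwp l = { .l_blcnt = 2, .l_biglocks = -1 };

		stash_biglocks(&l);	/* sleep holding a stash of 2 */
		restore_biglocks(&l);	/* wake: 2 again, stash unclaimed */
		assert(l.l_blcnt == 2 && l.l_biglocks == -1);
		return 0;
	}

The new assertions fire exactly when this protocol is violated:
stashing over a stash that was never reclaimed (BIGUNLOCKED), holding
big locks while the stash is claimed, or a count that slips across a
blocking call.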
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index a0bb241b1f95..d01eeda99d3a 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1408,6 +1408,7 @@ execve_runproc(struct lwp *l, struct execve_data * restrict data,
 	if (p->p_sflag & PS_STOPEXEC) {
 		ksiginfoq_t kq;
 
+		BIGUNLOCKED(l);
 		KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
 		p->p_pptr->p_nstopchild++;
 		p->p_waited = 0;
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 15632f66a4c9..8a52cb12586f 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -234,6 +234,7 @@ exit1(struct lwp *l, int exitcode, int signo)
 	 * If we have been asked to stop on exit, do so now.
 	 */
 	if (__predict_false(p->p_sflag & PS_STOPEXIT)) {
+		BIGUNLOCKED(l);
 		KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
 		sigclearall(p, &contsigmask, &kq);
 
diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c
index c20ae8c34168..6ec296b1acb3 100644
--- a/sys/kern/kern_lwp.c
+++ b/sys/kern/kern_lwp.c
@@ -292,6 +292,7 @@ struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = {
 	.l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders),
 	.l_name = __UNCONST("swapper"),
 	.l_fd = &filedesc0,
+	.l_biglocks = -1,
 };
 
 static int
@@ -834,6 +835,7 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
 	l2->l_proc = p2;
 	l2->l_refcnt = 0;
 	l2->l_class = sclass;
+	l2->l_biglocks = -1;
 
 	/*
 	 * Allocate a process ID for this LWP.  We need to do this now
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 0ff668f7c2f9..f89242f549fd 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -454,6 +454,7 @@ mutex_vector_enter(kmutex_t *mtx)
 #ifdef MULTIPROCESSOR
 	u_int count;
 #endif
+	int locks;
 	LOCKSTAT_COUNTER(spincnt);
 	LOCKSTAT_COUNTER(slpcnt);
 	LOCKSTAT_TIMER(spintime);
@@ -464,6 +465,7 @@ mutex_vector_enter(kmutex_t *mtx)
 	 * Handle spin mutexes.
 	 */
 	KPREEMPT_DISABLE(curlwp);
+	locks = curlwp->l_blcnt;
 	owner = mtx->mtx_owner;
 	if (MUTEX_SPIN_P(owner)) {
 #if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
@@ -509,6 +511,9 @@ mutex_vector_enter(kmutex_t *mtx)
 #endif	/* !MULTIPROCESSOR */
 #endif	/* FULL */
 		MUTEX_LOCKED(mtx);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "mutex %p slipped %d->%d biglocks",
+		    mtx, locks, curlwp->l_blcnt);
 		return;
 	}
 
@@ -560,9 +565,15 @@ mutex_vector_enter(kmutex_t *mtx)
 		LOCKSTAT_START_TIMER(lsflag, spintime);
 		count = SPINLOCK_BACKOFF_MIN;
 		do {
+			KASSERTMSG(locks == curlwp->l_blcnt,
+			    "mutex %p slipped %d->%d biglocks",
+			    mtx, locks, curlwp->l_blcnt);
 			KPREEMPT_ENABLE(curlwp);
 			SPINLOCK_BACKOFF(count);
 			KPREEMPT_DISABLE(curlwp);
+			KASSERTMSG(locks == curlwp->l_blcnt,
+			    "mutex %p slipped %d->%d biglocks",
+			    mtx, locks, curlwp->l_blcnt);
 			owner = mtx->mtx_owner;
 		} while (mutex_oncpu(owner));
 		LOCKSTAT_STOP_TIMER(lsflag, spintime);
@@ -572,7 +583,13 @@ mutex_vector_enter(kmutex_t *mtx)
 		}
 #endif
 
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "mutex %p slipped %d->%d biglocks",
+		    mtx, locks, curlwp->l_blcnt);
 		ts = turnstile_lookup(mtx);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "mutex %p slipped %d->%d biglocks",
+		    mtx, locks, curlwp->l_blcnt);
 
 		/*
 		 * Once we have the turnstile chain interlock, mark the
@@ -580,7 +597,13 @@ mutex_vector_enter(kmutex_t *mtx)
 		 * chances are that the mutex has been released.
 		 */
 		if (!MUTEX_SET_WAITERS(mtx, owner)) {
+			KASSERTMSG(locks == curlwp->l_blcnt,
+			    "mutex %p slipped %d->%d biglocks",
+			    mtx, locks, curlwp->l_blcnt);
 			turnstile_exit(mtx);
+			KASSERTMSG(locks == curlwp->l_blcnt,
+			    "mutex %p slipped %d->%d biglocks",
+			    mtx, locks, curlwp->l_blcnt);
 			owner = mtx->mtx_owner;
 			continue;
 		}
@@ -681,13 +704,25 @@ mutex_vector_enter(kmutex_t *mtx)
 		 */
 		membar_consumer();
 		if (mutex_oncpu(owner)) {
+			KASSERTMSG(locks == curlwp->l_blcnt,
+			    "mutex %p slipped %d->%d biglocks",
+			    mtx, locks, curlwp->l_blcnt);
 			turnstile_exit(mtx);
+			KASSERTMSG(locks == curlwp->l_blcnt,
+			    "mutex %p slipped %d->%d biglocks",
+			    mtx, locks, curlwp->l_blcnt);
 			owner = mtx->mtx_owner;
 			continue;
 		}
 		membar_consumer();
 		if (!MUTEX_HAS_WAITERS(mtx)) {
+			KASSERTMSG(locks == curlwp->l_blcnt,
+			    "mutex %p slipped %d->%d biglocks",
+			    mtx, locks, curlwp->l_blcnt);
 			turnstile_exit(mtx);
+			KASSERTMSG(locks == curlwp->l_blcnt,
+			    "mutex %p slipped %d->%d biglocks",
+			    mtx, locks, curlwp->l_blcnt);
 			owner = mtx->mtx_owner;
 			continue;
 		}
@@ -695,13 +730,22 @@ mutex_vector_enter(kmutex_t *mtx)
 		LOCKSTAT_START_TIMER(lsflag, slptime);
 
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "mutex %p slipped %d->%d biglocks",
+		    mtx, locks, curlwp->l_blcnt);
 		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "mutex %p slipped %d->%d biglocks",
+		    mtx, locks, curlwp->l_blcnt);
 
 		LOCKSTAT_STOP_TIMER(lsflag, slptime);
 		LOCKSTAT_COUNT(slpcnt, 1);
 
 		owner = mtx->mtx_owner;
 	}
 
+	KASSERTMSG(locks == curlwp->l_blcnt,
+	    "mutex %p slipped %d->%d biglocks",
+	    mtx, locks, curlwp->l_blcnt);
 	KPREEMPT_ENABLE(curlwp);
 
 	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
diff --git a/sys/kern/kern_sleepq.c b/sys/kern/kern_sleepq.c
index 237355782500..96ce1d73ddb2 100644
--- a/sys/kern/kern_sleepq.c
+++ b/sys/kern/kern_sleepq.c
@@ -310,6 +310,17 @@ sleepq_block(int timo, bool catch_p, struct syncobj *syncobj)
 	bool early = false;
 	int biglocks = l->l_biglocks;
 
+	KASSERTMSG(l->l_blcnt == 0, "unexpectedly acquired %d biglocks",
+	    l->l_blcnt);
+
+	/*
+	 * Claim l->l_biglocks.  We will restore these at the end, but
+	 * in the interim, callout_halt, mutex_enter, and ktrcsw might
+	 * sleep and re-enter sleepq_enter/block which need to stash 0
+	 * in l->l_biglocks.
+	 */
+	l->l_biglocks = -1;
+
 	ktrcsw(1, 0, syncobj);
 
 	/*
@@ -398,9 +409,13 @@ sleepq_block(int timo, bool catch_p, struct syncobj *syncobj)
 	}
 
 	ktrcsw(0, 0, syncobj);
+	KASSERTMSG(l->l_blcnt == 0, "unexpectedly acquired %d biglocks",
+	    l->l_blcnt);
 	if (__predict_false(biglocks != 0)) {
 		KERNEL_LOCK(biglocks, NULL);
 	}
+	KASSERTMSG(biglocks == l->l_blcnt, "slipped %d->%d biglocks",
+	    biglocks, l->l_blcnt);
 
 	return error;
 }
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 48ad3ee1fddd..cfccdbb7a777 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -241,6 +241,7 @@ kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
 		mutex_exit(mtx);
 	l->l_kpriority = true;
 	lwp_lock(l);
+	BIGUNLOCKED(l);
 	KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks);
 	sleepq_enqueue(NULL, l, wmesg, &kpause_syncobj, intr);
 	error = sleepq_block(timo, intr, &kpause_syncobj);
@@ -277,6 +278,7 @@ yield(void)
 {
 	struct lwp *l = curlwp;
 
+	BIGUNLOCKED(l);
 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
 	lwp_lock(l);
 
@@ -288,6 +290,7 @@ yield(void)
 	spc_lock(l->l_cpu);
 	mi_switch(l);
 	KERNEL_LOCK(l->l_biglocks, l);
+	l->l_biglocks = -1;
 }
 
 /*
@@ -304,6 +307,7 @@ preempt(void)
 {
 	struct lwp *l = curlwp;
 
+	BIGUNLOCKED(l);
 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
 	lwp_lock(l);
 
@@ -318,6 +322,7 @@ preempt(void)
 	l->l_pflag |= LP_PREEMPTING;
 	mi_switch(l);
 	KERNEL_LOCK(l->l_biglocks, l);
+	l->l_biglocks = -1;
 }
 
 /*
diff --git a/sys/kern/kern_turnstile.c b/sys/kern/kern_turnstile.c
index 26a19fb7ebdf..e966c7801d58 100644
--- a/sys/kern/kern_turnstile.c
+++ b/sys/kern/kern_turnstile.c
@@ -378,6 +378,7 @@ turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj)
 	sleepq_t *sq;
 	pri_t obase;
 	u_int hash;
+	int locks;
 
 	hash = TS_HASH(obj);
 	tc = &turnstile_chains[hash];
@@ -387,6 +388,8 @@ turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj)
 	KASSERT(mutex_owned(lock));
 	KASSERT(l != NULL && l->l_ts != NULL);
 
+	locks = curlwp->l_blcnt;
+
 	if (ts == NULL) {
 		/*
 		 * We are the first thread to wait for this object;
@@ -419,6 +422,9 @@ turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj)
 
 	sq = &ts->ts_sleepq[q];
 	ts->ts_waiters[q]++;
+	KASSERTMSG(locks == curlwp->l_blcnt,
+	    "turnstile %p obj %p sobj %p slipped %d->%d biglocks",
+	    ts, obj, sobj, locks, curlwp->l_blcnt);
 	sleepq_enter(sq, l, lock);
 	LOCKDEBUG_BARRIER(lock, 1);
 	l->l_kpriority = true;
@@ -436,6 +442,9 @@ turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj)
 	KASSERT(lock == l->l_mutex);
 	turnstile_lendpri(l);
 	sleepq_block(0, false, sobj);
+	KASSERTMSG(locks == curlwp->l_blcnt,
+	    "turnstile %p obj %p sobj %p slipped %d->%d biglocks",
+	    ts, obj, sobj, locks, curlwp->l_blcnt);
 	l->l_kpribase = obase;
 	KPREEMPT_ENABLE(l);
 }
diff --git a/sys/kern/sys_lwp.c b/sys/kern/sys_lwp.c
index e9e7d2c93a30..92054c672ce2 100644
--- a/sys/kern/sys_lwp.c
+++ b/sys/kern/sys_lwp.c
@@ -531,6 +531,7 @@ lwp_park(clockid_t clock_id, int flags, struct timespec *ts)
 		lwp_unlock(l);
 		return EALREADY;
 	}
+	BIGUNLOCKED(l);
 	l->l_biglocks = 0;
 	sleepq_enqueue(NULL, l, "parked", &lwp_park_syncobj, true);
 	error = sleepq_block(timo, true, &lwp_park_syncobj);
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 21e36bafcdc3..78b854fa7611 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1673,13 +1673,19 @@ static void
 biodone2(buf_t *bp)
 {
 	void (*callout)(buf_t *);
+	int locks;
 
 	SDT_PROBE1(io, kernel, , done, bp);
 
 	BIOHIST_FUNC(__func__);
 	BIOHIST_CALLARGS(biohist, "bp=%#jx", (uintptr_t)bp, 0, 0, 0);
 
+	locks = curlwp->l_blcnt;
+
 	mutex_enter(bp->b_objlock);
+	KASSERTMSG(locks == curlwp->l_blcnt,
+	    "buf %p b_iodone %p slipped %d->%d biglocks",
+	    bp, bp->b_iodone, locks, curlwp->l_blcnt);
 	/* Note that the transfer is done. */
 	if (ISSET(bp->b_oflags, BO_DONE))
 		panic("biodone2 already");
@@ -1687,9 +1693,20 @@ biodone2(buf_t *bp)
 	SET(bp->b_oflags, BO_DONE);
 	BIO_SETPRIO(bp, BPRIO_DEFAULT);
 
+	KASSERTMSG(locks == curlwp->l_blcnt,
+	    "buf %p b_iodone %p slipped %d->%d biglocks",
+	    bp, bp->b_iodone, locks, curlwp->l_blcnt);
+
 	/* Wake up waiting writers. */
-	if (!ISSET(bp->b_flags, B_READ))
+	if (!ISSET(bp->b_flags, B_READ)) {
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, bp->b_iodone, locks, curlwp->l_blcnt);
 		vwakeup(bp);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, bp->b_iodone, locks, curlwp->l_blcnt);
+	}
 
 	if ((callout = bp->b_iodone) != NULL) {
 		BIOHIST_LOG(biohist, "callout %#jx", (uintptr_t)callout,
@@ -1698,20 +1715,50 @@ biodone2(buf_t *bp)
 		/* Note callout done, then call out. */
 		KASSERT(!cv_has_waiters(&bp->b_done));
 		bp->b_iodone = NULL;
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 		mutex_exit(bp->b_objlock);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 		(*callout)(bp);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 	} else if (ISSET(bp->b_flags, B_ASYNC)) {
 		/* If async, release. */
 		BIOHIST_LOG(biohist, "async", 0, 0, 0, 0);
 		KASSERT(!cv_has_waiters(&bp->b_done));
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 		mutex_exit(bp->b_objlock);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 		brelse(bp, 0);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 	} else {
 		/* Otherwise just wake up waiters in biowait(). */
 		BIOHIST_LOG(biohist, "wake-up", 0, 0, 0, 0);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 		cv_broadcast(&bp->b_done);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 		mutex_exit(bp->b_objlock);
+		KASSERTMSG(locks == curlwp->l_blcnt,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curlwp->l_blcnt);
 	}
+	KASSERTMSG(locks == curlwp->l_blcnt,
+	    "buf %p b_iodone %p slipped %d->%d biglocks",
+	    bp, callout, locks, curlwp->l_blcnt);
 }
 
 static void
diff --git a/sys/sys/lwp.h b/sys/sys/lwp.h
index 760708173790..89b1930bb1e0 100644
--- a/sys/sys/lwp.h
+++ b/sys/sys/lwp.h
@@ -216,6 +216,8 @@ struct lwp {
 
 	struct kdtrace_thread *l_dtrace; /* (: DTrace-specific data. */
 
+	void *l_bigunlocked;
+
 #ifdef KMSAN
 	void *l_kmsan; /* !: KMSAN private data. */
 #endif
@@ -224,6 +226,19 @@ struct lwp {
 #endif
 };
 
+#ifdef _KERNEL
+static void __noinline __unused
+BIGUNLOCKED(struct lwp *l)
+{
+
+#ifdef _HARDKERNEL
+	KASSERTMSG(l->l_biglocks == -1, "last bigunlocked from %d at %p",
+	    l->l_biglocks, l->l_bigunlocked);
+	l->l_bigunlocked = __builtin_return_address(0);
+#endif
+}
+#endif
+
 /*
  * UAREA_PCB_OFFSET: an offset of PCB structure in the uarea.  MD code may
  * define it in <machine/types.h>, to indicate a different uarea layout.
diff --git a/sys/sys/sleepq.h b/sys/sys/sleepq.h
index 42b0db2fceae..77e9e77e2826 100644
--- a/sys/sys/sleepq.h
+++ b/sys/sys/sleepq.h
@@ -95,6 +95,7 @@ sleepq_enter(sleepq_t *sq, lwp_t *l, kmutex_t *mp)
 	 */
 	lwp_lock(l);
 	lwp_unlock_to(l, mp);
+	BIGUNLOCKED(l);
 	KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks);
 }
 #endif
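Not part of the change: the KASSERTMSG triples repeated throughout
kern_mutex.c and vfs_bio.c differ only in the message prefix and
arguments.  If this instrumentation outgrows WIP status, a pair of
helper macros could centralize the pattern.  BIGLOCKS_SNAPSHOT and
BIGLOCKS_CHECK below are invented names, shown only as a sketch:

	/*
	 * Hypothetical helpers for the repeated "slipped biglocks"
	 * checks.  These names are not part of NetBSD or of the patch
	 * above.
	 */
	#define	BIGLOCKS_SNAPSHOT(locks)	int locks = curlwp->l_blcnt

	#define	BIGLOCKS_CHECK(locks, obj)				\
		KASSERTMSG((locks) == curlwp->l_blcnt,			\
		    "%p slipped %d->%d biglocks",			\
		    (obj), (locks), curlwp->l_blcnt)

mutex_vector_enter() would then declare BIGLOCKS_SNAPSHOT(locks); once
and call BIGLOCKS_CHECK(locks, mtx); around each turnstile operation,
at the cost of losing the per-site "mutex"/"buf" detail in the panic
message.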