Index: arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.80.2.3
diff -u -p -r1.80.2.3 genassym.cf
--- arch/amd64/amd64/genassym.cf	22 Jan 2020 11:40:16 -0000	1.80.2.3
+++ arch/amd64/amd64/genassym.cf	25 Jan 2020 21:39:05 -0000
@@ -78,7 +78,6 @@ include
 include
 include
 include
-include
 include
 include
 include
@@ -346,15 +345,6 @@ define MTX_IPL offsetof(struct kmutex,
 define	MTX_LOCK	offsetof(struct kmutex, u.s.mtxs_lock)
 define	MTX_OWNER	offsetof(struct kmutex, u.mtxa_owner)
 
-define	RW_OWNER	offsetof(struct krwlock, rw_owner)
-define	RW_WRITE_LOCKED	RW_WRITE_LOCKED
-define	RW_WRITE_WANTED	RW_WRITE_WANTED
-define	RW_READ_INCR	RW_READ_INCR
-define	RW_HAS_WAITERS	RW_HAS_WAITERS
-define	RW_THREAD	RW_THREAD
-define	RW_READER	RW_READER
-define	RW_WRITER	RW_WRITER
-
 define	EV_COUNT	offsetof(struct evcnt, ev_count)
 
 define	OPTERON_MSR_PASSCODE	OPTERON_MSR_PASSCODE
Index: arch/amd64/amd64/lock_stubs.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/lock_stubs.S,v
retrieving revision 1.35.2.2
diff -u -p -r1.35.2.2 lock_stubs.S
--- arch/amd64/amd64/lock_stubs.S	22 Jan 2020 11:40:16 -0000	1.35.2.2
+++ arch/amd64/amd64/lock_stubs.S	25 Jan 2020 21:39:05 -0000
@@ -185,126 +185,6 @@ ENTRY(mutex_spin_exit)
 END(mutex_spin_exit)
 
-/*
- * void rw_enter(krwlock_t *rwl, krw_t op);
- *
- * Acquire one hold on a RW lock.
- */
-ENTRY(rw_enter)
-	cmpl	$RW_READER, %esi
-	jne	2f
-
-	/*
-	 * Reader: this is the most common case.
-	 */
-	movq	(%rdi), %rax
-0:
-	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
-	jnz	3f
-	leaq	RW_READ_INCR(%rax), %rdx
-	LOCK
-	cmpxchgq %rdx, (%rdi)
-	jnz	1f
-	RET
-1:
-	jmp	0b
-
-	/*
-	 * Writer: if the compare-and-set fails, don't bother retrying.
-	 */
-2:	movq	CPUVAR(CURLWP), %rcx
-	xorq	%rax, %rax
-	orq	$RW_WRITE_LOCKED, %rcx
-	LOCK
-	cmpxchgq %rcx, (%rdi)
-	jnz	3f
-	RET
-3:
-	jmp	_C_LABEL(rw_vector_enter)
-END(rw_enter)
-
-/*
- * void rw_exit(krwlock_t *rwl);
- *
- * Release one hold on a RW lock.
- */
-ENTRY(rw_exit)
-	movq	(%rdi), %rax
-	testb	$RW_WRITE_LOCKED, %al
-	jnz	2f
-
-	/*
-	 * Reader
-	 */
-0:	testb	$RW_HAS_WAITERS, %al
-	jnz	3f
-	cmpq	$RW_READ_INCR, %rax
-	jb	3f
-	leaq	-RW_READ_INCR(%rax), %rdx
-	LOCK
-	cmpxchgq %rdx, (%rdi)
-	jnz	1f
-	ret
-1:
-	jmp	0b
-
-	/*
-	 * Writer
-	 */
-2:	leaq	-RW_WRITE_LOCKED(%rax), %rdx
-	subq	CPUVAR(CURLWP), %rdx
-	jnz	3f
-	LOCK
-	cmpxchgq %rdx, (%rdi)
-	jnz	3f
-	ret
-
-3:	jmp	_C_LABEL(rw_vector_exit)
-END(rw_exit)
-
-/*
- * int rw_tryenter(krwlock_t *rwl, krw_t op);
- *
- * Try to acquire one hold on a RW lock.
- */
-ENTRY(rw_tryenter)
-	cmpl	$RW_READER, %esi
-	jne	2f
-
-	/*
-	 * Reader: this is the most common case.
-	 */
-	movq	(%rdi), %rax
-0:
-	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
-	jnz	4f
-	leaq	RW_READ_INCR(%rax), %rdx
-	LOCK
-	cmpxchgq %rdx, (%rdi)
-	jnz	1f
-	movl	%edx, %eax	/* nonzero */
-	RET
-1:
-	jmp	0b
-
-	/*
-	 * Writer: if the compare-and-set fails, don't bother retrying.
-	 */
-2:	movq	CPUVAR(CURLWP), %rcx
-	xorq	%rax, %rax
-	orq	$RW_WRITE_LOCKED, %rcx
-	LOCK
-	cmpxchgq %rcx, (%rdi)
-	movl	$0, %eax
-	setz	%al
-3:
-	RET
-	ret
-4:
-	xorl	%eax, %eax
-	jmp	3b
-END(rw_tryenter)
-
 #endif	/* LOCKDEBUG */
 
 /*
Index: arch/amd64/include/rwlock.h
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/include/rwlock.h,v
retrieving revision 1.2.144.2
diff -u -p -r1.2.144.2 rwlock.h
--- arch/amd64/include/rwlock.h	22 Jan 2020 11:40:16 -0000	1.2.144.2
+++ arch/amd64/include/rwlock.h	25 Jan 2020 21:39:05 -0000
@@ -1,3 +0,0 @@
-/*	$NetBSD: rwlock.h,v 1.2.144.2 2020/01/22 11:40:16 ad Exp $	*/
-
-#include
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.59.2.4
diff -u -p -r1.59.2.4 kern_rwlock.c
--- kern/kern_rwlock.c	22 Jan 2020 11:40:17 -0000	1.59.2.4
+++ kern/kern_rwlock.c	25 Jan 2020 21:39:11 -0000
@@ -59,8 +59,6 @@ __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.
 
 #include
 
-#include
-
 /*
  * LOCKDEBUG
  */
@@ -104,19 +102,6 @@ do { \
 #define	RW_MEMBAR_PRODUCER()	membar_producer()
 #endif
 
-/*
- * For platforms that do not provide stubs, or for the LOCKDEBUG case.
- */
-#ifdef LOCKDEBUG
-#undef	__HAVE_RW_STUBS
-#endif
-
-#ifndef __HAVE_RW_STUBS
-__strong_alias(rw_enter,rw_vector_enter);
-__strong_alias(rw_exit,rw_vector_exit);
-__strong_alias(rw_tryenter,rw_vector_tryenter);
-#endif
-
 static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
 static void	rw_dump(const volatile void *, lockop_printer_t);
 static lwp_t	*rw_owner(wchan_t);
@@ -149,6 +134,22 @@ rw_cas(krwlock_t *rw, uintptr_t o, uintp
 }
 
 /*
+ * rw_and:
+ *
+ *	Do an atomic AND on the lock word.
+ */
+static inline void
+rw_and(krwlock_t *rw, uintptr_t m)
+{
+
+#ifdef _LP64
+	atomic_and_64(&rw->rw_owner, m);
+#else
+	atomic_and_32(&rw->rw_owner, m);
+#endif
+}
+
+/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
@@ -167,6 +168,75 @@ rw_swap(krwlock_t *rw, uintptr_t o, uint
 }
 
 /*
+ * rw_enter_lwp:
+ *
+ *	Helper - when acquiring a lock, record the new hold.
+ */
+static inline uintptr_t
+rw_enter_lwp(krwlock_t *rw, lwp_t *l)
+{
+	int i;
+
+	KASSERT(kpreempt_disabled());
+
+	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
+		if (__predict_true(l->l_rwlocks[i] == NULL)) {
+			l->l_rwlocks[i] = rw;
+			/*
+			 * Clear the write wanted flag on every acquire to
+			 * give readers a chance once again.
+			 */
+			return ~RW_WRITE_WANTED;
+		}
+	}
+
+	/*
+	 * Nowhere to track the hold so we lose: temporarily disable
+	 * spinning on the lock.
+	 */
+	return ~(RW_WRITE_WANTED | RW_SPIN);
+}
+
+/*
+ * rw_exit_lwp:
+ *
+ *	Helper - when releasing a lock, stop tracking the hold.
+ */
+static inline void
+rw_exit_lwp(krwlock_t *rw, lwp_t *l)
+{
+	int i;
+
+	KASSERT(kpreempt_disabled());
+
+	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
+		if (__predict_true(l->l_rwlocks[i] == rw)) {
+			l->l_rwlocks[i] = NULL;
+			return;
+		}
+	}
+}
+
+/*
+ * rw_switch:
+ *
+ *	Called by mi_switch() to indicate that an LWP is going off the CPU.
+ */
+void
+rw_switch(void)
+{
+	lwp_t *l = curlwp;
+	int i;
+
+	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
+		if (l->l_rwlocks[i] != NULL) {
+			rw_and(l->l_rwlocks[i], ~RW_SPIN);
+			/* Leave in place for exit to clear. */
+		}
+	}
+}
+
+/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
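
The three helpers above carry the new adaptive-spin scheme: each LWP records up to four held rwlocks in l_rwlocks[], and mi_switch() walks that record to clear RW_SPIN in every held lock before the holder leaves the CPU, so contending CPUs stop spinning and go to sleep instead.  A minimal stand-alone model of that bookkeeping (a sketch, not part of the diff; the model_* names are invented, and the real code operates on krwlock_t/lwp_t with the atomics shown above):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define MODEL_RW_SPIN   0x08UL  /* mirrors RW_SPIN in sys/rwlock.h */
    #define MODEL_NSLOTS    4       /* mirrors l_rwlocks[4] in sys/lwp.h */

    struct model_rwlock { uintptr_t rw_owner; };
    struct model_lwp { struct model_rwlock *l_rwlocks[MODEL_NSLOTS]; };

    /* Record a new hold; like rw_enter_lwp(), minus the mask trick. */
    static void
    model_track(struct model_lwp *l, struct model_rwlock *rw)
    {
        for (int i = 0; i < MODEL_NSLOTS; i++) {
            if (l->l_rwlocks[i] == NULL) {
                l->l_rwlocks[i] = rw;
                return;
            }
        }
        /* No free slot: the real code disables RW_SPIN for this hold. */
    }

    /* Forget a hold; like rw_exit_lwp(). */
    static void
    model_untrack(struct model_lwp *l, struct model_rwlock *rw)
    {
        for (int i = 0; i < MODEL_NSLOTS; i++) {
            if (l->l_rwlocks[i] == rw) {
                l->l_rwlocks[i] = NULL;
                return;
            }
        }
    }

    /* Going off CPU; like rw_switch(): stop advertising "owner is running". */
    static void
    model_switch(struct model_lwp *l)
    {
        for (int i = 0; i < MODEL_NSLOTS; i++) {
            if (l->l_rwlocks[i] != NULL)
                l->l_rwlocks[i]->rw_owner &= ~MODEL_RW_SPIN;
        }
    }

    int
    main(void)
    {
        struct model_rwlock rw = { .rw_owner = MODEL_RW_SPIN };
        struct model_lwp l = { { NULL } };

        model_track(&l, &rw);       /* acquire: spinning still allowed */
        assert(rw.rw_owner & MODEL_RW_SPIN);
        model_switch(&l);           /* holder blocks: contenders must sleep */
        assert((rw.rw_owner & MODEL_RW_SPIN) == 0);
        model_untrack(&l, &rw);     /* release frees the tracking slot */
        return 0;
    }

The real rw_enter_lwp() additionally returns a mask so that the acquire CAS can clear RW_WRITE_WANTED, or RW_SPIN as well when no tracking slot is free.
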
@@ -206,15 +276,10 @@ void
 _rw_init(krwlock_t *rw, uintptr_t return_address)
 {
 
-#ifdef LOCKDEBUG
-	/* XXX only because the assembly stubs can't handle RW_NODEBUG */
 	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
-		rw->rw_owner = 0;
+		rw->rw_owner = RW_SPIN;
 	else
-		rw->rw_owner = RW_NODEBUG;
-#else
-	rw->rw_owner = 0;
-#endif
+		rw->rw_owner = RW_SPIN | RW_NODEBUG;
 }
 
 void
@@ -233,55 +298,20 @@ void
 rw_destroy(krwlock_t *rw)
 {
 
-	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
+	RW_ASSERT(rw, (rw->rw_owner & ~(RW_NODEBUG | RW_SPIN)) == 0);
 	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
 }
 
 /*
- * rw_oncpu:
- *
- *	Return true if an rwlock owner is running on a CPU in the system.
- *	If the target is waiting on the kernel big lock, then we must
- *	release it.  This is necessary to avoid deadlock.
- */
-static bool
-rw_oncpu(uintptr_t owner)
-{
-#ifdef MULTIPROCESSOR
-	struct cpu_info *ci;
-	lwp_t *l;
-
-	KASSERT(kpreempt_disabled());
-
-	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
-		return false;
-	}
-
-	/*
-	 * See lwp_dtor() why dereference of the LWP pointer is safe.
-	 * We must have kernel preemption disabled for that.
-	 */
-	l = (lwp_t *)(owner & RW_THREAD);
-	ci = l->l_cpu;
-
-	if (ci && ci->ci_curlwp == l) {
-		/* Target is running; do we need to block? */
-		return (ci->ci_biglock_wanted != l);
-	}
-#endif
-	/* Not running.  It may be safe to block now. */
-	return false;
-}
-
-/*
  * rw_vector_enter:
  *
- *	Acquire a rwlock.
+ *	The slow path for acquiring a rwlock, that considers all conditions.
+ *	Marked __noinline to prevent the compiler pulling it into rw_enter().
  */
-void
-rw_vector_enter(krwlock_t *rw, const krw_t op)
+static void __noinline
+rw_vector_enter(krwlock_t *rw, const krw_t op, uintptr_t mask, uintptr_t ra)
 {
-	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
+	uintptr_t owner, incr, need_wait, set_wait, next;
 	turnstile_t *ts;
 	int queue;
 	lwp_t *l;
@@ -291,12 +321,8 @@ rw_vector_enter(krwlock_t *rw, const krw
 	LOCKSTAT_COUNTER(spincnt);
 	LOCKSTAT_FLAG(lsflag);
 
-	l = curlwp;
-	curthread = (uintptr_t)l;
-
 	RW_ASSERT(rw, !cpu_intr_p());
-	RW_ASSERT(rw, curthread != 0);
-	RW_WANTLOCK(rw, op);
+	RW_ASSERT(rw, kpreempt_disabled());
 
 	if (panicstr == NULL) {
 		KDASSERT(pserialize_not_in_read_section());
@@ -312,6 +338,7 @@ rw_vector_enter(krwlock_t *rw, const krw
 	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
 	 */
+	l = curlwp;
 	if (__predict_true(op == RW_READER)) {
 		incr = RW_READ_INCR;
 		set_wait = RW_HAS_WAITERS;
@@ -319,7 +346,7 @@ rw_vector_enter(krwlock_t *rw, const krw
 		queue = TS_READER_Q;
 	} else {
 		RW_ASSERT(rw, op == RW_WRITER);
-		incr = curthread | RW_WRITE_LOCKED;
+		incr = (uintptr_t)l | RW_WRITE_LOCKED;
 		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
 		queue = TS_WRITER_Q;
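
The "add operation for both cases" trick relies on the lock word layout: the low bits are flags, and everything from RW_READ_COUNT_SHIFT up is either a reader count or the owning LWP pointer (LWP addresses have their low bits clear).  A small stand-alone illustration of the acquire/release arithmetic, using the flag values that sys/rwlock.h defines later in this patch (a sketch; RW_SPIN and the waiter bits are ignored here for brevity):

    #include <assert.h>
    #include <stdint.h>

    #define RW_WRITE_LOCKED         0x04UL
    #define RW_READ_COUNT_SHIFT     5
    #define RW_READ_INCR            (1UL << RW_READ_COUNT_SHIFT)

    int
    main(void)
    {
        uintptr_t owner = 0;        /* free lock (spin bit ignored here) */
        uintptr_t curlwp = 0x1000;  /* fake LWP address, low bits clear */

        /* Reader acquire: add RW_READ_INCR, i.e. bump the count above the flags. */
        owner += RW_READ_INCR;
        owner += RW_READ_INCR;
        assert((owner >> RW_READ_COUNT_SHIFT) == 2);    /* two read holds */

        /* Reader release is the mirror-image subtract. */
        owner -= RW_READ_INCR;
        owner -= RW_READ_INCR;
        assert(owner == 0);

        /* Writer acquire: the same add, but the increment is lwp | WRITE_LOCKED. */
        owner += curlwp | RW_WRITE_LOCKED;
        assert(owner == (curlwp | RW_WRITE_LOCKED));

        /* Writer release subtracts the identical value. */
        owner -= curlwp | RW_WRITE_LOCKED;
        assert(owner == 0);
        return 0;
    }
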
@@ -327,15 +354,13 @@ rw_vector_enter(krwlock_t *rw, const krw
 
 	LOCKSTAT_ENTER(lsflag);
 
-	KPREEMPT_DISABLE(curlwp);
 	for (owner = rw->rw_owner;;) {
 		/*
 		 * Read the lock owner field.  If the need-to-wait
 		 * indicator is clear, then try to acquire the lock.
 		 */
 		if ((owner & need_wait) == 0) {
-			next = rw_cas(rw, owner, (owner + incr) &
-			    ~RW_WRITE_WANTED);
+			next = rw_cas(rw, owner, (owner + incr) & mask);
 			if (__predict_true(next == owner)) {
 				/* Got it! */
 				RW_MEMBAR_ENTER();
 				break;
 			}
@@ -349,15 +374,50 @@ rw_vector_enter(krwlock_t *rw, const krw
 			owner = next;
 			continue;
 		}
-		if (__predict_false(RW_OWNER(rw) == curthread)) {
+		if (__predict_false(RW_OWNER(rw) == (uintptr_t)l)) {
 			rw_abort(__func__, __LINE__, rw,
 			    "locking against myself");
 		}
+
 		/*
-		 * If the lock owner is running on another CPU, and
-		 * there are no existing waiters, then spin.
+		 * If the lock owner is running on another CPU, and there
+		 * are no existing waiters, then spin.  Notes:
+		 *
+		 * 1) If an LWP on this CPU (possibly curlwp, or an LWP that
+		 * curlwp has interrupted) holds kernel_lock, we can't spin
+		 * without a deadlock.  The CPU that holds the rwlock may be
+		 * blocked trying to acquire kernel_lock, or there may be an
+		 * unseen chain of dependent locks.  To defeat the potential
+		 * deadlock, this LWP needs to sleep (and thereby directly
+		 * drop the kernel_lock, or permit the interrupted LWP that
+		 * holds kernel_lock to complete its work).
+		 *
+		 * 2) If trying to acquire a write lock, and the lock is
+		 * currently read held, after a brief wait set the write
+		 * wanted bit to block out new readers and try to avoid
+		 * starvation.  When the hold is acquired, we'll clear the
+		 * WRITE_WANTED flag to give readers a chance again.  With
+		 * luck this should nudge things in the direction of
+		 * interleaving readers and writers when there is high
+		 * contention.
+		 *
+		 * 3) The spin wait can't be done in soft interrupt context,
+		 * because a lock holder could be pinned beneath the soft
+		 * interrupt LWP (i.e. curlwp) on the same CPU.  For the
+		 * lock holder to make progress and release the lock, the
+		 * soft interrupt needs to sleep.
 		 */
-		if (rw_oncpu(owner)) {
+		if ((cpu_softintr_p() || curcpu()->ci_biglock_count != 0) &&
+		    (owner & RW_SPIN) != 0) {	/* buzzkiller! */
+			next = rw_cas(rw, owner, owner & ~RW_SPIN);
+			if (next != owner) {
+				owner = next;
+				continue;
+			}
+			owner &= ~RW_SPIN;
+		}
+		if ((owner & RW_SPIN) != 0) {
+			RW_ASSERT(rw, (owner & RW_HAS_WAITERS) == 0);
 			LOCKSTAT_START_TIMER(lsflag, spintime);
 			u_int count = SPINLOCK_BACKOFF_MIN;
 			do {
@@ -365,7 +425,18 @@ rw_vector_enter(krwlock_t *rw, const krw
 				SPINLOCK_BACKOFF(count);
 				KPREEMPT_DISABLE(curlwp);
 				owner = rw->rw_owner;
-			} while (rw_oncpu(owner));
+				if ((owner & need_wait) == 0)
+					break;
+				if (count < SPINLOCK_BACKOFF_MAX)
+					continue;
+				/* XXXAD suspicious of this. */
+				if (op == RW_WRITER &&
+				    (owner & RW_WRITE_LOCKED) == 0 &&
+				    (owner & RW_WRITE_WANTED) == 0) {
+					owner = rw_cas(rw, owner,
+					    owner | RW_WRITE_WANTED);
+				}
+			} while ((owner & RW_SPIN) != 0);
 			LOCKSTAT_STOP_TIMER(lsflag, spintime);
 			LOCKSTAT_COUNT(spincnt, 1);
 			if ((owner & need_wait) == 0)
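
Stripped of the CAS traffic, the spin-or-sleep choice made above reduces to a predicate like the following (a sketch, not part of the diff; the real loop also rewrites the lock word to drop RW_SPIN when it declines to spin, so other contenders reach the same conclusion):

    /*
     * Sketch: may this LWP busy-wait for the current holder instead of
     * sleeping on the turnstile?
     */
    static inline bool
    rw_may_spin(uintptr_t owner)
    {
        /*
         * Never spin from a soft interrupt, and never spin while this CPU
         * holds kernel_lock: either can deadlock against the lock holder.
         */
        if (cpu_softintr_p() || curcpu()->ci_biglock_count != 0)
            return false;
        /* Otherwise spin only while the holder advertises it is on a CPU. */
        return (owner & RW_SPIN) != 0;
    }
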
@@ -379,17 +450,17 @@ rw_vector_enter(krwlock_t *rw, const krw
 		ts = turnstile_lookup(rw);
 
 		/*
-		 * Mark the rwlock as having waiters.  If the set fails,
-		 * then we may not need to sleep and should spin again.
-		 * Reload rw_owner because turnstile_lookup() may have
-		 * spun on the turnstile chain lock.
+		 * Mark the rwlock as having waiters, and disable spinning.
+		 * If the set fails, then we may not need to sleep and
+		 * should spin again.  Reload rw_owner now that we own
+		 * the turnstile chain lock.
 		 */
 		owner = rw->rw_owner;
-		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
+		if ((owner & need_wait) == 0) {
 			turnstile_exit(rw);
 			continue;
 		}
-		next = rw_cas(rw, owner, owner | set_wait);
+		next = rw_cas(rw, owner, (owner | set_wait) & ~RW_SPIN);
 		if (__predict_false(next != owner)) {
 			turnstile_exit(rw);
 			owner = next;
@@ -405,43 +476,98 @@ rw_vector_enter(krwlock_t *rw, const krw
 		 * No need for a memory barrier because of context switch.
 		 * If not handed the lock, then spin again.
 		 */
-		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
+		if (op == RW_READER)
 			break;
-		owner = rw->rw_owner;
+		if ((owner & RW_THREAD) == (uintptr_t)l)
+			break;
 	}
 	KPREEMPT_ENABLE(curlwp);
 
 	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
 	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
-	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
-	     (uintptr_t)__builtin_return_address(0)));
+	    (l->l_rwcallsite != 0 ? l->l_rwcallsite : ra));
 	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
-	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
-	     (uintptr_t)__builtin_return_address(0)));
+	    (l->l_rwcallsite != 0 ? l->l_rwcallsite : ra));
 	LOCKSTAT_EXIT(lsflag);
 
-	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
+	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == (uintptr_t)l) ||
 	    (op == RW_READER && RW_COUNT(rw) != 0));
 	RW_LOCKED(rw, op);
 }
 
 /*
- * rw_vector_exit:
+ * rw_enter:
 *
- *	Release a rwlock.
+ *	The fast path for acquiring a lock that considers only the
+ *	uncontended case.  Falls back to rw_vector_enter().
 */
 void
+rw_enter(krwlock_t *rw, const krw_t op)
+{
+	uintptr_t owner, incr, need_wait, next, mask;
+	lwp_t *l;
+
+	RW_ASSERT(rw, !cpu_intr_p());
+	RW_ASSERT(rw, curlwp != NULL);
+	RW_WANTLOCK(rw, op);
+
+	l = curlwp;
+	KPREEMPT_DISABLE(l);
+	mask = rw_enter_lwp(rw, l);
+
+	/*
+	 * We play a slight trick here.  If we're a reader, we want to
+	 * increment the read count.  If we're a writer, we want to
+	 * set the owner field and the WRITE_LOCKED bit.
+	 *
+	 * In the latter case, we expect those bits to be zero,
+	 * therefore we can use an add operation to set them, which
+	 * means an add operation for both cases.
+	 */
+	if (__predict_true(op == RW_READER)) {
+		incr = RW_READ_INCR;
+		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
+	} else {
+		RW_ASSERT(rw, op == RW_WRITER);
+		incr = (uintptr_t)l | RW_WRITE_LOCKED;
+		need_wait = RW_WRITE_LOCKED | RW_THREAD;
+	}
+
+	/*
+	 * Read the lock owner field.  If the need-to-wait
+	 * indicator is clear, then try to acquire the lock.
+	 */
+	owner = rw->rw_owner;
+	if ((owner & need_wait) == 0) {
+		next = rw_cas(rw, owner, (owner + incr) & mask);
+		if (__predict_true(next == owner)) {
+			/* Got it! */
+			RW_LOCKED(rw, op);
+			KPREEMPT_ENABLE(l);
+			RW_MEMBAR_ENTER();
+			return;
+		}
+	}
+
+	rw_vector_enter(rw, op, mask, (uintptr_t)__builtin_return_address(0));
+}
+
+/*
+ * rw_vector_exit:
+ *
+ *	The slow path for releasing a rwlock, that considers all conditions.
+ *	Marked __noinline to prevent the compiler pulling it into rw_exit().
+ */
+static void __noinline
 rw_vector_exit(krwlock_t *rw)
 {
-	uintptr_t curthread, owner, decr, newown, next;
+	uintptr_t owner, decr, newown, next;
 	turnstile_t *ts;
 	int rcnt, wcnt;
 	lwp_t *l;
 
-	l = curlwp;
-	curthread = (uintptr_t)l;
-	RW_ASSERT(rw, curthread != 0);
+	RW_ASSERT(rw, kpreempt_disabled());
 
 	/*
 	 * Again, we use a trick.  Since we used an add operation to
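
rw_enter() and rw_exit() are now plain C fast paths: one CAS in the uncontended case, with a call into the __noinline slow path otherwise, so existing callers need no changes.  For reference, a typical consumer looks like this (illustrative only; the cache structure and function names are made up):

    #include <sys/rwlock.h>

    struct cache {
        krwlock_t   c_lock;
        int         c_value;
    };

    static void
    cache_init(struct cache *c)
    {
        rw_init(&c->c_lock);
    }

    static int
    cache_lookup(struct cache *c)
    {
        int v;

        rw_enter(&c->c_lock, RW_READER);    /* uncontended: a single CAS */
        v = c->c_value;
        rw_exit(&c->c_lock);                /* uncontended: a single CAS */
        return v;
    }

    static void
    cache_update(struct cache *c, int v)
    {
        rw_enter(&c->c_lock, RW_WRITER);
        c->c_value = v;
        rw_exit(&c->c_lock);
    }
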
@@ -449,13 +575,12 @@ rw_vector_exit(krwlock_t *rw)
 	 * them, which makes the read-release and write-release path
 	 * the same.
 	 */
+	l = curlwp;
 	owner = rw->rw_owner;
 	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
-		RW_UNLOCKED(rw, RW_WRITER);
-		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
-		decr = curthread | RW_WRITE_LOCKED;
+		RW_ASSERT(rw, RW_OWNER(rw) == (uintptr_t)l);
+		decr = (uintptr_t)l | RW_WRITE_LOCKED;
 	} else {
-		RW_UNLOCKED(rw, RW_READER);
 		RW_ASSERT(rw, RW_COUNT(rw) != 0);
 		decr = RW_READ_INCR;
 	}
@@ -470,12 +595,21 @@ rw_vector_exit(krwlock_t *rw)
 		newown = (owner - decr);
 		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
 			break;
+		/* Want spinning enabled if lock is becoming free. */
+		if ((newown & RW_THREAD) == 0)
+			newown |= RW_SPIN;
 		next = rw_cas(rw, owner, newown);
-		if (__predict_true(next == owner))
+		if (__predict_true(next == owner)) {
+			rw_exit_lwp(rw, l);
+			KPREEMPT_ENABLE(l);
 			return;
+		}
 		owner = next;
 	}
 
+	/* If there are waiters, there can't be spinners. */
+	RW_ASSERT(rw, (newown & RW_SPIN) == 0);
+
 	/*
 	 * Grab the turnstile chain lock.  This gets the interlock
 	 * on the sleep queue.  Once we have that, we can adjust the
@@ -488,6 +622,7 @@ rw_vector_exit(krwlock_t *rw)
 
 	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
 	rcnt = TS_WAITERS(ts, TS_READER_Q);
+	RW_ASSERT(rw, wcnt + rcnt > 0);
 
 	/*
 	 * Give the lock away.
@@ -500,19 +635,18 @@ rw_vector_exit(krwlock_t *rw)
 	 * set WRITE_WANTED to block out new readers, and let them
 	 * do the work of acquiring the lock in rw_vector_enter().
 	 */
-	if (rcnt == 0 || decr == RW_READ_INCR) {
-		RW_ASSERT(rw, wcnt != 0);
+	if (wcnt > 0 && (rcnt == 0 || decr == RW_READ_INCR)) {
 		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);
 
 		if (rcnt != 0) {
 			/* Give the lock to the longest waiting writer. */
-			l = TS_FIRST(ts, TS_WRITER_Q);
-			newown = (uintptr_t)l | (owner & RW_NODEBUG);
+			lwp_t *l2 = TS_FIRST(ts, TS_WRITER_Q);
+			newown = (uintptr_t)l2 | (owner & RW_NODEBUG);
 			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
 			if (wcnt > 1)
 				newown |= RW_WRITE_WANTED;
 			rw_swap(rw, owner, newown);
-			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
+			turnstile_wakeup(ts, TS_WRITER_Q, 1, l2);
 		} else {
 			/* Wake all writers and let them fight it out. */
 			newown = owner & RW_NODEBUG;
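
The hand-off rules above can be read as a small decision function (a sketch only; wcnt and rcnt are the turnstile writer/reader waiter counts, and releasing_reader corresponds to decr == RW_READ_INCR):

    enum rw_handoff {
        RW_HANDOFF_ONE_WRITER,      /* direct hand-off to longest waiting writer */
        RW_HANDOFF_ALL_WRITERS,     /* wake every writer, let them fight it out */
        RW_HANDOFF_ALL_READERS      /* wake every blocked reader */
    };

    static enum rw_handoff
    rw_handoff(int wcnt, int rcnt, bool releasing_reader)
    {
        if (wcnt > 0 && (rcnt == 0 || releasing_reader))
            return rcnt != 0 ? RW_HANDOFF_ONE_WRITER : RW_HANDOFF_ALL_WRITERS;
        return RW_HANDOFF_ALL_READERS;
    }
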
@@ -537,37 +671,94 @@ rw_vector_exit(krwlock_t *rw)
 		rw_swap(rw, owner, newown);
 		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
 	}
+	rw_exit_lwp(rw, l);
+	KPREEMPT_ENABLE(l);
 }
 
 /*
- * rw_vector_tryenter:
+ * rw_exit:
+ *
+ *	The fast path for releasing a lock that considers only the
+ *	uncontended case.  Falls back to rw_vector_exit().
+ */
+void
+rw_exit(krwlock_t *rw)
+{
+	uintptr_t owner, decr, newown, next;
+	lwp_t *l;
+
+	RW_ASSERT(rw, curlwp != NULL);
+
+	/*
+	 * Again, we use a trick.  Since we used an add operation to
+	 * set the required lock bits, we can use a subtract to clear
+	 * them, which makes the read-release and write-release path
+	 * the same.
+	 */
+	l = curlwp;
+	owner = rw->rw_owner;
+	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
+		RW_UNLOCKED(rw, RW_WRITER);
+		RW_ASSERT(rw, RW_OWNER(rw) == (uintptr_t)l);
+		decr = (uintptr_t)l | RW_WRITE_LOCKED;
+	} else {
+		RW_UNLOCKED(rw, RW_READER);
+		RW_ASSERT(rw, RW_COUNT(rw) != 0);
+		decr = RW_READ_INCR;
+	}
+
+	/* Now try to release it. */
+	RW_MEMBAR_EXIT();
+	KPREEMPT_DISABLE(l);
+	newown = (owner - decr);
+	if (__predict_true((newown & (RW_THREAD | RW_HAS_WAITERS)) !=
+	    RW_HAS_WAITERS)) {
+		/* Want spinning (re-)enabled if lock is becoming free. */
+		if ((newown & RW_THREAD) == 0)
+			newown |= RW_SPIN;
+		next = rw_cas(rw, owner, newown);
+		if (__predict_true(next == owner)) {
+			rw_exit_lwp(rw, l);
+			KPREEMPT_ENABLE(l);
+			return;
+		}
+	}
+	rw_vector_exit(rw);
+}
+
+/*
+ * rw_tryenter:
 *
 *	Try to acquire a rwlock.
 */
 int
-rw_vector_tryenter(krwlock_t *rw, const krw_t op)
+rw_tryenter(krwlock_t *rw, const krw_t op)
 {
-	uintptr_t curthread, owner, incr, need_wait, next;
+	uintptr_t owner, incr, need_wait, next, mask;
 	lwp_t *l;
 
-	l = curlwp;
-	curthread = (uintptr_t)l;
+	RW_ASSERT(rw, curlwp != NULL);
 
-	RW_ASSERT(rw, curthread != 0);
+	l = curlwp;
+	KPREEMPT_DISABLE(l);
+	mask = rw_enter_lwp(rw, l);
 
 	if (op == RW_READER) {
 		incr = RW_READ_INCR;
 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
 	} else {
 		RW_ASSERT(rw, op == RW_WRITER);
-		incr = curthread | RW_WRITE_LOCKED;
+		incr = (uintptr_t)l | RW_WRITE_LOCKED;
 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
 	}
 
 	for (owner = rw->rw_owner;; owner = next) {
-		if (__predict_false((owner & need_wait) != 0))
+		if (__predict_false((owner & need_wait) != 0)) {
+			rw_exit_lwp(rw, l);
+			KPREEMPT_ENABLE(l);
 			return 0;
-		next = rw_cas(rw, owner, owner + incr);
+		}
+		next = rw_cas(rw, owner, (owner + incr) & mask);
 		if (__predict_true(next == owner)) {
 			/* Got it! */
 			break;
@@ -576,9 +767,10 @@ rw_vector_tryenter(krwlock_t *rw, const
 
 	RW_WANTLOCK(rw, op);
 	RW_LOCKED(rw, op);
-	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
+	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == (uintptr_t)l) ||
 	    (op == RW_READER && RW_COUNT(rw) != 0));
 
+	KPREEMPT_ENABLE(l);
 	RW_MEMBAR_ENTER();
 	return 1;
 }
@@ -591,20 +783,14 @@ rw_vector_tryenter(krwlock_t *rw, const
 void
 rw_downgrade(krwlock_t *rw)
 {
-	uintptr_t owner, curthread, newown, next;
+	uintptr_t owner, newown, next;
 	turnstile_t *ts;
 	int rcnt, wcnt;
-	lwp_t *l;
 
-	l = curlwp;
-	curthread = (uintptr_t)l;
-	RW_ASSERT(rw, curthread != 0);
+	RW_ASSERT(rw, curlwp != NULL);
 	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
-	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
+	RW_ASSERT(rw, RW_OWNER(rw) == (uintptr_t)curlwp);
 	RW_UNLOCKED(rw, RW_WRITER);
-#if !defined(DIAGNOSTIC)
-	__USE(curthread);
-#endif
 
 	RW_MEMBAR_PRODUCER();
 
@@ -643,7 +829,8 @@ rw_downgrade(krwlock_t *rw)
 			/*
 			 * If there are no readers, just preserve the
 			 * waiters bits, swap us down to one read hold and
-			 * return.
+			 * return.  Don't set the spin bit as nobody's
+			 * running yet.
 			 */
 			RW_ASSERT(rw, wcnt != 0);
 			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
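
rw_tryupgrade() keeps its existing semantics: it succeeds only while the caller holds the sole read hold and never blocks, and rw_downgrade() never blocks either.  The usual caller pattern is unchanged by this diff (illustrative fragment; struct obj, needs_modification() and modify() are made-up names for some object with an o_lock rwlock member):

    static void
    update_object(struct obj *o)
    {
        rw_enter(&o->o_lock, RW_READER);
        if (needs_modification(o)) {
            if (!rw_tryupgrade(&o->o_lock)) {
                /* Other readers present: reacquire as a writer. */
                rw_exit(&o->o_lock);
                rw_enter(&o->o_lock, RW_WRITER);
            }
            /* State may have changed while the lock was dropped: recheck. */
            if (needs_modification(o))
                modify(o);
            /* Keep the lock, but let other readers back in. */
            rw_downgrade(&o->o_lock);
        }
        rw_exit(&o->o_lock);
    }
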
@@ -661,7 +848,8 @@ rw_downgrade(krwlock_t *rw)
 			 * Give the lock to all blocked readers.  We may
 			 * retain one read hold if downgrading.  If there is
 			 * a writer waiting, new readers will be blocked
-			 * out.
+			 * out.  Don't set the spin bit as nobody's running
+			 * yet.
 			 */
 			newown = owner & RW_NODEBUG;
 			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
@@ -693,16 +881,15 @@ rw_downgrade(krwlock_t *rw)
 int
 rw_tryupgrade(krwlock_t *rw)
 {
-	uintptr_t owner, curthread, newown, next;
+	uintptr_t owner, newown, next;
 	struct lwp *l;
 
-	l = curlwp;
-	curthread = (uintptr_t)l;
-	RW_ASSERT(rw, curthread != 0);
+	RW_ASSERT(rw, curlwp != NULL);
 	RW_ASSERT(rw, rw_read_held(rw));
+	l = curlwp;
 
 	for (owner = RW_READ_INCR;; owner = next) {
-		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
+		newown = (uintptr_t)l | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
 		next = rw_cas(rw, owner, newown);
 		if (__predict_true(next == owner)) {
 			RW_MEMBAR_PRODUCER();
@@ -719,7 +906,7 @@ rw_tryupgrade(krwlock_t *rw)
 	RW_WANTLOCK(rw, RW_WRITER);
 	RW_LOCKED(rw, RW_WRITER);
 	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
-	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
+	RW_ASSERT(rw, RW_OWNER(rw) == (uintptr_t)l);
 
 	return 1;
 }
@@ -796,22 +983,14 @@ rw_owner(wchan_t obj)
 /*
 * rw_owner_running:
 *
- *	Return true if a RW lock is unheld, or write held and the owner is
- *	running on a CPU.  For the pagedaemon.
+ *	Return true if a RW lock is unheld, or held and the owner is running
+ *	on a CPU.  For the pagedaemon only - do not document or use in other
+ *	code.
 */
 bool
 rw_owner_running(const krwlock_t *rw)
 {
-#ifdef MULTIPROCESSOR
-	uintptr_t owner;
-	bool rv;
+	uintptr_t owner = rw->rw_owner;
 
-	kpreempt_disable();
-	owner = rw->rw_owner;
-	rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
-	kpreempt_enable();
-	return rv;
-#else
-	return rw_owner(rw) == curlwp;
-#endif
+	return (owner & RW_THREAD) == 0 || (owner & RW_SPIN) != 0;
 }
Index: kern/kern_synch.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_synch.c,v
retrieving revision 1.334.2.3
diff -u -p -r1.334.2.3 kern_synch.c
--- kern/kern_synch.c	23 Jan 2020 12:17:08 -0000	1.334.2.3
+++ kern/kern_synch.c	25 Jan 2020 21:39:11 -0000
@@ -658,6 +658,9 @@ mi_switch(lwp_t *l)
 	/* We're down to only one lock, so do debug checks. */
 	LOCKDEBUG_BARRIER(l->l_mutex, 1);
 
+	/* Disable spinning on any R/W locks that we hold. */
+	rw_switch();
+
 	/* Count the context switch. */
 	CPU_COUNT(CPU_COUNT_NSWTCH, 1);
 	l->l_ncsw++;
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.192.2.3
diff -u -p -r1.192.2.3 lwp.h
--- sys/lwp.h	22 Jan 2020 11:40:17 -0000	1.192.2.3
+++ sys/lwp.h	25 Jan 2020 21:39:12 -0000
@@ -186,6 +186,7 @@ struct lwp {
 	u_short		l_exlocks;	/* !: lockdebug: excl. locks held */
 	u_short		l_psrefs;	/* !: count of psref held */
 	u_short		l_blcnt;	/* !: count of kernel_lock held */
+	struct krwlock	*l_rwlocks[4];	/* !: tracks first N held rwlocks */
 	int		l_nopreempt;	/* !: don't preempt me! */
 	u_int		l_dopreempt;	/* s: kernel preemption pending */
 	int		l_pflag;	/* !: LWP private flags */
Index: sys/rwlock.h
===================================================================
RCS file: /cvsroot/src/sys/sys/rwlock.h,v
retrieving revision 1.12.2.2
diff -u -p -r1.12.2.2 rwlock.h
--- sys/rwlock.h	22 Jan 2020 11:40:17 -0000	1.12.2.2
+++ sys/rwlock.h	25 Jan 2020 21:39:12 -0000
@@ -71,6 +71,7 @@ typedef struct krwlock krwlock_t;
 #define	RW_HAS_WAITERS		0x01UL	/* lock has waiters */
 #define	RW_WRITE_WANTED		0x02UL	/* >= 1 waiter is a writer */
 #define	RW_WRITE_LOCKED		0x04UL	/* lock is currently write locked */
+#define	RW_SPIN			0x08UL	/* holders are on CPU; OK to spin */
 #define	RW_NODEBUG		0x10UL	/* LOCKDEBUG disabled */
 
 #define	RW_READ_COUNT_SHIFT	5
@@ -80,9 +81,6 @@ typedef struct krwlock krwlock_t;
 #define	RW_COUNT(rw)	((rw)->rw_owner & RW_THREAD)
 #define	RW_FLAGS(rw)	((rw)->rw_owner & ~RW_THREAD)
 
-void	rw_vector_enter(krwlock_t *, const krw_t);
-void	rw_vector_exit(krwlock_t *);
-int	rw_vector_tryenter(krwlock_t *, const krw_t);
 void	_rw_init(krwlock_t *, uintptr_t);
 bool	rw_owner_running(const krwlock_t *);
 #endif	/* __RWLOCK_PRIVATE */
@@ -107,6 +105,8 @@ int rw_lock_held(krwlock_t *);
 void	rw_enter(krwlock_t *, const krw_t);
 void	rw_exit(krwlock_t *);
 
+void	rw_switch(void);
+
 void	rw_obj_init(void);
 krwlock_t *rw_obj_alloc(void);
 void	rw_obj_hold(krwlock_t *);
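
With RW_SPIN in place the lock word reads: bit 0 has-waiters, bit 1 write-wanted, bit 2 write-locked, bit 3 spin hint, bit 4 LOCKDEBUG-disabled, and bits 5 and up either the read count or the owning LWP pointer.  A stand-alone check of how the new bit is consumed (a sketch; RW_THREAD is assumed to mask everything above the flag bits, matching its existing definition):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define RW_HAS_WAITERS          0x01UL
    #define RW_WRITE_WANTED         0x02UL
    #define RW_WRITE_LOCKED         0x04UL
    #define RW_SPIN                 0x08UL
    #define RW_NODEBUG              0x10UL
    #define RW_READ_COUNT_SHIFT     5
    #define RW_READ_INCR            (1UL << RW_READ_COUNT_SHIFT)
    #define RW_THREAD               (~0x1fUL)   /* everything above the flag bits */

    /* The test rw_owner_running() now performs: unheld, or holder still on CPU. */
    static bool
    owner_running(uintptr_t owner)
    {
        return (owner & RW_THREAD) == 0 || (owner & RW_SPIN) != 0;
    }

    int
    main(void)
    {
        /* _rw_init() now seeds the word with RW_SPIN (plus RW_NODEBUG if untracked). */
        uintptr_t owner = RW_SPIN;
        assert(owner_running(owner));       /* free lock */

        owner += RW_READ_INCR;              /* one reader, still on a CPU */
        assert(owner_running(owner));

        owner &= ~RW_SPIN;                  /* holder went through mi_switch() */
        assert(!owner_running(owner));      /* the pagedaemon should not wait for it */
        return 0;
    }
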