Index: compat/common/kern_50.c
===================================================================
RCS file: /cvsroot/src/sys/compat/common/kern_50.c,v
retrieving revision 1.2
diff -u -p -r1.2 kern_50.c
--- compat/common/kern_50.c	27 Jan 2019 02:08:39 -0000	1.2
+++ compat/common/kern_50.c	28 Jan 2020 22:36:25 -0000
@@ -1,7 +1,7 @@
 /*	$NetBSD: kern_50.c,v 1.2 2019/01/27 02:08:39 pgoyette Exp $	*/
 
 /*-
- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2009, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -80,12 +80,12 @@ compat_50_sys__lwp_park(struct lwp *l,
 	}
 
 	if (SCARG(uap, unpark) != 0) {
-		error = lwp_unpark(SCARG(uap, unpark), SCARG(uap, unparkhint));
+		error = lwp_unpark(&SCARG(uap, unpark), 1);
 		if (error != 0)
 			return error;
 	}
 
-	return lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, tsp, SCARG(uap, hint));
+	return lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, tsp);
 }
 
 static int
Index: compat/common/kern_time_60.c
===================================================================
RCS file: /cvsroot/src/sys/compat/common/kern_time_60.c,v
retrieving revision 1.2
diff -u -p -r1.2 kern_time_60.c
--- compat/common/kern_time_60.c	27 Jan 2019 02:08:39 -0000	1.2
+++ compat/common/kern_time_60.c	28 Jan 2020 22:36:25 -0000
@@ -1,7 +1,7 @@
 /*	$NetBSD: kern_time_60.c,v 1.2 2019/01/27 02:08:39 pgoyette Exp $	*/
 
 /*-
- * Copyright (c) 2013 The NetBSD Foundation, Inc.
+ * Copyright (c) 2013, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -75,12 +75,12 @@ compat_60_sys__lwp_park(struct lwp *l,
 	}
 
 	if (SCARG(uap, unpark) != 0) {
-		error = lwp_unpark(SCARG(uap, unpark), SCARG(uap, unparkhint));
+		error = lwp_unpark(&SCARG(uap, unpark), 1);
 		if (error != 0)
 			return error;
 	}
 
-	return lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, tsp, SCARG(uap, hint));
+	return lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, tsp);
 }
 
 int
Index: compat/netbsd32/netbsd32_compat_50.c
===================================================================
RCS file: /cvsroot/src/sys/compat/netbsd32/netbsd32_compat_50.c,v
retrieving revision 1.44
diff -u -p -r1.44 netbsd32_compat_50.c
--- compat/netbsd32/netbsd32_compat_50.c	1 Jan 2020 14:52:38 -0000	1.44
+++ compat/netbsd32/netbsd32_compat_50.c	28 Jan 2020 22:36:25 -0000
@@ -1,7 +1,7 @@
 /*	$NetBSD: netbsd32_compat_50.c,v 1.44 2020/01/01 14:52:38 maxv Exp $	*/
 
 /*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
@@ -564,14 +564,12 @@ compat_50_netbsd32__lwp_park(struct lwp
 	}
 
 	if (SCARG(uap, unpark) != 0) {
-		error = lwp_unpark(SCARG(uap, unpark),
-		    SCARG_P32(uap, unparkhint));
+		error = lwp_unpark(&SCARG(uap, unpark), 1);
 		if (error != 0)
 			return error;
 	}
 
-	return lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, tsp,
-	    SCARG_P32(uap, hint));
+	return lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, tsp);
 }
 
 static int
Index: compat/netbsd32/netbsd32_compat_60.c
===================================================================
RCS file: /cvsroot/src/sys/compat/netbsd32/netbsd32_compat_60.c,v
retrieving revision 1.5
diff -u -p -r1.5 netbsd32_compat_60.c
--- compat/netbsd32/netbsd32_compat_60.c	15 Dec 2019 16:48:26 -0000	1.5
+++ compat/netbsd32/netbsd32_compat_60.c	28 Jan 2020 22:36:25 -0000
@@ -1,7 +1,7 @@
 /*	$NetBSD: netbsd32_compat_60.c,v 1.5 2019/12/15 16:48:26 tsutsui Exp $	*/
 
 /*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
@@ -70,14 +70,12 @@ compat_60_netbsd32__lwp_park(struct lwp
 	}
 
 	if (SCARG(uap, unpark) != 0) {
-		error = lwp_unpark(SCARG(uap, unpark),
-		    SCARG_P32(uap, unparkhint));
+		error = lwp_unpark(&SCARG(uap, unpark), 1);
 		if (error != 0)
 			return error;
 	}
 
-	return lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, tsp,
-	    SCARG_P32(uap, hint));
+	return lwp_park(CLOCK_REALTIME, TIMER_ABSTIME, tsp);
 }
 
 static struct syscall_package compat_netbsd32_60_syscalls[] = {
Index: compat/netbsd32/netbsd32_lwp.c
===================================================================
RCS file: /cvsroot/src/sys/compat/netbsd32/netbsd32_lwp.c,v
retrieving revision 1.21
diff -u -p -r1.21 netbsd32_lwp.c
--- compat/netbsd32/netbsd32_lwp.c	26 Jan 2020 19:08:09 -0000	1.21
+++ compat/netbsd32/netbsd32_lwp.c	28 Jan 2020 22:36:25 -0000
@@ -183,14 +183,12 @@ netbsd32____lwp_park60(struct lwp *l,
 	}
 
 	if (SCARG(uap, unpark) != 0) {
-		error = lwp_unpark(SCARG(uap, unpark),
-		    SCARG_P32(uap, unparkhint));
+		error = lwp_unpark(&SCARG(uap, unpark), 1);
 		if (error != 0)
 			return error;
 	}
 
-	return lwp_park(SCARG(uap, clock_id), SCARG(uap, flags), tsp,
-	    SCARG_P32(uap, hint));
+	return lwp_park(SCARG(uap, clock_id), SCARG(uap, flags), tsp);
 }
 
 int
Index: kern/kern_exec.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_exec.c,v
retrieving revision 1.489
diff -u -p -r1.489 kern_exec.c
--- kern/kern_exec.c	23 Jan 2020 10:05:44 -0000	1.489
+++ kern/kern_exec.c	28 Jan 2020 22:36:27 -0000
@@ -1141,13 +1141,30 @@ emulexec(struct lwp *l, struct exec_pack
 		(*p->p_emul->e_proc_exit)(p);
 
 	/*
-	 * This is now LWP 1.
+	 * This is now LWP 1.  Don't bother with p_treelock here as this is
+	 * the only live LWP in the proc right now.
 	 */
-	/* XXX elsewhere */
-	mutex_enter(p->p_lock);
-	p->p_nlwpid = 1;
-	l->l_lid = 1;
-	mutex_exit(p->p_lock);
+	while (__predict_false(l->l_lid != 1)) {
+		lwp_t *l2 __diagused;
+		int error;
+
+		mutex_enter(p->p_lock);
+		error = radix_tree_insert_node(&p->p_lwptree, 1 - 1, l);
+		if (error == 0) {
+			l2 = radix_tree_remove_node(&p->p_lwptree,
+			    (uint64_t)(l->l_lid - 1));
+			KASSERT(l2 == l);
+			p->p_nlwpid = 2;
+			l->l_lid = 1;
+		}
+		mutex_exit(p->p_lock);
+
+		if (error == 0)
+			break;
+
+		KASSERT(error == ENOMEM);
+		radix_tree_await_memory();
+	}
 
 	/*
	 * Call exec hook. Emulation code may NOT store reference to anything
@@ -2526,6 +2543,7 @@ do_posix_spawn(struct lwp *l1, pid_t *pi
 	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
 	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
 	rw_init(&p2->p_reflock);
+	rw_init(&p2->p_treelock);
 	cv_init(&p2->p_waitcv, "wait");
 	cv_init(&p2->p_lwpcv, "lwpwait");
 
Index: kern/kern_exit.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_exit.c,v
retrieving revision 1.281
diff -u -p -r1.281 kern_exit.c
--- kern/kern_exit.c	27 Jan 2020 21:09:33 -0000	1.281
+++ kern/kern_exit.c	28 Jan 2020 22:36:27 -0000
@@ -202,6 +202,7 @@ exit1(struct lwp *l, int exitcode, int s
 	ksiginfo_t	ksi;
 	ksiginfoq_t	kq;
 	int		wakeinit;
+	struct lwp	*l2 __diagused;
 
 	p = l->l_proc;
 
@@ -565,6 +566,11 @@ exit1(struct lwp *l, int exitcode, int s
 	p->p_nrlwps--;
 	p->p_nzlwps++;
 	p->p_ndlwps = 0;
+	/* Don't bother with p_treelock as no other LWPs remain. */
+	l2 = radix_tree_remove_node(&p->p_lwptree, (uint64_t)(l->l_lid - 1));
+	KASSERT(l2 == l);
+	KASSERT(radix_tree_empty_tree_p(&p->p_lwptree));
+	radix_tree_fini_tree(&p->p_lwptree);
 	mutex_exit(p->p_lock);
 
 	/*
@@ -1256,6 +1262,7 @@ proc_free(struct proc *p, struct wrusage
 	cv_destroy(&p->p_waitcv);
 	cv_destroy(&p->p_lwpcv);
 	rw_destroy(&p->p_reflock);
+	rw_destroy(&p->p_treelock);
 
 	proc_free_mem(p);
 }
Index: kern/kern_fork.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_fork.c,v
retrieving revision 1.217
diff -u -p -r1.217 kern_fork.c
--- kern/kern_fork.c	16 Dec 2019 22:47:54 -0000	1.217
+++ kern/kern_fork.c	28 Jan 2020 22:36:27 -0000
@@ -327,6 +327,7 @@ fork1(struct lwp *l1, int flags, int exi
 
 	LIST_INIT(&p2->p_lwps);
 	LIST_INIT(&p2->p_sigwaiters);
+	radix_tree_init_tree(&p2->p_lwptree);
 
 	/*
 	 * Duplicate sub-structures as needed.
@@ -353,6 +354,7 @@ fork1(struct lwp *l1, int flags, int exi
 	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
 	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
 	rw_init(&p2->p_reflock);
+	rw_init(&p2->p_treelock);
 	cv_init(&p2->p_waitcv, "wait");
 	cv_init(&p2->p_lwpcv, "lwpwait");
 
Index: kern/kern_lwp.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_lwp.c,v
retrieving revision 1.222
diff -u -p -r1.222 kern_lwp.c
--- kern/kern_lwp.c	27 Jan 2020 21:58:16 -0000	1.222
+++ kern/kern_lwp.c	28 Jan 2020 22:36:27 -0000
@@ -331,7 +331,6 @@ lwpinit(void)
 
 	LIST_INIT(&alllwp);
 	lwpinit_specificdata();
-	lwp_sys_init();
 	lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0,
 	    "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL);
 
@@ -538,7 +537,7 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwp
 {
 	const lwpid_t curlid = l->l_lid;
 	proc_t *p = l->l_proc;
-	lwp_t *l2;
+	lwp_t *l2, *next;
 	int error;
 
 	KASSERT(mutex_owned(p->p_lock));
@@ -578,7 +577,29 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwp
 	 */
 	nfound = 0;
 	error = 0;
-	LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
+
+	/*
+	 * If given a specific LID, go via the tree and make sure
+	 * it's not detached.
+	 */
+	if (lid != 0) {
+		l2 = radix_tree_lookup_node(&p->p_lwptree,
+		    (uint64_t)(lid - 1));
+		if (l2 == NULL) {
+			error = ESRCH;
+			break;
+		}
+		KASSERT(l2->l_lid == lid);
+		if ((l2->l_prflag & LPR_DETACHED) != 0) {
+			error = EINVAL;
+			break;
+		}
+	} else {
+		l2 = LIST_FIRST(&p->p_lwps);
+	}
+	for (; l2 != NULL; l2 = next) {
+		next = (lid != 0 ? NULL : LIST_NEXT(l2, l_sibling));
+
 		/*
 		 * If a specific wait and the target is waiting on
 		 * us, then avoid deadlock.  This also traps LWPs
@@ -599,8 +620,6 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwp
 			continue;
 		}
 		if (lid != 0) {
-			if (l2->l_lid != lid)
-				continue;
 			/*
 			 * Mark this LWP as the first waiter, if there
 			 * is no other.
@@ -686,13 +705,12 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwp
	 * so that they can re-check for zombies and for deadlock.
	 */
 	if (lid != 0) {
-		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
-			if (l2->l_lid == lid) {
-				if (l2->l_waiter == curlid)
-					l2->l_waiter = 0;
-				break;
-			}
-		}
+		l2 = radix_tree_lookup_node(&p->p_lwptree,
+		    (uint64_t)(lid - 1));
+		KASSERT(l2 == NULL || l2->l_lid == lid);
+
+		if (l2 != NULL && l2->l_waiter == curlid)
+			l2->l_waiter = 0;
 	}
 	p->p_nlwpwait--;
 	l->l_waitingfor = 0;
@@ -701,60 +719,42 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwp
 	return error;
 }
 
+/*
+ * Find an unused LID for a new LWP.
+ */
 static lwpid_t
-lwp_find_free_lid(lwpid_t try_lid, lwp_t * new_lwp, proc_t *p)
+lwp_find_free_lid(struct proc *p)
 {
-	#define LID_SCAN (1u << 31)
-	lwp_t *scan, *free_before;
-	lwpid_t nxt_lid;
-
-	/*
-	 * We want the first unused lid greater than or equal to
-	 * try_lid (modulo 2^31).
-	 * (If nothing else ld.elf_so doesn't want lwpid with the top bit set.)
-	 * We must not return 0, and avoiding 'LID_SCAN - 1' makes
-	 * the outer test easier.
-	 * This would be much easier if the list were sorted in
-	 * increasing order.
-	 * The list is kept sorted in decreasing order.
-	 * This code is only used after a process has generated 2^31 lwp.
-	 *
-	 * Code assumes it can always find an id.
-	 */
+	struct lwp *gang[32];
+	lwpid_t lid;
+	unsigned n;
 
-	try_lid &= LID_SCAN - 1;
-	if (try_lid <= 1)
-		try_lid = 2;
-
-	free_before = NULL;
-	nxt_lid = LID_SCAN - 1;
-	LIST_FOREACH(scan, &p->p_lwps, l_sibling) {
-		if (scan->l_lid != nxt_lid) {
-			/* There are available lid before this entry */
-			free_before = scan;
-			if (try_lid > scan->l_lid)
-				break;
-		}
-		if (try_lid == scan->l_lid) {
-			/* The ideal lid is busy, take a higher one */
-			if (free_before != NULL) {
-				try_lid = free_before->l_lid + 1;
-				break;
-			}
-			/* No higher ones, reuse low numbers */
-			try_lid = 2;
-		}
+	KASSERT(mutex_owned(p->p_lock));
+	KASSERT(p->p_nlwpid > 0);
 
-		nxt_lid = scan->l_lid - 1;
-		if (LIST_NEXT(scan, l_sibling) == NULL) {
-			/* The value we have is lower than any existing lwp */
-			LIST_INSERT_AFTER(scan, new_lwp, l_sibling);
-			return try_lid;
+	/*
+	 * Scoot forward through the tree in blocks of LIDs doing gang
+	 * lookup with dense=true, meaning the lookup will terminate the
+	 * instant a hole is encountered.  The expectation is that most of
+	 * the time, the very first entry (p_nlwpid) is empty, and the
+	 * lookup will fail fast.
+	 */
+	for (lid = p->p_nlwpid;;) {
+		n = radix_tree_gang_lookup_node(&p->p_lwptree, lid - 1,
+		    (void **)gang, __arraycount(gang), true);
+		if (n == 0) {
+			/* Start point was empty. */
+			break;
+		}
+		KASSERT(gang[0]->l_lid == lid);
+		lid = gang[n - 1]->l_lid + 1;
+		if (n < __arraycount(gang)) {
+			/* Scan encountered a hole. */
+			break;
 		}
 	}
-	LIST_INSERT_BEFORE(free_before, new_lwp, l_sibling);
-	return try_lid;
+	return (lwpid_t)lid;
 }
 
 /*
@@ -896,20 +896,54 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_
 	uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2);
 
 	if ((flags & LWP_PIDLID) != 0) {
+		/* Linux threads: use a PID. */
 		lid = proc_alloc_pid(p2);
 		l2->l_pflag |= LP_PIDLID;
 	} else if (p2->p_nlwps == 0) {
-		lid = l1->l_lid;
 		/*
-		 * Update next LWP ID, too.  If this overflows to LID_SCAN,
-		 * the slow path of scanning will be used for the next LWP.
+		 * First LWP in process.  Copy the parent's LID to avoid
+		 * causing problems for fork() + threads.  Don't give
+		 * subsequent threads the distinction of using LID 1.
 		 */
-		p2->p_nlwpid = lid + 1;
+		lid = l1->l_lid;
+		p2->p_nlwpid = 2;
 	} else {
+		/* Scan the radix tree for a free LID. */
 		lid = 0;
 	}
 
+	/*
+	 * Allocate LID if needed, and insert into the radix tree.  The
+	 * first LWP in most processes has a LID of 1.  It turns out that if
+	 * you insert an item with a key of zero to a radixtree, it's stored
+	 * directly in the root (p_lwptree) and no extra memory is
+	 * allocated.  We therefore always subtract 1 from the LID, which
+	 * means no memory is allocated for the tree unless the program is
+	 * using threads.  NB: the allocation and insert must take place
+	 * under the same hold of p_lock.
+	 */
 	mutex_enter(p2->p_lock);
+	for (;;) {
+		int error;
+
+		l2->l_lid = (lid == 0 ? lwp_find_free_lid(p2) : lid);
+
+		rw_enter(&p2->p_treelock, RW_WRITER);
+		error = radix_tree_insert_node(&p2->p_lwptree,
+		    (uint64_t)(l2->l_lid - 1), l2);
+		rw_exit(&p2->p_treelock);
+
+		if (__predict_true(error == 0)) {
+			if (lid == 0)
+				p2->p_nlwpid = l2->l_lid + 1;
+			break;
+		}
+
+		KASSERT(error == ENOMEM);
+		mutex_exit(p2->p_lock);
+		radix_tree_await_memory();
+		mutex_enter(p2->p_lock);
+	}
 
 	if ((flags & LWP_DETACHED) != 0) {
 		l2->l_prflag = LPR_DETACHED;
@@ -917,35 +951,23 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_
 	} else
 		l2->l_prflag = 0;
 
-	if (l1->l_proc == p2)
+	if (l1->l_proc == p2) {
+		/*
+		 * These flags are set while p_lock is held.  Copy with
+		 * p_lock held too, so the LWP doesn't sneak into the
+		 * process without them being set.
+		 */
 		l2->l_flag |= (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE));
-	else
+	} else {
+		/* fork(): pending core/exit doesn't apply to child. */
 		l2->l_flag |= (l1->l_flag & LW_WREBOOT);
+	}
 
 	l2->l_sigstk = *sigstk;
 	l2->l_sigmask = *sigmask;
 	TAILQ_INIT(&l2->l_sigpend.sp_info);
 	sigemptyset(&l2->l_sigpend.sp_set);
-
-	if (__predict_true(lid == 0)) {
-		/*
-		 * XXX: l_lid are expected to be unique (for a process)
-		 * if LWP_PIDLID is sometimes set this won't be true.
-		 * Once 2^31 threads have been allocated we have to
-		 * scan to ensure we allocate a unique value.
-		 */
-		lid = ++p2->p_nlwpid;
-		if (__predict_false(lid & LID_SCAN)) {
-			lid = lwp_find_free_lid(lid, l2, p2);
-			p2->p_nlwpid = lid | LID_SCAN;
-			/* l2 as been inserted into p_lwps in order */
-			goto skip_insert;
-		}
-		p2->p_nlwpid = lid;
-	}
 	LIST_INSERT_HEAD(&p2->p_lwps, l2, l_sibling);
- skip_insert:
-	l2->l_lid = lid;
 	p2->p_nlwps++;
 	p2->p_nrlwps++;
 
@@ -964,6 +986,8 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_
 		}
 		lwp_unlock(l1);
 	}
+
+	/* This marks the end of the "must be atomic" section. */
 	mutex_exit(p2->p_lock);
 
 	SDT_PROBE(proc, kernel, , lwp__create, l2, 0, 0, 0, 0);
@@ -1218,6 +1242,7 @@ lwp_free(struct lwp *l, bool recycle, bo
 {
 	struct proc *p = l->l_proc;
 	struct rusage *ru;
+	struct lwp *l2;
 	ksiginfoq_t kq;
 
 	KASSERT(l != curlwp);
@@ -1233,8 +1258,8 @@ lwp_free(struct lwp *l, bool recycle, bo
 	(void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1);
 
 	/*
-	 * If this was not the last LWP in the process, then adjust
-	 * counters and unlock.
+	 * If this was not the last LWP in the process, then adjust counters
+	 * and unlock.  This is done differently for the last LWP in exit1().
 	 */
 	if (!last) {
 		/*
@@ -1253,6 +1278,15 @@ lwp_free(struct lwp *l, bool recycle, bo
 		if ((l->l_prflag & LPR_DETACHED) != 0)
 			p->p_ndlwps--;
+
+		/* Make note of the LID being free, and remove from tree. */
+		if (l->l_lid < p->p_nlwpid)
+			p->p_nlwpid = l->l_lid;
+		rw_enter(&p->p_treelock, RW_WRITER);
+		l2 = radix_tree_remove_node(&p->p_lwptree,
+		    (uint64_t)(l->l_lid - 1));
+		KASSERT(l2 == l);
+		rw_exit(&p->p_treelock);
+
 		/*
 		 * Have any LWPs sleeping in lwp_wait() recheck for
 		 * deadlock.
@@ -1423,7 +1457,8 @@ lwp_find2(pid_t pid, lwpid_t lid)
 /*
 * Look up a live LWP within the specified process.
 *
- * Must be called with p->p_lock held.
+ * Must be called with p->p_lock held (as it looks at the radix tree,
+ * and also wants to exclude idle and zombie LWPs).
 */
 struct lwp *
 lwp_find(struct proc *p, lwpid_t id)
@@ -1432,10 +1467,8 @@ lwp_find(struct proc *p, lwpid_t id)
 
 	KASSERT(mutex_owned(p->p_lock));
 
-	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
-		if (l->l_lid == id)
-			break;
-	}
+	l = radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(id - 1));
+	KASSERT(l == NULL || l->l_lid == id);
 
 	/*
 	 * No need to lock - all of these conditions will
Index: kern/kern_proc.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_proc.c,v
retrieving revision 1.239
diff -u -p -r1.239 kern_proc.c
--- kern/kern_proc.c	31 Dec 2019 13:07:13 -0000	1.239
+++ kern/kern_proc.c	28 Jan 2020 22:36:27 -0000
@@ -376,7 +376,7 @@ procinit(void)
 	proc_specificdata_domain = specificdata_domain_create();
 	KASSERT(proc_specificdata_domain != NULL);
 
-	proc_cache = pool_cache_init(sizeof(struct proc), 0, 0, 0,
+	proc_cache = pool_cache_init(sizeof(struct proc), coherency_unit, 0, 0,
 	    "procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL);
 
 	proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
@@ -440,6 +440,7 @@ proc0_init(void)
 	struct pgrp *pg;
 	struct rlimit *rlim;
 	rlim_t lim;
+	int error __diagused;
 	int i;
 
 	p = &proc0;
@@ -450,10 +451,15 @@ proc0_init(void)
 
 	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 	rw_init(&p->p_reflock);
+	rw_init(&p->p_treelock);
 	cv_init(&p->p_waitcv, "wait");
 	cv_init(&p->p_lwpcv, "lwpwait");
 	LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);
+	radix_tree_init_tree(&p->p_lwptree);
+	error = radix_tree_insert_node(&p->p_lwptree,
+	    (uint64_t)(lwp0.l_lid - 1), &lwp0);
+	KASSERT(error == 0);
 
 	pid_table[0].pt_proc = p;
 	LIST_INSERT_HEAD(&allproc, p, p_list);
Index: kern/sys_lwp.c
===================================================================
RCS file: /cvsroot/src/sys/kern/sys_lwp.c,v
retrieving revision 1.73
diff -u -p -r1.73 sys_lwp.c
--- kern/sys_lwp.c	26 Jan 2020 19:08:09 -0000	1.73
+++ kern/sys_lwp.c	28 Jan 2020 22:36:27 -0000
@@ -56,22 +56,14 @@ __KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v
 
 static const stack_t lwp_ss_init = SS_INIT;
 
-static syncobj_t lwp_park_sobj = {
-	.sobj_flag	= SOBJ_SLEEPQ_LIFO,
+syncobj_t lwp_park_syncobj = {
+	.sobj_flag	= SOBJ_SLEEPQ_NULL,
 	.sobj_unsleep	= sleepq_unsleep,
 	.sobj_changepri	= sleepq_changepri,
 	.sobj_lendpri	= sleepq_lendpri,
 	.sobj_owner	= syncobj_noowner,
 };
 
-static sleeptab_t lwp_park_tab;
-
-void
-lwp_sys_init(void)
-{
-	sleeptab_init(&lwp_park_tab);
-}
-
 static void
 mi_startlwp(void *arg)
 {
@@ -423,9 +415,9 @@ sys__lwp_detach(struct lwp *l, const str
 		 * We can't use lwp_find() here because the target might
 		 * be a zombie.
 		 */
-		LIST_FOREACH(t, &p->p_lwps, l_sibling)
-			if (t->l_lid == target)
-				break;
+		t = radix_tree_lookup_node(&p->p_lwptree,
+		    (uint64_t)(target - 1));
+		KASSERT(t == NULL || t->l_lid == target);
 	}
 
 	/*
@@ -463,79 +455,53 @@ sys__lwp_detach(struct lwp *l, const str
 	return error;
 }
 
-static inline wchan_t
-lwp_park_wchan(struct proc *p, const void *hint)
-{
-
-	return (wchan_t)((uintptr_t)p ^ (uintptr_t)hint);
-}
-
 int
-lwp_unpark(lwpid_t target, const void *hint)
+lwp_unpark(const lwpid_t *tp, const u_int ntargets)
 {
-	sleepq_t *sq;
-	wchan_t wchan;
-	kmutex_t *mp;
+	uint64_t id;
+	u_int target;
+	int error;
 	proc_t *p;
 	lwp_t *t;
 
-	/*
-	 * Easy case: search for the LWP on the sleep queue.  If
-	 * it's parked, remove it from the queue and set running.
-	 */
 	p = curproc;
-	wchan = lwp_park_wchan(p, hint);
-	sq = sleeptab_lookup(&lwp_park_tab, wchan, &mp);
-
-	TAILQ_FOREACH(t, sq, l_sleepchain)
-		if (t->l_proc == p && t->l_lid == target)
-			break;
-
-	if (__predict_true(t != NULL)) {
-		sleepq_remove(sq, t);
-		mutex_spin_exit(mp);
-		return 0;
-	}
-
-	/*
-	 * The LWP hasn't parked yet.  Take the hit and mark the
-	 * operation as pending.
-	 */
-	mutex_spin_exit(mp);
-
-	mutex_enter(p->p_lock);
-	if ((t = lwp_find(p, target)) == NULL) {
-		mutex_exit(p->p_lock);
-		return ESRCH;
-	}
+	error = 0;
 
-	/*
-	 * It may not have parked yet, we may have raced, or it
-	 * is parked on a different user sync object.
-	 */
-	lwp_lock(t);
-	if (t->l_syncobj == &lwp_park_sobj) {
-		/* Releases the LWP lock. */
-		lwp_unsleep(t, true);
-	} else {
+	rw_enter(&p->p_treelock, RW_READER);
+	for (target = 0; target < ntargets; target++) {
 		/*
-		 * Set the operation pending.  The next call to _lwp_park
-		 * will return early.
+		 * We don't bother excluding zombies or idle LWPs here, as
+		 * setting LW_UNPARKED on them won't do any harm.
 		 */
-		t->l_flag |= LW_UNPARKED;
-		lwp_unlock(t);
+		id = (uint64_t)(tp[target] - 1);
+		t = radix_tree_lookup_node(&p->p_lwptree, id);
+		if (t == NULL) {
+			error = ESRCH;
+			continue;
+		}
+
+		/* It may not have parked yet or we may have raced. */
+		lwp_lock(t);
+		if (t->l_syncobj == &lwp_park_syncobj) {
+			/* Releases the LWP lock. */
+			lwp_unsleep(t, true);
+		} else {
+			/*
+			 * Set the operation pending.  The next call to
			 * _lwp_park() will return early.
+			 */
+			t->l_flag |= LW_UNPARKED;
+			lwp_unlock(t);
+		}
 	}
+	rw_exit(&p->p_treelock);
 
-	mutex_exit(p->p_lock);
-	return 0;
+	return error;
 }
 
 int
-lwp_park(clockid_t clock_id, int flags, struct timespec *ts, const void *hint)
+lwp_park(clockid_t clock_id, int flags, struct timespec *ts)
 {
-	sleepq_t *sq;
-	kmutex_t *mp;
-	wchan_t wchan;
 	int timo, error;
 	struct timespec start;
 	lwp_t *l;
@@ -550,25 +516,19 @@ lwp_park(clockid_t clock_id, int flags,
 		timo = 0;
 	}
 
-	/* Find and lock the sleep queue. */
-	l = curlwp;
-	wchan = lwp_park_wchan(l->l_proc, hint);
-	sq = sleeptab_lookup(&lwp_park_tab, wchan, &mp);
-
 	/*
 	 * Before going the full route and blocking, check to see if an
 	 * unpark op is pending.
 	 */
+	l = curlwp;
 	lwp_lock(l);
 	if ((l->l_flag & (LW_CANCELLED | LW_UNPARKED)) != 0) {
 		l->l_flag &= ~(LW_CANCELLED | LW_UNPARKED);
 		lwp_unlock(l);
-		mutex_spin_exit(mp);
 		return EALREADY;
 	}
 
-	lwp_unlock_to(l, mp);
 	l->l_biglocks = 0;
-	sleepq_enqueue(sq, wchan, "parked", &lwp_park_sobj);
+	sleepq_enqueue(NULL, l, "parked", &lwp_park_syncobj);
 	error = sleepq_block(timo, true);
 	switch (error) {
 	case EWOULDBLOCK:
@@ -617,13 +577,12 @@ sys____lwp_park60(struct lwp *l, const s
 	}
 
 	if (SCARG(uap, unpark) != 0) {
-		error = lwp_unpark(SCARG(uap, unpark), SCARG(uap, unparkhint));
+		error = lwp_unpark(&SCARG(uap, unpark), 1);
 		if (error != 0)
 			return error;
 	}
 
-	error = lwp_park(SCARG(uap, clock_id), SCARG(uap, flags), tsp,
-	    SCARG(uap, hint));
+	error = lwp_park(SCARG(uap, clock_id), SCARG(uap, flags), tsp);
 	if (SCARG(uap, ts) != NULL && (SCARG(uap, flags) & TIMER_ABSTIME) == 0)
 		(void)copyout(tsp, SCARG(uap, ts), sizeof(*tsp));
 	return error;
@@ -638,7 +597,7 @@ sys__lwp_unpark(struct lwp *l, const str
 		syscallarg(const void *)	hint;
 	} */
 
-	return lwp_unpark(SCARG(uap, target), SCARG(uap, hint));
+	return lwp_unpark(&SCARG(uap, target), 1);
 }
 
 int
@@ -650,19 +609,12 @@ sys__lwp_unpark_all(struct lwp *l, const
 		syscallarg(size_t)		ntargets;
 		syscallarg(const void *)	hint;
 	} */
-	struct proc *p;
-	struct lwp *t;
-	sleepq_t *sq;
-	wchan_t wchan;
-	lwpid_t targets[32], *tp, *tpp, *tmax, target;
+	lwpid_t targets[32], *tp;
 	int error;
-	kmutex_t *mp;
 	u_int ntargets;
 	size_t sz;
 
-	p = l->l_proc;
 	ntargets = SCARG(uap, ntargets);
-
 	if (SCARG(uap, targets) == NULL) {
 		/*
 		 * Let the caller know how much we are willing to do, and
@@ -678,7 +630,7 @@ sys__lwp_unpark_all(struct lwp *l, const
 	 * Copy in the target array.  If it's a small number of LWPs, then
 	 * place the numbers on the stack.
 	 */
-	sz = sizeof(target) * ntargets;
+	sz = sizeof(lwpid_t) * ntargets;
 	if (sz <= sizeof(targets))
 		tp = targets;
 	else
@@ -690,64 +642,10 @@ sys__lwp_unpark_all(struct lwp *l, const
 		}
 		return error;
 	}
-
-	wchan = lwp_park_wchan(p, SCARG(uap, hint));
-	sq = sleeptab_lookup(&lwp_park_tab, wchan, &mp);
-
-	for (tmax = tp + ntargets, tpp = tp; tpp < tmax; tpp++) {
-		target = *tpp;
-
-		/*
-		 * Easy case: search for the LWP on the sleep queue.  If
-		 * it's parked, remove it from the queue and set running.
-		 */
-		TAILQ_FOREACH(t, sq, l_sleepchain)
-			if (t->l_proc == p && t->l_lid == target)
-				break;
-
-		if (t != NULL) {
-			sleepq_remove(sq, t);
-			continue;
-		}
-
-		/*
-		 * The LWP hasn't parked yet.  Take the hit and
-		 * mark the operation as pending.
-		 */
-		mutex_spin_exit(mp);
-		mutex_enter(p->p_lock);
-		if ((t = lwp_find(p, target)) == NULL) {
-			mutex_exit(p->p_lock);
-			mutex_spin_enter(mp);
-			continue;
-		}
-		lwp_lock(t);
-
-		/*
-		 * It may not have parked yet, we may have raced, or
-		 * it is parked on a different user sync object.
-		 */
-		if (t->l_syncobj == &lwp_park_sobj) {
-			/* Releases the LWP lock. */
-			lwp_unsleep(t, true);
-		} else {
-			/*
-			 * Set the operation pending.  The next call to
-			 * _lwp_park will return early.
-			 */
-			t->l_flag |= LW_UNPARKED;
-			lwp_unlock(t);
-		}
-
-		mutex_exit(p->p_lock);
-		mutex_spin_enter(mp);
-	}
-
-	mutex_spin_exit(mp);
+	error = lwp_unpark(tp, ntargets);
 	if (tp != targets)
 		kmem_free(tp, sz);
-
-	return 0;
+	return error;
 }
 
 int
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.199
diff -u -p -r1.199 lwp.h
--- sys/lwp.h	28 Jan 2020 16:40:27 -0000	1.199
+++ sys/lwp.h	28 Jan 2020 22:36:28 -0000
@@ -319,7 +319,6 @@ do { \
 
 void	lwpinit(void);
 void	lwp0_init(void);
-void	lwp_sys_init(void);
 
 void	lwp_startup(lwp_t *, lwp_t *);
 void	startlwp(void *);
@@ -368,8 +367,8 @@ void	lwp_setspecific(specificdata_key_t,
 void	lwp_setspecific_by_lwp(lwp_t *, specificdata_key_t, void *);
 
 /* Syscalls. */
-int	lwp_park(clockid_t, int, struct timespec *, const void *);
-int	lwp_unpark(lwpid_t, const void *);
+int	lwp_park(clockid_t, int, struct timespec *);
+int	lwp_unpark(const lwpid_t *, const u_int);
 
 /* DDB. */
 void	lwp_whatis(uintptr_t, void (*)(const char *, ...) __printflike(1, 2));
Index: sys/proc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/proc.h,v
retrieving revision 1.357
diff -u -p -r1.357 proc.h
--- sys/proc.h	12 Oct 2019 19:38:57 -0000	1.357
+++ sys/proc.h	28 Jan 2020 22:36:28 -0000
@@ -1,7 +1,7 @@
 /*	$NetBSD: proc.h,v 1.357 2019/10/12 19:38:57 kamil Exp $	*/
 
 /*-
- * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
@@ -87,6 +87,7 @@
 #include
 #include
 #include
+#include <sys/radixtree.h>
 #include
 #include
 #include
@@ -222,6 +223,7 @@ struct emul {
 *	l:	proc_lock
 *	t:	p_stmutex
 *	p:	p_lock
+ *	r:	p_treelock
 *	(:	updated atomically
 *	::	unlocked, stable
 */
@@ -229,11 +231,7 @@ struct vmspace;
 
 struct proc {
 	LIST_ENTRY(proc) p_list;	/* l: List of all processes */
-
-	kmutex_t	p_auxlock;	/* :: secondary, longer term lock */
 	kmutex_t	*p_lock;	/* :: general mutex */
-	kmutex_t	p_stmutex;	/* :: mutex on profiling state */
-	krwlock_t	p_reflock;	/* p: lock for debugger, procfs */
 	kcondvar_t	p_waitcv;	/* p: wait, stop CV on children */
 	kcondvar_t	p_lwpcv;	/* p: wait, stop CV on LWPs */
@@ -266,6 +264,7 @@ struct proc {
 	LIST_ENTRY(proc) p_sibling;	/* l: List of sibling processes. */
 	LIST_HEAD(, proc) p_children;	/* l: List of children. */
 	LIST_HEAD(, lwp) p_lwps;	/* p: List of LWPs. */
+	struct radix_tree p_lwptree;	/* p,r: Tree of LWPs. */
 	struct ras	*p_raslist;	/* a: List of RAS entries */
 
/* The following fields are all zeroed upon creation in fork. */
@@ -342,6 +341,14 @@ struct proc {
 	struct mdproc	p_md;		/* p: Any machine-dependent fields */
 	vaddr_t		p_stackbase;	/* :: ASLR randomized stack base */
 	struct kdtrace_proc *p_dtrace;	/* :: DTrace-specific data. */
+/*
+ * Locks in their own cache line towards the end.
+ */
+	kmutex_t	p_auxlock	/* :: secondary, longer term lock */
+	    __aligned(COHERENCY_UNIT);
+	kmutex_t	p_stmutex;	/* :: mutex on profiling state */
+	krwlock_t	p_reflock;	/* :: lock for debugger, procfs */
+	krwlock_t	p_treelock;	/* :: lock on p_lwptree */
 };
 
#define	p_rlimit	p_limit->pl_rlimit
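
Not part of the diff above: a minimal userland sketch of the LID allocation idea the new lwp_find_free_lid()/p_nlwpid handling implements. LWPs are stored keyed by LID - 1, a low-water hint tracks the smallest LID that may be free, allocation scans forward from the hint until it finds a hole, and freeing a LID lowers the hint again (as lwp_free() does). The names lid_used, lid_hint, lid_alloc and lid_free, and the fixed-size array standing in for the kernel's radix tree, are hypothetical illustration only, not NetBSD API.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define MAXLID	64		/* small fixed table; the kernel uses a radix tree */

static bool lid_used[MAXLID];	/* stand-in for p_lwptree, keyed by lid - 1 */
static unsigned lid_hint = 1;	/* stand-in for p_nlwpid: lowest possibly-free LID */

/* Allocate the first free LID at or above the hint (cf. lwp_find_free_lid()). */
static unsigned
lid_alloc(void)
{
	unsigned lid;

	for (lid = lid_hint; lid <= MAXLID; lid++) {
		if (!lid_used[lid - 1]) {
			lid_used[lid - 1] = true;
			lid_hint = lid + 1;
			return lid;
		}
	}
	return 0;		/* table full */
}

/* Release a LID and lower the hint, as lwp_free() does with p_nlwpid. */
static void
lid_free(unsigned lid)
{
	assert(lid >= 1 && lid <= MAXLID && lid_used[lid - 1]);
	lid_used[lid - 1] = false;
	if (lid < lid_hint)
		lid_hint = lid;
}

int
main(void)
{
	unsigned a = lid_alloc();	/* 1 */
	unsigned b = lid_alloc();	/* 2 */
	unsigned c = lid_alloc();	/* 3 */

	lid_free(b);			/* hint drops back to 2 */
	printf("%u %u %u, next: %u\n", a, b, c, lid_alloc());
	return 0;
}

Keying the table by LID - 1 mirrors the patch's trick of storing the first LWP at radix tree key 0 (directly in the p_lwptree root), so a single-threaded process never allocates tree memory.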