# HG changeset patch # User Taylor R Campbell # Date 1780149630 0 # Sat May 30 14:00:30 2026 +0000 # Branch trunk # Node ID 4e5dccfbc8b8b9ec697717dab38ae0b20def1f7f # Parent 2489653fbd32de8d45a986717c42bff916899e16 # EXP-Topic riastradh-20260530-workrequeue WIP: workqueue(9): New flag WQ_CONDQUEUE. With WQ_PERCPU|WQ_CONDQUEUE, workqueue_enqueue(wq, wk, NULL) conditionally enqueues the work, depending on whether or not it was already queued. WQ_CONDQUEUE requires the work to be zero-initialized and (currently) requires WQ_PERCPU, but allows drivers to safely requeue work and be guaranteed that it will run at least once after workqueue_enqueue is called. WQ_CONDQUEUE requires WQ_PERCPU because otherwise the memory ordering is troublesome to get correct: it probably requires at least a membar_sync in workqueue_runlist, but the asymmetry between workqueue_enqueue and workqueue_runlist makes the exact requirement hard to reason about. diff -r 2489653fbd32 -r 4e5dccfbc8b8 share/man/man9/workqueue.9 --- a/share/man/man9/workqueue.9 Fri May 22 06:15:01 2026 +0000 +++ b/share/man/man9/workqueue.9 Sat May 30 14:00:30 2026 +0000 @@ -94,6 +94,17 @@ otherwise the kernel lock will be held w .It Dv WQ_PERCPU Specifies that the workqueue should have a separate queue for each CPU, thus the work could be enqueued on concrete CPUs. +.It Dv WQ_CONDQUEUE +If this and +.Dv WQ_PERCPU +are both set, then a given work item may be safely passed repeatedly to +.Fn workqueue_enqueue +before it is processed. +Work items must be zero-initialized, and +.Fn workqueue_enqueue +may be slightly costlier, if +.Dv WQ_CONDQUEUE +is set. .El .El .Pp @@ -124,7 +135,11 @@ The enqueued work will be processed in a A work must not be enqueued again until the callback is called by the .Nm -framework. +framework, unless the +.Dv WQ_CONDQUEUE +and +.Dv WQ_PERCPU +flags are both set. 
.Pp .\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .Fn workqueue_wait diff -r 2489653fbd32 -r 4e5dccfbc8b8 sys/kern/subr_workqueue.c --- a/sys/kern/subr_workqueue.c Fri May 22 06:15:01 2026 +0000 +++ b/sys/kern/subr_workqueue.c Sat May 30 14:00:30 2026 +0000 @@ -42,16 +42,16 @@ #include #include -typedef struct work_impl { - SIMPLEQ_ENTRY(work_impl) wk_entry; -} work_impl_t; - -SIMPLEQ_HEAD(workqhead, work_impl); +#if defined __i386__ || defined __x86_64__ /* XXX */ +#define __HAVE_ATOMIC_NI +#endif struct workqueue_queue { kmutex_t q_mutex; kcondvar_t q_cv; - struct workqhead q_queue_pending; + struct work *q_queue_head; + struct work **q_queue_tail; + struct work q_queue_sentinel; uint64_t q_gen; lwp_t *q_worker; }; @@ -60,6 +60,9 @@ struct workqueue { void (*wq_func)(struct work *, void *); void *wq_arg; int wq_flags; +#ifndef __HAVE_ATOMIC_NI + ipl_cookie_t wq_iplcookie; +#endif char wq_name[MAXCOMLEN]; pri_t wq_prio; @@ -136,20 +139,35 @@ workqueue_queue_lookup(struct workqueue } static void -workqueue_runlist(struct workqueue *wq, struct workqhead *list) +workqueue_runlist(struct workqueue *wq, struct work *head, + struct work *sentinel) { - work_impl_t *wk; - work_impl_t *next; + struct work *wk; + struct work *next; struct lwp *l = curlwp; KASSERTMSG(l->l_nopreempt == 0, "lwp %p nopreempt %d", l, l->l_nopreempt); - for (wk = SIMPLEQ_FIRST(list); wk != NULL; wk = next) { - next = SIMPLEQ_NEXT(wk, wk_entry); + for (wk = head; wk != sentinel; wk = next) { + if ((wq->wq_flags & (WQ_PERCPU|WQ_CONDQUEUE)) == + (WQ_PERCPU|WQ_CONDQUEUE)) { +#ifdef __HAVE_ATOMIC_NI + /* XXX want atomic_swap_ptr_ni, but it doesn't exist */ + next = atomic_cas_ptr_ni(&wk->wk_next, wk->wk_next, + NULL); +#else + const int s = splraiseipl(wq->wq_iplcookie); + next = wk->wk_next; + wk->wk_next = NULL; + splx(s); +#endif + } else { + next = wk->wk_next; + } SDT_PROBE4(sdt, kernel, workqueue, entry, wq, wk, wq->wq_func, wq->wq_arg); - (*wq->wq_func)((void *)wk, 
wq->wq_arg); + (*wq->wq_func)(wk, wq->wq_arg); SDT_PROBE4(sdt, kernel, workqueue, return, wq, wk, wq->wq_func, wq->wq_arg); KASSERTMSG(l->l_nopreempt == 0, @@ -172,14 +190,12 @@ workqueue_worker(void *cookie) s = kthread_fpu_enter(); mutex_enter(&q->q_mutex); for (;;) { - struct workqhead tmp; - - SIMPLEQ_INIT(&tmp); + struct work *head; - while (SIMPLEQ_EMPTY(&q->q_queue_pending)) + while ((head = q->q_queue_head) == NULL) cv_wait(&q->q_cv, &q->q_mutex); - SIMPLEQ_CONCAT(&tmp, &q->q_queue_pending); - SIMPLEQ_INIT(&q->q_queue_pending); + q->q_queue_head = NULL; + q->q_queue_tail = &q->q_queue_head; /* * Mark the queue as actively running a batch of work @@ -188,7 +204,7 @@ workqueue_worker(void *cookie) q->q_gen |= 1; mutex_exit(&q->q_mutex); - workqueue_runlist(wq, &tmp); + workqueue_runlist(wq, head, &q->q_queue_sentinel); /* * Notify workqueue_wait that we have completed a batch @@ -216,6 +232,9 @@ workqueue_init(struct workqueue *wq, con wq->wq_prio = prio; wq->wq_func = callback_func; wq->wq_arg = callback_arg; +#ifndef __HAVE_ATOMIC_NI + wq->wq_iplcookie = makeiplcookie(ipl); +#endif } static int @@ -228,7 +247,8 @@ workqueue_initqueue(struct workqueue *wq mutex_init(&q->q_mutex, MUTEX_DEFAULT, ipl); cv_init(&q->q_cv, wq->wq_name); - SIMPLEQ_INIT(&q->q_queue_pending); + q->q_queue_head = NULL; + q->q_queue_tail = &q->q_queue_head; q->q_gen = 0; ktf = ((wq->wq_flags & WQ_MPSAFE) != 0 ? 
KTHREAD_MPSAFE : 0); if (wq->wq_prio < PRI_KERNEL) @@ -249,14 +269,15 @@ workqueue_initqueue(struct workqueue *wq } struct workqueue_exitargs { - work_impl_t wqe_wk; + struct work wqe_wk; struct workqueue_queue *wqe_q; }; static void workqueue_exit(struct work *wk, void *arg) { - struct workqueue_exitargs *wqe = (void *)wk; + struct workqueue_exitargs *wqe = container_of(wk, + struct workqueue_exitargs, wqe_wk); struct workqueue_queue *q = wqe->wqe_q; /* @@ -264,7 +285,8 @@ workqueue_exit(struct work *wk, void *ar */ KASSERT(q->q_worker == curlwp); - KASSERT(SIMPLEQ_EMPTY(&q->q_queue_pending)); + KASSERT(q->q_queue_head == NULL); + KASSERT(q->q_queue_tail == &q->q_queue_head); mutex_enter(&q->q_mutex); q->q_worker = NULL; cv_broadcast(&q->q_cv); @@ -280,10 +302,13 @@ workqueue_finiqueue(struct workqueue *wq KASSERT(wq->wq_func == workqueue_exit); wqe.wqe_q = q; - KASSERT(SIMPLEQ_EMPTY(&q->q_queue_pending)); + KASSERT(q->q_queue_head == NULL); + KASSERT(q->q_queue_tail == &q->q_queue_head); KASSERT(q->q_worker != NULL); mutex_enter(&q->q_mutex); - SIMPLEQ_INSERT_TAIL(&q->q_queue_pending, &wqe.wqe_wk, wk_entry); + wqe.wqe_wk.wk_next = &q->q_queue_sentinel; + *q->q_queue_tail = &wqe.wqe_wk; + q->q_queue_tail = &wqe.wqe_wk.wk_next; cv_broadcast(&q->q_cv); while (q->q_worker != NULL) { cv_wait(&q->q_cv, &q->q_mutex); @@ -305,8 +330,6 @@ workqueue_create(struct workqueue **wqp, void *ptr; int error = 0; - CTASSERT(sizeof(work_impl_t) <= sizeof(struct work)); - ptr = kmem_zalloc(workqueue_size(flags), KM_SLEEP); wq = (void *)roundup2((uintptr_t)ptr, coherency_unit); wq->wq_ptr = ptr; @@ -327,6 +350,9 @@ workqueue_create(struct workqueue **wqp, } } } else { + KASSERTMSG((flags & WQ_CONDQUEUE) == 0, + "WQ_CONDQUEUE currently requires WQ_PERCPU"); + /* initialize a work-queue */ q = workqueue_queue_lookup(wq, NULL); error = workqueue_initqueue(wq, q, ipl, NULL); @@ -343,9 +369,9 @@ workqueue_create(struct workqueue **wqp, static bool workqueue_q_wait(struct workqueue *wq, 
struct workqueue_queue *q, - work_impl_t *wk_target) + struct work *wk_target) { - work_impl_t *wk; + struct work *wk; bool found = false; uint64_t gen; @@ -371,7 +397,9 @@ workqueue_q_wait(struct workqueue *wq, s * have no access to. */ again: - SIMPLEQ_FOREACH(wk, &q->q_queue_pending, wk_entry) { + for (wk = q->q_queue_head; + wk != &q->q_queue_sentinel; + wk = wk->wk_next) { if (wk == wk_target) { SDT_PROBE2(sdt, kernel, workqueue, wait__hit, wq, wk); found = true; @@ -418,13 +446,13 @@ workqueue_wait(struct workqueue *wq, str CPU_INFO_ITERATOR cii; for (CPU_INFO_FOREACH(cii, ci)) { q = workqueue_queue_lookup(wq, ci); - found = workqueue_q_wait(wq, q, (work_impl_t *)wk); + found = workqueue_q_wait(wq, q, wk); if (found) break; } } else { q = workqueue_queue_lookup(wq, NULL); - (void)workqueue_q_wait(wq, q, (work_impl_t *)wk); + (void)workqueue_q_wait(wq, q, wk); } SDT_PROBE2(sdt, kernel, workqueue, wait__done, wq, wk); } @@ -452,11 +480,13 @@ workqueue_destroy(struct workqueue *wq) #ifdef DEBUG static void -workqueue_check_duplication(struct workqueue_queue *q, work_impl_t *wk) +workqueue_check_duplication(struct workqueue_queue *q, struct work *wk) { - work_impl_t *_wk; + struct work *_wk; - SIMPLEQ_FOREACH(_wk, &q->q_queue_pending, wk_entry) { + for (_wk = q->q_queue_head; + _wk != &q->q_queue_sentinel; + _wk = _wk->wk_next) { if (_wk == wk) panic("%s: tried to enqueue a queued work", __func__); } @@ -464,21 +494,53 @@ workqueue_check_duplication(struct workq #endif void -workqueue_enqueue(struct workqueue *wq, struct work *wk0, struct cpu_info *ci) +workqueue_enqueue(struct workqueue *wq, struct work *wk, struct cpu_info *ci) { struct workqueue_queue *q; - work_impl_t *wk = (void *)wk0; + int bound; + + SDT_PROBE3(sdt, kernel, workqueue, enqueue, wq, wk, ci); - SDT_PROBE3(sdt, kernel, workqueue, enqueue, wq, wk0, ci); + const bool bind = + ((wq->wq_flags & (WQ_PERCPU|WQ_CONDQUEUE)) == + (WQ_PERCPU|WQ_CONDQUEUE)) && + (ci == NULL); + if (bind) + bound = 
curlwp_bind(); KASSERT(wq->wq_flags & WQ_PERCPU || ci == NULL); q = workqueue_queue_lookup(wq, ci); mutex_enter(&q->q_mutex); -#ifdef DEBUG - workqueue_check_duplication(q, wk); + if (bind) { +#ifdef __HAVE_ATOMIC_NI + if (atomic_cas_ptr_ni(&wk->wk_next, NULL, + &q->q_queue_sentinel) == NULL) { + *q->q_queue_tail = wk; + q->q_queue_tail = &wk->wk_next; + cv_broadcast(&q->q_cv); + } +#else + const int s = splraiseipl(wq->wq_iplcookie); + if (wk->wk_next == NULL) { + wk->wk_next = &q->q_queue_sentinel; + *q->q_queue_tail = wk; + q->q_queue_tail = &wk->wk_next; + cv_broadcast(&q->q_cv); + } + splx(s); #endif - SIMPLEQ_INSERT_TAIL(&q->q_queue_pending, wk, wk_entry); - cv_broadcast(&q->q_cv); + } else { +#ifdef DEBUG + workqueue_check_duplication(q, wk); +#endif + wk->wk_next = &q->q_queue_sentinel; + *q->q_queue_tail = wk; + q->q_queue_tail = &wk->wk_next; + cv_broadcast(&q->q_cv); + } mutex_exit(&q->q_mutex); + + if (bind) + curlwp_bindx(bound); } diff -r 2489653fbd32 -r 4e5dccfbc8b8 sys/sys/workqueue.h --- a/sys/sys/workqueue.h Fri May 22 06:15:01 2026 +0000 +++ b/sys/sys/workqueue.h Sat May 30 14:00:30 2026 +0000 @@ -42,7 +42,7 @@ struct cpu_info; */ struct work { - void *wk_dummy; + struct work *wk_next; }; struct workqueue; @@ -50,6 +50,7 @@ struct workqueue; #define WQ_MPSAFE 0x01 #define WQ_PERCPU 0x02 #define WQ_FPU 0x04 +#define WQ_CONDQUEUE 0x08 int workqueue_create(struct workqueue **, const char *, void (*)(struct work *, void *), void *, pri_t, int, int);