Index: sys/callout.h =================================================================== RCS file: /cvsroot/src/sys/sys/callout.h,v retrieving revision 1.27 diff -u -p -r1.27 callout.h --- sys/callout.h 28 Mar 2008 20:44:38 -0000 1.27 +++ sys/callout.h 12 Apr 2008 16:28:37 -0000 @@ -57,6 +57,7 @@ typedef struct callout { } callout_t; /* Internal flags. */ +#define CALLOUT_BOUND 0x0001 /* bound to a specific CPU */ #define CALLOUT_PENDING 0x0002 /* callout is on the queue */ #define CALLOUT_FIRED 0x0004 /* callout has fired */ #define CALLOUT_INVOKING 0x0008 /* callout function is being invoked */ @@ -85,15 +86,15 @@ struct callout_circq { #define cq_next_l cq_next.list #define cq_prev_l cq_prev.list +struct callout_cpu; + typedef struct callout_impl { struct callout_circq c_list; /* linkage on queue */ void (*c_func)(void *); /* function to call */ void *c_arg; /* function argument */ - void *c_oncpu; /* non-NULL while running */ - void *c_onlwp; /* non-NULL while running */ + struct callout_cpu * volatile c_cpu; /* associated CPU */ int c_time; /* when callout fires */ u_int c_flags; /* state of this entry */ - u_int c_runwait; /* number of waiters */ u_int c_magic; /* magic number */ } callout_impl_t; #define CALLOUT_MAGIC 0x11deeba1 @@ -101,8 +102,10 @@ typedef struct callout_impl { #endif /* _CALLOUT_PRIVATE */ #ifdef _KERNEL +struct cpu_info; + void callout_startup(void); -void callout_startup2(void); +void callout_init_cpu(struct cpu_info *); void callout_hardclock(void); void callout_init(callout_t *, u_int); @@ -117,6 +120,7 @@ bool callout_expired(callout_t *); bool callout_active(callout_t *); bool callout_invoking(callout_t *); void callout_ack(callout_t *); +void callout_bind(callout_t *, struct cpu_info *); #endif /* _KERNEL */ #endif /* !_SYS_CALLOUT_H_ */ Index: sys/cpu_data.h =================================================================== RCS file: /cvsroot/src/sys/sys/cpu_data.h,v retrieving revision 1.21 diff -u -p -r1.21 cpu_data.h --- sys/cpu_data.h 11 Apr 2008 15:31:34 -0000 1.21 +++ sys/cpu_data.h 12 Apr 2008 16:28:37 -0000 @@ -67,9 +67,9 @@ struct cpu_data { * it is cache hot. */ lwp_t *cpu_biglock_wanted; /* LWP spinning on biglock */ - void *cpu_callout; /* running callout */ - void *cpu_callout_cancel; /* callout to be cancelled */ - u_int cpu_callout_nwait; /* # LWPs waiting on callout */ + void *cpu_callout; /* per-CPU callout state */ + void *cpu_unused1; /* unused */ + u_int cpu_unused2; /* unused */ struct schedstate_percpu cpu_schedstate; /* scheduler state */ kcondvar_t cpu_xcall; /* cross-call support */ int cpu_xcall_pending; /* cross-call support */ Index: kern/kern_clock.c =================================================================== RCS file: /cvsroot/src/sys/kern/kern_clock.c,v retrieving revision 1.119 diff -u -p -r1.119 kern_clock.c --- kern/kern_clock.c 11 Mar 2008 02:26:47 -0000 1.119 +++ kern/kern_clock.c 12 Apr 2008 16:28:38 -0000 @@ -240,17 +240,12 @@ hardclock(struct clockframe *frame) sched_tick(ci); #if defined(MULTIPROCESSOR) - /* - * If we are not the primary CPU, we're not allowed to do - * any more work. - */ - if (CPU_IS_PRIMARY(ci) == 0) - return; + if (CPU_IS_PRIMARY(ci)) #endif - - hardclock_ticks++; - - tc_ticktock(); + { + hardclock_ticks++; + tc_ticktock(); + } /* * Update real-time timeout queue. Callouts are processed at a Index: kern/kern_cpu.c =================================================================== RCS file: /cvsroot/src/sys/kern/kern_cpu.c,v retrieving revision 1.24 diff -u -p -r1.24 kern_cpu.c --- kern/kern_cpu.c 11 Apr 2008 15:31:34 -0000 1.24 +++ kern/kern_cpu.c 12 Apr 2008 16:28:38 -0000 @@ -83,6 +83,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v #include #include #include +#include #include @@ -136,6 +133,7 @@ mi_cpu_attach(struct cpu_info *ci) percpu_init_cpu(ci); softint_init(ci); + callout_init_cpu(ci); xc_init_cpu(ci); pool_cache_cpu_init(ci); selsysinit(ci); Index: kern/kern_event.c =================================================================== RCS file: /cvsroot/src/sys/kern/kern_event.c,v retrieving revision 1.53 diff -u -p -r1.53 kern_event.c --- kern/kern_event.c 26 Mar 2008 13:32:32 -0000 1.53 +++ kern/kern_event.c 12 Apr 2008 16:28:39 -0000 @@ -600,7 +600,7 @@ filt_timerattach(struct knote *kn) atomic_dec_uint(&kq_ncallouts); return ENOMEM; } - callout_init(calloutp, 0); + callout_init(calloutp, CALLOUT_MPSAFE); kq = kn->kn_kq; mutex_spin_enter(&kq->kq_lock); @@ -619,7 +619,7 @@ filt_timerdetach(struct knote *kn) callout_t *calloutp; calloutp = (callout_t *)kn->kn_hook; - callout_stop(calloutp); + callout_halt(calloutp); callout_destroy(calloutp); kmem_free(calloutp, sizeof(*calloutp)); atomic_dec_uint(&kq_ncallouts); Index: kern/kern_timeout.c =================================================================== RCS file: /cvsroot/src/sys/kern/kern_timeout.c,v retrieving revision 1.35 diff -u -p -r1.35 kern_timeout.c --- kern/kern_timeout.c 29 Mar 2008 14:07:23 -0000 1.35 +++ kern/kern_timeout.c 12 Apr 2008 16:28:40 -0000 @@ -70,10 +70,10 @@ __KERNEL_RCSID(0, "$NetBSD: kern_timeout /* * Timeouts are kept in a hierarchical timing wheel. The c_time is the - * value of the global variable "hardclock_ticks" when the timeout should - * be called. There are four levels with 256 buckets each. See 'Scheme 7' - * in "Hashed and Hierarchical Timing Wheels: Efficient Data Structures - * for Implementing a Timer Facility" by George Varghese and Tony Lauck. + * value of c_cpu->cc_ticks when the timeout should be called. There are + * four levels with 256 buckets each. See 'Scheme 7' in "Hashed and + * Hierarchical Timing Wheels: Efficient Data Structures for Implementing + * a Timer Facility" by George Varghese and Tony Lauck. * * Some of the "math" in here is a bit tricky. We have to beware of * wrapping ints. @@ -84,7 +84,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_timeout * be positive or negative so comparing it with anything is dangerous. * The only way we can use the c->c_time value in any predictable way is * when we calculate how far in the future `to' will timeout - "c->c_time - * - hardclock_ticks". The result will always be positive for future + * - c->c_cpu->cc_ticks". The result will always be positive for future * timeouts and 0 or negative for due timeouts. */ @@ -101,6 +101,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_timeout #include #include #include +#include #ifdef DDB #include @@ -115,23 +116,20 @@ __KERNEL_RCSID(0, "$NetBSD: kern_timeout #define WHEELMASK 255 #define WHEELBITS 8 -static struct callout_circq timeout_wheel[BUCKETS]; /* Queues of timeouts */ -static struct callout_circq timeout_todo; /* Worklist */ - #define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK) -#define BUCKET(rel, abs) \ +#define BUCKET(cc, rel, abs) \ (((rel) <= (1 << (2*WHEELBITS))) \ ? ((rel) <= (1 << WHEELBITS)) \ - ? &timeout_wheel[MASKWHEEL(0, (abs))] \ - : &timeout_wheel[MASKWHEEL(1, (abs)) + WHEELSIZE] \ + ? &(cc)->cc_wheel[MASKWHEEL(0, (abs))] \ + : &(cc)->cc_wheel[MASKWHEEL(1, (abs)) + WHEELSIZE] \ : ((rel) <= (1 << (3*WHEELBITS))) \ - ? &timeout_wheel[MASKWHEEL(2, (abs)) + 2*WHEELSIZE] \ - : &timeout_wheel[MASKWHEEL(3, (abs)) + 3*WHEELSIZE]) + ? &(cc)->cc_wheel[MASKWHEEL(2, (abs)) + 2*WHEELSIZE] \ + : &(cc)->cc_wheel[MASKWHEEL(3, (abs)) + 3*WHEELSIZE]) -#define MOVEBUCKET(wheel, time) \ - CIRCQ_APPEND(&timeout_todo, \ - &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE]) +#define MOVEBUCKET(cc, wheel, time) \ + CIRCQ_APPEND(&(cc)->cc_todo, \ + &(cc)->cc_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE]) /* * Circular queue definitions. @@ -175,72 +173,129 @@ do { \ static void callout_softclock(void *); -/* - * All wheels are locked with the same lock (which must also block out - * all interrupts). Eventually this should become per-CPU. - */ -kmutex_t callout_lock; -sleepq_t callout_sleepq; -void *callout_si; - -static struct evcnt callout_ev_late; -static struct evcnt callout_ev_block; +struct callout_cpu { + kmutex_t cc_lock; + sleepq_t cc_sleepq; + u_int cc_nwait; + u_int cc_ticks; + lwp_t *cc_lwp; + callout_impl_t *cc_active; + callout_impl_t *cc_cancel; + struct evcnt cc_ev_late; + struct evcnt cc_ev_block; + struct callout_circq cc_todo; /* Worklist */ + struct callout_circq cc_wheel[BUCKETS]; /* Queues of timeouts */ + char cc_name1[12]; + char cc_name2[12]; +}; + +static struct callout_cpu callout_cpu0; +static void *callout_sih; + +static inline kmutex_t * +callout_lock(callout_impl_t *c) +{ + kmutex_t *lock; + + for (;;) { + lock = &c->c_cpu->cc_lock; + mutex_spin_enter(lock); + if (__predict_true(lock == &c->c_cpu->cc_lock)) + return lock; + mutex_spin_exit(lock); + } +} /* * callout_startup: * * Initialize the callout facility, called at system startup time. + * Do just enough to allow callouts to be safely registered. */ void callout_startup(void) { + struct callout_cpu *cc; int b; - KASSERT(sizeof(callout_impl_t) <= sizeof(callout_t)); + KASSERT(curcpu()->ci_data.cpu_callout == NULL); - CIRCQ_INIT(&timeout_todo); + cc = &callout_cpu0; + mutex_init(&cc->cc_lock, MUTEX_DEFAULT, IPL_SCHED); + CIRCQ_INIT(&cc->cc_todo); for (b = 0; b < BUCKETS; b++) - CIRCQ_INIT(&timeout_wheel[b]); - - mutex_init(&callout_lock, MUTEX_DEFAULT, IPL_SCHED); - sleepq_init(&callout_sleepq, &callout_lock); - - evcnt_attach_dynamic(&callout_ev_late, EVCNT_TYPE_MISC, - NULL, "callout", "late"); - evcnt_attach_dynamic(&callout_ev_block, EVCNT_TYPE_MISC, - NULL, "callout", "wait for completion"); + CIRCQ_INIT(&cc->cc_wheel[b]); + curcpu()->ci_data.cpu_callout = cc; } /* - * callout_startup2: + * callout_init_cpu: * - * Complete initialization once soft interrupts are available. + * Per-CPU initialization. */ void -callout_startup2(void) +callout_init_cpu(struct cpu_info *ci) { + struct callout_cpu *cc; + int b; + + KASSERT(sizeof(callout_impl_t) <= sizeof(callout_t)); - callout_si = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE, - callout_softclock, NULL); - if (callout_si == NULL) - panic("callout_startup2: unable to register softclock intr"); + if ((cc = ci->ci_data.cpu_callout) == NULL) { + cc = kmem_zalloc(sizeof(*cc), KM_SLEEP); + if (cc == NULL) + panic("callout_init_cpu (1)"); + mutex_init(&cc->cc_lock, MUTEX_DEFAULT, IPL_SCHED); + CIRCQ_INIT(&cc->cc_todo); + for (b = 0; b < BUCKETS; b++) + CIRCQ_INIT(&cc->cc_wheel[b]); + } else { + /* Boot CPU, one time only. */ + callout_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE, + callout_softclock, NULL); + if (callout_sih == NULL) + panic("callout_init_cpu (2)"); + } + + sleepq_init(&cc->cc_sleepq, &cc->cc_lock); + + snprintf(cc->cc_name1, sizeof(cc->cc_name1), "late/%u", + cpu_index(ci)); + evcnt_attach_dynamic(&cc->cc_ev_late, EVCNT_TYPE_MISC, + NULL, "callout", cc->cc_name1); + + snprintf(cc->cc_name2, sizeof(cc->cc_name2), "wait/%u", + cpu_index(ci)); + evcnt_attach_dynamic(&cc->cc_ev_block, EVCNT_TYPE_MISC, + NULL, "callout", cc->cc_name2); + + ci->ci_data.cpu_callout = cc; } /* * callout_init: * - * Initialize a callout structure. + * Initialize a callout structure. This must be quick, so we fill + * only the minimum number of fields. */ void callout_init(callout_t *cs, u_int flags) { callout_impl_t *c = (callout_impl_t *)cs; + struct callout_cpu *cc; KASSERT((flags & ~CALLOUT_FLAGMASK) == 0); - memset(c, 0, sizeof(*c)); - c->c_flags = flags; + cc = curcpu()->ci_data.cpu_callout; + c->c_func = NULL; c->c_magic = CALLOUT_MAGIC; + if (__predict_true((flags & CALLOUT_MPSAFE) != 0 && cc != NULL)) { + c->c_flags = flags; + c->c_cpu = cc; + return; + } + c->c_flags = flags | CALLOUT_BOUND; + c->c_cpu = &callout_cpu0; } /* @@ -259,12 +314,8 @@ callout_destroy(callout_t *cs) * running, the current thread should have stopped it. */ KASSERT((c->c_flags & CALLOUT_PENDING) == 0); - if (c->c_oncpu != NULL && c->c_onlwp != curlwp) { - KASSERT( - ((struct cpu_info *)c->c_oncpu)->ci_data.cpu_callout != c); - } + KASSERT(c->c_cpu->cc_lwp == curlwp || c->c_cpu->cc_active != c); KASSERT(c->c_magic == CALLOUT_MAGIC); - c->c_magic = 0; } @@ -276,32 +327,51 @@ callout_destroy(callout_t *cs) * callout_lock. */ static void -callout_schedule_locked(callout_impl_t *c, int to_ticks) +callout_schedule_locked(callout_impl_t *c, kmutex_t *lock, int to_ticks) { + struct callout_cpu *cc, *occ; int old_time; KASSERT(to_ticks >= 0); KASSERT(c->c_func != NULL); /* Initialize the time here, it won't change. */ - old_time = c->c_time; - c->c_time = to_ticks + hardclock_ticks; + occ = c->c_cpu; c->c_flags &= ~CALLOUT_FIRED; /* * If this timeout is already scheduled and now is moved - * earlier, reschedule it now. Otherwise leave it in place + * earlier, reschedule it now. Otherwise leave it in place * and let it be rescheduled later. */ if ((c->c_flags & CALLOUT_PENDING) != 0) { + /* Leave on existing CPU. */ + old_time = c->c_time; + c->c_time = to_ticks + occ->cc_ticks; if (c->c_time - old_time < 0) { CIRCQ_REMOVE(&c->c_list); - CIRCQ_INSERT(&c->c_list, &timeout_todo); + CIRCQ_INSERT(&c->c_list, &occ->cc_todo); } + mutex_spin_exit(lock); + return; + } + + cc = curcpu()->ci_data.cpu_callout; + if ((c->c_flags & CALLOUT_BOUND) != 0 || cc == occ || + !mutex_tryenter(&cc->cc_lock)) { + /* Leave on existing CPU. */ + c->c_time = to_ticks + occ->cc_ticks; + c->c_flags |= CALLOUT_PENDING; + CIRCQ_INSERT(&c->c_list, &occ->cc_todo); } else { + /* Move to this CPU. */ + c->c_cpu = cc; + c->c_time = to_ticks + cc->cc_ticks; c->c_flags |= CALLOUT_PENDING; - CIRCQ_INSERT(&c->c_list, &timeout_todo); + CIRCQ_INSERT(&c->c_list, &cc->cc_todo); + mutex_spin_exit(&cc->cc_lock); } + mutex_spin_exit(lock); } /* @@ -314,17 +384,14 @@ void callout_reset(callout_t *cs, int to_ticks, void (*func)(void *), void *arg) { callout_impl_t *c = (callout_impl_t *)cs; + kmutex_t *lock; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); - + lock = callout_lock(c); c->c_func = func; c->c_arg = arg; - - callout_schedule_locked(c, to_ticks); - - mutex_spin_exit(&callout_lock); + callout_schedule_locked(c, lock, to_ticks); } /* @@ -337,47 +404,49 @@ void callout_schedule(callout_t *cs, int to_ticks) { callout_impl_t *c = (callout_impl_t *)cs; + kmutex_t *lock; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); - callout_schedule_locked(c, to_ticks); - mutex_spin_exit(&callout_lock); + lock = callout_lock(c); + callout_schedule_locked(c, lock, to_ticks); } - /* * callout_stop: * - * Try to cancel a pending callout. + * Try to cancel a pending callout. It may be too late: the callout + * could be running on another CPU. If called from interrupt context, + * the callout could already be in progress at a lower priority. */ bool callout_stop(callout_t *cs) { callout_impl_t *c = (callout_impl_t *)cs; - struct cpu_info *ci; + struct callout_cpu *cc; + kmutex_t *lock; bool expired; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); if ((c->c_flags & CALLOUT_PENDING) != 0) CIRCQ_REMOVE(&c->c_list); expired = ((c->c_flags & CALLOUT_FIRED) != 0); c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED); - if ((ci = c->c_oncpu) != NULL && ci->ci_data.cpu_callout == c) { + cc = c->c_cpu; + if (cc->cc_active == c) { /* * This is for non-MPSAFE callouts only. To synchronize * effectively we must be called with kernel_lock held. * It's also taken in callout_softclock. */ - ci = c->c_oncpu; - ci->ci_data.cpu_callout_cancel = c; + cc->cc_cancel = c; } - mutex_spin_exit(&callout_lock); + mutex_spin_exit(lock); return expired; } @@ -386,20 +455,23 @@ callout_stop(callout_t *cs) * callout_halt: * * Cancel a pending callout. If in-flight, block until it completes. - * May not be called from a hard interrupt handler. + * May not be called from a hard interrupt handler. If the callout + * can take locks, the caller of callout_halt() must not hold any of + * those locks, otherwise the two could deadlock. */ bool callout_halt(callout_t *cs) { callout_impl_t *c = (callout_impl_t *)cs; - struct cpu_info *ci; + struct callout_cpu *cc; struct lwp *l; + kmutex_t *lock; bool expired; KASSERT(c->c_magic == CALLOUT_MAGIC); KASSERT(!cpu_intr_p()); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); expired = ((c->c_flags & CALLOUT_FIRED) != 0); if ((c->c_flags & CALLOUT_PENDING) != 0) @@ -407,50 +479,86 @@ callout_halt(callout_t *cs) c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED); l = curlwp; - while (__predict_false((ci = c->c_oncpu) != NULL && - ci->ci_data.cpu_callout == c && c->c_onlwp != l)) { + for (;;) { + cc = c->c_cpu; + if (__predict_true(cc->cc_active != c || cc->cc_lwp == l)) + break; KASSERT(l->l_wchan == NULL); - - ci->ci_data.cpu_callout_nwait++; - callout_ev_block.ev_count++; - + cc->cc_nwait++; + cc->cc_ev_block.ev_count++; l->l_kpriority = true; - sleepq_enter(&callout_sleepq, l); - sleepq_enqueue(&callout_sleepq, ci, "callout", &sleep_syncobj); + sleepq_enter(&cc->cc_sleepq, l); + sleepq_enqueue(&cc->cc_sleepq, cc, "callout", &sleep_syncobj); KERNEL_UNLOCK_ALL(l, &l->l_biglocks); sleepq_block(0, false); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); } - mutex_spin_exit(&callout_lock); + mutex_spin_exit(lock); return expired; } +/* + * callout_bind: + * + * Bind a callout so that it will only execute on one CPU. + * The callout must be stopped, and must be MPSAFE. + */ +void +callout_bind(callout_t *cs, struct cpu_info *ci) +{ + callout_impl_t *c = (callout_impl_t *)cs; + struct callout_cpu *cc; + kmutex_t *lock; + + KASSERT((c->c_flags & CALLOUT_PENDING) == 0); + KASSERT(c->c_cpu->cc_active != c); + KASSERT(c->c_magic == CALLOUT_MAGIC); + KASSERT((c->c_flags & CALLOUT_MPSAFE) != 0); + + lock = callout_lock(c); + cc = ci->ci_data.cpu_callout; + c->c_flags |= CALLOUT_BOUND; + if (c->c_cpu != cc) { + /* + * Assigning c_cpu effectively unlocks the callout + * structure, as we don't hold the new CPU's lock. + * Issue memory barrier to prevent accesses being + * reordered. + */ + membar_exit(); + c->c_cpu = cc; + } + mutex_spin_exit(lock); +} + void callout_setfunc(callout_t *cs, void (*func)(void *), void *arg) { callout_impl_t *c = (callout_impl_t *)cs; + kmutex_t *lock; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); c->c_func = func; c->c_arg = arg; - mutex_spin_exit(&callout_lock); + mutex_spin_exit(lock); } bool callout_expired(callout_t *cs) { callout_impl_t *c = (callout_impl_t *)cs; + kmutex_t *lock; bool rv; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); rv = ((c->c_flags & CALLOUT_FIRED) != 0); - mutex_spin_exit(&callout_lock); + mutex_spin_exit(lock); return rv; } @@ -459,13 +567,14 @@ bool callout_active(callout_t *cs) { callout_impl_t *c = (callout_impl_t *)cs; + kmutex_t *lock; bool rv; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); rv = ((c->c_flags & (CALLOUT_PENDING|CALLOUT_FIRED)) != 0); - mutex_spin_exit(&callout_lock); + mutex_spin_exit(lock); return rv; } @@ -474,13 +583,14 @@ bool callout_pending(callout_t *cs) { callout_impl_t *c = (callout_impl_t *)cs; + kmutex_t *lock; bool rv; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); rv = ((c->c_flags & CALLOUT_PENDING) != 0); - mutex_spin_exit(&callout_lock); + mutex_spin_exit(lock); return rv; } @@ -489,13 +599,14 @@ bool callout_invoking(callout_t *cs) { callout_impl_t *c = (callout_impl_t *)cs; + kmutex_t *lock; bool rv; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); rv = ((c->c_flags & CALLOUT_INVOKING) != 0); - mutex_spin_exit(&callout_lock); + mutex_spin_exit(lock); return rv; } @@ -504,118 +615,131 @@ void callout_ack(callout_t *cs) { callout_impl_t *c = (callout_impl_t *)cs; + kmutex_t *lock; KASSERT(c->c_magic == CALLOUT_MAGIC); - mutex_spin_enter(&callout_lock); + lock = callout_lock(c); c->c_flags &= ~CALLOUT_INVOKING; - mutex_spin_exit(&callout_lock); + mutex_spin_exit(lock); } /* - * This is called from hardclock() once every tick. - * We schedule callout_softclock() if there is work - * to be done. + * callout_hardclock: + * + * Called from hardclock() once every tick. We schedule a soft + * interrupt if there is work to be done. */ void callout_hardclock(void) { - int needsoftclock; + struct callout_cpu *cc; + int needsoftclock, ticks; - mutex_spin_enter(&callout_lock); + cc = curcpu()->ci_data.cpu_callout; + mutex_spin_enter(&cc->cc_lock); - MOVEBUCKET(0, hardclock_ticks); - if (MASKWHEEL(0, hardclock_ticks) == 0) { - MOVEBUCKET(1, hardclock_ticks); - if (MASKWHEEL(1, hardclock_ticks) == 0) { - MOVEBUCKET(2, hardclock_ticks); - if (MASKWHEEL(2, hardclock_ticks) == 0) - MOVEBUCKET(3, hardclock_ticks); + ticks = ++cc->cc_ticks; + + MOVEBUCKET(cc, 0, ticks); + if (MASKWHEEL(0, ticks) == 0) { + MOVEBUCKET(cc, 1, ticks); + if (MASKWHEEL(1, ticks) == 0) { + MOVEBUCKET(cc, 2, ticks); + if (MASKWHEEL(2, ticks) == 0) + MOVEBUCKET(cc, 3, ticks); } } - needsoftclock = !CIRCQ_EMPTY(&timeout_todo); - mutex_spin_exit(&callout_lock); + needsoftclock = !CIRCQ_EMPTY(&cc->cc_todo); + mutex_spin_exit(&cc->cc_lock); if (needsoftclock) - softint_schedule(callout_si); + softint_schedule(callout_sih); } -/* ARGSUSED */ +/* + * callout_softclock: + * + * Soft interrupt handler, scheduled above if there is work to + * be done. Callouts are made in soft interrupt context. + */ static void callout_softclock(void *v) { callout_impl_t *c; - struct cpu_info *ci; + struct callout_cpu *cc; void (*func)(void *); void *arg; - u_int mpsafe, count; + int mpsafe, count, ticks, delta; lwp_t *l; l = curlwp; - ci = l->l_cpu; - - mutex_spin_enter(&callout_lock); + KASSERT(l->l_cpu == curcpu()); + cc = l->l_cpu->ci_data.cpu_callout; - while (!CIRCQ_EMPTY(&timeout_todo)) { - c = CIRCQ_FIRST(&timeout_todo); + mutex_spin_enter(&cc->cc_lock); + cc->cc_lwp = l; + while (!CIRCQ_EMPTY(&cc->cc_todo)) { + c = CIRCQ_FIRST(&cc->cc_todo); KASSERT(c->c_magic == CALLOUT_MAGIC); KASSERT(c->c_func != NULL); + KASSERT(c->c_cpu == cc); KASSERT((c->c_flags & CALLOUT_PENDING) != 0); KASSERT((c->c_flags & CALLOUT_FIRED) == 0); CIRCQ_REMOVE(&c->c_list); /* If due run it, otherwise insert it into the right bucket. */ - if (c->c_time - hardclock_ticks > 0) { - CIRCQ_INSERT(&c->c_list, - BUCKET((c->c_time - hardclock_ticks), c->c_time)); - } else { - if (c->c_time - hardclock_ticks < 0) - callout_ev_late.ev_count++; - - c->c_flags ^= (CALLOUT_PENDING | CALLOUT_FIRED); - mpsafe = (c->c_flags & CALLOUT_MPSAFE); - func = c->c_func; - arg = c->c_arg; - c->c_oncpu = ci; - c->c_onlwp = l; - ci->ci_data.cpu_callout = c; - - mutex_spin_exit(&callout_lock); - if (!mpsafe) { - KERNEL_LOCK(1, curlwp); - (*func)(arg); - KERNEL_UNLOCK_ONE(curlwp); - } else - (*func)(arg); - mutex_spin_enter(&callout_lock); - - /* - * We can't touch 'c' here because it might be - * freed already. If LWPs waiting for callout - * to complete, awaken them. - */ - ci->ci_data.cpu_callout = NULL; - if ((count = ci->ci_data.cpu_callout_nwait) != 0) { - ci->ci_data.cpu_callout_nwait = 0; - /* sleepq_wake() drops the lock. */ - sleepq_wake(&callout_sleepq, ci, count); - mutex_spin_enter(&callout_lock); - } + ticks = cc->cc_ticks; + delta = c->c_time - ticks; + if (delta > 0) { + CIRCQ_INSERT(&c->c_list, BUCKET(cc, delta, c->c_time)); + continue; } - } + if (delta < 0) + cc->cc_ev_late.ev_count++; - mutex_spin_exit(&callout_lock); + c->c_flags ^= (CALLOUT_PENDING | CALLOUT_FIRED); + mpsafe = (c->c_flags & CALLOUT_MPSAFE); + func = c->c_func; + arg = c->c_arg; + cc->cc_active = c; + + mutex_spin_exit(&cc->cc_lock); + if (!mpsafe) { + KERNEL_LOCK(1, NULL); + (*func)(arg); + KERNEL_UNLOCK_ONE(NULL); + } else + (*func)(arg); + mutex_spin_enter(&cc->cc_lock); + + /* + * We can't touch 'c' here because it might be + * freed already. If LWPs waiting for callout + * to complete, awaken them. + */ + cc->cc_active = NULL; + if ((count = cc->cc_nwait) != 0) { + cc->cc_nwait = 0; + /* sleepq_wake() drops the lock. */ + sleepq_wake(&cc->cc_sleepq, cc, count); + mutex_spin_enter(&cc->cc_lock); + } + } + cc->cc_lwp = NULL; + mutex_spin_exit(&cc->cc_lock); } #ifdef DDB static void -db_show_callout_bucket(struct callout_circq *bucket) +db_show_callout_bucket(struct callout_cpu *cc, struct callout_circq *bucket) { callout_impl_t *c; db_expr_t offset; const char *name; static char question[] = "?"; + int b; if (CIRCQ_EMPTY(bucket)) return; @@ -624,16 +748,12 @@ db_show_callout_bucket(struct callout_ci db_find_sym_and_offset((db_addr_t)(intptr_t)c->c_func, &name, &offset); name = name ? name : question; -#ifdef _LP64 -#define POINTER_WIDTH "%16lx" -#else -#define POINTER_WIDTH "%8lx" -#endif - db_printf("%9d %2d/%-4d " POINTER_WIDTH " %s\n", - c->c_time - hardclock_ticks, - (int)((bucket - timeout_wheel) / WHEELSIZE), - (int)(bucket - timeout_wheel), (u_long) c->c_arg, name); - + b = (bucket - cc->cc_wheel); + if (b < 0) + b = -WHEELSIZE; + db_printf("%9d %2d/%-4d %16lx %s\n", + c->c_time - cc->cc_ticks, b / WHEELSIZE, b, + (u_long)c->c_arg, name); if (CIRCQ_LAST(&c->c_list, bucket)) break; } @@ -642,23 +762,28 @@ db_show_callout_bucket(struct callout_ci void db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif) { + CPU_INFO_ITERATOR cii; + struct callout_cpu *cc; + struct cpu_info *ci; int b; db_printf("hardclock_ticks now: %d\n", hardclock_ticks); -#ifdef _LP64 db_printf(" ticks wheel arg func\n"); -#else - db_printf(" ticks wheel arg func\n"); -#endif /* * Don't lock the callwheel; all the other CPUs are paused * anyhow, and we might be called in a circumstance where * some other CPU was paused while holding the lock. */ - - db_show_callout_bucket(&timeout_todo); - for (b = 0; b < BUCKETS; b++) - db_show_callout_bucket(&timeout_wheel[b]); + for (CPU_INFO_FOREACH(cii, ci)) { + cc = ci->ci_data.cpu_callout; + db_show_callout_bucket(cc, &cc->cc_todo); + } + for (b = 0; b < BUCKETS; b++) { + for (CPU_INFO_FOREACH(cii, ci)) { + cc = ci->ci_data.cpu_callout; + db_show_callout_bucket(cc, &cc->cc_wheel[b]); + } + } } #endif /* DDB */ Index: kern/subr_autoconf.c =================================================================== RCS file: /cvsroot/src/sys/kern/subr_autoconf.c,v retrieving revision 1.143 diff -u -p -r1.143 subr_autoconf.c --- kern/subr_autoconf.c 4 Apr 2008 20:13:18 -0000 1.143 +++ kern/subr_autoconf.c 12 Apr 2008 16:28:41 -0000 @@ -429,9 +429,6 @@ configure(void) */ cpu_configure(); - /* Initialize callouts, part 2. */ - callout_startup2(); - /* * Now that we've found all the hardware, start the real time * and statistics clocks.