From 382440f73786f206630318683fdc22efbb78c70f Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Tue, 11 Jul 2023 22:51:53 +0000
Subject: [PATCH] entropy(9): Simplify stages.  Split interrupt vs non-interrupt paths.

- Nix the entropy stage (cold, warm, hot).  Just use the usual kernel
  `cold' (cold: single-core, single-thread; interrupts may happen),
  and don't make any three-way distinction about whether interrupts
  or threads or other CPUs can be running.

  Instead, while cold, use splhigh/splx or forbid paths to come from
  interrupt context, and while warm, use mutex or the per-CPU hard
  and soft interrupt paths for low latency.  This comes at a small
  cost to some interrupt latency, since we may stir the pool in
  interrupt context -- but only for a very short window early at boot
  between configure and configure2, so it's hard to imagine it
  matters much.

- Allow rnd_add_uint32 to run in hard interrupt context or with spin
  locks held, but defer processing to softint and drop samples on the
  floor if the buffer is full.

  This is mainly used for cheaply tossing samples from drivers for
  non-HWRNG devices into the entropy pool, so it is often used from
  interrupt context and/or under spin locks.

- New rnd_add_data_intr provides the interrupt-like data entry path
  for arbitrary buffers and driver-specified entropy estimates: defer
  processing to softint and drop samples on the floor if the buffer
  is full.

- Document that rnd_add_data is forbidden under spin locks outside
  interrupt context (will crash in LOCKDEBUG), and inadvisable in
  interrupt context (but technically permitted just in case there are
  compatibility issues for now); later we can forbid it altogether in
  interrupt context or under spin locks.

- Audit all uses of rnd_add_data to use rnd_add_data_intr where it
  might be used in interrupt context or under a spin lock.

This fixes a regression from last year when the global entropy lock
was changed from IPL_VM (spin) to IPL_SOFTSERIAL (adaptive).  Thought
I'd caught all the problems from that, but another one bit three
different people this week, presumably because of recent changes that
led to more non-HWRNG drivers entering the entropy consolidation path
from rnd_add_uint32.

In my attempt to preserve the rnd(9) API for the (now long-since
abandoned) prospect of pullup to netbsd-9 in my rewrite of the
entropy subsystem in 2020, I didn't introduce a separate entry point
for entering entropy from interrupt context or equivalent, i.e., with
spin locks held, and instead made rnd_add_data rely on cpu_intr_p()
to decide whether to process the whole sample under a lock or only
take as much as there's buffer space for before scheduling a softint.
In retrospect, that was a mistake (though perhaps not as much of a
mistake as other entropy API decisions...), a mistake which is
finally getting rectified now by rnd_add_data_intr.
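For illustration only (not part of this patch): a minimal sketch of the
driver-facing change described above, showing an interrupt-driven RNG
driver entering samples through the new rnd_add_data_intr entry point
rather than rnd_add_data.  The "foornd" driver, its softc layout, and
FOORND_BUFSZ are invented names for this example; only the rnd(9) calls
reflect the actual API.

/*
 * Sketch only, not part of the patch.  "foornd", its softc, and
 * FOORND_BUFSZ are hypothetical; the rnd(9) calls are real.
 */
#include <sys/param.h>
#include <sys/rndsource.h>

#define	FOORND_BUFSZ	32	/* bytes delivered per completion interrupt */

struct foornd_softc {
	krndsource_t	sc_rndsource;	/* attached with rnd_attach_source */
	uint8_t		sc_buf[FOORND_BUFSZ];
};

/*
 * Hard interrupt handler for the RNG completion.  May run with spin
 * locks held, so it uses rnd_add_data_intr: if the per-CPU sample
 * buffer fills up, the excess is dropped and a softint stirs the
 * pool later.
 */
static int
foornd_intr(void *arg)
{
	struct foornd_softc *sc = arg;

	/* ... copy FOORND_BUFSZ bytes from the device into sc->sc_buf ... */

	rnd_add_data_intr(&sc->sc_rndsource, sc->sc_buf, sizeof(sc->sc_buf),
	    NBBY * sizeof(sc->sc_buf));	/* driver's entropy estimate, in bits */
	return 1;
}

A driver for a device that is not a HWRNG would instead just call
rnd_add_uint32 from its interrupt handler; per the change above that
is now explicitly safe in hard interrupt context and under spin locks,
and it counts zero entropy for the value.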
XXX pullup-10 --- share/man/man9/rnd.9 | 61 ++++++- sys/dev/pci/hifn7751.c | 4 +- sys/dev/pci/ubsec.c | 2 +- sys/dev/pci/viornd.c | 5 +- sys/kern/kern_entropy.c | 373 ++++++++++++++++++++++++---------------- sys/kern/subr_prf.c | 4 +- sys/sys/rndsource.h | 2 + 7 files changed, 294 insertions(+), 157 deletions(-) diff --git a/share/man/man9/rnd.9 b/share/man/man9/rnd.9 index e18f56e85a0d..4385689564c1 100644 --- a/share/man/man9/rnd.9 +++ b/share/man/man9/rnd.9 @@ -35,6 +35,7 @@ .Nm rnd_attach_source , .Nm rnd_detach_source , .Nm rnd_add_data , +.Nm rnd_add_data_intr , .Nm rnd_add_data_sync , .Nm rnd_add_uint32 .Nd functions to make a device available for entropy collection @@ -50,6 +51,8 @@ .Ft void .Fn rnd_add_data "krndsource_t *rnd_source" "void *data" "uint32_t len" "uint32_t entropy" .Ft void +.Fn rnd_add_data_intr "krndsource_t *rnd_source" "void *data" "uint32_t len" "uint32_t entropy" +.Ft void .Fn rnd_add_data_sync "krndsource_t *rnd_source" "void *data" "uint32_t len" "uint32_t entropy" .Ft void .Fn rnd_add_uint32 "krndsource_t *rnd_source" "uint32_t datum" @@ -82,7 +85,8 @@ Attach the random source with .Fn rnd_attach_source . .It Enter data with -.Fn rnd_add_data +.Fn rnd_add_data , +.Fn rnd_add_data_intr , or .Fn rnd_add_uint32 , or, if in the callback, @@ -147,7 +151,8 @@ The callback normally does one of two things: Sends a request to a hardware device for entropy and returns. The hardware will later return data asynchronously by an interrupt, and the callback will use -.Fn rnd_add_data +.Fn rnd_add_data , +.Fn rnd_add_data_intr , or .Fn rnd_add_uint32 to add the data to the pool. @@ -161,9 +166,10 @@ returning, the callback use .Fn rnd_add_data_sync , not -.Fn rnd_add_data +.Fn rnd_add_data , +.Fn rnd_add_data_intr \" this works for now but no promises or -.Fn rnd_add_uint32 . +.Fn rnd_add_uint32 . \" this also works for now but no promises .El .Pp .Nm @@ -285,22 +291,65 @@ be used during a callback as set with use .Fn rnd_add_data_sync instead. +.Pp +.Fn rnd_add_data +.Em must not +be called from thread context with spin locks held. +.Pp +For compatibility, +.Fn rnd_add_data +currently +.Em may +but +.Em should not +be called from interrupt context, possibly with spin locks held. +However, this may be forbidden in the future; use +.Fn rnd_add_data_intr +from interrupt context instead, if the work can't be usefully deferred +to softint or thread. +.It Fn rnd_add_data_intr "rnd_source" "data" "len" "entropy" +Tries to enter +.Fa len +bytes at +.Fa data +into the entropy pool like +.Fn rnd_add_data , +but if this fills or would overflow a sample buffer, schedules a +softint to process it and discards an unspecified subset of the data +while counting zero entropy for the sample. +.Pp +.Fn rnd_add_data_intr +may be called from any context, including hard interrupt context, +including contexts where spin locks are held, except that it +.Em must not +be used during a callback as set with +.Fn rndsource_setcb ; +use +.Fn rnd_add_data_sync +in that context instead. .It Fn rnd_add_data_sync "rnd_source" "data" "len" "entropy" Like .Fn rnd_add_data , but may be used in a callback as set with .Fn rndsource_setcb . +Must always be called in thread context. .It Fn rnd_add_uint32 "rnd_source" "datum" Equivalent to -.Li rnd_add_data Ns ( Ns Fa rnd_source , Li & Ns Fa datum , Li 4 , 0 ) . +.Li rnd_add_data_intr Ns ( Ns Fa rnd_source , Li & Ns Fa datum , Li 4 , 0 ) . 
.Pp .Fn rnd_add_uint32 +may be called from any context, including hard interrupt context, +including contexts where spin locks are held, except that it .Em must not be used during a callback as set with .Fn rndsource_setcb ; use .Fn rnd_add_data_sync -instead. +in that context instead. +.Pp +.Fn rnd_add_uint32 +is meant for cheaply taking samples from devices that aren't designed +to be hardware random number generators. .El .Sh FILES These functions are declared in src/sys/sys/rndsource.h and defined in diff --git a/sys/dev/pci/hifn7751.c b/sys/dev/pci/hifn7751.c index 3fced5cfd1a4..df3edb289d53 100644 --- a/sys/dev/pci/hifn7751.c +++ b/sys/dev/pci/hifn7751.c @@ -655,7 +655,7 @@ hifn_rng(struct hifn_softc *sc) hexdump(printf, "hifn", num, sizeof num); #endif entropybits = NBBY*sizeof(num)/HIFN_RNG_BITSPER; - rnd_add_data(&sc->sc_rnd_source, num, sizeof(num), + rnd_add_data_intr(&sc->sc_rnd_source, num, sizeof(num), entropybits); entropybits = MAX(entropybits, 1); entropybits = MIN(entropybits, sc->sc_rng_needbits); @@ -693,7 +693,7 @@ hifn_rng(struct hifn_softc *sc) hexdump(printf, "hifn", num, sizeof num); #endif entropybits = NBBY*sizeof(num)/HIFN_RNG_BITSPER; - rnd_add_data(&sc->sc_rnd_source, num, sizeof num, + rnd_add_data_intr(&sc->sc_rnd_source, num, sizeof num, entropybits); entropybits = MAX(entropybits, 1); entropybits = MIN(entropybits, sc->sc_rng_needbits); diff --git a/sys/dev/pci/ubsec.c b/sys/dev/pci/ubsec.c index dbbf7b97f639..61d693070cce 100644 --- a/sys/dev/pci/ubsec.c +++ b/sys/dev/pci/ubsec.c @@ -1939,7 +1939,7 @@ ubsec_callback2(struct ubsec_softc *sc, struct ubsec_q2 *q) rng->rng_buf.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD); p = (u_int32_t *)rng->rng_buf.dma_vaddr; i = UBSEC_RNG_BUFSIZ * sizeof(u_int32_t); - rnd_add_data(&sc->sc_rnd_source, (char *)p, i, i * NBBY); + rnd_add_data_intr(&sc->sc_rnd_source, (char *)p, i, i * NBBY); sc->sc_rng_need -= i; rng->rng_used = 0; if (sc->sc_rng_need > 0) { diff --git a/sys/dev/pci/viornd.c b/sys/dev/pci/viornd.c index dd33fd4e9332..c2f72bef0646 100644 --- a/sys/dev/pci/viornd.c +++ b/sys/dev/pci/viornd.c @@ -245,8 +245,9 @@ viornd_vq_done(struct virtqueue *vq) #if VIORND_DEBUG aprint_normal("%s: got %d bytes of entropy\n", __func__, len); #endif - rnd_add_data(&sc->sc_rndsource, sc->sc_buf, VIORND_BUFSIZE, - VIORND_BUFSIZE * NBBY); + /* XXX Shouldn't this be len instead of VIORND_BUFSIZE? */ + rnd_add_data_intr(&sc->sc_rndsource, sc->sc_buf, VIORND_BUFSIZE, + VIORND_BUFSIZE * NBBY); out: virtio_dequeue_commit(vsc, vq, slot); mutex_exit(&sc->sc_mutex); diff --git a/sys/kern/kern_entropy.c b/sys/kern/kern_entropy.c index a7e57fcd6f27..2d5d9463e778 100644 --- a/sys/kern/kern_entropy.c +++ b/sys/kern/kern_entropy.c @@ -65,6 +65,15 @@ * * Entropy depletion is available for testing (or if you're into * that sort of thing), with sysctl -w kern.entropy.depletion=1; * the logic to support it is small, to minimize chance of bugs. + * + * * While cold, a single global entropy pool is available for + * entering and extracting, serialized through splhigh/splx. + * The per-CPU entropy pool data structures are initialized in + * entropy_init and entropy_init_late (separated mainly for + * hysterical raisins at this point), but are not used until the + * system is warm, at which point access to the global entropy + * pool is limited to thread and softint context and serialized + * by E->lock. 
*/ #include @@ -183,11 +192,6 @@ struct { struct lwp *sourcelock; /* lock on list of sources */ kcondvar_t sourcelock_cv; /* notifies sourcelock release */ LIST_HEAD(,krndsource) sources; /* list of entropy sources */ - enum entropy_stage { - ENTROPY_COLD = 0, /* single-threaded */ - ENTROPY_WARM, /* multi-threaded at boot before CPUs */ - ENTROPY_HOT, /* multi-threaded multi-CPU */ - } stage; bool consolidate; /* kick thread to consolidate */ bool seed_rndsource; /* true if seed source is attached */ bool seeded; /* true if seed file already loaded */ @@ -197,7 +201,6 @@ struct { .samplesneeded = MINSAMPLES, .epoch = (unsigned)-1, /* -1 means entropy never consolidated */ .sources = LIST_HEAD_INITIALIZER(entropy_global.sources), - .stage = ENTROPY_COLD, }; #define E (&entropy_global) /* declutter */ @@ -265,8 +268,10 @@ static int sysctl_entropy_gather(SYSCTLFN_ARGS); static void filt_entropy_read_detach(struct knote *); static int filt_entropy_read_event(struct knote *, long); static int entropy_request(size_t, int); +static void rnd_add_data_internal(struct krndsource *, const void *, + uint32_t, uint32_t, bool); static void rnd_add_data_1(struct krndsource *, const void *, uint32_t, - uint32_t, bool, uint32_t); + uint32_t, bool, uint32_t, bool); static unsigned rndsource_entropybits(struct krndsource *); static void rndsource_entropybits_cpu(void *, void *, struct cpu_info *); static void rndsource_to_user(struct krndsource *, rndsource_t *); @@ -308,12 +313,17 @@ static void attach_seed_rndsource(void) { + KASSERT(!cpu_intr_p()); + KASSERT(!cpu_softintr_p()); + KASSERT(cold); + /* * First called no later than entropy_init, while we are still * single-threaded, so no need for RUN_ONCE. */ - if (E->stage >= ENTROPY_WARM || E->seed_rndsource) + if (E->seed_rndsource) return; + rnd_attach_source(&seed_rndsource, "seed", RND_TYPE_UNKNOWN, RND_FLAG_COLLECT_VALUE); E->seed_rndsource = true; @@ -324,7 +334,8 @@ attach_seed_rndsource(void) * * Initialize the entropy subsystem. Panic on failure. * - * Requires percpu(9) and sysctl(9) to be initialized. + * Requires percpu(9) and sysctl(9) to be initialized. Must run + * while cold. */ static void entropy_init(void) @@ -333,7 +344,7 @@ entropy_init(void) struct krndsource *rs; unsigned i = 0; - KASSERT(E->stage == ENTROPY_COLD); + KASSERT(cold); /* Grab some cycle counts early at boot. */ extra[i++] = entropy_timer(); @@ -419,28 +430,6 @@ entropy_init(void) KASSERT(i == __arraycount(extra)); entropy_enter(extra, sizeof extra, /*nbits*/0, /*count*/false); explicit_memset(extra, 0, sizeof extra); - - /* We are now ready for multi-threaded operation. */ - E->stage = ENTROPY_WARM; -} - -static void -entropy_init_late_cpu(void *a, void *b) -{ - int bound; - - /* - * We're not necessarily in a softint lwp here (xc_broadcast - * triggers softint on other CPUs, but calls directly on this - * CPU), so explicitly bind to the current CPU to invoke the - * softintr -- this lets us have a simpler assertion in - * entropy_account_cpu. Not necessary to avoid migration - * because xc_broadcast disables kpreemption anyway, but it - * doesn't hurt. - */ - bound = curlwp_bind(); - entropy_softintr(NULL); - curlwp_bindx(bound); } /* @@ -449,22 +438,22 @@ entropy_init_late_cpu(void *a, void *b) * Late initialization. Panic on failure. * * Requires CPUs to have been detected and LWPs to have started. + * Must run while cold. 
*/ static void entropy_init_late(void) { - void *sih; int error; - KASSERT(E->stage == ENTROPY_WARM); + KASSERT(cold); /* * Establish the softint at the highest softint priority level. * Must happen after CPU detection. */ - sih = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE, + entropy_sih = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE, &entropy_softintr, NULL); - if (sih == NULL) + if (entropy_sih == NULL) panic("unable to establish entropy softint"); /* @@ -476,25 +465,6 @@ entropy_init_late(void) if (error) panic("unable to create entropy housekeeping thread: %d", error); - - /* - * Wait until the per-CPU initialization has hit all CPUs - * before proceeding to mark the entropy system hot and - * enabling use of the softint. - */ - xc_barrier(XC_HIGHPRI); - E->stage = ENTROPY_HOT; - atomic_store_relaxed(&entropy_sih, sih); - - /* - * At this point, entering new samples from interrupt handlers - * will trigger the softint to process them. But there may be - * some samples that were entered from interrupt handlers - * before the softint was available. Make sure we process - * those samples on all CPUs by running the softint logic on - * all CPUs. - */ - xc_wait(xc_broadcast(XC_HIGHPRI, entropy_init_late_cpu, NULL, NULL)); } /* @@ -609,6 +579,10 @@ entropy_seed(rndsave_t *seed) uint8_t digest[SHA1_DIGEST_LENGTH]; bool seeded; + KASSERT(!cpu_intr_p()); + KASSERT(!cpu_softintr_p()); + KASSERT(cold); + /* * Verify the checksum. If the checksum fails, take the data * but ignore the entropy estimate -- the file may have been @@ -642,12 +616,8 @@ entropy_seed(rndsave_t *seed) attach_seed_rndsource(); /* Test and set E->seeded. */ - if (E->stage >= ENTROPY_WARM) - mutex_enter(&E->lock); seeded = E->seeded; E->seeded = (seed->entropy > 0); - if (E->stage >= ENTROPY_WARM) - mutex_exit(&E->lock); /* * If we've been seeded, may be re-entering the same seed @@ -672,23 +642,23 @@ entropy_seed(rndsave_t *seed) * entropy_bootrequest() * * Request entropy from all sources at boot, once config is - * complete and interrupts are running. + * complete and interrupts are running but we are still cold. */ void entropy_bootrequest(void) { int error; - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cpu_intr_p()); + KASSERT(!cpu_softintr_p()); + KASSERT(cold); /* * Request enough to satisfy the maximum entropy shortage. * This is harmless overkill if the bootloader provided a seed. */ - mutex_enter(&E->lock); error = entropy_request(MINENTROPYBYTES, ENTROPY_WAIT); - KASSERT(error == 0); - mutex_exit(&E->lock); + KASSERTMSG(error == 0, "error=%d", error); } /* @@ -751,7 +721,8 @@ entropy_account_cpu(struct entropy_cpu *ec) struct entropy_cpu *ec0; unsigned bitsdiff, samplesdiff; - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cpu_intr_p()); + KASSERT(!cold); KASSERT(curlwp->l_pflag & LP_BOUND); /* @@ -880,8 +851,19 @@ static void entropy_enter_early(const void *buf, size_t len, unsigned nbits) { bool notify = false; + int s; + + KASSERT(cold); - KASSERT(E->stage == ENTROPY_COLD); + /* + * We're early at boot before multithreading and multi-CPU + * operation, and we don't have softints yet to defer + * processing from interrupt context, so we have to enter the + * samples directly into the global pool. But interrupts may + * be enabled, and we enter this path from interrupt context, + * so block interrupts until we're done. + */ + s = splhigh(); /* Enter it into the pool. 
*/ entpool_enter(&E->pool, buf, len); @@ -890,6 +872,8 @@ entropy_enter_early(const void *buf, size_t len, unsigned nbits) * Decide whether to notify reseed -- we will do so if either: * (a) we transition from partial entropy to full entropy, or * (b) we get a batch of full entropy all at once. + * We don't count timing samples because we assume, while cold, + * there's not likely to be much jitter yet. */ notify |= (E->bitsneeded && E->bitsneeded <= nbits); notify |= (nbits >= MINENTROPYBITS); @@ -905,6 +889,8 @@ entropy_enter_early(const void *buf, size_t len, unsigned nbits) entropy_notify(); entropy_immediate_evcnt.ev_count++; } + + splx(s); } /* @@ -928,8 +914,11 @@ entropy_enter(const void *buf, size_t len, unsigned nbits, bool count) KASSERTMSG(howmany(nbits, NBBY) <= len, "impossible entropy rate: %u bits in %zu-byte string", nbits, len); - /* If it's too early after boot, just use entropy_enter_early. */ - if (__predict_false(E->stage == ENTROPY_COLD)) { + /* + * If we're still cold, just use entropy_enter_early to put + * samples directly into the global pool. + */ + if (__predict_false(cold)) { entropy_enter_early(buf, len, nbits); return; } @@ -981,8 +970,9 @@ entropy_enter(const void *buf, size_t len, unsigned nbits, bool count) * instance. Schedule a softint to stir the entropy pool if * needed. Return true if used fully, false if truncated at all. * - * Using this in thread context will work, but you might as well - * use entropy_enter in that case. + * Using this in thread or softint context with no spin locks held + * will work, but you might as well use entropy_enter in that + * case. */ static bool entropy_enter_intr(const void *buf, size_t len, unsigned nbits, bool count) @@ -990,18 +980,29 @@ entropy_enter_intr(const void *buf, size_t len, unsigned nbits, bool count) struct entropy_cpu *ec; bool fullyused = false; uint32_t bitspending, samplespending; - void *sih; + int s; - KASSERT(cpu_intr_p()); KASSERTMSG(howmany(nbits, NBBY) <= len, "impossible entropy rate: %u bits in %zu-byte string", nbits, len); - /* If it's too early after boot, just use entropy_enter_early. */ - if (__predict_false(E->stage == ENTROPY_COLD)) { + /* + * If we're still cold, just use entropy_enter_early to put + * samples directly into the global pool. + */ + if (__predict_false(cold)) { entropy_enter_early(buf, len, nbits); return true; } + /* + * In case we were called in thread or interrupt context with + * interrupts unblocked, block soft interrupts up to + * IPL_SOFTSERIAL. This way logic that is safe in interrupt + * context or under a spin lock is also safe in less + * restrictive contexts. + */ + s = splsoftserial(); + /* * Acquire the per-CPU state. If someone is in the middle of * using it, drop the sample. Otherwise, take the lock so that @@ -1020,9 +1021,8 @@ entropy_enter_intr(const void *buf, size_t len, unsigned nbits, bool count) * truncated, schedule a softint to stir the pool and stop. */ if (!entpool_enter_nostir(ec->ec_pool, buf, len)) { - sih = atomic_load_relaxed(&entropy_sih); - if (__predict_true(sih != NULL)) - softint_schedule(sih); + if (__predict_true(!cold)) + softint_schedule(entropy_sih); ec->ec_evcnt->intrtrunc.ev_count++; goto out1; } @@ -1047,17 +1047,16 @@ entropy_enter_intr(const void *buf, size_t len, unsigned nbits, bool count) /* Schedule a softint if we added anything and it matters. 
*/ if (__predict_false(atomic_load_relaxed(&E->bitsneeded) || atomic_load_relaxed(&entropy_depletion)) && - (nbits != 0 || count)) { - sih = atomic_load_relaxed(&entropy_sih); - if (__predict_true(sih != NULL)) - softint_schedule(sih); - } + (nbits != 0 || count) && + __predict_true(!cold)) + softint_schedule(entropy_sih); out1: /* Release the per-CPU state. */ KASSERT(ec->ec_locked); __insn_barrier(); ec->ec_locked = false; out0: percpu_putref(entropy_percpu); + splx(s); return fullyused; } @@ -1104,6 +1103,8 @@ entropy_thread(void *cookie) { bool consolidate; + KASSERT(!cold); + for (;;) { /* * Wait until there's full entropy somewhere among the @@ -1184,6 +1185,9 @@ entropy_do_consolidate(void) unsigned bitsdiff, samplesdiff; uint64_t ticket; + KASSERT(!cold); + ASSERT_SLEEPABLE(); + /* Gather entropy on all CPUs into a temporary pool. */ memset(&pool, 0, sizeof pool); ticket = xc_broadcast(0, &entropy_consolidate_xc, &pool, NULL); @@ -1291,7 +1295,7 @@ entropy_notify(void) static bool ready = false, besteffort = false; unsigned epoch; - KASSERT(E->stage == ENTROPY_COLD || mutex_owned(&E->lock)); + KASSERT(__predict_false(cold) || mutex_owned(&E->lock)); /* * If this is the first time, print a message to the console @@ -1324,7 +1328,7 @@ entropy_notify(void) KASSERT(E->epoch != (unsigned)-1); /* Notify waiters. */ - if (E->stage >= ENTROPY_WARM) { + if (__predict_true(!cold)) { cv_broadcast(&E->cv); selnotify(&E->selq, POLLIN|POLLRDNORM, NOTE_SUBMIT); } @@ -1350,7 +1354,8 @@ entropy_consolidate(void) uint64_t ticket; int error; - KASSERT(E->stage == ENTROPY_HOT); + KASSERT(!cold); + ASSERT_SLEEPABLE(); mutex_enter(&E->lock); ticket = entropy_consolidate_evcnt.ev_count; @@ -1380,8 +1385,6 @@ sysctl_entropy_consolidate(SYSCTLFN_ARGS) int arg = 0; int error; - KASSERT(E->stage == ENTROPY_HOT); - node.sysctl_data = &arg; error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) @@ -1406,8 +1409,6 @@ sysctl_entropy_gather(SYSCTLFN_ARGS) int arg = 0; int error; - KASSERT(E->stage == ENTROPY_HOT); - node.sysctl_data = &arg; error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) @@ -1434,6 +1435,9 @@ sysctl_entropy_gather(SYSCTLFN_ARGS) * provide backtracking resistance -- it must be combined with a * PRNG/DRBG that does. * + * This may be used very early at boot, before even entropy_init + * has been called. + * * You generally shouldn't use this directly -- use cprng(9) * instead. * @@ -1460,19 +1464,25 @@ entropy_extract(void *buf, size_t len, int flags) static const struct timeval interval = {.tv_sec = 60, .tv_usec = 0}; static struct timeval lasttime; /* serialized by E->lock */ bool printed = false; - int error; + int s = -1/*XXXGCC*/, error; if (ISSET(flags, ENTROPY_WAIT)) { ASSERT_SLEEPABLE(); - KASSERTMSG(E->stage >= ENTROPY_WARM, - "can't wait for entropy until warm"); + KASSERT(!cold); } /* Refuse to operate in interrupt context. */ KASSERT(!cpu_intr_p()); - /* Acquire the global lock to get at the global pool. */ - if (E->stage >= ENTROPY_WARM) + /* + * If we're cold, we are only contending with interrupts on the + * currrent CPU, so block them. Otherwise, we are _not_ + * contending with interrupts on the current CPU, but we are + * contending with other threads, to exclude them with a mutex. + */ + if (__predict_false(cold)) + s = splhigh(); + else mutex_enter(&E->lock); /* Wait until there is enough entropy in the system. */ @@ -1505,7 +1515,7 @@ entropy_extract(void *buf, size_t len, int flags) } /* Wait for some entropy to come in and try again. 
*/ - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); if (!printed) { printf("entropy: pid %d (%s) waiting for entropy(7)\n", curproc->p_pid, curproc->p_comm); @@ -1581,7 +1591,9 @@ entropy_extract(void *buf, size_t len, int flags) } out: /* Release the global lock and return the error. */ - if (E->stage >= ENTROPY_WARM) + if (__predict_false(cold)) + splx(s); + else mutex_exit(&E->lock); return error; } @@ -1597,7 +1609,7 @@ entropy_poll(int events) { int revents = 0; - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); /* Always ready for writing. */ revents |= events & (POLLOUT|POLLWRNORM); @@ -1640,7 +1652,7 @@ static void filt_entropy_read_detach(struct knote *kn) { - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); mutex_enter(&E->lock); selremove_knote(&E->selq, kn); @@ -1659,7 +1671,7 @@ filt_entropy_read_event(struct knote *kn, long hint) { int ret; - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); /* Acquire the lock, if caller is outside entropy subsystem. */ if (hint == NOTE_SUBMIT) @@ -1708,7 +1720,7 @@ int entropy_kqfilter(struct knote *kn) { - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); switch (kn->kn_filter) { case EVFILT_READ: @@ -1784,15 +1796,15 @@ rnd_attach_source(struct krndsource *rs, const char *name, uint32_t type, rs->total = 0; rs->type = type; rs->flags = flags; - if (E->stage >= ENTROPY_WARM) + if (entropy_percpu != NULL) rs->state = percpu_alloc(sizeof(struct rndsource_cpu)); extra[i++] = entropy_timer(); /* Wire it into the global list of random sources. */ - if (E->stage >= ENTROPY_WARM) + if (__predict_true(!cold)) mutex_enter(&E->lock); LIST_INSERT_HEAD(&E->sources, rs, list); - if (E->stage >= ENTROPY_WARM) + if (__predict_true(!cold)) mutex_exit(&E->lock); extra[i++] = entropy_timer(); @@ -1803,7 +1815,7 @@ rnd_attach_source(struct krndsource *rs, const char *name, uint32_t type, /* Mix the extra into the pool. */ KASSERT(i == __arraycount(extra)); - entropy_enter(extra, sizeof extra, 0, /*count*/!cold); + entropy_enter(extra, sizeof extra, 0, /*count*/__predict_true(!cold)); explicit_memset(extra, 0, sizeof extra); } @@ -1821,7 +1833,7 @@ rnd_detach_source(struct krndsource *rs) * If we're cold (shouldn't happen, but hey), just remove it * from the list -- there's nothing allocated. */ - if (E->stage == ENTROPY_COLD) { + if (__predict_false(cold) && entropy_percpu == NULL) { LIST_REMOVE(rs, list); return; } @@ -1848,6 +1860,8 @@ rnd_detach_source(struct krndsource *rs) * rnd_unlock_sources even while the caller releases the global * entropy lock. * + * May be called very early at boot, before entropy_init. + * * If flags & ENTROPY_WAIT, wait for concurrent access to finish. * If flags & ENTROPY_SIG, allow interruption by signal. */ @@ -1856,10 +1870,11 @@ rnd_lock_sources(int flags) { int error; - KASSERT(E->stage == ENTROPY_COLD || mutex_owned(&E->lock)); + KASSERT(__predict_false(cold) || mutex_owned(&E->lock)); + KASSERT(!cpu_intr_p()); while (E->sourcelock) { - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); if (!ISSET(flags, ENTROPY_WAIT)) return EWOULDBLOCK; if (ISSET(flags, ENTROPY_SIG)) { @@ -1880,17 +1895,20 @@ rnd_lock_sources(int flags) * * Unlock the list of sources after rnd_lock_sources. Caller must * hold the global entropy lock. + * + * May be called very early at boot, before entropy_init. 
*/ static void rnd_unlock_sources(void) { - KASSERT(E->stage == ENTROPY_COLD || mutex_owned(&E->lock)); + KASSERT(__predict_false(cold) || mutex_owned(&E->lock)); + KASSERT(!cpu_intr_p()); KASSERTMSG(E->sourcelock == curlwp, "lwp %p releasing lock held by %p", curlwp, E->sourcelock); E->sourcelock = NULL; - if (E->stage >= ENTROPY_WARM) + if (__predict_true(!cold)) cv_signal(&E->sourcelock_cv); } @@ -1899,6 +1917,8 @@ rnd_unlock_sources(void) * * True if we hold the list of rndsources locked, for diagnostic * assertions. + * + * May be called very early at boot, before entropy_init. */ static bool __diagused rnd_sources_locked(void) @@ -1914,6 +1934,8 @@ rnd_sources_locked(void) * OK if we overdo it. Caller must hold the global entropy lock; * will release and re-acquire it. * + * May be called very early at boot, before entropy_init. + * * If flags & ENTROPY_WAIT, wait for concurrent access to finish. * If flags & ENTROPY_SIG, allow interruption by signal. */ @@ -1923,8 +1945,9 @@ entropy_request(size_t nbytes, int flags) struct krndsource *rs; int error; - KASSERT(E->stage == ENTROPY_COLD || mutex_owned(&E->lock)); - if (flags & ENTROPY_WAIT) + KASSERT(__predict_false(cold) || mutex_owned(&E->lock)); + KASSERT(!cpu_intr_p()); + if ((flags & ENTROPY_WAIT) != 0 && __predict_false(!cold)) ASSERT_SLEEPABLE(); /* @@ -1954,10 +1977,10 @@ entropy_request(size_t nbytes, int flags) continue; /* Drop the lock while we call the callback. */ - if (E->stage >= ENTROPY_WARM) + if (__predict_true(!cold)) mutex_exit(&E->lock); (*rs->get)(nbytes, rs->getarg); - if (E->stage >= ENTROPY_WARM) + if (__predict_true(!cold)) mutex_enter(&E->lock); } @@ -2030,30 +2053,34 @@ rnd_dt_estimate(struct krndsource *rs, uint32_t t) * * Enter 32 bits of data from an entropy source into the pool. * - * If rs is NULL, may not be called from interrupt context. + * May be called from any context or with spin locks held, but may + * drop data. * - * If rs is non-NULL, may be called from any context. May drop - * data if called from interrupt context. + * This is meant for cheaply taking samples from devices that + * aren't designed to be hardware random number generators. */ void rnd_add_uint32(struct krndsource *rs, uint32_t value) { + bool intr_p = true; - rnd_add_data(rs, &value, sizeof value, 0); + rnd_add_data_internal(rs, &value, sizeof value, 0, intr_p); } void _rnd_add_uint32(struct krndsource *rs, uint32_t value) { + bool intr_p = true; - rnd_add_data(rs, &value, sizeof value, 0); + rnd_add_data_internal(rs, &value, sizeof value, 0, intr_p); } void _rnd_add_uint64(struct krndsource *rs, uint64_t value) { + bool intr_p = true; - rnd_add_data(rs, &value, sizeof value, 0); + rnd_add_data_internal(rs, &value, sizeof value, 0, intr_p); } /* @@ -2064,25 +2091,38 @@ _rnd_add_uint64(struct krndsource *rs, uint64_t value) * the data has. If RND_FLAG_NO_ESTIMATE, we ignore the driver's * estimate and treat it as zero. * - * If rs is NULL, may not be called from interrupt context. - * - * If rs is non-NULL, may be called from any context. May drop - * data if called from interrupt context. + * rs MAY but SHOULD NOT be NULL. If rs is NULL, MUST NOT be + * called from interrupt context or with spin locks held. + * + * If rs is non-NULL, MAY but SHOULD NOT be called from interrupt + * context, in which case act like rnd_add_data_intr -- if the + * sample buffer is full, schedule a softint and drop any + * additional data on the floor. 
(This may change later once we + * fix drivers that still call this from interrupt context to use + * rnd_add_data_intr instead.) MUST NOT be called with spin locks + * held if not in hard interrupt context -- i.e., MUST NOT be + * called in thread context or softint context with spin locks + * held. */ void rnd_add_data(struct krndsource *rs, const void *buf, uint32_t len, uint32_t entropybits) { - uint32_t extra; - uint32_t flags; - - KASSERTMSG(howmany(entropybits, NBBY) <= len, - "%s: impossible entropy rate:" - " %"PRIu32" bits in %"PRIu32"-byte string", - rs ? rs->name : "(anonymous)", entropybits, len); + bool intr_p = cpu_intr_p(); /* XXX make this unconditionally false */ - /* If there's no rndsource, just enter the data and time now. */ + /* + * Weird legacy exception that we should rip out and replace by + * creating new rndsources to attribute entropy to the callers: + * If there's no rndsource, just enter the data and time now. + */ if (rs == NULL) { + uint32_t extra; + + KASSERT(!intr_p); + KASSERTMSG(howmany(entropybits, NBBY) <= len, + "%s: impossible entropy rate:" + " %"PRIu32" bits in %"PRIu32"-byte string", + rs ? rs->name : "(anonymous)", entropybits, len); entropy_enter(buf, len, entropybits, /*count*/false); extra = entropy_timer(); entropy_enter(&extra, sizeof extra, 0, /*count*/false); @@ -2090,6 +2130,48 @@ rnd_add_data(struct krndsource *rs, const void *buf, uint32_t len, return; } + rnd_add_data_internal(rs, buf, len, entropybits, intr_p); +} + +/* + * rnd_add_data_intr(rs, buf, len, entropybits) + * + * Try to enter data from an entropy source into the pool, with a + * driver's estimate of how much entropy the physical source of + * the data has. If RND_FLAG_NO_ESTIMATE, we ignore the driver's + * estimate and treat it as zero. If the sample buffer is full, + * schedule a softint and drop any additional data on the floor. + */ +void +rnd_add_data_intr(struct krndsource *rs, const void *buf, uint32_t len, + uint32_t entropybits) +{ + bool intr_p = true; + + rnd_add_data_internal(rs, buf, len, entropybits, intr_p); +} + +/* + * rnd_add_data_internal(rs, buf, len, entropybits, intr_p) + * + * Internal subroutine to decide whether or not to enter data or + * timing for a particular rndsource, and if so, to enter it. + * + * intr_p is true for callers from interrupt context or spin locks + * held, and false for callers from thread or soft interrupt + * context and no spin locks held. + */ +static void +rnd_add_data_internal(struct krndsource *rs, const void *buf, uint32_t len, + uint32_t entropybits, bool intr_p) +{ + uint32_t flags; + + KASSERTMSG(howmany(entropybits, NBBY) <= len, + "%s: impossible entropy rate:" + " %"PRIu32" bits in %"PRIu32"-byte string", + rs ? rs->name : "(anonymous)", entropybits, len); + /* * Hold up the reset xcall before it zeroes the entropy counts * on this CPU or globally. Otherwise, we might leave some @@ -2119,11 +2201,12 @@ rnd_add_data(struct krndsource *rs, const void *buf, uint32_t len, /* If we are collecting data, enter them. */ if (ISSET(flags, RND_FLAG_COLLECT_VALUE)) { rnd_add_data_1(rs, buf, len, entropybits, /*count*/false, - RND_FLAG_COLLECT_VALUE); + RND_FLAG_COLLECT_VALUE, intr_p); } /* If we are collecting timings, enter one. */ if (ISSET(flags, RND_FLAG_COLLECT_TIME)) { + uint32_t extra; bool count; /* Sample a timer. */ @@ -2131,13 +2214,13 @@ rnd_add_data(struct krndsource *rs, const void *buf, uint32_t len, /* If asked, do entropy estimation on the time. 
*/ if ((flags & (RND_FLAG_ESTIMATE_TIME|RND_FLAG_NO_ESTIMATE)) == - RND_FLAG_ESTIMATE_TIME && !cold) + RND_FLAG_ESTIMATE_TIME && __predict_true(!cold)) count = rnd_dt_estimate(rs, extra); else count = false; rnd_add_data_1(rs, &extra, sizeof extra, 0, count, - RND_FLAG_COLLECT_TIME); + RND_FLAG_COLLECT_TIME, intr_p); } out: /* Allow concurrent changes to flags to finish. */ @@ -2161,16 +2244,16 @@ add_sat(unsigned a, unsigned b) */ static void rnd_add_data_1(struct krndsource *rs, const void *buf, uint32_t len, - uint32_t entropybits, bool count, uint32_t flag) + uint32_t entropybits, bool count, uint32_t flag, bool intr_p) { bool fullyused; /* - * If we're in interrupt context, use entropy_enter_intr and - * take note of whether it consumed the full sample; if not, - * use entropy_enter, which always consumes the full sample. + * For the interrupt-like path, use entropy_enter_intr and take + * note of whether it consumed the full sample; otherwise, use + * entropy_enter, which always consumes the full sample. */ - if (curlwp && cpu_intr_p()) { + if (intr_p) { fullyused = entropy_enter_intr(buf, len, entropybits, count); } else { entropy_enter(buf, len, entropybits, count); @@ -2182,7 +2265,8 @@ rnd_add_data_1(struct krndsource *rs, const void *buf, uint32_t len, * contributed from this source. */ if (fullyused) { - if (__predict_false(E->stage == ENTROPY_COLD)) { + if (__predict_false(cold)) { + const int s = splhigh(); rs->total = add_sat(rs->total, entropybits); switch (flag) { case RND_FLAG_COLLECT_TIME: @@ -2194,6 +2278,7 @@ rnd_add_data_1(struct krndsource *rs, const void *buf, uint32_t len, add_sat(rs->value_delta.insamples, 1); break; } + splx(s); } else { struct rndsource_cpu *rc = percpu_getref(rs->state); @@ -2240,7 +2325,7 @@ rndsource_entropybits(struct krndsource *rs) { unsigned nbits = rs->total; - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); KASSERT(rnd_sources_locked()); percpu_foreach(rs->state, rndsource_entropybits_cpu, &nbits); return nbits; @@ -2266,7 +2351,7 @@ static void rndsource_to_user(struct krndsource *rs, rndsource_t *urs) { - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); KASSERT(rnd_sources_locked()); /* Avoid kernel memory disclosure. */ @@ -2289,7 +2374,7 @@ static void rndsource_to_user_est(struct krndsource *rs, rndsource_est_t *urse) { - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); KASSERT(rnd_sources_locked()); /* Avoid kernel memory disclosure. */ @@ -2353,7 +2438,7 @@ entropy_ioctl(unsigned long cmd, void *data) bool privileged; int error; - KASSERT(E->stage >= ENTROPY_WARM); + KASSERT(!cold); /* Verify user's authorization to perform the ioctl. 
*/ switch (cmd) { diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index ad65c64ee1a5..8b0c95962eef 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -521,7 +521,7 @@ putchar(int c, int flags, struct tty *tp) #ifdef RND_PRINTF if (__predict_true(kprintf_inited)) { unsigned char ch = c; - rnd_add_data(&rnd_printf_source, &ch, 1, 0); + rnd_add_data_intr(&rnd_printf_source, &ch, 1, 0); } #endif } @@ -1623,7 +1623,7 @@ done: #ifdef RND_PRINTF if (__predict_true(kprintf_inited)) - rnd_add_data(&rnd_printf_source, NULL, 0, 0); + rnd_add_data_intr(&rnd_printf_source, NULL, 0, 0); #endif return ret; } diff --git a/sys/sys/rndsource.h b/sys/sys/rndsource.h index 234a9704ac25..4ac81b291bbe 100644 --- a/sys/sys/rndsource.h +++ b/sys/sys/rndsource.h @@ -94,6 +94,8 @@ void _rnd_add_uint64(struct krndsource *, uint64_t); /* legacy */ void rnd_add_uint32(struct krndsource *, uint32_t); void rnd_add_data(struct krndsource *, const void *, uint32_t, uint32_t); +void rnd_add_data_intr(struct krndsource *, const void *, uint32_t, + uint32_t); void rnd_add_data_sync(struct krndsource *, const void *, uint32_t, uint32_t);
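One more illustrative sketch (not part of the patch), covering the
callback rule that rnd.9 stresses above: inside a callback registered
with rndsource_setcb, samples gathered synchronously must be entered
with rnd_add_data_sync, never rnd_add_data, rnd_add_data_intr, or
rnd_add_uint32.  The "bazrng" driver and its helper names are invented
for this example; the rnd(9) calls and flags are real.

/*
 * Sketch only, not part of the patch.  "bazrng" and its softc are
 * hypothetical; rndsource_setcb, rnd_attach_source, and
 * rnd_add_data_sync are the real entry points.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/rndsource.h>

struct bazrng_softc {
	krndsource_t	sc_rndsource;
};

/* Called by the entropy subsystem when it wants nbytes more entropy. */
static void
bazrng_get(size_t nbytes, void *arg)
{
	struct bazrng_softc *sc = arg;
	uint8_t buf[32];
	size_t n;

	while (nbytes) {
		n = MIN(nbytes, sizeof(buf));
		memset(buf, 0, n);	/* placeholder for the synchronous device read */

		/* Inside the rndsource_setcb callback: rnd_add_data_sync only. */
		rnd_add_data_sync(&sc->sc_rndsource, buf, n, NBBY * n);
		nbytes -= n;
	}
}

static void
bazrng_attach(struct bazrng_softc *sc)
{

	/* Set the callback before attaching the source. */
	rndsource_setcb(&sc->sc_rndsource, bazrng_get, sc);
	rnd_attach_source(&sc->sc_rndsource, "bazrng", RND_TYPE_RNG,
	    RND_FLAG_DEFAULT|RND_FLAG_HASCB);
}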