commit a6a1a1d62127ac610460a08e10d719987091e2c0
Author: Taylor R Campbell
Date:   Sun Jun 23 15:39:36 2013 +0000

    WIP: Fix races in /dev/u?random context initialization and accounting.

diff --git a/sys/dev/rndpseudo.c b/sys/dev/rndpseudo.c
index 3bbb522..a6abf80 100644
--- a/sys/dev/rndpseudo.c
+++ b/sys/dev/rndpseudo.c
@@ -1,11 +1,12 @@
 /*	$NetBSD: rndpseudo.c,v 1.12 2013/06/13 00:55:01 tls Exp $	*/

 /*-
- * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
+ * Copyright (c) 1997-2013 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Michael Graff and Thor Lancelot Simon.
+ * by Michael Graff, Thor Lancelot Simon, and
+ * Taylor R. Campbell.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -56,6 +57,7 @@ __KERNEL_RCSID(0, "$NetBSD: rndpseudo.c,v 1.12 2013/06/13 00:55:01 tls Exp $");
 #include
 #include
 #include
+#include <sys/percpu.h>
 #include

 #ifdef COMPAT_50
@@ -88,18 +90,17 @@ extern int rnd_debug;
 #endif

 /*
- * The size of a temporary buffer, kmem_alloc()ed when needed, and used for
- * reading and writing data.
+ * The size of a temporary buffer for reading and writing entropy.
  */
 #define RND_TEMP_BUFFER_SIZE	512

-static pool_cache_t rp_pc;
-static pool_cache_t rp_cpc;
+static pool_cache_t rnd_temp_buffer_cache;
+static pool_cache_t rnd_ctx_cache;

 /*
  * The per-CPU RNGs used for short requests
  */
-cprng_strong_t **rp_cpurngs;
+static percpu_t *percpu_urandom_cprng;

 /*
  * Our random pool.  This is defined here rather than using the general
@@ -164,190 +165,216 @@ rndpseudo_counter(void)
 }

 /*
- * "Attach" the random device.  This is an (almost) empty stub, since
- * pseudo-devices don't get attached until after config, after the
- * entropy sources will attach.  We just use the timing of this event
- * as another potential source of initial entropy.
+ * `Attach' the random device.  We use the timing of this event as
+ * another potential source of initial entropy.
  */
 void
 rndattach(int num)
 {
-	u_int32_t c;
+	uint32_t c;

-	/* Trap unwary players who don't call rnd_init() early */
+	/* Trap unwary players who don't call rnd_init() early.  */
 	KASSERT(rnd_ready);

-	rp_pc = pool_cache_init(RND_TEMP_BUFFER_SIZE, 0, 0, 0,
-				"rndtemp", NULL, IPL_NONE,
-				NULL, NULL, NULL);
-	rp_cpc = pool_cache_init(sizeof(rp_ctx_t), 0, 0, 0,
-				 "rndctx", NULL, IPL_NONE,
-				 NULL, NULL, NULL);
+	rnd_temp_buffer_cache = pool_cache_init(RND_TEMP_BUFFER_SIZE, 0, 0, 0,
+	    "rndtemp", NULL, IPL_NONE, NULL, NULL, NULL);
+	rnd_ctx_cache = pool_cache_init(sizeof(rp_ctx_t), 0, 0, 0,
+	    "rndctx", NULL, IPL_NONE, NULL, NULL, NULL);
+	percpu_urandom_cprng = percpu_alloc(sizeof(struct cprng_strong *));

-	/* mix in another counter */
+	/* Mix in another counter.  */
 	c = rndpseudo_counter();
 	mutex_spin_enter(&rndpool_mtx);
-	rndpool_add_data(&rnd_pool, &c, sizeof(u_int32_t), 1);
+	rndpool_add_data(&rnd_pool, &c, sizeof(c), 1);
 	mutex_spin_exit(&rndpool_mtx);
-
-	rp_cpurngs = kmem_zalloc(maxcpus * sizeof(cprng_strong_t *),
-	    KM_SLEEP);
 }

 int
-rndopen(dev_t dev, int flag, int ifmt,
-	struct lwp *l)
+rndopen(dev_t dev, int flags, int fmt, struct lwp *l)
 {
-	rp_ctx_t *ctx;
-	file_t *fp;
-	int fd, hard, error = 0;
+	bool hard;
+	struct file *fp;
+	int fd;
+	int error;

 	switch (minor(dev)) {
-	case RND_DEV_URANDOM:
-		hard = 0;
+	case RND_DEV_URANDOM:
+		hard = false;
 		break;
-	case RND_DEV_RANDOM:
-		hard = 1;
+
+	case RND_DEV_RANDOM:
+		hard = true;
 		break;
-	default:
+
+	default:
 		return ENXIO;
 	}

-	ctx = pool_cache_get(rp_cpc, PR_WAITOK);
-	if ((error = fd_allocfile(&fp, &fd)) != 0) {
-		pool_cache_put(rp_cpc, ctx);
-		return error;
-	}
+
+	error = fd_allocfile(&fp, &fd);
+	if (error)
+		return error;
+
+	/*
+	 * Allocate a context, but don't create a CPRNG yet -- do that
+	 * lazily because it consumes entropy from the system entropy
+	 * pool, which (currently) has the effect of depleting it and
+	 * causing readers from /dev/random to block.  If this is
+	 * /dev/urandom and the process is about to send only short
+	 * reads to it, then we will be using a per-CPU CPRNG anyway.
+	 */
+	rp_ctx_t *const ctx = pool_cache_get(rnd_ctx_cache, PR_WAITOK);
 	ctx->cprng = NULL;
 	ctx->hard = hard;
-	ctx->bytesonkey = 0;
-	mutex_init(&ctx->interlock, MUTEX_DEFAULT, IPL_NONE);

-	return fd_clone(fp, fd, flag, &rnd_fileops, ctx);
+	error = fd_clone(fp, fd, flags, &rnd_fileops, ctx);
+	KASSERT(error == EMOVEFD);
+
+	return error;
 }

-static void
-rnd_alloc_cprng(rp_ctx_t *ctx)
+/*
+ * Fetch a /dev/u?random context's CPRNG, or create and save one if
+ * necessary.
+ */
+static struct cprng_strong *
+rnd_ctx_cprng(rp_ctx_t *ctx)
 {
-	char personalization_buf[64];
-	struct lwp *l = curlwp;
-	int cflags = ctx->hard ? CPRNG_USE_CV :
-			CPRNG_INIT_ANY|CPRNG_REKEY_ANY;
-
-	mutex_enter(&ctx->interlock);
-	if (__predict_true(ctx->cprng == NULL)) {
-		snprintf(personalization_buf,
-			 sizeof(personalization_buf),
-			 "%d%llud%d", l->l_proc->p_pid,
-			 (unsigned long long int)l->l_ncsw, l->l_cpticks);
-		ctx->cprng = cprng_strong_create(personalization_buf,
-						 IPL_NONE, cflags);
+	struct cprng_strong *cprng, *tmp = NULL;
+
+	/* Fast path: if someone has already allocated a CPRNG, use it.  */
+	cprng = ctx->cprng;
+	if (__predict_true(cprng != NULL)) {
+		/* Make sure the CPU hasn't prefetched cprng's guts.  */
+		membar_consumer();
+		goto out;
 	}
-	membar_sync();
-	mutex_exit(&ctx->interlock);
+
+	/* Slow path: create a CPRNG.  Allocate before taking locks.  */
+	char name[64];
+	struct lwp *const l = curlwp;
+	(void)snprintf(name, sizeof(name), "%d %"PRIu64" %u",
+	    (int)l->l_proc->p_pid, l->l_ncsw, l->l_cpticks);
+	const int flags = (ctx->hard? CPRNG_USE_CV :
+	    (CPRNG_INIT_ANY | CPRNG_REKEY_ANY));
+	tmp = cprng_strong_create(name, IPL_NONE, flags);
+
+	/* Limit it to its internal strength if this is /dev/random.  */
+	if (ctx->hard)
+		cprng_strong_limit(tmp, cprng_strong_strength(tmp));
+
+	/* Publish cprng's guts before the pointer to them.  */
+	membar_producer();
+
+	/* Attempt to commit tmp, unless someone beat us.  */
+	cprng = atomic_cas_ptr(&ctx->cprng, NULL, tmp);
+	if (__predict_false(cprng != NULL)) {
+		/* Make sure the CPU hasn't prefetched cprng's guts.  */
+		membar_consumer();
+		goto out;
+	}
+	cprng = tmp;
+	tmp = NULL;
+
+out:	if (tmp != NULL)
+		cprng_strong_destroy(tmp);
+	KASSERT(cprng != NULL);
+	return cprng;
+}
+
+/*
+ * Fetch a per-CPU CPRNG, or create and save one if necessary.
+ */
+static struct cprng_strong *
+rnd_percpu_cprng(void)
+{
+	struct cprng_strong **cprngp, *cprng, *tmp;
+
+	/* Fast path: if there already is a CPRNG for this CPU, use it.  */
+	cprngp = percpu_getref(percpu_urandom_cprng);
+	cprng = *cprngp;
+	if (__predict_true(cprng != NULL))
+		goto out;
+
+	/*
+	 * Slow path: create a CPRNG named by this CPU.
+	 *
+	 * XXX The CPU of the name may be different from the CPU to
+	 * which it is assigned, because we need to choose a name and
+	 * allocate a cprng while preemption is enabled.  This could be
+	 * fixed by changing the cprng_strong API (e.g., by adding a
+	 * cprng_strong_setname or by separating allocation from
+	 * initialization), but it's not clear that's worth the
+	 * trouble.
+	 */
+	char name[32];
+	(void)snprintf(name, sizeof(name), "urandom%u", cpu_index(curcpu()));
+	tmp = cprng_strong_create(name, IPL_NONE,
+	    (CPRNG_INIT_ANY | CPRNG_REKEY_ANY));
+
+	/* Try again, but we may have been preempted and lost a race.  */
+	cprngp = percpu_getref(percpu_urandom_cprng);
+	cprng = *cprngp;
+	if (__predict_false(cprng != NULL))
+		goto out;
+
+	/* Commit the CPRNG we just created.  */
+	cprng = tmp;
+	tmp = NULL;
+	*cprngp = cprng;
+
+out:	percpu_putref(percpu_urandom_cprng);
+	if (tmp != NULL)
+		cprng_strong_destroy(tmp);
+	KASSERT(cprng != NULL);
+	return cprng;
 }

 static int
-rnd_read(struct file * fp, off_t *offp, struct uio *uio,
-	 kauth_cred_t cred, int flags)
+rnd_read(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
+    int flags)
 {
-	rp_ctx_t *ctx = fp->f_data;
-	cprng_strong_t *cprng;
-	u_int8_t *bf;
-	int strength, ret;
-	struct cpu_info *ci = curcpu();
+	int error;

 	DPRINTF(RND_DEBUG_READ,
-		("Random: Read of %zu requested, flags 0x%08x\n",
-		 uio->uio_resid, flags));
+	    ("Random: Read of %zu requested, flags 0x%08x\n",
+		uio->uio_resid, flags));

 	if (uio->uio_resid == 0)
-		return (0);
+		return 0;

-	if (ctx->hard || uio->uio_resid > NIST_BLOCK_KEYLEN_BYTES) {
-		if (ctx->cprng == NULL) {
-			rnd_alloc_cprng(ctx);
-		}
-		cprng = ctx->cprng;
-	} else {
-		int index = cpu_index(ci);
-
-		if (__predict_false(rp_cpurngs[index] == NULL)) {
-			char rngname[32];
-
-			snprintf(rngname, sizeof(rngname),
-				 "%s-short", cpu_name(ci));
-			rp_cpurngs[index] =
-			    cprng_strong_create(rngname, IPL_NONE,
-						CPRNG_INIT_ANY |
-						CPRNG_REKEY_ANY);
-		}
-		cprng = rp_cpurngs[index];
-	}
+	rp_ctx_t *const ctx = fp->f_data;
+	uint8_t *const buf = pool_cache_get(rnd_temp_buffer_cache, PR_WAITOK);

-	if (__predict_false(cprng == NULL)) {
-		printf("NULL rng!\n");
-		return EIO;
-	}
+	/*
+	 * Choose a CPRNG to use -- either the per-open CPRNG, if this
+	 * is /dev/random or a long read, or the per-CPU one otherwise.
+	 *
+	 * XXX NIST_BLOCK_KEYLEN_BYTES is a detail of the cprng(9)
+	 * implementation and as such should not be mentioned here.
+	 */
+	struct cprng_strong *const cprng =
+	    ((ctx->hard || (uio->uio_resid > NIST_BLOCK_KEYLEN_BYTES))?
+		rnd_ctx_cprng(ctx) : rnd_percpu_cprng());

-	strength = cprng_strong_strength(cprng);
-	ret = 0;
-	bf = pool_cache_get(rp_pc, PR_WAITOK);
+	/*
+	 * Generate the data in RND_TEMP_BUFFER_SIZE chunks.
+	 */
 	while (uio->uio_resid > 0) {
-		int n, nread, want;
-
-		want = MIN(RND_TEMP_BUFFER_SIZE, uio->uio_resid);
+		const size_t n_req = MIN(uio->uio_resid, RND_TEMP_BUFFER_SIZE);

-		/* XXX is this _really_ what's wanted? */
-		if (ctx->hard) {
-#ifdef RND_VERBOSE
-			printf("rnd: hard, want = %d, strength = %d, "
-			    "bytesonkey = %d\n", (int)want, (int)strength,
-			    (int)ctx->bytesonkey);
-#endif
-			n = MIN(want, strength - ctx->bytesonkey);
-			if (n < 1) {
-#ifdef RND_VERBOSE
-				printf("rnd: BAD BAD BAD: n = %d, want = %d, "
-				    "strength = %d, bytesonkey = %d\n", n,
-				    (int)want, (int)strength,
-				    (int)ctx->bytesonkey);
-#endif
-			}
-		} else {
-			n = want;
-		}
-
-		nread = cprng_strong(cprng, bf, n,
-				     (fp->f_flag & FNONBLOCK) ? FNONBLOCK : 0);
-
-		if (ctx->hard && nread > 0) {
-			if (atomic_add_int_nv(&ctx->bytesonkey, nread) >=
-			    strength) {
-				cprng_strong_deplete(cprng);
-				ctx->bytesonkey = 0;
-				membar_producer();
-			}
-#ifdef RND_VERBOSE
-			printf("rnd: new bytesonkey %d\n", ctx->bytesonkey);
-#endif
-		}
-		if (nread < 1) {
-			if (fp->f_flag & FNONBLOCK) {
-				ret = EWOULDBLOCK;
-			} else {
-				ret = EINTR;
-			}
-			goto out;
-		}
+		CTASSERT(RND_TEMP_BUFFER_SIZE <= CPRNG_MAX_LEN);
+		const size_t n_read = cprng_strong(cprng, buf, n_req,
+		    ((ctx->hard && ISSET(fp->f_flag, FNONBLOCK))?
+			FNONBLOCK : 0));
+		KASSERT(n_read == n_req);

-		ret = uiomove((void *)bf, nread, uio);
-		if (ret != 0 || n < want) {
+		error = uiomove(buf, n_read, uio);
+		if (error)
 			goto out;
-		}
 	}

-out:
-	pool_cache_put(rp_pc, bf);
-	return (ret);
+
+out:	pool_cache_put(rnd_temp_buffer_cache, buf);
+	return error;
 }

 static int
@@ -371,7 +398,7 @@ rnd_write(struct file *fp, off_t *offp, struct uio *uio,
 	if (uio->uio_resid == 0)
 		return (0);
 	ret = 0;
-	bf = pool_cache_get(rp_pc, PR_WAITOK);
+	bf = pool_cache_get(rnd_temp_buffer_cache, PR_WAITOK);
 	while (uio->uio_resid > 0) {
 		/*
 		 * Don't flood the pool.
@@ -418,7 +445,7 @@ rnd_write(struct file *fp, off_t *offp, struct uio *uio,
 		added += n;
 		DPRINTF(RND_DEBUG_WRITE, ("Random: Copied in %d bytes\n", n));
 	}
-	pool_cache_put(rp_pc, bf);
+	pool_cache_put(rnd_temp_buffer_cache, bf);
 	return (ret);
 }

@@ -658,8 +685,8 @@ rnd_ioctl(struct file *fp, u_long cmd, void *addr)
 static int
 rnd_poll(struct file *fp, int events)
 {
+	rp_ctx_t *const ctx = fp->f_data;
 	int revents;
-	rp_ctx_t *ctx = fp->f_data;

 	/*
 	 * We are always writable.
@@ -670,22 +697,18 @@ rnd_poll(struct file *fp, int events)
 	 * Save some work if not checking for reads.
 	 */
 	if ((events & (POLLIN | POLLRDNORM)) == 0)
-		return (revents);
-
-	if (ctx->cprng == NULL) {
-		rnd_alloc_cprng(ctx);
-		if (__predict_false(ctx->cprng == NULL)) {
-			return EIO;
-		}
-	}
+		return revents;

-	if (ctx->hard) {
-		revents |= cprng_strong_poll(ctx->cprng, events);
-	} else {
+	/*
+	 * For /dev/random, ask the CPRNG, which may require creating
+	 * one.  For /dev/urandom, we're always readable.
+	 */
+	if (ctx->hard)
+		revents |= cprng_strong_poll(rnd_ctx_cprng(ctx), events);
+	else
 		revents |= (events & (POLLIN | POLLRDNORM));
-	}

-	return (revents);
+	return revents;
 }

 static int
@@ -709,14 +732,12 @@ rnd_stat(struct file *fp, struct stat *st)
 static int
 rnd_close(struct file *fp)
 {
-	rp_ctx_t *ctx = fp->f_data;
+	rp_ctx_t *const ctx = fp->f_data;

-	if (ctx->cprng) {
+	if (ctx->cprng != NULL)
 		cprng_strong_destroy(ctx->cprng);
-	}
 	fp->f_data = NULL;
-	mutex_destroy(&ctx->interlock);
-	pool_cache_put(rp_cpc, ctx);
+	pool_cache_put(rnd_ctx_cache, ctx);

 	return 0;
 }
@@ -724,14 +745,7 @@ rnd_close(struct file *fp)
 static int
 rnd_kqfilter(struct file *fp, struct knote *kn)
 {
-	rp_ctx_t *ctx = fp->f_data;
-
-	if (ctx->cprng == NULL) {
-		rnd_alloc_cprng(ctx);
-		if (__predict_false(ctx->cprng == NULL)) {
-			return EIO;
-		}
-	}
+	rp_ctx_t *const ctx = fp->f_data;

-	return cprng_strong_kqfilter(ctx->cprng, kn);
+	return cprng_strong_kqfilter(rnd_ctx_cprng(ctx), kn);
 }
diff --git a/sys/kern/subr_cprng.c b/sys/kern/subr_cprng.c
index 77f6aa9..54af4a4 100644
--- a/sys/kern/subr_cprng.c
+++ b/sys/kern/subr_cprng.c
@@ -98,8 +98,15 @@ struct cprng_strong {
 	struct rndsink	*cs_rndsink;
 	bool		cs_ready;
 	NIST_CTR_DRBG	cs_drbg;
+
+	/* XXX Kludge for /dev/random `information-theoretic' properties.  */
+	unsigned int	cs_consumed;
+	unsigned int	cs_limit;
 };

+/* Paranoia: Make sure unsigned int is enough for any user limits.  */
+CTASSERT(CPRNG_MAX_LEN <= UINT_MAX);
+
 struct cprng_strong *
 cprng_strong_create(const char *name, int ipl, int flags)
 {
@@ -133,6 +140,10 @@ cprng_strong_create(const char *name, int ipl, int flags)
 	    cprng->cs_name);
 	explicit_bzero(seed, sizeof(seed));

+	/* By default, don't rekey faster than the CTR_DRBG wants.  */
+	cprng->cs_consumed = 0;
+	cprng->cs_limit = 0;
+
 	if (!cprng->cs_ready && !ISSET(flags, CPRNG_INIT_ANY))
 		printf("cprng %s: creating with partial entropy\n",
 		    cprng->cs_name);
@@ -172,9 +183,11 @@ size_t
 cprng_strong(struct cprng_strong *cprng, void *buffer, size_t bytes, int flags)
 {
 	size_t result;
+	const size_t limit = ((cprng->cs_limit == 0)? CPRNG_MAX_LEN
+	    : MIN(cprng->cs_limit, CPRNG_MAX_LEN));

-	/* Caller must loop for more than CPRNG_MAX_LEN bytes.  */
-	bytes = MIN(bytes, CPRNG_MAX_LEN);
+	/* Caller must loop for more.  */
+	bytes = MIN(bytes, limit);

 	mutex_enter(&cprng->cs_lock);
@@ -279,7 +292,7 @@ cprng_strong_poll(struct cprng_strong *cprng, int events)
 }

 /*
- * XXX Kludge for the current /dev/random implementation.
+ * XXX Kludge for the old /dev/random implementation.
  */
 void
 cprng_strong_deplete(struct cprng_strong *cprng)
@@ -292,6 +305,30 @@ cprng_strong_deplete(struct cprng_strong *cprng)
 }

 /*
+ * XXX Kludge for /dev/random `information-theoretic' properties.
+ */
+void
+cprng_strong_limit(struct cprng_strong *cprng, size_t limit)
+{
+
+	if (limit >= CPRNG_MAX_LEN)
+		return;
+
+	mutex_enter(&cprng->cs_lock);
+	if (cprng->cs_consumed >= limit) {
+		cprng->cs_consumed = 0;
+		if (limit > 0) {
+			cprng->cs_ready = false;
+			rndsink_schedule(cprng->cs_rndsink);
+		}
+	}
+	cprng->cs_limit = limit;
+	KASSERT((cprng->cs_limit == 0) ||
+	    (cprng->cs_consumed < cprng->cs_limit));
+	mutex_exit(&cprng->cs_lock);
+}
+
+/*
  * XXX Move nist_ctr_drbg_reseed_advised_p and
  * nist_ctr_drbg_reseed_needed_p into the nist_ctr_drbg API and make
  * the NIST_CTR_DRBG structure opaque.
@@ -314,15 +351,34 @@ nist_ctr_drbg_reseed_needed_p(NIST_CTR_DRBG *drbg)
  * Generate some data from the underlying generator.
  */
 static void
-cprng_strong_generate(struct cprng_strong *cprng, void *buffer,
-    size_t bytes)
+cprng_strong_generate(struct cprng_strong *cprng, void *buffer, size_t bytes)
 {
 	const uint32_t cc = cprng_counter();

 	KASSERT(bytes <= CPRNG_MAX_LEN);
+	KASSERT((cprng->cs_limit == 0) || (bytes <= cprng->cs_limit));
 	KASSERT(mutex_owned(&cprng->cs_lock));

 	/*
+	 * `Debit' the `entropy' if requested.
+	 *
+	 * XXX Kludge for /dev/random `information-theoretic' properties.
+	 */
+	if (__predict_false(cprng->cs_limit > 0)) {
+		KASSERT(cprng->cs_consumed < cprng->cs_limit);
+		if (__predict_false(bytes >=
+			(cprng->cs_limit - cprng->cs_consumed))) {
+			bytes = (cprng->cs_limit - cprng->cs_consumed);
+			cprng->cs_consumed = 0;
+			cprng->cs_ready = false;
+			rndsink_schedule(cprng->cs_rndsink);
+		}
+		KASSERT(bytes < (cprng->cs_limit - cprng->cs_consumed));
+		cprng->cs_consumed += bytes;
+		KASSERT(cprng->cs_consumed < cprng->cs_limit);
+	}
+
+	/*
 	 * Generate some data from the NIST CTR_DRBG.  Caller
 	 * guarantees reseed if we're not ready, and if we exhaust the
 	 * generator, we mark ourselves not ready.  Consequently, this
diff --git a/sys/sys/cprng.h b/sys/sys/cprng.h
index 89b3f69..6411ee5 100644
--- a/sys/sys/cprng.h
+++ b/sys/sys/cprng.h
@@ -101,6 +101,7 @@ struct knote;	/* XXX temp, for /dev/random */
 int	cprng_strong_kqfilter(cprng_strong_t *, struct knote *); /* XXX " */
 int	cprng_strong_poll(cprng_strong_t *, int); /* XXX " */
 void	cprng_strong_deplete(cprng_strong_t *);	/* XXX " */
+void	cprng_strong_limit(cprng_strong_t *, size_t);

 extern cprng_strong_t	*kern_cprng;

diff --git a/sys/sys/rnd.h b/sys/sys/rnd.h
index 6333c84..5808316 100644
--- a/sys/sys/rnd.h
+++ b/sys/sys/rnd.h
@@ -235,13 +235,11 @@ typedef struct {
 #ifdef _KERNEL

 /*
- * A context.  cprng plus a smidge.
+ * A /dev/u?random context.
  */
 typedef struct {
 	struct cprng_strong *cprng;
-	int hard;
-	int bytesonkey;
-	kmutex_t interlock;
+	bool hard;
 } rp_ctx_t;

 #endif
diff --git a/usr.bin/fstat/misc.c b/usr.bin/fstat/misc.c
index 0b67bb2..d9170bb 100644
--- a/usr.bin/fstat/misc.c
+++ b/usr.bin/fstat/misc.c
@@ -207,7 +207,7 @@ p_rnd(struct file *f)
 	}
 	(void)printf("* rnd");
 	if (rp.hard)
-		printf(" bytesonkey=%d", rp.bytesonkey);
+		printf(" hard");
 	printf("\n");
 	return 0;
 }
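
Note on the initialization change in rnd_ctx_cprng(): the old rnd_alloc_cprng() serialized CPRNG creation with a per-context mutex and a membar_sync(), but callers checked and dereferenced ctx->cprng without any pairing barrier.  The replacement is lock-free lazy initialization: the fast path is a plain load followed by membar_consumer(), and the slow path constructs a candidate, issues membar_producer(), and installs it with atomic_cas_ptr(), destroying the candidate if another opener won the race.  The sketch below shows the same publish-with-CAS pattern in portable userland C with C11 atomics; it is only an illustration, and the names in it (struct obj, make_obj, get_obj) are invented, not part of the patch.

/*
 * Illustrative userland sketch of the lazy-initialization pattern used
 * by rnd_ctx_cprng() above, written with C11 atomics rather than the
 * kernel's atomic_cas_ptr()/membar_*() primitives.  All names here are
 * hypothetical.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	int value;
};

static _Atomic(struct obj *) the_obj;	/* starts out NULL */

static struct obj *
make_obj(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (o == NULL)
		abort();
	o->value = 42;		/* "cprng's guts" in the patch */
	return o;
}

static struct obj *
get_obj(void)
{
	/*
	 * Fast path: an acquire load pairs with the release CAS below,
	 * so a caller that sees the pointer also sees the initialized
	 * contents -- the job membar_consumer() does in the patch.
	 */
	struct obj *o = atomic_load_explicit(&the_obj, memory_order_acquire);
	if (o != NULL)
		return o;

	/* Slow path: build a candidate without holding any lock. */
	struct obj *tmp = make_obj();

	/*
	 * Publish the contents before the pointer (membar_producer()),
	 * then try to install the candidate.  If another thread beat
	 * us to it, discard ours and use the winner's, exactly as the
	 * patch does with cprng_strong_destroy() on the loser.
	 */
	struct obj *expected = NULL;
	if (atomic_compare_exchange_strong_explicit(&the_obj, &expected, tmp,
	    memory_order_acq_rel, memory_order_acquire))
		return tmp;

	free(tmp);
	return expected;
}

int
main(void)
{
	printf("%d\n", get_obj()->value);
	return 0;
}

rnd_percpu_cprng() follows the same shape, except that the final lookup and store happen while the per-CPU reference from percpu_getref() is held, so a plain store is enough to commit the winner; only the preemption window between choosing the name and re-looking-up the per-CPU slot can lose the race.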
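
Note on the accounting change in subr_cprng.c: cprng_strong_limit() moves the old bytesonkey bookkeeping from rndpseudo.c into cprng(9) itself.  Once a limited generator has handed out its quota since the last reseed, it marks itself not ready and schedules its rndsink, so /dev/random readers block until fresh entropy arrives.  The toy model below mirrors only that cap-and-reset rule; it omits the locking, the KASSERTs, and the DRBG, and every name in it (struct toy_cprng, toy_generate) is invented for the example.

/*
 * Toy userland model of the byte accounting added by
 * cprng_strong_limit()/cprng_strong_generate().  Hypothetical names;
 * not kernel code.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_cprng {
	size_t	consumed;	/* bytes handed out since the last reseed */
	size_t	limit;		/* 0 means no information-theoretic limit */
	bool	ready;		/* false once the quota is exhausted */
};

/* Return how many of the `want' requested bytes may be generated now. */
static size_t
toy_generate(struct toy_cprng *c, size_t want)
{
	if (c->limit == 0)
		return want;		/* unlimited, like /dev/urandom */

	size_t left = c->limit - c->consumed;
	if (want >= left) {
		/* Hand out what is left, then demand a reseed. */
		want = left;
		c->consumed = 0;
		c->ready = false;	/* rndsink_schedule() in the kernel */
	} else {
		c->consumed += want;
	}
	return want;
}

int
main(void)
{
	struct toy_cprng c = { .consumed = 0, .limit = 32, .ready = true };

	printf("%zu\n", toy_generate(&c, 20));	/* 20 -- 12 bytes left */
	printf("%zu\n", toy_generate(&c, 20));	/* 12 -- quota exhausted */
	printf("ready=%d\n", c.ready);		/* 0 until a reseed */
	return 0;
}

In the patch the limit is set to cprng_strong_strength() for /dev/random contexts in rnd_ctx_cprng(), which is what replaces the per-context bytesonkey counter and the explicit cprng_strong_deplete() call in the old rnd_read().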