From 3371f3da08cff4b75c1f2dce742d460539d6566d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 12 Jun 2016 18:11:51 -0400 Subject: [PATCH 1/8] random: initialize the non-blocking pool via add_hwgenerator_randomness() If we have a hardware RNG and are using the in-kernel rngd, we should use this to initialize the non-blocking pool so that getrandom(2) doesn't block unnecessarily. Cc: stable@kernel.org Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 0158d3bff7e5..4e2627a8d226 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1849,12 +1849,18 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, { struct entropy_store *poolp = &input_pool; - /* Suspend writing if we're above the trickle threshold. - * We'll be woken up again once below random_write_wakeup_thresh, - * or when the calling thread is about to terminate. - */ - wait_event_interruptible(random_write_wait, kthread_should_stop() || + if (unlikely(nonblocking_pool.initialized == 0)) + poolp = &nonblocking_pool; + else { + /* Suspend writing if we're above the trickle + * threshold. We'll be woken up again once below + * random_write_wakeup_thresh, or when the calling + * thread is about to terminate. + */ + wait_event_interruptible(random_write_wait, + kthread_should_stop() || ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); + } mix_pool_bytes(poolp, buffer, count); credit_entropy_bits(poolp, entropy); } From 9b4d008787f864f17d008c9c15bbe8a0f7e2fc24 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 13 Jun 2016 10:10:51 -0400 Subject: [PATCH 2/8] random: print a warning for the first ten uninitialized random users Since systemd is consistently using /dev/urandom before it is initialized, we can't see the other potentially dangerous users of /dev/urandom immediately after boot. 
So print the first ten such complaints instead. Cc: stable@kernel.org Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 4e2627a8d226..d057438266bb 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1458,12 +1458,16 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { + static int maxwarn = 10; int ret; - if (unlikely(nonblocking_pool.initialized == 0)) - printk_once(KERN_NOTICE "random: %s urandom read " - "with %d bits of entropy available\n", - current->comm, nonblocking_pool.entropy_total); + if (unlikely(nonblocking_pool.initialized == 0) && + maxwarn > 0) { + maxwarn--; + printk(KERN_NOTICE "random: %s: uninitialized urandom read " + "(%zd bytes read, %d bits of entropy available)\n", + current->comm, nbytes, nonblocking_pool.entropy_total); + } nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); ret = extract_entropy_user(&nonblocking_pool, buf, nbytes); From 4b44f2d18a330565227a7348844493c59366171e Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Mon, 2 May 2016 02:14:34 -0400 Subject: [PATCH 3/8] random: add interrupt callback to VMBus IRQ handler The Hyper-V Linux Integration Services use the VMBus implementation for communication with the Hypervisor. VMBus registers its own interrupt handler that completely bypasses the common Linux interrupt handling. This implies that the interrupt entropy collector is not triggered. This patch adds the interrupt entropy collection callback into the VMBus interrupt handler function. 
Cc: stable@kernel.org Signed-off-by: Stephan Mueller Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 1 + drivers/hv/vmbus_drv.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/char/random.c b/drivers/char/random.c index d057438266bb..68f18d47717d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -946,6 +946,7 @@ void add_interrupt_randomness(int irq, int irq_flags) /* award one bit for the contents of the fast pool */ credit_entropy_bits(r, credit + 1); } +EXPORT_SYMBOL_GPL(add_interrupt_randomness); #ifdef CONFIG_BLOCK void add_disk_randomness(struct gendisk *disk) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 952f20fdc7e3..e82f7e1c217c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "hyperv_vmbus.h" static struct acpi_device *hv_acpi_dev; @@ -806,6 +807,8 @@ static void vmbus_isr(void) else tasklet_schedule(hv_context.msg_dpc[cpu]); } + + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); } From b1132deac01c2332d234fa821a70022796b79182 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 4 May 2016 21:08:39 -0400 Subject: [PATCH 4/8] random: properly align get_random_int_hash get_random_long() reads from the get_random_int_hash array using an unsigned long pointer. For this code to be guaranteed correct on all architectures, the array must be aligned to an unsigned long boundary. 
Cc: stable@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 68f18d47717d..a6253e89663c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1778,13 +1778,15 @@ int random_int_secret_init(void) return 0; } +static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash) + __aligned(sizeof(unsigned long)); + /* * Get a random word for internal kernel use only. Similar to urandom but * with the goal of minimal entropy pool depletion. As a result, the random * value is not cryptographically secure but for several uses the cost of * depleting entropy is too high */ -static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash); unsigned int get_random_int(void) { __u32 *hash; From e192be9d9a30555aae2ca1dc3aad37cba484cd4a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 12 Jun 2016 18:13:36 -0400 Subject: [PATCH 5/8] random: replace non-blocking pool with a Chacha20-based CRNG The CRNG is faster, and we don't pretend to track entropy usage in the CRNG any more. 
Signed-off-by: Theodore Ts'o --- crypto/chacha20_generic.c | 61 ------ drivers/char/random.c | 378 ++++++++++++++++++++++++++++---------- include/crypto/chacha20.h | 1 + lib/Makefile | 2 +- lib/chacha20.c | 79 ++++++++ 5 files changed, 357 insertions(+), 164 deletions(-) create mode 100644 lib/chacha20.c diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index da9c89968223..1cab83146e33 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -15,72 +15,11 @@ #include #include -static inline u32 rotl32(u32 v, u8 n) -{ - return (v << n) | (v >> (sizeof(v) * 8 - n)); -} - static inline u32 le32_to_cpuvp(const void *p) { return le32_to_cpup(p); } -static void chacha20_block(u32 *state, void *stream) -{ - u32 x[16], *out = stream; - int i; - - for (i = 0; i < ARRAY_SIZE(x); i++) - x[i] = state[i]; - - for (i = 0; i < 20; i += 2) { - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16); - - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); - - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8); - - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7); - - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16); - - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = 
rotl32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); - - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8); - - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); - } - - for (i = 0; i < ARRAY_SIZE(x); i++) - out[i] = cpu_to_le32(x[i] + state[i]); - - state[12]++; -} - static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { diff --git a/drivers/char/random.c b/drivers/char/random.c index a6253e89663c..dc2a9c2d8dcf 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -261,6 +261,7 @@ #include #include #include +#include #include #include @@ -413,6 +414,31 @@ static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); static LIST_HEAD(random_ready_list); +struct crng_state { + __u32 state[16]; + unsigned long init_time; + spinlock_t lock; +}; + +struct crng_state primary_crng = { + .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), +}; + +/* + * crng_init = 0 --> Uninitialized + * 1 --> Initialized + * 2 --> Initialized from input_pool + * + * crng_init is protected by primary_crng->lock, and only increases + * its value (from 0->1->2). + */ +static int crng_init = 0; +#define crng_ready() (likely(crng_init > 0)) +static int crng_init_cnt = 0; +#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) +static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]); +static void process_random_ready_list(void); + /********************************************************************** * * OS independent entropy store. 
Here are the functions which handle @@ -442,10 +468,15 @@ struct entropy_store { __u8 last_data[EXTRACT_SIZE]; }; +static ssize_t extract_entropy(struct entropy_store *r, void *buf, + size_t nbytes, int min, int rsvd); +static ssize_t _extract_entropy(struct entropy_store *r, void *buf, + size_t nbytes, int fips); + +static void crng_reseed(struct crng_state *crng, struct entropy_store *r); static void push_to_pool(struct work_struct *work); static __u32 input_pool_data[INPUT_POOL_WORDS]; static __u32 blocking_pool_data[OUTPUT_POOL_WORDS]; -static __u32 nonblocking_pool_data[OUTPUT_POOL_WORDS]; static struct entropy_store input_pool = { .poolinfo = &poolinfo_table[0], @@ -466,16 +497,6 @@ static struct entropy_store blocking_pool = { push_to_pool), }; -static struct entropy_store nonblocking_pool = { - .poolinfo = &poolinfo_table[1], - .name = "nonblocking", - .pull = &input_pool, - .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock), - .pool = nonblocking_pool_data, - .push_work = __WORK_INITIALIZER(nonblocking_pool.push_work, - push_to_pool), -}; - static __u32 const twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; @@ -678,12 +699,6 @@ retry: if (!r->initialized && r->entropy_total > 128) { r->initialized = 1; r->entropy_total = 0; - if (r == &nonblocking_pool) { - prandom_reseed_late(); - process_random_ready_list(); - wake_up_all(&urandom_init_wait); - pr_notice("random: %s pool is initialized\n", r->name); - } } trace_credit_entropy_bits(r->name, nbits, @@ -693,30 +708,27 @@ retry: if (r == &input_pool) { int entropy_bits = entropy_count >> ENTROPY_SHIFT; + if (crng_init < 2 && entropy_bits >= 128) { + crng_reseed(&primary_crng, r); + entropy_bits = r->entropy_count >> ENTROPY_SHIFT; + } + /* should we wake readers? 
*/ if (entropy_bits >= random_read_wakeup_bits) { wake_up_interruptible(&random_read_wait); kill_fasync(&fasync, SIGIO, POLL_IN); } /* If the input pool is getting full, send some - * entropy to the two output pools, flipping back and - * forth between them, until the output pools are 75% - * full. + * entropy to the blocking pool until it is 75% full. */ if (entropy_bits > random_write_wakeup_bits && r->initialized && r->entropy_total >= 2*random_read_wakeup_bits) { - static struct entropy_store *last = &blocking_pool; struct entropy_store *other = &blocking_pool; - if (last == &blocking_pool) - other = &nonblocking_pool; if (other->entropy_count <= - 3 * other->poolinfo->poolfracbits / 4) - last = other; - if (last->entropy_count <= - 3 * last->poolinfo->poolfracbits / 4) { - schedule_work(&last->push_work); + 3 * other->poolinfo->poolfracbits / 4) { + schedule_work(&other->push_work); r->entropy_total = 0; } } @@ -734,6 +746,152 @@ static void credit_entropy_bits_safe(struct entropy_store *r, int nbits) credit_entropy_bits(r, nbits); } +/********************************************************************* + * + * CRNG using CHACHA20 + * + *********************************************************************/ + +#define CRNG_RESEED_INTERVAL (300*HZ) + +static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); + +static void crng_initialize(struct crng_state *crng) +{ + int i; + unsigned long rv; + + memcpy(&crng->state[0], "expand 32-byte k", 16); + if (crng == &primary_crng) + _extract_entropy(&input_pool, &crng->state[4], + sizeof(__u32) * 12, 0); + else + get_random_bytes(&crng->state[4], sizeof(__u32) * 12); + for (i = 4; i < 16; i++) { + if (!arch_get_random_seed_long(&rv) && + !arch_get_random_long(&rv)) + rv = random_get_entropy(); + crng->state[i] ^= rv; + } + crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; +} + +static int crng_fast_load(const char *cp, size_t len) +{ + unsigned long flags; + char *p; + + if (!spin_trylock_irqsave(&primary_crng.lock, 
flags)) + return 0; + if (crng_ready()) { + spin_unlock_irqrestore(&primary_crng.lock, flags); + return 0; + } + p = (unsigned char *) &primary_crng.state[4]; + while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { + p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp; + cp++; crng_init_cnt++; len--; + } + if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { + crng_init = 1; + wake_up_interruptible(&crng_init_wait); + pr_notice("random: fast init done\n"); + } + spin_unlock_irqrestore(&primary_crng.lock, flags); + return 1; +} + +static void crng_reseed(struct crng_state *crng, struct entropy_store *r) +{ + unsigned long flags; + int i, num; + union { + __u8 block[CHACHA20_BLOCK_SIZE]; + __u32 key[8]; + } buf; + + if (r) { + num = extract_entropy(r, &buf, 32, 16, 0); + if (num == 0) + return; + } else + extract_crng(buf.block); + spin_lock_irqsave(&primary_crng.lock, flags); + for (i = 0; i < 8; i++) { + unsigned long rv; + if (!arch_get_random_seed_long(&rv) && + !arch_get_random_long(&rv)) + rv = random_get_entropy(); + crng->state[i+4] ^= buf.key[i] ^ rv; + } + memzero_explicit(&buf, sizeof(buf)); + crng->init_time = jiffies; + if (crng == &primary_crng && crng_init < 2) { + crng_init = 2; + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + pr_notice("random: crng init done\n"); + } + spin_unlock_irqrestore(&primary_crng.lock, flags); +} + +static inline void crng_wait_ready(void) +{ + wait_event_interruptible(crng_init_wait, crng_ready()); +} + +static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) +{ + unsigned long v, flags; + struct crng_state *crng = &primary_crng; + + if (crng_init > 1 && + time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL)) + crng_reseed(crng, &input_pool); + spin_lock_irqsave(&crng->lock, flags); + if (arch_get_random_long(&v)) + crng->state[14] ^= v; + chacha20_block(&crng->state[0], out); + if (crng->state[12] == 0) + crng->state[13]++; + spin_unlock_irqrestore(&crng->lock, flags); +} + +static ssize_t 
extract_crng_user(void __user *buf, size_t nbytes) +{ + ssize_t ret = 0, i; + __u8 tmp[CHACHA20_BLOCK_SIZE]; + int large_request = (nbytes > 256); + + while (nbytes) { + if (large_request && need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + schedule(); + } + + extract_crng(tmp); + i = min_t(int, nbytes, CHACHA20_BLOCK_SIZE); + if (copy_to_user(buf, tmp, i)) { + ret = -EFAULT; + break; + } + + nbytes -= i; + buf += i; + ret += i; + } + + /* Wipe data just written to memory */ + memzero_explicit(tmp, sizeof(tmp)); + + return ret; +} + + /********************************************************************* * * Entropy input management @@ -750,12 +908,12 @@ struct timer_rand_state { #define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, }; /* - * Add device- or boot-specific data to the input and nonblocking - * pools to help initialize them to unique values. + * Add device- or boot-specific data to the input pool to help + * initialize it. * - * None of this adds any entropy, it is meant to avoid the - * problem of the nonblocking pool having similar initial state - * across largely identical devices. + * None of this adds any entropy; it is meant to avoid the problem of + * the entropy pool having similar initial state across largely + * identical devices. 
*/ void add_device_randomness(const void *buf, unsigned int size) { @@ -767,11 +925,6 @@ void add_device_randomness(const void *buf, unsigned int size) _mix_pool_bytes(&input_pool, buf, size); _mix_pool_bytes(&input_pool, &time, sizeof(time)); spin_unlock_irqrestore(&input_pool.lock, flags); - - spin_lock_irqsave(&nonblocking_pool.lock, flags); - _mix_pool_bytes(&nonblocking_pool, buf, size); - _mix_pool_bytes(&nonblocking_pool, &time, sizeof(time)); - spin_unlock_irqrestore(&nonblocking_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); @@ -802,7 +955,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) sample.jiffies = jiffies; sample.cycles = random_get_entropy(); sample.num = num; - r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool; + r = &input_pool; mix_pool_bytes(r, &sample, sizeof(sample)); /* @@ -918,11 +1071,21 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_mix(fast_pool); add_interrupt_bench(cycles); + if (!crng_ready()) { + if ((fast_pool->count >= 64) && + crng_fast_load((char *) fast_pool->pool, + sizeof(fast_pool->pool))) { + fast_pool->count = 0; + fast_pool->last = now; + } + return; + } + if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ)) return; - r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool; + r = &input_pool; if (!spin_trylock(&r->lock)) return; @@ -966,9 +1129,6 @@ EXPORT_SYMBOL_GPL(add_disk_randomness); * *********************************************************************/ -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int rsvd); - /* * This utility inline function is responsible for transferring entropy * from the primary pool to the secondary extraction pool. 
We make @@ -1143,6 +1303,36 @@ static void extract_buf(struct entropy_store *r, __u8 *out) memzero_explicit(&hash, sizeof(hash)); } +static ssize_t _extract_entropy(struct entropy_store *r, void *buf, + size_t nbytes, int fips) +{ + ssize_t ret = 0, i; + __u8 tmp[EXTRACT_SIZE]; + unsigned long flags; + + while (nbytes) { + extract_buf(r, tmp); + + if (fips) { + spin_lock_irqsave(&r->lock, flags); + if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) + panic("Hardware RNG duplicated output!\n"); + memcpy(r->last_data, tmp, EXTRACT_SIZE); + spin_unlock_irqrestore(&r->lock, flags); + } + i = min_t(int, nbytes, EXTRACT_SIZE); + memcpy(buf, tmp, i); + nbytes -= i; + buf += i; + ret += i; + } + + /* Wipe data just returned from memory */ + memzero_explicit(tmp, sizeof(tmp)); + + return ret; +} + /* * This function extracts randomness from the "entropy pool", and * returns it in a buffer. @@ -1155,7 +1345,6 @@ static void extract_buf(struct entropy_store *r, __u8 *out) static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int reserved) { - ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; unsigned long flags; @@ -1179,27 +1368,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); - while (nbytes) { - extract_buf(r, tmp); - - if (fips_enabled) { - spin_lock_irqsave(&r->lock, flags); - if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) - panic("Hardware RNG duplicated output!\n"); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - spin_unlock_irqrestore(&r->lock, flags); - } - i = min_t(int, nbytes, EXTRACT_SIZE); - memcpy(buf, tmp, i); - nbytes -= i; - buf += i; - ret += i; - } - - /* Wipe data just returned from memory */ - memzero_explicit(tmp, sizeof(tmp)); - - return ret; + return _extract_entropy(r, buf, nbytes, fips_enabled); } /* @@ -1254,15 +1423,26 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, */ void get_random_bytes(void 
*buf, int nbytes) { + __u8 tmp[CHACHA20_BLOCK_SIZE]; + #if DEBUG_RANDOM_BOOT > 0 - if (unlikely(nonblocking_pool.initialized == 0)) + if (!crng_ready()) printk(KERN_NOTICE "random: %pF get_random_bytes called " - "with %d bits of entropy available\n", - (void *) _RET_IP_, - nonblocking_pool.entropy_total); + "with crng_init = %d\n", (void *) _RET_IP_, crng_init); #endif trace_get_random_bytes(nbytes, _RET_IP_); - extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0); + + while (nbytes >= CHACHA20_BLOCK_SIZE) { + extract_crng(buf); + buf += CHACHA20_BLOCK_SIZE; + nbytes -= CHACHA20_BLOCK_SIZE; + } + + if (nbytes > 0) { + extract_crng(tmp); + memcpy(buf, tmp, nbytes); + memzero_explicit(tmp, nbytes); + } } EXPORT_SYMBOL(get_random_bytes); @@ -1280,7 +1460,7 @@ int add_random_ready_callback(struct random_ready_callback *rdy) unsigned long flags; int err = -EALREADY; - if (likely(nonblocking_pool.initialized)) + if (crng_ready()) return err; owner = rdy->owner; @@ -1288,7 +1468,7 @@ int add_random_ready_callback(struct random_ready_callback *rdy) return -ENOENT; spin_lock_irqsave(&random_ready_list_lock, flags); - if (nonblocking_pool.initialized) + if (crng_ready()) goto out; owner = NULL; @@ -1352,7 +1532,7 @@ void get_random_bytes_arch(void *buf, int nbytes) } if (nbytes) - extract_entropy(&nonblocking_pool, p, nbytes, 0, 0); + get_random_bytes(p, nbytes); } EXPORT_SYMBOL(get_random_bytes_arch); @@ -1397,7 +1577,7 @@ static int rand_initialize(void) { init_std_data(&input_pool); init_std_data(&blocking_pool); - init_std_data(&nonblocking_pool); + crng_initialize(&primary_crng); return 0; } early_initcall(rand_initialize); @@ -1459,22 +1639,22 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { + unsigned long flags; static int maxwarn = 10; int ret; - if (unlikely(nonblocking_pool.initialized == 0) && - maxwarn > 0) { + if (!crng_ready() && 
maxwarn > 0) { maxwarn--; printk(KERN_NOTICE "random: %s: uninitialized urandom read " - "(%zd bytes read, %d bits of entropy available)\n", - current->comm, nbytes, nonblocking_pool.entropy_total); + "(%zd bytes read)\n", + current->comm, nbytes); + spin_lock_irqsave(&primary_crng.lock, flags); + crng_init_cnt = 0; + spin_unlock_irqrestore(&primary_crng.lock, flags); } - nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); - ret = extract_entropy_user(&nonblocking_pool, buf, nbytes); - - trace_urandom_read(8 * nbytes, ENTROPY_BITS(&nonblocking_pool), - ENTROPY_BITS(&input_pool)); + ret = extract_crng_user(buf, nbytes); + trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool)); return ret; } @@ -1520,10 +1700,7 @@ static ssize_t random_write(struct file *file, const char __user *buffer, { size_t ret; - ret = write_pool(&blocking_pool, buffer, count); - if (ret) - return ret; - ret = write_pool(&nonblocking_pool, buffer, count); + ret = write_pool(&input_pool, buffer, count); if (ret) return ret; @@ -1574,7 +1751,6 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; input_pool.entropy_count = 0; - nonblocking_pool.entropy_count = 0; blocking_pool.entropy_count = 0; return 0; default: @@ -1616,11 +1792,10 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, if (flags & GRND_RANDOM) return _random_read(flags & GRND_NONBLOCK, buf, count); - if (unlikely(nonblocking_pool.initialized == 0)) { + if (!crng_ready()) { if (flags & GRND_NONBLOCK) return -EAGAIN; - wait_event_interruptible(urandom_init_wait, - nonblocking_pool.initialized); + crng_wait_ready(); if (signal_pending(current)) return -ERESTARTSYS; } @@ -1856,18 +2031,17 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, { struct entropy_store *poolp = &input_pool; - if (unlikely(nonblocking_pool.initialized == 0)) - poolp = &nonblocking_pool; - else { - /* Suspend writing if we're above the 
trickle - * threshold. We'll be woken up again once below - * random_write_wakeup_thresh, or when the calling - * thread is about to terminate. - */ - wait_event_interruptible(random_write_wait, - kthread_should_stop() || - ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); + if (!crng_ready()) { + crng_fast_load(buffer, count); + return; } + + /* Suspend writing if we're above the trickle threshold. + * We'll be woken up again once below random_write_wakeup_thresh, + * or when the calling thread is about to terminate. + */ + wait_event_interruptible(random_write_wait, kthread_should_stop() || + ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); mix_pool_bytes(poolp, buffer, count); credit_entropy_bits(poolp, entropy); } diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h index 274bbaeeed0f..20d20f681a72 100644 --- a/include/crypto/chacha20.h +++ b/include/crypto/chacha20.h @@ -16,6 +16,7 @@ struct chacha20_ctx { u32 key[8]; }; +void chacha20_block(u32 *state, void *stream); void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keysize); diff --git a/lib/Makefile b/lib/Makefile index 499fb354d627..34e205facfa3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,7 +22,7 @@ KCOV_INSTRUMENT_hweight.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ - sha1.o md5.o irq_regs.o argv_split.o \ + sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o diff --git a/lib/chacha20.c b/lib/chacha20.c new file mode 100644 index 000000000000..250ceed9ec9a --- /dev/null +++ b/lib/chacha20.c @@ -0,0 +1,79 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539 + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; 
you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +static inline u32 rotl32(u32 v, u8 n) +{ + return (v << n) | (v >> (sizeof(v) * 8 - n)); +} + +extern void chacha20_block(u32 *state, void *stream) +{ + u32 x[16], *out = stream; + int i; + + for (i = 0; i < ARRAY_SIZE(x); i++) + x[i] = state[i]; + + for (i = 0; i < 20; i += 2) { + x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16); + + x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); + + x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8); + + x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7); + + x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16); + + x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); + + x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8); + + x[10] += x[15]; x[5] = 
rotl32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); + } + + for (i = 0; i < ARRAY_SIZE(x); i++) + out[i] = cpu_to_le32(x[i] + state[i]); + + state[12]++; +} +EXPORT_SYMBOL(chacha20_block); From 1e7f583af67be4ff091d0aeb863c649efd7a9112 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 2 May 2016 02:04:41 -0400 Subject: [PATCH 6/8] random: make /dev/urandom scalable for silly userspace programs On a 4 socket (NUMA) system where a large number of application threads were all trying to read from /dev/urandom, this can result in the system spending 80% of its time contending on the global urandom spinlock. The application should have used its own PRNG, but let's try to keep it from running, lemming-like, straight over the locking cliff. Reported-by: Andi Kleen Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 62 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index dc2a9c2d8dcf..2a30d9718a1b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -436,6 +436,8 @@ static int crng_init = 0; #define crng_ready() (likely(crng_init > 0)) static int crng_init_cnt = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) +static void _extract_crng(struct crng_state *crng, + __u8 out[CHACHA20_BLOCK_SIZE]); static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]); static void process_random_ready_list(void); @@ -756,6 +758,16 @@ static void credit_entropy_bits_safe(struct entropy_store *r, int nbits) static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); +#ifdef CONFIG_NUMA +/* + * Hack to deal with crazy userspace programs when they are all trying + * to access /dev/urandom in parallel. The programs are almost + * certainly doing something terribly wrong, but we'll work around + * their brain damage. 
+ */ +static struct crng_state **crng_node_pool __read_mostly; +#endif + static void crng_initialize(struct crng_state *crng) { int i; @@ -815,7 +827,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) if (num == 0) return; } else - extract_crng(buf.block); + _extract_crng(&primary_crng, buf.block); spin_lock_irqsave(&primary_crng.lock, flags); for (i = 0; i < 8; i++) { unsigned long rv; @@ -835,19 +847,26 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) spin_unlock_irqrestore(&primary_crng.lock, flags); } +static inline void maybe_reseed_primary_crng(void) +{ + if (crng_init > 2 && + time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL)) + crng_reseed(&primary_crng, &input_pool); +} + static inline void crng_wait_ready(void) { wait_event_interruptible(crng_init_wait, crng_ready()); } -static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) +static void _extract_crng(struct crng_state *crng, + __u8 out[CHACHA20_BLOCK_SIZE]) { unsigned long v, flags; - struct crng_state *crng = &primary_crng; if (crng_init > 1 && time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng, &input_pool); + crng_reseed(crng, crng == &primary_crng ? 
&input_pool : NULL); spin_lock_irqsave(&crng->lock, flags); if (arch_get_random_long(&v)) crng->state[14] ^= v; @@ -857,6 +876,19 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) spin_unlock_irqrestore(&crng->lock, flags); } +static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) +{ + struct crng_state *crng = NULL; + +#ifdef CONFIG_NUMA + if (crng_node_pool) + crng = crng_node_pool[numa_node_id()]; + if (crng == NULL) +#endif + crng = &primary_crng; + _extract_crng(crng, out); +} + static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { ssize_t ret = 0, i; @@ -1575,9 +1607,31 @@ static void init_std_data(struct entropy_store *r) */ static int rand_initialize(void) { +#ifdef CONFIG_NUMA + int i; + int num_nodes = num_possible_nodes(); + struct crng_state *crng; + struct crng_state **pool; +#endif + init_std_data(&input_pool); init_std_data(&blocking_pool); crng_initialize(&primary_crng); + +#ifdef CONFIG_NUMA + pool = kmalloc(num_nodes * sizeof(void *), + GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO); + for (i=0; i < num_nodes; i++) { + crng = kmalloc_node(sizeof(struct crng_state), + GFP_KERNEL | __GFP_NOFAIL, i); + spin_lock_init(&crng->lock); + crng_initialize(crng); + pool[i] = crng; + + } + mb(); + crng_node_pool = pool; +#endif return 0; } early_initcall(rand_initialize); From c92e040d575a7389d72e7e6f25e2033bfb867f8b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 4 May 2016 13:29:18 -0400 Subject: [PATCH 7/8] random: add backtracking protection to the CRNG Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 54 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 2a30d9718a1b..783dee11cdc9 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -438,7 +438,8 @@ static int crng_init_cnt = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) static void _extract_crng(struct crng_state *crng, __u8 
out[CHACHA20_BLOCK_SIZE]); -static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]); +static void _crng_backtrack_protect(struct crng_state *crng, + __u8 tmp[CHACHA20_BLOCK_SIZE], int used); static void process_random_ready_list(void); /********************************************************************** @@ -826,8 +827,11 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) num = extract_entropy(r, &buf, 32, 16, 0); if (num == 0) return; - } else + } else { _extract_crng(&primary_crng, buf.block); + _crng_backtrack_protect(&primary_crng, buf.block, + CHACHA20_KEY_SIZE); + } spin_lock_irqsave(&primary_crng.lock, flags); for (i = 0; i < 8; i++) { unsigned long rv; @@ -889,9 +893,46 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) _extract_crng(crng, out); } +/* + * Use the leftover bytes from the CRNG block output (if there is + * enough) to mutate the CRNG key to provide backtracking protection. + */ +static void _crng_backtrack_protect(struct crng_state *crng, + __u8 tmp[CHACHA20_BLOCK_SIZE], int used) +{ + unsigned long flags; + __u32 *s, *d; + int i; + + used = round_up(used, sizeof(__u32)); + if (used + CHACHA20_KEY_SIZE > CHACHA20_BLOCK_SIZE) { + extract_crng(tmp); + used = 0; + } + spin_lock_irqsave(&crng->lock, flags); + s = (__u32 *) &tmp[used]; + d = &crng->state[4]; + for (i=0; i < 8; i++) + *d++ ^= *s++; + spin_unlock_irqrestore(&crng->lock, flags); +} + +static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) +{ + struct crng_state *crng = NULL; + +#ifdef CONFIG_NUMA + if (crng_node_pool) + crng = crng_node_pool[numa_node_id()]; + if (crng == NULL) +#endif + crng = &primary_crng; + _crng_backtrack_protect(crng, tmp, used); +} + static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0, i; + ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE; __u8 tmp[CHACHA20_BLOCK_SIZE]; int large_request = (nbytes > 256); @@ -916,6 +957,7 @@ static ssize_t extract_crng_user(void __user *buf, 
size_t nbytes) buf += i; ret += i; } + crng_backtrack_protect(tmp, i); /* Wipe data just written to memory */ memzero_explicit(tmp, sizeof(tmp)); @@ -1473,8 +1515,10 @@ void get_random_bytes(void *buf, int nbytes) if (nbytes > 0) { extract_crng(tmp); memcpy(buf, tmp, nbytes); - memzero_explicit(tmp, nbytes); - } + crng_backtrack_protect(tmp, nbytes); + } else + crng_backtrack_protect(tmp, CHACHA20_BLOCK_SIZE); + memzero_explicit(tmp, sizeof(tmp)); } EXPORT_SYMBOL(get_random_bytes); From 86a574de4590ffe6fd3f3ca34cdcf655a78e36ec Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 3 Jul 2016 17:01:26 -0400 Subject: [PATCH 8/8] random: strengthen input validation for RNDADDTOENTCNT Don't allow RNDADDTOENTCNT or RNDADDENTROPY to accept a negative entropy value. It doesn't make any sense to subtract from the entropy counter, and it can trigger a warning: random: negative entropy/overflow: pool input count -40000 ------------[ cut here ]------------ WARNING: CPU: 3 PID: 6828 at drivers/char/random.c:670[< none >] credit_entropy_bits+0x21e/0xad0 drivers/char/random.c:670 Modules linked in: CPU: 3 PID: 6828 Comm: a.out Not tainted 4.7.0-rc4+ #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffffffff880b58e0 ffff88005dd9fcb0 ffffffff82cc838f ffffffff87158b40 fffffbfff1016b1c 0000000000000000 0000000000000000 ffffffff87158b40 ffffffff83283dae 0000000000000009 ffff88005dd9fcf8 ffffffff8136d27f Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x12e/0x18f lib/dump_stack.c:51 [] __warn+0x19f/0x1e0 kernel/panic.c:516 [] warn_slowpath_null+0x2c/0x40 kernel/panic.c:551 [] credit_entropy_bits+0x21e/0xad0 drivers/char/random.c:670 [< inline >] credit_entropy_bits_safe drivers/char/random.c:734 [] random_ioctl+0x21d/0x250 drivers/char/random.c:1546 [< inline >] vfs_ioctl fs/ioctl.c:43 [] do_vfs_ioctl+0x18c/0xff0 fs/ioctl.c:674 [< inline >] SYSC_ioctl fs/ioctl.c:689 [] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:680 [] 
entry_SYSCALL_64_fastpath+0x23/0xc1 arch/x86/entry/entry_64.S:207 ---[ end trace 5d4902b2ba842f1f ]--- This was triggered using the test program: // autogenerated by syzkaller (http://github.com/google/syzkaller) int main() { int fd = open("/dev/random", O_RDWR); int val = -5000; ioctl(fd, RNDADDTOENTCNT, &val); return 0; } It's harmless in that (a) only root can trigger it, and (b) after complaining, the code never actually lets the entropy count go negative, but it's better to simply not allow userspace to pass in a negative entropy value at all. Google-Bug-Id: #29575089 Reported-By: Dmitry Vyukov Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 783dee11cdc9..8d0af74f6569 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -738,15 +738,18 @@ retry: } } -static void credit_entropy_bits_safe(struct entropy_store *r, int nbits) +static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) { const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1)); + if (nbits < 0) + return -EINVAL; + /* Cap the value to avoid overflows */ nbits = min(nbits, nbits_max); - nbits = max(nbits, -nbits_max); credit_entropy_bits(r, nbits); + return 0; } /********************************************************************* @@ -1823,8 +1826,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; - credit_entropy_bits_safe(&input_pool, ent_count); - return 0; + return credit_entropy_bits_safe(&input_pool, ent_count); case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1838,8 +1840,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) size); if (retval < 0) return retval; - credit_entropy_bits_safe(&input_pool, ent_count); - return 0; + return credit_entropy_bits_safe(&input_pool, ent_count); case 
RNDZAPENTCNT: case RNDCLEARPOOL: /*