// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright (C) 2017-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
 * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved.
 *
 * This driver produces cryptographically secure pseudorandom data. It is divided
 * into roughly six sections, each with a section header:
 *
 *   - Initialization and readiness waiting.
 *   - Fast key erasure RNG, the "crng".
 *   - Entropy accumulation and extraction routines.
 *   - Entropy collection routines.
 *   - Userspace reader/writer interfaces.
 *   - Sysctl interface.
 *
 * The high level overview is that there is one input pool, into which
 * various pieces of data are hashed. Prior to initialization, some of that
 * data is then "credited" as having a certain number of bits of entropy.
 * When enough bits of entropy are available, the hash is finalized and
 * handed as a key to a stream cipher that expands it indefinitely for
 * various consumers. This key is periodically refreshed as the various
 * entropy collectors, described below, add data to the input pool.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/utsname.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/workqueue.h>
#include <linux/irq.h>
#include <linux/ratelimit.h>
#include <linux/syscalls.h>
#include <linux/completion.h>
#include <linux/uuid.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include <linux/siphash.h>
#include <linux/sched/isolation.h>
#include <crypto/chacha.h>
#include <crypto/blake2s.h>
#ifdef CONFIG_VDSO_GETRANDOM
#include <vdso/getrandom.h>
#include <vdso/datapage.h>
#include <vdso/vsyscall.h>
#endif
#include <asm/archrandom.h>
#include <asm/processor.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/io.h>

/*********************************************************************
 *
 * Initialization and readiness waiting.
 *
 * Much of the RNG infrastructure is devoted to various dependencies
 * being able to wait until the RNG has collected enough entropy and
 * is ready for safe consumption.
 *
 *********************************************************************/

/*
 * crng_init is protected by base_crng->lock, and only increases
 * its value (from empty->early->ready).
 */
static enum {
	CRNG_EMPTY = 0, /* Little to no entropy collected */
	CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */
	CRNG_READY = 2  /* Fully initialized with POOL_READY_BITS collected */
} crng_init __read_mostly = CRNG_EMPTY;
static DEFINE_STATIC_KEY_FALSE(crng_is_ready);
#define crng_ready() (static_branch_likely(&crng_is_ready) || crng_init >= CRNG_READY)
/* Various types of waiters for crng_init->CRNG_READY transition. */
static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
static struct fasync_struct *fasync;
static ATOMIC_NOTIFIER_HEAD(random_ready_notifier);

/* Control how we warn userspace. */
static struct ratelimit_state urandom_warning =
	RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE);
static int ratelimit_disable __read_mostly =
	IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM);
module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");

/*
 * Returns whether or not the input pool has been seeded and thus guaranteed
 * to supply cryptographically secure random numbers. This applies to: the
 * /dev/urandom device, the get_random_bytes function, and the get_random_{u8,
 * u16,u32,u64,long} family of functions.
 *
 * Returns: true if the input pool has been seeded.
 *          false if the input pool has not been seeded.
 */
bool rng_is_initialized(void)
{
	return crng_ready();
}
EXPORT_SYMBOL(rng_is_initialized);
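
/*
 * Example (illustrative sketch, not part of this driver): a caller that
 * cannot sleep might poll for readiness before extracting bytes. The
 * helper name gen_session_nonce() is hypothetical.
 *
 *	static bool gen_session_nonce(u8 nonce[16])
 *	{
 *		if (!rng_is_initialized())
 *			return false;
 *		get_random_bytes(nonce, 16);
 *		return true;
 *	}
 */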

static void __cold crng_set_ready(struct work_struct *work)
{
	static_branch_enable(&crng_is_ready);
}

/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */
static void try_to_generate_entropy(void);

/*
 * Wait for the input pool to be seeded and thus guaranteed to supply
 * cryptographically secure random numbers. This applies to: the /dev/urandom
 * device, the get_random_bytes function, and the get_random_{u8,u16,u32,u64,
 * long} family of functions. Using any of these functions without first
 * calling this function forfeits the guarantee of security.
 *
 * Returns: 0 if the input pool has been seeded.
 *          -ERESTARTSYS if the function was interrupted by a signal.
 */
int wait_for_random_bytes(void)
{
	while (!crng_ready()) {
		int ret;

		try_to_generate_entropy();
		ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
		if (ret)
			return ret > 0 ? 0 : ret;
	}
	return 0;
}
EXPORT_SYMBOL(wait_for_random_bytes);
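
/*
 * Example (illustrative sketch): a sleepable caller that wants the
 * security guarantee waits before extracting. The helper name
 * gen_longterm_key() is hypothetical.
 *
 *	static int gen_longterm_key(u8 key[32])
 *	{
 *		int ret = wait_for_random_bytes();
 *
 *		if (ret)
 *			return ret;
 *		get_random_bytes(key, 32);
 *		return 0;
 *	}
 */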

/*
 * Add a callback function that will be invoked when the crng is initialised,
 * or immediately if it already has been. Only use this if you are absolutely
 * sure it is required. Most users should instead be able to test
 * `rng_is_initialized()` on demand, or make use of `get_random_bytes_wait()`.
 */
int __cold execute_with_initialized_rng(struct notifier_block *nb)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&random_ready_notifier.lock, flags);
	if (crng_ready())
		nb->notifier_call(nb, 0, NULL);
	else
		ret = raw_notifier_chain_register((struct raw_notifier_head *)&random_ready_notifier.head, nb);
	spin_unlock_irqrestore(&random_ready_notifier.lock, flags);
	return ret;
}
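
/*
 * Example (illustrative sketch): deferring work until the crng is
 * ready. The names my_rng_ready() and my_ready_nb are hypothetical.
 *
 *	static int my_rng_ready(struct notifier_block *nb,
 *				unsigned long action, void *data)
 *	{
 *		... kick off work that needs secure randomness ...
 *		return 0;
 *	}
 *
 *	static struct notifier_block my_ready_nb = {
 *		.notifier_call = my_rng_ready,
 *	};
 *
 *	execute_with_initialized_rng(&my_ready_nb);
 */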

#define warn_unseeded_randomness() \
	if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \
		printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \
				__func__, (void *)_RET_IP_, crng_init)


/*********************************************************************
 *
 * Fast key erasure RNG, the "crng".
 *
 * These functions expand entropy from the entropy extractor into
 * long streams for external consumption using the "fast key erasure"
 * RNG described at <https://blog.cr.yp.to/20170723-random.html>.
 *
 * There are a few exported interfaces for use by other drivers:
 *
 *	void get_random_bytes(void *buf, size_t len)
 *	u8 get_random_u8()
 *	u16 get_random_u16()
 *	u32 get_random_u32()
 *	u32 get_random_u32_below(u32 ceil)
 *	u32 get_random_u32_above(u32 floor)
 *	u32 get_random_u32_inclusive(u32 floor, u32 ceil)
 *	u64 get_random_u64()
 *	unsigned long get_random_long()
 *
 * These interfaces will return the requested number of random bytes
 * into the given buffer or as a return value. This is equivalent to
 * a read from /dev/urandom. The u8, u16, u32, u64, long family of
 * functions may be higher performance for one-off random integers,
 * because they do a bit of buffering and do not invoke reseeding
 * until the buffer is emptied.
 *
 *********************************************************************/
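
/*
 * Example (illustrative sketch): typical uses of the interfaces listed
 * above. The variable names are hypothetical.
 *
 *	u8 iv[12];
 *	u32 idx, port;
 *
 *	get_random_bytes(iv, sizeof(iv));		arbitrary-length buffer
 *	idx = get_random_u32_below(16);			uniform over [0, 16)
 *	port = get_random_u32_inclusive(1024, 65535);	uniform over [1024, 65535]
 */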

enum {
	CRNG_RESEED_START_INTERVAL = HZ,
	CRNG_RESEED_INTERVAL = 60 * HZ
};

static struct {
	u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long));
	unsigned long generation;
	spinlock_t lock;
} base_crng = {
	.lock = __SPIN_LOCK_UNLOCKED(base_crng.lock)
};

struct crng {
	u8 key[CHACHA_KEY_SIZE];
	unsigned long generation;
	local_lock_t lock;
};

static DEFINE_PER_CPU(struct crng, crngs) = {
	.generation = ULONG_MAX,
	.lock = INIT_LOCAL_LOCK(crngs.lock),
};

/*
 * Return the interval until the next reseeding, which is normally
 * CRNG_RESEED_INTERVAL, but during early boot, it is at an interval
 * proportional to the uptime.
 */
static unsigned int crng_reseed_interval(void)
{
	static bool early_boot = true;

	if (unlikely(READ_ONCE(early_boot))) {
		time64_t uptime = ktime_get_seconds();
		if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2)
			WRITE_ONCE(early_boot, false);
		else
			return max_t(unsigned int, CRNG_RESEED_START_INTERVAL,
				     (unsigned int)uptime / 2 * HZ);
	}
	return CRNG_RESEED_INTERVAL;
}
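
/*
 * Worked example of the schedule above: at 10 seconds of uptime the
 * early-boot branch returns max(CRNG_RESEED_START_INTERVAL,
 * 10 / 2 * HZ), i.e. 5 seconds' worth of jiffies; at 1 second it
 * returns the HZ floor. Once uptime reaches CRNG_RESEED_INTERVAL / HZ
 * * 2 = 120 seconds, the interval settles at the full 60 seconds.
 */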

/* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */
static void extract_entropy(void *buf, size_t len);

/* This extracts a new crng key from the input pool. */
static void crng_reseed(struct work_struct *work)
{
	static DECLARE_DELAYED_WORK(next_reseed, crng_reseed);
	unsigned long flags;
	unsigned long next_gen;
	u8 key[CHACHA_KEY_SIZE];

	/* Immediately schedule the next reseeding, so that it fires sooner rather than later. */
	if (likely(system_unbound_wq))
		queue_delayed_work(system_unbound_wq, &next_reseed, crng_reseed_interval());

	extract_entropy(key, sizeof(key));

	/*
	 * We copy the new key into the base_crng, overwriting the old one,
	 * and update the generation counter. We avoid hitting ULONG_MAX,
	 * because the per-cpu crngs are initialized to ULONG_MAX, so this
	 * forces new CPUs that come online to always initialize.
	 */
	spin_lock_irqsave(&base_crng.lock, flags);
	memcpy(base_crng.key, key, sizeof(base_crng.key));
	next_gen = base_crng.generation + 1;
	if (next_gen == ULONG_MAX)
		++next_gen;
	WRITE_ONCE(base_crng.generation, next_gen);
#ifdef CONFIG_VDSO_GETRANDOM
	/* base_crng.generation's invalid value is ULONG_MAX, while
	 * _vdso_rng_data.generation's invalid value is 0, so add one to the
	 * former to arrive at the latter. Use smp_store_release so that this
	 * is ordered with the write above to base_crng.generation. Pairs with
	 * the smp_rmb() before the syscall in the vDSO code.
	 *
	 * Cast to unsigned long for 32-bit architectures, since atomic 64-bit
	 * operations are not supported on those architectures. This is safe
	 * because base_crng.generation is a 32-bit value. On big-endian
	 * architectures it will be stored in the upper 32 bits, but that's okay
	 * because the vDSO side only checks whether the value changed, without
	 * actually using or interpreting the value.
	 */
	smp_store_release((unsigned long *)&__arch_get_k_vdso_rng_data()->generation, next_gen + 1);
#endif
	if (!static_branch_likely(&crng_is_ready))
		crng_init = CRNG_READY;
	spin_unlock_irqrestore(&base_crng.lock, flags);
	memzero_explicit(key, sizeof(key));
}

/*
 * This generates a ChaCha block using the provided key, and then
 * immediately overwrites that key with half the block. It returns
 * the resultant ChaCha state to the user, along with the second
 * half of the block containing 32 bytes of random data that may
 * be used; random_data_len may not be greater than 32.
 *
 * The returned ChaCha state contains within it a copy of the old
 * key value, at index 4, so the state should always be zeroed out
 * immediately after using in order to maintain forward secrecy.
 * If the state cannot be erased in a timely manner, then it is
 * safer to set the random_data parameter to &chacha_state[4] so
 * that this function overwrites it before returning.
 */
static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
				  u32 chacha_state[CHACHA_STATE_WORDS],
				  u8 *random_data, size_t random_data_len)
{
	u8 first_block[CHACHA_BLOCK_SIZE];

	BUG_ON(random_data_len > 32);

	chacha_init_consts(chacha_state);
	memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE);
	memset(&chacha_state[12], 0, sizeof(u32) * 4);
	chacha20_block(chacha_state, first_block);

	memcpy(key, first_block, CHACHA_KEY_SIZE);
	memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len);
	memzero_explicit(first_block, sizeof(first_block));
}
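
/*
 * Example (illustrative sketch): the safer calling pattern described
 * above, as used by get_random_bytes_user() below, where the 32 output
 * bytes land on top of the copied key at &chacha_state[4], so the old
 * key is erased even if the state cannot be zeroed promptly:
 *
 *	u32 chacha_state[CHACHA_STATE_WORDS];
 *
 *	crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE);
 *	... generate blocks from chacha_state ...
 *	memzero_explicit(chacha_state, sizeof(chacha_state));
 */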

/*
 * This function returns a ChaCha state that you may use for generating
 * random data. It also returns up to 32 bytes on its own of random data
 * that may be used; random_data_len may not be greater than 32.
 */
static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
			    u8 *random_data, size_t random_data_len)
{
	unsigned long flags;
	struct crng *crng;

	BUG_ON(random_data_len > 32);

	/*
	 * For the fast path, we first check whether we're ready without the
	 * lock, and then re-check once locked later. In the case where we're
	 * really not ready, we do fast key erasure with the base_crng directly,
	 * extracting when crng_init is CRNG_EMPTY.
	 */
	if (!crng_ready()) {
		bool ready;

		spin_lock_irqsave(&base_crng.lock, flags);
		ready = crng_ready();
		if (!ready) {
			if (crng_init == CRNG_EMPTY)
				extract_entropy(base_crng.key, sizeof(base_crng.key));
			crng_fast_key_erasure(base_crng.key, chacha_state,
					      random_data, random_data_len);
		}
		spin_unlock_irqrestore(&base_crng.lock, flags);
		if (!ready)
			return;
	}

	local_lock_irqsave(&crngs.lock, flags);
	crng = raw_cpu_ptr(&crngs);

	/*
	 * If our per-cpu crng is older than the base_crng, then it means
	 * somebody reseeded the base_crng. In that case, we do fast key
	 * erasure on the base_crng, and use its output as the new key
	 * for our per-cpu crng. This brings us up to date with base_crng.
	 */
	if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) {
		spin_lock(&base_crng.lock);
		crng_fast_key_erasure(base_crng.key, chacha_state,
				      crng->key, sizeof(crng->key));
		crng->generation = base_crng.generation;
		spin_unlock(&base_crng.lock);
	}

	/*
	 * Finally, when we've made it this far, our per-cpu crng has an up
	 * to date key, and we can do fast key erasure with it to produce
	 * some random data and a ChaCha state for the caller. All other
	 * branches of this function are "unlikely", so most of the time we
	 * should wind up here immediately.
	 */
	crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len);
	local_unlock_irqrestore(&crngs.lock, flags);
}

static void _get_random_bytes(void *buf, size_t len)
{
	u32 chacha_state[CHACHA_STATE_WORDS];
	u8 tmp[CHACHA_BLOCK_SIZE];
	size_t first_block_len;

	if (!len)
		return;

	first_block_len = min_t(size_t, 32, len);
	crng_make_state(chacha_state, buf, first_block_len);
	len -= first_block_len;
	buf += first_block_len;

	while (len) {
		if (len < CHACHA_BLOCK_SIZE) {
			chacha20_block(chacha_state, tmp);
			memcpy(buf, tmp, len);
			memzero_explicit(tmp, sizeof(tmp));
			break;
		}

		chacha20_block(chacha_state, buf);
		if (unlikely(chacha_state[12] == 0))
			++chacha_state[13];
		len -= CHACHA_BLOCK_SIZE;
		buf += CHACHA_BLOCK_SIZE;
	}

	memzero_explicit(chacha_state, sizeof(chacha_state));
}

/*
 * This returns random bytes in arbitrary quantities. The quality of the
 * random bytes is as good as /dev/urandom. In order to ensure that the
 * randomness provided by this function is okay, the function
 * wait_for_random_bytes() should be called and return 0 at least once
 * at any point prior.
 */
void get_random_bytes(void *buf, size_t len)
{
	warn_unseeded_randomness();
	_get_random_bytes(buf, len);
}
EXPORT_SYMBOL(get_random_bytes);

static ssize_t get_random_bytes_user(struct iov_iter *iter)
{
	u32 chacha_state[CHACHA_STATE_WORDS];
	u8 block[CHACHA_BLOCK_SIZE];
	size_t ret = 0, copied;

	if (unlikely(!iov_iter_count(iter)))
		return 0;

	/*
	 * Immediately overwrite the ChaCha key at index 4 with random
	 * bytes, in case userspace causes copy_to_iter() below to sleep
	 * forever, so that we still retain forward secrecy in that case.
	 */
	crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE);
	/*
	 * However, if we're doing a read of len <= 32, we don't need to
	 * use chacha_state after, so we can simply return those bytes to
	 * the user directly.
	 */
	if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) {
		ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter);
		goto out_zero_chacha;
	}

	for (;;) {
		chacha20_block(chacha_state, block);
		if (unlikely(chacha_state[12] == 0))
			++chacha_state[13];

		copied = copy_to_iter(block, sizeof(block), iter);
		ret += copied;
		if (!iov_iter_count(iter) || copied != sizeof(block))
			break;

		BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0);
		if (ret % PAGE_SIZE == 0) {
			if (signal_pending(current))
				break;
			cond_resched();
		}
	}

	memzero_explicit(block, sizeof(block));
out_zero_chacha:
	memzero_explicit(chacha_state, sizeof(chacha_state));
	return ret ? ret : -EFAULT;
}

/*
 * Batched entropy returns random integers. The quality of the random
 * number is as good as /dev/urandom. In order to ensure that the randomness
 * provided by this function is okay, the function wait_for_random_bytes()
 * should be called and return 0 at least once at any point prior.
 */

#define DEFINE_BATCHED_ENTROPY(type)						\
struct batch_ ##type {								\
	/*									\
	 * We make this 1.5x a ChaCha block, so that we get the			\
	 * remaining 32 bytes from fast key erasure, plus one full		\
	 * block from the detached ChaCha state. We can increase		\
	 * the size of this later if needed so long as we keep the		\
	 * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE.		\
	 */									\
	type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))];		\
	local_lock_t lock;							\
	unsigned long generation;						\
	unsigned int position;							\
};										\
										\
static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = {	\
	.lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock),			\
	.position = UINT_MAX							\
};										\
										\
type get_random_ ##type(void)							\
{										\
	type ret;								\
	unsigned long flags;							\
	struct batch_ ##type *batch;						\
	unsigned long next_gen;							\
										\
	warn_unseeded_randomness();						\
										\
	if (!crng_ready()) {							\
		_get_random_bytes(&ret, sizeof(ret));				\
		return ret;							\
	}									\
										\
	local_lock_irqsave(&batched_entropy_ ##type.lock, flags);		\
	batch = raw_cpu_ptr(&batched_entropy_##type);				\
										\
	next_gen = READ_ONCE(base_crng.generation);				\
	if (batch->position >= ARRAY_SIZE(batch->entropy) ||			\
	    next_gen != batch->generation) {					\
		_get_random_bytes(batch->entropy, sizeof(batch->entropy));	\
		batch->position = 0;						\
		batch->generation = next_gen;					\
	}									\
										\
	ret = batch->entropy[batch->position];					\
	batch->entropy[batch->position] = 0;					\
	++batch->position;							\
	local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags);		\
	return ret;								\
}										\
EXPORT_SYMBOL(get_random_ ##type);

DEFINE_BATCHED_ENTROPY(u8)
DEFINE_BATCHED_ENTROPY(u16)
DEFINE_BATCHED_ENTROPY(u32)
DEFINE_BATCHED_ENTROPY(u64)

u32 __get_random_u32_below(u32 ceil)
{
	/*
	 * This is the slow path for variable ceil. It is still fast, most of
	 * the time, by doing traditional reciprocal multiplication and
	 * opportunistically comparing the lower half to ceil itself, before
	 * falling back to computing a larger bound, and then rejecting samples
	 * whose lower half would indicate a range indivisible by ceil. The use
	 * of `-ceil % ceil` is analogous to `2^32 % ceil`, but is computable
	 * in 32-bits.
	 */
	u32 rand = get_random_u32();
	u64 mult;

	/*
	 * This function is technically undefined for ceil == 0, and in fact
	 * for the non-underscored constant version in the header, we build bug
	 * on that. But for the non-constant case, it's convenient to have that
	 * evaluate to being a straight call to get_random_u32(), so that
	 * get_random_u32_inclusive() can work over its whole range without
	 * undefined behavior.
	 */
	if (unlikely(!ceil))
		return rand;

	mult = (u64)ceil * rand;
	if (unlikely((u32)mult < ceil)) {
		u32 bound = -ceil % ceil;
		while (unlikely((u32)mult < bound))
			mult = (u64)ceil * get_random_u32();
	}
	return mult >> 32;
}
EXPORT_SYMBOL(__get_random_u32_below);
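
/*
 * Worked example of the rejection bound above: for ceil = 3,
 * -ceil % ceil = (2^32 - 3) % 3 = 1, which equals 2^32 % 3. Multiplying
 * a uniform 32-bit rand by 3 yields mult in [0, 3 * 2^32), and
 * rejecting the samples whose low half falls below the bound leaves
 * each of the three outputs (mult >> 32) in {0, 1, 2} with exactly
 * (2^32 - 1) / 3 accepted inputs, i.e. an unbiased result.
 */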

#ifdef CONFIG_SMP
/*
 * This function is called when the CPU is coming up, with entry
 * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP.
 */
int __cold random_prepare_cpu(unsigned int cpu)
{
	/*
	 * When the cpu comes back online, immediately invalidate both
	 * the per-cpu crng and all batches, so that we serve fresh
	 * randomness.
	 */
	per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX;
	per_cpu_ptr(&batched_entropy_u8, cpu)->position = UINT_MAX;
	per_cpu_ptr(&batched_entropy_u16, cpu)->position = UINT_MAX;
	per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX;
	per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX;
	return 0;
}
#endif


/**********************************************************************
 *
 * Entropy accumulation and extraction routines.
 *
 * Callers may add entropy via:
 *
 *     static void mix_pool_bytes(const void *buf, size_t len)
 *
 * After which, if added entropy should be credited:
 *
 *     static void credit_init_bits(size_t bits)
 *
 * Finally, extract entropy via:
 *
 *     static void extract_entropy(void *buf, size_t len)
 *
 **********************************************************************/

enum {
	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
	POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */
	POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */
};

static struct {
	struct blake2s_state hash;
	spinlock_t lock;
	unsigned int init_bits;
} input_pool = {
	.hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
		    BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
		    BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 },
	.hash.outlen = BLAKE2S_HASH_SIZE,
	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
};

static void _mix_pool_bytes(const void *buf, size_t len)
{
	blake2s_update(&input_pool.hash, buf, len);
}

/*
 * This function adds bytes into the input pool. It does not
 * update the initialization bit counter; the caller should call
 * credit_init_bits if this is appropriate.
 */
static void mix_pool_bytes(const void *buf, size_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&input_pool.lock, flags);
	_mix_pool_bytes(buf, len);
	spin_unlock_irqrestore(&input_pool.lock, flags);
}

/*
 * This is an HKDF-like construction for using the hashed collected entropy
 * as a PRF key, that's then expanded block-by-block.
 */
static void extract_entropy(void *buf, size_t len)
{
	unsigned long flags;
	u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
	struct {
		unsigned long rdseed[32 / sizeof(long)];
		size_t counter;
	} block;
	size_t i, longs;

	for (i = 0; i < ARRAY_SIZE(block.rdseed);) {
		longs = arch_get_random_seed_longs(&block.rdseed[i], ARRAY_SIZE(block.rdseed) - i);
		if (longs) {
			i += longs;
			continue;
		}
		longs = arch_get_random_longs(&block.rdseed[i], ARRAY_SIZE(block.rdseed) - i);
		if (longs) {
			i += longs;
			continue;
		}
		block.rdseed[i++] = random_get_entropy();
	}

	spin_lock_irqsave(&input_pool.lock, flags);

	/* seed = HASHPRF(last_key, entropy_input) */
	blake2s_final(&input_pool.hash, seed);

	/* next_key = HASHPRF(seed, RDSEED || 0) */
	block.counter = 0;
	blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
	blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));

	spin_unlock_irqrestore(&input_pool.lock, flags);
	memzero_explicit(next_key, sizeof(next_key));

	while (len) {
		i = min_t(size_t, len, BLAKE2S_HASH_SIZE);
		/* output = HASHPRF(seed, RDSEED || ++counter) */
		++block.counter;
		blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
		len -= i;
		buf += i;
	}

	memzero_explicit(seed, sizeof(seed));
	memzero_explicit(&block, sizeof(block));
}
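
/*
 * Summary of the construction above (a sketch of the data flow, with
 * H_key() standing for keyed BLAKE2s):
 *
 *	seed     = H_last_key(entropy_input)
 *	next_key = H_seed(RDSEED || 0)
 *	output_i = H_seed(RDSEED || i),  i = 1, 2, ...
 *
 * The pool is then re-keyed with next_key, so an extracted seed does
 * not by itself reveal prior pool state.
 */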

#define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits)

static void __cold _credit_init_bits(size_t bits)
{
	static DECLARE_WORK(set_ready, crng_set_ready);
	unsigned int new, orig, add;
	unsigned long flags;

	if (!bits)
		return;

	add = min_t(size_t, bits, POOL_BITS);

	orig = READ_ONCE(input_pool.init_bits);
	do {
		new = min_t(unsigned int, POOL_BITS, orig + add);
	} while (!try_cmpxchg(&input_pool.init_bits, &orig, new));

	if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
		crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */
		if (static_key_initialized && system_unbound_wq)
			queue_work(system_unbound_wq, &set_ready);
		atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
#ifdef CONFIG_VDSO_GETRANDOM
		WRITE_ONCE(__arch_get_k_vdso_rng_data()->is_ready, true);
#endif
		wake_up_interruptible(&crng_init_wait);
		kill_fasync(&fasync, SIGIO, POLL_IN);
		pr_notice("crng init done\n");
		if (urandom_warning.missed)
			pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
				  urandom_warning.missed);
	} else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) {
		spin_lock_irqsave(&base_crng.lock, flags);
		/* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */
		if (crng_init == CRNG_EMPTY) {
			extract_entropy(base_crng.key, sizeof(base_crng.key));
			crng_init = CRNG_EARLY;
		}
		spin_unlock_irqrestore(&base_crng.lock, flags);
	}
}


/**********************************************************************
 *
 * Entropy collection routines.
 *
 * The following exported functions are used for pushing entropy into
 * the above entropy accumulation routines:
 *
 *	void add_device_randomness(const void *buf, size_t len);
 *	void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after);
 *	void add_bootloader_randomness(const void *buf, size_t len);
 *	void add_vmfork_randomness(const void *unique_vm_id, size_t len);
 *	void add_interrupt_randomness(int irq);
 *	void add_input_randomness(unsigned int type, unsigned int code, unsigned int value);
 *	void add_disk_randomness(struct gendisk *disk);
 *
 * add_device_randomness() adds data to the input pool that
 * is likely to differ between two devices (or possibly even per boot).
 * This would be things like MAC addresses or serial numbers, or the
 * read-out of the RTC. This does *not* credit any actual entropy to
 * the pool, but it initializes the pool to different values for devices
 * that might otherwise be identical and have very little entropy
 * available to them (particularly common in the embedded world).
 *
 * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
 * entropy as specified by the caller. If the entropy pool is full it will
 * block until more entropy is needed.
 *
 * add_bootloader_randomness() is called by bootloader drivers, such as EFI
 * and device tree, and credits its input depending on whether or not the
 * command line option 'random.trust_bootloader' is set.
 *
 * add_vmfork_randomness() adds a unique (but not necessarily secret) ID
 * representing the current instance of a VM to the pool, without crediting,
 * and then force-reseeds the crng so that it takes effect immediately.
 *
 * add_interrupt_randomness() uses the interrupt timing as random
 * inputs to the entropy pool. Using the cycle counters and the irq source
 * as inputs, it feeds the input pool roughly once a second or after 64
 * interrupts, crediting 1 bit of entropy for whichever comes first.
 *
 * add_input_randomness() uses the input layer interrupt timing, as well
 * as the event type information from the hardware.
 *
 * add_disk_randomness() uses what amounts to the seek time of block
 * layer request events, on a per-disk_devt basis, as input to the
 * entropy pool. Note that high-speed solid state drives with very low
 * seek times do not make for good sources of entropy, as their seek
 * times are usually fairly consistent.
 *
 * The last two routines try to estimate how many bits of entropy
 * to credit. They do this by keeping track of the first and second
 * order deltas of the event timings.
 *
 **********************************************************************/
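
/*
 * Example (illustrative sketch): a driver mixing per-device identity
 * into the pool, per the add_device_randomness() description above.
 * The mac buffer is hypothetical.
 *
 *	u8 mac[6];
 *
 *	... read the device's MAC address into mac ...
 *	add_device_randomness(mac, sizeof(mac));
 */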

static bool trust_cpu __initdata = true;
static bool trust_bootloader __initdata = true;
static int __init parse_trust_cpu(char *arg)
{
	return kstrtobool(arg, &trust_cpu);
}
static int __init parse_trust_bootloader(char *arg)
{
	return kstrtobool(arg, &trust_bootloader);
}
early_param("random.trust_cpu", parse_trust_cpu);
early_param("random.trust_bootloader", parse_trust_bootloader);
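
/*
 * Example: distrusting both sources on the kernel command line, for
 * instance when neither the CPU vendor's RNG nor the bootloader seed
 * should count toward initialization:
 *
 *	random.trust_cpu=0 random.trust_bootloader=0
 */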

static int random_pm_notification(struct notifier_block *nb, unsigned long action, void *data)
{
	unsigned long flags, entropy = random_get_entropy();

	/*
	 * Encode a representation of how long the system has been suspended,
	 * in a way that is distinct from prior system suspends.
	 */
	ktime_t stamps[] = { ktime_get(), ktime_get_boottime(), ktime_get_real() };

	spin_lock_irqsave(&input_pool.lock, flags);
	_mix_pool_bytes(&action, sizeof(action));
	_mix_pool_bytes(stamps, sizeof(stamps));
	_mix_pool_bytes(&entropy, sizeof(entropy));
	spin_unlock_irqrestore(&input_pool.lock, flags);

	if (crng_ready() && (action == PM_RESTORE_PREPARE ||
	    (action == PM_POST_SUSPEND && !IS_ENABLED(CONFIG_PM_AUTOSLEEP) &&
	     !IS_ENABLED(CONFIG_PM_USERSPACE_AUTOSLEEP)))) {
		crng_reseed(NULL);
		pr_notice("crng reseeded on system resumption\n");
	}
	return 0;
}

static struct notifier_block pm_notifier = { .notifier_call = random_pm_notification };

/*
 * This is called extremely early, before time keeping functionality is
 * available, but arch randomness is. Interrupts are not yet enabled.
 */
void __init random_init_early(const char *command_line)
{
	unsigned long entropy[BLAKE2S_BLOCK_SIZE / sizeof(long)];
	size_t i, longs, arch_bits;

#if defined(LATENT_ENTROPY_PLUGIN)
	static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy;
	_mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed));
#endif

	for (i = 0, arch_bits = sizeof(entropy) * 8; i < ARRAY_SIZE(entropy);) {
		longs = arch_get_random_seed_longs(entropy, ARRAY_SIZE(entropy) - i);
		if (longs) {
			_mix_pool_bytes(entropy, sizeof(*entropy) * longs);
			i += longs;
			continue;
		}
		longs = arch_get_random_longs(entropy, ARRAY_SIZE(entropy) - i);
		if (longs) {
			_mix_pool_bytes(entropy, sizeof(*entropy) * longs);
			i += longs;
			continue;
		}
		arch_bits -= sizeof(*entropy) * 8;
		++i;
	}

	_mix_pool_bytes(init_utsname(), sizeof(*(init_utsname())));
	_mix_pool_bytes(command_line, strlen(command_line));

	/* Reseed if already seeded by earlier phases. */
	if (crng_ready())
		crng_reseed(NULL);
	else if (trust_cpu)
		_credit_init_bits(arch_bits);
}

/*
 * This is called a little bit after the prior function, and now there is
 * access to timestamp counters. Interrupts are not yet enabled.
 */
void __init random_init(void)
{
	unsigned long entropy = random_get_entropy();
	ktime_t now = ktime_get_real();

	_mix_pool_bytes(&now, sizeof(now));
	_mix_pool_bytes(&entropy, sizeof(entropy));
	add_latent_entropy();

	/*
	 * If we were initialized by the cpu or bootloader before jump labels
	 * or workqueues are initialized, then we should enable the static
	 * branch here, where it's guaranteed that these have been initialized.
	 */
	if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)
		crng_set_ready(NULL);

	/* Reseed if already seeded by earlier phases. */
	if (crng_ready())
		crng_reseed(NULL);

	WARN_ON(register_pm_notifier(&pm_notifier));

	WARN(!entropy, "Missing cycle counter and fallback timer; RNG "
		       "entropy collection will consequently suffer.");
}

/*
 * Add device- or boot-specific data to the input pool to help
 * initialize it.
 *
 * None of this adds any entropy; it is meant to avoid the problem of
 * the entropy pool having similar initial state across largely
 * identical devices.
 */
void add_device_randomness(const void *buf, size_t len)
{
	unsigned long entropy = random_get_entropy();
	unsigned long flags;

	spin_lock_irqsave(&input_pool.lock, flags);
	_mix_pool_bytes(&entropy, sizeof(entropy));
	_mix_pool_bytes(buf, len);
	spin_unlock_irqrestore(&input_pool.lock, flags);
}
EXPORT_SYMBOL(add_device_randomness);

/*
 * Interface for in-kernel drivers of true hardware RNGs. Those devices
 * may produce endless random bits, so this function will sleep for
 * some amount of time afterwards, if the sleep_after parameter is true.
 */
void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after)
{
	mix_pool_bytes(buf, len);
	credit_init_bits(entropy);

	/*
	 * Throttle writing to once every reseed interval, unless we're not yet
	 * initialized or no entropy is credited.
	 */
	if (sleep_after && !kthread_should_stop() && (crng_ready() || !entropy))
		schedule_timeout_interruptible(crng_reseed_interval());
}
EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
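
/*
 * Example (illustrative sketch): the rough shape of a hardware RNG
 * feeding loop. read_trng() is a hypothetical driver callback that
 * fills the buffer and returns how many bits of entropy to credit;
 * the real logic lives in drivers/char/hw_random.
 *
 *	while (!kthread_should_stop()) {
 *		u8 buf[64];
 *		size_t entropy_bits = read_trng(buf, sizeof(buf));
 *
 *		add_hwgenerator_randomness(buf, sizeof(buf), entropy_bits, true);
 *	}
 */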

/*
 * Handle random seed passed by bootloader, and credit it depending
 * on the command line option 'random.trust_bootloader'.
 */
void __init add_bootloader_randomness(const void *buf, size_t len)
{
	mix_pool_bytes(buf, len);
	if (trust_bootloader)
		credit_init_bits(len * 8);
}

#if IS_ENABLED(CONFIG_VMGENID)
static BLOCKING_NOTIFIER_HEAD(vmfork_chain);

/*
 * Handle a new VM ID, which is unique but not secret, so we
 * don't credit it, but we do immediately force a reseed after so
 * that it's used by the crng posthaste.
 */
void __cold add_vmfork_randomness(const void *unique_vm_id, size_t len)
{
	add_device_randomness(unique_vm_id, len);
	if (crng_ready()) {
		crng_reseed(NULL);
		pr_notice("crng reseeded due to virtual machine fork\n");
	}
	blocking_notifier_call_chain(&vmfork_chain, 0, NULL);
}
#if IS_MODULE(CONFIG_VMGENID)
EXPORT_SYMBOL_GPL(add_vmfork_randomness);
#endif

int __cold register_random_vmfork_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&vmfork_chain, nb);
}
EXPORT_SYMBOL_GPL(register_random_vmfork_notifier);

int __cold unregister_random_vmfork_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&vmfork_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier);
#endif

struct fast_pool {
	unsigned long pool[4];
	unsigned long last;
	unsigned int count;
	struct timer_list mix;
};

static void mix_interrupt_randomness(struct timer_list *work);

static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
#ifdef CONFIG_64BIT
#define FASTMIX_PERM SIPHASH_PERMUTATION
	.pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 },
#else
#define FASTMIX_PERM HSIPHASH_PERMUTATION
	.pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 },
#endif
	.mix = __TIMER_INITIALIZER(mix_interrupt_randomness, 0)
};

/*
 * This is [Half]SipHash-1-x, starting from an empty key. Because
 * the key is fixed, it assumes that its inputs are non-malicious,
 * and therefore this has no security on its own. s represents the
 * four-word SipHash state, while v represents a two-word input.
 */
static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2)
{
	s[3] ^= v1;
	FASTMIX_PERM(s[0], s[1], s[2], s[3]);
	s[0] ^= v1;
	s[3] ^= v2;
	FASTMIX_PERM(s[0], s[1], s[2], s[3]);
	s[0] ^= v2;
}

#ifdef CONFIG_SMP
/*
 * This function is called when the CPU has just come online, with
 * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE.
 */
int __cold random_online_cpu(unsigned int cpu)
{
	/*
	 * During CPU shutdown and before CPU onlining, add_interrupt_
	 * randomness() may schedule mix_interrupt_randomness(), and
	 * set the MIX_INFLIGHT flag. However, because the worker can
	 * be scheduled on a different CPU during this period, that
	 * flag will never be cleared. For that reason, we zero out
	 * the flag here, which runs just after workqueues are onlined
	 * for the CPU again. This also has the effect of setting the
	 * irq randomness count to zero so that new accumulated irqs
	 * are fresh.
	 */
	per_cpu_ptr(&irq_randomness, cpu)->count = 0;
	return 0;
}
#endif

static void mix_interrupt_randomness(struct timer_list *work)
{
	struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix);
	/*
	 * The size of the copied stack pool is explicitly 2 longs so that we
	 * only ever ingest half of the siphash output each time, retaining
	 * the other half as the next "key" that carries over. The entropy is
	 * supposed to be sufficiently dispersed between bits so on average
	 * we don't wind up "losing" some.
	 */
	unsigned long pool[2];
	unsigned int count;

	/* Check to see if we're running on the wrong CPU due to hotplug. */
	local_irq_disable();
	if (fast_pool != this_cpu_ptr(&irq_randomness)) {
		local_irq_enable();
		return;
	}

	/*
	 * Copy the pool to the stack so that the mixer always has a
	 * consistent view, before we reenable irqs again.
	 */
	memcpy(pool, fast_pool->pool, sizeof(pool));
	count = fast_pool->count;
	fast_pool->count = 0;
	fast_pool->last = jiffies;
	local_irq_enable();

	mix_pool_bytes(pool, sizeof(pool));
	credit_init_bits(clamp_t(unsigned int, (count & U16_MAX) / 64, 1, sizeof(pool) * 8));

	memzero_explicit(pool, sizeof(pool));
}

void add_interrupt_randomness(int irq)
{
	enum { MIX_INFLIGHT = 1U << 31 };
	unsigned long entropy = random_get_entropy();
	struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
	struct pt_regs *regs = get_irq_regs();
	unsigned int new_count;

	fast_mix(fast_pool->pool, entropy,
		 (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq));
	new_count = ++fast_pool->count;

	if (new_count & MIX_INFLIGHT)
		return;

	if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ))
		return;

	fast_pool->count |= MIX_INFLIGHT;
	if (!timer_pending(&fast_pool->mix)) {
		fast_pool->mix.expires = jiffies;
		add_timer_on(&fast_pool->mix, raw_smp_processor_id());
	}
}
EXPORT_SYMBOL_GPL(add_interrupt_randomness);

/* There is one of these per entropy source */
struct timer_rand_state {
	unsigned long last_time;
	long last_delta, last_delta2;
};

/*
 * This function adds entropy to the entropy "pool" by using timing
 * delays. It uses the timer_rand_state structure to make an estimate
 * of how many bits of entropy this call has added to the pool. The
 * value "num" is also added to the pool; it should somehow describe
 * the type of event that just happened.
 */
static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
{
	unsigned long entropy = random_get_entropy(), now = jiffies, flags;
	long delta, delta2, delta3;
	unsigned int bits;

	/*
	 * If we're in a hard IRQ, add_interrupt_randomness() will be called
	 * sometime after, so mix into the fast pool.
	 */
	if (in_hardirq()) {
		fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num);
	} else {
		spin_lock_irqsave(&input_pool.lock, flags);
		_mix_pool_bytes(&entropy, sizeof(entropy));
		_mix_pool_bytes(&num, sizeof(num));
		spin_unlock_irqrestore(&input_pool.lock, flags);
	}

	if (crng_ready())
		return;

	/*
	 * Calculate number of bits of randomness we probably added.
	 * We take into account the first, second and third-order deltas
	 * in order to make our estimate.
	 */
	delta = now - READ_ONCE(state->last_time);
	WRITE_ONCE(state->last_time, now);

	delta2 = delta - READ_ONCE(state->last_delta);
	WRITE_ONCE(state->last_delta, delta);

	delta3 = delta2 - READ_ONCE(state->last_delta2);
	WRITE_ONCE(state->last_delta2, delta2);

	if (delta < 0)
		delta = -delta;
	if (delta2 < 0)
		delta2 = -delta2;
	if (delta3 < 0)
		delta3 = -delta3;
	if (delta > delta2)
		delta = delta2;
	if (delta > delta3)
		delta = delta3;

	/*
	 * delta is now minimum absolute delta. Round down by 1 bit
	 * on general principles, and limit entropy estimate to 11 bits.
	 */
	bits = min(fls(delta >> 1), 11);

	/*
	 * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness()
	 * will run after this, which uses a different crediting scheme of 1 bit
	 * per every 64 interrupts. In order to let that function do accounting
	 * close to the one in this function, we credit a full 64/64 bit per bit,
	 * and then subtract one to account for the extra one added.
	 */
	if (in_hardirq())
		this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1;
	else
		_credit_init_bits(bits);
}
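
/*
 * Worked example of the estimate above: successive events at jiffies
 * 100, 113 and 121 give first-order deltas of 13 and 8 and a
 * second-order delta of 8 - 13 = -5. Taking the minimum absolute value
 * (ignoring the third-order term for brevity) gives delta = 5, so
 * bits = min(fls(5 >> 1), 11) = fls(2) = 2, and 2 bits are credited
 * (or, in hard IRQ context, 2 * 64 - 1 = 127 fast-pool counts).
 */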

void add_input_randomness(unsigned int type, unsigned int code, unsigned int value)
{
	static unsigned char last_value;
	static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES };

	/* Ignore autorepeat and the like. */
	if (value == last_value)
		return;

	last_value = value;
	add_timer_randomness(&input_timer_state,
			     (type << 4) ^ code ^ (code >> 4) ^ value);
}
EXPORT_SYMBOL_GPL(add_input_randomness);

#ifdef CONFIG_BLOCK
void add_disk_randomness(struct gendisk *disk)
{
	if (!disk || !disk->random)
		return;
	/* First major is 1, so we get >= 0x200 here. */
	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
}
EXPORT_SYMBOL_GPL(add_disk_randomness);

void __cold rand_initialize_disk(struct gendisk *disk)
{
	struct timer_rand_state *state;

	/*
	 * If kzalloc returns null, we just won't use that entropy
	 * source.
	 */
	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
	if (state) {
		state->last_time = INITIAL_JIFFIES;
		disk->random = state;
	}
}
#endif

struct entropy_timer_state {
	unsigned long entropy;
	struct timer_list timer;
	atomic_t samples;
	unsigned int samples_per_bit;
};

/*
 * Each time the timer fires, we expect that we got an unpredictable jump in
 * the cycle counter. Even if the timer is running on another CPU, the timer
 * activity will be touching the stack of the CPU that is generating entropy.
 *
 * Note that we don't re-arm the timer in the timer itself - we are happy to be
 * scheduled away, since that just makes the load more complex, but we do not
 * want the timer to keep ticking unless the entropy loop is running.
 *
 * So the re-arming always happens in the entropy loop itself.
 */
static void __cold entropy_timer(struct timer_list *timer)
{
	struct entropy_timer_state *state = container_of(timer, struct entropy_timer_state, timer);
	unsigned long entropy = random_get_entropy();

	mix_pool_bytes(&entropy, sizeof(entropy));
	if (atomic_inc_return(&state->samples) % state->samples_per_bit == 0)
		credit_init_bits(1);
}

/*
 * If we have an actual cycle counter, see if we can generate enough entropy
 * with timing noise.
 */
static void __cold try_to_generate_entropy(void)
{
	enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = HZ / 15 };
	u8 stack_bytes[sizeof(struct entropy_timer_state) + SMP_CACHE_BYTES - 1];
	struct entropy_timer_state *stack = PTR_ALIGN((void *)stack_bytes, SMP_CACHE_BYTES);
	unsigned int i, num_different = 0;
	unsigned long last = random_get_entropy();
	int cpu = -1;

	for (i = 0; i < NUM_TRIAL_SAMPLES - 1; ++i) {
		stack->entropy = random_get_entropy();
		if (stack->entropy != last)
			++num_different;
		last = stack->entropy;
	}
	stack->samples_per_bit = DIV_ROUND_UP(NUM_TRIAL_SAMPLES, num_different + 1);
	if (stack->samples_per_bit > MAX_SAMPLES_PER_BIT)
		return;

	atomic_set(&stack->samples, 0);
	timer_setup_on_stack(&stack->timer, entropy_timer, 0);
	while (!crng_ready() && !signal_pending(current)) {
		/*
		 * Check !timer_pending() and then ensure that any previous callback has finished
		 * executing by checking try_to_del_timer_sync(), before queueing the next one.
		 */
		if (!timer_pending(&stack->timer) && try_to_del_timer_sync(&stack->timer) >= 0) {
			struct cpumask timer_cpus;
			unsigned int num_cpus;

			/*
			 * Preemption must be disabled here, both to read the current CPU number
			 * and to avoid scheduling a timer on a dead CPU.
			 */
			preempt_disable();

			/* Only schedule callbacks on timer CPUs that are online. */
			cpumask_and(&timer_cpus, housekeeping_cpumask(HK_TYPE_TIMER), cpu_online_mask);
			num_cpus = cpumask_weight(&timer_cpus);
			/* In the very bizarre case of misconfiguration, fall back to all online CPUs. */
			if (unlikely(num_cpus == 0)) {
				timer_cpus = *cpu_online_mask;
				num_cpus = cpumask_weight(&timer_cpus);
			}

			/* Basic CPU round-robin, which avoids the current CPU. */
			do {
				cpu = cpumask_next(cpu, &timer_cpus);
				if (cpu >= nr_cpu_ids)
					cpu = cpumask_first(&timer_cpus);
			} while (cpu == smp_processor_id() && num_cpus > 1);

			/* Expiring the timer at `jiffies` means it's the next tick. */
			stack->timer.expires = jiffies;

			add_timer_on(&stack->timer, cpu);

			preempt_enable();
		}
		mix_pool_bytes(&stack->entropy, sizeof(stack->entropy));
		schedule();
		stack->entropy = random_get_entropy();
	}
	mix_pool_bytes(&stack->entropy, sizeof(stack->entropy));

	del_timer_sync(&stack->timer);
	destroy_timer_on_stack(&stack->timer);
}


/**********************************************************************
 *
 * Userspace reader/writer interfaces.
 *
 * getrandom(2) is the primary modern interface into the RNG and should
 * be used in preference to anything else.
 *
 * Reading from /dev/random has the same functionality as calling
 * getrandom(2) with flags=0. In earlier versions, however, it had
 * vastly different semantics and should therefore be avoided, to
 * prevent backwards compatibility issues.
 *
 * Reading from /dev/urandom has the same functionality as calling
 * getrandom(2) with flags=GRND_INSECURE. Because it does not block
 * waiting for the RNG to be ready, it should not be used.
 *
 * Writing to either /dev/random or /dev/urandom adds entropy to
 * the input pool but does not credit it.
 *
 * Polling on /dev/random indicates when the RNG is initialized, on
 * the read side, and when it wants new entropy, on the write side.
 *
 * Both /dev/random and /dev/urandom have the same set of ioctls for
 * adding entropy, getting the entropy count, zeroing the count, and
 * reseeding the crng.
 *
 **********************************************************************/
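
/*
 * Example (illustrative userspace sketch): the preferred usage
 * described above, via getrandom(2) rather than the device nodes.
 *
 *	unsigned char buf[32];
 *	ssize_t n = getrandom(buf, sizeof(buf), 0);
 *
 * With flags=0 this blocks until the crng is ready, matching a read
 * from /dev/random; with GRND_NONBLOCK it instead fails with EAGAIN,
 * and an interrupted wait surfaces to userspace as EINTR.
 */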
1387  
SYSCALL_DEFINE3(getrandom,char __user *,ubuf,size_t,len,unsigned int,flags)1388  SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags)
1389  {
1390  	struct iov_iter iter;
1391  	int ret;
1392  
1393  	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
1394  		return -EINVAL;
1395  
1396  	/*
1397  	 * Requesting insecure and blocking randomness at the same time makes
1398  	 * no sense.
1399  	 */
1400  	if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
1401  		return -EINVAL;
1402  
1403  	if (!crng_ready() && !(flags & GRND_INSECURE)) {
1404  		if (flags & GRND_NONBLOCK)
1405  			return -EAGAIN;
1406  		ret = wait_for_random_bytes();
1407  		if (unlikely(ret))
1408  			return ret;
1409  	}
1410  
1411  	ret = import_ubuf(ITER_DEST, ubuf, len, &iter);
1412  	if (unlikely(ret))
1413  		return ret;
1414  	return get_random_bytes_user(&iter);
1415  }
1416  
random_poll(struct file * file,poll_table * wait)1417  static __poll_t random_poll(struct file *file, poll_table *wait)
1418  {
1419  	poll_wait(file, &crng_init_wait, wait);
1420  	return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM;
1421  }
1422  
write_pool_user(struct iov_iter * iter)1423  static ssize_t write_pool_user(struct iov_iter *iter)
1424  {
1425  	u8 block[BLAKE2S_BLOCK_SIZE];
1426  	ssize_t ret = 0;
1427  	size_t copied;
1428  
1429  	if (unlikely(!iov_iter_count(iter)))
1430  		return 0;
1431  
1432  	for (;;) {
1433  		copied = copy_from_iter(block, sizeof(block), iter);
1434  		ret += copied;
1435  		mix_pool_bytes(block, copied);
1436  		if (!iov_iter_count(iter) || copied != sizeof(block))
1437  			break;
1438  
1439  		BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0);
1440  		if (ret % PAGE_SIZE == 0) {
1441  			if (signal_pending(current))
1442  				break;
1443  			cond_resched();
1444  		}
1445  	}
1446  
1447  	memzero_explicit(block, sizeof(block));
1448  	return ret ? ret : -EFAULT;
1449  }
1450  
random_write_iter(struct kiocb * kiocb,struct iov_iter * iter)1451  static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter)
1452  {
1453  	return write_pool_user(iter);
1454  }

static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter)
{
	static int maxwarn = 10;

	/*
	 * Opportunistically attempt to initialize the RNG on platforms that
	 * have fast cycle counters, but don't (for now) require it to succeed.
	 */
	if (!crng_ready())
		try_to_generate_entropy();

	if (!crng_ready()) {
		if (!ratelimit_disable && maxwarn <= 0)
			++urandom_warning.missed;
		else if (ratelimit_disable || __ratelimit(&urandom_warning)) {
			--maxwarn;
			pr_notice("%s: uninitialized urandom read (%zu bytes read)\n",
				  current->comm, iov_iter_count(iter));
		}
	}

	return get_random_bytes_user(iter);
}

static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter)
{
	int ret;

	if (!crng_ready() &&
	    ((kiocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) ||
	     (kiocb->ki_filp->f_flags & O_NONBLOCK)))
		return -EAGAIN;

	ret = wait_for_random_bytes();
	if (ret != 0)
		return ret;
	return get_random_bytes_user(iter);
}
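
/*
 * Illustrative userspace sketch (not part of this driver): opening
 * /dev/random with O_NONBLOCK turns the pre-initialization wait above into
 * an immediate EAGAIN, mirroring getrandom(2) with GRND_NONBLOCK. Returns
 * 1 if the RNG was ready (and buf was filled), 0 if not yet, -1 on other
 * errors.
 *
 *	#include <fcntl.h>
 *	#include <errno.h>
 *	#include <unistd.h>
 *
 *	int try_read_random(unsigned char *buf, size_t len)
 *	{
 *		int fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
 *		ssize_t n;
 *
 *		if (fd < 0)
 *			return -1;
 *		n = read(fd, buf, len);
 *		close(fd);
 *		if (n >= 0)
 *			return 1;
 *		return errno == EAGAIN ? 0 : -1;
 *	}
 */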

static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	int __user *p = (int __user *)arg;
	int ent_count;

	switch (cmd) {
	case RNDGETENTCNT:
		/* Inherently racy, no point locking. */
		if (put_user(input_pool.init_bits, p))
			return -EFAULT;
		return 0;
	case RNDADDTOENTCNT:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (get_user(ent_count, p))
			return -EFAULT;
		if (ent_count < 0)
			return -EINVAL;
		credit_init_bits(ent_count);
		return 0;
	case RNDADDENTROPY: {
		struct iov_iter iter;
		ssize_t ret;
		int len;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (get_user(ent_count, p++))
			return -EFAULT;
		if (ent_count < 0)
			return -EINVAL;
		if (get_user(len, p++))
			return -EFAULT;
		ret = import_ubuf(ITER_SOURCE, p, len, &iter);
		if (unlikely(ret))
			return ret;
		ret = write_pool_user(&iter);
		if (unlikely(ret < 0))
			return ret;
		/* Since we're crediting, enforce that it was all written into the pool. */
		if (unlikely(ret != len))
			return -EFAULT;
		credit_init_bits(ent_count);
		return 0;
	}
	case RNDZAPENTCNT:
	case RNDCLEARPOOL:
		/* No longer has any effect. */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		return 0;
	case RNDRESEEDCRNG:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (!crng_ready())
			return -ENODATA;
		crng_reseed(NULL);
		return 0;
	default:
		return -EINVAL;
	}
}
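
/*
 * Illustrative userspace sketch (not part of this driver): a seed daemon
 * that both mixes bytes into the pool and credits them uses RNDADDENTROPY
 * with struct rand_pool_info from the uapi <linux/random.h>, whose layout
 * matches the two get_user() calls above followed by the data. Requires
 * CAP_SYS_ADMIN, as enforced above; rfd is an open /dev/random fd.
 *
 *	#include <linux/random.h>
 *	#include <sys/ioctl.h>
 *	#include <string.h>
 *
 *	int credit_seed(int rfd, const void *seed, int len, int entropy_bits)
 *	{
 *		struct {
 *			struct rand_pool_info info;
 *			unsigned char data[512];
 *		} req;
 *
 *		if (len > (int)sizeof(req.data))
 *			return -1;
 *		req.info.entropy_count = entropy_bits;	// credited, in bits
 *		req.info.buf_size = len;		// bytes that follow
 *		memcpy(req.data, seed, len);
 *		return ioctl(rfd, RNDADDENTROPY, &req);
 *	}
 */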

static int random_fasync(int fd, struct file *filp, int on)
{
	return fasync_helper(fd, filp, on, &fasync);
}

const struct file_operations random_fops = {
	.read_iter = random_read_iter,
	.write_iter = random_write_iter,
	.poll = random_poll,
	.unlocked_ioctl = random_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.fasync = random_fasync,
	.llseek = noop_llseek,
	.splice_read = copy_splice_read,
	.splice_write = iter_file_splice_write,
};

const struct file_operations urandom_fops = {
	.read_iter = urandom_read_iter,
	.write_iter = random_write_iter,
	.unlocked_ioctl = random_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.fasync = random_fasync,
	.llseek = noop_llseek,
	.splice_read = copy_splice_read,
	.splice_write = iter_file_splice_write,
};


/********************************************************************
 *
 * Sysctl interface.
 *
 * These are partly unused legacy knobs with dummy values to not break
 * userspace and partly still useful things. They are usually accessible
 * in /proc/sys/kernel/random/ and are as follows:
 *
 * - boot_id - a UUID representing the current boot.
 *
 * - uuid - a random UUID, different each time the file is read.
 *
 * - poolsize - the number of bits of entropy that the input pool can
 *   hold, tied to the POOL_BITS constant.
 *
 * - entropy_avail - the number of bits of entropy currently in the
 *   input pool. Always <= poolsize.
 *
 * - write_wakeup_threshold - the amount of entropy in the input pool
 *   below which write polls to /dev/random will unblock, requesting
 *   more entropy, tied to the POOL_READY_BITS constant. It is writable
 *   to avoid breaking old userspaces, but writing to it does not
 *   change any behavior of the RNG.
 *
 * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL.
 *   It is writable to avoid breaking old userspaces, but writing
 *   to it does not change any behavior of the RNG.
 *
 ********************************************************************/
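
/*
 * Illustrative userspace sketch (not part of this driver): the knobs above
 * are ordinary procfs files, so a quick health check is just a read of
 * /proc/sys/kernel/random/entropy_avail, which reports input_pool.init_bits
 * (saturating at POOL_BITS, i.e. typically 256 once the crng is ready).
 *
 *	#include <stdio.h>
 *
 *	int entropy_avail(void)
 *	{
 *		FILE *f = fopen("/proc/sys/kernel/random/entropy_avail", "r");
 *		int bits = -1;
 *
 *		if (!f)
 *			return -1;
 *		if (fscanf(f, "%d", &bits) != 1)
 *			bits = -1;
 *		fclose(f);
 *		return bits;
 *	}
 */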

#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ;
static int sysctl_random_write_wakeup_bits = POOL_READY_BITS;
static int sysctl_poolsize = POOL_BITS;
static u8 sysctl_bootid[UUID_SIZE];

/*
 * This function is used to return both the bootid UUID, and random
 * UUID. The difference is in whether table->data is NULL; if it is,
 * then a new UUID is generated and returned to the user.
 */
static int proc_do_uuid(const struct ctl_table *table, int write, void *buf,
			size_t *lenp, loff_t *ppos)
{
	u8 tmp_uuid[UUID_SIZE], *uuid;
	char uuid_string[UUID_STRING_LEN + 1];
	struct ctl_table fake_table = {
		.data = uuid_string,
		.maxlen = UUID_STRING_LEN
	};

	if (write)
		return -EPERM;

	uuid = table->data;
	if (!uuid) {
		uuid = tmp_uuid;
		generate_random_uuid(uuid);
	} else {
		static DEFINE_SPINLOCK(bootid_spinlock);

		spin_lock(&bootid_spinlock);
		if (!uuid[8])
			generate_random_uuid(uuid);
		spin_unlock(&bootid_spinlock);
	}

	snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid);
	return proc_dostring(&fake_table, 0, buf, lenp, ppos);
}

/* The same as proc_dointvec, but writes don't change anything. */
static int proc_do_rointvec(const struct ctl_table *table, int write, void *buf,
			    size_t *lenp, loff_t *ppos)
{
	return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos);
}

static struct ctl_table random_table[] = {
	{
		.procname	= "poolsize",
		.data		= &sysctl_poolsize,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "entropy_avail",
		.data		= &input_pool.init_bits,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "write_wakeup_threshold",
		.data		= &sysctl_random_write_wakeup_bits,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_rointvec,
	},
	{
		.procname	= "urandom_min_reseed_secs",
		.data		= &sysctl_random_min_urandom_seed,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_rointvec,
	},
	{
		.procname	= "boot_id",
		.data		= &sysctl_bootid,
		.mode		= 0444,
		.proc_handler	= proc_do_uuid,
	},
	{
		.procname	= "uuid",
		.mode		= 0444,
		.proc_handler	= proc_do_uuid,
	},
};

/*
 * random_init() is called before sysctl_init(),
 * so we cannot call register_sysctl_init() in random_init()
 */
static int __init random_sysctls_init(void)
{
	register_sysctl_init("kernel/random", random_table);
	return 0;
}
device_initcall(random_sysctls_init);
#endif