// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/local_lock.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/wait_bit.h>
#include <linux/workqueue.h>

#include <asm/softirq_stack.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution, so we get a weak form of CPU binding. It is still not
     clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */
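
/*
 * Illustrative sketch (hypothetical demo_* helper, not used by the
 * kernel): code that shares data with a softirq handler on the local CPU
 * serializes against it simply by disabling bottom halves around the
 * critical section; the softirq core itself provides no global
 * serialization, as described above.
 */
static inline void demo_touch_data_shared_with_softirq(void)
{
	local_bh_disable();
	/* ... access data also touched by a softirq handler ... */
	local_bh_enable();
}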

#ifndef __ARCH_IRQ_STAT
DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

const char * const softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk)
		wake_up_process(tsk);
}

#ifdef CONFIG_TRACE_IRQFLAGS
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
#endif

/*
 * SOFTIRQ_OFFSET usage:
 *
 * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
 * to a per CPU counter and to task::softirq_disable_cnt.
 *
 * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
 *   processing.
 *
 * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable() or local_bh_enable().
 *
 * This lets us distinguish between whether we are currently processing
 * a softirq and whether we just have bottom halves disabled.
 */
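
/*
 * Illustrative sketch only (hypothetical demo_* helpers; the real
 * predicates are in_serving_softirq() and in_softirq() from
 * <linux/preempt.h>): with the offsets described above, the
 * SOFTIRQ_OFFSET bit of the softirq count tells "currently processing
 * softirqs" apart from "merely has bottom halves disabled".
 */
static inline bool demo_serving_softirq(void)
{
	/* SOFTIRQ_OFFSET bit set => inside softirq processing. */
	return softirq_count() & SOFTIRQ_OFFSET;
}

static inline bool demo_bh_disabled_only(void)
{
	/* BH disabled, but not currently running softirq handlers. */
	return (softirq_count() & SOFTIRQ_MASK) &&
	       !(softirq_count() & SOFTIRQ_OFFSET);
}
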
#ifdef CONFIG_PREEMPT_RT

/*
 * RT accounts for BH disabled sections in task::softirq_disable_cnt and
 * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
 * softirq disabled section to be preempted.
 *
 * The per task counter is used for softirq_count(), in_softirq() and
 * in_serving_softirq() because these counts are only valid when the task
 * holding softirq_ctrl::lock is running.
 *
 * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
 * the task which is in a softirq disabled section is preempted or blocks.
 */
struct softirq_ctrl {
	local_lock_t	lock;
	int		cnt;
};

static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
	.lock	= INIT_LOCAL_LOCK(softirq_ctrl.lock),
};

/**
 * local_bh_blocked() - Check from the idle task whether BH processing is blocked
 *
 * Return: false if the per CPU softirq_ctrl::cnt is 0, otherwise true.
 *
 * This is invoked from the idle task to guard against false positive
 * softirq pending warnings, which would happen when the task which holds
 * softirq_ctrl::lock was the only running task on the CPU and blocks on
 * some other lock.
 */
bool local_bh_blocked(void)
{
	return __this_cpu_read(softirq_ctrl.cnt) != 0;
}

void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;
	int newcnt;

	WARN_ON_ONCE(in_hardirq());

	/* First entry of a task into a BH disabled section? */
	if (!current->softirq_disable_cnt) {
		if (preemptible()) {
			local_lock(&softirq_ctrl.lock);
			/* Required to meet the RCU bottomhalf requirements. */
			rcu_read_lock();
		} else {
			DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
		}
	}

	/*
	 * Track the per CPU softirq disabled state. On RT this is per CPU
	 * state to allow preemption of bottom half disabled sections.
	 */
	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
	/*
	 * Reflect the result in the task state to prevent recursion on the
	 * local lock and to make softirq_count() & al work.
	 */
	current->softirq_disable_cnt = newcnt;

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_off(ip);
		raw_local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);

static void __local_bh_enable(unsigned int cnt, bool unlock)
{
	unsigned long flags;
	int newcnt;

	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
			    this_cpu_read(softirq_ctrl.cnt));

	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
		raw_local_irq_save(flags);
		lockdep_softirqs_on(_RET_IP_);
		raw_local_irq_restore(flags);
	}

	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
	current->softirq_disable_cnt = newcnt;

	if (!newcnt && unlock) {
		rcu_read_unlock();
		local_unlock(&softirq_ctrl.lock);
	}
}

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	bool preempt_on = preemptible();
	unsigned long flags;
	u32 pending;
	int curcnt;

	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();

	local_irq_save(flags);
	curcnt = __this_cpu_read(softirq_ctrl.cnt);

	/*
	 * If this is not reenabling soft interrupts, no point in trying to
	 * run pending ones.
	 */
	if (curcnt != cnt)
		goto out;

	pending = local_softirq_pending();
	if (!pending)
		goto out;

	/*
	 * If this was called from non preemptible context, wake up the
	 * softirq daemon.
	 */
	if (!preempt_on) {
		wakeup_softirqd();
		goto out;
	}

	/*
	 * Adjust softirq count to SOFTIRQ_OFFSET which makes
	 * in_serving_softirq() become true.
	 */
	cnt = SOFTIRQ_OFFSET;
	__local_bh_enable(cnt, false);
	__do_softirq();

out:
	__local_bh_enable(cnt, preempt_on);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__local_bh_enable_ip);

/*
 * Invoked from run_ksoftirqd() outside of the interrupt disabled section
 * to acquire the per CPU local lock for reentrancy protection.
 */
static inline void ksoftirqd_run_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	local_irq_disable();
}

/* Counterpart to ksoftirqd_run_begin() */
static inline void ksoftirqd_run_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET, true);
	WARN_ON_ONCE(in_interrupt());
	local_irq_enable();
}

static inline void softirq_handle_begin(void) { }
static inline void softirq_handle_end(void) { }

static inline bool should_wake_ksoftirqd(void)
{
	return !this_cpu_read(softirq_ctrl.cnt);
}

static inline void invoke_softirq(void)
{
	if (should_wake_ksoftirqd())
		wakeup_softirqd();
}

/*
 * flush_smp_call_function_queue() can raise a soft interrupt in a function
 * call. On RT kernels this is undesired and the only known functionality
 * in the block layer which does this is disabled on RT. If soft interrupts
 * get raised which haven't been raised before the flush, warn so it can be
 * investigated.
 */
void do_softirq_post_smp_call_flush(unsigned int was_pending)
{
	if (WARN_ON_ONCE(was_pending != local_softirq_pending()))
		invoke_softirq();
}

#else /* CONFIG_PREEMPT_RT */

/*
 * This one is for softirq.c-internal use, where hardirqs are disabled
 * legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_hardirq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirqs_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	__preempt_count_add(cnt);
	/*
	 * Were softirqs turned off above?
	 */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = get_lock_parent_ip();
#endif
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
	}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */

static void __local_bh_enable(unsigned int cnt)
{
	lockdep_assert_irqs_disabled();

	if (preempt_count() == cnt)
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());

	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		lockdep_softirqs_on(_RET_IP_);

	__preempt_count_sub(cnt);
}

/*
 * Special-case - softirqs can safely be enabled by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_hardirq());
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);

void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
	WARN_ON_ONCE(in_hardirq());
	lockdep_assert_irqs_enabled();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now?
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		lockdep_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	__preempt_count_sub(cnt - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending())) {
		/*
		 * Run softirqs if any are pending. And do it on their own
		 * stack, as we may be calling this deep in a task call
		 * stack already.
		 */
		do_softirq();
	}

	preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);

static inline void softirq_handle_begin(void)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
}

static inline void softirq_handle_end(void)
{
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
}

static inline void ksoftirqd_run_begin(void)
{
	local_irq_disable();
}

static inline void ksoftirqd_run_end(void)
{
	local_irq_enable();
}

static inline bool should_wake_ksoftirqd(void)
{
	return true;
}

static inline void invoke_softirq(void)
{
	if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
		/*
		 * We can safely execute softirqs on the current stack if
		 * it is the irq stack, because it should be near empty
		 * at this stage.
		 */
		__do_softirq();
#else
		/*
		 * Otherwise, irq_exit() is called on the task stack, which
		 * can already be deep. So run softirqs on their own stack
		 * to prevent overrunning it.
		 */
		do_softirq_own_stack();
#endif
	} else {
		wakeup_softirqd();
	}
}

asmlinkage __visible void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		do_softirq_own_stack();

	local_irq_restore(flags);
}

#endif /* !CONFIG_PREEMPT_RT */

/*
 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 * but break the loop if need_resched() is set or after 2 ms.
 * MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 * certain cases, such as stop_machine(), jiffies may cease to
 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 * well to make sure we eventually return from this method.
 *
 * These limits have been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 * to keep the lockdep irq context tracking as tight as possible in order to
 * not mis-qualify lock contexts and miss possible deadlocks.
 */

static inline bool lockdep_softirq_start(void)
{
	bool in_hardirq = false;

	if (lockdep_hardirq_context()) {
		in_hardirq = true;
		lockdep_hardirq_exit();
	}

	lockdep_softirq_enter();

	return in_hardirq;
}

static inline void lockdep_softirq_end(bool in_hardirq)
{
	lockdep_softirq_exit();

	if (in_hardirq)
		lockdep_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif

static void handle_softirqs(bool ksirqd)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swapping.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();

	softirq_handle_begin();
	in_hardirq = lockdep_softirq_start();
	account_softirq_enter(current);

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action();
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
		rcu_softirq_qs();

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	account_softirq_exit(current);
	lockdep_softirq_end(in_hardirq);
	softirq_handle_end();
	current_restore_flags(old_flags, PF_MEMALLOC);
}

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	handle_softirqs(false);
}

/**
 * irq_enter_rcu - Enter an interrupt context with RCU watching
 */
void irq_enter_rcu(void)
{
	__irq_enter_raw();

	if (tick_nohz_full_cpu(smp_processor_id()) ||
	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
		tick_irq_enter();

	account_hardirq_enter(current);
}

/**
 * irq_enter - Enter an interrupt context including RCU update
 */
void irq_enter(void)
{
	ct_irq_enter();
	irq_enter_rcu();
}

static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if ((sched_core_idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
		if (!in_hardirq())
			tick_nohz_irq_exit();
	}
#endif
}

static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	lockdep_assert_irqs_disabled();
#endif
	account_hardirq_exit(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
}

/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit_rcu(void)
{
	__irq_exit_rcu();
	 /* must be last! */
	lockdep_hardirq_exit();
}

/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit(void)
{
	__irq_exit_rcu();
	ct_irq_exit();
	 /* must be last! */
	lockdep_hardirq_exit();
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt() && should_wake_ksoftirqd())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
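
/*
 * Illustrative sketch (hypothetical demo_* helper, not used here): a
 * caller which already runs with interrupts disabled, e.g. a hardirq
 * handler or a hrtimer callback, can use raise_softirq_irqoff() directly;
 * any other context should use raise_softirq(), which wraps the call in
 * local_irq_save()/local_irq_restore() as above.
 */
static inline void demo_kick_net_rx(void)
{
	lockdep_assert_irqs_disabled();
	raise_softirq_irqoff(NET_RX_SOFTIRQ);
}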

void __raise_softirq_irqoff(unsigned int nr)
{
	lockdep_assert_irqs_disabled();
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

void open_softirq(int nr, void (*action)(void))
{
	softirq_vec[nr].action = action;
}

/*
 * Tasklets
 */
struct tasklet_head {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

static void __tasklet_schedule_common(struct tasklet_struct *t,
				      struct tasklet_head __percpu *headp,
				      unsigned int softirq_nr)
{
	struct tasklet_head *head;
	unsigned long flags;

	local_irq_save(flags);
	head = this_cpu_ptr(headp);
	t->next = NULL;
	*head->tail = t;
	head->tail = &(t->next);
	raise_softirq_irqoff(softirq_nr);
	local_irq_restore(flags);
}

void __tasklet_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_vec,
				  TASKLET_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	__tasklet_schedule_common(t, &tasklet_hi_vec,
				  HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

static bool tasklet_clear_sched(struct tasklet_struct *t)
{
	if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
		wake_up_var(&t->state);
		return true;
	}

	WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
		  t->use_callback ? "callback" : "func",
		  t->use_callback ? (void *)t->callback : (void *)t->func);

	return false;
}

static void tasklet_action_common(struct tasklet_head *tl_head,
				  unsigned int softirq_nr)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = tl_head->head;
	tl_head->head = NULL;
	tl_head->tail = &tl_head->head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (tasklet_clear_sched(t)) {
					if (t->use_callback) {
						trace_tasklet_entry(t, t->callback);
						t->callback(t);
						trace_tasklet_exit(t, t->callback);
					} else {
						trace_tasklet_entry(t, t->func);
						t->func(t->data);
						trace_tasklet_exit(t, t->func);
					}
				}
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*tl_head->tail = t;
		tl_head->tail = &t->next;
		__raise_softirq_irqoff(softirq_nr);
		local_irq_enable();
	}
}

static __latent_entropy void tasklet_action(void)
{
	workqueue_softirq_action(false);
	tasklet_action_common(this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}

static __latent_entropy void tasklet_hi_action(void)
{
	workqueue_softirq_action(true);
	tasklet_action_common(this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}

void tasklet_setup(struct tasklet_struct *t,
		   void (*callback)(struct tasklet_struct *))
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->callback = callback;
	t->use_callback = true;
	t->data = 0;
}
EXPORT_SYMBOL(tasklet_setup);

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->use_callback = false;
	t->data = data;
}
EXPORT_SYMBOL(tasklet_init);
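
/*
 * Illustrative usage sketch (hypothetical demo_* names, not used by this
 * file): a driver typically initializes a tasklet once with
 * tasklet_setup() and then schedules it from its interrupt handler with
 * tasklet_schedule(); the callback later runs in softirq context via
 * tasklet_action() above.
 */
static struct tasklet_struct demo_tasklet;

static void demo_tasklet_fn(struct tasklet_struct *t)
{
	/* Deferred work runs here, in TASKLET_SOFTIRQ context. */
}

static inline void demo_driver_probe(void)
{
	tasklet_setup(&demo_tasklet, demo_tasklet_fn);
}

static inline void demo_driver_irq_handler(void)
{
	/* Safe from hardirq context: marks the tasklet pending and raises the softirq. */
	tasklet_schedule(&demo_tasklet);
}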

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * Do not use in new code. Waiting for tasklets from atomic contexts is
 * error prone and should be avoided.
 */
void tasklet_unlock_spin_wait(struct tasklet_struct *t)
{
	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
			/*
			 * Prevent a live lock if the current task has
			 * preempted soft interrupt processing or prevents
			 * ksoftirqd from running. If the tasklet runs on a
			 * different CPU, then this has no effect other than
			 * doing the BH disable/enable dance for nothing.
			 */
			local_bh_disable();
			local_bh_enable();
		} else {
			cpu_relax();
		}
	}
}
EXPORT_SYMBOL(tasklet_unlock_spin_wait);
#endif

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		pr_notice("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
		wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));

	tasklet_unlock_wait(t);
	tasklet_clear_sched(t);
}
EXPORT_SYMBOL(tasklet_kill);

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
void tasklet_unlock(struct tasklet_struct *t)
{
	smp_mb__before_atomic();
	clear_bit(TASKLET_STATE_RUN, &t->state);
	smp_mb__after_atomic();
	wake_up_var(&t->state);
}
EXPORT_SYMBOL_GPL(tasklet_unlock);

void tasklet_unlock_wait(struct tasklet_struct *t)
{
	wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
}
EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
#endif

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

static void run_ksoftirqd(unsigned int cpu)
{
	ksoftirqd_run_begin();
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirqs on the current stack, as we
		 * are not deep in the task stack here.
		 */
		handle_softirqs(true);
		ksoftirqd_run_end();
		cond_resched();
		return;
	}
	ksoftirqd_run_end();
}

#ifdef CONFIG_HOTPLUG_CPU
static int takeover_tasklets(unsigned int cpu)
{
	workqueue_softirq_dead(cpu);

	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		__this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
	return 0;
}
#else
#define takeover_tasklets	NULL
#endif /* CONFIG_HOTPLUG_CPU */

static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
	return from;
}