// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 *
 * Provides a framework for enqueueing and running callbacks from hardirq
 * context. The enqueueing is NMI-safe.
 */
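/*
 * Illustrative usage sketch (not part of this file; the callback and work
 * names below are hypothetical). A caller defines an irq_work with its
 * callback and queues it from NMI or hardirq context using the interfaces
 * implemented here and declared in <linux/irq_work.h>:
 *
 *	static void my_irq_work_func(struct irq_work *work)
 *	{
 *		// Runs later from hardirq context, or from the irq_work
 *		// kthread on PREEMPT_RT when not marked IRQ_WORK_HARD_IRQ.
 *	}
 *	static DEFINE_IRQ_WORK(my_work, my_irq_work_func);
 *
 *	// From NMI or hardirq context:
 *	irq_work_queue(&my_work);
 */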

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <asm/processor.h>
#include <linux/kasan.h>

#include <trace/events/ipi.h>

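/*
 * Work queued to raised_list is run from the hardirq/IPI path via
 * irq_work_run(); work on lazy_list is run from the next timer tick or,
 * on PREEMPT_RT, by the per-CPU irq_workd thread.
 */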
static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
static DEFINE_PER_CPU(struct task_struct *, irq_workd);

static void wake_irq_workd(void)
{
	struct task_struct *tsk = __this_cpu_read(irq_workd);

	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
		wake_up_process(tsk);
}

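/*
 * The per-CPU hard irq_work below is used by irq_work_queue_on() to wake
 * the remote CPU's irq_workd thread for PREEMPT_RT lazy items.
 */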
#ifdef CONFIG_SMP
static void irq_work_wake(struct irq_work *entry)
{
	wake_irq_workd();
}

static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
	IRQ_WORK_INIT_HARD(irq_work_wake);
#endif

static int irq_workd_should_run(unsigned int cpu)
{
	return !llist_empty(this_cpu_ptr(&lazy_list));
}

/*
 * Claim the entry so that no one else will poke at it.
 */
static bool irq_work_claim(struct irq_work *work)
{
	int oflags;

	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
	/*
	 * If the work is already pending, no need to raise the IPI.
	 * The pairing smp_mb() in irq_work_single() makes sure
	 * everything we did before is visible.
	 */
	if (oflags & IRQ_WORK_PENDING)
		return false;
	return true;
}

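/*
 * Weak default: architectures that can send a self-IPI override this to
 * raise the interrupt immediately; otherwise raised work waits for
 * irq_work_tick() from the timer interrupt.
 */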
void __weak arch_irq_work_raise(void)
{
	/*
	 * Lame architectures will get the timer tick callback
	 */
}

static __always_inline void irq_work_raise(struct irq_work *work)
{
	if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt())
		trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func);

	arch_irq_work_raise();
}

/* Enqueue on current CPU, work must already be claimed and preempt disabled */
static void __irq_work_queue_local(struct irq_work *work)
{
	struct llist_head *list;
	bool rt_lazy_work = false;
	bool lazy_work = false;
	int work_flags;

	work_flags = atomic_read(&work->node.a_flags);
	if (work_flags & IRQ_WORK_LAZY)
		lazy_work = true;
	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		 !(work_flags & IRQ_WORK_HARD_IRQ))
		rt_lazy_work = true;

	if (lazy_work || rt_lazy_work)
		list = this_cpu_ptr(&lazy_list);
	else
		list = this_cpu_ptr(&raised_list);

	if (!llist_add(&work->node.llist, list))
		return;

	/* If the work is "lazy", handle it from the next tick, if any */
	if (!lazy_work || tick_nohz_tick_stopped())
		irq_work_raise(work);
}

/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	/* Queue the entry and raise the IPI if needed. */
	preempt_disable();
	__irq_work_queue_local(work);
	preempt_enable();

	return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);

/*
 * Enqueue the irq_work @work on @cpu unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
#ifndef CONFIG_SMP
	return irq_work_queue(work);

#else /* CONFIG_SMP: */
	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(cpu));

	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	kasan_record_aux_stack_noalloc(work);

	preempt_disable();
	if (cpu != smp_processor_id()) {
		/* Arch remote IPI send/receive backends aren't NMI safe */
		WARN_ON_ONCE(in_nmi());

		/*
		 * On PREEMPT_RT the items which are not marked as
		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
		 * item is used on the remote CPU to wake the thread.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {

			if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
				goto out;

			work = &per_cpu(irq_work_wakeup, cpu);
			if (!irq_work_claim(work))
				goto out;
		}

		__smp_call_single_queue(cpu, &work->node.llist);
	} else {
		__irq_work_queue_local(work);
	}
out:
	preempt_enable();

	return true;
#endif /* CONFIG_SMP */
}

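/*
 * Used by the nohz code to decide whether the tick can be stopped: returns
 * true while there is pending irq_work that only the tick can run.
 */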
bool irq_work_needs_cpu(void)
{
	struct llist_head *raised, *lazy;

	raised = this_cpu_ptr(&raised_list);
	lazy = this_cpu_ptr(&lazy_list);

	if (llist_empty(raised) || arch_irq_work_has_interrupt())
		if (llist_empty(lazy))
			return false;

	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));

	return true;
}

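/*
 * Run one claimed entry. Also invoked by the generic SMP code for
 * CSD_TYPE_IRQ_WORK entries queued via __smp_call_single_queue().
 */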
void irq_work_single(void *arg)
{
	struct irq_work *work = arg;
	int flags;

	/*
	 * Clear the PENDING bit, after this point the @work can be re-used.
	 * The PENDING bit acts as a lock, and we own it, so we can clear it
	 * without atomic ops.
	 */
	flags = atomic_read(&work->node.a_flags);
	flags &= ~IRQ_WORK_PENDING;
	atomic_set(&work->node.a_flags, flags);

	/*
	 * See irq_work_claim().
	 */
	smp_mb();

	lockdep_irq_work_enter(flags);
	work->func(work);
	lockdep_irq_work_exit(flags);

	/*
	 * Clear the BUSY bit, if set, and return to the free state if no-one
	 * else claimed it meanwhile.
	 */
	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);

	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
	    !arch_irq_work_has_interrupt())
		rcuwait_wake_up(&work->irqwait);
}

static void irq_work_run_list(struct llist_head *list)
{
	struct irq_work *work, *tmp;
	struct llist_node *llnode;

	/*
	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
	 * in a per-CPU thread in preemptible context. Only the items which are
	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
	 */
	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));

	if (llist_empty(list))
		return;

	llnode = llist_del_all(list);
	llist_for_each_entry_safe(work, tmp, llnode, node.llist)
		irq_work_single(work);
}

/*
 * hotplug calls this through:
 *  hotplug_cfd() -> flush_smp_call_function_queue()
 */
void irq_work_run(void)
{
	irq_work_run_list(this_cpu_ptr(&raised_list));
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		irq_work_run_list(this_cpu_ptr(&lazy_list));
	else
		wake_irq_workd();
}
EXPORT_SYMBOL_GPL(irq_work_run);

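/*
 * Called from the timer tick: runs raised work on architectures without an
 * irq_work interrupt, and (on !PREEMPT_RT) any lazy work.
 */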
void irq_work_tick(void)
{
	struct llist_head *raised = this_cpu_ptr(&raised_list);

	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
		irq_work_run_list(raised);

	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		irq_work_run_list(this_cpu_ptr(&lazy_list));
	else
		wake_irq_workd();
}

/*
 * Synchronize against the irq_work @work: ensure the entry is not
 * currently in use.
 */
void irq_work_sync(struct irq_work *work)
{
	lockdep_assert_irqs_enabled();
	might_sleep();

	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
	    !arch_irq_work_has_interrupt()) {
		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
				   TASK_UNINTERRUPTIBLE);
		return;
	}

	while (irq_work_is_busy(work))
		cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);

static void run_irq_workd(unsigned int cpu)
{
	irq_work_run_list(this_cpu_ptr(&lazy_list));
}

static void irq_workd_setup(unsigned int cpu)
{
	sched_set_fifo_low(current);
}

static struct smp_hotplug_thread irqwork_threads = {
	.store                  = &irq_workd,
	.setup			= irq_workd_setup,
	.thread_should_run      = irq_workd_should_run,
	.thread_fn              = run_irq_workd,
	.thread_comm            = "irq_work/%u",
};

static __init int irq_work_init_threads(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
	return 0;
}
early_initcall(irq_work_init_threads);