// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
#include <linux/delay.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

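/* A trampoline is keyed by its attach target (the key is derived from the
 * target's BTF object and btf_id, see the bpf_trampoline_compute_key()
 * callers below). bpf_trampoline_lookup() maps that key to one of the
 * TRAMPOLINE_TABLE_SIZE buckets via hash_64(key, TRAMPOLINE_HASH_BITS), so
 * all programs attaching to the same target end up sharing one
 * bpf_trampoline object.
 */
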
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);

static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
{
	struct bpf_trampoline *tr = ops->private;
	int ret = 0;

	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
		/* This is called inside register_ftrace_direct(), so
		 * tr->mutex is already locked.
		 */
		lockdep_assert_held_once(&tr->mutex);

		/* Instead of updating the trampoline here, we propagate
		 * -EAGAIN to register_ftrace_direct(). Then we can
		 * retry register_ftrace_direct() after updating the
		 * trampoline.
		 */
		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
				return -EBUSY;

			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
			return -EAGAIN;
		}

		return 0;
	}

	/* The normal locking order is
	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
	 *
	 * The following two commands are called from
	 *
	 *   prepare_direct_functions_for_ipmodify
	 *   cleanup_direct_functions_after_ipmodify
	 *
	 * In both cases, direct_mutex is already locked. Use
	 * mutex_trylock(&tr->mutex) to avoid a deadlock when racing with
	 * something else that is changing this same trampoline.
	 */
	if (!mutex_trylock(&tr->mutex)) {
		/* sleep 1 ms to make sure whatever is holding tr->mutex makes
		 * some progress.
		 */
		msleep(1);
		return -EAGAIN;
	}

	switch (cmd) {
	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;

		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;

		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&tr->mutex);
	return ret;
}
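
/* Note on the -EAGAIN above: register_ftrace_direct() propagates it back to
 * register_fentry(), and bpf_trampoline_update() then regenerates the
 * trampoline (now with BPF_TRAMP_F_SHARE_IPMODIFY and, because of it,
 * BPF_TRAMP_F_ORIG_STACK) and retries the registration; see the "again"
 * label in bpf_trampoline_update().
 */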
#endif

bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
	enum bpf_attach_type eatype = prog->expected_attach_type;
	enum bpf_prog_type ptype = prog->type;

	return (ptype == BPF_PROG_TYPE_TRACING &&
		(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
		 eatype == BPF_MODIFY_RETURN)) ||
		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}

void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + size;
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}
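
/* The two helpers above publish/retract the image's address range as a BPF
 * ksym, so trampoline addresses can be symbolized (e.g. in stack traces),
 * and emit PERF_RECORD_KSYMBOL events so profilers learn about the
 * trampoline's lifetime.
 */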

static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
	if (!tr->fops) {
		kfree(tr);
		tr = NULL;
		goto out;
	}
	tr->fops->private = tr;
	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
#endif

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}
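
/* bpf_trampoline_lookup() either returns an existing trampoline with its
 * refcount bumped or allocates a fresh one with refcount 1; either way the
 * caller is expected to drop the reference with bpf_trampoline_put().
 */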

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);

	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
			 bool lock_direct_mutex)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed) {
		if (lock_direct_mutex)
			ret = modify_ftrace_direct(tr->fops, (long)new_addr);
		else
			ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	}
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	unsigned long faddr;
	int ret;

	faddr = ftrace_location((unsigned long)ip);
	if (faddr) {
		if (!tr->fops)
			return -ENOTSUPP;
		tr->func.ftrace_managed = true;
	}

	if (tr->func.ftrace_managed) {
		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
		ret = register_ftrace_direct(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	}

	return ret;
}
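
/* In short: if the target function has an ftrace record (ftrace_location()
 * returned a non-zero address), attachment goes through the ftrace direct
 * call API and tr->fops; otherwise the attach site is patched directly with
 * bpf_arch_text_poke(). The same split applies to modify/unregister above
 * via tr->func.ftrace_managed.
 */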

static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
	struct bpf_tramp_link *link;
	struct bpf_tramp_links *tlinks;
	struct bpf_tramp_link **links;
	int kind;

	*total = 0;
	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
	if (!tlinks)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tlinks[kind].nr_links = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		links = tlinks[kind].links;

		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			*ip_arg |= link->link.prog->call_get_func_ip;
			*links++ = link;
		}
	}
	return tlinks;
}

static void bpf_tramp_image_free(struct bpf_tramp_image *im)
{
	bpf_image_ksym_del(&im->ksym);
	arch_free_bpf_trampoline(im->image, im->size);
	bpf_jit_uncharge_modmem(im->size);
	percpu_ref_exit(&im->pcref);
	kfree_rcu(im, rcu);
}

static void __bpf_tramp_image_put_deferred(struct work_struct *work)
{
	struct bpf_tramp_image *im;

	im = container_of(work, struct bpf_tramp_image, work);
	bpf_tramp_image_free(im);
}

/* callback, fexit step 3 or fentry step 2 */
static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
	schedule_work(&im->work);
}

/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
static void __bpf_tramp_image_release(struct percpu_ref *pcref)
{
	struct bpf_tramp_image *im;

	im = container_of(pcref, struct bpf_tramp_image, pcref);
	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

/* callback, fexit or fentry step 1 */
static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	if (im->ip_after_call)
		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
		percpu_ref_kill(&im->pcref);
	else
		/* the case of fentry trampoline */
		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

static void bpf_tramp_image_put(struct bpf_tramp_image *im)
{
	/* The trampoline image that calls original function is using:
	 * rcu_read_lock_trace to protect sleepable bpf progs
	 * rcu_read_lock to protect normal bpf progs
	 * percpu_ref to protect trampoline itself
	 * rcu tasks to protect trampoline asm not covered by percpu_ref
	 * (which are a few asm insns before __bpf_tramp_enter and
	 *  after __bpf_tramp_exit)
	 *
	 * The trampoline is unreachable before bpf_tramp_image_put().
	 *
	 * First, patch the trampoline to avoid calling into fexit progs.
	 * The progs will be freed even if the original function is still
	 * executing or sleeping.
	 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on
	 * the first few asm instructions to execute and call into
	 * __bpf_tramp_enter->percpu_ref_get.
	 * Then use percpu_ref_kill to wait for the trampoline and the original
	 * function to finish.
	 * Then use call_rcu_tasks() to make sure the few asm insns in
	 * the trampoline epilogue are done as well.
	 *
	 * In !PREEMPT case the task that got interrupted in the first asm
	 * insns won't go through an RCU quiescent state which the
	 * percpu_ref_kill will be waiting for. Hence the first
	 * call_rcu_tasks() is not necessary.
	 */
	if (im->ip_after_call) {
		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
					     NULL, im->ip_epilogue);
		WARN_ON(err);
		if (IS_ENABLED(CONFIG_TASKS_RCU))
			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
		else
			percpu_ref_kill(&im->pcref);
		return;
	}

	/* The trampoline without fexit and fmod_ret progs doesn't call original
	 * function and doesn't use percpu_ref.
	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
	 * and normal progs.
	 */
	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
}

static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
{
	struct bpf_tramp_image *im;
	struct bpf_ksym *ksym;
	void *image;
	int err = -ENOMEM;

	im = kzalloc(sizeof(*im), GFP_KERNEL);
	if (!im)
		goto out;

	err = bpf_jit_charge_modmem(size);
	if (err)
		goto out_free_im;
	im->size = size;

	err = -ENOMEM;
	im->image = image = arch_alloc_bpf_trampoline(size);
	if (!image)
		goto out_uncharge;

	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
	if (err)
		goto out_free_image;

	ksym = &im->ksym;
	INIT_LIST_HEAD_RCU(&ksym->lnode);
	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
	bpf_image_ksym_add(image, size, ksym);
	return im;

out_free_image:
	arch_free_bpf_trampoline(im->image, im->size);
out_uncharge:
	bpf_jit_uncharge_modmem(size);
out_free_im:
	kfree(im);
out:
	return ERR_PTR(err);
}
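
/* The image is published as "bpf_trampoline_<key>", which is handy when
 * correlating perf or stack-trace addresses with a specific attach target.
 * The teardown path (bpf_tramp_image_put/free above) removes the ksym again
 * before the memory is returned.
 */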

static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
{
	struct bpf_tramp_image *im;
	struct bpf_tramp_links *tlinks;
	u32 orig_flags = tr->flags;
	bool ip_arg = false;
	int err, total, size;

	tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
	if (IS_ERR(tlinks))
		return PTR_ERR(tlinks);

	if (total == 0) {
		err = unregister_fentry(tr, tr->cur_image->image);
		bpf_tramp_image_put(tr->cur_image);
		tr->cur_image = NULL;
		goto out;
	}

	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);

	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
		 * should not be set together.
		 */
		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
	} else {
		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
	}

	if (ip_arg)
		tr->flags |= BPF_TRAMP_F_IP_ARG;

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
again:
	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
#endif

	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
					tlinks, tr->func.addr);
	if (size < 0) {
		err = size;
		goto out;
	}

	if (size > PAGE_SIZE) {
		err = -E2BIG;
		goto out;
	}

	im = bpf_tramp_image_alloc(tr->key, size);
	if (IS_ERR(im)) {
		err = PTR_ERR(im);
		goto out;
	}

	err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
					  &tr->func.model, tr->flags, tlinks,
					  tr->func.addr);
	if (err < 0)
		goto out_free;

	err = arch_protect_bpf_trampoline(im->image, im->size);
	if (err)
		goto out_free;

	WARN_ON(tr->cur_image && total == 0);
	if (tr->cur_image)
		/* progs already running at this address */
		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
	else
		/* first time registering */
		err = register_fentry(tr, im->image);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	if (err == -EAGAIN) {
		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now that
		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
		 * trampoline again and retry the registration.
		 */
		/* reset fops->func and fops->trampoline for re-register */
		tr->fops->func = NULL;
		tr->fops->trampoline = 0;

		/* free im memory and reallocate later */
		bpf_tramp_image_free(im);
		goto again;
	}
#endif
	if (err)
		goto out_free;

	if (tr->cur_image)
		bpf_tramp_image_put(tr->cur_image);
	tr->cur_image = im;
out:
	/* If any error happens, restore previous flags */
	if (err)
		tr->flags = orig_flags;
	kfree(tlinks);
	return err;

out_free:
	bpf_tramp_image_free(im);
	goto out;
}
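
/* bpf_trampoline_update() is the central path through which a trampoline is
 * (re)generated: it snapshots the attached links, derives the flags, sizes
 * and allocates a fresh image, asks the arch code to emit and protect it,
 * and only then flips the attach point over to the new image
 * (modify_fentry) or installs it for the first time (register_fentry). The
 * old image, if any, is retired via bpf_tramp_image_put() so in-flight
 * callers can drain safely.
 */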

static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	case BPF_LSM_MAC:
		if (!prog->aux->attach_func_proto->type)
			/* The function returns void, so we cannot modify its
			 * return value.
			 */
			return BPF_TRAMP_FEXIT;
		else
			return BPF_TRAMP_MODIFY_RETURN;
	default:
		return BPF_TRAMP_REPLACE;
	}
}

static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_tramp_link *link_exiting;
	int err = 0;
	int cnt = 0, i;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (tr->extension_prog)
		/* Cannot attach fentry/fexit if extension prog is attached.
		 * Cannot overwrite extension prog either.
		 */
		return -EBUSY;

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		cnt += tr->progs_cnt[i];

	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt)
			return -EBUSY;
		tr->extension_prog = link->link.prog;
		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					  link->link.prog->bpf_func);
	}
	if (cnt >= BPF_MAX_TRAMP_LINKS)
		return -E2BIG;
	if (!hlist_unhashed(&link->tramp_hlist))
		/* prog already linked */
		return -EBUSY;
	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
		if (link_exiting->link.prog != link->link.prog)
			continue;
		/* prog already linked */
		return -EBUSY;
	}

	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
	if (err) {
		hlist_del_init(&link->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
	return err;
}

int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_link_prog(link, tr);
	mutex_unlock(&tr->mutex);
	return err;
}

static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	int err;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		return err;
	}
	hlist_del_init(&link->tramp_hlist);
	tr->progs_cnt[kind]--;
	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_unlink_prog(link, tr);
	mutex_unlock(&tr->mutex);
	return err;
}

#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
	if (!shim_link->trampoline)
		return;

	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
	bpf_trampoline_put(shim_link->trampoline);
}

static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	kfree(shim_link);
}

static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
	.release = bpf_shim_tramp_link_release,
	.dealloc = bpf_shim_tramp_link_dealloc,
};

static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
						     bpf_func_t bpf_func,
						     int cgroup_atype)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_prog *p;

	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
	if (!shim_link)
		return NULL;

	p = bpf_prog_alloc(1, 0);
	if (!p) {
		kfree(shim_link);
		return NULL;
	}

	p->jited = false;
	p->bpf_func = bpf_func;

	p->aux->cgroup_atype = cgroup_atype;
	p->aux->attach_func_proto = prog->aux->attach_func_proto;
	p->aux->attach_btf_id = prog->aux->attach_btf_id;
	p->aux->attach_btf = prog->aux->attach_btf;
	btf_get(p->aux->attach_btf);
	p->type = BPF_PROG_TYPE_LSM;
	p->expected_attach_type = BPF_LSM_MAC;
	bpf_prog_inc(p);
	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
		      &bpf_shim_tramp_link_lops, p);
	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);

	return shim_link;
}
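
/* The prog allocated above is only a stub: it is never JITed and its
 * bpf_func points at the shim selected by bpf_lsm_find_cgroup_shim(), which
 * is expected to dispatch to the actual BPF_LSM_CGROUP programs at run time.
 * The stub exists so the regular trampoline link machinery can be reused for
 * cgroup LSM attachment.
 */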

static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
						    bpf_func_t bpf_func)
{
	struct bpf_tramp_link *link;
	int kind;

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			struct bpf_prog *p = link->link.prog;

			if (p->bpf_func == bpf_func)
				return container_of(link, struct bpf_shim_tramp_link, link);
		}
	}

	return NULL;
}

int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
				    int cgroup_atype)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_attach_target_info tgt_info = {};
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;
	int err;

	err = bpf_check_attach_target(NULL, prog, NULL,
				      prog->aux->attach_btf_id,
				      &tgt_info);
	if (err)
		return err;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	mutex_lock(&tr->mutex);

	shim_link = cgroup_shim_find(tr, bpf_func);
	if (shim_link) {
		/* Reusing existing shim attached by another program. */
		bpf_link_inc(&shim_link->link.link);

		mutex_unlock(&tr->mutex);
		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
		return 0;
	}

	/* Allocate and install new shim. */

	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
	if (!shim_link) {
		err = -ENOMEM;
		goto err;
	}

	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
	if (err)
		goto err;

	shim_link->trampoline = tr;
	/* note, we're still holding tr refcnt from above */

	mutex_unlock(&tr->mutex);

	return 0;
err:
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	/* have to release tr while _not_ holding its mutex */
	bpf_trampoline_put(tr); /* bpf_trampoline_get above */

	return err;
}

void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_lookup(key);
	if (WARN_ON_ONCE(!tr))
		return;

	mutex_lock(&tr->mutex);
	shim_link = cgroup_shim_find(tr, bpf_func);
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
}
#endif

struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key);
	if (!tr)
		return NULL;

	mutex_lock(&tr->mutex);
	if (tr->func.addr)
		goto out;

	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
	tr->func.addr = (void *)tgt_info->tgt_addr;
out:
	mutex_unlock(&tr->mutex);
	return tr;
}

void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	int i;

	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
			goto out;

	/* This code will be executed even when the last bpf_tramp_image
	 * is alive. All progs are detached from the trampoline and the
	 * trampoline image is patched with jmp into epilogue to skip
	 * fexit progs. The fentry-only trampoline will be freed via
	 * multiple rcu callbacks.
	 */
	hlist_del(&tr->hlist);
	if (tr->fops) {
		ftrace_free_filter(tr->fops);
		kfree(tr->fops);
	}
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

#define NO_START_TIME 1
static __always_inline u64 notrace bpf_prog_start_time(void)
{
	u64 start = NO_START_TIME;

	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
		start = sched_clock();
		if (unlikely(!start))
			start = NO_START_TIME;
	}
	return start;
}

/* The logic is similar to bpf_prog_run(), but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The call sequence is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 *
 * __bpf_prog_enter returns:
 * 0 - skip execution of the bpf prog
 * 1 - execute bpf prog
 * [2..MAX_U64] - execute bpf prog and record execution time.
 *     This is start time.
 */
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		return 0;
	}
	return bpf_prog_start_time();
}
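
/* Illustrative sketch (not the actual arch-generated code): the JITed
 * trampoline roughly does
 *
 *	start = __bpf_prog_enter*(prog, &run_ctx);
 *	if (start)
 *		prog->bpf_func(args);
 *	__bpf_prog_exit*(prog, start, &run_ctx);
 *
 * so a zero return from the enter helper (recursion detected above) skips
 * the program body while the exit helper still undoes the bookkeeping.
 */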

static void notrace update_prog_stats(struct bpf_prog *prog,
				      u64 start)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter*
	     * and disabled in __bpf_prog_exit*.
	     * And vice versa.
	     * Hence check that 'start' is valid.
	     */
	    start > NO_START_TIME) {
		u64 duration = sched_clock() - start;
		unsigned long flags;

		stats = this_cpu_ptr(prog->stats);
		flags = u64_stats_update_begin_irqsave(&stats->syncp);
		u64_stats_inc(&stats->cnt);
		u64_stats_add(&stats->nsecs, duration);
		u64_stats_update_end_irqrestore(&stats->syncp, flags);
	}
}
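
/* These per-CPU counters back the run_cnt/run_time_ns numbers reported in
 * bpf_prog_info (e.g. shown by "bpftool prog"); they only accumulate while
 * the kernel.bpf_stats_enabled sysctl or the BPF_ENABLE_STATS command has
 * flipped bpf_stats_enabled_key on.
 */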

static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
					  struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock();
}

static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
					       struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	/* Runtime stats are exported via actual BPF_LSM_CGROUP
	 * programs, not the shims.
	 */
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return NO_START_TIME;
}

static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
					       struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	migrate_enable();
	rcu_read_unlock();
}

u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		return 0;
	}
	return bpf_prog_start_time();
}

void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
				    struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
				    struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock();
}

void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
{
	percpu_ref_get(&tr->pcref);
}

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
	percpu_ref_put(&tr->pcref);
}

bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
{
	bool sleepable = prog->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_enter_sleepable_recur :
			__bpf_prog_enter_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_enter_lsm_cgroup;

	return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
}

bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
{
	bool sleepable = prog->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_exit_sleepable_recur :
			__bpf_prog_exit_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_exit_lsm_cgroup;

	return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
}

int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_links *tlinks,
			    void *func_addr)
{
	return -ENOTSUPP;
}

void * __weak arch_alloc_bpf_trampoline(unsigned int size)
{
	void *image;

	if (WARN_ON_ONCE(size > PAGE_SIZE))
		return NULL;
	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (image)
		set_vm_flush_reset_perms(image);
	return image;
}

void __weak arch_free_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	/* bpf_jit_free_exec doesn't need "size", but
	 * bpf_prog_pack_free() needs it.
	 */
	bpf_jit_free_exec(image);
}

int __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	return set_memory_rox((long)image, 1);
}

int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
				    struct bpf_tramp_links *tlinks, void *func_addr)
{
	return -ENOTSUPP;
}
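
/* These __weak definitions are only fallbacks: architectures that support
 * BPF trampolines (e.g. x86, arm64) are expected to provide their own
 * arch_prepare_bpf_trampoline() and arch_bpf_trampoline_size(). The generic
 * alloc/free/protect variants simply manage a single executable page,
 * matching the PAGE_SIZE cap enforced in bpf_trampoline_update().
 */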

static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);