xref: /openbmc/linux/kernel/bpf/trampoline.c (revision 44ecda71)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /* Copyright (c) 2019 Facebook */
3  #include <linux/hash.h>
4  #include <linux/bpf.h>
5  #include <linux/filter.h>
6  #include <linux/ftrace.h>
7  #include <linux/rbtree_latch.h>
8  #include <linux/perf_event.h>
9  #include <linux/btf.h>
10  #include <linux/rcupdate_trace.h>
11  #include <linux/rcupdate_wait.h>
12  #include <linux/module.h>
13  #include <linux/static_call.h>
14  #include <linux/bpf_verifier.h>
15  #include <linux/bpf_lsm.h>
16  #include <linux/delay.h>
17  
18  /* dummy _ops. The verifier will operate on target program's ops. */
19  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
20  };
21  const struct bpf_prog_ops bpf_extension_prog_ops = {
22  };
23  
24  /* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
25  #define TRAMPOLINE_HASH_BITS 10
26  #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
27  
28  static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
29  
30  /* serializes access to trampoline_table */
31  static DEFINE_MUTEX(trampoline_mutex);
32  
33  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
34  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
35  
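/* ops_func callback installed on tr->fops in bpf_trampoline_lookup(). ftrace
 * invokes it when IPMODIFY sharing with another ftrace user (e.g. livepatch)
 * has to be enabled or disabled for this trampoline, so the trampoline can be
 * rebuilt with or without BPF_TRAMP_F_SHARE_IPMODIFY / BPF_TRAMP_F_ORIG_STACK.
 */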
36  static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
37  {
38  	struct bpf_trampoline *tr = ops->private;
39  	int ret = 0;
40  
41  	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
42  		/* This is called inside register_ftrace_direct_multi(), so
43  		 * tr->mutex is already locked.
44  		 */
45  		lockdep_assert_held_once(&tr->mutex);
46  
47  		/* Instead of updating the trampoline here, we propagate
48  		 * -EAGAIN to register_ftrace_direct_multi(). Then we can
49  		 * retry register_ftrace_direct_multi() after updating the
50  		 * trampoline.
51  		 */
52  		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
53  		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
54  			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
55  				return -EBUSY;
56  
57  			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
58  			return -EAGAIN;
59  		}
60  
61  		return 0;
62  	}
63  
64  	/* The normal locking order is
65  	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
66  	 *
67  	 * The following two commands are called from
68  	 *
69  	 *   prepare_direct_functions_for_ipmodify
70  	 *   cleanup_direct_functions_after_ipmodify
71  	 *
72  	 * In both cases, direct_mutex is already locked. Use
73  	 * mutex_trylock(&tr->mutex) to avoid a deadlock when something else
74  	 * is concurrently making changes to this same trampoline.
75  	 */
76  	if (!mutex_trylock(&tr->mutex)) {
77  		/* sleep 1 ms to make sure whatever is holding tr->mutex makes
78  		 * some progress.
79  		 */
80  		msleep(1);
81  		return -EAGAIN;
82  	}
83  
84  	switch (cmd) {
85  	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
86  		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
87  
88  		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
89  		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
90  			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
91  		break;
92  	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
93  		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
94  
95  		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
96  			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
97  		break;
98  	default:
99  		ret = -EINVAL;
100  		break;
101  	}
102  
103  	mutex_unlock(&tr->mutex);
104  	return ret;
105  }
106  #endif
107  
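/* Only fentry/fexit/fmod_ret tracing programs and LSM_MAC programs are
 * dispatched through a bpf trampoline.
 */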
108  bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
109  {
110  	enum bpf_attach_type eatype = prog->expected_attach_type;
111  	enum bpf_prog_type ptype = prog->type;
112  
113  	return (ptype == BPF_PROG_TYPE_TRACING &&
114  		(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
115  		 eatype == BPF_MODIFY_RETURN)) ||
116  		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
117  }
118  
119  void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
120  {
121  	ksym->start = (unsigned long) data;
122  	ksym->end = ksym->start + PAGE_SIZE;
123  	bpf_ksym_add(ksym);
124  	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
125  			   PAGE_SIZE, false, ksym->name);
126  }
127  
128  void bpf_image_ksym_del(struct bpf_ksym *ksym)
129  {
130  	bpf_ksym_del(ksym);
131  	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
132  			   PAGE_SIZE, true, ksym->name);
133  }
134  
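/* Find the trampoline for @key in trampoline_table, or allocate a new one and
 * insert it. Returns with the refcount elevated; NULL on allocation failure.
 */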
135  static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
136  {
137  	struct bpf_trampoline *tr;
138  	struct hlist_head *head;
139  	int i;
140  
141  	mutex_lock(&trampoline_mutex);
142  	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
143  	hlist_for_each_entry(tr, head, hlist) {
144  		if (tr->key == key) {
145  			refcount_inc(&tr->refcnt);
146  			goto out;
147  		}
148  	}
149  	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
150  	if (!tr)
151  		goto out;
152  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
153  	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
154  	if (!tr->fops) {
155  		kfree(tr);
156  		tr = NULL;
157  		goto out;
158  	}
159  	tr->fops->private = tr;
160  	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
161  #endif
162  
163  	tr->key = key;
164  	INIT_HLIST_NODE(&tr->hlist);
165  	hlist_add_head(&tr->hlist, head);
166  	refcount_set(&tr->refcnt, 1);
167  	mutex_init(&tr->mutex);
168  	for (i = 0; i < BPF_TRAMP_MAX; i++)
169  		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
170  out:
171  	mutex_unlock(&trampoline_mutex);
172  	return tr;
173  }
174  
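/* Pin the module (if any) that contains tr->func.addr so it cannot be
 * unloaded while the trampoline is attached to it.
 */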
175  static int bpf_trampoline_module_get(struct bpf_trampoline *tr)
176  {
177  	struct module *mod;
178  	int err = 0;
179  
180  	preempt_disable();
181  	mod = __module_text_address((unsigned long) tr->func.addr);
182  	if (mod && !try_module_get(mod))
183  		err = -ENOENT;
184  	preempt_enable();
185  	tr->mod = mod;
186  	return err;
187  }
188  
189  static void bpf_trampoline_module_put(struct bpf_trampoline *tr)
190  {
191  	module_put(tr->mod);
192  	tr->mod = NULL;
193  }
194  
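/* Detach the trampoline image at @old_addr from tr->func.addr, either via the
 * ftrace direct-call API (ftrace-managed functions) or by un-patching the
 * call instruction directly.
 */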
195  static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
196  {
197  	void *ip = tr->func.addr;
198  	int ret;
199  
200  	if (tr->func.ftrace_managed)
201  		ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr);
202  	else
203  		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
204  
205  	if (!ret)
206  		bpf_trampoline_module_put(tr);
207  	return ret;
208  }
209  
210  static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
211  			 bool lock_direct_mutex)
212  {
213  	void *ip = tr->func.addr;
214  	int ret;
215  
216  	if (tr->func.ftrace_managed) {
217  		if (lock_direct_mutex)
218  			ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr);
219  		else
220  			ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr);
221  	} else {
222  		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
223  	}
224  	return ret;
225  }
226  
227  /* first time registering */
228  static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
229  {
230  	void *ip = tr->func.addr;
231  	unsigned long faddr;
232  	int ret;
233  
234  	faddr = ftrace_location((unsigned long)ip);
235  	if (faddr) {
236  		if (!tr->fops)
237  			return -ENOTSUPP;
238  		tr->func.ftrace_managed = true;
239  	}
240  
241  	if (bpf_trampoline_module_get(tr))
242  		return -ENOENT;
243  
244  	if (tr->func.ftrace_managed) {
245  		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
246  		ret = register_ftrace_direct_multi(tr->fops, (long)new_addr);
247  	} else {
248  		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
249  	}
250  
251  	if (ret)
252  		bpf_trampoline_module_put(tr);
253  	return ret;
254  }
255  
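/* Snapshot the links of every kind into a freshly allocated bpf_tramp_links
 * array for arch_prepare_bpf_trampoline(). *total is the overall number of
 * links and *ip_arg is set if any linked prog has call_get_func_ip set.
 */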
256  static struct bpf_tramp_links *
257  bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
258  {
259  	struct bpf_tramp_link *link;
260  	struct bpf_tramp_links *tlinks;
261  	struct bpf_tramp_link **links;
262  	int kind;
263  
264  	*total = 0;
265  	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
266  	if (!tlinks)
267  		return ERR_PTR(-ENOMEM);
268  
269  	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
270  		tlinks[kind].nr_links = tr->progs_cnt[kind];
271  		*total += tr->progs_cnt[kind];
272  		links = tlinks[kind].links;
273  
274  		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
275  			*ip_arg |= link->link.prog->call_get_func_ip;
276  			*links++ = link;
277  		}
278  	}
279  	return tlinks;
280  }
281  
282  static void __bpf_tramp_image_put_deferred(struct work_struct *work)
283  {
284  	struct bpf_tramp_image *im;
285  
286  	im = container_of(work, struct bpf_tramp_image, work);
287  	bpf_image_ksym_del(&im->ksym);
288  	bpf_jit_free_exec(im->image);
289  	bpf_jit_uncharge_modmem(PAGE_SIZE);
290  	percpu_ref_exit(&im->pcref);
291  	kfree_rcu(im, rcu);
292  }
293  
294  /* callback, fexit step 3 or fentry step 2 */
295  static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
296  {
297  	struct bpf_tramp_image *im;
298  
299  	im = container_of(rcu, struct bpf_tramp_image, rcu);
300  	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
301  	schedule_work(&im->work);
302  }
303  
304  /* callback, fexit step 2. Called after percpu_ref_kill confirms. */
305  static void __bpf_tramp_image_release(struct percpu_ref *pcref)
306  {
307  	struct bpf_tramp_image *im;
308  
309  	im = container_of(pcref, struct bpf_tramp_image, pcref);
310  	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
311  }
312  
313  /* callback, fexit or fentry step 1 */
314  static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
315  {
316  	struct bpf_tramp_image *im;
317  
318  	im = container_of(rcu, struct bpf_tramp_image, rcu);
319  	if (im->ip_after_call)
320  		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
321  		percpu_ref_kill(&im->pcref);
322  	else
323  		/* the case of fentry trampoline */
324  		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
325  }
326  
327  static void bpf_tramp_image_put(struct bpf_tramp_image *im)
328  {
329  	/* The trampoline image that calls the original function is using:
330  	 * rcu_read_lock_trace to protect sleepable bpf progs
331  	 * rcu_read_lock to protect normal bpf progs
332  	 * percpu_ref to protect trampoline itself
333  	 * rcu tasks to protect trampoline asm not covered by percpu_ref
334  	 * (which are the few asm insns before __bpf_tramp_enter and
335  	 *  after __bpf_tramp_exit)
336  	 *
337  	 * The trampoline is unreachable before bpf_tramp_image_put().
338  	 *
339  	 * First, patch the trampoline to avoid calling into fexit progs.
340  	 * The progs will be freed even if the original function is still
341  	 * executing or sleeping.
342  	 * In the CONFIG_PREEMPT=y case, use call_rcu_tasks() to wait for the
343  	 * first few asm instructions to execute and call into
344  	 * __bpf_tramp_enter->percpu_ref_get.
345  	 * Then use percpu_ref_kill to wait for the trampoline and the original
346  	 * function to finish.
347  	 * Then use call_rcu_tasks() to make sure the few asm insns in
348  	 * the trampoline epilogue are done as well.
349  	 *
350  	 * In the !PREEMPT case, a task that got interrupted in the first asm
351  	 * insns won't go through an RCU quiescent state, which the
352  	 * percpu_ref_kill will be waiting for. Hence the first
353  	 * call_rcu_tasks() is not necessary.
354  	 */
355  	if (im->ip_after_call) {
356  		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
357  					     NULL, im->ip_epilogue);
358  		WARN_ON(err);
359  		if (IS_ENABLED(CONFIG_PREEMPTION))
360  			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
361  		else
362  			percpu_ref_kill(&im->pcref);
363  		return;
364  	}
365  
366  	/* The trampoline without fexit and fmod_ret progs doesn't call the original
367  	 * function and doesn't use percpu_ref.
368  	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
369  	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
370  	 * and normal progs.
371  	 */
372  	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
373  }
374  
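/* Allocate one executable page for a new trampoline image, together with the
 * percpu_ref and ksym bookkeeping needed to free and unregister it safely
 * later.
 */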
375  static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
376  {
377  	struct bpf_tramp_image *im;
378  	struct bpf_ksym *ksym;
379  	void *image;
380  	int err = -ENOMEM;
381  
382  	im = kzalloc(sizeof(*im), GFP_KERNEL);
383  	if (!im)
384  		goto out;
385  
386  	err = bpf_jit_charge_modmem(PAGE_SIZE);
387  	if (err)
388  		goto out_free_im;
389  
390  	err = -ENOMEM;
391  	im->image = image = bpf_jit_alloc_exec(PAGE_SIZE);
392  	if (!image)
393  		goto out_uncharge;
394  	set_vm_flush_reset_perms(image);
395  
396  	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
397  	if (err)
398  		goto out_free_image;
399  
400  	ksym = &im->ksym;
401  	INIT_LIST_HEAD_RCU(&ksym->lnode);
402  	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx);
403  	bpf_image_ksym_add(image, ksym);
404  	return im;
405  
406  out_free_image:
407  	bpf_jit_free_exec(im->image);
408  out_uncharge:
409  	bpf_jit_uncharge_modmem(PAGE_SIZE);
410  out_free_im:
411  	kfree(im);
412  out:
413  	return ERR_PTR(err);
414  }
415  
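/* Regenerate the trampoline image from the currently attached links and
 * switch the traced function over to it; with no links left, detach and drop
 * the current image instead. Called with tr->mutex held.
 */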
416  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
417  {
418  	struct bpf_tramp_image *im;
419  	struct bpf_tramp_links *tlinks;
420  	u32 orig_flags = tr->flags;
421  	bool ip_arg = false;
422  	int err, total;
423  
424  	tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
425  	if (IS_ERR(tlinks))
426  		return PTR_ERR(tlinks);
427  
428  	if (total == 0) {
429  		err = unregister_fentry(tr, tr->cur_image->image);
430  		bpf_tramp_image_put(tr->cur_image);
431  		tr->cur_image = NULL;
432  		tr->selector = 0;
433  		goto out;
434  	}
435  
436  	im = bpf_tramp_image_alloc(tr->key, tr->selector);
437  	if (IS_ERR(im)) {
438  		err = PTR_ERR(im);
439  		goto out;
440  	}
441  
442  	/* clear all bits except SHARE_IPMODIFY */
443  	tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;
444  
445  	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
446  	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
447  		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
448  		 * should not be set together.
449  		 */
450  		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
451  	} else {
452  		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
453  	}
454  
455  	if (ip_arg)
456  		tr->flags |= BPF_TRAMP_F_IP_ARG;
457  
458  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
459  again:
460  	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
461  	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
462  		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
463  #endif
464  
465  	err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
466  					  &tr->func.model, tr->flags, tlinks,
467  					  tr->func.addr);
468  	if (err < 0)
469  		goto out;
470  
471  	set_memory_ro((long)im->image, 1);
472  	set_memory_x((long)im->image, 1);
473  
474  	WARN_ON(tr->cur_image && tr->selector == 0);
475  	WARN_ON(!tr->cur_image && tr->selector);
476  	if (tr->cur_image)
477  		/* progs already running at this address */
478  		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
479  	else
480  		/* first time registering */
481  		err = register_fentry(tr, im->image);
482  
483  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
484  	if (err == -EAGAIN) {
485  		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now that
486  		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
487  		 * trampoline again and retry the registration.
488  		 */
489  		/* reset fops->func and fops->trampoline for re-register */
490  		tr->fops->func = NULL;
491  		tr->fops->trampoline = 0;
492  		goto again;
493  	}
494  #endif
495  	if (err)
496  		goto out;
497  
498  	if (tr->cur_image)
499  		bpf_tramp_image_put(tr->cur_image);
500  	tr->cur_image = im;
501  	tr->selector++;
502  out:
503  	/* If any error happens, restore previous flags */
504  	if (err)
505  		tr->flags = orig_flags;
506  	kfree(tlinks);
507  	return err;
508  }
509  
510  static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
511  {
512  	switch (prog->expected_attach_type) {
513  	case BPF_TRACE_FENTRY:
514  		return BPF_TRAMP_FENTRY;
515  	case BPF_MODIFY_RETURN:
516  		return BPF_TRAMP_MODIFY_RETURN;
517  	case BPF_TRACE_FEXIT:
518  		return BPF_TRAMP_FEXIT;
519  	case BPF_LSM_MAC:
520  		if (!prog->aux->attach_func_proto->type)
521  			/* The function returns void, so we cannot modify its
522  			 * return value.
523  			 */
524  			return BPF_TRAMP_FEXIT;
525  		else
526  			return BPF_TRAMP_MODIFY_RETURN;
527  	default:
528  		return BPF_TRAMP_REPLACE;
529  	}
530  }
531  
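/* Attach @link to @tr. Extension (freplace) progs replace the target directly
 * and are mutually exclusive with fentry/fexit/fmod_ret links; everything
 * else is added to progs_hlist and the trampoline is regenerated.
 * Called with tr->mutex held.
 */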
532  static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
533  {
534  	enum bpf_tramp_prog_type kind;
535  	struct bpf_tramp_link *link_exiting;
536  	int err = 0;
537  	int cnt = 0, i;
538  
539  	kind = bpf_attach_type_to_tramp(link->link.prog);
540  	if (tr->extension_prog)
541  		/* cannot attach fentry/fexit if extension prog is attached.
542  		 * cannot overwrite extension prog either.
543  		 */
544  		return -EBUSY;
545  
546  	for (i = 0; i < BPF_TRAMP_MAX; i++)
547  		cnt += tr->progs_cnt[i];
548  
549  	if (kind == BPF_TRAMP_REPLACE) {
550  		/* Cannot attach extension if fentry/fexit are in use. */
551  		if (cnt)
552  			return -EBUSY;
553  		tr->extension_prog = link->link.prog;
554  		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
555  					  link->link.prog->bpf_func);
556  	}
557  	if (cnt >= BPF_MAX_TRAMP_LINKS)
558  		return -E2BIG;
559  	if (!hlist_unhashed(&link->tramp_hlist))
560  		/* prog already linked */
561  		return -EBUSY;
562  	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
563  		if (link_exiting->link.prog != link->link.prog)
564  			continue;
565  		/* prog already linked */
566  		return -EBUSY;
567  	}
568  
569  	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
570  	tr->progs_cnt[kind]++;
571  	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
572  	if (err) {
573  		hlist_del_init(&link->tramp_hlist);
574  		tr->progs_cnt[kind]--;
575  	}
576  	return err;
577  }
578  
579  int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
580  {
581  	int err;
582  
583  	mutex_lock(&tr->mutex);
584  	err = __bpf_trampoline_link_prog(link, tr);
585  	mutex_unlock(&tr->mutex);
586  	return err;
587  }
588  
589  static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
590  {
591  	enum bpf_tramp_prog_type kind;
592  	int err;
593  
594  	kind = bpf_attach_type_to_tramp(link->link.prog);
595  	if (kind == BPF_TRAMP_REPLACE) {
596  		WARN_ON_ONCE(!tr->extension_prog);
597  		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
598  					 tr->extension_prog->bpf_func, NULL);
599  		tr->extension_prog = NULL;
600  		return err;
601  	}
602  	hlist_del_init(&link->tramp_hlist);
603  	tr->progs_cnt[kind]--;
604  	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
605  }
606  
607  /* bpf_trampoline_unlink_prog() should never fail. */
608  int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
609  {
610  	int err;
611  
612  	mutex_lock(&tr->mutex);
613  	err = __bpf_trampoline_unlink_prog(link, tr);
614  	mutex_unlock(&tr->mutex);
615  	return err;
616  }
617  
618  #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
619  static void bpf_shim_tramp_link_release(struct bpf_link *link)
620  {
621  	struct bpf_shim_tramp_link *shim_link =
622  		container_of(link, struct bpf_shim_tramp_link, link.link);
623  
624  	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
625  	if (!shim_link->trampoline)
626  		return;
627  
628  	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
629  	bpf_trampoline_put(shim_link->trampoline);
630  }
631  
632  static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
633  {
634  	struct bpf_shim_tramp_link *shim_link =
635  		container_of(link, struct bpf_shim_tramp_link, link.link);
636  
637  	kfree(shim_link);
638  }
639  
640  static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
641  	.release = bpf_shim_tramp_link_release,
642  	.dealloc = bpf_shim_tramp_link_dealloc,
643  };
644  
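/* Build a dummy BPF_PROG_TYPE_LSM prog whose bpf_func is the cgroup shim,
 * wrapped in a bpf_shim_tramp_link so it can be attached to the trampoline
 * like a regular tramp link.
 */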
645  static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
646  						     bpf_func_t bpf_func,
647  						     int cgroup_atype)
648  {
649  	struct bpf_shim_tramp_link *shim_link = NULL;
650  	struct bpf_prog *p;
651  
652  	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
653  	if (!shim_link)
654  		return NULL;
655  
656  	p = bpf_prog_alloc(1, 0);
657  	if (!p) {
658  		kfree(shim_link);
659  		return NULL;
660  	}
661  
662  	p->jited = false;
663  	p->bpf_func = bpf_func;
664  
665  	p->aux->cgroup_atype = cgroup_atype;
666  	p->aux->attach_func_proto = prog->aux->attach_func_proto;
667  	p->aux->attach_btf_id = prog->aux->attach_btf_id;
668  	p->aux->attach_btf = prog->aux->attach_btf;
669  	btf_get(p->aux->attach_btf);
670  	p->type = BPF_PROG_TYPE_LSM;
671  	p->expected_attach_type = BPF_LSM_MAC;
672  	bpf_prog_inc(p);
673  	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
674  		      &bpf_shim_tramp_link_lops, p);
675  	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
676  
677  	return shim_link;
678  }
679  
680  static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
681  						    bpf_func_t bpf_func)
682  {
683  	struct bpf_tramp_link *link;
684  	int kind;
685  
686  	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
687  		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
688  			struct bpf_prog *p = link->link.prog;
689  
690  			if (p->bpf_func == bpf_func)
691  				return container_of(link, struct bpf_shim_tramp_link, link);
692  		}
693  	}
694  
695  	return NULL;
696  }
697  
698  int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
699  				    int cgroup_atype)
700  {
701  	struct bpf_shim_tramp_link *shim_link = NULL;
702  	struct bpf_attach_target_info tgt_info = {};
703  	struct bpf_trampoline *tr;
704  	bpf_func_t bpf_func;
705  	u64 key;
706  	int err;
707  
708  	err = bpf_check_attach_target(NULL, prog, NULL,
709  				      prog->aux->attach_btf_id,
710  				      &tgt_info);
711  	if (err)
712  		return err;
713  
714  	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
715  					 prog->aux->attach_btf_id);
716  
717  	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
718  	tr = bpf_trampoline_get(key, &tgt_info);
719  	if (!tr)
720  		return -ENOMEM;
721  
722  	mutex_lock(&tr->mutex);
723  
724  	shim_link = cgroup_shim_find(tr, bpf_func);
725  	if (shim_link) {
726  		/* Reusing an existing shim attached by another program. */
727  		bpf_link_inc(&shim_link->link.link);
728  
729  		mutex_unlock(&tr->mutex);
730  		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
731  		return 0;
732  	}
733  
734  	/* Allocate and install new shim. */
735  
736  	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
737  	if (!shim_link) {
738  		err = -ENOMEM;
739  		goto err;
740  	}
741  
742  	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
743  	if (err)
744  		goto err;
745  
746  	shim_link->trampoline = tr;
747  	/* note, we're still holding tr refcnt from above */
748  
749  	mutex_unlock(&tr->mutex);
750  
751  	return 0;
752  err:
753  	mutex_unlock(&tr->mutex);
754  
755  	if (shim_link)
756  		bpf_link_put(&shim_link->link.link);
757  
758  	/* have to release tr while _not_ holding its mutex */
759  	bpf_trampoline_put(tr); /* bpf_trampoline_get above */
760  
761  	return err;
762  }
763  
764  void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
765  {
766  	struct bpf_shim_tramp_link *shim_link = NULL;
767  	struct bpf_trampoline *tr;
768  	bpf_func_t bpf_func;
769  	u64 key;
770  
771  	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
772  					 prog->aux->attach_btf_id);
773  
774  	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
775  	tr = bpf_trampoline_lookup(key);
776  	if (WARN_ON_ONCE(!tr))
777  		return;
778  
779  	mutex_lock(&tr->mutex);
780  	shim_link = cgroup_shim_find(tr, bpf_func);
781  	mutex_unlock(&tr->mutex);
782  
783  	if (shim_link)
784  		bpf_link_put(&shim_link->link.link);
785  
786  	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
787  }
788  #endif
789  
790  struct bpf_trampoline *bpf_trampoline_get(u64 key,
791  					  struct bpf_attach_target_info *tgt_info)
792  {
793  	struct bpf_trampoline *tr;
794  
795  	tr = bpf_trampoline_lookup(key);
796  	if (!tr)
797  		return NULL;
798  
799  	mutex_lock(&tr->mutex);
800  	if (tr->func.addr)
801  		goto out;
802  
803  	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
804  	tr->func.addr = (void *)tgt_info->tgt_addr;
805  out:
806  	mutex_unlock(&tr->mutex);
807  	return tr;
808  }
809  
810  void bpf_trampoline_put(struct bpf_trampoline *tr)
811  {
812  	int i;
813  
814  	if (!tr)
815  		return;
816  	mutex_lock(&trampoline_mutex);
817  	if (!refcount_dec_and_test(&tr->refcnt))
818  		goto out;
819  	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
820  
821  	for (i = 0; i < BPF_TRAMP_MAX; i++)
822  		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
823  			goto out;
824  
825  	/* This code will be executed even when the last bpf_tramp_image
826  	 * is alive. All progs are detached from the trampoline and the
827  	 * trampoline image is patched with jmp into epilogue to skip
828  	 * fexit progs. The fentry-only trampoline will be freed via
829  	 * multiple rcu callbacks.
830  	 */
831  	hlist_del(&tr->hlist);
832  	if (tr->fops) {
833  		ftrace_free_filter(tr->fops);
834  		kfree(tr->fops);
835  	}
836  	kfree(tr);
837  out:
838  	mutex_unlock(&trampoline_mutex);
839  }
840  
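/* bpf_prog_start_time() maps both "stats disabled" and a zero sched_clock()
 * reading to NO_START_TIME; update_prog_stats() only records a sample when
 * start > NO_START_TIME.
 */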
841  #define NO_START_TIME 1
842  static __always_inline u64 notrace bpf_prog_start_time(void)
843  {
844  	u64 start = NO_START_TIME;
845  
846  	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
847  		start = sched_clock();
848  		if (unlikely(!start))
849  			start = NO_START_TIME;
850  	}
851  	return start;
852  }
853  
854  /* The logic is similar to bpf_prog_run(), but with an explicit
855   * rcu_read_lock() and migrate_disable() which are required
856   * for the trampoline. The macro is split into
857   * call __bpf_prog_enter
858   * call prog->bpf_func
859   * call __bpf_prog_exit
860   *
861   * __bpf_prog_enter returns:
862   * 0 - skip execution of the bpf prog
863   * 1 - execute bpf prog
864   * [2..MAX_U64] - execute bpf prog and record execution time.
865   *     This is start time.
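 *
 * A conceptual sketch of what the generated trampoline does for a single
 * fentry prog (illustrative C, not the actual emitted asm):
 *
 *	start = __bpf_prog_enter(prog, &run_ctx);
 *	if (start)
 *		prog->bpf_func(...);
 *	__bpf_prog_exit(prog, start, &run_ctx);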
866   */
867  u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
868  	__acquires(RCU)
869  {
870  	rcu_read_lock();
871  	migrate_disable();
872  
873  	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
874  
875  	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
876  		bpf_prog_inc_misses_counter(prog);
877  		return 0;
878  	}
879  	return bpf_prog_start_time();
880  }
881  
882  static void notrace update_prog_stats(struct bpf_prog *prog,
883  				      u64 start)
884  {
885  	struct bpf_prog_stats *stats;
886  
887  	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
888  	    /* static_key could be enabled in __bpf_prog_enter*
889  	     * and disabled in __bpf_prog_exit*.
890  	     * And vice versa.
891  	     * Hence check that 'start' is valid.
892  	     */
893  	    start > NO_START_TIME) {
894  		unsigned long flags;
895  
896  		stats = this_cpu_ptr(prog->stats);
897  		flags = u64_stats_update_begin_irqsave(&stats->syncp);
898  		u64_stats_inc(&stats->cnt);
899  		u64_stats_add(&stats->nsecs, sched_clock() - start);
900  		u64_stats_update_end_irqrestore(&stats->syncp, flags);
901  	}
902  }
903  
904  void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx)
905  	__releases(RCU)
906  {
907  	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
908  
909  	update_prog_stats(prog, start);
910  	this_cpu_dec(*(prog->active));
911  	migrate_enable();
912  	rcu_read_unlock();
913  }
914  
915  u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
916  					struct bpf_tramp_run_ctx *run_ctx)
917  	__acquires(RCU)
918  {
919  	/* Runtime stats are exported via actual BPF_LSM_CGROUP
920  	 * programs, not the shims.
921  	 */
922  	rcu_read_lock();
923  	migrate_disable();
924  
925  	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
926  
927  	return NO_START_TIME;
928  }
929  
930  void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
931  					struct bpf_tramp_run_ctx *run_ctx)
932  	__releases(RCU)
933  {
934  	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
935  
936  	migrate_enable();
937  	rcu_read_unlock();
938  }
939  
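/* Sleepable counterpart of __bpf_prog_enter(): sleepable progs are protected
 * by rcu_read_lock_trace() instead of rcu_read_lock(), and recursion is still
 * rejected via prog->active.
 */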
940  u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
941  {
942  	rcu_read_lock_trace();
943  	migrate_disable();
944  	might_fault();
945  
946  	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
947  		bpf_prog_inc_misses_counter(prog);
948  		return 0;
949  	}
950  
951  	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
952  
953  	return bpf_prog_start_time();
954  }
955  
956  void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
957  				       struct bpf_tramp_run_ctx *run_ctx)
958  {
959  	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
960  
961  	update_prog_stats(prog, start);
962  	this_cpu_dec(*(prog->active));
963  	migrate_enable();
964  	rcu_read_unlock_trace();
965  }
966  
967  u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
968  					struct bpf_tramp_run_ctx *run_ctx)
969  	__acquires(RCU)
970  {
971  	rcu_read_lock();
972  	migrate_disable();
973  
974  	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
975  
976  	return bpf_prog_start_time();
977  }
978  
979  void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
980  					struct bpf_tramp_run_ctx *run_ctx)
981  	__releases(RCU)
982  {
983  	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
984  
985  	update_prog_stats(prog, start);
986  	migrate_enable();
987  	rcu_read_unlock();
988  }
989  
990  void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
991  {
992  	percpu_ref_get(&tr->pcref);
993  }
994  
995  void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
996  {
997  	percpu_ref_put(&tr->pcref);
998  }
999  
1000  int __weak
1001  arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
1002  			    const struct btf_func_model *m, u32 flags,
1003  			    struct bpf_tramp_links *tlinks,
1004  			    void *orig_call)
1005  {
1006  	return -ENOTSUPP;
1007  }
1008  
1009  static int __init init_trampolines(void)
1010  {
1011  	int i;
1012  
1013  	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
1014  		INIT_HLIST_HEAD(&trampoline_table[i]);
1015  	return 0;
1016  }
1017  late_initcall(init_trampolines);
1018