// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>

/* dummy _ops. The verifier will operate on the target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

void *bpf_jit_alloc_exec_page(void)
{
        void *image;

        image = bpf_jit_alloc_exec(PAGE_SIZE);
        if (!image)
                return NULL;

        set_vm_flush_reset_perms(image);
        /* Keep image as writeable. The alternative is to keep flipping ro/rw
         * every time a new program is attached or detached.
         */
        set_memory_x((long)image, 1);
        return image;
}

void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
        ksym->start = (unsigned long) data;
        ksym->end = ksym->start + PAGE_SIZE;
        bpf_ksym_add(ksym);
        perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
                           PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
        bpf_ksym_del(ksym);
        perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
                           PAGE_SIZE, true, ksym->name);
}

static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
        struct bpf_ksym *ksym = &tr->ksym;

        snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
        bpf_image_ksym_add(tr->image, ksym);
}
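
/* Look up the trampoline for @key under trampoline_mutex and take a
 * reference, or allocate a fresh one on a miss: a zeroed struct, one
 * executable page for the generated code, and a BPF ksym entry so the
 * image's address range can be symbolized and announced to perf via
 * PERF_RECORD_KSYMBOL. Callers pair this with bpf_trampoline_put().
 */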
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
        struct bpf_trampoline *tr;
        struct hlist_head *head;
        void *image;
        int i;

        mutex_lock(&trampoline_mutex);
        head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
        hlist_for_each_entry(tr, head, hlist) {
                if (tr->key == key) {
                        refcount_inc(&tr->refcnt);
                        goto out;
                }
        }
        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
        if (!tr)
                goto out;

        /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
        image = bpf_jit_alloc_exec_page();
        if (!image) {
                kfree(tr);
                tr = NULL;
                goto out;
        }

        tr->key = key;
        INIT_HLIST_NODE(&tr->hlist);
        hlist_add_head(&tr->hlist, head);
        refcount_set(&tr->refcnt, 1);
        mutex_init(&tr->mutex);
        for (i = 0; i < BPF_TRAMP_MAX; i++)
                INIT_HLIST_HEAD(&tr->progs_hlist[i]);
        tr->image = image;
        INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
        bpf_trampoline_ksym_add(tr);
out:
        mutex_unlock(&trampoline_mutex);
        return tr;
}

static int is_ftrace_location(void *ip)
{
        long addr;

        addr = ftrace_location((long)ip);
        if (!addr)
                return 0;
        if (WARN_ON_ONCE(addr != (long)ip))
                return -EFAULT;
        return 1;
}

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = unregister_ftrace_direct((long)ip, (long)old_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
        return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
        return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
        void *ip = tr->func.addr;
        int ret;

        ret = is_ftrace_location(ip);
        if (ret < 0)
                return ret;
        tr->func.ftrace_managed = ret;

        if (tr->func.ftrace_managed)
                ret = register_ftrace_direct((long)ip, (long)new_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
        return ret;
}

static struct bpf_tramp_progs *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
{
        const struct bpf_prog_aux *aux;
        struct bpf_tramp_progs *tprogs;
        struct bpf_prog **progs;
        int kind;

        *total = 0;
        tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
        if (!tprogs)
                return ERR_PTR(-ENOMEM);

        for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
                tprogs[kind].nr_progs = tr->progs_cnt[kind];
                *total += tr->progs_cnt[kind];
                progs = tprogs[kind].progs;

                hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
                        *progs++ = aux->prog;
        }
        return tprogs;
}
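
/* Regenerate the trampoline code for the current set of attached progs.
 * The image page is treated as two halves: the low bit of tr->selector
 * picks the half that receives the newly generated code, while the
 * previously live half stays untouched until the fentry/ftrace patch
 * site has been switched over to the new half.
 */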
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
        void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
        void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
        struct bpf_tramp_progs *tprogs;
        u32 flags = BPF_TRAMP_F_RESTORE_REGS;
        int err, total;

        tprogs = bpf_trampoline_get_progs(tr, &total);
        if (IS_ERR(tprogs))
                return PTR_ERR(tprogs);

        if (total == 0) {
                err = unregister_fentry(tr, old_image);
                tr->selector = 0;
                goto out;
        }

        if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
            tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
                flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

        /* Though the second half of the trampoline page is unused, a task
         * could be preempted in the middle of the first half of the
         * trampoline, and two updates to the trampoline would change the
         * code from underneath the preempted task. Hence wait for tasks to
         * voluntarily schedule or go to userspace.
         */

        synchronize_rcu_tasks();

        err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
                                          &tr->func.model, flags, tprogs,
                                          tr->func.addr);
        if (err < 0)
                goto out;

        if (tr->selector)
                /* progs already running at this address */
                err = modify_fentry(tr, old_image, new_image);
        else
                /* first time registering */
                err = register_fentry(tr, new_image);
        if (err)
                goto out;
        tr->selector++;
out:
        kfree(tprogs);
        return err;
}

static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
        switch (prog->expected_attach_type) {
        case BPF_TRACE_FENTRY:
                return BPF_TRAMP_FENTRY;
        case BPF_MODIFY_RETURN:
                return BPF_TRAMP_MODIFY_RETURN;
        case BPF_TRACE_FEXIT:
                return BPF_TRAMP_FEXIT;
        case BPF_LSM_MAC:
                if (!prog->aux->attach_func_proto->type)
                        /* The function returns void, so we cannot modify its
                         * return value.
                         */
                        return BPF_TRAMP_FEXIT;
                else
                        return BPF_TRAMP_MODIFY_RETURN;
        default:
                return BPF_TRAMP_REPLACE;
        }
}
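
/* Attach @prog to the trampoline of its target. fentry/fexit/fmod_ret
 * programs are added to the per-kind list and the trampoline code is
 * regenerated; an extension (freplace) program instead replaces the
 * target entirely via a direct jump and is mutually exclusive with any
 * fentry/fexit attachment on the same target.
 */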
int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err = 0;
        int cnt;

        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog);
        mutex_lock(&tr->mutex);
        if (tr->extension_prog) {
                /* cannot attach fentry/fexit if extension prog is attached.
                 * cannot overwrite extension prog either.
                 */
                err = -EBUSY;
                goto out;
        }
        cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
        if (kind == BPF_TRAMP_REPLACE) {
                /* Cannot attach extension if fentry/fexit are in use. */
                if (cnt) {
                        err = -EBUSY;
                        goto out;
                }
                tr->extension_prog = prog;
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
                                         prog->bpf_func);
                goto out;
        }
        if (cnt >= BPF_MAX_TRAMP_PROGS) {
                err = -E2BIG;
                goto out;
        }
        if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
                /* prog already linked */
                err = -EBUSY;
                goto out;
        }
        hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
        tr->progs_cnt[kind]++;
        err = bpf_trampoline_update(prog->aux->trampoline);
        if (err) {
                hlist_del(&prog->aux->tramp_hlist);
                tr->progs_cnt[kind]--;
        }
out:
        mutex_unlock(&tr->mutex);
        return err;
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err;

        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog);
        mutex_lock(&tr->mutex);
        if (kind == BPF_TRAMP_REPLACE) {
                WARN_ON_ONCE(!tr->extension_prog);
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
                                         tr->extension_prog->bpf_func, NULL);
                tr->extension_prog = NULL;
                goto out;
        }
        hlist_del(&prog->aux->tramp_hlist);
        tr->progs_cnt[kind]--;
        err = bpf_trampoline_update(prog->aux->trampoline);
out:
        mutex_unlock(&tr->mutex);
        return err;
}

void bpf_trampoline_put(struct bpf_trampoline *tr)
{
        if (!tr)
                return;
        mutex_lock(&trampoline_mutex);
        if (!refcount_dec_and_test(&tr->refcnt))
                goto out;
        WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
                goto out;
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
                goto out;
        bpf_image_ksym_del(&tr->ksym);
        /* wait for tasks to get out of trampoline before freeing it */
        synchronize_rcu_tasks();
        bpf_jit_free_exec(tr->image);
        hlist_del(&tr->hlist);
        kfree(tr);
out:
        mutex_unlock(&trampoline_mutex);
}

/* The logic is similar to BPF_PROG_RUN, but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
u64 notrace __bpf_prog_enter(void)
        __acquires(RCU)
{
        u64 start = 0;

        rcu_read_lock();
        migrate_disable();
        if (static_branch_unlikely(&bpf_stats_enabled_key))
                start = sched_clock();
        return start;
}

void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
        __releases(RCU)
{
        struct bpf_prog_stats *stats;

        if (static_branch_unlikely(&bpf_stats_enabled_key) &&
            /* static_key could be enabled in __bpf_prog_enter
             * and disabled in __bpf_prog_exit.
             * And vice versa.
             * Hence check that 'start' is not zero.
             */
            start) {
                stats = this_cpu_ptr(prog->aux->stats);
                u64_stats_update_begin(&stats->syncp);
                stats->cnt++;
                stats->nsecs += sched_clock() - start;
                u64_stats_update_end(&stats->syncp);
        }
        migrate_enable();
        rcu_read_unlock();
}

int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
                            const struct btf_func_model *m, u32 flags,
                            struct bpf_tramp_progs *tprogs,
                            void *orig_call)
{
        return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
        int i;

        for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
                INIT_HLIST_HEAD(&trampoline_table[i]);
        return 0;
}
late_initcall(init_trampolines);