1 /* 2 * Minimal file system backend for holding eBPF maps and programs, 3 * used by bpf(2) object pinning. 4 * 5 * Authors: 6 * 7 * Daniel Borkmann <daniel@iogearbox.net> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * version 2 as published by the Free Software Foundation. 12 */ 13 14 #include <linux/init.h> 15 #include <linux/magic.h> 16 #include <linux/major.h> 17 #include <linux/mount.h> 18 #include <linux/namei.h> 19 #include <linux/fs.h> 20 #include <linux/kdev_t.h> 21 #include <linux/parser.h> 22 #include <linux/filter.h> 23 #include <linux/bpf.h> 24 25 enum bpf_type { 26 BPF_TYPE_UNSPEC = 0, 27 BPF_TYPE_PROG, 28 BPF_TYPE_MAP, 29 }; 30 31 static void *bpf_any_get(void *raw, enum bpf_type type) 32 { 33 switch (type) { 34 case BPF_TYPE_PROG: 35 raw = bpf_prog_inc(raw); 36 break; 37 case BPF_TYPE_MAP: 38 raw = bpf_map_inc(raw, true); 39 break; 40 default: 41 WARN_ON_ONCE(1); 42 break; 43 } 44 45 return raw; 46 } 47 48 static void bpf_any_put(void *raw, enum bpf_type type) 49 { 50 switch (type) { 51 case BPF_TYPE_PROG: 52 bpf_prog_put(raw); 53 break; 54 case BPF_TYPE_MAP: 55 bpf_map_put_with_uref(raw); 56 break; 57 default: 58 WARN_ON_ONCE(1); 59 break; 60 } 61 } 62 63 static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) 64 { 65 void *raw; 66 67 *type = BPF_TYPE_MAP; 68 raw = bpf_map_get_with_uref(ufd); 69 if (IS_ERR(raw)) { 70 *type = BPF_TYPE_PROG; 71 raw = bpf_prog_get(ufd); 72 } 73 74 return raw; 75 } 76 77 static const struct inode_operations bpf_dir_iops; 78 79 static const struct inode_operations bpf_prog_iops = { }; 80 static const struct inode_operations bpf_map_iops = { }; 81 82 static struct inode *bpf_get_inode(struct super_block *sb, 83 const struct inode *dir, 84 umode_t mode) 85 { 86 struct inode *inode; 87 88 switch (mode & S_IFMT) { 89 case S_IFDIR: 90 case S_IFREG: 91 case S_IFLNK: 92 break; 93 default: 94 return ERR_PTR(-EINVAL); 95 } 96 97 inode = new_inode(sb); 98 if (!inode) 99 return ERR_PTR(-ENOSPC); 100 101 inode->i_ino = get_next_ino(); 102 inode->i_atime = current_time(inode); 103 inode->i_mtime = inode->i_atime; 104 inode->i_ctime = inode->i_atime; 105 106 inode_init_owner(inode, dir, mode); 107 108 return inode; 109 } 110 111 static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) 112 { 113 *type = BPF_TYPE_UNSPEC; 114 if (inode->i_op == &bpf_prog_iops) 115 *type = BPF_TYPE_PROG; 116 else if (inode->i_op == &bpf_map_iops) 117 *type = BPF_TYPE_MAP; 118 else 119 return -EACCES; 120 121 return 0; 122 } 123 124 static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, 125 struct inode *dir) 126 { 127 d_instantiate(dentry, inode); 128 dget(dentry); 129 130 dir->i_mtime = current_time(dir); 131 dir->i_ctime = dir->i_mtime; 132 } 133 134 static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 135 { 136 struct inode *inode; 137 138 inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); 139 if (IS_ERR(inode)) 140 return PTR_ERR(inode); 141 142 inode->i_op = &bpf_dir_iops; 143 inode->i_fop = &simple_dir_operations; 144 145 inc_nlink(inode); 146 inc_nlink(dir); 147 148 bpf_dentry_finalize(dentry, inode, dir); 149 return 0; 150 } 151 152 static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, 153 umode_t mode, const struct inode_operations *iops) 154 { 155 struct inode *inode; 156 157 inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); 158 if (IS_ERR(inode)) 159 return PTR_ERR(inode); 160 161 inode->i_op = iops; 162 inode->i_private = dentry->d_fsdata; 163 164 bpf_dentry_finalize(dentry, inode, dir); 165 return 0; 166 } 167 168 static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, 169 dev_t devt) 170 { 171 enum bpf_type type = MINOR(devt); 172 173 if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || 174 dentry->d_fsdata == NULL) 175 return -EPERM; 176 177 switch (type) { 178 case BPF_TYPE_PROG: 179 return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); 180 case BPF_TYPE_MAP: 181 return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); 182 default: 183 return -EPERM; 184 } 185 } 186 187 static struct dentry * 188 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) 189 { 190 if (strchr(dentry->d_name.name, '.')) 191 return ERR_PTR(-EPERM); 192 193 return simple_lookup(dir, dentry, flags); 194 } 195 196 static int bpf_symlink(struct inode *dir, struct dentry *dentry, 197 const char *target) 198 { 199 char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); 200 struct inode *inode; 201 202 if (!link) 203 return -ENOMEM; 204 205 inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); 206 if (IS_ERR(inode)) { 207 kfree(link); 208 return PTR_ERR(inode); 209 } 210 211 inode->i_op = &simple_symlink_inode_operations; 212 inode->i_link = link; 213 214 bpf_dentry_finalize(dentry, inode, dir); 215 return 0; 216 } 217 218 static const struct inode_operations bpf_dir_iops = { 219 .lookup = bpf_lookup, 220 .mknod = bpf_mkobj, 221 .mkdir = bpf_mkdir, 222 .symlink = bpf_symlink, 223 .rmdir = simple_rmdir, 224 .rename = simple_rename, 225 .link = simple_link, 226 .unlink = simple_unlink, 227 }; 228 229 static int bpf_obj_do_pin(const struct filename *pathname, void *raw, 230 enum bpf_type type) 231 { 232 struct dentry *dentry; 233 struct inode *dir; 234 struct path path; 235 umode_t mode; 236 dev_t devt; 237 int ret; 238 239 dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); 240 if (IS_ERR(dentry)) 241 return PTR_ERR(dentry); 242 243 mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); 244 devt = MKDEV(UNNAMED_MAJOR, type); 245 246 ret = security_path_mknod(&path, dentry, mode, devt); 247 if (ret) 248 goto out; 249 250 dir = d_inode(path.dentry); 251 if (dir->i_op != &bpf_dir_iops) { 252 ret = -EPERM; 253 goto out; 254 } 255 256 dentry->d_fsdata = raw; 257 ret = vfs_mknod(dir, dentry, mode, devt); 258 dentry->d_fsdata = NULL; 259 out: 260 done_path_create(&path, dentry); 261 return ret; 262 } 263 264 int bpf_obj_pin_user(u32 ufd, const char __user *pathname) 265 { 266 struct filename *pname; 267 enum bpf_type type; 268 void *raw; 269 int ret; 270 271 pname = getname(pathname); 272 if (IS_ERR(pname)) 273 return PTR_ERR(pname); 274 275 raw = bpf_fd_probe_obj(ufd, &type); 276 if (IS_ERR(raw)) { 277 ret = PTR_ERR(raw); 278 goto out; 279 } 280 281 ret = bpf_obj_do_pin(pname, raw, type); 282 if (ret != 0) 283 bpf_any_put(raw, type); 284 out: 285 putname(pname); 286 return ret; 287 } 288 289 static void *bpf_obj_do_get(const struct filename *pathname, 290 enum bpf_type *type) 291 { 292 struct inode *inode; 293 struct path path; 294 void *raw; 295 int ret; 296 297 ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); 298 if (ret) 299 return ERR_PTR(ret); 300 301 inode = d_backing_inode(path.dentry); 302 ret = inode_permission(inode, MAY_WRITE); 303 if (ret) 304 goto out; 305 306 ret = bpf_inode_type(inode, type); 307 if (ret) 308 goto out; 309 310 raw = bpf_any_get(inode->i_private, *type); 311 if (!IS_ERR(raw)) 312 touch_atime(&path); 313 314 path_put(&path); 315 return raw; 316 out: 317 path_put(&path); 318 return ERR_PTR(ret); 319 } 320 321 int bpf_obj_get_user(const char __user *pathname) 322 { 323 enum bpf_type type = BPF_TYPE_UNSPEC; 324 struct filename *pname; 325 int ret = -ENOENT; 326 void *raw; 327 328 pname = getname(pathname); 329 if (IS_ERR(pname)) 330 return PTR_ERR(pname); 331 332 raw = bpf_obj_do_get(pname, &type); 333 if (IS_ERR(raw)) { 334 ret = PTR_ERR(raw); 335 goto out; 336 } 337 338 if (type == BPF_TYPE_PROG) 339 ret = bpf_prog_new_fd(raw); 340 else if (type == BPF_TYPE_MAP) 341 ret = bpf_map_new_fd(raw); 342 else 343 goto out; 344 345 if (ret < 0) 346 bpf_any_put(raw, type); 347 out: 348 putname(pname); 349 return ret; 350 } 351 352 static void bpf_evict_inode(struct inode *inode) 353 { 354 enum bpf_type type; 355 356 truncate_inode_pages_final(&inode->i_data); 357 clear_inode(inode); 358 359 if (S_ISLNK(inode->i_mode)) 360 kfree(inode->i_link); 361 if (!bpf_inode_type(inode, &type)) 362 bpf_any_put(inode->i_private, type); 363 } 364 365 static const struct super_operations bpf_super_ops = { 366 .statfs = simple_statfs, 367 .drop_inode = generic_delete_inode, 368 .show_options = generic_show_options, 369 .evict_inode = bpf_evict_inode, 370 }; 371 372 enum { 373 OPT_MODE, 374 OPT_ERR, 375 }; 376 377 static const match_table_t bpf_mount_tokens = { 378 { OPT_MODE, "mode=%o" }, 379 { OPT_ERR, NULL }, 380 }; 381 382 struct bpf_mount_opts { 383 umode_t mode; 384 }; 385 386 static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) 387 { 388 substring_t args[MAX_OPT_ARGS]; 389 int option, token; 390 char *ptr; 391 392 opts->mode = S_IRWXUGO; 393 394 while ((ptr = strsep(&data, ",")) != NULL) { 395 if (!*ptr) 396 continue; 397 398 token = match_token(ptr, bpf_mount_tokens, args); 399 switch (token) { 400 case OPT_MODE: 401 if (match_octal(&args[0], &option)) 402 return -EINVAL; 403 opts->mode = option & S_IALLUGO; 404 break; 405 /* We might like to report bad mount options here, but 406 * traditionally we've ignored all mount options, so we'd 407 * better continue to ignore non-existing options for bpf. 408 */ 409 } 410 } 411 412 return 0; 413 } 414 415 static int bpf_fill_super(struct super_block *sb, void *data, int silent) 416 { 417 static struct tree_descr bpf_rfiles[] = { { "" } }; 418 struct bpf_mount_opts opts; 419 struct inode *inode; 420 int ret; 421 422 save_mount_options(sb, data); 423 424 ret = bpf_parse_options(data, &opts); 425 if (ret) 426 return ret; 427 428 ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); 429 if (ret) 430 return ret; 431 432 sb->s_op = &bpf_super_ops; 433 434 inode = sb->s_root->d_inode; 435 inode->i_op = &bpf_dir_iops; 436 inode->i_mode &= ~S_IALLUGO; 437 inode->i_mode |= S_ISVTX | opts.mode; 438 439 return 0; 440 } 441 442 static struct dentry *bpf_mount(struct file_system_type *type, int flags, 443 const char *dev_name, void *data) 444 { 445 return mount_nodev(type, flags, data, bpf_fill_super); 446 } 447 448 static struct file_system_type bpf_fs_type = { 449 .owner = THIS_MODULE, 450 .name = "bpf", 451 .mount = bpf_mount, 452 .kill_sb = kill_litter_super, 453 }; 454 455 static int __init bpf_init(void) 456 { 457 int ret; 458 459 ret = sysfs_create_mount_point(fs_kobj, "bpf"); 460 if (ret) 461 return ret; 462 463 ret = register_filesystem(&bpf_fs_type); 464 if (ret) 465 sysfs_remove_mount_point(fs_kobj, "bpf"); 466 467 return ret; 468 } 469 fs_initcall(bpf_init); 470