1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * inode.c - part of tracefs, a pseudo file system for activating tracing 4 * 5 * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com> 6 * 7 * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> 8 * 9 * tracefs is the file system that is used by the tracing infrastructure. 10 */ 11 12 #include <linux/module.h> 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/kobject.h> 16 #include <linux/namei.h> 17 #include <linux/tracefs.h> 18 #include <linux/fsnotify.h> 19 #include <linux/security.h> 20 #include <linux/seq_file.h> 21 #include <linux/parser.h> 22 #include <linux/magic.h> 23 #include <linux/slab.h> 24 25 #define TRACEFS_DEFAULT_MODE 0700 26 27 static struct vfsmount *tracefs_mount; 28 static int tracefs_mount_count; 29 static bool tracefs_registered; 30 31 static ssize_t default_read_file(struct file *file, char __user *buf, 32 size_t count, loff_t *ppos) 33 { 34 return 0; 35 } 36 37 static ssize_t default_write_file(struct file *file, const char __user *buf, 38 size_t count, loff_t *ppos) 39 { 40 return count; 41 } 42 43 static const struct file_operations tracefs_file_operations = { 44 .read = default_read_file, 45 .write = default_write_file, 46 .open = simple_open, 47 .llseek = noop_llseek, 48 }; 49 50 static struct tracefs_dir_ops { 51 int (*mkdir)(const char *name); 52 int (*rmdir)(const char *name); 53 } tracefs_ops __ro_after_init; 54 55 static char *get_dname(struct dentry *dentry) 56 { 57 const char *dname; 58 char *name; 59 int len = dentry->d_name.len; 60 61 dname = dentry->d_name.name; 62 name = kmalloc(len + 1, GFP_KERNEL); 63 if (!name) 64 return NULL; 65 memcpy(name, dname, len); 66 name[len] = 0; 67 return name; 68 } 69 70 static int tracefs_syscall_mkdir(struct user_namespace *mnt_userns, 71 struct inode *inode, struct dentry *dentry, 72 umode_t mode) 73 { 74 char *name; 75 int ret; 76 77 name = get_dname(dentry); 78 if (!name) 79 return -ENOMEM; 80 81 /* 82 * The mkdir call can call the generic functions that create 83 * the files within the tracefs system. It is up to the individual 84 * mkdir routine to handle races. 85 */ 86 inode_unlock(inode); 87 ret = tracefs_ops.mkdir(name); 88 inode_lock(inode); 89 90 kfree(name); 91 92 return ret; 93 } 94 95 static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) 96 { 97 char *name; 98 int ret; 99 100 name = get_dname(dentry); 101 if (!name) 102 return -ENOMEM; 103 104 /* 105 * The rmdir call can call the generic functions that create 106 * the files within the tracefs system. It is up to the individual 107 * rmdir routine to handle races. 108 * This time we need to unlock not only the parent (inode) but 109 * also the directory that is being deleted. 110 */ 111 inode_unlock(inode); 112 inode_unlock(d_inode(dentry)); 113 114 ret = tracefs_ops.rmdir(name); 115 116 inode_lock_nested(inode, I_MUTEX_PARENT); 117 inode_lock(d_inode(dentry)); 118 119 kfree(name); 120 121 return ret; 122 } 123 124 static const struct inode_operations tracefs_dir_inode_operations = { 125 .lookup = simple_lookup, 126 .mkdir = tracefs_syscall_mkdir, 127 .rmdir = tracefs_syscall_rmdir, 128 }; 129 130 static struct inode *tracefs_get_inode(struct super_block *sb) 131 { 132 struct inode *inode = new_inode(sb); 133 if (inode) { 134 inode->i_ino = get_next_ino(); 135 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 136 } 137 return inode; 138 } 139 140 struct tracefs_mount_opts { 141 kuid_t uid; 142 kgid_t gid; 143 umode_t mode; 144 /* Opt_* bitfield. */ 145 unsigned int opts; 146 }; 147 148 enum { 149 Opt_uid, 150 Opt_gid, 151 Opt_mode, 152 Opt_err 153 }; 154 155 static const match_table_t tokens = { 156 {Opt_uid, "uid=%u"}, 157 {Opt_gid, "gid=%u"}, 158 {Opt_mode, "mode=%o"}, 159 {Opt_err, NULL} 160 }; 161 162 struct tracefs_fs_info { 163 struct tracefs_mount_opts mount_opts; 164 }; 165 166 static void change_gid(struct dentry *dentry, kgid_t gid) 167 { 168 if (!dentry->d_inode) 169 return; 170 dentry->d_inode->i_gid = gid; 171 } 172 173 /* 174 * Taken from d_walk, but without he need for handling renames. 175 * Nothing can be renamed while walking the list, as tracefs 176 * does not support renames. This is only called when mounting 177 * or remounting the file system, to set all the files to 178 * the given gid. 179 */ 180 static void set_gid(struct dentry *parent, kgid_t gid) 181 { 182 struct dentry *this_parent; 183 struct list_head *next; 184 185 this_parent = parent; 186 spin_lock(&this_parent->d_lock); 187 188 change_gid(this_parent, gid); 189 repeat: 190 next = this_parent->d_subdirs.next; 191 resume: 192 while (next != &this_parent->d_subdirs) { 193 struct list_head *tmp = next; 194 struct dentry *dentry = list_entry(tmp, struct dentry, d_child); 195 next = tmp->next; 196 197 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 198 199 change_gid(dentry, gid); 200 201 if (!list_empty(&dentry->d_subdirs)) { 202 spin_unlock(&this_parent->d_lock); 203 spin_release(&dentry->d_lock.dep_map, _RET_IP_); 204 this_parent = dentry; 205 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); 206 goto repeat; 207 } 208 spin_unlock(&dentry->d_lock); 209 } 210 /* 211 * All done at this level ... ascend and resume the search. 212 */ 213 rcu_read_lock(); 214 ascend: 215 if (this_parent != parent) { 216 struct dentry *child = this_parent; 217 this_parent = child->d_parent; 218 219 spin_unlock(&child->d_lock); 220 spin_lock(&this_parent->d_lock); 221 222 /* go into the first sibling still alive */ 223 do { 224 next = child->d_child.next; 225 if (next == &this_parent->d_subdirs) 226 goto ascend; 227 child = list_entry(next, struct dentry, d_child); 228 } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); 229 rcu_read_unlock(); 230 goto resume; 231 } 232 rcu_read_unlock(); 233 spin_unlock(&this_parent->d_lock); 234 return; 235 } 236 237 static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) 238 { 239 substring_t args[MAX_OPT_ARGS]; 240 int option; 241 int token; 242 kuid_t uid; 243 kgid_t gid; 244 char *p; 245 246 opts->opts = 0; 247 opts->mode = TRACEFS_DEFAULT_MODE; 248 249 while ((p = strsep(&data, ",")) != NULL) { 250 if (!*p) 251 continue; 252 253 token = match_token(p, tokens, args); 254 switch (token) { 255 case Opt_uid: 256 if (match_int(&args[0], &option)) 257 return -EINVAL; 258 uid = make_kuid(current_user_ns(), option); 259 if (!uid_valid(uid)) 260 return -EINVAL; 261 opts->uid = uid; 262 break; 263 case Opt_gid: 264 if (match_int(&args[0], &option)) 265 return -EINVAL; 266 gid = make_kgid(current_user_ns(), option); 267 if (!gid_valid(gid)) 268 return -EINVAL; 269 opts->gid = gid; 270 break; 271 case Opt_mode: 272 if (match_octal(&args[0], &option)) 273 return -EINVAL; 274 opts->mode = option & S_IALLUGO; 275 break; 276 /* 277 * We might like to report bad mount options here; 278 * but traditionally tracefs has ignored all mount options 279 */ 280 } 281 282 opts->opts |= BIT(token); 283 } 284 285 return 0; 286 } 287 288 static int tracefs_apply_options(struct super_block *sb, bool remount) 289 { 290 struct tracefs_fs_info *fsi = sb->s_fs_info; 291 struct inode *inode = d_inode(sb->s_root); 292 struct tracefs_mount_opts *opts = &fsi->mount_opts; 293 294 /* 295 * On remount, only reset mode/uid/gid if they were provided as mount 296 * options. 297 */ 298 299 if (!remount || opts->opts & BIT(Opt_mode)) { 300 inode->i_mode &= ~S_IALLUGO; 301 inode->i_mode |= opts->mode; 302 } 303 304 if (!remount || opts->opts & BIT(Opt_uid)) 305 inode->i_uid = opts->uid; 306 307 if (!remount || opts->opts & BIT(Opt_gid)) { 308 /* Set all the group ids to the mount option */ 309 set_gid(sb->s_root, opts->gid); 310 } 311 312 return 0; 313 } 314 315 static int tracefs_remount(struct super_block *sb, int *flags, char *data) 316 { 317 int err; 318 struct tracefs_fs_info *fsi = sb->s_fs_info; 319 320 sync_filesystem(sb); 321 err = tracefs_parse_options(data, &fsi->mount_opts); 322 if (err) 323 goto fail; 324 325 tracefs_apply_options(sb, true); 326 327 fail: 328 return err; 329 } 330 331 static int tracefs_show_options(struct seq_file *m, struct dentry *root) 332 { 333 struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; 334 struct tracefs_mount_opts *opts = &fsi->mount_opts; 335 336 if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) 337 seq_printf(m, ",uid=%u", 338 from_kuid_munged(&init_user_ns, opts->uid)); 339 if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) 340 seq_printf(m, ",gid=%u", 341 from_kgid_munged(&init_user_ns, opts->gid)); 342 if (opts->mode != TRACEFS_DEFAULT_MODE) 343 seq_printf(m, ",mode=%o", opts->mode); 344 345 return 0; 346 } 347 348 static const struct super_operations tracefs_super_operations = { 349 .statfs = simple_statfs, 350 .remount_fs = tracefs_remount, 351 .show_options = tracefs_show_options, 352 }; 353 354 static int trace_fill_super(struct super_block *sb, void *data, int silent) 355 { 356 static const struct tree_descr trace_files[] = {{""}}; 357 struct tracefs_fs_info *fsi; 358 int err; 359 360 fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); 361 sb->s_fs_info = fsi; 362 if (!fsi) { 363 err = -ENOMEM; 364 goto fail; 365 } 366 367 err = tracefs_parse_options(data, &fsi->mount_opts); 368 if (err) 369 goto fail; 370 371 err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); 372 if (err) 373 goto fail; 374 375 sb->s_op = &tracefs_super_operations; 376 377 tracefs_apply_options(sb, false); 378 379 return 0; 380 381 fail: 382 kfree(fsi); 383 sb->s_fs_info = NULL; 384 return err; 385 } 386 387 static struct dentry *trace_mount(struct file_system_type *fs_type, 388 int flags, const char *dev_name, 389 void *data) 390 { 391 return mount_single(fs_type, flags, data, trace_fill_super); 392 } 393 394 static struct file_system_type trace_fs_type = { 395 .owner = THIS_MODULE, 396 .name = "tracefs", 397 .mount = trace_mount, 398 .kill_sb = kill_litter_super, 399 }; 400 MODULE_ALIAS_FS("tracefs"); 401 402 static struct dentry *start_creating(const char *name, struct dentry *parent) 403 { 404 struct dentry *dentry; 405 int error; 406 407 pr_debug("tracefs: creating file '%s'\n",name); 408 409 error = simple_pin_fs(&trace_fs_type, &tracefs_mount, 410 &tracefs_mount_count); 411 if (error) 412 return ERR_PTR(error); 413 414 /* If the parent is not specified, we create it in the root. 415 * We need the root dentry to do this, which is in the super 416 * block. A pointer to that is in the struct vfsmount that we 417 * have around. 418 */ 419 if (!parent) 420 parent = tracefs_mount->mnt_root; 421 422 inode_lock(d_inode(parent)); 423 if (unlikely(IS_DEADDIR(d_inode(parent)))) 424 dentry = ERR_PTR(-ENOENT); 425 else 426 dentry = lookup_one_len(name, parent, strlen(name)); 427 if (!IS_ERR(dentry) && d_inode(dentry)) { 428 dput(dentry); 429 dentry = ERR_PTR(-EEXIST); 430 } 431 432 if (IS_ERR(dentry)) { 433 inode_unlock(d_inode(parent)); 434 simple_release_fs(&tracefs_mount, &tracefs_mount_count); 435 } 436 437 return dentry; 438 } 439 440 static struct dentry *failed_creating(struct dentry *dentry) 441 { 442 inode_unlock(d_inode(dentry->d_parent)); 443 dput(dentry); 444 simple_release_fs(&tracefs_mount, &tracefs_mount_count); 445 return NULL; 446 } 447 448 static struct dentry *end_creating(struct dentry *dentry) 449 { 450 inode_unlock(d_inode(dentry->d_parent)); 451 return dentry; 452 } 453 454 /** 455 * tracefs_create_file - create a file in the tracefs filesystem 456 * @name: a pointer to a string containing the name of the file to create. 457 * @mode: the permission that the file should have. 458 * @parent: a pointer to the parent dentry for this file. This should be a 459 * directory dentry if set. If this parameter is NULL, then the 460 * file will be created in the root of the tracefs filesystem. 461 * @data: a pointer to something that the caller will want to get to later 462 * on. The inode.i_private pointer will point to this value on 463 * the open() call. 464 * @fops: a pointer to a struct file_operations that should be used for 465 * this file. 466 * 467 * This is the basic "create a file" function for tracefs. It allows for a 468 * wide range of flexibility in creating a file, or a directory (if you want 469 * to create a directory, the tracefs_create_dir() function is 470 * recommended to be used instead.) 471 * 472 * This function will return a pointer to a dentry if it succeeds. This 473 * pointer must be passed to the tracefs_remove() function when the file is 474 * to be removed (no automatic cleanup happens if your module is unloaded, 475 * you are responsible here.) If an error occurs, %NULL will be returned. 476 * 477 * If tracefs is not enabled in the kernel, the value -%ENODEV will be 478 * returned. 479 */ 480 struct dentry *tracefs_create_file(const char *name, umode_t mode, 481 struct dentry *parent, void *data, 482 const struct file_operations *fops) 483 { 484 struct dentry *dentry; 485 struct inode *inode; 486 487 if (security_locked_down(LOCKDOWN_TRACEFS)) 488 return NULL; 489 490 if (!(mode & S_IFMT)) 491 mode |= S_IFREG; 492 BUG_ON(!S_ISREG(mode)); 493 dentry = start_creating(name, parent); 494 495 if (IS_ERR(dentry)) 496 return NULL; 497 498 inode = tracefs_get_inode(dentry->d_sb); 499 if (unlikely(!inode)) 500 return failed_creating(dentry); 501 502 inode->i_mode = mode; 503 inode->i_fop = fops ? fops : &tracefs_file_operations; 504 inode->i_private = data; 505 inode->i_uid = d_inode(dentry->d_parent)->i_uid; 506 inode->i_gid = d_inode(dentry->d_parent)->i_gid; 507 d_instantiate(dentry, inode); 508 fsnotify_create(d_inode(dentry->d_parent), dentry); 509 return end_creating(dentry); 510 } 511 512 static struct dentry *__create_dir(const char *name, struct dentry *parent, 513 const struct inode_operations *ops) 514 { 515 struct dentry *dentry = start_creating(name, parent); 516 struct inode *inode; 517 518 if (IS_ERR(dentry)) 519 return NULL; 520 521 inode = tracefs_get_inode(dentry->d_sb); 522 if (unlikely(!inode)) 523 return failed_creating(dentry); 524 525 /* Do not set bits for OTH */ 526 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP; 527 inode->i_op = ops; 528 inode->i_fop = &simple_dir_operations; 529 inode->i_uid = d_inode(dentry->d_parent)->i_uid; 530 inode->i_gid = d_inode(dentry->d_parent)->i_gid; 531 532 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 533 inc_nlink(inode); 534 d_instantiate(dentry, inode); 535 inc_nlink(d_inode(dentry->d_parent)); 536 fsnotify_mkdir(d_inode(dentry->d_parent), dentry); 537 return end_creating(dentry); 538 } 539 540 /** 541 * tracefs_create_dir - create a directory in the tracefs filesystem 542 * @name: a pointer to a string containing the name of the directory to 543 * create. 544 * @parent: a pointer to the parent dentry for this file. This should be a 545 * directory dentry if set. If this parameter is NULL, then the 546 * directory will be created in the root of the tracefs filesystem. 547 * 548 * This function creates a directory in tracefs with the given name. 549 * 550 * This function will return a pointer to a dentry if it succeeds. This 551 * pointer must be passed to the tracefs_remove() function when the file is 552 * to be removed. If an error occurs, %NULL will be returned. 553 * 554 * If tracing is not enabled in the kernel, the value -%ENODEV will be 555 * returned. 556 */ 557 struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) 558 { 559 return __create_dir(name, parent, &simple_dir_inode_operations); 560 } 561 562 /** 563 * tracefs_create_instance_dir - create the tracing instances directory 564 * @name: The name of the instances directory to create 565 * @parent: The parent directory that the instances directory will exist 566 * @mkdir: The function to call when a mkdir is performed. 567 * @rmdir: The function to call when a rmdir is performed. 568 * 569 * Only one instances directory is allowed. 570 * 571 * The instances directory is special as it allows for mkdir and rmdir 572 * to be done by userspace. When a mkdir or rmdir is performed, the inode 573 * locks are released and the methods passed in (@mkdir and @rmdir) are 574 * called without locks and with the name of the directory being created 575 * within the instances directory. 576 * 577 * Returns the dentry of the instances directory. 578 */ 579 __init struct dentry *tracefs_create_instance_dir(const char *name, 580 struct dentry *parent, 581 int (*mkdir)(const char *name), 582 int (*rmdir)(const char *name)) 583 { 584 struct dentry *dentry; 585 586 /* Only allow one instance of the instances directory. */ 587 if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir)) 588 return NULL; 589 590 dentry = __create_dir(name, parent, &tracefs_dir_inode_operations); 591 if (!dentry) 592 return NULL; 593 594 tracefs_ops.mkdir = mkdir; 595 tracefs_ops.rmdir = rmdir; 596 597 return dentry; 598 } 599 600 static void remove_one(struct dentry *victim) 601 { 602 simple_release_fs(&tracefs_mount, &tracefs_mount_count); 603 } 604 605 /** 606 * tracefs_remove - recursively removes a directory 607 * @dentry: a pointer to a the dentry of the directory to be removed. 608 * 609 * This function recursively removes a directory tree in tracefs that 610 * was previously created with a call to another tracefs function 611 * (like tracefs_create_file() or variants thereof.) 612 */ 613 void tracefs_remove(struct dentry *dentry) 614 { 615 if (IS_ERR_OR_NULL(dentry)) 616 return; 617 618 simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count); 619 simple_recursive_removal(dentry, remove_one); 620 simple_release_fs(&tracefs_mount, &tracefs_mount_count); 621 } 622 623 /** 624 * tracefs_initialized - Tells whether tracefs has been registered 625 */ 626 bool tracefs_initialized(void) 627 { 628 return tracefs_registered; 629 } 630 631 static int __init tracefs_init(void) 632 { 633 int retval; 634 635 retval = sysfs_create_mount_point(kernel_kobj, "tracing"); 636 if (retval) 637 return -EINVAL; 638 639 retval = register_filesystem(&trace_fs_type); 640 if (!retval) 641 tracefs_registered = true; 642 643 return retval; 644 } 645 core_initcall(tracefs_init); 646