// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/proc/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/cache.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/pid_namespace.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/completion.h>
#include <linux/poll.h>
#include <linux/printk.h>
#include <linux/file.h>
#include <linux/limits.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/magic.h>

#include <linux/uaccess.h>

#include "internal.h"

static void proc_evict_inode(struct inode *inode)
{
	struct proc_dir_entry *de;
	struct ctl_table_header *head;

	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);

	/* Stop tracking associated processes */
	put_pid(PROC_I(inode)->pid);

	/* Let go of any associated proc directory entry */
	de = PDE(inode);
	if (de)
		pde_put(de);

	head = PROC_I(inode)->sysctl;
	if (head) {
		RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
		proc_sys_evict_inode(inode, head);
	}
}

static struct kmem_cache *proc_inode_cachep __ro_after_init;
static struct kmem_cache *pde_opener_cache __ro_after_init;

static struct inode *proc_alloc_inode(struct super_block *sb)
{
	struct proc_inode *ei;

	ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	ei->pid = NULL;
	ei->fd = 0;
	ei->op.proc_get_link = NULL;
	ei->pde = NULL;
	ei->sysctl = NULL;
	ei->sysctl_entry = NULL;
	ei->ns_ops = NULL;
	return &ei->vfs_inode;
}

static void proc_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}

static void proc_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, proc_i_callback);
}

static void init_once(void *foo)
{
	struct proc_inode *ei = (struct proc_inode *) foo;

	inode_init_once(&ei->vfs_inode);
}

void __init proc_init_kmemcache(void)
{
	proc_inode_cachep = kmem_cache_create("proc_inode_cache",
					      sizeof(struct proc_inode),
					      0, (SLAB_RECLAIM_ACCOUNT|
						  SLAB_MEM_SPREAD|SLAB_ACCOUNT|
						  SLAB_PANIC),
					      init_once);
	pde_opener_cache =
		kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
				  SLAB_ACCOUNT|SLAB_PANIC, NULL);
	proc_dir_entry_cache = kmem_cache_create_usercopy(
		"proc_dir_entry", SIZEOF_PDE, 0, SLAB_PANIC,
		offsetof(struct proc_dir_entry, inline_name),
		SIZEOF_PDE_INLINE_NAME, NULL);
	BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE);
}

static int proc_show_options(struct seq_file *seq, struct dentry *root)
{
	struct super_block *sb = root->d_sb;
	struct pid_namespace *pid = sb->s_fs_info;

	if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
		seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
	if (pid->hide_pid != HIDEPID_OFF)
		seq_printf(seq, ",hidepid=%u", pid->hide_pid);

	return 0;
}

static const struct super_operations proc_sops = {
	.alloc_inode	= proc_alloc_inode,
	.destroy_inode	= proc_destroy_inode,
	.drop_inode	= generic_delete_inode,
	.evict_inode	= proc_evict_inode,
	.statfs		= simple_statfs,
	.remount_fs	= proc_remount,
	.show_options	= proc_show_options,
};
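/*
 * Illustrative note (not part of the original file): with the options
 * reported by proc_show_options() above, a remount such as
 *
 *	mount -o remount,hidepid=2,gid=42 /proc
 *
 * would show up in /proc/mounts with ",gid=42,hidepid=2" appended to the
 * proc entry's option string; the defaults (root gid, hidepid off) are
 * deliberately not printed.
 */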
enum {BIAS = -1U<<31};

static inline int use_pde(struct proc_dir_entry *pde)
{
	return likely(atomic_inc_unless_negative(&pde->in_use));
}

static void unuse_pde(struct proc_dir_entry *pde)
{
	if (unlikely(atomic_dec_return(&pde->in_use) == BIAS))
		complete(pde->pde_unload_completion);
}

/* pde is locked on entry, unlocked on exit */
static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
{
	/*
	 * close() (proc_reg_release()) can't delete an entry and proceed:
	 * ->release hook needs to be available at the right moment.
	 *
	 * rmmod (remove_proc_entry() et al) can't delete an entry and proceed:
	 * "struct file" needs to be available at the right moment.
	 *
	 * Therefore, first process to enter this function does ->release() and
	 * signals its completion to the other process which does nothing.
	 */
	if (pdeo->closing) {
		/* somebody else is doing that, just wait */
		DECLARE_COMPLETION_ONSTACK(c);
		pdeo->c = &c;
		spin_unlock(&pde->pde_unload_lock);
		wait_for_completion(&c);
	} else {
		struct file *file;
		struct completion *c;

		pdeo->closing = true;
		spin_unlock(&pde->pde_unload_lock);
		file = pdeo->file;
		pde->proc_fops->release(file_inode(file), file);
		spin_lock(&pde->pde_unload_lock);
		/* After ->release. */
		list_del(&pdeo->lh);
		c = pdeo->c;
		spin_unlock(&pde->pde_unload_lock);
		if (unlikely(c))
			complete(c);
		kmem_cache_free(pde_opener_cache, pdeo);
	}
}

void proc_entry_rundown(struct proc_dir_entry *de)
{
	DECLARE_COMPLETION_ONSTACK(c);
	/* Wait until all existing callers into module are done. */
	de->pde_unload_completion = &c;
	if (atomic_add_return(BIAS, &de->in_use) != BIAS)
		wait_for_completion(&c);

	/* ->pde_openers list can't grow from now on. */

	spin_lock(&de->pde_unload_lock);
	while (!list_empty(&de->pde_openers)) {
		struct pde_opener *pdeo;
		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
		close_pdeo(de, pdeo);
		spin_lock(&de->pde_unload_lock);
	}
	spin_unlock(&de->pde_unload_lock);
}
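/*
 * Explanatory note, derived from the code above (not part of the original
 * file): use_pde() pins a proc_dir_entry by incrementing ->in_use, but only
 * while the counter is non-negative.  proc_entry_rundown() adds BIAS (the
 * sign bit), driving the counter negative so no new user can get in, and
 * then waits for the last in-flight user: when unuse_pde() drops the counter
 * back to exactly BIAS it completes ->pde_unload_completion.  After that the
 * remaining openers on ->pde_openers are force-closed via close_pdeo(), and
 * the entry's ->proc_fops (and the module that provides them) can safely
 * disappear.
 */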
static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
{
	struct proc_dir_entry *pde = PDE(file_inode(file));
	loff_t rv = -EINVAL;
	if (use_pde(pde)) {
		loff_t (*llseek)(struct file *, loff_t, int);
		llseek = pde->proc_fops->llseek;
		if (!llseek)
			llseek = default_llseek;
		rv = llseek(file, offset, whence);
		unuse_pde(pde);
	}
	return rv;
}

static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
	struct proc_dir_entry *pde = PDE(file_inode(file));
	ssize_t rv = -EIO;
	if (use_pde(pde)) {
		read = pde->proc_fops->read;
		if (read)
			rv = read(file, buf, count, ppos);
		unuse_pde(pde);
	}
	return rv;
}

static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
	struct proc_dir_entry *pde = PDE(file_inode(file));
	ssize_t rv = -EIO;
	if (use_pde(pde)) {
		write = pde->proc_fops->write;
		if (write)
			rv = write(file, buf, count, ppos);
		unuse_pde(pde);
	}
	return rv;
}

static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts)
{
	struct proc_dir_entry *pde = PDE(file_inode(file));
	__poll_t rv = DEFAULT_POLLMASK;
	__poll_t (*poll)(struct file *, struct poll_table_struct *);
	if (use_pde(pde)) {
		poll = pde->proc_fops->poll;
		if (poll)
			rv = poll(file, pts);
		unuse_pde(pde);
	}
	return rv;
}

static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct proc_dir_entry *pde = PDE(file_inode(file));
	long rv = -ENOTTY;
	long (*ioctl)(struct file *, unsigned int, unsigned long);
	if (use_pde(pde)) {
		ioctl = pde->proc_fops->unlocked_ioctl;
		if (ioctl)
			rv = ioctl(file, cmd, arg);
		unuse_pde(pde);
	}
	return rv;
}

#ifdef CONFIG_COMPAT
static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct proc_dir_entry *pde = PDE(file_inode(file));
	long rv = -ENOTTY;
	long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
	if (use_pde(pde)) {
		compat_ioctl = pde->proc_fops->compat_ioctl;
		if (compat_ioctl)
			rv = compat_ioctl(file, cmd, arg);
		unuse_pde(pde);
	}
	return rv;
}
#endif

static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct proc_dir_entry *pde = PDE(file_inode(file));
	int rv = -EIO;
	int (*mmap)(struct file *, struct vm_area_struct *);
	if (use_pde(pde)) {
		mmap = pde->proc_fops->mmap;
		if (mmap)
			rv = mmap(file, vma);
		unuse_pde(pde);
	}
	return rv;
}
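/*
 * Explanatory note (not part of the original file): the proc_reg_*()
 * wrappers in this file all follow the same pattern: pin the entry with
 * use_pde(), forward to the method in ->proc_fops if one is set (falling
 * back to a default or an error code otherwise), then drop the pin with
 * unuse_pde().  This bracketing is what lets remove_proc_entry()/rmmod
 * wait out in-flight calls instead of racing with them.
 */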
static unsigned long
proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
			   unsigned long len, unsigned long pgoff,
			   unsigned long flags)
{
	struct proc_dir_entry *pde = PDE(file_inode(file));
	unsigned long rv = -EIO;

	if (use_pde(pde)) {
		typeof(proc_reg_get_unmapped_area) *get_area;

		get_area = pde->proc_fops->get_unmapped_area;
#ifdef CONFIG_MMU
		if (!get_area)
			get_area = current->mm->get_unmapped_area;
#endif

		if (get_area)
			rv = get_area(file, orig_addr, len, pgoff, flags);
		else
			rv = orig_addr;
		unuse_pde(pde);
	}
	return rv;
}

static int proc_reg_open(struct inode *inode, struct file *file)
{
	struct proc_dir_entry *pde = PDE(inode);
	int rv = 0;
	int (*open)(struct inode *, struct file *);
	int (*release)(struct inode *, struct file *);
	struct pde_opener *pdeo;

	/*
	 * Ensure that
	 * 1) PDE's ->release hook will be called no matter what
	 *    either normally by close()/->release, or forcefully by
	 *    rmmod/remove_proc_entry.
	 *
	 * 2) rmmod isn't blocked by opening file in /proc and sitting on
	 *    the descriptor (including "rmmod foo </proc/foo" scenario).
	 *
	 * Save every "struct file" with custom ->release hook.
	 */
	if (!use_pde(pde))
		return -ENOENT;

	release = pde->proc_fops->release;
	if (release) {
		pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL);
		if (!pdeo) {
			rv = -ENOMEM;
			goto out_unuse;
		}
	}

	open = pde->proc_fops->open;
	if (open)
		rv = open(inode, file);

	if (release) {
		if (rv == 0) {
			/* To know what to release. */
			pdeo->file = file;
			pdeo->closing = false;
			pdeo->c = NULL;
			spin_lock(&pde->pde_unload_lock);
			list_add(&pdeo->lh, &pde->pde_openers);
			spin_unlock(&pde->pde_unload_lock);
		} else
			kmem_cache_free(pde_opener_cache, pdeo);
	}

out_unuse:
	unuse_pde(pde);
	return rv;
}

static int proc_reg_release(struct inode *inode, struct file *file)
{
	struct proc_dir_entry *pde = PDE(inode);
	struct pde_opener *pdeo;
	spin_lock(&pde->pde_unload_lock);
	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
		if (pdeo->file == file) {
			close_pdeo(pde, pdeo);
			return 0;
		}
	}
	spin_unlock(&pde->pde_unload_lock);
	return 0;
}

static const struct file_operations proc_reg_file_ops = {
	.llseek		= proc_reg_llseek,
	.read		= proc_reg_read,
	.write		= proc_reg_write,
	.poll		= proc_reg_poll,
	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= proc_reg_compat_ioctl,
#endif
	.mmap		= proc_reg_mmap,
	.get_unmapped_area = proc_reg_get_unmapped_area,
	.open		= proc_reg_open,
	.release	= proc_reg_release,
};

#ifdef CONFIG_COMPAT
static const struct file_operations proc_reg_file_ops_no_compat = {
	.llseek		= proc_reg_llseek,
	.read		= proc_reg_read,
	.write		= proc_reg_write,
	.poll		= proc_reg_poll,
	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
	.mmap		= proc_reg_mmap,
	.get_unmapped_area = proc_reg_get_unmapped_area,
	.open		= proc_reg_open,
	.release	= proc_reg_release,
};
#endif

static void proc_put_link(void *p)
{
	unuse_pde(p);
}

static const char *proc_get_link(struct dentry *dentry,
				 struct inode *inode,
				 struct delayed_call *done)
{
	struct proc_dir_entry *pde = PDE(inode);
	if (!use_pde(pde))
		return ERR_PTR(-EINVAL);
	set_delayed_call(done, proc_put_link, pde);
	return pde->data;
}

const struct inode_operations proc_link_inode_operations = {
	.get_link	= proc_get_link,
};
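/*
 * Illustrative sketch (not part of the original file): the pde_opener
 * machinery above exists for entries like the one a module might create as
 * follows; all names here are hypothetical.
 *
 *	static int foo_release(struct inode *inode, struct file *file)
 *	{
 *		// free per-open state
 *		return 0;
 *	}
 *
 *	static const struct file_operations foo_fops = {
 *		.owner   = THIS_MODULE,
 *		.open    = foo_open,
 *		.read    = seq_read,
 *		.release = foo_release,
 *	};
 *
 *	proc_create("foo", 0444, NULL, &foo_fops);
 *
 * Because foo_fops has a ->release hook, proc_reg_open() records the
 * "struct file" in a pde_opener, so remove_proc_entry() can still invoke
 * foo_release() even if some process keeps the file open
 * ("rmmod foo </proc/foo").
 */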
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
	struct inode *inode = new_inode_pseudo(sb);

	if (inode) {
		inode->i_ino = de->low_ino;
		inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
		PROC_I(inode)->pde = de;

		if (is_empty_pde(de)) {
			make_empty_dir_inode(inode);
			return inode;
		}
		if (de->mode) {
			inode->i_mode = de->mode;
			inode->i_uid = de->uid;
			inode->i_gid = de->gid;
		}
		if (de->size)
			inode->i_size = de->size;
		if (de->nlink)
			set_nlink(inode, de->nlink);
		WARN_ON(!de->proc_iops);
		inode->i_op = de->proc_iops;
		if (de->proc_fops) {
			if (S_ISREG(inode->i_mode)) {
#ifdef CONFIG_COMPAT
				if (!de->proc_fops->compat_ioctl)
					inode->i_fop =
						&proc_reg_file_ops_no_compat;
				else
#endif
					inode->i_fop = &proc_reg_file_ops;
			} else {
				inode->i_fop = de->proc_fops;
			}
		}
	} else
		pde_put(de);
	return inode;
}

int proc_fill_super(struct super_block *s, void *data, int silent)
{
	struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
	struct inode *root_inode;
	int ret;

	if (!proc_parse_options(data, ns))
		return -EINVAL;

	/* User space would break if executables or devices appear on proc */
	s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
	s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
	s->s_blocksize = 1024;
	s->s_blocksize_bits = 10;
	s->s_magic = PROC_SUPER_MAGIC;
	s->s_op = &proc_sops;
	s->s_time_gran = 1;

	/*
	 * procfs isn't actually a stacking filesystem; however, there is
	 * too much magic going on inside it to permit stacking things on
	 * top of it
	 */
	s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;

	/* procfs dentries and inodes don't require IO to create */
	s->s_shrink.seeks = 0;

	pde_get(&proc_root);
	root_inode = proc_get_inode(s, &proc_root);
	if (!root_inode) {
		pr_err("proc_fill_super: get root inode failed\n");
		return -ENOMEM;
	}

	s->s_root = d_make_root(root_inode);
	if (!s->s_root) {
		pr_err("proc_fill_super: allocate dentry failed\n");
		return -ENOMEM;
	}

	ret = proc_setup_self(s);
	if (ret) {
		return ret;
	}
	return proc_setup_thread_self(s);
}
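/*
 * Explanatory note (not part of the original file): the error paths in
 * proc_fill_super() return without undoing earlier steps on purpose.  When
 * fill_super fails, the caller deactivates the half-built superblock and
 * proc's ->kill_sb hook (proc_kill_sb() in fs/proc/root.c) releases whatever
 * was already set up: the root dentry, the self/thread_self dentries and the
 * pid namespace reference taken above.
 */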