1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/fs/proc/inode.c 4 * 5 * Copyright (C) 1991, 1992 Linus Torvalds 6 */ 7 8 #include <linux/cache.h> 9 #include <linux/time.h> 10 #include <linux/proc_fs.h> 11 #include <linux/kernel.h> 12 #include <linux/pid_namespace.h> 13 #include <linux/mm.h> 14 #include <linux/string.h> 15 #include <linux/stat.h> 16 #include <linux/completion.h> 17 #include <linux/poll.h> 18 #include <linux/printk.h> 19 #include <linux/file.h> 20 #include <linux/limits.h> 21 #include <linux/init.h> 22 #include <linux/module.h> 23 #include <linux/sysctl.h> 24 #include <linux/seq_file.h> 25 #include <linux/slab.h> 26 #include <linux/mount.h> 27 #include <linux/magic.h> 28 29 #include <linux/uaccess.h> 30 31 #include "internal.h" 32 33 static void proc_evict_inode(struct inode *inode) 34 { 35 struct proc_dir_entry *de; 36 struct ctl_table_header *head; 37 38 truncate_inode_pages_final(&inode->i_data); 39 clear_inode(inode); 40 41 /* Stop tracking associated processes */ 42 put_pid(PROC_I(inode)->pid); 43 44 /* Let go of any associated proc directory entry */ 45 de = PDE(inode); 46 if (de) 47 pde_put(de); 48 49 head = PROC_I(inode)->sysctl; 50 if (head) { 51 RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); 52 proc_sys_evict_inode(inode, head); 53 } 54 } 55 56 static struct kmem_cache *proc_inode_cachep __ro_after_init; 57 58 static struct inode *proc_alloc_inode(struct super_block *sb) 59 { 60 struct proc_inode *ei; 61 struct inode *inode; 62 63 ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); 64 if (!ei) 65 return NULL; 66 ei->pid = NULL; 67 ei->fd = 0; 68 ei->op.proc_get_link = NULL; 69 ei->pde = NULL; 70 ei->sysctl = NULL; 71 ei->sysctl_entry = NULL; 72 ei->ns_ops = NULL; 73 inode = &ei->vfs_inode; 74 return inode; 75 } 76 77 static void proc_i_callback(struct rcu_head *head) 78 { 79 struct inode *inode = container_of(head, struct inode, i_rcu); 80 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 81 } 82 83 static void proc_destroy_inode(struct inode *inode) 84 { 85 call_rcu(&inode->i_rcu, proc_i_callback); 86 } 87 88 static void init_once(void *foo) 89 { 90 struct proc_inode *ei = (struct proc_inode *) foo; 91 92 inode_init_once(&ei->vfs_inode); 93 } 94 95 void __init proc_init_inodecache(void) 96 { 97 proc_inode_cachep = kmem_cache_create("proc_inode_cache", 98 sizeof(struct proc_inode), 99 0, (SLAB_RECLAIM_ACCOUNT| 100 SLAB_MEM_SPREAD|SLAB_ACCOUNT| 101 SLAB_PANIC), 102 init_once); 103 } 104 105 static int proc_show_options(struct seq_file *seq, struct dentry *root) 106 { 107 struct super_block *sb = root->d_sb; 108 struct pid_namespace *pid = sb->s_fs_info; 109 110 if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) 111 seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); 112 if (pid->hide_pid != HIDEPID_OFF) 113 seq_printf(seq, ",hidepid=%u", pid->hide_pid); 114 115 return 0; 116 } 117 118 static const struct super_operations proc_sops = { 119 .alloc_inode = proc_alloc_inode, 120 .destroy_inode = proc_destroy_inode, 121 .drop_inode = generic_delete_inode, 122 .evict_inode = proc_evict_inode, 123 .statfs = simple_statfs, 124 .remount_fs = proc_remount, 125 .show_options = proc_show_options, 126 }; 127 128 enum {BIAS = -1U<<31}; 129 130 static inline int use_pde(struct proc_dir_entry *pde) 131 { 132 return likely(atomic_inc_unless_negative(&pde->in_use)); 133 } 134 135 static void unuse_pde(struct proc_dir_entry *pde) 136 { 137 if (unlikely(atomic_dec_return(&pde->in_use) == BIAS)) 138 complete(pde->pde_unload_completion); 139 } 140 141 /* pde is locked */ 142 static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) 143 { 144 /* 145 * close() (proc_reg_release()) can't delete an entry and proceed: 146 * ->release hook needs to be available at the right moment. 147 * 148 * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: 149 * "struct file" needs to be available at the right moment. 150 * 151 * Therefore, first process to enter this function does ->release() and 152 * signals its completion to the other process which does nothing. 153 */ 154 if (pdeo->closing) { 155 /* somebody else is doing that, just wait */ 156 DECLARE_COMPLETION_ONSTACK(c); 157 pdeo->c = &c; 158 spin_unlock(&pde->pde_unload_lock); 159 wait_for_completion(&c); 160 spin_lock(&pde->pde_unload_lock); 161 } else { 162 struct file *file; 163 pdeo->closing = true; 164 spin_unlock(&pde->pde_unload_lock); 165 file = pdeo->file; 166 pde->proc_fops->release(file_inode(file), file); 167 spin_lock(&pde->pde_unload_lock); 168 /* After ->release. */ 169 list_del(&pdeo->lh); 170 if (unlikely(pdeo->c)) 171 complete(pdeo->c); 172 kfree(pdeo); 173 } 174 } 175 176 void proc_entry_rundown(struct proc_dir_entry *de) 177 { 178 DECLARE_COMPLETION_ONSTACK(c); 179 /* Wait until all existing callers into module are done. */ 180 de->pde_unload_completion = &c; 181 if (atomic_add_return(BIAS, &de->in_use) != BIAS) 182 wait_for_completion(&c); 183 184 /* ->pde_openers list can't grow from now on. */ 185 186 spin_lock(&de->pde_unload_lock); 187 while (!list_empty(&de->pde_openers)) { 188 struct pde_opener *pdeo; 189 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); 190 close_pdeo(de, pdeo); 191 } 192 spin_unlock(&de->pde_unload_lock); 193 } 194 195 static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) 196 { 197 struct proc_dir_entry *pde = PDE(file_inode(file)); 198 loff_t rv = -EINVAL; 199 if (use_pde(pde)) { 200 loff_t (*llseek)(struct file *, loff_t, int); 201 llseek = pde->proc_fops->llseek; 202 if (!llseek) 203 llseek = default_llseek; 204 rv = llseek(file, offset, whence); 205 unuse_pde(pde); 206 } 207 return rv; 208 } 209 210 static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 211 { 212 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); 213 struct proc_dir_entry *pde = PDE(file_inode(file)); 214 ssize_t rv = -EIO; 215 if (use_pde(pde)) { 216 read = pde->proc_fops->read; 217 if (read) 218 rv = read(file, buf, count, ppos); 219 unuse_pde(pde); 220 } 221 return rv; 222 } 223 224 static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 225 { 226 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 227 struct proc_dir_entry *pde = PDE(file_inode(file)); 228 ssize_t rv = -EIO; 229 if (use_pde(pde)) { 230 write = pde->proc_fops->write; 231 if (write) 232 rv = write(file, buf, count, ppos); 233 unuse_pde(pde); 234 } 235 return rv; 236 } 237 238 static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts) 239 { 240 struct proc_dir_entry *pde = PDE(file_inode(file)); 241 __poll_t rv = DEFAULT_POLLMASK; 242 __poll_t (*poll)(struct file *, struct poll_table_struct *); 243 if (use_pde(pde)) { 244 poll = pde->proc_fops->poll; 245 if (poll) 246 rv = poll(file, pts); 247 unuse_pde(pde); 248 } 249 return rv; 250 } 251 252 static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 253 { 254 struct proc_dir_entry *pde = PDE(file_inode(file)); 255 long rv = -ENOTTY; 256 long (*ioctl)(struct file *, unsigned int, unsigned long); 257 if (use_pde(pde)) { 258 ioctl = pde->proc_fops->unlocked_ioctl; 259 if (ioctl) 260 rv = ioctl(file, cmd, arg); 261 unuse_pde(pde); 262 } 263 return rv; 264 } 265 266 #ifdef CONFIG_COMPAT 267 static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 268 { 269 struct proc_dir_entry *pde = PDE(file_inode(file)); 270 long rv = -ENOTTY; 271 long (*compat_ioctl)(struct file *, unsigned int, unsigned long); 272 if (use_pde(pde)) { 273 compat_ioctl = pde->proc_fops->compat_ioctl; 274 if (compat_ioctl) 275 rv = compat_ioctl(file, cmd, arg); 276 unuse_pde(pde); 277 } 278 return rv; 279 } 280 #endif 281 282 static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) 283 { 284 struct proc_dir_entry *pde = PDE(file_inode(file)); 285 int rv = -EIO; 286 int (*mmap)(struct file *, struct vm_area_struct *); 287 if (use_pde(pde)) { 288 mmap = pde->proc_fops->mmap; 289 if (mmap) 290 rv = mmap(file, vma); 291 unuse_pde(pde); 292 } 293 return rv; 294 } 295 296 static unsigned long 297 proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, 298 unsigned long len, unsigned long pgoff, 299 unsigned long flags) 300 { 301 struct proc_dir_entry *pde = PDE(file_inode(file)); 302 unsigned long rv = -EIO; 303 304 if (use_pde(pde)) { 305 typeof(proc_reg_get_unmapped_area) *get_area; 306 307 get_area = pde->proc_fops->get_unmapped_area; 308 #ifdef CONFIG_MMU 309 if (!get_area) 310 get_area = current->mm->get_unmapped_area; 311 #endif 312 313 if (get_area) 314 rv = get_area(file, orig_addr, len, pgoff, flags); 315 else 316 rv = orig_addr; 317 unuse_pde(pde); 318 } 319 return rv; 320 } 321 322 static int proc_reg_open(struct inode *inode, struct file *file) 323 { 324 struct proc_dir_entry *pde = PDE(inode); 325 int rv = 0; 326 int (*open)(struct inode *, struct file *); 327 int (*release)(struct inode *, struct file *); 328 struct pde_opener *pdeo; 329 330 /* 331 * Ensure that 332 * 1) PDE's ->release hook will be called no matter what 333 * either normally by close()/->release, or forcefully by 334 * rmmod/remove_proc_entry. 335 * 336 * 2) rmmod isn't blocked by opening file in /proc and sitting on 337 * the descriptor (including "rmmod foo </proc/foo" scenario). 338 * 339 * Save every "struct file" with custom ->release hook. 340 */ 341 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); 342 if (!pdeo) 343 return -ENOMEM; 344 345 if (!use_pde(pde)) { 346 kfree(pdeo); 347 return -ENOENT; 348 } 349 open = pde->proc_fops->open; 350 release = pde->proc_fops->release; 351 352 if (open) 353 rv = open(inode, file); 354 355 if (rv == 0 && release) { 356 /* To know what to release. */ 357 pdeo->file = file; 358 pdeo->closing = false; 359 pdeo->c = NULL; 360 spin_lock(&pde->pde_unload_lock); 361 list_add(&pdeo->lh, &pde->pde_openers); 362 spin_unlock(&pde->pde_unload_lock); 363 } else 364 kfree(pdeo); 365 366 unuse_pde(pde); 367 return rv; 368 } 369 370 static int proc_reg_release(struct inode *inode, struct file *file) 371 { 372 struct proc_dir_entry *pde = PDE(inode); 373 struct pde_opener *pdeo; 374 spin_lock(&pde->pde_unload_lock); 375 list_for_each_entry(pdeo, &pde->pde_openers, lh) { 376 if (pdeo->file == file) { 377 close_pdeo(pde, pdeo); 378 break; 379 } 380 } 381 spin_unlock(&pde->pde_unload_lock); 382 return 0; 383 } 384 385 static const struct file_operations proc_reg_file_ops = { 386 .llseek = proc_reg_llseek, 387 .read = proc_reg_read, 388 .write = proc_reg_write, 389 .poll = proc_reg_poll, 390 .unlocked_ioctl = proc_reg_unlocked_ioctl, 391 #ifdef CONFIG_COMPAT 392 .compat_ioctl = proc_reg_compat_ioctl, 393 #endif 394 .mmap = proc_reg_mmap, 395 .get_unmapped_area = proc_reg_get_unmapped_area, 396 .open = proc_reg_open, 397 .release = proc_reg_release, 398 }; 399 400 #ifdef CONFIG_COMPAT 401 static const struct file_operations proc_reg_file_ops_no_compat = { 402 .llseek = proc_reg_llseek, 403 .read = proc_reg_read, 404 .write = proc_reg_write, 405 .poll = proc_reg_poll, 406 .unlocked_ioctl = proc_reg_unlocked_ioctl, 407 .mmap = proc_reg_mmap, 408 .get_unmapped_area = proc_reg_get_unmapped_area, 409 .open = proc_reg_open, 410 .release = proc_reg_release, 411 }; 412 #endif 413 414 static void proc_put_link(void *p) 415 { 416 unuse_pde(p); 417 } 418 419 static const char *proc_get_link(struct dentry *dentry, 420 struct inode *inode, 421 struct delayed_call *done) 422 { 423 struct proc_dir_entry *pde = PDE(inode); 424 if (!use_pde(pde)) 425 return ERR_PTR(-EINVAL); 426 set_delayed_call(done, proc_put_link, pde); 427 return pde->data; 428 } 429 430 const struct inode_operations proc_link_inode_operations = { 431 .get_link = proc_get_link, 432 }; 433 434 struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) 435 { 436 struct inode *inode = new_inode_pseudo(sb); 437 438 if (inode) { 439 inode->i_ino = de->low_ino; 440 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 441 PROC_I(inode)->pde = de; 442 443 if (is_empty_pde(de)) { 444 make_empty_dir_inode(inode); 445 return inode; 446 } 447 if (de->mode) { 448 inode->i_mode = de->mode; 449 inode->i_uid = de->uid; 450 inode->i_gid = de->gid; 451 } 452 if (de->size) 453 inode->i_size = de->size; 454 if (de->nlink) 455 set_nlink(inode, de->nlink); 456 WARN_ON(!de->proc_iops); 457 inode->i_op = de->proc_iops; 458 if (de->proc_fops) { 459 if (S_ISREG(inode->i_mode)) { 460 #ifdef CONFIG_COMPAT 461 if (!de->proc_fops->compat_ioctl) 462 inode->i_fop = 463 &proc_reg_file_ops_no_compat; 464 else 465 #endif 466 inode->i_fop = &proc_reg_file_ops; 467 } else { 468 inode->i_fop = de->proc_fops; 469 } 470 } 471 } else 472 pde_put(de); 473 return inode; 474 } 475 476 int proc_fill_super(struct super_block *s, void *data, int silent) 477 { 478 struct pid_namespace *ns = get_pid_ns(s->s_fs_info); 479 struct inode *root_inode; 480 int ret; 481 482 if (!proc_parse_options(data, ns)) 483 return -EINVAL; 484 485 /* User space would break if executables or devices appear on proc */ 486 s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; 487 s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; 488 s->s_blocksize = 1024; 489 s->s_blocksize_bits = 10; 490 s->s_magic = PROC_SUPER_MAGIC; 491 s->s_op = &proc_sops; 492 s->s_time_gran = 1; 493 494 /* 495 * procfs isn't actually a stacking filesystem; however, there is 496 * too much magic going on inside it to permit stacking things on 497 * top of it 498 */ 499 s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; 500 501 pde_get(&proc_root); 502 root_inode = proc_get_inode(s, &proc_root); 503 if (!root_inode) { 504 pr_err("proc_fill_super: get root inode failed\n"); 505 return -ENOMEM; 506 } 507 508 s->s_root = d_make_root(root_inode); 509 if (!s->s_root) { 510 pr_err("proc_fill_super: allocate dentry failed\n"); 511 return -ENOMEM; 512 } 513 514 ret = proc_setup_self(s); 515 if (ret) { 516 return ret; 517 } 518 return proc_setup_thread_self(s); 519 } 520