1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * fs/kernfs/file.c - kernfs file implementation 4 * 5 * Copyright (c) 2001-3 Patrick Mochel 6 * Copyright (c) 2007 SUSE Linux Products GmbH 7 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/seq_file.h> 12 #include <linux/slab.h> 13 #include <linux/poll.h> 14 #include <linux/pagemap.h> 15 #include <linux/sched/mm.h> 16 #include <linux/fsnotify.h> 17 #include <linux/uio.h> 18 19 #include "kernfs-internal.h" 20 21 struct kernfs_open_node { 22 struct rcu_head rcu_head; 23 atomic_t event; 24 wait_queue_head_t poll; 25 struct list_head files; /* goes through kernfs_open_file.list */ 26 }; 27 28 /* 29 * kernfs_notify() may be called from any context and bounces notifications 30 * through a work item. To minimize space overhead in kernfs_node, the 31 * pending queue is implemented as a singly linked list of kernfs_nodes. 32 * The list is terminated with the self pointer so that whether a 33 * kernfs_node is on the list or not can be determined by testing the next 34 * pointer for NULL. 35 */ 36 #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) 37 38 static DEFINE_SPINLOCK(kernfs_notify_lock); 39 static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; 40 41 static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn) 42 { 43 int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS); 44 45 return &kernfs_locks->open_file_mutex[idx]; 46 } 47 48 static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) 49 { 50 struct mutex *lock; 51 52 lock = kernfs_open_file_mutex_ptr(kn); 53 54 mutex_lock(lock); 55 56 return lock; 57 } 58 59 /** 60 * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn. 61 * 62 * @of: associated kernfs_open_file instance. 63 * @kn: target kernfs_node. 64 * 65 * Fetch and return ->attr.open of @kn if @of->list is non empty. 66 * If @of->list is not empty we can safely assume that @of is on 67 * @kn->attr.open->files list and this guarantees that @kn->attr.open 68 * will not vanish i.e. dereferencing outside RCU read-side critical 69 * section is safe here. 70 * 71 * The caller needs to make sure that @of->list is not empty. 72 */ 73 static struct kernfs_open_node * 74 kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn) 75 { 76 struct kernfs_open_node *on; 77 78 on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list)); 79 80 return on; 81 } 82 83 /** 84 * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn 85 * 86 * @kn: target kernfs_node. 87 * 88 * Fetch and return ->attr.open of @kn when caller holds the 89 * kernfs_open_file_mutex_ptr(kn). 90 * 91 * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when 92 * the caller guarantees that this mutex is being held, other updaters can't 93 * change ->attr.open and this means that we can safely deref ->attr.open 94 * outside RCU read-side critical section. 95 * 96 * The caller needs to make sure that kernfs_open_file_mutex is held. 97 */ 98 static struct kernfs_open_node * 99 kernfs_deref_open_node_protected(struct kernfs_node *kn) 100 { 101 return rcu_dereference_protected(kn->attr.open, 102 lockdep_is_held(kernfs_open_file_mutex_ptr(kn))); 103 } 104 105 static struct kernfs_open_file *kernfs_of(struct file *file) 106 { 107 return ((struct seq_file *)file->private_data)->private; 108 } 109 110 /* 111 * Determine the kernfs_ops for the given kernfs_node. This function must 112 * be called while holding an active reference. 113 */ 114 static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) 115 { 116 if (kn->flags & KERNFS_LOCKDEP) 117 lockdep_assert_held(kn); 118 return kn->attr.ops; 119 } 120 121 /* 122 * As kernfs_seq_stop() is also called after kernfs_seq_start() or 123 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping 124 * a seq_file iteration which is fully initialized with an active reference 125 * or an aborted kernfs_seq_start() due to get_active failure. The 126 * position pointer is the only context for each seq_file iteration and 127 * thus the stop condition should be encoded in it. As the return value is 128 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable 129 * choice to indicate get_active failure. 130 * 131 * Unfortunately, this is complicated due to the optional custom seq_file 132 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() 133 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or 134 * custom seq_file operations and thus can't decide whether put_active 135 * should be performed or not only on ERR_PTR(-ENODEV). 136 * 137 * This is worked around by factoring out the custom seq_stop() and 138 * put_active part into kernfs_seq_stop_active(), skipping it from 139 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after 140 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures 141 * that kernfs_seq_stop_active() is skipped only after get_active failure. 142 */ 143 static void kernfs_seq_stop_active(struct seq_file *sf, void *v) 144 { 145 struct kernfs_open_file *of = sf->private; 146 const struct kernfs_ops *ops = kernfs_ops(of->kn); 147 148 if (ops->seq_stop) 149 ops->seq_stop(sf, v); 150 kernfs_put_active(of->kn); 151 } 152 153 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) 154 { 155 struct kernfs_open_file *of = sf->private; 156 const struct kernfs_ops *ops; 157 158 /* 159 * @of->mutex nests outside active ref and is primarily to ensure that 160 * the ops aren't called concurrently for the same open file. 161 */ 162 mutex_lock(&of->mutex); 163 if (!kernfs_get_active(of->kn)) 164 return ERR_PTR(-ENODEV); 165 166 ops = kernfs_ops(of->kn); 167 if (ops->seq_start) { 168 void *next = ops->seq_start(sf, ppos); 169 /* see the comment above kernfs_seq_stop_active() */ 170 if (next == ERR_PTR(-ENODEV)) 171 kernfs_seq_stop_active(sf, next); 172 return next; 173 } 174 return single_start(sf, ppos); 175 } 176 177 static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) 178 { 179 struct kernfs_open_file *of = sf->private; 180 const struct kernfs_ops *ops = kernfs_ops(of->kn); 181 182 if (ops->seq_next) { 183 void *next = ops->seq_next(sf, v, ppos); 184 /* see the comment above kernfs_seq_stop_active() */ 185 if (next == ERR_PTR(-ENODEV)) 186 kernfs_seq_stop_active(sf, next); 187 return next; 188 } else { 189 /* 190 * The same behavior and code as single_open(), always 191 * terminate after the initial read. 192 */ 193 ++*ppos; 194 return NULL; 195 } 196 } 197 198 static void kernfs_seq_stop(struct seq_file *sf, void *v) 199 { 200 struct kernfs_open_file *of = sf->private; 201 202 if (v != ERR_PTR(-ENODEV)) 203 kernfs_seq_stop_active(sf, v); 204 mutex_unlock(&of->mutex); 205 } 206 207 static int kernfs_seq_show(struct seq_file *sf, void *v) 208 { 209 struct kernfs_open_file *of = sf->private; 210 struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn); 211 212 if (!on) 213 return -EINVAL; 214 215 of->event = atomic_read(&on->event); 216 217 return of->kn->attr.ops->seq_show(sf, v); 218 } 219 220 static const struct seq_operations kernfs_seq_ops = { 221 .start = kernfs_seq_start, 222 .next = kernfs_seq_next, 223 .stop = kernfs_seq_stop, 224 .show = kernfs_seq_show, 225 }; 226 227 /* 228 * As reading a bin file can have side-effects, the exact offset and bytes 229 * specified in read(2) call should be passed to the read callback making 230 * it difficult to use seq_file. Implement simplistic custom buffering for 231 * bin files. 232 */ 233 static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) 234 { 235 struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); 236 ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); 237 const struct kernfs_ops *ops; 238 struct kernfs_open_node *on; 239 char *buf; 240 241 buf = of->prealloc_buf; 242 if (buf) 243 mutex_lock(&of->prealloc_mutex); 244 else 245 buf = kmalloc(len, GFP_KERNEL); 246 if (!buf) 247 return -ENOMEM; 248 249 /* 250 * @of->mutex nests outside active ref and is used both to ensure that 251 * the ops aren't called concurrently for the same open file. 252 */ 253 mutex_lock(&of->mutex); 254 if (!kernfs_get_active(of->kn)) { 255 len = -ENODEV; 256 mutex_unlock(&of->mutex); 257 goto out_free; 258 } 259 260 on = kernfs_deref_open_node(of, of->kn); 261 if (!on) { 262 len = -EINVAL; 263 mutex_unlock(&of->mutex); 264 goto out_free; 265 } 266 267 of->event = atomic_read(&on->event); 268 269 ops = kernfs_ops(of->kn); 270 if (ops->read) 271 len = ops->read(of, buf, len, iocb->ki_pos); 272 else 273 len = -EINVAL; 274 275 kernfs_put_active(of->kn); 276 mutex_unlock(&of->mutex); 277 278 if (len < 0) 279 goto out_free; 280 281 if (copy_to_iter(buf, len, iter) != len) { 282 len = -EFAULT; 283 goto out_free; 284 } 285 286 iocb->ki_pos += len; 287 288 out_free: 289 if (buf == of->prealloc_buf) 290 mutex_unlock(&of->prealloc_mutex); 291 else 292 kfree(buf); 293 return len; 294 } 295 296 static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) 297 { 298 if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) 299 return seq_read_iter(iocb, iter); 300 return kernfs_file_read_iter(iocb, iter); 301 } 302 303 /* 304 * Copy data in from userland and pass it to the matching kernfs write 305 * operation. 306 * 307 * There is no easy way for us to know if userspace is only doing a partial 308 * write, so we don't support them. We expect the entire buffer to come on 309 * the first write. Hint: if you're writing a value, first read the file, 310 * modify only the value you're changing, then write entire buffer 311 * back. 312 */ 313 static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) 314 { 315 struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); 316 ssize_t len = iov_iter_count(iter); 317 const struct kernfs_ops *ops; 318 char *buf; 319 320 if (of->atomic_write_len) { 321 if (len > of->atomic_write_len) 322 return -E2BIG; 323 } else { 324 len = min_t(size_t, len, PAGE_SIZE); 325 } 326 327 buf = of->prealloc_buf; 328 if (buf) 329 mutex_lock(&of->prealloc_mutex); 330 else 331 buf = kmalloc(len + 1, GFP_KERNEL); 332 if (!buf) 333 return -ENOMEM; 334 335 if (copy_from_iter(buf, len, iter) != len) { 336 len = -EFAULT; 337 goto out_free; 338 } 339 buf[len] = '\0'; /* guarantee string termination */ 340 341 /* 342 * @of->mutex nests outside active ref and is used both to ensure that 343 * the ops aren't called concurrently for the same open file. 344 */ 345 mutex_lock(&of->mutex); 346 if (!kernfs_get_active(of->kn)) { 347 mutex_unlock(&of->mutex); 348 len = -ENODEV; 349 goto out_free; 350 } 351 352 ops = kernfs_ops(of->kn); 353 if (ops->write) 354 len = ops->write(of, buf, len, iocb->ki_pos); 355 else 356 len = -EINVAL; 357 358 kernfs_put_active(of->kn); 359 mutex_unlock(&of->mutex); 360 361 if (len > 0) 362 iocb->ki_pos += len; 363 364 out_free: 365 if (buf == of->prealloc_buf) 366 mutex_unlock(&of->prealloc_mutex); 367 else 368 kfree(buf); 369 return len; 370 } 371 372 static void kernfs_vma_open(struct vm_area_struct *vma) 373 { 374 struct file *file = vma->vm_file; 375 struct kernfs_open_file *of = kernfs_of(file); 376 377 if (!of->vm_ops) 378 return; 379 380 if (!kernfs_get_active(of->kn)) 381 return; 382 383 if (of->vm_ops->open) 384 of->vm_ops->open(vma); 385 386 kernfs_put_active(of->kn); 387 } 388 389 static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) 390 { 391 struct file *file = vmf->vma->vm_file; 392 struct kernfs_open_file *of = kernfs_of(file); 393 vm_fault_t ret; 394 395 if (!of->vm_ops) 396 return VM_FAULT_SIGBUS; 397 398 if (!kernfs_get_active(of->kn)) 399 return VM_FAULT_SIGBUS; 400 401 ret = VM_FAULT_SIGBUS; 402 if (of->vm_ops->fault) 403 ret = of->vm_ops->fault(vmf); 404 405 kernfs_put_active(of->kn); 406 return ret; 407 } 408 409 static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) 410 { 411 struct file *file = vmf->vma->vm_file; 412 struct kernfs_open_file *of = kernfs_of(file); 413 vm_fault_t ret; 414 415 if (!of->vm_ops) 416 return VM_FAULT_SIGBUS; 417 418 if (!kernfs_get_active(of->kn)) 419 return VM_FAULT_SIGBUS; 420 421 ret = 0; 422 if (of->vm_ops->page_mkwrite) 423 ret = of->vm_ops->page_mkwrite(vmf); 424 else 425 file_update_time(file); 426 427 kernfs_put_active(of->kn); 428 return ret; 429 } 430 431 static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, 432 void *buf, int len, int write) 433 { 434 struct file *file = vma->vm_file; 435 struct kernfs_open_file *of = kernfs_of(file); 436 int ret; 437 438 if (!of->vm_ops) 439 return -EINVAL; 440 441 if (!kernfs_get_active(of->kn)) 442 return -EINVAL; 443 444 ret = -EINVAL; 445 if (of->vm_ops->access) 446 ret = of->vm_ops->access(vma, addr, buf, len, write); 447 448 kernfs_put_active(of->kn); 449 return ret; 450 } 451 452 #ifdef CONFIG_NUMA 453 static int kernfs_vma_set_policy(struct vm_area_struct *vma, 454 struct mempolicy *new) 455 { 456 struct file *file = vma->vm_file; 457 struct kernfs_open_file *of = kernfs_of(file); 458 int ret; 459 460 if (!of->vm_ops) 461 return 0; 462 463 if (!kernfs_get_active(of->kn)) 464 return -EINVAL; 465 466 ret = 0; 467 if (of->vm_ops->set_policy) 468 ret = of->vm_ops->set_policy(vma, new); 469 470 kernfs_put_active(of->kn); 471 return ret; 472 } 473 474 static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, 475 unsigned long addr) 476 { 477 struct file *file = vma->vm_file; 478 struct kernfs_open_file *of = kernfs_of(file); 479 struct mempolicy *pol; 480 481 if (!of->vm_ops) 482 return vma->vm_policy; 483 484 if (!kernfs_get_active(of->kn)) 485 return vma->vm_policy; 486 487 pol = vma->vm_policy; 488 if (of->vm_ops->get_policy) 489 pol = of->vm_ops->get_policy(vma, addr); 490 491 kernfs_put_active(of->kn); 492 return pol; 493 } 494 495 #endif 496 497 static const struct vm_operations_struct kernfs_vm_ops = { 498 .open = kernfs_vma_open, 499 .fault = kernfs_vma_fault, 500 .page_mkwrite = kernfs_vma_page_mkwrite, 501 .access = kernfs_vma_access, 502 #ifdef CONFIG_NUMA 503 .set_policy = kernfs_vma_set_policy, 504 .get_policy = kernfs_vma_get_policy, 505 #endif 506 }; 507 508 static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) 509 { 510 struct kernfs_open_file *of = kernfs_of(file); 511 const struct kernfs_ops *ops; 512 int rc; 513 514 /* 515 * mmap path and of->mutex are prone to triggering spurious lockdep 516 * warnings and we don't want to add spurious locking dependency 517 * between the two. Check whether mmap is actually implemented 518 * without grabbing @of->mutex by testing HAS_MMAP flag. See the 519 * comment in kernfs_file_open() for more details. 520 */ 521 if (!(of->kn->flags & KERNFS_HAS_MMAP)) 522 return -ENODEV; 523 524 mutex_lock(&of->mutex); 525 526 rc = -ENODEV; 527 if (!kernfs_get_active(of->kn)) 528 goto out_unlock; 529 530 ops = kernfs_ops(of->kn); 531 rc = ops->mmap(of, vma); 532 if (rc) 533 goto out_put; 534 535 /* 536 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() 537 * to satisfy versions of X which crash if the mmap fails: that 538 * substitutes a new vm_file, and we don't then want bin_vm_ops. 539 */ 540 if (vma->vm_file != file) 541 goto out_put; 542 543 rc = -EINVAL; 544 if (of->mmapped && of->vm_ops != vma->vm_ops) 545 goto out_put; 546 547 /* 548 * It is not possible to successfully wrap close. 549 * So error if someone is trying to use close. 550 */ 551 if (vma->vm_ops && vma->vm_ops->close) 552 goto out_put; 553 554 rc = 0; 555 of->mmapped = true; 556 of->vm_ops = vma->vm_ops; 557 vma->vm_ops = &kernfs_vm_ops; 558 out_put: 559 kernfs_put_active(of->kn); 560 out_unlock: 561 mutex_unlock(&of->mutex); 562 563 return rc; 564 } 565 566 /** 567 * kernfs_get_open_node - get or create kernfs_open_node 568 * @kn: target kernfs_node 569 * @of: kernfs_open_file for this instance of open 570 * 571 * If @kn->attr.open exists, increment its reference count; otherwise, 572 * create one. @of is chained to the files list. 573 * 574 * LOCKING: 575 * Kernel thread context (may sleep). 576 * 577 * RETURNS: 578 * 0 on success, -errno on failure. 579 */ 580 static int kernfs_get_open_node(struct kernfs_node *kn, 581 struct kernfs_open_file *of) 582 { 583 struct kernfs_open_node *on, *new_on = NULL; 584 struct mutex *mutex = NULL; 585 586 mutex = kernfs_open_file_mutex_lock(kn); 587 on = kernfs_deref_open_node_protected(kn); 588 589 if (on) { 590 list_add_tail(&of->list, &on->files); 591 mutex_unlock(mutex); 592 return 0; 593 } else { 594 /* not there, initialize a new one */ 595 new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); 596 if (!new_on) { 597 mutex_unlock(mutex); 598 return -ENOMEM; 599 } 600 atomic_set(&new_on->event, 1); 601 init_waitqueue_head(&new_on->poll); 602 INIT_LIST_HEAD(&new_on->files); 603 list_add_tail(&of->list, &new_on->files); 604 rcu_assign_pointer(kn->attr.open, new_on); 605 } 606 mutex_unlock(mutex); 607 608 return 0; 609 } 610 611 /** 612 * kernfs_unlink_open_file - Unlink @of from @kn. 613 * 614 * @kn: target kernfs_node 615 * @of: associated kernfs_open_file 616 * 617 * Unlink @of from list of @kn's associated open files. If list of 618 * associated open files becomes empty, disassociate and free 619 * kernfs_open_node. 620 * 621 * LOCKING: 622 * None. 623 */ 624 static void kernfs_unlink_open_file(struct kernfs_node *kn, 625 struct kernfs_open_file *of) 626 { 627 struct kernfs_open_node *on; 628 struct mutex *mutex = NULL; 629 630 mutex = kernfs_open_file_mutex_lock(kn); 631 632 on = kernfs_deref_open_node_protected(kn); 633 if (!on) { 634 mutex_unlock(mutex); 635 return; 636 } 637 638 if (of) 639 list_del(&of->list); 640 641 if (list_empty(&on->files)) { 642 rcu_assign_pointer(kn->attr.open, NULL); 643 kfree_rcu(on, rcu_head); 644 } 645 646 mutex_unlock(mutex); 647 } 648 649 static int kernfs_fop_open(struct inode *inode, struct file *file) 650 { 651 struct kernfs_node *kn = inode->i_private; 652 struct kernfs_root *root = kernfs_root(kn); 653 const struct kernfs_ops *ops; 654 struct kernfs_open_file *of; 655 bool has_read, has_write, has_mmap; 656 int error = -EACCES; 657 658 if (!kernfs_get_active(kn)) 659 return -ENODEV; 660 661 ops = kernfs_ops(kn); 662 663 has_read = ops->seq_show || ops->read || ops->mmap; 664 has_write = ops->write || ops->mmap; 665 has_mmap = ops->mmap; 666 667 /* see the flag definition for details */ 668 if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { 669 if ((file->f_mode & FMODE_WRITE) && 670 (!(inode->i_mode & S_IWUGO) || !has_write)) 671 goto err_out; 672 673 if ((file->f_mode & FMODE_READ) && 674 (!(inode->i_mode & S_IRUGO) || !has_read)) 675 goto err_out; 676 } 677 678 /* allocate a kernfs_open_file for the file */ 679 error = -ENOMEM; 680 of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); 681 if (!of) 682 goto err_out; 683 684 /* 685 * The following is done to give a different lockdep key to 686 * @of->mutex for files which implement mmap. This is a rather 687 * crude way to avoid false positive lockdep warning around 688 * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and 689 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under 690 * which mm->mmap_lock nests, while holding @of->mutex. As each 691 * open file has a separate mutex, it's okay as long as those don't 692 * happen on the same file. At this point, we can't easily give 693 * each file a separate locking class. Let's differentiate on 694 * whether the file has mmap or not for now. 695 * 696 * Both paths of the branch look the same. They're supposed to 697 * look that way and give @of->mutex different static lockdep keys. 698 */ 699 if (has_mmap) 700 mutex_init(&of->mutex); 701 else 702 mutex_init(&of->mutex); 703 704 of->kn = kn; 705 of->file = file; 706 707 /* 708 * Write path needs to atomic_write_len outside active reference. 709 * Cache it in open_file. See kernfs_fop_write_iter() for details. 710 */ 711 of->atomic_write_len = ops->atomic_write_len; 712 713 error = -EINVAL; 714 /* 715 * ->seq_show is incompatible with ->prealloc, 716 * as seq_read does its own allocation. 717 * ->read must be used instead. 718 */ 719 if (ops->prealloc && ops->seq_show) 720 goto err_free; 721 if (ops->prealloc) { 722 int len = of->atomic_write_len ?: PAGE_SIZE; 723 of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); 724 error = -ENOMEM; 725 if (!of->prealloc_buf) 726 goto err_free; 727 mutex_init(&of->prealloc_mutex); 728 } 729 730 /* 731 * Always instantiate seq_file even if read access doesn't use 732 * seq_file or is not requested. This unifies private data access 733 * and readable regular files are the vast majority anyway. 734 */ 735 if (ops->seq_show) 736 error = seq_open(file, &kernfs_seq_ops); 737 else 738 error = seq_open(file, NULL); 739 if (error) 740 goto err_free; 741 742 of->seq_file = file->private_data; 743 of->seq_file->private = of; 744 745 /* seq_file clears PWRITE unconditionally, restore it if WRITE */ 746 if (file->f_mode & FMODE_WRITE) 747 file->f_mode |= FMODE_PWRITE; 748 749 /* make sure we have open node struct */ 750 error = kernfs_get_open_node(kn, of); 751 if (error) 752 goto err_seq_release; 753 754 if (ops->open) { 755 /* nobody has access to @of yet, skip @of->mutex */ 756 error = ops->open(of); 757 if (error) 758 goto err_put_node; 759 } 760 761 /* open succeeded, put active references */ 762 kernfs_put_active(kn); 763 return 0; 764 765 err_put_node: 766 kernfs_unlink_open_file(kn, of); 767 err_seq_release: 768 seq_release(inode, file); 769 err_free: 770 kfree(of->prealloc_buf); 771 kfree(of); 772 err_out: 773 kernfs_put_active(kn); 774 return error; 775 } 776 777 /* used from release/drain to ensure that ->release() is called exactly once */ 778 static void kernfs_release_file(struct kernfs_node *kn, 779 struct kernfs_open_file *of) 780 { 781 /* 782 * @of is guaranteed to have no other file operations in flight and 783 * we just want to synchronize release and drain paths. 784 * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used 785 * here because drain path may be called from places which can 786 * cause circular dependency. 787 */ 788 lockdep_assert_held(kernfs_open_file_mutex_ptr(kn)); 789 790 if (!of->released) { 791 /* 792 * A file is never detached without being released and we 793 * need to be able to release files which are deactivated 794 * and being drained. Don't use kernfs_ops(). 795 */ 796 kn->attr.ops->release(of); 797 of->released = true; 798 } 799 } 800 801 static int kernfs_fop_release(struct inode *inode, struct file *filp) 802 { 803 struct kernfs_node *kn = inode->i_private; 804 struct kernfs_open_file *of = kernfs_of(filp); 805 struct mutex *mutex = NULL; 806 807 if (kn->flags & KERNFS_HAS_RELEASE) { 808 mutex = kernfs_open_file_mutex_lock(kn); 809 kernfs_release_file(kn, of); 810 mutex_unlock(mutex); 811 } 812 813 kernfs_unlink_open_file(kn, of); 814 seq_release(inode, filp); 815 kfree(of->prealloc_buf); 816 kfree(of); 817 818 return 0; 819 } 820 821 void kernfs_drain_open_files(struct kernfs_node *kn) 822 { 823 struct kernfs_open_node *on; 824 struct kernfs_open_file *of; 825 struct mutex *mutex = NULL; 826 827 if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) 828 return; 829 830 /* 831 * lockless opportunistic check is safe below because no one is adding to 832 * ->attr.open at this point of time. This check allows early bail out 833 * if ->attr.open is already NULL. kernfs_unlink_open_file makes 834 * ->attr.open NULL only while holding kernfs_open_file_mutex so below 835 * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if 836 * ->attr.open became NULL while waiting for the mutex. 837 */ 838 if (!rcu_access_pointer(kn->attr.open)) 839 return; 840 841 mutex = kernfs_open_file_mutex_lock(kn); 842 on = kernfs_deref_open_node_protected(kn); 843 if (!on) { 844 mutex_unlock(mutex); 845 return; 846 } 847 848 list_for_each_entry(of, &on->files, list) { 849 struct inode *inode = file_inode(of->file); 850 851 if (kn->flags & KERNFS_HAS_MMAP) 852 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 853 854 if (kn->flags & KERNFS_HAS_RELEASE) 855 kernfs_release_file(kn, of); 856 } 857 858 mutex_unlock(mutex); 859 } 860 861 /* 862 * Kernfs attribute files are pollable. The idea is that you read 863 * the content and then you use 'poll' or 'select' to wait for 864 * the content to change. When the content changes (assuming the 865 * manager for the kobject supports notification), poll will 866 * return EPOLLERR|EPOLLPRI, and select will return the fd whether 867 * it is waiting for read, write, or exceptions. 868 * Once poll/select indicates that the value has changed, you 869 * need to close and re-open the file, or seek to 0 and read again. 870 * Reminder: this only works for attributes which actively support 871 * it, and it is not possible to test an attribute from userspace 872 * to see if it supports poll (Neither 'poll' nor 'select' return 873 * an appropriate error code). When in doubt, set a suitable timeout value. 874 */ 875 __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) 876 { 877 struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry); 878 struct kernfs_open_node *on = kernfs_deref_open_node(of, kn); 879 880 if (!on) 881 return EPOLLERR; 882 883 poll_wait(of->file, &on->poll, wait); 884 885 if (of->event != atomic_read(&on->event)) 886 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 887 888 return DEFAULT_POLLMASK; 889 } 890 891 static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) 892 { 893 struct kernfs_open_file *of = kernfs_of(filp); 894 struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); 895 __poll_t ret; 896 897 if (!kernfs_get_active(kn)) 898 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 899 900 if (kn->attr.ops->poll) 901 ret = kn->attr.ops->poll(of, wait); 902 else 903 ret = kernfs_generic_poll(of, wait); 904 905 kernfs_put_active(kn); 906 return ret; 907 } 908 909 static void kernfs_notify_workfn(struct work_struct *work) 910 { 911 struct kernfs_node *kn; 912 struct kernfs_super_info *info; 913 struct kernfs_root *root; 914 repeat: 915 /* pop one off the notify_list */ 916 spin_lock_irq(&kernfs_notify_lock); 917 kn = kernfs_notify_list; 918 if (kn == KERNFS_NOTIFY_EOL) { 919 spin_unlock_irq(&kernfs_notify_lock); 920 return; 921 } 922 kernfs_notify_list = kn->attr.notify_next; 923 kn->attr.notify_next = NULL; 924 spin_unlock_irq(&kernfs_notify_lock); 925 926 root = kernfs_root(kn); 927 /* kick fsnotify */ 928 down_write(&root->kernfs_rwsem); 929 930 list_for_each_entry(info, &kernfs_root(kn)->supers, node) { 931 struct kernfs_node *parent; 932 struct inode *p_inode = NULL; 933 struct inode *inode; 934 struct qstr name; 935 936 /* 937 * We want fsnotify_modify() on @kn but as the 938 * modifications aren't originating from userland don't 939 * have the matching @file available. Look up the inodes 940 * and generate the events manually. 941 */ 942 inode = ilookup(info->sb, kernfs_ino(kn)); 943 if (!inode) 944 continue; 945 946 name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name)); 947 parent = kernfs_get_parent(kn); 948 if (parent) { 949 p_inode = ilookup(info->sb, kernfs_ino(parent)); 950 if (p_inode) { 951 fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD, 952 inode, FSNOTIFY_EVENT_INODE, 953 p_inode, &name, inode, 0); 954 iput(p_inode); 955 } 956 957 kernfs_put(parent); 958 } 959 960 if (!p_inode) 961 fsnotify_inode(inode, FS_MODIFY); 962 963 iput(inode); 964 } 965 966 up_write(&root->kernfs_rwsem); 967 kernfs_put(kn); 968 goto repeat; 969 } 970 971 /** 972 * kernfs_notify - notify a kernfs file 973 * @kn: file to notify 974 * 975 * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any 976 * context. 977 */ 978 void kernfs_notify(struct kernfs_node *kn) 979 { 980 static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); 981 unsigned long flags; 982 struct kernfs_open_node *on; 983 984 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) 985 return; 986 987 /* kick poll immediately */ 988 rcu_read_lock(); 989 on = rcu_dereference(kn->attr.open); 990 if (on) { 991 atomic_inc(&on->event); 992 wake_up_interruptible(&on->poll); 993 } 994 rcu_read_unlock(); 995 996 /* schedule work to kick fsnotify */ 997 spin_lock_irqsave(&kernfs_notify_lock, flags); 998 if (!kn->attr.notify_next) { 999 kernfs_get(kn); 1000 kn->attr.notify_next = kernfs_notify_list; 1001 kernfs_notify_list = kn; 1002 schedule_work(&kernfs_notify_work); 1003 } 1004 spin_unlock_irqrestore(&kernfs_notify_lock, flags); 1005 } 1006 EXPORT_SYMBOL_GPL(kernfs_notify); 1007 1008 const struct file_operations kernfs_file_fops = { 1009 .read_iter = kernfs_fop_read_iter, 1010 .write_iter = kernfs_fop_write_iter, 1011 .llseek = generic_file_llseek, 1012 .mmap = kernfs_fop_mmap, 1013 .open = kernfs_fop_open, 1014 .release = kernfs_fop_release, 1015 .poll = kernfs_fop_poll, 1016 .fsync = noop_fsync, 1017 .splice_read = generic_file_splice_read, 1018 .splice_write = iter_file_splice_write, 1019 }; 1020 1021 /** 1022 * __kernfs_create_file - kernfs internal function to create a file 1023 * @parent: directory to create the file in 1024 * @name: name of the file 1025 * @mode: mode of the file 1026 * @uid: uid of the file 1027 * @gid: gid of the file 1028 * @size: size of the file 1029 * @ops: kernfs operations for the file 1030 * @priv: private data for the file 1031 * @ns: optional namespace tag of the file 1032 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep 1033 * 1034 * Returns the created node on success, ERR_PTR() value on error. 1035 */ 1036 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, 1037 const char *name, 1038 umode_t mode, kuid_t uid, kgid_t gid, 1039 loff_t size, 1040 const struct kernfs_ops *ops, 1041 void *priv, const void *ns, 1042 struct lock_class_key *key) 1043 { 1044 struct kernfs_node *kn; 1045 unsigned flags; 1046 int rc; 1047 1048 flags = KERNFS_FILE; 1049 1050 kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, 1051 uid, gid, flags); 1052 if (!kn) 1053 return ERR_PTR(-ENOMEM); 1054 1055 kn->attr.ops = ops; 1056 kn->attr.size = size; 1057 kn->ns = ns; 1058 kn->priv = priv; 1059 1060 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1061 if (key) { 1062 lockdep_init_map(&kn->dep_map, "kn->active", key, 0); 1063 kn->flags |= KERNFS_LOCKDEP; 1064 } 1065 #endif 1066 1067 /* 1068 * kn->attr.ops is accessible only while holding active ref. We 1069 * need to know whether some ops are implemented outside active 1070 * ref. Cache their existence in flags. 1071 */ 1072 if (ops->seq_show) 1073 kn->flags |= KERNFS_HAS_SEQ_SHOW; 1074 if (ops->mmap) 1075 kn->flags |= KERNFS_HAS_MMAP; 1076 if (ops->release) 1077 kn->flags |= KERNFS_HAS_RELEASE; 1078 1079 rc = kernfs_add_one(kn); 1080 if (rc) { 1081 kernfs_put(kn); 1082 return ERR_PTR(rc); 1083 } 1084 return kn; 1085 } 1086