1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * fs/kernfs/file.c - kernfs file implementation 4 * 5 * Copyright (c) 2001-3 Patrick Mochel 6 * Copyright (c) 2007 SUSE Linux Products GmbH 7 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/seq_file.h> 12 #include <linux/slab.h> 13 #include <linux/poll.h> 14 #include <linux/pagemap.h> 15 #include <linux/sched/mm.h> 16 #include <linux/fsnotify.h> 17 #include <linux/uio.h> 18 19 #include "kernfs-internal.h" 20 21 /* 22 * There's one kernfs_open_file for each open file and one kernfs_open_node 23 * for each kernfs_node with one or more open files. 24 * 25 * kernfs_node->attr.open points to kernfs_open_node. attr.open is 26 * protected by kernfs_open_node_lock. 27 * 28 * filp->private_data points to seq_file whose ->private points to 29 * kernfs_open_file. kernfs_open_files are chained at 30 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex. 31 */ 32 static DEFINE_SPINLOCK(kernfs_open_node_lock); 33 static DEFINE_MUTEX(kernfs_open_file_mutex); 34 35 struct kernfs_open_node { 36 atomic_t refcnt; 37 atomic_t event; 38 wait_queue_head_t poll; 39 struct list_head files; /* goes through kernfs_open_file.list */ 40 }; 41 42 /* 43 * kernfs_notify() may be called from any context and bounces notifications 44 * through a work item. To minimize space overhead in kernfs_node, the 45 * pending queue is implemented as a singly linked list of kernfs_nodes. 46 * The list is terminated with the self pointer so that whether a 47 * kernfs_node is on the list or not can be determined by testing the next 48 * pointer for NULL. 49 */ 50 #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) 51 52 static DEFINE_SPINLOCK(kernfs_notify_lock); 53 static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; 54 55 static struct kernfs_open_file *kernfs_of(struct file *file) 56 { 57 return ((struct seq_file *)file->private_data)->private; 58 } 59 60 /* 61 * Determine the kernfs_ops for the given kernfs_node. This function must 62 * be called while holding an active reference. 63 */ 64 static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) 65 { 66 if (kn->flags & KERNFS_LOCKDEP) 67 lockdep_assert_held(kn); 68 return kn->attr.ops; 69 } 70 71 /* 72 * As kernfs_seq_stop() is also called after kernfs_seq_start() or 73 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping 74 * a seq_file iteration which is fully initialized with an active reference 75 * or an aborted kernfs_seq_start() due to get_active failure. The 76 * position pointer is the only context for each seq_file iteration and 77 * thus the stop condition should be encoded in it. As the return value is 78 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable 79 * choice to indicate get_active failure. 80 * 81 * Unfortunately, this is complicated due to the optional custom seq_file 82 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() 83 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or 84 * custom seq_file operations and thus can't decide whether put_active 85 * should be performed or not only on ERR_PTR(-ENODEV). 86 * 87 * This is worked around by factoring out the custom seq_stop() and 88 * put_active part into kernfs_seq_stop_active(), skipping it from 89 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after 90 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures 91 * that kernfs_seq_stop_active() is skipped only after get_active failure. 92 */ 93 static void kernfs_seq_stop_active(struct seq_file *sf, void *v) 94 { 95 struct kernfs_open_file *of = sf->private; 96 const struct kernfs_ops *ops = kernfs_ops(of->kn); 97 98 if (ops->seq_stop) 99 ops->seq_stop(sf, v); 100 kernfs_put_active(of->kn); 101 } 102 103 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) 104 { 105 struct kernfs_open_file *of = sf->private; 106 const struct kernfs_ops *ops; 107 108 /* 109 * @of->mutex nests outside active ref and is primarily to ensure that 110 * the ops aren't called concurrently for the same open file. 111 */ 112 mutex_lock(&of->mutex); 113 if (!kernfs_get_active(of->kn)) 114 return ERR_PTR(-ENODEV); 115 116 ops = kernfs_ops(of->kn); 117 if (ops->seq_start) { 118 void *next = ops->seq_start(sf, ppos); 119 /* see the comment above kernfs_seq_stop_active() */ 120 if (next == ERR_PTR(-ENODEV)) 121 kernfs_seq_stop_active(sf, next); 122 return next; 123 } 124 return single_start(sf, ppos); 125 } 126 127 static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) 128 { 129 struct kernfs_open_file *of = sf->private; 130 const struct kernfs_ops *ops = kernfs_ops(of->kn); 131 132 if (ops->seq_next) { 133 void *next = ops->seq_next(sf, v, ppos); 134 /* see the comment above kernfs_seq_stop_active() */ 135 if (next == ERR_PTR(-ENODEV)) 136 kernfs_seq_stop_active(sf, next); 137 return next; 138 } else { 139 /* 140 * The same behavior and code as single_open(), always 141 * terminate after the initial read. 142 */ 143 ++*ppos; 144 return NULL; 145 } 146 } 147 148 static void kernfs_seq_stop(struct seq_file *sf, void *v) 149 { 150 struct kernfs_open_file *of = sf->private; 151 152 if (v != ERR_PTR(-ENODEV)) 153 kernfs_seq_stop_active(sf, v); 154 mutex_unlock(&of->mutex); 155 } 156 157 static int kernfs_seq_show(struct seq_file *sf, void *v) 158 { 159 struct kernfs_open_file *of = sf->private; 160 161 of->event = atomic_read(&of->kn->attr.open->event); 162 163 return of->kn->attr.ops->seq_show(sf, v); 164 } 165 166 static const struct seq_operations kernfs_seq_ops = { 167 .start = kernfs_seq_start, 168 .next = kernfs_seq_next, 169 .stop = kernfs_seq_stop, 170 .show = kernfs_seq_show, 171 }; 172 173 /* 174 * As reading a bin file can have side-effects, the exact offset and bytes 175 * specified in read(2) call should be passed to the read callback making 176 * it difficult to use seq_file. Implement simplistic custom buffering for 177 * bin files. 178 */ 179 static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) 180 { 181 struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); 182 ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); 183 const struct kernfs_ops *ops; 184 char *buf; 185 186 buf = of->prealloc_buf; 187 if (buf) 188 mutex_lock(&of->prealloc_mutex); 189 else 190 buf = kmalloc(len, GFP_KERNEL); 191 if (!buf) 192 return -ENOMEM; 193 194 /* 195 * @of->mutex nests outside active ref and is used both to ensure that 196 * the ops aren't called concurrently for the same open file. 197 */ 198 mutex_lock(&of->mutex); 199 if (!kernfs_get_active(of->kn)) { 200 len = -ENODEV; 201 mutex_unlock(&of->mutex); 202 goto out_free; 203 } 204 205 of->event = atomic_read(&of->kn->attr.open->event); 206 ops = kernfs_ops(of->kn); 207 if (ops->read) 208 len = ops->read(of, buf, len, iocb->ki_pos); 209 else 210 len = -EINVAL; 211 212 kernfs_put_active(of->kn); 213 mutex_unlock(&of->mutex); 214 215 if (len < 0) 216 goto out_free; 217 218 if (copy_to_iter(buf, len, iter) != len) { 219 len = -EFAULT; 220 goto out_free; 221 } 222 223 iocb->ki_pos += len; 224 225 out_free: 226 if (buf == of->prealloc_buf) 227 mutex_unlock(&of->prealloc_mutex); 228 else 229 kfree(buf); 230 return len; 231 } 232 233 static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) 234 { 235 if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) 236 return seq_read_iter(iocb, iter); 237 return kernfs_file_read_iter(iocb, iter); 238 } 239 240 /* 241 * Copy data in from userland and pass it to the matching kernfs write 242 * operation. 243 * 244 * There is no easy way for us to know if userspace is only doing a partial 245 * write, so we don't support them. We expect the entire buffer to come on 246 * the first write. Hint: if you're writing a value, first read the file, 247 * modify only the the value you're changing, then write entire buffer 248 * back. 249 */ 250 static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) 251 { 252 struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); 253 ssize_t len = iov_iter_count(iter); 254 const struct kernfs_ops *ops; 255 char *buf; 256 257 if (of->atomic_write_len) { 258 if (len > of->atomic_write_len) 259 return -E2BIG; 260 } else { 261 len = min_t(size_t, len, PAGE_SIZE); 262 } 263 264 buf = of->prealloc_buf; 265 if (buf) 266 mutex_lock(&of->prealloc_mutex); 267 else 268 buf = kmalloc(len + 1, GFP_KERNEL); 269 if (!buf) 270 return -ENOMEM; 271 272 if (copy_from_iter(buf, len, iter) != len) { 273 len = -EFAULT; 274 goto out_free; 275 } 276 buf[len] = '\0'; /* guarantee string termination */ 277 278 /* 279 * @of->mutex nests outside active ref and is used both to ensure that 280 * the ops aren't called concurrently for the same open file. 281 */ 282 mutex_lock(&of->mutex); 283 if (!kernfs_get_active(of->kn)) { 284 mutex_unlock(&of->mutex); 285 len = -ENODEV; 286 goto out_free; 287 } 288 289 ops = kernfs_ops(of->kn); 290 if (ops->write) 291 len = ops->write(of, buf, len, iocb->ki_pos); 292 else 293 len = -EINVAL; 294 295 kernfs_put_active(of->kn); 296 mutex_unlock(&of->mutex); 297 298 if (len > 0) 299 iocb->ki_pos += len; 300 301 out_free: 302 if (buf == of->prealloc_buf) 303 mutex_unlock(&of->prealloc_mutex); 304 else 305 kfree(buf); 306 return len; 307 } 308 309 static void kernfs_vma_open(struct vm_area_struct *vma) 310 { 311 struct file *file = vma->vm_file; 312 struct kernfs_open_file *of = kernfs_of(file); 313 314 if (!of->vm_ops) 315 return; 316 317 if (!kernfs_get_active(of->kn)) 318 return; 319 320 if (of->vm_ops->open) 321 of->vm_ops->open(vma); 322 323 kernfs_put_active(of->kn); 324 } 325 326 static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) 327 { 328 struct file *file = vmf->vma->vm_file; 329 struct kernfs_open_file *of = kernfs_of(file); 330 vm_fault_t ret; 331 332 if (!of->vm_ops) 333 return VM_FAULT_SIGBUS; 334 335 if (!kernfs_get_active(of->kn)) 336 return VM_FAULT_SIGBUS; 337 338 ret = VM_FAULT_SIGBUS; 339 if (of->vm_ops->fault) 340 ret = of->vm_ops->fault(vmf); 341 342 kernfs_put_active(of->kn); 343 return ret; 344 } 345 346 static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) 347 { 348 struct file *file = vmf->vma->vm_file; 349 struct kernfs_open_file *of = kernfs_of(file); 350 vm_fault_t ret; 351 352 if (!of->vm_ops) 353 return VM_FAULT_SIGBUS; 354 355 if (!kernfs_get_active(of->kn)) 356 return VM_FAULT_SIGBUS; 357 358 ret = 0; 359 if (of->vm_ops->page_mkwrite) 360 ret = of->vm_ops->page_mkwrite(vmf); 361 else 362 file_update_time(file); 363 364 kernfs_put_active(of->kn); 365 return ret; 366 } 367 368 static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, 369 void *buf, int len, int write) 370 { 371 struct file *file = vma->vm_file; 372 struct kernfs_open_file *of = kernfs_of(file); 373 int ret; 374 375 if (!of->vm_ops) 376 return -EINVAL; 377 378 if (!kernfs_get_active(of->kn)) 379 return -EINVAL; 380 381 ret = -EINVAL; 382 if (of->vm_ops->access) 383 ret = of->vm_ops->access(vma, addr, buf, len, write); 384 385 kernfs_put_active(of->kn); 386 return ret; 387 } 388 389 #ifdef CONFIG_NUMA 390 static int kernfs_vma_set_policy(struct vm_area_struct *vma, 391 struct mempolicy *new) 392 { 393 struct file *file = vma->vm_file; 394 struct kernfs_open_file *of = kernfs_of(file); 395 int ret; 396 397 if (!of->vm_ops) 398 return 0; 399 400 if (!kernfs_get_active(of->kn)) 401 return -EINVAL; 402 403 ret = 0; 404 if (of->vm_ops->set_policy) 405 ret = of->vm_ops->set_policy(vma, new); 406 407 kernfs_put_active(of->kn); 408 return ret; 409 } 410 411 static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, 412 unsigned long addr) 413 { 414 struct file *file = vma->vm_file; 415 struct kernfs_open_file *of = kernfs_of(file); 416 struct mempolicy *pol; 417 418 if (!of->vm_ops) 419 return vma->vm_policy; 420 421 if (!kernfs_get_active(of->kn)) 422 return vma->vm_policy; 423 424 pol = vma->vm_policy; 425 if (of->vm_ops->get_policy) 426 pol = of->vm_ops->get_policy(vma, addr); 427 428 kernfs_put_active(of->kn); 429 return pol; 430 } 431 432 #endif 433 434 static const struct vm_operations_struct kernfs_vm_ops = { 435 .open = kernfs_vma_open, 436 .fault = kernfs_vma_fault, 437 .page_mkwrite = kernfs_vma_page_mkwrite, 438 .access = kernfs_vma_access, 439 #ifdef CONFIG_NUMA 440 .set_policy = kernfs_vma_set_policy, 441 .get_policy = kernfs_vma_get_policy, 442 #endif 443 }; 444 445 static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) 446 { 447 struct kernfs_open_file *of = kernfs_of(file); 448 const struct kernfs_ops *ops; 449 int rc; 450 451 /* 452 * mmap path and of->mutex are prone to triggering spurious lockdep 453 * warnings and we don't want to add spurious locking dependency 454 * between the two. Check whether mmap is actually implemented 455 * without grabbing @of->mutex by testing HAS_MMAP flag. See the 456 * comment in kernfs_file_open() for more details. 457 */ 458 if (!(of->kn->flags & KERNFS_HAS_MMAP)) 459 return -ENODEV; 460 461 mutex_lock(&of->mutex); 462 463 rc = -ENODEV; 464 if (!kernfs_get_active(of->kn)) 465 goto out_unlock; 466 467 ops = kernfs_ops(of->kn); 468 rc = ops->mmap(of, vma); 469 if (rc) 470 goto out_put; 471 472 /* 473 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() 474 * to satisfy versions of X which crash if the mmap fails: that 475 * substitutes a new vm_file, and we don't then want bin_vm_ops. 476 */ 477 if (vma->vm_file != file) 478 goto out_put; 479 480 rc = -EINVAL; 481 if (of->mmapped && of->vm_ops != vma->vm_ops) 482 goto out_put; 483 484 /* 485 * It is not possible to successfully wrap close. 486 * So error if someone is trying to use close. 487 */ 488 rc = -EINVAL; 489 if (vma->vm_ops && vma->vm_ops->close) 490 goto out_put; 491 492 rc = 0; 493 of->mmapped = true; 494 of->vm_ops = vma->vm_ops; 495 vma->vm_ops = &kernfs_vm_ops; 496 out_put: 497 kernfs_put_active(of->kn); 498 out_unlock: 499 mutex_unlock(&of->mutex); 500 501 return rc; 502 } 503 504 /** 505 * kernfs_get_open_node - get or create kernfs_open_node 506 * @kn: target kernfs_node 507 * @of: kernfs_open_file for this instance of open 508 * 509 * If @kn->attr.open exists, increment its reference count; otherwise, 510 * create one. @of is chained to the files list. 511 * 512 * LOCKING: 513 * Kernel thread context (may sleep). 514 * 515 * RETURNS: 516 * 0 on success, -errno on failure. 517 */ 518 static int kernfs_get_open_node(struct kernfs_node *kn, 519 struct kernfs_open_file *of) 520 { 521 struct kernfs_open_node *on, *new_on = NULL; 522 523 retry: 524 mutex_lock(&kernfs_open_file_mutex); 525 spin_lock_irq(&kernfs_open_node_lock); 526 527 if (!kn->attr.open && new_on) { 528 kn->attr.open = new_on; 529 new_on = NULL; 530 } 531 532 on = kn->attr.open; 533 if (on) { 534 atomic_inc(&on->refcnt); 535 list_add_tail(&of->list, &on->files); 536 } 537 538 spin_unlock_irq(&kernfs_open_node_lock); 539 mutex_unlock(&kernfs_open_file_mutex); 540 541 if (on) { 542 kfree(new_on); 543 return 0; 544 } 545 546 /* not there, initialize a new one and retry */ 547 new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); 548 if (!new_on) 549 return -ENOMEM; 550 551 atomic_set(&new_on->refcnt, 0); 552 atomic_set(&new_on->event, 1); 553 init_waitqueue_head(&new_on->poll); 554 INIT_LIST_HEAD(&new_on->files); 555 goto retry; 556 } 557 558 /** 559 * kernfs_put_open_node - put kernfs_open_node 560 * @kn: target kernfs_nodet 561 * @of: associated kernfs_open_file 562 * 563 * Put @kn->attr.open and unlink @of from the files list. If 564 * reference count reaches zero, disassociate and free it. 565 * 566 * LOCKING: 567 * None. 568 */ 569 static void kernfs_put_open_node(struct kernfs_node *kn, 570 struct kernfs_open_file *of) 571 { 572 struct kernfs_open_node *on = kn->attr.open; 573 unsigned long flags; 574 575 mutex_lock(&kernfs_open_file_mutex); 576 spin_lock_irqsave(&kernfs_open_node_lock, flags); 577 578 if (of) 579 list_del(&of->list); 580 581 if (atomic_dec_and_test(&on->refcnt)) 582 kn->attr.open = NULL; 583 else 584 on = NULL; 585 586 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 587 mutex_unlock(&kernfs_open_file_mutex); 588 589 kfree(on); 590 } 591 592 static int kernfs_fop_open(struct inode *inode, struct file *file) 593 { 594 struct kernfs_node *kn = inode->i_private; 595 struct kernfs_root *root = kernfs_root(kn); 596 const struct kernfs_ops *ops; 597 struct kernfs_open_file *of; 598 bool has_read, has_write, has_mmap; 599 int error = -EACCES; 600 601 if (!kernfs_get_active(kn)) 602 return -ENODEV; 603 604 ops = kernfs_ops(kn); 605 606 has_read = ops->seq_show || ops->read || ops->mmap; 607 has_write = ops->write || ops->mmap; 608 has_mmap = ops->mmap; 609 610 /* see the flag definition for details */ 611 if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { 612 if ((file->f_mode & FMODE_WRITE) && 613 (!(inode->i_mode & S_IWUGO) || !has_write)) 614 goto err_out; 615 616 if ((file->f_mode & FMODE_READ) && 617 (!(inode->i_mode & S_IRUGO) || !has_read)) 618 goto err_out; 619 } 620 621 /* allocate a kernfs_open_file for the file */ 622 error = -ENOMEM; 623 of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); 624 if (!of) 625 goto err_out; 626 627 /* 628 * The following is done to give a different lockdep key to 629 * @of->mutex for files which implement mmap. This is a rather 630 * crude way to avoid false positive lockdep warning around 631 * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and 632 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under 633 * which mm->mmap_lock nests, while holding @of->mutex. As each 634 * open file has a separate mutex, it's okay as long as those don't 635 * happen on the same file. At this point, we can't easily give 636 * each file a separate locking class. Let's differentiate on 637 * whether the file has mmap or not for now. 638 * 639 * Both paths of the branch look the same. They're supposed to 640 * look that way and give @of->mutex different static lockdep keys. 641 */ 642 if (has_mmap) 643 mutex_init(&of->mutex); 644 else 645 mutex_init(&of->mutex); 646 647 of->kn = kn; 648 of->file = file; 649 650 /* 651 * Write path needs to atomic_write_len outside active reference. 652 * Cache it in open_file. See kernfs_fop_write_iter() for details. 653 */ 654 of->atomic_write_len = ops->atomic_write_len; 655 656 error = -EINVAL; 657 /* 658 * ->seq_show is incompatible with ->prealloc, 659 * as seq_read does its own allocation. 660 * ->read must be used instead. 661 */ 662 if (ops->prealloc && ops->seq_show) 663 goto err_free; 664 if (ops->prealloc) { 665 int len = of->atomic_write_len ?: PAGE_SIZE; 666 of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); 667 error = -ENOMEM; 668 if (!of->prealloc_buf) 669 goto err_free; 670 mutex_init(&of->prealloc_mutex); 671 } 672 673 /* 674 * Always instantiate seq_file even if read access doesn't use 675 * seq_file or is not requested. This unifies private data access 676 * and readable regular files are the vast majority anyway. 677 */ 678 if (ops->seq_show) 679 error = seq_open(file, &kernfs_seq_ops); 680 else 681 error = seq_open(file, NULL); 682 if (error) 683 goto err_free; 684 685 of->seq_file = file->private_data; 686 of->seq_file->private = of; 687 688 /* seq_file clears PWRITE unconditionally, restore it if WRITE */ 689 if (file->f_mode & FMODE_WRITE) 690 file->f_mode |= FMODE_PWRITE; 691 692 /* make sure we have open node struct */ 693 error = kernfs_get_open_node(kn, of); 694 if (error) 695 goto err_seq_release; 696 697 if (ops->open) { 698 /* nobody has access to @of yet, skip @of->mutex */ 699 error = ops->open(of); 700 if (error) 701 goto err_put_node; 702 } 703 704 /* open succeeded, put active references */ 705 kernfs_put_active(kn); 706 return 0; 707 708 err_put_node: 709 kernfs_put_open_node(kn, of); 710 err_seq_release: 711 seq_release(inode, file); 712 err_free: 713 kfree(of->prealloc_buf); 714 kfree(of); 715 err_out: 716 kernfs_put_active(kn); 717 return error; 718 } 719 720 /* used from release/drain to ensure that ->release() is called exactly once */ 721 static void kernfs_release_file(struct kernfs_node *kn, 722 struct kernfs_open_file *of) 723 { 724 /* 725 * @of is guaranteed to have no other file operations in flight and 726 * we just want to synchronize release and drain paths. 727 * @kernfs_open_file_mutex is enough. @of->mutex can't be used 728 * here because drain path may be called from places which can 729 * cause circular dependency. 730 */ 731 lockdep_assert_held(&kernfs_open_file_mutex); 732 733 if (!of->released) { 734 /* 735 * A file is never detached without being released and we 736 * need to be able to release files which are deactivated 737 * and being drained. Don't use kernfs_ops(). 738 */ 739 kn->attr.ops->release(of); 740 of->released = true; 741 } 742 } 743 744 static int kernfs_fop_release(struct inode *inode, struct file *filp) 745 { 746 struct kernfs_node *kn = inode->i_private; 747 struct kernfs_open_file *of = kernfs_of(filp); 748 749 if (kn->flags & KERNFS_HAS_RELEASE) { 750 mutex_lock(&kernfs_open_file_mutex); 751 kernfs_release_file(kn, of); 752 mutex_unlock(&kernfs_open_file_mutex); 753 } 754 755 kernfs_put_open_node(kn, of); 756 seq_release(inode, filp); 757 kfree(of->prealloc_buf); 758 kfree(of); 759 760 return 0; 761 } 762 763 void kernfs_drain_open_files(struct kernfs_node *kn) 764 { 765 struct kernfs_open_node *on; 766 struct kernfs_open_file *of; 767 768 if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) 769 return; 770 771 spin_lock_irq(&kernfs_open_node_lock); 772 on = kn->attr.open; 773 if (on) 774 atomic_inc(&on->refcnt); 775 spin_unlock_irq(&kernfs_open_node_lock); 776 if (!on) 777 return; 778 779 mutex_lock(&kernfs_open_file_mutex); 780 781 list_for_each_entry(of, &on->files, list) { 782 struct inode *inode = file_inode(of->file); 783 784 if (kn->flags & KERNFS_HAS_MMAP) 785 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 786 787 if (kn->flags & KERNFS_HAS_RELEASE) 788 kernfs_release_file(kn, of); 789 } 790 791 mutex_unlock(&kernfs_open_file_mutex); 792 793 kernfs_put_open_node(kn, NULL); 794 } 795 796 /* 797 * Kernfs attribute files are pollable. The idea is that you read 798 * the content and then you use 'poll' or 'select' to wait for 799 * the content to change. When the content changes (assuming the 800 * manager for the kobject supports notification), poll will 801 * return EPOLLERR|EPOLLPRI, and select will return the fd whether 802 * it is waiting for read, write, or exceptions. 803 * Once poll/select indicates that the value has changed, you 804 * need to close and re-open the file, or seek to 0 and read again. 805 * Reminder: this only works for attributes which actively support 806 * it, and it is not possible to test an attribute from userspace 807 * to see if it supports poll (Neither 'poll' nor 'select' return 808 * an appropriate error code). When in doubt, set a suitable timeout value. 809 */ 810 __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) 811 { 812 struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry); 813 struct kernfs_open_node *on = kn->attr.open; 814 815 poll_wait(of->file, &on->poll, wait); 816 817 if (of->event != atomic_read(&on->event)) 818 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 819 820 return DEFAULT_POLLMASK; 821 } 822 823 static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) 824 { 825 struct kernfs_open_file *of = kernfs_of(filp); 826 struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); 827 __poll_t ret; 828 829 if (!kernfs_get_active(kn)) 830 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 831 832 if (kn->attr.ops->poll) 833 ret = kn->attr.ops->poll(of, wait); 834 else 835 ret = kernfs_generic_poll(of, wait); 836 837 kernfs_put_active(kn); 838 return ret; 839 } 840 841 static void kernfs_notify_workfn(struct work_struct *work) 842 { 843 struct kernfs_node *kn; 844 struct kernfs_super_info *info; 845 struct kernfs_root *root; 846 repeat: 847 /* pop one off the notify_list */ 848 spin_lock_irq(&kernfs_notify_lock); 849 kn = kernfs_notify_list; 850 if (kn == KERNFS_NOTIFY_EOL) { 851 spin_unlock_irq(&kernfs_notify_lock); 852 return; 853 } 854 kernfs_notify_list = kn->attr.notify_next; 855 kn->attr.notify_next = NULL; 856 spin_unlock_irq(&kernfs_notify_lock); 857 858 root = kernfs_root(kn); 859 /* kick fsnotify */ 860 down_write(&root->kernfs_rwsem); 861 862 list_for_each_entry(info, &kernfs_root(kn)->supers, node) { 863 struct kernfs_node *parent; 864 struct inode *p_inode = NULL; 865 struct inode *inode; 866 struct qstr name; 867 868 /* 869 * We want fsnotify_modify() on @kn but as the 870 * modifications aren't originating from userland don't 871 * have the matching @file available. Look up the inodes 872 * and generate the events manually. 873 */ 874 inode = ilookup(info->sb, kernfs_ino(kn)); 875 if (!inode) 876 continue; 877 878 name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name)); 879 parent = kernfs_get_parent(kn); 880 if (parent) { 881 p_inode = ilookup(info->sb, kernfs_ino(parent)); 882 if (p_inode) { 883 fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD, 884 inode, FSNOTIFY_EVENT_INODE, 885 p_inode, &name, inode, 0); 886 iput(p_inode); 887 } 888 889 kernfs_put(parent); 890 } 891 892 if (!p_inode) 893 fsnotify_inode(inode, FS_MODIFY); 894 895 iput(inode); 896 } 897 898 up_write(&root->kernfs_rwsem); 899 kernfs_put(kn); 900 goto repeat; 901 } 902 903 /** 904 * kernfs_notify - notify a kernfs file 905 * @kn: file to notify 906 * 907 * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any 908 * context. 909 */ 910 void kernfs_notify(struct kernfs_node *kn) 911 { 912 static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); 913 unsigned long flags; 914 struct kernfs_open_node *on; 915 916 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) 917 return; 918 919 /* kick poll immediately */ 920 spin_lock_irqsave(&kernfs_open_node_lock, flags); 921 on = kn->attr.open; 922 if (on) { 923 atomic_inc(&on->event); 924 wake_up_interruptible(&on->poll); 925 } 926 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 927 928 /* schedule work to kick fsnotify */ 929 spin_lock_irqsave(&kernfs_notify_lock, flags); 930 if (!kn->attr.notify_next) { 931 kernfs_get(kn); 932 kn->attr.notify_next = kernfs_notify_list; 933 kernfs_notify_list = kn; 934 schedule_work(&kernfs_notify_work); 935 } 936 spin_unlock_irqrestore(&kernfs_notify_lock, flags); 937 } 938 EXPORT_SYMBOL_GPL(kernfs_notify); 939 940 const struct file_operations kernfs_file_fops = { 941 .read_iter = kernfs_fop_read_iter, 942 .write_iter = kernfs_fop_write_iter, 943 .llseek = generic_file_llseek, 944 .mmap = kernfs_fop_mmap, 945 .open = kernfs_fop_open, 946 .release = kernfs_fop_release, 947 .poll = kernfs_fop_poll, 948 .fsync = noop_fsync, 949 .splice_read = generic_file_splice_read, 950 .splice_write = iter_file_splice_write, 951 }; 952 953 /** 954 * __kernfs_create_file - kernfs internal function to create a file 955 * @parent: directory to create the file in 956 * @name: name of the file 957 * @mode: mode of the file 958 * @uid: uid of the file 959 * @gid: gid of the file 960 * @size: size of the file 961 * @ops: kernfs operations for the file 962 * @priv: private data for the file 963 * @ns: optional namespace tag of the file 964 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep 965 * 966 * Returns the created node on success, ERR_PTR() value on error. 967 */ 968 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, 969 const char *name, 970 umode_t mode, kuid_t uid, kgid_t gid, 971 loff_t size, 972 const struct kernfs_ops *ops, 973 void *priv, const void *ns, 974 struct lock_class_key *key) 975 { 976 struct kernfs_node *kn; 977 unsigned flags; 978 int rc; 979 980 flags = KERNFS_FILE; 981 982 kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, 983 uid, gid, flags); 984 if (!kn) 985 return ERR_PTR(-ENOMEM); 986 987 kn->attr.ops = ops; 988 kn->attr.size = size; 989 kn->ns = ns; 990 kn->priv = priv; 991 992 #ifdef CONFIG_DEBUG_LOCK_ALLOC 993 if (key) { 994 lockdep_init_map(&kn->dep_map, "kn->active", key, 0); 995 kn->flags |= KERNFS_LOCKDEP; 996 } 997 #endif 998 999 /* 1000 * kn->attr.ops is accessible only while holding active ref. We 1001 * need to know whether some ops are implemented outside active 1002 * ref. Cache their existence in flags. 1003 */ 1004 if (ops->seq_show) 1005 kn->flags |= KERNFS_HAS_SEQ_SHOW; 1006 if (ops->mmap) 1007 kn->flags |= KERNFS_HAS_MMAP; 1008 if (ops->release) 1009 kn->flags |= KERNFS_HAS_RELEASE; 1010 1011 rc = kernfs_add_one(kn); 1012 if (rc) { 1013 kernfs_put(kn); 1014 return ERR_PTR(rc); 1015 } 1016 return kn; 1017 } 1018