1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * fs/kernfs/file.c - kernfs file implementation 4 * 5 * Copyright (c) 2001-3 Patrick Mochel 6 * Copyright (c) 2007 SUSE Linux Products GmbH 7 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/seq_file.h> 12 #include <linux/slab.h> 13 #include <linux/poll.h> 14 #include <linux/pagemap.h> 15 #include <linux/sched/mm.h> 16 #include <linux/fsnotify.h> 17 #include <linux/uio.h> 18 19 #include "kernfs-internal.h" 20 21 /* 22 * There's one kernfs_open_file for each open file and one kernfs_open_node 23 * for each kernfs_node with one or more open files. 24 * 25 * kernfs_node->attr.open points to kernfs_open_node. attr.open is 26 * protected by kernfs_open_node_lock. 27 * 28 * filp->private_data points to seq_file whose ->private points to 29 * kernfs_open_file. kernfs_open_files are chained at 30 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex. 31 */ 32 static DEFINE_SPINLOCK(kernfs_open_node_lock); 33 static DEFINE_MUTEX(kernfs_open_file_mutex); 34 35 struct kernfs_open_node { 36 atomic_t refcnt; 37 atomic_t event; 38 wait_queue_head_t poll; 39 struct list_head files; /* goes through kernfs_open_file.list */ 40 }; 41 42 /* 43 * kernfs_notify() may be called from any context and bounces notifications 44 * through a work item. To minimize space overhead in kernfs_node, the 45 * pending queue is implemented as a singly linked list of kernfs_nodes. 46 * The list is terminated with the self pointer so that whether a 47 * kernfs_node is on the list or not can be determined by testing the next 48 * pointer for NULL. 49 */ 50 #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) 51 52 static DEFINE_SPINLOCK(kernfs_notify_lock); 53 static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; 54 55 static struct kernfs_open_file *kernfs_of(struct file *file) 56 { 57 return ((struct seq_file *)file->private_data)->private; 58 } 59 60 /* 61 * Determine the kernfs_ops for the given kernfs_node. This function must 62 * be called while holding an active reference. 63 */ 64 static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) 65 { 66 if (kn->flags & KERNFS_LOCKDEP) 67 lockdep_assert_held(kn); 68 return kn->attr.ops; 69 } 70 71 /* 72 * As kernfs_seq_stop() is also called after kernfs_seq_start() or 73 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping 74 * a seq_file iteration which is fully initialized with an active reference 75 * or an aborted kernfs_seq_start() due to get_active failure. The 76 * position pointer is the only context for each seq_file iteration and 77 * thus the stop condition should be encoded in it. As the return value is 78 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable 79 * choice to indicate get_active failure. 80 * 81 * Unfortunately, this is complicated due to the optional custom seq_file 82 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() 83 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or 84 * custom seq_file operations and thus can't decide whether put_active 85 * should be performed or not only on ERR_PTR(-ENODEV). 86 * 87 * This is worked around by factoring out the custom seq_stop() and 88 * put_active part into kernfs_seq_stop_active(), skipping it from 89 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after 90 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures 91 * that kernfs_seq_stop_active() is skipped only after get_active failure. 92 */ 93 static void kernfs_seq_stop_active(struct seq_file *sf, void *v) 94 { 95 struct kernfs_open_file *of = sf->private; 96 const struct kernfs_ops *ops = kernfs_ops(of->kn); 97 98 if (ops->seq_stop) 99 ops->seq_stop(sf, v); 100 kernfs_put_active(of->kn); 101 } 102 103 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) 104 { 105 struct kernfs_open_file *of = sf->private; 106 const struct kernfs_ops *ops; 107 108 /* 109 * @of->mutex nests outside active ref and is primarily to ensure that 110 * the ops aren't called concurrently for the same open file. 111 */ 112 mutex_lock(&of->mutex); 113 if (!kernfs_get_active(of->kn)) 114 return ERR_PTR(-ENODEV); 115 116 ops = kernfs_ops(of->kn); 117 if (ops->seq_start) { 118 void *next = ops->seq_start(sf, ppos); 119 /* see the comment above kernfs_seq_stop_active() */ 120 if (next == ERR_PTR(-ENODEV)) 121 kernfs_seq_stop_active(sf, next); 122 return next; 123 } else { 124 /* 125 * The same behavior and code as single_open(). Returns 126 * !NULL if pos is at the beginning; otherwise, NULL. 127 */ 128 return NULL + !*ppos; 129 } 130 } 131 132 static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) 133 { 134 struct kernfs_open_file *of = sf->private; 135 const struct kernfs_ops *ops = kernfs_ops(of->kn); 136 137 if (ops->seq_next) { 138 void *next = ops->seq_next(sf, v, ppos); 139 /* see the comment above kernfs_seq_stop_active() */ 140 if (next == ERR_PTR(-ENODEV)) 141 kernfs_seq_stop_active(sf, next); 142 return next; 143 } else { 144 /* 145 * The same behavior and code as single_open(), always 146 * terminate after the initial read. 147 */ 148 ++*ppos; 149 return NULL; 150 } 151 } 152 153 static void kernfs_seq_stop(struct seq_file *sf, void *v) 154 { 155 struct kernfs_open_file *of = sf->private; 156 157 if (v != ERR_PTR(-ENODEV)) 158 kernfs_seq_stop_active(sf, v); 159 mutex_unlock(&of->mutex); 160 } 161 162 static int kernfs_seq_show(struct seq_file *sf, void *v) 163 { 164 struct kernfs_open_file *of = sf->private; 165 166 of->event = atomic_read(&of->kn->attr.open->event); 167 168 return of->kn->attr.ops->seq_show(sf, v); 169 } 170 171 static const struct seq_operations kernfs_seq_ops = { 172 .start = kernfs_seq_start, 173 .next = kernfs_seq_next, 174 .stop = kernfs_seq_stop, 175 .show = kernfs_seq_show, 176 }; 177 178 /* 179 * As reading a bin file can have side-effects, the exact offset and bytes 180 * specified in read(2) call should be passed to the read callback making 181 * it difficult to use seq_file. Implement simplistic custom buffering for 182 * bin files. 183 */ 184 static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) 185 { 186 struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); 187 ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); 188 const struct kernfs_ops *ops; 189 char *buf; 190 191 buf = of->prealloc_buf; 192 if (buf) 193 mutex_lock(&of->prealloc_mutex); 194 else 195 buf = kmalloc(len, GFP_KERNEL); 196 if (!buf) 197 return -ENOMEM; 198 199 /* 200 * @of->mutex nests outside active ref and is used both to ensure that 201 * the ops aren't called concurrently for the same open file. 202 */ 203 mutex_lock(&of->mutex); 204 if (!kernfs_get_active(of->kn)) { 205 len = -ENODEV; 206 mutex_unlock(&of->mutex); 207 goto out_free; 208 } 209 210 of->event = atomic_read(&of->kn->attr.open->event); 211 ops = kernfs_ops(of->kn); 212 if (ops->read) 213 len = ops->read(of, buf, len, iocb->ki_pos); 214 else 215 len = -EINVAL; 216 217 kernfs_put_active(of->kn); 218 mutex_unlock(&of->mutex); 219 220 if (len < 0) 221 goto out_free; 222 223 if (copy_to_iter(buf, len, iter) != len) { 224 len = -EFAULT; 225 goto out_free; 226 } 227 228 iocb->ki_pos += len; 229 230 out_free: 231 if (buf == of->prealloc_buf) 232 mutex_unlock(&of->prealloc_mutex); 233 else 234 kfree(buf); 235 return len; 236 } 237 238 static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) 239 { 240 if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) 241 return seq_read_iter(iocb, iter); 242 return kernfs_file_read_iter(iocb, iter); 243 } 244 245 /** 246 * kernfs_fop_write - kernfs vfs write callback 247 * @file: file pointer 248 * @user_buf: data to write 249 * @count: number of bytes 250 * @ppos: starting offset 251 * 252 * Copy data in from userland and pass it to the matching kernfs write 253 * operation. 254 * 255 * There is no easy way for us to know if userspace is only doing a partial 256 * write, so we don't support them. We expect the entire buffer to come on 257 * the first write. Hint: if you're writing a value, first read the file, 258 * modify only the the value you're changing, then write entire buffer 259 * back. 260 */ 261 static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, 262 size_t count, loff_t *ppos) 263 { 264 struct kernfs_open_file *of = kernfs_of(file); 265 const struct kernfs_ops *ops; 266 ssize_t len; 267 char *buf; 268 269 if (of->atomic_write_len) { 270 len = count; 271 if (len > of->atomic_write_len) 272 return -E2BIG; 273 } else { 274 len = min_t(size_t, count, PAGE_SIZE); 275 } 276 277 buf = of->prealloc_buf; 278 if (buf) 279 mutex_lock(&of->prealloc_mutex); 280 else 281 buf = kmalloc(len + 1, GFP_KERNEL); 282 if (!buf) 283 return -ENOMEM; 284 285 if (copy_from_user(buf, user_buf, len)) { 286 len = -EFAULT; 287 goto out_free; 288 } 289 buf[len] = '\0'; /* guarantee string termination */ 290 291 /* 292 * @of->mutex nests outside active ref and is used both to ensure that 293 * the ops aren't called concurrently for the same open file. 294 */ 295 mutex_lock(&of->mutex); 296 if (!kernfs_get_active(of->kn)) { 297 mutex_unlock(&of->mutex); 298 len = -ENODEV; 299 goto out_free; 300 } 301 302 ops = kernfs_ops(of->kn); 303 if (ops->write) 304 len = ops->write(of, buf, len, *ppos); 305 else 306 len = -EINVAL; 307 308 kernfs_put_active(of->kn); 309 mutex_unlock(&of->mutex); 310 311 if (len > 0) 312 *ppos += len; 313 314 out_free: 315 if (buf == of->prealloc_buf) 316 mutex_unlock(&of->prealloc_mutex); 317 else 318 kfree(buf); 319 return len; 320 } 321 322 static void kernfs_vma_open(struct vm_area_struct *vma) 323 { 324 struct file *file = vma->vm_file; 325 struct kernfs_open_file *of = kernfs_of(file); 326 327 if (!of->vm_ops) 328 return; 329 330 if (!kernfs_get_active(of->kn)) 331 return; 332 333 if (of->vm_ops->open) 334 of->vm_ops->open(vma); 335 336 kernfs_put_active(of->kn); 337 } 338 339 static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) 340 { 341 struct file *file = vmf->vma->vm_file; 342 struct kernfs_open_file *of = kernfs_of(file); 343 vm_fault_t ret; 344 345 if (!of->vm_ops) 346 return VM_FAULT_SIGBUS; 347 348 if (!kernfs_get_active(of->kn)) 349 return VM_FAULT_SIGBUS; 350 351 ret = VM_FAULT_SIGBUS; 352 if (of->vm_ops->fault) 353 ret = of->vm_ops->fault(vmf); 354 355 kernfs_put_active(of->kn); 356 return ret; 357 } 358 359 static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) 360 { 361 struct file *file = vmf->vma->vm_file; 362 struct kernfs_open_file *of = kernfs_of(file); 363 vm_fault_t ret; 364 365 if (!of->vm_ops) 366 return VM_FAULT_SIGBUS; 367 368 if (!kernfs_get_active(of->kn)) 369 return VM_FAULT_SIGBUS; 370 371 ret = 0; 372 if (of->vm_ops->page_mkwrite) 373 ret = of->vm_ops->page_mkwrite(vmf); 374 else 375 file_update_time(file); 376 377 kernfs_put_active(of->kn); 378 return ret; 379 } 380 381 static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, 382 void *buf, int len, int write) 383 { 384 struct file *file = vma->vm_file; 385 struct kernfs_open_file *of = kernfs_of(file); 386 int ret; 387 388 if (!of->vm_ops) 389 return -EINVAL; 390 391 if (!kernfs_get_active(of->kn)) 392 return -EINVAL; 393 394 ret = -EINVAL; 395 if (of->vm_ops->access) 396 ret = of->vm_ops->access(vma, addr, buf, len, write); 397 398 kernfs_put_active(of->kn); 399 return ret; 400 } 401 402 #ifdef CONFIG_NUMA 403 static int kernfs_vma_set_policy(struct vm_area_struct *vma, 404 struct mempolicy *new) 405 { 406 struct file *file = vma->vm_file; 407 struct kernfs_open_file *of = kernfs_of(file); 408 int ret; 409 410 if (!of->vm_ops) 411 return 0; 412 413 if (!kernfs_get_active(of->kn)) 414 return -EINVAL; 415 416 ret = 0; 417 if (of->vm_ops->set_policy) 418 ret = of->vm_ops->set_policy(vma, new); 419 420 kernfs_put_active(of->kn); 421 return ret; 422 } 423 424 static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, 425 unsigned long addr) 426 { 427 struct file *file = vma->vm_file; 428 struct kernfs_open_file *of = kernfs_of(file); 429 struct mempolicy *pol; 430 431 if (!of->vm_ops) 432 return vma->vm_policy; 433 434 if (!kernfs_get_active(of->kn)) 435 return vma->vm_policy; 436 437 pol = vma->vm_policy; 438 if (of->vm_ops->get_policy) 439 pol = of->vm_ops->get_policy(vma, addr); 440 441 kernfs_put_active(of->kn); 442 return pol; 443 } 444 445 #endif 446 447 static const struct vm_operations_struct kernfs_vm_ops = { 448 .open = kernfs_vma_open, 449 .fault = kernfs_vma_fault, 450 .page_mkwrite = kernfs_vma_page_mkwrite, 451 .access = kernfs_vma_access, 452 #ifdef CONFIG_NUMA 453 .set_policy = kernfs_vma_set_policy, 454 .get_policy = kernfs_vma_get_policy, 455 #endif 456 }; 457 458 static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) 459 { 460 struct kernfs_open_file *of = kernfs_of(file); 461 const struct kernfs_ops *ops; 462 int rc; 463 464 /* 465 * mmap path and of->mutex are prone to triggering spurious lockdep 466 * warnings and we don't want to add spurious locking dependency 467 * between the two. Check whether mmap is actually implemented 468 * without grabbing @of->mutex by testing HAS_MMAP flag. See the 469 * comment in kernfs_file_open() for more details. 470 */ 471 if (!(of->kn->flags & KERNFS_HAS_MMAP)) 472 return -ENODEV; 473 474 mutex_lock(&of->mutex); 475 476 rc = -ENODEV; 477 if (!kernfs_get_active(of->kn)) 478 goto out_unlock; 479 480 ops = kernfs_ops(of->kn); 481 rc = ops->mmap(of, vma); 482 if (rc) 483 goto out_put; 484 485 /* 486 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() 487 * to satisfy versions of X which crash if the mmap fails: that 488 * substitutes a new vm_file, and we don't then want bin_vm_ops. 489 */ 490 if (vma->vm_file != file) 491 goto out_put; 492 493 rc = -EINVAL; 494 if (of->mmapped && of->vm_ops != vma->vm_ops) 495 goto out_put; 496 497 /* 498 * It is not possible to successfully wrap close. 499 * So error if someone is trying to use close. 500 */ 501 rc = -EINVAL; 502 if (vma->vm_ops && vma->vm_ops->close) 503 goto out_put; 504 505 rc = 0; 506 of->mmapped = true; 507 of->vm_ops = vma->vm_ops; 508 vma->vm_ops = &kernfs_vm_ops; 509 out_put: 510 kernfs_put_active(of->kn); 511 out_unlock: 512 mutex_unlock(&of->mutex); 513 514 return rc; 515 } 516 517 /** 518 * kernfs_get_open_node - get or create kernfs_open_node 519 * @kn: target kernfs_node 520 * @of: kernfs_open_file for this instance of open 521 * 522 * If @kn->attr.open exists, increment its reference count; otherwise, 523 * create one. @of is chained to the files list. 524 * 525 * LOCKING: 526 * Kernel thread context (may sleep). 527 * 528 * RETURNS: 529 * 0 on success, -errno on failure. 530 */ 531 static int kernfs_get_open_node(struct kernfs_node *kn, 532 struct kernfs_open_file *of) 533 { 534 struct kernfs_open_node *on, *new_on = NULL; 535 536 retry: 537 mutex_lock(&kernfs_open_file_mutex); 538 spin_lock_irq(&kernfs_open_node_lock); 539 540 if (!kn->attr.open && new_on) { 541 kn->attr.open = new_on; 542 new_on = NULL; 543 } 544 545 on = kn->attr.open; 546 if (on) { 547 atomic_inc(&on->refcnt); 548 list_add_tail(&of->list, &on->files); 549 } 550 551 spin_unlock_irq(&kernfs_open_node_lock); 552 mutex_unlock(&kernfs_open_file_mutex); 553 554 if (on) { 555 kfree(new_on); 556 return 0; 557 } 558 559 /* not there, initialize a new one and retry */ 560 new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); 561 if (!new_on) 562 return -ENOMEM; 563 564 atomic_set(&new_on->refcnt, 0); 565 atomic_set(&new_on->event, 1); 566 init_waitqueue_head(&new_on->poll); 567 INIT_LIST_HEAD(&new_on->files); 568 goto retry; 569 } 570 571 /** 572 * kernfs_put_open_node - put kernfs_open_node 573 * @kn: target kernfs_nodet 574 * @of: associated kernfs_open_file 575 * 576 * Put @kn->attr.open and unlink @of from the files list. If 577 * reference count reaches zero, disassociate and free it. 578 * 579 * LOCKING: 580 * None. 581 */ 582 static void kernfs_put_open_node(struct kernfs_node *kn, 583 struct kernfs_open_file *of) 584 { 585 struct kernfs_open_node *on = kn->attr.open; 586 unsigned long flags; 587 588 mutex_lock(&kernfs_open_file_mutex); 589 spin_lock_irqsave(&kernfs_open_node_lock, flags); 590 591 if (of) 592 list_del(&of->list); 593 594 if (atomic_dec_and_test(&on->refcnt)) 595 kn->attr.open = NULL; 596 else 597 on = NULL; 598 599 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 600 mutex_unlock(&kernfs_open_file_mutex); 601 602 kfree(on); 603 } 604 605 static int kernfs_fop_open(struct inode *inode, struct file *file) 606 { 607 struct kernfs_node *kn = inode->i_private; 608 struct kernfs_root *root = kernfs_root(kn); 609 const struct kernfs_ops *ops; 610 struct kernfs_open_file *of; 611 bool has_read, has_write, has_mmap; 612 int error = -EACCES; 613 614 if (!kernfs_get_active(kn)) 615 return -ENODEV; 616 617 ops = kernfs_ops(kn); 618 619 has_read = ops->seq_show || ops->read || ops->mmap; 620 has_write = ops->write || ops->mmap; 621 has_mmap = ops->mmap; 622 623 /* see the flag definition for details */ 624 if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { 625 if ((file->f_mode & FMODE_WRITE) && 626 (!(inode->i_mode & S_IWUGO) || !has_write)) 627 goto err_out; 628 629 if ((file->f_mode & FMODE_READ) && 630 (!(inode->i_mode & S_IRUGO) || !has_read)) 631 goto err_out; 632 } 633 634 /* allocate a kernfs_open_file for the file */ 635 error = -ENOMEM; 636 of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); 637 if (!of) 638 goto err_out; 639 640 /* 641 * The following is done to give a different lockdep key to 642 * @of->mutex for files which implement mmap. This is a rather 643 * crude way to avoid false positive lockdep warning around 644 * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and 645 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under 646 * which mm->mmap_lock nests, while holding @of->mutex. As each 647 * open file has a separate mutex, it's okay as long as those don't 648 * happen on the same file. At this point, we can't easily give 649 * each file a separate locking class. Let's differentiate on 650 * whether the file has mmap or not for now. 651 * 652 * Both paths of the branch look the same. They're supposed to 653 * look that way and give @of->mutex different static lockdep keys. 654 */ 655 if (has_mmap) 656 mutex_init(&of->mutex); 657 else 658 mutex_init(&of->mutex); 659 660 of->kn = kn; 661 of->file = file; 662 663 /* 664 * Write path needs to atomic_write_len outside active reference. 665 * Cache it in open_file. See kernfs_fop_write() for details. 666 */ 667 of->atomic_write_len = ops->atomic_write_len; 668 669 error = -EINVAL; 670 /* 671 * ->seq_show is incompatible with ->prealloc, 672 * as seq_read does its own allocation. 673 * ->read must be used instead. 674 */ 675 if (ops->prealloc && ops->seq_show) 676 goto err_free; 677 if (ops->prealloc) { 678 int len = of->atomic_write_len ?: PAGE_SIZE; 679 of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); 680 error = -ENOMEM; 681 if (!of->prealloc_buf) 682 goto err_free; 683 mutex_init(&of->prealloc_mutex); 684 } 685 686 /* 687 * Always instantiate seq_file even if read access doesn't use 688 * seq_file or is not requested. This unifies private data access 689 * and readable regular files are the vast majority anyway. 690 */ 691 if (ops->seq_show) 692 error = seq_open(file, &kernfs_seq_ops); 693 else 694 error = seq_open(file, NULL); 695 if (error) 696 goto err_free; 697 698 of->seq_file = file->private_data; 699 of->seq_file->private = of; 700 701 /* seq_file clears PWRITE unconditionally, restore it if WRITE */ 702 if (file->f_mode & FMODE_WRITE) 703 file->f_mode |= FMODE_PWRITE; 704 705 /* make sure we have open node struct */ 706 error = kernfs_get_open_node(kn, of); 707 if (error) 708 goto err_seq_release; 709 710 if (ops->open) { 711 /* nobody has access to @of yet, skip @of->mutex */ 712 error = ops->open(of); 713 if (error) 714 goto err_put_node; 715 } 716 717 /* open succeeded, put active references */ 718 kernfs_put_active(kn); 719 return 0; 720 721 err_put_node: 722 kernfs_put_open_node(kn, of); 723 err_seq_release: 724 seq_release(inode, file); 725 err_free: 726 kfree(of->prealloc_buf); 727 kfree(of); 728 err_out: 729 kernfs_put_active(kn); 730 return error; 731 } 732 733 /* used from release/drain to ensure that ->release() is called exactly once */ 734 static void kernfs_release_file(struct kernfs_node *kn, 735 struct kernfs_open_file *of) 736 { 737 /* 738 * @of is guaranteed to have no other file operations in flight and 739 * we just want to synchronize release and drain paths. 740 * @kernfs_open_file_mutex is enough. @of->mutex can't be used 741 * here because drain path may be called from places which can 742 * cause circular dependency. 743 */ 744 lockdep_assert_held(&kernfs_open_file_mutex); 745 746 if (!of->released) { 747 /* 748 * A file is never detached without being released and we 749 * need to be able to release files which are deactivated 750 * and being drained. Don't use kernfs_ops(). 751 */ 752 kn->attr.ops->release(of); 753 of->released = true; 754 } 755 } 756 757 static int kernfs_fop_release(struct inode *inode, struct file *filp) 758 { 759 struct kernfs_node *kn = inode->i_private; 760 struct kernfs_open_file *of = kernfs_of(filp); 761 762 if (kn->flags & KERNFS_HAS_RELEASE) { 763 mutex_lock(&kernfs_open_file_mutex); 764 kernfs_release_file(kn, of); 765 mutex_unlock(&kernfs_open_file_mutex); 766 } 767 768 kernfs_put_open_node(kn, of); 769 seq_release(inode, filp); 770 kfree(of->prealloc_buf); 771 kfree(of); 772 773 return 0; 774 } 775 776 void kernfs_drain_open_files(struct kernfs_node *kn) 777 { 778 struct kernfs_open_node *on; 779 struct kernfs_open_file *of; 780 781 if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) 782 return; 783 784 spin_lock_irq(&kernfs_open_node_lock); 785 on = kn->attr.open; 786 if (on) 787 atomic_inc(&on->refcnt); 788 spin_unlock_irq(&kernfs_open_node_lock); 789 if (!on) 790 return; 791 792 mutex_lock(&kernfs_open_file_mutex); 793 794 list_for_each_entry(of, &on->files, list) { 795 struct inode *inode = file_inode(of->file); 796 797 if (kn->flags & KERNFS_HAS_MMAP) 798 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 799 800 if (kn->flags & KERNFS_HAS_RELEASE) 801 kernfs_release_file(kn, of); 802 } 803 804 mutex_unlock(&kernfs_open_file_mutex); 805 806 kernfs_put_open_node(kn, NULL); 807 } 808 809 /* 810 * Kernfs attribute files are pollable. The idea is that you read 811 * the content and then you use 'poll' or 'select' to wait for 812 * the content to change. When the content changes (assuming the 813 * manager for the kobject supports notification), poll will 814 * return EPOLLERR|EPOLLPRI, and select will return the fd whether 815 * it is waiting for read, write, or exceptions. 816 * Once poll/select indicates that the value has changed, you 817 * need to close and re-open the file, or seek to 0 and read again. 818 * Reminder: this only works for attributes which actively support 819 * it, and it is not possible to test an attribute from userspace 820 * to see if it supports poll (Neither 'poll' nor 'select' return 821 * an appropriate error code). When in doubt, set a suitable timeout value. 822 */ 823 __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) 824 { 825 struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry); 826 struct kernfs_open_node *on = kn->attr.open; 827 828 poll_wait(of->file, &on->poll, wait); 829 830 if (of->event != atomic_read(&on->event)) 831 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 832 833 return DEFAULT_POLLMASK; 834 } 835 836 static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) 837 { 838 struct kernfs_open_file *of = kernfs_of(filp); 839 struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); 840 __poll_t ret; 841 842 if (!kernfs_get_active(kn)) 843 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 844 845 if (kn->attr.ops->poll) 846 ret = kn->attr.ops->poll(of, wait); 847 else 848 ret = kernfs_generic_poll(of, wait); 849 850 kernfs_put_active(kn); 851 return ret; 852 } 853 854 static void kernfs_notify_workfn(struct work_struct *work) 855 { 856 struct kernfs_node *kn; 857 struct kernfs_super_info *info; 858 repeat: 859 /* pop one off the notify_list */ 860 spin_lock_irq(&kernfs_notify_lock); 861 kn = kernfs_notify_list; 862 if (kn == KERNFS_NOTIFY_EOL) { 863 spin_unlock_irq(&kernfs_notify_lock); 864 return; 865 } 866 kernfs_notify_list = kn->attr.notify_next; 867 kn->attr.notify_next = NULL; 868 spin_unlock_irq(&kernfs_notify_lock); 869 870 /* kick fsnotify */ 871 mutex_lock(&kernfs_mutex); 872 873 list_for_each_entry(info, &kernfs_root(kn)->supers, node) { 874 struct kernfs_node *parent; 875 struct inode *p_inode = NULL; 876 struct inode *inode; 877 struct qstr name; 878 879 /* 880 * We want fsnotify_modify() on @kn but as the 881 * modifications aren't originating from userland don't 882 * have the matching @file available. Look up the inodes 883 * and generate the events manually. 884 */ 885 inode = ilookup(info->sb, kernfs_ino(kn)); 886 if (!inode) 887 continue; 888 889 name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name)); 890 parent = kernfs_get_parent(kn); 891 if (parent) { 892 p_inode = ilookup(info->sb, kernfs_ino(parent)); 893 if (p_inode) { 894 fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD, 895 inode, FSNOTIFY_EVENT_INODE, 896 p_inode, &name, inode, 0); 897 iput(p_inode); 898 } 899 900 kernfs_put(parent); 901 } 902 903 if (!p_inode) 904 fsnotify_inode(inode, FS_MODIFY); 905 906 iput(inode); 907 } 908 909 mutex_unlock(&kernfs_mutex); 910 kernfs_put(kn); 911 goto repeat; 912 } 913 914 /** 915 * kernfs_notify - notify a kernfs file 916 * @kn: file to notify 917 * 918 * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any 919 * context. 920 */ 921 void kernfs_notify(struct kernfs_node *kn) 922 { 923 static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); 924 unsigned long flags; 925 struct kernfs_open_node *on; 926 927 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) 928 return; 929 930 /* kick poll immediately */ 931 spin_lock_irqsave(&kernfs_open_node_lock, flags); 932 on = kn->attr.open; 933 if (on) { 934 atomic_inc(&on->event); 935 wake_up_interruptible(&on->poll); 936 } 937 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 938 939 /* schedule work to kick fsnotify */ 940 spin_lock_irqsave(&kernfs_notify_lock, flags); 941 if (!kn->attr.notify_next) { 942 kernfs_get(kn); 943 kn->attr.notify_next = kernfs_notify_list; 944 kernfs_notify_list = kn; 945 schedule_work(&kernfs_notify_work); 946 } 947 spin_unlock_irqrestore(&kernfs_notify_lock, flags); 948 } 949 EXPORT_SYMBOL_GPL(kernfs_notify); 950 951 const struct file_operations kernfs_file_fops = { 952 .read_iter = kernfs_fop_read_iter, 953 .write = kernfs_fop_write, 954 .llseek = generic_file_llseek, 955 .mmap = kernfs_fop_mmap, 956 .open = kernfs_fop_open, 957 .release = kernfs_fop_release, 958 .poll = kernfs_fop_poll, 959 .fsync = noop_fsync, 960 }; 961 962 /** 963 * __kernfs_create_file - kernfs internal function to create a file 964 * @parent: directory to create the file in 965 * @name: name of the file 966 * @mode: mode of the file 967 * @uid: uid of the file 968 * @gid: gid of the file 969 * @size: size of the file 970 * @ops: kernfs operations for the file 971 * @priv: private data for the file 972 * @ns: optional namespace tag of the file 973 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep 974 * 975 * Returns the created node on success, ERR_PTR() value on error. 976 */ 977 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, 978 const char *name, 979 umode_t mode, kuid_t uid, kgid_t gid, 980 loff_t size, 981 const struct kernfs_ops *ops, 982 void *priv, const void *ns, 983 struct lock_class_key *key) 984 { 985 struct kernfs_node *kn; 986 unsigned flags; 987 int rc; 988 989 flags = KERNFS_FILE; 990 991 kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, 992 uid, gid, flags); 993 if (!kn) 994 return ERR_PTR(-ENOMEM); 995 996 kn->attr.ops = ops; 997 kn->attr.size = size; 998 kn->ns = ns; 999 kn->priv = priv; 1000 1001 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1002 if (key) { 1003 lockdep_init_map(&kn->dep_map, "kn->active", key, 0); 1004 kn->flags |= KERNFS_LOCKDEP; 1005 } 1006 #endif 1007 1008 /* 1009 * kn->attr.ops is accesible only while holding active ref. We 1010 * need to know whether some ops are implemented outside active 1011 * ref. Cache their existence in flags. 1012 */ 1013 if (ops->seq_show) 1014 kn->flags |= KERNFS_HAS_SEQ_SHOW; 1015 if (ops->mmap) 1016 kn->flags |= KERNFS_HAS_MMAP; 1017 if (ops->release) 1018 kn->flags |= KERNFS_HAS_RELEASE; 1019 1020 rc = kernfs_add_one(kn); 1021 if (rc) { 1022 kernfs_put(kn); 1023 return ERR_PTR(rc); 1024 } 1025 return kn; 1026 } 1027