// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/file.c - kernfs file implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>

#include "kernfs-internal.h"

/*
 * There's one kernfs_open_file for each open file and one kernfs_open_node
 * for each kernfs_node with one or more open files.
 *
 * kernfs_node->attr.open points to kernfs_open_node.  attr.open is
 * protected by kernfs_open_node_lock.
 *
 * filp->private_data points to seq_file whose ->private points to
 * kernfs_open_file.  kernfs_open_files are chained at
 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
 */
static DEFINE_SPINLOCK(kernfs_open_node_lock);
static DEFINE_MUTEX(kernfs_open_file_mutex);

struct kernfs_open_node {
	atomic_t		refcnt;
	atomic_t		event;
	wait_queue_head_t	poll;
	struct list_head	files; /* goes through kernfs_open_file.list */
};

/*
 * kernfs_notify() may be called from any context and bounces notifications
 * through a work item.  To minimize space overhead in kernfs_node, the
 * pending queue is implemented as a singly linked list of kernfs_nodes.
 * The list is terminated with the self pointer so that whether a
 * kernfs_node is on the list or not can be determined by testing the next
 * pointer for NULL.
 */
#define KERNFS_NOTIFY_EOL		((void *)&kernfs_notify_list)

static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;

static struct kernfs_open_file *kernfs_of(struct file *file)
{
	return ((struct seq_file *)file->private_data)->private;
}

/*
 * Determine the kernfs_ops for the given kernfs_node.  This function must
 * be called while holding an active reference.
 */
static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
{
	if (kn->flags & KERNFS_LOCKDEP)
		lockdep_assert_held(kn);
	return kn->attr.ops;
}

/*
 * As kernfs_seq_stop() is also called after kernfs_seq_start() or
 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
 * a seq_file iteration which is fully initialized with an active reference
 * or an aborted kernfs_seq_start() due to get_active failure.  The
 * position pointer is the only context for each seq_file iteration and
 * thus the stop condition should be encoded in it.  As the return value is
 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
 * choice to indicate get_active failure.
 *
 * Unfortunately, this is complicated due to the optional custom seq_file
 * operations which may return ERR_PTR(-ENODEV) too.  kernfs_seq_stop()
 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
 * custom seq_file operations and thus can't decide whether put_active
 * should be performed based only on ERR_PTR(-ENODEV).
 *
 * This is worked around by factoring out the custom seq_stop() and
 * put_active part into kernfs_seq_stop_active(), skipping it from
 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
 * that kernfs_seq_stop_active() is skipped only after get_active failure.
 */
static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_stop)
		ops->seq_stop(sf, v);
	kernfs_put_active(of->kn);
}

static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn))
		return ERR_PTR(-ENODEV);

	ops = kernfs_ops(of->kn);
	if (ops->seq_start) {
		void *next = ops->seq_start(sf, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	} else {
		/*
		 * The same behavior and code as single_open().  Returns
		 * !NULL if pos is at the beginning; otherwise, NULL.
		 */
		return NULL + !*ppos;
	}
}

static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_next) {
		void *next = ops->seq_next(sf, v, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	} else {
		/*
		 * The same behavior and code as single_open(), always
		 * terminate after the initial read.
		 */
		++*ppos;
		return NULL;
	}
}

static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	if (v != ERR_PTR(-ENODEV))
		kernfs_seq_stop_active(sf, v);
	mutex_unlock(&of->mutex);
}

static int kernfs_seq_show(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	of->event = atomic_read(&of->kn->attr.open->event);

	return of->kn->attr.ops->seq_show(sf, v);
}

static const struct seq_operations kernfs_seq_ops = {
	.start = kernfs_seq_start,
	.next = kernfs_seq_next,
	.stop = kernfs_seq_stop,
	.show = kernfs_seq_show,
};

/*
 * As reading a bin file can have side-effects, the exact offset and bytes
 * specified in read(2) call should be passed to the read callback making
 * it difficult to use seq_file.  Implement simplistic custom buffering for
 * bin files.
 */
static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
				       char __user *user_buf, size_t count,
				       loff_t *ppos)
{
	ssize_t len = min_t(size_t, count, PAGE_SIZE);
	const struct kernfs_ops *ops;
	char *buf;

	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * @of->mutex nests outside active ref and is used to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		len = -ENODEV;
		mutex_unlock(&of->mutex);
		goto out_free;
	}

	of->event = atomic_read(&of->kn->attr.open->event);
	ops = kernfs_ops(of->kn);
	if (ops->read)
		len = ops->read(of, buf, len, *ppos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len < 0)
		goto out_free;

	if (copy_to_user(user_buf, buf, len)) {
		len = -EFAULT;
		goto out_free;
	}

	*ppos += len;

out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}

/**
 * kernfs_fop_read - kernfs vfs read callback
 * @file: file pointer
 * @user_buf: destination buffer in userspace
 * @count: number of bytes
 * @ppos: starting offset
 */
static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf,
			       size_t count, loff_t *ppos)
{
	struct kernfs_open_file *of = kernfs_of(file);

	if (of->kn->flags & KERNFS_HAS_SEQ_SHOW)
		return seq_read(file, user_buf, count, ppos);
	else
		return kernfs_file_direct_read(of, user_buf, count, ppos);
}

/**
 * kernfs_fop_write - kernfs vfs write callback
 * @file: file pointer
 * @user_buf: data to write
 * @count: number of bytes
 * @ppos: starting offset
 *
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
 *
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so partial writes aren't supported.  We expect the entire buffer
 * to come on the first write.  Hint: if you're writing a value, first read
 * the file, modify only the value you're changing, then write the entire
 * buffer back.
 */
static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
				size_t count, loff_t *ppos)
{
	struct kernfs_open_file *of = kernfs_of(file);
	const struct kernfs_ops *ops;
	ssize_t len;
	char *buf;

	if (of->atomic_write_len) {
		len = count;
		if (len > of->atomic_write_len)
			return -E2BIG;
	} else {
		len = min_t(size_t, count, PAGE_SIZE);
	}

	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, user_buf, len)) {
		len = -EFAULT;
		goto out_free;
	}
	buf[len] = '\0';	/* guarantee string termination */

	/*
	 * @of->mutex nests outside active ref and is used to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		mutex_unlock(&of->mutex);
		len = -ENODEV;
		goto out_free;
	}

	ops = kernfs_ops(of->kn);
	if (ops->write)
		len = ops->write(of, buf, len, *ppos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len > 0)
		*ppos += len;

out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}
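
/*
 * For illustration only: a minimal userspace sketch of the read-modify-write
 * pattern recommended in the kernfs_fop_write() comment above.  The attribute
 * path and buffer size are hypothetical and error handling is omitted.
 *
 *	char buf[4096];
 *	int fd = open("/sys/kernel/some_attr", O_RDWR);
 *	ssize_t len = read(fd, buf, sizeof(buf) - 1);
 *
 *	buf[len] = '\0';
 *	// ... modify only the value being changed within buf ...
 *	lseek(fd, 0, SEEK_SET);
 *	write(fd, buf, strlen(buf));	// entire buffer in a single write
 *	close(fd);
 */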

static void kernfs_vma_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);

	if (!of->vm_ops)
		return;

	if (!kernfs_get_active(of->kn))
		return;

	if (of->vm_ops->open)
		of->vm_ops->open(vma);

	kernfs_put_active(of->kn);
}

static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	vm_fault_t ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!kernfs_get_active(of->kn))
		return VM_FAULT_SIGBUS;

	ret = VM_FAULT_SIGBUS;
	if (of->vm_ops->fault)
		ret = of->vm_ops->fault(vmf);

	kernfs_put_active(of->kn);
	return ret;
}

static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	vm_fault_t ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!kernfs_get_active(of->kn))
		return VM_FAULT_SIGBUS;

	ret = 0;
	if (of->vm_ops->page_mkwrite)
		ret = of->vm_ops->page_mkwrite(vmf);
	else
		file_update_time(file);

	kernfs_put_active(of->kn);
	return ret;
}

static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
			     void *buf, int len, int write)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	if (!of->vm_ops)
		return -EINVAL;

	if (!kernfs_get_active(of->kn))
		return -EINVAL;

	ret = -EINVAL;
	if (of->vm_ops->access)
		ret = of->vm_ops->access(vma, addr, buf, len, write);

	kernfs_put_active(of->kn);
	return ret;
}

#ifdef CONFIG_NUMA
static int kernfs_vma_set_policy(struct vm_area_struct *vma,
				 struct mempolicy *new)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	if (!of->vm_ops)
		return 0;

	if (!kernfs_get_active(of->kn))
		return -EINVAL;

	ret = 0;
	if (of->vm_ops->set_policy)
		ret = of->vm_ops->set_policy(vma, new);

	kernfs_put_active(of->kn);
	return ret;
}

static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
					       unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	struct mempolicy *pol;

	if (!of->vm_ops)
		return vma->vm_policy;

	if (!kernfs_get_active(of->kn))
		return vma->vm_policy;

	pol = vma->vm_policy;
	if (of->vm_ops->get_policy)
		pol = of->vm_ops->get_policy(vma, addr);

	kernfs_put_active(of->kn);
	return pol;
}

#endif

static const struct vm_operations_struct kernfs_vm_ops = {
	.open		= kernfs_vma_open,
	.fault		= kernfs_vma_fault,
	.page_mkwrite	= kernfs_vma_page_mkwrite,
	.access		= kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy	= kernfs_vma_set_policy,
	.get_policy	= kernfs_vma_get_policy,
#endif
};

static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct kernfs_open_file *of = kernfs_of(file);
	const struct kernfs_ops *ops;
	int rc;

	/*
	 * mmap path and of->mutex are prone to triggering spurious lockdep
	 * warnings and we don't want to add spurious locking dependency
	 * between the two.  Check whether mmap is actually implemented
	 * without grabbing @of->mutex by testing HAS_MMAP flag.  See the
	 * comment in kernfs_fop_open() for more details.
	 */
	if (!(of->kn->flags & KERNFS_HAS_MMAP))
		return -ENODEV;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!kernfs_get_active(of->kn))
		goto out_unlock;

	ops = kernfs_ops(of->kn);
	rc = ops->mmap(of, vma);
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 */
	if (vma->vm_file != file)
		goto out_put;

	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	rc = -EINVAL;
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	of->mmapped = true;
	of->vm_ops = vma->vm_ops;
	vma->vm_ops = &kernfs_vm_ops;
out_put:
	kernfs_put_active(of->kn);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}

/**
 * kernfs_get_open_node - get or create kernfs_open_node
 * @kn: target kernfs_node
 * @of: kernfs_open_file for this instance of open
 *
 * If @kn->attr.open exists, increment its reference count; otherwise,
 * create one.  @of is chained to the files list.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int kernfs_get_open_node(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	struct kernfs_open_node *on, *new_on = NULL;

retry:
	mutex_lock(&kernfs_open_file_mutex);
	spin_lock_irq(&kernfs_open_node_lock);

	if (!kn->attr.open && new_on) {
		kn->attr.open = new_on;
		new_on = NULL;
	}

	on = kn->attr.open;
	if (on) {
		atomic_inc(&on->refcnt);
		list_add_tail(&of->list, &on->files);
	}

	spin_unlock_irq(&kernfs_open_node_lock);
	mutex_unlock(&kernfs_open_file_mutex);

	if (on) {
		kfree(new_on);
		return 0;
	}

	/* not there, initialize a new one and retry */
	new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
	if (!new_on)
		return -ENOMEM;

	atomic_set(&new_on->refcnt, 0);
	atomic_set(&new_on->event, 1);
	init_waitqueue_head(&new_on->poll);
	INIT_LIST_HEAD(&new_on->files);
	goto retry;
}

/**
 * kernfs_put_open_node - put kernfs_open_node
 * @kn: target kernfs_node
 * @of: associated kernfs_open_file
 *
 * Put @kn->attr.open and unlink @of from the files list.  If
 * reference count reaches zero, disassociate and free it.
 *
 * LOCKING:
 * None.
 */
static void kernfs_put_open_node(struct kernfs_node *kn,
				 struct kernfs_open_file *of)
{
	struct kernfs_open_node *on = kn->attr.open;
	unsigned long flags;

	mutex_lock(&kernfs_open_file_mutex);
	spin_lock_irqsave(&kernfs_open_node_lock, flags);

	if (of)
		list_del(&of->list);

	if (atomic_dec_and_test(&on->refcnt))
		kn->attr.open = NULL;
	else
		on = NULL;

	spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
	mutex_unlock(&kernfs_open_file_mutex);

	kfree(on);
}

static int kernfs_fop_open(struct inode *inode, struct file *file)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_root *root = kernfs_root(kn);
	const struct kernfs_ops *ops;
	struct kernfs_open_file *of;
	bool has_read, has_write, has_mmap;
	int error = -EACCES;

	if (!kernfs_get_active(kn))
		return -ENODEV;

	ops = kernfs_ops(kn);

	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;

	/* see the flag definition for details */
	if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
		if ((file->f_mode & FMODE_WRITE) &&
		    (!(inode->i_mode & S_IWUGO) || !has_write))
			goto err_out;

		if ((file->f_mode & FMODE_READ) &&
		    (!(inode->i_mode & S_IRUGO) || !has_read))
			goto err_out;
	}

	/* allocate a kernfs_open_file for the file */
	error = -ENOMEM;
	of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
	if (!of)
		goto err_out;

	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap.  This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_sem nests, while holding @of->mutex.  As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file.  At this point, we can't easily give
	 * each file a separate locking class.  Let's differentiate on
	 * whether the file has mmap or not for now.
	 *
	 * Both paths of the branch look the same.  They're supposed to
	 * look that way and give @of->mutex different static lockdep keys.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

	of->kn = kn;
	of->file = file;

	/*
	 * Write path needs to access atomic_write_len outside active
	 * reference.  Cache it in open_file.  See kernfs_fop_write() for
	 * details.
	 */
	of->atomic_write_len = ops->atomic_write_len;

	error = -EINVAL;
	/*
	 * ->seq_show is incompatible with ->prealloc,
	 * as seq_read does its own allocation.
	 * ->read must be used instead.
	 */
	if (ops->prealloc && ops->seq_show)
		goto err_free;
	if (ops->prealloc) {
		int len = of->atomic_write_len ?: PAGE_SIZE;
		of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
		error = -ENOMEM;
		if (!of->prealloc_buf)
			goto err_free;
		mutex_init(&of->prealloc_mutex);
	}

	/*
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested.  This unifies private data access
	 * and readable regular files are the vast majority anyway.
	 */
	if (ops->seq_show)
		error = seq_open(file, &kernfs_seq_ops);
	else
		error = seq_open(file, NULL);
	if (error)
		goto err_free;

	of->seq_file = file->private_data;
	of->seq_file->private = of;

	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;

	/* make sure we have open node struct */
	error = kernfs_get_open_node(kn, of);
	if (error)
		goto err_seq_release;

	if (ops->open) {
		/* nobody has access to @of yet, skip @of->mutex */
		error = ops->open(of);
		if (error)
			goto err_put_node;
	}

	/* open succeeded, put active references */
	kernfs_put_active(kn);
	return 0;

err_put_node:
	kernfs_put_open_node(kn, of);
err_seq_release:
	seq_release(inode, file);
err_free:
	kfree(of->prealloc_buf);
	kfree(of);
err_out:
	kernfs_put_active(kn);
	return error;
}

/* used from release/drain to ensure that ->release() is called exactly once */
static void kernfs_release_file(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	/*
	 * @of is guaranteed to have no other file operations in flight and
	 * we just want to synchronize release and drain paths.
	 * @kernfs_open_file_mutex is enough.  @of->mutex can't be used
	 * here because drain path may be called from places which can
	 * cause circular dependency.
	 */
	lockdep_assert_held(&kernfs_open_file_mutex);

	if (!of->released) {
		/*
		 * A file is never detached without being released and we
		 * need to be able to release files which are deactivated
		 * and being drained.  Don't use kernfs_ops().
		 */
		kn->attr.ops->release(of);
		of->released = true;
	}
}

static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_open_file *of = kernfs_of(filp);

	if (kn->flags & KERNFS_HAS_RELEASE) {
		mutex_lock(&kernfs_open_file_mutex);
		kernfs_release_file(kn, of);
		mutex_unlock(&kernfs_open_file_mutex);
	}

	kernfs_put_open_node(kn, of);
	seq_release(inode, filp);
	kfree(of->prealloc_buf);
	kfree(of);

	return 0;
}

void kernfs_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	struct kernfs_open_file *of;

	if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
		return;

	spin_lock_irq(&kernfs_open_node_lock);
	on = kn->attr.open;
	if (on)
		atomic_inc(&on->refcnt);
	spin_unlock_irq(&kernfs_open_node_lock);
	if (!on)
		return;

	mutex_lock(&kernfs_open_file_mutex);

	list_for_each_entry(of, &on->files, list) {
		struct inode *inode = file_inode(of->file);

		if (kn->flags & KERNFS_HAS_MMAP)
			unmap_mapping_range(inode->i_mapping, 0, 0, 1);

		if (kn->flags & KERNFS_HAS_RELEASE)
			kernfs_release_file(kn, of);
	}

	mutex_unlock(&kernfs_open_file_mutex);

	kernfs_put_open_node(kn, NULL);
}

/*
 * Kernfs attribute files are pollable.  The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change.  When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return EPOLLERR|EPOLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
 * need to close and re-open the file, or seek to 0 and read again.
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
 * to see if it supports poll (neither 'poll' nor 'select' returns
 * an appropriate error code).  When in doubt, set a suitable timeout value.
 */
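
/*
 * For illustration only: a minimal userspace sketch of the protocol described
 * above.  The attribute path is hypothetical and error handling is omitted.
 *
 *	char buf[4096];
 *	int fd = open("/sys/kernel/some_attr", O_RDONLY);
 *
 *	read(fd, buf, sizeof(buf));		// consume the current value
 *	for (;;) {
 *		struct pollfd pfd = { .fd = fd, .events = POLLPRI };
 *
 *		poll(&pfd, 1, -1);		// blocks until kernfs_notify()
 *		lseek(fd, 0, SEEK_SET);
 *		read(fd, buf, sizeof(buf));	// re-read the updated value
 *	}
 */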
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
	struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
	struct kernfs_open_node *on = kn->attr.open;

	poll_wait(of->file, &on->poll, wait);

	if (of->event != atomic_read(&on->event))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	return DEFAULT_POLLMASK;
}

static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
{
	struct kernfs_open_file *of = kernfs_of(filp);
	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
	__poll_t ret;

	if (!kernfs_get_active(kn))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	if (kn->attr.ops->poll)
		ret = kn->attr.ops->poll(of, wait);
	else
		ret = kernfs_generic_poll(of, wait);

	kernfs_put_active(kn);
	return ret;
}

static void kernfs_notify_workfn(struct work_struct *work)
{
	struct kernfs_node *kn;
	struct kernfs_super_info *info;
repeat:
	/* pop one off the notify_list */
	spin_lock_irq(&kernfs_notify_lock);
	kn = kernfs_notify_list;
	if (kn == KERNFS_NOTIFY_EOL) {
		spin_unlock_irq(&kernfs_notify_lock);
		return;
	}
	kernfs_notify_list = kn->attr.notify_next;
	kn->attr.notify_next = NULL;
	spin_unlock_irq(&kernfs_notify_lock);

	/* kick fsnotify */
	mutex_lock(&kernfs_mutex);

	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
		struct kernfs_node *parent;
		struct inode *inode;
		struct qstr name;

		/*
		 * We want fsnotify_modify() on @kn but as the
		 * modifications aren't originating from userland don't
		 * have the matching @file available.  Look up the inodes
		 * and generate the events manually.
		 */
		inode = ilookup(info->sb, kn->id.ino);
		if (!inode)
			continue;

		name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
		parent = kernfs_get_parent(kn);
		if (parent) {
			struct inode *p_inode;

			p_inode = ilookup(info->sb, parent->id.ino);
			if (p_inode) {
				fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
					 inode, FSNOTIFY_EVENT_INODE, &name, 0);
				iput(p_inode);
			}

			kernfs_put(parent);
		}

		fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
			 &name, 0);
		iput(inode);
	}

	mutex_unlock(&kernfs_mutex);
	kernfs_put(kn);
	goto repeat;
}

/**
 * kernfs_notify - notify a kernfs file
 * @kn: file to notify
 *
 * Notify @kn such that poll(2) on @kn wakes up.  May be called from any
 * context.
 */
void kernfs_notify(struct kernfs_node *kn)
{
	static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
	unsigned long flags;
	struct kernfs_open_node *on;

	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
		return;

	/* kick poll immediately */
	spin_lock_irqsave(&kernfs_open_node_lock, flags);
	on = kn->attr.open;
	if (on) {
		atomic_inc(&on->event);
		wake_up_interruptible(&on->poll);
	}
	spin_unlock_irqrestore(&kernfs_open_node_lock, flags);

	/* schedule work to kick fsnotify */
	spin_lock_irqsave(&kernfs_notify_lock, flags);
	if (!kn->attr.notify_next) {
		kernfs_get(kn);
		kn->attr.notify_next = kernfs_notify_list;
		kernfs_notify_list = kn;
		schedule_work(&kernfs_notify_work);
	}
	spin_unlock_irqrestore(&kernfs_notify_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);
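
/*
 * Typical usage from a kernfs consumer (hypothetical example: @foo and
 * @foo->kn are not part of kernfs): after updating the state backing an
 * attribute, call kernfs_notify() on the corresponding node so that
 * poll(2)/select(2) waiters are woken and fsnotify events are generated.
 *
 *	spin_lock(&foo->lock);
 *	foo->value = new_value;
 *	spin_unlock(&foo->lock);
 *	kernfs_notify(foo->kn);		// safe from any context
 */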

const struct file_operations kernfs_file_fops = {
	.read		= kernfs_fop_read,
	.write		= kernfs_fop_write,
	.llseek		= generic_file_llseek,
	.mmap		= kernfs_fop_mmap,
	.open		= kernfs_fop_open,
	.release	= kernfs_fop_release,
	.poll		= kernfs_fop_poll,
	.fsync		= noop_fsync,
};

/**
 * __kernfs_create_file - kernfs internal function to create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @uid: uid of the file
 * @gid: gid of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
					 const char *name,
					 umode_t mode, kuid_t uid, kgid_t gid,
					 loff_t size,
					 const struct kernfs_ops *ops,
					 void *priv, const void *ns,
					 struct lock_class_key *key)
{
	struct kernfs_node *kn;
	unsigned flags;
	int rc;

	flags = KERNFS_FILE;

	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
			     uid, gid, flags);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->attr.ops = ops;
	kn->attr.size = size;
	kn->ns = ns;
	kn->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&kn->dep_map, "kn->count", key, 0);
		kn->flags |= KERNFS_LOCKDEP;
	}
#endif

	/*
	 * kn->attr.ops is accessible only while holding active ref.  We
	 * need to know whether some ops are implemented outside active
	 * ref.  Cache their existence in flags.
	 */
	if (ops->seq_show)
		kn->flags |= KERNFS_HAS_SEQ_SHOW;
	if (ops->mmap)
		kn->flags |= KERNFS_HAS_MMAP;
	if (ops->release)
		kn->flags |= KERNFS_HAS_RELEASE;

	rc = kernfs_add_one(kn);
	if (rc) {
		kernfs_put(kn);
		return ERR_PTR(rc);
	}
	return kn;
}
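
/*
 * For illustration only: a sketch of how a kernfs consumer might create a
 * simple read/write attribute.  The names, the parent node and the backing
 * variable are hypothetical; most callers go through higher level wrappers
 * (e.g. sysfs or cgroup helpers) rather than calling __kernfs_create_file()
 * directly.
 *
 *	static int foo_seq_show(struct seq_file *sf, void *v)
 *	{
 *		seq_printf(sf, "%d\n", foo_value);
 *		return 0;
 *	}
 *
 *	static ssize_t foo_write(struct kernfs_open_file *of, char *buf,
 *				 size_t bytes, loff_t off)
 *	{
 *		int ret = kstrtoint(buf, 0, &foo_value);
 *
 *		return ret ?: bytes;
 *	}
 *
 *	static const struct kernfs_ops foo_ops = {
 *		.seq_show	= foo_seq_show,
 *		.write		= foo_write,
 *	};
 *
 *	kn = __kernfs_create_file(parent, "foo", 0644, GLOBAL_ROOT_UID,
 *				  GLOBAL_ROOT_GID, 0, &foo_ops, NULL, NULL,
 *				  NULL);
 */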