// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
#include <linux/memcontrol.h>
#include <linux/statfs.h>
#include <linux/exportfs.h>

#include <asm/ioctls.h>

#include "../../mount.h"
#include "../fdinfo.h"
#include "fanotify.h"

#define FANOTIFY_DEFAULT_MAX_EVENTS	16384
#define FANOTIFY_DEFAULT_MAX_MARKS	8192
#define FANOTIFY_DEFAULT_MAX_LISTENERS	128

/*
 * All flags that may be specified in parameter event_f_flags of fanotify_init.
 *
 * Internal and external open flags are stored together in field f_flags of
 * struct file. Only external open flags shall be allowed in event_f_flags.
 * Internal flags like FMODE_NONOTIFY, FMODE_EXEC and FMODE_NOCMTIME shall be
 * excluded.
 */
#define FANOTIFY_INIT_ALL_EVENT_F_BITS	( \
		O_ACCMODE | O_APPEND | O_NONBLOCK | \
		__O_SYNC | O_DSYNC | O_CLOEXEC | \
		O_LARGEFILE | O_NOATIME )

extern const struct fsnotify_ops fanotify_fsnotify_ops;

struct kmem_cache *fanotify_mark_cache __read_mostly;
struct kmem_cache *fanotify_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;

#define FANOTIFY_EVENT_ALIGN 4

static int fanotify_event_info_len(struct fanotify_event *event)
{
	if (!fanotify_event_has_fid(event))
		return 0;

	return roundup(sizeof(struct fanotify_event_info_fid) +
		       sizeof(struct file_handle) + event->fh_len,
		       FANOTIFY_EVENT_ALIGN);
}

/*
 * Get an fsnotify notification event if one exists and is small
 * enough to fit in "count". Return an error pointer if the count
 * is not large enough. When a permission event is dequeued, its state is
 * updated accordingly.
 */
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
					    size_t count)
{
	size_t event_size = FAN_EVENT_METADATA_LEN;
	struct fsnotify_event *fsn_event = NULL;

	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);

	spin_lock(&group->notification_lock);
	if (fsnotify_notify_queue_is_empty(group))
		goto out;

	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
		event_size += fanotify_event_info_len(
			FANOTIFY_E(fsnotify_peek_first_event(group)));
	}

	if (event_size > count) {
		fsn_event = ERR_PTR(-EINVAL);
		goto out;
	}
	fsn_event = fsnotify_remove_first_event(group);
	if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask))
		FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED;
out:
	spin_unlock(&group->notification_lock);
	return fsn_event;
}
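/*
 * Worked example of the sizing rule above (a sketch; the exact numbers
 * depend on the uapi struct layouts of the target ABI): for a
 * FAN_REPORT_FID group delivering an event with an 8-byte file handle,
 * get_one_event() requires
 *
 *	FAN_EVENT_METADATA_LEN +
 *	roundup(sizeof(struct fanotify_event_info_fid) +
 *		sizeof(struct file_handle) + 8, FANOTIFY_EVENT_ALIGN)
 *	= 24 + roundup(12 + 8 + 8, 4) = 52 bytes (on a typical build)
 *
 * of user buffer, so a read() with a smaller count fails with -EINVAL
 * instead of returning a truncated event.
 */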
static int create_fd(struct fsnotify_group *group,
		     struct fanotify_event *event,
		     struct file **file)
{
	int client_fd;
	struct file *new_file;

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

	client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
	if (client_fd < 0)
		return client_fd;

	/*
	 * We need a new file handle for the userspace program so it can
	 * read even if it was originally opened O_WRONLY.
	 */
	/*
	 * This event may be an overflow event. In that case dentry and mnt
	 * are NULL; that's fine, just don't call dentry_open().
	 */
	if (event->path.dentry && event->path.mnt)
		new_file = dentry_open(&event->path,
				       group->fanotify_data.f_flags | FMODE_NONOTIFY,
				       current_cred());
	else
		new_file = ERR_PTR(-EOVERFLOW);
	if (IS_ERR(new_file)) {
		/*
		 * We still send an event even if we can't open the file.
		 * This can happen when, say, tasks are gone and we try to
		 * open their /proc files, or we try to open a WRONLY file
		 * like in sysfs. We just send the errno to userspace since
		 * there isn't much else we can do.
		 */
		put_unused_fd(client_fd);
		client_fd = PTR_ERR(new_file);
	} else {
		*file = new_file;
	}

	return client_fd;
}

/*
 * Finish processing of permission event by setting it to ANSWERED state and
 * drop group->notification_lock.
 */
static void finish_permission_event(struct fsnotify_group *group,
				    struct fanotify_perm_event *event,
				    unsigned int response)
				    __releases(&group->notification_lock)
{
	bool destroy = false;

	assert_spin_locked(&group->notification_lock);
	event->response = response;
	if (event->state == FAN_EVENT_CANCELED)
		destroy = true;
	else
		event->state = FAN_EVENT_ANSWERED;
	spin_unlock(&group->notification_lock);
	if (destroy)
		fsnotify_destroy_event(group, &event->fae.fse);
}

static int process_access_response(struct fsnotify_group *group,
				   struct fanotify_response *response_struct)
{
	struct fanotify_perm_event *event;
	int fd = response_struct->fd;
	int response = response_struct->response;

	pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
		 fd, response);
	/*
	 * Make sure the response is valid. If invalid we do nothing and
	 * either userspace can send a valid response or we will clean it up
	 * after the timeout.
	 */
	switch (response & ~FAN_AUDIT) {
	case FAN_ALLOW:
	case FAN_DENY:
		break;
	default:
		return -EINVAL;
	}

	if (fd < 0)
		return -EINVAL;

	if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
		return -EINVAL;

	spin_lock(&group->notification_lock);
	list_for_each_entry(event, &group->fanotify_data.access_list,
			    fae.fse.list) {
		if (event->fd != fd)
			continue;

		list_del_init(&event->fae.fse.list);
		finish_permission_event(group, event, response);
		wake_up(&group->fanotify_data.access_waitq);
		return 0;
	}
	spin_unlock(&group->notification_lock);

	return -ENOENT;
}
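/*
 * Userspace sketch (not kernel code) of the write() that
 * process_access_response() parses; "metadata" is assumed to hold a
 * permission event previously read from the fanotify fd:
 *
 *	struct fanotify_response resp = {
 *		.fd = metadata->fd,
 *		.response = FAN_ALLOW,	// or FAN_DENY; FAN_AUDIT may be
 *					// OR'ed in on FAN_ENABLE_AUDIT groups
 *	};
 *	write(fanotify_fd, &resp, sizeof(resp));
 */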
static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
{
	struct fanotify_event_info_fid info = { };
	struct file_handle handle = { };
	size_t fh_len = event->fh_len;
	size_t len = fanotify_event_info_len(event);

	if (!len)
		return 0;

	if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len))
		return -EFAULT;

	/* Copy event info fid header followed by variable sized file handle */
	info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID;
	info.hdr.len = len;
	info.fsid = event->fid.fsid;
	if (copy_to_user(buf, &info, sizeof(info)))
		return -EFAULT;

	buf += sizeof(info);
	len -= sizeof(info);
	handle.handle_type = event->fh_type;
	handle.handle_bytes = fh_len;
	if (copy_to_user(buf, &handle, sizeof(handle)))
		return -EFAULT;

	buf += sizeof(handle);
	len -= sizeof(handle);
	if (copy_to_user(buf, fanotify_event_fh(event), fh_len))
		return -EFAULT;

	/* Pad with 0's */
	buf += fh_len;
	len -= fh_len;
	WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
	if (len > 0 && clear_user(buf, len))
		return -EFAULT;

	return 0;
}

static ssize_t copy_event_to_user(struct fsnotify_group *group,
				  struct fsnotify_event *fsn_event,
				  char __user *buf, size_t count)
{
	struct fanotify_event_metadata metadata;
	struct fanotify_event *event;
	struct file *f = NULL;
	int ret, fd = FAN_NOFD;

	pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);

	event = container_of(fsn_event, struct fanotify_event, fse);
	metadata.event_len = FAN_EVENT_METADATA_LEN;
	metadata.metadata_len = FAN_EVENT_METADATA_LEN;
	metadata.vers = FANOTIFY_METADATA_VERSION;
	metadata.reserved = 0;
	metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
	metadata.pid = pid_vnr(event->pid);

	if (fanotify_event_has_path(event)) {
		fd = create_fd(group, event, &f);
		if (fd < 0)
			return fd;
	} else if (fanotify_event_has_fid(event)) {
		metadata.event_len += fanotify_event_info_len(event);
	}
	metadata.fd = fd;

	ret = -EFAULT;
	/*
	 * Sanity check copy size in case get_one_event() and
	 * fill_event_metadata() event_len sizes ever get out of sync.
	 */
	if (WARN_ON_ONCE(metadata.event_len > count))
		goto out_close_fd;

	if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
		goto out_close_fd;

	if (fanotify_is_perm_event(event->mask))
		FANOTIFY_PE(fsn_event)->fd = fd;

	if (fanotify_event_has_path(event)) {
		fd_install(fd, f);
	} else if (fanotify_event_has_fid(event)) {
		ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN);
		if (ret < 0)
			return ret;
	}

	return metadata.event_len;

out_close_fd:
	if (fd != FAN_NOFD) {
		put_unused_fd(fd);
		fput(f);
	}
	return ret;
}
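/*
 * Userspace sketch (not kernel code) of consuming the buffer that
 * copy_event_to_user() fills; FAN_EVENT_OK() and FAN_EVENT_NEXT() are the
 * uapi iteration helpers that honour the variable event_len:
 *
 *	char buf[4096];
 *	ssize_t len = read(fanotify_fd, buf, sizeof(buf));
 *	struct fanotify_event_metadata *md = (void *)buf;
 *
 *	while (FAN_EVENT_OK(md, len)) {
 *		if (md->vers != FANOTIFY_METADATA_VERSION)
 *			break;		// ABI mismatch
 *		// ... inspect md->mask, md->pid ...
 *		if (md->fd >= 0)
 *			close(md->fd);	// the listener owns the event fd
 *		md = FAN_EVENT_NEXT(md, len);
 *	}
 */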
/* fanotify userspace file descriptor functions */
static __poll_t fanotify_poll(struct file *file, poll_table *wait)
{
	struct fsnotify_group *group = file->private_data;
	__poll_t ret = 0;

	poll_wait(file, &group->notification_waitq, wait);
	spin_lock(&group->notification_lock);
	if (!fsnotify_notify_queue_is_empty(group))
		ret = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&group->notification_lock);

	return ret;
}

static ssize_t fanotify_read(struct file *file, char __user *buf,
			     size_t count, loff_t *pos)
{
	struct fsnotify_group *group;
	struct fsnotify_event *kevent;
	char __user *start;
	int ret;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	start = buf;
	group = file->private_data;

	pr_debug("%s: group=%p\n", __func__, group);

	add_wait_queue(&group->notification_waitq, &wait);
	while (1) {
		kevent = get_one_event(group, count);
		if (IS_ERR(kevent)) {
			ret = PTR_ERR(kevent);
			break;
		}

		if (!kevent) {
			ret = -EAGAIN;
			if (file->f_flags & O_NONBLOCK)
				break;

			ret = -ERESTARTSYS;
			if (signal_pending(current))
				break;

			if (start != buf)
				break;

			wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
			continue;
		}

		ret = copy_event_to_user(group, kevent, buf, count);
		if (unlikely(ret == -EOPENSTALE)) {
			/*
			 * We cannot report events with stale fd so drop it.
			 * Setting ret to 0 will continue the event loop and
			 * do the right thing if there are no more events to
			 * read (i.e. return bytes read, -EAGAIN or wait).
			 */
			ret = 0;
		}

		/*
		 * Permission events get queued to wait for response. Other
		 * events can be destroyed now.
		 */
		if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) {
			fsnotify_destroy_event(group, kevent);
		} else {
			if (ret <= 0) {
				spin_lock(&group->notification_lock);
				finish_permission_event(group,
					FANOTIFY_PE(kevent), FAN_DENY);
				wake_up(&group->fanotify_data.access_waitq);
			} else {
				spin_lock(&group->notification_lock);
				list_add_tail(&kevent->list,
					      &group->fanotify_data.access_list);
				spin_unlock(&group->notification_lock);
			}
		}
		if (ret < 0)
			break;
		buf += ret;
		count -= ret;
	}
	remove_wait_queue(&group->notification_waitq, &wait);

	if (start != buf && ret != -EFAULT)
		ret = buf - start;
	return ret;
}

static ssize_t fanotify_write(struct file *file, const char __user *buf,
			      size_t count, loff_t *pos)
{
	struct fanotify_response response = { .fd = -1, .response = -1 };
	struct fsnotify_group *group;
	int ret;

	if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		return -EINVAL;

	group = file->private_data;

	if (count > sizeof(response))
		count = sizeof(response);

	pr_debug("%s: group=%p count=%zu\n", __func__, group, count);

	if (copy_from_user(&response, buf, count))
		return -EFAULT;

	ret = process_access_response(group, &response);
	if (ret < 0)
		count = ret;

	return count;
}
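/*
 * Userspace sketch (not kernel code): fanotify_poll() reports EPOLLIN when
 * the notification queue is non-empty, so a listener created with
 * FAN_NONBLOCK can multiplex the fd instead of spinning on -EAGAIN:
 *
 *	struct pollfd pfd = { .fd = fanotify_fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
 *		// read() will now return at least one event
 *	}
 */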
static int fanotify_release(struct inode *ignored, struct file *file)
{
	struct fsnotify_group *group = file->private_data;
	struct fanotify_perm_event *event;
	struct fsnotify_event *fsn_event;

	/*
	 * Stop new events from arriving in the notification queue. Since
	 * userspace cannot use the fanotify fd anymore, no event can enter
	 * or leave access_list by now either.
	 */
	fsnotify_group_stop_queueing(group);

	/*
	 * Process all permission events on access_list and notification queue
	 * and simulate reply from userspace.
	 */
	spin_lock(&group->notification_lock);
	while (!list_empty(&group->fanotify_data.access_list)) {
		event = list_first_entry(&group->fanotify_data.access_list,
				struct fanotify_perm_event, fae.fse.list);
		list_del_init(&event->fae.fse.list);
		finish_permission_event(group, event, FAN_ALLOW);
		spin_lock(&group->notification_lock);
	}

	/*
	 * Destroy all non-permission events. For permission events just
	 * dequeue them and set the response. They will be freed once the
	 * response is consumed and fanotify_get_response() returns.
	 */
	while (!fsnotify_notify_queue_is_empty(group)) {
		fsn_event = fsnotify_remove_first_event(group);
		if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) {
			spin_unlock(&group->notification_lock);
			fsnotify_destroy_event(group, fsn_event);
		} else {
			finish_permission_event(group, FANOTIFY_PE(fsn_event),
						FAN_ALLOW);
		}
		spin_lock(&group->notification_lock);
	}
	spin_unlock(&group->notification_lock);

	/* Response for all permission events is set, wake up waiters */
	wake_up(&group->fanotify_data.access_waitq);

	/* matches the fanotify_init->fsnotify_alloc_group */
	fsnotify_destroy_group(group);

	return 0;
}

static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct fsnotify_group *group;
	struct fsnotify_event *fsn_event;
	void __user *p;
	int ret = -ENOTTY;
	size_t send_len = 0;

	group = file->private_data;

	p = (void __user *) arg;

	switch (cmd) {
	case FIONREAD:
		spin_lock(&group->notification_lock);
		list_for_each_entry(fsn_event, &group->notification_list, list)
			send_len += FAN_EVENT_METADATA_LEN;
		spin_unlock(&group->notification_lock);
		ret = put_user(send_len, (int __user *) p);
		break;
	}

	return ret;
}

static const struct file_operations fanotify_fops = {
	.show_fdinfo	= fanotify_show_fdinfo,
	.poll		= fanotify_poll,
	.read		= fanotify_read,
	.write		= fanotify_write,
	.fasync		= NULL,
	.release	= fanotify_release,
	.unlocked_ioctl	= fanotify_ioctl,
	.compat_ioctl	= fanotify_ioctl,
	.llseek		= noop_llseek,
};
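/*
 * Userspace sketch (not kernel code) of the FIONREAD case above. Note the
 * returned count is FAN_EVENT_METADATA_LEN per queued event and does not
 * include variable-length fid info records:
 *
 *	int avail;
 *
 *	if (ioctl(fanotify_fd, FIONREAD, &avail) == 0)
 *		printf("~%d bytes of events queued\n", avail);
 */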
static int fanotify_find_path(int dfd, const char __user *filename,
			      struct path *path, unsigned int flags)
{
	int ret;

	pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
		 dfd, filename, flags);

	if (filename == NULL) {
		struct fd f = fdget(dfd);

		ret = -EBADF;
		if (!f.file)
			goto out;

		ret = -ENOTDIR;
		if ((flags & FAN_MARK_ONLYDIR) &&
		    !(S_ISDIR(file_inode(f.file)->i_mode))) {
			fdput(f);
			goto out;
		}

		*path = f.file->f_path;
		path_get(path);
		fdput(f);
	} else {
		unsigned int lookup_flags = 0;

		if (!(flags & FAN_MARK_DONT_FOLLOW))
			lookup_flags |= LOOKUP_FOLLOW;
		if (flags & FAN_MARK_ONLYDIR)
			lookup_flags |= LOOKUP_DIRECTORY;

		ret = user_path_at(dfd, filename, lookup_flags, path);
		if (ret)
			goto out;
	}

	/* you can only watch an inode if you have read permissions on it */
	ret = inode_permission(path->dentry->d_inode, MAY_READ);
	if (ret)
		path_put(path);
out:
	return ret;
}

static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
					    __u32 mask,
					    unsigned int flags,
					    int *destroy)
{
	__u32 oldmask = 0;

	spin_lock(&fsn_mark->lock);
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		oldmask = fsn_mark->mask;
		fsn_mark->mask &= ~mask;
	} else {
		fsn_mark->ignored_mask &= ~mask;
	}
	*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
	spin_unlock(&fsn_mark->lock);

	return mask & oldmask;
}

static int fanotify_remove_mark(struct fsnotify_group *group,
				fsnotify_connp_t *connp, __u32 mask,
				unsigned int flags)
{
	struct fsnotify_mark *fsn_mark = NULL;
	__u32 removed;
	int destroy_mark;

	mutex_lock(&group->mark_mutex);
	fsn_mark = fsnotify_find_mark(connp, group);
	if (!fsn_mark) {
		mutex_unlock(&group->mark_mutex);
		return -ENOENT;
	}

	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
						 &destroy_mark);
	if (removed & fsnotify_conn_mask(fsn_mark->connector))
		fsnotify_recalc_mask(fsn_mark->connector);
	if (destroy_mark)
		fsnotify_detach_mark(fsn_mark);
	mutex_unlock(&group->mark_mutex);
	if (destroy_mark)
		fsnotify_free_mark(fsn_mark);

	/* matches the fsnotify_find_mark() */
	fsnotify_put_mark(fsn_mark);
	return 0;
}

static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
					 struct vfsmount *mnt, __u32 mask,
					 unsigned int flags)
{
	return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
				    mask, flags);
}

static int fanotify_remove_sb_mark(struct fsnotify_group *group,
				   struct super_block *sb, __u32 mask,
				   unsigned int flags)
{
	return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
}

static int fanotify_remove_inode_mark(struct fsnotify_group *group,
				      struct inode *inode, __u32 mask,
				      unsigned int flags)
{
	return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask,
				    flags);
}

static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
				       __u32 mask,
				       unsigned int flags)
{
	__u32 oldmask = -1;

	spin_lock(&fsn_mark->lock);
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		oldmask = fsn_mark->mask;
		fsn_mark->mask |= mask;
	} else {
		fsn_mark->ignored_mask |= mask;
		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
	}
	spin_unlock(&fsn_mark->lock);

	return mask & ~oldmask;
}

static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
						   fsnotify_connp_t *connp,
						   unsigned int type,
						   __kernel_fsid_t *fsid)
{
	struct fsnotify_mark *mark;
	int ret;

	if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
		return ERR_PTR(-ENOSPC);

	mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
	if (!mark)
		return ERR_PTR(-ENOMEM);

	fsnotify_init_mark(mark, group);
	ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
	if (ret) {
		fsnotify_put_mark(mark);
		return ERR_PTR(ret);
	}

	return mark;
}


static int fanotify_add_mark(struct fsnotify_group *group,
			     fsnotify_connp_t *connp, unsigned int type,
			     __u32 mask, unsigned int flags,
			     __kernel_fsid_t *fsid)
{
	struct fsnotify_mark *fsn_mark;
	__u32 added;

	mutex_lock(&group->mark_mutex);
	fsn_mark = fsnotify_find_mark(connp, group);
	if (!fsn_mark) {
		fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
		if (IS_ERR(fsn_mark)) {
			mutex_unlock(&group->mark_mutex);
			return PTR_ERR(fsn_mark);
		}
	}
	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
		fsnotify_recalc_mask(fsn_mark->connector);
	mutex_unlock(&group->mark_mutex);

	fsnotify_put_mark(fsn_mark);
	return 0;
}
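/*
 * Userspace sketch (not kernel code) of the two mask classes handled by
 * fanotify_mark_add_to_mask(): a normal mask to receive events, and an
 * ignored mask to suppress them, here surviving modification; the paths
 * are illustrative only:
 *
 *	fanotify_mark(fd, FAN_MARK_ADD, FAN_OPEN | FAN_CLOSE_WRITE,
 *		      AT_FDCWD, "/home");
 *	fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_IGNORED_MASK |
 *			  FAN_MARK_IGNORED_SURV_MODIFY,
 *		      FAN_OPEN | FAN_CLOSE_WRITE, AT_FDCWD, "/home/cache");
 */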
static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
				      struct vfsmount *mnt, __u32 mask,
				      unsigned int flags, __kernel_fsid_t *fsid)
{
	return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
}

static int fanotify_add_sb_mark(struct fsnotify_group *group,
				struct super_block *sb, __u32 mask,
				unsigned int flags, __kernel_fsid_t *fsid)
{
	return fanotify_add_mark(group, &sb->s_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
}

static int fanotify_add_inode_mark(struct fsnotify_group *group,
				   struct inode *inode, __u32 mask,
				   unsigned int flags, __kernel_fsid_t *fsid)
{
	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);

	/*
	 * If some other task has this inode open for write we should not add
	 * an ignored mark, unless that ignored mark is supposed to survive
	 * modification changes anyway.
	 */
	if ((flags & FAN_MARK_IGNORED_MASK) &&
	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
	    inode_is_open_for_write(inode))
		return 0;

	return fanotify_add_mark(group, &inode->i_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
}
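/*
 * Userspace sketch (not kernel code) of how the mark type selects which of
 * the three wrappers above attaches the mark (paths illustrative):
 *
 *	// watch a single inode
 *	fanotify_mark(fd, FAN_MARK_ADD, FAN_MODIFY, AT_FDCWD, "/etc/passwd");
 *	// watch everything reachable through a mount
 *	fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT, FAN_OPEN,
 *		      AT_FDCWD, "/mnt/data");
 *	// watch a whole filesystem (superblock)
 *	fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM, FAN_OPEN,
 *		      AT_FDCWD, "/mnt/data");
 */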
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
	struct fsnotify_group *group;
	int f_flags, fd;
	struct user_struct *user;
	struct fanotify_event *oevent;

	pr_debug("%s: flags=%x event_f_flags=%x\n",
		 __func__, flags, event_f_flags);

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

#ifdef CONFIG_AUDITSYSCALL
	if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
#else
	if (flags & ~FANOTIFY_INIT_FLAGS)
#endif
		return -EINVAL;

	if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
		return -EINVAL;

	switch (event_f_flags & O_ACCMODE) {
	case O_RDONLY:
	case O_RDWR:
	case O_WRONLY:
		break;
	default:
		return -EINVAL;
	}

	if ((flags & FAN_REPORT_FID) &&
	    (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF)
		return -EINVAL;

	user = get_current_user();
	if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
		free_uid(user);
		return -EMFILE;
	}

	f_flags = O_RDWR | FMODE_NONOTIFY;
	if (flags & FAN_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (flags & FAN_NONBLOCK)
		f_flags |= O_NONBLOCK;

	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
	if (IS_ERR(group)) {
		free_uid(user);
		return PTR_ERR(group);
	}

	group->fanotify_data.user = user;
	group->fanotify_data.flags = flags;
	atomic_inc(&user->fanotify_listeners);
	group->memcg = get_mem_cgroup_from_mm(current->mm);

	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL,
				      FSNOTIFY_EVENT_NONE, NULL);
	if (unlikely(!oevent)) {
		fd = -ENOMEM;
		goto out_destroy_group;
	}
	group->overflow_event = &oevent->fse;

	if (force_o_largefile())
		event_f_flags |= O_LARGEFILE;
	group->fanotify_data.f_flags = event_f_flags;
	init_waitqueue_head(&group->fanotify_data.access_waitq);
	INIT_LIST_HEAD(&group->fanotify_data.access_list);
	switch (flags & FANOTIFY_CLASS_BITS) {
	case FAN_CLASS_NOTIF:
		group->priority = FS_PRIO_0;
		break;
	case FAN_CLASS_CONTENT:
		group->priority = FS_PRIO_1;
		break;
	case FAN_CLASS_PRE_CONTENT:
		group->priority = FS_PRIO_2;
		break;
	default:
		fd = -EINVAL;
		goto out_destroy_group;
	}

	if (flags & FAN_UNLIMITED_QUEUE) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
			goto out_destroy_group;
		group->max_events = UINT_MAX;
	} else {
		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
	}

	if (flags & FAN_UNLIMITED_MARKS) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
			goto out_destroy_group;
		group->fanotify_data.max_marks = UINT_MAX;
	} else {
		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
	}

	if (flags & FAN_ENABLE_AUDIT) {
		fd = -EPERM;
		if (!capable(CAP_AUDIT_WRITE))
			goto out_destroy_group;
	}

	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
	if (fd < 0)
		goto out_destroy_group;

	return fd;

out_destroy_group:
	fsnotify_destroy_group(group);
	return fd;
}
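/*
 * Userspace sketch (not kernel code) of typical fanotify_init() calls that
 * pass the checks above:
 *
 *	// notification-only group reporting fids instead of open fds
 *	// (FAN_REPORT_FID requires FAN_CLASS_NOTIF)
 *	int fd1 = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_FID, 0);
 *
 *	// content-access group that may receive permission events
 *	int fd2 = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT,
 *				O_RDONLY | O_LARGEFILE);
 */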
/* Check if filesystem can encode a unique fid */
static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
{
	__kernel_fsid_t root_fsid;
	int err;

	/*
	 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
	 */
	err = vfs_get_fsid(path->dentry, fsid);
	if (err)
		return err;

	if (!fsid->val[0] && !fsid->val[1])
		return -ENODEV;

	/*
	 * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
	 * which uses a different fsid than sb root.
	 */
	err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
	if (err)
		return err;

	if (root_fsid.val[0] != fsid->val[0] ||
	    root_fsid.val[1] != fsid->val[1])
		return -EXDEV;

	/*
	 * We need to make sure that the filesystem supports at least
	 * encoding a file handle so user can use name_to_handle_at() to
	 * compare fid returned with event to the file handle of watched
	 * objects. However, name_to_handle_at() requires that the
	 * filesystem also supports decoding file handles.
	 */
	if (!path->dentry->d_sb->s_export_op ||
	    !path->dentry->d_sb->s_export_op->fh_to_dentry)
		return -EOPNOTSUPP;

	return 0;
}
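/*
 * Userspace sketch (not kernel code) of the comparison fanotify_test_fid()
 * is guarding: encoding a watched object's handle with name_to_handle_at()
 * and matching it against the file handle in a FAN_EVENT_INFO_TYPE_FID
 * record. "event_fh" is assumed to point at the struct file_handle inside
 * a previously read fid info record:
 *
 *	struct file_handle *fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
 *	int mount_id;
 *
 *	fh->handle_bytes = MAX_HANDLE_SZ;
 *	if (name_to_handle_at(AT_FDCWD, "/watched/file", fh,
 *			      &mount_id, 0) == 0 &&
 *	    fh->handle_type == event_fh->handle_type &&
 *	    fh->handle_bytes == event_fh->handle_bytes &&
 *	    !memcmp(fh->f_handle, event_fh->f_handle, fh->handle_bytes))
 *		; // the event refers to the watched file
 */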
static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
			    int dfd, const char __user *pathname)
{
	struct inode *inode = NULL;
	struct vfsmount *mnt = NULL;
	struct fsnotify_group *group;
	struct fd f;
	struct path path;
	__kernel_fsid_t __fsid, *fsid = NULL;
	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
	int ret;

	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
		 __func__, fanotify_fd, flags, dfd, pathname, mask);

	/* we only use the lower 32 bits as of right now. */
	if (mask & ((__u64)0xffffffff << 32))
		return -EINVAL;

	if (flags & ~FANOTIFY_MARK_FLAGS)
		return -EINVAL;

	switch (mark_type) {
	case FAN_MARK_INODE:
	case FAN_MARK_MOUNT:
	case FAN_MARK_FILESYSTEM:
		break;
	default:
		return -EINVAL;
	}

	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
	case FAN_MARK_ADD:		/* fallthrough */
	case FAN_MARK_REMOVE:
		if (!mask)
			return -EINVAL;
		break;
	case FAN_MARK_FLUSH:
		if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		valid_mask |= FANOTIFY_PERM_EVENTS;

	if (mask & ~valid_mask)
		return -EINVAL;

	f = fdget(fanotify_fd);
	if (unlikely(!f.file))
		return -EBADF;

	/* verify that this is indeed an fanotify instance */
	ret = -EINVAL;
	if (unlikely(f.file->f_op != &fanotify_fops))
		goto fput_and_out;
	group = f.file->private_data;

	/*
	 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
	 * allowed to set permission events.
	 */
	ret = -EINVAL;
	if (mask & FANOTIFY_PERM_EVENTS &&
	    group->priority == FS_PRIO_0)
		goto fput_and_out;

	/*
	 * Events with data type inode do not carry enough information to
	 * report event->fd, so we do not allow setting a mask for inode
	 * events unless group supports reporting fid.
	 * Inode events are not supported on a mount mark, because they do
	 * not carry enough information (i.e. path) to be filtered by mount
	 * point.
	 */
	if (mask & FANOTIFY_INODE_EVENTS &&
	    (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) ||
	     mark_type == FAN_MARK_MOUNT))
		goto fput_and_out;

	if (flags & FAN_MARK_FLUSH) {
		ret = 0;
		if (mark_type == FAN_MARK_MOUNT)
			fsnotify_clear_vfsmount_marks_by_group(group);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			fsnotify_clear_sb_marks_by_group(group);
		else
			fsnotify_clear_inode_marks_by_group(group);
		goto fput_and_out;
	}

	ret = fanotify_find_path(dfd, pathname, &path, flags);
	if (ret)
		goto fput_and_out;

	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
		ret = fanotify_test_fid(&path, &__fsid);
		if (ret)
			goto path_put_and_out;

		fsid = &__fsid;
	}

	/* inode held in place by reference to path; group by fget on fd */
	if (mark_type == FAN_MARK_INODE)
		inode = path.dentry->d_inode;
	else
		mnt = path.mnt;

	/* create/update an inode mark */
	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
	case FAN_MARK_ADD:
		if (mark_type == FAN_MARK_MOUNT)
			ret = fanotify_add_vfsmount_mark(group, mnt, mask,
							 flags, fsid);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
						   flags, fsid);
		else
			ret = fanotify_add_inode_mark(group, inode, mask,
						      flags, fsid);
		break;
	case FAN_MARK_REMOVE:
		if (mark_type == FAN_MARK_MOUNT)
			ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
							    flags);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
						      flags);
		else
			ret = fanotify_remove_inode_mark(group, inode, mask,
							 flags);
		break;
	default:
		ret = -EINVAL;
	}

path_put_and_out:
	path_put(&path);
fput_and_out:
	fdput(f);
	return ret;
}

SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
		__u64, mask, int, dfd,
		const char  __user *, pathname)
{
	return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
		       int, fanotify_fd, unsigned int, flags,
		       __u32, mask0, __u32, mask1, int, dfd,
		       const char __user *, pathname)
{
	return do_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
				((__u64)mask0 << 32) | mask1,
#else
				((__u64)mask1 << 32) | mask0,
#endif
				dfd, pathname);
}
#endif

/*
 * fanotify_user_setup - Our initialization function. Note that we cannot
 * return error because we have compiled-in VFS hooks. So an (unlikely)
 * failure here must result in panic().
 */
static int __init fanotify_user_setup(void)
{
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);

	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
					 SLAB_PANIC|SLAB_ACCOUNT);
	fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC);
	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
		fanotify_perm_event_cachep =
			KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
	}

	return 0;
}
device_initcall(fanotify_user_setup);
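/*
 * Worked example for the compat entry point above (illustrative): a 32-bit
 * caller passes the __u64 mask as two 32-bit arguments. For
 * mask = FAN_OPEN | FAN_CLOSE_WRITE (entirely in the low 32 bits), a
 * little-endian ABI supplies mask0 = FAN_OPEN | FAN_CLOSE_WRITE and
 * mask1 = 0, and ((__u64)mask1 << 32) | mask0 reassembles the original
 * value; big-endian ABIs pass the halves in the opposite order, hence the
 * #ifdef above.
 */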