// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
#include <linux/memcontrol.h>
#include <linux/statfs.h>
#include <linux/exportfs.h>

#include <asm/ioctls.h>

#include "../../mount.h"
#include "../fdinfo.h"
#include "fanotify.h"

#define FANOTIFY_DEFAULT_MAX_EVENTS	16384
#define FANOTIFY_DEFAULT_MAX_MARKS	8192
#define FANOTIFY_DEFAULT_MAX_LISTENERS	128

/*
 * All flags that may be specified in parameter event_f_flags of fanotify_init.
 *
 * Internal and external open flags are stored together in field f_flags of
 * struct file. Only external open flags shall be allowed in event_f_flags.
 * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
 * excluded.
 */
#define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \
		O_ACCMODE | O_APPEND | O_NONBLOCK | \
		__O_SYNC | O_DSYNC | O_CLOEXEC | \
		O_LARGEFILE | O_NOATIME )

extern const struct fsnotify_ops fanotify_fsnotify_ops;

struct kmem_cache *fanotify_mark_cache __read_mostly;
struct kmem_cache *fanotify_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;

#define FANOTIFY_EVENT_ALIGN 4

static int fanotify_event_info_len(struct fanotify_event *event)
{
	if (!fanotify_event_has_fid(event))
		return 0;

	return roundup(sizeof(struct fanotify_event_info_fid) +
		       sizeof(struct file_handle) + event->fh_len,
		       FANOTIFY_EVENT_ALIGN);
}

/*
 * Get an fsnotify notification event if one exists and is small
 * enough to fit in "count". Return an error pointer if the count
 * is not large enough. When permission event is dequeued, its state is
 * updated accordingly.
 */
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
					    size_t count)
{
	size_t event_size = FAN_EVENT_METADATA_LEN;
	struct fsnotify_event *fsn_event = NULL;

	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);

	spin_lock(&group->notification_lock);
	if (fsnotify_notify_queue_is_empty(group))
		goto out;

	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
		event_size += fanotify_event_info_len(
			FANOTIFY_E(fsnotify_peek_first_event(group)));
	}

	if (event_size > count) {
		fsn_event = ERR_PTR(-EINVAL);
		goto out;
	}
	fsn_event = fsnotify_remove_first_event(group);
	if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask))
		FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED;
out:
	spin_unlock(&group->notification_lock);
	return fsn_event;
}
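
/*
 * Illustrative size arithmetic (editor's sketch, not from the original
 * source): assuming the usual uapi struct layouts, FAN_EVENT_METADATA_LEN
 * is 24 bytes, sizeof(struct fanotify_event_info_fid) is 12 and
 * sizeof(struct file_handle) is 8. For a common 8-byte file handle the
 * fid info record is roundup(12 + 8 + 8, 4) = 28 bytes, so get_one_event()
 * requires count >= 24 + 28 = 52 bytes before it dequeues a
 * FAN_REPORT_FID event.
 */
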
static int create_fd(struct fsnotify_group *group,
		     struct fanotify_event *event,
		     struct file **file)
{
	int client_fd;
	struct file *new_file;

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

	client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
	if (client_fd < 0)
		return client_fd;

	/*
	 * We need a new file handle for the userspace program so it can
	 * read even if it was originally opened O_WRONLY.
	 */
	/*
	 * It's possible this event was an overflow event; in that case
	 * dentry and mnt are NULL. That's fine, just don't call dentry_open().
	 */
	if (event->path.dentry && event->path.mnt)
		new_file = dentry_open(&event->path,
				       group->fanotify_data.f_flags | FMODE_NONOTIFY,
				       current_cred());
	else
		new_file = ERR_PTR(-EOVERFLOW);
	if (IS_ERR(new_file)) {
		/*
		 * We still send an event even if we can't open the file.
		 * This can happen when, say, tasks are gone and we try to
		 * open their /proc files, or we try to open a WRONLY file
		 * like in sysfs. We just send the errno to userspace since
		 * there isn't much else we can do.
		 */
		put_unused_fd(client_fd);
		client_fd = PTR_ERR(new_file);
	} else {
		*file = new_file;
	}

	return client_fd;
}

/*
 * Finish processing of permission event by setting it to ANSWERED state and
 * drop group->notification_lock.
 */
static void finish_permission_event(struct fsnotify_group *group,
				    struct fanotify_perm_event *event,
				    unsigned int response)
				    __releases(&group->notification_lock)
{
	bool destroy = false;

	assert_spin_locked(&group->notification_lock);
	event->response = response;
	if (event->state == FAN_EVENT_CANCELED)
		destroy = true;
	else
		event->state = FAN_EVENT_ANSWERED;
	spin_unlock(&group->notification_lock);
	if (destroy)
		fsnotify_destroy_event(group, &event->fae.fse);
}

static int process_access_response(struct fsnotify_group *group,
				   struct fanotify_response *response_struct)
{
	struct fanotify_perm_event *event;
	int fd = response_struct->fd;
	int response = response_struct->response;

	pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
		 fd, response);
	/*
	 * Make sure the response is valid. If invalid we do nothing and
	 * either userspace can send a valid response or we will clean it
	 * up after the timeout.
	 */
	switch (response & ~FAN_AUDIT) {
	case FAN_ALLOW:
	case FAN_DENY:
		break;
	default:
		return -EINVAL;
	}

	if (fd < 0)
		return -EINVAL;

	if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
		return -EINVAL;

	spin_lock(&group->notification_lock);
	list_for_each_entry(event, &group->fanotify_data.access_list,
			    fae.fse.list) {
		if (event->fd != fd)
			continue;

		list_del_init(&event->fae.fse.list);
		finish_permission_event(group, event, response);
		wake_up(&group->fanotify_data.access_waitq);
		return 0;
	}
	spin_unlock(&group->notification_lock);

	return -ENOENT;
}
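
/*
 * Example (userspace, illustrative sketch): answering a permission event
 * by writing the fanotify fd, which lands in process_access_response()
 * above. The struct and flags are the real uapi; fan_fd and metadata are
 * hypothetical locals.
 *
 *	struct fanotify_response resp = {
 *		.fd = metadata->fd,		// fd reported with the event
 *		.response = FAN_ALLOW,		// or FAN_DENY
 *	};
 *	if (write(fan_fd, &resp, sizeof(resp)) != sizeof(resp))
 *		perror("write");
 */
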
static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
{
	struct fanotify_event_info_fid info = { };
	struct file_handle handle = { };
	unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh;
	size_t fh_len = event->fh_len;
	size_t len = fanotify_event_info_len(event);

	if (!len)
		return 0;

	if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len))
		return -EFAULT;

	/* Copy event info fid header followed by variable sized file handle */
	info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID;
	info.hdr.len = len;
	info.fsid = event->fid.fsid;
	if (copy_to_user(buf, &info, sizeof(info)))
		return -EFAULT;

	buf += sizeof(info);
	len -= sizeof(info);
	handle.handle_type = event->fh_type;
	handle.handle_bytes = fh_len;
	if (copy_to_user(buf, &handle, sizeof(handle)))
		return -EFAULT;

	buf += sizeof(handle);
	len -= sizeof(handle);
	/*
	 * For an inline fh, copy through stack to exclude the copy from
	 * usercopy hardening protections.
	 */
	fh = fanotify_event_fh(event);
	if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
		memcpy(bounce, fh, fh_len);
		fh = bounce;
	}
	if (copy_to_user(buf, fh, fh_len))
		return -EFAULT;

	/* Pad with 0's */
	buf += fh_len;
	len -= fh_len;
	WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
	if (len > 0 && clear_user(buf, len))
		return -EFAULT;

	return 0;
}

static ssize_t copy_event_to_user(struct fsnotify_group *group,
				  struct fsnotify_event *fsn_event,
				  char __user *buf, size_t count)
{
	struct fanotify_event_metadata metadata;
	struct fanotify_event *event;
	struct file *f = NULL;
	int ret, fd = FAN_NOFD;

	pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);

	event = container_of(fsn_event, struct fanotify_event, fse);
	metadata.event_len = FAN_EVENT_METADATA_LEN;
	metadata.metadata_len = FAN_EVENT_METADATA_LEN;
	metadata.vers = FANOTIFY_METADATA_VERSION;
	metadata.reserved = 0;
	metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
	metadata.pid = pid_vnr(event->pid);

	if (fanotify_event_has_path(event)) {
		fd = create_fd(group, event, &f);
		if (fd < 0)
			return fd;
	} else if (fanotify_event_has_fid(event)) {
		metadata.event_len += fanotify_event_info_len(event);
	}
	metadata.fd = fd;

	ret = -EFAULT;
	/*
	 * Sanity check copy size in case get_one_event() and
	 * fill_event_metadata() event_len sizes ever get out of sync.
	 */
	if (WARN_ON_ONCE(metadata.event_len > count))
		goto out_close_fd;

	if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
		goto out_close_fd;

	if (fanotify_is_perm_event(event->mask))
		FANOTIFY_PE(fsn_event)->fd = fd;

	if (fanotify_event_has_path(event)) {
		fd_install(fd, f);
	} else if (fanotify_event_has_fid(event)) {
		ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN);
		if (ret < 0)
			return ret;
	}

	return metadata.event_len;

out_close_fd:
	if (fd != FAN_NOFD) {
		put_unused_fd(fd);
		fput(f);
	}
	return ret;
}

/* fanotify userspace file descriptor functions */
static __poll_t fanotify_poll(struct file *file, poll_table *wait)
{
	struct fsnotify_group *group = file->private_data;
	__poll_t ret = 0;

	poll_wait(file, &group->notification_waitq, wait);
	spin_lock(&group->notification_lock);
	if (!fsnotify_notify_queue_is_empty(group))
		ret = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&group->notification_lock);

	return ret;
}
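
/*
 * Example (userspace, illustrative sketch): draining the event queue with
 * the uapi iteration macros from <linux/fanotify.h>. fan_fd is a
 * hypothetical fanotify fd and handle_event() a hypothetical helper.
 *
 *	char buf[4096];
 *	ssize_t n = read(fan_fd, buf, sizeof(buf));
 *	struct fanotify_event_metadata *md = (void *)buf;
 *
 *	while (FAN_EVENT_OK(md, n)) {
 *		if (md->vers != FANOTIFY_METADATA_VERSION)
 *			break;			// incompatible kernel
 *		handle_event(md);
 *		if (md->fd >= 0)		// FAN_NOFD for fid events
 *			close(md->fd);
 *		md = FAN_EVENT_NEXT(md, n);
 *	}
 */
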
static ssize_t fanotify_read(struct file *file, char __user *buf,
			     size_t count, loff_t *pos)
{
	struct fsnotify_group *group;
	struct fsnotify_event *kevent;
	char __user *start;
	int ret;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	start = buf;
	group = file->private_data;

	pr_debug("%s: group=%p\n", __func__, group);

	add_wait_queue(&group->notification_waitq, &wait);
	while (1) {
		kevent = get_one_event(group, count);
		if (IS_ERR(kevent)) {
			ret = PTR_ERR(kevent);
			break;
		}

		if (!kevent) {
			ret = -EAGAIN;
			if (file->f_flags & O_NONBLOCK)
				break;

			ret = -ERESTARTSYS;
			if (signal_pending(current))
				break;

			if (start != buf)
				break;

			wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
			continue;
		}

		ret = copy_event_to_user(group, kevent, buf, count);
		if (unlikely(ret == -EOPENSTALE)) {
			/*
			 * We cannot report events with stale fd so drop it.
			 * Setting ret to 0 will continue the event loop and
			 * do the right thing if there are no more events to
			 * read (i.e. return bytes read, -EAGAIN or wait).
			 */
			ret = 0;
		}

		/*
		 * Permission events get queued to wait for response. Other
		 * events can be destroyed now.
		 */
		if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) {
			fsnotify_destroy_event(group, kevent);
		} else {
			if (ret <= 0) {
				spin_lock(&group->notification_lock);
				finish_permission_event(group,
					FANOTIFY_PE(kevent), FAN_DENY);
				wake_up(&group->fanotify_data.access_waitq);
			} else {
				spin_lock(&group->notification_lock);
				list_add_tail(&kevent->list,
					&group->fanotify_data.access_list);
				spin_unlock(&group->notification_lock);
			}
		}
		if (ret < 0)
			break;
		buf += ret;
		count -= ret;
	}
	remove_wait_queue(&group->notification_waitq, &wait);

	if (start != buf && ret != -EFAULT)
		ret = buf - start;
	return ret;
}

static ssize_t fanotify_write(struct file *file, const char __user *buf,
			      size_t count, loff_t *pos)
{
	struct fanotify_response response = { .fd = -1, .response = -1 };
	struct fsnotify_group *group;
	int ret;

	if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		return -EINVAL;

	group = file->private_data;

	if (count > sizeof(response))
		count = sizeof(response);

	pr_debug("%s: group=%p count=%zu\n", __func__, group, count);

	if (copy_from_user(&response, buf, count))
		return -EFAULT;

	ret = process_access_response(group, &response);
	if (ret < 0)
		count = ret;

	return count;
}
static int fanotify_release(struct inode *ignored, struct file *file)
{
	struct fsnotify_group *group = file->private_data;
	struct fanotify_perm_event *event;
	struct fsnotify_event *fsn_event;

	/*
	 * Stop new events from arriving in the notification queue. Since
	 * userspace cannot use the fanotify fd anymore, no event can enter
	 * or leave access_list by now either.
	 */
	fsnotify_group_stop_queueing(group);

	/*
	 * Process all permission events on access_list and notification queue
	 * and simulate reply from userspace.
	 */
	spin_lock(&group->notification_lock);
	while (!list_empty(&group->fanotify_data.access_list)) {
		event = list_first_entry(&group->fanotify_data.access_list,
				struct fanotify_perm_event, fae.fse.list);
		list_del_init(&event->fae.fse.list);
		finish_permission_event(group, event, FAN_ALLOW);
		spin_lock(&group->notification_lock);
	}

	/*
	 * Destroy all non-permission events. For permission events just
	 * dequeue them and set the response. They will be freed once the
	 * response is consumed and fanotify_get_response() returns.
	 */
	while (!fsnotify_notify_queue_is_empty(group)) {
		fsn_event = fsnotify_remove_first_event(group);
		if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) {
			spin_unlock(&group->notification_lock);
			fsnotify_destroy_event(group, fsn_event);
		} else {
			finish_permission_event(group, FANOTIFY_PE(fsn_event),
						FAN_ALLOW);
		}
		spin_lock(&group->notification_lock);
	}
	spin_unlock(&group->notification_lock);

	/* Response for all permission events is set, wake up waiters */
	wake_up(&group->fanotify_data.access_waitq);

	/* matches the fanotify_init->fsnotify_alloc_group */
	fsnotify_destroy_group(group);

	return 0;
}

static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct fsnotify_group *group;
	struct fsnotify_event *fsn_event;
	void __user *p;
	int ret = -ENOTTY;
	size_t send_len = 0;

	group = file->private_data;

	p = (void __user *) arg;

	switch (cmd) {
	case FIONREAD:
		spin_lock(&group->notification_lock);
		list_for_each_entry(fsn_event, &group->notification_list, list)
			send_len += FAN_EVENT_METADATA_LEN;
		spin_unlock(&group->notification_lock);
		ret = put_user(send_len, (int __user *) p);
		break;
	}

	return ret;
}

static const struct file_operations fanotify_fops = {
	.show_fdinfo	= fanotify_show_fdinfo,
	.poll		= fanotify_poll,
	.read		= fanotify_read,
	.write		= fanotify_write,
	.fasync		= NULL,
	.release	= fanotify_release,
	.unlocked_ioctl	= fanotify_ioctl,
	.compat_ioctl	= fanotify_ioctl,
	.llseek		= noop_llseek,
};
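
/*
 * Example (userspace, illustrative sketch): FIONREAD on a fanotify fd.
 * Note that fanotify_ioctl() above sums FAN_EVENT_METADATA_LEN per queued
 * event, so for FAN_REPORT_FID groups the value can undercount the bytes a
 * subsequent read() actually returns. fan_fd is a hypothetical fd.
 *
 *	int avail;
 *	if (ioctl(fan_fd, FIONREAD, &avail) == 0)
 *		printf("~%d bytes of queued events\n", avail);
 */
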
static int fanotify_find_path(int dfd, const char __user *filename,
			      struct path *path, unsigned int flags, __u64 mask,
			      unsigned int obj_type)
{
	int ret;

	pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
		 dfd, filename, flags);

	if (filename == NULL) {
		struct fd f = fdget(dfd);

		ret = -EBADF;
		if (!f.file)
			goto out;

		ret = -ENOTDIR;
		if ((flags & FAN_MARK_ONLYDIR) &&
		    !(S_ISDIR(file_inode(f.file)->i_mode))) {
			fdput(f);
			goto out;
		}

		*path = f.file->f_path;
		path_get(path);
		fdput(f);
	} else {
		unsigned int lookup_flags = 0;

		if (!(flags & FAN_MARK_DONT_FOLLOW))
			lookup_flags |= LOOKUP_FOLLOW;
		if (flags & FAN_MARK_ONLYDIR)
			lookup_flags |= LOOKUP_DIRECTORY;

		ret = user_path_at(dfd, filename, lookup_flags, path);
		if (ret)
			goto out;
	}

	/* you can only watch an inode if you have read permissions on it */
	ret = inode_permission(path->dentry->d_inode, MAY_READ);
	if (ret) {
		path_put(path);
		goto out;
	}

	ret = security_path_notify(path, mask, obj_type);
	if (ret)
		path_put(path);

out:
	return ret;
}

static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
					    __u32 mask,
					    unsigned int flags,
					    int *destroy)
{
	__u32 oldmask = 0;

	spin_lock(&fsn_mark->lock);
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		oldmask = fsn_mark->mask;
		fsn_mark->mask &= ~mask;
	} else {
		fsn_mark->ignored_mask &= ~mask;
	}
	*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
	spin_unlock(&fsn_mark->lock);

	return mask & oldmask;
}

static int fanotify_remove_mark(struct fsnotify_group *group,
				fsnotify_connp_t *connp, __u32 mask,
				unsigned int flags)
{
	struct fsnotify_mark *fsn_mark = NULL;
	__u32 removed;
	int destroy_mark;

	mutex_lock(&group->mark_mutex);
	fsn_mark = fsnotify_find_mark(connp, group);
	if (!fsn_mark) {
		mutex_unlock(&group->mark_mutex);
		return -ENOENT;
	}

	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
						 &destroy_mark);
	if (removed & fsnotify_conn_mask(fsn_mark->connector))
		fsnotify_recalc_mask(fsn_mark->connector);
	if (destroy_mark)
		fsnotify_detach_mark(fsn_mark);
	mutex_unlock(&group->mark_mutex);
	if (destroy_mark)
		fsnotify_free_mark(fsn_mark);

	/* matches the fsnotify_find_mark() */
	fsnotify_put_mark(fsn_mark);
	return 0;
}

static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
					 struct vfsmount *mnt, __u32 mask,
					 unsigned int flags)
{
	return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
				    mask, flags);
}

static int fanotify_remove_sb_mark(struct fsnotify_group *group,
				   struct super_block *sb, __u32 mask,
				   unsigned int flags)
{
	return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
}

static int fanotify_remove_inode_mark(struct fsnotify_group *group,
				      struct inode *inode, __u32 mask,
				      unsigned int flags)
{
	return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask,
				    flags);
}

static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
				       __u32 mask,
				       unsigned int flags)
{
	__u32 oldmask = -1;

	spin_lock(&fsn_mark->lock);
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		oldmask = fsn_mark->mask;
		fsn_mark->mask |= mask;
	} else {
		fsn_mark->ignored_mask |= mask;
		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
	}
	spin_unlock(&fsn_mark->lock);

	return mask & ~oldmask;
}

static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
						   fsnotify_connp_t *connp,
						   unsigned int type,
						   __kernel_fsid_t *fsid)
{
	struct fsnotify_mark *mark;
	int ret;

	if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
		return ERR_PTR(-ENOSPC);

	mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
	if (!mark)
		return ERR_PTR(-ENOMEM);

	fsnotify_init_mark(mark, group);
	ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
	if (ret) {
		fsnotify_put_mark(mark);
		return ERR_PTR(ret);
	}

	return mark;
}

static int fanotify_add_mark(struct fsnotify_group *group,
			     fsnotify_connp_t *connp, unsigned int type,
			     __u32 mask, unsigned int flags,
			     __kernel_fsid_t *fsid)
{
	struct fsnotify_mark *fsn_mark;
	__u32 added;

	mutex_lock(&group->mark_mutex);
	fsn_mark = fsnotify_find_mark(connp, group);
	if (!fsn_mark) {
		fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
		if (IS_ERR(fsn_mark)) {
			mutex_unlock(&group->mark_mutex);
			return PTR_ERR(fsn_mark);
		}
	}
	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
		fsnotify_recalc_mask(fsn_mark->connector);
	mutex_unlock(&group->mark_mutex);

	fsnotify_put_mark(fsn_mark);
	return 0;
}
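
/*
 * Illustrative note (editor's sketch, not from the original source):
 * repeated FAN_MARK_ADD calls on the same object reuse the existing mark
 * and OR the masks together, so adding FAN_OPEN and later FAN_CLOSE_WRITE
 * on one inode leaves a single mark watching both. The connector mask is
 * only recalculated when genuinely new bits are added.
 */
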
static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
				      struct vfsmount *mnt, __u32 mask,
				      unsigned int flags, __kernel_fsid_t *fsid)
{
	return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
}

static int fanotify_add_sb_mark(struct fsnotify_group *group,
				struct super_block *sb, __u32 mask,
				unsigned int flags, __kernel_fsid_t *fsid)
{
	return fanotify_add_mark(group, &sb->s_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
}

static int fanotify_add_inode_mark(struct fsnotify_group *group,
				   struct inode *inode, __u32 mask,
				   unsigned int flags, __kernel_fsid_t *fsid)
{
	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);

	/*
	 * If some other task has this inode open for write we should not add
	 * an ignored mark, unless that ignored mark is supposed to survive
	 * modification changes anyway.
	 */
	if ((flags & FAN_MARK_IGNORED_MASK) &&
	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
	    inode_is_open_for_write(inode))
		return 0;

	return fanotify_add_mark(group, &inode->i_fsnotify_marks,
				 FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
}
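
/*
 * Example (userspace, illustrative sketch): placing a mark with the
 * fanotify_mark(2) wrapper from <sys/fanotify.h>. fan_fd is a hypothetical
 * fd returned by fanotify_init(2).
 *
 *	// Watch opens and close-after-write on everything under this mount.
 *	if (fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
 *			  FAN_OPEN | FAN_CLOSE_WRITE, AT_FDCWD, "/home") < 0)
 *		perror("fanotify_mark");
 */
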
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
	struct fsnotify_group *group;
	int f_flags, fd;
	struct user_struct *user;
	struct fanotify_event *oevent;

	pr_debug("%s: flags=%x event_f_flags=%x\n",
		 __func__, flags, event_f_flags);

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

#ifdef CONFIG_AUDITSYSCALL
	if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
#else
	if (flags & ~FANOTIFY_INIT_FLAGS)
#endif
		return -EINVAL;

	if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
		return -EINVAL;

	switch (event_f_flags & O_ACCMODE) {
	case O_RDONLY:
	case O_RDWR:
	case O_WRONLY:
		break;
	default:
		return -EINVAL;
	}

	if ((flags & FAN_REPORT_FID) &&
	    (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF)
		return -EINVAL;

	user = get_current_user();
	if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
		free_uid(user);
		return -EMFILE;
	}

	f_flags = O_RDWR | FMODE_NONOTIFY;
	if (flags & FAN_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (flags & FAN_NONBLOCK)
		f_flags |= O_NONBLOCK;

	/* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
	if (IS_ERR(group)) {
		free_uid(user);
		return PTR_ERR(group);
	}

	group->fanotify_data.user = user;
	group->fanotify_data.flags = flags;
	atomic_inc(&user->fanotify_listeners);
	group->memcg = get_mem_cgroup_from_mm(current->mm);

	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL,
				      FSNOTIFY_EVENT_NONE, NULL);
	if (unlikely(!oevent)) {
		fd = -ENOMEM;
		goto out_destroy_group;
	}
	group->overflow_event = &oevent->fse;

	if (force_o_largefile())
		event_f_flags |= O_LARGEFILE;
	group->fanotify_data.f_flags = event_f_flags;
	init_waitqueue_head(&group->fanotify_data.access_waitq);
	INIT_LIST_HEAD(&group->fanotify_data.access_list);
	switch (flags & FANOTIFY_CLASS_BITS) {
	case FAN_CLASS_NOTIF:
		group->priority = FS_PRIO_0;
		break;
	case FAN_CLASS_CONTENT:
		group->priority = FS_PRIO_1;
		break;
	case FAN_CLASS_PRE_CONTENT:
		group->priority = FS_PRIO_2;
		break;
	default:
		fd = -EINVAL;
		goto out_destroy_group;
	}

	if (flags & FAN_UNLIMITED_QUEUE) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
			goto out_destroy_group;
		group->max_events = UINT_MAX;
	} else {
		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
	}

	if (flags & FAN_UNLIMITED_MARKS) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
			goto out_destroy_group;
		group->fanotify_data.max_marks = UINT_MAX;
	} else {
		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
	}

	if (flags & FAN_ENABLE_AUDIT) {
		fd = -EPERM;
		if (!capable(CAP_AUDIT_WRITE))
			goto out_destroy_group;
	}

	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
	if (fd < 0)
		goto out_destroy_group;

	return fd;

out_destroy_group:
	fsnotify_destroy_group(group);
	return fd;
}
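
/*
 * Example (userspace, illustrative sketch): creating a listener. As
 * enforced above, the caller needs CAP_SYS_ADMIN, permission events
 * require a class above FAN_CLASS_NOTIF, and FAN_REPORT_FID is only
 * accepted together with FAN_CLASS_NOTIF.
 *
 *	int fan_fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT,
 *				   O_RDONLY | O_LARGEFILE);
 *	if (fan_fd < 0)
 *		perror("fanotify_init");	// e.g. EPERM, EINVAL, EMFILE
 */
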
/* Check if filesystem can encode a unique fid */
static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
{
	__kernel_fsid_t root_fsid;
	int err;

	/*
	 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
	 */
	err = vfs_get_fsid(path->dentry, fsid);
	if (err)
		return err;

	if (!fsid->val[0] && !fsid->val[1])
		return -ENODEV;

	/*
	 * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
	 * which uses a different fsid than sb root.
	 */
	err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
	if (err)
		return err;

	if (root_fsid.val[0] != fsid->val[0] ||
	    root_fsid.val[1] != fsid->val[1])
		return -EXDEV;

	/*
	 * We need to make sure that the file system supports at least
	 * encoding a file handle so user can use name_to_handle_at() to
	 * compare fid returned with event to the file handle of watched
	 * objects. However, name_to_handle_at() requires that the
	 * filesystem also supports decoding file handles.
	 */
	if (!path->dentry->d_sb->s_export_op ||
	    !path->dentry->d_sb->s_export_op->fh_to_dentry)
		return -EOPNOTSUPP;

	return 0;
}

static int fanotify_events_supported(struct path *path, __u64 mask)
{
	/*
	 * Some filesystems such as 'proc' acquire unusual locks when opening
	 * files. For them fanotify permission events have high chances of
	 * deadlocking the system - open done when reporting fanotify event
	 * blocks on this "unusual" lock while another process holding the lock
	 * waits for fanotify permission event to be answered. Just disallow
	 * permission events for such filesystems.
	 */
	if (mask & FANOTIFY_PERM_EVENTS &&
	    path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
		return -EINVAL;
	return 0;
}
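
/*
 * Example (userspace, illustrative sketch): matching a FAN_REPORT_FID
 * event against a watched object, as the comment in fanotify_test_fid()
 * suggests. metadata is a hypothetical pointer to a just-read event.
 *
 *	struct fanotify_event_info_fid *fid = (void *)(metadata + 1);
 *	struct file_handle *fh = (struct file_handle *)fid->handle;
 *	char want_buf[sizeof(struct file_handle) + MAX_HANDLE_SZ];
 *	struct file_handle *want = (struct file_handle *)want_buf;
 *	int mount_id;
 *
 *	want->handle_bytes = MAX_HANDLE_SZ;
 *	if (!name_to_handle_at(AT_FDCWD, "/watched/file", want, &mount_id, 0) &&
 *	    want->handle_type == fh->handle_type &&
 *	    want->handle_bytes == fh->handle_bytes &&
 *	    !memcmp(want->f_handle, fh->f_handle, fh->handle_bytes))
 *		puts("event refers to /watched/file");
 */
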
static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
			    int dfd, const char __user *pathname)
{
	struct inode *inode = NULL;
	struct vfsmount *mnt = NULL;
	struct fsnotify_group *group;
	struct fd f;
	struct path path;
	__kernel_fsid_t __fsid, *fsid = NULL;
	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
	unsigned int obj_type;
	int ret;

	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
		 __func__, fanotify_fd, flags, dfd, pathname, mask);

	/* we only use the lower 32 bits as of right now. */
	if (mask & ((__u64)0xffffffff << 32))
		return -EINVAL;

	if (flags & ~FANOTIFY_MARK_FLAGS)
		return -EINVAL;

	switch (mark_type) {
	case FAN_MARK_INODE:
		obj_type = FSNOTIFY_OBJ_TYPE_INODE;
		break;
	case FAN_MARK_MOUNT:
		obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
		break;
	case FAN_MARK_FILESYSTEM:
		obj_type = FSNOTIFY_OBJ_TYPE_SB;
		break;
	default:
		return -EINVAL;
	}

	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
	case FAN_MARK_ADD:		/* fallthrough */
	case FAN_MARK_REMOVE:
		if (!mask)
			return -EINVAL;
		break;
	case FAN_MARK_FLUSH:
		if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
		valid_mask |= FANOTIFY_PERM_EVENTS;

	if (mask & ~valid_mask)
		return -EINVAL;

	f = fdget(fanotify_fd);
	if (unlikely(!f.file))
		return -EBADF;

	/* verify that this is indeed an fanotify instance */
	ret = -EINVAL;
	if (unlikely(f.file->f_op != &fanotify_fops))
		goto fput_and_out;
	group = f.file->private_data;

	/*
	 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
	 * allowed to set permission events.
	 */
	ret = -EINVAL;
	if (mask & FANOTIFY_PERM_EVENTS &&
	    group->priority == FS_PRIO_0)
		goto fput_and_out;

	/*
	 * Events with data type inode do not carry enough information to
	 * report event->fd, so we do not allow setting a mask for inode
	 * events unless group supports reporting fid.
	 * Inode events are not supported on a mount mark, because they do
	 * not carry enough information (i.e. path) to be filtered by mount
	 * point.
	 */
	if (mask & FANOTIFY_INODE_EVENTS &&
	    (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) ||
	     mark_type == FAN_MARK_MOUNT))
		goto fput_and_out;

	if (flags & FAN_MARK_FLUSH) {
		ret = 0;
		if (mark_type == FAN_MARK_MOUNT)
			fsnotify_clear_vfsmount_marks_by_group(group);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			fsnotify_clear_sb_marks_by_group(group);
		else
			fsnotify_clear_inode_marks_by_group(group);
		goto fput_and_out;
	}

	ret = fanotify_find_path(dfd, pathname, &path, flags,
				 (mask & ALL_FSNOTIFY_EVENTS), obj_type);
	if (ret)
		goto fput_and_out;

	if (flags & FAN_MARK_ADD) {
		ret = fanotify_events_supported(&path, mask);
		if (ret)
			goto path_put_and_out;
	}

	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
		ret = fanotify_test_fid(&path, &__fsid);
		if (ret)
			goto path_put_and_out;

		fsid = &__fsid;
	}

	/* inode held in place by reference to path; group by fget on fd */
	if (mark_type == FAN_MARK_INODE)
		inode = path.dentry->d_inode;
	else
		mnt = path.mnt;

	/* create/update an inode mark */
	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
	case FAN_MARK_ADD:
		if (mark_type == FAN_MARK_MOUNT)
			ret = fanotify_add_vfsmount_mark(group, mnt, mask,
							 flags, fsid);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
						   flags, fsid);
		else
			ret = fanotify_add_inode_mark(group, inode, mask,
						      flags, fsid);
		break;
	case FAN_MARK_REMOVE:
		if (mark_type == FAN_MARK_MOUNT)
			ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
							    flags);
		else if (mark_type == FAN_MARK_FILESYSTEM)
			ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
						      flags);
		else
			ret = fanotify_remove_inode_mark(group, inode, mask,
							 flags);
		break;
	default:
		ret = -EINVAL;
	}

path_put_and_out:
	path_put(&path);
fput_and_out:
	fdput(f);
	return ret;
}

SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
		__u64, mask, int, dfd,
		const char __user *, pathname)
{
	return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
				int, fanotify_fd, unsigned int, flags,
				__u32, mask0, __u32, mask1, int, dfd,
				const char __user *, pathname)
{
	return do_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
				((__u64)mask0 << 32) | mask1,
#else
				((__u64)mask1 << 32) | mask0,
#endif
				dfd, pathname);
}
#endif

/*
 * fanotify_user_setup - Our initialization function. Note that we cannot return
 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
 * must result in panic().
 */
static int __init fanotify_user_setup(void)
{
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);

	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
					 SLAB_PANIC|SLAB_ACCOUNT);
	fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC);
	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
		fanotify_perm_event_cachep =
			KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
	}

	return 0;
}
device_initcall(fanotify_user_setup);