1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2021, Microsoft Corporation. 4 * 5 * Authors: 6 * Beau Belgrave <beaub@linux.microsoft.com> 7 */ 8 9 #include <linux/bitmap.h> 10 #include <linux/cdev.h> 11 #include <linux/hashtable.h> 12 #include <linux/list.h> 13 #include <linux/io.h> 14 #include <linux/uio.h> 15 #include <linux/ioctl.h> 16 #include <linux/jhash.h> 17 #include <linux/refcount.h> 18 #include <linux/trace_events.h> 19 #include <linux/tracefs.h> 20 #include <linux/types.h> 21 #include <linux/uaccess.h> 22 #include <linux/highmem.h> 23 #include <linux/init.h> 24 #include <linux/user_events.h> 25 #include "trace_dynevent.h" 26 #include "trace_output.h" 27 #include "trace.h" 28 29 #define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1) 30 31 #define FIELD_DEPTH_TYPE 0 32 #define FIELD_DEPTH_NAME 1 33 #define FIELD_DEPTH_SIZE 2 34 35 /* Limit how long of an event name plus args within the subsystem. */ 36 #define MAX_EVENT_DESC 512 37 #define EVENT_NAME(user_event) ((user_event)->tracepoint.name) 38 #define MAX_FIELD_ARRAY_SIZE 1024 39 40 /* 41 * Internal bits (kernel side only) to keep track of connected probes: 42 * These are used when status is requested in text form about an event. These 43 * bits are compared against an internal byte on the event to determine which 44 * probes to print out to the user. 45 * 46 * These do not reflect the mapped bytes between the user and kernel space. 47 */ 48 #define EVENT_STATUS_FTRACE BIT(0) 49 #define EVENT_STATUS_PERF BIT(1) 50 #define EVENT_STATUS_OTHER BIT(7) 51 52 /* 53 * Stores the system name, tables, and locks for a group of events. This 54 * allows isolation for events by various means. 55 */ 56 struct user_event_group { 57 char *system_name; 58 struct hlist_node node; 59 struct mutex reg_mutex; 60 DECLARE_HASHTABLE(register_table, 8); 61 }; 62 63 /* Group for init_user_ns mapping, top-most group */ 64 static struct user_event_group *init_group; 65 66 /* Max allowed events for the whole system */ 67 static unsigned int max_user_events = 32768; 68 69 /* Current number of events on the whole system */ 70 static unsigned int current_user_events; 71 72 /* 73 * Stores per-event properties, as users register events 74 * within a file a user_event might be created if it does not 75 * already exist. These are globally used and their lifetime 76 * is tied to the refcnt member. These cannot go away until the 77 * refcnt reaches one. 78 */ 79 struct user_event { 80 struct user_event_group *group; 81 struct tracepoint tracepoint; 82 struct trace_event_call call; 83 struct trace_event_class class; 84 struct dyn_event devent; 85 struct hlist_node node; 86 struct list_head fields; 87 struct list_head validators; 88 refcount_t refcnt; 89 int min_size; 90 char status; 91 }; 92 93 /* 94 * Stores per-mm/event properties that enable an address to be 95 * updated properly for each task. As tasks are forked, we use 96 * these to track enablement sites that are tied to an event. 97 */ 98 struct user_event_enabler { 99 struct list_head link; 100 struct user_event *event; 101 unsigned long addr; 102 103 /* Track enable bit, flags, etc. Aligned for bitops. 
*/ 104 unsigned int values; 105 }; 106 107 /* Bits 0-5 are for the bit to update upon enable/disable (0-63 allowed) */ 108 #define ENABLE_VAL_BIT_MASK 0x3F 109 110 /* Bit 6 is for faulting status of enablement */ 111 #define ENABLE_VAL_FAULTING_BIT 6 112 113 /* Bit 7 is for freeing status of enablement */ 114 #define ENABLE_VAL_FREEING_BIT 7 115 116 /* Only duplicate the bit value */ 117 #define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK 118 119 #define ENABLE_BITOPS(e) ((unsigned long *)&(e)->values) 120 121 /* Used for asynchronous faulting in of pages */ 122 struct user_event_enabler_fault { 123 struct work_struct work; 124 struct user_event_mm *mm; 125 struct user_event_enabler *enabler; 126 int attempt; 127 }; 128 129 static struct kmem_cache *fault_cache; 130 131 /* Global list of memory descriptors using user_events */ 132 static LIST_HEAD(user_event_mms); 133 static DEFINE_SPINLOCK(user_event_mms_lock); 134 135 /* 136 * Stores per-file events references, as users register events 137 * within a file this structure is modified and freed via RCU. 138 * The lifetime of this struct is tied to the lifetime of the file. 139 * These are not shared and only accessible by the file that created it. 140 */ 141 struct user_event_refs { 142 struct rcu_head rcu; 143 int count; 144 struct user_event *events[]; 145 }; 146 147 struct user_event_file_info { 148 struct user_event_group *group; 149 struct user_event_refs *refs; 150 }; 151 152 #define VALIDATOR_ENSURE_NULL (1 << 0) 153 #define VALIDATOR_REL (1 << 1) 154 155 struct user_event_validator { 156 struct list_head link; 157 int offset; 158 int flags; 159 }; 160 161 typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i, 162 void *tpdata, bool *faulted); 163 164 static int user_event_parse(struct user_event_group *group, char *name, 165 char *args, char *flags, 166 struct user_event **newuser); 167 168 static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm); 169 static struct user_event_mm *user_event_mm_get_all(struct user_event *user); 170 static void user_event_mm_put(struct user_event_mm *mm); 171 172 static u32 user_event_key(char *name) 173 { 174 return jhash(name, strlen(name), 0); 175 } 176 177 static void user_event_group_destroy(struct user_event_group *group) 178 { 179 kfree(group->system_name); 180 kfree(group); 181 } 182 183 static char *user_event_group_system_name(struct user_namespace *user_ns) 184 { 185 char *system_name; 186 int len = sizeof(USER_EVENTS_SYSTEM) + 1; 187 188 if (user_ns != &init_user_ns) { 189 /* 190 * Unexpected at this point: 191 * We only currently support init_user_ns. 192 * When we enable more, this will trigger a failure so log. 
193 */ 194 pr_warn("user_events: Namespace other than init_user_ns!\n"); 195 return NULL; 196 } 197 198 system_name = kmalloc(len, GFP_KERNEL); 199 200 if (!system_name) 201 return NULL; 202 203 snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM); 204 205 return system_name; 206 } 207 208 static inline struct user_event_group 209 *user_event_group_from_user_ns(struct user_namespace *user_ns) 210 { 211 if (user_ns == &init_user_ns) 212 return init_group; 213 214 return NULL; 215 } 216 217 static struct user_event_group *current_user_event_group(void) 218 { 219 struct user_namespace *user_ns = current_user_ns(); 220 struct user_event_group *group = NULL; 221 222 while (user_ns) { 223 group = user_event_group_from_user_ns(user_ns); 224 225 if (group) 226 break; 227 228 user_ns = user_ns->parent; 229 } 230 231 return group; 232 } 233 234 static struct user_event_group 235 *user_event_group_create(struct user_namespace *user_ns) 236 { 237 struct user_event_group *group; 238 239 group = kzalloc(sizeof(*group), GFP_KERNEL); 240 241 if (!group) 242 return NULL; 243 244 group->system_name = user_event_group_system_name(user_ns); 245 246 if (!group->system_name) 247 goto error; 248 249 mutex_init(&group->reg_mutex); 250 hash_init(group->register_table); 251 252 return group; 253 error: 254 if (group) 255 user_event_group_destroy(group); 256 257 return NULL; 258 }; 259 260 static void user_event_enabler_destroy(struct user_event_enabler *enabler) 261 { 262 list_del_rcu(&enabler->link); 263 264 /* No longer tracking the event via the enabler */ 265 refcount_dec(&enabler->event->refcnt); 266 267 kfree(enabler); 268 } 269 270 static int user_event_mm_fault_in(struct user_event_mm *mm, unsigned long uaddr, 271 int attempt) 272 { 273 bool unlocked; 274 int ret; 275 276 /* 277 * Normally this is low, ensure that it cannot be taken advantage of by 278 * bad user processes to cause excessive looping. 279 */ 280 if (attempt > 10) 281 return -EFAULT; 282 283 mmap_read_lock(mm->mm); 284 285 /* Ensure MM has tasks, cannot use after exit_mm() */ 286 if (refcount_read(&mm->tasks) == 0) { 287 ret = -ENOENT; 288 goto out; 289 } 290 291 ret = fixup_user_fault(mm->mm, uaddr, FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, 292 &unlocked); 293 out: 294 mmap_read_unlock(mm->mm); 295 296 return ret; 297 } 298 299 static int user_event_enabler_write(struct user_event_mm *mm, 300 struct user_event_enabler *enabler, 301 bool fixup_fault, int *attempt); 302 303 static void user_event_enabler_fault_fixup(struct work_struct *work) 304 { 305 struct user_event_enabler_fault *fault = container_of( 306 work, struct user_event_enabler_fault, work); 307 struct user_event_enabler *enabler = fault->enabler; 308 struct user_event_mm *mm = fault->mm; 309 unsigned long uaddr = enabler->addr; 310 int attempt = fault->attempt; 311 int ret; 312 313 ret = user_event_mm_fault_in(mm, uaddr, attempt); 314 315 if (ret && ret != -ENOENT) { 316 struct user_event *user = enabler->event; 317 318 pr_warn("user_events: Fault for mm: 0x%pK @ 0x%llx event: %s\n", 319 mm->mm, (unsigned long long)uaddr, EVENT_NAME(user)); 320 } 321 322 /* Prevent state changes from racing */ 323 mutex_lock(&event_mutex); 324 325 /* User asked for enabler to be removed during fault */ 326 if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) { 327 user_event_enabler_destroy(enabler); 328 goto out; 329 } 330 331 /* 332 * If we managed to get the page, re-issue the write. 
We do not 333 * want to get into a possible infinite loop, which is why we only 334 * attempt again directly if the page came in. If we couldn't get 335 * the page here, then we will try again the next time the event is 336 * enabled/disabled. 337 */ 338 clear_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); 339 340 if (!ret) { 341 mmap_read_lock(mm->mm); 342 user_event_enabler_write(mm, enabler, true, &attempt); 343 mmap_read_unlock(mm->mm); 344 } 345 out: 346 mutex_unlock(&event_mutex); 347 348 /* In all cases we no longer need the mm or fault */ 349 user_event_mm_put(mm); 350 kmem_cache_free(fault_cache, fault); 351 } 352 353 static bool user_event_enabler_queue_fault(struct user_event_mm *mm, 354 struct user_event_enabler *enabler, 355 int attempt) 356 { 357 struct user_event_enabler_fault *fault; 358 359 fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT | __GFP_NOWARN); 360 361 if (!fault) 362 return false; 363 364 INIT_WORK(&fault->work, user_event_enabler_fault_fixup); 365 fault->mm = user_event_mm_get(mm); 366 fault->enabler = enabler; 367 fault->attempt = attempt; 368 369 /* Don't try to queue in again while we have a pending fault */ 370 set_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); 371 372 if (!schedule_work(&fault->work)) { 373 /* Allow another attempt later */ 374 clear_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); 375 376 user_event_mm_put(mm); 377 kmem_cache_free(fault_cache, fault); 378 379 return false; 380 } 381 382 return true; 383 } 384 385 static int user_event_enabler_write(struct user_event_mm *mm, 386 struct user_event_enabler *enabler, 387 bool fixup_fault, int *attempt) 388 { 389 unsigned long uaddr = enabler->addr; 390 unsigned long *ptr; 391 struct page *page; 392 void *kaddr; 393 int ret; 394 395 lockdep_assert_held(&event_mutex); 396 mmap_assert_locked(mm->mm); 397 398 *attempt += 1; 399 400 /* Ensure MM has tasks, cannot use after exit_mm() */ 401 if (refcount_read(&mm->tasks) == 0) 402 return -ENOENT; 403 404 if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)) || 405 test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)))) 406 return -EBUSY; 407 408 ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT, 409 &page, NULL, NULL); 410 411 if (unlikely(ret <= 0)) { 412 if (!fixup_fault) 413 return -EFAULT; 414 415 if (!user_event_enabler_queue_fault(mm, enabler, *attempt)) 416 pr_warn("user_events: Unable to queue fault handler\n"); 417 418 return -EFAULT; 419 } 420 421 kaddr = kmap_local_page(page); 422 ptr = kaddr + (uaddr & ~PAGE_MASK); 423 424 /* Update bit atomically, user tracers must be atomic as well */ 425 if (enabler->event && enabler->event->status) 426 set_bit(enabler->values & ENABLE_VAL_BIT_MASK, ptr); 427 else 428 clear_bit(enabler->values & ENABLE_VAL_BIT_MASK, ptr); 429 430 kunmap_local(kaddr); 431 unpin_user_pages_dirty_lock(&page, 1, true); 432 433 return 0; 434 } 435 436 static bool user_event_enabler_exists(struct user_event_mm *mm, 437 unsigned long uaddr, unsigned char bit) 438 { 439 struct user_event_enabler *enabler; 440 struct user_event_enabler *next; 441 442 list_for_each_entry_safe(enabler, next, &mm->enablers, link) { 443 if (enabler->addr == uaddr && 444 (enabler->values & ENABLE_VAL_BIT_MASK) == bit) 445 return true; 446 } 447 448 return false; 449 } 450 451 static void user_event_enabler_update(struct user_event *user) 452 { 453 struct user_event_enabler *enabler; 454 struct user_event_mm *mm = user_event_mm_get_all(user); 455 struct user_event_mm *next; 456 int 
attempt; 457 458 while (mm) { 459 next = mm->next; 460 mmap_read_lock(mm->mm); 461 rcu_read_lock(); 462 463 list_for_each_entry_rcu(enabler, &mm->enablers, link) { 464 if (enabler->event == user) { 465 attempt = 0; 466 user_event_enabler_write(mm, enabler, true, &attempt); 467 } 468 } 469 470 rcu_read_unlock(); 471 mmap_read_unlock(mm->mm); 472 user_event_mm_put(mm); 473 mm = next; 474 } 475 } 476 477 static bool user_event_enabler_dup(struct user_event_enabler *orig, 478 struct user_event_mm *mm) 479 { 480 struct user_event_enabler *enabler; 481 482 /* Skip pending frees */ 483 if (unlikely(test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(orig)))) 484 return true; 485 486 enabler = kzalloc(sizeof(*enabler), GFP_NOWAIT | __GFP_ACCOUNT); 487 488 if (!enabler) 489 return false; 490 491 enabler->event = orig->event; 492 enabler->addr = orig->addr; 493 494 /* Only dup part of value (ignore future flags, etc) */ 495 enabler->values = orig->values & ENABLE_VAL_DUP_MASK; 496 497 refcount_inc(&enabler->event->refcnt); 498 list_add_rcu(&enabler->link, &mm->enablers); 499 500 return true; 501 } 502 503 static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm) 504 { 505 refcount_inc(&mm->refcnt); 506 507 return mm; 508 } 509 510 static struct user_event_mm *user_event_mm_get_all(struct user_event *user) 511 { 512 struct user_event_mm *found = NULL; 513 struct user_event_enabler *enabler; 514 struct user_event_mm *mm; 515 516 /* 517 * We do not want to block fork/exec while enablements are being 518 * updated, so we use RCU to walk the current tasks that have used 519 * user_events ABI for 1 or more events. Each enabler found in each 520 * task that matches the event being updated has a write to reflect 521 * the kernel state back into the process. Waits/faults must not occur 522 * during this. So we scan the list under RCU for all the mm that have 523 * the event within it. This is needed because mm_read_lock() can wait. 524 * Each user mm returned has a ref inc to handle remove RCU races. 525 */ 526 rcu_read_lock(); 527 528 list_for_each_entry_rcu(mm, &user_event_mms, link) 529 list_for_each_entry_rcu(enabler, &mm->enablers, link) 530 if (enabler->event == user) { 531 mm->next = found; 532 found = user_event_mm_get(mm); 533 break; 534 } 535 536 rcu_read_unlock(); 537 538 return found; 539 } 540 541 static struct user_event_mm *user_event_mm_create(struct task_struct *t) 542 { 543 struct user_event_mm *user_mm; 544 unsigned long flags; 545 546 user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT); 547 548 if (!user_mm) 549 return NULL; 550 551 user_mm->mm = t->mm; 552 INIT_LIST_HEAD(&user_mm->enablers); 553 refcount_set(&user_mm->refcnt, 1); 554 refcount_set(&user_mm->tasks, 1); 555 556 spin_lock_irqsave(&user_event_mms_lock, flags); 557 list_add_rcu(&user_mm->link, &user_event_mms); 558 spin_unlock_irqrestore(&user_event_mms_lock, flags); 559 560 t->user_event_mm = user_mm; 561 562 /* 563 * The lifetime of the memory descriptor can slightly outlast 564 * the task lifetime if a ref to the user_event_mm is taken 565 * between list_del_rcu() and call_rcu(). Therefore we need 566 * to take a reference to it to ensure it can live this long 567 * under this corner case. This can also occur in clones that 568 * outlast the parent. 
569 */ 570 mmgrab(user_mm->mm); 571 572 return user_mm; 573 } 574 575 static struct user_event_mm *current_user_event_mm(void) 576 { 577 struct user_event_mm *user_mm = current->user_event_mm; 578 579 if (user_mm) 580 goto inc; 581 582 user_mm = user_event_mm_create(current); 583 584 if (!user_mm) 585 goto error; 586 inc: 587 refcount_inc(&user_mm->refcnt); 588 error: 589 return user_mm; 590 } 591 592 static void user_event_mm_destroy(struct user_event_mm *mm) 593 { 594 struct user_event_enabler *enabler, *next; 595 596 list_for_each_entry_safe(enabler, next, &mm->enablers, link) 597 user_event_enabler_destroy(enabler); 598 599 mmdrop(mm->mm); 600 kfree(mm); 601 } 602 603 static void user_event_mm_put(struct user_event_mm *mm) 604 { 605 if (mm && refcount_dec_and_test(&mm->refcnt)) 606 user_event_mm_destroy(mm); 607 } 608 609 static void delayed_user_event_mm_put(struct work_struct *work) 610 { 611 struct user_event_mm *mm; 612 613 mm = container_of(to_rcu_work(work), struct user_event_mm, put_rwork); 614 user_event_mm_put(mm); 615 } 616 617 void user_event_mm_remove(struct task_struct *t) 618 { 619 struct user_event_mm *mm; 620 unsigned long flags; 621 622 might_sleep(); 623 624 mm = t->user_event_mm; 625 t->user_event_mm = NULL; 626 627 /* Clone will increment the tasks, only remove if last clone */ 628 if (!refcount_dec_and_test(&mm->tasks)) 629 return; 630 631 /* Remove the mm from the list, so it can no longer be enabled */ 632 spin_lock_irqsave(&user_event_mms_lock, flags); 633 list_del_rcu(&mm->link); 634 spin_unlock_irqrestore(&user_event_mms_lock, flags); 635 636 /* 637 * We need to wait for currently occurring writes to stop within 638 * the mm. This is required since exit_mm() snaps the current rss 639 * stats and clears them. On the final mmdrop(), check_mm() will 640 * report a bug if these increment. 641 * 642 * All writes/pins are done under mmap_read lock, take the write 643 * lock to ensure in-progress faults have completed. Faults that 644 * are pending but yet to run will check the task count and skip 645 * the fault since the mm is going away. 646 */ 647 mmap_write_lock(mm->mm); 648 mmap_write_unlock(mm->mm); 649 650 /* 651 * Put for mm must be done after RCU delay to handle new refs in 652 * between the list_del_rcu() and now. This ensures any get refs 653 * during rcu_read_lock() are accounted for during list removal. 654 * 655 * CPU A | CPU B 656 * --------------------------------------------------------------- 657 * user_event_mm_remove() | rcu_read_lock(); 658 * list_del_rcu() | list_for_each_entry_rcu(); 659 * call_rcu() | refcount_inc(); 660 * . | rcu_read_unlock(); 661 * schedule_work() | . 662 * user_event_mm_put() | . 663 * 664 * mmdrop() cannot be called in the softirq context of call_rcu() 665 * so we use a work queue after call_rcu() to run within. 
666 */ 667 INIT_RCU_WORK(&mm->put_rwork, delayed_user_event_mm_put); 668 queue_rcu_work(system_wq, &mm->put_rwork); 669 } 670 671 void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm) 672 { 673 struct user_event_mm *mm = user_event_mm_create(t); 674 struct user_event_enabler *enabler; 675 676 if (!mm) 677 return; 678 679 rcu_read_lock(); 680 681 list_for_each_entry_rcu(enabler, &old_mm->enablers, link) 682 if (!user_event_enabler_dup(enabler, mm)) 683 goto error; 684 685 rcu_read_unlock(); 686 687 return; 688 error: 689 rcu_read_unlock(); 690 user_event_mm_remove(t); 691 } 692 693 static bool current_user_event_enabler_exists(unsigned long uaddr, 694 unsigned char bit) 695 { 696 struct user_event_mm *user_mm = current_user_event_mm(); 697 bool exists; 698 699 if (!user_mm) 700 return false; 701 702 exists = user_event_enabler_exists(user_mm, uaddr, bit); 703 704 user_event_mm_put(user_mm); 705 706 return exists; 707 } 708 709 static struct user_event_enabler 710 *user_event_enabler_create(struct user_reg *reg, struct user_event *user, 711 int *write_result) 712 { 713 struct user_event_enabler *enabler; 714 struct user_event_mm *user_mm; 715 unsigned long uaddr = (unsigned long)reg->enable_addr; 716 int attempt = 0; 717 718 user_mm = current_user_event_mm(); 719 720 if (!user_mm) 721 return NULL; 722 723 enabler = kzalloc(sizeof(*enabler), GFP_KERNEL_ACCOUNT); 724 725 if (!enabler) 726 goto out; 727 728 enabler->event = user; 729 enabler->addr = uaddr; 730 enabler->values = reg->enable_bit; 731 retry: 732 /* Prevents state changes from racing with new enablers */ 733 mutex_lock(&event_mutex); 734 735 /* Attempt to reflect the current state within the process */ 736 mmap_read_lock(user_mm->mm); 737 *write_result = user_event_enabler_write(user_mm, enabler, false, 738 &attempt); 739 mmap_read_unlock(user_mm->mm); 740 741 /* 742 * If the write works, then we will track the enabler. A ref to the 743 * underlying user_event is held by the enabler to prevent it going 744 * away while the enabler is still in use by a process. The ref is 745 * removed when the enabler is destroyed. This means a event cannot 746 * be forcefully deleted from the system until all tasks using it 747 * exit or run exec(), which includes forks and clones. 
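	 *
	 * As a minimal illustration only (not part of this file, assuming the
	 * user_reg definition from the uapi header <linux/user_events.h> and a
	 * tracefs mount at /sys/kernel/tracing), the userspace side that
	 * creates such an enabler is a DIAG_IOCSREG ioctl along these lines:
	 *
	 *	__u32 enabled = 0;
	 *	struct user_reg reg = {
	 *		.size = sizeof(reg),
	 *		.enable_bit = 0,
	 *		.enable_size = sizeof(enabled),
	 *		.enable_addr = (__u64)(uintptr_t)&enabled,
	 *		.name_args = (__u64)(uintptr_t)"test u32 count",
	 *	};
	 *	int fd = open("/sys/kernel/tracing/user_events_data", O_RDWR);
	 *	int ret = ioctl(fd, DIAG_IOCSREG, &reg);
	 *
	 * On success reg.write_index holds the index to prefix writes with,
	 * and the kernel sets/clears bit 0 of 'enabled' as tracers attach and
	 * detach.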
748 */ 749 if (!*write_result) { 750 refcount_inc(&enabler->event->refcnt); 751 list_add_rcu(&enabler->link, &user_mm->enablers); 752 } 753 754 mutex_unlock(&event_mutex); 755 756 if (*write_result) { 757 /* Attempt to fault-in and retry if it worked */ 758 if (!user_event_mm_fault_in(user_mm, uaddr, attempt)) 759 goto retry; 760 761 kfree(enabler); 762 enabler = NULL; 763 } 764 out: 765 user_event_mm_put(user_mm); 766 767 return enabler; 768 } 769 770 static __always_inline __must_check 771 bool user_event_last_ref(struct user_event *user) 772 { 773 return refcount_read(&user->refcnt) == 1; 774 } 775 776 static __always_inline __must_check 777 size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i) 778 { 779 size_t ret; 780 781 pagefault_disable(); 782 783 ret = copy_from_iter_nocache(addr, bytes, i); 784 785 pagefault_enable(); 786 787 return ret; 788 } 789 790 static struct list_head *user_event_get_fields(struct trace_event_call *call) 791 { 792 struct user_event *user = (struct user_event *)call->data; 793 794 return &user->fields; 795 } 796 797 /* 798 * Parses a register command for user_events 799 * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]] 800 * 801 * Example event named 'test' with a 20 char 'msg' field with an unsigned int 802 * 'id' field after: 803 * test char[20] msg;unsigned int id 804 * 805 * NOTE: Offsets are from the user data perspective, they are not from the 806 * trace_entry/buffer perspective. We automatically add the common properties 807 * sizes to the offset for the user. 808 * 809 * Upon success user_event has its ref count increased by 1. 810 */ 811 static int user_event_parse_cmd(struct user_event_group *group, 812 char *raw_command, struct user_event **newuser) 813 { 814 char *name = raw_command; 815 char *args = strpbrk(name, " "); 816 char *flags; 817 818 if (args) 819 *args++ = '\0'; 820 821 flags = strpbrk(name, ":"); 822 823 if (flags) 824 *flags++ = '\0'; 825 826 return user_event_parse(group, name, args, flags, newuser); 827 } 828 829 static int user_field_array_size(const char *type) 830 { 831 const char *start = strchr(type, '['); 832 char val[8]; 833 char *bracket; 834 int size = 0; 835 836 if (start == NULL) 837 return -EINVAL; 838 839 if (strscpy(val, start + 1, sizeof(val)) <= 0) 840 return -EINVAL; 841 842 bracket = strchr(val, ']'); 843 844 if (!bracket) 845 return -EINVAL; 846 847 *bracket = '\0'; 848 849 if (kstrtouint(val, 0, &size)) 850 return -EINVAL; 851 852 if (size > MAX_FIELD_ARRAY_SIZE) 853 return -EINVAL; 854 855 return size; 856 } 857 858 static int user_field_size(const char *type) 859 { 860 /* long is not allowed from a user, since it's ambigious in size */ 861 if (strcmp(type, "s64") == 0) 862 return sizeof(s64); 863 if (strcmp(type, "u64") == 0) 864 return sizeof(u64); 865 if (strcmp(type, "s32") == 0) 866 return sizeof(s32); 867 if (strcmp(type, "u32") == 0) 868 return sizeof(u32); 869 if (strcmp(type, "int") == 0) 870 return sizeof(int); 871 if (strcmp(type, "unsigned int") == 0) 872 return sizeof(unsigned int); 873 if (strcmp(type, "s16") == 0) 874 return sizeof(s16); 875 if (strcmp(type, "u16") == 0) 876 return sizeof(u16); 877 if (strcmp(type, "short") == 0) 878 return sizeof(short); 879 if (strcmp(type, "unsigned short") == 0) 880 return sizeof(unsigned short); 881 if (strcmp(type, "s8") == 0) 882 return sizeof(s8); 883 if (strcmp(type, "u8") == 0) 884 return sizeof(u8); 885 if (strcmp(type, "char") == 0) 886 return sizeof(char); 887 if (strcmp(type, "unsigned char") == 0) 888 return 
sizeof(unsigned char); 889 if (str_has_prefix(type, "char[")) 890 return user_field_array_size(type); 891 if (str_has_prefix(type, "unsigned char[")) 892 return user_field_array_size(type); 893 if (str_has_prefix(type, "__data_loc ")) 894 return sizeof(u32); 895 if (str_has_prefix(type, "__rel_loc ")) 896 return sizeof(u32); 897 898 /* Uknown basic type, error */ 899 return -EINVAL; 900 } 901 902 static void user_event_destroy_validators(struct user_event *user) 903 { 904 struct user_event_validator *validator, *next; 905 struct list_head *head = &user->validators; 906 907 list_for_each_entry_safe(validator, next, head, link) { 908 list_del(&validator->link); 909 kfree(validator); 910 } 911 } 912 913 static void user_event_destroy_fields(struct user_event *user) 914 { 915 struct ftrace_event_field *field, *next; 916 struct list_head *head = &user->fields; 917 918 list_for_each_entry_safe(field, next, head, link) { 919 list_del(&field->link); 920 kfree(field); 921 } 922 } 923 924 static int user_event_add_field(struct user_event *user, const char *type, 925 const char *name, int offset, int size, 926 int is_signed, int filter_type) 927 { 928 struct user_event_validator *validator; 929 struct ftrace_event_field *field; 930 int validator_flags = 0; 931 932 field = kmalloc(sizeof(*field), GFP_KERNEL_ACCOUNT); 933 934 if (!field) 935 return -ENOMEM; 936 937 if (str_has_prefix(type, "__data_loc ")) 938 goto add_validator; 939 940 if (str_has_prefix(type, "__rel_loc ")) { 941 validator_flags |= VALIDATOR_REL; 942 goto add_validator; 943 } 944 945 goto add_field; 946 947 add_validator: 948 if (strstr(type, "char") != NULL) 949 validator_flags |= VALIDATOR_ENSURE_NULL; 950 951 validator = kmalloc(sizeof(*validator), GFP_KERNEL_ACCOUNT); 952 953 if (!validator) { 954 kfree(field); 955 return -ENOMEM; 956 } 957 958 validator->flags = validator_flags; 959 validator->offset = offset; 960 961 /* Want sequential access when validating */ 962 list_add_tail(&validator->link, &user->validators); 963 964 add_field: 965 field->type = type; 966 field->name = name; 967 field->offset = offset; 968 field->size = size; 969 field->is_signed = is_signed; 970 field->filter_type = filter_type; 971 972 if (filter_type == FILTER_OTHER) 973 field->filter_type = filter_assign_type(type); 974 975 list_add(&field->link, &user->fields); 976 977 /* 978 * Min size from user writes that are required, this does not include 979 * the size of trace_entry (common fields). 
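	 *
	 * For example, for a hypothetical event described as
	 * "u32 id; char[20] msg", fields are laid out directly after
	 * trace_entry, so after both fields are parsed min_size becomes
	 * (sizeof(struct trace_entry) + 4 + 20) - sizeof(struct trace_entry)
	 * = 24, and user writes carrying fewer than 24 payload bytes are
	 * rejected early with -EINVAL.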
980 */ 981 user->min_size = (offset + size) - sizeof(struct trace_entry); 982 983 return 0; 984 } 985 986 /* 987 * Parses the values of a field within the description 988 * Format: type name [size] 989 */ 990 static int user_event_parse_field(char *field, struct user_event *user, 991 u32 *offset) 992 { 993 char *part, *type, *name; 994 u32 depth = 0, saved_offset = *offset; 995 int len, size = -EINVAL; 996 bool is_struct = false; 997 998 field = skip_spaces(field); 999 1000 if (*field == '\0') 1001 return 0; 1002 1003 /* Handle types that have a space within */ 1004 len = str_has_prefix(field, "unsigned "); 1005 if (len) 1006 goto skip_next; 1007 1008 len = str_has_prefix(field, "struct "); 1009 if (len) { 1010 is_struct = true; 1011 goto skip_next; 1012 } 1013 1014 len = str_has_prefix(field, "__data_loc unsigned "); 1015 if (len) 1016 goto skip_next; 1017 1018 len = str_has_prefix(field, "__data_loc "); 1019 if (len) 1020 goto skip_next; 1021 1022 len = str_has_prefix(field, "__rel_loc unsigned "); 1023 if (len) 1024 goto skip_next; 1025 1026 len = str_has_prefix(field, "__rel_loc "); 1027 if (len) 1028 goto skip_next; 1029 1030 goto parse; 1031 skip_next: 1032 type = field; 1033 field = strpbrk(field + len, " "); 1034 1035 if (field == NULL) 1036 return -EINVAL; 1037 1038 *field++ = '\0'; 1039 depth++; 1040 parse: 1041 name = NULL; 1042 1043 while ((part = strsep(&field, " ")) != NULL) { 1044 switch (depth++) { 1045 case FIELD_DEPTH_TYPE: 1046 type = part; 1047 break; 1048 case FIELD_DEPTH_NAME: 1049 name = part; 1050 break; 1051 case FIELD_DEPTH_SIZE: 1052 if (!is_struct) 1053 return -EINVAL; 1054 1055 if (kstrtou32(part, 10, &size)) 1056 return -EINVAL; 1057 break; 1058 default: 1059 return -EINVAL; 1060 } 1061 } 1062 1063 if (depth < FIELD_DEPTH_SIZE || !name) 1064 return -EINVAL; 1065 1066 if (depth == FIELD_DEPTH_SIZE) 1067 size = user_field_size(type); 1068 1069 if (size == 0) 1070 return -EINVAL; 1071 1072 if (size < 0) 1073 return size; 1074 1075 *offset = saved_offset + size; 1076 1077 return user_event_add_field(user, type, name, saved_offset, size, 1078 type[0] != 'u', FILTER_OTHER); 1079 } 1080 1081 static int user_event_parse_fields(struct user_event *user, char *args) 1082 { 1083 char *field; 1084 u32 offset = sizeof(struct trace_entry); 1085 int ret = -EINVAL; 1086 1087 if (args == NULL) 1088 return 0; 1089 1090 while ((field = strsep(&args, ";")) != NULL) { 1091 ret = user_event_parse_field(field, user, &offset); 1092 1093 if (ret) 1094 break; 1095 } 1096 1097 return ret; 1098 } 1099 1100 static struct trace_event_fields user_event_fields_array[1]; 1101 1102 static const char *user_field_format(const char *type) 1103 { 1104 if (strcmp(type, "s64") == 0) 1105 return "%lld"; 1106 if (strcmp(type, "u64") == 0) 1107 return "%llu"; 1108 if (strcmp(type, "s32") == 0) 1109 return "%d"; 1110 if (strcmp(type, "u32") == 0) 1111 return "%u"; 1112 if (strcmp(type, "int") == 0) 1113 return "%d"; 1114 if (strcmp(type, "unsigned int") == 0) 1115 return "%u"; 1116 if (strcmp(type, "s16") == 0) 1117 return "%d"; 1118 if (strcmp(type, "u16") == 0) 1119 return "%u"; 1120 if (strcmp(type, "short") == 0) 1121 return "%d"; 1122 if (strcmp(type, "unsigned short") == 0) 1123 return "%u"; 1124 if (strcmp(type, "s8") == 0) 1125 return "%d"; 1126 if (strcmp(type, "u8") == 0) 1127 return "%u"; 1128 if (strcmp(type, "char") == 0) 1129 return "%d"; 1130 if (strcmp(type, "unsigned char") == 0) 1131 return "%u"; 1132 if (strstr(type, "char[") != NULL) 1133 return "%s"; 1134 1135 /* Unknown, likely 
struct, allowed treat as 64-bit */ 1136 return "%llu"; 1137 } 1138 1139 static bool user_field_is_dyn_string(const char *type, const char **str_func) 1140 { 1141 if (str_has_prefix(type, "__data_loc ")) { 1142 *str_func = "__get_str"; 1143 goto check; 1144 } 1145 1146 if (str_has_prefix(type, "__rel_loc ")) { 1147 *str_func = "__get_rel_str"; 1148 goto check; 1149 } 1150 1151 return false; 1152 check: 1153 return strstr(type, "char") != NULL; 1154 } 1155 1156 #define LEN_OR_ZERO (len ? len - pos : 0) 1157 static int user_dyn_field_set_string(int argc, const char **argv, int *iout, 1158 char *buf, int len, bool *colon) 1159 { 1160 int pos = 0, i = *iout; 1161 1162 *colon = false; 1163 1164 for (; i < argc; ++i) { 1165 if (i != *iout) 1166 pos += snprintf(buf + pos, LEN_OR_ZERO, " "); 1167 1168 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", argv[i]); 1169 1170 if (strchr(argv[i], ';')) { 1171 ++i; 1172 *colon = true; 1173 break; 1174 } 1175 } 1176 1177 /* Actual set, advance i */ 1178 if (len != 0) 1179 *iout = i; 1180 1181 return pos + 1; 1182 } 1183 1184 static int user_field_set_string(struct ftrace_event_field *field, 1185 char *buf, int len, bool colon) 1186 { 1187 int pos = 0; 1188 1189 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->type); 1190 pos += snprintf(buf + pos, LEN_OR_ZERO, " "); 1191 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->name); 1192 1193 if (colon) 1194 pos += snprintf(buf + pos, LEN_OR_ZERO, ";"); 1195 1196 return pos + 1; 1197 } 1198 1199 static int user_event_set_print_fmt(struct user_event *user, char *buf, int len) 1200 { 1201 struct ftrace_event_field *field, *next; 1202 struct list_head *head = &user->fields; 1203 int pos = 0, depth = 0; 1204 const char *str_func; 1205 1206 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 1207 1208 list_for_each_entry_safe_reverse(field, next, head, link) { 1209 if (depth != 0) 1210 pos += snprintf(buf + pos, LEN_OR_ZERO, " "); 1211 1212 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s", 1213 field->name, user_field_format(field->type)); 1214 1215 depth++; 1216 } 1217 1218 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 1219 1220 list_for_each_entry_safe_reverse(field, next, head, link) { 1221 if (user_field_is_dyn_string(field->type, &str_func)) 1222 pos += snprintf(buf + pos, LEN_OR_ZERO, 1223 ", %s(%s)", str_func, field->name); 1224 else 1225 pos += snprintf(buf + pos, LEN_OR_ZERO, 1226 ", REC->%s", field->name); 1227 } 1228 1229 return pos + 1; 1230 } 1231 #undef LEN_OR_ZERO 1232 1233 static int user_event_create_print_fmt(struct user_event *user) 1234 { 1235 char *print_fmt; 1236 int len; 1237 1238 len = user_event_set_print_fmt(user, NULL, 0); 1239 1240 print_fmt = kmalloc(len, GFP_KERNEL_ACCOUNT); 1241 1242 if (!print_fmt) 1243 return -ENOMEM; 1244 1245 user_event_set_print_fmt(user, print_fmt, len); 1246 1247 user->call.print_fmt = print_fmt; 1248 1249 return 0; 1250 } 1251 1252 static enum print_line_t user_event_print_trace(struct trace_iterator *iter, 1253 int flags, 1254 struct trace_event *event) 1255 { 1256 return print_event_fields(iter, event); 1257 } 1258 1259 static struct trace_event_functions user_event_funcs = { 1260 .trace = user_event_print_trace, 1261 }; 1262 1263 static int user_event_set_call_visible(struct user_event *user, bool visible) 1264 { 1265 int ret; 1266 const struct cred *old_cred; 1267 struct cred *cred; 1268 1269 cred = prepare_creds(); 1270 1271 if (!cred) 1272 return -ENOMEM; 1273 1274 /* 1275 * While by default tracefs is locked down, systems can be configured 1276 * to allow 
user_event files to be less locked down. The extreme case 1277 * being "other" has read/write access to user_events_data/status. 1278 * 1279 * When not locked down, processes may not have permissions to 1280 * add/remove calls themselves to tracefs. We need to temporarily 1281 * switch to root file permission to allow for this scenario. 1282 */ 1283 cred->fsuid = GLOBAL_ROOT_UID; 1284 1285 old_cred = override_creds(cred); 1286 1287 if (visible) 1288 ret = trace_add_event_call(&user->call); 1289 else 1290 ret = trace_remove_event_call(&user->call); 1291 1292 revert_creds(old_cred); 1293 put_cred(cred); 1294 1295 return ret; 1296 } 1297 1298 static int destroy_user_event(struct user_event *user) 1299 { 1300 int ret = 0; 1301 1302 lockdep_assert_held(&event_mutex); 1303 1304 /* Must destroy fields before call removal */ 1305 user_event_destroy_fields(user); 1306 1307 ret = user_event_set_call_visible(user, false); 1308 1309 if (ret) 1310 return ret; 1311 1312 dyn_event_remove(&user->devent); 1313 hash_del(&user->node); 1314 1315 user_event_destroy_validators(user); 1316 kfree(user->call.print_fmt); 1317 kfree(EVENT_NAME(user)); 1318 kfree(user); 1319 1320 if (current_user_events > 0) 1321 current_user_events--; 1322 else 1323 pr_alert("BUG: Bad current_user_events\n"); 1324 1325 return ret; 1326 } 1327 1328 static struct user_event *find_user_event(struct user_event_group *group, 1329 char *name, u32 *outkey) 1330 { 1331 struct user_event *user; 1332 u32 key = user_event_key(name); 1333 1334 *outkey = key; 1335 1336 hash_for_each_possible(group->register_table, user, node, key) 1337 if (!strcmp(EVENT_NAME(user), name)) { 1338 refcount_inc(&user->refcnt); 1339 return user; 1340 } 1341 1342 return NULL; 1343 } 1344 1345 static int user_event_validate(struct user_event *user, void *data, int len) 1346 { 1347 struct list_head *head = &user->validators; 1348 struct user_event_validator *validator; 1349 void *pos, *end = data + len; 1350 u32 loc, offset, size; 1351 1352 list_for_each_entry(validator, head, link) { 1353 pos = data + validator->offset; 1354 1355 /* Already done min_size check, no bounds check here */ 1356 loc = *(u32 *)pos; 1357 offset = loc & 0xffff; 1358 size = loc >> 16; 1359 1360 if (likely(validator->flags & VALIDATOR_REL)) 1361 pos += offset + sizeof(loc); 1362 else 1363 pos = data + offset; 1364 1365 pos += size; 1366 1367 if (unlikely(pos > end)) 1368 return -EFAULT; 1369 1370 if (likely(validator->flags & VALIDATOR_ENSURE_NULL)) 1371 if (unlikely(*(char *)(pos - 1) != '\0')) 1372 return -EFAULT; 1373 } 1374 1375 return 0; 1376 } 1377 1378 /* 1379 * Writes the user supplied payload out to a trace file. 
 */
static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
			      void *tpdata, bool *faulted)
{
	struct trace_event_file *file;
	struct trace_entry *entry;
	struct trace_event_buffer event_buffer;
	size_t size = sizeof(*entry) + i->count;

	file = (struct trace_event_file *)tpdata;

	if (!file ||
	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
	    trace_trigger_soft_disabled(file))
		return;

	/* Allocates and fills trace_entry, + 1 of this is data payload */
	entry = trace_event_buffer_reserve(&event_buffer, file, size);

	if (unlikely(!entry))
		return;

	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
		goto discard;

	if (!list_empty(&user->validators) &&
	    unlikely(user_event_validate(user, entry, size)))
		goto discard;

	trace_event_buffer_commit(&event_buffer);

	return;
discard:
	*faulted = true;
	__trace_event_discard_commit(event_buffer.buffer,
				     event_buffer.event);
}

#ifdef CONFIG_PERF_EVENTS
/*
 * Writes the user supplied payload out to perf ring buffer.
 */
static void user_event_perf(struct user_event *user, struct iov_iter *i,
			    void *tpdata, bool *faulted)
{
	struct hlist_head *perf_head;

	perf_head = this_cpu_ptr(user->call.perf_events);

	if (perf_head && !hlist_empty(perf_head)) {
		struct trace_entry *perf_entry;
		struct pt_regs *regs;
		size_t size = sizeof(*perf_entry) + i->count;
		int context;

		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
						  &regs, &context);

		if (unlikely(!perf_entry))
			return;

		perf_fetch_caller_regs(regs);

		if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
			goto discard;

		if (!list_empty(&user->validators) &&
		    unlikely(user_event_validate(user, perf_entry, size)))
			goto discard;

		perf_trace_buf_submit(perf_entry, size, context,
				      user->call.event.type, 1, regs,
				      perf_head, NULL);

		return;
discard:
		*faulted = true;
		perf_swevent_put_recursion_context(context);
	}
}
#endif

/*
 * Update the enabled bit among all user processes.
 */
static void update_enable_bit_for(struct user_event *user)
{
	struct tracepoint *tp = &user->tracepoint;
	char status = 0;

	if (atomic_read(&tp->key.enabled) > 0) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			do {
				probe_func = probe_func_ptr->func;

				if (probe_func == user_event_ftrace)
					status |= EVENT_STATUS_FTRACE;
#ifdef CONFIG_PERF_EVENTS
				else if (probe_func == user_event_perf)
					status |= EVENT_STATUS_PERF;
#endif
				else
					status |= EVENT_STATUS_OTHER;
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();
	}

	user->status = status;

	user_event_enabler_update(user);
}

/*
 * Register callback for our events from tracing sub-systems.
1503 */ 1504 static int user_event_reg(struct trace_event_call *call, 1505 enum trace_reg type, 1506 void *data) 1507 { 1508 struct user_event *user = (struct user_event *)call->data; 1509 int ret = 0; 1510 1511 if (!user) 1512 return -ENOENT; 1513 1514 switch (type) { 1515 case TRACE_REG_REGISTER: 1516 ret = tracepoint_probe_register(call->tp, 1517 call->class->probe, 1518 data); 1519 if (!ret) 1520 goto inc; 1521 break; 1522 1523 case TRACE_REG_UNREGISTER: 1524 tracepoint_probe_unregister(call->tp, 1525 call->class->probe, 1526 data); 1527 goto dec; 1528 1529 #ifdef CONFIG_PERF_EVENTS 1530 case TRACE_REG_PERF_REGISTER: 1531 ret = tracepoint_probe_register(call->tp, 1532 call->class->perf_probe, 1533 data); 1534 if (!ret) 1535 goto inc; 1536 break; 1537 1538 case TRACE_REG_PERF_UNREGISTER: 1539 tracepoint_probe_unregister(call->tp, 1540 call->class->perf_probe, 1541 data); 1542 goto dec; 1543 1544 case TRACE_REG_PERF_OPEN: 1545 case TRACE_REG_PERF_CLOSE: 1546 case TRACE_REG_PERF_ADD: 1547 case TRACE_REG_PERF_DEL: 1548 break; 1549 #endif 1550 } 1551 1552 return ret; 1553 inc: 1554 refcount_inc(&user->refcnt); 1555 update_enable_bit_for(user); 1556 return 0; 1557 dec: 1558 update_enable_bit_for(user); 1559 refcount_dec(&user->refcnt); 1560 return 0; 1561 } 1562 1563 static int user_event_create(const char *raw_command) 1564 { 1565 struct user_event_group *group; 1566 struct user_event *user; 1567 char *name; 1568 int ret; 1569 1570 if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX)) 1571 return -ECANCELED; 1572 1573 raw_command += USER_EVENTS_PREFIX_LEN; 1574 raw_command = skip_spaces(raw_command); 1575 1576 name = kstrdup(raw_command, GFP_KERNEL_ACCOUNT); 1577 1578 if (!name) 1579 return -ENOMEM; 1580 1581 group = current_user_event_group(); 1582 1583 if (!group) { 1584 kfree(name); 1585 return -ENOENT; 1586 } 1587 1588 mutex_lock(&group->reg_mutex); 1589 1590 ret = user_event_parse_cmd(group, name, &user); 1591 1592 if (!ret) 1593 refcount_dec(&user->refcnt); 1594 1595 mutex_unlock(&group->reg_mutex); 1596 1597 if (ret) 1598 kfree(name); 1599 1600 return ret; 1601 } 1602 1603 static int user_event_show(struct seq_file *m, struct dyn_event *ev) 1604 { 1605 struct user_event *user = container_of(ev, struct user_event, devent); 1606 struct ftrace_event_field *field, *next; 1607 struct list_head *head; 1608 int depth = 0; 1609 1610 seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user)); 1611 1612 head = trace_get_fields(&user->call); 1613 1614 list_for_each_entry_safe_reverse(field, next, head, link) { 1615 if (depth == 0) 1616 seq_puts(m, " "); 1617 else 1618 seq_puts(m, "; "); 1619 1620 seq_printf(m, "%s %s", field->type, field->name); 1621 1622 if (str_has_prefix(field->type, "struct ")) 1623 seq_printf(m, " %d", field->size); 1624 1625 depth++; 1626 } 1627 1628 seq_puts(m, "\n"); 1629 1630 return 0; 1631 } 1632 1633 static bool user_event_is_busy(struct dyn_event *ev) 1634 { 1635 struct user_event *user = container_of(ev, struct user_event, devent); 1636 1637 return !user_event_last_ref(user); 1638 } 1639 1640 static int user_event_free(struct dyn_event *ev) 1641 { 1642 struct user_event *user = container_of(ev, struct user_event, devent); 1643 1644 if (!user_event_last_ref(user)) 1645 return -EBUSY; 1646 1647 return destroy_user_event(user); 1648 } 1649 1650 static bool user_field_match(struct ftrace_event_field *field, int argc, 1651 const char **argv, int *iout) 1652 { 1653 char *field_name = NULL, *dyn_field_name = NULL; 1654 bool colon = false, match = false; 1655 int dyn_len, 
len; 1656 1657 if (*iout >= argc) 1658 return false; 1659 1660 dyn_len = user_dyn_field_set_string(argc, argv, iout, dyn_field_name, 1661 0, &colon); 1662 1663 len = user_field_set_string(field, field_name, 0, colon); 1664 1665 if (dyn_len != len) 1666 return false; 1667 1668 dyn_field_name = kmalloc(dyn_len, GFP_KERNEL); 1669 field_name = kmalloc(len, GFP_KERNEL); 1670 1671 if (!dyn_field_name || !field_name) 1672 goto out; 1673 1674 user_dyn_field_set_string(argc, argv, iout, dyn_field_name, 1675 dyn_len, &colon); 1676 1677 user_field_set_string(field, field_name, len, colon); 1678 1679 match = strcmp(dyn_field_name, field_name) == 0; 1680 out: 1681 kfree(dyn_field_name); 1682 kfree(field_name); 1683 1684 return match; 1685 } 1686 1687 static bool user_fields_match(struct user_event *user, int argc, 1688 const char **argv) 1689 { 1690 struct ftrace_event_field *field, *next; 1691 struct list_head *head = &user->fields; 1692 int i = 0; 1693 1694 list_for_each_entry_safe_reverse(field, next, head, link) 1695 if (!user_field_match(field, argc, argv, &i)) 1696 return false; 1697 1698 if (i != argc) 1699 return false; 1700 1701 return true; 1702 } 1703 1704 static bool user_event_match(const char *system, const char *event, 1705 int argc, const char **argv, struct dyn_event *ev) 1706 { 1707 struct user_event *user = container_of(ev, struct user_event, devent); 1708 bool match; 1709 1710 match = strcmp(EVENT_NAME(user), event) == 0 && 1711 (!system || strcmp(system, USER_EVENTS_SYSTEM) == 0); 1712 1713 if (match && argc > 0) 1714 match = user_fields_match(user, argc, argv); 1715 1716 return match; 1717 } 1718 1719 static struct dyn_event_operations user_event_dops = { 1720 .create = user_event_create, 1721 .show = user_event_show, 1722 .is_busy = user_event_is_busy, 1723 .free = user_event_free, 1724 .match = user_event_match, 1725 }; 1726 1727 static int user_event_trace_register(struct user_event *user) 1728 { 1729 int ret; 1730 1731 ret = register_trace_event(&user->call.event); 1732 1733 if (!ret) 1734 return -ENODEV; 1735 1736 ret = user_event_set_call_visible(user, true); 1737 1738 if (ret) 1739 unregister_trace_event(&user->call.event); 1740 1741 return ret; 1742 } 1743 1744 /* 1745 * Parses the event name, arguments and flags then registers if successful. 1746 * The name buffer lifetime is owned by this method for success cases only. 1747 * Upon success the returned user_event has its ref count increased by 1. 1748 */ 1749 static int user_event_parse(struct user_event_group *group, char *name, 1750 char *args, char *flags, 1751 struct user_event **newuser) 1752 { 1753 int ret; 1754 u32 key; 1755 struct user_event *user; 1756 1757 /* Prevent dyn_event from racing */ 1758 mutex_lock(&event_mutex); 1759 user = find_user_event(group, name, &key); 1760 mutex_unlock(&event_mutex); 1761 1762 if (user) { 1763 *newuser = user; 1764 /* 1765 * Name is allocated by caller, free it since it already exists. 1766 * Caller only worries about failure cases for freeing. 
1767 */ 1768 kfree(name); 1769 return 0; 1770 } 1771 1772 user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT); 1773 1774 if (!user) 1775 return -ENOMEM; 1776 1777 INIT_LIST_HEAD(&user->class.fields); 1778 INIT_LIST_HEAD(&user->fields); 1779 INIT_LIST_HEAD(&user->validators); 1780 1781 user->group = group; 1782 user->tracepoint.name = name; 1783 1784 ret = user_event_parse_fields(user, args); 1785 1786 if (ret) 1787 goto put_user; 1788 1789 ret = user_event_create_print_fmt(user); 1790 1791 if (ret) 1792 goto put_user; 1793 1794 user->call.data = user; 1795 user->call.class = &user->class; 1796 user->call.name = name; 1797 user->call.flags = TRACE_EVENT_FL_TRACEPOINT; 1798 user->call.tp = &user->tracepoint; 1799 user->call.event.funcs = &user_event_funcs; 1800 user->class.system = group->system_name; 1801 1802 user->class.fields_array = user_event_fields_array; 1803 user->class.get_fields = user_event_get_fields; 1804 user->class.reg = user_event_reg; 1805 user->class.probe = user_event_ftrace; 1806 #ifdef CONFIG_PERF_EVENTS 1807 user->class.perf_probe = user_event_perf; 1808 #endif 1809 1810 mutex_lock(&event_mutex); 1811 1812 if (current_user_events >= max_user_events) { 1813 ret = -EMFILE; 1814 goto put_user_lock; 1815 } 1816 1817 ret = user_event_trace_register(user); 1818 1819 if (ret) 1820 goto put_user_lock; 1821 1822 /* Ensure we track self ref and caller ref (2) */ 1823 refcount_set(&user->refcnt, 2); 1824 1825 dyn_event_init(&user->devent, &user_event_dops); 1826 dyn_event_add(&user->devent, &user->call); 1827 hash_add(group->register_table, &user->node, key); 1828 current_user_events++; 1829 1830 mutex_unlock(&event_mutex); 1831 1832 *newuser = user; 1833 return 0; 1834 put_user_lock: 1835 mutex_unlock(&event_mutex); 1836 put_user: 1837 user_event_destroy_fields(user); 1838 user_event_destroy_validators(user); 1839 kfree(user->call.print_fmt); 1840 kfree(user); 1841 return ret; 1842 } 1843 1844 /* 1845 * Deletes a previously created event if it is no longer being used. 1846 */ 1847 static int delete_user_event(struct user_event_group *group, char *name) 1848 { 1849 u32 key; 1850 struct user_event *user = find_user_event(group, name, &key); 1851 1852 if (!user) 1853 return -ENOENT; 1854 1855 refcount_dec(&user->refcnt); 1856 1857 if (!user_event_last_ref(user)) 1858 return -EBUSY; 1859 1860 return destroy_user_event(user); 1861 } 1862 1863 /* 1864 * Validates the user payload and writes via iterator. 1865 */ 1866 static ssize_t user_events_write_core(struct file *file, struct iov_iter *i) 1867 { 1868 struct user_event_file_info *info = file->private_data; 1869 struct user_event_refs *refs; 1870 struct user_event *user = NULL; 1871 struct tracepoint *tp; 1872 ssize_t ret = i->count; 1873 int idx; 1874 1875 if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx))) 1876 return -EFAULT; 1877 1878 if (idx < 0) 1879 return -EINVAL; 1880 1881 rcu_read_lock_sched(); 1882 1883 refs = rcu_dereference_sched(info->refs); 1884 1885 /* 1886 * The refs->events array is protected by RCU, and new items may be 1887 * added. But the user retrieved from indexing into the events array 1888 * shall be immutable while the file is opened. 
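	 *
	 * (Illustrative only: the idx consumed above is the write_index that
	 * DIAG_IOCSREG reported back to the process, so a typical user write
	 * is a hypothetical sketch such as
	 *
	 *	struct iovec io[2] = {
	 *		{ &write_index, sizeof(__u32) },
	 *		{ &payload, payload_size },
	 *	};
	 *	writev(data_fd, io, 2);
	 *
	 * where everything after the leading index is treated as event
	 * payload.)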
1889 */ 1890 if (likely(refs && idx < refs->count)) 1891 user = refs->events[idx]; 1892 1893 rcu_read_unlock_sched(); 1894 1895 if (unlikely(user == NULL)) 1896 return -ENOENT; 1897 1898 if (unlikely(i->count < user->min_size)) 1899 return -EINVAL; 1900 1901 tp = &user->tracepoint; 1902 1903 /* 1904 * It's possible key.enabled disables after this check, however 1905 * we don't mind if a few events are included in this condition. 1906 */ 1907 if (likely(atomic_read(&tp->key.enabled) > 0)) { 1908 struct tracepoint_func *probe_func_ptr; 1909 user_event_func_t probe_func; 1910 struct iov_iter copy; 1911 void *tpdata; 1912 bool faulted; 1913 1914 if (unlikely(fault_in_iov_iter_readable(i, i->count))) 1915 return -EFAULT; 1916 1917 faulted = false; 1918 1919 rcu_read_lock_sched(); 1920 1921 probe_func_ptr = rcu_dereference_sched(tp->funcs); 1922 1923 if (probe_func_ptr) { 1924 do { 1925 copy = *i; 1926 probe_func = probe_func_ptr->func; 1927 tpdata = probe_func_ptr->data; 1928 probe_func(user, ©, tpdata, &faulted); 1929 } while ((++probe_func_ptr)->func); 1930 } 1931 1932 rcu_read_unlock_sched(); 1933 1934 if (unlikely(faulted)) 1935 return -EFAULT; 1936 } 1937 1938 return ret; 1939 } 1940 1941 static int user_events_open(struct inode *node, struct file *file) 1942 { 1943 struct user_event_group *group; 1944 struct user_event_file_info *info; 1945 1946 group = current_user_event_group(); 1947 1948 if (!group) 1949 return -ENOENT; 1950 1951 info = kzalloc(sizeof(*info), GFP_KERNEL_ACCOUNT); 1952 1953 if (!info) 1954 return -ENOMEM; 1955 1956 info->group = group; 1957 1958 file->private_data = info; 1959 1960 return 0; 1961 } 1962 1963 static ssize_t user_events_write(struct file *file, const char __user *ubuf, 1964 size_t count, loff_t *ppos) 1965 { 1966 struct iovec iov; 1967 struct iov_iter i; 1968 1969 if (unlikely(*ppos != 0)) 1970 return -EFAULT; 1971 1972 if (unlikely(import_single_range(ITER_SOURCE, (char __user *)ubuf, 1973 count, &iov, &i))) 1974 return -EFAULT; 1975 1976 return user_events_write_core(file, &i); 1977 } 1978 1979 static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i) 1980 { 1981 return user_events_write_core(kp->ki_filp, i); 1982 } 1983 1984 static int user_events_ref_add(struct user_event_file_info *info, 1985 struct user_event *user) 1986 { 1987 struct user_event_group *group = info->group; 1988 struct user_event_refs *refs, *new_refs; 1989 int i, size, count = 0; 1990 1991 refs = rcu_dereference_protected(info->refs, 1992 lockdep_is_held(&group->reg_mutex)); 1993 1994 if (refs) { 1995 count = refs->count; 1996 1997 for (i = 0; i < count; ++i) 1998 if (refs->events[i] == user) 1999 return i; 2000 } 2001 2002 size = struct_size(refs, events, count + 1); 2003 2004 new_refs = kzalloc(size, GFP_KERNEL_ACCOUNT); 2005 2006 if (!new_refs) 2007 return -ENOMEM; 2008 2009 new_refs->count = count + 1; 2010 2011 for (i = 0; i < count; ++i) 2012 new_refs->events[i] = refs->events[i]; 2013 2014 new_refs->events[i] = user; 2015 2016 refcount_inc(&user->refcnt); 2017 2018 rcu_assign_pointer(info->refs, new_refs); 2019 2020 if (refs) 2021 kfree_rcu(refs, rcu); 2022 2023 return i; 2024 } 2025 2026 static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg) 2027 { 2028 u32 size; 2029 long ret; 2030 2031 ret = get_user(size, &ureg->size); 2032 2033 if (ret) 2034 return ret; 2035 2036 if (size > PAGE_SIZE) 2037 return -E2BIG; 2038 2039 if (size < offsetofend(struct user_reg, write_index)) 2040 return -EINVAL; 2041 2042 ret = copy_struct_from_user(kreg, 
sizeof(*kreg), ureg, size); 2043 2044 if (ret) 2045 return ret; 2046 2047 /* Ensure no flags, since we don't support any yet */ 2048 if (kreg->flags != 0) 2049 return -EINVAL; 2050 2051 /* Ensure supported size */ 2052 switch (kreg->enable_size) { 2053 case 4: 2054 /* 32-bit */ 2055 break; 2056 #if BITS_PER_LONG >= 64 2057 case 8: 2058 /* 64-bit */ 2059 break; 2060 #endif 2061 default: 2062 return -EINVAL; 2063 } 2064 2065 /* Ensure natural alignment */ 2066 if (kreg->enable_addr % kreg->enable_size) 2067 return -EINVAL; 2068 2069 /* Ensure bit range for size */ 2070 if (kreg->enable_bit > (kreg->enable_size * BITS_PER_BYTE) - 1) 2071 return -EINVAL; 2072 2073 /* Ensure accessible */ 2074 if (!access_ok((const void __user *)(uintptr_t)kreg->enable_addr, 2075 kreg->enable_size)) 2076 return -EFAULT; 2077 2078 kreg->size = size; 2079 2080 return 0; 2081 } 2082 2083 /* 2084 * Registers a user_event on behalf of a user process. 2085 */ 2086 static long user_events_ioctl_reg(struct user_event_file_info *info, 2087 unsigned long uarg) 2088 { 2089 struct user_reg __user *ureg = (struct user_reg __user *)uarg; 2090 struct user_reg reg; 2091 struct user_event *user; 2092 struct user_event_enabler *enabler; 2093 char *name; 2094 long ret; 2095 int write_result; 2096 2097 ret = user_reg_get(ureg, ®); 2098 2099 if (ret) 2100 return ret; 2101 2102 /* 2103 * Prevent users from using the same address and bit multiple times 2104 * within the same mm address space. This can cause unexpected behavior 2105 * for user processes that is far easier to debug if this is explictly 2106 * an error upon registering. 2107 */ 2108 if (current_user_event_enabler_exists((unsigned long)reg.enable_addr, 2109 reg.enable_bit)) 2110 return -EADDRINUSE; 2111 2112 name = strndup_user((const char __user *)(uintptr_t)reg.name_args, 2113 MAX_EVENT_DESC); 2114 2115 if (IS_ERR(name)) { 2116 ret = PTR_ERR(name); 2117 return ret; 2118 } 2119 2120 ret = user_event_parse_cmd(info->group, name, &user); 2121 2122 if (ret) { 2123 kfree(name); 2124 return ret; 2125 } 2126 2127 ret = user_events_ref_add(info, user); 2128 2129 /* No longer need parse ref, ref_add either worked or not */ 2130 refcount_dec(&user->refcnt); 2131 2132 /* Positive number is index and valid */ 2133 if (ret < 0) 2134 return ret; 2135 2136 /* 2137 * user_events_ref_add succeeded: 2138 * At this point we have a user_event, it's lifetime is bound by the 2139 * reference count, not this file. If anything fails, the user_event 2140 * still has a reference until the file is released. During release 2141 * any remaining references (from user_events_ref_add) are decremented. 2142 * 2143 * Attempt to create an enabler, which too has a lifetime tied in the 2144 * same way for the event. Once the task that caused the enabler to be 2145 * created exits or issues exec() then the enablers it has created 2146 * will be destroyed and the ref to the event will be decremented. 2147 */ 2148 enabler = user_event_enabler_create(®, user, &write_result); 2149 2150 if (!enabler) 2151 return -ENOMEM; 2152 2153 /* Write failed/faulted, give error back to caller */ 2154 if (write_result) 2155 return write_result; 2156 2157 put_user((u32)ret, &ureg->write_index); 2158 2159 return 0; 2160 } 2161 2162 /* 2163 * Deletes a user_event on behalf of a user process. 

/*
 * Deletes a user_event on behalf of a user process.
 */
static long user_events_ioctl_del(struct user_event_file_info *info,
				  unsigned long uarg)
{
	void __user *ubuf = (void __user *)uarg;
	char *name;
	long ret;

	name = strndup_user(ubuf, MAX_EVENT_DESC);

	if (IS_ERR(name))
		return PTR_ERR(name);

	/* event_mutex prevents dyn_event from racing */
	mutex_lock(&event_mutex);
	ret = delete_user_event(info->group, name);
	mutex_unlock(&event_mutex);

	kfree(name);

	return ret;
}

static long user_unreg_get(struct user_unreg __user *ureg,
			   struct user_unreg *kreg)
{
	u32 size;
	long ret;

	ret = get_user(size, &ureg->size);

	if (ret)
		return ret;

	if (size > PAGE_SIZE)
		return -E2BIG;

	if (size < offsetofend(struct user_unreg, disable_addr))
		return -EINVAL;

	ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);

	/* Ensure no reserved values, since we don't support any yet */
	if (kreg->__reserved || kreg->__reserved2)
		return -EINVAL;

	return ret;
}

static int user_event_mm_clear_bit(struct user_event_mm *user_mm,
				   unsigned long uaddr, unsigned char bit)
{
	struct user_event_enabler enabler;
	int result;
	int attempt = 0;

	memset(&enabler, 0, sizeof(enabler));
	enabler.addr = uaddr;
	enabler.values = bit;
retry:
	/* Prevents state changes from racing with new enablers */
	mutex_lock(&event_mutex);

	/* Force the bit to be cleared, since no event is attached */
	mmap_read_lock(user_mm->mm);
	result = user_event_enabler_write(user_mm, &enabler, false, &attempt);
	mmap_read_unlock(user_mm->mm);

	mutex_unlock(&event_mutex);

	if (result) {
		/* Attempt to fault-in and retry if it worked */
		if (!user_event_mm_fault_in(user_mm, uaddr, attempt))
			goto retry;
	}

	return result;
}

/*
 * Unregisters an enablement address/bit within a task/user mm.
 */
static long user_events_ioctl_unreg(unsigned long uarg)
{
	struct user_unreg __user *ureg = (struct user_unreg __user *)uarg;
	struct user_event_mm *mm = current->user_event_mm;
	struct user_event_enabler *enabler, *next;
	struct user_unreg reg;
	long ret;

	ret = user_unreg_get(ureg, &reg);

	if (ret)
		return ret;

	if (!mm)
		return -ENOENT;

	ret = -ENOENT;

	/*
	 * The freeing and faulting flags indicate whether the enabler is
	 * still in use at all. When faulting is set, a page fault is being
	 * handled asynchronously. If freeing is set during that async fault,
	 * the enabler will be destroyed once the fault completes. If no async
	 * fault is happening, we can destroy it now since we hold the
	 * event_mutex during these checks.
	 */
	mutex_lock(&event_mutex);

	list_for_each_entry_safe(enabler, next, &mm->enablers, link)
		if (enabler->addr == reg.disable_addr &&
		    (enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) {
			set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));

			if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
				user_event_enabler_destroy(enabler);

			/* Removed at least one */
			ret = 0;
		}

	mutex_unlock(&event_mutex);

	/* Ensure bit is now cleared for user, regardless of event status */
	if (!ret)
		ret = user_event_mm_clear_bit(mm, reg.disable_addr,
					      reg.disable_bit);

	return ret;
}
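
/*
 * Illustrative sketch (not part of this file): tearing down the enable bit
 * with DIAG_IOCSUNREG once a process no longer wants updates at that
 * address. "data_fd" and "enable_addr" mirror the values used in the
 * registration sketch above and are assumptions for the example only.
 */
#if 0	/* user-space example, never built with the kernel */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/user_events.h>

static int unregister_test_event(int data_fd, void *enable_addr)
{
	struct user_unreg unreg;

	memset(&unreg, 0, sizeof(unreg));
	unreg.size = sizeof(unreg);
	unreg.disable_bit = 0;		/* same bit passed to DIAG_IOCSREG */
	unreg.disable_addr = (uint64_t)(uintptr_t)enable_addr;

	/* Clears the bit for this mm even if the event stays registered */
	return ioctl(data_fd, DIAG_IOCSUNREG, &unreg);
}
#endif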

/*
 * Handles the ioctl from user mode to register or alter operations.
 */
static long user_events_ioctl(struct file *file, unsigned int cmd,
			      unsigned long uarg)
{
	struct user_event_file_info *info = file->private_data;
	struct user_event_group *group = info->group;
	long ret = -ENOTTY;

	switch (cmd) {
	case DIAG_IOCSREG:
		mutex_lock(&group->reg_mutex);
		ret = user_events_ioctl_reg(info, uarg);
		mutex_unlock(&group->reg_mutex);
		break;

	case DIAG_IOCSDEL:
		mutex_lock(&group->reg_mutex);
		ret = user_events_ioctl_del(info, uarg);
		mutex_unlock(&group->reg_mutex);
		break;

	case DIAG_IOCSUNREG:
		mutex_lock(&group->reg_mutex);
		ret = user_events_ioctl_unreg(uarg);
		mutex_unlock(&group->reg_mutex);
		break;
	}

	return ret;
}

/*
 * Handles the final close of the file from user mode.
 */
static int user_events_release(struct inode *node, struct file *file)
{
	struct user_event_file_info *info = file->private_data;
	struct user_event_group *group;
	struct user_event_refs *refs;
	struct user_event *user;
	int i;

	if (!info)
		return -EINVAL;

	group = info->group;

	/*
	 * Ensure refs cannot change under any situation by taking the
	 * register mutex during the final freeing of the references.
	 */
	mutex_lock(&group->reg_mutex);

	refs = info->refs;

	if (!refs)
		goto out;

	/*
	 * The lifetime of refs has reached an end; it is tied to this file.
	 * The underlying user_events are ref counted, and cannot be freed.
	 * After this decrement, the user_events may be freed elsewhere.
	 */
	for (i = 0; i < refs->count; ++i) {
		user = refs->events[i];

		if (user)
			refcount_dec(&user->refcnt);
	}
out:
	file->private_data = NULL;

	mutex_unlock(&group->reg_mutex);

	kfree(refs);
	kfree(info);

	return 0;
}

static const struct file_operations user_data_fops = {
	.open = user_events_open,
	.write = user_events_write,
	.write_iter = user_events_write_iter,
	.unlocked_ioctl	= user_events_ioctl,
	.release = user_events_release,
};

static void *user_seq_start(struct seq_file *m, loff_t *pos)
{
	if (*pos)
		return NULL;

	return (void *)1;
}

static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void user_seq_stop(struct seq_file *m, void *p)
{
}

static int user_seq_show(struct seq_file *m, void *p)
{
	struct user_event_group *group = m->private;
	struct user_event *user;
	char status;
	int i, active = 0, busy = 0;

	if (!group)
		return -EINVAL;

	mutex_lock(&group->reg_mutex);

	hash_for_each(group->register_table, i, user, node) {
		status = user->status;

		seq_printf(m, "%s", EVENT_NAME(user));

		if (status != 0) {
			seq_puts(m, " # Used by");
			if (status & EVENT_STATUS_FTRACE)
				seq_puts(m, " ftrace");
			if (status & EVENT_STATUS_PERF)
				seq_puts(m, " perf");
			if (status & EVENT_STATUS_OTHER)
				seq_puts(m, " other");
			busy++;
		}

		seq_puts(m, "\n");
		active++;
	}

	mutex_unlock(&group->reg_mutex);

	seq_puts(m, "\n");
	seq_printf(m, "Active: %d\n", active);
	seq_printf(m, "Busy: %d\n", busy);

	return 0;
}

static const struct seq_operations user_seq_ops = {
	.start = user_seq_start,
	.next = user_seq_next,
	.stop = user_seq_stop,
	.show = user_seq_show,
};

static int user_status_open(struct inode *node, struct file *file)
{
	struct user_event_group *group;
	int ret;

	group = current_user_event_group();

	if (!group)
		return -ENOENT;

	ret = seq_open(file, &user_seq_ops);

	if (!ret) {
		/* Chain group to seq_file */
		struct seq_file *m = file->private_data;

		m->private = group;
	}

	return ret;
}

static const struct file_operations user_status_fops = {
	.open = user_status_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
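
/*
 * Illustrative sketch (not part of this file): dumping the text produced
 * by user_seq_show() above. The tracefs mount point is an assumption for
 * the example only; each event line looks like "<name> # Used by ..." and
 * the output ends with the Active:/Busy: totals.
 */
#if 0	/* user-space example, never built with the kernel */
#include <stdio.h>

static int dump_user_events_status(void)
{
	FILE *f = fopen("/sys/kernel/tracing/user_events_status", "r");
	char line[256];

	if (!f)
		return -1;

	/* Print every line, including the trailing Active:/Busy: summary */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}
#endif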

/*
 * Creates a set of tracefs files to allow user mode interactions.
 */
static int create_user_tracefs(void)
{
	struct dentry *edata, *emmap;

	edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
				    NULL, NULL, &user_data_fops);

	if (!edata) {
		pr_warn("Could not create tracefs 'user_events_data' entry\n");
		goto err;
	}

	emmap = tracefs_create_file("user_events_status", TRACE_MODE_READ,
				    NULL, NULL, &user_status_fops);

	if (!emmap) {
		tracefs_remove(edata);
		pr_warn("Could not create tracefs 'user_events_status' entry\n");
		goto err;
	}

	return 0;
err:
	return -ENODEV;
}

static int set_max_user_events_sysctl(struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	mutex_lock(&event_mutex);

	ret = proc_douintvec(table, write, buffer, lenp, ppos);

	mutex_unlock(&event_mutex);

	return ret;
}

static struct ctl_table user_event_sysctls[] = {
	{
		.procname = "user_events_max",
		.data = &max_user_events,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = set_max_user_events_sysctl,
	},
	{}
};

static int __init trace_events_user_init(void)
{
	int ret;

	fault_cache = KMEM_CACHE(user_event_enabler_fault, 0);

	if (!fault_cache)
		return -ENOMEM;

	init_group = user_event_group_create(&init_user_ns);

	if (!init_group) {
		kmem_cache_destroy(fault_cache);
		return -ENOMEM;
	}

	ret = create_user_tracefs();

	if (ret) {
		pr_warn("user_events could not register with tracefs\n");
		user_event_group_destroy(init_group);
		kmem_cache_destroy(fault_cache);
		init_group = NULL;
		return ret;
	}

	if (dyn_event_register(&user_event_dops))
		pr_warn("user_events could not register with dyn_events\n");

	register_sysctl_init("kernel", user_event_sysctls);

	return 0;
}

fs_initcall(trace_events_user_init);
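
/*
 * Illustrative sketch (not part of this file): adjusting the per-system
 * event limit exposed by the sysctl table above. The procfs path follows
 * from registering "user_events_max" under "kernel"; writing typically
 * requires root, and the new limit value is supplied by the caller.
 */
#if 0	/* user-space example, never built with the kernel */
#include <stdio.h>

static int raise_user_events_max(unsigned int new_max)
{
	FILE *f = fopen("/proc/sys/kernel/user_events_max", "w");

	if (!f)
		return -1;	/* likely insufficient privileges */

	/* proc_douintvec() parses this as an unsigned integer */
	fprintf(f, "%u\n", new_max);
	fclose(f);
	return 0;
}
#endif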