// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, Microsoft Corporation.
 *
 * Authors:
 *   Beau Belgrave <beaub@linux.microsoft.com>
 */

#include <linux/bitmap.h>
#include <linux/cdev.h>
#include <linux/hashtable.h>
#include <linux/list.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/ioctl.h>
#include <linux/jhash.h>
#include <linux/trace_events.h>
#include <linux/tracefs.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <uapi/linux/user_events.h>
#include "trace.h"
#include "trace_dynevent.h"

#define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1)

#define FIELD_DEPTH_TYPE 0
#define FIELD_DEPTH_NAME 1
#define FIELD_DEPTH_SIZE 2

/*
 * Limits how many trace_event calls user processes can create:
 * Must be multiple of PAGE_SIZE.
 */
#define MAX_PAGES 1
#define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)

/* Limit how long an event name plus args can be within the subsystem. */
#define MAX_EVENT_DESC 512
#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
#define MAX_FIELD_ARRAY_SIZE 1024

static char *register_page_data;

static DEFINE_MUTEX(reg_mutex);
static DEFINE_HASHTABLE(register_table, 4);
static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);

/*
 * Stores per-event properties. As users register events within a file,
 * a user_event might be created if it does not already exist. These are
 * globally used and their lifetime is tied to the refcnt member. These
 * cannot go away until the refcnt reaches zero.
 */
struct user_event {
	struct tracepoint tracepoint;
	struct trace_event_call call;
	struct trace_event_class class;
	struct dyn_event devent;
	struct hlist_node node;
	struct list_head fields;
	atomic_t refcnt;
	int index;
	int flags;
};

/*
 * Stores per-file event references. As users register events within a file,
 * this structure is modified and freed via RCU. The lifetime of this struct
 * is tied to the lifetime of the file. These are not shared and only
 * accessible by the file that created them.
 */
struct user_event_refs {
	struct rcu_head rcu;
	int count;
	struct user_event *events[];
};

typedef void (*user_event_func_t) (struct user_event *user,
				   void *data, u32 datalen,
				   void *tpdata);

static int user_event_parse(char *name, char *args, char *flags,
			    struct user_event **newuser);

static u32 user_event_key(char *name)
{
	return jhash(name, strlen(name), 0);
}

static struct list_head *user_event_get_fields(struct trace_event_call *call)
{
	struct user_event *user = (struct user_event *)call->data;

	return &user->fields;
}

/*
 * Parses a register command for user_events
 * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]]
 *
 * Example event named 'test' with a 20 char 'msg' field with an unsigned int
 * 'id' field after:
 * test char[20] msg;unsigned int id
 *
 * NOTE: Offsets are from the user data perspective, they are not from the
 * trace_entry/buffer perspective. We automatically add the common properties
 * sizes to the offset for the user.
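 *
 * For illustration only, a sketch of how such a command string is expected
 * to reach this parser from user space via the register ioctl (DIAG_IOCSREG
 * on the user_events_data file; 'data_fd' here is an assumed, already-open
 * descriptor for that file, and the struct layout comes from
 * <uapi/linux/user_events.h>):
 *
 *	struct user_reg reg = {0};
 *
 *	reg.size = sizeof(reg);
 *	reg.name_args = (__u64)(uintptr_t)"test char[20] msg;unsigned int id";
 *
 *	ioctl(data_fd, DIAG_IOCSREG, &reg);
 *
 * On success reg.write_index and reg.status_index are filled in by the
 * kernel side below.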
 */
static int user_event_parse_cmd(char *raw_command, struct user_event **newuser)
{
	char *name = raw_command;
	char *args = strpbrk(name, " ");
	char *flags;

	if (args)
		*args++ = '\0';

	flags = strpbrk(name, ":");

	if (flags)
		*flags++ = '\0';

	return user_event_parse(name, args, flags, newuser);
}

static int user_field_array_size(const char *type)
{
	const char *start = strchr(type, '[');
	char val[8];
	char *bracket;
	int size = 0;

	if (start == NULL)
		return -EINVAL;

	if (strscpy(val, start + 1, sizeof(val)) <= 0)
		return -EINVAL;

	bracket = strchr(val, ']');

	if (!bracket)
		return -EINVAL;

	*bracket = '\0';

	if (kstrtouint(val, 0, &size))
		return -EINVAL;

	if (size > MAX_FIELD_ARRAY_SIZE)
		return -EINVAL;

	return size;
}

static int user_field_size(const char *type)
{
	/* long is not allowed from a user, since it's ambiguous in size */
	if (strcmp(type, "s64") == 0)
		return sizeof(s64);
	if (strcmp(type, "u64") == 0)
		return sizeof(u64);
	if (strcmp(type, "s32") == 0)
		return sizeof(s32);
	if (strcmp(type, "u32") == 0)
		return sizeof(u32);
	if (strcmp(type, "int") == 0)
		return sizeof(int);
	if (strcmp(type, "unsigned int") == 0)
		return sizeof(unsigned int);
	if (strcmp(type, "s16") == 0)
		return sizeof(s16);
	if (strcmp(type, "u16") == 0)
		return sizeof(u16);
	if (strcmp(type, "short") == 0)
		return sizeof(short);
	if (strcmp(type, "unsigned short") == 0)
		return sizeof(unsigned short);
	if (strcmp(type, "s8") == 0)
		return sizeof(s8);
	if (strcmp(type, "u8") == 0)
		return sizeof(u8);
	if (strcmp(type, "char") == 0)
		return sizeof(char);
	if (strcmp(type, "unsigned char") == 0)
		return sizeof(unsigned char);
	if (str_has_prefix(type, "char["))
		return user_field_array_size(type);
	if (str_has_prefix(type, "unsigned char["))
		return user_field_array_size(type);
	if (str_has_prefix(type, "__data_loc "))
		return sizeof(u32);
	if (str_has_prefix(type, "__rel_loc "))
		return sizeof(u32);

	/* Unknown basic type, error */
	return -EINVAL;
}

static void user_event_destroy_fields(struct user_event *user)
{
	struct ftrace_event_field *field, *next;
	struct list_head *head = &user->fields;

	list_for_each_entry_safe(field, next, head, link) {
		list_del(&field->link);
		kfree(field);
	}
}

static int user_event_add_field(struct user_event *user, const char *type,
				const char *name, int offset, int size,
				int is_signed, int filter_type)
{
	struct ftrace_event_field *field;

	field = kmalloc(sizeof(*field), GFP_KERNEL);

	if (!field)
		return -ENOMEM;

	field->type = type;
	field->name = name;
	field->offset = offset;
	field->size = size;
	field->is_signed = is_signed;
	field->filter_type = filter_type;

	list_add(&field->link, &user->fields);

	return 0;
}

/*
 * Parses the values of a field within the description
 * Format: type name [size]
 */
static int user_event_parse_field(char *field, struct user_event *user,
				  u32 *offset)
{
	char *part, *type, *name;
	u32 depth = 0, saved_offset = *offset;
	int len, size = -EINVAL;
	bool is_struct = false;

	field = skip_spaces(field);

	if (*field == '\0')
		return 0;

	/* Handle types that have a space within */
	len = str_has_prefix(field, "unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "struct ");
	if (len) {
		is_struct = true;
		goto skip_next;
	}

	len = str_has_prefix(field, "__data_loc unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__data_loc ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__rel_loc unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__rel_loc ");
	if (len)
		goto skip_next;

	goto parse;
skip_next:
	type = field;
	field = strpbrk(field + len, " ");

	if (field == NULL)
		return -EINVAL;

	*field++ = '\0';
	depth++;
parse:
	while ((part = strsep(&field, " ")) != NULL) {
		switch (depth++) {
		case FIELD_DEPTH_TYPE:
			type = part;
			break;
		case FIELD_DEPTH_NAME:
			name = part;
			break;
		case FIELD_DEPTH_SIZE:
			if (!is_struct)
				return -EINVAL;

			if (kstrtou32(part, 10, &size))
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
	}

	if (depth < FIELD_DEPTH_SIZE)
		return -EINVAL;

	if (depth == FIELD_DEPTH_SIZE)
		size = user_field_size(type);

	if (size == 0)
		return -EINVAL;

	if (size < 0)
		return size;

	*offset = saved_offset + size;

	return user_event_add_field(user, type, name, saved_offset, size,
				    type[0] != 'u', FILTER_OTHER);
}

static void user_event_parse_flags(struct user_event *user, char *flags)
{
	char *flag;

	if (flags == NULL)
		return;

	while ((flag = strsep(&flags, ",")) != NULL) {
		if (strcmp(flag, "BPF_ITER") == 0)
			user->flags |= FLAG_BPF_ITER;
	}
}

static int user_event_parse_fields(struct user_event *user, char *args)
{
	char *field;
	u32 offset = sizeof(struct trace_entry);
	int ret = -EINVAL;

	if (args == NULL)
		return 0;

	while ((field = strsep(&args, ";")) != NULL) {
		ret = user_event_parse_field(field, user, &offset);

		if (ret)
			break;
	}

	return ret;
}

static struct trace_event_fields user_event_fields_array[1];

static enum print_line_t user_event_print_trace(struct trace_iterator *iter,
						int flags,
						struct trace_event *event)
{
	/* Unsafe to try to decode user provided print_fmt, use hex */
	trace_print_hex_dump_seq(&iter->seq, "", DUMP_PREFIX_OFFSET, 16,
				 1, iter->ent, iter->ent_size, true);

	return trace_handle_return(&iter->seq);
}

static struct trace_event_functions user_event_funcs = {
	.trace = user_event_print_trace,
};

static int destroy_user_event(struct user_event *user)
{
	int ret = 0;

	/* Must destroy fields before call removal */
	user_event_destroy_fields(user);

	ret = trace_remove_event_call(&user->call);

	if (ret)
		return ret;

	dyn_event_remove(&user->devent);

	register_page_data[user->index] = 0;
	clear_bit(user->index, page_bitmap);
	hash_del(&user->node);

	kfree(EVENT_NAME(user));
	kfree(user);

	return ret;
}

static struct user_event *find_user_event(char *name, u32 *outkey)
{
	struct user_event *user;
	u32 key = user_event_key(name);

	*outkey = key;

	hash_for_each_possible(register_table, user, node, key)
		if (!strcmp(EVENT_NAME(user), name))
			return user;

	return NULL;
}

/*
 * Writes the user supplied payload out to a trace file.
 */
static void user_event_ftrace(struct user_event *user, void *data, u32 datalen,
			      void *tpdata)
{
	struct trace_event_file *file;
	struct trace_entry *entry;
	struct trace_event_buffer event_buffer;

	file = (struct trace_event_file *)tpdata;

	if (!file ||
	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
	    trace_trigger_soft_disabled(file))
		return;

	/* Allocates and fills trace_entry; the data payload follows at entry + 1 */
	entry = trace_event_buffer_reserve(&event_buffer, file,
					   sizeof(*entry) + datalen);

	if (unlikely(!entry))
		return;

	memcpy(entry + 1, data, datalen);

	trace_event_buffer_commit(&event_buffer);
}

/*
 * Update the register page that is shared between user processes.
 */
static void update_reg_page_for(struct user_event *user)
{
	struct tracepoint *tp = &user->tracepoint;
	char status = 0;

	if (atomic_read(&tp->key.enabled) > 0) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			do {
				probe_func = probe_func_ptr->func;

				if (probe_func == user_event_ftrace)
					status |= EVENT_STATUS_FTRACE;
				else
					status |= EVENT_STATUS_OTHER;
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();
	}

	register_page_data[user->index] = status;
}

/*
 * Register callback for our events from tracing sub-systems.
 */
static int user_event_reg(struct trace_event_call *call,
			  enum trace_reg type,
			  void *data)
{
	struct user_event *user = (struct user_event *)call->data;
	int ret = 0;

	if (!user)
		return -ENOENT;

	switch (type) {
	case TRACE_REG_REGISTER:
		ret = tracepoint_probe_register(call->tp,
						call->class->probe,
						data);
		if (!ret)
			goto inc;
		break;

	case TRACE_REG_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->probe,
					    data);
		goto dec;

	default:
		break;
	}

	return ret;
inc:
	atomic_inc(&user->refcnt);
	update_reg_page_for(user);
	return 0;
dec:
	update_reg_page_for(user);
	atomic_dec(&user->refcnt);
	return 0;
}

static int user_event_create(const char *raw_command)
{
	struct user_event *user;
	char *name;
	int ret;

	if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX))
		return -ECANCELED;

	raw_command += USER_EVENTS_PREFIX_LEN;
	raw_command = skip_spaces(raw_command);

	name = kstrdup(raw_command, GFP_KERNEL);

	if (!name)
		return -ENOMEM;

	mutex_lock(&reg_mutex);
	ret = user_event_parse_cmd(name, &user);
	mutex_unlock(&reg_mutex);

	if (ret)
		kfree(name);

	return ret;
}

static int user_event_show(struct seq_file *m, struct dyn_event *ev)
{
	struct user_event *user = container_of(ev, struct user_event, devent);
	struct ftrace_event_field *field, *next;
	struct list_head *head;
	int depth = 0;

	seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user));

	head = trace_get_fields(&user->call);

	list_for_each_entry_safe_reverse(field, next, head, link) {
		if (depth == 0)
			seq_puts(m, " ");
		else
			seq_puts(m, "; ");

		seq_printf(m, "%s %s", field->type, field->name);

		if (str_has_prefix(field->type, "struct "))
			seq_printf(m, " %d", field->size);

		depth++;
	}

	seq_puts(m, "\n");

	return 0;
}

static bool user_event_is_busy(struct dyn_event *ev)
{
	struct user_event *user = container_of(ev, struct user_event, devent);

	return atomic_read(&user->refcnt) != 0;
}

static int user_event_free(struct dyn_event *ev)
{
	struct user_event *user = container_of(ev, struct user_event, devent);

	if (atomic_read(&user->refcnt) != 0)
		return -EBUSY;

	return destroy_user_event(user);
}

static bool user_event_match(const char *system, const char *event,
			     int argc, const char **argv, struct dyn_event *ev)
{
	struct user_event *user = container_of(ev, struct user_event, devent);

	return strcmp(EVENT_NAME(user), event) == 0 &&
		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
}

static struct dyn_event_operations user_event_dops = {
	.create = user_event_create,
	.show = user_event_show,
	.is_busy = user_event_is_busy,
	.free = user_event_free,
	.match = user_event_match,
};

static int user_event_trace_register(struct user_event *user)
{
	int ret;

	ret = register_trace_event(&user->call.event);

	if (!ret)
		return -ENODEV;

	ret = trace_add_event_call(&user->call);

	if (ret)
		unregister_trace_event(&user->call.event);

	return ret;
}

/*
 * Parses the event name, arguments and flags then registers if successful.
 * The name buffer lifetime is owned by this method for success cases only.
 */
static int user_event_parse(char *name, char *args, char *flags,
			    struct user_event **newuser)
{
	int ret;
	int index;
	u32 key;
	struct user_event *user = find_user_event(name, &key);

	if (user) {
		*newuser = user;
		/*
		 * Name is allocated by caller, free it since it already exists.
		 * Caller only worries about failure cases for freeing.
		 */
		kfree(name);
		return 0;
	}

	index = find_first_zero_bit(page_bitmap, MAX_EVENTS);

	if (index == MAX_EVENTS)
		return -EMFILE;

	user = kzalloc(sizeof(*user), GFP_KERNEL);

	if (!user)
		return -ENOMEM;

	INIT_LIST_HEAD(&user->class.fields);
	INIT_LIST_HEAD(&user->fields);

	user->tracepoint.name = name;

	user_event_parse_flags(user, flags);

	ret = user_event_parse_fields(user, args);

	if (ret)
		goto put_user;

	/* Minimal print format */
	user->call.print_fmt = "\"\"";

	user->call.data = user;
	user->call.class = &user->class;
	user->call.name = name;
	user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
	user->call.tp = &user->tracepoint;
	user->call.event.funcs = &user_event_funcs;

	user->class.system = USER_EVENTS_SYSTEM;
	user->class.fields_array = user_event_fields_array;
	user->class.get_fields = user_event_get_fields;
	user->class.reg = user_event_reg;
	user->class.probe = user_event_ftrace;

	mutex_lock(&event_mutex);
	ret = user_event_trace_register(user);
	mutex_unlock(&event_mutex);

	if (ret)
		goto put_user;

	user->index = index;
	dyn_event_init(&user->devent, &user_event_dops);
	dyn_event_add(&user->devent, &user->call);
	set_bit(user->index, page_bitmap);
	hash_add(register_table, &user->node, key);

	*newuser = user;
	return 0;
put_user:
	user_event_destroy_fields(user);
	kfree(user);
	return ret;
}

/*
 * Deletes a previously created event if it is no longer being used.
 */
static int delete_user_event(char *name)
{
	u32 key;
	int ret;
	struct user_event *user = find_user_event(name, &key);

	if (!user)
		return -ENOENT;

	if (atomic_read(&user->refcnt) != 0)
		return -EBUSY;

	mutex_lock(&event_mutex);
	ret = destroy_user_event(user);
	mutex_unlock(&event_mutex);

	return ret;
}

/*
 * Validates the user payload and writes via iterator.
 */
static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
{
	struct user_event_refs *refs;
	struct user_event *user = NULL;
	struct tracepoint *tp;
	ssize_t ret = i->count;
	int idx;

	if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx)))
		return -EFAULT;

	rcu_read_lock_sched();

	refs = rcu_dereference_sched(file->private_data);

	/*
	 * The refs->events array is protected by RCU, and new items may be
	 * added. But the user retrieved from indexing into the events array
	 * shall be immutable while the file is opened.
	 */
	if (likely(refs && idx < refs->count))
		user = refs->events[idx];

	rcu_read_unlock_sched();

	if (unlikely(user == NULL))
		return -ENOENT;

	tp = &user->tracepoint;

	/*
	 * It's possible key.enabled gets disabled after this check, however
	 * we don't mind if a few events are included in this condition.
	 */
	if (likely(atomic_read(&tp->key.enabled) > 0)) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;
		void *tpdata;
		void *kdata;
		u32 datalen;

		kdata = kmalloc(i->count, GFP_KERNEL);

		if (unlikely(!kdata))
			return -ENOMEM;

		datalen = copy_from_iter(kdata, i->count, i);

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			do {
				probe_func = probe_func_ptr->func;
				tpdata = probe_func_ptr->data;
				probe_func(user, kdata, datalen, tpdata);
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();

		kfree(kdata);
	}

	return ret;
}

static ssize_t user_events_write(struct file *file, const char __user *ubuf,
				 size_t count, loff_t *ppos)
{
	struct iovec iov;
	struct iov_iter i;

	if (unlikely(*ppos != 0))
		return -EFAULT;

	if (unlikely(import_single_range(READ, (char *)ubuf, count, &iov, &i)))
		return -EFAULT;

	return user_events_write_core(file, &i);
}

static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
{
	return user_events_write_core(kp->ki_filp, i);
}

static int user_events_ref_add(struct file *file, struct user_event *user)
{
	struct user_event_refs *refs, *new_refs;
	int i, size, count = 0;

	refs = rcu_dereference_protected(file->private_data,
					 lockdep_is_held(&reg_mutex));

	if (refs) {
		count = refs->count;

		for (i = 0; i < count; ++i)
			if (refs->events[i] == user)
				return i;
	}

	size = struct_size(refs, events, count + 1);

	new_refs = kzalloc(size, GFP_KERNEL);

	if (!new_refs)
		return -ENOMEM;

	new_refs->count = count + 1;

	for (i = 0; i < count; ++i)
		new_refs->events[i] = refs->events[i];

	new_refs->events[i] = user;

	atomic_inc(&user->refcnt);

	rcu_assign_pointer(file->private_data, new_refs);

	if (refs)
		kfree_rcu(refs, rcu);

	return i;
}

static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
{
	u32 size;
	long ret;

	ret = get_user(size, &ureg->size);

	if (ret)
		return ret;

	if (size > PAGE_SIZE)
		return -E2BIG;

	return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
}

/*
 * Registers a user_event on behalf of a user process.
 */
static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
{
	struct user_reg __user *ureg = (struct user_reg __user *)uarg;
	struct user_reg reg;
	struct user_event *user;
	char *name;
	long ret;

	ret = user_reg_get(ureg, &reg);

	if (ret)
		return ret;

	name = strndup_user((const char __user *)(uintptr_t)reg.name_args,
			    MAX_EVENT_DESC);

	if (IS_ERR(name)) {
		ret = PTR_ERR(name);
		return ret;
	}

	ret = user_event_parse_cmd(name, &user);

	if (ret) {
		kfree(name);
		return ret;
	}

	ret = user_events_ref_add(file, user);

	/* Positive number is index and valid */
	if (ret < 0)
		return ret;

	put_user((u32)ret, &ureg->write_index);
	put_user(user->index, &ureg->status_index);

	return 0;
}

/*
 * Deletes a user_event on behalf of a user process.
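 *
 * For illustration only, user space is expected to pass the event name
 * string directly as the ioctl argument, e.g. (with 'data_fd' being an
 * assumed open descriptor for the user_events_data file):
 *
 *	ioctl(data_fd, DIAG_IOCSDEL, "test");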
 */
static long user_events_ioctl_del(struct file *file, unsigned long uarg)
{
	void __user *ubuf = (void __user *)uarg;
	char *name;
	long ret;

	name = strndup_user(ubuf, MAX_EVENT_DESC);

	if (IS_ERR(name))
		return PTR_ERR(name);

	ret = delete_user_event(name);

	kfree(name);

	return ret;
}

/*
 * Handles the ioctl from user mode to register or alter operations.
 */
static long user_events_ioctl(struct file *file, unsigned int cmd,
			      unsigned long uarg)
{
	long ret = -ENOTTY;

	switch (cmd) {
	case DIAG_IOCSREG:
		mutex_lock(&reg_mutex);
		ret = user_events_ioctl_reg(file, uarg);
		mutex_unlock(&reg_mutex);
		break;

	case DIAG_IOCSDEL:
		mutex_lock(&reg_mutex);
		ret = user_events_ioctl_del(file, uarg);
		mutex_unlock(&reg_mutex);
		break;
	}

	return ret;
}

/*
 * Handles the final close of the file from user mode.
 */
static int user_events_release(struct inode *node, struct file *file)
{
	struct user_event_refs *refs;
	struct user_event *user;
	int i;

	/*
	 * Ensure refs cannot change under any situation by taking the
	 * register mutex during the final freeing of the references.
	 */
	mutex_lock(&reg_mutex);

	refs = file->private_data;

	if (!refs)
		goto out;

	/*
	 * The lifetime of refs has reached an end, it's tied to this file.
	 * The underlying user_events are ref counted, and cannot be freed.
	 * After this decrement, the user_events may be freed elsewhere.
	 */
	for (i = 0; i < refs->count; ++i) {
		user = refs->events[i];

		if (user)
			atomic_dec(&user->refcnt);
	}
out:
	file->private_data = NULL;

	mutex_unlock(&reg_mutex);

	kfree(refs);

	return 0;
}

static const struct file_operations user_data_fops = {
	.write = user_events_write,
	.write_iter = user_events_write_iter,
	.unlocked_ioctl = user_events_ioctl,
	.release = user_events_release,
};

/*
 * Maps the shared page into the user process for checking if event is enabled.
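 *
 * A sketch of the expected flow on the user side, for illustration only
 * ('status_fd' is an assumed open descriptor for the user_events_status
 * file, 'page_size' the system page size and 'status_index' the value
 * returned by the register ioctl):
 *
 *	char *status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
 *				 status_fd, 0);
 *
 *	if (status_page[status_index]) {
 *		// Event is currently enabled, write the payload out via
 *		// the user_events_data file.
 *	}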
 */
static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	if (size != MAX_EVENTS)
		return -EINVAL;

	return remap_pfn_range(vma, vma->vm_start,
			       virt_to_phys(register_page_data) >> PAGE_SHIFT,
			       size, vm_get_page_prot(VM_READ));
}

static void *user_seq_start(struct seq_file *m, loff_t *pos)
{
	if (*pos)
		return NULL;

	return (void *)1;
}

static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void user_seq_stop(struct seq_file *m, void *p)
{
}

static int user_seq_show(struct seq_file *m, void *p)
{
	struct user_event *user;
	char status;
	int i, active = 0, busy = 0, flags;

	mutex_lock(&reg_mutex);

	hash_for_each(register_table, i, user, node) {
		status = register_page_data[user->index];
		flags = user->flags;

		seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));

		if (flags != 0 || status != 0)
			seq_puts(m, " #");

		if (status != 0) {
			seq_puts(m, " Used by");
			if (status & EVENT_STATUS_FTRACE)
				seq_puts(m, " ftrace");
			if (status & EVENT_STATUS_PERF)
				seq_puts(m, " perf");
			if (status & EVENT_STATUS_OTHER)
				seq_puts(m, " other");
			busy++;
		}

		if (flags & FLAG_BPF_ITER)
			seq_puts(m, " FLAG:BPF_ITER");

		seq_puts(m, "\n");
		active++;
	}

	mutex_unlock(&reg_mutex);

	seq_puts(m, "\n");
	seq_printf(m, "Active: %d\n", active);
	seq_printf(m, "Busy: %d\n", busy);
	seq_printf(m, "Max: %ld\n", MAX_EVENTS);

	return 0;
}

static const struct seq_operations user_seq_ops = {
	.start = user_seq_start,
	.next = user_seq_next,
	.stop = user_seq_stop,
	.show = user_seq_show,
};

static int user_status_open(struct inode *node, struct file *file)
{
	return seq_open(file, &user_seq_ops);
}

static const struct file_operations user_status_fops = {
	.open = user_status_open,
	.mmap = user_status_mmap,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

/*
 * Creates a set of tracefs files to allow user mode interactions.
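 *
 * Both files are created directly in the tracefs root, so with the usual
 * mount point they are expected to show up as
 * /sys/kernel/tracing/user_events_data and
 * /sys/kernel/tracing/user_events_status.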
 */
static int create_user_tracefs(void)
{
	struct dentry *edata, *emmap;

	edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
				    NULL, NULL, &user_data_fops);

	if (!edata) {
		pr_warn("Could not create tracefs 'user_events_data' entry\n");
		goto err;
	}

	/* mmap with MAP_SHARED requires writable fd */
	emmap = tracefs_create_file("user_events_status", TRACE_MODE_WRITE,
				    NULL, NULL, &user_status_fops);

	if (!emmap) {
		tracefs_remove(edata);
		pr_warn("Could not create tracefs 'user_events_status' entry\n");
		goto err;
	}

	return 0;
err:
	return -ENODEV;
}

static void set_page_reservations(bool set)
{
	int page;

	for (page = 0; page < MAX_PAGES; ++page) {
		void *addr = register_page_data + (PAGE_SIZE * page);

		if (set)
			SetPageReserved(virt_to_page(addr));
		else
			ClearPageReserved(virt_to_page(addr));
	}
}

static int __init trace_events_user_init(void)
{
	int ret;

	/* Zero all bits except bit 0, which is reserved for failures */
	bitmap_zero(page_bitmap, MAX_EVENTS);
	set_bit(0, page_bitmap);

	register_page_data = kzalloc(MAX_EVENTS, GFP_KERNEL);

	if (!register_page_data)
		return -ENOMEM;

	set_page_reservations(true);

	ret = create_user_tracefs();

	if (ret) {
		pr_warn("user_events could not register with tracefs\n");
		set_page_reservations(false);
		kfree(register_page_data);
		return ret;
	}

	if (dyn_event_register(&user_event_dops))
		pr_warn("user_events could not register with dyn_events\n");

	return 0;
}

fs_initcall(trace_events_user_init);