// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, Microsoft Corporation.
 *
 * Authors:
 *   Beau Belgrave <beaub@linux.microsoft.com>
 */

#include <linux/bitmap.h>
#include <linux/cdev.h>
#include <linux/hashtable.h>
#include <linux/list.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/ioctl.h>
#include <linux/jhash.h>
#include <linux/trace_events.h>
#include <linux/tracefs.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <uapi/linux/user_events.h>
#include "trace.h"
#include "trace_dynevent.h"

#define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1)

#define FIELD_DEPTH_TYPE 0
#define FIELD_DEPTH_NAME 1
#define FIELD_DEPTH_SIZE 2

/*
 * Limits how many trace_event calls user processes can create:
 * Must be a power-of-two multiple of PAGE_SIZE.
 */
#define MAX_PAGE_ORDER 0
#define MAX_PAGES (1 << MAX_PAGE_ORDER)
#define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)

/* Limits how long an event name plus args can be within the subsystem. */
#define MAX_EVENT_DESC 512
#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
#define MAX_FIELD_ARRAY_SIZE 1024
#define MAX_FIELD_ARG_NAME 256

#define MAX_BPF_COPY_SIZE PAGE_SIZE
#define MAX_STACK_BPF_DATA 512

static char *register_page_data;

static DEFINE_MUTEX(reg_mutex);
static DEFINE_HASHTABLE(register_table, 4);
static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);

/*
 * Stores per-event properties. As users register events within a file,
 * a user_event might be created if it does not already exist. These are
 * globally used and their lifetime is tied to the refcnt member. They
 * cannot go away until the refcnt reaches zero.
 */
struct user_event {
	struct tracepoint tracepoint;
	struct trace_event_call call;
	struct trace_event_class class;
	struct dyn_event devent;
	struct hlist_node node;
	struct list_head fields;
	struct list_head validators;
	atomic_t refcnt;
	int index;
	int flags;
	int min_size;
};

/*
 * Stores per-file event references. As users register events within a
 * file, this structure is modified and freed via RCU.
 * The lifetime of this struct is tied to the lifetime of the file.
 * These are not shared and only accessible by the file that created them.
 */
struct user_event_refs {
	struct rcu_head rcu;
	int count;
	struct user_event *events[];
};

#define VALIDATOR_ENSURE_NULL (1 << 0)
#define VALIDATOR_REL (1 << 1)

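/*
 * Validators describe dynamic-length fields (__data_loc/__rel_loc) that
 * need bounds checking against the written payload, and optionally a
 * trailing NULL terminator check for char data.
 */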
struct user_event_validator {
	struct list_head link;
	int offset;
	int flags;
};

typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
				   void *tpdata, bool *faulted);

static int user_event_parse(char *name, char *args, char *flags,
			    struct user_event **newuser);

static u32 user_event_key(char *name)
{
	return jhash(name, strlen(name), 0);
}

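/*
 * Copies data from the iterator with page faults disabled so the copy
 * cannot sleep within a probe. Callers pre-fault the user buffer and
 * treat a short copy as a fault to report back to the writer.
 */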
static __always_inline __must_check
size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
{
	size_t ret;

	pagefault_disable();

	ret = copy_from_iter_nocache(addr, bytes, i);

	pagefault_enable();

	return ret;
}

static struct list_head *user_event_get_fields(struct trace_event_call *call)
{
	struct user_event *user = (struct user_event *)call->data;

	return &user->fields;
}

/*
 * Parses a register command for user_events
 * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]]
 *
 * Example event named 'test' with a 20 char 'msg' field with an unsigned int
 * 'id' field after:
 * test char[20] msg;unsigned int id
 *
 * NOTE: Offsets are from the user data perspective, they are not from the
 * trace_entry/buffer perspective. We automatically add the common properties
 * sizes to the offset for the user.
 *
 * Upon success user_event has its ref count increased by 1.
 */
static int user_event_parse_cmd(char *raw_command, struct user_event **newuser)
{
	char *name = raw_command;
	char *args = strpbrk(name, " ");
	char *flags;

	if (args)
		*args++ = '\0';

	flags = strpbrk(name, ":");

	if (flags)
		*flags++ = '\0';

	return user_event_parse(name, args, flags, newuser);
}

static int user_field_array_size(const char *type)
{
	const char *start = strchr(type, '[');
	char val[8];
	char *bracket;
	int size = 0;

	if (start == NULL)
		return -EINVAL;

	if (strscpy(val, start + 1, sizeof(val)) <= 0)
		return -EINVAL;

	bracket = strchr(val, ']');

	if (!bracket)
		return -EINVAL;

	*bracket = '\0';

	if (kstrtouint(val, 0, &size))
		return -EINVAL;

	if (size > MAX_FIELD_ARRAY_SIZE)
		return -EINVAL;

	return size;
}

static int user_field_size(const char *type)
{
	/* long is not allowed from a user, since it's ambiguous in size */
	if (strcmp(type, "s64") == 0)
		return sizeof(s64);
	if (strcmp(type, "u64") == 0)
		return sizeof(u64);
	if (strcmp(type, "s32") == 0)
		return sizeof(s32);
	if (strcmp(type, "u32") == 0)
		return sizeof(u32);
	if (strcmp(type, "int") == 0)
		return sizeof(int);
	if (strcmp(type, "unsigned int") == 0)
		return sizeof(unsigned int);
	if (strcmp(type, "s16") == 0)
		return sizeof(s16);
	if (strcmp(type, "u16") == 0)
		return sizeof(u16);
	if (strcmp(type, "short") == 0)
		return sizeof(short);
	if (strcmp(type, "unsigned short") == 0)
		return sizeof(unsigned short);
	if (strcmp(type, "s8") == 0)
		return sizeof(s8);
	if (strcmp(type, "u8") == 0)
		return sizeof(u8);
	if (strcmp(type, "char") == 0)
		return sizeof(char);
	if (strcmp(type, "unsigned char") == 0)
		return sizeof(unsigned char);
	if (str_has_prefix(type, "char["))
		return user_field_array_size(type);
	if (str_has_prefix(type, "unsigned char["))
		return user_field_array_size(type);
	if (str_has_prefix(type, "__data_loc "))
		return sizeof(u32);
	if (str_has_prefix(type, "__rel_loc "))
		return sizeof(u32);

	/* Unknown basic type, error */
	return -EINVAL;
}

static void user_event_destroy_validators(struct user_event *user)
{
	struct user_event_validator *validator, *next;
	struct list_head *head = &user->validators;

	list_for_each_entry_safe(validator, next, head, link) {
		list_del(&validator->link);
		kfree(validator);
	}
}

static void user_event_destroy_fields(struct user_event *user)
{
	struct ftrace_event_field *field, *next;
	struct list_head *head = &user->fields;

	list_for_each_entry_safe(field, next, head, link) {
		list_del(&field->link);
		kfree(field);
	}
}

static int user_event_add_field(struct user_event *user, const char *type,
				const char *name, int offset, int size,
				int is_signed, int filter_type)
{
	struct user_event_validator *validator;
	struct ftrace_event_field *field;
	int validator_flags = 0;

	field = kmalloc(sizeof(*field), GFP_KERNEL);

	if (!field)
		return -ENOMEM;

	if (str_has_prefix(type, "__data_loc "))
		goto add_validator;

	if (str_has_prefix(type, "__rel_loc ")) {
		validator_flags |= VALIDATOR_REL;
		goto add_validator;
	}

	goto add_field;

add_validator:
	if (strstr(type, "char") != 0)
		validator_flags |= VALIDATOR_ENSURE_NULL;

	validator = kmalloc(sizeof(*validator), GFP_KERNEL);

	if (!validator) {
		kfree(field);
		return -ENOMEM;
	}

	validator->flags = validator_flags;
	validator->offset = offset;

	/* Want sequential access when validating */
	list_add_tail(&validator->link, &user->validators);

add_field:
	field->type = type;
	field->name = name;
	field->offset = offset;
	field->size = size;
	field->is_signed = is_signed;
	field->filter_type = filter_type;

	list_add(&field->link, &user->fields);

	/*
	 * Minimum size required for user writes; this does not include
	 * the size of trace_entry (common fields).
	 */
	user->min_size = (offset + size) - sizeof(struct trace_entry);

	return 0;
}

/*
 * Parses the values of a field within the description
 * Format: type name [size]
 */
static int user_event_parse_field(char *field, struct user_event *user,
				  u32 *offset)
{
	char *part, *type, *name;
	u32 depth = 0, saved_offset = *offset;
	int len, size = -EINVAL;
	bool is_struct = false;

	field = skip_spaces(field);

	if (*field == '\0')
		return 0;

	/* Handle types that have a space within */
	len = str_has_prefix(field, "unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "struct ");
	if (len) {
		is_struct = true;
		goto skip_next;
	}

	len = str_has_prefix(field, "__data_loc unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__data_loc ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__rel_loc unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__rel_loc ");
	if (len)
		goto skip_next;

	goto parse;
skip_next:
	type = field;
	field = strpbrk(field + len, " ");

	if (field == NULL)
		return -EINVAL;

	*field++ = '\0';
	depth++;
parse:
	name = NULL;

	while ((part = strsep(&field, " ")) != NULL) {
		switch (depth++) {
		case FIELD_DEPTH_TYPE:
			type = part;
			break;
		case FIELD_DEPTH_NAME:
			name = part;
			break;
		case FIELD_DEPTH_SIZE:
			if (!is_struct)
				return -EINVAL;

			if (kstrtou32(part, 10, &size))
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
	}

	if (depth < FIELD_DEPTH_SIZE || !name)
		return -EINVAL;

	if (depth == FIELD_DEPTH_SIZE)
		size = user_field_size(type);

	if (size == 0)
		return -EINVAL;

	if (size < 0)
		return size;

	*offset = saved_offset + size;

	return user_event_add_field(user, type, name, saved_offset, size,
				    type[0] != 'u', FILTER_OTHER);
}

static void user_event_parse_flags(struct user_event *user, char *flags)
{
	char *flag;

	if (flags == NULL)
		return;

	while ((flag = strsep(&flags, ",")) != NULL) {
		if (strcmp(flag, "BPF_ITER") == 0)
			user->flags |= FLAG_BPF_ITER;
	}
}

static int user_event_parse_fields(struct user_event *user, char *args)
{
	char *field;
	u32 offset = sizeof(struct trace_entry);
	int ret = -EINVAL;

	if (args == NULL)
		return 0;

	while ((field = strsep(&args, ";")) != NULL) {
		ret = user_event_parse_field(field, user, &offset);

		if (ret)
			break;
	}

	return ret;
}

static struct trace_event_fields user_event_fields_array[1];

static const char *user_field_format(const char *type)
{
	if (strcmp(type, "s64") == 0)
		return "%lld";
	if (strcmp(type, "u64") == 0)
		return "%llu";
	if (strcmp(type, "s32") == 0)
		return "%d";
	if (strcmp(type, "u32") == 0)
		return "%u";
	if (strcmp(type, "int") == 0)
		return "%d";
	if (strcmp(type, "unsigned int") == 0)
		return "%u";
	if (strcmp(type, "s16") == 0)
		return "%d";
	if (strcmp(type, "u16") == 0)
		return "%u";
	if (strcmp(type, "short") == 0)
		return "%d";
	if (strcmp(type, "unsigned short") == 0)
		return "%u";
	if (strcmp(type, "s8") == 0)
		return "%d";
	if (strcmp(type, "u8") == 0)
		return "%u";
	if (strcmp(type, "char") == 0)
		return "%d";
	if (strcmp(type, "unsigned char") == 0)
		return "%u";
	if (strstr(type, "char[") != 0)
		return "%s";

	/* Unknown, likely a struct; allowed, treat as 64-bit */
	return "%llu";
}

static bool user_field_is_dyn_string(const char *type, const char **str_func)
{
	if (str_has_prefix(type, "__data_loc ")) {
		*str_func = "__get_str";
		goto check;
	}

	if (str_has_prefix(type, "__rel_loc ")) {
		*str_func = "__get_rel_str";
		goto check;
	}

	return false;
check:
	return strstr(type, "char") != 0;
}

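/*
 * Builds the synthetic print_fmt in two passes over the fields: first the
 * quoted format portion, then the argument list (REC->field or the
 * __get_str()/__get_rel_str() helpers for dynamic strings). It is called
 * once with a NULL buffer to size the allocation.
 */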
#define LEN_OR_ZERO (len ? len - pos : 0)
static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
{
	struct ftrace_event_field *field, *next;
	struct list_head *head = &user->fields;
	int pos = 0, depth = 0;
	const char *str_func;

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	list_for_each_entry_safe_reverse(field, next, head, link) {
		if (depth != 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO, " ");

		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s",
				field->name, user_field_format(field->type));

		depth++;
	}

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	list_for_each_entry_safe_reverse(field, next, head, link) {
		if (user_field_is_dyn_string(field->type, &str_func))
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", %s(%s)", str_func, field->name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", REC->%s", field->name);
	}

	return pos + 1;
}
#undef LEN_OR_ZERO

static int user_event_create_print_fmt(struct user_event *user)
{
	char *print_fmt;
	int len;

	len = user_event_set_print_fmt(user, NULL, 0);

	print_fmt = kmalloc(len, GFP_KERNEL);

	if (!print_fmt)
		return -ENOMEM;

	user_event_set_print_fmt(user, print_fmt, len);

	user->call.print_fmt = print_fmt;

	return 0;
}

static enum print_line_t user_event_print_trace(struct trace_iterator *iter,
						int flags,
						struct trace_event *event)
{
	/* Unsafe to try to decode user provided print_fmt, use hex */
	trace_print_hex_dump_seq(&iter->seq, "", DUMP_PREFIX_OFFSET, 16,
				 1, iter->ent, iter->ent_size, true);

	return trace_handle_return(&iter->seq);
}

static struct trace_event_functions user_event_funcs = {
	.trace = user_event_print_trace,
};

static int user_event_set_call_visible(struct user_event *user, bool visible)
{
	int ret;
	const struct cred *old_cred;
	struct cred *cred;

	cred = prepare_creds();

	if (!cred)
		return -ENOMEM;

	/*
	 * While by default tracefs is locked down, systems can be configured
	 * to allow user_event files to be less locked down. The extreme case
	 * being "other" has read/write access to user_events_data/status.
	 *
	 * When not locked down, processes may not have permissions to
	 * add/remove calls themselves to tracefs. We need to temporarily
	 * switch to root file permission to allow for this scenario.
	 */
	cred->fsuid = GLOBAL_ROOT_UID;

	old_cred = override_creds(cred);

	if (visible)
		ret = trace_add_event_call(&user->call);
	else
		ret = trace_remove_event_call(&user->call);

	revert_creds(old_cred);
	put_cred(cred);

	return ret;
}

static int destroy_user_event(struct user_event *user)
{
	int ret = 0;

	/* Must destroy fields before call removal */
	user_event_destroy_fields(user);

	ret = user_event_set_call_visible(user, false);

	if (ret)
		return ret;

	dyn_event_remove(&user->devent);

	register_page_data[user->index] = 0;
	clear_bit(user->index, page_bitmap);
	hash_del(&user->node);

	user_event_destroy_validators(user);
	kfree(user->call.print_fmt);
	kfree(EVENT_NAME(user));
	kfree(user);

	return ret;
}

static struct user_event *find_user_event(char *name, u32 *outkey)
{
	struct user_event *user;
	u32 key = user_event_key(name);

	*outkey = key;

	hash_for_each_possible(register_table, user, node, key)
		if (!strcmp(EVENT_NAME(user), name)) {
			atomic_inc(&user->refcnt);
			return user;
		}

	return NULL;
}

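/*
 * Each validator points at a 32-bit location record within the payload:
 * the lower 16 bits hold the data offset (relative to just after the
 * location field for __rel_loc, from the start of the entry for
 * __data_loc) and the upper 16 bits hold the data size. The referenced
 * data must land within the written length.
 */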
static int user_event_validate(struct user_event *user, void *data, int len)
{
	struct list_head *head = &user->validators;
	struct user_event_validator *validator;
	void *pos, *end = data + len;
	u32 loc, offset, size;

	list_for_each_entry(validator, head, link) {
		pos = data + validator->offset;

		/* Already done min_size check, no bounds check here */
		loc = *(u32 *)pos;
		offset = loc & 0xffff;
		size = loc >> 16;

		if (likely(validator->flags & VALIDATOR_REL))
			pos += offset + sizeof(loc);
		else
			pos = data + offset;

		pos += size;

		if (unlikely(pos > end))
			return -EFAULT;

		if (likely(validator->flags & VALIDATOR_ENSURE_NULL))
			if (unlikely(*(char *)(pos - 1) != '\0'))
				return -EFAULT;
	}

	return 0;
}

/*
 * Writes the user supplied payload out to a trace file.
 */
static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
			      void *tpdata, bool *faulted)
{
	struct trace_event_file *file;
	struct trace_entry *entry;
	struct trace_event_buffer event_buffer;
	size_t size = sizeof(*entry) + i->count;

	file = (struct trace_event_file *)tpdata;

	if (!file ||
	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
	    trace_trigger_soft_disabled(file))
		return;

	/* Allocates and fills trace_entry; entry + 1 is the data payload */
	entry = trace_event_buffer_reserve(&event_buffer, file, size);

	if (unlikely(!entry))
		return;

	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
		goto discard;

	if (!list_empty(&user->validators) &&
	    unlikely(user_event_validate(user, entry, size)))
		goto discard;

	trace_event_buffer_commit(&event_buffer);

	return;
discard:
	*faulted = true;
	__trace_event_discard_commit(event_buffer.buffer,
				     event_buffer.event);
}

#ifdef CONFIG_PERF_EVENTS
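/*
 * Hands the payload to any attached eBPF programs. Depending on how the
 * event was registered and how the data arrived, this passes either the
 * raw iovec iterator (FLAG_BPF_ITER), a direct pointer to the single user
 * buffer, or a bounce copy into kernel memory (stack first, then a
 * GFP_NOWAIT allocation for larger payloads, capped at MAX_BPF_COPY_SIZE).
 */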
static void user_event_bpf(struct user_event *user, struct iov_iter *i)
{
	struct user_bpf_context context;
	struct user_bpf_iter bpf_i;
	char fast_data[MAX_STACK_BPF_DATA];
	void *temp = NULL;

	if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
		/* Raw iterator */
		context.data_type = USER_BPF_DATA_ITER;
		context.data_len = i->count;
		context.iter = &bpf_i;

		bpf_i.iov_offset = i->iov_offset;
		bpf_i.iov = i->iov;
		bpf_i.nr_segs = i->nr_segs;
	} else if (i->nr_segs == 1 && iter_is_iovec(i)) {
		/* Single buffer from user */
		context.data_type = USER_BPF_DATA_USER;
		context.data_len = i->count;
		context.udata = i->iov->iov_base + i->iov_offset;
	} else {
		/* Multi buffer from user */
		struct iov_iter copy = *i;
		size_t copy_size = min_t(size_t, i->count, MAX_BPF_COPY_SIZE);

		context.data_type = USER_BPF_DATA_KERNEL;
		context.kdata = fast_data;

		if (unlikely(copy_size > sizeof(fast_data))) {
			temp = kmalloc(copy_size, GFP_NOWAIT);

			if (temp)
				context.kdata = temp;
			else
				copy_size = sizeof(fast_data);
		}

		context.data_len = copy_nofault(context.kdata,
						copy_size, &copy);
	}

	trace_call_bpf(&user->call, &context);

	kfree(temp);
}

/*
 * Writes the user supplied payload out to perf ring buffer or eBPF program.
 */
static void user_event_perf(struct user_event *user, struct iov_iter *i,
			    void *tpdata, bool *faulted)
{
	struct hlist_head *perf_head;

	if (bpf_prog_array_valid(&user->call))
		user_event_bpf(user, i);

	perf_head = this_cpu_ptr(user->call.perf_events);

	if (perf_head && !hlist_empty(perf_head)) {
		struct trace_entry *perf_entry;
		struct pt_regs *regs;
		size_t size = sizeof(*perf_entry) + i->count;
		int context;

		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
						  &regs, &context);

		if (unlikely(!perf_entry))
			return;

		perf_fetch_caller_regs(regs);

		if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
			goto discard;

		if (!list_empty(&user->validators) &&
		    unlikely(user_event_validate(user, perf_entry, size)))
			goto discard;

		perf_trace_buf_submit(perf_entry, size, context,
				      user->call.event.type, 1, regs,
				      perf_head, NULL);

		return;
discard:
		*faulted = true;
		perf_swevent_put_recursion_context(context);
	}
}
#endif

/*
 * Update the register page that is shared between user processes.
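 * Each event's byte holds EVENT_STATUS_* bits so user processes can
 * cheaply test whether ftrace, perf or another probe is currently
 * attached before paying the cost of a write.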
 */
static void update_reg_page_for(struct user_event *user)
{
	struct tracepoint *tp = &user->tracepoint;
	char status = 0;

	if (atomic_read(&tp->key.enabled) > 0) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			do {
				probe_func = probe_func_ptr->func;

				if (probe_func == user_event_ftrace)
					status |= EVENT_STATUS_FTRACE;
#ifdef CONFIG_PERF_EVENTS
				else if (probe_func == user_event_perf)
					status |= EVENT_STATUS_PERF;
#endif
				else
					status |= EVENT_STATUS_OTHER;
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();
	}

	register_page_data[user->index] = status;
}

/*
 * Register callback for our events from tracing sub-systems.
 */
static int user_event_reg(struct trace_event_call *call,
			  enum trace_reg type,
			  void *data)
{
	struct user_event *user = (struct user_event *)call->data;
	int ret = 0;

	if (!user)
		return -ENOENT;

	switch (type) {
	case TRACE_REG_REGISTER:
		ret = tracepoint_probe_register(call->tp,
						call->class->probe,
						data);
		if (!ret)
			goto inc;
		break;

	case TRACE_REG_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->probe,
					    data);
		goto dec;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		ret = tracepoint_probe_register(call->tp,
						call->class->perf_probe,
						data);
		if (!ret)
			goto inc;
		break;

	case TRACE_REG_PERF_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->perf_probe,
					    data);
		goto dec;

	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		break;
#endif
	}

	return ret;
inc:
	atomic_inc(&user->refcnt);
	update_reg_page_for(user);
	return 0;
dec:
	update_reg_page_for(user);
	atomic_dec(&user->refcnt);
	return 0;
}

static int user_event_create(const char *raw_command)
{
	struct user_event *user;
	char *name;
	int ret;

	if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX))
		return -ECANCELED;

	raw_command += USER_EVENTS_PREFIX_LEN;
	raw_command = skip_spaces(raw_command);

	name = kstrdup(raw_command, GFP_KERNEL);

	if (!name)
		return -ENOMEM;

	mutex_lock(&reg_mutex);

	ret = user_event_parse_cmd(name, &user);

	if (!ret)
		atomic_dec(&user->refcnt);

	mutex_unlock(&reg_mutex);

	if (ret)
		kfree(name);

	return ret;
}

static int user_event_show(struct seq_file *m, struct dyn_event *ev)
{
	struct user_event *user = container_of(ev, struct user_event, devent);
	struct ftrace_event_field *field, *next;
	struct list_head *head;
	int depth = 0;

	seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user));

	head = trace_get_fields(&user->call);

	list_for_each_entry_safe_reverse(field, next, head, link) {
		if (depth == 0)
			seq_puts(m, " ");
		else
			seq_puts(m, "; ");

		seq_printf(m, "%s %s", field->type, field->name);

		if (str_has_prefix(field->type, "struct "))
			seq_printf(m, " %d", field->size);

		depth++;
	}

	seq_puts(m, "\n");

	return 0;
}

static bool user_event_is_busy(struct dyn_event *ev)
{
	struct user_event *user = container_of(ev, struct user_event, devent);

	return atomic_read(&user->refcnt) != 0;
}

static int user_event_free(struct dyn_event *ev)
{
	struct user_event *user = container_of(ev, struct user_event, devent);

	if (atomic_read(&user->refcnt) != 0)
		return -EBUSY;

	return destroy_user_event(user);
}

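/*
 * Rebuilds "type name[;]" from the argv tokens and compares it against the
 * stored field so dyn_event matching/deletion can be done by full signature.
 */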
static bool user_field_match(struct ftrace_event_field *field, int argc,
			     const char **argv, int *iout)
{
	char *field_name, *arg_name;
	int len, pos, i = *iout;
	bool colon = false, match = false;

	if (i >= argc)
		return false;

	len = MAX_FIELD_ARG_NAME;
	field_name = kmalloc(len, GFP_KERNEL);
	arg_name = kmalloc(len, GFP_KERNEL);

	if (!arg_name || !field_name)
		goto out;

	pos = 0;

	for (; i < argc; ++i) {
		if (i != *iout)
			pos += snprintf(arg_name + pos, len - pos, " ");

		pos += snprintf(arg_name + pos, len - pos, "%s", argv[i]);

		if (strchr(argv[i], ';')) {
			++i;
			colon = true;
			break;
		}
	}

	pos = 0;

	pos += snprintf(field_name + pos, len - pos, "%s", field->type);
	pos += snprintf(field_name + pos, len - pos, " ");
	pos += snprintf(field_name + pos, len - pos, "%s", field->name);

	if (colon)
		pos += snprintf(field_name + pos, len - pos, ";");

	*iout = i;

	match = strcmp(arg_name, field_name) == 0;
out:
	kfree(arg_name);
	kfree(field_name);

	return match;
}

static bool user_fields_match(struct user_event *user, int argc,
			      const char **argv)
{
	struct ftrace_event_field *field, *next;
	struct list_head *head = &user->fields;
	int i = 0;

	list_for_each_entry_safe_reverse(field, next, head, link)
		if (!user_field_match(field, argc, argv, &i))
			return false;

	if (i != argc)
		return false;

	return true;
}

static bool user_event_match(const char *system, const char *event,
			     int argc, const char **argv, struct dyn_event *ev)
{
	struct user_event *user = container_of(ev, struct user_event, devent);
	bool match;

	match = strcmp(EVENT_NAME(user), event) == 0 &&
		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);

	if (match && argc > 0)
		match = user_fields_match(user, argc, argv);

	return match;
}

static struct dyn_event_operations user_event_dops = {
	.create = user_event_create,
	.show = user_event_show,
	.is_busy = user_event_is_busy,
	.free = user_event_free,
	.match = user_event_match,
};

static int user_event_trace_register(struct user_event *user)
{
	int ret;

	ret = register_trace_event(&user->call.event);

	if (!ret)
		return -ENODEV;

	ret = user_event_set_call_visible(user, true);

	if (ret)
		unregister_trace_event(&user->call.event);

	return ret;
}

/*
 * Parses the event name, arguments and flags then registers if successful.
 * The name buffer lifetime is owned by this method for success cases only.
 * Upon success the returned user_event has its ref count increased by 1.
 */
static int user_event_parse(char *name, char *args, char *flags,
			    struct user_event **newuser)
{
	int ret;
	int index;
	u32 key;
	struct user_event *user;

	/* Prevent dyn_event from racing */
	mutex_lock(&event_mutex);
	user = find_user_event(name, &key);
	mutex_unlock(&event_mutex);

	if (user) {
		*newuser = user;
		/*
		 * Name is allocated by caller, free it since it already exists.
		 * Caller only worries about failure cases for freeing.
		 */
		kfree(name);
		return 0;
	}

	index = find_first_zero_bit(page_bitmap, MAX_EVENTS);

	if (index == MAX_EVENTS)
		return -EMFILE;

	user = kzalloc(sizeof(*user), GFP_KERNEL);

	if (!user)
		return -ENOMEM;

	INIT_LIST_HEAD(&user->class.fields);
	INIT_LIST_HEAD(&user->fields);
	INIT_LIST_HEAD(&user->validators);

	user->tracepoint.name = name;

	user_event_parse_flags(user, flags);

	ret = user_event_parse_fields(user, args);

	if (ret)
		goto put_user;

	ret = user_event_create_print_fmt(user);

	if (ret)
		goto put_user;

	user->call.data = user;
	user->call.class = &user->class;
	user->call.name = name;
	user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
	user->call.tp = &user->tracepoint;
	user->call.event.funcs = &user_event_funcs;

	user->class.system = USER_EVENTS_SYSTEM;
	user->class.fields_array = user_event_fields_array;
	user->class.get_fields = user_event_get_fields;
	user->class.reg = user_event_reg;
	user->class.probe = user_event_ftrace;
#ifdef CONFIG_PERF_EVENTS
	user->class.perf_probe = user_event_perf;
#endif

	mutex_lock(&event_mutex);
	ret = user_event_trace_register(user);
	mutex_unlock(&event_mutex);

	if (ret)
		goto put_user;

	user->index = index;

	/* Ensure we track ref */
	atomic_inc(&user->refcnt);

	dyn_event_init(&user->devent, &user_event_dops);
	dyn_event_add(&user->devent, &user->call);
	set_bit(user->index, page_bitmap);
	hash_add(register_table, &user->node, key);

	*newuser = user;
	return 0;
put_user:
	user_event_destroy_fields(user);
	user_event_destroy_validators(user);
	kfree(user);
	return ret;
}

/*
 * Deletes a previously created event if it is no longer being used.
 */
static int delete_user_event(char *name)
{
	u32 key;
	int ret;
	struct user_event *user = find_user_event(name, &key);

	if (!user)
		return -ENOENT;

	/* Ensure we are the last ref */
	if (atomic_read(&user->refcnt) != 1) {
		ret = -EBUSY;
		goto put_ref;
	}

	ret = destroy_user_event(user);

	if (ret)
		goto put_ref;

	return ret;
put_ref:
	/* No longer have this ref */
	atomic_dec(&user->refcnt);

	return ret;
}

/*
 * Validates the user payload and writes via iterator.
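 * The first sizeof(int) bytes of every write are the write_index that was
 * handed back by DIAG_IOCSREG; the remainder is the event payload.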
 */
static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
{
	struct user_event_refs *refs;
	struct user_event *user = NULL;
	struct tracepoint *tp;
	ssize_t ret = i->count;
	int idx;

	if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx)))
		return -EFAULT;

	rcu_read_lock_sched();

	refs = rcu_dereference_sched(file->private_data);

	/*
	 * The refs->events array is protected by RCU, and new items may be
	 * added. But the user retrieved from indexing into the events array
	 * shall be immutable while the file is opened.
	 */
	if (likely(refs && idx < refs->count))
		user = refs->events[idx];

	rcu_read_unlock_sched();

	if (unlikely(user == NULL))
		return -ENOENT;

	if (unlikely(i->count < user->min_size))
		return -EINVAL;

	tp = &user->tracepoint;

	/*
	 * It's possible key.enabled disables after this check; however,
	 * we don't mind if a few events are included in this condition.
	 */
	if (likely(atomic_read(&tp->key.enabled) > 0)) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;
		struct iov_iter copy;
		void *tpdata;
		bool faulted;

		if (unlikely(fault_in_iov_iter_readable(i, i->count)))
			return -EFAULT;

		faulted = false;

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			do {
				copy = *i;
				probe_func = probe_func_ptr->func;
				tpdata = probe_func_ptr->data;
				probe_func(user, &copy, tpdata, &faulted);
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();

		if (unlikely(faulted))
			return -EFAULT;
	}

	return ret;
}

static ssize_t user_events_write(struct file *file, const char __user *ubuf,
				 size_t count, loff_t *ppos)
{
	struct iovec iov;
	struct iov_iter i;

	if (unlikely(*ppos != 0))
		return -EFAULT;

	if (unlikely(import_single_range(READ, (char *)ubuf, count, &iov, &i)))
		return -EFAULT;

	return user_events_write_core(file, &i);
}

static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
{
	return user_events_write_core(kp->ki_filp, i);
}

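/*
 * Adds a per-file reference to the given user_event. A new, larger refs
 * array is allocated, the old entries are copied over, and the result is
 * published with rcu_assign_pointer() so concurrent writers see either
 * the old or the new array; the old one is reclaimed with kfree_rcu().
 */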
static int user_events_ref_add(struct file *file, struct user_event *user)
{
	struct user_event_refs *refs, *new_refs;
	int i, size, count = 0;

	refs = rcu_dereference_protected(file->private_data,
					 lockdep_is_held(&reg_mutex));

	if (refs) {
		count = refs->count;

		for (i = 0; i < count; ++i)
			if (refs->events[i] == user)
				return i;
	}

	size = struct_size(refs, events, count + 1);

	new_refs = kzalloc(size, GFP_KERNEL);

	if (!new_refs)
		return -ENOMEM;

	new_refs->count = count + 1;

	for (i = 0; i < count; ++i)
		new_refs->events[i] = refs->events[i];

	new_refs->events[i] = user;

	atomic_inc(&user->refcnt);

	rcu_assign_pointer(file->private_data, new_refs);

	if (refs)
		kfree_rcu(refs, rcu);

	return i;
}

static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
{
	u32 size;
	long ret;

	ret = get_user(size, &ureg->size);

	if (ret)
		return ret;

	if (size > PAGE_SIZE)
		return -E2BIG;

	return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
}

/*
 * Registers a user_event on behalf of a user process.
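 * The caller provides a struct user_reg containing its size and a pointer
 * to the "name args" description; on success the write_index to prefix
 * writes with and the status_index into the shared status page are
 * filled back in.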
 */
static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
{
	struct user_reg __user *ureg = (struct user_reg __user *)uarg;
	struct user_reg reg;
	struct user_event *user;
	char *name;
	long ret;

	ret = user_reg_get(ureg, &reg);

	if (ret)
		return ret;

	name = strndup_user((const char __user *)(uintptr_t)reg.name_args,
			    MAX_EVENT_DESC);

	if (IS_ERR(name)) {
		ret = PTR_ERR(name);
		return ret;
	}

	ret = user_event_parse_cmd(name, &user);

	if (ret) {
		kfree(name);
		return ret;
	}

	ret = user_events_ref_add(file, user);

	/* No longer need parse ref, ref_add either worked or not */
	atomic_dec(&user->refcnt);

	/* Positive number is index and valid */
	if (ret < 0)
		return ret;

	put_user((u32)ret, &ureg->write_index);
	put_user(user->index, &ureg->status_index);

	return 0;
}
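
/*
 * Illustrative user-space flow for the registration ioctl and write path
 * above (a rough sketch, not compiled here; the event name, tracefs mount
 * point and omitted error handling are assumptions):
 *
 *	struct user_reg reg = {0};
 *	struct iovec io[2];
 *	__u32 count = 1;
 *	int data_fd = open("/sys/kernel/tracing/user_events_data", O_RDWR);
 *
 *	reg.size = sizeof(reg);
 *	reg.name_args = (__u64)"test u32 count";
 *	ioctl(data_fd, DIAG_IOCSREG, &reg);
 *
 *	io[0].iov_base = &reg.write_index;
 *	io[0].iov_len = sizeof(reg.write_index);
 *	io[1].iov_base = &count;
 *	io[1].iov_len = sizeof(count);
 *	writev(data_fd, io, 2);
 *
 * reg.status_index can be checked against the mmap()'d user_events_status
 * page to skip the writev() entirely while nothing is attached.
 */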

/*
 * Deletes a user_event on behalf of a user process.
 */
static long user_events_ioctl_del(struct file *file, unsigned long uarg)
{
	void __user *ubuf = (void __user *)uarg;
	char *name;
	long ret;

	name = strndup_user(ubuf, MAX_EVENT_DESC);

	if (IS_ERR(name))
		return PTR_ERR(name);

	/* event_mutex prevents dyn_event from racing */
	mutex_lock(&event_mutex);
	ret = delete_user_event(name);
	mutex_unlock(&event_mutex);

	kfree(name);

	return ret;
}

/*
 * Handles the ioctl from user mode to register or delete operations.
 */
static long user_events_ioctl(struct file *file, unsigned int cmd,
			      unsigned long uarg)
{
	long ret = -ENOTTY;

	switch (cmd) {
	case DIAG_IOCSREG:
		mutex_lock(&reg_mutex);
		ret = user_events_ioctl_reg(file, uarg);
		mutex_unlock(&reg_mutex);
		break;

	case DIAG_IOCSDEL:
		mutex_lock(&reg_mutex);
		ret = user_events_ioctl_del(file, uarg);
		mutex_unlock(&reg_mutex);
		break;
	}

	return ret;
}

/*
 * Handles the final close of the file from user mode.
 */
static int user_events_release(struct inode *node, struct file *file)
{
	struct user_event_refs *refs;
	struct user_event *user;
	int i;

	/*
	 * Ensure refs cannot change under any situation by taking the
	 * register mutex during the final freeing of the references.
	 */
	mutex_lock(&reg_mutex);

	refs = file->private_data;

	if (!refs)
		goto out;

	/*
	 * The lifetime of refs has reached an end, it's tied to this file.
	 * The underlying user_events are ref counted, and cannot be freed.
	 * After this decrement, the user_events may be freed elsewhere.
	 */
	for (i = 0; i < refs->count; ++i) {
		user = refs->events[i];

		if (user)
			atomic_dec(&user->refcnt);
	}
out:
	file->private_data = NULL;

	mutex_unlock(&reg_mutex);

	kfree(refs);

	return 0;
}

static const struct file_operations user_data_fops = {
	.write = user_events_write,
	.write_iter = user_events_write_iter,
	.unlocked_ioctl	= user_events_ioctl,
	.release = user_events_release,
};

/*
 * Maps the shared page into the user process for checking if an event is enabled.
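 * The mapping is forced read-only via vm_get_page_prot(VM_READ), regardless
 * of the protection flags the caller requested, and must cover the exact
 * MAX_EVENTS-sized region.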
 */
static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	if (size != MAX_EVENTS)
		return -EINVAL;

	return remap_pfn_range(vma, vma->vm_start,
			       virt_to_phys(register_page_data) >> PAGE_SHIFT,
			       size, vm_get_page_prot(VM_READ));
}

static void *user_seq_start(struct seq_file *m, loff_t *pos)
{
	if (*pos)
		return NULL;

	return (void *)1;
}

static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void user_seq_stop(struct seq_file *m, void *p)
{
}

static int user_seq_show(struct seq_file *m, void *p)
{
	struct user_event *user;
	char status;
	int i, active = 0, busy = 0, flags;

	mutex_lock(&reg_mutex);

	hash_for_each(register_table, i, user, node) {
		status = register_page_data[user->index];
		flags = user->flags;

		seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));

		if (flags != 0 || status != 0)
			seq_puts(m, " #");

		if (status != 0) {
			seq_puts(m, " Used by");
			if (status & EVENT_STATUS_FTRACE)
				seq_puts(m, " ftrace");
			if (status & EVENT_STATUS_PERF)
				seq_puts(m, " perf");
			if (status & EVENT_STATUS_OTHER)
				seq_puts(m, " other");
			busy++;
		}

		if (flags & FLAG_BPF_ITER)
			seq_puts(m, " FLAG:BPF_ITER");

		seq_puts(m, "\n");
		active++;
	}

	mutex_unlock(&reg_mutex);

	seq_puts(m, "\n");
	seq_printf(m, "Active: %d\n", active);
	seq_printf(m, "Busy: %d\n", busy);
	seq_printf(m, "Max: %ld\n", MAX_EVENTS);

	return 0;
}

static const struct seq_operations user_seq_ops = {
	.start = user_seq_start,
	.next  = user_seq_next,
	.stop  = user_seq_stop,
	.show  = user_seq_show,
};

static int user_status_open(struct inode *node, struct file *file)
{
	return seq_open(file, &user_seq_ops);
}

static const struct file_operations user_status_fops = {
	.open = user_status_open,
	.mmap = user_status_mmap,
	.read = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/*
 * Creates a set of tracefs files to allow user mode interactions.
 */
static int create_user_tracefs(void)
{
	struct dentry *edata, *emmap;

	edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
				    NULL, NULL, &user_data_fops);

	if (!edata) {
		pr_warn("Could not create tracefs 'user_events_data' entry\n");
		goto err;
	}

	/* mmap with MAP_SHARED requires writable fd */
	emmap = tracefs_create_file("user_events_status", TRACE_MODE_WRITE,
				    NULL, NULL, &user_status_fops);

	if (!emmap) {
		tracefs_remove(edata);
		pr_warn("Could not create tracefs 'user_events_status' entry\n");
		goto err;
	}

	return 0;
err:
	return -ENODEV;
}

static void set_page_reservations(bool set)
{
	int page;

	for (page = 0; page < MAX_PAGES; ++page) {
		void *addr = register_page_data + (PAGE_SIZE * page);

		if (set)
			SetPageReserved(virt_to_page(addr));
		else
			ClearPageReserved(virt_to_page(addr));
	}
}

static int __init trace_events_user_init(void)
{
	struct page *pages;
	int ret;

	/* Zero all bits besides 0 (which is reserved for failures) */
	bitmap_zero(page_bitmap, MAX_EVENTS);
	set_bit(0, page_bitmap);

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER);
	if (!pages)
		return -ENOMEM;
	register_page_data = page_address(pages);

	set_page_reservations(true);

	ret = create_user_tracefs();

	if (ret) {
		pr_warn("user_events could not register with tracefs\n");
		set_page_reservations(false);
		__free_pages(pages, MAX_PAGE_ORDER);
		return ret;
	}

	if (dyn_event_register(&user_event_dops))
		pr_warn("user_events could not register with dyn_events\n");

	return 0;
}

fs_initcall(trace_events_user_init);