xref: /openbmc/linux/kernel/trace/trace_events_user.c (revision bc47ee4844d6b7d7351536cd99d35848c4449689)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2021, Microsoft Corporation.
4  *
5  * Authors:
6  *   Beau Belgrave <beaub@linux.microsoft.com>
7  */
8 
9 #include <linux/bitmap.h>
10 #include <linux/cdev.h>
11 #include <linux/hashtable.h>
12 #include <linux/list.h>
13 #include <linux/io.h>
14 #include <linux/uio.h>
15 #include <linux/ioctl.h>
16 #include <linux/jhash.h>
17 #include <linux/trace_events.h>
18 #include <linux/tracefs.h>
19 #include <linux/types.h>
20 #include <linux/uaccess.h>
21 #include <uapi/linux/user_events.h>
22 #include "trace.h"
23 #include "trace_dynevent.h"
24 
25 #define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1)
26 
27 #define FIELD_DEPTH_TYPE 0
28 #define FIELD_DEPTH_NAME 1
29 #define FIELD_DEPTH_SIZE 2
30 
31 /*
32  * Limits how many trace_event calls user processes can create:
33  * Must be a power-of-two multiple of PAGE_SIZE.
34  */
35 #define MAX_PAGE_ORDER 0
36 #define MAX_PAGES (1 << MAX_PAGE_ORDER)
37 #define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)
38 
39 /* Limit how long an event name plus its args can be within the subsystem. */
40 #define MAX_EVENT_DESC 512
41 #define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
42 #define MAX_FIELD_ARRAY_SIZE 1024
43 #define MAX_FIELD_ARG_NAME 256
44 
45 #define MAX_BPF_COPY_SIZE PAGE_SIZE
46 #define MAX_STACK_BPF_DATA 512
47 
48 static char *register_page_data;
49 
50 static DEFINE_MUTEX(reg_mutex);
51 static DEFINE_HASHTABLE(register_table, 4);
52 static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);
53 
54 /*
55  * Stores per-event properties. As users register events
56  * within a file, a user_event might be created if it does not
57  * already exist. These are globally used and their lifetime
58  * is tied to the refcnt member. They cannot go away until the
59  * refcnt reaches zero.
60  */
61 struct user_event {
62 	struct tracepoint tracepoint;
63 	struct trace_event_call call;
64 	struct trace_event_class class;
65 	struct dyn_event devent;
66 	struct hlist_node node;
67 	struct list_head fields;
68 	struct list_head validators;
69 	atomic_t refcnt;
70 	int index;
71 	int flags;
72 	int min_size;
73 };
74 
75 /*
76  * Stores per-file event references. As users register events
77  * within a file, this structure is modified and freed via RCU.
78  * The lifetime of this struct is tied to the lifetime of the file.
79  * These are not shared and are only accessible by the file that created them.
80  */
81 struct user_event_refs {
82 	struct rcu_head rcu;
83 	int count;
84 	struct user_event *events[];
85 };
86 
87 #define VALIDATOR_ENSURE_NULL (1 << 0)
88 #define VALIDATOR_REL (1 << 1)
89 
90 struct user_event_validator {
91 	struct list_head link;
92 	int offset;
93 	int flags;
94 };
95 
96 typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
97 				   void *tpdata, bool *faulted);
98 
99 static int user_event_parse(char *name, char *args, char *flags,
100 			    struct user_event **newuser);
101 
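/*
 * Hashes an event name into the key used for register_table lookups.
 */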
102 static u32 user_event_key(char *name)
103 {
104 	return jhash(name, strlen(name), 0);
105 }
106 
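/*
 * Copies data from the iterator with page faults disabled. Returns the
 * number of bytes copied, which will be less than @bytes if a fault
 * would have been required.
 */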
107 static __always_inline __must_check
108 size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
109 {
110 	size_t ret;
111 
112 	pagefault_disable();
113 
114 	ret = copy_from_iter_nocache(addr, bytes, i);
115 
116 	pagefault_enable();
117 
118 	return ret;
119 }
120 
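/* Provides the trace_event core with the fields list of a user_event. */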
121 static struct list_head *user_event_get_fields(struct trace_event_call *call)
122 {
123 	struct user_event *user = (struct user_event *)call->data;
124 
125 	return &user->fields;
126 }
127 
128 /*
129  * Parses a register command for user_events
130  * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]]
131  *
132  * Example event named 'test' with a 20 char 'msg' field with an unsigned int
133  * 'id' field after:
134  * test char[20] msg;unsigned int id
135  *
136  * NOTE: Offsets are from the user data perspective; they are not from the
137  * trace_entry/buffer perspective. We automatically add the common property
138  * sizes to the offset for the user.
139  *
140  * Upon success user_event has its ref count increased by 1.
141  */
142 static int user_event_parse_cmd(char *raw_command, struct user_event **newuser)
143 {
144 	char *name = raw_command;
145 	char *args = strpbrk(name, " ");
146 	char *flags;
147 
148 	if (args)
149 		*args++ = '\0';
150 
151 	flags = strpbrk(name, ":");
152 
153 	if (flags)
154 		*flags++ = '\0';
155 
156 	return user_event_parse(name, args, flags, newuser);
157 }
158 
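/*
 * Parses the array length out of a type such as "char[20]". Returns the
 * length, or -EINVAL if it is malformed or exceeds MAX_FIELD_ARRAY_SIZE.
 */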
159 static int user_field_array_size(const char *type)
160 {
161 	const char *start = strchr(type, '[');
162 	char val[8];
163 	char *bracket;
164 	int size = 0;
165 
166 	if (start == NULL)
167 		return -EINVAL;
168 
169 	if (strscpy(val, start + 1, sizeof(val)) <= 0)
170 		return -EINVAL;
171 
172 	bracket = strchr(val, ']');
173 
174 	if (!bracket)
175 		return -EINVAL;
176 
177 	*bracket = '\0';
178 
179 	if (kstrtouint(val, 0, &size))
180 		return -EINVAL;
181 
182 	if (size > MAX_FIELD_ARRAY_SIZE)
183 		return -EINVAL;
184 
185 	return size;
186 }
187 
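/*
 * Returns the size in bytes of a user supplied basic type,
 * or -EINVAL if the type is not one of the allowed kinds.
 */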
188 static int user_field_size(const char *type)
189 {
190 	/* long is not allowed from a user, since it's ambiguous in size */
191 	if (strcmp(type, "s64") == 0)
192 		return sizeof(s64);
193 	if (strcmp(type, "u64") == 0)
194 		return sizeof(u64);
195 	if (strcmp(type, "s32") == 0)
196 		return sizeof(s32);
197 	if (strcmp(type, "u32") == 0)
198 		return sizeof(u32);
199 	if (strcmp(type, "int") == 0)
200 		return sizeof(int);
201 	if (strcmp(type, "unsigned int") == 0)
202 		return sizeof(unsigned int);
203 	if (strcmp(type, "s16") == 0)
204 		return sizeof(s16);
205 	if (strcmp(type, "u16") == 0)
206 		return sizeof(u16);
207 	if (strcmp(type, "short") == 0)
208 		return sizeof(short);
209 	if (strcmp(type, "unsigned short") == 0)
210 		return sizeof(unsigned short);
211 	if (strcmp(type, "s8") == 0)
212 		return sizeof(s8);
213 	if (strcmp(type, "u8") == 0)
214 		return sizeof(u8);
215 	if (strcmp(type, "char") == 0)
216 		return sizeof(char);
217 	if (strcmp(type, "unsigned char") == 0)
218 		return sizeof(unsigned char);
219 	if (str_has_prefix(type, "char["))
220 		return user_field_array_size(type);
221 	if (str_has_prefix(type, "unsigned char["))
222 		return user_field_array_size(type);
223 	if (str_has_prefix(type, "__data_loc "))
224 		return sizeof(u32);
225 	if (str_has_prefix(type, "__rel_loc "))
226 		return sizeof(u32);
227 
228 	/* Unknown basic type, error */
229 	return -EINVAL;
230 }
231 
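/* Frees all validators that were attached to a user_event. */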
232 static void user_event_destroy_validators(struct user_event *user)
233 {
234 	struct user_event_validator *validator, *next;
235 	struct list_head *head = &user->validators;
236 
237 	list_for_each_entry_safe(validator, next, head, link) {
238 		list_del(&validator->link);
239 		kfree(validator);
240 	}
241 }
242 
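/* Frees all fields that were attached to a user_event. */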
243 static void user_event_destroy_fields(struct user_event *user)
244 {
245 	struct ftrace_event_field *field, *next;
246 	struct list_head *head = &user->fields;
247 
248 	list_for_each_entry_safe(field, next, head, link) {
249 		list_del(&field->link);
250 		kfree(field);
251 	}
252 }
253 
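/*
 * Adds a field to the user_event. Dynamic (__data_loc/__rel_loc) types also
 * get a validator so their payload locations are checked at write time.
 */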
254 static int user_event_add_field(struct user_event *user, const char *type,
255 				const char *name, int offset, int size,
256 				int is_signed, int filter_type)
257 {
258 	struct user_event_validator *validator;
259 	struct ftrace_event_field *field;
260 	int validator_flags = 0;
261 
262 	field = kmalloc(sizeof(*field), GFP_KERNEL);
263 
264 	if (!field)
265 		return -ENOMEM;
266 
267 	if (str_has_prefix(type, "__data_loc "))
268 		goto add_validator;
269 
270 	if (str_has_prefix(type, "__rel_loc ")) {
271 		validator_flags |= VALIDATOR_REL;
272 		goto add_validator;
273 	}
274 
275 	goto add_field;
276 
277 add_validator:
278 	if (strstr(type, "char") != NULL)
279 		validator_flags |= VALIDATOR_ENSURE_NULL;
280 
281 	validator = kmalloc(sizeof(*validator), GFP_KERNEL);
282 
283 	if (!validator) {
284 		kfree(field);
285 		return -ENOMEM;
286 	}
287 
288 	validator->flags = validator_flags;
289 	validator->offset = offset;
290 
291 	/* Want sequential access when validating */
292 	list_add_tail(&validator->link, &user->validators);
293 
294 add_field:
295 	field->type = type;
296 	field->name = name;
297 	field->offset = offset;
298 	field->size = size;
299 	field->is_signed = is_signed;
300 	field->filter_type = filter_type;
301 
302 	list_add(&field->link, &user->fields);
303 
304 	/*
305 	 * Min size from user writes that are required, this does not include
306 	 * the size of trace_entry (common fields).
307 	 */
308 	user->min_size = (offset + size) - sizeof(struct trace_entry);
309 
310 	return 0;
311 }
312 
313 /*
314  * Parses the values of a field within the description
315  * Format: type name [size]
316  */
317 static int user_event_parse_field(char *field, struct user_event *user,
318 				  u32 *offset)
319 {
320 	char *part, *type, *name;
321 	u32 depth = 0, saved_offset = *offset;
322 	int len, size = -EINVAL;
323 	bool is_struct = false;
324 
325 	field = skip_spaces(field);
326 
327 	if (*field == '\0')
328 		return 0;
329 
330 	/* Handle types that have a space within */
331 	len = str_has_prefix(field, "unsigned ");
332 	if (len)
333 		goto skip_next;
334 
335 	len = str_has_prefix(field, "struct ");
336 	if (len) {
337 		is_struct = true;
338 		goto skip_next;
339 	}
340 
341 	len = str_has_prefix(field, "__data_loc unsigned ");
342 	if (len)
343 		goto skip_next;
344 
345 	len = str_has_prefix(field, "__data_loc ");
346 	if (len)
347 		goto skip_next;
348 
349 	len = str_has_prefix(field, "__rel_loc unsigned ");
350 	if (len)
351 		goto skip_next;
352 
353 	len = str_has_prefix(field, "__rel_loc ");
354 	if (len)
355 		goto skip_next;
356 
357 	goto parse;
358 skip_next:
359 	type = field;
360 	field = strpbrk(field + len, " ");
361 
362 	if (field == NULL)
363 		return -EINVAL;
364 
365 	*field++ = '\0';
366 	depth++;
367 parse:
368 	name = NULL;
369 
370 	while ((part = strsep(&field, " ")) != NULL) {
371 		switch (depth++) {
372 		case FIELD_DEPTH_TYPE:
373 			type = part;
374 			break;
375 		case FIELD_DEPTH_NAME:
376 			name = part;
377 			break;
378 		case FIELD_DEPTH_SIZE:
379 			if (!is_struct)
380 				return -EINVAL;
381 
382 			if (kstrtou32(part, 10, &size))
383 				return -EINVAL;
384 			break;
385 		default:
386 			return -EINVAL;
387 		}
388 	}
389 
390 	if (depth < FIELD_DEPTH_SIZE || !name)
391 		return -EINVAL;
392 
393 	if (depth == FIELD_DEPTH_SIZE)
394 		size = user_field_size(type);
395 
396 	if (size == 0)
397 		return -EINVAL;
398 
399 	if (size < 0)
400 		return size;
401 
402 	*offset = saved_offset + size;
403 
404 	return user_event_add_field(user, type, name, saved_offset, size,
405 				    type[0] != 'u', FILTER_OTHER);
406 }
407 
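/* Parses the optional comma separated flag list (currently only BPF_ITER). */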
408 static void user_event_parse_flags(struct user_event *user, char *flags)
409 {
410 	char *flag;
411 
412 	if (flags == NULL)
413 		return;
414 
415 	while ((flag = strsep(&flags, ",")) != NULL) {
416 		if (strcmp(flag, "BPF_ITER") == 0)
417 			user->flags |= FLAG_BPF_ITER;
418 	}
419 }
420 
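/* Parses the semicolon separated field list of a register command. */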
421 static int user_event_parse_fields(struct user_event *user, char *args)
422 {
423 	char *field;
424 	u32 offset = sizeof(struct trace_entry);
425 	int ret = -EINVAL;
426 
427 	if (args == NULL)
428 		return 0;
429 
430 	while ((field = strsep(&args, ";")) != NULL) {
431 		ret = user_event_parse_field(field, user, &offset);
432 
433 		if (ret)
434 			break;
435 	}
436 
437 	return ret;
438 }
439 
440 static struct trace_event_fields user_event_fields_array[1];
441 
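/* Maps a user supplied type to the printf format used in print_fmt. */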
442 static const char *user_field_format(const char *type)
443 {
444 	if (strcmp(type, "s64") == 0)
445 		return "%lld";
446 	if (strcmp(type, "u64") == 0)
447 		return "%llu";
448 	if (strcmp(type, "s32") == 0)
449 		return "%d";
450 	if (strcmp(type, "u32") == 0)
451 		return "%u";
452 	if (strcmp(type, "int") == 0)
453 		return "%d";
454 	if (strcmp(type, "unsigned int") == 0)
455 		return "%u";
456 	if (strcmp(type, "s16") == 0)
457 		return "%d";
458 	if (strcmp(type, "u16") == 0)
459 		return "%u";
460 	if (strcmp(type, "short") == 0)
461 		return "%d";
462 	if (strcmp(type, "unsigned short") == 0)
463 		return "%u";
464 	if (strcmp(type, "s8") == 0)
465 		return "%d";
466 	if (strcmp(type, "u8") == 0)
467 		return "%u";
468 	if (strcmp(type, "char") == 0)
469 		return "%d";
470 	if (strcmp(type, "unsigned char") == 0)
471 		return "%u";
472 	if (strstr(type, "char[") != NULL)
473 		return "%s";
474 
475 	/* Unknown type, likely a struct; allowed, treat as 64-bit */
476 	return "%llu";
477 }
478 
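/*
 * Returns true if the type is a dynamically located string and sets
 * *str_func to the accessor helper to use in print_fmt.
 */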
479 static bool user_field_is_dyn_string(const char *type, const char **str_func)
480 {
481 	if (str_has_prefix(type, "__data_loc ")) {
482 		*str_func = "__get_str";
483 		goto check;
484 	}
485 
486 	if (str_has_prefix(type, "__rel_loc ")) {
487 		*str_func = "__get_rel_str";
488 		goto check;
489 	}
490 
491 	return false;
492 check:
493 	return strstr(type, "char") != NULL;
494 }
495 
496 #define LEN_OR_ZERO (len ? len - pos : 0)
497 static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
498 {
499 	struct ftrace_event_field *field, *next;
500 	struct list_head *head = &user->fields;
501 	int pos = 0, depth = 0;
502 	const char *str_func;
503 
504 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
505 
506 	list_for_each_entry_safe_reverse(field, next, head, link) {
507 		if (depth != 0)
508 			pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
509 
510 		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s",
511 				field->name, user_field_format(field->type));
512 
513 		depth++;
514 	}
515 
516 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
517 
518 	list_for_each_entry_safe_reverse(field, next, head, link) {
519 		if (user_field_is_dyn_string(field->type, &str_func))
520 			pos += snprintf(buf + pos, LEN_OR_ZERO,
521 					", %s(%s)", str_func, field->name);
522 		else
523 			pos += snprintf(buf + pos, LEN_OR_ZERO,
524 					", REC->%s", field->name);
525 	}
526 
527 	return pos + 1;
528 }
529 #undef LEN_OR_ZERO
530 
531 static int user_event_create_print_fmt(struct user_event *user)
532 {
533 	char *print_fmt;
534 	int len;
535 
536 	len = user_event_set_print_fmt(user, NULL, 0);
537 
538 	print_fmt = kmalloc(len, GFP_KERNEL);
539 
540 	if (!print_fmt)
541 		return -ENOMEM;
542 
543 	user_event_set_print_fmt(user, print_fmt, len);
544 
545 	user->call.print_fmt = print_fmt;
546 
547 	return 0;
548 }
549 
550 static enum print_line_t user_event_print_trace(struct trace_iterator *iter,
551 						int flags,
552 						struct trace_event *event)
553 {
554 	/* Unsafe to try to decode user provided print_fmt, use hex */
555 	trace_print_hex_dump_seq(&iter->seq, "", DUMP_PREFIX_OFFSET, 16,
556 				 1, iter->ent, iter->ent_size, true);
557 
558 	return trace_handle_return(&iter->seq);
559 }
560 
561 static struct trace_event_functions user_event_funcs = {
562 	.trace = user_event_print_trace,
563 };
564 
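/*
 * Tears down a user_event that has no remaining references: removes the
 * trace_event call, dyn_event, status byte and hash entry, then frees
 * all associated memory.
 */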
565 static int destroy_user_event(struct user_event *user)
566 {
567 	int ret = 0;
568 
569 	/* Must destroy fields before call removal */
570 	user_event_destroy_fields(user);
571 
572 	ret = trace_remove_event_call(&user->call);
573 
574 	if (ret)
575 		return ret;
576 
577 	dyn_event_remove(&user->devent);
578 
579 	register_page_data[user->index] = 0;
580 	clear_bit(user->index, page_bitmap);
581 	hash_del(&user->node);
582 
583 	user_event_destroy_validators(user);
584 	kfree(user->call.print_fmt);
585 	kfree(EVENT_NAME(user));
586 	kfree(user);
587 
588 	return ret;
589 }
590 
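/*
 * Finds a user_event by name in the register_table. On success the
 * refcnt is incremented and the event is returned.
 */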
591 static struct user_event *find_user_event(char *name, u32 *outkey)
592 {
593 	struct user_event *user;
594 	u32 key = user_event_key(name);
595 
596 	*outkey = key;
597 
598 	hash_for_each_possible(register_table, user, node, key)
599 		if (!strcmp(EVENT_NAME(user), name)) {
600 			atomic_inc(&user->refcnt);
601 			return user;
602 		}
603 
604 	return NULL;
605 }
606 
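/*
 * Checks that every dynamically located (__data_loc/__rel_loc) payload
 * stays within the written data and, for strings, is NULL terminated.
 */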
607 static int user_event_validate(struct user_event *user, void *data, int len)
608 {
609 	struct list_head *head = &user->validators;
610 	struct user_event_validator *validator;
611 	void *pos, *end = data + len;
612 	u32 loc, offset, size;
613 
614 	list_for_each_entry(validator, head, link) {
615 		pos = data + validator->offset;
616 
617 		/* Already done min_size check, no bounds check here */
618 		loc = *(u32 *)pos;
619 		offset = loc & 0xffff;
620 		size = loc >> 16;
621 
622 		if (likely(validator->flags & VALIDATOR_REL))
623 			pos += offset + sizeof(loc);
624 		else
625 			pos = data + offset;
626 
627 		pos += size;
628 
629 		if (unlikely(pos > end))
630 			return -EFAULT;
631 
632 		if (likely(validator->flags & VALIDATOR_ENSURE_NULL))
633 			if (unlikely(*(char *)(pos - 1) != '\0'))
634 				return -EFAULT;
635 	}
636 
637 	return 0;
638 }
639 
640 /*
641  * Writes the user supplied payload out to a trace file.
642  */
643 static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
644 			      void *tpdata, bool *faulted)
645 {
646 	struct trace_event_file *file;
647 	struct trace_entry *entry;
648 	struct trace_event_buffer event_buffer;
649 	size_t size = sizeof(*entry) + i->count;
650 
651 	file = (struct trace_event_file *)tpdata;
652 
653 	if (!file ||
654 	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
655 	    trace_trigger_soft_disabled(file))
656 		return;
657 
658 	/* Allocate and fill trace_entry; the data payload starts right after it (entry + 1) */
659 	entry = trace_event_buffer_reserve(&event_buffer, file, size);
660 
661 	if (unlikely(!entry))
662 		return;
663 
664 	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
665 		goto discard;
666 
667 	if (!list_empty(&user->validators) &&
668 	    unlikely(user_event_validate(user, entry, size)))
669 		goto discard;
670 
671 	trace_event_buffer_commit(&event_buffer);
672 
673 	return;
674 discard:
675 	*faulted = true;
676 	__trace_event_discard_commit(event_buffer.buffer,
677 				     event_buffer.event);
678 }
679 
680 #ifdef CONFIG_PERF_EVENTS
681 static void user_event_bpf(struct user_event *user, struct iov_iter *i)
682 {
683 	struct user_bpf_context context;
684 	struct user_bpf_iter bpf_i;
685 	char fast_data[MAX_STACK_BPF_DATA];
686 	void *temp = NULL;
687 
688 	if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
689 		/* Raw iterator */
690 		context.data_type = USER_BPF_DATA_ITER;
691 		context.data_len = i->count;
692 		context.iter = &bpf_i;
693 
694 		bpf_i.iov_offset = i->iov_offset;
695 		bpf_i.iov = i->iov;
696 		bpf_i.nr_segs = i->nr_segs;
697 	} else if (i->nr_segs == 1 && iter_is_iovec(i)) {
698 		/* Single buffer from user */
699 		context.data_type = USER_BPF_DATA_USER;
700 		context.data_len = i->count;
701 		context.udata = i->iov->iov_base + i->iov_offset;
702 	} else {
703 		/* Multi buffer from user */
704 		struct iov_iter copy = *i;
705 		size_t copy_size = min_t(size_t, i->count, MAX_BPF_COPY_SIZE);
706 
707 		context.data_type = USER_BPF_DATA_KERNEL;
708 		context.kdata = fast_data;
709 
710 		if (unlikely(copy_size > sizeof(fast_data))) {
711 			temp = kmalloc(copy_size, GFP_NOWAIT);
712 
713 			if (temp)
714 				context.kdata = temp;
715 			else
716 				copy_size = sizeof(fast_data);
717 		}
718 
719 		context.data_len = copy_nofault(context.kdata,
720 						copy_size, &copy);
721 	}
722 
723 	trace_call_bpf(&user->call, &context);
724 
725 	kfree(temp);
726 }
727 
728 /*
729  * Writes the user supplied payload out to perf ring buffer or eBPF program.
730  */
731 static void user_event_perf(struct user_event *user, struct iov_iter *i,
732 			    void *tpdata, bool *faulted)
733 {
734 	struct hlist_head *perf_head;
735 
736 	if (bpf_prog_array_valid(&user->call))
737 		user_event_bpf(user, i);
738 
739 	perf_head = this_cpu_ptr(user->call.perf_events);
740 
741 	if (perf_head && !hlist_empty(perf_head)) {
742 		struct trace_entry *perf_entry;
743 		struct pt_regs *regs;
744 		size_t size = sizeof(*perf_entry) + i->count;
745 		int context;
746 
747 		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
748 						  &regs, &context);
749 
750 		if (unlikely(!perf_entry))
751 			return;
752 
753 		perf_fetch_caller_regs(regs);
754 
755 		if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
756 			goto discard;
757 
758 		if (!list_empty(&user->validators) &&
759 		    unlikely(user_event_validate(user, perf_entry, size)))
760 			goto discard;
761 
762 		perf_trace_buf_submit(perf_entry, size, context,
763 				      user->call.event.type, 1, regs,
764 				      perf_head, NULL);
765 
766 		return;
767 discard:
768 		*faulted = true;
769 		perf_swevent_put_recursion_context(context);
770 	}
771 }
772 #endif
773 
774 /*
775  * Update the register page that is shared between user processes.
776  */
777 static void update_reg_page_for(struct user_event *user)
778 {
779 	struct tracepoint *tp = &user->tracepoint;
780 	char status = 0;
781 
782 	if (atomic_read(&tp->key.enabled) > 0) {
783 		struct tracepoint_func *probe_func_ptr;
784 		user_event_func_t probe_func;
785 
786 		rcu_read_lock_sched();
787 
788 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
789 
790 		if (probe_func_ptr) {
791 			do {
792 				probe_func = probe_func_ptr->func;
793 
794 				if (probe_func == user_event_ftrace)
795 					status |= EVENT_STATUS_FTRACE;
796 #ifdef CONFIG_PERF_EVENTS
797 				else if (probe_func == user_event_perf)
798 					status |= EVENT_STATUS_PERF;
799 #endif
800 				else
801 					status |= EVENT_STATUS_OTHER;
802 			} while ((++probe_func_ptr)->func);
803 		}
804 
805 		rcu_read_unlock_sched();
806 	}
807 
808 	register_page_data[user->index] = status;
809 }
810 
811 /*
812  * Register callback for our events from tracing sub-systems.
813  */
814 static int user_event_reg(struct trace_event_call *call,
815 			  enum trace_reg type,
816 			  void *data)
817 {
818 	struct user_event *user = (struct user_event *)call->data;
819 	int ret = 0;
820 
821 	if (!user)
822 		return -ENOENT;
823 
824 	switch (type) {
825 	case TRACE_REG_REGISTER:
826 		ret = tracepoint_probe_register(call->tp,
827 						call->class->probe,
828 						data);
829 		if (!ret)
830 			goto inc;
831 		break;
832 
833 	case TRACE_REG_UNREGISTER:
834 		tracepoint_probe_unregister(call->tp,
835 					    call->class->probe,
836 					    data);
837 		goto dec;
838 
839 #ifdef CONFIG_PERF_EVENTS
840 	case TRACE_REG_PERF_REGISTER:
841 		ret = tracepoint_probe_register(call->tp,
842 						call->class->perf_probe,
843 						data);
844 		if (!ret)
845 			goto inc;
846 		break;
847 
848 	case TRACE_REG_PERF_UNREGISTER:
849 		tracepoint_probe_unregister(call->tp,
850 					    call->class->perf_probe,
851 					    data);
852 		goto dec;
853 
854 	case TRACE_REG_PERF_OPEN:
855 	case TRACE_REG_PERF_CLOSE:
856 	case TRACE_REG_PERF_ADD:
857 	case TRACE_REG_PERF_DEL:
858 		break;
859 #endif
860 	}
861 
862 	return ret;
863 inc:
864 	atomic_inc(&user->refcnt);
865 	update_reg_page_for(user);
866 	return 0;
867 dec:
868 	update_reg_page_for(user);
869 	atomic_dec(&user->refcnt);
870 	return 0;
871 }
872 
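/* dyn_event 'create' callback for commands using the user_events prefix. */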
873 static int user_event_create(const char *raw_command)
874 {
875 	struct user_event *user;
876 	char *name;
877 	int ret;
878 
879 	if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX))
880 		return -ECANCELED;
881 
882 	raw_command += USER_EVENTS_PREFIX_LEN;
883 	raw_command = skip_spaces(raw_command);
884 
885 	name = kstrdup(raw_command, GFP_KERNEL);
886 
887 	if (!name)
888 		return -ENOMEM;
889 
890 	mutex_lock(&reg_mutex);
891 
892 	ret = user_event_parse_cmd(name, &user);
893 
894 	if (!ret)
895 		atomic_dec(&user->refcnt);
896 
897 	mutex_unlock(&reg_mutex);
898 
899 	if (ret)
900 		kfree(name);
901 
902 	return ret;
903 }
904 
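/* dyn_event 'show' callback: prints the event back in register-command form. */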
905 static int user_event_show(struct seq_file *m, struct dyn_event *ev)
906 {
907 	struct user_event *user = container_of(ev, struct user_event, devent);
908 	struct ftrace_event_field *field, *next;
909 	struct list_head *head;
910 	int depth = 0;
911 
912 	seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user));
913 
914 	head = trace_get_fields(&user->call);
915 
916 	list_for_each_entry_safe_reverse(field, next, head, link) {
917 		if (depth == 0)
918 			seq_puts(m, " ");
919 		else
920 			seq_puts(m, "; ");
921 
922 		seq_printf(m, "%s %s", field->type, field->name);
923 
924 		if (str_has_prefix(field->type, "struct "))
925 			seq_printf(m, " %d", field->size);
926 
927 		depth++;
928 	}
929 
930 	seq_puts(m, "\n");
931 
932 	return 0;
933 }
934 
935 static bool user_event_is_busy(struct dyn_event *ev)
936 {
937 	struct user_event *user = container_of(ev, struct user_event, devent);
938 
939 	return atomic_read(&user->refcnt) != 0;
940 }
941 
942 static int user_event_free(struct dyn_event *ev)
943 {
944 	struct user_event *user = container_of(ev, struct user_event, devent);
945 
946 	if (atomic_read(&user->refcnt) != 0)
947 		return -EBUSY;
948 
949 	return destroy_user_event(user);
950 }
951 
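/*
 * Compares one field against the matching argv tokens by rebuilding both
 * sides as "<type> <name>[;]" strings.
 */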
952 static bool user_field_match(struct ftrace_event_field *field, int argc,
953 			     const char **argv, int *iout)
954 {
955 	char *field_name, *arg_name;
956 	int len, pos, i = *iout;
957 	bool colon = false, match = false;
958 
959 	if (i >= argc)
960 		return false;
961 
962 	len = MAX_FIELD_ARG_NAME;
963 	field_name = kmalloc(len, GFP_KERNEL);
964 	arg_name = kmalloc(len, GFP_KERNEL);
965 
966 	if (!arg_name || !field_name)
967 		goto out;
968 
969 	pos = 0;
970 
971 	for (; i < argc; ++i) {
972 		if (i != *iout)
973 			pos += snprintf(arg_name + pos, len - pos, " ");
974 
975 		pos += snprintf(arg_name + pos, len - pos, "%s", argv[i]);
976 
977 		if (strchr(argv[i], ';')) {
978 			++i;
979 			colon = true;
980 			break;
981 		}
982 	}
983 
984 	pos = 0;
985 
986 	pos += snprintf(field_name + pos, len - pos, "%s", field->type);
987 	pos += snprintf(field_name + pos, len - pos, " ");
988 	pos += snprintf(field_name + pos, len - pos, "%s", field->name);
989 
990 	if (colon)
991 		pos += snprintf(field_name + pos, len - pos, ";");
992 
993 	*iout = i;
994 
995 	match = strcmp(arg_name, field_name) == 0;
996 out:
997 	kfree(arg_name);
998 	kfree(field_name);
999 
1000 	return match;
1001 }
1002 
1003 static bool user_fields_match(struct user_event *user, int argc,
1004 			      const char **argv)
1005 {
1006 	struct ftrace_event_field *field, *next;
1007 	struct list_head *head = &user->fields;
1008 	int i = 0;
1009 
1010 	list_for_each_entry_safe_reverse(field, next, head, link)
1011 		if (!user_field_match(field, argc, argv, &i))
1012 			return false;
1013 
1014 	if (i != argc)
1015 		return false;
1016 
1017 	return true;
1018 }
1019 
1020 static bool user_event_match(const char *system, const char *event,
1021 			     int argc, const char **argv, struct dyn_event *ev)
1022 {
1023 	struct user_event *user = container_of(ev, struct user_event, devent);
1024 	bool match;
1025 
1026 	match = strcmp(EVENT_NAME(user), event) == 0 &&
1027 		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
1028 
1029 	if (match && argc > 0)
1030 		match = user_fields_match(user, argc, argv);
1031 
1032 	return match;
1033 }
1034 
1035 static struct dyn_event_operations user_event_dops = {
1036 	.create = user_event_create,
1037 	.show = user_event_show,
1038 	.is_busy = user_event_is_busy,
1039 	.free = user_event_free,
1040 	.match = user_event_match,
1041 };
1042 
1043 static int user_event_trace_register(struct user_event *user)
1044 {
1045 	int ret;
1046 
1047 	ret = register_trace_event(&user->call.event);
1048 
1049 	if (!ret)
1050 		return -ENODEV;
1051 
1052 	ret = trace_add_event_call(&user->call);
1053 
1054 	if (ret)
1055 		unregister_trace_event(&user->call.event);
1056 
1057 	return ret;
1058 }
1059 
1060 /*
1061  * Parses the event name, arguments and flags, then registers if successful.
1062  * On success this method takes ownership of the name buffer; on failure the caller frees it.
1063  * Upon success the returned user_event has its ref count increased by 1.
1064  */
1065 static int user_event_parse(char *name, char *args, char *flags,
1066 			    struct user_event **newuser)
1067 {
1068 	int ret;
1069 	int index;
1070 	u32 key;
1071 	struct user_event *user;
1072 
1073 	/* Prevent dyn_event from racing */
1074 	mutex_lock(&event_mutex);
1075 	user = find_user_event(name, &key);
1076 	mutex_unlock(&event_mutex);
1077 
1078 	if (user) {
1079 		*newuser = user;
1080 		/*
1081 		 * Name is allocated by the caller; free it here since the event
1082 		 * already exists. The caller only frees the name on failure.
1083 		 */
1084 		kfree(name);
1085 		return 0;
1086 	}
1087 
1088 	index = find_first_zero_bit(page_bitmap, MAX_EVENTS);
1089 
1090 	if (index == MAX_EVENTS)
1091 		return -EMFILE;
1092 
1093 	user = kzalloc(sizeof(*user), GFP_KERNEL);
1094 
1095 	if (!user)
1096 		return -ENOMEM;
1097 
1098 	INIT_LIST_HEAD(&user->class.fields);
1099 	INIT_LIST_HEAD(&user->fields);
1100 	INIT_LIST_HEAD(&user->validators);
1101 
1102 	user->tracepoint.name = name;
1103 
1104 	user_event_parse_flags(user, flags);
1105 
1106 	ret = user_event_parse_fields(user, args);
1107 
1108 	if (ret)
1109 		goto put_user;
1110 
1111 	ret = user_event_create_print_fmt(user);
1112 
1113 	if (ret)
1114 		goto put_user;
1115 
1116 	user->call.data = user;
1117 	user->call.class = &user->class;
1118 	user->call.name = name;
1119 	user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
1120 	user->call.tp = &user->tracepoint;
1121 	user->call.event.funcs = &user_event_funcs;
1122 
1123 	user->class.system = USER_EVENTS_SYSTEM;
1124 	user->class.fields_array = user_event_fields_array;
1125 	user->class.get_fields = user_event_get_fields;
1126 	user->class.reg = user_event_reg;
1127 	user->class.probe = user_event_ftrace;
1128 #ifdef CONFIG_PERF_EVENTS
1129 	user->class.perf_probe = user_event_perf;
1130 #endif
1131 
1132 	mutex_lock(&event_mutex);
1133 	ret = user_event_trace_register(user);
1134 	mutex_unlock(&event_mutex);
1135 
1136 	if (ret)
1137 		goto put_user;
1138 
1139 	user->index = index;
1140 
1141 	/* Ensure we track ref */
1142 	atomic_inc(&user->refcnt);
1143 
1144 	dyn_event_init(&user->devent, &user_event_dops);
1145 	dyn_event_add(&user->devent, &user->call);
1146 	set_bit(user->index, page_bitmap);
1147 	hash_add(register_table, &user->node, key);
1148 
1149 	*newuser = user;
1150 	return 0;
1151 put_user:
1152 	user_event_destroy_fields(user);
1153 	user_event_destroy_validators(user);
1154 	kfree(user);
1155 	return ret;
1156 }
1157 
1158 /*
1159  * Deletes a previously created event if it is no longer being used.
1160  */
1161 static int delete_user_event(char *name)
1162 {
1163 	u32 key;
1164 	int ret;
1165 	struct user_event *user = find_user_event(name, &key);
1166 
1167 	if (!user)
1168 		return -ENOENT;
1169 
1170 	/* Ensure we are the last ref */
1171 	if (atomic_read(&user->refcnt) != 1) {
1172 		ret = -EBUSY;
1173 		goto put_ref;
1174 	}
1175 
1176 	ret = destroy_user_event(user);
1177 
1178 	if (ret)
1179 		goto put_ref;
1180 
1181 	return ret;
1182 put_ref:
1183 	/* No longer have this ref */
1184 	atomic_dec(&user->refcnt);
1185 
1186 	return ret;
1187 }
1188 
1189 /*
1190  * Validates the user payload and writes via iterator.
1191  */
1192 static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
1193 {
1194 	struct user_event_refs *refs;
1195 	struct user_event *user = NULL;
1196 	struct tracepoint *tp;
1197 	ssize_t ret = i->count;
1198 	int idx;
1199 
1200 	if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx)))
1201 		return -EFAULT;
1202 
1203 	rcu_read_lock_sched();
1204 
1205 	refs = rcu_dereference_sched(file->private_data);
1206 
1207 	/*
1208 	 * The refs->events array is protected by RCU, and new items may be
1209 	 * added. But the user retrieved from indexing into the events array
1210 	 * shall be immutable while the file is opened.
1211 	 */
1212 	if (likely(refs && idx < refs->count))
1213 		user = refs->events[idx];
1214 
1215 	rcu_read_unlock_sched();
1216 
1217 	if (unlikely(user == NULL))
1218 		return -ENOENT;
1219 
1220 	if (unlikely(i->count < user->min_size))
1221 		return -EINVAL;
1222 
1223 	tp = &user->tracepoint;
1224 
1225 	/*
1226 	 * It's possible key.enabled gets disabled after this check; however,
1227 	 * we don't mind if a few events slip through in that window.
1228 	 */
1229 	if (likely(atomic_read(&tp->key.enabled) > 0)) {
1230 		struct tracepoint_func *probe_func_ptr;
1231 		user_event_func_t probe_func;
1232 		struct iov_iter copy;
1233 		void *tpdata;
1234 		bool faulted;
1235 
1236 		if (unlikely(fault_in_iov_iter_readable(i, i->count)))
1237 			return -EFAULT;
1238 
1239 		faulted = false;
1240 
1241 		rcu_read_lock_sched();
1242 
1243 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
1244 
1245 		if (probe_func_ptr) {
1246 			do {
1247 				copy = *i;
1248 				probe_func = probe_func_ptr->func;
1249 				tpdata = probe_func_ptr->data;
1250 				probe_func(user, &copy, tpdata, &faulted);
1251 			} while ((++probe_func_ptr)->func);
1252 		}
1253 
1254 		rcu_read_unlock_sched();
1255 
1256 		if (unlikely(faulted))
1257 			return -EFAULT;
1258 	}
1259 
1260 	return ret;
1261 }
1262 
1263 static ssize_t user_events_write(struct file *file, const char __user *ubuf,
1264 				 size_t count, loff_t *ppos)
1265 {
1266 	struct iovec iov;
1267 	struct iov_iter i;
1268 
1269 	if (unlikely(*ppos != 0))
1270 		return -EFAULT;
1271 
1272 	if (unlikely(import_single_range(READ, (char *)ubuf, count, &iov, &i)))
1273 		return -EFAULT;
1274 
1275 	return user_events_write_core(file, &i);
1276 }
1277 
1278 static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
1279 {
1280 	return user_events_write_core(kp->ki_filp, i);
1281 }
1282 
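/*
 * Adds a user_event to this file's refs array (published via RCU) and
 * returns the per-file index used when writing. Requires reg_mutex.
 */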
1283 static int user_events_ref_add(struct file *file, struct user_event *user)
1284 {
1285 	struct user_event_refs *refs, *new_refs;
1286 	int i, size, count = 0;
1287 
1288 	refs = rcu_dereference_protected(file->private_data,
1289 					 lockdep_is_held(&reg_mutex));
1290 
1291 	if (refs) {
1292 		count = refs->count;
1293 
1294 		for (i = 0; i < count; ++i)
1295 			if (refs->events[i] == user)
1296 				return i;
1297 	}
1298 
1299 	size = struct_size(refs, events, count + 1);
1300 
1301 	new_refs = kzalloc(size, GFP_KERNEL);
1302 
1303 	if (!new_refs)
1304 		return -ENOMEM;
1305 
1306 	new_refs->count = count + 1;
1307 
1308 	for (i = 0; i < count; ++i)
1309 		new_refs->events[i] = refs->events[i];
1310 
1311 	new_refs->events[i] = user;
1312 
1313 	atomic_inc(&user->refcnt);
1314 
1315 	rcu_assign_pointer(file->private_data, new_refs);
1316 
1317 	if (refs)
1318 		kfree_rcu(refs, rcu);
1319 
1320 	return i;
1321 }
1322 
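/*
 * Copies the user_reg struct in from user space, allowing smaller or
 * zero-padded larger sizes for ABI compatibility.
 */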
1323 static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
1324 {
1325 	u32 size;
1326 	long ret;
1327 
1328 	ret = get_user(size, &ureg->size);
1329 
1330 	if (ret)
1331 		return ret;
1332 
1333 	if (size > PAGE_SIZE)
1334 		return -E2BIG;
1335 
1336 	return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
1337 }
1338 
1339 /*
1340  * Registers a user_event on behalf of a user process.
1341  */
1342 static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
1343 {
1344 	struct user_reg __user *ureg = (struct user_reg __user *)uarg;
1345 	struct user_reg reg;
1346 	struct user_event *user;
1347 	char *name;
1348 	long ret;
1349 
1350 	ret = user_reg_get(ureg, &reg);
1351 
1352 	if (ret)
1353 		return ret;
1354 
1355 	name = strndup_user((const char __user *)(uintptr_t)reg.name_args,
1356 			    MAX_EVENT_DESC);
1357 
1358 	if (IS_ERR(name)) {
1359 		ret = PTR_ERR(name);
1360 		return ret;
1361 	}
1362 
1363 	ret = user_event_parse_cmd(name, &user);
1364 
1365 	if (ret) {
1366 		kfree(name);
1367 		return ret;
1368 	}
1369 
1370 	ret = user_events_ref_add(file, user);
1371 
1372 	/* No longer need parse ref, ref_add either worked or not */
1373 	atomic_dec(&user->refcnt);
1374 
1375 	/* Positive number is index and valid */
1376 	if (ret < 0)
1377 		return ret;
1378 
1379 	put_user((u32)ret, &ureg->write_index);
1380 	put_user(user->index, &ureg->status_index);
1381 
1382 	return 0;
1383 }
1384 
1385 /*
1386  * Deletes a user_event on behalf of a user process.
1387  */
1388 static long user_events_ioctl_del(struct file *file, unsigned long uarg)
1389 {
1390 	void __user *ubuf = (void __user *)uarg;
1391 	char *name;
1392 	long ret;
1393 
1394 	name = strndup_user(ubuf, MAX_EVENT_DESC);
1395 
1396 	if (IS_ERR(name))
1397 		return PTR_ERR(name);
1398 
1399 	/* event_mutex prevents dyn_event from racing */
1400 	mutex_lock(&event_mutex);
1401 	ret = delete_user_event(name);
1402 	mutex_unlock(&event_mutex);
1403 
1404 	kfree(name);
1405 
1406 	return ret;
1407 }
1408 
1409 /*
1410  * Handles the ioctl from user mode to register or alter operations.
1411  */
1412 static long user_events_ioctl(struct file *file, unsigned int cmd,
1413 			      unsigned long uarg)
1414 {
1415 	long ret = -ENOTTY;
1416 
1417 	switch (cmd) {
1418 	case DIAG_IOCSREG:
1419 		mutex_lock(&reg_mutex);
1420 		ret = user_events_ioctl_reg(file, uarg);
1421 		mutex_unlock(&reg_mutex);
1422 		break;
1423 
1424 	case DIAG_IOCSDEL:
1425 		mutex_lock(&reg_mutex);
1426 		ret = user_events_ioctl_del(file, uarg);
1427 		mutex_unlock(&reg_mutex);
1428 		break;
1429 	}
1430 
1431 	return ret;
1432 }
1433 
1434 /*
1435  * Handles the final close of the file from user mode.
1436  */
1437 static int user_events_release(struct inode *node, struct file *file)
1438 {
1439 	struct user_event_refs *refs;
1440 	struct user_event *user;
1441 	int i;
1442 
1443 	/*
1444 	 * Ensure refs cannot change under any situation by taking the
1445 	 * register mutex during the final freeing of the references.
1446 	 */
1447 	mutex_lock(&reg_mutex);
1448 
1449 	refs = file->private_data;
1450 
1451 	if (!refs)
1452 		goto out;
1453 
1454 	/*
1455 	 * The lifetime of refs has reached an end; it's tied to this file.
1456 	 * The underlying user_events are ref counted, and cannot be freed.
1457 	 * After this decrement, the user_events may be freed elsewhere.
1458 	 */
1459 	for (i = 0; i < refs->count; ++i) {
1460 		user = refs->events[i];
1461 
1462 		if (user)
1463 			atomic_dec(&user->refcnt);
1464 	}
1465 out:
1466 	file->private_data = NULL;
1467 
1468 	mutex_unlock(&reg_mutex);
1469 
1470 	kfree(refs);
1471 
1472 	return 0;
1473 }
1474 
1475 static const struct file_operations user_data_fops = {
1476 	.write = user_events_write,
1477 	.write_iter = user_events_write_iter,
1478 	.unlocked_ioctl	= user_events_ioctl,
1479 	.release = user_events_release,
1480 };
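/*
 * Illustrative user space usage of this ABI. This is only a sketch for
 * readers of the code, not part of the kernel build: the tracefs mount
 * point, headers and error handling are assumed and may differ per system.
 *
 *	struct user_reg reg = { .size = sizeof(reg) };
 *	int fd = open("/sys/kernel/tracing/user_events_data", O_RDWR);
 *
 *	reg.name_args = (__u64)(uintptr_t)"test u32 count";
 *	ioctl(fd, DIAG_IOCSREG, &reg);
 *
 *	struct { int idx; __u32 count; } payload = {
 *		.idx = reg.write_index,
 *		.count = 1,
 *	};
 *	write(fd, &payload, sizeof(payload));
 *
 * The byte at reg.status_index within the mmap()ed user_events_status
 * page indicates whether any tracer currently has the event enabled.
 */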
1481 
1482 /*
1483  * Maps the shared page into the user process for checking if event is enabled.
1484  */
1485 static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
1486 {
1487 	unsigned long size = vma->vm_end - vma->vm_start;
1488 
1489 	if (size != MAX_EVENTS)
1490 		return -EINVAL;
1491 
1492 	return remap_pfn_range(vma, vma->vm_start,
1493 			       virt_to_phys(register_page_data) >> PAGE_SHIFT,
1494 			       size, vm_get_page_prot(VM_READ));
1495 }
1496 
1497 static void *user_seq_start(struct seq_file *m, loff_t *pos)
1498 {
1499 	if (*pos)
1500 		return NULL;
1501 
1502 	return (void *)1;
1503 }
1504 
1505 static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
1506 {
1507 	++*pos;
1508 	return NULL;
1509 }
1510 
1511 static void user_seq_stop(struct seq_file *m, void *p)
1512 {
1513 }
1514 
1515 static int user_seq_show(struct seq_file *m, void *p)
1516 {
1517 	struct user_event *user;
1518 	char status;
1519 	int i, active = 0, busy = 0, flags;
1520 
1521 	mutex_lock(&reg_mutex);
1522 
1523 	hash_for_each(register_table, i, user, node) {
1524 		status = register_page_data[user->index];
1525 		flags = user->flags;
1526 
1527 		seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));
1528 
1529 		if (flags != 0 || status != 0)
1530 			seq_puts(m, " #");
1531 
1532 		if (status != 0) {
1533 			seq_puts(m, " Used by");
1534 			if (status & EVENT_STATUS_FTRACE)
1535 				seq_puts(m, " ftrace");
1536 			if (status & EVENT_STATUS_PERF)
1537 				seq_puts(m, " perf");
1538 			if (status & EVENT_STATUS_OTHER)
1539 				seq_puts(m, " other");
1540 			busy++;
1541 		}
1542 
1543 		if (flags & FLAG_BPF_ITER)
1544 			seq_puts(m, " FLAG:BPF_ITER");
1545 
1546 		seq_puts(m, "\n");
1547 		active++;
1548 	}
1549 
1550 	mutex_unlock(&reg_mutex);
1551 
1552 	seq_puts(m, "\n");
1553 	seq_printf(m, "Active: %d\n", active);
1554 	seq_printf(m, "Busy: %d\n", busy);
1555 	seq_printf(m, "Max: %ld\n", MAX_EVENTS);
1556 
1557 	return 0;
1558 }
1559 
1560 static const struct seq_operations user_seq_ops = {
1561 	.start = user_seq_start,
1562 	.next  = user_seq_next,
1563 	.stop  = user_seq_stop,
1564 	.show  = user_seq_show,
1565 };
1566 
1567 static int user_status_open(struct inode *node, struct file *file)
1568 {
1569 	return seq_open(file, &user_seq_ops);
1570 }
1571 
1572 static const struct file_operations user_status_fops = {
1573 	.open = user_status_open,
1574 	.mmap = user_status_mmap,
1575 	.read = seq_read,
1576 	.llseek  = seq_lseek,
1577 	.release = seq_release,
1578 };
1579 
1580 /*
1581  * Creates a set of tracefs files to allow user mode interactions.
1582  */
1583 static int create_user_tracefs(void)
1584 {
1585 	struct dentry *edata, *emmap;
1586 
1587 	edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
1588 				    NULL, NULL, &user_data_fops);
1589 
1590 	if (!edata) {
1591 		pr_warn("Could not create tracefs 'user_events_data' entry\n");
1592 		goto err;
1593 	}
1594 
1595 	/* mmap with MAP_SHARED requires writable fd */
1596 	emmap = tracefs_create_file("user_events_status", TRACE_MODE_WRITE,
1597 				    NULL, NULL, &user_status_fops);
1598 
1599 	if (!emmap) {
1600 		tracefs_remove(edata);
1601 		pr_warn("Could not create tracefs 'user_events_status' entry\n");
1602 		goto err;
1603 	}
1604 
1605 	return 0;
1606 err:
1607 	return -ENODEV;
1608 }
1609 
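/*
 * Sets or clears the reserved flag on the register status pages so they
 * can be safely mapped into user processes.
 */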
1610 static void set_page_reservations(bool set)
1611 {
1612 	int page;
1613 
1614 	for (page = 0; page < MAX_PAGES; ++page) {
1615 		void *addr = register_page_data + (PAGE_SIZE * page);
1616 
1617 		if (set)
1618 			SetPageReserved(virt_to_page(addr));
1619 		else
1620 			ClearPageReserved(virt_to_page(addr));
1621 	}
1622 }
1623 
1624 static int __init trace_events_user_init(void)
1625 {
1626 	struct page *pages;
1627 	int ret;
1628 
1629 	/* Clear all bits, then set bit 0, which is reserved for failures */
1630 	bitmap_zero(page_bitmap, MAX_EVENTS);
1631 	set_bit(0, page_bitmap);
1632 
1633 	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER);
1634 	if (!pages)
1635 		return -ENOMEM;
1636 	register_page_data = page_address(pages);
1637 
1638 	set_page_reservations(true);
1639 
1640 	ret = create_user_tracefs();
1641 
1642 	if (ret) {
1643 		pr_warn("user_events could not register with tracefs\n");
1644 		set_page_reservations(false);
1645 		__free_pages(pages, MAX_PAGE_ORDER);
1646 		return ret;
1647 	}
1648 
1649 	if (dyn_event_register(&user_event_dops))
1650 		pr_warn("user_events could not register with dyn_events\n");
1651 
1652 	return 0;
1653 }
1654 
1655 fs_initcall(trace_events_user_init);
1656