xref: /openbmc/linux/kernel/trace/trace_events_user.c (revision 173c2049d12b441b498d6423276f5dd76b1e637b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2021, Microsoft Corporation.
4  *
5  * Authors:
6  *   Beau Belgrave <beaub@linux.microsoft.com>
7  */
8 
9 #include <linux/bitmap.h>
10 #include <linux/cdev.h>
11 #include <linux/hashtable.h>
12 #include <linux/list.h>
13 #include <linux/io.h>
14 #include <linux/uio.h>
15 #include <linux/ioctl.h>
16 #include <linux/jhash.h>
17 #include <linux/trace_events.h>
18 #include <linux/tracefs.h>
19 #include <linux/types.h>
20 #include <linux/uaccess.h>
21 #include <uapi/linux/user_events.h>
22 #include "trace.h"
23 #include "trace_dynevent.h"
24 
25 #define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1)
26 
27 #define FIELD_DEPTH_TYPE 0
28 #define FIELD_DEPTH_NAME 1
29 #define FIELD_DEPTH_SIZE 2
30 
31 /*
32  * Limits how many trace_event calls user processes can create:
33  * Must be multiple of PAGE_SIZE.
34  */
35 #define MAX_PAGES 1
36 #define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)
37 
38 /* Limit how long an event name plus args can be within the subsystem. */
39 #define MAX_EVENT_DESC 512
40 #define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
41 #define MAX_FIELD_ARRAY_SIZE 1024
42 #define MAX_FIELD_ARG_NAME 256
43 
44 #define MAX_BPF_COPY_SIZE PAGE_SIZE
45 #define MAX_STACK_BPF_DATA 512
46 
47 static char *register_page_data;
48 
49 static DEFINE_MUTEX(reg_mutex);
50 static DEFINE_HASHTABLE(register_table, 4);
51 static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);
52 
53 /*
54  * Stores per-event properties. As users register events within a
55  * file, a user_event might be created if it does not already exist.
56  * These are used globally and their lifetime is tied to the
57  * refcnt member. They cannot go away until the refcnt
58  * reaches zero.
59  */
60 struct user_event {
61 	struct tracepoint tracepoint;
62 	struct trace_event_call call;
63 	struct trace_event_class class;
64 	struct dyn_event devent;
65 	struct hlist_node node;
66 	struct list_head fields;
67 	struct list_head validators;
68 	atomic_t refcnt;
69 	int index;
70 	int flags;
71 	int min_size;
72 };
73 
74 /*
75  * Stores per-file event references. As users register events within
76  * a file, this structure is modified and freed via RCU.
77  * The lifetime of this struct is tied to the lifetime of the file.
78  * These are not shared and only accessible by the file that created them.
79  */
80 struct user_event_refs {
81 	struct rcu_head rcu;
82 	int count;
83 	struct user_event *events[];
84 };
85 
86 #define VALIDATOR_ENSURE_NULL (1 << 0)
87 #define VALIDATOR_REL (1 << 1)
88 
89 struct user_event_validator {
90 	struct list_head link;
91 	int offset;
92 	int flags;
93 };
94 
95 typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
96 				   void *tpdata, bool *faulted);
97 
98 static int user_event_parse(char *name, char *args, char *flags,
99 			    struct user_event **newuser);
100 
101 static u32 user_event_key(char *name)
102 {
103 	return jhash(name, strlen(name), 0);
104 }
105 
106 static __always_inline __must_check
107 size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
108 {
109 	size_t ret;
110 
111 	pagefault_disable();
112 
113 	ret = copy_from_iter_nocache(addr, bytes, i);
114 
115 	pagefault_enable();
116 
117 	return ret;
118 }
119 
120 static struct list_head *user_event_get_fields(struct trace_event_call *call)
121 {
122 	struct user_event *user = (struct user_event *)call->data;
123 
124 	return &user->fields;
125 }
126 
127 /*
128  * Parses a register command for user_events
129  * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]]
130  *
131  * Example event named 'test' with a 20 char 'msg' field with an unsigned int
132  * 'id' field after:
133  * test char[20] msg;unsigned int id
134  *
135  * NOTE: Offsets are from the user data perspective, they are not from the
136  * trace_entry/buffer perspective. We automatically add the common properties
137  * sizes to the offset for the user.
138  */
139 static int user_event_parse_cmd(char *raw_command, struct user_event **newuser)
140 {
141 	char *name = raw_command;
142 	char *args = strpbrk(name, " ");
143 	char *flags;
144 
145 	if (args)
146 		*args++ = '\0';
147 
148 	flags = strpbrk(name, ":");
149 
150 	if (flags)
151 		*flags++ = '\0';
152 
153 	return user_event_parse(name, args, flags, newuser);
154 }
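/*
 * Illustrative only, not part of the original file: a hedged sketch of how
 * the register command format described above might be issued through
 * tracefs dynamic_events, assuming USER_EVENTS_PREFIX is "u:" (per the UAPI
 * header) and tracefs is mounted at the usual location:
 *
 *   echo 'u:test char[20] msg; unsigned int id' >> \
 *           /sys/kernel/tracing/dynamic_events
 *
 * user_event_create() strips the prefix and hands the rest to
 * user_event_parse_cmd(), which splits the name from the flags and fields.
 */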
155 
156 static int user_field_array_size(const char *type)
157 {
158 	const char *start = strchr(type, '[');
159 	char val[8];
160 	char *bracket;
161 	int size = 0;
162 
163 	if (start == NULL)
164 		return -EINVAL;
165 
166 	if (strscpy(val, start + 1, sizeof(val)) <= 0)
167 		return -EINVAL;
168 
169 	bracket = strchr(val, ']');
170 
171 	if (!bracket)
172 		return -EINVAL;
173 
174 	*bracket = '\0';
175 
176 	if (kstrtouint(val, 0, &size))
177 		return -EINVAL;
178 
179 	if (size > MAX_FIELD_ARRAY_SIZE)
180 		return -EINVAL;
181 
182 	return size;
183 }
184 
185 static int user_field_size(const char *type)
186 {
187 	/* long is not allowed from a user, since it's ambiguous in size */
188 	if (strcmp(type, "s64") == 0)
189 		return sizeof(s64);
190 	if (strcmp(type, "u64") == 0)
191 		return sizeof(u64);
192 	if (strcmp(type, "s32") == 0)
193 		return sizeof(s32);
194 	if (strcmp(type, "u32") == 0)
195 		return sizeof(u32);
196 	if (strcmp(type, "int") == 0)
197 		return sizeof(int);
198 	if (strcmp(type, "unsigned int") == 0)
199 		return sizeof(unsigned int);
200 	if (strcmp(type, "s16") == 0)
201 		return sizeof(s16);
202 	if (strcmp(type, "u16") == 0)
203 		return sizeof(u16);
204 	if (strcmp(type, "short") == 0)
205 		return sizeof(short);
206 	if (strcmp(type, "unsigned short") == 0)
207 		return sizeof(unsigned short);
208 	if (strcmp(type, "s8") == 0)
209 		return sizeof(s8);
210 	if (strcmp(type, "u8") == 0)
211 		return sizeof(u8);
212 	if (strcmp(type, "char") == 0)
213 		return sizeof(char);
214 	if (strcmp(type, "unsigned char") == 0)
215 		return sizeof(unsigned char);
216 	if (str_has_prefix(type, "char["))
217 		return user_field_array_size(type);
218 	if (str_has_prefix(type, "unsigned char["))
219 		return user_field_array_size(type);
220 	if (str_has_prefix(type, "__data_loc "))
221 		return sizeof(u32);
222 	if (str_has_prefix(type, "__rel_loc "))
223 		return sizeof(u32);
224 
225 	/* Unknown basic type, error */
226 	return -EINVAL;
227 }
228 
229 static void user_event_destroy_validators(struct user_event *user)
230 {
231 	struct user_event_validator *validator, *next;
232 	struct list_head *head = &user->validators;
233 
234 	list_for_each_entry_safe(validator, next, head, link) {
235 		list_del(&validator->link);
236 		kfree(validator);
237 	}
238 }
239 
240 static void user_event_destroy_fields(struct user_event *user)
241 {
242 	struct ftrace_event_field *field, *next;
243 	struct list_head *head = &user->fields;
244 
245 	list_for_each_entry_safe(field, next, head, link) {
246 		list_del(&field->link);
247 		kfree(field);
248 	}
249 }
250 
251 static int user_event_add_field(struct user_event *user, const char *type,
252 				const char *name, int offset, int size,
253 				int is_signed, int filter_type)
254 {
255 	struct user_event_validator *validator;
256 	struct ftrace_event_field *field;
257 	int validator_flags = 0;
258 
259 	field = kmalloc(sizeof(*field), GFP_KERNEL);
260 
261 	if (!field)
262 		return -ENOMEM;
263 
264 	if (str_has_prefix(type, "__data_loc "))
265 		goto add_validator;
266 
267 	if (str_has_prefix(type, "__rel_loc ")) {
268 		validator_flags |= VALIDATOR_REL;
269 		goto add_validator;
270 	}
271 
272 	goto add_field;
273 
274 add_validator:
275 	if (strstr(type, "char") != NULL)
276 		validator_flags |= VALIDATOR_ENSURE_NULL;
277 
278 	validator = kmalloc(sizeof(*validator), GFP_KERNEL);
279 
280 	if (!validator) {
281 		kfree(field);
282 		return -ENOMEM;
283 	}
284 
285 	validator->flags = validator_flags;
286 	validator->offset = offset;
287 
288 	/* Want sequential access when validating */
289 	list_add_tail(&validator->link, &user->validators);
290 
291 add_field:
292 	field->type = type;
293 	field->name = name;
294 	field->offset = offset;
295 	field->size = size;
296 	field->is_signed = is_signed;
297 	field->filter_type = filter_type;
298 
299 	list_add(&field->link, &user->fields);
300 
301 	/*
302 	 * Min size required from user writes; this does not include
303 	 * the size of trace_entry (common fields).
304 	 */
305 	user->min_size = (offset + size) - sizeof(struct trace_entry);
306 
307 	return 0;
308 }
309 
310 /*
311  * Parses the values of a field within the description
312  * Format: type name [size]
313  */
314 static int user_event_parse_field(char *field, struct user_event *user,
315 				  u32 *offset)
316 {
317 	char *part, *type, *name;
318 	u32 depth = 0, saved_offset = *offset;
319 	int len, size = -EINVAL;
320 	bool is_struct = false;
321 
322 	field = skip_spaces(field);
323 
324 	if (*field == '\0')
325 		return 0;
326 
327 	/* Handle types that have a space within */
328 	len = str_has_prefix(field, "unsigned ");
329 	if (len)
330 		goto skip_next;
331 
332 	len = str_has_prefix(field, "struct ");
333 	if (len) {
334 		is_struct = true;
335 		goto skip_next;
336 	}
337 
338 	len = str_has_prefix(field, "__data_loc unsigned ");
339 	if (len)
340 		goto skip_next;
341 
342 	len = str_has_prefix(field, "__data_loc ");
343 	if (len)
344 		goto skip_next;
345 
346 	len = str_has_prefix(field, "__rel_loc unsigned ");
347 	if (len)
348 		goto skip_next;
349 
350 	len = str_has_prefix(field, "__rel_loc ");
351 	if (len)
352 		goto skip_next;
353 
354 	goto parse;
355 skip_next:
356 	type = field;
357 	field = strpbrk(field + len, " ");
358 
359 	if (field == NULL)
360 		return -EINVAL;
361 
362 	*field++ = '\0';
363 	depth++;
364 parse:
365 	name = NULL;
366 
367 	while ((part = strsep(&field, " ")) != NULL) {
368 		switch (depth++) {
369 		case FIELD_DEPTH_TYPE:
370 			type = part;
371 			break;
372 		case FIELD_DEPTH_NAME:
373 			name = part;
374 			break;
375 		case FIELD_DEPTH_SIZE:
376 			if (!is_struct)
377 				return -EINVAL;
378 
379 			if (kstrtou32(part, 10, &size))
380 				return -EINVAL;
381 			break;
382 		default:
383 			return -EINVAL;
384 		}
385 	}
386 
387 	if (depth < FIELD_DEPTH_SIZE || !name)
388 		return -EINVAL;
389 
390 	if (depth == FIELD_DEPTH_SIZE)
391 		size = user_field_size(type);
392 
393 	if (size == 0)
394 		return -EINVAL;
395 
396 	if (size < 0)
397 		return size;
398 
399 	*offset = saved_offset + size;
400 
401 	return user_event_add_field(user, type, name, saved_offset, size,
402 				    type[0] != 'u', FILTER_OTHER);
403 }
404 
405 static void user_event_parse_flags(struct user_event *user, char *flags)
406 {
407 	char *flag;
408 
409 	if (flags == NULL)
410 		return;
411 
412 	while ((flag = strsep(&flags, ",")) != NULL) {
413 		if (strcmp(flag, "BPF_ITER") == 0)
414 			user->flags |= FLAG_BPF_ITER;
415 	}
416 }
417 
418 static int user_event_parse_fields(struct user_event *user, char *args)
419 {
420 	char *field;
421 	u32 offset = sizeof(struct trace_entry);
422 	int ret = -EINVAL;
423 
424 	if (args == NULL)
425 		return 0;
426 
427 	while ((field = strsep(&args, ";")) != NULL) {
428 		ret = user_event_parse_field(field, user, &offset);
429 
430 		if (ret)
431 			break;
432 	}
433 
434 	return ret;
435 }
436 
437 static struct trace_event_fields user_event_fields_array[1];
438 
439 static const char *user_field_format(const char *type)
440 {
441 	if (strcmp(type, "s64") == 0)
442 		return "%lld";
443 	if (strcmp(type, "u64") == 0)
444 		return "%llu";
445 	if (strcmp(type, "s32") == 0)
446 		return "%d";
447 	if (strcmp(type, "u32") == 0)
448 		return "%u";
449 	if (strcmp(type, "int") == 0)
450 		return "%d";
451 	if (strcmp(type, "unsigned int") == 0)
452 		return "%u";
453 	if (strcmp(type, "s16") == 0)
454 		return "%d";
455 	if (strcmp(type, "u16") == 0)
456 		return "%u";
457 	if (strcmp(type, "short") == 0)
458 		return "%d";
459 	if (strcmp(type, "unsigned short") == 0)
460 		return "%u";
461 	if (strcmp(type, "s8") == 0)
462 		return "%d";
463 	if (strcmp(type, "u8") == 0)
464 		return "%u";
465 	if (strcmp(type, "char") == 0)
466 		return "%d";
467 	if (strcmp(type, "unsigned char") == 0)
468 		return "%u";
469 	if (strstr(type, "char[") != NULL)
470 		return "%s";
471 
472 	/* Unknown type, likely a struct; this is allowed, treat it as 64-bit */
473 	return "%llu";
474 }
475 
476 static bool user_field_is_dyn_string(const char *type, const char **str_func)
477 {
478 	if (str_has_prefix(type, "__data_loc ")) {
479 		*str_func = "__get_str";
480 		goto check;
481 	}
482 
483 	if (str_has_prefix(type, "__rel_loc ")) {
484 		*str_func = "__get_rel_str";
485 		goto check;
486 	}
487 
488 	return false;
489 check:
490 	return strstr(type, "char") != NULL;
491 }
492 
493 #define LEN_OR_ZERO (len ? len - pos : 0)
494 static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
495 {
496 	struct ftrace_event_field *field, *next;
497 	struct list_head *head = &user->fields;
498 	int pos = 0, depth = 0;
499 	const char *str_func;
500 
501 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
502 
503 	list_for_each_entry_safe_reverse(field, next, head, link) {
504 		if (depth != 0)
505 			pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
506 
507 		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s",
508 				field->name, user_field_format(field->type));
509 
510 		depth++;
511 	}
512 
513 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
514 
515 	list_for_each_entry_safe_reverse(field, next, head, link) {
516 		if (user_field_is_dyn_string(field->type, &str_func))
517 			pos += snprintf(buf + pos, LEN_OR_ZERO,
518 					", %s(%s)", str_func, field->name);
519 		else
520 			pos += snprintf(buf + pos, LEN_OR_ZERO,
521 					", REC->%s", field->name);
522 	}
523 
524 	return pos + 1;
525 }
526 #undef LEN_OR_ZERO
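/*
 * Illustrative sketch, not in the original source: for the example event
 * "test char[20] msg; unsigned int id", the two passes above would build a
 * print_fmt roughly like:
 *
 *   "msg=%s id=%u", REC->msg, REC->id
 *
 * The first loop emits the name=format pairs, the second emits the
 * REC->field (or __get_str()/__get_rel_str()) argument list.
 */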
527 
528 static int user_event_create_print_fmt(struct user_event *user)
529 {
530 	char *print_fmt;
531 	int len;
532 
533 	len = user_event_set_print_fmt(user, NULL, 0);
534 
535 	print_fmt = kmalloc(len, GFP_KERNEL);
536 
537 	if (!print_fmt)
538 		return -ENOMEM;
539 
540 	user_event_set_print_fmt(user, print_fmt, len);
541 
542 	user->call.print_fmt = print_fmt;
543 
544 	return 0;
545 }
546 
547 static enum print_line_t user_event_print_trace(struct trace_iterator *iter,
548 						int flags,
549 						struct trace_event *event)
550 {
551 	/* Unsafe to try to decode user provided print_fmt, use hex */
552 	trace_print_hex_dump_seq(&iter->seq, "", DUMP_PREFIX_OFFSET, 16,
553 				 1, iter->ent, iter->ent_size, true);
554 
555 	return trace_handle_return(&iter->seq);
556 }
557 
558 static struct trace_event_functions user_event_funcs = {
559 	.trace = user_event_print_trace,
560 };
561 
562 static int destroy_user_event(struct user_event *user)
563 {
564 	int ret = 0;
565 
566 	/* Must destroy fields before call removal */
567 	user_event_destroy_fields(user);
568 
569 	ret = trace_remove_event_call(&user->call);
570 
571 	if (ret)
572 		return ret;
573 
574 	dyn_event_remove(&user->devent);
575 
576 	register_page_data[user->index] = 0;
577 	clear_bit(user->index, page_bitmap);
578 	hash_del(&user->node);
579 
580 	user_event_destroy_validators(user);
581 	kfree(user->call.print_fmt);
582 	kfree(EVENT_NAME(user));
583 	kfree(user);
584 
585 	return ret;
586 }
587 
588 static struct user_event *find_user_event(char *name, u32 *outkey)
589 {
590 	struct user_event *user;
591 	u32 key = user_event_key(name);
592 
593 	*outkey = key;
594 
595 	hash_for_each_possible(register_table, user, node, key)
596 		if (!strcmp(EVENT_NAME(user), name))
597 			return user;
598 
599 	return NULL;
600 }
601 
602 static int user_event_validate(struct user_event *user, void *data, int len)
603 {
604 	struct list_head *head = &user->validators;
605 	struct user_event_validator *validator;
606 	void *pos, *end = data + len;
607 	u32 loc, offset, size;
608 
609 	list_for_each_entry(validator, head, link) {
610 		pos = data + validator->offset;
611 
612 		/* Already done min_size check, no bounds check here */
613 		loc = *(u32 *)pos;
614 		offset = loc & 0xffff;
615 		size = loc >> 16;
616 
617 		if (likely(validator->flags & VALIDATOR_REL))
618 			pos += offset + sizeof(loc);
619 		else
620 			pos = data + offset;
621 
622 		pos += size;
623 
624 		if (unlikely(pos > end))
625 			return -EFAULT;
626 
627 		if (likely(validator->flags & VALIDATOR_ENSURE_NULL))
628 			if (unlikely(*(char *)(pos - 1) != '\0'))
629 				return -EFAULT;
630 	}
631 
632 	return 0;
633 }
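/*
 * Worked example (illustrative, not part of the original source): the 32-bit
 * loc word packs the payload size in its upper 16 bits and the offset in its
 * lower 16 bits. For a "__data_loc char[]" field whose loc is (5 << 16) | 12,
 * the string occupies 5 bytes starting at offset 12 of the user data, so the
 * bounds check requires data + 12 + 5 <= end and, since VALIDATOR_ENSURE_NULL
 * is set for char types, the byte at offset 16 must be '\0'. For __rel_loc
 * fields the offset is instead relative to the end of the loc word itself.
 */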
634 
635 /*
636  * Writes the user supplied payload out to a trace file.
637  */
638 static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
639 			      void *tpdata, bool *faulted)
640 {
641 	struct trace_event_file *file;
642 	struct trace_entry *entry;
643 	struct trace_event_buffer event_buffer;
644 	size_t size = sizeof(*entry) + i->count;
645 
646 	file = (struct trace_event_file *)tpdata;
647 
648 	if (!file ||
649 	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
650 	    trace_trigger_soft_disabled(file))
651 		return;
652 
653 	/* Allocates and fills trace_entry; entry + 1 is the data payload */
654 	entry = trace_event_buffer_reserve(&event_buffer, file, size);
655 
656 	if (unlikely(!entry))
657 		return;
658 
659 	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
660 		goto discard;
661 
662 	if (!list_empty(&user->validators) &&
663 	    unlikely(user_event_validate(user, entry, size)))
664 		goto discard;
665 
666 	trace_event_buffer_commit(&event_buffer);
667 
668 	return;
669 discard:
670 	*faulted = true;
671 	__trace_event_discard_commit(event_buffer.buffer,
672 				     event_buffer.event);
673 }
674 
675 #ifdef CONFIG_PERF_EVENTS
676 static void user_event_bpf(struct user_event *user, struct iov_iter *i)
677 {
678 	struct user_bpf_context context;
679 	struct user_bpf_iter bpf_i;
680 	char fast_data[MAX_STACK_BPF_DATA];
681 	void *temp = NULL;
682 
683 	if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
684 		/* Raw iterator */
685 		context.data_type = USER_BPF_DATA_ITER;
686 		context.data_len = i->count;
687 		context.iter = &bpf_i;
688 
689 		bpf_i.iov_offset = i->iov_offset;
690 		bpf_i.iov = i->iov;
691 		bpf_i.nr_segs = i->nr_segs;
692 	} else if (i->nr_segs == 1 && iter_is_iovec(i)) {
693 		/* Single buffer from user */
694 		context.data_type = USER_BPF_DATA_USER;
695 		context.data_len = i->count;
696 		context.udata = i->iov->iov_base + i->iov_offset;
697 	} else {
698 		/* Multi buffer from user */
699 		struct iov_iter copy = *i;
700 		size_t copy_size = min_t(size_t, i->count, MAX_BPF_COPY_SIZE);
701 
702 		context.data_type = USER_BPF_DATA_KERNEL;
703 		context.kdata = fast_data;
704 
705 		if (unlikely(copy_size > sizeof(fast_data))) {
706 			temp = kmalloc(copy_size, GFP_NOWAIT);
707 
708 			if (temp)
709 				context.kdata = temp;
710 			else
711 				copy_size = sizeof(fast_data);
712 		}
713 
714 		context.data_len = copy_nofault(context.kdata,
715 						copy_size, &copy);
716 	}
717 
718 	trace_call_bpf(&user->call, &context);
719 
720 	kfree(temp);
721 }
722 
723 /*
724  * Writes the user supplied payload out to perf ring buffer or eBPF program.
725  */
726 static void user_event_perf(struct user_event *user, struct iov_iter *i,
727 			    void *tpdata, bool *faulted)
728 {
729 	struct hlist_head *perf_head;
730 
731 	if (bpf_prog_array_valid(&user->call))
732 		user_event_bpf(user, i);
733 
734 	perf_head = this_cpu_ptr(user->call.perf_events);
735 
736 	if (perf_head && !hlist_empty(perf_head)) {
737 		struct trace_entry *perf_entry;
738 		struct pt_regs *regs;
739 		size_t size = sizeof(*perf_entry) + i->count;
740 		int context;
741 
742 		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
743 						  &regs, &context);
744 
745 		if (unlikely(!perf_entry))
746 			return;
747 
748 		perf_fetch_caller_regs(regs);
749 
750 		if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
751 			goto discard;
752 
753 		if (!list_empty(&user->validators) &&
754 		    unlikely(user_event_validate(user, perf_entry, size)))
755 			goto discard;
756 
757 		perf_trace_buf_submit(perf_entry, size, context,
758 				      user->call.event.type, 1, regs,
759 				      perf_head, NULL);
760 
761 		return;
762 discard:
763 		*faulted = true;
764 		perf_swevent_put_recursion_context(context);
765 	}
766 }
767 #endif
768 
769 /*
770  * Update the register page that is shared between user processes.
771  */
772 static void update_reg_page_for(struct user_event *user)
773 {
774 	struct tracepoint *tp = &user->tracepoint;
775 	char status = 0;
776 
777 	if (atomic_read(&tp->key.enabled) > 0) {
778 		struct tracepoint_func *probe_func_ptr;
779 		user_event_func_t probe_func;
780 
781 		rcu_read_lock_sched();
782 
783 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
784 
785 		if (probe_func_ptr) {
786 			do {
787 				probe_func = probe_func_ptr->func;
788 
789 				if (probe_func == user_event_ftrace)
790 					status |= EVENT_STATUS_FTRACE;
791 #ifdef CONFIG_PERF_EVENTS
792 				else if (probe_func == user_event_perf)
793 					status |= EVENT_STATUS_PERF;
794 #endif
795 				else
796 					status |= EVENT_STATUS_OTHER;
797 			} while ((++probe_func_ptr)->func);
798 		}
799 
800 		rcu_read_unlock_sched();
801 	}
802 
803 	register_page_data[user->index] = status;
804 }
805 
806 /*
807  * Register callback for our events from tracing sub-systems.
808  */
809 static int user_event_reg(struct trace_event_call *call,
810 			  enum trace_reg type,
811 			  void *data)
812 {
813 	struct user_event *user = (struct user_event *)call->data;
814 	int ret = 0;
815 
816 	if (!user)
817 		return -ENOENT;
818 
819 	switch (type) {
820 	case TRACE_REG_REGISTER:
821 		ret = tracepoint_probe_register(call->tp,
822 						call->class->probe,
823 						data);
824 		if (!ret)
825 			goto inc;
826 		break;
827 
828 	case TRACE_REG_UNREGISTER:
829 		tracepoint_probe_unregister(call->tp,
830 					    call->class->probe,
831 					    data);
832 		goto dec;
833 
834 #ifdef CONFIG_PERF_EVENTS
835 	case TRACE_REG_PERF_REGISTER:
836 		ret = tracepoint_probe_register(call->tp,
837 						call->class->perf_probe,
838 						data);
839 		if (!ret)
840 			goto inc;
841 		break;
842 
843 	case TRACE_REG_PERF_UNREGISTER:
844 		tracepoint_probe_unregister(call->tp,
845 					    call->class->perf_probe,
846 					    data);
847 		goto dec;
848 
849 	case TRACE_REG_PERF_OPEN:
850 	case TRACE_REG_PERF_CLOSE:
851 	case TRACE_REG_PERF_ADD:
852 	case TRACE_REG_PERF_DEL:
853 		break;
854 #endif
855 	}
856 
857 	return ret;
858 inc:
859 	atomic_inc(&user->refcnt);
860 	update_reg_page_for(user);
861 	return 0;
862 dec:
863 	update_reg_page_for(user);
864 	atomic_dec(&user->refcnt);
865 	return 0;
866 }
867 
868 static int user_event_create(const char *raw_command)
869 {
870 	struct user_event *user;
871 	char *name;
872 	int ret;
873 
874 	if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX))
875 		return -ECANCELED;
876 
877 	raw_command += USER_EVENTS_PREFIX_LEN;
878 	raw_command = skip_spaces(raw_command);
879 
880 	name = kstrdup(raw_command, GFP_KERNEL);
881 
882 	if (!name)
883 		return -ENOMEM;
884 
885 	mutex_lock(&reg_mutex);
886 	ret = user_event_parse_cmd(name, &user);
887 	mutex_unlock(&reg_mutex);
888 
889 	if (ret)
890 		kfree(name);
891 
892 	return ret;
893 }
894 
895 static int user_event_show(struct seq_file *m, struct dyn_event *ev)
896 {
897 	struct user_event *user = container_of(ev, struct user_event, devent);
898 	struct ftrace_event_field *field, *next;
899 	struct list_head *head;
900 	int depth = 0;
901 
902 	seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user));
903 
904 	head = trace_get_fields(&user->call);
905 
906 	list_for_each_entry_safe_reverse(field, next, head, link) {
907 		if (depth == 0)
908 			seq_puts(m, " ");
909 		else
910 			seq_puts(m, "; ");
911 
912 		seq_printf(m, "%s %s", field->type, field->name);
913 
914 		if (str_has_prefix(field->type, "struct "))
915 			seq_printf(m, " %d", field->size);
916 
917 		depth++;
918 	}
919 
920 	seq_puts(m, "\n");
921 
922 	return 0;
923 }
924 
925 static bool user_event_is_busy(struct dyn_event *ev)
926 {
927 	struct user_event *user = container_of(ev, struct user_event, devent);
928 
929 	return atomic_read(&user->refcnt) != 0;
930 }
931 
932 static int user_event_free(struct dyn_event *ev)
933 {
934 	struct user_event *user = container_of(ev, struct user_event, devent);
935 
936 	if (atomic_read(&user->refcnt) != 0)
937 		return -EBUSY;
938 
939 	return destroy_user_event(user);
940 }
941 
942 static bool user_field_match(struct ftrace_event_field *field, int argc,
943 			     const char **argv, int *iout)
944 {
945 	char *field_name, *arg_name;
946 	int len, pos, i = *iout;
947 	bool colon = false, match = false;
948 
949 	if (i >= argc)
950 		return false;
951 
952 	len = MAX_FIELD_ARG_NAME;
953 	field_name = kmalloc(len, GFP_KERNEL);
954 	arg_name = kmalloc(len, GFP_KERNEL);
955 
956 	if (!arg_name || !field_name)
957 		goto out;
958 
959 	pos = 0;
960 
961 	for (; i < argc; ++i) {
962 		if (i != *iout)
963 			pos += snprintf(arg_name + pos, len - pos, " ");
964 
965 		pos += snprintf(arg_name + pos, len - pos, "%s", argv[i]);
966 
967 		if (strchr(argv[i], ';')) {
968 			++i;
969 			colon = true;
970 			break;
971 		}
972 	}
973 
974 	pos = 0;
975 
976 	pos += snprintf(field_name + pos, len - pos, "%s", field->type);
977 	pos += snprintf(field_name + pos, len - pos, " ");
978 	pos += snprintf(field_name + pos, len - pos, "%s", field->name);
979 
980 	if (colon)
981 		pos += snprintf(field_name + pos, len - pos, ";");
982 
983 	*iout = i;
984 
985 	match = strcmp(arg_name, field_name) == 0;
986 out:
987 	kfree(arg_name);
988 	kfree(field_name);
989 
990 	return match;
991 }
992 
993 static bool user_fields_match(struct user_event *user, int argc,
994 			      const char **argv)
995 {
996 	struct ftrace_event_field *field, *next;
997 	struct list_head *head = &user->fields;
998 	int i = 0;
999 
1000 	list_for_each_entry_safe_reverse(field, next, head, link)
1001 		if (!user_field_match(field, argc, argv, &i))
1002 			return false;
1003 
1004 	if (i != argc)
1005 		return false;
1006 
1007 	return true;
1008 }
1009 
1010 static bool user_event_match(const char *system, const char *event,
1011 			     int argc, const char **argv, struct dyn_event *ev)
1012 {
1013 	struct user_event *user = container_of(ev, struct user_event, devent);
1014 	bool match;
1015 
1016 	match = strcmp(EVENT_NAME(user), event) == 0 &&
1017 		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
1018 
1019 	if (match && argc > 0)
1020 		match = user_fields_match(user, argc, argv);
1021 
1022 	return match;
1023 }
1024 
1025 static struct dyn_event_operations user_event_dops = {
1026 	.create = user_event_create,
1027 	.show = user_event_show,
1028 	.is_busy = user_event_is_busy,
1029 	.free = user_event_free,
1030 	.match = user_event_match,
1031 };
1032 
1033 static int user_event_trace_register(struct user_event *user)
1034 {
1035 	int ret;
1036 
1037 	ret = register_trace_event(&user->call.event);
1038 
1039 	if (!ret)
1040 		return -ENODEV;
1041 
1042 	ret = trace_add_event_call(&user->call);
1043 
1044 	if (ret)
1045 		unregister_trace_event(&user->call.event);
1046 
1047 	return ret;
1048 }
1049 
1050 /*
1051  * Parses the event name, arguments and flags then registers if successful.
1052  * The name buffer lifetime is owned by this method for success cases only.
1053  */
1054 static int user_event_parse(char *name, char *args, char *flags,
1055 			    struct user_event **newuser)
1056 {
1057 	int ret;
1058 	int index;
1059 	u32 key;
1060 	struct user_event *user = find_user_event(name, &key);
1061 
1062 	if (user) {
1063 		*newuser = user;
1064 		/*
1065 		 * Name is allocated by caller, free it since it already exists.
1066 		 * Caller only worries about failure cases for freeing.
1067 		 */
1068 		kfree(name);
1069 		return 0;
1070 	}
1071 
1072 	index = find_first_zero_bit(page_bitmap, MAX_EVENTS);
1073 
1074 	if (index == MAX_EVENTS)
1075 		return -EMFILE;
1076 
1077 	user = kzalloc(sizeof(*user), GFP_KERNEL);
1078 
1079 	if (!user)
1080 		return -ENOMEM;
1081 
1082 	INIT_LIST_HEAD(&user->class.fields);
1083 	INIT_LIST_HEAD(&user->fields);
1084 	INIT_LIST_HEAD(&user->validators);
1085 
1086 	user->tracepoint.name = name;
1087 
1088 	user_event_parse_flags(user, flags);
1089 
1090 	ret = user_event_parse_fields(user, args);
1091 
1092 	if (ret)
1093 		goto put_user;
1094 
1095 	ret = user_event_create_print_fmt(user);
1096 
1097 	if (ret)
1098 		goto put_user;
1099 
1100 	user->call.data = user;
1101 	user->call.class = &user->class;
1102 	user->call.name = name;
1103 	user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
1104 	user->call.tp = &user->tracepoint;
1105 	user->call.event.funcs = &user_event_funcs;
1106 
1107 	user->class.system = USER_EVENTS_SYSTEM;
1108 	user->class.fields_array = user_event_fields_array;
1109 	user->class.get_fields = user_event_get_fields;
1110 	user->class.reg = user_event_reg;
1111 	user->class.probe = user_event_ftrace;
1112 #ifdef CONFIG_PERF_EVENTS
1113 	user->class.perf_probe = user_event_perf;
1114 #endif
1115 
1116 	mutex_lock(&event_mutex);
1117 	ret = user_event_trace_register(user);
1118 	mutex_unlock(&event_mutex);
1119 
1120 	if (ret)
1121 		goto put_user;
1122 
1123 	user->index = index;
1124 	dyn_event_init(&user->devent, &user_event_dops);
1125 	dyn_event_add(&user->devent, &user->call);
1126 	set_bit(user->index, page_bitmap);
1127 	hash_add(register_table, &user->node, key);
1128 
1129 	*newuser = user;
1130 	return 0;
1131 put_user:
1132 	user_event_destroy_fields(user);
1133 	user_event_destroy_validators(user);
1134 	kfree(user);
1135 	return ret;
1136 }
1137 
1138 /*
1139  * Deletes a previously created event if it is no longer being used.
1140  */
1141 static int delete_user_event(char *name)
1142 {
1143 	u32 key;
1144 	int ret;
1145 	struct user_event *user = find_user_event(name, &key);
1146 
1147 	if (!user)
1148 		return -ENOENT;
1149 
1150 	if (atomic_read(&user->refcnt) != 0)
1151 		return -EBUSY;
1152 
1153 	mutex_lock(&event_mutex);
1154 	ret = destroy_user_event(user);
1155 	mutex_unlock(&event_mutex);
1156 
1157 	return ret;
1158 }
1159 
1160 /*
1161  * Validates the user payload and writes via iterator.
1162  */
1163 static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
1164 {
1165 	struct user_event_refs *refs;
1166 	struct user_event *user = NULL;
1167 	struct tracepoint *tp;
1168 	ssize_t ret = i->count;
1169 	int idx;
1170 
1171 	if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx)))
1172 		return -EFAULT;
1173 
1174 	rcu_read_lock_sched();
1175 
1176 	refs = rcu_dereference_sched(file->private_data);
1177 
1178 	/*
1179 	 * The refs->events array is protected by RCU, and new items may be
1180 	 * added. But the user retrieved from indexing into the events array
1181 	 * shall be immutable while the file is open.
1182 	 */
1183 	if (likely(refs && idx < refs->count))
1184 		user = refs->events[idx];
1185 
1186 	rcu_read_unlock_sched();
1187 
1188 	if (unlikely(user == NULL))
1189 		return -ENOENT;
1190 
1191 	if (unlikely(i->count < user->min_size))
1192 		return -EINVAL;
1193 
1194 	tp = &user->tracepoint;
1195 
1196 	/*
1197 	 * It's possible key.enabled becomes disabled after this check; however,
1198 	 * we don't mind if a few events are still emitted in that window.
1199 	 */
1200 	if (likely(atomic_read(&tp->key.enabled) > 0)) {
1201 		struct tracepoint_func *probe_func_ptr;
1202 		user_event_func_t probe_func;
1203 		struct iov_iter copy;
1204 		void *tpdata;
1205 		bool faulted;
1206 
1207 		if (unlikely(fault_in_iov_iter_readable(i, i->count)))
1208 			return -EFAULT;
1209 
1210 		faulted = false;
1211 
1212 		rcu_read_lock_sched();
1213 
1214 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
1215 
1216 		if (probe_func_ptr) {
1217 			do {
1218 				copy = *i;
1219 				probe_func = probe_func_ptr->func;
1220 				tpdata = probe_func_ptr->data;
1221 				probe_func(user, &copy, tpdata, &faulted);
1222 			} while ((++probe_func_ptr)->func);
1223 		}
1224 
1225 		rcu_read_unlock_sched();
1226 
1227 		if (unlikely(faulted))
1228 			return -EFAULT;
1229 	}
1230 
1231 	return ret;
1232 }
1233 
1234 static ssize_t user_events_write(struct file *file, const char __user *ubuf,
1235 				 size_t count, loff_t *ppos)
1236 {
1237 	struct iovec iov;
1238 	struct iov_iter i;
1239 
1240 	if (unlikely(*ppos != 0))
1241 		return -EFAULT;
1242 
1243 	if (unlikely(import_single_range(READ, (char *)ubuf, count, &iov, &i)))
1244 		return -EFAULT;
1245 
1246 	return user_events_write_core(file, &i);
1247 }
1248 
1249 static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
1250 {
1251 	return user_events_write_core(kp->ki_filp, i);
1252 }
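/*
 * A minimal user-space sketch (illustrative only, not part of this file):
 * each write must begin with the int-sized write index returned by the
 * DIAG_IOCSREG ioctl, followed by the event payload. writev() keeps the
 * index and payload in separate buffers; "data_fd", "payload" and
 * "payload_len" are hypothetical names.
 *
 *   int idx = reg.write_index;
 *   struct iovec io[2] = {
 *           { .iov_base = &idx,    .iov_len = sizeof(idx) },
 *           { .iov_base = payload, .iov_len = payload_len },
 *   };
 *
 *   writev(data_fd, io, 2);
 */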
1253 
1254 static int user_events_ref_add(struct file *file, struct user_event *user)
1255 {
1256 	struct user_event_refs *refs, *new_refs;
1257 	int i, size, count = 0;
1258 
1259 	refs = rcu_dereference_protected(file->private_data,
1260 					 lockdep_is_held(&reg_mutex));
1261 
1262 	if (refs) {
1263 		count = refs->count;
1264 
1265 		for (i = 0; i < count; ++i)
1266 			if (refs->events[i] == user)
1267 				return i;
1268 	}
1269 
1270 	size = struct_size(refs, events, count + 1);
1271 
1272 	new_refs = kzalloc(size, GFP_KERNEL);
1273 
1274 	if (!new_refs)
1275 		return -ENOMEM;
1276 
1277 	new_refs->count = count + 1;
1278 
1279 	for (i = 0; i < count; ++i)
1280 		new_refs->events[i] = refs->events[i];
1281 
1282 	new_refs->events[i] = user;
1283 
1284 	atomic_inc(&user->refcnt);
1285 
1286 	rcu_assign_pointer(file->private_data, new_refs);
1287 
1288 	if (refs)
1289 		kfree_rcu(refs, rcu);
1290 
1291 	return i;
1292 }
1293 
1294 static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
1295 {
1296 	u32 size;
1297 	long ret;
1298 
1299 	ret = get_user(size, &ureg->size);
1300 
1301 	if (ret)
1302 		return ret;
1303 
1304 	if (size > PAGE_SIZE)
1305 		return -E2BIG;
1306 
1307 	return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
1308 }
1309 
1310 /*
1311  * Registers a user_event on behalf of a user process.
1312  */
1313 static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
1314 {
1315 	struct user_reg __user *ureg = (struct user_reg __user *)uarg;
1316 	struct user_reg reg;
1317 	struct user_event *user;
1318 	char *name;
1319 	long ret;
1320 
1321 	ret = user_reg_get(ureg, &reg);
1322 
1323 	if (ret)
1324 		return ret;
1325 
1326 	name = strndup_user((const char __user *)(uintptr_t)reg.name_args,
1327 			    MAX_EVENT_DESC);
1328 
1329 	if (IS_ERR(name)) {
1330 		ret = PTR_ERR(name);
1331 		return ret;
1332 	}
1333 
1334 	ret = user_event_parse_cmd(name, &user);
1335 
1336 	if (ret) {
1337 		kfree(name);
1338 		return ret;
1339 	}
1340 
1341 	ret = user_events_ref_add(file, user);
1342 
1343 	/* A non-negative return value is a valid index */
1344 	if (ret < 0)
1345 		return ret;
1346 
1347 	put_user((u32)ret, &ureg->write_index);
1348 	put_user(user->index, &ureg->status_index);
1349 
1350 	return 0;
1351 }
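/*
 * A hedged user-space sketch of the registration flow handled above (not
 * part of the original file); "data_fd" is a hypothetical fd opened on the
 * user_events_data tracefs file:
 *
 *   struct user_reg reg = { 0 };
 *
 *   reg.size = sizeof(reg);
 *   reg.name_args = (__u64)(uintptr_t)"test char[20] msg; unsigned int id";
 *
 *   if (ioctl(data_fd, DIAG_IOCSREG, &reg) == 0) {
 *           // reg.write_index prefixes every subsequent write()
 *           // reg.status_index indexes the mmap'd status page
 *   }
 */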
1352 
1353 /*
1354  * Deletes a user_event on behalf of a user process.
1355  */
1356 static long user_events_ioctl_del(struct file *file, unsigned long uarg)
1357 {
1358 	void __user *ubuf = (void __user *)uarg;
1359 	char *name;
1360 	long ret;
1361 
1362 	name = strndup_user(ubuf, MAX_EVENT_DESC);
1363 
1364 	if (IS_ERR(name))
1365 		return PTR_ERR(name);
1366 
1367 	ret = delete_user_event(name);
1368 
1369 	kfree(name);
1370 
1371 	return ret;
1372 }
1373 
1374 /*
1375  * Handles the ioctl from user mode to register or alter operations.
1376  */
1377 static long user_events_ioctl(struct file *file, unsigned int cmd,
1378 			      unsigned long uarg)
1379 {
1380 	long ret = -ENOTTY;
1381 
1382 	switch (cmd) {
1383 	case DIAG_IOCSREG:
1384 		mutex_lock(&reg_mutex);
1385 		ret = user_events_ioctl_reg(file, uarg);
1386 		mutex_unlock(&reg_mutex);
1387 		break;
1388 
1389 	case DIAG_IOCSDEL:
1390 		mutex_lock(&reg_mutex);
1391 		ret = user_events_ioctl_del(file, uarg);
1392 		mutex_unlock(&reg_mutex);
1393 		break;
1394 	}
1395 
1396 	return ret;
1397 }
1398 
1399 /*
1400  * Handles the final close of the file from user mode.
1401  */
1402 static int user_events_release(struct inode *node, struct file *file)
1403 {
1404 	struct user_event_refs *refs;
1405 	struct user_event *user;
1406 	int i;
1407 
1408 	/*
1409 	 * Ensure refs cannot change under any situation by taking the
1410 	 * register mutex during the final freeing of the references.
1411 	 */
1412 	mutex_lock(&reg_mutex);
1413 
1414 	refs = file->private_data;
1415 
1416 	if (!refs)
1417 		goto out;
1418 
1419 	/*
1420 	 * The lifetime of refs has reached an end; it's tied to this file.
1421 	 * The underlying user_events are ref counted and cannot be freed while
1422 	 * referenced. After this decrement, the user_events may be freed elsewhere.
1423 	 */
1424 	for (i = 0; i < refs->count; ++i) {
1425 		user = refs->events[i];
1426 
1427 		if (user)
1428 			atomic_dec(&user->refcnt);
1429 	}
1430 out:
1431 	file->private_data = NULL;
1432 
1433 	mutex_unlock(&reg_mutex);
1434 
1435 	kfree(refs);
1436 
1437 	return 0;
1438 }
1439 
1440 static const struct file_operations user_data_fops = {
1441 	.write = user_events_write,
1442 	.write_iter = user_events_write_iter,
1443 	.unlocked_ioctl	= user_events_ioctl,
1444 	.release = user_events_release,
1445 };
1446 
1447 /*
1448  * Maps the shared page into the user process for checking if event is enabled.
1449  */
1450 static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
1451 {
1452 	unsigned long size = vma->vm_end - vma->vm_start;
1453 
1454 	if (size != MAX_EVENTS)
1455 		return -EINVAL;
1456 
1457 	return remap_pfn_range(vma, vma->vm_start,
1458 			       virt_to_phys(register_page_data) >> PAGE_SHIFT,
1459 			       size, vm_get_page_prot(VM_READ));
1460 }
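/*
 * A minimal sketch (illustrative, not part of this file) of how user space
 * might consume this mapping; "status_fd" is a hypothetical fd opened on the
 * user_events_status tracefs file and emit_the_event() is a placeholder.
 * The requested length must equal MAX_EVENTS, which is a single page with
 * MAX_PAGES == 1:
 *
 *   char *status = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED,
 *                       status_fd, 0);
 *
 *   if (status != MAP_FAILED && status[reg.status_index])
 *           emit_the_event();
 */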
1461 
1462 static void *user_seq_start(struct seq_file *m, loff_t *pos)
1463 {
1464 	if (*pos)
1465 		return NULL;
1466 
1467 	return (void *)1;
1468 }
1469 
1470 static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
1471 {
1472 	++*pos;
1473 	return NULL;
1474 }
1475 
1476 static void user_seq_stop(struct seq_file *m, void *p)
1477 {
1478 }
1479 
1480 static int user_seq_show(struct seq_file *m, void *p)
1481 {
1482 	struct user_event *user;
1483 	char status;
1484 	int i, active = 0, busy = 0, flags;
1485 
1486 	mutex_lock(&reg_mutex);
1487 
1488 	hash_for_each(register_table, i, user, node) {
1489 		status = register_page_data[user->index];
1490 		flags = user->flags;
1491 
1492 		seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));
1493 
1494 		if (flags != 0 || status != 0)
1495 			seq_puts(m, " #");
1496 
1497 		if (status != 0) {
1498 			seq_puts(m, " Used by");
1499 			if (status & EVENT_STATUS_FTRACE)
1500 				seq_puts(m, " ftrace");
1501 			if (status & EVENT_STATUS_PERF)
1502 				seq_puts(m, " perf");
1503 			if (status & EVENT_STATUS_OTHER)
1504 				seq_puts(m, " other");
1505 			busy++;
1506 		}
1507 
1508 		if (flags & FLAG_BPF_ITER)
1509 			seq_puts(m, " FLAG:BPF_ITER");
1510 
1511 		seq_puts(m, "\n");
1512 		active++;
1513 	}
1514 
1515 	mutex_unlock(&reg_mutex);
1516 
1517 	seq_puts(m, "\n");
1518 	seq_printf(m, "Active: %d\n", active);
1519 	seq_printf(m, "Busy: %d\n", busy);
1520 	seq_printf(m, "Max: %ld\n", MAX_EVENTS);
1521 
1522 	return 0;
1523 }
1524 
1525 static const struct seq_operations user_seq_ops = {
1526 	.start = user_seq_start,
1527 	.next  = user_seq_next,
1528 	.stop  = user_seq_stop,
1529 	.show  = user_seq_show,
1530 };
1531 
1532 static int user_status_open(struct inode *node, struct file *file)
1533 {
1534 	return seq_open(file, &user_seq_ops);
1535 }
1536 
1537 static const struct file_operations user_status_fops = {
1538 	.open = user_status_open,
1539 	.mmap = user_status_mmap,
1540 	.read = seq_read,
1541 	.llseek  = seq_lseek,
1542 	.release = seq_release,
1543 };
1544 
1545 /*
1546  * Creates a set of tracefs files to allow user mode interactions.
1547  */
1548 static int create_user_tracefs(void)
1549 {
1550 	struct dentry *edata, *emmap;
1551 
1552 	edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
1553 				    NULL, NULL, &user_data_fops);
1554 
1555 	if (!edata) {
1556 		pr_warn("Could not create tracefs 'user_events_data' entry\n");
1557 		goto err;
1558 	}
1559 
1560 	/* mmap with MAP_SHARED requires writable fd */
1561 	emmap = tracefs_create_file("user_events_status", TRACE_MODE_WRITE,
1562 				    NULL, NULL, &user_status_fops);
1563 
1564 	if (!emmap) {
1565 		tracefs_remove(edata);
1566 		pr_warn("Could not create tracefs 'user_events_status' entry\n");
1567 		goto err;
1568 	}
1569 
1570 	return 0;
1571 err:
1572 	return -ENODEV;
1573 }
1574 
1575 static void set_page_reservations(bool set)
1576 {
1577 	int page;
1578 
1579 	for (page = 0; page < MAX_PAGES; ++page) {
1580 		void *addr = register_page_data + (PAGE_SIZE * page);
1581 
1582 		if (set)
1583 			SetPageReserved(virt_to_page(addr));
1584 		else
1585 			ClearPageReserved(virt_to_page(addr));
1586 	}
1587 }
1588 
1589 static int __init trace_events_user_init(void)
1590 {
1591 	int ret;
1592 
1593 	/* Zero all bits, then set bit 0 since index 0 is reserved for failures */
1594 	bitmap_zero(page_bitmap, MAX_EVENTS);
1595 	set_bit(0, page_bitmap);
1596 
1597 	register_page_data = kzalloc(MAX_EVENTS, GFP_KERNEL);
1598 
1599 	if (!register_page_data)
1600 		return -ENOMEM;
1601 
1602 	set_page_reservations(true);
1603 
1604 	ret = create_user_tracefs();
1605 
1606 	if (ret) {
1607 		pr_warn("user_events could not register with tracefs\n");
1608 		set_page_reservations(false);
1609 		kfree(register_page_data);
1610 		return ret;
1611 	}
1612 
1613 	if (dyn_event_register(&user_event_dops))
1614 		pr_warn("user_events could not register with dyn_events\n");
1615 
1616 	return 0;
1617 }
1618 
1619 fs_initcall(trace_events_user_init);
1620