xref: /openbmc/linux/kernel/trace/trace_events_user.c (revision c9933d494c54f72290831191c09bb8488bfd5905)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2021, Microsoft Corporation.
4  *
5  * Authors:
6  *   Beau Belgrave <beaub@linux.microsoft.com>
7  */
8 
9 #include <linux/bitmap.h>
10 #include <linux/cdev.h>
11 #include <linux/hashtable.h>
12 #include <linux/list.h>
13 #include <linux/io.h>
14 #include <linux/uio.h>
15 #include <linux/ioctl.h>
16 #include <linux/jhash.h>
17 #include <linux/trace_events.h>
18 #include <linux/tracefs.h>
19 #include <linux/types.h>
20 #include <linux/uaccess.h>
21 /* Reminder to move to uapi when everything works */
22 #ifdef CONFIG_COMPILE_TEST
23 #include <linux/user_events.h>
24 #else
25 #include <uapi/linux/user_events.h>
26 #endif
27 #include "trace.h"
28 #include "trace_dynevent.h"
29 
/* Length of USER_EVENTS_PREFIX without its terminating NUL. */
#define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1)

/*
 * Token positions inside one field description ("type name [size]").
 * user_event_parse_field() uses these as its parse depth.
 */
#define FIELD_DEPTH_TYPE 0
#define FIELD_DEPTH_NAME 1
#define FIELD_DEPTH_SIZE 2

/*
 * Limits how many trace_event calls user processes can create:
 * MAX_EVENTS is MAX_PAGES * PAGE_SIZE, where MAX_PAGES is
 * 2^MAX_PAGE_ORDER. MAX_EVENTS is also the number of bits in page_bitmap.
 */
#define MAX_PAGE_ORDER 0
#define MAX_PAGES (1 << MAX_PAGE_ORDER)
#define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)

/* Limit how long of an event name plus args within the subsystem. */
#define MAX_EVENT_DESC 512
/* The event name is stored as the name of the embedded tracepoint. */
#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
/* Largest "[N]" value accepted by user_field_array_size(). */
#define MAX_FIELD_ARRAY_SIZE 1024
/* Size of the scratch buffers used by user_field_match(). */
#define MAX_FIELD_ARG_NAME 256

/* One status byte per event, indexed by user_event->index. */
static char *register_page_data;

/*
 * reg_mutex is held around event registration in user_event_create() and
 * is the lock user_events_ref_add() expects when reading a file's refs.
 * register_table maps user_event_key(name) to events, and page_bitmap
 * tracks which event indexes are in use.
 */
static DEFINE_MUTEX(reg_mutex);
static DEFINE_HASHTABLE(register_table, 4);
static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);
55 
/*
 * Stores per-event properties, as users register events
 * within a file a user_event might be created if it does not
 * already exist. These are globally used and their lifetime
 * is tied to the refcnt member. These cannot go away until the
 * refcnt reaches zero.
 */
struct user_event {
	/* tracepoint.name holds the event name, see EVENT_NAME() */
	struct tracepoint tracepoint;
	/* Event call handed to the tracing core; call.data points back here */
	struct trace_event_call call;
	/* Class referenced by call.class */
	struct trace_event_class class;
	/* Link into the dynamic event list (dyn_event_add/dyn_event_remove) */
	struct dyn_event devent;
	/* Entry in register_table */
	struct hlist_node node;
	/* ftrace_event_field entries, inserted at the head (newest first) */
	struct list_head fields;
	/* user_event_validator entries, appended in field order */
	struct list_head validators;
	atomic_t refcnt;
	/* Slot in register_page_data and bit in page_bitmap */
	int index;
	/* NOTE(review): not referenced in the code visible here - confirm use */
	int flags;
	/* Minimum user payload size, excluding struct trace_entry */
	int min_size;
};
76 
/*
 * Stores per-file events references, as users register events
 * within a file this structure is modified and freed via RCU.
 * The lifetime of this struct is tied to the lifetime of the file.
 * These are not shared and only accessible by the file that created it.
 */
struct user_event_refs {
	/* Used by kfree_rcu() when a larger replacement array is published */
	struct rcu_head rcu;
	/* Number of valid entries in events[] */
	int count;
	/* Indexed by the event index that user space writes before the payload */
	struct user_event *events[];
};
88 
/* The described data must end with a NUL byte (set for "char" types). */
#define VALIDATOR_ENSURE_NULL (1 << 0)
/* The location is relative to the end of the loc field (__rel_loc). */
#define VALIDATOR_REL (1 << 1)

/*
 * Describes one __data_loc/__rel_loc field that user_event_validate()
 * must check on every write.
 */
struct user_event_validator {
	/* Entry in user_event->validators */
	struct list_head link;
	/* Offset of the 32-bit loc value from the start of the entry */
	int offset;
	/* Combination of VALIDATOR_* flags */
	int flags;
};
97 
/*
 * Signature of the probes run by user_events_write_core() for each
 * registered tracepoint function: the event, an iterator over the user
 * payload, the probe's private data and a flag the probe sets when the
 * payload could not be copied or validated.
 */
typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
				   void *tpdata, bool *faulted);

/* Forward declaration: user_event_parse_cmd() calls this before its definition. */
static int user_event_parse(char *name, char *args, char *flags,
			    struct user_event **newuser);
103 
104 static u32 user_event_key(char *name)
105 {
106 	return jhash(name, strlen(name), 0);
107 }
108 
109 static __always_inline __must_check
110 size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
111 {
112 	size_t ret;
113 
114 	pagefault_disable();
115 
116 	ret = copy_from_iter_nocache(addr, bytes, i);
117 
118 	pagefault_enable();
119 
120 	return ret;
121 }
122 
123 static struct list_head *user_event_get_fields(struct trace_event_call *call)
124 {
125 	struct user_event *user = (struct user_event *)call->data;
126 
127 	return &user->fields;
128 }
129 
/*
 * Splits a user_events register command in place and hands the pieces to
 * user_event_parse().
 * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]]
 *
 * For example, an event named 'test' with a 20 char 'msg' field followed
 * by an unsigned int 'id' field is written as:
 * test char[20] msg;unsigned int id
 *
 * NOTE: Offsets are from the user data perspective, they are not from the
 * trace_entry/buffer perspective. We automatically add the common properties
 * sizes to the offset for the user.
 *
 * Upon success user_event has its ref count increased by 1.
 */
static int user_event_parse_cmd(char *raw_command, struct user_event **newuser)
{
	char *name = raw_command;
	char *args, *flags;

	/* Everything after the first space is the field list */
	args = strpbrk(name, " ");
	if (args) {
		*args = '\0';
		args++;
	}

	/* The name is already cut at the space, so only it is searched for ':' */
	flags = strpbrk(name, ":");
	if (flags) {
		*flags = '\0';
		flags++;
	}

	return user_event_parse(name, args, flags, newuser);
}
160 
161 static int user_field_array_size(const char *type)
162 {
163 	const char *start = strchr(type, '[');
164 	char val[8];
165 	char *bracket;
166 	int size = 0;
167 
168 	if (start == NULL)
169 		return -EINVAL;
170 
171 	if (strscpy(val, start + 1, sizeof(val)) <= 0)
172 		return -EINVAL;
173 
174 	bracket = strchr(val, ']');
175 
176 	if (!bracket)
177 		return -EINVAL;
178 
179 	*bracket = '\0';
180 
181 	if (kstrtouint(val, 0, &size))
182 		return -EINVAL;
183 
184 	if (size > MAX_FIELD_ARRAY_SIZE)
185 		return -EINVAL;
186 
187 	return size;
188 }
189 
190 static int user_field_size(const char *type)
191 {
192 	/* long is not allowed from a user, since it's ambigious in size */
193 	if (strcmp(type, "s64") == 0)
194 		return sizeof(s64);
195 	if (strcmp(type, "u64") == 0)
196 		return sizeof(u64);
197 	if (strcmp(type, "s32") == 0)
198 		return sizeof(s32);
199 	if (strcmp(type, "u32") == 0)
200 		return sizeof(u32);
201 	if (strcmp(type, "int") == 0)
202 		return sizeof(int);
203 	if (strcmp(type, "unsigned int") == 0)
204 		return sizeof(unsigned int);
205 	if (strcmp(type, "s16") == 0)
206 		return sizeof(s16);
207 	if (strcmp(type, "u16") == 0)
208 		return sizeof(u16);
209 	if (strcmp(type, "short") == 0)
210 		return sizeof(short);
211 	if (strcmp(type, "unsigned short") == 0)
212 		return sizeof(unsigned short);
213 	if (strcmp(type, "s8") == 0)
214 		return sizeof(s8);
215 	if (strcmp(type, "u8") == 0)
216 		return sizeof(u8);
217 	if (strcmp(type, "char") == 0)
218 		return sizeof(char);
219 	if (strcmp(type, "unsigned char") == 0)
220 		return sizeof(unsigned char);
221 	if (str_has_prefix(type, "char["))
222 		return user_field_array_size(type);
223 	if (str_has_prefix(type, "unsigned char["))
224 		return user_field_array_size(type);
225 	if (str_has_prefix(type, "__data_loc "))
226 		return sizeof(u32);
227 	if (str_has_prefix(type, "__rel_loc "))
228 		return sizeof(u32);
229 
230 	/* Uknown basic type, error */
231 	return -EINVAL;
232 }
233 
234 static void user_event_destroy_validators(struct user_event *user)
235 {
236 	struct user_event_validator *validator, *next;
237 	struct list_head *head = &user->validators;
238 
239 	list_for_each_entry_safe(validator, next, head, link) {
240 		list_del(&validator->link);
241 		kfree(validator);
242 	}
243 }
244 
245 static void user_event_destroy_fields(struct user_event *user)
246 {
247 	struct ftrace_event_field *field, *next;
248 	struct list_head *head = &user->fields;
249 
250 	list_for_each_entry_safe(field, next, head, link) {
251 		list_del(&field->link);
252 		kfree(field);
253 	}
254 }
255 
256 static int user_event_add_field(struct user_event *user, const char *type,
257 				const char *name, int offset, int size,
258 				int is_signed, int filter_type)
259 {
260 	struct user_event_validator *validator;
261 	struct ftrace_event_field *field;
262 	int validator_flags = 0;
263 
264 	field = kmalloc(sizeof(*field), GFP_KERNEL);
265 
266 	if (!field)
267 		return -ENOMEM;
268 
269 	if (str_has_prefix(type, "__data_loc "))
270 		goto add_validator;
271 
272 	if (str_has_prefix(type, "__rel_loc ")) {
273 		validator_flags |= VALIDATOR_REL;
274 		goto add_validator;
275 	}
276 
277 	goto add_field;
278 
279 add_validator:
280 	if (strstr(type, "char") != 0)
281 		validator_flags |= VALIDATOR_ENSURE_NULL;
282 
283 	validator = kmalloc(sizeof(*validator), GFP_KERNEL);
284 
285 	if (!validator) {
286 		kfree(field);
287 		return -ENOMEM;
288 	}
289 
290 	validator->flags = validator_flags;
291 	validator->offset = offset;
292 
293 	/* Want sequential access when validating */
294 	list_add_tail(&validator->link, &user->validators);
295 
296 add_field:
297 	field->type = type;
298 	field->name = name;
299 	field->offset = offset;
300 	field->size = size;
301 	field->is_signed = is_signed;
302 	field->filter_type = filter_type;
303 
304 	list_add(&field->link, &user->fields);
305 
306 	/*
307 	 * Min size from user writes that are required, this does not include
308 	 * the size of trace_entry (common fields).
309 	 */
310 	user->min_size = (offset + size) - sizeof(struct trace_entry);
311 
312 	return 0;
313 }
314 
315 /*
316  * Parses the values of a field within the description
317  * Format: type name [size]
318  */
319 static int user_event_parse_field(char *field, struct user_event *user,
320 				  u32 *offset)
321 {
322 	char *part, *type, *name;
323 	u32 depth = 0, saved_offset = *offset;
324 	int len, size = -EINVAL;
325 	bool is_struct = false;
326 
327 	field = skip_spaces(field);
328 
329 	if (*field == '\0')
330 		return 0;
331 
332 	/* Handle types that have a space within */
333 	len = str_has_prefix(field, "unsigned ");
334 	if (len)
335 		goto skip_next;
336 
337 	len = str_has_prefix(field, "struct ");
338 	if (len) {
339 		is_struct = true;
340 		goto skip_next;
341 	}
342 
343 	len = str_has_prefix(field, "__data_loc unsigned ");
344 	if (len)
345 		goto skip_next;
346 
347 	len = str_has_prefix(field, "__data_loc ");
348 	if (len)
349 		goto skip_next;
350 
351 	len = str_has_prefix(field, "__rel_loc unsigned ");
352 	if (len)
353 		goto skip_next;
354 
355 	len = str_has_prefix(field, "__rel_loc ");
356 	if (len)
357 		goto skip_next;
358 
359 	goto parse;
360 skip_next:
361 	type = field;
362 	field = strpbrk(field + len, " ");
363 
364 	if (field == NULL)
365 		return -EINVAL;
366 
367 	*field++ = '\0';
368 	depth++;
369 parse:
370 	name = NULL;
371 
372 	while ((part = strsep(&field, " ")) != NULL) {
373 		switch (depth++) {
374 		case FIELD_DEPTH_TYPE:
375 			type = part;
376 			break;
377 		case FIELD_DEPTH_NAME:
378 			name = part;
379 			break;
380 		case FIELD_DEPTH_SIZE:
381 			if (!is_struct)
382 				return -EINVAL;
383 
384 			if (kstrtou32(part, 10, &size))
385 				return -EINVAL;
386 			break;
387 		default:
388 			return -EINVAL;
389 		}
390 	}
391 
392 	if (depth < FIELD_DEPTH_SIZE || !name)
393 		return -EINVAL;
394 
395 	if (depth == FIELD_DEPTH_SIZE)
396 		size = user_field_size(type);
397 
398 	if (size == 0)
399 		return -EINVAL;
400 
401 	if (size < 0)
402 		return size;
403 
404 	*offset = saved_offset + size;
405 
406 	return user_event_add_field(user, type, name, saved_offset, size,
407 				    type[0] != 'u', FILTER_OTHER);
408 }
409 
410 static int user_event_parse_fields(struct user_event *user, char *args)
411 {
412 	char *field;
413 	u32 offset = sizeof(struct trace_entry);
414 	int ret = -EINVAL;
415 
416 	if (args == NULL)
417 		return 0;
418 
419 	while ((field = strsep(&args, ";")) != NULL) {
420 		ret = user_event_parse_field(field, user, &offset);
421 
422 		if (ret)
423 			break;
424 	}
425 
426 	return ret;
427 }
428 
/*
 * Single zero-initialized entry assigned to class.fields_array in
 * user_event_parse(); the same class also sets get_fields to
 * user_event_get_fields(). NOTE(review): presumably a placeholder since
 * the real fields come from get_fields - confirm against the tracing core.
 */
static struct trace_event_fields user_event_fields_array[1];
430 
/*
 * Picks the printf conversion used for a field type in print_fmt.
 *
 * Known scalar types are matched by exact name, any type containing
 * "char[" is printed as a string, and everything else falls back to
 * "%llu".
 */
static const char *user_field_format(const char *type)
{
	static const struct {
		const char *type;
		const char *fmt;
	} formats[] = {
		{ "s64", "%lld" },
		{ "u64", "%llu" },
		{ "s32", "%d" },
		{ "u32", "%u" },
		{ "int", "%d" },
		{ "unsigned int", "%u" },
		{ "s16", "%d" },
		{ "u16", "%u" },
		{ "short", "%d" },
		{ "unsigned short", "%u" },
		{ "s8", "%d" },
		{ "u8", "%u" },
		{ "char", "%d" },
		{ "unsigned char", "%u" },
		{ NULL, NULL }
	};
	int idx;

	for (idx = 0; formats[idx].type; idx++) {
		if (strcmp(type, formats[idx].type) == 0)
			return formats[idx].fmt;
	}

	if (strstr(type, "char[") != NULL)
		return "%s";

	/* Unknown, likely struct, allowed treat as 64-bit */
	return "%llu";
}
467 
/*
 * Reports whether @type is a dynamic (__data_loc/__rel_loc) string.
 *
 * For either dynamic prefix, *@str_func is set to the matching accessor
 * name even when the type is not a "char" type; the return value is true
 * only when the type also mentions "char". *@str_func is left untouched
 * for every other type.
 */
static bool user_field_is_dyn_string(const char *type, const char **str_func)
{
	if (str_has_prefix(type, "__data_loc "))
		*str_func = "__get_str";
	else if (str_has_prefix(type, "__rel_loc "))
		*str_func = "__get_rel_str";
	else
		return false;

	return strstr(type, "char") != NULL;
}
484 
485 #define LEN_OR_ZERO (len ? len - pos : 0)
486 static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
487 {
488 	struct ftrace_event_field *field, *next;
489 	struct list_head *head = &user->fields;
490 	int pos = 0, depth = 0;
491 	const char *str_func;
492 
493 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
494 
495 	list_for_each_entry_safe_reverse(field, next, head, link) {
496 		if (depth != 0)
497 			pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
498 
499 		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s",
500 				field->name, user_field_format(field->type));
501 
502 		depth++;
503 	}
504 
505 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
506 
507 	list_for_each_entry_safe_reverse(field, next, head, link) {
508 		if (user_field_is_dyn_string(field->type, &str_func))
509 			pos += snprintf(buf + pos, LEN_OR_ZERO,
510 					", %s(%s)", str_func, field->name);
511 		else
512 			pos += snprintf(buf + pos, LEN_OR_ZERO,
513 					", REC->%s", field->name);
514 	}
515 
516 	return pos + 1;
517 }
518 #undef LEN_OR_ZERO
519 
520 static int user_event_create_print_fmt(struct user_event *user)
521 {
522 	char *print_fmt;
523 	int len;
524 
525 	len = user_event_set_print_fmt(user, NULL, 0);
526 
527 	print_fmt = kmalloc(len, GFP_KERNEL);
528 
529 	if (!print_fmt)
530 		return -ENOMEM;
531 
532 	user_event_set_print_fmt(user, print_fmt, len);
533 
534 	user->call.print_fmt = print_fmt;
535 
536 	return 0;
537 }
538 
539 static enum print_line_t user_event_print_trace(struct trace_iterator *iter,
540 						int flags,
541 						struct trace_event *event)
542 {
543 	/* Unsafe to try to decode user provided print_fmt, use hex */
544 	trace_print_hex_dump_seq(&iter->seq, "", DUMP_PREFIX_OFFSET, 16,
545 				 1, iter->ent, iter->ent_size, true);
546 
547 	return trace_handle_return(&iter->seq);
548 }
549 
/* Output callbacks installed as call.event.funcs for every user event. */
static struct trace_event_functions user_event_funcs = {
	.trace = user_event_print_trace,
};
553 
554 static int user_event_set_call_visible(struct user_event *user, bool visible)
555 {
556 	int ret;
557 	const struct cred *old_cred;
558 	struct cred *cred;
559 
560 	cred = prepare_creds();
561 
562 	if (!cred)
563 		return -ENOMEM;
564 
565 	/*
566 	 * While by default tracefs is locked down, systems can be configured
567 	 * to allow user_event files to be less locked down. The extreme case
568 	 * being "other" has read/write access to user_events_data/status.
569 	 *
570 	 * When not locked down, processes may not have have permissions to
571 	 * add/remove calls themselves to tracefs. We need to temporarily
572 	 * switch to root file permission to allow for this scenario.
573 	 */
574 	cred->fsuid = GLOBAL_ROOT_UID;
575 
576 	old_cred = override_creds(cred);
577 
578 	if (visible)
579 		ret = trace_add_event_call(&user->call);
580 	else
581 		ret = trace_remove_event_call(&user->call);
582 
583 	revert_creds(old_cred);
584 	put_cred(cred);
585 
586 	return ret;
587 }
588 
589 static int destroy_user_event(struct user_event *user)
590 {
591 	int ret = 0;
592 
593 	/* Must destroy fields before call removal */
594 	user_event_destroy_fields(user);
595 
596 	ret = user_event_set_call_visible(user, false);
597 
598 	if (ret)
599 		return ret;
600 
601 	dyn_event_remove(&user->devent);
602 
603 	register_page_data[user->index] = 0;
604 	clear_bit(user->index, page_bitmap);
605 	hash_del(&user->node);
606 
607 	user_event_destroy_validators(user);
608 	kfree(user->call.print_fmt);
609 	kfree(EVENT_NAME(user));
610 	kfree(user);
611 
612 	return ret;
613 }
614 
615 static struct user_event *find_user_event(char *name, u32 *outkey)
616 {
617 	struct user_event *user;
618 	u32 key = user_event_key(name);
619 
620 	*outkey = key;
621 
622 	hash_for_each_possible(register_table, user, node, key)
623 		if (!strcmp(EVENT_NAME(user), name)) {
624 			atomic_inc(&user->refcnt);
625 			return user;
626 		}
627 
628 	return NULL;
629 }
630 
631 static int user_event_validate(struct user_event *user, void *data, int len)
632 {
633 	struct list_head *head = &user->validators;
634 	struct user_event_validator *validator;
635 	void *pos, *end = data + len;
636 	u32 loc, offset, size;
637 
638 	list_for_each_entry(validator, head, link) {
639 		pos = data + validator->offset;
640 
641 		/* Already done min_size check, no bounds check here */
642 		loc = *(u32 *)pos;
643 		offset = loc & 0xffff;
644 		size = loc >> 16;
645 
646 		if (likely(validator->flags & VALIDATOR_REL))
647 			pos += offset + sizeof(loc);
648 		else
649 			pos = data + offset;
650 
651 		pos += size;
652 
653 		if (unlikely(pos > end))
654 			return -EFAULT;
655 
656 		if (likely(validator->flags & VALIDATOR_ENSURE_NULL))
657 			if (unlikely(*(char *)(pos - 1) != '\0'))
658 				return -EFAULT;
659 	}
660 
661 	return 0;
662 }
663 
664 /*
665  * Writes the user supplied payload out to a trace file.
666  */
667 static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
668 			      void *tpdata, bool *faulted)
669 {
670 	struct trace_event_file *file;
671 	struct trace_entry *entry;
672 	struct trace_event_buffer event_buffer;
673 	size_t size = sizeof(*entry) + i->count;
674 
675 	file = (struct trace_event_file *)tpdata;
676 
677 	if (!file ||
678 	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
679 	    trace_trigger_soft_disabled(file))
680 		return;
681 
682 	/* Allocates and fills trace_entry, + 1 of this is data payload */
683 	entry = trace_event_buffer_reserve(&event_buffer, file, size);
684 
685 	if (unlikely(!entry))
686 		return;
687 
688 	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
689 		goto discard;
690 
691 	if (!list_empty(&user->validators) &&
692 	    unlikely(user_event_validate(user, entry, size)))
693 		goto discard;
694 
695 	trace_event_buffer_commit(&event_buffer);
696 
697 	return;
698 discard:
699 	*faulted = true;
700 	__trace_event_discard_commit(event_buffer.buffer,
701 				     event_buffer.event);
702 }
703 
#ifdef CONFIG_PERF_EVENTS
/*
 * Writes the user supplied payload out to perf ring buffer.
 *
 * Nothing is done when no perf event is attached on this CPU or no perf
 * buffer can be allocated. *@faulted is set when the payload cannot be
 * copied completely or fails validation; the recursion context taken by
 * the allocation is then released without submitting.
 */
static void user_event_perf(struct user_event *user, struct iov_iter *i,
			    void *tpdata, bool *faulted)
{
	struct hlist_head *perf_head;

	perf_head = this_cpu_ptr(user->call.perf_events);

	if (perf_head && !hlist_empty(perf_head)) {
		struct trace_entry *perf_entry;
		struct pt_regs *regs;
		size_t payload = i->count;
		size_t size = sizeof(*perf_entry) + payload;
		int context;

		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
						  &regs, &context);

		if (unlikely(!perf_entry))
			return;

		perf_fetch_caller_regs(regs);

		/*
		 * An empty payload is valid and copies zero bytes; only a
		 * short copy of a non-empty payload is a fault.
		 */
		if (unlikely(payload &&
			     copy_nofault(perf_entry + 1, payload, i) != payload))
			goto discard;

		if (!list_empty(&user->validators) &&
		    unlikely(user_event_validate(user, perf_entry, size)))
			goto discard;

		perf_trace_buf_submit(perf_entry, size, context,
				      user->call.event.type, 1, regs,
				      perf_head, NULL);

		return;
discard:
		*faulted = true;
		perf_swevent_put_recursion_context(context);
	}
}
#endif
747 
748 /*
749  * Update the register page that is shared between user processes.
750  */
751 static void update_reg_page_for(struct user_event *user)
752 {
753 	struct tracepoint *tp = &user->tracepoint;
754 	char status = 0;
755 
756 	if (atomic_read(&tp->key.enabled) > 0) {
757 		struct tracepoint_func *probe_func_ptr;
758 		user_event_func_t probe_func;
759 
760 		rcu_read_lock_sched();
761 
762 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
763 
764 		if (probe_func_ptr) {
765 			do {
766 				probe_func = probe_func_ptr->func;
767 
768 				if (probe_func == user_event_ftrace)
769 					status |= EVENT_STATUS_FTRACE;
770 #ifdef CONFIG_PERF_EVENTS
771 				else if (probe_func == user_event_perf)
772 					status |= EVENT_STATUS_PERF;
773 #endif
774 				else
775 					status |= EVENT_STATUS_OTHER;
776 			} while ((++probe_func_ptr)->func);
777 		}
778 
779 		rcu_read_unlock_sched();
780 	}
781 
782 	register_page_data[user->index] = status;
783 }
784 
785 /*
786  * Register callback for our events from tracing sub-systems.
787  */
788 static int user_event_reg(struct trace_event_call *call,
789 			  enum trace_reg type,
790 			  void *data)
791 {
792 	struct user_event *user = (struct user_event *)call->data;
793 	int ret = 0;
794 
795 	if (!user)
796 		return -ENOENT;
797 
798 	switch (type) {
799 	case TRACE_REG_REGISTER:
800 		ret = tracepoint_probe_register(call->tp,
801 						call->class->probe,
802 						data);
803 		if (!ret)
804 			goto inc;
805 		break;
806 
807 	case TRACE_REG_UNREGISTER:
808 		tracepoint_probe_unregister(call->tp,
809 					    call->class->probe,
810 					    data);
811 		goto dec;
812 
813 #ifdef CONFIG_PERF_EVENTS
814 	case TRACE_REG_PERF_REGISTER:
815 		ret = tracepoint_probe_register(call->tp,
816 						call->class->perf_probe,
817 						data);
818 		if (!ret)
819 			goto inc;
820 		break;
821 
822 	case TRACE_REG_PERF_UNREGISTER:
823 		tracepoint_probe_unregister(call->tp,
824 					    call->class->perf_probe,
825 					    data);
826 		goto dec;
827 
828 	case TRACE_REG_PERF_OPEN:
829 	case TRACE_REG_PERF_CLOSE:
830 	case TRACE_REG_PERF_ADD:
831 	case TRACE_REG_PERF_DEL:
832 		break;
833 #endif
834 	}
835 
836 	return ret;
837 inc:
838 	atomic_inc(&user->refcnt);
839 	update_reg_page_for(user);
840 	return 0;
841 dec:
842 	update_reg_page_for(user);
843 	atomic_dec(&user->refcnt);
844 	return 0;
845 }
846 
847 static int user_event_create(const char *raw_command)
848 {
849 	struct user_event *user;
850 	char *name;
851 	int ret;
852 
853 	if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX))
854 		return -ECANCELED;
855 
856 	raw_command += USER_EVENTS_PREFIX_LEN;
857 	raw_command = skip_spaces(raw_command);
858 
859 	name = kstrdup(raw_command, GFP_KERNEL);
860 
861 	if (!name)
862 		return -ENOMEM;
863 
864 	mutex_lock(&reg_mutex);
865 
866 	ret = user_event_parse_cmd(name, &user);
867 
868 	if (!ret)
869 		atomic_dec(&user->refcnt);
870 
871 	mutex_unlock(&reg_mutex);
872 
873 	if (ret)
874 		kfree(name);
875 
876 	return ret;
877 }
878 
879 static int user_event_show(struct seq_file *m, struct dyn_event *ev)
880 {
881 	struct user_event *user = container_of(ev, struct user_event, devent);
882 	struct ftrace_event_field *field, *next;
883 	struct list_head *head;
884 	int depth = 0;
885 
886 	seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user));
887 
888 	head = trace_get_fields(&user->call);
889 
890 	list_for_each_entry_safe_reverse(field, next, head, link) {
891 		if (depth == 0)
892 			seq_puts(m, " ");
893 		else
894 			seq_puts(m, "; ");
895 
896 		seq_printf(m, "%s %s", field->type, field->name);
897 
898 		if (str_has_prefix(field->type, "struct "))
899 			seq_printf(m, " %d", field->size);
900 
901 		depth++;
902 	}
903 
904 	seq_puts(m, "\n");
905 
906 	return 0;
907 }
908 
909 static bool user_event_is_busy(struct dyn_event *ev)
910 {
911 	struct user_event *user = container_of(ev, struct user_event, devent);
912 
913 	return atomic_read(&user->refcnt) != 0;
914 }
915 
916 static int user_event_free(struct dyn_event *ev)
917 {
918 	struct user_event *user = container_of(ev, struct user_event, devent);
919 
920 	if (atomic_read(&user->refcnt) != 0)
921 		return -EBUSY;
922 
923 	return destroy_user_event(user);
924 }
925 
926 static bool user_field_match(struct ftrace_event_field *field, int argc,
927 			     const char **argv, int *iout)
928 {
929 	char *field_name, *arg_name;
930 	int len, pos, i = *iout;
931 	bool colon = false, match = false;
932 
933 	if (i >= argc)
934 		return false;
935 
936 	len = MAX_FIELD_ARG_NAME;
937 	field_name = kmalloc(len, GFP_KERNEL);
938 	arg_name = kmalloc(len, GFP_KERNEL);
939 
940 	if (!arg_name || !field_name)
941 		goto out;
942 
943 	pos = 0;
944 
945 	for (; i < argc; ++i) {
946 		if (i != *iout)
947 			pos += snprintf(arg_name + pos, len - pos, " ");
948 
949 		pos += snprintf(arg_name + pos, len - pos, argv[i]);
950 
951 		if (strchr(argv[i], ';')) {
952 			++i;
953 			colon = true;
954 			break;
955 		}
956 	}
957 
958 	pos = 0;
959 
960 	pos += snprintf(field_name + pos, len - pos, field->type);
961 	pos += snprintf(field_name + pos, len - pos, " ");
962 	pos += snprintf(field_name + pos, len - pos, field->name);
963 
964 	if (colon)
965 		pos += snprintf(field_name + pos, len - pos, ";");
966 
967 	*iout = i;
968 
969 	match = strcmp(arg_name, field_name) == 0;
970 out:
971 	kfree(arg_name);
972 	kfree(field_name);
973 
974 	return match;
975 }
976 
977 static bool user_fields_match(struct user_event *user, int argc,
978 			      const char **argv)
979 {
980 	struct ftrace_event_field *field, *next;
981 	struct list_head *head = &user->fields;
982 	int i = 0;
983 
984 	list_for_each_entry_safe_reverse(field, next, head, link)
985 		if (!user_field_match(field, argc, argv, &i))
986 			return false;
987 
988 	if (i != argc)
989 		return false;
990 
991 	return true;
992 }
993 
994 static bool user_event_match(const char *system, const char *event,
995 			     int argc, const char **argv, struct dyn_event *ev)
996 {
997 	struct user_event *user = container_of(ev, struct user_event, devent);
998 	bool match;
999 
1000 	match = strcmp(EVENT_NAME(user), event) == 0 &&
1001 		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
1002 
1003 	if (match && argc > 0)
1004 		match = user_fields_match(user, argc, argv);
1005 
1006 	return match;
1007 }
1008 
/*
 * Dynamic event operations for user events, attached to each event by
 * dyn_event_init() in user_event_parse().
 */
static struct dyn_event_operations user_event_dops = {
	.create = user_event_create,
	.show = user_event_show,
	.is_busy = user_event_is_busy,
	.free = user_event_free,
	.match = user_event_match,
};
1016 
1017 static int user_event_trace_register(struct user_event *user)
1018 {
1019 	int ret;
1020 
1021 	ret = register_trace_event(&user->call.event);
1022 
1023 	if (!ret)
1024 		return -ENODEV;
1025 
1026 	ret = user_event_set_call_visible(user, true);
1027 
1028 	if (ret)
1029 		unregister_trace_event(&user->call.event);
1030 
1031 	return ret;
1032 }
1033 
1034 /*
1035  * Parses the event name, arguments and flags then registers if successful.
1036  * The name buffer lifetime is owned by this method for success cases only.
1037  * Upon success the returned user_event has its ref count increased by 1.
1038  */
1039 static int user_event_parse(char *name, char *args, char *flags,
1040 			    struct user_event **newuser)
1041 {
1042 	int ret;
1043 	int index;
1044 	u32 key;
1045 	struct user_event *user;
1046 
1047 	/* Prevent dyn_event from racing */
1048 	mutex_lock(&event_mutex);
1049 	user = find_user_event(name, &key);
1050 	mutex_unlock(&event_mutex);
1051 
1052 	if (user) {
1053 		*newuser = user;
1054 		/*
1055 		 * Name is allocated by caller, free it since it already exists.
1056 		 * Caller only worries about failure cases for freeing.
1057 		 */
1058 		kfree(name);
1059 		return 0;
1060 	}
1061 
1062 	index = find_first_zero_bit(page_bitmap, MAX_EVENTS);
1063 
1064 	if (index == MAX_EVENTS)
1065 		return -EMFILE;
1066 
1067 	user = kzalloc(sizeof(*user), GFP_KERNEL);
1068 
1069 	if (!user)
1070 		return -ENOMEM;
1071 
1072 	INIT_LIST_HEAD(&user->class.fields);
1073 	INIT_LIST_HEAD(&user->fields);
1074 	INIT_LIST_HEAD(&user->validators);
1075 
1076 	user->tracepoint.name = name;
1077 
1078 	ret = user_event_parse_fields(user, args);
1079 
1080 	if (ret)
1081 		goto put_user;
1082 
1083 	ret = user_event_create_print_fmt(user);
1084 
1085 	if (ret)
1086 		goto put_user;
1087 
1088 	user->call.data = user;
1089 	user->call.class = &user->class;
1090 	user->call.name = name;
1091 	user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
1092 	user->call.tp = &user->tracepoint;
1093 	user->call.event.funcs = &user_event_funcs;
1094 
1095 	user->class.system = USER_EVENTS_SYSTEM;
1096 	user->class.fields_array = user_event_fields_array;
1097 	user->class.get_fields = user_event_get_fields;
1098 	user->class.reg = user_event_reg;
1099 	user->class.probe = user_event_ftrace;
1100 #ifdef CONFIG_PERF_EVENTS
1101 	user->class.perf_probe = user_event_perf;
1102 #endif
1103 
1104 	mutex_lock(&event_mutex);
1105 
1106 	ret = user_event_trace_register(user);
1107 
1108 	if (ret)
1109 		goto put_user_lock;
1110 
1111 	user->index = index;
1112 
1113 	/* Ensure we track ref */
1114 	atomic_inc(&user->refcnt);
1115 
1116 	dyn_event_init(&user->devent, &user_event_dops);
1117 	dyn_event_add(&user->devent, &user->call);
1118 	set_bit(user->index, page_bitmap);
1119 	hash_add(register_table, &user->node, key);
1120 
1121 	mutex_unlock(&event_mutex);
1122 
1123 	*newuser = user;
1124 	return 0;
1125 put_user_lock:
1126 	mutex_unlock(&event_mutex);
1127 put_user:
1128 	user_event_destroy_fields(user);
1129 	user_event_destroy_validators(user);
1130 	kfree(user);
1131 	return ret;
1132 }
1133 
1134 /*
1135  * Deletes a previously created event if it is no longer being used.
1136  */
1137 static int delete_user_event(char *name)
1138 {
1139 	u32 key;
1140 	int ret;
1141 	struct user_event *user = find_user_event(name, &key);
1142 
1143 	if (!user)
1144 		return -ENOENT;
1145 
1146 	/* Ensure we are the last ref */
1147 	if (atomic_read(&user->refcnt) != 1) {
1148 		ret = -EBUSY;
1149 		goto put_ref;
1150 	}
1151 
1152 	ret = destroy_user_event(user);
1153 
1154 	if (ret)
1155 		goto put_ref;
1156 
1157 	return ret;
1158 put_ref:
1159 	/* No longer have this ref */
1160 	atomic_dec(&user->refcnt);
1161 
1162 	return ret;
1163 }
1164 
1165 /*
1166  * Validates the user payload and writes via iterator.
1167  */
1168 static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
1169 {
1170 	struct user_event_refs *refs;
1171 	struct user_event *user = NULL;
1172 	struct tracepoint *tp;
1173 	ssize_t ret = i->count;
1174 	int idx;
1175 
1176 	if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx)))
1177 		return -EFAULT;
1178 
1179 	rcu_read_lock_sched();
1180 
1181 	refs = rcu_dereference_sched(file->private_data);
1182 
1183 	/*
1184 	 * The refs->events array is protected by RCU, and new items may be
1185 	 * added. But the user retrieved from indexing into the events array
1186 	 * shall be immutable while the file is opened.
1187 	 */
1188 	if (likely(refs && idx < refs->count))
1189 		user = refs->events[idx];
1190 
1191 	rcu_read_unlock_sched();
1192 
1193 	if (unlikely(user == NULL))
1194 		return -ENOENT;
1195 
1196 	if (unlikely(i->count < user->min_size))
1197 		return -EINVAL;
1198 
1199 	tp = &user->tracepoint;
1200 
1201 	/*
1202 	 * It's possible key.enabled disables after this check, however
1203 	 * we don't mind if a few events are included in this condition.
1204 	 */
1205 	if (likely(atomic_read(&tp->key.enabled) > 0)) {
1206 		struct tracepoint_func *probe_func_ptr;
1207 		user_event_func_t probe_func;
1208 		struct iov_iter copy;
1209 		void *tpdata;
1210 		bool faulted;
1211 
1212 		if (unlikely(fault_in_iov_iter_readable(i, i->count)))
1213 			return -EFAULT;
1214 
1215 		faulted = false;
1216 
1217 		rcu_read_lock_sched();
1218 
1219 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
1220 
1221 		if (probe_func_ptr) {
1222 			do {
1223 				copy = *i;
1224 				probe_func = probe_func_ptr->func;
1225 				tpdata = probe_func_ptr->data;
1226 				probe_func(user, &copy, tpdata, &faulted);
1227 			} while ((++probe_func_ptr)->func);
1228 		}
1229 
1230 		rcu_read_unlock_sched();
1231 
1232 		if (unlikely(faulted))
1233 			return -EFAULT;
1234 	}
1235 
1236 	return ret;
1237 }
1238 
1239 static ssize_t user_events_write(struct file *file, const char __user *ubuf,
1240 				 size_t count, loff_t *ppos)
1241 {
1242 	struct iovec iov;
1243 	struct iov_iter i;
1244 
1245 	if (unlikely(*ppos != 0))
1246 		return -EFAULT;
1247 
1248 	if (unlikely(import_single_range(READ, (char *)ubuf, count, &iov, &i)))
1249 		return -EFAULT;
1250 
1251 	return user_events_write_core(file, &i);
1252 }
1253 
1254 static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
1255 {
1256 	return user_events_write_core(kp->ki_filp, i);
1257 }
1258 
1259 static int user_events_ref_add(struct file *file, struct user_event *user)
1260 {
1261 	struct user_event_refs *refs, *new_refs;
1262 	int i, size, count = 0;
1263 
1264 	refs = rcu_dereference_protected(file->private_data,
1265 					 lockdep_is_held(&reg_mutex));
1266 
1267 	if (refs) {
1268 		count = refs->count;
1269 
1270 		for (i = 0; i < count; ++i)
1271 			if (refs->events[i] == user)
1272 				return i;
1273 	}
1274 
1275 	size = struct_size(refs, events, count + 1);
1276 
1277 	new_refs = kzalloc(size, GFP_KERNEL);
1278 
1279 	if (!new_refs)
1280 		return -ENOMEM;
1281 
1282 	new_refs->count = count + 1;
1283 
1284 	for (i = 0; i < count; ++i)
1285 		new_refs->events[i] = refs->events[i];
1286 
1287 	new_refs->events[i] = user;
1288 
1289 	atomic_inc(&user->refcnt);
1290 
1291 	rcu_assign_pointer(file->private_data, new_refs);
1292 
1293 	if (refs)
1294 		kfree_rcu(refs, rcu);
1295 
1296 	return i;
1297 }
1298 
1299 static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
1300 {
1301 	u32 size;
1302 	long ret;
1303 
1304 	ret = get_user(size, &ureg->size);
1305 
1306 	if (ret)
1307 		return ret;
1308 
1309 	if (size > PAGE_SIZE)
1310 		return -E2BIG;
1311 
1312 	return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
1313 }
1314 
1315 /*
1316  * Registers a user_event on behalf of a user process.
1317  */
1318 static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
1319 {
1320 	struct user_reg __user *ureg = (struct user_reg __user *)uarg;
1321 	struct user_reg reg;
1322 	struct user_event *user;
1323 	char *name;
1324 	long ret;
1325 
1326 	ret = user_reg_get(ureg, &reg);
1327 
1328 	if (ret)
1329 		return ret;
1330 
1331 	name = strndup_user((const char __user *)(uintptr_t)reg.name_args,
1332 			    MAX_EVENT_DESC);
1333 
1334 	if (IS_ERR(name)) {
1335 		ret = PTR_ERR(name);
1336 		return ret;
1337 	}
1338 
1339 	ret = user_event_parse_cmd(name, &user);
1340 
1341 	if (ret) {
1342 		kfree(name);
1343 		return ret;
1344 	}
1345 
1346 	ret = user_events_ref_add(file, user);
1347 
1348 	/* No longer need parse ref, ref_add either worked or not */
1349 	atomic_dec(&user->refcnt);
1350 
1351 	/* Positive number is index and valid */
1352 	if (ret < 0)
1353 		return ret;
1354 
1355 	put_user((u32)ret, &ureg->write_index);
1356 	put_user(user->index, &ureg->status_index);
1357 
1358 	return 0;
1359 }
1360 
1361 /*
1362  * Deletes a user_event on behalf of a user process.
1363  */
1364 static long user_events_ioctl_del(struct file *file, unsigned long uarg)
1365 {
1366 	void __user *ubuf = (void __user *)uarg;
1367 	char *name;
1368 	long ret;
1369 
1370 	name = strndup_user(ubuf, MAX_EVENT_DESC);
1371 
1372 	if (IS_ERR(name))
1373 		return PTR_ERR(name);
1374 
1375 	/* event_mutex prevents dyn_event from racing */
1376 	mutex_lock(&event_mutex);
1377 	ret = delete_user_event(name);
1378 	mutex_unlock(&event_mutex);
1379 
1380 	kfree(name);
1381 
1382 	return ret;
1383 }
1384 
1385 /*
1386  * Handles the ioctl from user mode to register or alter operations.
1387  */
1388 static long user_events_ioctl(struct file *file, unsigned int cmd,
1389 			      unsigned long uarg)
1390 {
1391 	long ret = -ENOTTY;
1392 
1393 	switch (cmd) {
1394 	case DIAG_IOCSREG:
1395 		mutex_lock(&reg_mutex);
1396 		ret = user_events_ioctl_reg(file, uarg);
1397 		mutex_unlock(&reg_mutex);
1398 		break;
1399 
1400 	case DIAG_IOCSDEL:
1401 		mutex_lock(&reg_mutex);
1402 		ret = user_events_ioctl_del(file, uarg);
1403 		mutex_unlock(&reg_mutex);
1404 		break;
1405 	}
1406 
1407 	return ret;
1408 }
1409 
1410 /*
1411  * Handles the final close of the file from user mode.
1412  */
1413 static int user_events_release(struct inode *node, struct file *file)
1414 {
1415 	struct user_event_refs *refs;
1416 	struct user_event *user;
1417 	int i;
1418 
1419 	/*
1420 	 * Ensure refs cannot change under any situation by taking the
1421 	 * register mutex during the final freeing of the references.
1422 	 */
1423 	mutex_lock(&reg_mutex);
1424 
1425 	refs = file->private_data;
1426 
1427 	if (!refs)
1428 		goto out;
1429 
1430 	/*
1431 	 * The lifetime of refs has reached an end, it's tied to this file.
1432 	 * The underlying user_events are ref counted, and cannot be freed.
1433 	 * After this decrement, the user_events may be freed elsewhere.
1434 	 */
1435 	for (i = 0; i < refs->count; ++i) {
1436 		user = refs->events[i];
1437 
1438 		if (user)
1439 			atomic_dec(&user->refcnt);
1440 	}
1441 out:
1442 	file->private_data = NULL;
1443 
1444 	mutex_unlock(&reg_mutex);
1445 
1446 	kfree(refs);
1447 
1448 	return 0;
1449 }
1450 
1451 static const struct file_operations user_data_fops = {
1452 	.write = user_events_write,
1453 	.write_iter = user_events_write_iter,
1454 	.unlocked_ioctl	= user_events_ioctl,
1455 	.release = user_events_release,
1456 };
1457 
1458 /*
1459  * Maps the shared page into the user process for checking if event is enabled.
1460  */
1461 static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
1462 {
1463 	unsigned long size = vma->vm_end - vma->vm_start;
1464 
1465 	if (size != MAX_EVENTS)
1466 		return -EINVAL;
1467 
1468 	return remap_pfn_range(vma, vma->vm_start,
1469 			       virt_to_phys(register_page_data) >> PAGE_SHIFT,
1470 			       size, vm_get_page_prot(VM_READ));
1471 }
1472 
1473 static void *user_seq_start(struct seq_file *m, loff_t *pos)
1474 {
1475 	if (*pos)
1476 		return NULL;
1477 
1478 	return (void *)1;
1479 }
1480 
1481 static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
1482 {
1483 	++*pos;
1484 	return NULL;
1485 }
1486 
/* seq_file stop: nothing was acquired in start, so nothing to undo. */
static void user_seq_stop(struct seq_file *m, void *p)
{
	/* Intentionally empty */
}
1490 
1491 static int user_seq_show(struct seq_file *m, void *p)
1492 {
1493 	struct user_event *user;
1494 	char status;
1495 	int i, active = 0, busy = 0, flags;
1496 
1497 	mutex_lock(&reg_mutex);
1498 
1499 	hash_for_each(register_table, i, user, node) {
1500 		status = register_page_data[user->index];
1501 		flags = user->flags;
1502 
1503 		seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));
1504 
1505 		if (flags != 0 || status != 0)
1506 			seq_puts(m, " #");
1507 
1508 		if (status != 0) {
1509 			seq_puts(m, " Used by");
1510 			if (status & EVENT_STATUS_FTRACE)
1511 				seq_puts(m, " ftrace");
1512 			if (status & EVENT_STATUS_PERF)
1513 				seq_puts(m, " perf");
1514 			if (status & EVENT_STATUS_OTHER)
1515 				seq_puts(m, " other");
1516 			busy++;
1517 		}
1518 
1519 		seq_puts(m, "\n");
1520 		active++;
1521 	}
1522 
1523 	mutex_unlock(&reg_mutex);
1524 
1525 	seq_puts(m, "\n");
1526 	seq_printf(m, "Active: %d\n", active);
1527 	seq_printf(m, "Busy: %d\n", busy);
1528 	seq_printf(m, "Max: %ld\n", MAX_EVENTS);
1529 
1530 	return 0;
1531 }
1532 
1533 static const struct seq_operations user_seq_ops = {
1534 	.start = user_seq_start,
1535 	.next  = user_seq_next,
1536 	.stop  = user_seq_stop,
1537 	.show  = user_seq_show,
1538 };
1539 
1540 static int user_status_open(struct inode *node, struct file *file)
1541 {
1542 	return seq_open(file, &user_seq_ops);
1543 }
1544 
1545 static const struct file_operations user_status_fops = {
1546 	.open = user_status_open,
1547 	.mmap = user_status_mmap,
1548 	.read = seq_read,
1549 	.llseek  = seq_lseek,
1550 	.release = seq_release,
1551 };
1552 
1553 /*
1554  * Creates a set of tracefs files to allow user mode interactions.
1555  */
1556 static int create_user_tracefs(void)
1557 {
1558 	struct dentry *edata, *emmap;
1559 
1560 	edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
1561 				    NULL, NULL, &user_data_fops);
1562 
1563 	if (!edata) {
1564 		pr_warn("Could not create tracefs 'user_events_data' entry\n");
1565 		goto err;
1566 	}
1567 
1568 	/* mmap with MAP_SHARED requires writable fd */
1569 	emmap = tracefs_create_file("user_events_status", TRACE_MODE_WRITE,
1570 				    NULL, NULL, &user_status_fops);
1571 
1572 	if (!emmap) {
1573 		tracefs_remove(edata);
1574 		pr_warn("Could not create tracefs 'user_events_mmap' entry\n");
1575 		goto err;
1576 	}
1577 
1578 	return 0;
1579 err:
1580 	return -ENODEV;
1581 }
1582 
1583 static void set_page_reservations(bool set)
1584 {
1585 	int page;
1586 
1587 	for (page = 0; page < MAX_PAGES; ++page) {
1588 		void *addr = register_page_data + (PAGE_SIZE * page);
1589 
1590 		if (set)
1591 			SetPageReserved(virt_to_page(addr));
1592 		else
1593 			ClearPageReserved(virt_to_page(addr));
1594 	}
1595 }
1596 
1597 static int __init trace_events_user_init(void)
1598 {
1599 	struct page *pages;
1600 	int ret;
1601 
1602 	/* Zero all bits beside 0 (which is reserved for failures) */
1603 	bitmap_zero(page_bitmap, MAX_EVENTS);
1604 	set_bit(0, page_bitmap);
1605 
1606 	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER);
1607 	if (!pages)
1608 		return -ENOMEM;
1609 	register_page_data = page_address(pages);
1610 
1611 	set_page_reservations(true);
1612 
1613 	ret = create_user_tracefs();
1614 
1615 	if (ret) {
1616 		pr_warn("user_events could not register with tracefs\n");
1617 		set_page_reservations(false);
1618 		__free_pages(pages, MAX_PAGE_ORDER);
1619 		return ret;
1620 	}
1621 
1622 	if (dyn_event_register(&user_event_dops))
1623 		pr_warn("user_events could not register with dyn_events\n");
1624 
1625 	return 0;
1626 }
1627 
1628 fs_initcall(trace_events_user_init);
1629