xref: /openbmc/linux/kernel/trace/trace_events_user.c (revision 7e348b325bc40eb52aead4d57a1f90d33ea834fc)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2021, Microsoft Corporation.
4  *
5  * Authors:
6  *   Beau Belgrave <beaub@linux.microsoft.com>
7  */
8 
9 #include <linux/bitmap.h>
10 #include <linux/cdev.h>
11 #include <linux/hashtable.h>
12 #include <linux/list.h>
13 #include <linux/io.h>
14 #include <linux/uio.h>
15 #include <linux/ioctl.h>
16 #include <linux/jhash.h>
17 #include <linux/trace_events.h>
18 #include <linux/tracefs.h>
19 #include <linux/types.h>
20 #include <linux/uaccess.h>
21 #include <uapi/linux/user_events.h>
22 #include "trace.h"
23 #include "trace_dynevent.h"
24 
25 #define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1)
26 
27 #define FIELD_DEPTH_TYPE 0
28 #define FIELD_DEPTH_NAME 1
29 #define FIELD_DEPTH_SIZE 2
30 
31 /*
32  * Limits how many trace_event calls user processes can create:
33  * Must be a multiple of PAGE_SIZE.
34  */
35 #define MAX_PAGES 1
36 #define MAX_EVENTS (MAX_PAGES * PAGE_SIZE)
37 
38 /* Limit how long an event name plus args can be within the subsystem. */
39 #define MAX_EVENT_DESC 512
40 #define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
41 #define MAX_FIELD_ARRAY_SIZE 1024
42 #define MAX_FIELD_ARG_NAME 256
43 
44 #define MAX_BPF_COPY_SIZE PAGE_SIZE
45 #define MAX_STACK_BPF_DATA 512
46 
47 static char *register_page_data;
48 
49 static DEFINE_MUTEX(reg_mutex);
50 static DEFINE_HASHTABLE(register_table, 4);
51 static DECLARE_BITMAP(page_bitmap, MAX_EVENTS);
52 
53 /*
54  * Stores per-event properties. As users register events
55  * within a file, a user_event might be created if it does not
56  * already exist. These are used globally and their lifetime
57  * is tied to the refcnt member; they cannot go away until the
58  * refcnt reaches zero.
59  */
60 struct user_event {
61 	struct tracepoint tracepoint;
62 	struct trace_event_call call;
63 	struct trace_event_class class;
64 	struct dyn_event devent;
65 	struct hlist_node node;
66 	struct list_head fields;
67 	struct list_head validators;
68 	atomic_t refcnt;
69 	int index;
70 	int flags;
71 	int min_size;
72 };
73 
74 /*
75  * Stores per-file event references. As users register events
76  * within a file, this structure is modified and freed via RCU.
77  * The lifetime of this struct is tied to the lifetime of the file.
78  * It is not shared and is only accessible by the file that created it.
79  */
80 struct user_event_refs {
81 	struct rcu_head rcu;
82 	int count;
83 	struct user_event *events[];
84 };
85 
86 #define VALIDATOR_ENSURE_NULL (1 << 0)
87 #define VALIDATOR_REL (1 << 1)
88 
89 struct user_event_validator {
90 	struct list_head link;
91 	int offset;
92 	int flags;
93 };
94 
95 typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
96 				   void *tpdata, bool *faulted);
97 
98 static int user_event_parse(char *name, char *args, char *flags,
99 			    struct user_event **newuser);
100 
101 static u32 user_event_key(char *name)
102 {
103 	return jhash(name, strlen(name), 0);
104 }
105 
106 static __always_inline __must_check
107 size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
108 {
109 	size_t ret;
110 
111 	pagefault_disable();
112 
113 	ret = copy_from_iter_nocache(addr, bytes, i);
114 
115 	pagefault_enable();
116 
117 	return ret;
118 }
119 
120 static struct list_head *user_event_get_fields(struct trace_event_call *call)
121 {
122 	struct user_event *user = (struct user_event *)call->data;
123 
124 	return &user->fields;
125 }
126 
127 /*
128  * Parses a register command for user_events
129  * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]]
130  *
131  * Example event named 'test' with a 20 char 'msg' field followed by an
132  * unsigned int 'id' field:
133  * test char[20] msg;unsigned int id
134  *
135  * NOTE: Offsets are from the user data perspective; they are not from the
136  * trace_entry/buffer perspective. We automatically add the common properties
137  * sizes to the offset for the user.
138  *
139  * Upon success user_event has its ref count increased by 1.
140  */
141 static int user_event_parse_cmd(char *raw_command, struct user_event **newuser)
142 {
143 	char *name = raw_command;
144 	char *args = strpbrk(name, " ");
145 	char *flags;
146 
147 	if (args)
148 		*args++ = '\0';
149 
150 	flags = strpbrk(name, ":");
151 
152 	if (flags)
153 		*flags++ = '\0';
154 
155 	return user_event_parse(name, args, flags, newuser);
156 }
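
/*
 * Worked example: for the raw command
 * "test:BPF_ITER char[20] msg;unsigned int id" the split above produces:
 *
 *	name  = "test"
 *	flags = "BPF_ITER"
 *	args  = "char[20] msg;unsigned int id"
 *
 * A command without flags, such as "test char[20] msg", leaves flags == NULL.
 */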
157 
158 static int user_field_array_size(const char *type)
159 {
160 	const char *start = strchr(type, '[');
161 	char val[8];
162 	char *bracket;
163 	int size = 0;
164 
165 	if (start == NULL)
166 		return -EINVAL;
167 
168 	if (strscpy(val, start + 1, sizeof(val)) <= 0)
169 		return -EINVAL;
170 
171 	bracket = strchr(val, ']');
172 
173 	if (!bracket)
174 		return -EINVAL;
175 
176 	*bracket = '\0';
177 
178 	if (kstrtouint(val, 0, &size))
179 		return -EINVAL;
180 
181 	if (size > MAX_FIELD_ARRAY_SIZE)
182 		return -EINVAL;
183 
184 	return size;
185 }
186 
187 static int user_field_size(const char *type)
188 {
189 	/* long is not allowed from a user, since it's ambiguous in size */
190 	if (strcmp(type, "s64") == 0)
191 		return sizeof(s64);
192 	if (strcmp(type, "u64") == 0)
193 		return sizeof(u64);
194 	if (strcmp(type, "s32") == 0)
195 		return sizeof(s32);
196 	if (strcmp(type, "u32") == 0)
197 		return sizeof(u32);
198 	if (strcmp(type, "int") == 0)
199 		return sizeof(int);
200 	if (strcmp(type, "unsigned int") == 0)
201 		return sizeof(unsigned int);
202 	if (strcmp(type, "s16") == 0)
203 		return sizeof(s16);
204 	if (strcmp(type, "u16") == 0)
205 		return sizeof(u16);
206 	if (strcmp(type, "short") == 0)
207 		return sizeof(short);
208 	if (strcmp(type, "unsigned short") == 0)
209 		return sizeof(unsigned short);
210 	if (strcmp(type, "s8") == 0)
211 		return sizeof(s8);
212 	if (strcmp(type, "u8") == 0)
213 		return sizeof(u8);
214 	if (strcmp(type, "char") == 0)
215 		return sizeof(char);
216 	if (strcmp(type, "unsigned char") == 0)
217 		return sizeof(unsigned char);
218 	if (str_has_prefix(type, "char["))
219 		return user_field_array_size(type);
220 	if (str_has_prefix(type, "unsigned char["))
221 		return user_field_array_size(type);
222 	if (str_has_prefix(type, "__data_loc "))
223 		return sizeof(u32);
224 	if (str_has_prefix(type, "__rel_loc "))
225 		return sizeof(u32);
226 
227 	/* Unknown basic type, error */
228 	return -EINVAL;
229 }
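
/*
 * Illustrative results of the checks above:
 *
 *	user_field_size("u32")              == 4
 *	user_field_size("unsigned short")   == 2
 *	user_field_size("char[20]")         == 20 (rejected above MAX_FIELD_ARRAY_SIZE)
 *	user_field_size("__rel_loc char[]") == 4  (just the u32 location word)
 *	user_field_size("long")             == -EINVAL (ambiguous size)
 */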
230 
231 static void user_event_destroy_validators(struct user_event *user)
232 {
233 	struct user_event_validator *validator, *next;
234 	struct list_head *head = &user->validators;
235 
236 	list_for_each_entry_safe(validator, next, head, link) {
237 		list_del(&validator->link);
238 		kfree(validator);
239 	}
240 }
241 
242 static void user_event_destroy_fields(struct user_event *user)
243 {
244 	struct ftrace_event_field *field, *next;
245 	struct list_head *head = &user->fields;
246 
247 	list_for_each_entry_safe(field, next, head, link) {
248 		list_del(&field->link);
249 		kfree(field);
250 	}
251 }
252 
253 static int user_event_add_field(struct user_event *user, const char *type,
254 				const char *name, int offset, int size,
255 				int is_signed, int filter_type)
256 {
257 	struct user_event_validator *validator;
258 	struct ftrace_event_field *field;
259 	int validator_flags = 0;
260 
261 	field = kmalloc(sizeof(*field), GFP_KERNEL);
262 
263 	if (!field)
264 		return -ENOMEM;
265 
266 	if (str_has_prefix(type, "__data_loc "))
267 		goto add_validator;
268 
269 	if (str_has_prefix(type, "__rel_loc ")) {
270 		validator_flags |= VALIDATOR_REL;
271 		goto add_validator;
272 	}
273 
274 	goto add_field;
275 
276 add_validator:
277 	if (strstr(type, "char") != NULL)
278 		validator_flags |= VALIDATOR_ENSURE_NULL;
279 
280 	validator = kmalloc(sizeof(*validator), GFP_KERNEL);
281 
282 	if (!validator) {
283 		kfree(field);
284 		return -ENOMEM;
285 	}
286 
287 	validator->flags = validator_flags;
288 	validator->offset = offset;
289 
290 	/* Want sequential access when validating */
291 	list_add_tail(&validator->link, &user->validators);
292 
293 add_field:
294 	field->type = type;
295 	field->name = name;
296 	field->offset = offset;
297 	field->size = size;
298 	field->is_signed = is_signed;
299 	field->filter_type = filter_type;
300 
301 	list_add(&field->link, &user->fields);
302 
303 	/*
304 	 * Min size required from user writes; this does not include
305 	 * the size of trace_entry (common fields).
306 	 */
307 	user->min_size = (offset + size) - sizeof(struct trace_entry);
308 
309 	return 0;
310 }
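
/*
 * Worked example, assuming sizeof(struct trace_entry) == 8: for the event
 * description "test char[20] msg;unsigned int id" this is called with offsets
 * that already include the common fields:
 *
 *	msg: offset 8,  size 20 -> min_size = (8 + 20) - 8 = 20
 *	id:  offset 28, size 4  -> min_size = (28 + 4) - 8 = 24
 *
 * so user writes for this event must supply at least 24 bytes of payload.
 */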
311 
312 /*
313  * Parses the values of a field within the description
314  * Format: type name [size]
315  */
316 static int user_event_parse_field(char *field, struct user_event *user,
317 				  u32 *offset)
318 {
319 	char *part, *type, *name;
320 	u32 depth = 0, saved_offset = *offset;
321 	int len, size = -EINVAL;
322 	bool is_struct = false;
323 
324 	field = skip_spaces(field);
325 
326 	if (*field == '\0')
327 		return 0;
328 
329 	/* Handle types that have a space within */
330 	len = str_has_prefix(field, "unsigned ");
331 	if (len)
332 		goto skip_next;
333 
334 	len = str_has_prefix(field, "struct ");
335 	if (len) {
336 		is_struct = true;
337 		goto skip_next;
338 	}
339 
340 	len = str_has_prefix(field, "__data_loc unsigned ");
341 	if (len)
342 		goto skip_next;
343 
344 	len = str_has_prefix(field, "__data_loc ");
345 	if (len)
346 		goto skip_next;
347 
348 	len = str_has_prefix(field, "__rel_loc unsigned ");
349 	if (len)
350 		goto skip_next;
351 
352 	len = str_has_prefix(field, "__rel_loc ");
353 	if (len)
354 		goto skip_next;
355 
356 	goto parse;
357 skip_next:
358 	type = field;
359 	field = strpbrk(field + len, " ");
360 
361 	if (field == NULL)
362 		return -EINVAL;
363 
364 	*field++ = '\0';
365 	depth++;
366 parse:
367 	name = NULL;
368 
369 	while ((part = strsep(&field, " ")) != NULL) {
370 		switch (depth++) {
371 		case FIELD_DEPTH_TYPE:
372 			type = part;
373 			break;
374 		case FIELD_DEPTH_NAME:
375 			name = part;
376 			break;
377 		case FIELD_DEPTH_SIZE:
378 			if (!is_struct)
379 				return -EINVAL;
380 
381 			if (kstrtou32(part, 10, &size))
382 				return -EINVAL;
383 			break;
384 		default:
385 			return -EINVAL;
386 		}
387 	}
388 
389 	if (depth < FIELD_DEPTH_SIZE || !name)
390 		return -EINVAL;
391 
392 	if (depth == FIELD_DEPTH_SIZE)
393 		size = user_field_size(type);
394 
395 	if (size == 0)
396 		return -EINVAL;
397 
398 	if (size < 0)
399 		return size;
400 
401 	*offset = saved_offset + size;
402 
403 	return user_event_add_field(user, type, name, saved_offset, size,
404 				    type[0] != 'u', FILTER_OTHER);
405 }
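
/*
 * Illustrative field descriptions and how the parser above treats them:
 *
 *	"u32 count"                 type "u32", name "count"
 *	"unsigned int id"           space within the type handled via skip_next
 *	"__rel_loc char[] msg"      dynamic string, sized as the u32 location word
 *	"struct my_struct data 32"  explicit size, only allowed for struct types
 */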
406 
407 static void user_event_parse_flags(struct user_event *user, char *flags)
408 {
409 	char *flag;
410 
411 	if (flags == NULL)
412 		return;
413 
414 	while ((flag = strsep(&flags, ",")) != NULL) {
415 		if (strcmp(flag, "BPF_ITER") == 0)
416 			user->flags |= FLAG_BPF_ITER;
417 	}
418 }
419 
420 static int user_event_parse_fields(struct user_event *user, char *args)
421 {
422 	char *field;
423 	u32 offset = sizeof(struct trace_entry);
424 	int ret = -EINVAL;
425 
426 	if (args == NULL)
427 		return 0;
428 
429 	while ((field = strsep(&args, ";")) != NULL) {
430 		ret = user_event_parse_field(field, user, &offset);
431 
432 		if (ret)
433 			break;
434 	}
435 
436 	return ret;
437 }
438 
439 static struct trace_event_fields user_event_fields_array[1];
440 
441 static const char *user_field_format(const char *type)
442 {
443 	if (strcmp(type, "s64") == 0)
444 		return "%lld";
445 	if (strcmp(type, "u64") == 0)
446 		return "%llu";
447 	if (strcmp(type, "s32") == 0)
448 		return "%d";
449 	if (strcmp(type, "u32") == 0)
450 		return "%u";
451 	if (strcmp(type, "int") == 0)
452 		return "%d";
453 	if (strcmp(type, "unsigned int") == 0)
454 		return "%u";
455 	if (strcmp(type, "s16") == 0)
456 		return "%d";
457 	if (strcmp(type, "u16") == 0)
458 		return "%u";
459 	if (strcmp(type, "short") == 0)
460 		return "%d";
461 	if (strcmp(type, "unsigned short") == 0)
462 		return "%u";
463 	if (strcmp(type, "s8") == 0)
464 		return "%d";
465 	if (strcmp(type, "u8") == 0)
466 		return "%u";
467 	if (strcmp(type, "char") == 0)
468 		return "%d";
469 	if (strcmp(type, "unsigned char") == 0)
470 		return "%u";
471 	if (strstr(type, "char[") != NULL)
472 		return "%s";
473 
474 	/* Unknown type, likely a struct; allow it and treat as 64-bit */
475 	return "%llu";
476 }
477 
478 static bool user_field_is_dyn_string(const char *type, const char **str_func)
479 {
480 	if (str_has_prefix(type, "__data_loc ")) {
481 		*str_func = "__get_str";
482 		goto check;
483 	}
484 
485 	if (str_has_prefix(type, "__rel_loc ")) {
486 		*str_func = "__get_rel_str";
487 		goto check;
488 	}
489 
490 	return false;
491 check:
492 	return strstr(type, "char") != NULL;
493 }
494 
495 #define LEN_OR_ZERO (len ? len - pos : 0)
496 static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
497 {
498 	struct ftrace_event_field *field, *next;
499 	struct list_head *head = &user->fields;
500 	int pos = 0, depth = 0;
501 	const char *str_func;
502 
503 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
504 
505 	list_for_each_entry_safe_reverse(field, next, head, link) {
506 		if (depth != 0)
507 			pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
508 
509 		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s",
510 				field->name, user_field_format(field->type));
511 
512 		depth++;
513 	}
514 
515 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
516 
517 	list_for_each_entry_safe_reverse(field, next, head, link) {
518 		if (user_field_is_dyn_string(field->type, &str_func))
519 			pos += snprintf(buf + pos, LEN_OR_ZERO,
520 					", %s(%s)", str_func, field->name);
521 		else
522 			pos += snprintf(buf + pos, LEN_OR_ZERO,
523 					", REC->%s", field->name);
524 	}
525 
526 	return pos + 1;
527 }
528 #undef LEN_OR_ZERO
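
/*
 * Worked example: for an event registered as "test char[20] msg;unsigned int id"
 * the two passes above build:
 *
 *	"msg=%s id=%u", REC->msg, REC->id
 *
 * The fields list is walked in reverse because fields are head-inserted in
 * user_event_add_field(), so reverse order matches registration order.
 */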
529 
530 static int user_event_create_print_fmt(struct user_event *user)
531 {
532 	char *print_fmt;
533 	int len;
534 
535 	len = user_event_set_print_fmt(user, NULL, 0);
536 
537 	print_fmt = kmalloc(len, GFP_KERNEL);
538 
539 	if (!print_fmt)
540 		return -ENOMEM;
541 
542 	user_event_set_print_fmt(user, print_fmt, len);
543 
544 	user->call.print_fmt = print_fmt;
545 
546 	return 0;
547 }
548 
549 static enum print_line_t user_event_print_trace(struct trace_iterator *iter,
550 						int flags,
551 						struct trace_event *event)
552 {
553 	/* Unsafe to try to decode user-provided print_fmt, use hex */
554 	trace_print_hex_dump_seq(&iter->seq, "", DUMP_PREFIX_OFFSET, 16,
555 				 1, iter->ent, iter->ent_size, true);
556 
557 	return trace_handle_return(&iter->seq);
558 }
559 
560 static struct trace_event_functions user_event_funcs = {
561 	.trace = user_event_print_trace,
562 };
563 
564 static int destroy_user_event(struct user_event *user)
565 {
566 	int ret = 0;
567 
568 	/* Must destroy fields before call removal */
569 	user_event_destroy_fields(user);
570 
571 	ret = trace_remove_event_call(&user->call);
572 
573 	if (ret)
574 		return ret;
575 
576 	dyn_event_remove(&user->devent);
577 
578 	register_page_data[user->index] = 0;
579 	clear_bit(user->index, page_bitmap);
580 	hash_del(&user->node);
581 
582 	user_event_destroy_validators(user);
583 	kfree(user->call.print_fmt);
584 	kfree(EVENT_NAME(user));
585 	kfree(user);
586 
587 	return ret;
588 }
589 
590 static struct user_event *find_user_event(char *name, u32 *outkey)
591 {
592 	struct user_event *user;
593 	u32 key = user_event_key(name);
594 
595 	*outkey = key;
596 
597 	hash_for_each_possible(register_table, user, node, key)
598 		if (!strcmp(EVENT_NAME(user), name)) {
599 			atomic_inc(&user->refcnt);
600 			return user;
601 		}
602 
603 	return NULL;
604 }
605 
606 static int user_event_validate(struct user_event *user, void *data, int len)
607 {
608 	struct list_head *head = &user->validators;
609 	struct user_event_validator *validator;
610 	void *pos, *end = data + len;
611 	u32 loc, offset, size;
612 
613 	list_for_each_entry(validator, head, link) {
614 		pos = data + validator->offset;
615 
616 		/* min_size check already done, so no bounds check is needed here */
617 		loc = *(u32 *)pos;
618 		offset = loc & 0xffff;
619 		size = loc >> 16;
620 
621 		if (likely(validator->flags & VALIDATOR_REL))
622 			pos += offset + sizeof(loc);
623 		else
624 			pos = data + offset;
625 
626 		pos += size;
627 
628 		if (unlikely(pos > end))
629 			return -EFAULT;
630 
631 		if (likely(validator->flags & VALIDATOR_ENSURE_NULL))
632 			if (unlikely(*(char *)(pos - 1) != '\0'))
633 				return -EFAULT;
634 	}
635 
636 	return 0;
637 }
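
/*
 * Illustrative layout: each dynamic field is a u32 'loc' word encoding
 * (size << 16) | offset. For a "__rel_loc char[] msg" field whose 6 byte
 * string "hello" (including the NUL) is placed immediately after the loc
 * word, the user encodes loc = (6 << 16) | 0 and the walk above computes:
 *
 *	pos = data + validator->offset		(the loc word)
 *	pos += 0 + sizeof(loc)			(start of the string)
 *	pos += 6				(one past the terminating NUL)
 *
 * then requires pos <= end and, for char data, that the final byte is '\0'.
 */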
638 
639 /*
640  * Writes the user-supplied payload out to a trace file.
641  */
642 static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
643 			      void *tpdata, bool *faulted)
644 {
645 	struct trace_event_file *file;
646 	struct trace_entry *entry;
647 	struct trace_event_buffer event_buffer;
648 	size_t size = sizeof(*entry) + i->count;
649 
650 	file = (struct trace_event_file *)tpdata;
651 
652 	if (!file ||
653 	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
654 	    trace_trigger_soft_disabled(file))
655 		return;
656 
657 	/* Allocates and fills trace_entry; entry + 1 is the start of the data payload */
658 	entry = trace_event_buffer_reserve(&event_buffer, file, size);
659 
660 	if (unlikely(!entry))
661 		return;
662 
663 	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
664 		goto discard;
665 
666 	if (!list_empty(&user->validators) &&
667 	    unlikely(user_event_validate(user, entry, size)))
668 		goto discard;
669 
670 	trace_event_buffer_commit(&event_buffer);
671 
672 	return;
673 discard:
674 	*faulted = true;
675 	__trace_event_discard_commit(event_buffer.buffer,
676 				     event_buffer.event);
677 }
678 
679 #ifdef CONFIG_PERF_EVENTS
680 static void user_event_bpf(struct user_event *user, struct iov_iter *i)
681 {
682 	struct user_bpf_context context;
683 	struct user_bpf_iter bpf_i;
684 	char fast_data[MAX_STACK_BPF_DATA];
685 	void *temp = NULL;
686 
687 	if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
688 		/* Raw iterator */
689 		context.data_type = USER_BPF_DATA_ITER;
690 		context.data_len = i->count;
691 		context.iter = &bpf_i;
692 
693 		bpf_i.iov_offset = i->iov_offset;
694 		bpf_i.iov = i->iov;
695 		bpf_i.nr_segs = i->nr_segs;
696 	} else if (i->nr_segs == 1 && iter_is_iovec(i)) {
697 		/* Single buffer from user */
698 		context.data_type = USER_BPF_DATA_USER;
699 		context.data_len = i->count;
700 		context.udata = i->iov->iov_base + i->iov_offset;
701 	} else {
702 		/* Multi buffer from user */
703 		struct iov_iter copy = *i;
704 		size_t copy_size = min_t(size_t, i->count, MAX_BPF_COPY_SIZE);
705 
706 		context.data_type = USER_BPF_DATA_KERNEL;
707 		context.kdata = fast_data;
708 
709 		if (unlikely(copy_size > sizeof(fast_data))) {
710 			temp = kmalloc(copy_size, GFP_NOWAIT);
711 
712 			if (temp)
713 				context.kdata = temp;
714 			else
715 				copy_size = sizeof(fast_data);
716 		}
717 
718 		context.data_len = copy_nofault(context.kdata,
719 						copy_size, &copy);
720 	}
721 
722 	trace_call_bpf(&user->call, &context);
723 
724 	kfree(temp);
725 }
726 
727 /*
728  * Writes the user-supplied payload out to the perf ring buffer or an eBPF program.
729  */
730 static void user_event_perf(struct user_event *user, struct iov_iter *i,
731 			    void *tpdata, bool *faulted)
732 {
733 	struct hlist_head *perf_head;
734 
735 	if (bpf_prog_array_valid(&user->call))
736 		user_event_bpf(user, i);
737 
738 	perf_head = this_cpu_ptr(user->call.perf_events);
739 
740 	if (perf_head && !hlist_empty(perf_head)) {
741 		struct trace_entry *perf_entry;
742 		struct pt_regs *regs;
743 		size_t size = sizeof(*perf_entry) + i->count;
744 		int context;
745 
746 		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
747 						  &regs, &context);
748 
749 		if (unlikely(!perf_entry))
750 			return;
751 
752 		perf_fetch_caller_regs(regs);
753 
754 		if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
755 			goto discard;
756 
757 		if (!list_empty(&user->validators) &&
758 		    unlikely(user_event_validate(user, perf_entry, size)))
759 			goto discard;
760 
761 		perf_trace_buf_submit(perf_entry, size, context,
762 				      user->call.event.type, 1, regs,
763 				      perf_head, NULL);
764 
765 		return;
766 discard:
767 		*faulted = true;
768 		perf_swevent_put_recursion_context(context);
769 	}
770 }
771 #endif
772 
773 /*
774  * Update the register page that is shared between user processes.
775  */
776 static void update_reg_page_for(struct user_event *user)
777 {
778 	struct tracepoint *tp = &user->tracepoint;
779 	char status = 0;
780 
781 	if (atomic_read(&tp->key.enabled) > 0) {
782 		struct tracepoint_func *probe_func_ptr;
783 		user_event_func_t probe_func;
784 
785 		rcu_read_lock_sched();
786 
787 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
788 
789 		if (probe_func_ptr) {
790 			do {
791 				probe_func = probe_func_ptr->func;
792 
793 				if (probe_func == user_event_ftrace)
794 					status |= EVENT_STATUS_FTRACE;
795 #ifdef CONFIG_PERF_EVENTS
796 				else if (probe_func == user_event_perf)
797 					status |= EVENT_STATUS_PERF;
798 #endif
799 				else
800 					status |= EVENT_STATUS_OTHER;
801 			} while ((++probe_func_ptr)->func);
802 		}
803 
804 		rcu_read_unlock_sched();
805 	}
806 
807 	register_page_data[user->index] = status;
808 }
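
/*
 * Minimal user-space sketch (hypothetical descriptors, assuming a 4K page and
 * an already-registered event) of how this page is consumed: the byte at
 * status_index is non-zero only while at least one tracer is attached.
 *
 *	char *status = mmap(NULL, 4096, PROT_READ, MAP_SHARED, status_fd, 0);
 *
 *	if (status[reg.status_index])
 *		write(data_fd, payload, payload_len);
 */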
809 
810 /*
811  * Register callback for our events from tracing sub-systems.
812  */
813 static int user_event_reg(struct trace_event_call *call,
814 			  enum trace_reg type,
815 			  void *data)
816 {
817 	struct user_event *user = (struct user_event *)call->data;
818 	int ret = 0;
819 
820 	if (!user)
821 		return -ENOENT;
822 
823 	switch (type) {
824 	case TRACE_REG_REGISTER:
825 		ret = tracepoint_probe_register(call->tp,
826 						call->class->probe,
827 						data);
828 		if (!ret)
829 			goto inc;
830 		break;
831 
832 	case TRACE_REG_UNREGISTER:
833 		tracepoint_probe_unregister(call->tp,
834 					    call->class->probe,
835 					    data);
836 		goto dec;
837 
838 #ifdef CONFIG_PERF_EVENTS
839 	case TRACE_REG_PERF_REGISTER:
840 		ret = tracepoint_probe_register(call->tp,
841 						call->class->perf_probe,
842 						data);
843 		if (!ret)
844 			goto inc;
845 		break;
846 
847 	case TRACE_REG_PERF_UNREGISTER:
848 		tracepoint_probe_unregister(call->tp,
849 					    call->class->perf_probe,
850 					    data);
851 		goto dec;
852 
853 	case TRACE_REG_PERF_OPEN:
854 	case TRACE_REG_PERF_CLOSE:
855 	case TRACE_REG_PERF_ADD:
856 	case TRACE_REG_PERF_DEL:
857 		break;
858 #endif
859 	}
860 
861 	return ret;
862 inc:
863 	atomic_inc(&user->refcnt);
864 	update_reg_page_for(user);
865 	return 0;
866 dec:
867 	update_reg_page_for(user);
868 	atomic_dec(&user->refcnt);
869 	return 0;
870 }
871 
872 static int user_event_create(const char *raw_command)
873 {
874 	struct user_event *user;
875 	char *name;
876 	int ret;
877 
878 	if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX))
879 		return -ECANCELED;
880 
881 	raw_command += USER_EVENTS_PREFIX_LEN;
882 	raw_command = skip_spaces(raw_command);
883 
884 	name = kstrdup(raw_command, GFP_KERNEL);
885 
886 	if (!name)
887 		return -ENOMEM;
888 
889 	mutex_lock(&reg_mutex);
890 
891 	ret = user_event_parse_cmd(name, &user);
892 
893 	if (!ret)
894 		atomic_dec(&user->refcnt);
895 
896 	mutex_unlock(&reg_mutex);
897 
898 	if (ret)
899 		kfree(name);
900 
901 	return ret;
902 }
903 
904 static int user_event_show(struct seq_file *m, struct dyn_event *ev)
905 {
906 	struct user_event *user = container_of(ev, struct user_event, devent);
907 	struct ftrace_event_field *field, *next;
908 	struct list_head *head;
909 	int depth = 0;
910 
911 	seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user));
912 
913 	head = trace_get_fields(&user->call);
914 
915 	list_for_each_entry_safe_reverse(field, next, head, link) {
916 		if (depth == 0)
917 			seq_puts(m, " ");
918 		else
919 			seq_puts(m, "; ");
920 
921 		seq_printf(m, "%s %s", field->type, field->name);
922 
923 		if (str_has_prefix(field->type, "struct "))
924 			seq_printf(m, " %d", field->size);
925 
926 		depth++;
927 	}
928 
929 	seq_puts(m, "\n");
930 
931 	return 0;
932 }
933 
934 static bool user_event_is_busy(struct dyn_event *ev)
935 {
936 	struct user_event *user = container_of(ev, struct user_event, devent);
937 
938 	return atomic_read(&user->refcnt) != 0;
939 }
940 
941 static int user_event_free(struct dyn_event *ev)
942 {
943 	struct user_event *user = container_of(ev, struct user_event, devent);
944 
945 	if (atomic_read(&user->refcnt) != 0)
946 		return -EBUSY;
947 
948 	return destroy_user_event(user);
949 }
950 
951 static bool user_field_match(struct ftrace_event_field *field, int argc,
952 			     const char **argv, int *iout)
953 {
954 	char *field_name, *arg_name;
955 	int len, pos, i = *iout;
956 	bool colon = false, match = false;
957 
958 	if (i >= argc)
959 		return false;
960 
961 	len = MAX_FIELD_ARG_NAME;
962 	field_name = kmalloc(len, GFP_KERNEL);
963 	arg_name = kmalloc(len, GFP_KERNEL);
964 
965 	if (!arg_name || !field_name)
966 		goto out;
967 
968 	pos = 0;
969 
970 	for (; i < argc; ++i) {
971 		if (i != *iout)
972 			pos += snprintf(arg_name + pos, len - pos, " ");
973 
974 		pos += snprintf(arg_name + pos, len - pos, "%s", argv[i]);
975 
976 		if (strchr(argv[i], ';')) {
977 			++i;
978 			colon = true;
979 			break;
980 		}
981 	}
982 
983 	pos = 0;
984 
985 	pos += snprintf(field_name + pos, len - pos, "%s", field->type);
986 	pos += snprintf(field_name + pos, len - pos, " ");
987 	pos += snprintf(field_name + pos, len - pos, "%s", field->name);
988 
989 	if (colon)
990 		pos += snprintf(field_name + pos, len - pos, ";");
991 
992 	*iout = i;
993 
994 	match = strcmp(arg_name, field_name) == 0;
995 out:
996 	kfree(arg_name);
997 	kfree(field_name);
998 
999 	return match;
1000 }
1001 
1002 static bool user_fields_match(struct user_event *user, int argc,
1003 			      const char **argv)
1004 {
1005 	struct ftrace_event_field *field, *next;
1006 	struct list_head *head = &user->fields;
1007 	int i = 0;
1008 
1009 	list_for_each_entry_safe_reverse(field, next, head, link)
1010 		if (!user_field_match(field, argc, argv, &i))
1011 			return false;
1012 
1013 	if (i != argc)
1014 		return false;
1015 
1016 	return true;
1017 }
1018 
1019 static bool user_event_match(const char *system, const char *event,
1020 			     int argc, const char **argv, struct dyn_event *ev)
1021 {
1022 	struct user_event *user = container_of(ev, struct user_event, devent);
1023 	bool match;
1024 
1025 	match = strcmp(EVENT_NAME(user), event) == 0 &&
1026 		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
1027 
1028 	if (match && argc > 0)
1029 		match = user_fields_match(user, argc, argv);
1030 
1031 	return match;
1032 }
1033 
1034 static struct dyn_event_operations user_event_dops = {
1035 	.create = user_event_create,
1036 	.show = user_event_show,
1037 	.is_busy = user_event_is_busy,
1038 	.free = user_event_free,
1039 	.match = user_event_match,
1040 };
1041 
1042 static int user_event_trace_register(struct user_event *user)
1043 {
1044 	int ret;
1045 
1046 	ret = register_trace_event(&user->call.event);
1047 
1048 	if (!ret)
1049 		return -ENODEV;
1050 
1051 	ret = trace_add_event_call(&user->call);
1052 
1053 	if (ret)
1054 		unregister_trace_event(&user->call.event);
1055 
1056 	return ret;
1057 }
1058 
1059 /*
1060  * Parses the event name, arguments and flags, then registers if successful.
1061  * The name buffer's lifetime is owned by this method for success cases only.
1062  * Upon success the returned user_event has its ref count increased by 1.
1063  */
1064 static int user_event_parse(char *name, char *args, char *flags,
1065 			    struct user_event **newuser)
1066 {
1067 	int ret;
1068 	int index;
1069 	u32 key;
1070 	struct user_event *user;
1071 
1072 	/* Prevent dyn_event from racing */
1073 	mutex_lock(&event_mutex);
1074 	user = find_user_event(name, &key);
1075 	mutex_unlock(&event_mutex);
1076 
1077 	if (user) {
1078 		*newuser = user;
1079 		/*
1080 		 * The name is allocated by the caller; free it here since the
1081 		 * event already exists. The caller only frees the name on failure.
1082 		 */
1083 		kfree(name);
1084 		return 0;
1085 	}
1086 
1087 	index = find_first_zero_bit(page_bitmap, MAX_EVENTS);
1088 
1089 	if (index == MAX_EVENTS)
1090 		return -EMFILE;
1091 
1092 	user = kzalloc(sizeof(*user), GFP_KERNEL);
1093 
1094 	if (!user)
1095 		return -ENOMEM;
1096 
1097 	INIT_LIST_HEAD(&user->class.fields);
1098 	INIT_LIST_HEAD(&user->fields);
1099 	INIT_LIST_HEAD(&user->validators);
1100 
1101 	user->tracepoint.name = name;
1102 
1103 	user_event_parse_flags(user, flags);
1104 
1105 	ret = user_event_parse_fields(user, args);
1106 
1107 	if (ret)
1108 		goto put_user;
1109 
1110 	ret = user_event_create_print_fmt(user);
1111 
1112 	if (ret)
1113 		goto put_user;
1114 
1115 	user->call.data = user;
1116 	user->call.class = &user->class;
1117 	user->call.name = name;
1118 	user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
1119 	user->call.tp = &user->tracepoint;
1120 	user->call.event.funcs = &user_event_funcs;
1121 
1122 	user->class.system = USER_EVENTS_SYSTEM;
1123 	user->class.fields_array = user_event_fields_array;
1124 	user->class.get_fields = user_event_get_fields;
1125 	user->class.reg = user_event_reg;
1126 	user->class.probe = user_event_ftrace;
1127 #ifdef CONFIG_PERF_EVENTS
1128 	user->class.perf_probe = user_event_perf;
1129 #endif
1130 
1131 	mutex_lock(&event_mutex);
1132 	ret = user_event_trace_register(user);
1133 	mutex_unlock(&event_mutex);
1134 
1135 	if (ret)
1136 		goto put_user;
1137 
1138 	user->index = index;
1139 
1140 	/* Ensure we track ref */
1141 	atomic_inc(&user->refcnt);
1142 
1143 	dyn_event_init(&user->devent, &user_event_dops);
1144 	dyn_event_add(&user->devent, &user->call);
1145 	set_bit(user->index, page_bitmap);
1146 	hash_add(register_table, &user->node, key);
1147 
1148 	*newuser = user;
1149 	return 0;
1150 put_user:
1151 	user_event_destroy_fields(user);
1152 	user_event_destroy_validators(user);
1153 	kfree(user);
1154 	return ret;
1155 }
1156 
1157 /*
1158  * Deletes a previously created event if it is no longer being used.
1159  */
1160 static int delete_user_event(char *name)
1161 {
1162 	u32 key;
1163 	int ret;
1164 	struct user_event *user = find_user_event(name, &key);
1165 
1166 	if (!user)
1167 		return -ENOENT;
1168 
1169 	/* Ensure we are the last ref */
1170 	if (atomic_read(&user->refcnt) != 1) {
1171 		ret = -EBUSY;
1172 		goto put_ref;
1173 	}
1174 
1175 	ret = destroy_user_event(user);
1176 
1177 	if (ret)
1178 		goto put_ref;
1179 
1180 	return ret;
1181 put_ref:
1182 	/* No longer have this ref */
1183 	atomic_dec(&user->refcnt);
1184 
1185 	return ret;
1186 }
1187 
1188 /*
1189  * Validates the user payload and writes via iterator.
1190  */
1191 static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
1192 {
1193 	struct user_event_refs *refs;
1194 	struct user_event *user = NULL;
1195 	struct tracepoint *tp;
1196 	ssize_t ret = i->count;
1197 	int idx;
1198 
1199 	if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx)))
1200 		return -EFAULT;
1201 
1202 	rcu_read_lock_sched();
1203 
1204 	refs = rcu_dereference_sched(file->private_data);
1205 
1206 	/*
1207 	 * The refs->events array is protected by RCU, and new items may be
1208 	 * added. But the user retrieved from indexing into the events array
1209 	 * remains immutable while the file is open.
1210 	 */
1211 	if (likely(refs && idx < refs->count))
1212 		user = refs->events[idx];
1213 
1214 	rcu_read_unlock_sched();
1215 
1216 	if (unlikely(user == NULL))
1217 		return -ENOENT;
1218 
1219 	if (unlikely(i->count < user->min_size))
1220 		return -EINVAL;
1221 
1222 	tp = &user->tracepoint;
1223 
1224 	/*
1225 	 * It's possible key.enabled is disabled after this check; however,
1226 	 * we don't mind if a few events slip through in that window.
1227 	 */
1228 	if (likely(atomic_read(&tp->key.enabled) > 0)) {
1229 		struct tracepoint_func *probe_func_ptr;
1230 		user_event_func_t probe_func;
1231 		struct iov_iter copy;
1232 		void *tpdata;
1233 		bool faulted;
1234 
1235 		if (unlikely(fault_in_iov_iter_readable(i, i->count)))
1236 			return -EFAULT;
1237 
1238 		faulted = false;
1239 
1240 		rcu_read_lock_sched();
1241 
1242 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
1243 
1244 		if (probe_func_ptr) {
1245 			do {
1246 				copy = *i;
1247 				probe_func = probe_func_ptr->func;
1248 				tpdata = probe_func_ptr->data;
1249 				probe_func(user, &copy, tpdata, &faulted);
1250 			} while ((++probe_func_ptr)->func);
1251 		}
1252 
1253 		rcu_read_unlock_sched();
1254 
1255 		if (unlikely(faulted))
1256 			return -EFAULT;
1257 	}
1258 
1259 	return ret;
1260 }
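
/*
 * Illustrative payload layout for the core write path above, using the
 * "test char[20] msg;unsigned int id" example event:
 *
 *	struct {
 *		int write_index;	(consumed by the copy_from_iter() above)
 *		char msg[20];
 *		unsigned int id;	(msg + id must cover min_size bytes)
 *	} payload;
 *
 * write_index is the value handed back by DIAG_IOCSREG; the bytes after it
 * form the event data handed to every attached probe function.
 */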
1261 
1262 static ssize_t user_events_write(struct file *file, const char __user *ubuf,
1263 				 size_t count, loff_t *ppos)
1264 {
1265 	struct iovec iov;
1266 	struct iov_iter i;
1267 
1268 	if (unlikely(*ppos != 0))
1269 		return -EFAULT;
1270 
1271 	if (unlikely(import_single_range(WRITE, (char __user *)ubuf, count, &iov, &i)))
1272 		return -EFAULT;
1273 
1274 	return user_events_write_core(file, &i);
1275 }
1276 
1277 static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
1278 {
1279 	return user_events_write_core(kp->ki_filp, i);
1280 }
1281 
1282 static int user_events_ref_add(struct file *file, struct user_event *user)
1283 {
1284 	struct user_event_refs *refs, *new_refs;
1285 	int i, size, count = 0;
1286 
1287 	refs = rcu_dereference_protected(file->private_data,
1288 					 lockdep_is_held(&reg_mutex));
1289 
1290 	if (refs) {
1291 		count = refs->count;
1292 
1293 		for (i = 0; i < count; ++i)
1294 			if (refs->events[i] == user)
1295 				return i;
1296 	}
1297 
1298 	size = struct_size(refs, events, count + 1);
1299 
1300 	new_refs = kzalloc(size, GFP_KERNEL);
1301 
1302 	if (!new_refs)
1303 		return -ENOMEM;
1304 
1305 	new_refs->count = count + 1;
1306 
1307 	for (i = 0; i < count; ++i)
1308 		new_refs->events[i] = refs->events[i];
1309 
1310 	new_refs->events[i] = user;
1311 
1312 	atomic_inc(&user->refcnt);
1313 
1314 	rcu_assign_pointer(file->private_data, new_refs);
1315 
1316 	if (refs)
1317 		kfree_rcu(refs, rcu);
1318 
1319 	return i;
1320 }
1321 
1322 static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
1323 {
1324 	u32 size;
1325 	long ret;
1326 
1327 	ret = get_user(size, &ureg->size);
1328 
1329 	if (ret)
1330 		return ret;
1331 
1332 	if (size > PAGE_SIZE)
1333 		return -E2BIG;
1334 
1335 	return copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
1336 }
1337 
1338 /*
1339  * Registers a user_event on behalf of a user process.
1340  */
1341 static long user_events_ioctl_reg(struct file *file, unsigned long uarg)
1342 {
1343 	struct user_reg __user *ureg = (struct user_reg __user *)uarg;
1344 	struct user_reg reg;
1345 	struct user_event *user;
1346 	char *name;
1347 	long ret;
1348 
1349 	ret = user_reg_get(ureg, &reg);
1350 
1351 	if (ret)
1352 		return ret;
1353 
1354 	name = strndup_user((const char __user *)(uintptr_t)reg.name_args,
1355 			    MAX_EVENT_DESC);
1356 
1357 	if (IS_ERR(name)) {
1358 		ret = PTR_ERR(name);
1359 		return ret;
1360 	}
1361 
1362 	ret = user_event_parse_cmd(name, &user);
1363 
1364 	if (ret) {
1365 		kfree(name);
1366 		return ret;
1367 	}
1368 
1369 	ret = user_events_ref_add(file, user);
1370 
1371 	/* No longer need the parse ref; ref_add either worked or it didn't */
1372 	atomic_dec(&user->refcnt);
1373 
1374 	/* Positive number is index and valid */
1375 	/* A non-negative return is a valid write index */
1376 		return ret;
1377 
1378 	put_user((u32)ret, &ureg->write_index);
1379 	put_user(user->index, &ureg->status_index);
1380 
1381 	return 0;
1382 }
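
/*
 * Minimal user-space registration sketch (hypothetical descriptor; struct
 * user_reg fields as used above):
 *
 *	struct user_reg reg = {0};
 *
 *	reg.size = sizeof(reg);
 *	reg.name_args = (__u64)(uintptr_t)"test char[20] msg;unsigned int id";
 *
 *	if (ioctl(data_fd, DIAG_IOCSREG, &reg) < 0)
 *		return -1;
 *
 * On success reg.write_index prefixes each write() payload and
 * reg.status_index indexes the mmap'ed status page.
 */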
1383 
1384 /*
1385  * Deletes a user_event on behalf of a user process.
1386  */
1387 static long user_events_ioctl_del(struct file *file, unsigned long uarg)
1388 {
1389 	void __user *ubuf = (void __user *)uarg;
1390 	char *name;
1391 	long ret;
1392 
1393 	name = strndup_user(ubuf, MAX_EVENT_DESC);
1394 
1395 	if (IS_ERR(name))
1396 		return PTR_ERR(name);
1397 
1398 	/* event_mutex prevents dyn_event from racing */
1399 	mutex_lock(&event_mutex);
1400 	ret = delete_user_event(name);
1401 	mutex_unlock(&event_mutex);
1402 
1403 	kfree(name);
1404 
1405 	return ret;
1406 }
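
/*
 * User-space counterpart sketch (hypothetical descriptor): the ioctl argument
 * is simply a pointer to the event name.
 *
 *	ioctl(data_fd, DIAG_IOCSDEL, "test");
 */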
1407 
1408 /*
1409  * Handles the ioctl from user mode to register or alter operations.
1410  */
1411 static long user_events_ioctl(struct file *file, unsigned int cmd,
1412 			      unsigned long uarg)
1413 {
1414 	long ret = -ENOTTY;
1415 
1416 	switch (cmd) {
1417 	case DIAG_IOCSREG:
1418 		mutex_lock(&reg_mutex);
1419 		ret = user_events_ioctl_reg(file, uarg);
1420 		mutex_unlock(&reg_mutex);
1421 		break;
1422 
1423 	case DIAG_IOCSDEL:
1424 		mutex_lock(&reg_mutex);
1425 		ret = user_events_ioctl_del(file, uarg);
1426 		mutex_unlock(&reg_mutex);
1427 		break;
1428 	}
1429 
1430 	return ret;
1431 }
1432 
1433 /*
1434  * Handles the final close of the file from user mode.
1435  */
1436 static int user_events_release(struct inode *node, struct file *file)
1437 {
1438 	struct user_event_refs *refs;
1439 	struct user_event *user;
1440 	int i;
1441 
1442 	/*
1443 	 * Ensure refs cannot change under any situation by taking the
1444 	 * register mutex during the final freeing of the references.
1445 	 */
1446 	mutex_lock(&reg_mutex);
1447 
1448 	refs = file->private_data;
1449 
1450 	if (!refs)
1451 		goto out;
1452 
1453 	/*
1454 	 * The lifetime of refs has reached its end; it is tied to this file.
1455 	 * The underlying user_events are ref counted and cannot be freed while
1456 	 * still referenced; after this decrement, they may be freed elsewhere.
1457 	 */
1458 	for (i = 0; i < refs->count; ++i) {
1459 		user = refs->events[i];
1460 
1461 		if (user)
1462 			atomic_dec(&user->refcnt);
1463 	}
1464 out:
1465 	file->private_data = NULL;
1466 
1467 	mutex_unlock(&reg_mutex);
1468 
1469 	kfree(refs);
1470 
1471 	return 0;
1472 }
1473 
1474 static const struct file_operations user_data_fops = {
1475 	.write = user_events_write,
1476 	.write_iter = user_events_write_iter,
1477 	.unlocked_ioctl	= user_events_ioctl,
1478 	.release = user_events_release,
1479 };
1480 
1481 /*
1482  * Maps the shared page into the user process for checking if an event is enabled.
1483  */
1484 static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
1485 {
1486 	unsigned long size = vma->vm_end - vma->vm_start;
1487 
1488 	if (size != MAX_EVENTS)
1489 		return -EINVAL;
1490 
1491 	return remap_pfn_range(vma, vma->vm_start,
1492 			       virt_to_phys(register_page_data) >> PAGE_SHIFT,
1493 			       size, vm_get_page_prot(VM_READ));
1494 }
1495 
1496 static void *user_seq_start(struct seq_file *m, loff_t *pos)
1497 {
1498 	if (*pos)
1499 		return NULL;
1500 
1501 	return (void *)1;
1502 }
1503 
1504 static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
1505 {
1506 	++*pos;
1507 	return NULL;
1508 }
1509 
1510 static void user_seq_stop(struct seq_file *m, void *p)
1511 {
1512 }
1513 
1514 static int user_seq_show(struct seq_file *m, void *p)
1515 {
1516 	struct user_event *user;
1517 	char status;
1518 	int i, active = 0, busy = 0, flags;
1519 
1520 	mutex_lock(&reg_mutex);
1521 
1522 	hash_for_each(register_table, i, user, node) {
1523 		status = register_page_data[user->index];
1524 		flags = user->flags;
1525 
1526 		seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));
1527 
1528 		if (flags != 0 || status != 0)
1529 			seq_puts(m, " #");
1530 
1531 		if (status != 0) {
1532 			seq_puts(m, " Used by");
1533 			if (status & EVENT_STATUS_FTRACE)
1534 				seq_puts(m, " ftrace");
1535 			if (status & EVENT_STATUS_PERF)
1536 				seq_puts(m, " perf");
1537 			if (status & EVENT_STATUS_OTHER)
1538 				seq_puts(m, " other");
1539 			busy++;
1540 		}
1541 
1542 		if (flags & FLAG_BPF_ITER)
1543 			seq_puts(m, " FLAG:BPF_ITER");
1544 
1545 		seq_puts(m, "\n");
1546 		active++;
1547 	}
1548 
1549 	mutex_unlock(&reg_mutex);
1550 
1551 	seq_puts(m, "\n");
1552 	seq_printf(m, "Active: %d\n", active);
1553 	seq_printf(m, "Busy: %d\n", busy);
1554 	seq_printf(m, "Max: %ld\n", MAX_EVENTS);
1555 
1556 	return 0;
1557 }
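
/*
 * Example output (illustrative, assuming 4K pages and a single event named
 * 'test' at index 1 that ftrace has enabled):
 *
 *	1:test # Used by ftrace
 *
 *	Active: 1
 *	Busy: 1
 *	Max: 4096
 */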
1558 
1559 static const struct seq_operations user_seq_ops = {
1560 	.start = user_seq_start,
1561 	.next  = user_seq_next,
1562 	.stop  = user_seq_stop,
1563 	.show  = user_seq_show,
1564 };
1565 
1566 static int user_status_open(struct inode *node, struct file *file)
1567 {
1568 	return seq_open(file, &user_seq_ops);
1569 }
1570 
1571 static const struct file_operations user_status_fops = {
1572 	.open = user_status_open,
1573 	.mmap = user_status_mmap,
1574 	.read = seq_read,
1575 	.llseek  = seq_lseek,
1576 	.release = seq_release,
1577 };
1578 
1579 /*
1580  * Creates a set of tracefs files to allow user mode interactions.
1581  */
1582 static int create_user_tracefs(void)
1583 {
1584 	struct dentry *edata, *emmap;
1585 
1586 	edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
1587 				    NULL, NULL, &user_data_fops);
1588 
1589 	if (!edata) {
1590 		pr_warn("Could not create tracefs 'user_events_data' entry\n");
1591 		goto err;
1592 	}
1593 
1594 	/* mmap with MAP_SHARED requires writable fd */
1595 	emmap = tracefs_create_file("user_events_status", TRACE_MODE_WRITE,
1596 				    NULL, NULL, &user_status_fops);
1597 
1598 	if (!emmap) {
1599 		tracefs_remove(edata);
1600 		pr_warn("Could not create tracefs 'user_events_status' entry\n");
1601 		goto err;
1602 	}
1603 
1604 	return 0;
1605 err:
1606 	return -ENODEV;
1607 }
1608 
1609 static void set_page_reservations(bool set)
1610 {
1611 	int page;
1612 
1613 	for (page = 0; page < MAX_PAGES; ++page) {
1614 		void *addr = register_page_data + (PAGE_SIZE * page);
1615 
1616 		if (set)
1617 			SetPageReserved(virt_to_page(addr));
1618 		else
1619 			ClearPageReserved(virt_to_page(addr));
1620 	}
1621 }
1622 
1623 static int __init trace_events_user_init(void)
1624 {
1625 	int ret;
1626 
1627 	/* Zero all bits besides bit 0 (which is reserved for failures) */
1628 	bitmap_zero(page_bitmap, MAX_EVENTS);
1629 	set_bit(0, page_bitmap);
1630 
1631 	register_page_data = kzalloc(MAX_EVENTS, GFP_KERNEL);
1632 
1633 	if (!register_page_data)
1634 		return -ENOMEM;
1635 
1636 	set_page_reservations(true);
1637 
1638 	ret = create_user_tracefs();
1639 
1640 	if (ret) {
1641 		pr_warn("user_events could not register with tracefs\n");
1642 		set_page_reservations(false);
1643 		kfree(register_page_data);
1644 		return ret;
1645 	}
1646 
1647 	if (dyn_event_register(&user_event_dops))
1648 		pr_warn("user_events could not register with dyn_events\n");
1649 
1650 	return 0;
1651 }
1652 
1653 fs_initcall(trace_events_user_init);
1654