xref: /openbmc/linux/kernel/trace/trace_syscalls.c (revision cbdf59ad)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <trace/syscall.h>
3 #include <trace/events/syscalls.h>
4 #include <linux/syscalls.h>
5 #include <linux/slab.h>
6 #include <linux/kernel.h>
7 #include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
8 #include <linux/ftrace.h>
9 #include <linux/perf_event.h>
10 #include <asm/syscall.h>
11 
12 #include "trace_output.h"
13 #include "trace.h"
14 
/*
 * Taken by the register/unregister helpers in this file while they update
 * the syscall tracing refcounts and the per-syscall enable state.
 */
static DEFINE_MUTEX(syscall_trace_lock);
16 
/*
 * Forward declarations: the syscall event classes reference these ->reg()
 * callbacks before their definitions appear later in the file.
 */
static int syscall_enter_register(struct trace_event_call *event,
				 enum trace_reg type, void *data);
static int syscall_exit_register(struct trace_event_call *event,
				 enum trace_reg type, void *data);
21 
22 static struct list_head *
23 syscall_get_enter_fields(struct trace_event_call *call)
24 {
25 	struct syscall_metadata *entry = call->data;
26 
27 	return &entry->enter_fields;
28 }
29 
/*
 * Bounds of the array of syscall metadata pointers that find_syscall_meta()
 * walks, from __start_syscalls_metadata up to __stop_syscalls_metadata.
 * NOTE(review): presumably provided by the linker script — confirm.
 */
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

/*
 * Table indexed by syscall number, allocated zeroed and filled in by
 * init_ftrace_syscalls(). Slots for syscalls without metadata stay NULL.
 */
static struct syscall_metadata **syscalls_metadata;
34 
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
/*
 * Default symbol/name matcher, used unless the architecture provides its
 * own by defining ARCH_HAS_SYSCALL_MATCH_SYM_NAME.
 *
 * The first three characters of both strings are skipped before comparing.
 * Archs with syscall wrappers may alias the symbols with a differently
 * spelled prefix (e.g. "SyS" rather than "sys"), which must not cause a
 * mismatch. Both strings therefore need to be at least three characters
 * long.
 *
 * Returns true when the remainders are equal.
 */
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	const char *sym_tail = sym + 3;
	const char *name_tail = name + 3;

	return strcmp(sym_tail, name_tail) == 0;
}
#endif
47 
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * On some architectures (x86 being the usual suspect) a 32bit task running
 * on a 64bit kernel does not use the same syscall numbering as a 64bit
 * task. Reporting those syscalls would attribute them to the wrong
 * entries, so they are ignored instead.
 *
 * An arch opts in by defining ARCH_TRACE_IGNORE_COMPAT_SYSCALLS and by
 * supplying arch_trace_is_compat_syscall(), which tells the tracing code
 * whether the syscall described by @regs is a compat one.
 *
 * Returns -1 for a compat syscall, otherwise the number reported by
 * syscall_get_nr().
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	int nr = -1;

	if (likely(!arch_trace_is_compat_syscall(regs)))
		nr = syscall_get_nr(task, regs);

	return nr;
}
#else
/* No compat filtering on this arch: hand back syscall_get_nr() unchanged. */
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	return syscall_get_nr(task, regs);
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
79 
80 static __init struct syscall_metadata *
81 find_syscall_meta(unsigned long syscall)
82 {
83 	struct syscall_metadata **start;
84 	struct syscall_metadata **stop;
85 	char str[KSYM_SYMBOL_LEN];
86 
87 
88 	start = __start_syscalls_metadata;
89 	stop = __stop_syscalls_metadata;
90 	kallsyms_lookup(syscall, NULL, NULL, NULL, str);
91 
92 	if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
93 		return NULL;
94 
95 	for ( ; start < stop; start++) {
96 		if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
97 			return *start;
98 	}
99 	return NULL;
100 }
101 
102 static struct syscall_metadata *syscall_nr_to_meta(int nr)
103 {
104 	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
105 		return NULL;
106 
107 	return syscalls_metadata[nr];
108 }
109 
110 const char *get_syscall_name(int syscall)
111 {
112 	struct syscall_metadata *entry;
113 
114 	entry = syscall_nr_to_meta(syscall);
115 	if (!entry)
116 		return NULL;
117 
118 	return entry->name;
119 }
120 
121 static enum print_line_t
122 print_syscall_enter(struct trace_iterator *iter, int flags,
123 		    struct trace_event *event)
124 {
125 	struct trace_array *tr = iter->tr;
126 	struct trace_seq *s = &iter->seq;
127 	struct trace_entry *ent = iter->ent;
128 	struct syscall_trace_enter *trace;
129 	struct syscall_metadata *entry;
130 	int i, syscall;
131 
132 	trace = (typeof(trace))ent;
133 	syscall = trace->nr;
134 	entry = syscall_nr_to_meta(syscall);
135 
136 	if (!entry)
137 		goto end;
138 
139 	if (entry->enter_event->event.type != ent->type) {
140 		WARN_ON_ONCE(1);
141 		goto end;
142 	}
143 
144 	trace_seq_printf(s, "%s(", entry->name);
145 
146 	for (i = 0; i < entry->nb_args; i++) {
147 
148 		if (trace_seq_has_overflowed(s))
149 			goto end;
150 
151 		/* parameter types */
152 		if (tr->trace_flags & TRACE_ITER_VERBOSE)
153 			trace_seq_printf(s, "%s ", entry->types[i]);
154 
155 		/* parameter values */
156 		trace_seq_printf(s, "%s: %lx%s", entry->args[i],
157 				 trace->args[i],
158 				 i == entry->nb_args - 1 ? "" : ", ");
159 	}
160 
161 	trace_seq_putc(s, ')');
162 end:
163 	trace_seq_putc(s, '\n');
164 
165 	return trace_handle_return(s);
166 }
167 
168 static enum print_line_t
169 print_syscall_exit(struct trace_iterator *iter, int flags,
170 		   struct trace_event *event)
171 {
172 	struct trace_seq *s = &iter->seq;
173 	struct trace_entry *ent = iter->ent;
174 	struct syscall_trace_exit *trace;
175 	int syscall;
176 	struct syscall_metadata *entry;
177 
178 	trace = (typeof(trace))ent;
179 	syscall = trace->nr;
180 	entry = syscall_nr_to_meta(syscall);
181 
182 	if (!entry) {
183 		trace_seq_putc(s, '\n');
184 		goto out;
185 	}
186 
187 	if (entry->exit_event->event.type != ent->type) {
188 		WARN_ON_ONCE(1);
189 		return TRACE_TYPE_UNHANDLED;
190 	}
191 
192 	trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
193 				trace->ret);
194 
195  out:
196 	return trace_handle_return(s);
197 }
198 
/*
 * Only referenced by SYSCALL_FIELD(), on the branch selected when the
 * declared type's size differs from the size of the record member.
 * NOTE(review): presumably left undefined on purpose so that a size
 * mismatch fails at link time — confirm.
 */
extern char *__bad_type_size(void);

/*
 * Expands to the "type string, name string, offset, size, is_signed"
 * argument list passed to trace_define_field() for member @field of a
 * local variable that must be called 'trace'.
 *
 * @type:  C type of the field; stringified and used for the signedness test
 * @field: member of 'trace' supplying the offset and size
 * @name:  stringified to give the field's name
 */
#define SYSCALL_FIELD(type, field, name)				\
	sizeof(type) != sizeof(trace.field) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), field),		\
		sizeof(trace.field), is_signed_type(type)
206 
207 static int __init
208 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
209 {
210 	int i;
211 	int pos = 0;
212 
213 	/* When len=0, we just calculate the needed length */
214 #define LEN_OR_ZERO (len ? len - pos : 0)
215 
216 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
217 	for (i = 0; i < entry->nb_args; i++) {
218 		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
219 				entry->args[i], sizeof(unsigned long),
220 				i == entry->nb_args - 1 ? "" : ", ");
221 	}
222 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
223 
224 	for (i = 0; i < entry->nb_args; i++) {
225 		pos += snprintf(buf + pos, LEN_OR_ZERO,
226 				", ((unsigned long)(REC->%s))", entry->args[i]);
227 	}
228 
229 #undef LEN_OR_ZERO
230 
231 	/* return the length of print_fmt */
232 	return pos;
233 }
234 
235 static int __init set_syscall_print_fmt(struct trace_event_call *call)
236 {
237 	char *print_fmt;
238 	int len;
239 	struct syscall_metadata *entry = call->data;
240 
241 	if (entry->enter_event != call) {
242 		call->print_fmt = "\"0x%lx\", REC->ret";
243 		return 0;
244 	}
245 
246 	/* First: called with 0 length to calculate the needed length */
247 	len = __set_enter_print_fmt(entry, NULL, 0);
248 
249 	print_fmt = kmalloc(len + 1, GFP_KERNEL);
250 	if (!print_fmt)
251 		return -ENOMEM;
252 
253 	/* Second: actually write the @print_fmt */
254 	__set_enter_print_fmt(entry, print_fmt, len + 1);
255 	call->print_fmt = print_fmt;
256 
257 	return 0;
258 }
259 
260 static void __init free_syscall_print_fmt(struct trace_event_call *call)
261 {
262 	struct syscall_metadata *entry = call->data;
263 
264 	if (entry->enter_event == call)
265 		kfree(call->print_fmt);
266 }
267 
268 static int __init syscall_enter_define_fields(struct trace_event_call *call)
269 {
270 	struct syscall_trace_enter trace;
271 	struct syscall_metadata *meta = call->data;
272 	int ret;
273 	int i;
274 	int offset = offsetof(typeof(trace), args);
275 
276 	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
277 				 FILTER_OTHER);
278 	if (ret)
279 		return ret;
280 
281 	for (i = 0; i < meta->nb_args; i++) {
282 		ret = trace_define_field(call, meta->types[i],
283 					 meta->args[i], offset,
284 					 sizeof(unsigned long), 0,
285 					 FILTER_OTHER);
286 		offset += sizeof(unsigned long);
287 	}
288 
289 	return ret;
290 }
291 
292 static int __init syscall_exit_define_fields(struct trace_event_call *call)
293 {
294 	struct syscall_trace_exit trace;
295 	int ret;
296 
297 	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
298 				 FILTER_OTHER);
299 	if (ret)
300 		return ret;
301 
302 	ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
303 				 FILTER_OTHER);
304 
305 	return ret;
306 }
307 
308 static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
309 {
310 	struct trace_array *tr = data;
311 	struct trace_event_file *trace_file;
312 	struct syscall_trace_enter *entry;
313 	struct syscall_metadata *sys_data;
314 	struct ring_buffer_event *event;
315 	struct ring_buffer *buffer;
316 	unsigned long irq_flags;
317 	unsigned long args[6];
318 	int pc;
319 	int syscall_nr;
320 	int size;
321 
322 	syscall_nr = trace_get_syscall_nr(current, regs);
323 	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
324 		return;
325 
326 	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
327 	trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
328 	if (!trace_file)
329 		return;
330 
331 	if (trace_trigger_soft_disabled(trace_file))
332 		return;
333 
334 	sys_data = syscall_nr_to_meta(syscall_nr);
335 	if (!sys_data)
336 		return;
337 
338 	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
339 
340 	local_save_flags(irq_flags);
341 	pc = preempt_count();
342 
343 	buffer = tr->trace_buffer.buffer;
344 	event = trace_buffer_lock_reserve(buffer,
345 			sys_data->enter_event->event.type, size, irq_flags, pc);
346 	if (!event)
347 		return;
348 
349 	entry = ring_buffer_event_data(event);
350 	entry->nr = syscall_nr;
351 	syscall_get_arguments(current, regs, args);
352 	memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
353 
354 	event_trigger_unlock_commit(trace_file, buffer, event, entry,
355 				    irq_flags, pc);
356 }
357 
358 static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
359 {
360 	struct trace_array *tr = data;
361 	struct trace_event_file *trace_file;
362 	struct syscall_trace_exit *entry;
363 	struct syscall_metadata *sys_data;
364 	struct ring_buffer_event *event;
365 	struct ring_buffer *buffer;
366 	unsigned long irq_flags;
367 	int pc;
368 	int syscall_nr;
369 
370 	syscall_nr = trace_get_syscall_nr(current, regs);
371 	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
372 		return;
373 
374 	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
375 	trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
376 	if (!trace_file)
377 		return;
378 
379 	if (trace_trigger_soft_disabled(trace_file))
380 		return;
381 
382 	sys_data = syscall_nr_to_meta(syscall_nr);
383 	if (!sys_data)
384 		return;
385 
386 	local_save_flags(irq_flags);
387 	pc = preempt_count();
388 
389 	buffer = tr->trace_buffer.buffer;
390 	event = trace_buffer_lock_reserve(buffer,
391 			sys_data->exit_event->event.type, sizeof(*entry),
392 			irq_flags, pc);
393 	if (!event)
394 		return;
395 
396 	entry = ring_buffer_event_data(event);
397 	entry->nr = syscall_nr;
398 	entry->ret = syscall_get_return_value(current, regs);
399 
400 	event_trigger_unlock_commit(trace_file, buffer, event, entry,
401 				    irq_flags, pc);
402 }
403 
404 static int reg_event_syscall_enter(struct trace_event_file *file,
405 				   struct trace_event_call *call)
406 {
407 	struct trace_array *tr = file->tr;
408 	int ret = 0;
409 	int num;
410 
411 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
412 	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
413 		return -ENOSYS;
414 	mutex_lock(&syscall_trace_lock);
415 	if (!tr->sys_refcount_enter)
416 		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
417 	if (!ret) {
418 		rcu_assign_pointer(tr->enter_syscall_files[num], file);
419 		tr->sys_refcount_enter++;
420 	}
421 	mutex_unlock(&syscall_trace_lock);
422 	return ret;
423 }
424 
425 static void unreg_event_syscall_enter(struct trace_event_file *file,
426 				      struct trace_event_call *call)
427 {
428 	struct trace_array *tr = file->tr;
429 	int num;
430 
431 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
432 	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
433 		return;
434 	mutex_lock(&syscall_trace_lock);
435 	tr->sys_refcount_enter--;
436 	RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
437 	if (!tr->sys_refcount_enter)
438 		unregister_trace_sys_enter(ftrace_syscall_enter, tr);
439 	mutex_unlock(&syscall_trace_lock);
440 }
441 
442 static int reg_event_syscall_exit(struct trace_event_file *file,
443 				  struct trace_event_call *call)
444 {
445 	struct trace_array *tr = file->tr;
446 	int ret = 0;
447 	int num;
448 
449 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
450 	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
451 		return -ENOSYS;
452 	mutex_lock(&syscall_trace_lock);
453 	if (!tr->sys_refcount_exit)
454 		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
455 	if (!ret) {
456 		rcu_assign_pointer(tr->exit_syscall_files[num], file);
457 		tr->sys_refcount_exit++;
458 	}
459 	mutex_unlock(&syscall_trace_lock);
460 	return ret;
461 }
462 
463 static void unreg_event_syscall_exit(struct trace_event_file *file,
464 				     struct trace_event_call *call)
465 {
466 	struct trace_array *tr = file->tr;
467 	int num;
468 
469 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
470 	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
471 		return;
472 	mutex_lock(&syscall_trace_lock);
473 	tr->sys_refcount_exit--;
474 	RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
475 	if (!tr->sys_refcount_exit)
476 		unregister_trace_sys_exit(ftrace_syscall_exit, tr);
477 	mutex_unlock(&syscall_trace_lock);
478 }
479 
480 static int __init init_syscall_trace(struct trace_event_call *call)
481 {
482 	int id;
483 	int num;
484 
485 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
486 	if (num < 0 || num >= NR_syscalls) {
487 		pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
488 				((struct syscall_metadata *)call->data)->name);
489 		return -ENOSYS;
490 	}
491 
492 	if (set_syscall_print_fmt(call) < 0)
493 		return -ENOMEM;
494 
495 	id = trace_event_raw_init(call);
496 
497 	if (id < 0) {
498 		free_syscall_print_fmt(call);
499 		return id;
500 	}
501 
502 	return id;
503 }
504 
/* Output callbacks of syscall enter events: ->trace is print_syscall_enter() */
struct trace_event_functions enter_syscall_print_funcs = {
	.trace		= print_syscall_enter,
};
508 
/* Output callbacks of syscall exit events: ->trace is print_syscall_exit() */
struct trace_event_functions exit_syscall_print_funcs = {
	.trace		= print_syscall_exit,
};
512 
/*
 * Event class shared by all syscall enter events, in the "syscalls" system.
 * Field lists are looked up per event through ->get_fields.
 * NOTE(review): __refdata is presumably needed because the __init functions
 * syscall_enter_define_fields() and init_syscall_trace() are referenced
 * here — confirm.
 */
struct trace_event_class __refdata event_class_syscall_enter = {
	.system		= "syscalls",
	.reg		= syscall_enter_register,
	.define_fields	= syscall_enter_define_fields,
	.get_fields	= syscall_get_enter_fields,
	.raw_init	= init_syscall_trace,
};
520 
/*
 * Event class shared by all syscall exit events, in the "syscalls" system.
 * Unlike the enter class it has no ->get_fields; its own ->fields list head
 * is initialised here instead.
 * NOTE(review): __refdata is presumably needed because the __init functions
 * syscall_exit_define_fields() and init_syscall_trace() are referenced
 * here — confirm.
 */
struct trace_event_class __refdata event_class_syscall_exit = {
	.system		= "syscalls",
	.reg		= syscall_exit_register,
	.define_fields	= syscall_exit_define_fields,
	.fields		= LIST_HEAD_INIT(event_class_syscall_exit.fields),
	.raw_init	= init_syscall_trace,
};
528 
529 unsigned long __init __weak arch_syscall_addr(int nr)
530 {
531 	return (unsigned long)sys_call_table[nr];
532 }
533 
534 void __init init_ftrace_syscalls(void)
535 {
536 	struct syscall_metadata *meta;
537 	unsigned long addr;
538 	int i;
539 
540 	syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
541 				    GFP_KERNEL);
542 	if (!syscalls_metadata) {
543 		WARN_ON(1);
544 		return;
545 	}
546 
547 	for (i = 0; i < NR_syscalls; i++) {
548 		addr = arch_syscall_addr(i);
549 		meta = find_syscall_meta(addr);
550 		if (!meta)
551 			continue;
552 
553 		meta->syscall_nr = i;
554 		syscalls_metadata[i] = meta;
555 	}
556 }
557 
558 #ifdef CONFIG_PERF_EVENTS
559 
/*
 * One bit per syscall number, set while the perf enter/exit event of that
 * syscall is enabled. The perf probes test these bits before doing any work.
 */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
/*
 * Number of enabled perf syscall enter/exit events. The matching tracepoint
 * probe is registered when the count is zero on enable and unregistered
 * when it drops back to zero. Updated under syscall_trace_lock.
 */
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
564 
565 static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
566 			       struct syscall_metadata *sys_data,
567 			       struct syscall_trace_enter *rec)
568 {
569 	struct syscall_tp_t {
570 		unsigned long long regs;
571 		unsigned long syscall_nr;
572 		unsigned long args[SYSCALL_DEFINE_MAXARGS];
573 	} param;
574 	int i;
575 
576 	*(struct pt_regs **)&param = regs;
577 	param.syscall_nr = rec->nr;
578 	for (i = 0; i < sys_data->nb_args; i++)
579 		param.args[i] = rec->args[i];
580 	return trace_call_bpf(call, &param);
581 }
582 
583 static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
584 {
585 	struct syscall_metadata *sys_data;
586 	struct syscall_trace_enter *rec;
587 	struct hlist_head *head;
588 	unsigned long args[6];
589 	bool valid_prog_array;
590 	int syscall_nr;
591 	int rctx;
592 	int size;
593 
594 	syscall_nr = trace_get_syscall_nr(current, regs);
595 	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
596 		return;
597 	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
598 		return;
599 
600 	sys_data = syscall_nr_to_meta(syscall_nr);
601 	if (!sys_data)
602 		return;
603 
604 	head = this_cpu_ptr(sys_data->enter_event->perf_events);
605 	valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
606 	if (!valid_prog_array && hlist_empty(head))
607 		return;
608 
609 	/* get the size after alignment with the u32 buffer size field */
610 	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
611 	size = ALIGN(size + sizeof(u32), sizeof(u64));
612 	size -= sizeof(u32);
613 
614 	rec = perf_trace_buf_alloc(size, NULL, &rctx);
615 	if (!rec)
616 		return;
617 
618 	rec->nr = syscall_nr;
619 	syscall_get_arguments(current, regs, args);
620 	memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
621 
622 	if ((valid_prog_array &&
623 	     !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
624 	    hlist_empty(head)) {
625 		perf_swevent_put_recursion_context(rctx);
626 		return;
627 	}
628 
629 	perf_trace_buf_submit(rec, size, rctx,
630 			      sys_data->enter_event->event.type, 1, regs,
631 			      head, NULL);
632 }
633 
634 static int perf_sysenter_enable(struct trace_event_call *call)
635 {
636 	int ret = 0;
637 	int num;
638 
639 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
640 
641 	mutex_lock(&syscall_trace_lock);
642 	if (!sys_perf_refcount_enter)
643 		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
644 	if (ret) {
645 		pr_info("event trace: Could not activate syscall entry trace point");
646 	} else {
647 		set_bit(num, enabled_perf_enter_syscalls);
648 		sys_perf_refcount_enter++;
649 	}
650 	mutex_unlock(&syscall_trace_lock);
651 	return ret;
652 }
653 
654 static void perf_sysenter_disable(struct trace_event_call *call)
655 {
656 	int num;
657 
658 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
659 
660 	mutex_lock(&syscall_trace_lock);
661 	sys_perf_refcount_enter--;
662 	clear_bit(num, enabled_perf_enter_syscalls);
663 	if (!sys_perf_refcount_enter)
664 		unregister_trace_sys_enter(perf_syscall_enter, NULL);
665 	mutex_unlock(&syscall_trace_lock);
666 }
667 
668 static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
669 			      struct syscall_trace_exit *rec)
670 {
671 	struct syscall_tp_t {
672 		unsigned long long regs;
673 		unsigned long syscall_nr;
674 		unsigned long ret;
675 	} param;
676 
677 	*(struct pt_regs **)&param = regs;
678 	param.syscall_nr = rec->nr;
679 	param.ret = rec->ret;
680 	return trace_call_bpf(call, &param);
681 }
682 
683 static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
684 {
685 	struct syscall_metadata *sys_data;
686 	struct syscall_trace_exit *rec;
687 	struct hlist_head *head;
688 	bool valid_prog_array;
689 	int syscall_nr;
690 	int rctx;
691 	int size;
692 
693 	syscall_nr = trace_get_syscall_nr(current, regs);
694 	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
695 		return;
696 	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
697 		return;
698 
699 	sys_data = syscall_nr_to_meta(syscall_nr);
700 	if (!sys_data)
701 		return;
702 
703 	head = this_cpu_ptr(sys_data->exit_event->perf_events);
704 	valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
705 	if (!valid_prog_array && hlist_empty(head))
706 		return;
707 
708 	/* We can probably do that at build time */
709 	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
710 	size -= sizeof(u32);
711 
712 	rec = perf_trace_buf_alloc(size, NULL, &rctx);
713 	if (!rec)
714 		return;
715 
716 	rec->nr = syscall_nr;
717 	rec->ret = syscall_get_return_value(current, regs);
718 
719 	if ((valid_prog_array &&
720 	     !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) ||
721 	    hlist_empty(head)) {
722 		perf_swevent_put_recursion_context(rctx);
723 		return;
724 	}
725 
726 	perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
727 			      1, regs, head, NULL);
728 }
729 
730 static int perf_sysexit_enable(struct trace_event_call *call)
731 {
732 	int ret = 0;
733 	int num;
734 
735 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
736 
737 	mutex_lock(&syscall_trace_lock);
738 	if (!sys_perf_refcount_exit)
739 		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
740 	if (ret) {
741 		pr_info("event trace: Could not activate syscall exit trace point");
742 	} else {
743 		set_bit(num, enabled_perf_exit_syscalls);
744 		sys_perf_refcount_exit++;
745 	}
746 	mutex_unlock(&syscall_trace_lock);
747 	return ret;
748 }
749 
750 static void perf_sysexit_disable(struct trace_event_call *call)
751 {
752 	int num;
753 
754 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
755 
756 	mutex_lock(&syscall_trace_lock);
757 	sys_perf_refcount_exit--;
758 	clear_bit(num, enabled_perf_exit_syscalls);
759 	if (!sys_perf_refcount_exit)
760 		unregister_trace_sys_exit(perf_syscall_exit, NULL);
761 	mutex_unlock(&syscall_trace_lock);
762 }
763 
764 #endif /* CONFIG_PERF_EVENTS */
765 
766 static int syscall_enter_register(struct trace_event_call *event,
767 				 enum trace_reg type, void *data)
768 {
769 	struct trace_event_file *file = data;
770 
771 	switch (type) {
772 	case TRACE_REG_REGISTER:
773 		return reg_event_syscall_enter(file, event);
774 	case TRACE_REG_UNREGISTER:
775 		unreg_event_syscall_enter(file, event);
776 		return 0;
777 
778 #ifdef CONFIG_PERF_EVENTS
779 	case TRACE_REG_PERF_REGISTER:
780 		return perf_sysenter_enable(event);
781 	case TRACE_REG_PERF_UNREGISTER:
782 		perf_sysenter_disable(event);
783 		return 0;
784 	case TRACE_REG_PERF_OPEN:
785 	case TRACE_REG_PERF_CLOSE:
786 	case TRACE_REG_PERF_ADD:
787 	case TRACE_REG_PERF_DEL:
788 		return 0;
789 #endif
790 	}
791 	return 0;
792 }
793 
794 static int syscall_exit_register(struct trace_event_call *event,
795 				 enum trace_reg type, void *data)
796 {
797 	struct trace_event_file *file = data;
798 
799 	switch (type) {
800 	case TRACE_REG_REGISTER:
801 		return reg_event_syscall_exit(file, event);
802 	case TRACE_REG_UNREGISTER:
803 		unreg_event_syscall_exit(file, event);
804 		return 0;
805 
806 #ifdef CONFIG_PERF_EVENTS
807 	case TRACE_REG_PERF_REGISTER:
808 		return perf_sysexit_enable(event);
809 	case TRACE_REG_PERF_UNREGISTER:
810 		perf_sysexit_disable(event);
811 		return 0;
812 	case TRACE_REG_PERF_OPEN:
813 	case TRACE_REG_PERF_CLOSE:
814 	case TRACE_REG_PERF_ADD:
815 	case TRACE_REG_PERF_DEL:
816 		return 0;
817 #endif
818 	}
819 	return 0;
820 }
821