1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  * rseq.c
4  *
5  * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; only
10  * version 2.1 of the License.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  */
17 
18 #define _GNU_SOURCE
19 #include <errno.h>
20 #include <sched.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <syscall.h>
26 #include <assert.h>
27 #include <signal.h>
28 #include <limits.h>
29 #include <dlfcn.h>
30 #include <stddef.h>
31 #include <sys/auxv.h>
32 #include <linux/auxvec.h>
33 
34 #include "../kselftest.h"
35 #include "rseq.h"
36 
37 static const ptrdiff_t *libc_rseq_offset_p;
38 static const unsigned int *libc_rseq_size_p;
39 static const unsigned int *libc_rseq_flags_p;
40 
41 /* Offset from the thread pointer to the rseq area. */
42 ptrdiff_t rseq_offset;
43 
44 /*
45  * Size of the registered rseq area. 0 if the registration was
46  * unsuccessful.
47  */
48 unsigned int rseq_size = -1U;
49 
50 /* Flags used during rseq registration.  */
51 unsigned int rseq_flags;
52 
53 /*
54  * rseq feature size supported by the kernel. 0 if the registration was
55  * unsuccessful.
56  */
57 unsigned int rseq_feature_size = -1U;
58 
59 static int rseq_ownership;
60 static int rseq_reg_success;	/* At least one rseq registration has succeded. */
61 
62 /* Allocate a large area for the TLS. */
63 #define RSEQ_THREAD_AREA_ALLOC_SIZE	1024
64 
65 /* Original struct rseq feature size is 20 bytes. */
66 #define ORIG_RSEQ_FEATURE_SIZE		20
67 
68 /* Original struct rseq allocation size is 32 bytes. */
69 #define ORIG_RSEQ_ALLOC_SIZE		32
70 
71 static
72 __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
73 	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
74 };
75 
/*
 * Issue the rseq system call through syscall(), forwarding all four
 * arguments unchanged. Returns whatever syscall() returns, narrowed
 * to int.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	long ret;

	ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
	return ret;
}
81 
/*
 * Issue the getcpu system call through syscall(). @cpu and @node are
 * forwarded as given; the third argument is always passed as NULL.
 * Returns whatever syscall() returns, narrowed to int.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret;

	ret = syscall(__NR_getcpu, cpu, node, NULL);
	return ret;
}
86 
/*
 * Probe for the rseq system call by invoking it with a NULL area, zero
 * length, zero flags and zero signature.
 *
 * Returns 0 when the probe fails with ENOSYS and 1 when it fails with
 * EINVAL. Aborts if the probe does not return -1, or if it fails with
 * any other errno.
 */
int rseq_available(void)
{
	if (sys_rseq(NULL, 0, 0, 0) != -1)
		abort();

	if (errno == ENOSYS)
		return 0;
	if (errno == EINVAL)
		return 1;
	abort();
}
103 
104 int rseq_register_current_thread(void)
105 {
106 	int rc;
107 
108 	if (!rseq_ownership) {
109 		/* Treat libc's ownership as a successful registration. */
110 		return 0;
111 	}
112 	rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
113 	if (rc) {
114 		if (RSEQ_READ_ONCE(rseq_reg_success)) {
115 			/* Incoherent success/failure within process. */
116 			abort();
117 		}
118 		return -1;
119 	}
120 	assert(rseq_current_cpu_raw() >= 0);
121 	RSEQ_WRITE_ONCE(rseq_reg_success, 1);
122 	return 0;
123 }
124 
125 int rseq_unregister_current_thread(void)
126 {
127 	int rc;
128 
129 	if (!rseq_ownership) {
130 		/* Treat libc's ownership as a successful unregistration. */
131 		return 0;
132 	}
133 	rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
134 	if (rc)
135 		return -1;
136 	return 0;
137 }
138 
139 static
140 unsigned int get_rseq_feature_size(void)
141 {
142 	unsigned long auxv_rseq_feature_size, auxv_rseq_align;
143 
144 	auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
145 	assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
146 
147 	auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
148 	assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
149 	if (auxv_rseq_feature_size)
150 		return auxv_rseq_feature_size;
151 	else
152 		return ORIG_RSEQ_FEATURE_SIZE;
153 }
154 
155 static __attribute__((constructor))
156 void rseq_init(void)
157 {
158 	libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
159 	libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
160 	libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
161 	if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
162 			*libc_rseq_size_p != 0) {
163 		/* rseq registration owned by glibc */
164 		rseq_offset = *libc_rseq_offset_p;
165 		rseq_size = *libc_rseq_size_p;
166 		rseq_flags = *libc_rseq_flags_p;
167 		rseq_feature_size = get_rseq_feature_size();
168 		if (rseq_feature_size > rseq_size)
169 			rseq_feature_size = rseq_size;
170 		return;
171 	}
172 	rseq_ownership = 1;
173 	if (!rseq_available()) {
174 		rseq_size = 0;
175 		rseq_feature_size = 0;
176 		return;
177 	}
178 	rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
179 	rseq_flags = 0;
180 	rseq_feature_size = get_rseq_feature_size();
181 	if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
182 		rseq_size = ORIG_RSEQ_ALLOC_SIZE;
183 	else
184 		rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
185 }
186 
187 static __attribute__((destructor))
188 void rseq_exit(void)
189 {
190 	if (!rseq_ownership)
191 		return;
192 	rseq_offset = 0;
193 	rseq_size = -1U;
194 	rseq_feature_size = -1U;
195 	rseq_ownership = 0;
196 }
197 
/*
 * Return the CPU number reported by sched_getcpu(). If sched_getcpu()
 * returns a negative value, print the error with perror() and abort.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
209 
/*
 * Return the node id reported by sys_getcpu(). If sys_getcpu() returns
 * non-zero, print the error with perror() and return that value
 * instead.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int ret = sys_getcpu(&cpu_id, &node_id);

	if (ret == 0)
		return (int32_t) node_id;

	perror("sys_getcpu()");
	return ret;
}
222