1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  * rseq.c
4  *
5  * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; only
10  * version 2.1 of the License.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  */
17 
18 #define _GNU_SOURCE
19 #include <errno.h>
20 #include <sched.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <syscall.h>
26 #include <assert.h>
27 #include <signal.h>
28 #include <limits.h>
29 #include <dlfcn.h>
30 #include <stddef.h>
31 #include <sys/auxv.h>
32 #include <linux/auxvec.h>
33 
34 #include "../kselftest.h"
35 #include "rseq.h"
36 
/*
 * Define weak versions to play nice with binaries that are statically linked
 * against a libc that doesn't support registering its own rseq.
 */
__weak ptrdiff_t __rseq_offset;
__weak unsigned int __rseq_size;
__weak unsigned int __rseq_flags;

/*
 * Pointers through which the libc rseq symbols are read. They initially
 * refer to the definitions above; rseq_init() re-points them at the results
 * of dlsym() when the size read through libc_rseq_size_p is zero, so any of
 * them may end up NULL and must be checked before being dereferenced.
 */
static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
static const unsigned int *libc_rseq_size_p = &__rseq_size;
static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
48 
/* Offset from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;

/*
 * Size of the registered rseq area. 0 if the registration was
 * unsuccessful. Holds -1U until rseq_init() has run, and again after
 * rseq_exit() when this file owned the registration.
 */
unsigned int rseq_size = -1U;

/* Flags used during rseq registration.  */
unsigned int rseq_flags;

/*
 * rseq feature size supported by the kernel. 0 if the registration was
 * unsuccessful. Holds -1U until rseq_init() has run, and again after
 * rseq_exit() when this file owned the registration.
 */
unsigned int rseq_feature_size = -1U;

/*
 * Non-zero when this file, rather than libc, is responsible for rseq
 * registration. Set by rseq_init(), cleared by rseq_exit().
 */
static int rseq_ownership;
static int rseq_reg_success;	/* At least one rseq registration has succeeded. */
69 
/*
 * Allocate a large area for the TLS. This value is used as the alignment
 * of __rseq_abi, as the registered size when the kernel reports a feature
 * size other than the original one, and as the upper bound accepted for
 * the AT_RSEQ_ALIGN and AT_RSEQ_FEATURE_SIZE auxiliary vector entries.
 */
#define RSEQ_THREAD_AREA_ALLOC_SIZE	1024

/*
 * Original struct rseq feature size is 20 bytes. Used as the fallback when
 * AT_RSEQ_FEATURE_SIZE is reported as 0.
 */
#define ORIG_RSEQ_FEATURE_SIZE		20

/*
 * Original struct rseq allocation size is 32 bytes. Registered size used
 * when the feature size equals ORIG_RSEQ_FEATURE_SIZE.
 */
#define ORIG_RSEQ_ALLOC_SIZE		32
78 
/*
 * Per-thread rseq area handed to the rseq system call by
 * rseq_register_current_thread() and rseq_unregister_current_thread() when
 * this file owns the registration. It uses the initial-exec TLS model, is
 * aligned on RSEQ_THREAD_AREA_ALLOC_SIZE, and starts with cpu_id set to
 * RSEQ_ABI_CPU_ID_UNINITIALIZED.
 */
static
__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
};
83 
/*
 * Raw wrapper around the rseq system call.
 *
 * Forwards rseq_abi, rseq_len, flags and sig unchanged to
 * syscall(__NR_rseq, ...) and returns whatever syscall() returned,
 * converted to int.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	long ret;

	ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
	return ret;
}
89 
/*
 * Raw wrapper around the getcpu system call.
 *
 * Passes cpu and node through to syscall(__NR_getcpu, ...) with a NULL
 * third argument and returns the syscall() result converted to int.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret;

	ret = syscall(__NR_getcpu, cpu, node, NULL);
	return ret;
}
94 
/*
 * Probe whether the rseq system call is usable.
 *
 * Issues a deliberately invalid call (NULL area, zero length, no flags,
 * zero signature), which must fail with -1. errno then decides the answer:
 * ENOSYS returns 0, EINVAL returns 1. Any other outcome, including a call
 * that does not return -1, aborts the process.
 */
int rseq_available(void)
{
	if (sys_rseq(NULL, 0, 0, 0) != -1)
		abort();

	if (errno == ENOSYS)
		return 0;
	if (errno == EINVAL)
		return 1;

	/* Unexpected errno from the probe. */
	abort();
}
111 
112 int rseq_register_current_thread(void)
113 {
114 	int rc;
115 
116 	if (!rseq_ownership) {
117 		/* Treat libc's ownership as a successful registration. */
118 		return 0;
119 	}
120 	rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
121 	if (rc) {
122 		if (RSEQ_READ_ONCE(rseq_reg_success)) {
123 			/* Incoherent success/failure within process. */
124 			abort();
125 		}
126 		return -1;
127 	}
128 	assert(rseq_current_cpu_raw() >= 0);
129 	RSEQ_WRITE_ONCE(rseq_reg_success, 1);
130 	return 0;
131 }
132 
133 int rseq_unregister_current_thread(void)
134 {
135 	int rc;
136 
137 	if (!rseq_ownership) {
138 		/* Treat libc's ownership as a successful unregistration. */
139 		return 0;
140 	}
141 	rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
142 	if (rc)
143 		return -1;
144 	return 0;
145 }
146 
147 static
148 unsigned int get_rseq_feature_size(void)
149 {
150 	unsigned long auxv_rseq_feature_size, auxv_rseq_align;
151 
152 	auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
153 	assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
154 
155 	auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
156 	assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
157 	if (auxv_rseq_feature_size)
158 		return auxv_rseq_feature_size;
159 	else
160 		return ORIG_RSEQ_FEATURE_SIZE;
161 }
162 
163 static __attribute__((constructor))
164 void rseq_init(void)
165 {
166 	/*
167 	 * If the libc's registered rseq size isn't already valid, it may be
168 	 * because the binary is dynamically linked and not necessarily due to
169 	 * libc not having registered a restartable sequence.  Try to find the
170 	 * symbols if that's the case.
171 	 */
172 	if (!*libc_rseq_size_p) {
173 		libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
174 		libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
175 		libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
176 	}
177 	if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
178 			*libc_rseq_size_p != 0) {
179 		/* rseq registration owned by glibc */
180 		rseq_offset = *libc_rseq_offset_p;
181 		rseq_size = *libc_rseq_size_p;
182 		rseq_flags = *libc_rseq_flags_p;
183 		rseq_feature_size = get_rseq_feature_size();
184 		if (rseq_feature_size > rseq_size)
185 			rseq_feature_size = rseq_size;
186 		return;
187 	}
188 	rseq_ownership = 1;
189 	if (!rseq_available()) {
190 		rseq_size = 0;
191 		rseq_feature_size = 0;
192 		return;
193 	}
194 	rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
195 	rseq_flags = 0;
196 	rseq_feature_size = get_rseq_feature_size();
197 	if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
198 		rseq_size = ORIG_RSEQ_ALLOC_SIZE;
199 	else
200 		rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
201 }
202 
203 static __attribute__((destructor))
204 void rseq_exit(void)
205 {
206 	if (!rseq_ownership)
207 		return;
208 	rseq_offset = 0;
209 	rseq_size = -1U;
210 	rseq_feature_size = -1U;
211 	rseq_ownership = 0;
212 }
213 
/*
 * Return the current CPU number as reported by sched_getcpu().
 *
 * A negative result from sched_getcpu() is reported with perror() and
 * aborts the process, so this function only ever returns a value >= 0.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
225 
/*
 * Return the current node id obtained through sys_getcpu().
 *
 * When sys_getcpu() returns non-zero, the error is reported with perror()
 * and that return value is handed back to the caller instead of a node id.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu, node;
	int rc;

	rc = sys_getcpu(&cpu, &node);
	if (rc == 0)
		return (int32_t) node;

	perror("sys_getcpu()");
	return rc;
}
238