1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * rseq.c 4 * 5 * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; only 10 * version 2.1 of the License. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 */ 17 18 #define _GNU_SOURCE 19 #include <errno.h> 20 #include <sched.h> 21 #include <stdio.h> 22 #include <stdlib.h> 23 #include <string.h> 24 #include <unistd.h> 25 #include <syscall.h> 26 #include <assert.h> 27 #include <signal.h> 28 #include <limits.h> 29 #include <dlfcn.h> 30 #include <stddef.h> 31 #include <sys/auxv.h> 32 #include <linux/auxvec.h> 33 34 #include "../kselftest.h" 35 #include "rseq.h" 36 37 /* 38 * Define weak versions to play nice with binaries that are statically linked 39 * against a libc that doesn't support registering its own rseq. 40 */ 41 __weak ptrdiff_t __rseq_offset; 42 __weak unsigned int __rseq_size; 43 __weak unsigned int __rseq_flags; 44 45 static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; 46 static const unsigned int *libc_rseq_size_p = &__rseq_size; 47 static const unsigned int *libc_rseq_flags_p = &__rseq_flags; 48 49 /* Offset from the thread pointer to the rseq area. */ 50 ptrdiff_t rseq_offset; 51 52 /* 53 * Size of the registered rseq area. 0 if the registration was 54 * unsuccessful. 55 */ 56 unsigned int rseq_size = -1U; 57 58 /* Flags used during rseq registration. */ 59 unsigned int rseq_flags; 60 61 /* 62 * rseq feature size supported by the kernel. 0 if the registration was 63 * unsuccessful. 64 */ 65 unsigned int rseq_feature_size = -1U; 66 67 static int rseq_ownership; 68 static int rseq_reg_success; /* At least one rseq registration has succeded. */ 69 70 /* Allocate a large area for the TLS. */ 71 #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 72 73 /* Original struct rseq feature size is 20 bytes. */ 74 #define ORIG_RSEQ_FEATURE_SIZE 20 75 76 /* Original struct rseq allocation size is 32 bytes. */ 77 #define ORIG_RSEQ_ALLOC_SIZE 32 78 79 static 80 __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { 81 .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, 82 }; 83 84 static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, 85 int flags, uint32_t sig) 86 { 87 return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); 88 } 89 90 static int sys_getcpu(unsigned *cpu, unsigned *node) 91 { 92 return syscall(__NR_getcpu, cpu, node, NULL); 93 } 94 95 int rseq_available(void) 96 { 97 int rc; 98 99 rc = sys_rseq(NULL, 0, 0, 0); 100 if (rc != -1) 101 abort(); 102 switch (errno) { 103 case ENOSYS: 104 return 0; 105 case EINVAL: 106 return 1; 107 default: 108 abort(); 109 } 110 } 111 112 int rseq_register_current_thread(void) 113 { 114 int rc; 115 116 if (!rseq_ownership) { 117 /* Treat libc's ownership as a successful registration. */ 118 return 0; 119 } 120 rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); 121 if (rc) { 122 if (RSEQ_READ_ONCE(rseq_reg_success)) { 123 /* Incoherent success/failure within process. */ 124 abort(); 125 } 126 return -1; 127 } 128 assert(rseq_current_cpu_raw() >= 0); 129 RSEQ_WRITE_ONCE(rseq_reg_success, 1); 130 return 0; 131 } 132 133 int rseq_unregister_current_thread(void) 134 { 135 int rc; 136 137 if (!rseq_ownership) { 138 /* Treat libc's ownership as a successful unregistration. */ 139 return 0; 140 } 141 rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); 142 if (rc) 143 return -1; 144 return 0; 145 } 146 147 static 148 unsigned int get_rseq_feature_size(void) 149 { 150 unsigned long auxv_rseq_feature_size, auxv_rseq_align; 151 152 auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); 153 assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); 154 155 auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); 156 assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); 157 if (auxv_rseq_feature_size) 158 return auxv_rseq_feature_size; 159 else 160 return ORIG_RSEQ_FEATURE_SIZE; 161 } 162 163 static __attribute__((constructor)) 164 void rseq_init(void) 165 { 166 /* 167 * If the libc's registered rseq size isn't already valid, it may be 168 * because the binary is dynamically linked and not necessarily due to 169 * libc not having registered a restartable sequence. Try to find the 170 * symbols if that's the case. 171 */ 172 if (!*libc_rseq_size_p) { 173 libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); 174 libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); 175 libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); 176 } 177 if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && 178 *libc_rseq_size_p != 0) { 179 /* rseq registration owned by glibc */ 180 rseq_offset = *libc_rseq_offset_p; 181 rseq_size = *libc_rseq_size_p; 182 rseq_flags = *libc_rseq_flags_p; 183 rseq_feature_size = get_rseq_feature_size(); 184 if (rseq_feature_size > rseq_size) 185 rseq_feature_size = rseq_size; 186 return; 187 } 188 rseq_ownership = 1; 189 if (!rseq_available()) { 190 rseq_size = 0; 191 rseq_feature_size = 0; 192 return; 193 } 194 rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); 195 rseq_flags = 0; 196 rseq_feature_size = get_rseq_feature_size(); 197 if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) 198 rseq_size = ORIG_RSEQ_ALLOC_SIZE; 199 else 200 rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; 201 } 202 203 static __attribute__((destructor)) 204 void rseq_exit(void) 205 { 206 if (!rseq_ownership) 207 return; 208 rseq_offset = 0; 209 rseq_size = -1U; 210 rseq_feature_size = -1U; 211 rseq_ownership = 0; 212 } 213 214 int32_t rseq_fallback_current_cpu(void) 215 { 216 int32_t cpu; 217 218 cpu = sched_getcpu(); 219 if (cpu < 0) { 220 perror("sched_getcpu()"); 221 abort(); 222 } 223 return cpu; 224 } 225 226 int32_t rseq_fallback_current_node(void) 227 { 228 uint32_t cpu_id, node_id; 229 int ret; 230 231 ret = sys_getcpu(&cpu_id, &node_id); 232 if (ret) { 233 perror("sys_getcpu()"); 234 return ret; 235 } 236 return (int32_t) node_id; 237 } 238