1 // SPDX-License-Identifier: GPL-2.0-or-later 2 3 #include <linux/compat.h> 4 #include <linux/syscalls.h> 5 #include <linux/time_namespace.h> 6 7 #include "futex.h" 8 9 /* 10 * Support for robust futexes: the kernel cleans up held futexes at 11 * thread exit time. 12 * 13 * Implementation: user-space maintains a per-thread list of locks it 14 * is holding. Upon do_exit(), the kernel carefully walks this list, 15 * and marks all locks that are owned by this thread with the 16 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is 17 * always manipulated with the lock held, so the list is private and 18 * per-thread. Userspace also maintains a per-thread 'list_op_pending' 19 * field, to allow the kernel to clean up if the thread dies after 20 * acquiring the lock, but just before it could have added itself to 21 * the list. There can only be one such pending lock. 22 */ 23 24 /** 25 * sys_set_robust_list() - Set the robust-futex list head of a task 26 * @head: pointer to the list-head 27 * @len: length of the list-head, as userspace expects 28 */ 29 SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, 30 size_t, len) 31 { 32 /* 33 * The kernel knows only one size for now: 34 */ 35 if (unlikely(len != sizeof(*head))) 36 return -EINVAL; 37 38 current->robust_list = head; 39 40 return 0; 41 } 42 43 /** 44 * sys_get_robust_list() - Get the robust-futex list head of a task 45 * @pid: pid of the process [zero for current task] 46 * @head_ptr: pointer to a list-head pointer, the kernel fills it in 47 * @len_ptr: pointer to a length field, the kernel fills in the header size 48 */ 49 SYSCALL_DEFINE3(get_robust_list, int, pid, 50 struct robust_list_head __user * __user *, head_ptr, 51 size_t __user *, len_ptr) 52 { 53 struct robust_list_head __user *head; 54 unsigned long ret; 55 struct task_struct *p; 56 57 rcu_read_lock(); 58 59 ret = -ESRCH; 60 if (!pid) 61 p = current; 62 else { 63 p = find_task_by_vpid(pid); 64 if (!p) 65 goto err_unlock; 66 } 67 68 ret = -EPERM; 69 if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) 70 goto err_unlock; 71 72 head = p->robust_list; 73 rcu_read_unlock(); 74 75 if (put_user(sizeof(*head), len_ptr)) 76 return -EFAULT; 77 return put_user(head, head_ptr); 78 79 err_unlock: 80 rcu_read_unlock(); 81 82 return ret; 83 } 84 85 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, 86 u32 __user *uaddr2, u32 val2, u32 val3) 87 { 88 int cmd = op & FUTEX_CMD_MASK; 89 unsigned int flags = 0; 90 91 if (!(op & FUTEX_PRIVATE_FLAG)) 92 flags |= FLAGS_SHARED; 93 94 if (op & FUTEX_CLOCK_REALTIME) { 95 flags |= FLAGS_CLOCKRT; 96 if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI && 97 cmd != FUTEX_LOCK_PI2) 98 return -ENOSYS; 99 } 100 101 switch (cmd) { 102 case FUTEX_WAIT: 103 val3 = FUTEX_BITSET_MATCH_ANY; 104 fallthrough; 105 case FUTEX_WAIT_BITSET: 106 return futex_wait(uaddr, flags, val, timeout, val3); 107 case FUTEX_WAKE: 108 val3 = FUTEX_BITSET_MATCH_ANY; 109 fallthrough; 110 case FUTEX_WAKE_BITSET: 111 return futex_wake(uaddr, flags, val, val3); 112 case FUTEX_REQUEUE: 113 return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); 114 case FUTEX_CMP_REQUEUE: 115 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); 116 case FUTEX_WAKE_OP: 117 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); 118 case FUTEX_LOCK_PI: 119 flags |= FLAGS_CLOCKRT; 120 fallthrough; 121 case FUTEX_LOCK_PI2: 122 return futex_lock_pi(uaddr, flags, timeout, 0); 123 case FUTEX_UNLOCK_PI: 124 return futex_unlock_pi(uaddr, flags); 125 case FUTEX_TRYLOCK_PI: 126 return futex_lock_pi(uaddr, flags, NULL, 1); 127 case FUTEX_WAIT_REQUEUE_PI: 128 val3 = FUTEX_BITSET_MATCH_ANY; 129 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, 130 uaddr2); 131 case FUTEX_CMP_REQUEUE_PI: 132 return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); 133 } 134 return -ENOSYS; 135 } 136 137 static __always_inline bool futex_cmd_has_timeout(u32 cmd) 138 { 139 switch (cmd) { 140 case FUTEX_WAIT: 141 case FUTEX_LOCK_PI: 142 case FUTEX_LOCK_PI2: 143 case FUTEX_WAIT_BITSET: 144 case FUTEX_WAIT_REQUEUE_PI: 145 return true; 146 } 147 return false; 148 } 149 150 static __always_inline int 151 futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) 152 { 153 if (!timespec64_valid(ts)) 154 return -EINVAL; 155 156 *t = timespec64_to_ktime(*ts); 157 if (cmd == FUTEX_WAIT) 158 *t = ktime_add_safe(ktime_get(), *t); 159 else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME)) 160 *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); 161 return 0; 162 } 163 164 SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, 165 const struct __kernel_timespec __user *, utime, 166 u32 __user *, uaddr2, u32, val3) 167 { 168 int ret, cmd = op & FUTEX_CMD_MASK; 169 ktime_t t, *tp = NULL; 170 struct timespec64 ts; 171 172 if (utime && futex_cmd_has_timeout(cmd)) { 173 if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) 174 return -EFAULT; 175 if (get_timespec64(&ts, utime)) 176 return -EFAULT; 177 ret = futex_init_timeout(cmd, op, &ts, &t); 178 if (ret) 179 return ret; 180 tp = &t; 181 } 182 183 return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); 184 } 185 186 /* Mask of available flags for each futex in futex_waitv list */ 187 #define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG) 188 189 /** 190 * futex_parse_waitv - Parse a waitv array from userspace 191 * @futexv: Kernel side list of waiters to be filled 192 * @uwaitv: Userspace list to be parsed 193 * @nr_futexes: Length of futexv 194 * 195 * Return: Error code on failure, 0 on success 196 */ 197 static int futex_parse_waitv(struct futex_vector *futexv, 198 struct futex_waitv __user *uwaitv, 199 unsigned int nr_futexes) 200 { 201 struct futex_waitv aux; 202 unsigned int i; 203 204 for (i = 0; i < nr_futexes; i++) { 205 if (copy_from_user(&aux, &uwaitv[i], sizeof(aux))) 206 return -EFAULT; 207 208 if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved) 209 return -EINVAL; 210 211 if (!(aux.flags & FUTEX_32)) 212 return -EINVAL; 213 214 futexv[i].w.flags = aux.flags; 215 futexv[i].w.val = aux.val; 216 futexv[i].w.uaddr = aux.uaddr; 217 futexv[i].q = futex_q_init; 218 } 219 220 return 0; 221 } 222 223 /** 224 * sys_futex_waitv - Wait on a list of futexes 225 * @waiters: List of futexes to wait on 226 * @nr_futexes: Length of futexv 227 * @flags: Flag for timeout (monotonic/realtime) 228 * @timeout: Optional absolute timeout. 229 * @clockid: Clock to be used for the timeout, realtime or monotonic. 230 * 231 * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes 232 * if a futex_wake() is performed at any uaddr. The syscall returns immediately 233 * if any waiter has *uaddr != val. *timeout is an optional timeout value for 234 * the operation. Each waiter has individual flags. The `flags` argument for 235 * the syscall should be used solely for specifying the timeout as realtime, if 236 * needed. Flags for private futexes, sizes, etc. should be used on the 237 * individual flags of each waiter. 238 * 239 * Returns the array index of one of the woken futexes. No further information 240 * is provided: any number of other futexes may also have been woken by the 241 * same event, and if more than one futex was woken, the retrned index may 242 * refer to any one of them. (It is not necessaryily the futex with the 243 * smallest index, nor the one most recently woken, nor...) 244 */ 245 246 SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, 247 unsigned int, nr_futexes, unsigned int, flags, 248 struct __kernel_timespec __user *, timeout, clockid_t, clockid) 249 { 250 struct hrtimer_sleeper to; 251 struct futex_vector *futexv; 252 struct timespec64 ts; 253 ktime_t time; 254 int ret; 255 256 /* This syscall supports no flags for now */ 257 if (flags) 258 return -EINVAL; 259 260 if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) 261 return -EINVAL; 262 263 if (timeout) { 264 int flag_clkid = 0, flag_init = 0; 265 266 if (clockid == CLOCK_REALTIME) { 267 flag_clkid = FLAGS_CLOCKRT; 268 flag_init = FUTEX_CLOCK_REALTIME; 269 } 270 271 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 272 return -EINVAL; 273 274 if (get_timespec64(&ts, timeout)) 275 return -EFAULT; 276 277 /* 278 * Since there's no opcode for futex_waitv, use 279 * FUTEX_WAIT_BITSET that uses absolute timeout as well 280 */ 281 ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); 282 if (ret) 283 return ret; 284 285 futex_setup_timer(&time, &to, flag_clkid, 0); 286 } 287 288 futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); 289 if (!futexv) 290 return -ENOMEM; 291 292 ret = futex_parse_waitv(futexv, waiters, nr_futexes); 293 if (!ret) 294 ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL); 295 296 if (timeout) { 297 hrtimer_cancel(&to.timer); 298 destroy_hrtimer_on_stack(&to.timer); 299 } 300 301 kfree(futexv); 302 return ret; 303 } 304 305 #ifdef CONFIG_COMPAT 306 COMPAT_SYSCALL_DEFINE2(set_robust_list, 307 struct compat_robust_list_head __user *, head, 308 compat_size_t, len) 309 { 310 if (unlikely(len != sizeof(*head))) 311 return -EINVAL; 312 313 current->compat_robust_list = head; 314 315 return 0; 316 } 317 318 COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, 319 compat_uptr_t __user *, head_ptr, 320 compat_size_t __user *, len_ptr) 321 { 322 struct compat_robust_list_head __user *head; 323 unsigned long ret; 324 struct task_struct *p; 325 326 rcu_read_lock(); 327 328 ret = -ESRCH; 329 if (!pid) 330 p = current; 331 else { 332 p = find_task_by_vpid(pid); 333 if (!p) 334 goto err_unlock; 335 } 336 337 ret = -EPERM; 338 if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) 339 goto err_unlock; 340 341 head = p->compat_robust_list; 342 rcu_read_unlock(); 343 344 if (put_user(sizeof(*head), len_ptr)) 345 return -EFAULT; 346 return put_user(ptr_to_compat(head), head_ptr); 347 348 err_unlock: 349 rcu_read_unlock(); 350 351 return ret; 352 } 353 #endif /* CONFIG_COMPAT */ 354 355 #ifdef CONFIG_COMPAT_32BIT_TIME 356 SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, 357 const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, 358 u32, val3) 359 { 360 int ret, cmd = op & FUTEX_CMD_MASK; 361 ktime_t t, *tp = NULL; 362 struct timespec64 ts; 363 364 if (utime && futex_cmd_has_timeout(cmd)) { 365 if (get_old_timespec32(&ts, utime)) 366 return -EFAULT; 367 ret = futex_init_timeout(cmd, op, &ts, &t); 368 if (ret) 369 return ret; 370 tp = &t; 371 } 372 373 return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); 374 } 375 #endif /* CONFIG_COMPAT_32BIT_TIME */ 376 377