// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <asm/barrier.h>
#include <linux/atomic.h>
#include <linux/rseq.h>
#include <linux/unistd.h>

#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"

#define VCPU_ID 0

static __thread volatile struct rseq __rseq = {
	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};

/*
 * Use an arbitrary, bogus signature for configuring rseq; this test does not
 * actually enter an rseq critical section.
 */
#define RSEQ_SIG 0xdeadbeef

/*
 * Any bug related to task migration is likely to be timing-dependent; perform
 * a large number of migrations to reduce the odds of a false negative.
 */
#define NR_TASK_MIGRATIONS 100000

static pthread_t migration_thread;
static cpu_set_t possible_mask;
static bool done;

static atomic_t seq_cnt;

static void guest_code(void)
{
	for (;;)
		GUEST_SYNC(0);
}

static void sys_rseq(int flags)
{
	int r;

	r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG);
	TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
}

static void *migration_worker(void *ign)
{
	cpu_set_t allowed_mask;
	int r, i, nr_cpus, cpu;

	CPU_ZERO(&allowed_mask);

	nr_cpus = CPU_COUNT(&possible_mask);

	for (i = 0; i < NR_TASK_MIGRATIONS; i++) {
		cpu = i % nr_cpus;
		if (!CPU_ISSET(cpu, &possible_mask))
			continue;

		CPU_SET(cpu, &allowed_mask);

		/*
		 * Bump the sequence count twice to allow the reader to detect
		 * that a migration may have occurred in between rseq and sched
		 * CPU ID reads.  An odd sequence count indicates a migration
		 * is in-progress, while a completely different count indicates
		 * a migration occurred since the count was last read.
		 */
		atomic_inc(&seq_cnt);

		/*
		 * Ensure the odd count is visible while sched_getcpu() isn't
		 * stable, i.e. while changing affinity is in-progress.
		 */
		smp_wmb();
		r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
		TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
			    errno, strerror(errno));
		smp_wmb();
		atomic_inc(&seq_cnt);

		CPU_CLR(cpu, &allowed_mask);

		/*
		 * Wait 1-10us before proceeding to the next iteration and,
		 * more specifically, before bumping seq_cnt again.  A delay
		 * is needed on three fronts:
		 *
		 *  1. To allow sched_setaffinity() to prompt migration before
		 *     ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
		 *     (or TIF_NEED_RESCHED, which indirectly leads to handling
		 *     NOTIFY_RESUME) is handled in KVM context.
		 *
		 *     If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
		 *     the guest, the guest will trigger an IO/MMIO exit all
		 *     the way to userspace and the TIF flags will be handled
		 *     by the generic "exit to userspace" logic, not by KVM.
		 *     The exit to userspace is necessary to give the test a
		 *     chance to check the rseq CPU ID (see #2).
		 *
		 *     Alternatively, guest_code() could include an instruction
		 *     to trigger an exit that is handled by KVM, but any such
		 *     exit requires architecture specific code.
		 *
		 *  2. To let ioctl(KVM_RUN) make its way back to the test
		 *     before the next round of migration.  The test's check on
		 *     the rseq CPU ID must wait for migration to complete in
		 *     order to avoid false positives; thus any kernel rseq bug
		 *     will be missed if the next migration starts before the
		 *     check completes.
		 *
		 *  3. To ensure the read-side makes efficient forward progress,
		 *     e.g. if sched_getcpu() involves a syscall.  Stalling the
		 *     read-side means the test will spend more time waiting for
		 *     sched_getcpu() to stabilize and less time trying to hit
		 *     the timing-dependent bug.
		 *
		 * Because any bug in this area is likely to be timing-dependent,
		 * run with a range of delays at 1us intervals from 1us to 10us
		 * as a best effort to avoid tuning the test to the point where
		 * it can hit _only_ the original bug and not detect future
		 * regressions.
		 *
		 * The original bug can reproduce with a delay up to ~500us on
		 * x86-64, but starts to require more iterations to reproduce
		 * as the delay creeps above ~10us, and the average runtime of
		 * each iteration obviously increases as well.  Cap the delay
		 * at 10us to keep test runtime reasonable while minimizing
		 * potential coverage loss.
		 *
		 * The lower bound for reproducing the bug is likely below 1us,
		 * e.g. failures occur on x86-64 with nanosleep(0), but at that
		 * point the overhead of the syscall likely dominates the delay.
		 * Use usleep() for simplicity and to avoid unnecessary kernel
		 * dependencies.
		 */
		usleep((i % 10) + 1);
	}
	done = true;
	return NULL;
}

int main(int argc, char *argv[])
{
	int r, i, snapshot;
	struct kvm_vm *vm;
	u32 cpu, rseq_cpu;

	/* Tell stdout not to buffer its content. */
	setbuf(stdout, NULL);

	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
		    strerror(errno));

	if (CPU_COUNT(&possible_mask) < 2) {
		print_skip("Only one CPU, task migration not possible\n");
		exit(KSFT_SKIP);
	}

	sys_rseq(0);

	/*
	 * Create and run a dummy VM that immediately exits to userspace via
	 * GUEST_SYNC, while concurrently migrating the process by setting its
	 * CPU affinity.
	 */
	vm = vm_create_default(VCPU_ID, 0, guest_code);
	ucall_init(vm, NULL);

	pthread_create(&migration_thread, NULL, migration_worker, 0);

	for (i = 0; !done; i++) {
		vcpu_run(vm, VCPU_ID);
		TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
			    "Guest failed?");

		/*
		 * Verify rseq's CPU matches sched's CPU.  Ensure migration
		 * doesn't occur between sched_getcpu() and reading the rseq
		 * cpu_id by rereading both if the sequence count changes, or
		 * if the count is odd (migration in-progress).
		 */
		do {
			/*
			 * Drop bit 0 to force a mismatch if the count is odd,
			 * i.e. if a migration is in-progress.
			 */
			snapshot = atomic_read(&seq_cnt) & ~1;

			/*
			 * Ensure reading sched_getcpu() and rseq.cpu_id
			 * complete in a single "no migration" window, i.e. are
			 * not reordered across the seq_cnt reads.
			 */
			smp_rmb();
			cpu = sched_getcpu();
			rseq_cpu = READ_ONCE(__rseq.cpu_id);
			smp_rmb();
		} while (snapshot != atomic_read(&seq_cnt));

		TEST_ASSERT(rseq_cpu == cpu,
			    "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
	}

	/*
	 * Sanity check that the test was able to enter the guest a reasonable
	 * number of times, e.g. didn't get stalled too often/long waiting for
	 * sched_getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is a
	 * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs
	 * than migrations given the 1us+ delay in the migration task.
	 */
	TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
		    "Only performed %d KVM_RUNs, task stalled too much?\n", i);

	pthread_join(migration_thread, NULL);

	kvm_vm_free(vm);

	sys_rseq(RSEQ_FLAG_UNREGISTER);

	return 0;
}