// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <sys/sysinfo.h>
#include <asm/barrier.h>
#include <linux/atomic.h>
#include <linux/rseq.h>
#include <linux/unistd.h>

#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"

static __thread volatile struct rseq __rseq = {
	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};

/*
 * Use an arbitrary, bogus signature for configuring rseq; this test does not
 * actually enter an rseq critical section.
 */
#define RSEQ_SIG 0xdeadbeef

/*
 * Any bug related to task migration is likely to be timing-dependent; perform
 * a large number of migrations to reduce the odds of a false negative.
 */
#define NR_TASK_MIGRATIONS 100000

static pthread_t migration_thread;
static cpu_set_t possible_mask;
static int min_cpu, max_cpu;
static bool done;
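
/*
 * Sequence count used as a seqlock between the migration thread (writer) and
 * the main thread (reader): an odd count means a migration is in-progress,
 * and a changed count means at least one migration completed since the count
 * was last read.
 */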
static atomic_t seq_cnt;
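
/*
 * Guest workload: spin on GUEST_SYNC so that each trip into the guest
 * immediately exits back to userspace, giving the main loop a window to
 * compare rseq's CPU ID against sched_getcpu().
 */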
static void guest_code(void)
{
	for (;;)
		GUEST_SYNC(0);
}

static void sys_rseq(int flags)
{
	int r;

	r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG);
	TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
}

static int next_cpu(int cpu)
{
	/*
	 * Advance to the next CPU, skipping those that weren't in the original
	 * affinity set.  Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
	 * data storage is considered opaque.  Note, if this task is pinned to
	 * a small set of discontiguous CPUs, e.g. 2 and 1023, this loop will
	 * burn a lot of cycles and the test will take longer than normal to
	 * complete.
	 */
	do {
		cpu++;
		if (cpu > max_cpu) {
			cpu = min_cpu;
			TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
				    "Min CPU = %d must always be usable", cpu);
			break;
		}
	} while (!CPU_ISSET(cpu, &possible_mask));

	return cpu;
}

static void *migration_worker(void *ign)
{
	cpu_set_t allowed_mask;
	int r, i, cpu;

	CPU_ZERO(&allowed_mask);

	for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
		CPU_SET(cpu, &allowed_mask);

		/*
		 * Bump the sequence count twice to allow the reader to detect
		 * that a migration may have occurred in between rseq and sched
		 * CPU ID reads.  An odd sequence count indicates a migration
		 * is in-progress, while a completely different count indicates
		 * a migration occurred since the count was last read.
		 */
		atomic_inc(&seq_cnt);

		/*
		 * Ensure the odd count is visible while sched_getcpu() isn't
		 * stable, i.e. while changing affinity is in-progress.
		 */
		smp_wmb();
		r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
		TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
			    errno, strerror(errno));
		smp_wmb();
		atomic_inc(&seq_cnt);

		CPU_CLR(cpu, &allowed_mask);

		/*
		 * Wait 1-10us before proceeding to the next iteration and more
		 * specifically, before bumping seq_cnt again.  A delay is
		 * needed on three fronts:
		 *
		 *  1. To allow sched_setaffinity() to prompt migration before
		 *     ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
		 *     (or TIF_NEED_RESCHED, which indirectly leads to handling
		 *     NOTIFY_RESUME) is handled in KVM context.
		 *
		 *     If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
		 *     the guest, the guest will trigger an IO/MMIO exit all the
		 *     way to userspace and the TIF flags will be handled by
		 *     the generic "exit to userspace" logic, not by KVM.  The
		 *     exit to userspace is necessary to give the test a chance
		 *     to check the rseq CPU ID (see #2).
		 *
		 *     Alternatively, guest_code() could include an instruction
		 *     to trigger an exit that is handled by KVM, but any such
		 *     exit requires architecture specific code.
		 *
		 *  2. To let ioctl(KVM_RUN) make its way back to the test
		 *     before the next round of migration.  The test's check on
		 *     the rseq CPU ID must wait for migration to complete in
		 *     order to avoid false positives, thus any kernel rseq bug
		 *     will be missed if the next migration starts before the
		 *     check completes.
		 *
		 *  3. To ensure the read-side makes efficient forward progress,
		 *     e.g. if sched_getcpu() involves a syscall.  Stalling the
		 *     read-side means the test will spend more time waiting for
		 *     sched_getcpu() to stabilize and less time trying to hit
		 *     the timing-dependent bug.
		 *
		 * Because any bug in this area is likely to be timing-dependent,
		 * run with a range of delays at 1us intervals from 1us to 10us
		 * as a best effort to avoid tuning the test to the point where
		 * it can hit _only_ the original bug and not detect future
		 * regressions.
		 *
		 * The original bug can reproduce with a delay up to ~500us on
		 * x86-64, but starts to require more iterations to reproduce
		 * as the delay creeps above ~10us, and the average runtime of
		 * each iteration obviously increases as well.  Cap the delay
		 * at 10us to keep test runtime reasonable while minimizing
		 * potential coverage loss.
		 *
		 * The lower bound for reproducing the bug is likely below 1us,
		 * e.g. failures occur on x86-64 with nanosleep(0), but at that
		 * point the overhead of the syscall likely dominates the delay.
		 * Use usleep() for simplicity and to avoid unnecessary kernel
		 * dependencies.
		 */
		usleep((i % 10) + 1);
	}
	done = true;
	return NULL;
}
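
/*
 * Find the min and max CPUs in the task's initial affinity mask.  Returns
 * -EINVAL if fewer than two CPUs are usable, as migrating between CPUs
 * requires at least two.
 */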
static int calc_min_max_cpu(void)
{
	int i, cnt, nproc;

	if (CPU_COUNT(&possible_mask) < 2)
		return -EINVAL;

	/*
	 * CPU_SET doesn't provide a FOR_EACH helper; get the min/max CPU that
	 * this task is affined to in order to reduce the time spent querying
	 * unusable CPUs, e.g. if this task is pinned to a small percentage of
	 * total CPUs.
	 */
	nproc = get_nprocs_conf();
	min_cpu = -1;
	max_cpu = -1;
	cnt = 0;

	for (i = 0; i < nproc; i++) {
		if (!CPU_ISSET(i, &possible_mask))
			continue;
		if (min_cpu == -1)
			min_cpu = i;
		max_cpu = i;
		cnt++;
	}

	return (cnt < 2) ? -EINVAL : 0;
}

int main(int argc, char *argv[])
{
	int r, i, snapshot;
	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;
	u32 cpu, rseq_cpu;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
		    strerror(errno));

	if (calc_min_max_cpu()) {
		print_skip("Only one usable CPU, task migration not possible");
		exit(KSFT_SKIP);
	}

	sys_rseq(0);

	/*
	 * Create and run a dummy VM that immediately exits to userspace via
	 * GUEST_SYNC, while concurrently migrating the process by setting its
	 * CPU affinity.
	 */
	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
	ucall_init(vm, NULL);

	pthread_create(&migration_thread, NULL, migration_worker, 0);
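
	/*
	 * Run the vCPU until the migration thread is done; each GUEST_SYNC
	 * exit opens a window in which to verify that rseq's CPU ID and
	 * sched_getcpu() agree.
	 */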
	for (i = 0; !done; i++) {
		vcpu_run(vm, vcpu->id);
		TEST_ASSERT(get_ucall(vm, vcpu->id, NULL) == UCALL_SYNC,
			    "Guest failed?");

		/*
		 * Verify rseq's CPU matches sched's CPU.  Ensure migration
		 * doesn't occur between sched_getcpu() and reading the rseq
		 * cpu_id by rereading both if the sequence count changes, or
		 * if the count is odd (migration in-progress).
		 */
		do {
			/*
			 * Drop bit 0 to force a mismatch if the count is odd,
			 * i.e. if a migration is in-progress.
			 */
			snapshot = atomic_read(&seq_cnt) & ~1;

			/*
			 * Ensure reading sched_getcpu() and rseq.cpu_id
			 * complete in a single "no migration" window, i.e. are
			 * not reordered across the seq_cnt reads.
			 */
			smp_rmb();
			cpu = sched_getcpu();
			rseq_cpu = READ_ONCE(__rseq.cpu_id);
			smp_rmb();
		} while (snapshot != atomic_read(&seq_cnt));

		TEST_ASSERT(rseq_cpu == cpu,
			    "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
	}

	/*
	 * Sanity check that the test was able to enter the guest a reasonable
	 * number of times, e.g. didn't get stalled too often/long waiting for
	 * sched_getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is a
	 * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs
	 * than migrations given the 1us+ delay in the migration task.
	 */
	TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
		    "Only performed %d KVM_RUNs, task stalled too much?\n", i);

	pthread_join(migration_thread, NULL);

	kvm_vm_free(vm);

	sys_rseq(RSEQ_FLAG_UNREGISTER);

	return 0;
}