161e52f16SSean Christopherson // SPDX-License-Identifier: GPL-2.0-only 261e52f16SSean Christopherson #define _GNU_SOURCE /* for program_invocation_short_name */ 361e52f16SSean Christopherson #include <errno.h> 461e52f16SSean Christopherson #include <fcntl.h> 561e52f16SSean Christopherson #include <pthread.h> 661e52f16SSean Christopherson #include <sched.h> 761e52f16SSean Christopherson #include <stdio.h> 861e52f16SSean Christopherson #include <stdlib.h> 961e52f16SSean Christopherson #include <string.h> 1061e52f16SSean Christopherson #include <signal.h> 1161e52f16SSean Christopherson #include <syscall.h> 1261e52f16SSean Christopherson #include <sys/ioctl.h> 137b0035eaSSean Christopherson #include <sys/sysinfo.h> 1461e52f16SSean Christopherson #include <asm/barrier.h> 1561e52f16SSean Christopherson #include <linux/atomic.h> 1661e52f16SSean Christopherson #include <linux/rseq.h> 1761e52f16SSean Christopherson #include <linux/unistd.h> 1861e52f16SSean Christopherson 1961e52f16SSean Christopherson #include "kvm_util.h" 2061e52f16SSean Christopherson #include "processor.h" 2161e52f16SSean Christopherson #include "test_util.h" 2261e52f16SSean Christopherson 2366d42ac7SGavin Shan #include "../rseq/rseq.c" 2461e52f16SSean Christopherson 2561e52f16SSean Christopherson /* 2661e52f16SSean Christopherson * Any bug related to task migration is likely to be timing-dependent; perform 2761e52f16SSean Christopherson * a large number of migrations to reduce the odds of a false negative. 2861e52f16SSean Christopherson */ 2961e52f16SSean Christopherson #define NR_TASK_MIGRATIONS 100000 3061e52f16SSean Christopherson 3161e52f16SSean Christopherson static pthread_t migration_thread; 3261e52f16SSean Christopherson static cpu_set_t possible_mask; 337b0035eaSSean Christopherson static int min_cpu, max_cpu; 3461e52f16SSean Christopherson static bool done; 3561e52f16SSean Christopherson 3661e52f16SSean Christopherson static atomic_t seq_cnt; 3761e52f16SSean Christopherson 3861e52f16SSean Christopherson static void guest_code(void) 3961e52f16SSean Christopherson { 4061e52f16SSean Christopherson for (;;) 4161e52f16SSean Christopherson GUEST_SYNC(0); 4261e52f16SSean Christopherson } 4361e52f16SSean Christopherson 440fcc1029SGavin Shan /* 450fcc1029SGavin Shan * We have to perform direct system call for getcpu() because it's 460fcc1029SGavin Shan * not available until glic 2.29. 470fcc1029SGavin Shan */ 480fcc1029SGavin Shan static void sys_getcpu(unsigned *cpu) 490fcc1029SGavin Shan { 500fcc1029SGavin Shan int r; 510fcc1029SGavin Shan 520fcc1029SGavin Shan r = syscall(__NR_getcpu, cpu, NULL, NULL); 530fcc1029SGavin Shan TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)", errno, strerror(errno)); 540fcc1029SGavin Shan } 550fcc1029SGavin Shan 567b0035eaSSean Christopherson static int next_cpu(int cpu) 577b0035eaSSean Christopherson { 587b0035eaSSean Christopherson /* 597b0035eaSSean Christopherson * Advance to the next CPU, skipping those that weren't in the original 607b0035eaSSean Christopherson * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's 617b0035eaSSean Christopherson * data storage is considered as opaque. Note, if this task is pinned 627b0035eaSSean Christopherson * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will 637b0035eaSSean Christopherson * burn a lot cycles and the test will take longer than normal to 647b0035eaSSean Christopherson * complete. 657b0035eaSSean Christopherson */ 667b0035eaSSean Christopherson do { 677b0035eaSSean Christopherson cpu++; 687b0035eaSSean Christopherson if (cpu > max_cpu) { 697b0035eaSSean Christopherson cpu = min_cpu; 707b0035eaSSean Christopherson TEST_ASSERT(CPU_ISSET(cpu, &possible_mask), 717b0035eaSSean Christopherson "Min CPU = %d must always be usable", cpu); 727b0035eaSSean Christopherson break; 737b0035eaSSean Christopherson } 747b0035eaSSean Christopherson } while (!CPU_ISSET(cpu, &possible_mask)); 757b0035eaSSean Christopherson 767b0035eaSSean Christopherson return cpu; 777b0035eaSSean Christopherson } 787b0035eaSSean Christopherson 79e923b053SGavin Shan static void *migration_worker(void *__rseq_tid) 8061e52f16SSean Christopherson { 81e923b053SGavin Shan pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid; 8261e52f16SSean Christopherson cpu_set_t allowed_mask; 837b0035eaSSean Christopherson int r, i, cpu; 8461e52f16SSean Christopherson 8561e52f16SSean Christopherson CPU_ZERO(&allowed_mask); 8661e52f16SSean Christopherson 877b0035eaSSean Christopherson for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) { 8861e52f16SSean Christopherson CPU_SET(cpu, &allowed_mask); 8961e52f16SSean Christopherson 9061e52f16SSean Christopherson /* 9161e52f16SSean Christopherson * Bump the sequence count twice to allow the reader to detect 9261e52f16SSean Christopherson * that a migration may have occurred in between rseq and sched 9361e52f16SSean Christopherson * CPU ID reads. An odd sequence count indicates a migration 9461e52f16SSean Christopherson * is in-progress, while a completely different count indicates 9561e52f16SSean Christopherson * a migration occurred since the count was last read. 9661e52f16SSean Christopherson */ 9761e52f16SSean Christopherson atomic_inc(&seq_cnt); 9861e52f16SSean Christopherson 9961e52f16SSean Christopherson /* 1000fcc1029SGavin Shan * Ensure the odd count is visible while getcpu() isn't 10161e52f16SSean Christopherson * stable, i.e. while changing affinity is in-progress. 10261e52f16SSean Christopherson */ 10361e52f16SSean Christopherson smp_wmb(); 104e923b053SGavin Shan r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask); 10561e52f16SSean Christopherson TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)", 10661e52f16SSean Christopherson errno, strerror(errno)); 10761e52f16SSean Christopherson smp_wmb(); 10861e52f16SSean Christopherson atomic_inc(&seq_cnt); 10961e52f16SSean Christopherson 11061e52f16SSean Christopherson CPU_CLR(cpu, &allowed_mask); 11161e52f16SSean Christopherson 11261e52f16SSean Christopherson /* 11361e52f16SSean Christopherson * Wait 1-10us before proceeding to the next iteration and more 11461e52f16SSean Christopherson * specifically, before bumping seq_cnt again. A delay is 11561e52f16SSean Christopherson * needed on three fronts: 11661e52f16SSean Christopherson * 11761e52f16SSean Christopherson * 1. To allow sched_setaffinity() to prompt migration before 11861e52f16SSean Christopherson * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME 11961e52f16SSean Christopherson * (or TIF_NEED_RESCHED, which indirectly leads to handling 12061e52f16SSean Christopherson * NOTIFY_RESUME) is handled in KVM context. 12161e52f16SSean Christopherson * 12261e52f16SSean Christopherson * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters 12361e52f16SSean Christopherson * the guest, the guest will trigger a IO/MMIO exit all the 12461e52f16SSean Christopherson * way to userspace and the TIF flags will be handled by 12561e52f16SSean Christopherson * the generic "exit to userspace" logic, not by KVM. The 12661e52f16SSean Christopherson * exit to userspace is necessary to give the test a chance 12761e52f16SSean Christopherson * to check the rseq CPU ID (see #2). 12861e52f16SSean Christopherson * 12961e52f16SSean Christopherson * Alternatively, guest_code() could include an instruction 13061e52f16SSean Christopherson * to trigger an exit that is handled by KVM, but any such 13161e52f16SSean Christopherson * exit requires architecture specific code. 13261e52f16SSean Christopherson * 13361e52f16SSean Christopherson * 2. To let ioctl(KVM_RUN) make its way back to the test 13461e52f16SSean Christopherson * before the next round of migration. The test's check on 13561e52f16SSean Christopherson * the rseq CPU ID must wait for migration to complete in 13661e52f16SSean Christopherson * order to avoid false positive, thus any kernel rseq bug 13761e52f16SSean Christopherson * will be missed if the next migration starts before the 13861e52f16SSean Christopherson * check completes. 13961e52f16SSean Christopherson * 14061e52f16SSean Christopherson * 3. To ensure the read-side makes efficient forward progress, 1410fcc1029SGavin Shan * e.g. if getcpu() involves a syscall. Stalling the read-side 1420fcc1029SGavin Shan * means the test will spend more time waiting for getcpu() 1430fcc1029SGavin Shan * to stabilize and less time trying to hit the timing-dependent 1440fcc1029SGavin Shan * bug. 14561e52f16SSean Christopherson * 14661e52f16SSean Christopherson * Because any bug in this area is likely to be timing-dependent, 14761e52f16SSean Christopherson * run with a range of delays at 1us intervals from 1us to 10us 14861e52f16SSean Christopherson * as a best effort to avoid tuning the test to the point where 14961e52f16SSean Christopherson * it can hit _only_ the original bug and not detect future 15061e52f16SSean Christopherson * regressions. 15161e52f16SSean Christopherson * 15261e52f16SSean Christopherson * The original bug can reproduce with a delay up to ~500us on 15361e52f16SSean Christopherson * x86-64, but starts to require more iterations to reproduce 15461e52f16SSean Christopherson * as the delay creeps above ~10us, and the average runtime of 15561e52f16SSean Christopherson * each iteration obviously increases as well. Cap the delay 15661e52f16SSean Christopherson * at 10us to keep test runtime reasonable while minimizing 15761e52f16SSean Christopherson * potential coverage loss. 15861e52f16SSean Christopherson * 15961e52f16SSean Christopherson * The lower bound for reproducing the bug is likely below 1us, 16061e52f16SSean Christopherson * e.g. failures occur on x86-64 with nanosleep(0), but at that 16161e52f16SSean Christopherson * point the overhead of the syscall likely dominates the delay. 16261e52f16SSean Christopherson * Use usleep() for simplicity and to avoid unnecessary kernel 16361e52f16SSean Christopherson * dependencies. 16461e52f16SSean Christopherson */ 16561e52f16SSean Christopherson usleep((i % 10) + 1); 16661e52f16SSean Christopherson } 16761e52f16SSean Christopherson done = true; 16861e52f16SSean Christopherson return NULL; 16961e52f16SSean Christopherson } 17061e52f16SSean Christopherson 1717ed397d1SSean Christopherson static void calc_min_max_cpu(void) 1727b0035eaSSean Christopherson { 1737b0035eaSSean Christopherson int i, cnt, nproc; 1747b0035eaSSean Christopherson 1757ed397d1SSean Christopherson TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2); 1767b0035eaSSean Christopherson 1777b0035eaSSean Christopherson /* 1787b0035eaSSean Christopherson * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that 1797b0035eaSSean Christopherson * this task is affined to in order to reduce the time spent querying 1807b0035eaSSean Christopherson * unusable CPUs, e.g. if this task is pinned to a small percentage of 1817b0035eaSSean Christopherson * total CPUs. 1827b0035eaSSean Christopherson */ 1837b0035eaSSean Christopherson nproc = get_nprocs_conf(); 1847b0035eaSSean Christopherson min_cpu = -1; 1857b0035eaSSean Christopherson max_cpu = -1; 1867b0035eaSSean Christopherson cnt = 0; 1877b0035eaSSean Christopherson 1887b0035eaSSean Christopherson for (i = 0; i < nproc; i++) { 1897b0035eaSSean Christopherson if (!CPU_ISSET(i, &possible_mask)) 1907b0035eaSSean Christopherson continue; 1917b0035eaSSean Christopherson if (min_cpu == -1) 1927b0035eaSSean Christopherson min_cpu = i; 1937b0035eaSSean Christopherson max_cpu = i; 1947b0035eaSSean Christopherson cnt++; 1957b0035eaSSean Christopherson } 1967b0035eaSSean Christopherson 1977ed397d1SSean Christopherson __TEST_REQUIRE(cnt >= 2, 1987ed397d1SSean Christopherson "Only one usable CPU, task migration not possible"); 1997b0035eaSSean Christopherson } 2007b0035eaSSean Christopherson 20161e52f16SSean Christopherson int main(int argc, char *argv[]) 20261e52f16SSean Christopherson { 20361e52f16SSean Christopherson int r, i, snapshot; 20461e52f16SSean Christopherson struct kvm_vm *vm; 2052494a6d8SSean Christopherson struct kvm_vcpu *vcpu; 20661e52f16SSean Christopherson u32 cpu, rseq_cpu; 20761e52f16SSean Christopherson 20861e52f16SSean Christopherson /* Tell stdout not to buffer its content */ 20961e52f16SSean Christopherson setbuf(stdout, NULL); 21061e52f16SSean Christopherson 21161e52f16SSean Christopherson r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); 21261e52f16SSean Christopherson TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, 21361e52f16SSean Christopherson strerror(errno)); 21461e52f16SSean Christopherson 2157ed397d1SSean Christopherson calc_min_max_cpu(); 21661e52f16SSean Christopherson 21766d42ac7SGavin Shan r = rseq_register_current_thread(); 21866d42ac7SGavin Shan TEST_ASSERT(!r, "rseq_register_current_thread failed, errno = %d (%s)", 21966d42ac7SGavin Shan errno, strerror(errno)); 22061e52f16SSean Christopherson 22161e52f16SSean Christopherson /* 22261e52f16SSean Christopherson * Create and run a dummy VM that immediately exits to userspace via 22361e52f16SSean Christopherson * GUEST_SYNC, while concurrently migrating the process by setting its 22461e52f16SSean Christopherson * CPU affinity. 22561e52f16SSean Christopherson */ 2262494a6d8SSean Christopherson vm = vm_create_with_one_vcpu(&vcpu, guest_code); 227fbf094ceSOliver Upton ucall_init(vm, NULL); 22861e52f16SSean Christopherson 229e923b053SGavin Shan pthread_create(&migration_thread, NULL, migration_worker, 230*561cafebSJinrong Liang (void *)(unsigned long)syscall(SYS_gettid)); 23161e52f16SSean Christopherson 23261e52f16SSean Christopherson for (i = 0; !done; i++) { 233768e9a61SSean Christopherson vcpu_run(vcpu); 234768e9a61SSean Christopherson TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, 23561e52f16SSean Christopherson "Guest failed?"); 23661e52f16SSean Christopherson 23761e52f16SSean Christopherson /* 23861e52f16SSean Christopherson * Verify rseq's CPU matches sched's CPU. Ensure migration 2390fcc1029SGavin Shan * doesn't occur between getcpu() and reading the rseq cpu_id 2400fcc1029SGavin Shan * by rereading both if the sequence count changes, or if the 2410fcc1029SGavin Shan * count is odd (migration in-progress). 24261e52f16SSean Christopherson */ 24361e52f16SSean Christopherson do { 24461e52f16SSean Christopherson /* 24561e52f16SSean Christopherson * Drop bit 0 to force a mismatch if the count is odd, 24661e52f16SSean Christopherson * i.e. if a migration is in-progress. 24761e52f16SSean Christopherson */ 24861e52f16SSean Christopherson snapshot = atomic_read(&seq_cnt) & ~1; 24961e52f16SSean Christopherson 25061e52f16SSean Christopherson /* 2510fcc1029SGavin Shan * Ensure calling getcpu() and reading rseq.cpu_id complete 2520fcc1029SGavin Shan * in a single "no migration" window, i.e. are not reordered 2530fcc1029SGavin Shan * across the seq_cnt reads. 25461e52f16SSean Christopherson */ 25561e52f16SSean Christopherson smp_rmb(); 2560fcc1029SGavin Shan sys_getcpu(&cpu); 25766d42ac7SGavin Shan rseq_cpu = rseq_current_cpu_raw(); 25861e52f16SSean Christopherson smp_rmb(); 25961e52f16SSean Christopherson } while (snapshot != atomic_read(&seq_cnt)); 26061e52f16SSean Christopherson 26161e52f16SSean Christopherson TEST_ASSERT(rseq_cpu == cpu, 26261e52f16SSean Christopherson "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu); 26361e52f16SSean Christopherson } 26461e52f16SSean Christopherson 26561e52f16SSean Christopherson /* 26661e52f16SSean Christopherson * Sanity check that the test was able to enter the guest a reasonable 26761e52f16SSean Christopherson * number of times, e.g. didn't get stalled too often/long waiting for 2680fcc1029SGavin Shan * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly 2690fcc1029SGavin Shan * conservative ratio on x86-64, which can do _more_ KVM_RUNs than 2700fcc1029SGavin Shan * migrations given the 1us+ delay in the migration task. 27161e52f16SSean Christopherson */ 27261e52f16SSean Christopherson TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), 27361e52f16SSean Christopherson "Only performed %d KVM_RUNs, task stalled too much?\n", i); 27461e52f16SSean Christopherson 27561e52f16SSean Christopherson pthread_join(migration_thread, NULL); 27661e52f16SSean Christopherson 27761e52f16SSean Christopherson kvm_vm_free(vm); 27861e52f16SSean Christopherson 27966d42ac7SGavin Shan rseq_unregister_current_thread(); 28061e52f16SSean Christopherson 28161e52f16SSean Christopherson return 0; 28261e52f16SSean Christopherson } 283