// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/atomic.h>

#include "kvm_util.h"
#include "test_util.h"
#include "guest_modes.h"
#include "processor.h"

static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
{
	uint64_t gpa;

	for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
		*((volatile uint64_t *)gpa) = gpa;

	GUEST_DONE();
}

struct vcpu_info {
	struct kvm_vm *vm;
	uint32_t id;
	uint64_t start_gpa;
	uint64_t end_gpa;
};

static int nr_vcpus;
static atomic_t rendezvous;

static void rendezvous_with_boss(void)
{
	int orig = atomic_read(&rendezvous);

	if (orig > 0) {
		atomic_dec_and_test(&rendezvous);
		while (atomic_read(&rendezvous) > 0)
			cpu_relax();
	} else {
		atomic_inc(&rendezvous);
		while (atomic_read(&rendezvous) < 0)
			cpu_relax();
	}
}

static void run_vcpu(struct kvm_vm *vm, uint32_t vcpu_id)
{
	vcpu_run(vm, vcpu_id);
	ASSERT_EQ(get_ucall(vm, vcpu_id, NULL), UCALL_DONE);
}

static void *vcpu_worker(void *data)
{
	struct vcpu_info *vcpu = data;
	struct kvm_vm *vm = vcpu->vm;
	struct kvm_sregs sregs;
	struct kvm_regs regs;

	vcpu_args_set(vm, vcpu->id, 3, vcpu->start_gpa, vcpu->end_gpa,
		      vm_get_page_size(vm));

	/* Snapshot regs before the first run. */
	vcpu_regs_get(vm, vcpu->id, &regs);
	rendezvous_with_boss();

	run_vcpu(vm, vcpu->id);
	rendezvous_with_boss();
	vcpu_regs_set(vm, vcpu->id, &regs);
	vcpu_sregs_get(vm, vcpu->id, &sregs);
#ifdef __x86_64__
	/* Toggle CR0.WP to trigger a MMU context reset. */
	sregs.cr0 ^= X86_CR0_WP;
#endif
	vcpu_sregs_set(vm, vcpu->id, &sregs);
	rendezvous_with_boss();

	run_vcpu(vm, vcpu->id);
	rendezvous_with_boss();

	return NULL;
}

static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa,
				uint64_t end_gpa)
{
	struct vcpu_info *info;
	uint64_t gpa, nr_bytes;
	pthread_t *threads;
	int i;

	threads = malloc(nr_vcpus * sizeof(*threads));
	TEST_ASSERT(threads, "Failed to allocate vCPU threads");

	info = malloc(nr_vcpus * sizeof(*info));
	TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges");

	nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) &
		   ~((uint64_t)vm_get_page_size(vm) - 1);
	TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus);

	for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) {
		info[i].vm = vm;
		info[i].id = i;
		info[i].start_gpa = gpa;
		info[i].end_gpa = gpa + nr_bytes;
		pthread_create(&threads[i], NULL, vcpu_worker, &info[i]);
	}
	return threads;
}
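
/*
 * Rendezvous protocol: the boss primes the counter to +/-(nr_vcpus + 1) and
 * each vCPU moves it one step toward zero in rendezvous_with_boss().  When
 * the magnitude reaches 1, all vCPUs have arrived; the boss records the
 * timestamp for the completed phase and then releases the vCPUs by flipping
 * the counter's sign and restoring its magnitude for the next phase.
 */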
static void rendezvous_with_vcpus(struct timespec *time, const char *name)
{
	int i, rendezvoused;

	pr_info("Waiting for vCPUs to finish %s...\n", name);

	rendezvoused = atomic_read(&rendezvous);
	for (i = 0; abs(rendezvoused) != 1; i++) {
		usleep(100);
		if (!(i & 0x3f))
			pr_info("\r%d vCPUs haven't rendezvoused...",
				abs(rendezvoused) - 1);
		rendezvoused = atomic_read(&rendezvous);
	}

	clock_gettime(CLOCK_MONOTONIC, time);

	/* Release the vCPUs after getting the time of the previous action. */
	pr_info("\rAll vCPUs finished %s, releasing...\n", name);
	if (rendezvoused > 0)
		atomic_set(&rendezvous, -nr_vcpus - 1);
	else
		atomic_set(&rendezvous, nr_vcpus + 1);
}

static void calc_default_nr_vcpus(void)
{
	cpu_set_t possible_mask;
	int r;

	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
		    errno, strerror(errno));

	nr_vcpus = CPU_COUNT(&possible_mask) * 3/4;
	TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
}

int main(int argc, char *argv[])
{
	/*
	 * Skip the first 4gb and slot0.  slot0 maps <1gb and is used to back
	 * the guest's code, stack, and page tables.  Because selftests creates
	 * an IRQCHIP, a.k.a. a local APIC, KVM creates an internal memslot
	 * just below the 4gb boundary.  This test could create memory at
	 * 1gb-3gb, but it's simpler to skip straight to 4gb.
	 */
	const uint64_t size_1gb = (1 << 30);
	const uint64_t start_gpa = (4ull * size_1gb);
	const int first_slot = 1;

	struct timespec time_start, time_run1, time_reset, time_run2;
	uint64_t max_gpa, gpa, slot_size, max_mem, i;
	int max_slots, slot, opt, fd;
	bool hugepages = false;
	pthread_t *threads;
	struct kvm_vm *vm;
	void *mem;

	/*
	 * Default to 2gb so that maxing out systems with MAXPHYADDR=46, which
	 * are quite common for x86, requires changing only max_mem (KVM allows
	 * 32k memslots, 32k * 2gb == ~64tb of guest memory).
	 */
	slot_size = 2 * size_1gb;

	max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
	TEST_ASSERT(max_slots > first_slot, "KVM is broken");

	/* All KVM MMUs should be able to survive a 128gb guest. */
	max_mem = 128 * size_1gb;

	calc_default_nr_vcpus();

	while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) {
		switch (opt) {
		case 'c':
			nr_vcpus = atoi(optarg);
			TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0");
			break;
		case 'm':
			max_mem = atoi(optarg) * size_1gb;
			TEST_ASSERT(max_mem > 0, "memory size must be >0");
			break;
		case 's':
			slot_size = atoi(optarg) * size_1gb;
			TEST_ASSERT(slot_size > 0, "slot size must be >0");
			break;
		case 'H':
			hugepages = true;
			break;
		case 'h':
		default:
			printf("usage: %s [-c nr_vcpus] [-m max_mem_in_gb] [-s slot_size_in_gb] [-H]\n", argv[0]);
			exit(1);
		}
	}

	vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);

	max_gpa = vm_get_max_gfn(vm) << vm_get_page_shift(vm);
	TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb");

	fd = kvm_memfd_alloc(slot_size, hugepages);
	mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");

	TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");

	/* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */
	for (i = 0; i < slot_size; i += vm_get_page_size(vm))
		((uint8_t *)mem)[i] = 0xaa;
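
	/*
	 * Carve the GPA range above 4gb into slot_size memslots, all backed
	 * by the same host mapping, and identity map each slot in the guest
	 * page tables so guest_code() can write every page through its GPA.
	 */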
	gpa = 0;
	for (slot = first_slot; slot < max_slots; slot++) {
		gpa = start_gpa + ((slot - first_slot) * slot_size);
		if (gpa + slot_size > max_gpa)
			break;

		if ((gpa - start_gpa) >= max_mem)
			break;

		vm_set_user_memory_region(vm, slot, 0, gpa, slot_size, mem);

#ifdef __x86_64__
		/* Identity map memory in the guest using 1gb pages. */
		for (i = 0; i < slot_size; i += size_1gb)
			__virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
#else
		for (i = 0; i < slot_size; i += vm_get_page_size(vm))
			virt_pg_map(vm, gpa + i, gpa + i);
#endif
	}

	atomic_set(&rendezvous, nr_vcpus + 1);
	threads = spawn_workers(vm, start_gpa, gpa);

	pr_info("Running with %lugb of guest memory and %u vCPUs\n",
		(gpa - start_gpa) / size_1gb, nr_vcpus);

	rendezvous_with_vcpus(&time_start, "spawning");
	rendezvous_with_vcpus(&time_run1, "run 1");
	rendezvous_with_vcpus(&time_reset, "reset");
	rendezvous_with_vcpus(&time_run2, "run 2");

	time_run2 = timespec_sub(time_run2, time_reset);
	time_reset = timespec_sub(time_reset, time_run1);
	time_run1 = timespec_sub(time_run1, time_start);

	pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n",
		time_run1.tv_sec, time_run1.tv_nsec,
		time_reset.tv_sec, time_reset.tv_nsec,
		time_run2.tv_sec, time_run2.tv_nsec);

	/*
	 * Delete even numbered slots (arbitrary) and unmap the first half of
	 * the backing (also arbitrary) to verify KVM correctly drops all
	 * references to the removed regions.
	 */
	for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
		vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);

	munmap(mem, slot_size / 2);

	/* Sanity check that the vCPUs actually ran. */
	for (i = 0; i < nr_vcpus; i++)
		pthread_join(threads[i], NULL);

	/*
	 * Deliberately exit without deleting the remaining memslots or closing
	 * kvm_fd to test cleanup via mmu_notifier.release.
	 */
}