// SPDX-License-Identifier: GPL-2.0
/*
 * Test for x86 KVM_SET_PMU_EVENT_FILTER.
 *
 * Copyright (C) 2022, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Verifies the expected behavior of allow lists and deny lists for
 * virtual PMU events.
 */

#define _GNU_SOURCE /* for program_invocation_short_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

/*
 * In lieu of copying perf_event.h into tools...
 */
#define ARCH_PERFMON_EVENTSEL_OS	(1ULL << 17)
#define ARCH_PERFMON_EVENTSEL_ENABLE	(1ULL << 22)

union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};

union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
};

/* End of stuff taken from perf_event.h. */

/* Oddly, this isn't in perf_event.h. */
#define ARCH_PERFMON_BRANCHES_RETIRED	5

#define VCPU_ID		0
#define NUM_BRANCHES	42

/*
 * This is how the event selector and unit mask are stored in an AMD
 * core performance event-select register. Intel's format is similar,
 * but the event selector is only 8 bits.
 */
#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
			      (umask & 0xff) << 8)

/*
 * "Branch instructions retired", from the Intel SDM, volume 3,
 * "Pre-defined Architectural Performance Events."
 */
#define INTEL_BR_RETIRED	EVENT(0xc4, 0)

/*
 * "Retired branch instructions", from Processor Programming Reference
 * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
 * Preliminary Processor Programming Reference (PPR) for AMD Family
 * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
 * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
 * B1 Processors Volume 1 of 2.
 */
#define AMD_ZEN_BR_RETIRED	EVENT(0xc2, 0)

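/*
 * Editorial note, not part of the original test: a worked example of the
 * EVENT() encoding above. With that macro, a 12-bit event select such as
 * 0x1C2 (used later by test_amd_deny_list()) and a zero unit mask encode
 * as
 *
 *	EVENT(0x1C2, 0) == 0x1000000c2
 *
 * i.e. select[11:8] lands in bits 35:32, the unit mask in bits 15:8, and
 * select[7:0] in bits 7:0, matching the AMD event-select layout described
 * above. On Intel only the low 8 bits of the select are used, so
 * INTEL_BR_RETIRED is simply 0xc4.
 */
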
/*
 * This event list comprises Intel's eight architectural events plus
 * AMD's "retired branch instructions" for Zen[123] (and possibly
 * other AMD CPUs).
 */
static const uint64_t event_list[] = {
	EVENT(0x3c, 0),
	EVENT(0xc0, 0),
	EVENT(0x3c, 1),
	EVENT(0x2e, 0x4f),
	EVENT(0x2e, 0x41),
	EVENT(0xc4, 0),
	EVENT(0xc5, 0),
	EVENT(0xa4, 1),
	AMD_ZEN_BR_RETIRED,
};

/*
 * If we encounter a #GP during the guest PMU sanity check, then the guest
 * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
 */
static void guest_gp_handler(struct ex_regs *regs)
{
	GUEST_SYNC(0);
}

/*
 * Check that we can write a new value to the given MSR and read it back.
 * The caller should provide a non-empty set of bits that are safe to flip.
 *
 * Return on success. GUEST_SYNC(0) on error.
 */
static void check_msr(uint32_t msr, uint64_t bits_to_flip)
{
	uint64_t v = rdmsr(msr) ^ bits_to_flip;

	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);

	v ^= bits_to_flip;
	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);
}

static void intel_guest_code(void)
{
	check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
	check_msr(MSR_P6_EVNTSEL0, 0xffff);
	check_msr(MSR_IA32_PMC0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
		br0 = rdmsr(MSR_IA32_PMC0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_IA32_PMC0);
		GUEST_SYNC(br1 - br0);
	}
}

/*
 * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
 * this code uses the always-available, legacy K7 PMU MSRs, which alias to
 * the first four of the six extended core PMU MSRs.
 */
static void amd_guest_code(void)
{
	check_msr(MSR_K7_EVNTSEL0, 0xffff);
	check_msr(MSR_K7_PERFCTR0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

		wrmsr(MSR_K7_EVNTSEL0, 0);
		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
		br0 = rdmsr(MSR_K7_PERFCTR0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_K7_PERFCTR0);
		GUEST_SYNC(br1 - br0);
	}
}

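/*
 * Editorial note, not part of the original test: in both guest code
 * variants above, the single-instruction busy loop
 *
 *	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
 *
 * preloads ECX with NUM_BRANCHES via the compound-literal "+c" operand and
 * executes "loop" against its own address, so the guest retires roughly
 * NUM_BRANCHES branch instructions between the two counter reads. A C
 * sketch of the same idea would be
 *
 *	for (int i = NUM_BRANCHES; i; i--)
 *		;	// each iteration retires a conditional branch
 *
 * but the asm form keeps the branch count out of the compiler's hands.
 * Accordingly, the host-side checks only log a mismatch against
 * NUM_BRANCHES and assert merely that the delta is (non-)zero as expected.
 */
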
/*
 * Run the VM to the next GUEST_SYNC(value), and return the value passed
 * to the sync. Any other exit from the guest is fatal.
 */
static uint64_t run_vm_to_sync(struct kvm_vm *vm)
{
	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
	struct ucall uc;

	vcpu_run(vm, VCPU_ID);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
		    run->exit_reason,
		    exit_reason_str(run->exit_reason));
	get_ucall(vm, VCPU_ID, &uc);
	TEST_ASSERT(uc.cmd == UCALL_SYNC,
		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
	return uc.args[1];
}

/*
 * In a nested environment or if the vPMU is disabled, the guest PMU
 * might not work as architected (accessing the PMU MSRs may raise
 * #GP, or writes could simply be discarded). In those situations,
 * there is no point in running these tests. The guest code will perform
 * a sanity check and then GUEST_SYNC(success). In the case of failure,
 * the behavior of the guest on resumption is undefined.
 */
static bool sanity_check_pmu(struct kvm_vm *vm)
{
	bool success;

	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
	success = run_vm_to_sync(vm);
	vm_install_exception_handler(vm, GP_VECTOR, NULL);

	return success;
}

static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
{
	struct kvm_pmu_event_filter *f;
	int size = sizeof(*f) + nevents * sizeof(f->events[0]);

	f = malloc(size);
	TEST_ASSERT(f, "Out of memory");
	memset(f, 0, size);
	f->nevents = nevents;
	return f;
}

static struct kvm_pmu_event_filter *
create_pmu_event_filter(const uint64_t event_list[],
			int nevents, uint32_t action)
{
	struct kvm_pmu_event_filter *f;
	int i;

	f = alloc_pmu_event_filter(nevents);
	f->action = action;
	for (i = 0; i < nevents; i++)
		f->events[i] = event_list[i];

	return f;
}

static struct kvm_pmu_event_filter *event_filter(uint32_t action)
{
	return create_pmu_event_filter(event_list,
				       ARRAY_SIZE(event_list),
				       action);
}

/*
 * Remove the first occurrence of 'event' (if any) from the filter's
 * event list.
 */
static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
						 uint64_t event)
{
	bool found = false;
	int i;

	for (i = 0; i < f->nevents; i++) {
		if (found)
			f->events[i - 1] = f->events[i];
		else
			found = f->events[i] == event;
	}
	if (found)
		f->nevents--;
	return f;
}

static void test_without_filter(struct kvm_vm *vm)
{
	uint64_t count = run_vm_to_sync(vm);

	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static uint64_t test_with_filter(struct kvm_vm *vm,
				 struct kvm_pmu_event_filter *f)
{
	vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
	return run_vm_to_sync(vm);
}

static void test_amd_deny_list(struct kvm_vm *vm)
{
	uint64_t event = EVENT(0x1C2, 0);
	struct kvm_pmu_event_filter *f;
	uint64_t count;

	f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
	count = test_with_filter(vm, f);

	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

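/*
 * Editorial summary, not part of the original test: the four membership
 * tests below exercise both filter actions against the branches-retired
 * event that the guest programs (INTEL_BR_RETIRED or AMD_ZEN_BR_RETIRED):
 *
 *	action			event in filter list	expected count
 *	KVM_PMU_EVENT_ALLOW	yes			non-zero (~NUM_BRANCHES)
 *	KVM_PMU_EVENT_ALLOW	no			0
 *	KVM_PMU_EVENT_DENY	yes			0
 *	KVM_PMU_EVENT_DENY	no			non-zero (~NUM_BRANCHES)
 */
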
static void test_member_deny_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count = test_with_filter(vm, f);

	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}

static void test_member_allow_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count = test_with_filter(vm, f);

	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static void test_not_member_deny_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vm, f);
	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static void test_not_member_allow_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vm, f);
	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}

/*
 * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
 *
 * Note that KVM_CAP_PMU_CAPABILITY must be enabled (via KVM_ENABLE_CAP)
 * before any vCPUs are created.
 */
static void test_pmu_config_disable(void (*guest_code)(void))
{
	int r;
	struct kvm_vm *vm;
	struct kvm_enable_cap cap = { 0 };

	r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
	if (!(r & KVM_PMU_CAP_DISABLE))
		return;

	vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES);

	cap.cap = KVM_CAP_PMU_CAPABILITY;
	cap.args[0] = KVM_PMU_CAP_DISABLE;
	TEST_ASSERT(!vm_enable_cap(vm, &cap), "Failed to set KVM_PMU_CAP_DISABLE.");

	vm_vcpu_add_default(vm, VCPU_ID, guest_code);
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);

	TEST_ASSERT(!sanity_check_pmu(vm),
		    "Guest should not be able to use disabled PMU.");

	kvm_vm_free(vm);
}

/*
 * Check for a non-zero PMU version, at least one general-purpose
 * counter per logical processor, an EBX bit vector of length greater
 * than 5, and EBX[5] clear.
 */
static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
{
	union cpuid10_eax eax = { .full = entry->eax };
	union cpuid10_ebx ebx = { .full = entry->ebx };

	return eax.split.version_id && eax.split.num_counters > 0 &&
		eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
		!ebx.split.no_branch_instruction_retired;
}

/*
 * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
 * clear on AMD hardware.
 */
static bool use_intel_pmu(void)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_index(0xa, 0);
	return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
}

static bool is_zen1(uint32_t eax)
{
	return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
}

static bool is_zen2(uint32_t eax)
{
	return x86_family(eax) == 0x17 &&
	       x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
}

static bool is_zen3(uint32_t eax)
{
	return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
}

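/*
 * Editorial worked example, not part of the original test: the is_zen*()
 * helpers above take the raw CPUID.01H:EAX value and rely on x86_family()
 * and x86_model() to fold in the extended family/model fields. For
 * instance, a hypothetical EAX value of 0x00830f10 (extended family 0x8 +
 * base family 0xf, extended model 0x3 + base model 0x1, stepping 0) yields
 * family 0x17 and model 0x31, so is_zen2(0x00830f10) returns true.
 */
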
/*
 * Determining AMD support for a PMU event requires consulting the AMD
 * PPR for the CPU or reference material derived therefrom. The AMD
 * test code herein has been verified to work on Zen1, Zen2, and Zen3.
 *
 * Feel free to add more AMD CPUs that are documented to support event
 * select 0xc2 umask 0 as "retired branch instructions."
 */
static bool use_amd_pmu(void)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_index(1, 0);
	return is_amd_cpu() && entry &&
		(is_zen1(entry->eax) ||
		 is_zen2(entry->eax) ||
		 is_zen3(entry->eax));
}

int main(int argc, char *argv[])
{
	void (*guest_code)(void) = NULL;
	struct kvm_vm *vm;
	int r;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER);
	if (!r) {
		print_skip("KVM_CAP_PMU_EVENT_FILTER not supported");
		exit(KSFT_SKIP);
	}

	if (use_intel_pmu())
		guest_code = intel_guest_code;
	else if (use_amd_pmu())
		guest_code = amd_guest_code;

	if (!guest_code) {
		print_skip("Don't know how to test this guest PMU");
		exit(KSFT_SKIP);
	}

	vm = vm_create_default(VCPU_ID, 0, guest_code);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);

	if (!sanity_check_pmu(vm)) {
		print_skip("Guest PMU is not functional");
		exit(KSFT_SKIP);
	}

	if (use_amd_pmu())
		test_amd_deny_list(vm);

	test_without_filter(vm);
	test_member_deny_list(vm);
	test_member_allow_list(vm);
	test_not_member_deny_list(vm);
	test_not_member_allow_list(vm);

	kvm_vm_free(vm);

	test_pmu_config_disable(guest_code);

	return 0;
}

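/*
 * Editorial usage note, not part of the original test: once the KVM
 * selftests are built, this test is run directly from the selftests/kvm
 * directory, e.g.
 *
 *	$ ./x86_64/pmu_event_filter_test
 *
 * It exits with KSFT_SKIP when KVM_CAP_PMU_EVENT_FILTER is unsupported or
 * when the guest PMU is not usable (e.g. nested, or with the vPMU
 * disabled); otherwise any unexpected count trips a TEST_ASSERT().
 */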