1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2012 - ARM Ltd 4 * Author: Marc Zyngier <marc.zyngier@arm.com> 5 */ 6 7 #include <linux/arm-smccc.h> 8 #include <linux/preempt.h> 9 #include <linux/kvm_host.h> 10 #include <linux/uaccess.h> 11 #include <linux/wait.h> 12 13 #include <asm/cputype.h> 14 #include <asm/kvm_emulate.h> 15 16 #include <kvm/arm_psci.h> 17 #include <kvm/arm_hypercalls.h> 18 19 /* 20 * This is an implementation of the Power State Coordination Interface 21 * as described in ARM document number ARM DEN 0022A. 22 */ 23 24 #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) 25 26 static unsigned long psci_affinity_mask(unsigned long affinity_level) 27 { 28 if (affinity_level <= 3) 29 return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); 30 31 return 0; 32 } 33 34 static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) 35 { 36 /* 37 * NOTE: For simplicity, we make VCPU suspend emulation to be 38 * same-as WFI (Wait-for-interrupt) emulation. 39 * 40 * This means for KVM the wakeup events are interrupts and 41 * this is consistent with intended use of StateID as described 42 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). 43 * 44 * Further, we also treat power-down request to be same as 45 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 46 * specification (ARM DEN 0022A). This means all suspend states 47 * for KVM will preserve the register state. 48 */ 49 kvm_vcpu_wfi(vcpu); 50 51 return PSCI_RET_SUCCESS; 52 } 53 54 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) 55 { 56 vcpu->arch.power_off = true; 57 kvm_make_request(KVM_REQ_SLEEP, vcpu); 58 kvm_vcpu_kick(vcpu); 59 } 60 61 static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, 62 unsigned long affinity) 63 { 64 return !(affinity & ~MPIDR_HWID_BITMASK); 65 } 66 67 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 68 { 69 struct vcpu_reset_state *reset_state; 70 struct kvm *kvm = source_vcpu->kvm; 71 struct kvm_vcpu *vcpu = NULL; 72 unsigned long cpu_id; 73 74 cpu_id = smccc_get_arg1(source_vcpu); 75 if (!kvm_psci_valid_affinity(source_vcpu, cpu_id)) 76 return PSCI_RET_INVALID_PARAMS; 77 78 vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); 79 80 /* 81 * Make sure the caller requested a valid CPU and that the CPU is 82 * turned off. 83 */ 84 if (!vcpu) 85 return PSCI_RET_INVALID_PARAMS; 86 if (!vcpu->arch.power_off) { 87 if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) 88 return PSCI_RET_ALREADY_ON; 89 else 90 return PSCI_RET_INVALID_PARAMS; 91 } 92 93 reset_state = &vcpu->arch.reset_state; 94 95 reset_state->pc = smccc_get_arg2(source_vcpu); 96 97 /* Propagate caller endianness */ 98 reset_state->be = kvm_vcpu_is_be(source_vcpu); 99 100 /* 101 * NOTE: We always update r0 (or x0) because for PSCI v0.1 102 * the general purpose registers are undefined upon CPU_ON. 103 */ 104 reset_state->r0 = smccc_get_arg3(source_vcpu); 105 106 WRITE_ONCE(reset_state->reset, true); 107 kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); 108 109 /* 110 * Make sure the reset request is observed if the change to 111 * power_off is observed. 112 */ 113 smp_wmb(); 114 115 vcpu->arch.power_off = false; 116 kvm_vcpu_wake_up(vcpu); 117 118 return PSCI_RET_SUCCESS; 119 } 120 121 static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) 122 { 123 int matching_cpus = 0; 124 unsigned long i, mpidr; 125 unsigned long target_affinity; 126 unsigned long target_affinity_mask; 127 unsigned long lowest_affinity_level; 128 struct kvm *kvm = vcpu->kvm; 129 struct kvm_vcpu *tmp; 130 131 target_affinity = smccc_get_arg1(vcpu); 132 lowest_affinity_level = smccc_get_arg2(vcpu); 133 134 if (!kvm_psci_valid_affinity(vcpu, target_affinity)) 135 return PSCI_RET_INVALID_PARAMS; 136 137 /* Determine target affinity mask */ 138 target_affinity_mask = psci_affinity_mask(lowest_affinity_level); 139 if (!target_affinity_mask) 140 return PSCI_RET_INVALID_PARAMS; 141 142 /* Ignore other bits of target affinity */ 143 target_affinity &= target_affinity_mask; 144 145 /* 146 * If one or more VCPU matching target affinity are running 147 * then ON else OFF 148 */ 149 kvm_for_each_vcpu(i, tmp, kvm) { 150 mpidr = kvm_vcpu_get_mpidr_aff(tmp); 151 if ((mpidr & target_affinity_mask) == target_affinity) { 152 matching_cpus++; 153 if (!tmp->arch.power_off) 154 return PSCI_0_2_AFFINITY_LEVEL_ON; 155 } 156 } 157 158 if (!matching_cpus) 159 return PSCI_RET_INVALID_PARAMS; 160 161 return PSCI_0_2_AFFINITY_LEVEL_OFF; 162 } 163 164 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) 165 { 166 unsigned long i; 167 struct kvm_vcpu *tmp; 168 169 /* 170 * The KVM ABI specifies that a system event exit may call KVM_RUN 171 * again and may perform shutdown/reboot at a later time that when the 172 * actual request is made. Since we are implementing PSCI and a 173 * caller of PSCI reboot and shutdown expects that the system shuts 174 * down or reboots immediately, let's make sure that VCPUs are not run 175 * after this call is handled and before the VCPUs have been 176 * re-initialized. 177 */ 178 kvm_for_each_vcpu(i, tmp, vcpu->kvm) 179 tmp->arch.power_off = true; 180 kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); 181 182 memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); 183 vcpu->run->system_event.type = type; 184 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 185 } 186 187 static void kvm_psci_system_off(struct kvm_vcpu *vcpu) 188 { 189 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); 190 } 191 192 static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) 193 { 194 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); 195 } 196 197 static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) 198 { 199 int i; 200 201 /* 202 * Zero the input registers' upper 32 bits. They will be fully 203 * zeroed on exit, so we're fine changing them in place. 204 */ 205 for (i = 1; i < 4; i++) 206 vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); 207 } 208 209 static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) 210 { 211 switch(fn) { 212 case PSCI_0_2_FN64_CPU_SUSPEND: 213 case PSCI_0_2_FN64_CPU_ON: 214 case PSCI_0_2_FN64_AFFINITY_INFO: 215 /* Disallow these functions for 32bit guests */ 216 if (vcpu_mode_is_32bit(vcpu)) 217 return PSCI_RET_NOT_SUPPORTED; 218 break; 219 } 220 221 return 0; 222 } 223 224 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) 225 { 226 struct kvm *kvm = vcpu->kvm; 227 u32 psci_fn = smccc_get_function(vcpu); 228 unsigned long val; 229 int ret = 1; 230 231 val = kvm_psci_check_allowed_function(vcpu, psci_fn); 232 if (val) 233 goto out; 234 235 switch (psci_fn) { 236 case PSCI_0_2_FN_PSCI_VERSION: 237 /* 238 * Bits[31:16] = Major Version = 0 239 * Bits[15:0] = Minor Version = 2 240 */ 241 val = KVM_ARM_PSCI_0_2; 242 break; 243 case PSCI_0_2_FN_CPU_SUSPEND: 244 case PSCI_0_2_FN64_CPU_SUSPEND: 245 val = kvm_psci_vcpu_suspend(vcpu); 246 break; 247 case PSCI_0_2_FN_CPU_OFF: 248 kvm_psci_vcpu_off(vcpu); 249 val = PSCI_RET_SUCCESS; 250 break; 251 case PSCI_0_2_FN_CPU_ON: 252 kvm_psci_narrow_to_32bit(vcpu); 253 fallthrough; 254 case PSCI_0_2_FN64_CPU_ON: 255 mutex_lock(&kvm->lock); 256 val = kvm_psci_vcpu_on(vcpu); 257 mutex_unlock(&kvm->lock); 258 break; 259 case PSCI_0_2_FN_AFFINITY_INFO: 260 kvm_psci_narrow_to_32bit(vcpu); 261 fallthrough; 262 case PSCI_0_2_FN64_AFFINITY_INFO: 263 val = kvm_psci_vcpu_affinity_info(vcpu); 264 break; 265 case PSCI_0_2_FN_MIGRATE_INFO_TYPE: 266 /* 267 * Trusted OS is MP hence does not require migration 268 * or 269 * Trusted OS is not present 270 */ 271 val = PSCI_0_2_TOS_MP; 272 break; 273 case PSCI_0_2_FN_SYSTEM_OFF: 274 kvm_psci_system_off(vcpu); 275 /* 276 * We shouldn't be going back to guest VCPU after 277 * receiving SYSTEM_OFF request. 278 * 279 * If user space accidentally/deliberately resumes 280 * guest VCPU after SYSTEM_OFF request then guest 281 * VCPU should see internal failure from PSCI return 282 * value. To achieve this, we preload r0 (or x0) with 283 * PSCI return value INTERNAL_FAILURE. 284 */ 285 val = PSCI_RET_INTERNAL_FAILURE; 286 ret = 0; 287 break; 288 case PSCI_0_2_FN_SYSTEM_RESET: 289 kvm_psci_system_reset(vcpu); 290 /* 291 * Same reason as SYSTEM_OFF for preloading r0 (or x0) 292 * with PSCI return value INTERNAL_FAILURE. 293 */ 294 val = PSCI_RET_INTERNAL_FAILURE; 295 ret = 0; 296 break; 297 default: 298 val = PSCI_RET_NOT_SUPPORTED; 299 break; 300 } 301 302 out: 303 smccc_set_retval(vcpu, val, 0, 0, 0); 304 return ret; 305 } 306 307 static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) 308 { 309 u32 psci_fn = smccc_get_function(vcpu); 310 u32 feature; 311 unsigned long val; 312 int ret = 1; 313 314 switch(psci_fn) { 315 case PSCI_0_2_FN_PSCI_VERSION: 316 val = KVM_ARM_PSCI_1_0; 317 break; 318 case PSCI_1_0_FN_PSCI_FEATURES: 319 feature = smccc_get_arg1(vcpu); 320 val = kvm_psci_check_allowed_function(vcpu, feature); 321 if (val) 322 break; 323 324 switch(feature) { 325 case PSCI_0_2_FN_PSCI_VERSION: 326 case PSCI_0_2_FN_CPU_SUSPEND: 327 case PSCI_0_2_FN64_CPU_SUSPEND: 328 case PSCI_0_2_FN_CPU_OFF: 329 case PSCI_0_2_FN_CPU_ON: 330 case PSCI_0_2_FN64_CPU_ON: 331 case PSCI_0_2_FN_AFFINITY_INFO: 332 case PSCI_0_2_FN64_AFFINITY_INFO: 333 case PSCI_0_2_FN_MIGRATE_INFO_TYPE: 334 case PSCI_0_2_FN_SYSTEM_OFF: 335 case PSCI_0_2_FN_SYSTEM_RESET: 336 case PSCI_1_0_FN_PSCI_FEATURES: 337 case ARM_SMCCC_VERSION_FUNC_ID: 338 val = 0; 339 break; 340 default: 341 val = PSCI_RET_NOT_SUPPORTED; 342 break; 343 } 344 break; 345 default: 346 return kvm_psci_0_2_call(vcpu); 347 } 348 349 smccc_set_retval(vcpu, val, 0, 0, 0); 350 return ret; 351 } 352 353 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) 354 { 355 struct kvm *kvm = vcpu->kvm; 356 u32 psci_fn = smccc_get_function(vcpu); 357 unsigned long val; 358 359 switch (psci_fn) { 360 case KVM_PSCI_FN_CPU_OFF: 361 kvm_psci_vcpu_off(vcpu); 362 val = PSCI_RET_SUCCESS; 363 break; 364 case KVM_PSCI_FN_CPU_ON: 365 mutex_lock(&kvm->lock); 366 val = kvm_psci_vcpu_on(vcpu); 367 mutex_unlock(&kvm->lock); 368 break; 369 default: 370 val = PSCI_RET_NOT_SUPPORTED; 371 break; 372 } 373 374 smccc_set_retval(vcpu, val, 0, 0, 0); 375 return 1; 376 } 377 378 /** 379 * kvm_psci_call - handle PSCI call if r0 value is in range 380 * @vcpu: Pointer to the VCPU struct 381 * 382 * Handle PSCI calls from guests through traps from HVC instructions. 383 * The calling convention is similar to SMC calls to the secure world 384 * where the function number is placed in r0. 385 * 386 * This function returns: > 0 (success), 0 (success but exit to user 387 * space), and < 0 (errors) 388 * 389 * Errors: 390 * -EINVAL: Unrecognized PSCI function 391 */ 392 int kvm_psci_call(struct kvm_vcpu *vcpu) 393 { 394 switch (kvm_psci_version(vcpu, vcpu->kvm)) { 395 case KVM_ARM_PSCI_1_0: 396 return kvm_psci_1_0_call(vcpu); 397 case KVM_ARM_PSCI_0_2: 398 return kvm_psci_0_2_call(vcpu); 399 case KVM_ARM_PSCI_0_1: 400 return kvm_psci_0_1_call(vcpu); 401 default: 402 return -EINVAL; 403 }; 404 } 405 406 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) 407 { 408 return 4; /* PSCI version and three workaround registers */ 409 } 410 411 int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) 412 { 413 if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++)) 414 return -EFAULT; 415 416 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++)) 417 return -EFAULT; 418 419 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) 420 return -EFAULT; 421 422 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++)) 423 return -EFAULT; 424 425 return 0; 426 } 427 428 #define KVM_REG_FEATURE_LEVEL_WIDTH 4 429 #define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1) 430 431 /* 432 * Convert the workaround level into an easy-to-compare number, where higher 433 * values mean better protection. 434 */ 435 static int get_kernel_wa_level(u64 regid) 436 { 437 switch (regid) { 438 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 439 switch (arm64_get_spectre_v2_state()) { 440 case SPECTRE_VULNERABLE: 441 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; 442 case SPECTRE_MITIGATED: 443 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; 444 case SPECTRE_UNAFFECTED: 445 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; 446 } 447 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; 448 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 449 switch (arm64_get_spectre_v4_state()) { 450 case SPECTRE_MITIGATED: 451 /* 452 * As for the hypercall discovery, we pretend we 453 * don't have any FW mitigation if SSBS is there at 454 * all times. 455 */ 456 if (cpus_have_final_cap(ARM64_SSBS)) 457 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 458 fallthrough; 459 case SPECTRE_UNAFFECTED: 460 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; 461 case SPECTRE_VULNERABLE: 462 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 463 } 464 break; 465 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: 466 switch (arm64_get_spectre_bhb_state()) { 467 case SPECTRE_VULNERABLE: 468 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; 469 case SPECTRE_MITIGATED: 470 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL; 471 case SPECTRE_UNAFFECTED: 472 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED; 473 } 474 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; 475 } 476 477 return -EINVAL; 478 } 479 480 int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 481 { 482 void __user *uaddr = (void __user *)(long)reg->addr; 483 u64 val; 484 485 switch (reg->id) { 486 case KVM_REG_ARM_PSCI_VERSION: 487 val = kvm_psci_version(vcpu, vcpu->kvm); 488 break; 489 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 490 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 491 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: 492 val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; 493 break; 494 default: 495 return -ENOENT; 496 } 497 498 if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) 499 return -EFAULT; 500 501 return 0; 502 } 503 504 int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 505 { 506 void __user *uaddr = (void __user *)(long)reg->addr; 507 u64 val; 508 int wa_level; 509 510 if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) 511 return -EFAULT; 512 513 switch (reg->id) { 514 case KVM_REG_ARM_PSCI_VERSION: 515 { 516 bool wants_02; 517 518 wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); 519 520 switch (val) { 521 case KVM_ARM_PSCI_0_1: 522 if (wants_02) 523 return -EINVAL; 524 vcpu->kvm->arch.psci_version = val; 525 return 0; 526 case KVM_ARM_PSCI_0_2: 527 case KVM_ARM_PSCI_1_0: 528 if (!wants_02) 529 return -EINVAL; 530 vcpu->kvm->arch.psci_version = val; 531 return 0; 532 } 533 break; 534 } 535 536 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 537 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: 538 if (val & ~KVM_REG_FEATURE_LEVEL_MASK) 539 return -EINVAL; 540 541 if (get_kernel_wa_level(reg->id) < val) 542 return -EINVAL; 543 544 return 0; 545 546 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 547 if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | 548 KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) 549 return -EINVAL; 550 551 /* The enabled bit must not be set unless the level is AVAIL. */ 552 if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) && 553 (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL) 554 return -EINVAL; 555 556 /* 557 * Map all the possible incoming states to the only two we 558 * really want to deal with. 559 */ 560 switch (val & KVM_REG_FEATURE_LEVEL_MASK) { 561 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: 562 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: 563 wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 564 break; 565 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: 566 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: 567 wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; 568 break; 569 default: 570 return -EINVAL; 571 } 572 573 /* 574 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the 575 * other way around. 576 */ 577 if (get_kernel_wa_level(reg->id) < wa_level) 578 return -EINVAL; 579 580 return 0; 581 default: 582 return -ENOENT; 583 } 584 585 return -EINVAL; 586 } 587