1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2012 - ARM Ltd 4 * Author: Marc Zyngier <marc.zyngier@arm.com> 5 */ 6 7 #include <linux/arm-smccc.h> 8 #include <linux/preempt.h> 9 #include <linux/kvm_host.h> 10 #include <linux/uaccess.h> 11 #include <linux/wait.h> 12 13 #include <asm/cputype.h> 14 #include <asm/kvm_emulate.h> 15 16 #include <kvm/arm_psci.h> 17 #include <kvm/arm_hypercalls.h> 18 19 /* 20 * This is an implementation of the Power State Coordination Interface 21 * as described in ARM document number ARM DEN 0022A. 22 */ 23 24 #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) 25 26 static unsigned long psci_affinity_mask(unsigned long affinity_level) 27 { 28 if (affinity_level <= 3) 29 return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); 30 31 return 0; 32 } 33 34 static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) 35 { 36 /* 37 * NOTE: For simplicity, we make VCPU suspend emulation to be 38 * same-as WFI (Wait-for-interrupt) emulation. 39 * 40 * This means for KVM the wakeup events are interrupts and 41 * this is consistent with intended use of StateID as described 42 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). 43 * 44 * Further, we also treat power-down request to be same as 45 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 46 * specification (ARM DEN 0022A). This means all suspend states 47 * for KVM will preserve the register state. 48 */ 49 kvm_vcpu_block(vcpu); 50 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 51 52 return PSCI_RET_SUCCESS; 53 } 54 55 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) 56 { 57 vcpu->arch.power_off = true; 58 kvm_make_request(KVM_REQ_SLEEP, vcpu); 59 kvm_vcpu_kick(vcpu); 60 } 61 62 static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, 63 unsigned long affinity) 64 { 65 return !(affinity & ~MPIDR_HWID_BITMASK); 66 } 67 68 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 69 { 70 struct vcpu_reset_state *reset_state; 71 struct kvm *kvm = source_vcpu->kvm; 72 struct kvm_vcpu *vcpu = NULL; 73 unsigned long cpu_id; 74 75 cpu_id = smccc_get_arg1(source_vcpu); 76 if (!kvm_psci_valid_affinity(source_vcpu, cpu_id)) 77 return PSCI_RET_INVALID_PARAMS; 78 79 vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); 80 81 /* 82 * Make sure the caller requested a valid CPU and that the CPU is 83 * turned off. 84 */ 85 if (!vcpu) 86 return PSCI_RET_INVALID_PARAMS; 87 if (!vcpu->arch.power_off) { 88 if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) 89 return PSCI_RET_ALREADY_ON; 90 else 91 return PSCI_RET_INVALID_PARAMS; 92 } 93 94 reset_state = &vcpu->arch.reset_state; 95 96 reset_state->pc = smccc_get_arg2(source_vcpu); 97 98 /* Propagate caller endianness */ 99 reset_state->be = kvm_vcpu_is_be(source_vcpu); 100 101 /* 102 * NOTE: We always update r0 (or x0) because for PSCI v0.1 103 * the general purpose registers are undefined upon CPU_ON. 104 */ 105 reset_state->r0 = smccc_get_arg3(source_vcpu); 106 107 WRITE_ONCE(reset_state->reset, true); 108 kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); 109 110 /* 111 * Make sure the reset request is observed if the change to 112 * power_state is observed. 113 */ 114 smp_wmb(); 115 116 vcpu->arch.power_off = false; 117 kvm_vcpu_wake_up(vcpu); 118 119 return PSCI_RET_SUCCESS; 120 } 121 122 static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) 123 { 124 int i, matching_cpus = 0; 125 unsigned long mpidr; 126 unsigned long target_affinity; 127 unsigned long target_affinity_mask; 128 unsigned long lowest_affinity_level; 129 struct kvm *kvm = vcpu->kvm; 130 struct kvm_vcpu *tmp; 131 132 target_affinity = smccc_get_arg1(vcpu); 133 lowest_affinity_level = smccc_get_arg2(vcpu); 134 135 if (!kvm_psci_valid_affinity(vcpu, target_affinity)) 136 return PSCI_RET_INVALID_PARAMS; 137 138 /* Determine target affinity mask */ 139 target_affinity_mask = psci_affinity_mask(lowest_affinity_level); 140 if (!target_affinity_mask) 141 return PSCI_RET_INVALID_PARAMS; 142 143 /* Ignore other bits of target affinity */ 144 target_affinity &= target_affinity_mask; 145 146 /* 147 * If one or more VCPU matching target affinity are running 148 * then ON else OFF 149 */ 150 kvm_for_each_vcpu(i, tmp, kvm) { 151 mpidr = kvm_vcpu_get_mpidr_aff(tmp); 152 if ((mpidr & target_affinity_mask) == target_affinity) { 153 matching_cpus++; 154 if (!tmp->arch.power_off) 155 return PSCI_0_2_AFFINITY_LEVEL_ON; 156 } 157 } 158 159 if (!matching_cpus) 160 return PSCI_RET_INVALID_PARAMS; 161 162 return PSCI_0_2_AFFINITY_LEVEL_OFF; 163 } 164 165 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) 166 { 167 int i; 168 struct kvm_vcpu *tmp; 169 170 /* 171 * The KVM ABI specifies that a system event exit may call KVM_RUN 172 * again and may perform shutdown/reboot at a later time that when the 173 * actual request is made. Since we are implementing PSCI and a 174 * caller of PSCI reboot and shutdown expects that the system shuts 175 * down or reboots immediately, let's make sure that VCPUs are not run 176 * after this call is handled and before the VCPUs have been 177 * re-initialized. 178 */ 179 kvm_for_each_vcpu(i, tmp, vcpu->kvm) 180 tmp->arch.power_off = true; 181 kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); 182 183 memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); 184 vcpu->run->system_event.type = type; 185 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 186 } 187 188 static void kvm_psci_system_off(struct kvm_vcpu *vcpu) 189 { 190 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); 191 } 192 193 static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) 194 { 195 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); 196 } 197 198 static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) 199 { 200 int i; 201 202 /* 203 * Zero the input registers' upper 32 bits. They will be fully 204 * zeroed on exit, so we're fine changing them in place. 205 */ 206 for (i = 1; i < 4; i++) 207 vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); 208 } 209 210 static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) 211 { 212 switch(fn) { 213 case PSCI_0_2_FN64_CPU_SUSPEND: 214 case PSCI_0_2_FN64_CPU_ON: 215 case PSCI_0_2_FN64_AFFINITY_INFO: 216 /* Disallow these functions for 32bit guests */ 217 if (vcpu_mode_is_32bit(vcpu)) 218 return PSCI_RET_NOT_SUPPORTED; 219 break; 220 } 221 222 return 0; 223 } 224 225 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) 226 { 227 struct kvm *kvm = vcpu->kvm; 228 u32 psci_fn = smccc_get_function(vcpu); 229 unsigned long val; 230 int ret = 1; 231 232 val = kvm_psci_check_allowed_function(vcpu, psci_fn); 233 if (val) 234 goto out; 235 236 switch (psci_fn) { 237 case PSCI_0_2_FN_PSCI_VERSION: 238 /* 239 * Bits[31:16] = Major Version = 0 240 * Bits[15:0] = Minor Version = 2 241 */ 242 val = KVM_ARM_PSCI_0_2; 243 break; 244 case PSCI_0_2_FN_CPU_SUSPEND: 245 case PSCI_0_2_FN64_CPU_SUSPEND: 246 val = kvm_psci_vcpu_suspend(vcpu); 247 break; 248 case PSCI_0_2_FN_CPU_OFF: 249 kvm_psci_vcpu_off(vcpu); 250 val = PSCI_RET_SUCCESS; 251 break; 252 case PSCI_0_2_FN_CPU_ON: 253 kvm_psci_narrow_to_32bit(vcpu); 254 fallthrough; 255 case PSCI_0_2_FN64_CPU_ON: 256 mutex_lock(&kvm->lock); 257 val = kvm_psci_vcpu_on(vcpu); 258 mutex_unlock(&kvm->lock); 259 break; 260 case PSCI_0_2_FN_AFFINITY_INFO: 261 kvm_psci_narrow_to_32bit(vcpu); 262 fallthrough; 263 case PSCI_0_2_FN64_AFFINITY_INFO: 264 val = kvm_psci_vcpu_affinity_info(vcpu); 265 break; 266 case PSCI_0_2_FN_MIGRATE_INFO_TYPE: 267 /* 268 * Trusted OS is MP hence does not require migration 269 * or 270 * Trusted OS is not present 271 */ 272 val = PSCI_0_2_TOS_MP; 273 break; 274 case PSCI_0_2_FN_SYSTEM_OFF: 275 kvm_psci_system_off(vcpu); 276 /* 277 * We shouldn't be going back to guest VCPU after 278 * receiving SYSTEM_OFF request. 279 * 280 * If user space accidentally/deliberately resumes 281 * guest VCPU after SYSTEM_OFF request then guest 282 * VCPU should see internal failure from PSCI return 283 * value. To achieve this, we preload r0 (or x0) with 284 * PSCI return value INTERNAL_FAILURE. 285 */ 286 val = PSCI_RET_INTERNAL_FAILURE; 287 ret = 0; 288 break; 289 case PSCI_0_2_FN_SYSTEM_RESET: 290 kvm_psci_system_reset(vcpu); 291 /* 292 * Same reason as SYSTEM_OFF for preloading r0 (or x0) 293 * with PSCI return value INTERNAL_FAILURE. 294 */ 295 val = PSCI_RET_INTERNAL_FAILURE; 296 ret = 0; 297 break; 298 default: 299 val = PSCI_RET_NOT_SUPPORTED; 300 break; 301 } 302 303 out: 304 smccc_set_retval(vcpu, val, 0, 0, 0); 305 return ret; 306 } 307 308 static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) 309 { 310 u32 psci_fn = smccc_get_function(vcpu); 311 u32 feature; 312 unsigned long val; 313 int ret = 1; 314 315 switch(psci_fn) { 316 case PSCI_0_2_FN_PSCI_VERSION: 317 val = KVM_ARM_PSCI_1_0; 318 break; 319 case PSCI_1_0_FN_PSCI_FEATURES: 320 feature = smccc_get_arg1(vcpu); 321 val = kvm_psci_check_allowed_function(vcpu, feature); 322 if (val) 323 break; 324 325 switch(feature) { 326 case PSCI_0_2_FN_PSCI_VERSION: 327 case PSCI_0_2_FN_CPU_SUSPEND: 328 case PSCI_0_2_FN64_CPU_SUSPEND: 329 case PSCI_0_2_FN_CPU_OFF: 330 case PSCI_0_2_FN_CPU_ON: 331 case PSCI_0_2_FN64_CPU_ON: 332 case PSCI_0_2_FN_AFFINITY_INFO: 333 case PSCI_0_2_FN64_AFFINITY_INFO: 334 case PSCI_0_2_FN_MIGRATE_INFO_TYPE: 335 case PSCI_0_2_FN_SYSTEM_OFF: 336 case PSCI_0_2_FN_SYSTEM_RESET: 337 case PSCI_1_0_FN_PSCI_FEATURES: 338 case ARM_SMCCC_VERSION_FUNC_ID: 339 val = 0; 340 break; 341 default: 342 val = PSCI_RET_NOT_SUPPORTED; 343 break; 344 } 345 break; 346 default: 347 return kvm_psci_0_2_call(vcpu); 348 } 349 350 smccc_set_retval(vcpu, val, 0, 0, 0); 351 return ret; 352 } 353 354 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) 355 { 356 struct kvm *kvm = vcpu->kvm; 357 u32 psci_fn = smccc_get_function(vcpu); 358 unsigned long val; 359 360 switch (psci_fn) { 361 case KVM_PSCI_FN_CPU_OFF: 362 kvm_psci_vcpu_off(vcpu); 363 val = PSCI_RET_SUCCESS; 364 break; 365 case KVM_PSCI_FN_CPU_ON: 366 mutex_lock(&kvm->lock); 367 val = kvm_psci_vcpu_on(vcpu); 368 mutex_unlock(&kvm->lock); 369 break; 370 default: 371 val = PSCI_RET_NOT_SUPPORTED; 372 break; 373 } 374 375 smccc_set_retval(vcpu, val, 0, 0, 0); 376 return 1; 377 } 378 379 /** 380 * kvm_psci_call - handle PSCI call if r0 value is in range 381 * @vcpu: Pointer to the VCPU struct 382 * 383 * Handle PSCI calls from guests through traps from HVC instructions. 384 * The calling convention is similar to SMC calls to the secure world 385 * where the function number is placed in r0. 386 * 387 * This function returns: > 0 (success), 0 (success but exit to user 388 * space), and < 0 (errors) 389 * 390 * Errors: 391 * -EINVAL: Unrecognized PSCI function 392 */ 393 int kvm_psci_call(struct kvm_vcpu *vcpu) 394 { 395 switch (kvm_psci_version(vcpu, vcpu->kvm)) { 396 case KVM_ARM_PSCI_1_0: 397 return kvm_psci_1_0_call(vcpu); 398 case KVM_ARM_PSCI_0_2: 399 return kvm_psci_0_2_call(vcpu); 400 case KVM_ARM_PSCI_0_1: 401 return kvm_psci_0_1_call(vcpu); 402 default: 403 return -EINVAL; 404 }; 405 } 406 407 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) 408 { 409 return 3; /* PSCI version and two workaround registers */ 410 } 411 412 int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) 413 { 414 if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++)) 415 return -EFAULT; 416 417 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++)) 418 return -EFAULT; 419 420 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) 421 return -EFAULT; 422 423 return 0; 424 } 425 426 #define KVM_REG_FEATURE_LEVEL_WIDTH 4 427 #define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1) 428 429 /* 430 * Convert the workaround level into an easy-to-compare number, where higher 431 * values mean better protection. 432 */ 433 static int get_kernel_wa_level(u64 regid) 434 { 435 switch (regid) { 436 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 437 switch (arm64_get_spectre_v2_state()) { 438 case SPECTRE_VULNERABLE: 439 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; 440 case SPECTRE_MITIGATED: 441 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; 442 case SPECTRE_UNAFFECTED: 443 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; 444 } 445 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; 446 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 447 switch (arm64_get_spectre_v4_state()) { 448 case SPECTRE_MITIGATED: 449 /* 450 * As for the hypercall discovery, we pretend we 451 * don't have any FW mitigation if SSBS is there at 452 * all times. 453 */ 454 if (cpus_have_final_cap(ARM64_SSBS)) 455 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 456 fallthrough; 457 case SPECTRE_UNAFFECTED: 458 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; 459 case SPECTRE_VULNERABLE: 460 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 461 } 462 } 463 464 return -EINVAL; 465 } 466 467 int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 468 { 469 void __user *uaddr = (void __user *)(long)reg->addr; 470 u64 val; 471 472 switch (reg->id) { 473 case KVM_REG_ARM_PSCI_VERSION: 474 val = kvm_psci_version(vcpu, vcpu->kvm); 475 break; 476 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 477 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 478 val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; 479 break; 480 default: 481 return -ENOENT; 482 } 483 484 if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) 485 return -EFAULT; 486 487 return 0; 488 } 489 490 int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 491 { 492 void __user *uaddr = (void __user *)(long)reg->addr; 493 u64 val; 494 int wa_level; 495 496 if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) 497 return -EFAULT; 498 499 switch (reg->id) { 500 case KVM_REG_ARM_PSCI_VERSION: 501 { 502 bool wants_02; 503 504 wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); 505 506 switch (val) { 507 case KVM_ARM_PSCI_0_1: 508 if (wants_02) 509 return -EINVAL; 510 vcpu->kvm->arch.psci_version = val; 511 return 0; 512 case KVM_ARM_PSCI_0_2: 513 case KVM_ARM_PSCI_1_0: 514 if (!wants_02) 515 return -EINVAL; 516 vcpu->kvm->arch.psci_version = val; 517 return 0; 518 } 519 break; 520 } 521 522 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 523 if (val & ~KVM_REG_FEATURE_LEVEL_MASK) 524 return -EINVAL; 525 526 if (get_kernel_wa_level(reg->id) < val) 527 return -EINVAL; 528 529 return 0; 530 531 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 532 if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | 533 KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) 534 return -EINVAL; 535 536 /* The enabled bit must not be set unless the level is AVAIL. */ 537 if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) && 538 (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL) 539 return -EINVAL; 540 541 /* 542 * Map all the possible incoming states to the only two we 543 * really want to deal with. 544 */ 545 switch (val & KVM_REG_FEATURE_LEVEL_MASK) { 546 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: 547 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: 548 wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 549 break; 550 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: 551 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: 552 wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; 553 break; 554 default: 555 return -EINVAL; 556 } 557 558 /* 559 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the 560 * other way around. 561 */ 562 if (get_kernel_wa_level(reg->id) < wa_level) 563 return -EINVAL; 564 565 return 0; 566 default: 567 return -ENOENT; 568 } 569 570 return -EINVAL; 571 } 572