1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2012 - ARM Ltd 4 * Author: Marc Zyngier <marc.zyngier@arm.com> 5 */ 6 7 #include <linux/arm-smccc.h> 8 #include <linux/preempt.h> 9 #include <linux/kvm_host.h> 10 #include <linux/uaccess.h> 11 #include <linux/wait.h> 12 13 #include <asm/cputype.h> 14 #include <asm/kvm_emulate.h> 15 16 #include <kvm/arm_psci.h> 17 #include <kvm/arm_hypercalls.h> 18 19 /* 20 * This is an implementation of the Power State Coordination Interface 21 * as described in ARM document number ARM DEN 0022A. 22 */ 23 24 #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) 25 26 static unsigned long psci_affinity_mask(unsigned long affinity_level) 27 { 28 if (affinity_level <= 3) 29 return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); 30 31 return 0; 32 } 33 34 static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) 35 { 36 /* 37 * NOTE: For simplicity, we make VCPU suspend emulation to be 38 * same-as WFI (Wait-for-interrupt) emulation. 39 * 40 * This means for KVM the wakeup events are interrupts and 41 * this is consistent with intended use of StateID as described 42 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). 43 * 44 * Further, we also treat power-down request to be same as 45 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 46 * specification (ARM DEN 0022A). This means all suspend states 47 * for KVM will preserve the register state. 48 */ 49 kvm_vcpu_block(vcpu); 50 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 51 52 return PSCI_RET_SUCCESS; 53 } 54 55 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) 56 { 57 vcpu->arch.power_off = true; 58 kvm_make_request(KVM_REQ_SLEEP, vcpu); 59 kvm_vcpu_kick(vcpu); 60 } 61 62 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 63 { 64 struct vcpu_reset_state *reset_state; 65 struct kvm *kvm = source_vcpu->kvm; 66 struct kvm_vcpu *vcpu = NULL; 67 unsigned long cpu_id; 68 69 cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; 70 if (vcpu_mode_is_32bit(source_vcpu)) 71 cpu_id &= ~((u32) 0); 72 73 vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); 74 75 /* 76 * Make sure the caller requested a valid CPU and that the CPU is 77 * turned off. 78 */ 79 if (!vcpu) 80 return PSCI_RET_INVALID_PARAMS; 81 if (!vcpu->arch.power_off) { 82 if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) 83 return PSCI_RET_ALREADY_ON; 84 else 85 return PSCI_RET_INVALID_PARAMS; 86 } 87 88 reset_state = &vcpu->arch.reset_state; 89 90 reset_state->pc = smccc_get_arg2(source_vcpu); 91 92 /* Propagate caller endianness */ 93 reset_state->be = kvm_vcpu_is_be(source_vcpu); 94 95 /* 96 * NOTE: We always update r0 (or x0) because for PSCI v0.1 97 * the general purpose registers are undefined upon CPU_ON. 98 */ 99 reset_state->r0 = smccc_get_arg3(source_vcpu); 100 101 WRITE_ONCE(reset_state->reset, true); 102 kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); 103 104 /* 105 * Make sure the reset request is observed if the change to 106 * power_state is observed. 107 */ 108 smp_wmb(); 109 110 vcpu->arch.power_off = false; 111 kvm_vcpu_wake_up(vcpu); 112 113 return PSCI_RET_SUCCESS; 114 } 115 116 static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) 117 { 118 int i, matching_cpus = 0; 119 unsigned long mpidr; 120 unsigned long target_affinity; 121 unsigned long target_affinity_mask; 122 unsigned long lowest_affinity_level; 123 struct kvm *kvm = vcpu->kvm; 124 struct kvm_vcpu *tmp; 125 126 target_affinity = smccc_get_arg1(vcpu); 127 lowest_affinity_level = smccc_get_arg2(vcpu); 128 129 /* Determine target affinity mask */ 130 target_affinity_mask = psci_affinity_mask(lowest_affinity_level); 131 if (!target_affinity_mask) 132 return PSCI_RET_INVALID_PARAMS; 133 134 /* Ignore other bits of target affinity */ 135 target_affinity &= target_affinity_mask; 136 137 /* 138 * If one or more VCPU matching target affinity are running 139 * then ON else OFF 140 */ 141 kvm_for_each_vcpu(i, tmp, kvm) { 142 mpidr = kvm_vcpu_get_mpidr_aff(tmp); 143 if ((mpidr & target_affinity_mask) == target_affinity) { 144 matching_cpus++; 145 if (!tmp->arch.power_off) 146 return PSCI_0_2_AFFINITY_LEVEL_ON; 147 } 148 } 149 150 if (!matching_cpus) 151 return PSCI_RET_INVALID_PARAMS; 152 153 return PSCI_0_2_AFFINITY_LEVEL_OFF; 154 } 155 156 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) 157 { 158 int i; 159 struct kvm_vcpu *tmp; 160 161 /* 162 * The KVM ABI specifies that a system event exit may call KVM_RUN 163 * again and may perform shutdown/reboot at a later time that when the 164 * actual request is made. Since we are implementing PSCI and a 165 * caller of PSCI reboot and shutdown expects that the system shuts 166 * down or reboots immediately, let's make sure that VCPUs are not run 167 * after this call is handled and before the VCPUs have been 168 * re-initialized. 169 */ 170 kvm_for_each_vcpu(i, tmp, vcpu->kvm) 171 tmp->arch.power_off = true; 172 kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); 173 174 memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); 175 vcpu->run->system_event.type = type; 176 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 177 } 178 179 static void kvm_psci_system_off(struct kvm_vcpu *vcpu) 180 { 181 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); 182 } 183 184 static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) 185 { 186 kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); 187 } 188 189 static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) 190 { 191 int i; 192 193 /* 194 * Zero the input registers' upper 32 bits. They will be fully 195 * zeroed on exit, so we're fine changing them in place. 196 */ 197 for (i = 1; i < 4; i++) 198 vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); 199 } 200 201 static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) 202 { 203 switch(fn) { 204 case PSCI_0_2_FN64_CPU_SUSPEND: 205 case PSCI_0_2_FN64_CPU_ON: 206 case PSCI_0_2_FN64_AFFINITY_INFO: 207 /* Disallow these functions for 32bit guests */ 208 if (vcpu_mode_is_32bit(vcpu)) 209 return PSCI_RET_NOT_SUPPORTED; 210 break; 211 } 212 213 return 0; 214 } 215 216 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) 217 { 218 struct kvm *kvm = vcpu->kvm; 219 u32 psci_fn = smccc_get_function(vcpu); 220 unsigned long val; 221 int ret = 1; 222 223 val = kvm_psci_check_allowed_function(vcpu, psci_fn); 224 if (val) 225 goto out; 226 227 switch (psci_fn) { 228 case PSCI_0_2_FN_PSCI_VERSION: 229 /* 230 * Bits[31:16] = Major Version = 0 231 * Bits[15:0] = Minor Version = 2 232 */ 233 val = KVM_ARM_PSCI_0_2; 234 break; 235 case PSCI_0_2_FN_CPU_SUSPEND: 236 case PSCI_0_2_FN64_CPU_SUSPEND: 237 val = kvm_psci_vcpu_suspend(vcpu); 238 break; 239 case PSCI_0_2_FN_CPU_OFF: 240 kvm_psci_vcpu_off(vcpu); 241 val = PSCI_RET_SUCCESS; 242 break; 243 case PSCI_0_2_FN_CPU_ON: 244 kvm_psci_narrow_to_32bit(vcpu); 245 fallthrough; 246 case PSCI_0_2_FN64_CPU_ON: 247 mutex_lock(&kvm->lock); 248 val = kvm_psci_vcpu_on(vcpu); 249 mutex_unlock(&kvm->lock); 250 break; 251 case PSCI_0_2_FN_AFFINITY_INFO: 252 kvm_psci_narrow_to_32bit(vcpu); 253 fallthrough; 254 case PSCI_0_2_FN64_AFFINITY_INFO: 255 val = kvm_psci_vcpu_affinity_info(vcpu); 256 break; 257 case PSCI_0_2_FN_MIGRATE_INFO_TYPE: 258 /* 259 * Trusted OS is MP hence does not require migration 260 * or 261 * Trusted OS is not present 262 */ 263 val = PSCI_0_2_TOS_MP; 264 break; 265 case PSCI_0_2_FN_SYSTEM_OFF: 266 kvm_psci_system_off(vcpu); 267 /* 268 * We shouldn't be going back to guest VCPU after 269 * receiving SYSTEM_OFF request. 270 * 271 * If user space accidentally/deliberately resumes 272 * guest VCPU after SYSTEM_OFF request then guest 273 * VCPU should see internal failure from PSCI return 274 * value. To achieve this, we preload r0 (or x0) with 275 * PSCI return value INTERNAL_FAILURE. 276 */ 277 val = PSCI_RET_INTERNAL_FAILURE; 278 ret = 0; 279 break; 280 case PSCI_0_2_FN_SYSTEM_RESET: 281 kvm_psci_system_reset(vcpu); 282 /* 283 * Same reason as SYSTEM_OFF for preloading r0 (or x0) 284 * with PSCI return value INTERNAL_FAILURE. 285 */ 286 val = PSCI_RET_INTERNAL_FAILURE; 287 ret = 0; 288 break; 289 default: 290 val = PSCI_RET_NOT_SUPPORTED; 291 break; 292 } 293 294 out: 295 smccc_set_retval(vcpu, val, 0, 0, 0); 296 return ret; 297 } 298 299 static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) 300 { 301 u32 psci_fn = smccc_get_function(vcpu); 302 u32 feature; 303 unsigned long val; 304 int ret = 1; 305 306 switch(psci_fn) { 307 case PSCI_0_2_FN_PSCI_VERSION: 308 val = KVM_ARM_PSCI_1_0; 309 break; 310 case PSCI_1_0_FN_PSCI_FEATURES: 311 feature = smccc_get_arg1(vcpu); 312 val = kvm_psci_check_allowed_function(vcpu, feature); 313 if (val) 314 break; 315 316 switch(feature) { 317 case PSCI_0_2_FN_PSCI_VERSION: 318 case PSCI_0_2_FN_CPU_SUSPEND: 319 case PSCI_0_2_FN64_CPU_SUSPEND: 320 case PSCI_0_2_FN_CPU_OFF: 321 case PSCI_0_2_FN_CPU_ON: 322 case PSCI_0_2_FN64_CPU_ON: 323 case PSCI_0_2_FN_AFFINITY_INFO: 324 case PSCI_0_2_FN64_AFFINITY_INFO: 325 case PSCI_0_2_FN_MIGRATE_INFO_TYPE: 326 case PSCI_0_2_FN_SYSTEM_OFF: 327 case PSCI_0_2_FN_SYSTEM_RESET: 328 case PSCI_1_0_FN_PSCI_FEATURES: 329 case ARM_SMCCC_VERSION_FUNC_ID: 330 val = 0; 331 break; 332 default: 333 val = PSCI_RET_NOT_SUPPORTED; 334 break; 335 } 336 break; 337 default: 338 return kvm_psci_0_2_call(vcpu); 339 } 340 341 smccc_set_retval(vcpu, val, 0, 0, 0); 342 return ret; 343 } 344 345 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) 346 { 347 struct kvm *kvm = vcpu->kvm; 348 u32 psci_fn = smccc_get_function(vcpu); 349 unsigned long val; 350 351 switch (psci_fn) { 352 case KVM_PSCI_FN_CPU_OFF: 353 kvm_psci_vcpu_off(vcpu); 354 val = PSCI_RET_SUCCESS; 355 break; 356 case KVM_PSCI_FN_CPU_ON: 357 mutex_lock(&kvm->lock); 358 val = kvm_psci_vcpu_on(vcpu); 359 mutex_unlock(&kvm->lock); 360 break; 361 default: 362 val = PSCI_RET_NOT_SUPPORTED; 363 break; 364 } 365 366 smccc_set_retval(vcpu, val, 0, 0, 0); 367 return 1; 368 } 369 370 /** 371 * kvm_psci_call - handle PSCI call if r0 value is in range 372 * @vcpu: Pointer to the VCPU struct 373 * 374 * Handle PSCI calls from guests through traps from HVC instructions. 375 * The calling convention is similar to SMC calls to the secure world 376 * where the function number is placed in r0. 377 * 378 * This function returns: > 0 (success), 0 (success but exit to user 379 * space), and < 0 (errors) 380 * 381 * Errors: 382 * -EINVAL: Unrecognized PSCI function 383 */ 384 int kvm_psci_call(struct kvm_vcpu *vcpu) 385 { 386 switch (kvm_psci_version(vcpu, vcpu->kvm)) { 387 case KVM_ARM_PSCI_1_0: 388 return kvm_psci_1_0_call(vcpu); 389 case KVM_ARM_PSCI_0_2: 390 return kvm_psci_0_2_call(vcpu); 391 case KVM_ARM_PSCI_0_1: 392 return kvm_psci_0_1_call(vcpu); 393 default: 394 return -EINVAL; 395 }; 396 } 397 398 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) 399 { 400 return 3; /* PSCI version and two workaround registers */ 401 } 402 403 int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) 404 { 405 if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++)) 406 return -EFAULT; 407 408 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++)) 409 return -EFAULT; 410 411 if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) 412 return -EFAULT; 413 414 return 0; 415 } 416 417 #define KVM_REG_FEATURE_LEVEL_WIDTH 4 418 #define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1) 419 420 /* 421 * Convert the workaround level into an easy-to-compare number, where higher 422 * values mean better protection. 423 */ 424 static int get_kernel_wa_level(u64 regid) 425 { 426 switch (regid) { 427 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 428 switch (arm64_get_spectre_v2_state()) { 429 case SPECTRE_VULNERABLE: 430 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; 431 case SPECTRE_MITIGATED: 432 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; 433 case SPECTRE_UNAFFECTED: 434 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; 435 } 436 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; 437 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 438 switch (arm64_get_spectre_v4_state()) { 439 case SPECTRE_MITIGATED: 440 /* 441 * As for the hypercall discovery, we pretend we 442 * don't have any FW mitigation if SSBS is there at 443 * all times. 444 */ 445 if (cpus_have_final_cap(ARM64_SSBS)) 446 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 447 fallthrough; 448 case SPECTRE_UNAFFECTED: 449 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; 450 case SPECTRE_VULNERABLE: 451 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 452 } 453 } 454 455 return -EINVAL; 456 } 457 458 int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 459 { 460 void __user *uaddr = (void __user *)(long)reg->addr; 461 u64 val; 462 463 switch (reg->id) { 464 case KVM_REG_ARM_PSCI_VERSION: 465 val = kvm_psci_version(vcpu, vcpu->kvm); 466 break; 467 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 468 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 469 val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; 470 break; 471 default: 472 return -ENOENT; 473 } 474 475 if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) 476 return -EFAULT; 477 478 return 0; 479 } 480 481 int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 482 { 483 void __user *uaddr = (void __user *)(long)reg->addr; 484 u64 val; 485 int wa_level; 486 487 if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) 488 return -EFAULT; 489 490 switch (reg->id) { 491 case KVM_REG_ARM_PSCI_VERSION: 492 { 493 bool wants_02; 494 495 wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); 496 497 switch (val) { 498 case KVM_ARM_PSCI_0_1: 499 if (wants_02) 500 return -EINVAL; 501 vcpu->kvm->arch.psci_version = val; 502 return 0; 503 case KVM_ARM_PSCI_0_2: 504 case KVM_ARM_PSCI_1_0: 505 if (!wants_02) 506 return -EINVAL; 507 vcpu->kvm->arch.psci_version = val; 508 return 0; 509 } 510 break; 511 } 512 513 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: 514 if (val & ~KVM_REG_FEATURE_LEVEL_MASK) 515 return -EINVAL; 516 517 if (get_kernel_wa_level(reg->id) < val) 518 return -EINVAL; 519 520 return 0; 521 522 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: 523 if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | 524 KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) 525 return -EINVAL; 526 527 /* The enabled bit must not be set unless the level is AVAIL. */ 528 if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) && 529 (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL) 530 return -EINVAL; 531 532 /* 533 * Map all the possible incoming states to the only two we 534 * really want to deal with. 535 */ 536 switch (val & KVM_REG_FEATURE_LEVEL_MASK) { 537 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: 538 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: 539 wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; 540 break; 541 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: 542 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: 543 wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; 544 break; 545 default: 546 return -EINVAL; 547 } 548 549 /* 550 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the 551 * other way around. 552 */ 553 if (get_kernel_wa_level(reg->id) < wa_level) 554 return -EINVAL; 555 556 return 0; 557 default: 558 return -ENOENT; 559 } 560 561 return -EINVAL; 562 } 563