1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2012,2013 - ARM Ltd 4 * Author: Marc Zyngier <marc.zyngier@arm.com> 5 * 6 * Derived from arch/arm/kvm/reset.c 7 * Copyright (C) 2012 - Virtual Open Systems and Columbia University 8 * Author: Christoffer Dall <c.dall@virtualopensystems.com> 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/kernel.h> 13 #include <linux/kvm_host.h> 14 #include <linux/kvm.h> 15 #include <linux/hw_breakpoint.h> 16 #include <linux/slab.h> 17 #include <linux/string.h> 18 #include <linux/types.h> 19 20 #include <kvm/arm_arch_timer.h> 21 22 #include <asm/cpufeature.h> 23 #include <asm/cputype.h> 24 #include <asm/fpsimd.h> 25 #include <asm/ptrace.h> 26 #include <asm/kvm_arm.h> 27 #include <asm/kvm_asm.h> 28 #include <asm/kvm_coproc.h> 29 #include <asm/kvm_emulate.h> 30 #include <asm/kvm_mmu.h> 31 #include <asm/virt.h> 32 33 /* Maximum phys_shift supported for any VM on this host */ 34 static u32 kvm_ipa_limit; 35 36 /* 37 * ARMv8 Reset Values 38 */ 39 static const struct kvm_regs default_regs_reset = { 40 .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | 41 PSR_F_BIT | PSR_D_BIT), 42 }; 43 44 static const struct kvm_regs default_regs_reset32 = { 45 .regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | 46 PSR_AA32_I_BIT | PSR_AA32_F_BIT), 47 }; 48 49 static bool cpu_has_32bit_el1(void) 50 { 51 u64 pfr0; 52 53 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); 54 return !!(pfr0 & 0x20); 55 } 56 57 /** 58 * kvm_arch_vm_ioctl_check_extension 59 * 60 * We currently assume that the number of HW registers is uniform 61 * across all CPUs (see cpuinfo_sanity_check). 62 */ 63 int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) 64 { 65 int r; 66 67 switch (ext) { 68 case KVM_CAP_ARM_EL1_32BIT: 69 r = cpu_has_32bit_el1(); 70 break; 71 case KVM_CAP_GUEST_DEBUG_HW_BPS: 72 r = get_num_brps(); 73 break; 74 case KVM_CAP_GUEST_DEBUG_HW_WPS: 75 r = get_num_wrps(); 76 break; 77 case KVM_CAP_ARM_PMU_V3: 78 r = kvm_arm_support_pmu_v3(); 79 break; 80 case KVM_CAP_ARM_INJECT_SERROR_ESR: 81 r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); 82 break; 83 case KVM_CAP_SET_GUEST_DEBUG: 84 case KVM_CAP_VCPU_ATTRIBUTES: 85 r = 1; 86 break; 87 case KVM_CAP_ARM_VM_IPA_SIZE: 88 r = kvm_ipa_limit; 89 break; 90 case KVM_CAP_ARM_SVE: 91 r = system_supports_sve(); 92 break; 93 case KVM_CAP_ARM_PTRAUTH_ADDRESS: 94 case KVM_CAP_ARM_PTRAUTH_GENERIC: 95 r = has_vhe() && system_supports_address_auth() && 96 system_supports_generic_auth(); 97 break; 98 default: 99 r = 0; 100 } 101 102 return r; 103 } 104 105 unsigned int kvm_sve_max_vl; 106 107 int kvm_arm_init_sve(void) 108 { 109 if (system_supports_sve()) { 110 kvm_sve_max_vl = sve_max_virtualisable_vl; 111 112 /* 113 * The get_sve_reg()/set_sve_reg() ioctl interface will need 114 * to be extended with multiple register slice support in 115 * order to support vector lengths greater than 116 * SVE_VL_ARCH_MAX: 117 */ 118 if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX)) 119 kvm_sve_max_vl = SVE_VL_ARCH_MAX; 120 121 /* 122 * Don't even try to make use of vector lengths that 123 * aren't available on all CPUs, for now: 124 */ 125 if (kvm_sve_max_vl < sve_max_vl) 126 pr_warn("KVM: SVE vector length for guests limited to %u bytes\n", 127 kvm_sve_max_vl); 128 } 129 130 return 0; 131 } 132 133 static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) 134 { 135 if (!system_supports_sve()) 136 return -EINVAL; 137 138 /* Verify that KVM startup enforced this when SVE was detected: */ 139 if (WARN_ON(!has_vhe())) 140 return -EINVAL; 141 142 vcpu->arch.sve_max_vl = kvm_sve_max_vl; 143 144 /* 145 * Userspace can still customize the vector lengths by writing 146 * KVM_REG_ARM64_SVE_VLS. Allocation is deferred until 147 * kvm_arm_vcpu_finalize(), which freezes the configuration. 148 */ 149 vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE; 150 151 return 0; 152 } 153 154 /* 155 * Finalize vcpu's maximum SVE vector length, allocating 156 * vcpu->arch.sve_state as necessary. 157 */ 158 static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) 159 { 160 void *buf; 161 unsigned int vl; 162 163 vl = vcpu->arch.sve_max_vl; 164 165 /* 166 * Resposibility for these properties is shared between 167 * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and 168 * set_sve_vls(). Double-check here just to be sure: 169 */ 170 if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl || 171 vl > SVE_VL_ARCH_MAX)) 172 return -EIO; 173 174 buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL); 175 if (!buf) 176 return -ENOMEM; 177 178 vcpu->arch.sve_state = buf; 179 vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED; 180 return 0; 181 } 182 183 int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature) 184 { 185 switch (feature) { 186 case KVM_ARM_VCPU_SVE: 187 if (!vcpu_has_sve(vcpu)) 188 return -EINVAL; 189 190 if (kvm_arm_vcpu_sve_finalized(vcpu)) 191 return -EPERM; 192 193 return kvm_vcpu_finalize_sve(vcpu); 194 } 195 196 return -EINVAL; 197 } 198 199 bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) 200 { 201 if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu)) 202 return false; 203 204 return true; 205 } 206 207 void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) 208 { 209 kfree(vcpu->arch.sve_state); 210 } 211 212 static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) 213 { 214 if (vcpu_has_sve(vcpu)) 215 memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu)); 216 } 217 218 static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) 219 { 220 /* Support ptrauth only if the system supports these capabilities. */ 221 if (!has_vhe()) 222 return -EINVAL; 223 224 if (!system_supports_address_auth() || 225 !system_supports_generic_auth()) 226 return -EINVAL; 227 /* 228 * For now make sure that both address/generic pointer authentication 229 * features are requested by the userspace together. 230 */ 231 if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || 232 !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) 233 return -EINVAL; 234 235 vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; 236 return 0; 237 } 238 239 /** 240 * kvm_reset_vcpu - sets core registers and sys_regs to reset value 241 * @vcpu: The VCPU pointer 242 * 243 * This function finds the right table above and sets the registers on 244 * the virtual CPU struct to their architecturally defined reset 245 * values, except for registers whose reset is deferred until 246 * kvm_arm_vcpu_finalize(). 247 * 248 * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT 249 * ioctl or as part of handling a request issued by another VCPU in the PSCI 250 * handling code. In the first case, the VCPU will not be loaded, and in the 251 * second case the VCPU will be loaded. Because this function operates purely 252 * on the memory-backed valus of system registers, we want to do a full put if 253 * we were loaded (handling a request) and load the values back at the end of 254 * the function. Otherwise we leave the state alone. In both cases, we 255 * disable preemption around the vcpu reset as we would otherwise race with 256 * preempt notifiers which also call put/load. 257 */ 258 int kvm_reset_vcpu(struct kvm_vcpu *vcpu) 259 { 260 const struct kvm_regs *cpu_reset; 261 int ret = -EINVAL; 262 bool loaded; 263 264 /* Reset PMU outside of the non-preemptible section */ 265 kvm_pmu_vcpu_reset(vcpu); 266 267 preempt_disable(); 268 loaded = (vcpu->cpu != -1); 269 if (loaded) 270 kvm_arch_vcpu_put(vcpu); 271 272 if (!kvm_arm_vcpu_sve_finalized(vcpu)) { 273 if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) { 274 ret = kvm_vcpu_enable_sve(vcpu); 275 if (ret) 276 goto out; 277 } 278 } else { 279 kvm_vcpu_reset_sve(vcpu); 280 } 281 282 if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || 283 test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { 284 if (kvm_vcpu_enable_ptrauth(vcpu)) 285 goto out; 286 } 287 288 switch (vcpu->arch.target) { 289 default: 290 if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { 291 if (!cpu_has_32bit_el1()) 292 goto out; 293 cpu_reset = &default_regs_reset32; 294 } else { 295 cpu_reset = &default_regs_reset; 296 } 297 298 break; 299 } 300 301 /* Reset core registers */ 302 memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset)); 303 304 /* Reset system registers */ 305 kvm_reset_sys_regs(vcpu); 306 307 /* 308 * Additional reset state handling that PSCI may have imposed on us. 309 * Must be done after all the sys_reg reset. 310 */ 311 if (vcpu->arch.reset_state.reset) { 312 unsigned long target_pc = vcpu->arch.reset_state.pc; 313 314 /* Gracefully handle Thumb2 entry point */ 315 if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { 316 target_pc &= ~1UL; 317 vcpu_set_thumb(vcpu); 318 } 319 320 /* Propagate caller endianness */ 321 if (vcpu->arch.reset_state.be) 322 kvm_vcpu_set_be(vcpu); 323 324 *vcpu_pc(vcpu) = target_pc; 325 vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0); 326 327 vcpu->arch.reset_state.reset = false; 328 } 329 330 /* Default workaround setup is enabled (if supported) */ 331 if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL) 332 vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; 333 334 /* Reset timer */ 335 ret = kvm_timer_vcpu_reset(vcpu); 336 out: 337 if (loaded) 338 kvm_arch_vcpu_load(vcpu, smp_processor_id()); 339 preempt_enable(); 340 return ret; 341 } 342 343 void kvm_set_ipa_limit(void) 344 { 345 unsigned int ipa_max, pa_max, va_max, parange; 346 347 parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7; 348 pa_max = id_aa64mmfr0_parange_to_phys_shift(parange); 349 350 /* Clamp the IPA limit to the PA size supported by the kernel */ 351 ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max; 352 /* 353 * Since our stage2 table is dependent on the stage1 page table code, 354 * we must always honor the following condition: 355 * 356 * Number of levels in Stage1 >= Number of levels in Stage2. 357 * 358 * So clamp the ipa limit further down to limit the number of levels. 359 * Since we can concatenate upto 16 tables at entry level, we could 360 * go upto 4bits above the maximum VA addressible with the current 361 * number of levels. 362 */ 363 va_max = PGDIR_SHIFT + PAGE_SHIFT - 3; 364 va_max += 4; 365 366 if (va_max < ipa_max) 367 ipa_max = va_max; 368 369 /* 370 * If the final limit is lower than the real physical address 371 * limit of the CPUs, report the reason. 372 */ 373 if (ipa_max < pa_max) 374 pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n", 375 (va_max < pa_max) ? "Virtual" : "Physical"); 376 377 WARN(ipa_max < KVM_PHYS_SHIFT, 378 "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max); 379 kvm_ipa_limit = ipa_max; 380 kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit); 381 } 382 383 /* 384 * Configure the VTCR_EL2 for this VM. The VTCR value is common 385 * across all the physical CPUs on the system. We use system wide 386 * sanitised values to fill in different fields, except for Hardware 387 * Management of Access Flags. HA Flag is set unconditionally on 388 * all CPUs, as it is safe to run with or without the feature and 389 * the bit is RES0 on CPUs that don't support it. 390 */ 391 int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) 392 { 393 u64 vtcr = VTCR_EL2_FLAGS; 394 u32 parange, phys_shift; 395 u8 lvls; 396 397 if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) 398 return -EINVAL; 399 400 phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); 401 if (phys_shift) { 402 if (phys_shift > kvm_ipa_limit || 403 phys_shift < 32) 404 return -EINVAL; 405 } else { 406 phys_shift = KVM_PHYS_SHIFT; 407 } 408 409 parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7; 410 if (parange > ID_AA64MMFR0_PARANGE_MAX) 411 parange = ID_AA64MMFR0_PARANGE_MAX; 412 vtcr |= parange << VTCR_EL2_PS_SHIFT; 413 414 vtcr |= VTCR_EL2_T0SZ(phys_shift); 415 /* 416 * Use a minimum 2 level page table to prevent splitting 417 * host PMD huge pages at stage2. 418 */ 419 lvls = stage2_pgtable_levels(phys_shift); 420 if (lvls < 2) 421 lvls = 2; 422 vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); 423 424 /* 425 * Enable the Hardware Access Flag management, unconditionally 426 * on all CPUs. The features is RES0 on CPUs without the support 427 * and must be ignored by the CPUs. 428 */ 429 vtcr |= VTCR_EL2_HA; 430 431 /* Set the vmid bits */ 432 vtcr |= (kvm_get_vmid_bits() == 16) ? 433 VTCR_EL2_VS_16BIT : 434 VTCR_EL2_VS_8BIT; 435 kvm->arch.vtcr = vtcr; 436 return 0; 437 } 438