// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>

#define KVM_PGTABLE_MAX_LEVELS		4U

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

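/*
 * kvm_granule_shift()/kvm_granule_size() map a translation level to the size
 * of the range covered by a single entry at that level. With 4KiB pages,
 * ARM64_HW_PGTABLE_LEVEL_SHIFT() yields shifts of 39, 30, 21 and 12 for
 * levels 0-3, i.e. a level-1 block covers 1GiB, a level-2 block covers 2MiB
 * and a level-3 page covers 4KiB.
 */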
static u64 kvm_granule_shift(u32 level)
{
	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static u64 kvm_granule_size(u32 level)
{
	return BIT(kvm_granule_shift(level));
}

static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	/*
	 * Reject invalid block mappings and don't bother with 4TB mappings for
	 * 52-bit PAs.
	 */
	if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
		return false;

	if (granule > (end - addr))
		return false;

	return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
}

static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}

static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}

static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}

static bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		return false;

	if (!kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

static u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa = pte & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;

	return pa;
}

static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

	return pte;
}

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
{
	return __va(kvm_pte_to_phys(pte));
}

static void kvm_set_invalid_pte(kvm_pte_t *ptep)
{
	kvm_pte_t pte = *ptep;
	WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
}

static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}

static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
				   u32 level)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa);
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	/* Tolerate KVM recreating the exact same mapping. */
	if (kvm_pte_valid(old))
		return old == pte;

	smp_store_release(ptep, pte);
	return true;
}

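/*
 * Generic walker: _kvm_pgtable_walk() iterates over the (possibly
 * concatenated) PGD pages, __kvm_pgtable_walk() iterates over the entries of
 * a single table page and __kvm_pgtable_visit() invokes the caller's callback
 * for the TABLE_PRE, LEAF and TABLE_POST events it has asked for. A non-zero
 * return value from the callback aborts the walk and is propagated back to
 * the caller of kvm_pgtable_walk().
 *
 * An illustrative caller (names are made up) looks something like:
 *
 *	static int my_walker_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 *				enum kvm_pgtable_walk_flags flag,
 *				void * const arg)
 *	{
 *		return 0;
 *	}
 *
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= my_walker_cb,
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *	};
 *
 *	kvm_pgtable_walk(pgt, addr, size, &walker);
 */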
static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;
	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);

static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}

static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}

int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}

struct hyp_map_data {
	u64		phys;
	kvm_pte_t	attr;
};

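/*
 * Convert KVM_PGTABLE_PROT_* flags into a hyp stage-1 leaf attribute
 * template: memory-type index, access permissions, shareability and the
 * access flag. Mappings must be readable, writable mappings are never
 * executable and device memory is never executable.
 */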
static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
				 struct hyp_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	data->attr = attr;
	return 0;
}

static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
	data->phys += granule;
	return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp);
	return 0;
}

int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits = va_bits;
	pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mmu = NULL;
	return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	free_page((unsigned long)kvm_pte_follow(*ptep));
	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	free_page((unsigned long)pgt->pgd);
	pgt->pgd = NULL;
}

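/*
 * State shared by the stage-2 map walker callbacks: 'phys' tracks the next
 * physical address to map and 'attr' holds the leaf attribute template.
 * 'anchor' points at a table entry that is being replaced by a block mapping;
 * see the comment above stage2_map_walker() for how it is used.
 */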
struct stage2_map_data {
	u64				phys;
	kvm_pte_t			attr;

	kvm_pte_t			*anchor;

	struct kvm_s2_mmu		*mmu;
	struct kvm_mmu_memory_cache	*memcache;
};

static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
				    struct stage2_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
			 PAGE_S2_MEMATTR(NORMAL);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
	else if (device)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	data->attr = attr;
	return 0;
}

static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				       kvm_pte_t *ptep,
				       struct stage2_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	/*
	 * If the PTE was already valid, drop the refcount on the table
	 * early, as it will be bumped-up again in stage2_map_walk_leaf().
	 * This ensures that the refcount stays constant across a valid to
	 * valid PTE update.
	 */
	if (kvm_pte_valid(*ptep))
		put_page(virt_to_page(ptep));

	if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
		goto out;

	/* There's an existing valid leaf entry, so perform break-before-make */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
	kvm_set_valid_leaf_pte(ptep, phys, data->attr, level);
out:
	data->phys += granule;
	return true;
}

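/*
 * TABLE_PRE callback: if the range covered by this table entry could instead
 * be mapped by a single block, invalidate the entry, flush the whole VMID's
 * TLB and record the entry in 'anchor' so that the table beneath it can be
 * torn down and replaced on the way back up.
 */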
static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!kvm_block_mapping_supported(addr, end, data->phys, level))
		return 0;

	kvm_set_invalid_pte(ptep);

	/*
	 * Invalidate the whole stage-2, as we may have numerous leaf
	 * entries below us which would otherwise need invalidating
	 * individually.
	 */
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
	data->anchor = ptep;
	return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	kvm_pte_t *childp, pte = *ptep;
	struct page *page = virt_to_page(ptep);

	if (data->anchor) {
		if (kvm_pte_valid(pte))
			put_page(page);

		return 0;
	}

	if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
		goto out_get_page;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = kvm_mmu_memory_cache_alloc(data->memcache);
	if (!childp)
		return -ENOMEM;

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
	 */
	if (kvm_pte_valid(pte)) {
		kvm_set_invalid_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
		put_page(page);
	}

	kvm_set_table_pte(ptep, childp);

out_get_page:
	get_page(page);
	return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	int ret = 0;

	if (!data->anchor)
		return 0;

	free_page((unsigned long)kvm_pte_follow(*ptep));
	put_page(virt_to_page(ptep));

	if (data->anchor == ptep) {
		data->anchor = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	}

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

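/*
 * Map a physically contiguous range into the guest stage-2. The walker uses
 * all three visit flags so that existing tables can be collapsed into block
 * mappings, intermediate tables are allocated from the caller-supplied
 * memory cache, and the concluding dsb(ishst) ensures the new entries are
 * visible to the hardware table walker before returning.
 */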
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   struct kvm_mmu_memory_cache *mc)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	ret = stage2_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}

static void stage2_flush_dcache(void *addr, u64 size)
{
	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return;

	__flush_dcache_area(addr, size);
}

static bool stage2_pte_cacheable(kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
	return memattr == PAGE_S2_MEMATTR(NORMAL);
}

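/*
 * Unmap walker: invalidate valid leaf entries (flushing the data cache for
 * cacheable mappings when FWB is not available) and free child tables whose
 * refcount shows that they no longer contain any mappings.
 */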
static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_s2_mmu *mmu = arg;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte))
		return 0;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte);

		if (page_count(virt_to_page(childp)) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pte)) {
		need_flush = true;
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
	 */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
	put_page(virt_to_page(ptep));

	if (need_flush) {
		stage2_flush_dcache(kvm_pte_follow(pte),
				    kvm_granule_size(level));
	}

	if (childp)
		free_page((unsigned long)childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt->mmu,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
	kvm_pte_t	attr_set;
	kvm_pte_t	attr_clr;
	kvm_pte_t	pte;
	u32		level;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;

	if (!kvm_pte_valid(pte))
		return 0;

	data->level = level;
	data->pte = pte;
	pte &= ~data->attr_clr;
	pte |= data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte)
		WRITE_ONCE(*ptep, pte);

	return 0;
}

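/*
 * Common helper for the attribute updaters below (wrprotect, mkyoung, mkold,
 * is_young and relax_perms): set/clear the requested leaf attribute bits over
 * the given range and optionally report back the original PTE and the level
 * at which it was found.
 */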
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	int ret;
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
		.arg		= &data,
		.flags		= KVM_PGTABLE_WALK_LEAF,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (ret)
		return ret;

	if (orig_pte)
		*orig_pte = data.pte;

	if (level)
		*level = data.level;
	return 0;
}

int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	return stage2_update_leaf_attrs(pgt, addr, size, 0,
					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
					NULL, NULL);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &pte, NULL);
	dsb(ishst);
	return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &pte, NULL);
	/*
	 * "But where's the TLBI?!", you scream.
	 * "Over in the core code", I sigh.
	 *
	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
	 */
	return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	int ret;
	u32 level;
	kvm_pte_t set = 0, clr = 0;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (!ret)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
		return 0;

	stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
	return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_flush_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return 0;

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
	size_t pgd_sz;
	u64 vtcr = kvm->arch.vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits = ia_bits;
	pgt->start_level = start_level;
	pgt->mmu = &kvm->arch.mmu;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return 0;

	put_page(virt_to_page(ptep));

	if (kvm_pte_table(pte, level))
		free_page((unsigned long)kvm_pte_follow(pte));

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}