// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>

#define KVM_PGTABLE_MAX_LEVELS		4U

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

static u64 kvm_granule_shift(u32 level)
{
	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static u64 kvm_granule_size(u32 level)
{
	return BIT(kvm_granule_shift(level));
}
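/*
 * Illustrative numbers, not used by the code: with 4KB pages
 * (PAGE_SHIFT == 12), ARM64_HW_PGTABLE_LEVEL_SHIFT() yields shifts of
 * 39/30/21/12 for levels 0-3, i.e. granules of 512GB, 1GB, 2MB and 4KB
 * respectively. With 64KB pages the usable levels are 1-3, with shifts
 * of 42/29/16 (4TB, 512MB and 64KB granules).
 */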
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	/*
	 * Reject invalid block mappings and don't bother with 4TB mappings for
	 * 52-bit PAs.
	 */
	if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
		return false;

	if (granule > (end - addr))
		return false;

	return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
}

static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}

static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}

static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}

static bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		return false;

	if (!kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

static u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa = pte & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;

	return pa;
}

static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

	return pte;
}

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
{
	return __va(kvm_pte_to_phys(pte));
}

static void kvm_set_invalid_pte(kvm_pte_t *ptep)
{
	kvm_pte_t pte = *ptep;

	WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
}

static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}
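/*
 * The release semantics above order the initialisation of the (zeroed)
 * child table before the store that publishes it, so a concurrent software
 * walker that observes a valid table entry also observes an initialised
 * table. Visibility to the hardware walker is handled separately by the
 * dsb(ishst) issued by the map paths once the walk has completed.
 */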
static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
				   u32 level)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa);
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	/* Tolerate KVM recreating the exact same mapping. */
	if (kvm_pte_valid(old))
		return old == pte;

	smp_store_release(ptep, pte);
	return true;
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;

	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);

static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}

static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}

int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}
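/*
 * A minimal, hypothetical example of driving the generic walker: a visitor
 * that counts the valid leaf entries in a range. The callback and variable
 * names below are purely illustrative and do not exist elsewhere in KVM:
 *
 *	static int count_leaf_walker(u64 addr, u64 end, u32 level,
 *				     kvm_pte_t *ptep,
 *				     enum kvm_pgtable_walk_flags flag,
 *				     void * const arg)
 *	{
 *		u64 *count = arg;
 *
 *		if (kvm_pte_valid(*ptep))
 *			(*count)++;
 *
 *		return 0;
 *	}
 *
 *	u64 count = 0;
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= count_leaf_walker,
 *		.arg	= &count,
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *	};
 *
 *	kvm_pgtable_walk(pgt, addr, size, &walker);
 */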
struct hyp_map_data {
	u64		phys;
	kvm_pte_t	attr;
};

static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
				 struct hyp_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	data->attr = attr;
	return 0;
}

static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
	data->phys += granule;
	return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp);
	return 0;
}

int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= va_bits;
	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mmu		= NULL;
	return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	free_page((unsigned long)kvm_pte_follow(*ptep));
	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	free_page((unsigned long)pgt->pgd);
	pgt->pgd = NULL;
}
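/*
 * Typical hyp stage-1 usage, sketched with illustrative variable names:
 * initialise a table covering 'hyp_va_bits' of VA space, map a read-write
 * data region, and eventually tear the whole thing down again:
 *
 *	struct kvm_pgtable pgt;
 *
 *	kvm_pgtable_hyp_init(&pgt, hyp_va_bits);
 *	kvm_pgtable_hyp_map(&pgt, va, size, pa,
 *			    KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W);
 *	kvm_pgtable_hyp_destroy(&pgt);
 *
 * Executable mappings must be read-only and non-device, as enforced by
 * hyp_map_set_prot_attr() above.
 */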
struct stage2_map_data {
	u64				phys;
	kvm_pte_t			attr;

	kvm_pte_t			*anchor;

	struct kvm_s2_mmu		*mmu;
	struct kvm_mmu_memory_cache	*memcache;
};

static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
				    struct stage2_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
				  PAGE_S2_MEMATTR(NORMAL);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
	else if (device)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	data->attr = attr;
	return 0;
}

static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				       kvm_pte_t *ptep,
				       struct stage2_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
		goto out;

	/* There's an existing valid leaf entry, so perform break-before-make */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
	kvm_set_valid_leaf_pte(ptep, phys, data->attr, level);
out:
	data->phys += granule;
	return true;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!kvm_block_mapping_supported(addr, end, data->phys, level))
		return 0;

	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
	data->anchor = ptep;
	return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	kvm_pte_t *childp, pte = *ptep;
	struct page *page = virt_to_page(ptep);

	if (data->anchor) {
		if (kvm_pte_valid(pte))
			put_page(page);

		return 0;
	}

	if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
		goto out_get_page;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = kvm_mmu_memory_cache_alloc(data->memcache);
	if (!childp)
		return -ENOMEM;

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
	 */
	if (kvm_pte_valid(pte)) {
		kvm_set_invalid_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
		put_page(page);
	}

	kvm_set_table_pte(ptep, childp);

out_get_page:
	get_page(page);
	return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	int ret = 0;

	if (!data->anchor)
		return 0;

	free_page((unsigned long)kvm_pte_follow(*ptep));
	put_page(virt_to_page(ptep));

	if (data->anchor == ptep) {
		data->anchor = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	}

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */
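/*
 * As a concrete, illustrative sequence (assuming 4KB pages): mapping a
 * 2MB-aligned, 2MB-sized region that is currently backed by a table of
 * 4KB leaves proceeds roughly as follows. TABLE_PRE visits the level-2
 * table entry, notices that a level-2 block would cover the whole range,
 * so it zaps the entry, invalidates the TLB and records it as the anchor.
 * The LEAF visits at level 3 then only drop the references held on the
 * detached table's leaf pages. On the way back up, TABLE_POST runs for
 * the (now invalid) anchor entry: it frees the detached level-3 table
 * page, drops its reference and, since this entry is the anchor, installs
 * the 2MB block mapping and clears the anchor.
 */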
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   struct kvm_mmu_memory_cache *mc)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	ret = stage2_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}
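/*
 * Sketch of the expected calling pattern (variable names are illustrative):
 * the stage-2 fault handler tops up a memory cache and then maps a single
 * page as RWX at the faulting IPA:
 *
 *	kvm_mmu_topup_memory_cache(&cache, kvm_mmu_cache_min_pages(kvm));
 *	...
 *	kvm_pgtable_stage2_map(pgt, fault_ipa, PAGE_SIZE, pfn << PAGE_SHIFT,
 *			       KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W |
 *			       KVM_PGTABLE_PROT_X, &cache);
 *
 * Without a memcache, stage2_map_walk_leaf() fails with -ENOMEM as soon as
 * a new table page is needed.
 */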
static void stage2_flush_dcache(void *addr, u64 size)
{
	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return;

	__flush_dcache_area(addr, size);
}

static bool stage2_pte_cacheable(kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;

	return memattr == PAGE_S2_MEMATTR(NORMAL);
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_s2_mmu *mmu = arg;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte))
		return 0;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte);

		if (page_count(virt_to_page(childp)) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pte)) {
		need_flush = true;
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
	 */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
	put_page(virt_to_page(ptep));

	if (need_flush) {
		stage2_flush_dcache(kvm_pte_follow(pte),
				    kvm_granule_size(level));
	}

	if (childp)
		free_page((unsigned long)childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt->mmu,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
	kvm_pte_t	attr_set;
	kvm_pte_t	attr_clr;
	kvm_pte_t	pte;
	u32		level;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;

	if (!kvm_pte_valid(pte))
		return 0;

	data->level = level;
	data->pte = pte;
	pte &= ~data->attr_clr;
	pte |= data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte)
		WRITE_ONCE(*ptep, pte);

	return 0;
}

static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	int ret;
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
		.arg		= &data,
		.flags		= KVM_PGTABLE_WALK_LEAF,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (ret)
		return ret;

	if (orig_pte)
		*orig_pte = data.pte;

	if (level)
		*level = data.level;
	return 0;
}

int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	return stage2_update_leaf_attrs(pgt, addr, size, 0,
					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
					NULL, NULL);
}
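/*
 * Note that, unlike kvm_pgtable_stage2_relax_perms() below, write
 * protection does not invalidate the TLB here; callers (e.g. the dirty
 * logging path) are expected to flush once the whole range has been
 * write-protected.
 */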
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &pte, NULL);
	dsb(ishst);
	return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &pte, NULL);
	/*
	 * "But where's the TLBI?!", you scream.
	 * "Over in the core code", I sigh.
	 *
	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
	 */
	return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	int ret;
	u32 level;
	kvm_pte_t set = 0, clr = 0;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (!ret)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
		return 0;

	stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
	return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_flush_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return 0;

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
	size_t pgd_sz;
	u64 vtcr = kvm->arch.vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= ia_bits;
	pgt->start_level	= start_level;
	pgt->mmu		= &kvm->arch.mmu;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return 0;

	put_page(virt_to_page(ptep));

	if (kvm_pte_table(pte, level))
		free_page((unsigned long)kvm_pte_follow(pte));

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}