// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>

#define KVM_PGTABLE_MAX_LEVELS		4U

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
					 KVM_PTE_LEAF_ATTR_HI_S2_XN)

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

static u64 kvm_granule_shift(u32 level)
{
	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static u64 kvm_granule_size(u32 level)
{
	return BIT(kvm_granule_shift(level));
}

static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	/*
	 * Reject invalid block mappings and don't bother with 4TB mappings for
	 * 52-bit PAs.
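	 * (With 64KiB pages, a level 1 block would cover 4TB.)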
	 */
	if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
		return false;

	if (granule > (end - addr))
		return false;

	return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
}

static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}

static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}

static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}

static bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		return false;

	if (!kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

static u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa = pte & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;

	return pa;
}

static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

	return pte;
}

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
{
	return __va(kvm_pte_to_phys(pte));
}

static void kvm_set_invalid_pte(kvm_pte_t *ptep)
{
	kvm_pte_t pte = *ptep;
	WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
}

static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}

static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
{
	kvm_pte_t pte = kvm_phys_to_pte(pa);
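	/* Leaves at the last level are pages; anything else is a block. */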
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	return pte;
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;
	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);

static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}

static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}

int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}

struct hyp_map_data {
	u64		phys;
	kvm_pte_t	attr;
};

static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
				 struct hyp_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
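	/*
	 * Build the stage-1 attributes: MAIR index for the memory type,
	 * inner-shareable, access flag set and AP[2] set for read-only
	 * mappings.
	 */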
	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	data->attr = attr;
	return 0;
}

static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	/* Tolerate KVM recreating the exact same mapping */
	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	if (old != new && !WARN_ON(kvm_pte_valid(old)))
		smp_store_release(ptep, new);

	data->phys += granule;
	return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp);
	return 0;
}

int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= va_bits;
	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mmu		= NULL;
	return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	free_page((unsigned long)kvm_pte_follow(*ptep));
	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	free_page((unsigned long)pgt->pgd);
	pgt->pgd = NULL;
}

struct stage2_map_data {
	u64				phys;
	kvm_pte_t			attr;

	kvm_pte_t			*anchor;

	struct kvm_s2_mmu		*mmu;
	struct kvm_mmu_memory_cache	*memcache;
};

static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
				    struct stage2_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
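	/*
	 * Build the stage-2 attributes: MemAttr for the memory type,
	 * inner-shareable, access flag set. S2AP[0] grants read access and
	 * S2AP[1] grants write access.
	 */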
	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
			    PAGE_S2_MEMATTR(NORMAL);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
	else if (device)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	data->attr = attr;
	return 0;
}

static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;
	struct page *page = virt_to_page(ptep);

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return -E2BIG;

	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	if (kvm_pte_valid(old)) {
		/*
		 * Skip updating the PTE if we are trying to recreate the exact
		 * same mapping or only change the access permissions. Instead,
		 * the vCPU will exit one more time from guest if still needed
		 * and then go through the path of relaxing permissions.
		 */
		if (!((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)))
			return -EAGAIN;

		/*
		 * There's an existing different valid leaf entry, so perform
		 * break-before-make.
		 */
		kvm_set_invalid_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
		put_page(page);
	}

	smp_store_release(ptep, new);
	get_page(page);
	data->phys += granule;
	return 0;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!kvm_block_mapping_supported(addr, end, data->phys, level))
		return 0;

	kvm_set_invalid_pte(ptep);

	/*
	 * Invalidate the whole stage-2, as we may have numerous leaf
	 * entries below us which would otherwise need invalidating
	 * individually.
	 */
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
	data->anchor = ptep;
	return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	int ret;
	kvm_pte_t *childp, pte = *ptep;
	struct page *page = virt_to_page(ptep);

	if (data->anchor) {
		if (kvm_pte_valid(pte))
			put_page(page);

		return 0;
	}

	ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
	if (ret != -E2BIG)
		return ret;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = kvm_mmu_memory_cache_alloc(data->memcache);
	if (!childp)
		return -ENOMEM;

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
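	 * The existing block is invalidated and its TLB entries are removed
	 * below before the new table entry is installed (break-before-make).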
	 */
	if (kvm_pte_valid(pte)) {
		kvm_set_invalid_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
		put_page(page);
	}

	kvm_set_table_pte(ptep, childp);
	get_page(page);

	return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	int ret = 0;

	if (!data->anchor)
		return 0;

	free_page((unsigned long)kvm_pte_follow(*ptep));
	put_page(virt_to_page(ptep));

	if (data->anchor == ptep) {
		data->anchor = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	}

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
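 *
 * For example, with 4KiB pages, replacing an existing level 2 table entry
 * with a 2MiB block runs TABLE_PRE at level 2 (anchor set, entry zapped and
 * the stage-2 TLB invalidated), LEAF at level 3 for each detached entry
 * (references on valid leaves dropped), and finally TABLE_POST at level 2,
 * which frees the old level 3 table and installs the block mapping.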
 */
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   struct kvm_mmu_memory_cache *mc)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	ret = stage2_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}

static void stage2_flush_dcache(void *addr, u64 size)
{
	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return;

	__flush_dcache_area(addr, size);
}

static bool stage2_pte_cacheable(kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
	return memattr == PAGE_S2_MEMATTR(NORMAL);
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_s2_mmu *mmu = arg;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte))
		return 0;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte);

		if (page_count(virt_to_page(childp)) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pte)) {
		need_flush = true;
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
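	 * Cacheable leaf entries have their data cache cleaned and
	 * invalidated after the TLBI, and a child table is only freed once
	 * its refcount shows that it no longer contains valid entries.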
	 */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
	put_page(virt_to_page(ptep));

	if (need_flush) {
		stage2_flush_dcache(kvm_pte_follow(pte),
				    kvm_granule_size(level));
	}

	if (childp)
		free_page((unsigned long)childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt->mmu,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
	kvm_pte_t	attr_set;
	kvm_pte_t	attr_clr;
	kvm_pte_t	pte;
	u32		level;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;

	if (!kvm_pte_valid(pte))
		return 0;

	data->level = level;
	data->pte = pte;
	pte &= ~data->attr_clr;
	pte |= data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte)
		WRITE_ONCE(*ptep, pte);

	return 0;
}

static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	int ret;
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
		.arg		= &data,
		.flags		= KVM_PGTABLE_WALK_LEAF,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (ret)
		return ret;

	if (orig_pte)
		*orig_pte = data.pte;

	if (level)
		*level = data.level;
	return 0;
}

int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	return stage2_update_leaf_attrs(pgt, addr, size, 0,
					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
					NULL, NULL);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &pte, NULL);
	dsb(ishst);
	return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &pte, NULL);
	/*
	 * "But where's the TLBI?!", you scream.
	 * "Over in the core code", I sigh.
	 *
	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
	 */
	return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	int ret;
	u32 level;
	kvm_pte_t set = 0, clr = 0;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (!ret)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
		return 0;

	stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
	return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_flush_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return 0;

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
	size_t pgd_sz;
	u64 vtcr = kvm->arch.vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= ia_bits;
	pgt->start_level	= start_level;
	pgt->mmu		= &kvm->arch.mmu;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return 0;

	put_page(virt_to_page(ptep));

	if (kvm_pte_table(pte, level))
		free_page((unsigned long)kvm_pte_follow(pte));

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}