1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * KVM/MIPS MMU handling in the KVM module. 7 * 8 * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. 9 * Authors: Sanjay Lal <sanjayl@kymasys.com> 10 */ 11 12 #include <linux/highmem.h> 13 #include <linux/kvm_host.h> 14 #include <linux/uaccess.h> 15 #include <asm/mmu_context.h> 16 #include <asm/pgalloc.h> 17 18 /* 19 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels 20 * for which pages need to be cached. 21 */ 22 #if defined(__PAGETABLE_PMD_FOLDED) 23 #define KVM_MMU_CACHE_MIN_PAGES 1 24 #else 25 #define KVM_MMU_CACHE_MIN_PAGES 2 26 #endif 27 28 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) 29 { 30 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); 31 } 32 33 /** 34 * kvm_pgd_init() - Initialise KVM GPA page directory. 35 * @page: Pointer to page directory (PGD) for KVM GPA. 36 * 37 * Initialise a KVM GPA page directory with pointers to the invalid table, i.e. 38 * representing no mappings. This is similar to pgd_init(), however it 39 * initialises all the page directory pointers, not just the ones corresponding 40 * to the userland address space (since it is for the guest physical address 41 * space rather than a virtual address space). 42 */ 43 static void kvm_pgd_init(void *page) 44 { 45 unsigned long *p, *end; 46 unsigned long entry; 47 48 #ifdef __PAGETABLE_PMD_FOLDED 49 entry = (unsigned long)invalid_pte_table; 50 #else 51 entry = (unsigned long)invalid_pmd_table; 52 #endif 53 54 p = (unsigned long *)page; 55 end = p + PTRS_PER_PGD; 56 57 do { 58 p[0] = entry; 59 p[1] = entry; 60 p[2] = entry; 61 p[3] = entry; 62 p[4] = entry; 63 p += 8; 64 p[-3] = entry; 65 p[-2] = entry; 66 p[-1] = entry; 67 } while (p != end); 68 } 69 70 /** 71 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory. 72 * 73 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical 74 * to host physical page mappings. 75 * 76 * Returns: Pointer to new KVM GPA page directory. 77 * NULL on allocation failure. 78 */ 79 pgd_t *kvm_pgd_alloc(void) 80 { 81 pgd_t *ret; 82 83 ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER); 84 if (ret) 85 kvm_pgd_init(ret); 86 87 return ret; 88 } 89 90 /** 91 * kvm_mips_walk_pgd() - Walk page table with optional allocation. 92 * @pgd: Page directory pointer. 93 * @addr: Address to index page table using. 94 * @cache: MMU page cache to allocate new page tables from, or NULL. 95 * 96 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the 97 * address @addr. If page tables don't exist for @addr, they will be created 98 * from the MMU cache if @cache is not NULL. 99 * 100 * Returns: Pointer to pte_t corresponding to @addr. 101 * NULL if a page table doesn't exist for @addr and !@cache. 102 * NULL if a page table allocation failed. 103 */ 104 static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, 105 unsigned long addr) 106 { 107 p4d_t *p4d; 108 pud_t *pud; 109 pmd_t *pmd; 110 111 pgd += pgd_index(addr); 112 if (pgd_none(*pgd)) { 113 /* Not used on MIPS yet */ 114 BUG(); 115 return NULL; 116 } 117 p4d = p4d_offset(pgd, addr); 118 pud = pud_offset(p4d, addr); 119 if (pud_none(*pud)) { 120 pmd_t *new_pmd; 121 122 if (!cache) 123 return NULL; 124 new_pmd = kvm_mmu_memory_cache_alloc(cache); 125 pmd_init((unsigned long)new_pmd, 126 (unsigned long)invalid_pte_table); 127 pud_populate(NULL, pud, new_pmd); 128 } 129 pmd = pmd_offset(pud, addr); 130 if (pmd_none(*pmd)) { 131 pte_t *new_pte; 132 133 if (!cache) 134 return NULL; 135 new_pte = kvm_mmu_memory_cache_alloc(cache); 136 clear_page(new_pte); 137 pmd_populate_kernel(NULL, pmd, new_pte); 138 } 139 return pte_offset_kernel(pmd, addr); 140 } 141 142 /* Caller must hold kvm->mm_lock */ 143 static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm, 144 struct kvm_mmu_memory_cache *cache, 145 unsigned long addr) 146 { 147 return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr); 148 } 149 150 /* 151 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}. 152 * Flush a range of guest physical address space from the VM's GPA page tables. 153 */ 154 155 static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa, 156 unsigned long end_gpa) 157 { 158 int i_min = pte_index(start_gpa); 159 int i_max = pte_index(end_gpa); 160 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); 161 int i; 162 163 for (i = i_min; i <= i_max; ++i) { 164 if (!pte_present(pte[i])) 165 continue; 166 167 set_pte(pte + i, __pte(0)); 168 } 169 return safe_to_remove; 170 } 171 172 static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, 173 unsigned long end_gpa) 174 { 175 pte_t *pte; 176 unsigned long end = ~0ul; 177 int i_min = pmd_index(start_gpa); 178 int i_max = pmd_index(end_gpa); 179 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); 180 int i; 181 182 for (i = i_min; i <= i_max; ++i, start_gpa = 0) { 183 if (!pmd_present(pmd[i])) 184 continue; 185 186 pte = pte_offset_kernel(pmd + i, 0); 187 if (i == i_max) 188 end = end_gpa; 189 190 if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) { 191 pmd_clear(pmd + i); 192 pte_free_kernel(NULL, pte); 193 } else { 194 safe_to_remove = false; 195 } 196 } 197 return safe_to_remove; 198 } 199 200 static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, 201 unsigned long end_gpa) 202 { 203 pmd_t *pmd; 204 unsigned long end = ~0ul; 205 int i_min = pud_index(start_gpa); 206 int i_max = pud_index(end_gpa); 207 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); 208 int i; 209 210 for (i = i_min; i <= i_max; ++i, start_gpa = 0) { 211 if (!pud_present(pud[i])) 212 continue; 213 214 pmd = pmd_offset(pud + i, 0); 215 if (i == i_max) 216 end = end_gpa; 217 218 if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) { 219 pud_clear(pud + i); 220 pmd_free(NULL, pmd); 221 } else { 222 safe_to_remove = false; 223 } 224 } 225 return safe_to_remove; 226 } 227 228 static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, 229 unsigned long end_gpa) 230 { 231 p4d_t *p4d; 232 pud_t *pud; 233 unsigned long end = ~0ul; 234 int i_min = pgd_index(start_gpa); 235 int i_max = pgd_index(end_gpa); 236 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); 237 int i; 238 239 for (i = i_min; i <= i_max; ++i, start_gpa = 0) { 240 if (!pgd_present(pgd[i])) 241 continue; 242 243 p4d = p4d_offset(pgd, 0); 244 pud = pud_offset(p4d + i, 0); 245 if (i == i_max) 246 end = end_gpa; 247 248 if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) { 249 pgd_clear(pgd + i); 250 pud_free(NULL, pud); 251 } else { 252 safe_to_remove = false; 253 } 254 } 255 return safe_to_remove; 256 } 257 258 /** 259 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses. 260 * @kvm: KVM pointer. 261 * @start_gfn: Guest frame number of first page in GPA range to flush. 262 * @end_gfn: Guest frame number of last page in GPA range to flush. 263 * 264 * Flushes a range of GPA mappings from the GPA page tables. 265 * 266 * The caller must hold the @kvm->mmu_lock spinlock. 267 * 268 * Returns: Whether its safe to remove the top level page directory because 269 * all lower levels have been removed. 270 */ 271 bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) 272 { 273 return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd, 274 start_gfn << PAGE_SHIFT, 275 end_gfn << PAGE_SHIFT); 276 } 277 278 #define BUILD_PTE_RANGE_OP(name, op) \ 279 static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start, \ 280 unsigned long end) \ 281 { \ 282 int ret = 0; \ 283 int i_min = pte_index(start); \ 284 int i_max = pte_index(end); \ 285 int i; \ 286 pte_t old, new; \ 287 \ 288 for (i = i_min; i <= i_max; ++i) { \ 289 if (!pte_present(pte[i])) \ 290 continue; \ 291 \ 292 old = pte[i]; \ 293 new = op(old); \ 294 if (pte_val(new) == pte_val(old)) \ 295 continue; \ 296 set_pte(pte + i, new); \ 297 ret = 1; \ 298 } \ 299 return ret; \ 300 } \ 301 \ 302 /* returns true if anything was done */ \ 303 static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \ 304 unsigned long end) \ 305 { \ 306 int ret = 0; \ 307 pte_t *pte; \ 308 unsigned long cur_end = ~0ul; \ 309 int i_min = pmd_index(start); \ 310 int i_max = pmd_index(end); \ 311 int i; \ 312 \ 313 for (i = i_min; i <= i_max; ++i, start = 0) { \ 314 if (!pmd_present(pmd[i])) \ 315 continue; \ 316 \ 317 pte = pte_offset_kernel(pmd + i, 0); \ 318 if (i == i_max) \ 319 cur_end = end; \ 320 \ 321 ret |= kvm_mips_##name##_pte(pte, start, cur_end); \ 322 } \ 323 return ret; \ 324 } \ 325 \ 326 static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \ 327 unsigned long end) \ 328 { \ 329 int ret = 0; \ 330 pmd_t *pmd; \ 331 unsigned long cur_end = ~0ul; \ 332 int i_min = pud_index(start); \ 333 int i_max = pud_index(end); \ 334 int i; \ 335 \ 336 for (i = i_min; i <= i_max; ++i, start = 0) { \ 337 if (!pud_present(pud[i])) \ 338 continue; \ 339 \ 340 pmd = pmd_offset(pud + i, 0); \ 341 if (i == i_max) \ 342 cur_end = end; \ 343 \ 344 ret |= kvm_mips_##name##_pmd(pmd, start, cur_end); \ 345 } \ 346 return ret; \ 347 } \ 348 \ 349 static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ 350 unsigned long end) \ 351 { \ 352 int ret = 0; \ 353 p4d_t *p4d; \ 354 pud_t *pud; \ 355 unsigned long cur_end = ~0ul; \ 356 int i_min = pgd_index(start); \ 357 int i_max = pgd_index(end); \ 358 int i; \ 359 \ 360 for (i = i_min; i <= i_max; ++i, start = 0) { \ 361 if (!pgd_present(pgd[i])) \ 362 continue; \ 363 \ 364 p4d = p4d_offset(pgd, 0); \ 365 pud = pud_offset(p4d + i, 0); \ 366 if (i == i_max) \ 367 cur_end = end; \ 368 \ 369 ret |= kvm_mips_##name##_pud(pud, start, cur_end); \ 370 } \ 371 return ret; \ 372 } 373 374 /* 375 * kvm_mips_mkclean_gpa_pt. 376 * Mark a range of guest physical address space clean (writes fault) in the VM's 377 * GPA page table to allow dirty page tracking. 378 */ 379 380 BUILD_PTE_RANGE_OP(mkclean, pte_mkclean) 381 382 /** 383 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean. 384 * @kvm: KVM pointer. 385 * @start_gfn: Guest frame number of first page in GPA range to flush. 386 * @end_gfn: Guest frame number of last page in GPA range to flush. 387 * 388 * Make a range of GPA mappings clean so that guest writes will fault and 389 * trigger dirty page logging. 390 * 391 * The caller must hold the @kvm->mmu_lock spinlock. 392 * 393 * Returns: Whether any GPA mappings were modified, which would require 394 * derived mappings (GVA page tables & TLB enties) to be 395 * invalidated. 396 */ 397 int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) 398 { 399 return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd, 400 start_gfn << PAGE_SHIFT, 401 end_gfn << PAGE_SHIFT); 402 } 403 404 /** 405 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages 406 * @kvm: The KVM pointer 407 * @slot: The memory slot associated with mask 408 * @gfn_offset: The gfn offset in memory slot 409 * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory 410 * slot to be write protected 411 * 412 * Walks bits set in mask write protects the associated pte's. Caller must 413 * acquire @kvm->mmu_lock. 414 */ 415 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, 416 struct kvm_memory_slot *slot, 417 gfn_t gfn_offset, unsigned long mask) 418 { 419 gfn_t base_gfn = slot->base_gfn + gfn_offset; 420 gfn_t start = base_gfn + __ffs(mask); 421 gfn_t end = base_gfn + __fls(mask); 422 423 kvm_mips_mkclean_gpa_pt(kvm, start, end); 424 } 425 426 /* 427 * kvm_mips_mkold_gpa_pt. 428 * Mark a range of guest physical address space old (all accesses fault) in the 429 * VM's GPA page table to allow detection of commonly used pages. 430 */ 431 432 BUILD_PTE_RANGE_OP(mkold, pte_mkold) 433 434 static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn, 435 gfn_t end_gfn) 436 { 437 return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd, 438 start_gfn << PAGE_SHIFT, 439 end_gfn << PAGE_SHIFT); 440 } 441 442 static int handle_hva_to_gpa(struct kvm *kvm, 443 unsigned long start, 444 unsigned long end, 445 int (*handler)(struct kvm *kvm, gfn_t gfn, 446 gpa_t gfn_end, 447 struct kvm_memory_slot *memslot, 448 void *data), 449 void *data) 450 { 451 struct kvm_memslots *slots; 452 struct kvm_memory_slot *memslot; 453 int ret = 0; 454 455 slots = kvm_memslots(kvm); 456 457 /* we only care about the pages that the guest sees */ 458 kvm_for_each_memslot(memslot, slots) { 459 unsigned long hva_start, hva_end; 460 gfn_t gfn, gfn_end; 461 462 hva_start = max(start, memslot->userspace_addr); 463 hva_end = min(end, memslot->userspace_addr + 464 (memslot->npages << PAGE_SHIFT)); 465 if (hva_start >= hva_end) 466 continue; 467 468 /* 469 * {gfn(page) | page intersects with [hva_start, hva_end)} = 470 * {gfn_start, gfn_start+1, ..., gfn_end-1}. 471 */ 472 gfn = hva_to_gfn_memslot(hva_start, memslot); 473 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 474 475 ret |= handler(kvm, gfn, gfn_end, memslot, data); 476 } 477 478 return ret; 479 } 480 481 482 static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, 483 struct kvm_memory_slot *memslot, void *data) 484 { 485 kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end); 486 return 1; 487 } 488 489 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 490 { 491 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); 492 493 kvm_mips_callbacks->flush_shadow_all(kvm); 494 return 0; 495 } 496 497 static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, 498 struct kvm_memory_slot *memslot, void *data) 499 { 500 gpa_t gpa = gfn << PAGE_SHIFT; 501 pte_t hva_pte = *(pte_t *)data; 502 pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); 503 pte_t old_pte; 504 505 if (!gpa_pte) 506 return 0; 507 508 /* Mapping may need adjusting depending on memslot flags */ 509 old_pte = *gpa_pte; 510 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) 511 hva_pte = pte_mkclean(hva_pte); 512 else if (memslot->flags & KVM_MEM_READONLY) 513 hva_pte = pte_wrprotect(hva_pte); 514 515 set_pte(gpa_pte, hva_pte); 516 517 /* Replacing an absent or old page doesn't need flushes */ 518 if (!pte_present(old_pte) || !pte_young(old_pte)) 519 return 0; 520 521 /* Pages swapped, aged, moved, or cleaned require flushes */ 522 return !pte_present(hva_pte) || 523 !pte_young(hva_pte) || 524 pte_pfn(old_pte) != pte_pfn(hva_pte) || 525 (pte_dirty(old_pte) && !pte_dirty(hva_pte)); 526 } 527 528 int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 529 { 530 unsigned long end = hva + PAGE_SIZE; 531 int ret; 532 533 ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte); 534 if (ret) 535 kvm_mips_callbacks->flush_shadow_all(kvm); 536 return 0; 537 } 538 539 static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, 540 struct kvm_memory_slot *memslot, void *data) 541 { 542 return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end); 543 } 544 545 static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, 546 struct kvm_memory_slot *memslot, void *data) 547 { 548 gpa_t gpa = gfn << PAGE_SHIFT; 549 pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); 550 551 if (!gpa_pte) 552 return 0; 553 return pte_young(*gpa_pte); 554 } 555 556 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) 557 { 558 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); 559 } 560 561 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 562 { 563 return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); 564 } 565 566 /** 567 * _kvm_mips_map_page_fast() - Fast path GPA fault handler. 568 * @vcpu: VCPU pointer. 569 * @gpa: Guest physical address of fault. 570 * @write_fault: Whether the fault was due to a write. 571 * @out_entry: New PTE for @gpa (written on success unless NULL). 572 * @out_buddy: New PTE for @gpa's buddy (written on success unless 573 * NULL). 574 * 575 * Perform fast path GPA fault handling, doing all that can be done without 576 * calling into KVM. This handles marking old pages young (for idle page 577 * tracking), and dirtying of clean pages (for dirty page logging). 578 * 579 * Returns: 0 on success, in which case we can update derived mappings and 580 * resume guest execution. 581 * -EFAULT on failure due to absent GPA mapping or write to 582 * read-only page, in which case KVM must be consulted. 583 */ 584 static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, 585 bool write_fault, 586 pte_t *out_entry, pte_t *out_buddy) 587 { 588 struct kvm *kvm = vcpu->kvm; 589 gfn_t gfn = gpa >> PAGE_SHIFT; 590 pte_t *ptep; 591 kvm_pfn_t pfn = 0; /* silence bogus GCC warning */ 592 bool pfn_valid = false; 593 int ret = 0; 594 595 spin_lock(&kvm->mmu_lock); 596 597 /* Fast path - just check GPA page table for an existing entry */ 598 ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa); 599 if (!ptep || !pte_present(*ptep)) { 600 ret = -EFAULT; 601 goto out; 602 } 603 604 /* Track access to pages marked old */ 605 if (!pte_young(*ptep)) { 606 set_pte(ptep, pte_mkyoung(*ptep)); 607 pfn = pte_pfn(*ptep); 608 pfn_valid = true; 609 /* call kvm_set_pfn_accessed() after unlock */ 610 } 611 if (write_fault && !pte_dirty(*ptep)) { 612 if (!pte_write(*ptep)) { 613 ret = -EFAULT; 614 goto out; 615 } 616 617 /* Track dirtying of writeable pages */ 618 set_pte(ptep, pte_mkdirty(*ptep)); 619 pfn = pte_pfn(*ptep); 620 mark_page_dirty(kvm, gfn); 621 kvm_set_pfn_dirty(pfn); 622 } 623 624 if (out_entry) 625 *out_entry = *ptep; 626 if (out_buddy) 627 *out_buddy = *ptep_buddy(ptep); 628 629 out: 630 spin_unlock(&kvm->mmu_lock); 631 if (pfn_valid) 632 kvm_set_pfn_accessed(pfn); 633 return ret; 634 } 635 636 /** 637 * kvm_mips_map_page() - Map a guest physical page. 638 * @vcpu: VCPU pointer. 639 * @gpa: Guest physical address of fault. 640 * @write_fault: Whether the fault was due to a write. 641 * @out_entry: New PTE for @gpa (written on success unless NULL). 642 * @out_buddy: New PTE for @gpa's buddy (written on success unless 643 * NULL). 644 * 645 * Handle GPA faults by creating a new GPA mapping (or updating an existing 646 * one). 647 * 648 * This takes care of marking pages young or dirty (idle/dirty page tracking), 649 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page 650 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the 651 * caller. 652 * 653 * Returns: 0 on success, in which case the caller may use the @out_entry 654 * and @out_buddy PTEs to update derived mappings and resume guest 655 * execution. 656 * -EFAULT if there is no memory region at @gpa or a write was 657 * attempted to a read-only memory region. This is usually handled 658 * as an MMIO access. 659 */ 660 static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, 661 bool write_fault, 662 pte_t *out_entry, pte_t *out_buddy) 663 { 664 struct kvm *kvm = vcpu->kvm; 665 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 666 gfn_t gfn = gpa >> PAGE_SHIFT; 667 int srcu_idx, err; 668 kvm_pfn_t pfn; 669 pte_t *ptep, entry, old_pte; 670 bool writeable; 671 unsigned long prot_bits; 672 unsigned long mmu_seq; 673 674 /* Try the fast path to handle old / clean pages */ 675 srcu_idx = srcu_read_lock(&kvm->srcu); 676 err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry, 677 out_buddy); 678 if (!err) 679 goto out; 680 681 /* We need a minimum of cached pages ready for page table creation */ 682 err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES); 683 if (err) 684 goto out; 685 686 retry: 687 /* 688 * Used to check for invalidations in progress, of the pfn that is 689 * returned by pfn_to_pfn_prot below. 690 */ 691 mmu_seq = kvm->mmu_notifier_seq; 692 /* 693 * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in 694 * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't 695 * risk the page we get a reference to getting unmapped before we have a 696 * chance to grab the mmu_lock without mmu_notifier_retry() noticing. 697 * 698 * This smp_rmb() pairs with the effective smp_wmb() of the combination 699 * of the pte_unmap_unlock() after the PTE is zapped, and the 700 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before 701 * mmu_notifier_seq is incremented. 702 */ 703 smp_rmb(); 704 705 /* Slow path - ask KVM core whether we can access this GPA */ 706 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable); 707 if (is_error_noslot_pfn(pfn)) { 708 err = -EFAULT; 709 goto out; 710 } 711 712 spin_lock(&kvm->mmu_lock); 713 /* Check if an invalidation has taken place since we got pfn */ 714 if (mmu_notifier_retry(kvm, mmu_seq)) { 715 /* 716 * This can happen when mappings are changed asynchronously, but 717 * also synchronously if a COW is triggered by 718 * gfn_to_pfn_prot(). 719 */ 720 spin_unlock(&kvm->mmu_lock); 721 kvm_release_pfn_clean(pfn); 722 goto retry; 723 } 724 725 /* Ensure page tables are allocated */ 726 ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa); 727 728 /* Set up the PTE */ 729 prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default; 730 if (writeable) { 731 prot_bits |= _PAGE_WRITE; 732 if (write_fault) { 733 prot_bits |= __WRITEABLE; 734 mark_page_dirty(kvm, gfn); 735 kvm_set_pfn_dirty(pfn); 736 } 737 } 738 entry = pfn_pte(pfn, __pgprot(prot_bits)); 739 740 /* Write the PTE */ 741 old_pte = *ptep; 742 set_pte(ptep, entry); 743 744 err = 0; 745 if (out_entry) 746 *out_entry = *ptep; 747 if (out_buddy) 748 *out_buddy = *ptep_buddy(ptep); 749 750 spin_unlock(&kvm->mmu_lock); 751 kvm_release_pfn_clean(pfn); 752 kvm_set_pfn_accessed(pfn); 753 out: 754 srcu_read_unlock(&kvm->srcu, srcu_idx); 755 return err; 756 } 757 758 static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu, 759 unsigned long addr) 760 { 761 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 762 pgd_t *pgdp; 763 int ret; 764 765 /* We need a minimum of cached pages ready for page table creation */ 766 ret = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES); 767 if (ret) 768 return NULL; 769 770 if (KVM_GUEST_KERNEL_MODE(vcpu)) 771 pgdp = vcpu->arch.guest_kernel_mm.pgd; 772 else 773 pgdp = vcpu->arch.guest_user_mm.pgd; 774 775 return kvm_mips_walk_pgd(pgdp, memcache, addr); 776 } 777 778 void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr, 779 bool user) 780 { 781 pgd_t *pgdp; 782 pte_t *ptep; 783 784 addr &= PAGE_MASK << 1; 785 786 pgdp = vcpu->arch.guest_kernel_mm.pgd; 787 ptep = kvm_mips_walk_pgd(pgdp, NULL, addr); 788 if (ptep) { 789 ptep[0] = pfn_pte(0, __pgprot(0)); 790 ptep[1] = pfn_pte(0, __pgprot(0)); 791 } 792 793 if (user) { 794 pgdp = vcpu->arch.guest_user_mm.pgd; 795 ptep = kvm_mips_walk_pgd(pgdp, NULL, addr); 796 if (ptep) { 797 ptep[0] = pfn_pte(0, __pgprot(0)); 798 ptep[1] = pfn_pte(0, __pgprot(0)); 799 } 800 } 801 } 802 803 /* 804 * kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}. 805 * Flush a range of guest physical address space from the VM's GPA page tables. 806 */ 807 808 static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva, 809 unsigned long end_gva) 810 { 811 int i_min = pte_index(start_gva); 812 int i_max = pte_index(end_gva); 813 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); 814 int i; 815 816 /* 817 * There's no freeing to do, so there's no point clearing individual 818 * entries unless only part of the last level page table needs flushing. 819 */ 820 if (safe_to_remove) 821 return true; 822 823 for (i = i_min; i <= i_max; ++i) { 824 if (!pte_present(pte[i])) 825 continue; 826 827 set_pte(pte + i, __pte(0)); 828 } 829 return false; 830 } 831 832 static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva, 833 unsigned long end_gva) 834 { 835 pte_t *pte; 836 unsigned long end = ~0ul; 837 int i_min = pmd_index(start_gva); 838 int i_max = pmd_index(end_gva); 839 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); 840 int i; 841 842 for (i = i_min; i <= i_max; ++i, start_gva = 0) { 843 if (!pmd_present(pmd[i])) 844 continue; 845 846 pte = pte_offset_kernel(pmd + i, 0); 847 if (i == i_max) 848 end = end_gva; 849 850 if (kvm_mips_flush_gva_pte(pte, start_gva, end)) { 851 pmd_clear(pmd + i); 852 pte_free_kernel(NULL, pte); 853 } else { 854 safe_to_remove = false; 855 } 856 } 857 return safe_to_remove; 858 } 859 860 static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva, 861 unsigned long end_gva) 862 { 863 pmd_t *pmd; 864 unsigned long end = ~0ul; 865 int i_min = pud_index(start_gva); 866 int i_max = pud_index(end_gva); 867 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); 868 int i; 869 870 for (i = i_min; i <= i_max; ++i, start_gva = 0) { 871 if (!pud_present(pud[i])) 872 continue; 873 874 pmd = pmd_offset(pud + i, 0); 875 if (i == i_max) 876 end = end_gva; 877 878 if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) { 879 pud_clear(pud + i); 880 pmd_free(NULL, pmd); 881 } else { 882 safe_to_remove = false; 883 } 884 } 885 return safe_to_remove; 886 } 887 888 static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva, 889 unsigned long end_gva) 890 { 891 p4d_t *p4d; 892 pud_t *pud; 893 unsigned long end = ~0ul; 894 int i_min = pgd_index(start_gva); 895 int i_max = pgd_index(end_gva); 896 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); 897 int i; 898 899 for (i = i_min; i <= i_max; ++i, start_gva = 0) { 900 if (!pgd_present(pgd[i])) 901 continue; 902 903 p4d = p4d_offset(pgd, 0); 904 pud = pud_offset(p4d + i, 0); 905 if (i == i_max) 906 end = end_gva; 907 908 if (kvm_mips_flush_gva_pud(pud, start_gva, end)) { 909 pgd_clear(pgd + i); 910 pud_free(NULL, pud); 911 } else { 912 safe_to_remove = false; 913 } 914 } 915 return safe_to_remove; 916 } 917 918 void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags) 919 { 920 if (flags & KMF_GPA) { 921 /* all of guest virtual address space could be affected */ 922 if (flags & KMF_KERN) 923 /* useg, kseg0, seg2/3 */ 924 kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff); 925 else 926 /* useg */ 927 kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff); 928 } else { 929 /* useg */ 930 kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff); 931 932 /* kseg2/3 */ 933 if (flags & KMF_KERN) 934 kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff); 935 } 936 } 937 938 static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte) 939 { 940 /* 941 * Don't leak writeable but clean entries from GPA page tables. We don't 942 * want the normal Linux tlbmod handler to handle dirtying when KVM 943 * accesses guest memory. 944 */ 945 if (!pte_dirty(pte)) 946 pte = pte_wrprotect(pte); 947 948 return pte; 949 } 950 951 static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo) 952 { 953 /* Guest EntryLo overrides host EntryLo */ 954 if (!(entrylo & ENTRYLO_D)) 955 pte = pte_mkclean(pte); 956 957 return kvm_mips_gpa_pte_to_gva_unmapped(pte); 958 } 959 960 #ifdef CONFIG_KVM_MIPS_VZ 961 int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr, 962 struct kvm_vcpu *vcpu, 963 bool write_fault) 964 { 965 int ret; 966 967 ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL); 968 if (ret) 969 return ret; 970 971 /* Invalidate this entry in the TLB */ 972 return kvm_vz_host_tlb_inv(vcpu, badvaddr); 973 } 974 #endif 975 976 /* XXXKYMA: Must be called with interrupts disabled */ 977 int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, 978 struct kvm_vcpu *vcpu, 979 bool write_fault) 980 { 981 unsigned long gpa; 982 pte_t pte_gpa[2], *ptep_gva; 983 int idx; 984 985 if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { 986 kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); 987 kvm_mips_dump_host_tlbs(); 988 return -1; 989 } 990 991 /* Get the GPA page table entry */ 992 gpa = KVM_GUEST_CPHYSADDR(badvaddr); 993 idx = (badvaddr >> PAGE_SHIFT) & 1; 994 if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx], 995 &pte_gpa[!idx]) < 0) 996 return -1; 997 998 /* Get the GVA page table entry */ 999 ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE); 1000 if (!ptep_gva) { 1001 kvm_err("No ptep for gva %lx\n", badvaddr); 1002 return -1; 1003 } 1004 1005 /* Copy a pair of entries from GPA page table to GVA page table */ 1006 ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]); 1007 ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]); 1008 1009 /* Invalidate this entry in the TLB, guest kernel ASID only */ 1010 kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); 1011 return 0; 1012 } 1013 1014 int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, 1015 struct kvm_mips_tlb *tlb, 1016 unsigned long gva, 1017 bool write_fault) 1018 { 1019 struct kvm *kvm = vcpu->kvm; 1020 long tlb_lo[2]; 1021 pte_t pte_gpa[2], *ptep_buddy, *ptep_gva; 1022 unsigned int idx = TLB_LO_IDX(*tlb, gva); 1023 bool kernel = KVM_GUEST_KERNEL_MODE(vcpu); 1024 1025 tlb_lo[0] = tlb->tlb_lo[0]; 1026 tlb_lo[1] = tlb->tlb_lo[1]; 1027 1028 /* 1029 * The commpage address must not be mapped to anything else if the guest 1030 * TLB contains entries nearby, or commpage accesses will break. 1031 */ 1032 if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1))) 1033 tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0; 1034 1035 /* Get the GPA page table entry */ 1036 if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]), 1037 write_fault, &pte_gpa[idx], NULL) < 0) 1038 return -1; 1039 1040 /* And its GVA buddy's GPA page table entry if it also exists */ 1041 pte_gpa[!idx] = pfn_pte(0, __pgprot(0)); 1042 if (tlb_lo[!idx] & ENTRYLO_V) { 1043 spin_lock(&kvm->mmu_lock); 1044 ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL, 1045 mips3_tlbpfn_to_paddr(tlb_lo[!idx])); 1046 if (ptep_buddy) 1047 pte_gpa[!idx] = *ptep_buddy; 1048 spin_unlock(&kvm->mmu_lock); 1049 } 1050 1051 /* Get the GVA page table entry pair */ 1052 ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE); 1053 if (!ptep_gva) { 1054 kvm_err("No ptep for gva %lx\n", gva); 1055 return -1; 1056 } 1057 1058 /* Copy a pair of entries from GPA page table to GVA page table */ 1059 ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]); 1060 ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]); 1061 1062 /* Invalidate this entry in the TLB, current guest mode ASID only */ 1063 kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel); 1064 1065 kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, 1066 tlb->tlb_lo[0], tlb->tlb_lo[1]); 1067 1068 return 0; 1069 } 1070 1071 int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, 1072 struct kvm_vcpu *vcpu) 1073 { 1074 kvm_pfn_t pfn; 1075 pte_t *ptep; 1076 1077 ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr); 1078 if (!ptep) { 1079 kvm_err("No ptep for commpage %lx\n", badvaddr); 1080 return -1; 1081 } 1082 1083 pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage)); 1084 /* Also set valid and dirty, so refill handler doesn't have to */ 1085 *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED))); 1086 1087 /* Invalidate this entry in the TLB, guest kernel ASID only */ 1088 kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); 1089 return 0; 1090 } 1091 1092 /** 1093 * kvm_mips_migrate_count() - Migrate timer. 1094 * @vcpu: Virtual CPU. 1095 * 1096 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it 1097 * if it was running prior to being cancelled. 1098 * 1099 * Must be called when the VCPU is migrated to a different CPU to ensure that 1100 * timer expiry during guest execution interrupts the guest and causes the 1101 * interrupt to be delivered in a timely manner. 1102 */ 1103 static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) 1104 { 1105 if (hrtimer_cancel(&vcpu->arch.comparecount_timer)) 1106 hrtimer_restart(&vcpu->arch.comparecount_timer); 1107 } 1108 1109 /* Restore ASID once we are scheduled back after preemption */ 1110 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1111 { 1112 unsigned long flags; 1113 1114 kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); 1115 1116 local_irq_save(flags); 1117 1118 vcpu->cpu = cpu; 1119 if (vcpu->arch.last_sched_cpu != cpu) { 1120 kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", 1121 vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); 1122 /* 1123 * Migrate the timer interrupt to the current CPU so that it 1124 * always interrupts the guest and synchronously triggers a 1125 * guest timer interrupt. 1126 */ 1127 kvm_mips_migrate_count(vcpu); 1128 } 1129 1130 /* restore guest state to registers */ 1131 kvm_mips_callbacks->vcpu_load(vcpu, cpu); 1132 1133 local_irq_restore(flags); 1134 } 1135 1136 /* ASID can change if another task is scheduled during preemption */ 1137 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1138 { 1139 unsigned long flags; 1140 int cpu; 1141 1142 local_irq_save(flags); 1143 1144 cpu = smp_processor_id(); 1145 vcpu->arch.last_sched_cpu = cpu; 1146 vcpu->cpu = -1; 1147 1148 /* save guest state in registers */ 1149 kvm_mips_callbacks->vcpu_put(vcpu, cpu); 1150 1151 local_irq_restore(flags); 1152 } 1153 1154 /** 1155 * kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault. 1156 * @vcpu: Virtual CPU. 1157 * @gva: Guest virtual address to be accessed. 1158 * @write: True if write attempted (must be dirtied and made writable). 1159 * 1160 * Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and 1161 * dirtying the page if @write so that guest instructions can be modified. 1162 * 1163 * Returns: KVM_MIPS_MAPPED on success. 1164 * KVM_MIPS_GVA if bad guest virtual address. 1165 * KVM_MIPS_GPA if bad guest physical address. 1166 * KVM_MIPS_TLB if guest TLB not present. 1167 * KVM_MIPS_TLBINV if guest TLB present but not valid. 1168 * KVM_MIPS_TLBMOD if guest TLB read only. 1169 */ 1170 enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, 1171 unsigned long gva, 1172 bool write) 1173 { 1174 struct mips_coproc *cop0 = vcpu->arch.cop0; 1175 struct kvm_mips_tlb *tlb; 1176 int index; 1177 1178 if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) { 1179 if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0) 1180 return KVM_MIPS_GPA; 1181 } else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) || 1182 KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) { 1183 /* Address should be in the guest TLB */ 1184 index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) | 1185 (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID)); 1186 if (index < 0) 1187 return KVM_MIPS_TLB; 1188 tlb = &vcpu->arch.guest_tlb[index]; 1189 1190 /* Entry should be valid, and dirty for writes */ 1191 if (!TLB_IS_VALID(*tlb, gva)) 1192 return KVM_MIPS_TLBINV; 1193 if (write && !TLB_IS_DIRTY(*tlb, gva)) 1194 return KVM_MIPS_TLBMOD; 1195 1196 if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write)) 1197 return KVM_MIPS_GPA; 1198 } else { 1199 return KVM_MIPS_GVA; 1200 } 1201 1202 return KVM_MIPS_MAPPED; 1203 } 1204 1205 int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) 1206 { 1207 int err; 1208 1209 if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ), 1210 "Expect BadInstr/BadInstrP registers to be used with VZ\n")) 1211 return -EINVAL; 1212 1213 retry: 1214 kvm_trap_emul_gva_lockless_begin(vcpu); 1215 err = get_user(*out, opc); 1216 kvm_trap_emul_gva_lockless_end(vcpu); 1217 1218 if (unlikely(err)) { 1219 /* 1220 * Try to handle the fault, maybe we just raced with a GVA 1221 * invalidation. 1222 */ 1223 err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc, 1224 false); 1225 if (unlikely(err)) { 1226 kvm_err("%s: illegal address: %p\n", 1227 __func__, opc); 1228 return -EFAULT; 1229 } 1230 1231 /* Hopefully it'll work now */ 1232 goto retry; 1233 } 1234 return 0; 1235 } 1236