1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * KVM/MIPS MMU handling in the KVM module. 7 * 8 * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. 9 * Authors: Sanjay Lal <sanjayl@kymasys.com> 10 */ 11 12 #include <linux/highmem.h> 13 #include <linux/kvm_host.h> 14 #include <linux/uaccess.h> 15 #include <asm/mmu_context.h> 16 #include <asm/pgalloc.h> 17 18 /* 19 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels 20 * for which pages need to be cached. 21 */ 22 #if defined(__PAGETABLE_PMD_FOLDED) 23 #define KVM_MMU_CACHE_MIN_PAGES 1 24 #else 25 #define KVM_MMU_CACHE_MIN_PAGES 2 26 #endif 27 28 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) 29 { 30 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); 31 } 32 33 /** 34 * kvm_pgd_init() - Initialise KVM GPA page directory. 35 * @page: Pointer to page directory (PGD) for KVM GPA. 36 * 37 * Initialise a KVM GPA page directory with pointers to the invalid table, i.e. 38 * representing no mappings. This is similar to pgd_init(), however it 39 * initialises all the page directory pointers, not just the ones corresponding 40 * to the userland address space (since it is for the guest physical address 41 * space rather than a virtual address space). 42 */ 43 static void kvm_pgd_init(void *page) 44 { 45 unsigned long *p, *end; 46 unsigned long entry; 47 48 #ifdef __PAGETABLE_PMD_FOLDED 49 entry = (unsigned long)invalid_pte_table; 50 #else 51 entry = (unsigned long)invalid_pmd_table; 52 #endif 53 54 p = (unsigned long *)page; 55 end = p + PTRS_PER_PGD; 56 57 do { 58 p[0] = entry; 59 p[1] = entry; 60 p[2] = entry; 61 p[3] = entry; 62 p[4] = entry; 63 p += 8; 64 p[-3] = entry; 65 p[-2] = entry; 66 p[-1] = entry; 67 } while (p != end); 68 } 69 70 /** 71 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory. 72 * 73 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical 74 * to host physical page mappings. 75 * 76 * Returns: Pointer to new KVM GPA page directory. 77 * NULL on allocation failure. 78 */ 79 pgd_t *kvm_pgd_alloc(void) 80 { 81 pgd_t *ret; 82 83 ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER); 84 if (ret) 85 kvm_pgd_init(ret); 86 87 return ret; 88 } 89 90 /** 91 * kvm_mips_walk_pgd() - Walk page table with optional allocation. 92 * @pgd: Page directory pointer. 93 * @addr: Address to index page table using. 94 * @cache: MMU page cache to allocate new page tables from, or NULL. 95 * 96 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the 97 * address @addr. If page tables don't exist for @addr, they will be created 98 * from the MMU cache if @cache is not NULL. 99 * 100 * Returns: Pointer to pte_t corresponding to @addr. 101 * NULL if a page table doesn't exist for @addr and !@cache. 102 * NULL if a page table allocation failed. 103 */ 104 static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, 105 unsigned long addr) 106 { 107 p4d_t *p4d; 108 pud_t *pud; 109 pmd_t *pmd; 110 111 pgd += pgd_index(addr); 112 if (pgd_none(*pgd)) { 113 /* Not used on MIPS yet */ 114 BUG(); 115 return NULL; 116 } 117 p4d = p4d_offset(pgd, addr); 118 pud = pud_offset(p4d, addr); 119 if (pud_none(*pud)) { 120 pmd_t *new_pmd; 121 122 if (!cache) 123 return NULL; 124 new_pmd = kvm_mmu_memory_cache_alloc(cache); 125 pmd_init((unsigned long)new_pmd, 126 (unsigned long)invalid_pte_table); 127 pud_populate(NULL, pud, new_pmd); 128 } 129 pmd = pmd_offset(pud, addr); 130 if (pmd_none(*pmd)) { 131 pte_t *new_pte; 132 133 if (!cache) 134 return NULL; 135 new_pte = kvm_mmu_memory_cache_alloc(cache); 136 clear_page(new_pte); 137 pmd_populate_kernel(NULL, pmd, new_pte); 138 } 139 return pte_offset_kernel(pmd, addr); 140 } 141 142 /* Caller must hold kvm->mm_lock */ 143 static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm, 144 struct kvm_mmu_memory_cache *cache, 145 unsigned long addr) 146 { 147 return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr); 148 } 149 150 /* 151 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}. 152 * Flush a range of guest physical address space from the VM's GPA page tables. 153 */ 154 155 static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa, 156 unsigned long end_gpa) 157 { 158 int i_min = pte_index(start_gpa); 159 int i_max = pte_index(end_gpa); 160 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); 161 int i; 162 163 for (i = i_min; i <= i_max; ++i) { 164 if (!pte_present(pte[i])) 165 continue; 166 167 set_pte(pte + i, __pte(0)); 168 } 169 return safe_to_remove; 170 } 171 172 static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, 173 unsigned long end_gpa) 174 { 175 pte_t *pte; 176 unsigned long end = ~0ul; 177 int i_min = pmd_index(start_gpa); 178 int i_max = pmd_index(end_gpa); 179 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); 180 int i; 181 182 for (i = i_min; i <= i_max; ++i, start_gpa = 0) { 183 if (!pmd_present(pmd[i])) 184 continue; 185 186 pte = pte_offset_kernel(pmd + i, 0); 187 if (i == i_max) 188 end = end_gpa; 189 190 if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) { 191 pmd_clear(pmd + i); 192 pte_free_kernel(NULL, pte); 193 } else { 194 safe_to_remove = false; 195 } 196 } 197 return safe_to_remove; 198 } 199 200 static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, 201 unsigned long end_gpa) 202 { 203 pmd_t *pmd; 204 unsigned long end = ~0ul; 205 int i_min = pud_index(start_gpa); 206 int i_max = pud_index(end_gpa); 207 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); 208 int i; 209 210 for (i = i_min; i <= i_max; ++i, start_gpa = 0) { 211 if (!pud_present(pud[i])) 212 continue; 213 214 pmd = pmd_offset(pud + i, 0); 215 if (i == i_max) 216 end = end_gpa; 217 218 if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) { 219 pud_clear(pud + i); 220 pmd_free(NULL, pmd); 221 } else { 222 safe_to_remove = false; 223 } 224 } 225 return safe_to_remove; 226 } 227 228 static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, 229 unsigned long end_gpa) 230 { 231 p4d_t *p4d; 232 pud_t *pud; 233 unsigned long end = ~0ul; 234 int i_min = pgd_index(start_gpa); 235 int i_max = pgd_index(end_gpa); 236 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); 237 int i; 238 239 for (i = i_min; i <= i_max; ++i, start_gpa = 0) { 240 if (!pgd_present(pgd[i])) 241 continue; 242 243 p4d = p4d_offset(pgd, 0); 244 pud = pud_offset(p4d + i, 0); 245 if (i == i_max) 246 end = end_gpa; 247 248 if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) { 249 pgd_clear(pgd + i); 250 pud_free(NULL, pud); 251 } else { 252 safe_to_remove = false; 253 } 254 } 255 return safe_to_remove; 256 } 257 258 /** 259 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses. 260 * @kvm: KVM pointer. 261 * @start_gfn: Guest frame number of first page in GPA range to flush. 262 * @end_gfn: Guest frame number of last page in GPA range to flush. 263 * 264 * Flushes a range of GPA mappings from the GPA page tables. 265 * 266 * The caller must hold the @kvm->mmu_lock spinlock. 267 * 268 * Returns: Whether its safe to remove the top level page directory because 269 * all lower levels have been removed. 270 */ 271 bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) 272 { 273 return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd, 274 start_gfn << PAGE_SHIFT, 275 end_gfn << PAGE_SHIFT); 276 } 277 278 #define BUILD_PTE_RANGE_OP(name, op) \ 279 static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start, \ 280 unsigned long end) \ 281 { \ 282 int ret = 0; \ 283 int i_min = pte_index(start); \ 284 int i_max = pte_index(end); \ 285 int i; \ 286 pte_t old, new; \ 287 \ 288 for (i = i_min; i <= i_max; ++i) { \ 289 if (!pte_present(pte[i])) \ 290 continue; \ 291 \ 292 old = pte[i]; \ 293 new = op(old); \ 294 if (pte_val(new) == pte_val(old)) \ 295 continue; \ 296 set_pte(pte + i, new); \ 297 ret = 1; \ 298 } \ 299 return ret; \ 300 } \ 301 \ 302 /* returns true if anything was done */ \ 303 static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \ 304 unsigned long end) \ 305 { \ 306 int ret = 0; \ 307 pte_t *pte; \ 308 unsigned long cur_end = ~0ul; \ 309 int i_min = pmd_index(start); \ 310 int i_max = pmd_index(end); \ 311 int i; \ 312 \ 313 for (i = i_min; i <= i_max; ++i, start = 0) { \ 314 if (!pmd_present(pmd[i])) \ 315 continue; \ 316 \ 317 pte = pte_offset_kernel(pmd + i, 0); \ 318 if (i == i_max) \ 319 cur_end = end; \ 320 \ 321 ret |= kvm_mips_##name##_pte(pte, start, cur_end); \ 322 } \ 323 return ret; \ 324 } \ 325 \ 326 static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \ 327 unsigned long end) \ 328 { \ 329 int ret = 0; \ 330 pmd_t *pmd; \ 331 unsigned long cur_end = ~0ul; \ 332 int i_min = pud_index(start); \ 333 int i_max = pud_index(end); \ 334 int i; \ 335 \ 336 for (i = i_min; i <= i_max; ++i, start = 0) { \ 337 if (!pud_present(pud[i])) \ 338 continue; \ 339 \ 340 pmd = pmd_offset(pud + i, 0); \ 341 if (i == i_max) \ 342 cur_end = end; \ 343 \ 344 ret |= kvm_mips_##name##_pmd(pmd, start, cur_end); \ 345 } \ 346 return ret; \ 347 } \ 348 \ 349 static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ 350 unsigned long end) \ 351 { \ 352 int ret = 0; \ 353 p4d_t *p4d; \ 354 pud_t *pud; \ 355 unsigned long cur_end = ~0ul; \ 356 int i_min = pgd_index(start); \ 357 int i_max = pgd_index(end); \ 358 int i; \ 359 \ 360 for (i = i_min; i <= i_max; ++i, start = 0) { \ 361 if (!pgd_present(pgd[i])) \ 362 continue; \ 363 \ 364 p4d = p4d_offset(pgd, 0); \ 365 pud = pud_offset(p4d + i, 0); \ 366 if (i == i_max) \ 367 cur_end = end; \ 368 \ 369 ret |= kvm_mips_##name##_pud(pud, start, cur_end); \ 370 } \ 371 return ret; \ 372 } 373 374 /* 375 * kvm_mips_mkclean_gpa_pt. 376 * Mark a range of guest physical address space clean (writes fault) in the VM's 377 * GPA page table to allow dirty page tracking. 378 */ 379 380 BUILD_PTE_RANGE_OP(mkclean, pte_mkclean) 381 382 /** 383 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean. 384 * @kvm: KVM pointer. 385 * @start_gfn: Guest frame number of first page in GPA range to flush. 386 * @end_gfn: Guest frame number of last page in GPA range to flush. 387 * 388 * Make a range of GPA mappings clean so that guest writes will fault and 389 * trigger dirty page logging. 390 * 391 * The caller must hold the @kvm->mmu_lock spinlock. 392 * 393 * Returns: Whether any GPA mappings were modified, which would require 394 * derived mappings (GVA page tables & TLB enties) to be 395 * invalidated. 396 */ 397 int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) 398 { 399 return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd, 400 start_gfn << PAGE_SHIFT, 401 end_gfn << PAGE_SHIFT); 402 } 403 404 /** 405 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages 406 * @kvm: The KVM pointer 407 * @slot: The memory slot associated with mask 408 * @gfn_offset: The gfn offset in memory slot 409 * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory 410 * slot to be write protected 411 * 412 * Walks bits set in mask write protects the associated pte's. Caller must 413 * acquire @kvm->mmu_lock. 414 */ 415 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, 416 struct kvm_memory_slot *slot, 417 gfn_t gfn_offset, unsigned long mask) 418 { 419 gfn_t base_gfn = slot->base_gfn + gfn_offset; 420 gfn_t start = base_gfn + __ffs(mask); 421 gfn_t end = base_gfn + __fls(mask); 422 423 kvm_mips_mkclean_gpa_pt(kvm, start, end); 424 } 425 426 /* 427 * kvm_mips_mkold_gpa_pt. 428 * Mark a range of guest physical address space old (all accesses fault) in the 429 * VM's GPA page table to allow detection of commonly used pages. 430 */ 431 432 BUILD_PTE_RANGE_OP(mkold, pte_mkold) 433 434 static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn, 435 gfn_t end_gfn) 436 { 437 return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd, 438 start_gfn << PAGE_SHIFT, 439 end_gfn << PAGE_SHIFT); 440 } 441 442 static int handle_hva_to_gpa(struct kvm *kvm, 443 unsigned long start, 444 unsigned long end, 445 int (*handler)(struct kvm *kvm, gfn_t gfn, 446 gpa_t gfn_end, 447 struct kvm_memory_slot *memslot, 448 void *data), 449 void *data) 450 { 451 struct kvm_memslots *slots; 452 struct kvm_memory_slot *memslot; 453 int ret = 0; 454 455 slots = kvm_memslots(kvm); 456 457 /* we only care about the pages that the guest sees */ 458 kvm_for_each_memslot(memslot, slots) { 459 unsigned long hva_start, hva_end; 460 gfn_t gfn, gfn_end; 461 462 hva_start = max(start, memslot->userspace_addr); 463 hva_end = min(end, memslot->userspace_addr + 464 (memslot->npages << PAGE_SHIFT)); 465 if (hva_start >= hva_end) 466 continue; 467 468 /* 469 * {gfn(page) | page intersects with [hva_start, hva_end)} = 470 * {gfn_start, gfn_start+1, ..., gfn_end-1}. 471 */ 472 gfn = hva_to_gfn_memslot(hva_start, memslot); 473 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 474 475 ret |= handler(kvm, gfn, gfn_end, memslot, data); 476 } 477 478 return ret; 479 } 480 481 482 static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, 483 struct kvm_memory_slot *memslot, void *data) 484 { 485 kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end); 486 return 1; 487 } 488 489 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, 490 unsigned flags) 491 { 492 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); 493 494 kvm_mips_callbacks->flush_shadow_all(kvm); 495 return 0; 496 } 497 498 static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, 499 struct kvm_memory_slot *memslot, void *data) 500 { 501 gpa_t gpa = gfn << PAGE_SHIFT; 502 pte_t hva_pte = *(pte_t *)data; 503 pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); 504 pte_t old_pte; 505 506 if (!gpa_pte) 507 return 0; 508 509 /* Mapping may need adjusting depending on memslot flags */ 510 old_pte = *gpa_pte; 511 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) 512 hva_pte = pte_mkclean(hva_pte); 513 else if (memslot->flags & KVM_MEM_READONLY) 514 hva_pte = pte_wrprotect(hva_pte); 515 516 set_pte(gpa_pte, hva_pte); 517 518 /* Replacing an absent or old page doesn't need flushes */ 519 if (!pte_present(old_pte) || !pte_young(old_pte)) 520 return 0; 521 522 /* Pages swapped, aged, moved, or cleaned require flushes */ 523 return !pte_present(hva_pte) || 524 !pte_young(hva_pte) || 525 pte_pfn(old_pte) != pte_pfn(hva_pte) || 526 (pte_dirty(old_pte) && !pte_dirty(hva_pte)); 527 } 528 529 int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 530 { 531 unsigned long end = hva + PAGE_SIZE; 532 int ret; 533 534 ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte); 535 if (ret) 536 kvm_mips_callbacks->flush_shadow_all(kvm); 537 return 0; 538 } 539 540 static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, 541 struct kvm_memory_slot *memslot, void *data) 542 { 543 return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end); 544 } 545 546 static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, 547 struct kvm_memory_slot *memslot, void *data) 548 { 549 gpa_t gpa = gfn << PAGE_SHIFT; 550 pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); 551 552 if (!gpa_pte) 553 return 0; 554 return pte_young(*gpa_pte); 555 } 556 557 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) 558 { 559 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); 560 } 561 562 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 563 { 564 return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); 565 } 566 567 /** 568 * _kvm_mips_map_page_fast() - Fast path GPA fault handler. 569 * @vcpu: VCPU pointer. 570 * @gpa: Guest physical address of fault. 571 * @write_fault: Whether the fault was due to a write. 572 * @out_entry: New PTE for @gpa (written on success unless NULL). 573 * @out_buddy: New PTE for @gpa's buddy (written on success unless 574 * NULL). 575 * 576 * Perform fast path GPA fault handling, doing all that can be done without 577 * calling into KVM. This handles marking old pages young (for idle page 578 * tracking), and dirtying of clean pages (for dirty page logging). 579 * 580 * Returns: 0 on success, in which case we can update derived mappings and 581 * resume guest execution. 582 * -EFAULT on failure due to absent GPA mapping or write to 583 * read-only page, in which case KVM must be consulted. 584 */ 585 static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, 586 bool write_fault, 587 pte_t *out_entry, pte_t *out_buddy) 588 { 589 struct kvm *kvm = vcpu->kvm; 590 gfn_t gfn = gpa >> PAGE_SHIFT; 591 pte_t *ptep; 592 kvm_pfn_t pfn = 0; /* silence bogus GCC warning */ 593 bool pfn_valid = false; 594 int ret = 0; 595 596 spin_lock(&kvm->mmu_lock); 597 598 /* Fast path - just check GPA page table for an existing entry */ 599 ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa); 600 if (!ptep || !pte_present(*ptep)) { 601 ret = -EFAULT; 602 goto out; 603 } 604 605 /* Track access to pages marked old */ 606 if (!pte_young(*ptep)) { 607 set_pte(ptep, pte_mkyoung(*ptep)); 608 pfn = pte_pfn(*ptep); 609 pfn_valid = true; 610 /* call kvm_set_pfn_accessed() after unlock */ 611 } 612 if (write_fault && !pte_dirty(*ptep)) { 613 if (!pte_write(*ptep)) { 614 ret = -EFAULT; 615 goto out; 616 } 617 618 /* Track dirtying of writeable pages */ 619 set_pte(ptep, pte_mkdirty(*ptep)); 620 pfn = pte_pfn(*ptep); 621 mark_page_dirty(kvm, gfn); 622 kvm_set_pfn_dirty(pfn); 623 } 624 625 if (out_entry) 626 *out_entry = *ptep; 627 if (out_buddy) 628 *out_buddy = *ptep_buddy(ptep); 629 630 out: 631 spin_unlock(&kvm->mmu_lock); 632 if (pfn_valid) 633 kvm_set_pfn_accessed(pfn); 634 return ret; 635 } 636 637 /** 638 * kvm_mips_map_page() - Map a guest physical page. 639 * @vcpu: VCPU pointer. 640 * @gpa: Guest physical address of fault. 641 * @write_fault: Whether the fault was due to a write. 642 * @out_entry: New PTE for @gpa (written on success unless NULL). 643 * @out_buddy: New PTE for @gpa's buddy (written on success unless 644 * NULL). 645 * 646 * Handle GPA faults by creating a new GPA mapping (or updating an existing 647 * one). 648 * 649 * This takes care of marking pages young or dirty (idle/dirty page tracking), 650 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page 651 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the 652 * caller. 653 * 654 * Returns: 0 on success, in which case the caller may use the @out_entry 655 * and @out_buddy PTEs to update derived mappings and resume guest 656 * execution. 657 * -EFAULT if there is no memory region at @gpa or a write was 658 * attempted to a read-only memory region. This is usually handled 659 * as an MMIO access. 660 */ 661 static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, 662 bool write_fault, 663 pte_t *out_entry, pte_t *out_buddy) 664 { 665 struct kvm *kvm = vcpu->kvm; 666 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 667 gfn_t gfn = gpa >> PAGE_SHIFT; 668 int srcu_idx, err; 669 kvm_pfn_t pfn; 670 pte_t *ptep, entry, old_pte; 671 bool writeable; 672 unsigned long prot_bits; 673 unsigned long mmu_seq; 674 675 /* Try the fast path to handle old / clean pages */ 676 srcu_idx = srcu_read_lock(&kvm->srcu); 677 err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry, 678 out_buddy); 679 if (!err) 680 goto out; 681 682 /* We need a minimum of cached pages ready for page table creation */ 683 err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES); 684 if (err) 685 goto out; 686 687 retry: 688 /* 689 * Used to check for invalidations in progress, of the pfn that is 690 * returned by pfn_to_pfn_prot below. 691 */ 692 mmu_seq = kvm->mmu_notifier_seq; 693 /* 694 * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in 695 * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't 696 * risk the page we get a reference to getting unmapped before we have a 697 * chance to grab the mmu_lock without mmu_notifier_retry() noticing. 698 * 699 * This smp_rmb() pairs with the effective smp_wmb() of the combination 700 * of the pte_unmap_unlock() after the PTE is zapped, and the 701 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before 702 * mmu_notifier_seq is incremented. 703 */ 704 smp_rmb(); 705 706 /* Slow path - ask KVM core whether we can access this GPA */ 707 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable); 708 if (is_error_noslot_pfn(pfn)) { 709 err = -EFAULT; 710 goto out; 711 } 712 713 spin_lock(&kvm->mmu_lock); 714 /* Check if an invalidation has taken place since we got pfn */ 715 if (mmu_notifier_retry(kvm, mmu_seq)) { 716 /* 717 * This can happen when mappings are changed asynchronously, but 718 * also synchronously if a COW is triggered by 719 * gfn_to_pfn_prot(). 720 */ 721 spin_unlock(&kvm->mmu_lock); 722 kvm_release_pfn_clean(pfn); 723 goto retry; 724 } 725 726 /* Ensure page tables are allocated */ 727 ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa); 728 729 /* Set up the PTE */ 730 prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default; 731 if (writeable) { 732 prot_bits |= _PAGE_WRITE; 733 if (write_fault) { 734 prot_bits |= __WRITEABLE; 735 mark_page_dirty(kvm, gfn); 736 kvm_set_pfn_dirty(pfn); 737 } 738 } 739 entry = pfn_pte(pfn, __pgprot(prot_bits)); 740 741 /* Write the PTE */ 742 old_pte = *ptep; 743 set_pte(ptep, entry); 744 745 err = 0; 746 if (out_entry) 747 *out_entry = *ptep; 748 if (out_buddy) 749 *out_buddy = *ptep_buddy(ptep); 750 751 spin_unlock(&kvm->mmu_lock); 752 kvm_release_pfn_clean(pfn); 753 kvm_set_pfn_accessed(pfn); 754 out: 755 srcu_read_unlock(&kvm->srcu, srcu_idx); 756 return err; 757 } 758 759 int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr, 760 struct kvm_vcpu *vcpu, 761 bool write_fault) 762 { 763 int ret; 764 765 ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL); 766 if (ret) 767 return ret; 768 769 /* Invalidate this entry in the TLB */ 770 return kvm_vz_host_tlb_inv(vcpu, badvaddr); 771 } 772 773 /** 774 * kvm_mips_migrate_count() - Migrate timer. 775 * @vcpu: Virtual CPU. 776 * 777 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it 778 * if it was running prior to being cancelled. 779 * 780 * Must be called when the VCPU is migrated to a different CPU to ensure that 781 * timer expiry during guest execution interrupts the guest and causes the 782 * interrupt to be delivered in a timely manner. 783 */ 784 static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) 785 { 786 if (hrtimer_cancel(&vcpu->arch.comparecount_timer)) 787 hrtimer_restart(&vcpu->arch.comparecount_timer); 788 } 789 790 /* Restore ASID once we are scheduled back after preemption */ 791 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 792 { 793 unsigned long flags; 794 795 kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); 796 797 local_irq_save(flags); 798 799 vcpu->cpu = cpu; 800 if (vcpu->arch.last_sched_cpu != cpu) { 801 kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", 802 vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); 803 /* 804 * Migrate the timer interrupt to the current CPU so that it 805 * always interrupts the guest and synchronously triggers a 806 * guest timer interrupt. 807 */ 808 kvm_mips_migrate_count(vcpu); 809 } 810 811 /* restore guest state to registers */ 812 kvm_mips_callbacks->vcpu_load(vcpu, cpu); 813 814 local_irq_restore(flags); 815 } 816 817 /* ASID can change if another task is scheduled during preemption */ 818 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 819 { 820 unsigned long flags; 821 int cpu; 822 823 local_irq_save(flags); 824 825 cpu = smp_processor_id(); 826 vcpu->arch.last_sched_cpu = cpu; 827 vcpu->cpu = -1; 828 829 /* save guest state in registers */ 830 kvm_mips_callbacks->vcpu_put(vcpu, cpu); 831 832 local_irq_restore(flags); 833 } 834