1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2002 Andi Kleen, SuSE Labs. 4 * Thanks to Ben LaHaise for precious feedback. 5 */ 6 #include <linux/highmem.h> 7 #include <linux/memblock.h> 8 #include <linux/sched.h> 9 #include <linux/mm.h> 10 #include <linux/interrupt.h> 11 #include <linux/seq_file.h> 12 #include <linux/debugfs.h> 13 #include <linux/pfn.h> 14 #include <linux/percpu.h> 15 #include <linux/gfp.h> 16 #include <linux/pci.h> 17 #include <linux/vmalloc.h> 18 #include <linux/libnvdimm.h> 19 20 #include <asm/e820/api.h> 21 #include <asm/processor.h> 22 #include <asm/tlbflush.h> 23 #include <asm/sections.h> 24 #include <asm/setup.h> 25 #include <linux/uaccess.h> 26 #include <asm/pgalloc.h> 27 #include <asm/proto.h> 28 #include <asm/memtype.h> 29 #include <asm/set_memory.h> 30 31 #include "../mm_internal.h" 32 33 /* 34 * The current flushing context - we pass it instead of 5 arguments: 35 */ 36 struct cpa_data { 37 unsigned long *vaddr; 38 pgd_t *pgd; 39 pgprot_t mask_set; 40 pgprot_t mask_clr; 41 unsigned long numpages; 42 unsigned long curpage; 43 unsigned long pfn; 44 unsigned int flags; 45 unsigned int force_split : 1, 46 force_static_prot : 1, 47 force_flush_all : 1; 48 struct page **pages; 49 }; 50 51 enum cpa_warn { 52 CPA_CONFLICT, 53 CPA_PROTECT, 54 CPA_DETECT, 55 }; 56 57 static const int cpa_warn_level = CPA_PROTECT; 58 59 /* 60 * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings) 61 * using cpa_lock. So that we don't allow any other cpu, with stale large tlb 62 * entries change the page attribute in parallel to some other cpu 63 * splitting a large page entry along with changing the attribute. 64 */ 65 static DEFINE_SPINLOCK(cpa_lock); 66 67 #define CPA_FLUSHTLB 1 68 #define CPA_ARRAY 2 69 #define CPA_PAGES_ARRAY 4 70 #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ 71 72 #ifdef CONFIG_PROC_FS 73 static unsigned long direct_pages_count[PG_LEVEL_NUM]; 74 75 void update_page_count(int level, unsigned long pages) 76 { 77 /* Protect against CPA */ 78 spin_lock(&pgd_lock); 79 direct_pages_count[level] += pages; 80 spin_unlock(&pgd_lock); 81 } 82 83 static void split_page_count(int level) 84 { 85 if (direct_pages_count[level] == 0) 86 return; 87 88 direct_pages_count[level]--; 89 direct_pages_count[level - 1] += PTRS_PER_PTE; 90 } 91 92 void arch_report_meminfo(struct seq_file *m) 93 { 94 seq_printf(m, "DirectMap4k: %8lu kB\n", 95 direct_pages_count[PG_LEVEL_4K] << 2); 96 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 97 seq_printf(m, "DirectMap2M: %8lu kB\n", 98 direct_pages_count[PG_LEVEL_2M] << 11); 99 #else 100 seq_printf(m, "DirectMap4M: %8lu kB\n", 101 direct_pages_count[PG_LEVEL_2M] << 12); 102 #endif 103 if (direct_gbpages) 104 seq_printf(m, "DirectMap1G: %8lu kB\n", 105 direct_pages_count[PG_LEVEL_1G] << 20); 106 } 107 #else 108 static inline void split_page_count(int level) { } 109 #endif 110 111 #ifdef CONFIG_X86_CPA_STATISTICS 112 113 static unsigned long cpa_1g_checked; 114 static unsigned long cpa_1g_sameprot; 115 static unsigned long cpa_1g_preserved; 116 static unsigned long cpa_2m_checked; 117 static unsigned long cpa_2m_sameprot; 118 static unsigned long cpa_2m_preserved; 119 static unsigned long cpa_4k_install; 120 121 static inline void cpa_inc_1g_checked(void) 122 { 123 cpa_1g_checked++; 124 } 125 126 static inline void cpa_inc_2m_checked(void) 127 { 128 cpa_2m_checked++; 129 } 130 131 static inline void cpa_inc_4k_install(void) 132 { 133 cpa_4k_install++; 134 } 135 136 static inline void 
cpa_inc_lp_sameprot(int level) 137 { 138 if (level == PG_LEVEL_1G) 139 cpa_1g_sameprot++; 140 else 141 cpa_2m_sameprot++; 142 } 143 144 static inline void cpa_inc_lp_preserved(int level) 145 { 146 if (level == PG_LEVEL_1G) 147 cpa_1g_preserved++; 148 else 149 cpa_2m_preserved++; 150 } 151 152 static int cpastats_show(struct seq_file *m, void *p) 153 { 154 seq_printf(m, "1G pages checked: %16lu\n", cpa_1g_checked); 155 seq_printf(m, "1G pages sameprot: %16lu\n", cpa_1g_sameprot); 156 seq_printf(m, "1G pages preserved: %16lu\n", cpa_1g_preserved); 157 seq_printf(m, "2M pages checked: %16lu\n", cpa_2m_checked); 158 seq_printf(m, "2M pages sameprot: %16lu\n", cpa_2m_sameprot); 159 seq_printf(m, "2M pages preserved: %16lu\n", cpa_2m_preserved); 160 seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install); 161 return 0; 162 } 163 164 static int cpastats_open(struct inode *inode, struct file *file) 165 { 166 return single_open(file, cpastats_show, NULL); 167 } 168 169 static const struct file_operations cpastats_fops = { 170 .open = cpastats_open, 171 .read = seq_read, 172 .llseek = seq_lseek, 173 .release = single_release, 174 }; 175 176 static int __init cpa_stats_init(void) 177 { 178 debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL, 179 &cpastats_fops); 180 return 0; 181 } 182 late_initcall(cpa_stats_init); 183 #else 184 static inline void cpa_inc_1g_checked(void) { } 185 static inline void cpa_inc_2m_checked(void) { } 186 static inline void cpa_inc_4k_install(void) { } 187 static inline void cpa_inc_lp_sameprot(int level) { } 188 static inline void cpa_inc_lp_preserved(int level) { } 189 #endif 190 191 192 static inline int 193 within(unsigned long addr, unsigned long start, unsigned long end) 194 { 195 return addr >= start && addr < end; 196 } 197 198 static inline int 199 within_inclusive(unsigned long addr, unsigned long start, unsigned long end) 200 { 201 return addr >= start && addr <= end; 202 } 203 204 #ifdef CONFIG_X86_64 205 206 static inline unsigned long highmap_start_pfn(void) 207 { 208 return __pa_symbol(_text) >> PAGE_SHIFT; 209 } 210 211 static inline unsigned long highmap_end_pfn(void) 212 { 213 /* Do not reference physical address outside the kernel. */ 214 return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; 215 } 216 217 static bool __cpa_pfn_in_highmap(unsigned long pfn) 218 { 219 /* 220 * Kernel text has an alias mapping at a high address, known 221 * here as "highmap". 222 */ 223 return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn()); 224 } 225 226 #else 227 228 static bool __cpa_pfn_in_highmap(unsigned long pfn) 229 { 230 /* There is no highmap on 32-bit */ 231 return false; 232 } 233 234 #endif 235 236 /* 237 * See set_mce_nospec(). 238 * 239 * Machine check recovery code needs to change cache mode of poisoned pages to 240 * UC to avoid speculative access logging another error. But passing the 241 * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a 242 * speculative access. So we cheat and flip the top bit of the address. This 243 * works fine for the code that updates the page tables. But at the end of the 244 * process we need to flush the TLB and cache and the non-canonical address 245 * causes a #GP fault when used by the INVLPG and CLFLUSH instructions. 246 * 247 * But in the common case we already have a canonical address. This code 248 * will fix the top bit if needed and is a no-op otherwise. 
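 *
 * For example (illustrative numbers only): a direct-map address such as
 * 0xffff888000100000 with the top bit flipped reads 0x7fff888000100000.
 * Shifting it left by one discards that cleared bit and the arithmetic
 * shift right sign-extends bit 62, giving back 0xffff888000100000. For an
 * address that is already canonical the same shift pair changes nothing.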
249 */ 250 static inline unsigned long fix_addr(unsigned long addr) 251 { 252 #ifdef CONFIG_X86_64 253 return (long)(addr << 1) >> 1; 254 #else 255 return addr; 256 #endif 257 } 258 259 static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) 260 { 261 if (cpa->flags & CPA_PAGES_ARRAY) { 262 struct page *page = cpa->pages[idx]; 263 264 if (unlikely(PageHighMem(page))) 265 return 0; 266 267 return (unsigned long)page_address(page); 268 } 269 270 if (cpa->flags & CPA_ARRAY) 271 return cpa->vaddr[idx]; 272 273 return *cpa->vaddr + idx * PAGE_SIZE; 274 } 275 276 /* 277 * Flushing functions 278 */ 279 280 static void clflush_cache_range_opt(void *vaddr, unsigned int size) 281 { 282 const unsigned long clflush_size = boot_cpu_data.x86_clflush_size; 283 void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1)); 284 void *vend = vaddr + size; 285 286 if (p >= vend) 287 return; 288 289 for (; p < vend; p += clflush_size) 290 clflushopt(p); 291 } 292 293 /** 294 * clflush_cache_range - flush a cache range with clflush 295 * @vaddr: virtual start address 296 * @size: number of bytes to flush 297 * 298 * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or 299 * SFENCE to avoid ordering issues. 300 */ 301 void clflush_cache_range(void *vaddr, unsigned int size) 302 { 303 mb(); 304 clflush_cache_range_opt(vaddr, size); 305 mb(); 306 } 307 EXPORT_SYMBOL_GPL(clflush_cache_range); 308 309 #ifdef CONFIG_ARCH_HAS_PMEM_API 310 void arch_invalidate_pmem(void *addr, size_t size) 311 { 312 clflush_cache_range(addr, size); 313 } 314 EXPORT_SYMBOL_GPL(arch_invalidate_pmem); 315 #endif 316 317 static void __cpa_flush_all(void *arg) 318 { 319 unsigned long cache = (unsigned long)arg; 320 321 /* 322 * Flush all to work around Errata in early athlons regarding 323 * large page flushing. 
324 */ 325 __flush_tlb_all(); 326 327 if (cache && boot_cpu_data.x86 >= 4) 328 wbinvd(); 329 } 330 331 static void cpa_flush_all(unsigned long cache) 332 { 333 BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); 334 335 on_each_cpu(__cpa_flush_all, (void *) cache, 1); 336 } 337 338 static void __cpa_flush_tlb(void *data) 339 { 340 struct cpa_data *cpa = data; 341 unsigned int i; 342 343 for (i = 0; i < cpa->numpages; i++) 344 __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); 345 } 346 347 static void cpa_flush(struct cpa_data *data, int cache) 348 { 349 struct cpa_data *cpa = data; 350 unsigned int i; 351 352 BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); 353 354 if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) { 355 cpa_flush_all(cache); 356 return; 357 } 358 359 if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) 360 flush_tlb_all(); 361 else 362 on_each_cpu(__cpa_flush_tlb, cpa, 1); 363 364 if (!cache) 365 return; 366 367 mb(); 368 for (i = 0; i < cpa->numpages; i++) { 369 unsigned long addr = __cpa_addr(cpa, i); 370 unsigned int level; 371 372 pte_t *pte = lookup_address(addr, &level); 373 374 /* 375 * Only flush present addresses: 376 */ 377 if (pte && (pte_val(*pte) & _PAGE_PRESENT)) 378 clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); 379 } 380 mb(); 381 } 382 383 static bool overlaps(unsigned long r1_start, unsigned long r1_end, 384 unsigned long r2_start, unsigned long r2_end) 385 { 386 return (r1_start <= r2_end && r1_end >= r2_start) || 387 (r2_start <= r1_end && r2_end >= r1_start); 388 } 389 390 #ifdef CONFIG_PCI_BIOS 391 /* 392 * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS 393 * based config access (CONFIG_PCI_GOBIOS) support. 394 */ 395 #define BIOS_PFN PFN_DOWN(BIOS_BEGIN) 396 #define BIOS_PFN_END PFN_DOWN(BIOS_END - 1) 397 398 static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn) 399 { 400 if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END)) 401 return _PAGE_NX; 402 return 0; 403 } 404 #else 405 static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn) 406 { 407 return 0; 408 } 409 #endif 410 411 /* 412 * The .rodata section needs to be read-only. Using the pfn catches all 413 * aliases. This also includes __ro_after_init, so do not enforce until 414 * kernel_set_to_readonly is true. 415 */ 416 static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn) 417 { 418 unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata)); 419 420 /* 421 * Note: __end_rodata is at page aligned and not inclusive, so 422 * subtract 1 to get the last enforced PFN in the rodata area. 423 */ 424 epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1; 425 426 if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro)) 427 return _PAGE_RW; 428 return 0; 429 } 430 431 /* 432 * Protect kernel text against becoming non executable by forbidding 433 * _PAGE_NX. This protects only the high kernel mapping (_text -> _etext) 434 * out of which the kernel actually executes. Do not protect the low 435 * mapping. 436 * 437 * This does not cover __inittext since that is gone after boot. 
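 *
 * For example, a set_memory_nx() request covering a range that overlaps
 * [_text, _etext) will have _PAGE_NX filtered out of the requested
 * protection by static_protections() below, so the text mapping stays
 * executable.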
 */
static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end)
{
	unsigned long t_end = (unsigned long)_etext - 1;
	unsigned long t_start = (unsigned long)_text;

	if (overlaps(start, end, t_start, t_end))
		return _PAGE_NX;
	return 0;
}

#if defined(CONFIG_X86_64)
/*
 * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
 * kernel text mappings for the large page aligned text and rodata sections
 * will always be read-only. The kernel identity mappings covering the
 * holes caused by this alignment can be anything the user asks for.
 *
 * This will preserve the large page mappings for kernel text/data at no
 * extra cost.
 */
static pgprotval_t protect_kernel_text_ro(unsigned long start,
					  unsigned long end)
{
	unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1;
	unsigned long t_start = (unsigned long)_text;
	unsigned int level;

	if (!kernel_set_to_readonly || !overlaps(start, end, t_start, t_end))
		return 0;
	/*
	 * Don't enforce the !RW mapping for the kernel text mapping if
	 * the current mapping is already using small pages. There is no
	 * need to work hard to preserve large page mappings in this case.
	 *
	 * This also fixes the Linux Xen paravirt guest boot failure caused
	 * by unexpected read-only mappings for kernel identity
	 * mappings. In this paravirt guest case, the kernel text mapping
	 * and the kernel identity mapping share the same page-table pages,
	 * so the protections for kernel text and identity mappings have to
	 * be the same.
	 */
	if (lookup_address(start, &level) && (level != PG_LEVEL_4K))
		return _PAGE_RW;
	return 0;
}
#else
static pgprotval_t protect_kernel_text_ro(unsigned long start,
					  unsigned long end)
{
	return 0;
}
#endif

static inline bool conflicts(pgprot_t prot, pgprotval_t val)
{
	return (pgprot_val(prot) & ~val) != pgprot_val(prot);
}

static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
				  unsigned long start, unsigned long end,
				  unsigned long pfn, const char *txt)
{
	static const char *lvltxt[] = {
		[CPA_CONFLICT]	= "conflict",
		[CPA_PROTECT]	= "protect",
		[CPA_DETECT]	= "detect",
	};

	if (warnlvl > cpa_warn_level || !conflicts(prot, val))
		return;

	pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n",
		lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot),
		(unsigned long long)val);
}

/*
 * Certain areas of memory on x86 require very specific protection flags,
 * for example the BIOS area or kernel text. Callers don't always get this
 * right (e.g., ioremap() on BIOS memory is not uncommon), so this function
 * checks and fixes these known static required protection bits.
 */
static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
					  unsigned long pfn, unsigned long npg,
					  unsigned long lpsize, int warnlvl)
{
	pgprotval_t forbidden, res;
	unsigned long end;

	/*
	 * There is no point in checking RW/NX conflicts when the requested
	 * mapping is setting the page !PRESENT.
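	 * A set_memory_np() request, for instance, clears _PAGE_PRESENT
	 * from the requested pgprot; warning about RW/NX conflicts on a
	 * mapping that is about to disappear would only be noise.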
 */
	if (!(pgprot_val(prot) & _PAGE_PRESENT))
		return prot;

	/* Operate on the virtual address */
	end = start + npg * PAGE_SIZE - 1;

	res = protect_kernel_text(start, end);
	check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
	forbidden = res;

	/*
	 * Special case to preserve a large page. If the change spans the
	 * full large page mapping then there is no point in splitting it
	 * up. Happens with ftrace and is going to be removed once ftrace
	 * has switched to text_poke().
	 */
	if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
		res = protect_kernel_text_ro(start, end);
		check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
		forbidden |= res;
	}

	/* Check the PFN directly */
	res = protect_pci_bios(pfn, pfn + npg - 1);
	check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX");
	forbidden |= res;

	res = protect_rodata(pfn, pfn + npg - 1);
	check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO");
	forbidden |= res;

	return __pgprot(pgprot_val(prot) & ~forbidden);
}

/*
 * Lookup the page table entry for a virtual address in a specific pgd.
 * Return a pointer to the entry and the level of the mapping.
 */
pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
			     unsigned int *level)
{
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	*level = PG_LEVEL_NONE;

	if (pgd_none(*pgd))
		return NULL;

	p4d = p4d_offset(pgd, address);
	if (p4d_none(*p4d))
		return NULL;

	*level = PG_LEVEL_512G;
	if (p4d_large(*p4d) || !p4d_present(*p4d))
		return (pte_t *)p4d;

	pud = pud_offset(p4d, address);
	if (pud_none(*pud))
		return NULL;

	*level = PG_LEVEL_1G;
	if (pud_large(*pud) || !pud_present(*pud))
		return (pte_t *)pud;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;

	*level = PG_LEVEL_2M;
	if (pmd_large(*pmd) || !pmd_present(*pmd))
		return (pte_t *)pmd;

	*level = PG_LEVEL_4K;

	return pte_offset_kernel(pmd, address);
}

/*
 * Lookup the page table entry for a virtual address. Return a pointer
 * to the entry and the level of the mapping.
 *
 * Note: We return pud and pmd either when the entry is marked large
 * or when the present bit is not set. Otherwise we would return a
 * pointer to a nonexistent mapping.
 */
pte_t *lookup_address(unsigned long address, unsigned int *level)
{
	return lookup_address_in_pgd(pgd_offset_k(address), address, level);
}
EXPORT_SYMBOL_GPL(lookup_address);

/*
 * Lookup the page table entry for a virtual address in a given mm. Return a
 * pointer to the entry and the level of the mapping.
 */
pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
			    unsigned int *level)
{
	return lookup_address_in_pgd(pgd_offset(mm, address), address, level);
}
EXPORT_SYMBOL_GPL(lookup_address_in_mm);

static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
				  unsigned int *level)
{
	if (cpa->pgd)
		return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
					     address, level);

	return lookup_address(address, level);
}

/*
 * Lookup the PMD entry for a virtual address. Return a pointer to the entry
 * or NULL if not present.
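 *
 * A minimal usage sketch (hypothetical caller):
 *
 *	pmd_t *pmd = lookup_pmd_address(addr);
 *
 *	if (pmd && !pmd_none(*pmd) && pmd_large(*pmd))
 *		pr_info("addr %lx is covered by a 2M mapping\n", addr);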
 */
pmd_t *lookup_pmd_address(unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset_k(address);
	if (pgd_none(*pgd))
		return NULL;

	p4d = p4d_offset(pgd, address);
	if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
		return NULL;

	pud = pud_offset(p4d, address);
	if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
		return NULL;

	return pmd_offset(pud, address);
}

/*
 * This is necessary because __pa() does not work on some
 * kinds of memory, like vmalloc() or the alloc_remap()
 * areas on 32-bit NUMA systems. The percpu areas can
 * end up in this kind of memory, for instance.
 *
 * This could be optimized, but it is only intended to be
 * used at initialization time, and keeping it
 * unoptimized should increase the testing coverage for
 * the more obscure platforms.
 */
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
	unsigned long virt_addr = (unsigned long)__virt_addr;
	phys_addr_t phys_addr;
	unsigned long offset;
	enum pg_level level;
	pte_t *pte;

	pte = lookup_address(virt_addr, &level);
	BUG_ON(!pte);

	/*
	 * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
	 * before being left-shifted PAGE_SHIFT bits -- this trick is to
	 * make 32-bit PAE kernels work correctly.
	 */
	switch (level) {
	case PG_LEVEL_1G:
		phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
		offset = virt_addr & ~PUD_PAGE_MASK;
		break;
	case PG_LEVEL_2M:
		phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
		offset = virt_addr & ~PMD_PAGE_MASK;
		break;
	default:
		phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
		offset = virt_addr & ~PAGE_MASK;
	}

	return (phys_addr_t)(phys_addr | offset);
}
EXPORT_SYMBOL_GPL(slow_virt_to_phys);

/*
 * Set the new pmd in all the pgds we know about:
 */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
	if (!SHARED_KERNEL_PMD) {
		struct page *page;

		list_for_each_entry(page, &pgd_list, lru) {
			pgd_t *pgd;
			p4d_t *p4d;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			p4d = p4d_offset(pgd, address);
			pud = pud_offset(p4d, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
#endif
}

static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
{
	/*
	 * _PAGE_GLOBAL means "global page" for present PTEs.
	 * But, it is also used to indicate _PAGE_PROTNONE
	 * for non-present PTEs.
	 *
	 * This ensures that a _PAGE_GLOBAL PTE going from
	 * present to non-present is not confused with
	 * _PAGE_PROTNONE.
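	 *
	 * On x86 _PAGE_PROTNONE reuses the bit position of _PAGE_GLOBAL,
	 * so e.g. a set_memory_np() call on a global kernel mapping that
	 * left the bit alone would produce a non-present PTE which
	 * pte_protnone() would misread as a PROT_NONE mapping.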
753 */ 754 if (!(pgprot_val(prot) & _PAGE_PRESENT)) 755 pgprot_val(prot) &= ~_PAGE_GLOBAL; 756 757 return prot; 758 } 759 760 static int __should_split_large_page(pte_t *kpte, unsigned long address, 761 struct cpa_data *cpa) 762 { 763 unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn; 764 pgprot_t old_prot, new_prot, req_prot, chk_prot; 765 pte_t new_pte, *tmp; 766 enum pg_level level; 767 768 /* 769 * Check for races, another CPU might have split this page 770 * up already: 771 */ 772 tmp = _lookup_address_cpa(cpa, address, &level); 773 if (tmp != kpte) 774 return 1; 775 776 switch (level) { 777 case PG_LEVEL_2M: 778 old_prot = pmd_pgprot(*(pmd_t *)kpte); 779 old_pfn = pmd_pfn(*(pmd_t *)kpte); 780 cpa_inc_2m_checked(); 781 break; 782 case PG_LEVEL_1G: 783 old_prot = pud_pgprot(*(pud_t *)kpte); 784 old_pfn = pud_pfn(*(pud_t *)kpte); 785 cpa_inc_1g_checked(); 786 break; 787 default: 788 return -EINVAL; 789 } 790 791 psize = page_level_size(level); 792 pmask = page_level_mask(level); 793 794 /* 795 * Calculate the number of pages, which fit into this large 796 * page starting at address: 797 */ 798 lpaddr = (address + psize) & pmask; 799 numpages = (lpaddr - address) >> PAGE_SHIFT; 800 if (numpages < cpa->numpages) 801 cpa->numpages = numpages; 802 803 /* 804 * We are safe now. Check whether the new pgprot is the same: 805 * Convert protection attributes to 4k-format, as cpa->mask* are set 806 * up accordingly. 807 */ 808 809 /* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */ 810 req_prot = pgprot_large_2_4k(old_prot); 811 812 pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); 813 pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); 814 815 /* 816 * req_prot is in format of 4k pages. It must be converted to large 817 * page format: the caching mode includes the PAT bit located at 818 * different bit positions in the two formats. 819 */ 820 req_prot = pgprot_4k_2_large(req_prot); 821 req_prot = pgprot_clear_protnone_bits(req_prot); 822 if (pgprot_val(req_prot) & _PAGE_PRESENT) 823 pgprot_val(req_prot) |= _PAGE_PSE; 824 825 /* 826 * old_pfn points to the large page base pfn. So we need to add the 827 * offset of the virtual address: 828 */ 829 pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT); 830 cpa->pfn = pfn; 831 832 /* 833 * Calculate the large page base address and the number of 4K pages 834 * in the large page 835 */ 836 lpaddr = address & pmask; 837 numpages = psize >> PAGE_SHIFT; 838 839 /* 840 * Sanity check that the existing mapping is correct versus the static 841 * protections. static_protections() guards against !PRESENT, so no 842 * extra conditional required here. 843 */ 844 chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages, 845 psize, CPA_CONFLICT); 846 847 if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) { 848 /* 849 * Split the large page and tell the split code to 850 * enforce static protections. 851 */ 852 cpa->force_static_prot = 1; 853 return 1; 854 } 855 856 /* 857 * Optimization: If the requested pgprot is the same as the current 858 * pgprot, then the large page can be preserved and no updates are 859 * required independent of alignment and length of the requested 860 * range. The above already established that the current pgprot is 861 * correct, which in consequence makes the requested pgprot correct 862 * as well if it is the same. The static protection scan below will 863 * not come to a different conclusion. 
864 */ 865 if (pgprot_val(req_prot) == pgprot_val(old_prot)) { 866 cpa_inc_lp_sameprot(level); 867 return 0; 868 } 869 870 /* 871 * If the requested range does not cover the full page, split it up 872 */ 873 if (address != lpaddr || cpa->numpages != numpages) 874 return 1; 875 876 /* 877 * Check whether the requested pgprot is conflicting with a static 878 * protection requirement in the large page. 879 */ 880 new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, 881 psize, CPA_DETECT); 882 883 /* 884 * If there is a conflict, split the large page. 885 * 886 * There used to be a 4k wise evaluation trying really hard to 887 * preserve the large pages, but experimentation has shown, that this 888 * does not help at all. There might be corner cases which would 889 * preserve one large page occasionally, but it's really not worth the 890 * extra code and cycles for the common case. 891 */ 892 if (pgprot_val(req_prot) != pgprot_val(new_prot)) 893 return 1; 894 895 /* All checks passed. Update the large page mapping. */ 896 new_pte = pfn_pte(old_pfn, new_prot); 897 __set_pmd_pte(kpte, address, new_pte); 898 cpa->flags |= CPA_FLUSHTLB; 899 cpa_inc_lp_preserved(level); 900 return 0; 901 } 902 903 static int should_split_large_page(pte_t *kpte, unsigned long address, 904 struct cpa_data *cpa) 905 { 906 int do_split; 907 908 if (cpa->force_split) 909 return 1; 910 911 spin_lock(&pgd_lock); 912 do_split = __should_split_large_page(kpte, address, cpa); 913 spin_unlock(&pgd_lock); 914 915 return do_split; 916 } 917 918 static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn, 919 pgprot_t ref_prot, unsigned long address, 920 unsigned long size) 921 { 922 unsigned int npg = PFN_DOWN(size); 923 pgprot_t prot; 924 925 /* 926 * If should_split_large_page() discovered an inconsistent mapping, 927 * remove the invalid protection in the split mapping. 928 */ 929 if (!cpa->force_static_prot) 930 goto set; 931 932 /* Hand in lpsize = 0 to enforce the protection mechanism */ 933 prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT); 934 935 if (pgprot_val(prot) == pgprot_val(ref_prot)) 936 goto set; 937 938 /* 939 * If this is splitting a PMD, fix it up. PUD splits cannot be 940 * fixed trivially as that would require to rescan the newly 941 * installed PMD mappings after returning from split_large_page() 942 * so an eventual further split can allocate the necessary PTE 943 * pages. Warn for now and revisit it in case this actually 944 * happens. 945 */ 946 if (size == PAGE_SIZE) 947 ref_prot = prot; 948 else 949 pr_warn_once("CPA: Cannot fixup static protections for PUD split\n"); 950 set: 951 set_pte(pte, pfn_pte(pfn, ref_prot)); 952 } 953 954 static int 955 __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, 956 struct page *base) 957 { 958 unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1; 959 pte_t *pbase = (pte_t *)page_address(base); 960 unsigned int i, level; 961 pgprot_t ref_prot; 962 pte_t *tmp; 963 964 spin_lock(&pgd_lock); 965 /* 966 * Check for races, another CPU might have split this page 967 * up for us already: 968 */ 969 tmp = _lookup_address_cpa(cpa, address, &level); 970 if (tmp != kpte) { 971 spin_unlock(&pgd_lock); 972 return 1; 973 } 974 975 paravirt_alloc_pte(&init_mm, page_to_pfn(base)); 976 977 switch (level) { 978 case PG_LEVEL_2M: 979 ref_prot = pmd_pgprot(*(pmd_t *)kpte); 980 /* 981 * Clear PSE (aka _PAGE_PAT) and move 982 * PAT bit to correct position. 
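		 * In a 4K PTE the PAT bit lives at bit 7, while in a 2M/1G
		 * entry bit 7 is PSE and PAT sits at bit 12; that is the
		 * translation pgprot_large_2_4k() performs here.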
983 */ 984 ref_prot = pgprot_large_2_4k(ref_prot); 985 ref_pfn = pmd_pfn(*(pmd_t *)kpte); 986 lpaddr = address & PMD_MASK; 987 lpinc = PAGE_SIZE; 988 break; 989 990 case PG_LEVEL_1G: 991 ref_prot = pud_pgprot(*(pud_t *)kpte); 992 ref_pfn = pud_pfn(*(pud_t *)kpte); 993 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; 994 lpaddr = address & PUD_MASK; 995 lpinc = PMD_SIZE; 996 /* 997 * Clear the PSE flags if the PRESENT flag is not set 998 * otherwise pmd_present/pmd_huge will return true 999 * even on a non present pmd. 1000 */ 1001 if (!(pgprot_val(ref_prot) & _PAGE_PRESENT)) 1002 pgprot_val(ref_prot) &= ~_PAGE_PSE; 1003 break; 1004 1005 default: 1006 spin_unlock(&pgd_lock); 1007 return 1; 1008 } 1009 1010 ref_prot = pgprot_clear_protnone_bits(ref_prot); 1011 1012 /* 1013 * Get the target pfn from the original entry: 1014 */ 1015 pfn = ref_pfn; 1016 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc) 1017 split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc); 1018 1019 if (virt_addr_valid(address)) { 1020 unsigned long pfn = PFN_DOWN(__pa(address)); 1021 1022 if (pfn_range_is_mapped(pfn, pfn + 1)) 1023 split_page_count(level); 1024 } 1025 1026 /* 1027 * Install the new, split up pagetable. 1028 * 1029 * We use the standard kernel pagetable protections for the new 1030 * pagetable protections, the actual ptes set above control the 1031 * primary protection behavior: 1032 */ 1033 __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); 1034 1035 /* 1036 * Do a global flush tlb after splitting the large page 1037 * and before we do the actual change page attribute in the PTE. 1038 * 1039 * Without this, we violate the TLB application note, that says: 1040 * "The TLBs may contain both ordinary and large-page 1041 * translations for a 4-KByte range of linear addresses. This 1042 * may occur if software modifies the paging structures so that 1043 * the page size used for the address range changes. If the two 1044 * translations differ with respect to page frame or attributes 1045 * (e.g., permissions), processor behavior is undefined and may 1046 * be implementation-specific." 1047 * 1048 * We do this global tlb flush inside the cpa_lock, so that we 1049 * don't allow any other cpu, with stale tlb entries change the 1050 * page attribute in parallel, that also falls into the 1051 * just split large page entry. 
1052 */ 1053 flush_tlb_all(); 1054 spin_unlock(&pgd_lock); 1055 1056 return 0; 1057 } 1058 1059 static int split_large_page(struct cpa_data *cpa, pte_t *kpte, 1060 unsigned long address) 1061 { 1062 struct page *base; 1063 1064 if (!debug_pagealloc_enabled()) 1065 spin_unlock(&cpa_lock); 1066 base = alloc_pages(GFP_KERNEL, 0); 1067 if (!debug_pagealloc_enabled()) 1068 spin_lock(&cpa_lock); 1069 if (!base) 1070 return -ENOMEM; 1071 1072 if (__split_large_page(cpa, kpte, address, base)) 1073 __free_page(base); 1074 1075 return 0; 1076 } 1077 1078 static bool try_to_free_pte_page(pte_t *pte) 1079 { 1080 int i; 1081 1082 for (i = 0; i < PTRS_PER_PTE; i++) 1083 if (!pte_none(pte[i])) 1084 return false; 1085 1086 free_page((unsigned long)pte); 1087 return true; 1088 } 1089 1090 static bool try_to_free_pmd_page(pmd_t *pmd) 1091 { 1092 int i; 1093 1094 for (i = 0; i < PTRS_PER_PMD; i++) 1095 if (!pmd_none(pmd[i])) 1096 return false; 1097 1098 free_page((unsigned long)pmd); 1099 return true; 1100 } 1101 1102 static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) 1103 { 1104 pte_t *pte = pte_offset_kernel(pmd, start); 1105 1106 while (start < end) { 1107 set_pte(pte, __pte(0)); 1108 1109 start += PAGE_SIZE; 1110 pte++; 1111 } 1112 1113 if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { 1114 pmd_clear(pmd); 1115 return true; 1116 } 1117 return false; 1118 } 1119 1120 static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, 1121 unsigned long start, unsigned long end) 1122 { 1123 if (unmap_pte_range(pmd, start, end)) 1124 if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) 1125 pud_clear(pud); 1126 } 1127 1128 static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) 1129 { 1130 pmd_t *pmd = pmd_offset(pud, start); 1131 1132 /* 1133 * Not on a 2MB page boundary? 1134 */ 1135 if (start & (PMD_SIZE - 1)) { 1136 unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; 1137 unsigned long pre_end = min_t(unsigned long, end, next_page); 1138 1139 __unmap_pmd_range(pud, pmd, start, pre_end); 1140 1141 start = pre_end; 1142 pmd++; 1143 } 1144 1145 /* 1146 * Try to unmap in 2M chunks. 1147 */ 1148 while (end - start >= PMD_SIZE) { 1149 if (pmd_large(*pmd)) 1150 pmd_clear(pmd); 1151 else 1152 __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); 1153 1154 start += PMD_SIZE; 1155 pmd++; 1156 } 1157 1158 /* 1159 * 4K leftovers? 1160 */ 1161 if (start < end) 1162 return __unmap_pmd_range(pud, pmd, start, end); 1163 1164 /* 1165 * Try again to free the PMD page if haven't succeeded above. 1166 */ 1167 if (!pud_none(*pud)) 1168 if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) 1169 pud_clear(pud); 1170 } 1171 1172 static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) 1173 { 1174 pud_t *pud = pud_offset(p4d, start); 1175 1176 /* 1177 * Not on a GB page boundary? 1178 */ 1179 if (start & (PUD_SIZE - 1)) { 1180 unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; 1181 unsigned long pre_end = min_t(unsigned long, end, next_page); 1182 1183 unmap_pmd_range(pud, start, pre_end); 1184 1185 start = pre_end; 1186 pud++; 1187 } 1188 1189 /* 1190 * Try to unmap in 1G chunks? 1191 */ 1192 while (end - start >= PUD_SIZE) { 1193 1194 if (pud_large(*pud)) 1195 pud_clear(pud); 1196 else 1197 unmap_pmd_range(pud, start, start + PUD_SIZE); 1198 1199 start += PUD_SIZE; 1200 pud++; 1201 } 1202 1203 /* 1204 * 2M leftovers? 
1205 */ 1206 if (start < end) 1207 unmap_pmd_range(pud, start, end); 1208 1209 /* 1210 * No need to try to free the PUD page because we'll free it in 1211 * populate_pgd's error path 1212 */ 1213 } 1214 1215 static int alloc_pte_page(pmd_t *pmd) 1216 { 1217 pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL); 1218 if (!pte) 1219 return -1; 1220 1221 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); 1222 return 0; 1223 } 1224 1225 static int alloc_pmd_page(pud_t *pud) 1226 { 1227 pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); 1228 if (!pmd) 1229 return -1; 1230 1231 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); 1232 return 0; 1233 } 1234 1235 static void populate_pte(struct cpa_data *cpa, 1236 unsigned long start, unsigned long end, 1237 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot) 1238 { 1239 pte_t *pte; 1240 1241 pte = pte_offset_kernel(pmd, start); 1242 1243 pgprot = pgprot_clear_protnone_bits(pgprot); 1244 1245 while (num_pages-- && start < end) { 1246 set_pte(pte, pfn_pte(cpa->pfn, pgprot)); 1247 1248 start += PAGE_SIZE; 1249 cpa->pfn++; 1250 pte++; 1251 } 1252 } 1253 1254 static long populate_pmd(struct cpa_data *cpa, 1255 unsigned long start, unsigned long end, 1256 unsigned num_pages, pud_t *pud, pgprot_t pgprot) 1257 { 1258 long cur_pages = 0; 1259 pmd_t *pmd; 1260 pgprot_t pmd_pgprot; 1261 1262 /* 1263 * Not on a 2M boundary? 1264 */ 1265 if (start & (PMD_SIZE - 1)) { 1266 unsigned long pre_end = start + (num_pages << PAGE_SHIFT); 1267 unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; 1268 1269 pre_end = min_t(unsigned long, pre_end, next_page); 1270 cur_pages = (pre_end - start) >> PAGE_SHIFT; 1271 cur_pages = min_t(unsigned int, num_pages, cur_pages); 1272 1273 /* 1274 * Need a PTE page? 1275 */ 1276 pmd = pmd_offset(pud, start); 1277 if (pmd_none(*pmd)) 1278 if (alloc_pte_page(pmd)) 1279 return -1; 1280 1281 populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot); 1282 1283 start = pre_end; 1284 } 1285 1286 /* 1287 * We mapped them all? 1288 */ 1289 if (num_pages == cur_pages) 1290 return cur_pages; 1291 1292 pmd_pgprot = pgprot_4k_2_large(pgprot); 1293 1294 while (end - start >= PMD_SIZE) { 1295 1296 /* 1297 * We cannot use a 1G page so allocate a PMD page if needed. 1298 */ 1299 if (pud_none(*pud)) 1300 if (alloc_pmd_page(pud)) 1301 return -1; 1302 1303 pmd = pmd_offset(pud, start); 1304 1305 set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn, 1306 canon_pgprot(pmd_pgprot)))); 1307 1308 start += PMD_SIZE; 1309 cpa->pfn += PMD_SIZE >> PAGE_SHIFT; 1310 cur_pages += PMD_SIZE >> PAGE_SHIFT; 1311 } 1312 1313 /* 1314 * Map trailing 4K pages. 1315 */ 1316 if (start < end) { 1317 pmd = pmd_offset(pud, start); 1318 if (pmd_none(*pmd)) 1319 if (alloc_pte_page(pmd)) 1320 return -1; 1321 1322 populate_pte(cpa, start, end, num_pages - cur_pages, 1323 pmd, pgprot); 1324 } 1325 return num_pages; 1326 } 1327 1328 static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d, 1329 pgprot_t pgprot) 1330 { 1331 pud_t *pud; 1332 unsigned long end; 1333 long cur_pages = 0; 1334 pgprot_t pud_pgprot; 1335 1336 end = start + (cpa->numpages << PAGE_SHIFT); 1337 1338 /* 1339 * Not on a Gb page boundary? => map everything up to it with 1340 * smaller pages. 
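	 * For example (illustrative values): with start = 1 GiB + 4 MiB,
	 * next_page below becomes 2 GiB, so only the pages up to that
	 * boundary are handed to populate_pmd() before the 1G loop runs.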
1341 */ 1342 if (start & (PUD_SIZE - 1)) { 1343 unsigned long pre_end; 1344 unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; 1345 1346 pre_end = min_t(unsigned long, end, next_page); 1347 cur_pages = (pre_end - start) >> PAGE_SHIFT; 1348 cur_pages = min_t(int, (int)cpa->numpages, cur_pages); 1349 1350 pud = pud_offset(p4d, start); 1351 1352 /* 1353 * Need a PMD page? 1354 */ 1355 if (pud_none(*pud)) 1356 if (alloc_pmd_page(pud)) 1357 return -1; 1358 1359 cur_pages = populate_pmd(cpa, start, pre_end, cur_pages, 1360 pud, pgprot); 1361 if (cur_pages < 0) 1362 return cur_pages; 1363 1364 start = pre_end; 1365 } 1366 1367 /* We mapped them all? */ 1368 if (cpa->numpages == cur_pages) 1369 return cur_pages; 1370 1371 pud = pud_offset(p4d, start); 1372 pud_pgprot = pgprot_4k_2_large(pgprot); 1373 1374 /* 1375 * Map everything starting from the Gb boundary, possibly with 1G pages 1376 */ 1377 while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) { 1378 set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn, 1379 canon_pgprot(pud_pgprot)))); 1380 1381 start += PUD_SIZE; 1382 cpa->pfn += PUD_SIZE >> PAGE_SHIFT; 1383 cur_pages += PUD_SIZE >> PAGE_SHIFT; 1384 pud++; 1385 } 1386 1387 /* Map trailing leftover */ 1388 if (start < end) { 1389 long tmp; 1390 1391 pud = pud_offset(p4d, start); 1392 if (pud_none(*pud)) 1393 if (alloc_pmd_page(pud)) 1394 return -1; 1395 1396 tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages, 1397 pud, pgprot); 1398 if (tmp < 0) 1399 return cur_pages; 1400 1401 cur_pages += tmp; 1402 } 1403 return cur_pages; 1404 } 1405 1406 /* 1407 * Restrictions for kernel page table do not necessarily apply when mapping in 1408 * an alternate PGD. 1409 */ 1410 static int populate_pgd(struct cpa_data *cpa, unsigned long addr) 1411 { 1412 pgprot_t pgprot = __pgprot(_KERNPG_TABLE); 1413 pud_t *pud = NULL; /* shut up gcc */ 1414 p4d_t *p4d; 1415 pgd_t *pgd_entry; 1416 long ret; 1417 1418 pgd_entry = cpa->pgd + pgd_index(addr); 1419 1420 if (pgd_none(*pgd_entry)) { 1421 p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); 1422 if (!p4d) 1423 return -1; 1424 1425 set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE)); 1426 } 1427 1428 /* 1429 * Allocate a PUD page and hand it down for mapping. 1430 */ 1431 p4d = p4d_offset(pgd_entry, addr); 1432 if (p4d_none(*p4d)) { 1433 pud = (pud_t *)get_zeroed_page(GFP_KERNEL); 1434 if (!pud) 1435 return -1; 1436 1437 set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); 1438 } 1439 1440 pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); 1441 pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); 1442 1443 ret = populate_pud(cpa, addr, p4d, pgprot); 1444 if (ret < 0) { 1445 /* 1446 * Leave the PUD page in place in case some other CPU or thread 1447 * already found it, but remove any useless entries we just 1448 * added to it. 1449 */ 1450 unmap_pud_range(p4d, addr, 1451 addr + (cpa->numpages << PAGE_SHIFT)); 1452 return ret; 1453 } 1454 1455 cpa->numpages = ret; 1456 return 0; 1457 } 1458 1459 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, 1460 int primary) 1461 { 1462 if (cpa->pgd) { 1463 /* 1464 * Right now, we only execute this code path when mapping 1465 * the EFI virtual memory map regions, no other users 1466 * provide a ->pgd value. This may change in the future. 1467 */ 1468 return populate_pgd(cpa, vaddr); 1469 } 1470 1471 /* 1472 * Ignore all non primary paths. 
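	 * (!primary means we were called for an alias of the requested
	 * range, e.g. from cpa_process_alias(), where holes in the
	 * mapping are expected and can be silently skipped.)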
 */
	if (!primary) {
		cpa->numpages = 1;
		return 0;
	}

	/*
	 * Ignore the NULL PTE for kernel identity mapping, as it is expected
	 * to have holes.
	 * Also set numpages to '1' indicating that we processed the cpa
	 * request for one virtual address page and its pfn. TBD: numpages
	 * can be set based on the initial value and the level returned by
	 * lookup_address().
	 */
	if (within(vaddr, PAGE_OFFSET,
		   PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
		cpa->numpages = 1;
		cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
		return 0;

	} else if (__cpa_pfn_in_highmap(cpa->pfn)) {
		/* Faults in the highmap are OK, so do not warn: */
		return -EFAULT;
	} else {
		WARN(1, KERN_WARNING "CPA: called for zero pte. "
			"vaddr = %lx cpa->vaddr = %lx\n", vaddr,
			*cpa->vaddr);

		return -EFAULT;
	}
}

static int __change_page_attr(struct cpa_data *cpa, int primary)
{
	unsigned long address;
	int do_split, err;
	unsigned int level;
	pte_t *kpte, old_pte;

	address = __cpa_addr(cpa, cpa->curpage);
repeat:
	kpte = _lookup_address_cpa(cpa, address, &level);
	if (!kpte)
		return __cpa_process_fault(cpa, address, primary);

	old_pte = *kpte;
	if (pte_none(old_pte))
		return __cpa_process_fault(cpa, address, primary);

	if (level == PG_LEVEL_4K) {
		pte_t new_pte;
		pgprot_t new_prot = pte_pgprot(old_pte);
		unsigned long pfn = pte_pfn(old_pte);

		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);

		cpa_inc_4k_install();
		/* Hand in lpsize = 0 to enforce the protection mechanism */
		new_prot = static_protections(new_prot, address, pfn, 1, 0,
					      CPA_PROTECT);

		new_prot = pgprot_clear_protnone_bits(new_prot);

		/*
		 * We need to keep the pfn from the existing PTE,
		 * after all we're only going to change its attributes,
		 * not the memory it points to.
		 */
		new_pte = pfn_pte(pfn, new_prot);
		cpa->pfn = pfn;
		/*
		 * Do we really change anything?
		 */
		if (pte_val(old_pte) != pte_val(new_pte)) {
			set_pte_atomic(kpte, new_pte);
			cpa->flags |= CPA_FLUSHTLB;
		}
		cpa->numpages = 1;
		return 0;
	}

	/*
	 * Check whether we can keep the large page intact
	 * and just change the pte:
	 */
	do_split = should_split_large_page(kpte, address, cpa);
	/*
	 * When the range fits into the existing large page, return.
	 * cpa->numpages and the CPA_FLUSHTLB flag have been updated in
	 * __should_split_large_page():
	 */
	if (do_split <= 0)
		return do_split;

	/*
	 * We have to split the large page:
	 */
	err = split_large_page(cpa, kpte, address);
	if (!err)
		goto repeat;

	return err;
}

static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);

static int cpa_process_alias(struct cpa_data *cpa)
{
	struct cpa_data alias_cpa;
	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
	unsigned long vaddr;
	int ret;

	if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
		return 0;

	/*
	 * No need to redo when the primary call already touched the
	 * direct mapping:
	 */
	vaddr = __cpa_addr(cpa, cpa->curpage);
	if (!(within(vaddr, PAGE_OFFSET,
		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {

		alias_cpa = *cpa;
		alias_cpa.vaddr = &laddr;
		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
		alias_cpa.curpage = 0;

		cpa->force_flush_all = 1;

		ret = __change_page_attr_set_clr(&alias_cpa, 0);
		if (ret)
			return ret;
	}

#ifdef CONFIG_X86_64
	/*
	 * If the primary call didn't touch the high mapping already
	 * and the physical address is inside the kernel map, we need
	 * to touch the high mapped kernel as well:
	 */
	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
	    __cpa_pfn_in_highmap(cpa->pfn)) {
		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
					       __START_KERNEL_map - phys_base;
		alias_cpa = *cpa;
		alias_cpa.vaddr = &temp_cpa_vaddr;
		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
		alias_cpa.curpage = 0;

		cpa->force_flush_all = 1;
		/*
		 * The high mapping range is imprecise, so ignore the
		 * return value.
		 */
		__change_page_attr_set_clr(&alias_cpa, 0);
	}
#endif

	return 0;
}

static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
{
	unsigned long numpages = cpa->numpages;
	unsigned long rempages = numpages;
	int ret = 0;

	while (rempages) {
		/*
		 * Store the remaining nr of pages for the large page
		 * preservation check.
		 */
		cpa->numpages = rempages;
		/* for array changes, we can't use large pages */
		if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
			cpa->numpages = 1;

		if (!debug_pagealloc_enabled())
			spin_lock(&cpa_lock);
		ret = __change_page_attr(cpa, checkalias);
		if (!debug_pagealloc_enabled())
			spin_unlock(&cpa_lock);
		if (ret)
			goto out;

		if (checkalias) {
			ret = cpa_process_alias(cpa);
			if (ret)
				goto out;
		}

		/*
		 * Adjust the number of pages with the result of the
		 * CPA operation. Either a large page has been
		 * preserved or a single page update happened.
1670 */ 1671 BUG_ON(cpa->numpages > rempages || !cpa->numpages); 1672 rempages -= cpa->numpages; 1673 cpa->curpage += cpa->numpages; 1674 } 1675 1676 out: 1677 /* Restore the original numpages */ 1678 cpa->numpages = numpages; 1679 return ret; 1680 } 1681 1682 static int change_page_attr_set_clr(unsigned long *addr, int numpages, 1683 pgprot_t mask_set, pgprot_t mask_clr, 1684 int force_split, int in_flag, 1685 struct page **pages) 1686 { 1687 struct cpa_data cpa; 1688 int ret, cache, checkalias; 1689 1690 memset(&cpa, 0, sizeof(cpa)); 1691 1692 /* 1693 * Check, if we are requested to set a not supported 1694 * feature. Clearing non-supported features is OK. 1695 */ 1696 mask_set = canon_pgprot(mask_set); 1697 1698 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split) 1699 return 0; 1700 1701 /* Ensure we are PAGE_SIZE aligned */ 1702 if (in_flag & CPA_ARRAY) { 1703 int i; 1704 for (i = 0; i < numpages; i++) { 1705 if (addr[i] & ~PAGE_MASK) { 1706 addr[i] &= PAGE_MASK; 1707 WARN_ON_ONCE(1); 1708 } 1709 } 1710 } else if (!(in_flag & CPA_PAGES_ARRAY)) { 1711 /* 1712 * in_flag of CPA_PAGES_ARRAY implies it is aligned. 1713 * No need to check in that case 1714 */ 1715 if (*addr & ~PAGE_MASK) { 1716 *addr &= PAGE_MASK; 1717 /* 1718 * People should not be passing in unaligned addresses: 1719 */ 1720 WARN_ON_ONCE(1); 1721 } 1722 } 1723 1724 /* Must avoid aliasing mappings in the highmem code */ 1725 kmap_flush_unused(); 1726 1727 vm_unmap_aliases(); 1728 1729 cpa.vaddr = addr; 1730 cpa.pages = pages; 1731 cpa.numpages = numpages; 1732 cpa.mask_set = mask_set; 1733 cpa.mask_clr = mask_clr; 1734 cpa.flags = 0; 1735 cpa.curpage = 0; 1736 cpa.force_split = force_split; 1737 1738 if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) 1739 cpa.flags |= in_flag; 1740 1741 /* No alias checking for _NX bit modifications */ 1742 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 1743 /* Has caller explicitly disabled alias checking? */ 1744 if (in_flag & CPA_NO_CHECK_ALIAS) 1745 checkalias = 0; 1746 1747 ret = __change_page_attr_set_clr(&cpa, checkalias); 1748 1749 /* 1750 * Check whether we really changed something: 1751 */ 1752 if (!(cpa.flags & CPA_FLUSHTLB)) 1753 goto out; 1754 1755 /* 1756 * No need to flush, when we did not set any of the caching 1757 * attributes: 1758 */ 1759 cache = !!pgprot2cachemode(mask_set); 1760 1761 /* 1762 * On error; flush everything to be sure. 1763 */ 1764 if (ret) { 1765 cpa_flush_all(cache); 1766 goto out; 1767 } 1768 1769 cpa_flush(&cpa, cache); 1770 out: 1771 return ret; 1772 } 1773 1774 static inline int change_page_attr_set(unsigned long *addr, int numpages, 1775 pgprot_t mask, int array) 1776 { 1777 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, 1778 (array ? CPA_ARRAY : 0), NULL); 1779 } 1780 1781 static inline int change_page_attr_clear(unsigned long *addr, int numpages, 1782 pgprot_t mask, int array) 1783 { 1784 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, 1785 (array ? 
CPA_ARRAY : 0), NULL); 1786 } 1787 1788 static inline int cpa_set_pages_array(struct page **pages, int numpages, 1789 pgprot_t mask) 1790 { 1791 return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, 1792 CPA_PAGES_ARRAY, pages); 1793 } 1794 1795 static inline int cpa_clear_pages_array(struct page **pages, int numpages, 1796 pgprot_t mask) 1797 { 1798 return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, 1799 CPA_PAGES_ARRAY, pages); 1800 } 1801 1802 /* 1803 * _set_memory_prot is an internal helper for callers that have been passed 1804 * a pgprot_t value from upper layers and a reservation has already been taken. 1805 * If you want to set the pgprot to a specific page protocol, use the 1806 * set_memory_xx() functions. 1807 */ 1808 int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) 1809 { 1810 return change_page_attr_set_clr(&addr, numpages, prot, 1811 __pgprot(~pgprot_val(prot)), 0, 0, 1812 NULL); 1813 } 1814 1815 int _set_memory_uc(unsigned long addr, int numpages) 1816 { 1817 /* 1818 * for now UC MINUS. see comments in ioremap() 1819 * If you really need strong UC use ioremap_uc(), but note 1820 * that you cannot override IO areas with set_memory_*() as 1821 * these helpers cannot work with IO memory. 1822 */ 1823 return change_page_attr_set(&addr, numpages, 1824 cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), 1825 0); 1826 } 1827 1828 int set_memory_uc(unsigned long addr, int numpages) 1829 { 1830 int ret; 1831 1832 /* 1833 * for now UC MINUS. see comments in ioremap() 1834 */ 1835 ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, 1836 _PAGE_CACHE_MODE_UC_MINUS, NULL); 1837 if (ret) 1838 goto out_err; 1839 1840 ret = _set_memory_uc(addr, numpages); 1841 if (ret) 1842 goto out_free; 1843 1844 return 0; 1845 1846 out_free: 1847 memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); 1848 out_err: 1849 return ret; 1850 } 1851 EXPORT_SYMBOL(set_memory_uc); 1852 1853 int _set_memory_wc(unsigned long addr, int numpages) 1854 { 1855 int ret; 1856 1857 ret = change_page_attr_set(&addr, numpages, 1858 cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), 1859 0); 1860 if (!ret) { 1861 ret = change_page_attr_set_clr(&addr, numpages, 1862 cachemode2pgprot(_PAGE_CACHE_MODE_WC), 1863 __pgprot(_PAGE_CACHE_MASK), 1864 0, 0, NULL); 1865 } 1866 return ret; 1867 } 1868 1869 int set_memory_wc(unsigned long addr, int numpages) 1870 { 1871 int ret; 1872 1873 ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, 1874 _PAGE_CACHE_MODE_WC, NULL); 1875 if (ret) 1876 return ret; 1877 1878 ret = _set_memory_wc(addr, numpages); 1879 if (ret) 1880 memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); 1881 1882 return ret; 1883 } 1884 EXPORT_SYMBOL(set_memory_wc); 1885 1886 int _set_memory_wt(unsigned long addr, int numpages) 1887 { 1888 return change_page_attr_set(&addr, numpages, 1889 cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0); 1890 } 1891 1892 int _set_memory_wb(unsigned long addr, int numpages) 1893 { 1894 /* WB cache mode is hard wired to all cache attribute bits being 0 */ 1895 return change_page_attr_clear(&addr, numpages, 1896 __pgprot(_PAGE_CACHE_MASK), 0); 1897 } 1898 1899 int set_memory_wb(unsigned long addr, int numpages) 1900 { 1901 int ret; 1902 1903 ret = _set_memory_wb(addr, numpages); 1904 if (ret) 1905 return ret; 1906 1907 memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); 1908 return 0; 1909 } 1910 EXPORT_SYMBOL(set_memory_wb); 1911 1912 int set_memory_x(unsigned long addr, int numpages) 1913 { 1914 if 
(!(__supported_pte_mask & _PAGE_NX)) 1915 return 0; 1916 1917 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); 1918 } 1919 1920 int set_memory_nx(unsigned long addr, int numpages) 1921 { 1922 if (!(__supported_pte_mask & _PAGE_NX)) 1923 return 0; 1924 1925 return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); 1926 } 1927 1928 int set_memory_ro(unsigned long addr, int numpages) 1929 { 1930 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); 1931 } 1932 1933 int set_memory_rw(unsigned long addr, int numpages) 1934 { 1935 return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); 1936 } 1937 1938 int set_memory_np(unsigned long addr, int numpages) 1939 { 1940 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); 1941 } 1942 1943 int set_memory_np_noalias(unsigned long addr, int numpages) 1944 { 1945 int cpa_flags = CPA_NO_CHECK_ALIAS; 1946 1947 return change_page_attr_set_clr(&addr, numpages, __pgprot(0), 1948 __pgprot(_PAGE_PRESENT), 0, 1949 cpa_flags, NULL); 1950 } 1951 1952 int set_memory_4k(unsigned long addr, int numpages) 1953 { 1954 return change_page_attr_set_clr(&addr, numpages, __pgprot(0), 1955 __pgprot(0), 1, 0, NULL); 1956 } 1957 1958 int set_memory_nonglobal(unsigned long addr, int numpages) 1959 { 1960 return change_page_attr_clear(&addr, numpages, 1961 __pgprot(_PAGE_GLOBAL), 0); 1962 } 1963 1964 int set_memory_global(unsigned long addr, int numpages) 1965 { 1966 return change_page_attr_set(&addr, numpages, 1967 __pgprot(_PAGE_GLOBAL), 0); 1968 } 1969 1970 static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) 1971 { 1972 struct cpa_data cpa; 1973 int ret; 1974 1975 /* Nothing to do if memory encryption is not active */ 1976 if (!mem_encrypt_active()) 1977 return 0; 1978 1979 /* Should not be working on unaligned addresses */ 1980 if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr)) 1981 addr &= PAGE_MASK; 1982 1983 memset(&cpa, 0, sizeof(cpa)); 1984 cpa.vaddr = &addr; 1985 cpa.numpages = numpages; 1986 cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0); 1987 cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC); 1988 cpa.pgd = init_mm.pgd; 1989 1990 /* Must avoid aliasing mappings in the highmem code */ 1991 kmap_flush_unused(); 1992 vm_unmap_aliases(); 1993 1994 /* 1995 * Before changing the encryption attribute, we need to flush caches. 1996 */ 1997 cpa_flush(&cpa, 1); 1998 1999 ret = __change_page_attr_set_clr(&cpa, 1); 2000 2001 /* 2002 * After changing the encryption attribute, we need to flush TLBs again 2003 * in case any speculative TLB caching occurred (but no need to flush 2004 * caches again). We could just use cpa_flush_all(), but in case TLB 2005 * flushing gets optimized in the cpa_flush() path use the same logic 2006 * as above. 
2007 */ 2008 cpa_flush(&cpa, 0); 2009 2010 return ret; 2011 } 2012 2013 int set_memory_encrypted(unsigned long addr, int numpages) 2014 { 2015 return __set_memory_enc_dec(addr, numpages, true); 2016 } 2017 EXPORT_SYMBOL_GPL(set_memory_encrypted); 2018 2019 int set_memory_decrypted(unsigned long addr, int numpages) 2020 { 2021 return __set_memory_enc_dec(addr, numpages, false); 2022 } 2023 EXPORT_SYMBOL_GPL(set_memory_decrypted); 2024 2025 int set_pages_uc(struct page *page, int numpages) 2026 { 2027 unsigned long addr = (unsigned long)page_address(page); 2028 2029 return set_memory_uc(addr, numpages); 2030 } 2031 EXPORT_SYMBOL(set_pages_uc); 2032 2033 static int _set_pages_array(struct page **pages, int numpages, 2034 enum page_cache_mode new_type) 2035 { 2036 unsigned long start; 2037 unsigned long end; 2038 enum page_cache_mode set_type; 2039 int i; 2040 int free_idx; 2041 int ret; 2042 2043 for (i = 0; i < numpages; i++) { 2044 if (PageHighMem(pages[i])) 2045 continue; 2046 start = page_to_pfn(pages[i]) << PAGE_SHIFT; 2047 end = start + PAGE_SIZE; 2048 if (memtype_reserve(start, end, new_type, NULL)) 2049 goto err_out; 2050 } 2051 2052 /* If WC, set to UC- first and then WC */ 2053 set_type = (new_type == _PAGE_CACHE_MODE_WC) ? 2054 _PAGE_CACHE_MODE_UC_MINUS : new_type; 2055 2056 ret = cpa_set_pages_array(pages, numpages, 2057 cachemode2pgprot(set_type)); 2058 if (!ret && new_type == _PAGE_CACHE_MODE_WC) 2059 ret = change_page_attr_set_clr(NULL, numpages, 2060 cachemode2pgprot( 2061 _PAGE_CACHE_MODE_WC), 2062 __pgprot(_PAGE_CACHE_MASK), 2063 0, CPA_PAGES_ARRAY, pages); 2064 if (ret) 2065 goto err_out; 2066 return 0; /* Success */ 2067 err_out: 2068 free_idx = i; 2069 for (i = 0; i < free_idx; i++) { 2070 if (PageHighMem(pages[i])) 2071 continue; 2072 start = page_to_pfn(pages[i]) << PAGE_SHIFT; 2073 end = start + PAGE_SIZE; 2074 memtype_free(start, end); 2075 } 2076 return -EINVAL; 2077 } 2078 2079 int set_pages_array_uc(struct page **pages, int numpages) 2080 { 2081 return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS); 2082 } 2083 EXPORT_SYMBOL(set_pages_array_uc); 2084 2085 int set_pages_array_wc(struct page **pages, int numpages) 2086 { 2087 return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC); 2088 } 2089 EXPORT_SYMBOL(set_pages_array_wc); 2090 2091 int set_pages_array_wt(struct page **pages, int numpages) 2092 { 2093 return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT); 2094 } 2095 EXPORT_SYMBOL_GPL(set_pages_array_wt); 2096 2097 int set_pages_wb(struct page *page, int numpages) 2098 { 2099 unsigned long addr = (unsigned long)page_address(page); 2100 2101 return set_memory_wb(addr, numpages); 2102 } 2103 EXPORT_SYMBOL(set_pages_wb); 2104 2105 int set_pages_array_wb(struct page **pages, int numpages) 2106 { 2107 int retval; 2108 unsigned long start; 2109 unsigned long end; 2110 int i; 2111 2112 /* WB cache mode is hard wired to all cache attribute bits being 0 */ 2113 retval = cpa_clear_pages_array(pages, numpages, 2114 __pgprot(_PAGE_CACHE_MASK)); 2115 if (retval) 2116 return retval; 2117 2118 for (i = 0; i < numpages; i++) { 2119 if (PageHighMem(pages[i])) 2120 continue; 2121 start = page_to_pfn(pages[i]) << PAGE_SHIFT; 2122 end = start + PAGE_SIZE; 2123 memtype_free(start, end); 2124 } 2125 2126 return 0; 2127 } 2128 EXPORT_SYMBOL(set_pages_array_wb); 2129 2130 int set_pages_ro(struct page *page, int numpages) 2131 { 2132 unsigned long addr = (unsigned long)page_address(page); 2133 2134 return set_memory_ro(addr, numpages); 2135 } 2136 2137 int 
set_pages_rw(struct page *page, int numpages)
{
	unsigned long addr = (unsigned long)page_address(page);

	return set_memory_rw(addr, numpages);
}

static int __set_pages_p(struct page *page, int numpages)
{
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.pgd = NULL,
				.numpages = numpages,
				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.mask_clr = __pgprot(0),
				.flags = 0};

	/*
	 * No alias checking needed for setting the present flag. Otherwise,
	 * we may need to break large pages for 64-bit kernel text
	 * mappings (this adds to complexity if we want to do this from
	 * atomic context especially). Let's keep it simple!
	 */
	return __change_page_attr_set_clr(&cpa, 0);
}

static int __set_pages_np(struct page *page, int numpages)
{
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.pgd = NULL,
				.numpages = numpages,
				.mask_set = __pgprot(0),
				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.flags = 0};

	/*
	 * No alias checking needed for clearing the present flag. Otherwise,
	 * we may need to break large pages for 64-bit kernel text
	 * mappings (this adds to complexity if we want to do this from
	 * atomic context especially). Let's keep it simple!
	 */
	return __change_page_attr_set_clr(&cpa, 0);
}

int set_direct_map_invalid_noflush(struct page *page)
{
	return __set_pages_np(page, 1);
}

int set_direct_map_default_noflush(struct page *page)
{
	return __set_pages_p(page, 1);
}

void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * The return value is ignored because the calls cannot fail.
	 * Large pages for identity mappings are not used at boot time,
	 * so no memory allocations happen during a large page split.
	 */
	if (enable)
		__set_pages_p(page, numpages);
	else
		__set_pages_np(page, numpages);

	/*
	 * We should perform an IPI and flush all TLBs, but that can deadlock,
	 * so flush only the current CPU.
	 * Preemption needs to be disabled around __flush_tlb_all() due to the
	 * CR3 reload in __native_flush_tlb().
	 */
	preempt_disable();
	__flush_tlb_all();
	preempt_enable();

	arch_flush_lazy_mmu_mode();
}

#ifdef CONFIG_HIBERNATION
bool kernel_page_present(struct page *page)
{
	unsigned int level;
	pte_t *pte;

	if (PageHighMem(page))
		return false;

	pte = lookup_address((unsigned long)page_address(page), &level);
	return (pte_val(*pte) & _PAGE_PRESENT);
}
#endif /* CONFIG_HIBERNATION */

int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
				   unsigned numpages, unsigned long page_flags)
{
	int retval = -EINVAL;

	struct cpa_data cpa = {
		.vaddr = &address,
		.pfn = pfn,
		.pgd = pgd,
		.numpages = numpages,
		.mask_set = __pgprot(0),
		.mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
		.flags = 0,
	};

	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");

	if (!(__supported_pte_mask & _PAGE_NX))
		goto out;

	if (!(page_flags & _PAGE_ENC))
		cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);

	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);

	retval = __change_page_attr_set_clr(&cpa, 0);
	__flush_tlb_all();

out:
	return retval;
}

/*
 * __flush_tlb_all() flushes mappings only on the current CPU, so this
 * function must not be used in an SMP environment. Presently, it is used
 * only during boot (well before smp_init()) by the EFI subsystem, which
 * is fine.
 */
int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
				     unsigned long numpages)
{
	int retval;

	/*
	 * The typical sequence for unmapping is to find a pte through
	 * lookup_address_in_pgd() (ideally, it should never return NULL
	 * because the address is already mapped) and change its protections.
	 * As the pfn is the *target* of a mapping, it is not useful while
	 * unmapping.
	 */
	struct cpa_data cpa = {
		.vaddr = &address,
		.pfn = 0,
		.pgd = pgd,
		.numpages = numpages,
		.mask_set = __pgprot(0),
		.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
		.flags = 0,
	};

	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");

	retval = __change_page_attr_set_clr(&cpa, 0);
	__flush_tlb_all();

	return retval;
}

/*
 * The testcases use internal knowledge of the implementation that shouldn't
 * be exposed to the rest of the kernel. Include these directly here.
 */
#ifdef CONFIG_CPA_DEBUG
#include "cpa-test.c"
#endif
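
/*
 * Illustrative usage sketches. These are kept as comments on purpose: they
 * are not part of this file's interface, the identifiers "buf", "order",
 * "pages" and "npages" are invented for the examples, and the error handling
 * is deliberately minimal.
 *
 * 1) Sharing a buffer with the hypervisor under SME/SEV. A hypothetical
 *    caller pairs set_memory_decrypted()/set_memory_encrypted() around the
 *    lifetime of the shared region; both helpers perform the required cache
 *    and TLB flushing internally (see __set_memory_enc_dec() above).
 *
 *	unsigned long buf = __get_free_pages(GFP_KERNEL, order);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	if (set_memory_decrypted(buf, 1 << order))
 *		return -EIO;
 *	... use the now-shared buffer ...
 *	set_memory_encrypted(buf, 1 << order);
 *	free_pages(buf, order);
 *
 * 2) Write-combining mappings for an array of pages. A hypothetical driver
 *    marks the pages WC for streaming writes and switches them back to WB
 *    before returning them to the page allocator, so that the memtype
 *    reservations taken in _set_pages_array() are released again.
 *
 *	if (set_pages_array_wc(pages, npages))
 *		return -EINVAL;
 *	... fill the pages with streaming writes ...
 *	set_pages_array_wb(pages, npages);
 */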