1 /* 2 * PPC64 (POWER4) Huge TLB Page Support for Kernel. 3 * 4 * Copyright (C) 2003 David Gibson, IBM Corporation. 5 * 6 * Based on the IA-32 version: 7 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> 8 */ 9 10 #include <linux/init.h> 11 #include <linux/fs.h> 12 #include <linux/mm.h> 13 #include <linux/hugetlb.h> 14 #include <linux/pagemap.h> 15 #include <linux/slab.h> 16 #include <linux/err.h> 17 #include <linux/sysctl.h> 18 #include <asm/mman.h> 19 #include <asm/pgalloc.h> 20 #include <asm/tlb.h> 21 #include <asm/tlbflush.h> 22 #include <asm/mmu_context.h> 23 #include <asm/machdep.h> 24 #include <asm/cputable.h> 25 #include <asm/spu.h> 26 27 #define PAGE_SHIFT_64K 16 28 #define PAGE_SHIFT_16M 24 29 #define PAGE_SHIFT_16G 34 30 31 #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) 32 #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) 33 #define MAX_NUMBER_GPAGES 1024 34 35 /* Tracks the 16G pages after the device tree is scanned and before the 36 * huge_boot_pages list is ready. */ 37 static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; 38 static unsigned nr_gpages; 39 40 /* Array of valid huge page sizes - non-zero value(hugepte_shift) is 41 * stored for the huge page sizes that are valid. 42 */ 43 unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ 44 45 #define hugepte_shift mmu_huge_psizes 46 #define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) 47 #define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) 48 49 #define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ 50 + hugepte_shift[psize]) 51 #define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) 52 #define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) 53 54 /* Subtract one from array size because we don't need a cache for 4K since 55 * is not a huge page size */ 56 #define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1) 57 #define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) 58 59 static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { 60 [MMU_PAGE_64K] = "hugepte_cache_64K", 61 [MMU_PAGE_1M] = "hugepte_cache_1M", 62 [MMU_PAGE_16M] = "hugepte_cache_16M", 63 [MMU_PAGE_16G] = "hugepte_cache_16G", 64 }; 65 66 /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() 67 * will choke on pointers to hugepte tables, which is handy for 68 * catching screwups early. */ 69 #define HUGEPD_OK 0x1 70 71 typedef struct { unsigned long pd; } hugepd_t; 72 73 #define hugepd_none(hpd) ((hpd).pd == 0) 74 75 static inline int shift_to_mmu_psize(unsigned int shift) 76 { 77 switch (shift) { 78 #ifndef CONFIG_PPC_64K_PAGES 79 case PAGE_SHIFT_64K: 80 return MMU_PAGE_64K; 81 #endif 82 case PAGE_SHIFT_16M: 83 return MMU_PAGE_16M; 84 case PAGE_SHIFT_16G: 85 return MMU_PAGE_16G; 86 } 87 return -1; 88 } 89 90 static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) 91 { 92 if (mmu_psize_defs[mmu_psize].shift) 93 return mmu_psize_defs[mmu_psize].shift; 94 BUG(); 95 } 96 97 static inline pte_t *hugepd_page(hugepd_t hpd) 98 { 99 BUG_ON(!(hpd.pd & HUGEPD_OK)); 100 return (pte_t *)(hpd.pd & ~HUGEPD_OK); 101 } 102 103 static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, 104 struct hstate *hstate) 105 { 106 unsigned int shift = huge_page_shift(hstate); 107 int psize = shift_to_mmu_psize(shift); 108 unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); 109 pte_t *dir = hugepd_page(*hpdp); 110 111 return dir + idx; 112 } 113 114 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, 115 unsigned long address, unsigned int psize) 116 { 117 pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], 118 GFP_KERNEL|__GFP_REPEAT); 119 120 if (! new) 121 return -ENOMEM; 122 123 spin_lock(&mm->page_table_lock); 124 if (!hugepd_none(*hpdp)) 125 kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new); 126 else 127 hpdp->pd = (unsigned long)new | HUGEPD_OK; 128 spin_unlock(&mm->page_table_lock); 129 return 0; 130 } 131 132 133 static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate) 134 { 135 if (huge_page_shift(hstate) < PUD_SHIFT) 136 return pud_offset(pgd, addr); 137 else 138 return (pud_t *) pgd; 139 } 140 static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, 141 struct hstate *hstate) 142 { 143 if (huge_page_shift(hstate) < PUD_SHIFT) 144 return pud_alloc(mm, pgd, addr); 145 else 146 return (pud_t *) pgd; 147 } 148 static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) 149 { 150 if (huge_page_shift(hstate) < PMD_SHIFT) 151 return pmd_offset(pud, addr); 152 else 153 return (pmd_t *) pud; 154 } 155 static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, 156 struct hstate *hstate) 157 { 158 if (huge_page_shift(hstate) < PMD_SHIFT) 159 return pmd_alloc(mm, pud, addr); 160 else 161 return (pmd_t *) pud; 162 } 163 164 /* Build list of addresses of gigantic pages. This function is used in early 165 * boot before the buddy or bootmem allocator is setup. 166 */ 167 void add_gpage(unsigned long addr, unsigned long page_size, 168 unsigned long number_of_pages) 169 { 170 if (!addr) 171 return; 172 while (number_of_pages > 0) { 173 gpage_freearray[nr_gpages] = addr; 174 nr_gpages++; 175 number_of_pages--; 176 addr += page_size; 177 } 178 } 179 180 /* Moves the gigantic page addresses from the temporary list to the 181 * huge_boot_pages list. 182 */ 183 int alloc_bootmem_huge_page(struct hstate *hstate) 184 { 185 struct huge_bootmem_page *m; 186 if (nr_gpages == 0) 187 return 0; 188 m = phys_to_virt(gpage_freearray[--nr_gpages]); 189 gpage_freearray[nr_gpages] = 0; 190 list_add(&m->list, &huge_boot_pages); 191 m->hstate = hstate; 192 return 1; 193 } 194 195 196 /* Modelled after find_linux_pte() */ 197 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 198 { 199 pgd_t *pg; 200 pud_t *pu; 201 pmd_t *pm; 202 203 unsigned int psize; 204 unsigned int shift; 205 unsigned long sz; 206 struct hstate *hstate; 207 psize = get_slice_psize(mm, addr); 208 shift = mmu_psize_to_shift(psize); 209 sz = ((1UL) << shift); 210 hstate = size_to_hstate(sz); 211 212 addr &= hstate->mask; 213 214 pg = pgd_offset(mm, addr); 215 if (!pgd_none(*pg)) { 216 pu = hpud_offset(pg, addr, hstate); 217 if (!pud_none(*pu)) { 218 pm = hpmd_offset(pu, addr, hstate); 219 if (!pmd_none(*pm)) 220 return hugepte_offset((hugepd_t *)pm, addr, 221 hstate); 222 } 223 } 224 225 return NULL; 226 } 227 228 pte_t *huge_pte_alloc(struct mm_struct *mm, 229 unsigned long addr, unsigned long sz) 230 { 231 pgd_t *pg; 232 pud_t *pu; 233 pmd_t *pm; 234 hugepd_t *hpdp = NULL; 235 struct hstate *hstate; 236 unsigned int psize; 237 hstate = size_to_hstate(sz); 238 239 psize = get_slice_psize(mm, addr); 240 BUG_ON(!mmu_huge_psizes[psize]); 241 242 addr &= hstate->mask; 243 244 pg = pgd_offset(mm, addr); 245 pu = hpud_alloc(mm, pg, addr, hstate); 246 247 if (pu) { 248 pm = hpmd_alloc(mm, pu, addr, hstate); 249 if (pm) 250 hpdp = (hugepd_t *)pm; 251 } 252 253 if (! hpdp) 254 return NULL; 255 256 if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) 257 return NULL; 258 259 return hugepte_offset(hpdp, addr, hstate); 260 } 261 262 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) 263 { 264 return 0; 265 } 266 267 static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, 268 unsigned int psize) 269 { 270 pte_t *hugepte = hugepd_page(*hpdp); 271 272 hpdp->pd = 0; 273 tlb->need_flush = 1; 274 pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, 275 HUGEPTE_CACHE_NUM+psize-1, 276 PGF_CACHENUM_MASK)); 277 } 278 279 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, 280 unsigned long addr, unsigned long end, 281 unsigned long floor, unsigned long ceiling, 282 unsigned int psize) 283 { 284 pmd_t *pmd; 285 unsigned long next; 286 unsigned long start; 287 288 start = addr; 289 pmd = pmd_offset(pud, addr); 290 do { 291 next = pmd_addr_end(addr, end); 292 if (pmd_none(*pmd)) 293 continue; 294 free_hugepte_range(tlb, (hugepd_t *)pmd, psize); 295 } while (pmd++, addr = next, addr != end); 296 297 start &= PUD_MASK; 298 if (start < floor) 299 return; 300 if (ceiling) { 301 ceiling &= PUD_MASK; 302 if (!ceiling) 303 return; 304 } 305 if (end - 1 > ceiling - 1) 306 return; 307 308 pmd = pmd_offset(pud, start); 309 pud_clear(pud); 310 pmd_free_tlb(tlb, pmd, start); 311 } 312 313 static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, 314 unsigned long addr, unsigned long end, 315 unsigned long floor, unsigned long ceiling) 316 { 317 pud_t *pud; 318 unsigned long next; 319 unsigned long start; 320 unsigned int shift; 321 unsigned int psize = get_slice_psize(tlb->mm, addr); 322 shift = mmu_psize_to_shift(psize); 323 324 start = addr; 325 pud = pud_offset(pgd, addr); 326 do { 327 next = pud_addr_end(addr, end); 328 if (shift < PMD_SHIFT) { 329 if (pud_none_or_clear_bad(pud)) 330 continue; 331 hugetlb_free_pmd_range(tlb, pud, addr, next, floor, 332 ceiling, psize); 333 } else { 334 if (pud_none(*pud)) 335 continue; 336 free_hugepte_range(tlb, (hugepd_t *)pud, psize); 337 } 338 } while (pud++, addr = next, addr != end); 339 340 start &= PGDIR_MASK; 341 if (start < floor) 342 return; 343 if (ceiling) { 344 ceiling &= PGDIR_MASK; 345 if (!ceiling) 346 return; 347 } 348 if (end - 1 > ceiling - 1) 349 return; 350 351 pud = pud_offset(pgd, start); 352 pgd_clear(pgd); 353 pud_free_tlb(tlb, pud, start); 354 } 355 356 /* 357 * This function frees user-level page tables of a process. 358 * 359 * Must be called with pagetable lock held. 360 */ 361 void hugetlb_free_pgd_range(struct mmu_gather *tlb, 362 unsigned long addr, unsigned long end, 363 unsigned long floor, unsigned long ceiling) 364 { 365 pgd_t *pgd; 366 unsigned long next; 367 unsigned long start; 368 369 /* 370 * Comments below take from the normal free_pgd_range(). They 371 * apply here too. The tests against HUGEPD_MASK below are 372 * essential, because we *don't* test for this at the bottom 373 * level. Without them we'll attempt to free a hugepte table 374 * when we unmap just part of it, even if there are other 375 * active mappings using it. 376 * 377 * The next few lines have given us lots of grief... 378 * 379 * Why are we testing HUGEPD* at this top level? Because 380 * often there will be no work to do at all, and we'd prefer 381 * not to go all the way down to the bottom just to discover 382 * that. 383 * 384 * Why all these "- 1"s? Because 0 represents both the bottom 385 * of the address space and the top of it (using -1 for the 386 * top wouldn't help much: the masks would do the wrong thing). 387 * The rule is that addr 0 and floor 0 refer to the bottom of 388 * the address space, but end 0 and ceiling 0 refer to the top 389 * Comparisons need to use "end - 1" and "ceiling - 1" (though 390 * that end 0 case should be mythical). 391 * 392 * Wherever addr is brought up or ceiling brought down, we 393 * must be careful to reject "the opposite 0" before it 394 * confuses the subsequent tests. But what about where end is 395 * brought down by HUGEPD_SIZE below? no, end can't go down to 396 * 0 there. 397 * 398 * Whereas we round start (addr) and ceiling down, by different 399 * masks at different levels, in order to test whether a table 400 * now has no other vmas using it, so can be freed, we don't 401 * bother to round floor or end up - the tests don't need that. 402 */ 403 unsigned int psize = get_slice_psize(tlb->mm, addr); 404 405 addr &= HUGEPD_MASK(psize); 406 if (addr < floor) { 407 addr += HUGEPD_SIZE(psize); 408 if (!addr) 409 return; 410 } 411 if (ceiling) { 412 ceiling &= HUGEPD_MASK(psize); 413 if (!ceiling) 414 return; 415 } 416 if (end - 1 > ceiling - 1) 417 end -= HUGEPD_SIZE(psize); 418 if (addr > end - 1) 419 return; 420 421 start = addr; 422 pgd = pgd_offset(tlb->mm, addr); 423 do { 424 psize = get_slice_psize(tlb->mm, addr); 425 BUG_ON(!mmu_huge_psizes[psize]); 426 next = pgd_addr_end(addr, end); 427 if (mmu_psize_to_shift(psize) < PUD_SHIFT) { 428 if (pgd_none_or_clear_bad(pgd)) 429 continue; 430 hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); 431 } else { 432 if (pgd_none(*pgd)) 433 continue; 434 free_hugepte_range(tlb, (hugepd_t *)pgd, psize); 435 } 436 } while (pgd++, addr = next, addr != end); 437 } 438 439 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 440 pte_t *ptep, pte_t pte) 441 { 442 if (pte_present(*ptep)) { 443 /* We open-code pte_clear because we need to pass the right 444 * argument to hpte_need_flush (huge / !huge). Might not be 445 * necessary anymore if we make hpte_need_flush() get the 446 * page size from the slices 447 */ 448 unsigned int psize = get_slice_psize(mm, addr); 449 unsigned int shift = mmu_psize_to_shift(psize); 450 unsigned long sz = ((1UL) << shift); 451 struct hstate *hstate = size_to_hstate(sz); 452 pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1); 453 } 454 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 455 } 456 457 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 458 pte_t *ptep) 459 { 460 unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); 461 return __pte(old); 462 } 463 464 struct page * 465 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 466 { 467 pte_t *ptep; 468 struct page *page; 469 unsigned int mmu_psize = get_slice_psize(mm, address); 470 471 /* Verify it is a huge page else bail. */ 472 if (!mmu_huge_psizes[mmu_psize]) 473 return ERR_PTR(-EINVAL); 474 475 ptep = huge_pte_offset(mm, address); 476 page = pte_page(*ptep); 477 if (page) { 478 unsigned int shift = mmu_psize_to_shift(mmu_psize); 479 unsigned long sz = ((1UL) << shift); 480 page += (address % sz) / PAGE_SIZE; 481 } 482 483 return page; 484 } 485 486 int pmd_huge(pmd_t pmd) 487 { 488 return 0; 489 } 490 491 int pud_huge(pud_t pud) 492 { 493 return 0; 494 } 495 496 struct page * 497 follow_huge_pmd(struct mm_struct *mm, unsigned long address, 498 pmd_t *pmd, int write) 499 { 500 BUG(); 501 return NULL; 502 } 503 504 505 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 506 unsigned long len, unsigned long pgoff, 507 unsigned long flags) 508 { 509 struct hstate *hstate = hstate_file(file); 510 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); 511 512 if (!mmu_huge_psizes[mmu_psize]) 513 return -EINVAL; 514 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); 515 } 516 517 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) 518 { 519 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); 520 521 return 1UL << mmu_psize_to_shift(psize); 522 } 523 524 /* 525 * Called by asm hashtable.S for doing lazy icache flush 526 */ 527 static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, 528 pte_t pte, int trap, unsigned long sz) 529 { 530 struct page *page; 531 int i; 532 533 if (!pfn_valid(pte_pfn(pte))) 534 return rflags; 535 536 page = pte_page(pte); 537 538 /* page is dirty */ 539 if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { 540 if (trap == 0x400) { 541 for (i = 0; i < (sz / PAGE_SIZE); i++) 542 __flush_dcache_icache(page_address(page+i)); 543 set_bit(PG_arch_1, &page->flags); 544 } else { 545 rflags |= HPTE_R_N; 546 } 547 } 548 return rflags; 549 } 550 551 int hash_huge_page(struct mm_struct *mm, unsigned long access, 552 unsigned long ea, unsigned long vsid, int local, 553 unsigned long trap) 554 { 555 pte_t *ptep; 556 unsigned long old_pte, new_pte; 557 unsigned long va, rflags, pa, sz; 558 long slot; 559 int err = 1; 560 int ssize = user_segment_size(ea); 561 unsigned int mmu_psize; 562 int shift; 563 mmu_psize = get_slice_psize(mm, ea); 564 565 if (!mmu_huge_psizes[mmu_psize]) 566 goto out; 567 ptep = huge_pte_offset(mm, ea); 568 569 /* Search the Linux page table for a match with va */ 570 va = hpt_va(ea, vsid, ssize); 571 572 /* 573 * If no pte found or not present, send the problem up to 574 * do_page_fault 575 */ 576 if (unlikely(!ptep || pte_none(*ptep))) 577 goto out; 578 579 /* 580 * Check the user's access rights to the page. If access should be 581 * prevented then send the problem up to do_page_fault. 582 */ 583 if (unlikely(access & ~pte_val(*ptep))) 584 goto out; 585 /* 586 * At this point, we have a pte (old_pte) which can be used to build 587 * or update an HPTE. There are 2 cases: 588 * 589 * 1. There is a valid (present) pte with no associated HPTE (this is 590 * the most common case) 591 * 2. There is a valid (present) pte with an associated HPTE. The 592 * current values of the pp bits in the HPTE prevent access 593 * because we are doing software DIRTY bit management and the 594 * page is currently not DIRTY. 595 */ 596 597 598 do { 599 old_pte = pte_val(*ptep); 600 if (old_pte & _PAGE_BUSY) 601 goto out; 602 new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; 603 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, 604 old_pte, new_pte)); 605 606 rflags = 0x2 | (!(new_pte & _PAGE_RW)); 607 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 608 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); 609 shift = mmu_psize_to_shift(mmu_psize); 610 sz = ((1UL) << shift); 611 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) 612 /* No CPU has hugepages but lacks no execute, so we 613 * don't need to worry about that case */ 614 rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), 615 trap, sz); 616 617 /* Check if pte already has an hpte (case 2) */ 618 if (unlikely(old_pte & _PAGE_HASHPTE)) { 619 /* There MIGHT be an HPTE for this pte */ 620 unsigned long hash, slot; 621 622 hash = hpt_hash(va, shift, ssize); 623 if (old_pte & _PAGE_F_SECOND) 624 hash = ~hash; 625 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 626 slot += (old_pte & _PAGE_F_GIX) >> 12; 627 628 if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, 629 ssize, local) == -1) 630 old_pte &= ~_PAGE_HPTEFLAGS; 631 } 632 633 if (likely(!(old_pte & _PAGE_HASHPTE))) { 634 unsigned long hash = hpt_hash(va, shift, ssize); 635 unsigned long hpte_group; 636 637 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; 638 639 repeat: 640 hpte_group = ((hash & htab_hash_mask) * 641 HPTES_PER_GROUP) & ~0x7UL; 642 643 /* clear HPTE slot informations in new PTE */ 644 #ifdef CONFIG_PPC_64K_PAGES 645 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; 646 #else 647 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; 648 #endif 649 /* Add in WIMG bits */ 650 rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | 651 _PAGE_COHERENT | _PAGE_GUARDED)); 652 653 /* Insert into the hash table, primary slot */ 654 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, 655 mmu_psize, ssize); 656 657 /* Primary is full, try the secondary */ 658 if (unlikely(slot == -1)) { 659 hpte_group = ((~hash & htab_hash_mask) * 660 HPTES_PER_GROUP) & ~0x7UL; 661 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 662 HPTE_V_SECONDARY, 663 mmu_psize, ssize); 664 if (slot == -1) { 665 if (mftb() & 0x1) 666 hpte_group = ((hash & htab_hash_mask) * 667 HPTES_PER_GROUP)&~0x7UL; 668 669 ppc_md.hpte_remove(hpte_group); 670 goto repeat; 671 } 672 } 673 674 if (unlikely(slot == -2)) 675 panic("hash_huge_page: pte_insert failed\n"); 676 677 new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); 678 } 679 680 /* 681 * No need to use ldarx/stdcx here 682 */ 683 *ptep = __pte(new_pte & ~_PAGE_BUSY); 684 685 err = 0; 686 687 out: 688 return err; 689 } 690 691 static void __init set_huge_psize(int psize) 692 { 693 /* Check that it is a page size supported by the hardware and 694 * that it fits within pagetable limits. */ 695 if (mmu_psize_defs[psize].shift && 696 mmu_psize_defs[psize].shift < SID_SHIFT_1T && 697 (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || 698 mmu_psize_defs[psize].shift == PAGE_SHIFT_64K || 699 mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) { 700 /* Return if huge page size has already been setup or is the 701 * same as the base page size. */ 702 if (mmu_huge_psizes[psize] || 703 mmu_psize_defs[psize].shift == PAGE_SHIFT) 704 return; 705 if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL)) 706 return; 707 hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); 708 709 switch (mmu_psize_defs[psize].shift) { 710 case PAGE_SHIFT_64K: 711 /* We only allow 64k hpages with 4k base page, 712 * which was checked above, and always put them 713 * at the PMD */ 714 hugepte_shift[psize] = PMD_SHIFT; 715 break; 716 case PAGE_SHIFT_16M: 717 /* 16M pages can be at two different levels 718 * of pagestables based on base page size */ 719 if (PAGE_SHIFT == PAGE_SHIFT_64K) 720 hugepte_shift[psize] = PMD_SHIFT; 721 else /* 4k base page */ 722 hugepte_shift[psize] = PUD_SHIFT; 723 break; 724 case PAGE_SHIFT_16G: 725 /* 16G pages are always at PGD level */ 726 hugepte_shift[psize] = PGDIR_SHIFT; 727 break; 728 } 729 hugepte_shift[psize] -= mmu_psize_defs[psize].shift; 730 } else 731 hugepte_shift[psize] = 0; 732 } 733 734 static int __init hugepage_setup_sz(char *str) 735 { 736 unsigned long long size; 737 int mmu_psize; 738 int shift; 739 740 size = memparse(str, &str); 741 742 shift = __ffs(size); 743 mmu_psize = shift_to_mmu_psize(shift); 744 if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) 745 set_huge_psize(mmu_psize); 746 else 747 printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); 748 749 return 1; 750 } 751 __setup("hugepagesz=", hugepage_setup_sz); 752 753 static int __init hugetlbpage_init(void) 754 { 755 unsigned int psize; 756 757 if (!cpu_has_feature(CPU_FTR_16M_PAGE)) 758 return -ENODEV; 759 760 /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE 761 * and adjust PTE_NONCACHE_NUM if the number of supported huge page 762 * sizes changes. 763 */ 764 set_huge_psize(MMU_PAGE_16M); 765 set_huge_psize(MMU_PAGE_16G); 766 767 /* Temporarily disable support for 64K huge pages when 64K SPU local 768 * store support is enabled as the current implementation conflicts. 769 */ 770 #ifndef CONFIG_SPU_FS_64K_LS 771 set_huge_psize(MMU_PAGE_64K); 772 #endif 773 774 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 775 if (mmu_huge_psizes[psize]) { 776 pgtable_cache[HUGE_PGTABLE_INDEX(psize)] = 777 kmem_cache_create( 778 HUGEPTE_CACHE_NAME(psize), 779 HUGEPTE_TABLE_SIZE(psize), 780 HUGEPTE_TABLE_SIZE(psize), 781 0, 782 NULL); 783 if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)]) 784 panic("hugetlbpage_init(): could not create %s"\ 785 "\n", HUGEPTE_CACHE_NAME(psize)); 786 } 787 } 788 789 return 0; 790 } 791 792 module_init(hugetlbpage_init); 793