/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>

#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)

/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        pte_t *pt;

        BUG_ON(!in_hugepage_area(mm->context, addr));

        addr &= HPAGE_MASK;

        pg = pgd_offset(mm, addr);
        if (!pgd_none(*pg)) {
                pu = pud_offset(pg, addr);
                if (!pud_none(*pu)) {
                        pm = pmd_offset(pu, addr);
#ifdef CONFIG_PPC_64K_PAGES
                        /* Currently, we use the normal PTE offset within full
                         * size PTE pages, thus our huge PTEs are scattered in
                         * the PTE page and we do waste some.  We may change
                         * that in the future, but the current mechanism keeps
                         * things much simpler
                         */
                        if (!pmd_none(*pm)) {
                                /* Note: pte_offset_* are all equivalent on
                                 * ppc64 as we don't have HIGHMEM
                                 */
                                pt = pte_offset_kernel(pm, addr);
                                return pt;
                        }
#else /* CONFIG_PPC_64K_PAGES */
                        /* On 4k pages, we put huge PTEs in the PMD page */
                        pt = (pte_t *)pm;
                        return pt;
#endif /* CONFIG_PPC_64K_PAGES */
                }
        }

        return NULL;
}

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        pte_t *pt;

        BUG_ON(!in_hugepage_area(mm->context, addr));

        addr &= HPAGE_MASK;

        pg = pgd_offset(mm, addr);
        pu = pud_alloc(mm, pg, addr);

        if (pu) {
                pm = pmd_alloc(mm, pu, addr);
                if (pm) {
#ifdef CONFIG_PPC_64K_PAGES
                        /* See comment in huge_pte_offset.  Note that if we
                         * ever want to put the page size in the PMD, we would
                         * have to open code our own pte_alloc* function in
                         * order to populate and set the size atomically
                         */
                        pt = pte_alloc_map(mm, pm, addr);
#else /* CONFIG_PPC_64K_PAGES */
                        pt = (pte_t *)pm;
#endif /* CONFIG_PPC_64K_PAGES */
                        return pt;
                }
        }

        return NULL;
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        if (pte_present(*ptep)) {
                /* We open-code pte_clear because we need to pass the right
                 * argument to hpte_update (huge / !huge)
                 */
                unsigned long old = pte_update(ptep, ~0UL);
                if (old & _PAGE_HASHPTE)
                        hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
                flush_tlb_pending();
        }
        *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned long old = pte_update(ptep, ~0UL);

        if (old & _PAGE_HASHPTE)
                hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
        *ptep = __pte(0);

        return __pte(old);
}
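/*
 * Illustrative sketch only (not part of this file): the generic hugetlb
 * fault path in mm/hugetlb.c is the expected caller of the helpers above,
 * roughly along these lines (names approximate, locking and error handling
 * omitted):
 *
 *	ptep = huge_pte_alloc(mm, address & HPAGE_MASK);
 *	if (ptep && pte_none(*ptep))
 *		set_huge_pte_at(mm, address, ptep,
 *				mk_pte(page, vma->vm_page_prot));
 *
 * Teardown goes through huge_ptep_get_and_clear(), so any hashed page
 * table entry is invalidated via hpte_update() before the Linux PTE is
 * cleared.
 */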
/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
        if (!(within_hugepage_low_range(addr, len)
              || within_hugepage_high_range(addr, len)))
                return -EINVAL;
        return 0;
}

struct slb_flush_info {
        struct mm_struct *mm;
        u16 newareas;
};

static void flush_low_segments(void *parm)
{
        struct slb_flush_info *fi = parm;
        unsigned long i;

        BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);

        /* Only need to do anything if this CPU is working in the same
         * mm as the one which has changed */
        if (current->active_mm != fi->mm)
                return;

        /* update the paca copy of the context struct */
        get_paca()->context = current->active_mm->context;

        asm volatile("isync" : : : "memory");
        for (i = 0; i < NUM_LOW_AREAS; i++) {
                if (!(fi->newareas & (1U << i)))
                        continue;
                asm volatile("slbie %0"
                             : : "r" ((i << SID_SHIFT) | SLBIE_C));
        }
        asm volatile("isync" : : : "memory");
}

static void flush_high_segments(void *parm)
{
        struct slb_flush_info *fi = parm;
        unsigned long i, j;

        BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);

        /* Only need to do anything if this CPU is working in the same
         * mm as the one which has changed */
        if (current->active_mm != fi->mm)
                return;

        /* update the paca copy of the context struct */
        get_paca()->context = current->active_mm->context;

        asm volatile("isync" : : : "memory");
        for (i = 0; i < NUM_HIGH_AREAS; i++) {
                if (!(fi->newareas & (1U << i)))
                        continue;
                for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
                        asm volatile("slbie %0"
                                     : : "r" (((i << HTLB_AREA_SHIFT)
                                               + (j << SID_SHIFT)) | SLBIE_C));
        }
        asm volatile("isync" : : : "memory");
}

static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
        unsigned long start = area << SID_SHIFT;
        unsigned long end = (area+1) << SID_SHIFT;
        struct vm_area_struct *vma;

        BUG_ON(area >= NUM_LOW_AREAS);

        /* Check no VMAs are in the region */
        vma = find_vma(mm, start);
        if (vma && (vma->vm_start < end))
                return -EBUSY;

        return 0;
}

static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
        unsigned long start = area << HTLB_AREA_SHIFT;
        unsigned long end = (area+1) << HTLB_AREA_SHIFT;
        struct vm_area_struct *vma;

        BUG_ON(area >= NUM_HIGH_AREAS);

        /* Hack, so that each address is controlled by exactly one
         * of the high or low area bitmaps, the first high area starts
         * at 4GB, not 0 */
        if (start == 0)
                start = 0x100000000UL;

        /* Check no VMAs are in the region */
        vma = find_vma(mm, start);
        if (vma && (vma->vm_start < end))
                return -EBUSY;

        return 0;
}
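/*
 * Worked example of the area bookkeeping used below.  The authoritative
 * shift values live in the headers; the figures here assume the usual
 * ppc64 configuration of SID_SHIFT == 28 (256MB low segments) and
 * HTLB_AREA_SHIFT == 40 (1TB high areas):
 *
 *   - a 32-bit mapping over [0x30000000, 0x50000000) touches low segments
 *     3 and 4, so LOW_ESID_MASK() yields 0x0018 and those two bits of
 *     mm->context.low_htlb_areas must be opened;
 *   - a 64-bit mapping anywhere in [4GB, 1TB) lies in high area 0, so
 *     HTLB_AREA_MASK() yields 0x0001.
 *
 * Both bitmaps are u16, matching sixteen low and sixteen high areas, which
 * the BUILD_BUG_ON()s below double-check.
 */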
static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
        unsigned long i;
        struct slb_flush_info fi;

        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

        newareas &= ~(mm->context.low_htlb_areas);
        if (!newareas)
                return 0; /* The segments we want are already open */

        for (i = 0; i < NUM_LOW_AREAS; i++)
                if ((1 << i) & newareas)
                        if (prepare_low_area_for_htlb(mm, i) != 0)
                                return -EBUSY;

        mm->context.low_htlb_areas |= newareas;

        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();

        fi.mm = mm;
        fi.newareas = newareas;
        on_each_cpu(flush_low_segments, &fi, 0, 1);

        return 0;
}

static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
        struct slb_flush_info fi;
        unsigned long i;

        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
                     != NUM_HIGH_AREAS);

        newareas &= ~(mm->context.high_htlb_areas);
        if (!newareas)
                return 0; /* The areas we want are already open */

        for (i = 0; i < NUM_HIGH_AREAS; i++)
                if ((1 << i) & newareas)
                        if (prepare_high_area_for_htlb(mm, i) != 0)
                                return -EBUSY;

        mm->context.high_htlb_areas |= newareas;

        /* update the paca copy of the context struct */
        get_paca()->context = mm->context;

        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();

        fi.mm = mm;
        fi.newareas = newareas;
        on_each_cpu(flush_high_segments, &fi, 0, 1);

        return 0;
}

int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
        int err = 0;

        if ((addr + len) < addr)
                return -EINVAL;

        if (addr < 0x100000000UL)
                err = open_low_hpage_areas(current->mm,
                                           LOW_ESID_MASK(addr, len));
        if ((!err) && ((addr + len) > 0x100000000UL))
                err = open_high_hpage_areas(current->mm,
                                            HTLB_AREA_MASK(addr, len));
        if (err) {
                printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
                       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
                       addr, len,
                       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
                return err;
        }

        return 0;
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        pte_t *ptep;
        struct page *page;

        if (!in_hugepage_area(mm->context, address))
                return ERR_PTR(-EINVAL);

        ptep = huge_pte_offset(mm, address);
        page = pte_page(*ptep);
        if (page)
                page += (address % HPAGE_SIZE) / PAGE_SIZE;

        return page;
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        BUG();
        return NULL;
}
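/*
 * Example of how prepare_hugepage_range() above splits a request (same
 * assumed 256MB/1TB area sizes as the earlier example): a hugepage mapping
 * over [0xf0000000, 0x110000000) straddles the 4GB boundary, so both
 * open_low_hpage_areas() (for the topmost low segment) and
 * open_high_hpage_areas() (for high area 0) are called, and a -EBUSY from
 * either one, meaning ordinary VMAs already occupy the area, fails the
 * whole request.
 */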
/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
                                     unsigned long len, unsigned long pgoff,
                                     unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;

        if (len > TASK_SIZE)
                return -ENOMEM;

        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (((TASK_SIZE - len) >= addr)
                    && (!vma || (addr + len) <= vma->vm_start)
                    && !is_hugepage_only_range(mm, addr, len))
                        return addr;
        }
        if (len > mm->cached_hole_size) {
                start_addr = addr = mm->free_area_cache;
        } else {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
        }

full_search:
        vma = find_vma(mm, addr);
        while (TASK_SIZE - len >= addr) {
                BUG_ON(vma && (addr >= vma->vm_end));

                if (touches_hugepage_low_range(mm, addr, len)) {
                        addr = ALIGN(addr+1, 1<<SID_SHIFT);
                        vma = find_vma(mm, addr);
                        continue;
                }
                if (touches_hugepage_high_range(mm, addr, len)) {
                        addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
                        vma = find_vma(mm, addr);
                        continue;
                }
                if (!vma || addr + len <= vma->vm_start) {
                        /*
                         * Remember the place where we stopped the search:
                         */
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;
                addr = vma->vm_end;
                vma = vma->vm_next;
        }

        /* Make sure we didn't miss any holes */
        if (start_addr != TASK_UNMAPPED_BASE) {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
                goto full_search;
        }
        return -ENOMEM;
}
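/*
 * Illustration of the skip-ahead in the bottom-up search above, assuming
 * 256MB low segments: if the cursor reaches 0x50000000 while this mm has
 * segment 5 reserved for hugepages, touches_hugepage_low_range() fires and
 * the cursor jumps to ALIGN(0x50000001, 0x10000000) == 0x60000000 before
 * the VMA walk resumes.  High areas are stepped over the same way, in
 * HTLB_AREA_SHIFT-sized strides.
 */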
/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 *
 * Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions.
 */
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                               const unsigned long len, const unsigned long pgoff,
                               const unsigned long flags)
{
        struct vm_area_struct *vma, *prev_vma;
        struct mm_struct *mm = current->mm;
        unsigned long base = mm->mmap_base, addr = addr0;
        unsigned long largest_hole = mm->cached_hole_size;
        int first_time = 1;

        /* requested length too big for entire address space */
        if (len > TASK_SIZE)
                return -ENOMEM;

        /* don't allow allocations above current base */
        if (mm->free_area_cache > base)
                mm->free_area_cache = base;

        /* requesting a specific address */
        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start)
                    && !is_hugepage_only_range(mm, addr, len))
                        return addr;
        }

        if (len <= largest_hole) {
                largest_hole = 0;
                mm->free_area_cache = base;
        }
try_again:
        /* make sure it can fit in the remaining address space */
        if (mm->free_area_cache < len)
                goto fail;

        /* either no address requested or can't fit in requested address hole */
        addr = (mm->free_area_cache - len) & PAGE_MASK;
        do {
hugepage_recheck:
                if (touches_hugepage_low_range(mm, addr, len)) {
                        addr = (addr & ((~0) << SID_SHIFT)) - len;
                        goto hugepage_recheck;
                } else if (touches_hugepage_high_range(mm, addr, len)) {
                        addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
                        goto hugepage_recheck;
                }

                /*
                 * Lookup failure means no vma is above this address,
                 * i.e. return with success:
                 */
                if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
                        return addr;

                /*
                 * new region fits between prev_vma->vm_end and
                 * vma->vm_start, use it:
                 */
                if (addr+len <= vma->vm_start &&
                    (!prev_vma || (addr >= prev_vma->vm_end))) {
                        /* remember the address as a hint for next time */
                        mm->cached_hole_size = largest_hole;
                        return (mm->free_area_cache = addr);
                } else {
                        /* pull free_area_cache down to the first hole */
                        if (mm->free_area_cache == vma->vm_end) {
                                mm->free_area_cache = vma->vm_start;
                                mm->cached_hole_size = largest_hole;
                        }
                }

                /* remember the largest hole we saw so far */
                if (addr + largest_hole < vma->vm_start)
                        largest_hole = vma->vm_start - addr;

                /* try just below the current vma->vm_start */
                addr = vma->vm_start-len;
        } while (len <= vma->vm_start);

fail:
        /*
         * if hint left us with no space for the requested
         * mapping then try again:
         */
        if (first_time) {
                mm->free_area_cache = base;
                largest_hole = 0;
                first_time = 0;
                goto try_again;
        }
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
        addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = base;
        mm->cached_hole_size = ~0UL;

        return addr;
}
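/*
 * The hugepage_recheck step in the top-down allocator above snaps a
 * colliding candidate to just below the reserved area and retries.  For
 * instance (again assuming 256MB low segments), with segment 8 open for
 * hugepages, a candidate of addr == 0x8ff00000, len == 0x200000 becomes
 * (0x8ff00000 & 0xf0000000) - 0x200000 == 0x7fe00000, so the mapping now
 * ends exactly at the bottom of the reserved segment.
 */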
539 */ 540 mm->free_area_cache = TASK_UNMAPPED_BASE; 541 mm->cached_hole_size = ~0UL; 542 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); 543 /* 544 * Restore the topdown base: 545 */ 546 mm->free_area_cache = base; 547 mm->cached_hole_size = ~0UL; 548 549 return addr; 550 } 551 552 static int htlb_check_hinted_area(unsigned long addr, unsigned long len) 553 { 554 struct vm_area_struct *vma; 555 556 vma = find_vma(current->mm, addr); 557 if (!vma || ((addr + len) <= vma->vm_start)) 558 return 0; 559 560 return -ENOMEM; 561 } 562 563 static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) 564 { 565 unsigned long addr = 0; 566 struct vm_area_struct *vma; 567 568 vma = find_vma(current->mm, addr); 569 while (addr + len <= 0x100000000UL) { 570 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ 571 572 if (! __within_hugepage_low_range(addr, len, segmask)) { 573 addr = ALIGN(addr+1, 1<<SID_SHIFT); 574 vma = find_vma(current->mm, addr); 575 continue; 576 } 577 578 if (!vma || (addr + len) <= vma->vm_start) 579 return addr; 580 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 581 /* Depending on segmask this might not be a confirmed 582 * hugepage region, so the ALIGN could have skipped 583 * some VMAs */ 584 vma = find_vma(current->mm, addr); 585 } 586 587 return -ENOMEM; 588 } 589 590 static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) 591 { 592 unsigned long addr = 0x100000000UL; 593 struct vm_area_struct *vma; 594 595 vma = find_vma(current->mm, addr); 596 while (addr + len <= TASK_SIZE_USER64) { 597 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ 598 599 if (! __within_hugepage_high_range(addr, len, areamask)) { 600 addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); 601 vma = find_vma(current->mm, addr); 602 continue; 603 } 604 605 if (!vma || (addr + len) <= vma->vm_start) 606 return addr; 607 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 608 /* Depending on segmask this might not be a confirmed 609 * hugepage region, so the ALIGN could have skipped 610 * some VMAs */ 611 vma = find_vma(current->mm, addr); 612 } 613 614 return -ENOMEM; 615 } 616 617 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 618 unsigned long len, unsigned long pgoff, 619 unsigned long flags) 620 { 621 int lastshift; 622 u16 areamask, curareas; 623 624 if (HPAGE_SHIFT == 0) 625 return -EINVAL; 626 if (len & ~HPAGE_MASK) 627 return -EINVAL; 628 629 if (!cpu_has_feature(CPU_FTR_16M_PAGE)) 630 return -EINVAL; 631 632 /* Paranoia, caller should have dealt with this */ 633 BUG_ON((addr + len) < addr); 634 635 if (test_thread_flag(TIF_32BIT)) { 636 /* Paranoia, caller should have dealt with this */ 637 BUG_ON((addr + len) > 0x100000000UL); 638 639 curareas = current->mm->context.low_htlb_areas; 640 641 /* First see if we can use the hint address */ 642 if (addr && (htlb_check_hinted_area(addr, len) == 0)) { 643 areamask = LOW_ESID_MASK(addr, len); 644 if (open_low_hpage_areas(current->mm, areamask) == 0) 645 return addr; 646 } 647 648 /* Next see if we can map in the existing low areas */ 649 addr = htlb_get_low_area(len, curareas); 650 if (addr != -ENOMEM) 651 return addr; 652 653 /* Finally go looking for areas to open */ 654 lastshift = 0; 655 for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); 656 ! 
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                        unsigned long len, unsigned long pgoff,
                                        unsigned long flags)
{
        int lastshift;
        u16 areamask, curareas;

        if (HPAGE_SHIFT == 0)
                return -EINVAL;
        if (len & ~HPAGE_MASK)
                return -EINVAL;

        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                return -EINVAL;

        /* Paranoia, caller should have dealt with this */
        BUG_ON((addr + len) < addr);

        if (test_thread_flag(TIF_32BIT)) {
                /* Paranoia, caller should have dealt with this */
                BUG_ON((addr + len) > 0x100000000UL);

                curareas = current->mm->context.low_htlb_areas;

                /* First see if we can use the hint address */
                if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
                        areamask = LOW_ESID_MASK(addr, len);
                        if (open_low_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }

                /* Next see if we can map in the existing low areas */
                addr = htlb_get_low_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;

                /* Finally go looking for areas to open */
                lastshift = 0;
                for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
                     !lastshift; areamask >>= 1) {
                        if (areamask & 1)
                                lastshift = 1;

                        addr = htlb_get_low_area(len, curareas | areamask);
                        if ((addr != -ENOMEM)
                            && open_low_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }
        } else {
                curareas = current->mm->context.high_htlb_areas;

                /* First see if we can use the hint address */
                /* We discourage 64-bit processes from doing hugepage
                 * mappings below 4GB (must use MAP_FIXED) */
                if ((addr >= 0x100000000UL)
                    && (htlb_check_hinted_area(addr, len) == 0)) {
                        areamask = HTLB_AREA_MASK(addr, len);
                        if (open_high_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }

                /* Next see if we can map in the existing high areas */
                addr = htlb_get_high_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;

                /* Finally go looking for areas to open */
                lastshift = 0;
                for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
                     !lastshift; areamask >>= 1) {
                        if (areamask & 1)
                                lastshift = 1;

                        addr = htlb_get_high_area(len, curareas | areamask);
                        if ((addr != -ENOMEM)
                            && open_high_hpage_areas(current->mm, areamask) == 0)
                                return addr;
                }
        }
        printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
               " enough areas\n");
        return -ENOMEM;
}

/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
                                                  pte_t pte, int trap)
{
        struct page *page;
        int i;

        if (!pfn_valid(pte_pfn(pte)))
                return rflags;

        page = pte_page(pte);

        /* page is dirty */
        if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
                if (trap == 0x400) {
                        for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
                                __flush_dcache_icache(page_address(page+i));
                        set_bit(PG_arch_1, &page->flags);
                } else {
                        rflags |= HPTE_R_N;
                }
        }
        return rflags;
}
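/*
 * hash_huge_page() below records where it put the HPTE so that a later
 * fault can update it in place instead of searching: the 3-bit slot index
 * within the HPTE group is kept in the PTE under _PAGE_F_GIX (hence the
 * "<< 12" / ">> 12" below), and _PAGE_F_SECOND remembers that the
 * secondary hash was used, in which case the group is located via ~hash
 * rather than hash.
 */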
766 */ 767 768 769 do { 770 old_pte = pte_val(*ptep); 771 if (old_pte & _PAGE_BUSY) 772 goto out; 773 new_pte = old_pte | _PAGE_BUSY | 774 _PAGE_ACCESSED | _PAGE_HASHPTE; 775 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, 776 old_pte, new_pte)); 777 778 rflags = 0x2 | (!(new_pte & _PAGE_RW)); 779 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 780 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); 781 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) 782 /* No CPU has hugepages but lacks no execute, so we 783 * don't need to worry about that case */ 784 rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), 785 trap); 786 787 /* Check if pte already has an hpte (case 2) */ 788 if (unlikely(old_pte & _PAGE_HASHPTE)) { 789 /* There MIGHT be an HPTE for this pte */ 790 unsigned long hash, slot; 791 792 hash = hpt_hash(va, HPAGE_SHIFT); 793 if (old_pte & _PAGE_F_SECOND) 794 hash = ~hash; 795 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 796 slot += (old_pte & _PAGE_F_GIX) >> 12; 797 798 if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, 799 local) == -1) 800 old_pte &= ~_PAGE_HPTEFLAGS; 801 } 802 803 if (likely(!(old_pte & _PAGE_HASHPTE))) { 804 unsigned long hash = hpt_hash(va, HPAGE_SHIFT); 805 unsigned long hpte_group; 806 807 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; 808 809 repeat: 810 hpte_group = ((hash & htab_hash_mask) * 811 HPTES_PER_GROUP) & ~0x7UL; 812 813 /* clear HPTE slot informations in new PTE */ 814 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; 815 816 /* Add in WIMG bits */ 817 /* XXX We should store these in the pte */ 818 /* --BenH: I think they are ... */ 819 rflags |= _PAGE_COHERENT; 820 821 /* Insert into the hash table, primary slot */ 822 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, 823 mmu_huge_psize); 824 825 /* Primary is full, try the secondary */ 826 if (unlikely(slot == -1)) { 827 new_pte |= _PAGE_F_SECOND; 828 hpte_group = ((~hash & htab_hash_mask) * 829 HPTES_PER_GROUP) & ~0x7UL; 830 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 831 HPTE_V_SECONDARY, 832 mmu_huge_psize); 833 if (slot == -1) { 834 if (mftb() & 0x1) 835 hpte_group = ((hash & htab_hash_mask) * 836 HPTES_PER_GROUP)&~0x7UL; 837 838 ppc_md.hpte_remove(hpte_group); 839 goto repeat; 840 } 841 } 842 843 if (unlikely(slot == -2)) 844 panic("hash_huge_page: pte_insert failed\n"); 845 846 new_pte |= (slot << 12) & _PAGE_F_GIX; 847 } 848 849 /* 850 * No need to use ldarx/stdcx here 851 */ 852 *ptep = __pte(new_pte & ~_PAGE_BUSY); 853 854 err = 0; 855 856 out: 857 return err; 858 } 859