/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff.
 */
#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
#define VA_EXCLUDE_END   (0xfffff80000000000UL + (1UL << 32UL))

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
							unsigned long addr,
							unsigned long len,
							unsigned long pgoff,
							unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long task_size = TASK_SIZE;
	unsigned long start_addr;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;
	if (unlikely(len >= VA_EXCLUDE_START))
		return -ENOMEM;

	if (len > mm->cached_hole_size) {
		start_addr = addr = mm->free_area_cache;
	} else {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

	task_size -= len;

full_search:
	addr = ALIGN(addr, HPAGE_SIZE);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (addr < VA_EXCLUDE_START &&
		    (addr + len) >= VA_EXCLUDE_START) {
			addr = VA_EXCLUDE_END;
			vma = find_vma(mm, VA_EXCLUDE_END);
		}
		if (unlikely(task_size < addr)) {
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = addr = TASK_UNMAPPED_BASE;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (likely(!vma || addr + len <= vma->vm_start)) {
			/*
			 * Remember the place where we stopped the search:
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;

		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
	}
}

static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
				  const unsigned long len,
				  const unsigned long pgoff,
				  const unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;

	/* This should only ever run for 32-bit processes. */
	BUG_ON(!test_thread_flag(TIF_32BIT));

	/* check if free_area_cache is useful for us */
	if (len <= mm->cached_hole_size) {
		mm->cached_hole_size = 0;
		mm->free_area_cache = mm->mmap_base;
	}

	/* either no address requested or can't fit in requested address hole */
	addr = mm->free_area_cache & HPAGE_MASK;

	/* make sure it can fit in the remaining address space */
	if (likely(addr > len)) {
		vma = find_vma(mm, addr - len);
		if (!vma || addr <= vma->vm_start) {
			/* remember the address as a hint for next time */
			return (mm->free_area_cache = addr - len);
		}
	}

	if (unlikely(mm->mmap_base < len))
		goto bottomup;

	addr = (mm->mmap_base - len) & HPAGE_MASK;

	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * else if new region fits below vma->vm_start,
		 * return with success:
		 */
		vma = find_vma(mm, addr);
		if (likely(!vma || addr + len <= vma->vm_start)) {
			/* remember the address as a hint for next time */
			return (mm->free_area_cache = addr);
		}

		/* remember the largest hole we saw so far */
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = (vma->vm_start - len) & HPAGE_MASK;
	} while (likely(len < vma->vm_start));

bottomup:
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->cached_hole_size = ~0UL;
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = mm->mmap_base;
	mm->cached_hole_size = ~0UL;

	return addr;
}

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
			  unsigned long len, unsigned long pgoff,
			  unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long task_size = TASK_SIZE;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > task_size)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, HPAGE_SIZE);
		vma = find_vma(mm, addr);
		if (task_size - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
							  pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
							 pgoff, flags);
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;

	/* We must align the address, because our caller will run
	 * set_huge_pte_at() on whatever we return, which writes out
	 * all of the sub-ptes for the hugepage range.  So we have
	 * to give it the first such sub-pte.
	 */
	addr &= HPAGE_MASK;

	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (pud) {
		pmd = pmd_alloc(mm, pud, addr);
		if (pmd)
			pte = pte_alloc_map(mm, pmd, addr);
	}
	return pte;
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;

	addr &= HPAGE_MASK;

	pgd = pgd_offset(mm, addr);
	if (!pgd_none(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd))
				pte = pte_offset_map(pmd, addr);
		}
	}
	return pte;
}

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}

/* Install a huge mapping by writing every base-page sub-pte backing the
 * hugepage, stepping the physical address by PAGE_SIZE for each one, and
 * keep a per-mm count of present huge mappings.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	int i;

	if (!pte_present(*ptep) && pte_present(entry))
		mm->context.huge_pte_count++;

	addr &= HPAGE_MASK;
	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
		set_pte_at(mm, addr, ptep, entry);
		ptep++;
		addr += PAGE_SIZE;
		pte_val(entry) += PAGE_SIZE;
	}
}

/* Tear down a huge mapping by clearing all of its sub-ptes; the original
 * first sub-pte is returned.
 */
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	pte_t entry;
	int i;

	entry = *ptep;
	if (pte_present(entry))
		mm->context.huge_pte_count--;

	addr &= HPAGE_MASK;

	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
		pte_clear(mm, addr, ptep);
		addr += PAGE_SIZE;
		ptep++;
	}

	return entry;
}

struct page *follow_huge_addr(struct mm_struct *mm,
			      unsigned long address, int write)
{
	return ERR_PTR(-EINVAL);
}
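/* Huge mappings on sparc64 are built out of runs of ordinary PTEs (see
 * set_huge_pte_at() above) rather than huge PMD or PUD entries, so the
 * helpers below report that no such entries exist.
 */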
int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
			     pmd_t *pmd, int write)
{
	return NULL;
}

static void context_reload(void *__data)
{
	struct mm_struct *mm = __data;

	if (mm == current->mm)
		load_secondary_context(mm);
}

void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
	struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];

	if (likely(tp->tsb != NULL))
		return;

	tsb_grow(mm, MM_TSB_HUGE, 0);
	tsb_context_switch(mm);
	smp_tsb_sync(mm);

	/* On UltraSPARC-III+ and later, configure the second half of
	 * the Data-TLB for huge pages.
	 */
	if (tlb_type == cheetah_plus) {
		unsigned long ctx;

		spin_lock(&ctx_alloc_lock);
		ctx = mm->context.sparc64_ctx_val;
		ctx &= ~CTX_PGSZ_MASK;
		ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
		ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;

		if (ctx != mm->context.sparc64_ctx_val) {
			/* When changing the page size fields, we
			 * must perform a context flush so that no
			 * stale entries match.  This flush must
			 * occur with the original context register
			 * settings.
			 */
			do_flush_tlb_mm(mm);

			/* Reload the context register of all processors
			 * also executing in this address space.
			 */
			mm->context.sparc64_ctx_val = ctx;
			on_each_cpu(context_reload, mm, 0);
		}
		spin_unlock(&ctx_alloc_lock);
	}
}