127137e52SSam Ravnborg /* 227137e52SSam Ravnborg * SPARC64 Huge TLB page support. 327137e52SSam Ravnborg * 427137e52SSam Ravnborg * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net) 527137e52SSam Ravnborg */ 627137e52SSam Ravnborg 727137e52SSam Ravnborg #include <linux/fs.h> 827137e52SSam Ravnborg #include <linux/mm.h> 927137e52SSam Ravnborg #include <linux/hugetlb.h> 1027137e52SSam Ravnborg #include <linux/pagemap.h> 1127137e52SSam Ravnborg #include <linux/sysctl.h> 1227137e52SSam Ravnborg 1327137e52SSam Ravnborg #include <asm/mman.h> 1427137e52SSam Ravnborg #include <asm/pgalloc.h> 15*7bc3777cSNitin Gupta #include <asm/pgtable.h> 1627137e52SSam Ravnborg #include <asm/tlb.h> 1727137e52SSam Ravnborg #include <asm/tlbflush.h> 1827137e52SSam Ravnborg #include <asm/cacheflush.h> 1927137e52SSam Ravnborg #include <asm/mmu_context.h> 2027137e52SSam Ravnborg 2127137e52SSam Ravnborg /* Slightly simplified from the non-hugepage variant because by 2227137e52SSam Ravnborg * definition we don't have to worry about any page coloring stuff 2327137e52SSam Ravnborg */ 2427137e52SSam Ravnborg 2527137e52SSam Ravnborg static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, 2627137e52SSam Ravnborg unsigned long addr, 2727137e52SSam Ravnborg unsigned long len, 2827137e52SSam Ravnborg unsigned long pgoff, 2927137e52SSam Ravnborg unsigned long flags) 3027137e52SSam Ravnborg { 3127137e52SSam Ravnborg unsigned long task_size = TASK_SIZE; 322aea28b9SMichel Lespinasse struct vm_unmapped_area_info info; 3327137e52SSam Ravnborg 3427137e52SSam Ravnborg if (test_thread_flag(TIF_32BIT)) 3527137e52SSam Ravnborg task_size = STACK_TOP32; 3627137e52SSam Ravnborg 372aea28b9SMichel Lespinasse info.flags = 0; 382aea28b9SMichel Lespinasse info.length = len; 392aea28b9SMichel Lespinasse info.low_limit = TASK_UNMAPPED_BASE; 402aea28b9SMichel Lespinasse info.high_limit = min(task_size, VA_EXCLUDE_START); 412aea28b9SMichel Lespinasse info.align_mask = PAGE_MASK & ~HPAGE_MASK; 422aea28b9SMichel Lespinasse info.align_offset = 0; 432aea28b9SMichel Lespinasse addr = vm_unmapped_area(&info); 442aea28b9SMichel Lespinasse 452aea28b9SMichel Lespinasse if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) { 462aea28b9SMichel Lespinasse VM_BUG_ON(addr != -ENOMEM); 472aea28b9SMichel Lespinasse info.low_limit = VA_EXCLUDE_END; 482aea28b9SMichel Lespinasse info.high_limit = task_size; 492aea28b9SMichel Lespinasse addr = vm_unmapped_area(&info); 5027137e52SSam Ravnborg } 5127137e52SSam Ravnborg 5227137e52SSam Ravnborg return addr; 5327137e52SSam Ravnborg } 5427137e52SSam Ravnborg 5527137e52SSam Ravnborg static unsigned long 5627137e52SSam Ravnborg hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, 5727137e52SSam Ravnborg const unsigned long len, 5827137e52SSam Ravnborg const unsigned long pgoff, 5927137e52SSam Ravnborg const unsigned long flags) 6027137e52SSam Ravnborg { 6127137e52SSam Ravnborg struct mm_struct *mm = current->mm; 6227137e52SSam Ravnborg unsigned long addr = addr0; 632aea28b9SMichel Lespinasse struct vm_unmapped_area_info info; 6427137e52SSam Ravnborg 6527137e52SSam Ravnborg /* This should only ever run for 32-bit processes. */ 6627137e52SSam Ravnborg BUG_ON(!test_thread_flag(TIF_32BIT)); 6727137e52SSam Ravnborg 682aea28b9SMichel Lespinasse info.flags = VM_UNMAPPED_AREA_TOPDOWN; 692aea28b9SMichel Lespinasse info.length = len; 702aea28b9SMichel Lespinasse info.low_limit = PAGE_SIZE; 712aea28b9SMichel Lespinasse info.high_limit = mm->mmap_base; 722aea28b9SMichel Lespinasse info.align_mask = PAGE_MASK & ~HPAGE_MASK; 732aea28b9SMichel Lespinasse info.align_offset = 0; 742aea28b9SMichel Lespinasse addr = vm_unmapped_area(&info); 7527137e52SSam Ravnborg 7627137e52SSam Ravnborg /* 7727137e52SSam Ravnborg * A failed mmap() very likely causes application failure, 7827137e52SSam Ravnborg * so fall back to the bottom-up function here. This scenario 7927137e52SSam Ravnborg * can happen with large stack limits and large mmap() 8027137e52SSam Ravnborg * allocations. 8127137e52SSam Ravnborg */ 822aea28b9SMichel Lespinasse if (addr & ~PAGE_MASK) { 832aea28b9SMichel Lespinasse VM_BUG_ON(addr != -ENOMEM); 842aea28b9SMichel Lespinasse info.flags = 0; 852aea28b9SMichel Lespinasse info.low_limit = TASK_UNMAPPED_BASE; 862aea28b9SMichel Lespinasse info.high_limit = STACK_TOP32; 872aea28b9SMichel Lespinasse addr = vm_unmapped_area(&info); 882aea28b9SMichel Lespinasse } 8927137e52SSam Ravnborg 9027137e52SSam Ravnborg return addr; 9127137e52SSam Ravnborg } 9227137e52SSam Ravnborg 9327137e52SSam Ravnborg unsigned long 9427137e52SSam Ravnborg hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 9527137e52SSam Ravnborg unsigned long len, unsigned long pgoff, unsigned long flags) 9627137e52SSam Ravnborg { 9727137e52SSam Ravnborg struct mm_struct *mm = current->mm; 9827137e52SSam Ravnborg struct vm_area_struct *vma; 9927137e52SSam Ravnborg unsigned long task_size = TASK_SIZE; 10027137e52SSam Ravnborg 10127137e52SSam Ravnborg if (test_thread_flag(TIF_32BIT)) 10227137e52SSam Ravnborg task_size = STACK_TOP32; 10327137e52SSam Ravnborg 10427137e52SSam Ravnborg if (len & ~HPAGE_MASK) 10527137e52SSam Ravnborg return -EINVAL; 10627137e52SSam Ravnborg if (len > task_size) 10727137e52SSam Ravnborg return -ENOMEM; 10827137e52SSam Ravnborg 10927137e52SSam Ravnborg if (flags & MAP_FIXED) { 11027137e52SSam Ravnborg if (prepare_hugepage_range(file, addr, len)) 11127137e52SSam Ravnborg return -EINVAL; 11227137e52SSam Ravnborg return addr; 11327137e52SSam Ravnborg } 11427137e52SSam Ravnborg 11527137e52SSam Ravnborg if (addr) { 11627137e52SSam Ravnborg addr = ALIGN(addr, HPAGE_SIZE); 11727137e52SSam Ravnborg vma = find_vma(mm, addr); 11827137e52SSam Ravnborg if (task_size - len >= addr && 11927137e52SSam Ravnborg (!vma || addr + len <= vma->vm_start)) 12027137e52SSam Ravnborg return addr; 12127137e52SSam Ravnborg } 12227137e52SSam Ravnborg if (mm->get_unmapped_area == arch_get_unmapped_area) 12327137e52SSam Ravnborg return hugetlb_get_unmapped_area_bottomup(file, addr, len, 12427137e52SSam Ravnborg pgoff, flags); 12527137e52SSam Ravnborg else 12627137e52SSam Ravnborg return hugetlb_get_unmapped_area_topdown(file, addr, len, 12727137e52SSam Ravnborg pgoff, flags); 12827137e52SSam Ravnborg } 12927137e52SSam Ravnborg 13027137e52SSam Ravnborg pte_t *huge_pte_alloc(struct mm_struct *mm, 13127137e52SSam Ravnborg unsigned long addr, unsigned long sz) 13227137e52SSam Ravnborg { 13327137e52SSam Ravnborg pgd_t *pgd; 13427137e52SSam Ravnborg pud_t *pud; 13527137e52SSam Ravnborg pte_t *pte = NULL; 13627137e52SSam Ravnborg 13727137e52SSam Ravnborg pgd = pgd_offset(mm, addr); 13827137e52SSam Ravnborg pud = pud_alloc(mm, pgd, addr); 139*7bc3777cSNitin Gupta if (pud) 140*7bc3777cSNitin Gupta pte = (pte_t *)pmd_alloc(mm, pud, addr); 141*7bc3777cSNitin Gupta 14227137e52SSam Ravnborg return pte; 14327137e52SSam Ravnborg } 14427137e52SSam Ravnborg 14527137e52SSam Ravnborg pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 14627137e52SSam Ravnborg { 14727137e52SSam Ravnborg pgd_t *pgd; 14827137e52SSam Ravnborg pud_t *pud; 14927137e52SSam Ravnborg pte_t *pte = NULL; 15027137e52SSam Ravnborg 15127137e52SSam Ravnborg pgd = pgd_offset(mm, addr); 15227137e52SSam Ravnborg if (!pgd_none(*pgd)) { 15327137e52SSam Ravnborg pud = pud_offset(pgd, addr); 154*7bc3777cSNitin Gupta if (!pud_none(*pud)) 155*7bc3777cSNitin Gupta pte = (pte_t *)pmd_offset(pud, addr); 15627137e52SSam Ravnborg } 15727137e52SSam Ravnborg return pte; 15827137e52SSam Ravnborg } 15927137e52SSam Ravnborg 16027137e52SSam Ravnborg void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 16127137e52SSam Ravnborg pte_t *ptep, pte_t entry) 16227137e52SSam Ravnborg { 163*7bc3777cSNitin Gupta pte_t orig; 16427137e52SSam Ravnborg 16527137e52SSam Ravnborg if (!pte_present(*ptep) && pte_present(entry)) 166af1b1a9bSMike Kravetz mm->context.hugetlb_pte_count++; 16727137e52SSam Ravnborg 16827137e52SSam Ravnborg addr &= HPAGE_MASK; 169*7bc3777cSNitin Gupta orig = *ptep; 17024e49ee3SNitin Gupta *ptep = entry; 17124e49ee3SNitin Gupta 17224e49ee3SNitin Gupta /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ 173*7bc3777cSNitin Gupta maybe_tlb_batch_add(mm, addr, ptep, orig, 0); 174*7bc3777cSNitin Gupta maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0); 17527137e52SSam Ravnborg } 17627137e52SSam Ravnborg 17727137e52SSam Ravnborg pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 17827137e52SSam Ravnborg pte_t *ptep) 17927137e52SSam Ravnborg { 18027137e52SSam Ravnborg pte_t entry; 18127137e52SSam Ravnborg 18227137e52SSam Ravnborg entry = *ptep; 18327137e52SSam Ravnborg if (pte_present(entry)) 184af1b1a9bSMike Kravetz mm->context.hugetlb_pte_count--; 18527137e52SSam Ravnborg 18627137e52SSam Ravnborg addr &= HPAGE_MASK; 18724e49ee3SNitin Gupta *ptep = __pte(0UL); 18827137e52SSam Ravnborg 18924e49ee3SNitin Gupta /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ 19024e49ee3SNitin Gupta maybe_tlb_batch_add(mm, addr, ptep, entry, 0); 191*7bc3777cSNitin Gupta maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0); 19224e49ee3SNitin Gupta 19327137e52SSam Ravnborg return entry; 19427137e52SSam Ravnborg } 19527137e52SSam Ravnborg 19627137e52SSam Ravnborg int pmd_huge(pmd_t pmd) 19727137e52SSam Ravnborg { 198*7bc3777cSNitin Gupta return !pmd_none(pmd) && 199*7bc3777cSNitin Gupta (pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID; 20027137e52SSam Ravnborg } 20127137e52SSam Ravnborg 20227137e52SSam Ravnborg int pud_huge(pud_t pud) 20327137e52SSam Ravnborg { 20427137e52SSam Ravnborg return 0; 20527137e52SSam Ravnborg } 206*7bc3777cSNitin Gupta 207*7bc3777cSNitin Gupta static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, 208*7bc3777cSNitin Gupta unsigned long addr) 209*7bc3777cSNitin Gupta { 210*7bc3777cSNitin Gupta pgtable_t token = pmd_pgtable(*pmd); 211*7bc3777cSNitin Gupta 212*7bc3777cSNitin Gupta pmd_clear(pmd); 213*7bc3777cSNitin Gupta pte_free_tlb(tlb, token, addr); 214*7bc3777cSNitin Gupta atomic_long_dec(&tlb->mm->nr_ptes); 215*7bc3777cSNitin Gupta } 216*7bc3777cSNitin Gupta 217*7bc3777cSNitin Gupta static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, 218*7bc3777cSNitin Gupta unsigned long addr, unsigned long end, 219*7bc3777cSNitin Gupta unsigned long floor, unsigned long ceiling) 220*7bc3777cSNitin Gupta { 221*7bc3777cSNitin Gupta pmd_t *pmd; 222*7bc3777cSNitin Gupta unsigned long next; 223*7bc3777cSNitin Gupta unsigned long start; 224*7bc3777cSNitin Gupta 225*7bc3777cSNitin Gupta start = addr; 226*7bc3777cSNitin Gupta pmd = pmd_offset(pud, addr); 227*7bc3777cSNitin Gupta do { 228*7bc3777cSNitin Gupta next = pmd_addr_end(addr, end); 229*7bc3777cSNitin Gupta if (pmd_none(*pmd)) 230*7bc3777cSNitin Gupta continue; 231*7bc3777cSNitin Gupta if (is_hugetlb_pmd(*pmd)) 232*7bc3777cSNitin Gupta pmd_clear(pmd); 233*7bc3777cSNitin Gupta else 234*7bc3777cSNitin Gupta hugetlb_free_pte_range(tlb, pmd, addr); 235*7bc3777cSNitin Gupta } while (pmd++, addr = next, addr != end); 236*7bc3777cSNitin Gupta 237*7bc3777cSNitin Gupta start &= PUD_MASK; 238*7bc3777cSNitin Gupta if (start < floor) 239*7bc3777cSNitin Gupta return; 240*7bc3777cSNitin Gupta if (ceiling) { 241*7bc3777cSNitin Gupta ceiling &= PUD_MASK; 242*7bc3777cSNitin Gupta if (!ceiling) 243*7bc3777cSNitin Gupta return; 244*7bc3777cSNitin Gupta } 245*7bc3777cSNitin Gupta if (end - 1 > ceiling - 1) 246*7bc3777cSNitin Gupta return; 247*7bc3777cSNitin Gupta 248*7bc3777cSNitin Gupta pmd = pmd_offset(pud, start); 249*7bc3777cSNitin Gupta pud_clear(pud); 250*7bc3777cSNitin Gupta pmd_free_tlb(tlb, pmd, start); 251*7bc3777cSNitin Gupta mm_dec_nr_pmds(tlb->mm); 252*7bc3777cSNitin Gupta } 253*7bc3777cSNitin Gupta 254*7bc3777cSNitin Gupta static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, 255*7bc3777cSNitin Gupta unsigned long addr, unsigned long end, 256*7bc3777cSNitin Gupta unsigned long floor, unsigned long ceiling) 257*7bc3777cSNitin Gupta { 258*7bc3777cSNitin Gupta pud_t *pud; 259*7bc3777cSNitin Gupta unsigned long next; 260*7bc3777cSNitin Gupta unsigned long start; 261*7bc3777cSNitin Gupta 262*7bc3777cSNitin Gupta start = addr; 263*7bc3777cSNitin Gupta pud = pud_offset(pgd, addr); 264*7bc3777cSNitin Gupta do { 265*7bc3777cSNitin Gupta next = pud_addr_end(addr, end); 266*7bc3777cSNitin Gupta if (pud_none_or_clear_bad(pud)) 267*7bc3777cSNitin Gupta continue; 268*7bc3777cSNitin Gupta hugetlb_free_pmd_range(tlb, pud, addr, next, floor, 269*7bc3777cSNitin Gupta ceiling); 270*7bc3777cSNitin Gupta } while (pud++, addr = next, addr != end); 271*7bc3777cSNitin Gupta 272*7bc3777cSNitin Gupta start &= PGDIR_MASK; 273*7bc3777cSNitin Gupta if (start < floor) 274*7bc3777cSNitin Gupta return; 275*7bc3777cSNitin Gupta if (ceiling) { 276*7bc3777cSNitin Gupta ceiling &= PGDIR_MASK; 277*7bc3777cSNitin Gupta if (!ceiling) 278*7bc3777cSNitin Gupta return; 279*7bc3777cSNitin Gupta } 280*7bc3777cSNitin Gupta if (end - 1 > ceiling - 1) 281*7bc3777cSNitin Gupta return; 282*7bc3777cSNitin Gupta 283*7bc3777cSNitin Gupta pud = pud_offset(pgd, start); 284*7bc3777cSNitin Gupta pgd_clear(pgd); 285*7bc3777cSNitin Gupta pud_free_tlb(tlb, pud, start); 286*7bc3777cSNitin Gupta } 287*7bc3777cSNitin Gupta 288*7bc3777cSNitin Gupta void hugetlb_free_pgd_range(struct mmu_gather *tlb, 289*7bc3777cSNitin Gupta unsigned long addr, unsigned long end, 290*7bc3777cSNitin Gupta unsigned long floor, unsigned long ceiling) 291*7bc3777cSNitin Gupta { 292*7bc3777cSNitin Gupta pgd_t *pgd; 293*7bc3777cSNitin Gupta unsigned long next; 294*7bc3777cSNitin Gupta 295*7bc3777cSNitin Gupta pgd = pgd_offset(tlb->mm, addr); 296*7bc3777cSNitin Gupta do { 297*7bc3777cSNitin Gupta next = pgd_addr_end(addr, end); 298*7bc3777cSNitin Gupta if (pgd_none_or_clear_bad(pgd)) 299*7bc3777cSNitin Gupta continue; 300*7bc3777cSNitin Gupta hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); 301*7bc3777cSNitin Gupta } while (pgd++, addr = next, addr != end); 302*7bc3777cSNitin Gupta } 303