11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Generic hugetlb support. 31da177e4SLinus Torvalds * (C) William Irwin, April 2004 41da177e4SLinus Torvalds */ 51da177e4SLinus Torvalds #include <linux/gfp.h> 61da177e4SLinus Torvalds #include <linux/list.h> 71da177e4SLinus Torvalds #include <linux/init.h> 81da177e4SLinus Torvalds #include <linux/module.h> 91da177e4SLinus Torvalds #include <linux/mm.h> 101da177e4SLinus Torvalds #include <linux/sysctl.h> 111da177e4SLinus Torvalds #include <linux/highmem.h> 121da177e4SLinus Torvalds #include <linux/nodemask.h> 1363551ae0SDavid Gibson #include <linux/pagemap.h> 1463551ae0SDavid Gibson #include <asm/page.h> 1563551ae0SDavid Gibson #include <asm/pgtable.h> 1663551ae0SDavid Gibson 1763551ae0SDavid Gibson #include <linux/hugetlb.h> 181da177e4SLinus Torvalds 191da177e4SLinus Torvalds const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; 201da177e4SLinus Torvalds static unsigned long nr_huge_pages, free_huge_pages; 211da177e4SLinus Torvalds unsigned long max_huge_pages; 221da177e4SLinus Torvalds static struct list_head hugepage_freelists[MAX_NUMNODES]; 231da177e4SLinus Torvalds static unsigned int nr_huge_pages_node[MAX_NUMNODES]; 241da177e4SLinus Torvalds static unsigned int free_huge_pages_node[MAX_NUMNODES]; 251da177e4SLinus Torvalds static DEFINE_SPINLOCK(hugetlb_lock); 261da177e4SLinus Torvalds 271da177e4SLinus Torvalds static void enqueue_huge_page(struct page *page) 281da177e4SLinus Torvalds { 291da177e4SLinus Torvalds int nid = page_to_nid(page); 301da177e4SLinus Torvalds list_add(&page->lru, &hugepage_freelists[nid]); 311da177e4SLinus Torvalds free_huge_pages++; 321da177e4SLinus Torvalds free_huge_pages_node[nid]++; 331da177e4SLinus Torvalds } 341da177e4SLinus Torvalds 351da177e4SLinus Torvalds static struct page *dequeue_huge_page(void) 361da177e4SLinus Torvalds { 371da177e4SLinus Torvalds int nid = numa_node_id(); 381da177e4SLinus Torvalds struct page *page = NULL; 391da177e4SLinus Torvalds 401da177e4SLinus Torvalds if (list_empty(&hugepage_freelists[nid])) { 411da177e4SLinus Torvalds for (nid = 0; nid < MAX_NUMNODES; ++nid) 421da177e4SLinus Torvalds if (!list_empty(&hugepage_freelists[nid])) 431da177e4SLinus Torvalds break; 441da177e4SLinus Torvalds } 451da177e4SLinus Torvalds if (nid >= 0 && nid < MAX_NUMNODES && 461da177e4SLinus Torvalds !list_empty(&hugepage_freelists[nid])) { 471da177e4SLinus Torvalds page = list_entry(hugepage_freelists[nid].next, 481da177e4SLinus Torvalds struct page, lru); 491da177e4SLinus Torvalds list_del(&page->lru); 501da177e4SLinus Torvalds free_huge_pages--; 511da177e4SLinus Torvalds free_huge_pages_node[nid]--; 521da177e4SLinus Torvalds } 531da177e4SLinus Torvalds return page; 541da177e4SLinus Torvalds } 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds static struct page *alloc_fresh_huge_page(void) 571da177e4SLinus Torvalds { 581da177e4SLinus Torvalds static int nid = 0; 591da177e4SLinus Torvalds struct page *page; 601da177e4SLinus Torvalds page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, 611da177e4SLinus Torvalds HUGETLB_PAGE_ORDER); 621da177e4SLinus Torvalds nid = (nid + 1) % num_online_nodes(); 631da177e4SLinus Torvalds if (page) { 641da177e4SLinus Torvalds nr_huge_pages++; 651da177e4SLinus Torvalds nr_huge_pages_node[page_to_nid(page)]++; 661da177e4SLinus Torvalds } 671da177e4SLinus Torvalds return page; 681da177e4SLinus Torvalds } 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds void free_huge_page(struct page *page) 711da177e4SLinus Torvalds { 721da177e4SLinus Torvalds BUG_ON(page_count(page)); 731da177e4SLinus Torvalds 741da177e4SLinus Torvalds INIT_LIST_HEAD(&page->lru); 751da177e4SLinus Torvalds page[1].mapping = NULL; 761da177e4SLinus Torvalds 771da177e4SLinus Torvalds spin_lock(&hugetlb_lock); 781da177e4SLinus Torvalds enqueue_huge_page(page); 791da177e4SLinus Torvalds spin_unlock(&hugetlb_lock); 801da177e4SLinus Torvalds } 811da177e4SLinus Torvalds 821da177e4SLinus Torvalds struct page *alloc_huge_page(void) 831da177e4SLinus Torvalds { 841da177e4SLinus Torvalds struct page *page; 851da177e4SLinus Torvalds int i; 861da177e4SLinus Torvalds 871da177e4SLinus Torvalds spin_lock(&hugetlb_lock); 881da177e4SLinus Torvalds page = dequeue_huge_page(); 891da177e4SLinus Torvalds if (!page) { 901da177e4SLinus Torvalds spin_unlock(&hugetlb_lock); 911da177e4SLinus Torvalds return NULL; 921da177e4SLinus Torvalds } 931da177e4SLinus Torvalds spin_unlock(&hugetlb_lock); 941da177e4SLinus Torvalds set_page_count(page, 1); 951da177e4SLinus Torvalds page[1].mapping = (void *)free_huge_page; 961da177e4SLinus Torvalds for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) 971da177e4SLinus Torvalds clear_highpage(&page[i]); 981da177e4SLinus Torvalds return page; 991da177e4SLinus Torvalds } 1001da177e4SLinus Torvalds 1011da177e4SLinus Torvalds static int __init hugetlb_init(void) 1021da177e4SLinus Torvalds { 1031da177e4SLinus Torvalds unsigned long i; 1041da177e4SLinus Torvalds struct page *page; 1051da177e4SLinus Torvalds 1061da177e4SLinus Torvalds for (i = 0; i < MAX_NUMNODES; ++i) 1071da177e4SLinus Torvalds INIT_LIST_HEAD(&hugepage_freelists[i]); 1081da177e4SLinus Torvalds 1091da177e4SLinus Torvalds for (i = 0; i < max_huge_pages; ++i) { 1101da177e4SLinus Torvalds page = alloc_fresh_huge_page(); 1111da177e4SLinus Torvalds if (!page) 1121da177e4SLinus Torvalds break; 1131da177e4SLinus Torvalds spin_lock(&hugetlb_lock); 1141da177e4SLinus Torvalds enqueue_huge_page(page); 1151da177e4SLinus Torvalds spin_unlock(&hugetlb_lock); 1161da177e4SLinus Torvalds } 1171da177e4SLinus Torvalds max_huge_pages = free_huge_pages = nr_huge_pages = i; 1181da177e4SLinus Torvalds printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); 1191da177e4SLinus Torvalds return 0; 1201da177e4SLinus Torvalds } 1211da177e4SLinus Torvalds module_init(hugetlb_init); 1221da177e4SLinus Torvalds 1231da177e4SLinus Torvalds static int __init hugetlb_setup(char *s) 1241da177e4SLinus Torvalds { 1251da177e4SLinus Torvalds if (sscanf(s, "%lu", &max_huge_pages) <= 0) 1261da177e4SLinus Torvalds max_huge_pages = 0; 1271da177e4SLinus Torvalds return 1; 1281da177e4SLinus Torvalds } 1291da177e4SLinus Torvalds __setup("hugepages=", hugetlb_setup); 1301da177e4SLinus Torvalds 1311da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL 1321da177e4SLinus Torvalds static void update_and_free_page(struct page *page) 1331da177e4SLinus Torvalds { 1341da177e4SLinus Torvalds int i; 1351da177e4SLinus Torvalds nr_huge_pages--; 1361da177e4SLinus Torvalds nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--; 1371da177e4SLinus Torvalds for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { 1381da177e4SLinus Torvalds page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1391da177e4SLinus Torvalds 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | 1401da177e4SLinus Torvalds 1 << PG_private | 1<< PG_writeback); 1411da177e4SLinus Torvalds set_page_count(&page[i], 0); 1421da177e4SLinus Torvalds } 1431da177e4SLinus Torvalds set_page_count(page, 1); 1441da177e4SLinus Torvalds __free_pages(page, HUGETLB_PAGE_ORDER); 1451da177e4SLinus Torvalds } 1461da177e4SLinus Torvalds 1471da177e4SLinus Torvalds #ifdef CONFIG_HIGHMEM 1481da177e4SLinus Torvalds static void try_to_free_low(unsigned long count) 1491da177e4SLinus Torvalds { 1501da177e4SLinus Torvalds int i, nid; 1511da177e4SLinus Torvalds for (i = 0; i < MAX_NUMNODES; ++i) { 1521da177e4SLinus Torvalds struct page *page, *next; 1531da177e4SLinus Torvalds list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { 1541da177e4SLinus Torvalds if (PageHighMem(page)) 1551da177e4SLinus Torvalds continue; 1561da177e4SLinus Torvalds list_del(&page->lru); 1571da177e4SLinus Torvalds update_and_free_page(page); 1581da177e4SLinus Torvalds nid = page_zone(page)->zone_pgdat->node_id; 1591da177e4SLinus Torvalds free_huge_pages--; 1601da177e4SLinus Torvalds free_huge_pages_node[nid]--; 1611da177e4SLinus Torvalds if (count >= nr_huge_pages) 1621da177e4SLinus Torvalds return; 1631da177e4SLinus Torvalds } 1641da177e4SLinus Torvalds } 1651da177e4SLinus Torvalds } 1661da177e4SLinus Torvalds #else 1671da177e4SLinus Torvalds static inline void try_to_free_low(unsigned long count) 1681da177e4SLinus Torvalds { 1691da177e4SLinus Torvalds } 1701da177e4SLinus Torvalds #endif 1711da177e4SLinus Torvalds 1721da177e4SLinus Torvalds static unsigned long set_max_huge_pages(unsigned long count) 1731da177e4SLinus Torvalds { 1741da177e4SLinus Torvalds while (count > nr_huge_pages) { 1751da177e4SLinus Torvalds struct page *page = alloc_fresh_huge_page(); 1761da177e4SLinus Torvalds if (!page) 1771da177e4SLinus Torvalds return nr_huge_pages; 1781da177e4SLinus Torvalds spin_lock(&hugetlb_lock); 1791da177e4SLinus Torvalds enqueue_huge_page(page); 1801da177e4SLinus Torvalds spin_unlock(&hugetlb_lock); 1811da177e4SLinus Torvalds } 1821da177e4SLinus Torvalds if (count >= nr_huge_pages) 1831da177e4SLinus Torvalds return nr_huge_pages; 1841da177e4SLinus Torvalds 1851da177e4SLinus Torvalds spin_lock(&hugetlb_lock); 1861da177e4SLinus Torvalds try_to_free_low(count); 1871da177e4SLinus Torvalds while (count < nr_huge_pages) { 1881da177e4SLinus Torvalds struct page *page = dequeue_huge_page(); 1891da177e4SLinus Torvalds if (!page) 1901da177e4SLinus Torvalds break; 1911da177e4SLinus Torvalds update_and_free_page(page); 1921da177e4SLinus Torvalds } 1931da177e4SLinus Torvalds spin_unlock(&hugetlb_lock); 1941da177e4SLinus Torvalds return nr_huge_pages; 1951da177e4SLinus Torvalds } 1961da177e4SLinus Torvalds 1971da177e4SLinus Torvalds int hugetlb_sysctl_handler(struct ctl_table *table, int write, 1981da177e4SLinus Torvalds struct file *file, void __user *buffer, 1991da177e4SLinus Torvalds size_t *length, loff_t *ppos) 2001da177e4SLinus Torvalds { 2011da177e4SLinus Torvalds proc_doulongvec_minmax(table, write, file, buffer, length, ppos); 2021da177e4SLinus Torvalds max_huge_pages = set_max_huge_pages(max_huge_pages); 2031da177e4SLinus Torvalds return 0; 2041da177e4SLinus Torvalds } 2051da177e4SLinus Torvalds #endif /* CONFIG_SYSCTL */ 2061da177e4SLinus Torvalds 2071da177e4SLinus Torvalds int hugetlb_report_meminfo(char *buf) 2081da177e4SLinus Torvalds { 2091da177e4SLinus Torvalds return sprintf(buf, 2101da177e4SLinus Torvalds "HugePages_Total: %5lu\n" 2111da177e4SLinus Torvalds "HugePages_Free: %5lu\n" 2121da177e4SLinus Torvalds "Hugepagesize: %5lu kB\n", 2131da177e4SLinus Torvalds nr_huge_pages, 2141da177e4SLinus Torvalds free_huge_pages, 2151da177e4SLinus Torvalds HPAGE_SIZE/1024); 2161da177e4SLinus Torvalds } 2171da177e4SLinus Torvalds 2181da177e4SLinus Torvalds int hugetlb_report_node_meminfo(int nid, char *buf) 2191da177e4SLinus Torvalds { 2201da177e4SLinus Torvalds return sprintf(buf, 2211da177e4SLinus Torvalds "Node %d HugePages_Total: %5u\n" 2221da177e4SLinus Torvalds "Node %d HugePages_Free: %5u\n", 2231da177e4SLinus Torvalds nid, nr_huge_pages_node[nid], 2241da177e4SLinus Torvalds nid, free_huge_pages_node[nid]); 2251da177e4SLinus Torvalds } 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds int is_hugepage_mem_enough(size_t size) 2281da177e4SLinus Torvalds { 2291da177e4SLinus Torvalds return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages; 2301da177e4SLinus Torvalds } 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ 2331da177e4SLinus Torvalds unsigned long hugetlb_total_pages(void) 2341da177e4SLinus Torvalds { 2351da177e4SLinus Torvalds return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); 2361da177e4SLinus Torvalds } 2371da177e4SLinus Torvalds EXPORT_SYMBOL(hugetlb_total_pages); 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds /* 2401da177e4SLinus Torvalds * We cannot handle pagefaults against hugetlb pages at all. They cause 2411da177e4SLinus Torvalds * handle_mm_fault() to try to instantiate regular-sized pages in the 2421da177e4SLinus Torvalds * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get 2431da177e4SLinus Torvalds * this far. 2441da177e4SLinus Torvalds */ 2451da177e4SLinus Torvalds static struct page *hugetlb_nopage(struct vm_area_struct *vma, 2461da177e4SLinus Torvalds unsigned long address, int *unused) 2471da177e4SLinus Torvalds { 2481da177e4SLinus Torvalds BUG(); 2491da177e4SLinus Torvalds return NULL; 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds 2521da177e4SLinus Torvalds struct vm_operations_struct hugetlb_vm_ops = { 2531da177e4SLinus Torvalds .nopage = hugetlb_nopage, 2541da177e4SLinus Torvalds }; 2551da177e4SLinus Torvalds 25663551ae0SDavid Gibson static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page) 25763551ae0SDavid Gibson { 25863551ae0SDavid Gibson pte_t entry; 25963551ae0SDavid Gibson 26063551ae0SDavid Gibson if (vma->vm_flags & VM_WRITE) { 26163551ae0SDavid Gibson entry = 26263551ae0SDavid Gibson pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); 26363551ae0SDavid Gibson } else { 26463551ae0SDavid Gibson entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); 26563551ae0SDavid Gibson } 26663551ae0SDavid Gibson entry = pte_mkyoung(entry); 26763551ae0SDavid Gibson entry = pte_mkhuge(entry); 26863551ae0SDavid Gibson 26963551ae0SDavid Gibson return entry; 27063551ae0SDavid Gibson } 27163551ae0SDavid Gibson 27263551ae0SDavid Gibson int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, 27363551ae0SDavid Gibson struct vm_area_struct *vma) 27463551ae0SDavid Gibson { 27563551ae0SDavid Gibson pte_t *src_pte, *dst_pte, entry; 27663551ae0SDavid Gibson struct page *ptepage; 2771c59827dSHugh Dickins unsigned long addr; 27863551ae0SDavid Gibson 2791c59827dSHugh Dickins for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { 280c74df32cSHugh Dickins src_pte = huge_pte_offset(src, addr); 281c74df32cSHugh Dickins if (!src_pte) 282c74df32cSHugh Dickins continue; 28363551ae0SDavid Gibson dst_pte = huge_pte_alloc(dst, addr); 28463551ae0SDavid Gibson if (!dst_pte) 28563551ae0SDavid Gibson goto nomem; 286c74df32cSHugh Dickins spin_lock(&dst->page_table_lock); 2871c59827dSHugh Dickins spin_lock(&src->page_table_lock); 288c74df32cSHugh Dickins if (!pte_none(*src_pte)) { 28963551ae0SDavid Gibson entry = *src_pte; 29063551ae0SDavid Gibson ptepage = pte_page(entry); 29163551ae0SDavid Gibson get_page(ptepage); 2924294621fSHugh Dickins add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE); 29363551ae0SDavid Gibson set_huge_pte_at(dst, addr, dst_pte, entry); 2941c59827dSHugh Dickins } 2951c59827dSHugh Dickins spin_unlock(&src->page_table_lock); 296c74df32cSHugh Dickins spin_unlock(&dst->page_table_lock); 29763551ae0SDavid Gibson } 29863551ae0SDavid Gibson return 0; 29963551ae0SDavid Gibson 30063551ae0SDavid Gibson nomem: 30163551ae0SDavid Gibson return -ENOMEM; 30263551ae0SDavid Gibson } 30363551ae0SDavid Gibson 30463551ae0SDavid Gibson void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, 30563551ae0SDavid Gibson unsigned long end) 30663551ae0SDavid Gibson { 30763551ae0SDavid Gibson struct mm_struct *mm = vma->vm_mm; 30863551ae0SDavid Gibson unsigned long address; 309c7546f8fSDavid Gibson pte_t *ptep; 31063551ae0SDavid Gibson pte_t pte; 31163551ae0SDavid Gibson struct page *page; 31263551ae0SDavid Gibson 31363551ae0SDavid Gibson WARN_ON(!is_vm_hugetlb_page(vma)); 31463551ae0SDavid Gibson BUG_ON(start & ~HPAGE_MASK); 31563551ae0SDavid Gibson BUG_ON(end & ~HPAGE_MASK); 31663551ae0SDavid Gibson 317508034a3SHugh Dickins spin_lock(&mm->page_table_lock); 318508034a3SHugh Dickins 319365e9c87SHugh Dickins /* Update high watermark before we lower rss */ 320365e9c87SHugh Dickins update_hiwater_rss(mm); 321365e9c87SHugh Dickins 32263551ae0SDavid Gibson for (address = start; address < end; address += HPAGE_SIZE) { 323c7546f8fSDavid Gibson ptep = huge_pte_offset(mm, address); 324c7546f8fSDavid Gibson if (!ptep) 325c7546f8fSDavid Gibson continue; 326c7546f8fSDavid Gibson 327c7546f8fSDavid Gibson pte = huge_ptep_get_and_clear(mm, address, ptep); 32863551ae0SDavid Gibson if (pte_none(pte)) 32963551ae0SDavid Gibson continue; 330c7546f8fSDavid Gibson 33163551ae0SDavid Gibson page = pte_page(pte); 33263551ae0SDavid Gibson put_page(page); 3334294621fSHugh Dickins add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE)); 33463551ae0SDavid Gibson } 33563551ae0SDavid Gibson 3361da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 337508034a3SHugh Dickins flush_tlb_range(vma, start, end); 3381da177e4SLinus Torvalds } 33963551ae0SDavid Gibson 3404c887265SAdam Litke static struct page *find_lock_huge_page(struct address_space *mapping, 3414c887265SAdam Litke unsigned long idx) 34263551ae0SDavid Gibson { 34363551ae0SDavid Gibson struct page *page; 3444c887265SAdam Litke int err; 3454c887265SAdam Litke struct inode *inode = mapping->host; 3464c887265SAdam Litke unsigned long size; 34763551ae0SDavid Gibson 3484c887265SAdam Litke retry: 3494c887265SAdam Litke page = find_lock_page(mapping, idx); 3504c887265SAdam Litke if (page) 35163551ae0SDavid Gibson goto out; 35263551ae0SDavid Gibson 3534c887265SAdam Litke /* Check to make sure the mapping hasn't been truncated */ 3544c887265SAdam Litke size = i_size_read(inode) >> HPAGE_SHIFT; 3554c887265SAdam Litke if (idx >= size) 35663551ae0SDavid Gibson goto out; 3574c887265SAdam Litke 3584c887265SAdam Litke if (hugetlb_get_quota(mapping)) 3594c887265SAdam Litke goto out; 36063551ae0SDavid Gibson page = alloc_huge_page(); 36163551ae0SDavid Gibson if (!page) { 36263551ae0SDavid Gibson hugetlb_put_quota(mapping); 36363551ae0SDavid Gibson goto out; 36463551ae0SDavid Gibson } 36563551ae0SDavid Gibson 3664c887265SAdam Litke err = add_to_page_cache(page, mapping, idx, GFP_KERNEL); 3674c887265SAdam Litke if (err) { 3684c887265SAdam Litke put_page(page); 3694c887265SAdam Litke hugetlb_put_quota(mapping); 3704c887265SAdam Litke if (err == -EEXIST) 3714c887265SAdam Litke goto retry; 3724c887265SAdam Litke page = NULL; 3734c887265SAdam Litke } 3744c887265SAdam Litke out: 3754c887265SAdam Litke return page; 3764c887265SAdam Litke } 3774c887265SAdam Litke 378ac9b9c66SHugh Dickins int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, 379ac9b9c66SHugh Dickins unsigned long address, int write_access) 380ac9b9c66SHugh Dickins { 381ac9b9c66SHugh Dickins int ret = VM_FAULT_SIGBUS; 3824c887265SAdam Litke unsigned long idx; 3834c887265SAdam Litke unsigned long size; 384ac9b9c66SHugh Dickins pte_t *pte; 3854c887265SAdam Litke struct page *page; 3864c887265SAdam Litke struct address_space *mapping; 3874c887265SAdam Litke 3884c887265SAdam Litke pte = huge_pte_alloc(mm, address); 3894c887265SAdam Litke if (!pte) 3904c887265SAdam Litke goto out; 3914c887265SAdam Litke 3924c887265SAdam Litke mapping = vma->vm_file->f_mapping; 3934c887265SAdam Litke idx = ((address - vma->vm_start) >> HPAGE_SHIFT) 3944c887265SAdam Litke + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); 3954c887265SAdam Litke 3964c887265SAdam Litke /* 3974c887265SAdam Litke * Use page lock to guard against racing truncation 3984c887265SAdam Litke * before we get page_table_lock. 3994c887265SAdam Litke */ 4004c887265SAdam Litke page = find_lock_huge_page(mapping, idx); 4014c887265SAdam Litke if (!page) 4024c887265SAdam Litke goto out; 403ac9b9c66SHugh Dickins 404ac9b9c66SHugh Dickins spin_lock(&mm->page_table_lock); 4054c887265SAdam Litke size = i_size_read(mapping->host) >> HPAGE_SHIFT; 4064c887265SAdam Litke if (idx >= size) 4074c887265SAdam Litke goto backout; 4084c887265SAdam Litke 409ac9b9c66SHugh Dickins ret = VM_FAULT_MINOR; 4104c887265SAdam Litke if (!pte_none(*pte)) 4114c887265SAdam Litke goto backout; 4124c887265SAdam Litke 4134c887265SAdam Litke add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE); 4144c887265SAdam Litke set_huge_pte_at(mm, address, pte, make_huge_pte(vma, page)); 415ac9b9c66SHugh Dickins spin_unlock(&mm->page_table_lock); 4164c887265SAdam Litke unlock_page(page); 4174c887265SAdam Litke out: 418ac9b9c66SHugh Dickins return ret; 4194c887265SAdam Litke 4204c887265SAdam Litke backout: 4214c887265SAdam Litke spin_unlock(&mm->page_table_lock); 4224c887265SAdam Litke hugetlb_put_quota(mapping); 4234c887265SAdam Litke unlock_page(page); 4244c887265SAdam Litke put_page(page); 4254c887265SAdam Litke goto out; 426ac9b9c66SHugh Dickins } 427ac9b9c66SHugh Dickins 42863551ae0SDavid Gibson int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, 42963551ae0SDavid Gibson struct page **pages, struct vm_area_struct **vmas, 43063551ae0SDavid Gibson unsigned long *position, int *length, int i) 43163551ae0SDavid Gibson { 43263551ae0SDavid Gibson unsigned long vpfn, vaddr = *position; 43363551ae0SDavid Gibson int remainder = *length; 43463551ae0SDavid Gibson 43563551ae0SDavid Gibson vpfn = vaddr/PAGE_SIZE; 4361c59827dSHugh Dickins spin_lock(&mm->page_table_lock); 43763551ae0SDavid Gibson while (vaddr < vma->vm_end && remainder) { 43863551ae0SDavid Gibson pte_t *pte; 43963551ae0SDavid Gibson struct page *page; 44063551ae0SDavid Gibson 4414c887265SAdam Litke /* 4424c887265SAdam Litke * Some archs (sparc64, sh*) have multiple pte_ts to 4434c887265SAdam Litke * each hugepage. We have to make * sure we get the 4444c887265SAdam Litke * first, for the page indexing below to work. 4454c887265SAdam Litke */ 44663551ae0SDavid Gibson pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); 44763551ae0SDavid Gibson 4481c59827dSHugh Dickins if (!pte || pte_none(*pte)) { 4494c887265SAdam Litke int ret; 4504c887265SAdam Litke 4514c887265SAdam Litke spin_unlock(&mm->page_table_lock); 4524c887265SAdam Litke ret = hugetlb_fault(mm, vma, vaddr, 0); 4534c887265SAdam Litke spin_lock(&mm->page_table_lock); 4544c887265SAdam Litke if (ret == VM_FAULT_MINOR) 4554c887265SAdam Litke continue; 4564c887265SAdam Litke 4571c59827dSHugh Dickins remainder = 0; 4581c59827dSHugh Dickins if (!i) 4591c59827dSHugh Dickins i = -EFAULT; 4601c59827dSHugh Dickins break; 4611c59827dSHugh Dickins } 46263551ae0SDavid Gibson 4634c887265SAdam Litke if (pages) { 46463551ae0SDavid Gibson page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; 46563551ae0SDavid Gibson get_page(page); 46663551ae0SDavid Gibson pages[i] = page; 46763551ae0SDavid Gibson } 46863551ae0SDavid Gibson 46963551ae0SDavid Gibson if (vmas) 47063551ae0SDavid Gibson vmas[i] = vma; 47163551ae0SDavid Gibson 47263551ae0SDavid Gibson vaddr += PAGE_SIZE; 47363551ae0SDavid Gibson ++vpfn; 47463551ae0SDavid Gibson --remainder; 47563551ae0SDavid Gibson ++i; 47663551ae0SDavid Gibson } 4771c59827dSHugh Dickins spin_unlock(&mm->page_table_lock); 47863551ae0SDavid Gibson *length = remainder; 47963551ae0SDavid Gibson *position = vaddr; 48063551ae0SDavid Gibson 48163551ae0SDavid Gibson return i; 48263551ae0SDavid Gibson } 483