/*
 * IBM System z Huge TLB Page Support for Kernel.
 *
 * Copyright IBM Corp. 2007
 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>

static inline pmd_t __pte_to_pmd(pte_t pte)
{
	pmd_t pmd;

	/*
	 * Convert encoding           pte bits        pmd bits
	 *                            .IR...wrdytp    dy..R...I...wr
	 * empty                      .10...000000 -> 00..0...1...00
	 * prot-none, clean, old      .11...000001 -> 00..1...1...00
	 * prot-none, clean, young    .11...000101 -> 01..1...1...00
	 * prot-none, dirty, old      .10...001001 -> 10..1...1...00
	 * prot-none, dirty, young    .10...001101 -> 11..1...1...00
	 * read-only, clean, old      .11...010001 -> 00..1...1...01
	 * read-only, clean, young    .01...010101 -> 01..1...0...01
	 * read-only, dirty, old      .11...011001 -> 10..1...1...01
	 * read-only, dirty, young    .01...011101 -> 11..1...0...01
	 * read-write, clean, old     .11...110001 -> 00..0...1...11
	 * read-write, clean, young   .01...110101 -> 01..0...0...11
	 * read-write, dirty, old     .10...111001 -> 10..0...1...11
	 * read-write, dirty, young   .00...111101 -> 11..0...0...11
	 */
	if (pte_present(pte)) {
		pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
		pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4;
		pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4;
		pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5;
		pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
		pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
		pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
	} else
		pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
	return pmd;
}

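/*
 * Inverse of __pte_to_pmd(): unpack the software segment table entry
 * encoding back into the pte bit layout (see the conversion table below).
 */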
static inline pte_t __pmd_to_pte(pmd_t pmd)
{
	pte_t pte;

	/*
	 * Convert encoding           pmd bits          pte bits
	 *                            dy..R...I...wr    .IR...wrdytp
	 * empty                      00..0...1...00 -> .10...001100
	 * prot-none, clean, old      00..0...1...00 -> .10...000001
	 * prot-none, clean, young    01..0...1...00 -> .10...000101
	 * prot-none, dirty, old      10..0...1...00 -> .10...001001
	 * prot-none, dirty, young    11..0...1...00 -> .10...001101
	 * read-only, clean, old      00..1...1...01 -> .11...010001
	 * read-only, clean, young    01..1...1...01 -> .11...010101
	 * read-only, dirty, old      10..1...1...01 -> .11...011001
	 * read-only, dirty, young    11..1...1...01 -> .11...011101
	 * read-write, clean, old     00..0...1...11 -> .10...110001
	 * read-write, clean, young   01..0...1...11 -> .10...110101
	 * read-write, dirty, old     10..0...1...11 -> .10...111001
	 * read-write, dirty, young   11..0...1...11 -> .10...111101
	 */
	if (pmd_present(pmd)) {
		pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4;
		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
	} else
		pte_val(pte) = _PAGE_INVALID;
	return pte;
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	pmd_t pmd;

	pmd = __pte_to_pmd(pte);
	if (!MACHINE_HAS_HPAGE) {
		/* Emulated huge ptes lose the dirty and young bit */
		pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
		pmd_val(pmd) |= pte_page(pte)[1].index;
	} else
		pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
	*(pmd_t *) ptep = pmd;
}

pte_t huge_ptep_get(pte_t *ptep)
{
	unsigned long origin;
	pmd_t pmd;

	pmd = *(pmd_t *) ptep;
	if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
		origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
		pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
		pmd_val(pmd) |= *(unsigned long *) origin;
		/* Emulated huge ptes are young and dirty by definition */
		pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
	}
	return __pmd_to_pte(pmd);
}

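/*
 * Read the current (huge) pte via huge_ptep_get(), then flush the segment
 * table entry and mark it empty.
 */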
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	pmd_t *pmdp = (pmd_t *) ptep;
	pte_t pte = huge_ptep_get(ptep);

	pmdp_flush_direct(mm, addr, pmdp);
	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
	return pte;
}

int arch_prepare_hugepage(struct page *page)
{
	unsigned long addr = page_to_phys(page);
	pte_t pte;
	pte_t *ptep;
	int i;

	if (MACHINE_HAS_HPAGE)
		return 0;

	ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
	if (!ptep)
		return -ENOMEM;

	pte_val(pte) = addr;
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
		pte_val(pte) += PAGE_SIZE;
	}
	page[1].index = (unsigned long) ptep;
	return 0;
}

void arch_release_hugepage(struct page *page)
{
	pte_t *ptep;

	if (MACHINE_HAS_HPAGE)
		return;

	ptep = (pte_t *) page[1].index;
	if (!ptep)
		return;
	clear_table((unsigned long *) ptep, _PAGE_INVALID,
		    PTRS_PER_PTE * sizeof(pte_t));
	page_table_free(&init_mm, (unsigned long *) ptep);
	page[1].index = 0;
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	pudp = pud_alloc(mm, pgdp, addr);
	if (pudp)
		pmdp = pmd_alloc(mm, pudp, addr);
	return (pte_t *) pmdp;
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	if (pgd_present(*pgdp)) {
		pudp = pud_offset(pgdp, addr);
		if (pud_present(*pudp))
			pmdp = pmd_offset(pudp, addr);
	}
	return (pte_t *) pmdp;
}

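/*
 * Huge pmd sharing is not implemented on s390, so there is nothing to
 * unshare; follow_huge_addr() is likewise unsupported and always fails.
 */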
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}

struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
			      int write)
{
	return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
	if (!MACHINE_HAS_HPAGE)
		return 0;

	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
			     pmd_t *pmdp, int write)
{
	struct page *page;

	if (!MACHINE_HAS_HPAGE)
		return NULL;

	page = pmd_page(*pmdp);
	if (page)
		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
	return page;
}