153492b1dSGerald Schaefer /* 253492b1dSGerald Schaefer * IBM System z Huge TLB Page Support for Kernel. 353492b1dSGerald Schaefer * 4*d08de8e2SGerald Schaefer * Copyright IBM Corp. 2007,2016 553492b1dSGerald Schaefer * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> 653492b1dSGerald Schaefer */ 753492b1dSGerald Schaefer 8*d08de8e2SGerald Schaefer #define KMSG_COMPONENT "hugetlb" 9*d08de8e2SGerald Schaefer #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 10*d08de8e2SGerald Schaefer 1153492b1dSGerald Schaefer #include <linux/mm.h> 1253492b1dSGerald Schaefer #include <linux/hugetlb.h> 1353492b1dSGerald Schaefer 14*d08de8e2SGerald Schaefer static inline unsigned long __pte_to_rste(pte_t pte) 1553492b1dSGerald Schaefer { 16*d08de8e2SGerald Schaefer unsigned long rste; 1753492b1dSGerald Schaefer 18e5098611SMartin Schwidefsky /* 19*d08de8e2SGerald Schaefer * Convert encoding pte bits pmd / pud bits 20a1c843b8SMartin Schwidefsky * lIR.uswrdy.p dy..R...I...wr 21a1c843b8SMartin Schwidefsky * empty 010.000000.0 -> 00..0...1...00 22a1c843b8SMartin Schwidefsky * prot-none, clean, old 111.000000.1 -> 00..1...1...00 23a1c843b8SMartin Schwidefsky * prot-none, clean, young 111.000001.1 -> 01..1...1...00 24a1c843b8SMartin Schwidefsky * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 25a1c843b8SMartin Schwidefsky * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 26a1c843b8SMartin Schwidefsky * read-only, clean, old 111.000100.1 -> 00..1...1...01 27a1c843b8SMartin Schwidefsky * read-only, clean, young 101.000101.1 -> 01..1...0...01 28a1c843b8SMartin Schwidefsky * read-only, dirty, old 111.000110.1 -> 10..1...1...01 29a1c843b8SMartin Schwidefsky * read-only, dirty, young 101.000111.1 -> 11..1...0...01 30a1c843b8SMartin Schwidefsky * read-write, clean, old 111.001100.1 -> 00..1...1...11 31a1c843b8SMartin Schwidefsky * read-write, clean, young 101.001101.1 -> 01..1...0...11 32a1c843b8SMartin Schwidefsky * read-write, dirty, old 110.001110.1 -> 10..0...1...11 33a1c843b8SMartin Schwidefsky * read-write, dirty, young 100.001111.1 -> 11..0...0...11 34a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 35a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 36a1c843b8SMartin Schwidefsky * u unused, l large 37e5098611SMartin Schwidefsky */ 38e5098611SMartin Schwidefsky if (pte_present(pte)) { 39*d08de8e2SGerald Schaefer rste = pte_val(pte) & PAGE_MASK; 40*d08de8e2SGerald Schaefer rste |= (pte_val(pte) & _PAGE_READ) >> 4; 41*d08de8e2SGerald Schaefer rste |= (pte_val(pte) & _PAGE_WRITE) >> 4; 42*d08de8e2SGerald Schaefer rste |= (pte_val(pte) & _PAGE_INVALID) >> 5; 43*d08de8e2SGerald Schaefer rste |= (pte_val(pte) & _PAGE_PROTECT); 44*d08de8e2SGerald Schaefer rste |= (pte_val(pte) & _PAGE_DIRTY) << 10; 45*d08de8e2SGerald Schaefer rste |= (pte_val(pte) & _PAGE_YOUNG) << 10; 46*d08de8e2SGerald Schaefer rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; 47e5098611SMartin Schwidefsky } else 48*d08de8e2SGerald Schaefer rste = _SEGMENT_ENTRY_INVALID; 49*d08de8e2SGerald Schaefer return rste; 5053492b1dSGerald Schaefer } 5153492b1dSGerald Schaefer 52*d08de8e2SGerald Schaefer static inline pte_t __rste_to_pte(unsigned long rste) 53e5098611SMartin Schwidefsky { 54*d08de8e2SGerald Schaefer int present; 55e5098611SMartin Schwidefsky pte_t pte; 56e5098611SMartin Schwidefsky 57*d08de8e2SGerald Schaefer if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 58*d08de8e2SGerald Schaefer present = pud_present(__pud(rste)); 59*d08de8e2SGerald Schaefer else 60*d08de8e2SGerald Schaefer present = pmd_present(__pmd(rste)); 61*d08de8e2SGerald Schaefer 62e5098611SMartin Schwidefsky /* 63*d08de8e2SGerald Schaefer * Convert encoding pmd / pud bits pte bits 64a1c843b8SMartin Schwidefsky * dy..R...I...wr lIR.uswrdy.p 65a1c843b8SMartin Schwidefsky * empty 00..0...1...00 -> 010.000000.0 66a1c843b8SMartin Schwidefsky * prot-none, clean, old 00..1...1...00 -> 111.000000.1 67a1c843b8SMartin Schwidefsky * prot-none, clean, young 01..1...1...00 -> 111.000001.1 68a1c843b8SMartin Schwidefsky * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 69a1c843b8SMartin Schwidefsky * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 70a1c843b8SMartin Schwidefsky * read-only, clean, old 00..1...1...01 -> 111.000100.1 71a1c843b8SMartin Schwidefsky * read-only, clean, young 01..1...0...01 -> 101.000101.1 72a1c843b8SMartin Schwidefsky * read-only, dirty, old 10..1...1...01 -> 111.000110.1 73a1c843b8SMartin Schwidefsky * read-only, dirty, young 11..1...0...01 -> 101.000111.1 74a1c843b8SMartin Schwidefsky * read-write, clean, old 00..1...1...11 -> 111.001100.1 75a1c843b8SMartin Schwidefsky * read-write, clean, young 01..1...0...11 -> 101.001101.1 76a1c843b8SMartin Schwidefsky * read-write, dirty, old 10..0...1...11 -> 110.001110.1 77a1c843b8SMartin Schwidefsky * read-write, dirty, young 11..0...0...11 -> 100.001111.1 78a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 79a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 80a1c843b8SMartin Schwidefsky * u unused, l large 81e5098611SMartin Schwidefsky */ 82*d08de8e2SGerald Schaefer if (present) { 83*d08de8e2SGerald Schaefer pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; 84152125b7SMartin Schwidefsky pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; 85*d08de8e2SGerald Schaefer pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4; 86*d08de8e2SGerald Schaefer pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4; 87*d08de8e2SGerald Schaefer pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5; 88*d08de8e2SGerald Schaefer pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT); 89*d08de8e2SGerald Schaefer pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10; 90*d08de8e2SGerald Schaefer pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10; 91*d08de8e2SGerald Schaefer pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; 92e5098611SMartin Schwidefsky } else 93e5098611SMartin Schwidefsky pte_val(pte) = _PAGE_INVALID; 94e5098611SMartin Schwidefsky return pte; 95e5098611SMartin Schwidefsky } 96e5098611SMartin Schwidefsky 97e5098611SMartin Schwidefsky void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 98e5098611SMartin Schwidefsky pte_t *ptep, pte_t pte) 99e5098611SMartin Schwidefsky { 100*d08de8e2SGerald Schaefer unsigned long rste = __pte_to_rste(pte); 101e5098611SMartin Schwidefsky 102*d08de8e2SGerald Schaefer /* Set correct table type for 2G hugepages */ 103*d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 104*d08de8e2SGerald Schaefer rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; 105*d08de8e2SGerald Schaefer else 106*d08de8e2SGerald Schaefer rste |= _SEGMENT_ENTRY_LARGE; 107*d08de8e2SGerald Schaefer pte_val(*ptep) = rste; 108e5098611SMartin Schwidefsky } 109e5098611SMartin Schwidefsky 110e5098611SMartin Schwidefsky pte_t huge_ptep_get(pte_t *ptep) 111e5098611SMartin Schwidefsky { 112*d08de8e2SGerald Schaefer return __rste_to_pte(pte_val(*ptep)); 113e5098611SMartin Schwidefsky } 114e5098611SMartin Schwidefsky 115e5098611SMartin Schwidefsky pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 116e5098611SMartin Schwidefsky unsigned long addr, pte_t *ptep) 117e5098611SMartin Schwidefsky { 118*d08de8e2SGerald Schaefer pte_t pte = huge_ptep_get(ptep); 119e5098611SMartin Schwidefsky pmd_t *pmdp = (pmd_t *) ptep; 120*d08de8e2SGerald Schaefer pud_t *pudp = (pud_t *) ptep; 121e5098611SMartin Schwidefsky 122*d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 123*d08de8e2SGerald Schaefer pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); 124*d08de8e2SGerald Schaefer else 125*d08de8e2SGerald Schaefer pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); 126*d08de8e2SGerald Schaefer return pte; 12753492b1dSGerald Schaefer } 12853492b1dSGerald Schaefer 129a5516438SAndi Kleen pte_t *huge_pte_alloc(struct mm_struct *mm, 130a5516438SAndi Kleen unsigned long addr, unsigned long sz) 13153492b1dSGerald Schaefer { 13253492b1dSGerald Schaefer pgd_t *pgdp; 13353492b1dSGerald Schaefer pud_t *pudp; 13453492b1dSGerald Schaefer pmd_t *pmdp = NULL; 13553492b1dSGerald Schaefer 13653492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 13753492b1dSGerald Schaefer pudp = pud_alloc(mm, pgdp, addr); 138*d08de8e2SGerald Schaefer if (pudp) { 139*d08de8e2SGerald Schaefer if (sz == PUD_SIZE) 140*d08de8e2SGerald Schaefer return (pte_t *) pudp; 141*d08de8e2SGerald Schaefer else if (sz == PMD_SIZE) 14253492b1dSGerald Schaefer pmdp = pmd_alloc(mm, pudp, addr); 143*d08de8e2SGerald Schaefer } 14453492b1dSGerald Schaefer return (pte_t *) pmdp; 14553492b1dSGerald Schaefer } 14653492b1dSGerald Schaefer 14753492b1dSGerald Schaefer pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 14853492b1dSGerald Schaefer { 14953492b1dSGerald Schaefer pgd_t *pgdp; 15053492b1dSGerald Schaefer pud_t *pudp; 15153492b1dSGerald Schaefer pmd_t *pmdp = NULL; 15253492b1dSGerald Schaefer 15353492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 15453492b1dSGerald Schaefer if (pgd_present(*pgdp)) { 15553492b1dSGerald Schaefer pudp = pud_offset(pgdp, addr); 156*d08de8e2SGerald Schaefer if (pud_present(*pudp)) { 157*d08de8e2SGerald Schaefer if (pud_large(*pudp)) 158*d08de8e2SGerald Schaefer return (pte_t *) pudp; 15953492b1dSGerald Schaefer pmdp = pmd_offset(pudp, addr); 16053492b1dSGerald Schaefer } 161*d08de8e2SGerald Schaefer } 16253492b1dSGerald Schaefer return (pte_t *) pmdp; 16353492b1dSGerald Schaefer } 16453492b1dSGerald Schaefer 16553492b1dSGerald Schaefer int pmd_huge(pmd_t pmd) 16653492b1dSGerald Schaefer { 167cbd7d9c2SDominik Dingel return pmd_large(pmd); 16853492b1dSGerald Schaefer } 16953492b1dSGerald Schaefer 170ceb86879SAndi Kleen int pud_huge(pud_t pud) 171ceb86879SAndi Kleen { 172*d08de8e2SGerald Schaefer return pud_large(pud); 173*d08de8e2SGerald Schaefer } 174*d08de8e2SGerald Schaefer 175*d08de8e2SGerald Schaefer struct page * 176*d08de8e2SGerald Schaefer follow_huge_pud(struct mm_struct *mm, unsigned long address, 177*d08de8e2SGerald Schaefer pud_t *pud, int flags) 178*d08de8e2SGerald Schaefer { 179*d08de8e2SGerald Schaefer if (flags & FOLL_GET) 180*d08de8e2SGerald Schaefer return NULL; 181*d08de8e2SGerald Schaefer 182*d08de8e2SGerald Schaefer return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); 183*d08de8e2SGerald Schaefer } 184*d08de8e2SGerald Schaefer 185*d08de8e2SGerald Schaefer static __init int setup_hugepagesz(char *opt) 186*d08de8e2SGerald Schaefer { 187*d08de8e2SGerald Schaefer unsigned long size; 188*d08de8e2SGerald Schaefer char *string = opt; 189*d08de8e2SGerald Schaefer 190*d08de8e2SGerald Schaefer size = memparse(opt, &opt); 191*d08de8e2SGerald Schaefer if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { 192*d08de8e2SGerald Schaefer hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); 193*d08de8e2SGerald Schaefer } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { 194*d08de8e2SGerald Schaefer hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); 195*d08de8e2SGerald Schaefer } else { 196*d08de8e2SGerald Schaefer pr_err("hugepagesz= specifies an unsupported page size %s\n", 197*d08de8e2SGerald Schaefer string); 198ceb86879SAndi Kleen return 0; 199ceb86879SAndi Kleen } 200*d08de8e2SGerald Schaefer return 1; 201*d08de8e2SGerald Schaefer } 202*d08de8e2SGerald Schaefer __setup("hugepagesz=", setup_hugepagesz); 203