153492b1dSGerald Schaefer /* 253492b1dSGerald Schaefer * IBM System z Huge TLB Page Support for Kernel. 353492b1dSGerald Schaefer * 4d08de8e2SGerald Schaefer * Copyright IBM Corp. 2007,2016 553492b1dSGerald Schaefer * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> 653492b1dSGerald Schaefer */ 753492b1dSGerald Schaefer 8d08de8e2SGerald Schaefer #define KMSG_COMPONENT "hugetlb" 9d08de8e2SGerald Schaefer #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 10d08de8e2SGerald Schaefer 1153492b1dSGerald Schaefer #include <linux/mm.h> 1253492b1dSGerald Schaefer #include <linux/hugetlb.h> 1353492b1dSGerald Schaefer 14bc29b7acSGerald Schaefer /* 15bc29b7acSGerald Schaefer * If the bit selected by single-bit bitmask "a" is set within "x", move 16bc29b7acSGerald Schaefer * it to the position indicated by single-bit bitmask "b". 17bc29b7acSGerald Schaefer */ 18bc29b7acSGerald Schaefer #define move_set_bit(x, a, b) (((x) & (a)) >> ilog2(a) << ilog2(b)) 19bc29b7acSGerald Schaefer 20d08de8e2SGerald Schaefer static inline unsigned long __pte_to_rste(pte_t pte) 2153492b1dSGerald Schaefer { 22d08de8e2SGerald Schaefer unsigned long rste; 2353492b1dSGerald Schaefer 24e5098611SMartin Schwidefsky /* 25d08de8e2SGerald Schaefer * Convert encoding pte bits pmd / pud bits 26a1c843b8SMartin Schwidefsky * lIR.uswrdy.p dy..R...I...wr 27a1c843b8SMartin Schwidefsky * empty 010.000000.0 -> 00..0...1...00 28a1c843b8SMartin Schwidefsky * prot-none, clean, old 111.000000.1 -> 00..1...1...00 29a1c843b8SMartin Schwidefsky * prot-none, clean, young 111.000001.1 -> 01..1...1...00 30a1c843b8SMartin Schwidefsky * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 31a1c843b8SMartin Schwidefsky * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 32a1c843b8SMartin Schwidefsky * read-only, clean, old 111.000100.1 -> 00..1...1...01 33a1c843b8SMartin Schwidefsky * read-only, clean, young 101.000101.1 -> 01..1...0...01 34a1c843b8SMartin Schwidefsky * read-only, dirty, old 111.000110.1 -> 10..1...1...01 35a1c843b8SMartin Schwidefsky * read-only, dirty, young 101.000111.1 -> 11..1...0...01 36a1c843b8SMartin Schwidefsky * read-write, clean, old 111.001100.1 -> 00..1...1...11 37a1c843b8SMartin Schwidefsky * read-write, clean, young 101.001101.1 -> 01..1...0...11 38a1c843b8SMartin Schwidefsky * read-write, dirty, old 110.001110.1 -> 10..0...1...11 39a1c843b8SMartin Schwidefsky * read-write, dirty, young 100.001111.1 -> 11..0...0...11 40a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 41a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 42a1c843b8SMartin Schwidefsky * u unused, l large 43e5098611SMartin Schwidefsky */ 44e5098611SMartin Schwidefsky if (pte_present(pte)) { 45d08de8e2SGerald Schaefer rste = pte_val(pte) & PAGE_MASK; 46bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_READ, 47bc29b7acSGerald Schaefer _SEGMENT_ENTRY_READ); 48bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, 49bc29b7acSGerald Schaefer _SEGMENT_ENTRY_WRITE); 50bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_INVALID, 51bc29b7acSGerald Schaefer _SEGMENT_ENTRY_INVALID); 52bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT, 53bc29b7acSGerald Schaefer _SEGMENT_ENTRY_PROTECT); 54bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY, 55bc29b7acSGerald Schaefer _SEGMENT_ENTRY_DIRTY); 56bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG, 57bc29b7acSGerald Schaefer _SEGMENT_ENTRY_YOUNG); 58bc29b7acSGerald Schaefer #ifdef CONFIG_MEM_SOFT_DIRTY 59bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY, 60bc29b7acSGerald Schaefer _SEGMENT_ENTRY_SOFT_DIRTY); 61bc29b7acSGerald Schaefer #endif 6257d7f939SMartin Schwidefsky rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC, 6357d7f939SMartin Schwidefsky _SEGMENT_ENTRY_NOEXEC); 64e5098611SMartin Schwidefsky } else 6554397bb0SDominik Dingel rste = _SEGMENT_ENTRY_EMPTY; 66d08de8e2SGerald Schaefer return rste; 6753492b1dSGerald Schaefer } 6853492b1dSGerald Schaefer 69d08de8e2SGerald Schaefer static inline pte_t __rste_to_pte(unsigned long rste) 70e5098611SMartin Schwidefsky { 71d08de8e2SGerald Schaefer int present; 72e5098611SMartin Schwidefsky pte_t pte; 73e5098611SMartin Schwidefsky 74d08de8e2SGerald Schaefer if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 75d08de8e2SGerald Schaefer present = pud_present(__pud(rste)); 76d08de8e2SGerald Schaefer else 77d08de8e2SGerald Schaefer present = pmd_present(__pmd(rste)); 78d08de8e2SGerald Schaefer 79e5098611SMartin Schwidefsky /* 80d08de8e2SGerald Schaefer * Convert encoding pmd / pud bits pte bits 81a1c843b8SMartin Schwidefsky * dy..R...I...wr lIR.uswrdy.p 82a1c843b8SMartin Schwidefsky * empty 00..0...1...00 -> 010.000000.0 83a1c843b8SMartin Schwidefsky * prot-none, clean, old 00..1...1...00 -> 111.000000.1 84a1c843b8SMartin Schwidefsky * prot-none, clean, young 01..1...1...00 -> 111.000001.1 85a1c843b8SMartin Schwidefsky * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 86a1c843b8SMartin Schwidefsky * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 87a1c843b8SMartin Schwidefsky * read-only, clean, old 00..1...1...01 -> 111.000100.1 88a1c843b8SMartin Schwidefsky * read-only, clean, young 01..1...0...01 -> 101.000101.1 89a1c843b8SMartin Schwidefsky * read-only, dirty, old 10..1...1...01 -> 111.000110.1 90a1c843b8SMartin Schwidefsky * read-only, dirty, young 11..1...0...01 -> 101.000111.1 91a1c843b8SMartin Schwidefsky * read-write, clean, old 00..1...1...11 -> 111.001100.1 92a1c843b8SMartin Schwidefsky * read-write, clean, young 01..1...0...11 -> 101.001101.1 93a1c843b8SMartin Schwidefsky * read-write, dirty, old 10..0...1...11 -> 110.001110.1 94a1c843b8SMartin Schwidefsky * read-write, dirty, young 11..0...0...11 -> 100.001111.1 95a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 96a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 97a1c843b8SMartin Schwidefsky * u unused, l large 98e5098611SMartin Schwidefsky */ 99d08de8e2SGerald Schaefer if (present) { 100d08de8e2SGerald Schaefer pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; 101152125b7SMartin Schwidefsky pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; 102bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ, 103bc29b7acSGerald Schaefer _PAGE_READ); 104bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, 105bc29b7acSGerald Schaefer _PAGE_WRITE); 106bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, 107bc29b7acSGerald Schaefer _PAGE_INVALID); 108bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, 109bc29b7acSGerald Schaefer _PAGE_PROTECT); 110bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, 111bc29b7acSGerald Schaefer _PAGE_DIRTY); 112bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, 113bc29b7acSGerald Schaefer _PAGE_YOUNG); 114bc29b7acSGerald Schaefer #ifdef CONFIG_MEM_SOFT_DIRTY 115bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, 116bc29b7acSGerald Schaefer _PAGE_DIRTY); 117bc29b7acSGerald Schaefer #endif 11857d7f939SMartin Schwidefsky pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, 11957d7f939SMartin Schwidefsky _PAGE_NOEXEC); 120e5098611SMartin Schwidefsky } else 121e5098611SMartin Schwidefsky pte_val(pte) = _PAGE_INVALID; 122e5098611SMartin Schwidefsky return pte; 123e5098611SMartin Schwidefsky } 124e5098611SMartin Schwidefsky 125e5098611SMartin Schwidefsky void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 126e5098611SMartin Schwidefsky pte_t *ptep, pte_t pte) 127e5098611SMartin Schwidefsky { 12857d7f939SMartin Schwidefsky unsigned long rste; 12957d7f939SMartin Schwidefsky 13057d7f939SMartin Schwidefsky rste = __pte_to_rste(pte); 13157d7f939SMartin Schwidefsky if (!MACHINE_HAS_NX) 13257d7f939SMartin Schwidefsky rste &= ~_SEGMENT_ENTRY_NOEXEC; 133e5098611SMartin Schwidefsky 134d08de8e2SGerald Schaefer /* Set correct table type for 2G hugepages */ 135d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 136d08de8e2SGerald Schaefer rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; 137d08de8e2SGerald Schaefer else 138d08de8e2SGerald Schaefer rste |= _SEGMENT_ENTRY_LARGE; 139d08de8e2SGerald Schaefer pte_val(*ptep) = rste; 140e5098611SMartin Schwidefsky } 141e5098611SMartin Schwidefsky 142e5098611SMartin Schwidefsky pte_t huge_ptep_get(pte_t *ptep) 143e5098611SMartin Schwidefsky { 144d08de8e2SGerald Schaefer return __rste_to_pte(pte_val(*ptep)); 145e5098611SMartin Schwidefsky } 146e5098611SMartin Schwidefsky 147e5098611SMartin Schwidefsky pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 148e5098611SMartin Schwidefsky unsigned long addr, pte_t *ptep) 149e5098611SMartin Schwidefsky { 150d08de8e2SGerald Schaefer pte_t pte = huge_ptep_get(ptep); 151e5098611SMartin Schwidefsky pmd_t *pmdp = (pmd_t *) ptep; 152d08de8e2SGerald Schaefer pud_t *pudp = (pud_t *) ptep; 153e5098611SMartin Schwidefsky 154d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 155d08de8e2SGerald Schaefer pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); 156d08de8e2SGerald Schaefer else 157d08de8e2SGerald Schaefer pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); 158d08de8e2SGerald Schaefer return pte; 15953492b1dSGerald Schaefer } 16053492b1dSGerald Schaefer 161a5516438SAndi Kleen pte_t *huge_pte_alloc(struct mm_struct *mm, 162a5516438SAndi Kleen unsigned long addr, unsigned long sz) 16353492b1dSGerald Schaefer { 16453492b1dSGerald Schaefer pgd_t *pgdp; 165*1aea9b3fSMartin Schwidefsky p4d_t *p4dp; 16653492b1dSGerald Schaefer pud_t *pudp; 16753492b1dSGerald Schaefer pmd_t *pmdp = NULL; 16853492b1dSGerald Schaefer 16953492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 170*1aea9b3fSMartin Schwidefsky p4dp = p4d_alloc(mm, pgdp, addr); 171*1aea9b3fSMartin Schwidefsky if (p4dp) { 172*1aea9b3fSMartin Schwidefsky pudp = pud_alloc(mm, p4dp, addr); 173d08de8e2SGerald Schaefer if (pudp) { 174d08de8e2SGerald Schaefer if (sz == PUD_SIZE) 175d08de8e2SGerald Schaefer return (pte_t *) pudp; 176d08de8e2SGerald Schaefer else if (sz == PMD_SIZE) 17753492b1dSGerald Schaefer pmdp = pmd_alloc(mm, pudp, addr); 178d08de8e2SGerald Schaefer } 179*1aea9b3fSMartin Schwidefsky } 18053492b1dSGerald Schaefer return (pte_t *) pmdp; 18153492b1dSGerald Schaefer } 18253492b1dSGerald Schaefer 18353492b1dSGerald Schaefer pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 18453492b1dSGerald Schaefer { 18553492b1dSGerald Schaefer pgd_t *pgdp; 186*1aea9b3fSMartin Schwidefsky p4d_t *p4dp; 18753492b1dSGerald Schaefer pud_t *pudp; 18853492b1dSGerald Schaefer pmd_t *pmdp = NULL; 18953492b1dSGerald Schaefer 19053492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 19153492b1dSGerald Schaefer if (pgd_present(*pgdp)) { 192*1aea9b3fSMartin Schwidefsky p4dp = p4d_offset(pgdp, addr); 193*1aea9b3fSMartin Schwidefsky if (p4d_present(*p4dp)) { 194*1aea9b3fSMartin Schwidefsky pudp = pud_offset(p4dp, addr); 195d08de8e2SGerald Schaefer if (pud_present(*pudp)) { 196d08de8e2SGerald Schaefer if (pud_large(*pudp)) 197d08de8e2SGerald Schaefer return (pte_t *) pudp; 19853492b1dSGerald Schaefer pmdp = pmd_offset(pudp, addr); 19953492b1dSGerald Schaefer } 200d08de8e2SGerald Schaefer } 201*1aea9b3fSMartin Schwidefsky } 20253492b1dSGerald Schaefer return (pte_t *) pmdp; 20353492b1dSGerald Schaefer } 20453492b1dSGerald Schaefer 20553492b1dSGerald Schaefer int pmd_huge(pmd_t pmd) 20653492b1dSGerald Schaefer { 207cbd7d9c2SDominik Dingel return pmd_large(pmd); 20853492b1dSGerald Schaefer } 20953492b1dSGerald Schaefer 210ceb86879SAndi Kleen int pud_huge(pud_t pud) 211ceb86879SAndi Kleen { 212d08de8e2SGerald Schaefer return pud_large(pud); 213d08de8e2SGerald Schaefer } 214d08de8e2SGerald Schaefer 215d08de8e2SGerald Schaefer struct page * 216d08de8e2SGerald Schaefer follow_huge_pud(struct mm_struct *mm, unsigned long address, 217d08de8e2SGerald Schaefer pud_t *pud, int flags) 218d08de8e2SGerald Schaefer { 219d08de8e2SGerald Schaefer if (flags & FOLL_GET) 220d08de8e2SGerald Schaefer return NULL; 221d08de8e2SGerald Schaefer 222d08de8e2SGerald Schaefer return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); 223d08de8e2SGerald Schaefer } 224d08de8e2SGerald Schaefer 225d08de8e2SGerald Schaefer static __init int setup_hugepagesz(char *opt) 226d08de8e2SGerald Schaefer { 227d08de8e2SGerald Schaefer unsigned long size; 228d08de8e2SGerald Schaefer char *string = opt; 229d08de8e2SGerald Schaefer 230d08de8e2SGerald Schaefer size = memparse(opt, &opt); 231d08de8e2SGerald Schaefer if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { 232d08de8e2SGerald Schaefer hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); 233d08de8e2SGerald Schaefer } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { 234d08de8e2SGerald Schaefer hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); 235d08de8e2SGerald Schaefer } else { 236b5003b5fSShyam Saini hugetlb_bad_size(); 237d08de8e2SGerald Schaefer pr_err("hugepagesz= specifies an unsupported page size %s\n", 238d08de8e2SGerald Schaefer string); 239ceb86879SAndi Kleen return 0; 240ceb86879SAndi Kleen } 241d08de8e2SGerald Schaefer return 1; 242d08de8e2SGerald Schaefer } 243d08de8e2SGerald Schaefer __setup("hugepagesz=", setup_hugepagesz); 244