153492b1dSGerald Schaefer /* 253492b1dSGerald Schaefer * IBM System z Huge TLB Page Support for Kernel. 353492b1dSGerald Schaefer * 4d08de8e2SGerald Schaefer * Copyright IBM Corp. 2007,2016 553492b1dSGerald Schaefer * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> 653492b1dSGerald Schaefer */ 753492b1dSGerald Schaefer 8d08de8e2SGerald Schaefer #define KMSG_COMPONENT "hugetlb" 9d08de8e2SGerald Schaefer #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 10d08de8e2SGerald Schaefer 1153492b1dSGerald Schaefer #include <linux/mm.h> 1253492b1dSGerald Schaefer #include <linux/hugetlb.h> 1353492b1dSGerald Schaefer 14bc29b7acSGerald Schaefer /* 15bc29b7acSGerald Schaefer * If the bit selected by single-bit bitmask "a" is set within "x", move 16bc29b7acSGerald Schaefer * it to the position indicated by single-bit bitmask "b". 17bc29b7acSGerald Schaefer */ 18bc29b7acSGerald Schaefer #define move_set_bit(x, a, b) (((x) & (a)) >> ilog2(a) << ilog2(b)) 19bc29b7acSGerald Schaefer 20d08de8e2SGerald Schaefer static inline unsigned long __pte_to_rste(pte_t pte) 2153492b1dSGerald Schaefer { 22d08de8e2SGerald Schaefer unsigned long rste; 2353492b1dSGerald Schaefer 24e5098611SMartin Schwidefsky /* 25d08de8e2SGerald Schaefer * Convert encoding pte bits pmd / pud bits 26a1c843b8SMartin Schwidefsky * lIR.uswrdy.p dy..R...I...wr 27a1c843b8SMartin Schwidefsky * empty 010.000000.0 -> 00..0...1...00 28a1c843b8SMartin Schwidefsky * prot-none, clean, old 111.000000.1 -> 00..1...1...00 29a1c843b8SMartin Schwidefsky * prot-none, clean, young 111.000001.1 -> 01..1...1...00 30a1c843b8SMartin Schwidefsky * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 31a1c843b8SMartin Schwidefsky * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 32a1c843b8SMartin Schwidefsky * read-only, clean, old 111.000100.1 -> 00..1...1...01 33a1c843b8SMartin Schwidefsky * read-only, clean, young 101.000101.1 -> 01..1...0...01 34a1c843b8SMartin Schwidefsky * read-only, dirty, old 111.000110.1 -> 10..1...1...01 35a1c843b8SMartin Schwidefsky * read-only, dirty, young 101.000111.1 -> 11..1...0...01 36a1c843b8SMartin Schwidefsky * read-write, clean, old 111.001100.1 -> 00..1...1...11 37a1c843b8SMartin Schwidefsky * read-write, clean, young 101.001101.1 -> 01..1...0...11 38a1c843b8SMartin Schwidefsky * read-write, dirty, old 110.001110.1 -> 10..0...1...11 39a1c843b8SMartin Schwidefsky * read-write, dirty, young 100.001111.1 -> 11..0...0...11 40a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 41a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 42a1c843b8SMartin Schwidefsky * u unused, l large 43e5098611SMartin Schwidefsky */ 44e5098611SMartin Schwidefsky if (pte_present(pte)) { 45d08de8e2SGerald Schaefer rste = pte_val(pte) & PAGE_MASK; 46bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_READ, 47bc29b7acSGerald Schaefer _SEGMENT_ENTRY_READ); 48bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, 49bc29b7acSGerald Schaefer _SEGMENT_ENTRY_WRITE); 50bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_INVALID, 51bc29b7acSGerald Schaefer _SEGMENT_ENTRY_INVALID); 52bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT, 53bc29b7acSGerald Schaefer _SEGMENT_ENTRY_PROTECT); 54bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY, 55bc29b7acSGerald Schaefer _SEGMENT_ENTRY_DIRTY); 56bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG, 57bc29b7acSGerald Schaefer _SEGMENT_ENTRY_YOUNG); 58bc29b7acSGerald Schaefer #ifdef CONFIG_MEM_SOFT_DIRTY 59bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY, 60bc29b7acSGerald Schaefer _SEGMENT_ENTRY_SOFT_DIRTY); 61bc29b7acSGerald Schaefer #endif 62*57d7f939SMartin Schwidefsky rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC, 63*57d7f939SMartin Schwidefsky _SEGMENT_ENTRY_NOEXEC); 64e5098611SMartin Schwidefsky } else 65d08de8e2SGerald Schaefer rste = _SEGMENT_ENTRY_INVALID; 66d08de8e2SGerald Schaefer return rste; 6753492b1dSGerald Schaefer } 6853492b1dSGerald Schaefer 69d08de8e2SGerald Schaefer static inline pte_t __rste_to_pte(unsigned long rste) 70e5098611SMartin Schwidefsky { 71d08de8e2SGerald Schaefer int present; 72e5098611SMartin Schwidefsky pte_t pte; 73e5098611SMartin Schwidefsky 74d08de8e2SGerald Schaefer if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 75d08de8e2SGerald Schaefer present = pud_present(__pud(rste)); 76d08de8e2SGerald Schaefer else 77d08de8e2SGerald Schaefer present = pmd_present(__pmd(rste)); 78d08de8e2SGerald Schaefer 79e5098611SMartin Schwidefsky /* 80d08de8e2SGerald Schaefer * Convert encoding pmd / pud bits pte bits 81a1c843b8SMartin Schwidefsky * dy..R...I...wr lIR.uswrdy.p 82a1c843b8SMartin Schwidefsky * empty 00..0...1...00 -> 010.000000.0 83a1c843b8SMartin Schwidefsky * prot-none, clean, old 00..1...1...00 -> 111.000000.1 84a1c843b8SMartin Schwidefsky * prot-none, clean, young 01..1...1...00 -> 111.000001.1 85a1c843b8SMartin Schwidefsky * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 86a1c843b8SMartin Schwidefsky * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 87a1c843b8SMartin Schwidefsky * read-only, clean, old 00..1...1...01 -> 111.000100.1 88a1c843b8SMartin Schwidefsky * read-only, clean, young 01..1...0...01 -> 101.000101.1 89a1c843b8SMartin Schwidefsky * read-only, dirty, old 10..1...1...01 -> 111.000110.1 90a1c843b8SMartin Schwidefsky * read-only, dirty, young 11..1...0...01 -> 101.000111.1 91a1c843b8SMartin Schwidefsky * read-write, clean, old 00..1...1...11 -> 111.001100.1 92a1c843b8SMartin Schwidefsky * read-write, clean, young 01..1...0...11 -> 101.001101.1 93a1c843b8SMartin Schwidefsky * read-write, dirty, old 10..0...1...11 -> 110.001110.1 94a1c843b8SMartin Schwidefsky * read-write, dirty, young 11..0...0...11 -> 100.001111.1 95a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 96a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 97a1c843b8SMartin Schwidefsky * u unused, l large 98e5098611SMartin Schwidefsky */ 99d08de8e2SGerald Schaefer if (present) { 100d08de8e2SGerald Schaefer pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; 101152125b7SMartin Schwidefsky pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; 102bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ, 103bc29b7acSGerald Schaefer _PAGE_READ); 104bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, 105bc29b7acSGerald Schaefer _PAGE_WRITE); 106bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, 107bc29b7acSGerald Schaefer _PAGE_INVALID); 108bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, 109bc29b7acSGerald Schaefer _PAGE_PROTECT); 110bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, 111bc29b7acSGerald Schaefer _PAGE_DIRTY); 112bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, 113bc29b7acSGerald Schaefer _PAGE_YOUNG); 114bc29b7acSGerald Schaefer #ifdef CONFIG_MEM_SOFT_DIRTY 115bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, 116bc29b7acSGerald Schaefer _PAGE_DIRTY); 117bc29b7acSGerald Schaefer #endif 118*57d7f939SMartin Schwidefsky pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, 119*57d7f939SMartin Schwidefsky _PAGE_NOEXEC); 120e5098611SMartin Schwidefsky } else 121e5098611SMartin Schwidefsky pte_val(pte) = _PAGE_INVALID; 122e5098611SMartin Schwidefsky return pte; 123e5098611SMartin Schwidefsky } 124e5098611SMartin Schwidefsky 125e5098611SMartin Schwidefsky void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 126e5098611SMartin Schwidefsky pte_t *ptep, pte_t pte) 127e5098611SMartin Schwidefsky { 128*57d7f939SMartin Schwidefsky unsigned long rste; 129*57d7f939SMartin Schwidefsky 130*57d7f939SMartin Schwidefsky rste = __pte_to_rste(pte); 131*57d7f939SMartin Schwidefsky if (!MACHINE_HAS_NX) 132*57d7f939SMartin Schwidefsky rste &= ~_SEGMENT_ENTRY_NOEXEC; 133e5098611SMartin Schwidefsky 134d08de8e2SGerald Schaefer /* Set correct table type for 2G hugepages */ 135d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 136d08de8e2SGerald Schaefer rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; 137d08de8e2SGerald Schaefer else 138d08de8e2SGerald Schaefer rste |= _SEGMENT_ENTRY_LARGE; 139d08de8e2SGerald Schaefer pte_val(*ptep) = rste; 140e5098611SMartin Schwidefsky } 141e5098611SMartin Schwidefsky 142e5098611SMartin Schwidefsky pte_t huge_ptep_get(pte_t *ptep) 143e5098611SMartin Schwidefsky { 144d08de8e2SGerald Schaefer return __rste_to_pte(pte_val(*ptep)); 145e5098611SMartin Schwidefsky } 146e5098611SMartin Schwidefsky 147e5098611SMartin Schwidefsky pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 148e5098611SMartin Schwidefsky unsigned long addr, pte_t *ptep) 149e5098611SMartin Schwidefsky { 150d08de8e2SGerald Schaefer pte_t pte = huge_ptep_get(ptep); 151e5098611SMartin Schwidefsky pmd_t *pmdp = (pmd_t *) ptep; 152d08de8e2SGerald Schaefer pud_t *pudp = (pud_t *) ptep; 153e5098611SMartin Schwidefsky 154d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 155d08de8e2SGerald Schaefer pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); 156d08de8e2SGerald Schaefer else 157d08de8e2SGerald Schaefer pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); 158d08de8e2SGerald Schaefer return pte; 15953492b1dSGerald Schaefer } 16053492b1dSGerald Schaefer 161a5516438SAndi Kleen pte_t *huge_pte_alloc(struct mm_struct *mm, 162a5516438SAndi Kleen unsigned long addr, unsigned long sz) 16353492b1dSGerald Schaefer { 16453492b1dSGerald Schaefer pgd_t *pgdp; 16553492b1dSGerald Schaefer pud_t *pudp; 16653492b1dSGerald Schaefer pmd_t *pmdp = NULL; 16753492b1dSGerald Schaefer 16853492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 16953492b1dSGerald Schaefer pudp = pud_alloc(mm, pgdp, addr); 170d08de8e2SGerald Schaefer if (pudp) { 171d08de8e2SGerald Schaefer if (sz == PUD_SIZE) 172d08de8e2SGerald Schaefer return (pte_t *) pudp; 173d08de8e2SGerald Schaefer else if (sz == PMD_SIZE) 17453492b1dSGerald Schaefer pmdp = pmd_alloc(mm, pudp, addr); 175d08de8e2SGerald Schaefer } 17653492b1dSGerald Schaefer return (pte_t *) pmdp; 17753492b1dSGerald Schaefer } 17853492b1dSGerald Schaefer 17953492b1dSGerald Schaefer pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 18053492b1dSGerald Schaefer { 18153492b1dSGerald Schaefer pgd_t *pgdp; 18253492b1dSGerald Schaefer pud_t *pudp; 18353492b1dSGerald Schaefer pmd_t *pmdp = NULL; 18453492b1dSGerald Schaefer 18553492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 18653492b1dSGerald Schaefer if (pgd_present(*pgdp)) { 18753492b1dSGerald Schaefer pudp = pud_offset(pgdp, addr); 188d08de8e2SGerald Schaefer if (pud_present(*pudp)) { 189d08de8e2SGerald Schaefer if (pud_large(*pudp)) 190d08de8e2SGerald Schaefer return (pte_t *) pudp; 19153492b1dSGerald Schaefer pmdp = pmd_offset(pudp, addr); 19253492b1dSGerald Schaefer } 193d08de8e2SGerald Schaefer } 19453492b1dSGerald Schaefer return (pte_t *) pmdp; 19553492b1dSGerald Schaefer } 19653492b1dSGerald Schaefer 19753492b1dSGerald Schaefer int pmd_huge(pmd_t pmd) 19853492b1dSGerald Schaefer { 199cbd7d9c2SDominik Dingel return pmd_large(pmd); 20053492b1dSGerald Schaefer } 20153492b1dSGerald Schaefer 202ceb86879SAndi Kleen int pud_huge(pud_t pud) 203ceb86879SAndi Kleen { 204d08de8e2SGerald Schaefer return pud_large(pud); 205d08de8e2SGerald Schaefer } 206d08de8e2SGerald Schaefer 207d08de8e2SGerald Schaefer struct page * 208d08de8e2SGerald Schaefer follow_huge_pud(struct mm_struct *mm, unsigned long address, 209d08de8e2SGerald Schaefer pud_t *pud, int flags) 210d08de8e2SGerald Schaefer { 211d08de8e2SGerald Schaefer if (flags & FOLL_GET) 212d08de8e2SGerald Schaefer return NULL; 213d08de8e2SGerald Schaefer 214d08de8e2SGerald Schaefer return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); 215d08de8e2SGerald Schaefer } 216d08de8e2SGerald Schaefer 217d08de8e2SGerald Schaefer static __init int setup_hugepagesz(char *opt) 218d08de8e2SGerald Schaefer { 219d08de8e2SGerald Schaefer unsigned long size; 220d08de8e2SGerald Schaefer char *string = opt; 221d08de8e2SGerald Schaefer 222d08de8e2SGerald Schaefer size = memparse(opt, &opt); 223d08de8e2SGerald Schaefer if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { 224d08de8e2SGerald Schaefer hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); 225d08de8e2SGerald Schaefer } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { 226d08de8e2SGerald Schaefer hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); 227d08de8e2SGerald Schaefer } else { 228b5003b5fSShyam Saini hugetlb_bad_size(); 229d08de8e2SGerald Schaefer pr_err("hugepagesz= specifies an unsupported page size %s\n", 230d08de8e2SGerald Schaefer string); 231ceb86879SAndi Kleen return 0; 232ceb86879SAndi Kleen } 233d08de8e2SGerald Schaefer return 1; 234d08de8e2SGerald Schaefer } 235d08de8e2SGerald Schaefer __setup("hugepagesz=", setup_hugepagesz); 236