// SPDX-License-Identifier: GPL-2.0
/*
 *  IBM System z Huge TLB Page Support for Kernel.
 *
 *    Copyright IBM Corp. 2007,2016
 *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * If the bit selected by single-bit bitmask "a" is set within "x", move
 * it to the position indicated by single-bit bitmask "b".
 */
#define move_set_bit(x, a, b)	(((x) & (a)) >> ilog2(a) << ilog2(b))
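
/*
 * Worked example (illustrative masks only, not the real s390 bit values):
 * with x = 0x0c, a = 0x04, b = 0x40, bit "a" is set within "x", so
 * move_set_bit(x, a, b) = ((0x0c & 0x04) >> ilog2(0x04)) << ilog2(0x40)
 *                       = (0x04 >> 2) << 6 = 0x40.
 * If bit "a" were clear, the result would be 0.
 */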

static inline unsigned long __pte_to_rste(pte_t pte)
{
	unsigned long rste;

	/*
	 * Convert encoding		pte bits	pmd / pud bits
	 *				lIR.uswrdy.p	dy..R...I...wr
	 * empty			010.000000.0 -> 00..0...1...00
	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
	if (pte_present(pte)) {
		rste = pte_val(pte) & PAGE_MASK;
		rste |= move_set_bit(pte_val(pte), _PAGE_READ,
				     _SEGMENT_ENTRY_READ);
		rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
				     _SEGMENT_ENTRY_WRITE);
		rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
				     _SEGMENT_ENTRY_INVALID);
		rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
				     _SEGMENT_ENTRY_PROTECT);
		rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
				     _SEGMENT_ENTRY_DIRTY);
		rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
				     _SEGMENT_ENTRY_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
		rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
				     _SEGMENT_ENTRY_SOFT_DIRTY);
#endif
		rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
				     _SEGMENT_ENTRY_NOEXEC);
	} else
		rste = _SEGMENT_ENTRY_EMPTY;
	return rste;
}

static inline pte_t __rste_to_pte(unsigned long rste)
{
	int present;
	pte_t pte;

	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		present = pud_present(__pud(rste));
	else
		present = pmd_present(__pmd(rste));

	/*
	 * Convert encoding		pmd / pud bits	pte bits
	 *				dy..R...I...wr	lIR.uswrdy.p
	 * empty			00..0...1...00 -> 010.000000.0
	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
	if (present) {
		pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ,
					     _PAGE_READ);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE,
					     _PAGE_WRITE);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID,
					     _PAGE_INVALID);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT,
					     _PAGE_PROTECT);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY,
					     _PAGE_DIRTY);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG,
					     _PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
					     _PAGE_SOFT_DIRTY);
#endif
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
					     _PAGE_NOEXEC);
	} else
		pte_val(pte) = _PAGE_INVALID;
	return pte;
}
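
/*
 * Example, following one row of the tables above: a present, read-only,
 * clean, young pte (101.000101.1) has _PAGE_READ, _PAGE_YOUNG and
 * _PAGE_PROTECT set but _PAGE_INVALID clear; __pte_to_rste() moves these
 * into _SEGMENT_ENTRY_READ, _SEGMENT_ENTRY_YOUNG and
 * _SEGMENT_ENTRY_PROTECT (01..1...0...01), and __rste_to_pte() moves
 * them back, so the two helpers are inverses of each other for present
 * entries.
 */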

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	unsigned long rste;

	rste = __pte_to_rste(pte);
	if (!MACHINE_HAS_NX)
		rste &= ~_SEGMENT_ENTRY_NOEXEC;

	/* Set correct table type for 2G hugepages */
	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
	else
		rste |= _SEGMENT_ENTRY_LARGE;
	pte_val(*ptep) = rste;
}

pte_t huge_ptep_get(pte_t *ptep)
{
	return __rste_to_pte(pte_val(*ptep));
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	pte_t pte = huge_ptep_get(ptep);
	pmd_t *pmdp = (pmd_t *) ptep;
	pud_t *pudp = (pud_t *) ptep;

	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
	else
		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
	return pte;
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_alloc(mm, pgdp, addr);
	if (p4dp) {
		pudp = pud_alloc(mm, p4dp, addr);
		if (pudp) {
			if (sz == PUD_SIZE)
				return (pte_t *) pudp;
			else if (sz == PMD_SIZE)
				pmdp = pmd_alloc(mm, pudp, addr);
		}
	}
	return (pte_t *) pmdp;
}

pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	if (pgd_present(*pgdp)) {
		p4dp = p4d_offset(pgdp, addr);
		if (p4d_present(*p4dp)) {
			pudp = pud_offset(p4dp, addr);
			if (pud_present(*pudp)) {
				if (pud_large(*pudp))
					return (pte_t *) pudp;
				pmdp = pmd_offset(pudp, addr);
			}
		}
	}
	return (pte_t *) pmdp;
}

int pmd_huge(pmd_t pmd)
{
	return pmd_large(pmd);
}

int pud_huge(pud_t pud)
{
	return pud_large(pud);
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int flags)
{
	if (flags & FOLL_GET)
		return NULL;

	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}

static __init int setup_hugepagesz(char *opt)
{
	unsigned long size;
	char *string = opt;

	size = memparse(opt, &opt);
	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	} else {
		hugetlb_bad_size();
		pr_err("hugepagesz= specifies an unsupported page size %s\n",
		       string);
		return 0;
	}
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
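
/*
 * Usage sketch (kernel command line), assuming the usual s390 sizes of
 * PMD_SIZE = 1 MB and PUD_SIZE = 2 GB:
 *
 *	hugepagesz=1M hugepages=128	(1 MB pages, requires EDAT1)
 *	hugepagesz=2G hugepages=2	(2 GB pages, requires EDAT2)
 *
 * Any other size is rejected by setup_hugepagesz() with the pr_err()
 * above.
 */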