// SPDX-License-Identifier: GPL-2.0
/*
 *  IBM System z Huge TLB Page Support for Kernel.
 *
 *    Copyright IBM Corp. 2007,2016
 *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * If the bit selected by single-bit bitmask "a" is set within "x", move
 * it to the position indicated by single-bit bitmask "b".
 */
#define move_set_bit(x, a, b)	(((x) & (a)) >> ilog2(a) << ilog2(b))
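
/*
 * For example (illustrative mask values, not the real s390 definitions):
 * move_set_bit(v, 0x010, 0x2000) expands to ((v & 0x010) >> 4) << 13,
 * i.e. bit 4 of "v", if set, is relocated to bit position 13. Both "a"
 * and "b" must have exactly one bit set, since ilog2() only yields the
 * position of the most significant set bit.
 */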

static inline unsigned long __pte_to_rste(pte_t pte)
{
	unsigned long rste;

	/*
	 * Convert encoding		  pte bits	pmd / pud bits
	 *				lIR.uswrdy.p	dy..R...I...wr
	 * empty			010.000000.0 -> 00..0...1...00
	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
	if (pte_present(pte)) {
		rste = pte_val(pte) & PAGE_MASK;
		rste |= move_set_bit(pte_val(pte), _PAGE_READ,
				     _SEGMENT_ENTRY_READ);
		rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
				     _SEGMENT_ENTRY_WRITE);
		rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
				     _SEGMENT_ENTRY_INVALID);
		rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
				     _SEGMENT_ENTRY_PROTECT);
		rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
				     _SEGMENT_ENTRY_DIRTY);
		rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
				     _SEGMENT_ENTRY_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
		rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
				     _SEGMENT_ENTRY_SOFT_DIRTY);
#endif
		rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
				     _SEGMENT_ENTRY_NOEXEC);
	} else
		rste = _SEGMENT_ENTRY_EMPTY;
	return rste;
}

static inline pte_t __rste_to_pte(unsigned long rste)
{
	int present;
	pte_t pte;

	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		present = pud_present(__pud(rste));
	else
		present = pmd_present(__pmd(rste));

	/*
	 * Convert encoding		pmd / pud bits	    pte bits
	 *				dy..R...I...wr	  lIR.uswrdy.p
	 * empty			00..0...1...00 -> 010.000000.0
	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
	if (present) {
		pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ,
					     _PAGE_READ);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE,
					     _PAGE_WRITE);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID,
					     _PAGE_INVALID);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT,
					     _PAGE_PROTECT);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY,
					     _PAGE_DIRTY);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG,
					     _PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
					     _PAGE_SOFT_DIRTY);
#endif
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
					     _PAGE_NOEXEC);
	} else
		pte_val(pte) = _PAGE_INVALID;
	return pte;
}

static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
{
	struct page *page;
	unsigned long size, paddr;

	if (!mm_uses_skeys(mm) ||
	    rste & _SEGMENT_ENTRY_INVALID)
		return;

	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
		page = pud_page(__pud(rste));
		size = PUD_SIZE;
		paddr = rste & PUD_MASK;
	} else {
		page = pmd_page(__pmd(rste));
		size = PMD_SIZE;
		paddr = rste & PMD_MASK;
	}

	if (!test_and_set_bit(PG_arch_1, &page->flags))
		__storage_key_init_range(paddr, paddr + size - 1);
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	unsigned long rste;

	rste = __pte_to_rste(pte);
	if (!MACHINE_HAS_NX)
		rste &= ~_SEGMENT_ENTRY_NOEXEC;

	/* Set correct table type for 2G hugepages */
	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
	else
		rste |= _SEGMENT_ENTRY_LARGE;
	clear_huge_pte_skeys(mm, rste);
	pte_val(*ptep) = rste;
}

pte_t huge_ptep_get(pte_t *ptep)
{
	return __rste_to_pte(pte_val(*ptep));
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	pte_t pte = huge_ptep_get(ptep);
	pmd_t *pmdp = (pmd_t *) ptep;
	pud_t *pudp = (pud_t *) ptep;

	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
	else
		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
	return pte;
}
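
/*
 * Note: s390 maps hugetlb pages directly in the segment table (pmd
 * level, 1 MB pages) or region third table (pud level, 2 GB pages),
 * so the pte_t pointer returned by huge_pte_alloc()/huge_pte_offset()
 * is really a cast pmd or pud pointer; no pte table is ever allocated
 * for a huge mapping.
 */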
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_alloc(mm, pgdp, addr);
	if (p4dp) {
		pudp = pud_alloc(mm, p4dp, addr);
		if (pudp) {
			if (sz == PUD_SIZE)
				return (pte_t *) pudp;
			else if (sz == PMD_SIZE)
				pmdp = pmd_alloc(mm, pudp, addr);
		}
	}
	return (pte_t *) pmdp;
}

pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	if (pgd_present(*pgdp)) {
		p4dp = p4d_offset(pgdp, addr);
		if (p4d_present(*p4dp)) {
			pudp = pud_offset(p4dp, addr);
			if (pud_present(*pudp)) {
				if (pud_large(*pudp))
					return (pte_t *) pudp;
				pmdp = pmd_offset(pudp, addr);
			}
		}
	}
	return (pte_t *) pmdp;
}

int pmd_huge(pmd_t pmd)
{
	return pmd_large(pmd);
}

int pud_huge(pud_t pud)
{
	return pud_large(pud);
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int flags)
{
	if (flags & FOLL_GET)
		return NULL;

	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}

static __init int setup_hugepagesz(char *opt)
{
	unsigned long size;
	char *string = opt;

	size = memparse(opt, &opt);
	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	} else {
		hugetlb_bad_size();
		pr_err("hugepagesz= specifies an unsupported page size %s\n",
		       string);
		return 0;
	}
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
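
/*
 * Usage sketch (hypothetical command line, for illustration only):
 * booting with "hugepagesz=1M hugepages=32" reaches setup_hugepagesz()
 * above and registers the 1 MB hstate via hugetlb_add_hstate() when
 * EDAT1 is available; "hugepagesz=2G" additionally requires EDAT2.
 * Any other size is rejected through hugetlb_bad_size() and the
 * pr_err() message.
 */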