/*
 * IBM System z Huge TLB Page Support for Kernel.
 *
 * Copyright IBM Corp. 2007,2016
 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * If the bit selected by single-bit bitmask "a" is set within "x", move
 * it to the position indicated by single-bit bitmask "b".
 */
#define move_set_bit(x, a, b)	(((x) & (a)) >> ilog2(a) << ilog2(b))

static inline unsigned long __pte_to_rste(pte_t pte)
{
	unsigned long rste;

	/*
	 * Convert encoding		pte bits	pmd / pud bits
	 *				lIR.uswrdy.p	dy..R...I...wr
	 * empty			010.000000.0 -> 00..0...1...00
	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
	if (pte_present(pte)) {
		rste = pte_val(pte) & PAGE_MASK;
		rste |= move_set_bit(pte_val(pte), _PAGE_READ,
				     _SEGMENT_ENTRY_READ);
		rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
				     _SEGMENT_ENTRY_WRITE);
		rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
				     _SEGMENT_ENTRY_INVALID);
		rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
				     _SEGMENT_ENTRY_PROTECT);
		rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
				     _SEGMENT_ENTRY_DIRTY);
		rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
				     _SEGMENT_ENTRY_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
		rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
				     _SEGMENT_ENTRY_SOFT_DIRTY);
#endif
		rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
				     _SEGMENT_ENTRY_NOEXEC);
	} else
		rste = _SEGMENT_ENTRY_EMPTY;
	return rste;
}
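
/*
 * Worked example for the translation above, with illustrative mask
 * values rather than the architecture-defined ones: a read-only,
 * clean, young pte has r=1, y=1 and the HW protect bit set, so
 * __pte_to_rste() returns an rste with _SEGMENT_ENTRY_READ,
 * _SEGMENT_ENTRY_YOUNG and _SEGMENT_ENTRY_PROTECT set, matching the
 * "01..1...0...01" row of the table. Each bit is relocated by one
 * shift pair in move_set_bit(); e.g. for a = 0x004 and b = 0x100,
 * ilog2() gives 2 and 8, so the macro computes
 * ((x & 0x004) >> 2) << 8 and the flag moves from bit 2 to bit 8.
 */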

static inline pte_t __rste_to_pte(unsigned long rste)
{
	int present;
	pte_t pte;

	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		present = pud_present(__pud(rste));
	else
		present = pmd_present(__pmd(rste));

	/*
	 * Convert encoding		pmd / pud bits	pte bits
	 *				dy..R...I...wr	lIR.uswrdy.p
	 * empty			00..0...1...00 -> 010.000000.0
	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
	if (present) {
		pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ,
					     _PAGE_READ);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE,
					     _PAGE_WRITE);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID,
					     _PAGE_INVALID);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT,
					     _PAGE_PROTECT);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY,
					     _PAGE_DIRTY);
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG,
					     _PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
					     _PAGE_SOFT_DIRTY);
#endif
		pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
					     _PAGE_NOEXEC);
	} else
		pte_val(pte) = _PAGE_INVALID;
	return pte;
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	unsigned long rste;

	rste = __pte_to_rste(pte);
	if (!MACHINE_HAS_NX)
		rste &= ~_SEGMENT_ENTRY_NOEXEC;

	/* Set correct table type for 2G hugepages */
	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
	else
		rste |= _SEGMENT_ENTRY_LARGE;
	pte_val(*ptep) = rste;
}

pte_t huge_ptep_get(pte_t *ptep)
{
	return __rste_to_pte(pte_val(*ptep));
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	pte_t pte = huge_ptep_get(ptep);
	pmd_t *pmdp = (pmd_t *) ptep;
	pud_t *pudp = (pud_t *) ptep;

	/* Clear the entry at the matching level: region-third or segment */
	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
	else
		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
	return pte;
}

/* Huge ptes live directly in the pud (PUD_SIZE) or pmd (PMD_SIZE) table */
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	pudp = pud_alloc(mm, pgdp, addr);
	if (pudp) {
		if (sz == PUD_SIZE)
			return (pte_t *) pudp;
		else if (sz == PMD_SIZE)
			pmdp = pmd_alloc(mm, pudp, addr);
	}
	return (pte_t *) pmdp;
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	if (pgd_present(*pgdp)) {
		pudp = pud_offset(pgdp, addr);
		if (pud_present(*pudp)) {
			if (pud_large(*pudp))
				return (pte_t *) pudp;
			pmdp = pmd_offset(pudp, addr);
		}
	}
	return (pte_t *) pmdp;
}

int pmd_huge(pmd_t pmd)
{
	return pmd_large(pmd);
}

int pud_huge(pud_t pud)
{
	return pud_large(pud);
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int flags)
{
	if (flags & FOLL_GET)
		return NULL;

	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}
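
/*
 * Command-line setup for "hugepagesz=". On s390 the EDAT1 facility
 * enables large segment pages (PMD_SIZE, 1MB) and EDAT2 enables huge
 * region-third pages (PUD_SIZE, 2GB), so only those two sizes can be
 * registered as hstates below.
 */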
static __init int setup_hugepagesz(char *opt)
{
	unsigned long size;
	char *string = opt;

	size = memparse(opt, &opt);
	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	} else {
		hugetlb_bad_size();
		pr_err("hugepagesz= specifies an unsupported page size %s\n",
		       string);
		return 0;
	}
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
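
/*
 * Usage sketch with hypothetical boot parameters (sizes assume the
 * s390 defaults noted above):
 *
 *	hugepagesz=1M hugepages=64	64 x 1MB pages, requires EDAT1
 *	hugepagesz=2G hugepages=2	2 x 2GB pages, requires EDAT2
 *
 * Any other size is rejected: hugetlb_bad_size() marks the parsed size
 * invalid and the pr_err() above reports it.
 */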