1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 253492b1dSGerald Schaefer /* 353492b1dSGerald Schaefer * IBM System z Huge TLB Page Support for Kernel. 453492b1dSGerald Schaefer * 5d08de8e2SGerald Schaefer * Copyright IBM Corp. 2007,2016 653492b1dSGerald Schaefer * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> 753492b1dSGerald Schaefer */ 853492b1dSGerald Schaefer 9d08de8e2SGerald Schaefer #define KMSG_COMPONENT "hugetlb" 10d08de8e2SGerald Schaefer #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11d08de8e2SGerald Schaefer 1253492b1dSGerald Schaefer #include <linux/mm.h> 1353492b1dSGerald Schaefer #include <linux/hugetlb.h> 1453492b1dSGerald Schaefer 15bc29b7acSGerald Schaefer /* 16bc29b7acSGerald Schaefer * If the bit selected by single-bit bitmask "a" is set within "x", move 17bc29b7acSGerald Schaefer * it to the position indicated by single-bit bitmask "b". 18bc29b7acSGerald Schaefer */ 19bc29b7acSGerald Schaefer #define move_set_bit(x, a, b) (((x) & (a)) >> ilog2(a) << ilog2(b)) 20bc29b7acSGerald Schaefer 21d08de8e2SGerald Schaefer static inline unsigned long __pte_to_rste(pte_t pte) 2253492b1dSGerald Schaefer { 23d08de8e2SGerald Schaefer unsigned long rste; 2453492b1dSGerald Schaefer 25e5098611SMartin Schwidefsky /* 26d08de8e2SGerald Schaefer * Convert encoding pte bits pmd / pud bits 27a1c843b8SMartin Schwidefsky * lIR.uswrdy.p dy..R...I...wr 28a1c843b8SMartin Schwidefsky * empty 010.000000.0 -> 00..0...1...00 29a1c843b8SMartin Schwidefsky * prot-none, clean, old 111.000000.1 -> 00..1...1...00 30a1c843b8SMartin Schwidefsky * prot-none, clean, young 111.000001.1 -> 01..1...1...00 31a1c843b8SMartin Schwidefsky * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 32a1c843b8SMartin Schwidefsky * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 33a1c843b8SMartin Schwidefsky * read-only, clean, old 111.000100.1 -> 00..1...1...01 34a1c843b8SMartin Schwidefsky * read-only, clean, young 101.000101.1 -> 01..1...0...01 35a1c843b8SMartin Schwidefsky * read-only, dirty, old 111.000110.1 -> 10..1...1...01 36a1c843b8SMartin Schwidefsky * read-only, dirty, young 101.000111.1 -> 11..1...0...01 37a1c843b8SMartin Schwidefsky * read-write, clean, old 111.001100.1 -> 00..1...1...11 38a1c843b8SMartin Schwidefsky * read-write, clean, young 101.001101.1 -> 01..1...0...11 39a1c843b8SMartin Schwidefsky * read-write, dirty, old 110.001110.1 -> 10..0...1...11 40a1c843b8SMartin Schwidefsky * read-write, dirty, young 100.001111.1 -> 11..0...0...11 41a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 42a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 43a1c843b8SMartin Schwidefsky * u unused, l large 44e5098611SMartin Schwidefsky */ 45e5098611SMartin Schwidefsky if (pte_present(pte)) { 46d08de8e2SGerald Schaefer rste = pte_val(pte) & PAGE_MASK; 47bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_READ, 48bc29b7acSGerald Schaefer _SEGMENT_ENTRY_READ); 49bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, 50bc29b7acSGerald Schaefer _SEGMENT_ENTRY_WRITE); 51bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_INVALID, 52bc29b7acSGerald Schaefer _SEGMENT_ENTRY_INVALID); 53bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT, 54bc29b7acSGerald Schaefer _SEGMENT_ENTRY_PROTECT); 55bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY, 56bc29b7acSGerald Schaefer _SEGMENT_ENTRY_DIRTY); 57bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG, 58bc29b7acSGerald Schaefer _SEGMENT_ENTRY_YOUNG); 59bc29b7acSGerald Schaefer #ifdef CONFIG_MEM_SOFT_DIRTY 60bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY, 61bc29b7acSGerald Schaefer _SEGMENT_ENTRY_SOFT_DIRTY); 62bc29b7acSGerald Schaefer #endif 6357d7f939SMartin Schwidefsky rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC, 6457d7f939SMartin Schwidefsky _SEGMENT_ENTRY_NOEXEC); 65e5098611SMartin Schwidefsky } else 6654397bb0SDominik Dingel rste = _SEGMENT_ENTRY_EMPTY; 67d08de8e2SGerald Schaefer return rste; 6853492b1dSGerald Schaefer } 6953492b1dSGerald Schaefer 70d08de8e2SGerald Schaefer static inline pte_t __rste_to_pte(unsigned long rste) 71e5098611SMartin Schwidefsky { 72d08de8e2SGerald Schaefer int present; 73e5098611SMartin Schwidefsky pte_t pte; 74e5098611SMartin Schwidefsky 75d08de8e2SGerald Schaefer if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 76d08de8e2SGerald Schaefer present = pud_present(__pud(rste)); 77d08de8e2SGerald Schaefer else 78d08de8e2SGerald Schaefer present = pmd_present(__pmd(rste)); 79d08de8e2SGerald Schaefer 80e5098611SMartin Schwidefsky /* 81d08de8e2SGerald Schaefer * Convert encoding pmd / pud bits pte bits 82a1c843b8SMartin Schwidefsky * dy..R...I...wr lIR.uswrdy.p 83a1c843b8SMartin Schwidefsky * empty 00..0...1...00 -> 010.000000.0 84a1c843b8SMartin Schwidefsky * prot-none, clean, old 00..1...1...00 -> 111.000000.1 85a1c843b8SMartin Schwidefsky * prot-none, clean, young 01..1...1...00 -> 111.000001.1 86a1c843b8SMartin Schwidefsky * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 87a1c843b8SMartin Schwidefsky * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 88a1c843b8SMartin Schwidefsky * read-only, clean, old 00..1...1...01 -> 111.000100.1 89a1c843b8SMartin Schwidefsky * read-only, clean, young 01..1...0...01 -> 101.000101.1 90a1c843b8SMartin Schwidefsky * read-only, dirty, old 10..1...1...01 -> 111.000110.1 91a1c843b8SMartin Schwidefsky * read-only, dirty, young 11..1...0...01 -> 101.000111.1 92a1c843b8SMartin Schwidefsky * read-write, clean, old 00..1...1...11 -> 111.001100.1 93a1c843b8SMartin Schwidefsky * read-write, clean, young 01..1...0...11 -> 101.001101.1 94a1c843b8SMartin Schwidefsky * read-write, dirty, old 10..0...1...11 -> 110.001110.1 95a1c843b8SMartin Schwidefsky * read-write, dirty, young 11..0...0...11 -> 100.001111.1 96a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 97a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 98a1c843b8SMartin Schwidefsky * u unused, l large 99e5098611SMartin Schwidefsky */ 100d08de8e2SGerald Schaefer if (present) { 101d08de8e2SGerald Schaefer pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; 102152125b7SMartin Schwidefsky pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; 103bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ, 104bc29b7acSGerald Schaefer _PAGE_READ); 105bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, 106bc29b7acSGerald Schaefer _PAGE_WRITE); 107bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, 108bc29b7acSGerald Schaefer _PAGE_INVALID); 109bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, 110bc29b7acSGerald Schaefer _PAGE_PROTECT); 111bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, 112bc29b7acSGerald Schaefer _PAGE_DIRTY); 113bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, 114bc29b7acSGerald Schaefer _PAGE_YOUNG); 115bc29b7acSGerald Schaefer #ifdef CONFIG_MEM_SOFT_DIRTY 116bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, 117bc29b7acSGerald Schaefer _PAGE_DIRTY); 118bc29b7acSGerald Schaefer #endif 11957d7f939SMartin Schwidefsky pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, 12057d7f939SMartin Schwidefsky _PAGE_NOEXEC); 121e5098611SMartin Schwidefsky } else 122e5098611SMartin Schwidefsky pte_val(pte) = _PAGE_INVALID; 123e5098611SMartin Schwidefsky return pte; 124e5098611SMartin Schwidefsky } 125e5098611SMartin Schwidefsky 126*3afdfca6SJanosch Frank static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) 127*3afdfca6SJanosch Frank { 128*3afdfca6SJanosch Frank struct page *page; 129*3afdfca6SJanosch Frank unsigned long size, paddr; 130*3afdfca6SJanosch Frank 131*3afdfca6SJanosch Frank if (!mm_uses_skeys(mm) || 132*3afdfca6SJanosch Frank rste & _SEGMENT_ENTRY_INVALID) 133*3afdfca6SJanosch Frank return; 134*3afdfca6SJanosch Frank 135*3afdfca6SJanosch Frank if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { 136*3afdfca6SJanosch Frank page = pud_page(__pud(rste)); 137*3afdfca6SJanosch Frank size = PUD_SIZE; 138*3afdfca6SJanosch Frank paddr = rste & PUD_MASK; 139*3afdfca6SJanosch Frank } else { 140*3afdfca6SJanosch Frank page = pmd_page(__pmd(rste)); 141*3afdfca6SJanosch Frank size = PMD_SIZE; 142*3afdfca6SJanosch Frank paddr = rste & PMD_MASK; 143*3afdfca6SJanosch Frank } 144*3afdfca6SJanosch Frank 145*3afdfca6SJanosch Frank if (!test_and_set_bit(PG_arch_1, &page->flags)) 146*3afdfca6SJanosch Frank __storage_key_init_range(paddr, paddr + size - 1); 147*3afdfca6SJanosch Frank } 148*3afdfca6SJanosch Frank 149e5098611SMartin Schwidefsky void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 150e5098611SMartin Schwidefsky pte_t *ptep, pte_t pte) 151e5098611SMartin Schwidefsky { 15257d7f939SMartin Schwidefsky unsigned long rste; 15357d7f939SMartin Schwidefsky 15457d7f939SMartin Schwidefsky rste = __pte_to_rste(pte); 15557d7f939SMartin Schwidefsky if (!MACHINE_HAS_NX) 15657d7f939SMartin Schwidefsky rste &= ~_SEGMENT_ENTRY_NOEXEC; 157e5098611SMartin Schwidefsky 158d08de8e2SGerald Schaefer /* Set correct table type for 2G hugepages */ 159d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 160d08de8e2SGerald Schaefer rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; 161d08de8e2SGerald Schaefer else 162d08de8e2SGerald Schaefer rste |= _SEGMENT_ENTRY_LARGE; 163*3afdfca6SJanosch Frank clear_huge_pte_skeys(mm, rste); 164d08de8e2SGerald Schaefer pte_val(*ptep) = rste; 165e5098611SMartin Schwidefsky } 166e5098611SMartin Schwidefsky 167e5098611SMartin Schwidefsky pte_t huge_ptep_get(pte_t *ptep) 168e5098611SMartin Schwidefsky { 169d08de8e2SGerald Schaefer return __rste_to_pte(pte_val(*ptep)); 170e5098611SMartin Schwidefsky } 171e5098611SMartin Schwidefsky 172e5098611SMartin Schwidefsky pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 173e5098611SMartin Schwidefsky unsigned long addr, pte_t *ptep) 174e5098611SMartin Schwidefsky { 175d08de8e2SGerald Schaefer pte_t pte = huge_ptep_get(ptep); 176e5098611SMartin Schwidefsky pmd_t *pmdp = (pmd_t *) ptep; 177d08de8e2SGerald Schaefer pud_t *pudp = (pud_t *) ptep; 178e5098611SMartin Schwidefsky 179d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 180d08de8e2SGerald Schaefer pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); 181d08de8e2SGerald Schaefer else 182d08de8e2SGerald Schaefer pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); 183d08de8e2SGerald Schaefer return pte; 18453492b1dSGerald Schaefer } 18553492b1dSGerald Schaefer 186a5516438SAndi Kleen pte_t *huge_pte_alloc(struct mm_struct *mm, 187a5516438SAndi Kleen unsigned long addr, unsigned long sz) 18853492b1dSGerald Schaefer { 18953492b1dSGerald Schaefer pgd_t *pgdp; 1901aea9b3fSMartin Schwidefsky p4d_t *p4dp; 19153492b1dSGerald Schaefer pud_t *pudp; 19253492b1dSGerald Schaefer pmd_t *pmdp = NULL; 19353492b1dSGerald Schaefer 19453492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 1951aea9b3fSMartin Schwidefsky p4dp = p4d_alloc(mm, pgdp, addr); 1961aea9b3fSMartin Schwidefsky if (p4dp) { 1971aea9b3fSMartin Schwidefsky pudp = pud_alloc(mm, p4dp, addr); 198d08de8e2SGerald Schaefer if (pudp) { 199d08de8e2SGerald Schaefer if (sz == PUD_SIZE) 200d08de8e2SGerald Schaefer return (pte_t *) pudp; 201d08de8e2SGerald Schaefer else if (sz == PMD_SIZE) 20253492b1dSGerald Schaefer pmdp = pmd_alloc(mm, pudp, addr); 203d08de8e2SGerald Schaefer } 2041aea9b3fSMartin Schwidefsky } 20553492b1dSGerald Schaefer return (pte_t *) pmdp; 20653492b1dSGerald Schaefer } 20753492b1dSGerald Schaefer 2087868a208SPunit Agrawal pte_t *huge_pte_offset(struct mm_struct *mm, 2097868a208SPunit Agrawal unsigned long addr, unsigned long sz) 21053492b1dSGerald Schaefer { 21153492b1dSGerald Schaefer pgd_t *pgdp; 2121aea9b3fSMartin Schwidefsky p4d_t *p4dp; 21353492b1dSGerald Schaefer pud_t *pudp; 21453492b1dSGerald Schaefer pmd_t *pmdp = NULL; 21553492b1dSGerald Schaefer 21653492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 21753492b1dSGerald Schaefer if (pgd_present(*pgdp)) { 2181aea9b3fSMartin Schwidefsky p4dp = p4d_offset(pgdp, addr); 2191aea9b3fSMartin Schwidefsky if (p4d_present(*p4dp)) { 2201aea9b3fSMartin Schwidefsky pudp = pud_offset(p4dp, addr); 221d08de8e2SGerald Schaefer if (pud_present(*pudp)) { 222d08de8e2SGerald Schaefer if (pud_large(*pudp)) 223d08de8e2SGerald Schaefer return (pte_t *) pudp; 22453492b1dSGerald Schaefer pmdp = pmd_offset(pudp, addr); 22553492b1dSGerald Schaefer } 226d08de8e2SGerald Schaefer } 2271aea9b3fSMartin Schwidefsky } 22853492b1dSGerald Schaefer return (pte_t *) pmdp; 22953492b1dSGerald Schaefer } 23053492b1dSGerald Schaefer 23153492b1dSGerald Schaefer int pmd_huge(pmd_t pmd) 23253492b1dSGerald Schaefer { 233cbd7d9c2SDominik Dingel return pmd_large(pmd); 23453492b1dSGerald Schaefer } 23553492b1dSGerald Schaefer 236ceb86879SAndi Kleen int pud_huge(pud_t pud) 237ceb86879SAndi Kleen { 238d08de8e2SGerald Schaefer return pud_large(pud); 239d08de8e2SGerald Schaefer } 240d08de8e2SGerald Schaefer 241d08de8e2SGerald Schaefer struct page * 242d08de8e2SGerald Schaefer follow_huge_pud(struct mm_struct *mm, unsigned long address, 243d08de8e2SGerald Schaefer pud_t *pud, int flags) 244d08de8e2SGerald Schaefer { 245d08de8e2SGerald Schaefer if (flags & FOLL_GET) 246d08de8e2SGerald Schaefer return NULL; 247d08de8e2SGerald Schaefer 248d08de8e2SGerald Schaefer return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); 249d08de8e2SGerald Schaefer } 250d08de8e2SGerald Schaefer 251d08de8e2SGerald Schaefer static __init int setup_hugepagesz(char *opt) 252d08de8e2SGerald Schaefer { 253d08de8e2SGerald Schaefer unsigned long size; 254d08de8e2SGerald Schaefer char *string = opt; 255d08de8e2SGerald Schaefer 256d08de8e2SGerald Schaefer size = memparse(opt, &opt); 257d08de8e2SGerald Schaefer if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { 258d08de8e2SGerald Schaefer hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); 259d08de8e2SGerald Schaefer } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { 260d08de8e2SGerald Schaefer hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); 261d08de8e2SGerald Schaefer } else { 262b5003b5fSShyam Saini hugetlb_bad_size(); 263d08de8e2SGerald Schaefer pr_err("hugepagesz= specifies an unsupported page size %s\n", 264d08de8e2SGerald Schaefer string); 265ceb86879SAndi Kleen return 0; 266ceb86879SAndi Kleen } 267d08de8e2SGerald Schaefer return 1; 268d08de8e2SGerald Schaefer } 269d08de8e2SGerald Schaefer __setup("hugepagesz=", setup_hugepagesz); 270