153492b1dSGerald Schaefer /* 253492b1dSGerald Schaefer * IBM System z Huge TLB Page Support for Kernel. 353492b1dSGerald Schaefer * 4d08de8e2SGerald Schaefer * Copyright IBM Corp. 2007,2016 553492b1dSGerald Schaefer * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> 653492b1dSGerald Schaefer */ 753492b1dSGerald Schaefer 8d08de8e2SGerald Schaefer #define KMSG_COMPONENT "hugetlb" 9d08de8e2SGerald Schaefer #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 10d08de8e2SGerald Schaefer 1153492b1dSGerald Schaefer #include <linux/mm.h> 1253492b1dSGerald Schaefer #include <linux/hugetlb.h> 1353492b1dSGerald Schaefer 14bc29b7acSGerald Schaefer /* 15bc29b7acSGerald Schaefer * If the bit selected by single-bit bitmask "a" is set within "x", move 16bc29b7acSGerald Schaefer * it to the position indicated by single-bit bitmask "b". 17bc29b7acSGerald Schaefer */ 18bc29b7acSGerald Schaefer #define move_set_bit(x, a, b) (((x) & (a)) >> ilog2(a) << ilog2(b)) 19bc29b7acSGerald Schaefer 20d08de8e2SGerald Schaefer static inline unsigned long __pte_to_rste(pte_t pte) 2153492b1dSGerald Schaefer { 22d08de8e2SGerald Schaefer unsigned long rste; 2353492b1dSGerald Schaefer 24e5098611SMartin Schwidefsky /* 25d08de8e2SGerald Schaefer * Convert encoding pte bits pmd / pud bits 26a1c843b8SMartin Schwidefsky * lIR.uswrdy.p dy..R...I...wr 27a1c843b8SMartin Schwidefsky * empty 010.000000.0 -> 00..0...1...00 28a1c843b8SMartin Schwidefsky * prot-none, clean, old 111.000000.1 -> 00..1...1...00 29a1c843b8SMartin Schwidefsky * prot-none, clean, young 111.000001.1 -> 01..1...1...00 30a1c843b8SMartin Schwidefsky * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 31a1c843b8SMartin Schwidefsky * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 32a1c843b8SMartin Schwidefsky * read-only, clean, old 111.000100.1 -> 00..1...1...01 33a1c843b8SMartin Schwidefsky * read-only, clean, young 101.000101.1 -> 01..1...0...01 34a1c843b8SMartin Schwidefsky * read-only, dirty, old 111.000110.1 -> 10..1...1...01 35a1c843b8SMartin Schwidefsky * read-only, dirty, young 101.000111.1 -> 11..1...0...01 36a1c843b8SMartin Schwidefsky * read-write, clean, old 111.001100.1 -> 00..1...1...11 37a1c843b8SMartin Schwidefsky * read-write, clean, young 101.001101.1 -> 01..1...0...11 38a1c843b8SMartin Schwidefsky * read-write, dirty, old 110.001110.1 -> 10..0...1...11 39a1c843b8SMartin Schwidefsky * read-write, dirty, young 100.001111.1 -> 11..0...0...11 40a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 41a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 42a1c843b8SMartin Schwidefsky * u unused, l large 43e5098611SMartin Schwidefsky */ 44e5098611SMartin Schwidefsky if (pte_present(pte)) { 45d08de8e2SGerald Schaefer rste = pte_val(pte) & PAGE_MASK; 46bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_READ, 47bc29b7acSGerald Schaefer _SEGMENT_ENTRY_READ); 48bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, 49bc29b7acSGerald Schaefer _SEGMENT_ENTRY_WRITE); 50bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_INVALID, 51bc29b7acSGerald Schaefer _SEGMENT_ENTRY_INVALID); 52bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT, 53bc29b7acSGerald Schaefer _SEGMENT_ENTRY_PROTECT); 54bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY, 55bc29b7acSGerald Schaefer _SEGMENT_ENTRY_DIRTY); 56bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG, 57bc29b7acSGerald Schaefer _SEGMENT_ENTRY_YOUNG); 58bc29b7acSGerald Schaefer #ifdef CONFIG_MEM_SOFT_DIRTY 59bc29b7acSGerald Schaefer rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY, 60bc29b7acSGerald Schaefer _SEGMENT_ENTRY_SOFT_DIRTY); 61bc29b7acSGerald Schaefer #endif 62e5098611SMartin Schwidefsky } else 63d08de8e2SGerald Schaefer rste = _SEGMENT_ENTRY_INVALID; 64d08de8e2SGerald Schaefer return rste; 6553492b1dSGerald Schaefer } 6653492b1dSGerald Schaefer 67d08de8e2SGerald Schaefer static inline pte_t __rste_to_pte(unsigned long rste) 68e5098611SMartin Schwidefsky { 69d08de8e2SGerald Schaefer int present; 70e5098611SMartin Schwidefsky pte_t pte; 71e5098611SMartin Schwidefsky 72d08de8e2SGerald Schaefer if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 73d08de8e2SGerald Schaefer present = pud_present(__pud(rste)); 74d08de8e2SGerald Schaefer else 75d08de8e2SGerald Schaefer present = pmd_present(__pmd(rste)); 76d08de8e2SGerald Schaefer 77e5098611SMartin Schwidefsky /* 78d08de8e2SGerald Schaefer * Convert encoding pmd / pud bits pte bits 79a1c843b8SMartin Schwidefsky * dy..R...I...wr lIR.uswrdy.p 80a1c843b8SMartin Schwidefsky * empty 00..0...1...00 -> 010.000000.0 81a1c843b8SMartin Schwidefsky * prot-none, clean, old 00..1...1...00 -> 111.000000.1 82a1c843b8SMartin Schwidefsky * prot-none, clean, young 01..1...1...00 -> 111.000001.1 83a1c843b8SMartin Schwidefsky * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 84a1c843b8SMartin Schwidefsky * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 85a1c843b8SMartin Schwidefsky * read-only, clean, old 00..1...1...01 -> 111.000100.1 86a1c843b8SMartin Schwidefsky * read-only, clean, young 01..1...0...01 -> 101.000101.1 87a1c843b8SMartin Schwidefsky * read-only, dirty, old 10..1...1...01 -> 111.000110.1 88a1c843b8SMartin Schwidefsky * read-only, dirty, young 11..1...0...01 -> 101.000111.1 89a1c843b8SMartin Schwidefsky * read-write, clean, old 00..1...1...11 -> 111.001100.1 90a1c843b8SMartin Schwidefsky * read-write, clean, young 01..1...0...11 -> 101.001101.1 91a1c843b8SMartin Schwidefsky * read-write, dirty, old 10..0...1...11 -> 110.001110.1 92a1c843b8SMartin Schwidefsky * read-write, dirty, young 11..0...0...11 -> 100.001111.1 93a1c843b8SMartin Schwidefsky * HW-bits: R read-only, I invalid 94a1c843b8SMartin Schwidefsky * SW-bits: p present, y young, d dirty, r read, w write, s special, 95a1c843b8SMartin Schwidefsky * u unused, l large 96e5098611SMartin Schwidefsky */ 97d08de8e2SGerald Schaefer if (present) { 98d08de8e2SGerald Schaefer pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; 99152125b7SMartin Schwidefsky pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; 100bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ, 101bc29b7acSGerald Schaefer _PAGE_READ); 102bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, 103bc29b7acSGerald Schaefer _PAGE_WRITE); 104bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, 105bc29b7acSGerald Schaefer _PAGE_INVALID); 106bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, 107bc29b7acSGerald Schaefer _PAGE_PROTECT); 108bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, 109bc29b7acSGerald Schaefer _PAGE_DIRTY); 110bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, 111bc29b7acSGerald Schaefer _PAGE_YOUNG); 112bc29b7acSGerald Schaefer #ifdef CONFIG_MEM_SOFT_DIRTY 113bc29b7acSGerald Schaefer pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, 114bc29b7acSGerald Schaefer _PAGE_DIRTY); 115bc29b7acSGerald Schaefer #endif 116e5098611SMartin Schwidefsky } else 117e5098611SMartin Schwidefsky pte_val(pte) = _PAGE_INVALID; 118e5098611SMartin Schwidefsky return pte; 119e5098611SMartin Schwidefsky } 120e5098611SMartin Schwidefsky 121e5098611SMartin Schwidefsky void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 122e5098611SMartin Schwidefsky pte_t *ptep, pte_t pte) 123e5098611SMartin Schwidefsky { 124d08de8e2SGerald Schaefer unsigned long rste = __pte_to_rste(pte); 125e5098611SMartin Schwidefsky 126d08de8e2SGerald Schaefer /* Set correct table type for 2G hugepages */ 127d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 128d08de8e2SGerald Schaefer rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; 129d08de8e2SGerald Schaefer else 130d08de8e2SGerald Schaefer rste |= _SEGMENT_ENTRY_LARGE; 131d08de8e2SGerald Schaefer pte_val(*ptep) = rste; 132e5098611SMartin Schwidefsky } 133e5098611SMartin Schwidefsky 134e5098611SMartin Schwidefsky pte_t huge_ptep_get(pte_t *ptep) 135e5098611SMartin Schwidefsky { 136d08de8e2SGerald Schaefer return __rste_to_pte(pte_val(*ptep)); 137e5098611SMartin Schwidefsky } 138e5098611SMartin Schwidefsky 139e5098611SMartin Schwidefsky pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 140e5098611SMartin Schwidefsky unsigned long addr, pte_t *ptep) 141e5098611SMartin Schwidefsky { 142d08de8e2SGerald Schaefer pte_t pte = huge_ptep_get(ptep); 143e5098611SMartin Schwidefsky pmd_t *pmdp = (pmd_t *) ptep; 144d08de8e2SGerald Schaefer pud_t *pudp = (pud_t *) ptep; 145e5098611SMartin Schwidefsky 146d08de8e2SGerald Schaefer if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 147d08de8e2SGerald Schaefer pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); 148d08de8e2SGerald Schaefer else 149d08de8e2SGerald Schaefer pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); 150d08de8e2SGerald Schaefer return pte; 15153492b1dSGerald Schaefer } 15253492b1dSGerald Schaefer 153a5516438SAndi Kleen pte_t *huge_pte_alloc(struct mm_struct *mm, 154a5516438SAndi Kleen unsigned long addr, unsigned long sz) 15553492b1dSGerald Schaefer { 15653492b1dSGerald Schaefer pgd_t *pgdp; 15753492b1dSGerald Schaefer pud_t *pudp; 15853492b1dSGerald Schaefer pmd_t *pmdp = NULL; 15953492b1dSGerald Schaefer 16053492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 16153492b1dSGerald Schaefer pudp = pud_alloc(mm, pgdp, addr); 162d08de8e2SGerald Schaefer if (pudp) { 163d08de8e2SGerald Schaefer if (sz == PUD_SIZE) 164d08de8e2SGerald Schaefer return (pte_t *) pudp; 165d08de8e2SGerald Schaefer else if (sz == PMD_SIZE) 16653492b1dSGerald Schaefer pmdp = pmd_alloc(mm, pudp, addr); 167d08de8e2SGerald Schaefer } 16853492b1dSGerald Schaefer return (pte_t *) pmdp; 16953492b1dSGerald Schaefer } 17053492b1dSGerald Schaefer 17153492b1dSGerald Schaefer pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 17253492b1dSGerald Schaefer { 17353492b1dSGerald Schaefer pgd_t *pgdp; 17453492b1dSGerald Schaefer pud_t *pudp; 17553492b1dSGerald Schaefer pmd_t *pmdp = NULL; 17653492b1dSGerald Schaefer 17753492b1dSGerald Schaefer pgdp = pgd_offset(mm, addr); 17853492b1dSGerald Schaefer if (pgd_present(*pgdp)) { 17953492b1dSGerald Schaefer pudp = pud_offset(pgdp, addr); 180d08de8e2SGerald Schaefer if (pud_present(*pudp)) { 181d08de8e2SGerald Schaefer if (pud_large(*pudp)) 182d08de8e2SGerald Schaefer return (pte_t *) pudp; 18353492b1dSGerald Schaefer pmdp = pmd_offset(pudp, addr); 18453492b1dSGerald Schaefer } 185d08de8e2SGerald Schaefer } 18653492b1dSGerald Schaefer return (pte_t *) pmdp; 18753492b1dSGerald Schaefer } 18853492b1dSGerald Schaefer 18953492b1dSGerald Schaefer int pmd_huge(pmd_t pmd) 19053492b1dSGerald Schaefer { 191cbd7d9c2SDominik Dingel return pmd_large(pmd); 19253492b1dSGerald Schaefer } 19353492b1dSGerald Schaefer 194ceb86879SAndi Kleen int pud_huge(pud_t pud) 195ceb86879SAndi Kleen { 196d08de8e2SGerald Schaefer return pud_large(pud); 197d08de8e2SGerald Schaefer } 198d08de8e2SGerald Schaefer 199d08de8e2SGerald Schaefer struct page * 200d08de8e2SGerald Schaefer follow_huge_pud(struct mm_struct *mm, unsigned long address, 201d08de8e2SGerald Schaefer pud_t *pud, int flags) 202d08de8e2SGerald Schaefer { 203d08de8e2SGerald Schaefer if (flags & FOLL_GET) 204d08de8e2SGerald Schaefer return NULL; 205d08de8e2SGerald Schaefer 206d08de8e2SGerald Schaefer return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); 207d08de8e2SGerald Schaefer } 208d08de8e2SGerald Schaefer 209d08de8e2SGerald Schaefer static __init int setup_hugepagesz(char *opt) 210d08de8e2SGerald Schaefer { 211d08de8e2SGerald Schaefer unsigned long size; 212d08de8e2SGerald Schaefer char *string = opt; 213d08de8e2SGerald Schaefer 214d08de8e2SGerald Schaefer size = memparse(opt, &opt); 215d08de8e2SGerald Schaefer if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { 216d08de8e2SGerald Schaefer hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); 217d08de8e2SGerald Schaefer } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { 218d08de8e2SGerald Schaefer hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); 219d08de8e2SGerald Schaefer } else { 220*b5003b5fSShyam Saini hugetlb_bad_size(); 221d08de8e2SGerald Schaefer pr_err("hugepagesz= specifies an unsupported page size %s\n", 222d08de8e2SGerald Schaefer string); 223ceb86879SAndi Kleen return 0; 224ceb86879SAndi Kleen } 225d08de8e2SGerald Schaefer return 1; 226d08de8e2SGerald Schaefer } 227d08de8e2SGerald Schaefer __setup("hugepagesz=", setup_hugepagesz); 228