xref: /openbmc/linux/arch/s390/mm/hugetlbpage.c (revision d08de8e2d86744f91d9d5d57c56ca2b6e33bf6ec)
153492b1dSGerald Schaefer /*
253492b1dSGerald Schaefer  *  IBM System z Huge TLB Page Support for Kernel.
353492b1dSGerald Schaefer  *
4*d08de8e2SGerald Schaefer  *    Copyright IBM Corp. 2007,2016
553492b1dSGerald Schaefer  *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
653492b1dSGerald Schaefer  */
753492b1dSGerald Schaefer 
8*d08de8e2SGerald Schaefer #define KMSG_COMPONENT "hugetlb"
9*d08de8e2SGerald Schaefer #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
10*d08de8e2SGerald Schaefer 
1153492b1dSGerald Schaefer #include <linux/mm.h>
1253492b1dSGerald Schaefer #include <linux/hugetlb.h>
1353492b1dSGerald Schaefer 
14*d08de8e2SGerald Schaefer static inline unsigned long __pte_to_rste(pte_t pte)
1553492b1dSGerald Schaefer {
16*d08de8e2SGerald Schaefer 	unsigned long rste;
1753492b1dSGerald Schaefer 
18e5098611SMartin Schwidefsky 	/*
19*d08de8e2SGerald Schaefer 	 * Convert encoding		  pte bits	pmd / pud bits
20a1c843b8SMartin Schwidefsky 	 *				lIR.uswrdy.p	dy..R...I...wr
21a1c843b8SMartin Schwidefsky 	 * empty			010.000000.0 -> 00..0...1...00
22a1c843b8SMartin Schwidefsky 	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
23a1c843b8SMartin Schwidefsky 	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
24a1c843b8SMartin Schwidefsky 	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
25a1c843b8SMartin Schwidefsky 	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
26a1c843b8SMartin Schwidefsky 	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
27a1c843b8SMartin Schwidefsky 	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
28a1c843b8SMartin Schwidefsky 	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
29a1c843b8SMartin Schwidefsky 	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
30a1c843b8SMartin Schwidefsky 	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
31a1c843b8SMartin Schwidefsky 	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
32a1c843b8SMartin Schwidefsky 	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
33a1c843b8SMartin Schwidefsky 	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
34a1c843b8SMartin Schwidefsky 	 * HW-bits: R read-only, I invalid
35a1c843b8SMartin Schwidefsky 	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
36a1c843b8SMartin Schwidefsky 	 *	    u unused, l large
37e5098611SMartin Schwidefsky 	 */
38e5098611SMartin Schwidefsky 	if (pte_present(pte)) {
39*d08de8e2SGerald Schaefer 		rste = pte_val(pte) & PAGE_MASK;
40*d08de8e2SGerald Schaefer 		rste |= (pte_val(pte) & _PAGE_READ) >> 4;
41*d08de8e2SGerald Schaefer 		rste |= (pte_val(pte) & _PAGE_WRITE) >> 4;
42*d08de8e2SGerald Schaefer 		rste |= (pte_val(pte) & _PAGE_INVALID) >> 5;
43*d08de8e2SGerald Schaefer 		rste |= (pte_val(pte) & _PAGE_PROTECT);
44*d08de8e2SGerald Schaefer 		rste |= (pte_val(pte) & _PAGE_DIRTY) << 10;
45*d08de8e2SGerald Schaefer 		rste |= (pte_val(pte) & _PAGE_YOUNG) << 10;
46*d08de8e2SGerald Schaefer 		rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
47e5098611SMartin Schwidefsky 	} else
48*d08de8e2SGerald Schaefer 		rste = _SEGMENT_ENTRY_INVALID;
49*d08de8e2SGerald Schaefer 	return rste;
5053492b1dSGerald Schaefer }
5153492b1dSGerald Schaefer 
52*d08de8e2SGerald Schaefer static inline pte_t __rste_to_pte(unsigned long rste)
53e5098611SMartin Schwidefsky {
54*d08de8e2SGerald Schaefer 	int present;
55e5098611SMartin Schwidefsky 	pte_t pte;
56e5098611SMartin Schwidefsky 
57*d08de8e2SGerald Schaefer 	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
58*d08de8e2SGerald Schaefer 		present = pud_present(__pud(rste));
59*d08de8e2SGerald Schaefer 	else
60*d08de8e2SGerald Schaefer 		present = pmd_present(__pmd(rste));
61*d08de8e2SGerald Schaefer 
62e5098611SMartin Schwidefsky 	/*
63*d08de8e2SGerald Schaefer 	 * Convert encoding		pmd / pud bits	    pte bits
64a1c843b8SMartin Schwidefsky 	 *				dy..R...I...wr	  lIR.uswrdy.p
65a1c843b8SMartin Schwidefsky 	 * empty			00..0...1...00 -> 010.000000.0
66a1c843b8SMartin Schwidefsky 	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
67a1c843b8SMartin Schwidefsky 	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
68a1c843b8SMartin Schwidefsky 	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
69a1c843b8SMartin Schwidefsky 	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
70a1c843b8SMartin Schwidefsky 	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
71a1c843b8SMartin Schwidefsky 	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
72a1c843b8SMartin Schwidefsky 	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
73a1c843b8SMartin Schwidefsky 	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
74a1c843b8SMartin Schwidefsky 	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
75a1c843b8SMartin Schwidefsky 	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
76a1c843b8SMartin Schwidefsky 	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
77a1c843b8SMartin Schwidefsky 	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
78a1c843b8SMartin Schwidefsky 	 * HW-bits: R read-only, I invalid
79a1c843b8SMartin Schwidefsky 	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
80a1c843b8SMartin Schwidefsky 	 *	    u unused, l large
81e5098611SMartin Schwidefsky 	 */
82*d08de8e2SGerald Schaefer 	if (present) {
83*d08de8e2SGerald Schaefer 		pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
84152125b7SMartin Schwidefsky 		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
85*d08de8e2SGerald Schaefer 		pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4;
86*d08de8e2SGerald Schaefer 		pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4;
87*d08de8e2SGerald Schaefer 		pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5;
88*d08de8e2SGerald Schaefer 		pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT);
89*d08de8e2SGerald Schaefer 		pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10;
90*d08de8e2SGerald Schaefer 		pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10;
91*d08de8e2SGerald Schaefer 		pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
92e5098611SMartin Schwidefsky 	} else
93e5098611SMartin Schwidefsky 		pte_val(pte) = _PAGE_INVALID;
94e5098611SMartin Schwidefsky 	return pte;
95e5098611SMartin Schwidefsky }
96e5098611SMartin Schwidefsky 
97e5098611SMartin Schwidefsky void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
98e5098611SMartin Schwidefsky 		     pte_t *ptep, pte_t pte)
99e5098611SMartin Schwidefsky {
100*d08de8e2SGerald Schaefer 	unsigned long rste = __pte_to_rste(pte);
101e5098611SMartin Schwidefsky 
102*d08de8e2SGerald Schaefer 	/* Set correct table type for 2G hugepages */
103*d08de8e2SGerald Schaefer 	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
104*d08de8e2SGerald Schaefer 		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
105*d08de8e2SGerald Schaefer 	else
106*d08de8e2SGerald Schaefer 		rste |= _SEGMENT_ENTRY_LARGE;
107*d08de8e2SGerald Schaefer 	pte_val(*ptep) = rste;
108e5098611SMartin Schwidefsky }
109e5098611SMartin Schwidefsky 
110e5098611SMartin Schwidefsky pte_t huge_ptep_get(pte_t *ptep)
111e5098611SMartin Schwidefsky {
112*d08de8e2SGerald Schaefer 	return __rste_to_pte(pte_val(*ptep));
113e5098611SMartin Schwidefsky }
114e5098611SMartin Schwidefsky 
115e5098611SMartin Schwidefsky pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
116e5098611SMartin Schwidefsky 			      unsigned long addr, pte_t *ptep)
117e5098611SMartin Schwidefsky {
118*d08de8e2SGerald Schaefer 	pte_t pte = huge_ptep_get(ptep);
119e5098611SMartin Schwidefsky 	pmd_t *pmdp = (pmd_t *) ptep;
120*d08de8e2SGerald Schaefer 	pud_t *pudp = (pud_t *) ptep;
121e5098611SMartin Schwidefsky 
122*d08de8e2SGerald Schaefer 	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
123*d08de8e2SGerald Schaefer 		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
124*d08de8e2SGerald Schaefer 	else
125*d08de8e2SGerald Schaefer 		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
126*d08de8e2SGerald Schaefer 	return pte;
12753492b1dSGerald Schaefer }
12853492b1dSGerald Schaefer 
129a5516438SAndi Kleen pte_t *huge_pte_alloc(struct mm_struct *mm,
130a5516438SAndi Kleen 			unsigned long addr, unsigned long sz)
13153492b1dSGerald Schaefer {
13253492b1dSGerald Schaefer 	pgd_t *pgdp;
13353492b1dSGerald Schaefer 	pud_t *pudp;
13453492b1dSGerald Schaefer 	pmd_t *pmdp = NULL;
13553492b1dSGerald Schaefer 
13653492b1dSGerald Schaefer 	pgdp = pgd_offset(mm, addr);
13753492b1dSGerald Schaefer 	pudp = pud_alloc(mm, pgdp, addr);
138*d08de8e2SGerald Schaefer 	if (pudp) {
139*d08de8e2SGerald Schaefer 		if (sz == PUD_SIZE)
140*d08de8e2SGerald Schaefer 			return (pte_t *) pudp;
141*d08de8e2SGerald Schaefer 		else if (sz == PMD_SIZE)
14253492b1dSGerald Schaefer 			pmdp = pmd_alloc(mm, pudp, addr);
143*d08de8e2SGerald Schaefer 	}
14453492b1dSGerald Schaefer 	return (pte_t *) pmdp;
14553492b1dSGerald Schaefer }
14653492b1dSGerald Schaefer 
14753492b1dSGerald Schaefer pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
14853492b1dSGerald Schaefer {
14953492b1dSGerald Schaefer 	pgd_t *pgdp;
15053492b1dSGerald Schaefer 	pud_t *pudp;
15153492b1dSGerald Schaefer 	pmd_t *pmdp = NULL;
15253492b1dSGerald Schaefer 
15353492b1dSGerald Schaefer 	pgdp = pgd_offset(mm, addr);
15453492b1dSGerald Schaefer 	if (pgd_present(*pgdp)) {
15553492b1dSGerald Schaefer 		pudp = pud_offset(pgdp, addr);
156*d08de8e2SGerald Schaefer 		if (pud_present(*pudp)) {
157*d08de8e2SGerald Schaefer 			if (pud_large(*pudp))
158*d08de8e2SGerald Schaefer 				return (pte_t *) pudp;
15953492b1dSGerald Schaefer 			pmdp = pmd_offset(pudp, addr);
16053492b1dSGerald Schaefer 		}
161*d08de8e2SGerald Schaefer 	}
16253492b1dSGerald Schaefer 	return (pte_t *) pmdp;
16353492b1dSGerald Schaefer }
16453492b1dSGerald Schaefer 
16553492b1dSGerald Schaefer int pmd_huge(pmd_t pmd)
16653492b1dSGerald Schaefer {
167cbd7d9c2SDominik Dingel 	return pmd_large(pmd);
16853492b1dSGerald Schaefer }
16953492b1dSGerald Schaefer 
170ceb86879SAndi Kleen int pud_huge(pud_t pud)
171ceb86879SAndi Kleen {
172*d08de8e2SGerald Schaefer 	return pud_large(pud);
173*d08de8e2SGerald Schaefer }
174*d08de8e2SGerald Schaefer 
175*d08de8e2SGerald Schaefer struct page *
176*d08de8e2SGerald Schaefer follow_huge_pud(struct mm_struct *mm, unsigned long address,
177*d08de8e2SGerald Schaefer 		pud_t *pud, int flags)
178*d08de8e2SGerald Schaefer {
179*d08de8e2SGerald Schaefer 	if (flags & FOLL_GET)
180*d08de8e2SGerald Schaefer 		return NULL;
181*d08de8e2SGerald Schaefer 
182*d08de8e2SGerald Schaefer 	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
183*d08de8e2SGerald Schaefer }
184*d08de8e2SGerald Schaefer 
185*d08de8e2SGerald Schaefer static __init int setup_hugepagesz(char *opt)
186*d08de8e2SGerald Schaefer {
187*d08de8e2SGerald Schaefer 	unsigned long size;
188*d08de8e2SGerald Schaefer 	char *string = opt;
189*d08de8e2SGerald Schaefer 
190*d08de8e2SGerald Schaefer 	size = memparse(opt, &opt);
191*d08de8e2SGerald Schaefer 	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
192*d08de8e2SGerald Schaefer 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
193*d08de8e2SGerald Schaefer 	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
194*d08de8e2SGerald Schaefer 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
195*d08de8e2SGerald Schaefer 	} else {
196*d08de8e2SGerald Schaefer 		pr_err("hugepagesz= specifies an unsupported page size %s\n",
197*d08de8e2SGerald Schaefer 			string);
198ceb86879SAndi Kleen 		return 0;
199ceb86879SAndi Kleen 	}
200*d08de8e2SGerald Schaefer 	return 1;
201*d08de8e2SGerald Schaefer }
202*d08de8e2SGerald Schaefer __setup("hugepagesz=", setup_hugepagesz);
203