xref: /openbmc/linux/arch/s390/mm/pageattr.c (revision 11976fe2)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright IBM Corp. 2011
4  * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
5  */
6 #include <linux/hugetlb.h>
7 #include <linux/vmalloc.h>
8 #include <linux/mm.h>
9 #include <asm/cacheflush.h>
10 #include <asm/facility.h>
11 #include <asm/pgalloc.h>
12 #include <asm/kfence.h>
13 #include <asm/page.h>
14 #include <asm/set_memory.h>
15 
16 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
17 {
18 	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
19 		     : [addr] "+a" (addr) : [skey] "d" (skey));
20 	return addr;
21 }
22 
23 void __storage_key_init_range(unsigned long start, unsigned long end)
24 {
25 	unsigned long boundary, size;
26 
27 	while (start < end) {
28 		if (MACHINE_HAS_EDAT1) {
29 			/* set storage keys for a 1MB frame */
30 			size = 1UL << 20;
31 			boundary = (start + size) & ~(size - 1);
32 			if (boundary <= end) {
33 				do {
34 					start = sske_frame(start, PAGE_DEFAULT_KEY);
35 				} while (start < boundary);
36 				continue;
37 			}
38 		}
39 		page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
40 		start += PAGE_SIZE;
41 	}
42 }
43 
#ifdef CONFIG_PROC_FS
/* Page counters, indexed by PG_DIRECT_MAP_*; reported by arch_report_meminfo() */
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);

/*
 * Print the DirectMap4k/1M/2G lines to @m.
 *
 * Each counter holds a number of pages of the respective size; the shifts
 * convert page counts to kB.
 */
void arch_report_meminfo(struct seq_file *m)
{
	long nr_pages;

	/* 4K page == 4 kB */
	nr_pages = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]);
	seq_printf(m, "DirectMap4k:    %8lu kB\n", nr_pages << 2);

	/* 1M page == 1024 kB */
	nr_pages = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]);
	seq_printf(m, "DirectMap1M:    %8lu kB\n", nr_pages << 10);

	/* 2G page == 2^21 kB */
	nr_pages = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]);
	seq_printf(m, "DirectMap2G:    %8lu kB\n", nr_pages << 21);
}
#endif /* CONFIG_PROC_FS */
57 
58 static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
59 		    unsigned long dtt)
60 {
61 	unsigned long *table, mask;
62 
63 	mask = 0;
64 	if (MACHINE_HAS_EDAT2) {
65 		switch (dtt) {
66 		case CRDTE_DTT_REGION3:
67 			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
68 			break;
69 		case CRDTE_DTT_SEGMENT:
70 			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
71 			break;
72 		case CRDTE_DTT_PAGE:
73 			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
74 			break;
75 		}
76 		table = (unsigned long *)((unsigned long)old & mask);
77 		crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
78 	} else if (MACHINE_HAS_IDTE) {
79 		cspg(old, *old, new);
80 	} else {
81 		csp((unsigned int *)old + 1, *old, new);
82 	}
83 }
84 
85 static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
86 			  unsigned long flags)
87 {
88 	pte_t *ptep, new;
89 
90 	if (flags == SET_MEMORY_4K)
91 		return 0;
92 	ptep = pte_offset_kernel(pmdp, addr);
93 	do {
94 		new = *ptep;
95 		if (pte_none(new))
96 			return -EINVAL;
97 		if (flags & SET_MEMORY_RO)
98 			new = pte_wrprotect(new);
99 		else if (flags & SET_MEMORY_RW)
100 			new = pte_mkwrite(pte_mkdirty(new));
101 		if (flags & SET_MEMORY_NX)
102 			new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
103 		else if (flags & SET_MEMORY_X)
104 			new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
105 		if (flags & SET_MEMORY_INV) {
106 			new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
107 		} else if (flags & SET_MEMORY_DEF) {
108 			new = __pte(pte_val(new) & PAGE_MASK);
109 			new = set_pte_bit(new, PAGE_KERNEL);
110 			if (!MACHINE_HAS_NX)
111 				new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
112 		}
113 		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
114 		ptep++;
115 		addr += PAGE_SIZE;
116 		cond_resched();
117 	} while (addr < end);
118 	return 0;
119 }
120 
121 static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
122 {
123 	unsigned long pte_addr, prot;
124 	pte_t *pt_dir, *ptep;
125 	pmd_t new;
126 	int i, ro, nx;
127 
128 	pt_dir = vmem_pte_alloc();
129 	if (!pt_dir)
130 		return -ENOMEM;
131 	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
132 	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
133 	nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
134 	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
135 	if (!nx)
136 		prot &= ~_PAGE_NOEXEC;
137 	ptep = pt_dir;
138 	for (i = 0; i < PTRS_PER_PTE; i++) {
139 		set_pte(ptep, __pte(pte_addr | prot));
140 		pte_addr += PAGE_SIZE;
141 		ptep++;
142 	}
143 	new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
144 	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
145 	update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
146 	update_page_count(PG_DIRECT_MAP_1M, -1);
147 	return 0;
148 }
149 
150 static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
151 			    unsigned long flags)
152 {
153 	pmd_t new = *pmdp;
154 
155 	if (flags & SET_MEMORY_RO)
156 		new = pmd_wrprotect(new);
157 	else if (flags & SET_MEMORY_RW)
158 		new = pmd_mkwrite(pmd_mkdirty(new));
159 	if (flags & SET_MEMORY_NX)
160 		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
161 	else if (flags & SET_MEMORY_X)
162 		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
163 	if (flags & SET_MEMORY_INV) {
164 		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
165 	} else if (flags & SET_MEMORY_DEF) {
166 		new = __pmd(pmd_val(new) & PMD_MASK);
167 		new = set_pmd_bit(new, SEGMENT_KERNEL);
168 		if (!MACHINE_HAS_NX)
169 			new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
170 	}
171 	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
172 }
173 
174 static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
175 			  unsigned long flags)
176 {
177 	unsigned long next;
178 	int need_split;
179 	pmd_t *pmdp;
180 	int rc = 0;
181 
182 	pmdp = pmd_offset(pudp, addr);
183 	do {
184 		if (pmd_none(*pmdp))
185 			return -EINVAL;
186 		next = pmd_addr_end(addr, end);
187 		if (pmd_large(*pmdp)) {
188 			need_split  = !!(flags & SET_MEMORY_4K);
189 			need_split |= !!(addr & ~PMD_MASK);
190 			need_split |= !!(addr + PMD_SIZE > next);
191 			if (need_split) {
192 				rc = split_pmd_page(pmdp, addr);
193 				if (rc)
194 					return rc;
195 				continue;
196 			}
197 			modify_pmd_page(pmdp, addr, flags);
198 		} else {
199 			rc = walk_pte_level(pmdp, addr, next, flags);
200 			if (rc)
201 				return rc;
202 		}
203 		pmdp++;
204 		addr = next;
205 		cond_resched();
206 	} while (addr < end);
207 	return rc;
208 }
209 
210 static int split_pud_page(pud_t *pudp, unsigned long addr)
211 {
212 	unsigned long pmd_addr, prot;
213 	pmd_t *pm_dir, *pmdp;
214 	pud_t new;
215 	int i, ro, nx;
216 
217 	pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
218 	if (!pm_dir)
219 		return -ENOMEM;
220 	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
221 	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
222 	nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
223 	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
224 	if (!nx)
225 		prot &= ~_SEGMENT_ENTRY_NOEXEC;
226 	pmdp = pm_dir;
227 	for (i = 0; i < PTRS_PER_PMD; i++) {
228 		set_pmd(pmdp, __pmd(pmd_addr | prot));
229 		pmd_addr += PMD_SIZE;
230 		pmdp++;
231 	}
232 	new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
233 	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
234 	update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
235 	update_page_count(PG_DIRECT_MAP_2G, -1);
236 	return 0;
237 }
238 
239 static void modify_pud_page(pud_t *pudp, unsigned long addr,
240 			    unsigned long flags)
241 {
242 	pud_t new = *pudp;
243 
244 	if (flags & SET_MEMORY_RO)
245 		new = pud_wrprotect(new);
246 	else if (flags & SET_MEMORY_RW)
247 		new = pud_mkwrite(pud_mkdirty(new));
248 	if (flags & SET_MEMORY_NX)
249 		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
250 	else if (flags & SET_MEMORY_X)
251 		new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
252 	if (flags & SET_MEMORY_INV) {
253 		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
254 	} else if (flags & SET_MEMORY_DEF) {
255 		new = __pud(pud_val(new) & PUD_MASK);
256 		new = set_pud_bit(new, REGION3_KERNEL);
257 		if (!MACHINE_HAS_NX)
258 			new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
259 	}
260 	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
261 }
262 
263 static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
264 			  unsigned long flags)
265 {
266 	unsigned long next;
267 	int need_split;
268 	pud_t *pudp;
269 	int rc = 0;
270 
271 	pudp = pud_offset(p4d, addr);
272 	do {
273 		if (pud_none(*pudp))
274 			return -EINVAL;
275 		next = pud_addr_end(addr, end);
276 		if (pud_large(*pudp)) {
277 			need_split  = !!(flags & SET_MEMORY_4K);
278 			need_split |= !!(addr & ~PUD_MASK);
279 			need_split |= !!(addr + PUD_SIZE > next);
280 			if (need_split) {
281 				rc = split_pud_page(pudp, addr);
282 				if (rc)
283 					break;
284 				continue;
285 			}
286 			modify_pud_page(pudp, addr, flags);
287 		} else {
288 			rc = walk_pmd_level(pudp, addr, next, flags);
289 		}
290 		pudp++;
291 		addr = next;
292 		cond_resched();
293 	} while (addr < end && !rc);
294 	return rc;
295 }
296 
297 static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
298 			  unsigned long flags)
299 {
300 	unsigned long next;
301 	p4d_t *p4dp;
302 	int rc = 0;
303 
304 	p4dp = p4d_offset(pgd, addr);
305 	do {
306 		if (p4d_none(*p4dp))
307 			return -EINVAL;
308 		next = p4d_addr_end(addr, end);
309 		rc = walk_pud_level(p4dp, addr, next, flags);
310 		p4dp++;
311 		addr = next;
312 		cond_resched();
313 	} while (addr < end && !rc);
314 	return rc;
315 }
316 
317 DEFINE_MUTEX(cpa_mutex);
318 
319 static int change_page_attr(unsigned long addr, unsigned long end,
320 			    unsigned long flags)
321 {
322 	unsigned long next;
323 	int rc = -EINVAL;
324 	pgd_t *pgdp;
325 
326 	pgdp = pgd_offset_k(addr);
327 	do {
328 		if (pgd_none(*pgdp))
329 			break;
330 		next = pgd_addr_end(addr, end);
331 		rc = walk_p4d_level(pgdp, addr, next, flags);
332 		if (rc)
333 			break;
334 		cond_resched();
335 	} while (pgdp++, addr = next, addr < end && !rc);
336 	return rc;
337 }
338 
339 static int change_page_attr_alias(unsigned long addr, unsigned long end,
340 				  unsigned long flags)
341 {
342 	unsigned long alias, offset, va_start, va_end;
343 	struct vm_struct *area;
344 	int rc = 0;
345 
346 	/*
347 	 * Changes to read-only permissions on kernel VA mappings are also
348 	 * applied to the kernel direct mapping. Execute permissions are
349 	 * intentionally not transferred to keep all allocated pages within
350 	 * the direct mapping non-executable.
351 	 */
352 	flags &= SET_MEMORY_RO | SET_MEMORY_RW;
353 	if (!flags)
354 		return 0;
355 	area = NULL;
356 	while (addr < end) {
357 		if (!area)
358 			area = find_vm_area((void *)addr);
359 		if (!area || !(area->flags & VM_ALLOC))
360 			return 0;
361 		va_start = (unsigned long)area->addr;
362 		va_end = va_start + area->nr_pages * PAGE_SIZE;
363 		offset = (addr - va_start) >> PAGE_SHIFT;
364 		alias = (unsigned long)page_address(area->pages[offset]);
365 		rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
366 		if (rc)
367 			break;
368 		addr += PAGE_SIZE;
369 		if (addr >= va_end)
370 			area = NULL;
371 	}
372 	return rc;
373 }
374 
375 int __set_memory(unsigned long addr, int numpages, unsigned long flags)
376 {
377 	unsigned long end;
378 	int rc;
379 
380 	if (!MACHINE_HAS_NX)
381 		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
382 	if (!flags)
383 		return 0;
384 	if (!numpages)
385 		return 0;
386 	addr &= PAGE_MASK;
387 	end = addr + numpages * PAGE_SIZE;
388 	mutex_lock(&cpa_mutex);
389 	rc = change_page_attr(addr, end, flags);
390 	if (rc)
391 		goto out;
392 	rc = change_page_attr_alias(addr, end, flags);
393 out:
394 	mutex_unlock(&cpa_mutex);
395 	return rc;
396 }
397 
398 int set_direct_map_invalid_noflush(struct page *page)
399 {
400 	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
401 }
402 
403 int set_direct_map_default_noflush(struct page *page)
404 {
405 	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
406 }
407 
408 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
409 
410 static void ipte_range(pte_t *pte, unsigned long address, int nr)
411 {
412 	int i;
413 
414 	if (test_facility(13)) {
415 		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
416 		return;
417 	}
418 	for (i = 0; i < nr; i++) {
419 		__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
420 		address += PAGE_SIZE;
421 		pte++;
422 	}
423 }
424 
425 void __kernel_map_pages(struct page *page, int numpages, int enable)
426 {
427 	unsigned long address;
428 	pte_t *ptep, pte;
429 	int nr, i, j;
430 
431 	for (i = 0; i < numpages;) {
432 		address = (unsigned long)page_to_virt(page + i);
433 		ptep = virt_to_kpte(address);
434 		nr = (unsigned long)ptep >> ilog2(sizeof(long));
435 		nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
436 		nr = min(numpages - i, nr);
437 		if (enable) {
438 			for (j = 0; j < nr; j++) {
439 				pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
440 				set_pte(ptep, pte);
441 				address += PAGE_SIZE;
442 				ptep++;
443 			}
444 		} else {
445 			ipte_range(ptep, address, nr);
446 		}
447 		i += nr;
448 	}
449 }
450 
#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */
452