xref: /openbmc/linux/arch/s390/boot/vmem.c (revision 8e9205d2)
// SPDX-License-Identifier: GPL-2.0
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/mem_detect.h>
#include <asm/maccess.h>
#include "decompressor.h"
#include "boot.h"

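/*
 * The decompressor cannot reference vmlinux symbols directly, so init_mm,
 * swapper_pg_dir and invalid_pg_dir are reached through the offsets
 * recorded in the vmlinux info structure (see boot.h).
 */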
#define init_mm			(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir		vmlinux.swapper_pg_dir_off
#define invalid_pg_dir		vmlinux.invalid_pg_dir_off

/*
 * Mimic virt_to_kpte() in the absence of the init_mm symbol.
 * Unlike the original, skip the pmd NULL check.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

unsigned long __bootdata_preserved(s390_invalid_asce);
unsigned long __bootdata(pgalloc_pos);
unsigned long __bootdata(pgalloc_end);
unsigned long __bootdata(pgalloc_low);

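/*
 * POPULATE_ONE2ONE creates an identity (virt == phys) mapping for the
 * requested range. POPULATE_NONE only makes sure the page table levels
 * for the range exist; the leaf entries are filled with a dummy (-1)
 * address, as used for the memcpy_real area below.
 */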
enum populate_mode {
	POPULATE_NONE,
	POPULATE_ONE2ONE,
};

static void boot_check_oom(void)
{
	if (pgalloc_pos < pgalloc_low)
		error("out of memory on boot\n");
}

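/*
 * Set up the window for the boot-time page table allocator:
 * [pgalloc_low, pgalloc_end), where pgalloc_low is above the kernel
 * image (and the initrd, if present) and pgalloc_end is the top of
 * online memory. Tables are handed out from the top, moving
 * pgalloc_pos downwards.
 */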
static void pgtable_populate_begin(unsigned long online_end)
{
	unsigned long initrd_end;
	unsigned long kernel_end;

	kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
	pgalloc_low = round_up(kernel_end, PAGE_SIZE);
	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
		initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
		pgalloc_low = max(pgalloc_low, initrd_end);
	}

	pgalloc_end = round_down(online_end, PAGE_SIZE);
	pgalloc_pos = pgalloc_end;

	boot_check_oom();
}

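/*
 * Allocate 2^order naturally aligned pages from the top of the allocator
 * window by moving pgalloc_pos down. For example, assuming
 * pgalloc_pos == 0x2000000, an order-2 request (16KB) returns 0x1ffc000
 * and leaves pgalloc_pos there.
 */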
static void *boot_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	pgalloc_pos -= size;
	pgalloc_pos = round_down(pgalloc_pos, size);

	boot_check_oom();

	return (void *)pgalloc_pos;
}

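/*
 * Allocate a region or segment table (CRST), i.e. 2^CRST_ALLOC_ORDER
 * pages, and initialize all of its entries with the given empty entry
 * value.
 */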
static void *boot_crst_alloc(unsigned long val)
{
	unsigned long *table;

	table = boot_alloc_pages(CRST_ALLOC_ORDER);
	if (table)
		crst_table_init(table, val);
	return table;
}

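/*
 * A page table (_PAGE_TABLE_SIZE) is half a page, so each allocated page
 * yields two page tables: hand out the upper half first and keep the
 * lower half as leftover for the next call. All entries start out as
 * invalid.
 */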
static pte_t *boot_pte_alloc(void)
{
	static void *pte_leftover;
	pte_t *pte;

	BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);

	if (!pte_leftover) {
		pte_leftover = boot_alloc_pages(0);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
	}
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static unsigned long _pa(unsigned long addr, enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return -1;
	case POPULATE_ONE2ONE:
		return addr;
	default:
		return -1;
	}
}

static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
{
	return machine.has_edat2 &&
	       IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
}

static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
{
	return machine.has_edat1 &&
	       IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
}

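/*
 * The pgtable_*_populate() helpers below walk the kernel page table
 * top-down (pgd -> p4d -> pud -> pmd -> pte), allocating missing tables
 * from the boot allocator and installing large mappings where
 * can_large_pud()/can_large_pmd() allow it. Already populated entries
 * are left untouched.
 */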
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			entry = __pte(_pa(addr, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
			set_pte(pte, entry);
		}
	}
}

static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (can_large_pmd(pmd, addr, next)) {
				entry = __pmd(_pa(addr, mode));
				entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
				set_pmd(pmd, entry);
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_large(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
}

static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (can_large_pud(pud, addr, next)) {
				entry = __pud(_pa(addr, mode));
				entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
				set_pud(pud, entry);
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_large(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}

/*
 * The pgtables are located in the range [pgalloc_pos, pgalloc_end).
 * That range must stay intact and is later reserved in the memblock.
 * Therefore pgtable_populate(pgalloc_pos, pgalloc_end) is needed to
 * finalize the pgalloc_pos pointer. However, that call can itself
 * decrease the value of pgalloc_pos. Therefore, pgtable_populate()
 * needs to be called repeatedly until the pgtables are complete and
 * pgalloc_pos no longer decreases.
 */
static void pgtable_populate_end(void)
{
	unsigned long pgalloc_end_curr = pgalloc_end;
	unsigned long pgalloc_pos_prev;

	do {
		pgalloc_pos_prev = pgalloc_pos;
		pgtable_populate(pgalloc_pos, pgalloc_end_curr, POPULATE_ONE2ONE);
		pgalloc_end_curr = pgalloc_pos_prev;
	} while (pgalloc_pos < pgalloc_pos_prev);
}

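/*
 * Build the boot page tables: map the lowcore with 4KB pages, create the
 * identity mapping for all online memory, prepare a pte for the
 * memcpy_real area and finally load the resulting ASCEs into the control
 * registers.
 */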
void setup_vmem(unsigned long online_end, unsigned long asce_limit)
{
	unsigned long asce_type;
	unsigned long asce_bits;

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);

	/*
	 * To allow prefixing, the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0, map the
	 * lowcore first and create the identity mapping only afterwards.
	 *
	 * No further pgtable_populate() calls are allowed after the value
	 * of pgalloc_pos is finalized with a call to pgtable_populate_end().
	 */
	pgtable_populate_begin(online_end);
	pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
	pgtable_populate(0, online_end, POPULATE_ONE2ONE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
	pgtable_populate_end();

	S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
	S390_lowcore.user_asce = s390_invalid_asce;

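	/* CR1: primary, CR7: secondary, CR13: home space ASCE */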
	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
	__ctl_load(S390_lowcore.user_asce, 7, 7);
	__ctl_load(S390_lowcore.kernel_asce, 13, 13);

	init_mm.context.asce = S390_lowcore.kernel_asce;
}