xref: /openbmc/linux/arch/s390/boot/vmem.c (revision 2154e0b3)
1bb1520d5SAlexander Gordeev // SPDX-License-Identifier: GPL-2.0
2bb1520d5SAlexander Gordeev #include <linux/sched/task.h>
3bb1520d5SAlexander Gordeev #include <linux/pgtable.h>
4bb1520d5SAlexander Gordeev #include <asm/pgalloc.h>
5bb1520d5SAlexander Gordeev #include <asm/facility.h>
6bb1520d5SAlexander Gordeev #include <asm/sections.h>
7bb1520d5SAlexander Gordeev #include <asm/mem_detect.h>
88e9205d2SAlexander Gordeev #include <asm/maccess.h>
9*2154e0b3SAlexander Gordeev #include <asm/abs_lowcore.h>
10bb1520d5SAlexander Gordeev #include "decompressor.h"
11bb1520d5SAlexander Gordeev #include "boot.h"
12bb1520d5SAlexander Gordeev 
/*
 * Stand-ins for the kernel objects of the same names. Each one is
 * resolved through the corresponding *_off value stored in vmlinux.
 */
#define init_mm			(*(struct mm_struct *)(vmlinux.init_mm_off))
#define swapper_pg_dir		(vmlinux.swapper_pg_dir_off)
#define invalid_pg_dir		(vmlinux.invalid_pg_dir_off)
16bb1520d5SAlexander Gordeev 
178e9205d2SAlexander Gordeev /*
188e9205d2SAlexander Gordeev  * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though.
198e9205d2SAlexander Gordeev  */
208e9205d2SAlexander Gordeev static inline pte_t *__virt_to_kpte(unsigned long va)
218e9205d2SAlexander Gordeev {
228e9205d2SAlexander Gordeev 	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
238e9205d2SAlexander Gordeev }
248e9205d2SAlexander Gordeev 
/* ASCE built on top of invalid_pg_dir; assigned in setup_vmem(). */
unsigned long __bootdata_preserved(s390_invalid_asce);
/*
 * Boot-time page table allocator state. Page tables occupy the range
 * [pgalloc_pos, pgalloc_end); pgalloc_pos moves downwards with every
 * allocation and must never drop below pgalloc_low.
 */
unsigned long __bootdata(pgalloc_pos);
unsigned long __bootdata(pgalloc_end);
unsigned long __bootdata(pgalloc_low);
29bb1520d5SAlexander Gordeev 
/* Selects how _pa() turns a virtual address into the address to map. */
enum populate_mode {
	POPULATE_NONE		= 0,	/* no backing address, _pa() yields -1 */
	POPULATE_ONE2ONE	= 1,	/* identity: physical == virtual */
	POPULATE_ABS_LOWCORE	= 2,	/* translated via __abs_lowcore_pa() */
};
35e0e0a87bSAlexander Gordeev 
36bb1520d5SAlexander Gordeev static void boot_check_oom(void)
37bb1520d5SAlexander Gordeev {
38bb1520d5SAlexander Gordeev 	if (pgalloc_pos < pgalloc_low)
39bb1520d5SAlexander Gordeev 		error("out of memory on boot\n");
40bb1520d5SAlexander Gordeev }
41bb1520d5SAlexander Gordeev 
42bb1520d5SAlexander Gordeev static void pgtable_populate_begin(unsigned long online_end)
43bb1520d5SAlexander Gordeev {
44bb1520d5SAlexander Gordeev 	unsigned long initrd_end;
45bb1520d5SAlexander Gordeev 	unsigned long kernel_end;
46bb1520d5SAlexander Gordeev 
47bb1520d5SAlexander Gordeev 	kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
48bb1520d5SAlexander Gordeev 	pgalloc_low = round_up(kernel_end, PAGE_SIZE);
49bb1520d5SAlexander Gordeev 	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
50bb1520d5SAlexander Gordeev 		initrd_end =  round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
51bb1520d5SAlexander Gordeev 		pgalloc_low = max(pgalloc_low, initrd_end);
52bb1520d5SAlexander Gordeev 	}
53bb1520d5SAlexander Gordeev 
54bb1520d5SAlexander Gordeev 	pgalloc_end = round_down(online_end, PAGE_SIZE);
55bb1520d5SAlexander Gordeev 	pgalloc_pos = pgalloc_end;
56bb1520d5SAlexander Gordeev 
57bb1520d5SAlexander Gordeev 	boot_check_oom();
58bb1520d5SAlexander Gordeev }
59bb1520d5SAlexander Gordeev 
60bb1520d5SAlexander Gordeev static void *boot_alloc_pages(unsigned int order)
61bb1520d5SAlexander Gordeev {
62bb1520d5SAlexander Gordeev 	unsigned long size = PAGE_SIZE << order;
63bb1520d5SAlexander Gordeev 
64bb1520d5SAlexander Gordeev 	pgalloc_pos -= size;
65bb1520d5SAlexander Gordeev 	pgalloc_pos = round_down(pgalloc_pos, size);
66bb1520d5SAlexander Gordeev 
67bb1520d5SAlexander Gordeev 	boot_check_oom();
68bb1520d5SAlexander Gordeev 
69bb1520d5SAlexander Gordeev 	return (void *)pgalloc_pos;
70bb1520d5SAlexander Gordeev }
71bb1520d5SAlexander Gordeev 
72bb1520d5SAlexander Gordeev static void *boot_crst_alloc(unsigned long val)
73bb1520d5SAlexander Gordeev {
74bb1520d5SAlexander Gordeev 	unsigned long *table;
75bb1520d5SAlexander Gordeev 
76bb1520d5SAlexander Gordeev 	table = boot_alloc_pages(CRST_ALLOC_ORDER);
77bb1520d5SAlexander Gordeev 	if (table)
78bb1520d5SAlexander Gordeev 		crst_table_init(table, val);
79bb1520d5SAlexander Gordeev 	return table;
80bb1520d5SAlexander Gordeev }
81bb1520d5SAlexander Gordeev 
82bb1520d5SAlexander Gordeev static pte_t *boot_pte_alloc(void)
83bb1520d5SAlexander Gordeev {
84bb1520d5SAlexander Gordeev 	static void *pte_leftover;
85bb1520d5SAlexander Gordeev 	pte_t *pte;
86bb1520d5SAlexander Gordeev 
87bb1520d5SAlexander Gordeev 	BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
88bb1520d5SAlexander Gordeev 
89bb1520d5SAlexander Gordeev 	if (!pte_leftover) {
90bb1520d5SAlexander Gordeev 		pte_leftover = boot_alloc_pages(0);
91bb1520d5SAlexander Gordeev 		pte = pte_leftover + _PAGE_TABLE_SIZE;
92bb1520d5SAlexander Gordeev 	} else {
93bb1520d5SAlexander Gordeev 		pte = pte_leftover;
94bb1520d5SAlexander Gordeev 		pte_leftover = NULL;
95bb1520d5SAlexander Gordeev 	}
96bb1520d5SAlexander Gordeev 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
97bb1520d5SAlexander Gordeev 	return pte;
98bb1520d5SAlexander Gordeev }
99bb1520d5SAlexander Gordeev 
100e0e0a87bSAlexander Gordeev static unsigned long _pa(unsigned long addr, enum populate_mode mode)
101e0e0a87bSAlexander Gordeev {
102e0e0a87bSAlexander Gordeev 	switch (mode) {
1038e9205d2SAlexander Gordeev 	case POPULATE_NONE:
1048e9205d2SAlexander Gordeev 		return -1;
105e0e0a87bSAlexander Gordeev 	case POPULATE_ONE2ONE:
106e0e0a87bSAlexander Gordeev 		return addr;
107*2154e0b3SAlexander Gordeev 	case POPULATE_ABS_LOWCORE:
108*2154e0b3SAlexander Gordeev 		return __abs_lowcore_pa(addr);
109e0e0a87bSAlexander Gordeev 	default:
110e0e0a87bSAlexander Gordeev 		return -1;
111e0e0a87bSAlexander Gordeev 	}
112e0e0a87bSAlexander Gordeev }
113e0e0a87bSAlexander Gordeev 
114bb1520d5SAlexander Gordeev static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
115bb1520d5SAlexander Gordeev {
116bb1520d5SAlexander Gordeev 	return machine.has_edat2 &&
117bb1520d5SAlexander Gordeev 	       IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
118bb1520d5SAlexander Gordeev }
119bb1520d5SAlexander Gordeev 
120bb1520d5SAlexander Gordeev static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
121bb1520d5SAlexander Gordeev {
122bb1520d5SAlexander Gordeev 	return machine.has_edat1 &&
123bb1520d5SAlexander Gordeev 	       IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
124bb1520d5SAlexander Gordeev }
125bb1520d5SAlexander Gordeev 
126e0e0a87bSAlexander Gordeev static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
127e0e0a87bSAlexander Gordeev 				 enum populate_mode mode)
128bb1520d5SAlexander Gordeev {
129bb1520d5SAlexander Gordeev 	unsigned long next;
130bb1520d5SAlexander Gordeev 	pte_t *pte, entry;
131bb1520d5SAlexander Gordeev 
132bb1520d5SAlexander Gordeev 	pte = pte_offset_kernel(pmd, addr);
133bb1520d5SAlexander Gordeev 	for (; addr < end; addr += PAGE_SIZE, pte++) {
134bb1520d5SAlexander Gordeev 		if (pte_none(*pte)) {
135e0e0a87bSAlexander Gordeev 			entry = __pte(_pa(addr, mode));
136bb1520d5SAlexander Gordeev 			entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
137bb1520d5SAlexander Gordeev 			set_pte(pte, entry);
138bb1520d5SAlexander Gordeev 		}
139bb1520d5SAlexander Gordeev 	}
140bb1520d5SAlexander Gordeev }
141bb1520d5SAlexander Gordeev 
142e0e0a87bSAlexander Gordeev static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
143e0e0a87bSAlexander Gordeev 				 enum populate_mode mode)
144bb1520d5SAlexander Gordeev {
145bb1520d5SAlexander Gordeev 	unsigned long next;
146bb1520d5SAlexander Gordeev 	pmd_t *pmd, entry;
147bb1520d5SAlexander Gordeev 	pte_t *pte;
148bb1520d5SAlexander Gordeev 
149bb1520d5SAlexander Gordeev 	pmd = pmd_offset(pud, addr);
150bb1520d5SAlexander Gordeev 	for (; addr < end; addr = next, pmd++) {
151bb1520d5SAlexander Gordeev 		next = pmd_addr_end(addr, end);
152bb1520d5SAlexander Gordeev 		if (pmd_none(*pmd)) {
153bb1520d5SAlexander Gordeev 			if (can_large_pmd(pmd, addr, next)) {
154e0e0a87bSAlexander Gordeev 				entry = __pmd(_pa(addr, mode));
155bb1520d5SAlexander Gordeev 				entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
156bb1520d5SAlexander Gordeev 				set_pmd(pmd, entry);
157bb1520d5SAlexander Gordeev 				continue;
158bb1520d5SAlexander Gordeev 			}
159bb1520d5SAlexander Gordeev 			pte = boot_pte_alloc();
160bb1520d5SAlexander Gordeev 			pmd_populate(&init_mm, pmd, pte);
161bb1520d5SAlexander Gordeev 		} else if (pmd_large(*pmd)) {
162bb1520d5SAlexander Gordeev 			continue;
163bb1520d5SAlexander Gordeev 		}
164e0e0a87bSAlexander Gordeev 		pgtable_pte_populate(pmd, addr, next, mode);
165bb1520d5SAlexander Gordeev 	}
166bb1520d5SAlexander Gordeev }
167bb1520d5SAlexander Gordeev 
168e0e0a87bSAlexander Gordeev static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
169e0e0a87bSAlexander Gordeev 				 enum populate_mode mode)
170bb1520d5SAlexander Gordeev {
171bb1520d5SAlexander Gordeev 	unsigned long next;
172bb1520d5SAlexander Gordeev 	pud_t *pud, entry;
173bb1520d5SAlexander Gordeev 	pmd_t *pmd;
174bb1520d5SAlexander Gordeev 
175bb1520d5SAlexander Gordeev 	pud = pud_offset(p4d, addr);
176bb1520d5SAlexander Gordeev 	for (; addr < end; addr = next, pud++) {
177bb1520d5SAlexander Gordeev 		next = pud_addr_end(addr, end);
178bb1520d5SAlexander Gordeev 		if (pud_none(*pud)) {
179bb1520d5SAlexander Gordeev 			if (can_large_pud(pud, addr, next)) {
180e0e0a87bSAlexander Gordeev 				entry = __pud(_pa(addr, mode));
181bb1520d5SAlexander Gordeev 				entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
182bb1520d5SAlexander Gordeev 				set_pud(pud, entry);
183bb1520d5SAlexander Gordeev 				continue;
184bb1520d5SAlexander Gordeev 			}
185bb1520d5SAlexander Gordeev 			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
186bb1520d5SAlexander Gordeev 			pud_populate(&init_mm, pud, pmd);
187bb1520d5SAlexander Gordeev 		} else if (pud_large(*pud)) {
188bb1520d5SAlexander Gordeev 			continue;
189bb1520d5SAlexander Gordeev 		}
190e0e0a87bSAlexander Gordeev 		pgtable_pmd_populate(pud, addr, next, mode);
191bb1520d5SAlexander Gordeev 	}
192bb1520d5SAlexander Gordeev }
193bb1520d5SAlexander Gordeev 
194e0e0a87bSAlexander Gordeev static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
195e0e0a87bSAlexander Gordeev 				 enum populate_mode mode)
196bb1520d5SAlexander Gordeev {
197bb1520d5SAlexander Gordeev 	unsigned long next;
198bb1520d5SAlexander Gordeev 	p4d_t *p4d;
199bb1520d5SAlexander Gordeev 	pud_t *pud;
200bb1520d5SAlexander Gordeev 
201bb1520d5SAlexander Gordeev 	p4d = p4d_offset(pgd, addr);
202bb1520d5SAlexander Gordeev 	for (; addr < end; addr = next, p4d++) {
203bb1520d5SAlexander Gordeev 		next = p4d_addr_end(addr, end);
204bb1520d5SAlexander Gordeev 		if (p4d_none(*p4d)) {
205bb1520d5SAlexander Gordeev 			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
206bb1520d5SAlexander Gordeev 			p4d_populate(&init_mm, p4d, pud);
207bb1520d5SAlexander Gordeev 		}
208e0e0a87bSAlexander Gordeev 		pgtable_pud_populate(p4d, addr, next, mode);
209bb1520d5SAlexander Gordeev 	}
210bb1520d5SAlexander Gordeev }
211bb1520d5SAlexander Gordeev 
212e0e0a87bSAlexander Gordeev static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
213bb1520d5SAlexander Gordeev {
214bb1520d5SAlexander Gordeev 	unsigned long next;
215bb1520d5SAlexander Gordeev 	pgd_t *pgd;
216bb1520d5SAlexander Gordeev 	p4d_t *p4d;
217bb1520d5SAlexander Gordeev 
218bb1520d5SAlexander Gordeev 	pgd = pgd_offset(&init_mm, addr);
219bb1520d5SAlexander Gordeev 	for (; addr < end; addr = next, pgd++) {
220bb1520d5SAlexander Gordeev 		next = pgd_addr_end(addr, end);
221bb1520d5SAlexander Gordeev 		if (pgd_none(*pgd)) {
222bb1520d5SAlexander Gordeev 			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
223bb1520d5SAlexander Gordeev 			pgd_populate(&init_mm, pgd, p4d);
224bb1520d5SAlexander Gordeev 		}
225e0e0a87bSAlexander Gordeev 		pgtable_p4d_populate(pgd, addr, next, mode);
226bb1520d5SAlexander Gordeev 	}
227bb1520d5SAlexander Gordeev }
228bb1520d5SAlexander Gordeev 
229bb1520d5SAlexander Gordeev /*
230bb1520d5SAlexander Gordeev  * The pgtables are located in the range [pgalloc_pos, pgalloc_end).
231bb1520d5SAlexander Gordeev  * That range must stay intact and is later reserved in the memblock.
232bb1520d5SAlexander Gordeev  * Therefore pgtable_populate(pgalloc_pos, pgalloc_end) is needed to
233bb1520d5SAlexander Gordeev  * finalize pgalloc_pos pointer. However that call can decrease the
234bb1520d5SAlexander Gordeev  * value of pgalloc_pos pointer itself. Therefore, pgtable_populate()
235bb1520d5SAlexander Gordeev  * needs to be called repeatedly until pgtables are complete and
236bb1520d5SAlexander Gordeev  * pgalloc_pos does not grow left anymore.
237bb1520d5SAlexander Gordeev  */
238bb1520d5SAlexander Gordeev static void pgtable_populate_end(void)
239bb1520d5SAlexander Gordeev {
240bb1520d5SAlexander Gordeev 	unsigned long pgalloc_end_curr = pgalloc_end;
241bb1520d5SAlexander Gordeev 	unsigned long pgalloc_pos_prev;
242bb1520d5SAlexander Gordeev 
243bb1520d5SAlexander Gordeev 	do {
244bb1520d5SAlexander Gordeev 		pgalloc_pos_prev = pgalloc_pos;
245e0e0a87bSAlexander Gordeev 		pgtable_populate(pgalloc_pos, pgalloc_end_curr, POPULATE_ONE2ONE);
246bb1520d5SAlexander Gordeev 		pgalloc_end_curr = pgalloc_pos_prev;
247bb1520d5SAlexander Gordeev 	} while (pgalloc_pos < pgalloc_pos_prev);
248bb1520d5SAlexander Gordeev }
249bb1520d5SAlexander Gordeev 
250bb1520d5SAlexander Gordeev void setup_vmem(unsigned long online_end, unsigned long asce_limit)
251bb1520d5SAlexander Gordeev {
252bb1520d5SAlexander Gordeev 	unsigned long asce_type;
253bb1520d5SAlexander Gordeev 	unsigned long asce_bits;
254bb1520d5SAlexander Gordeev 
255bb1520d5SAlexander Gordeev 	if (asce_limit == _REGION1_SIZE) {
256bb1520d5SAlexander Gordeev 		asce_type = _REGION2_ENTRY_EMPTY;
257bb1520d5SAlexander Gordeev 		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
258bb1520d5SAlexander Gordeev 	} else {
259bb1520d5SAlexander Gordeev 		asce_type = _REGION3_ENTRY_EMPTY;
260bb1520d5SAlexander Gordeev 		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
261bb1520d5SAlexander Gordeev 	}
262bb1520d5SAlexander Gordeev 	s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
263bb1520d5SAlexander Gordeev 
264bb1520d5SAlexander Gordeev 	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
265bb1520d5SAlexander Gordeev 	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
266bb1520d5SAlexander Gordeev 
267bb1520d5SAlexander Gordeev 	/*
268bb1520d5SAlexander Gordeev 	 * To allow prefixing the lowcore must be mapped with 4KB pages.
269bb1520d5SAlexander Gordeev 	 * To prevent creation of a large page at address 0 first map
270bb1520d5SAlexander Gordeev 	 * the lowcore and create the identity mapping only afterwards.
271bb1520d5SAlexander Gordeev 	 *
272bb1520d5SAlexander Gordeev 	 * No further pgtable_populate() calls are allowed after the value
273bb1520d5SAlexander Gordeev 	 * of pgalloc_pos finalized with a call to pgtable_populate_end().
274bb1520d5SAlexander Gordeev 	 */
275bb1520d5SAlexander Gordeev 	pgtable_populate_begin(online_end);
276e0e0a87bSAlexander Gordeev 	pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
277e0e0a87bSAlexander Gordeev 	pgtable_populate(0, online_end, POPULATE_ONE2ONE);
278*2154e0b3SAlexander Gordeev 	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
279*2154e0b3SAlexander Gordeev 			 POPULATE_ABS_LOWCORE);
2808e9205d2SAlexander Gordeev 	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
2818e9205d2SAlexander Gordeev 			 POPULATE_NONE);
2828e9205d2SAlexander Gordeev 	memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
283bb1520d5SAlexander Gordeev 	pgtable_populate_end();
284bb1520d5SAlexander Gordeev 
285bb1520d5SAlexander Gordeev 	S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
286bb1520d5SAlexander Gordeev 	S390_lowcore.user_asce = s390_invalid_asce;
287bb1520d5SAlexander Gordeev 
288bb1520d5SAlexander Gordeev 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
289bb1520d5SAlexander Gordeev 	__ctl_load(S390_lowcore.user_asce, 7, 7);
290bb1520d5SAlexander Gordeev 	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
291bb1520d5SAlexander Gordeev 
292bb1520d5SAlexander Gordeev 	init_mm.context.asce = S390_lowcore.kernel_asce;
293bb1520d5SAlexander Gordeev }
294