// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization.
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>

#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup.
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/*
 * Some code and data needs to stay below 2 GB, even when the kernel is
 * relocated above 2 GB, because it has to use 31-bit addresses.
 * Such code and data is part of the .amode31 section.
 */
unsigned long __amode31_ref __samode31 = __pa(&_samode31);
unsigned long __amode31_ref __eamode31 = __pa(&_eamode31);
unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31);
unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31);
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;

/*
 * Control registers CR2, CR5 and CR15 are initialized with addresses
 * of tables that must be placed below 2 GB, which is what the AMODE31
 * sections provide.
 * Because the AMODE31 sections are relocated below 2 GB at startup, the
 * content of control registers CR2, CR5 and CR15 must be updated with the
 * new addresses after the relocation. The control registers are first set
 * up in head64.S and updated again after the AMODE31 relocation. We must
 * access the relevant AMODE31 tables indirectly via pointers placed in
 * the .amode31.refs linker section. Those pointers get updated
 * automatically during the AMODE31 relocation and always contain a valid
 * address within the AMODE31 sections.
 */

static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);

static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
	[1] = 0xffffffffffffffff
};

static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0
};

static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
	0, 0, 0x89000000, 0,
	0, 0, 0x8a000000, 0
};

static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;

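/*
 * A short sketch of the indirection described above: __ctl_duct lives in
 * .amode31.refs, so after relocate_amode31_section() has moved the
 * tables, an access such as
 *
 *	__ctl_duct[4] = (unsigned long)__ctl_duald;
 *
 * (as done in setup_cr() below) transparently hits the relocated copy.
 */
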
int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
struct initrd_data __bootdata(initrd_data);

unsigned long __bootdata_preserved(__kaslr_offset);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);

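/*
 * The __bootdata and __bootdata_preserved variables above are filled in
 * by the decompressor and handed over through dedicated boot data
 * sections (a summary; see the __bootdata macros in asm/sections.h for
 * the exact mechanism).
 */
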
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long vmemmap_size;

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

/*
 * The Write Back bit position in the physaddr is given by the SCLP.
 * Leaving the mask zero always uses write through, which is safe.
 */
unsigned long mio_wb_bit_mask __ro_after_init;

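/*
 * Illustrative use (a sketch; the real code lives in arch/s390/pci):
 * write-back capable I/O mappings or the mask into the physical address,
 * roughly
 *
 *	ioremap_prot(addr | mio_wb_bit_mask, size, ...);
 *
 * so a zero mask makes every mapping write through.
 */
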
/*
 * This is set up by the setup routine at boot time.
 * For S390 we need to find out what we have to set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameters.
 */

static int __init condev_setup(char *str)
{
	int vdev;

	vdev = simple_strtoul(str, &str, 0);
	if (vdev >= 0 && vdev < 65536) {
		console_devno = vdev;
		console_irq = -1;
	}
	return 1;
}

__setup("condev=", condev_setup);

static void __init set_preferred_console(void)
{
	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		add_preferred_console("ttyS", 0, NULL);
	else if (CONSOLE_IS_3270)
		add_preferred_console("tty3270", 0, NULL);
	else if (CONSOLE_IS_VT220)
		add_preferred_console("ttysclp", 0, NULL);
	else if (CONSOLE_IS_HVC)
		add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

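/*
 * Example (sketch): booting with "conmode=sclp condev=0x3f00" selects the
 * SCLP console and records device number 0x3f00; condev_setup() accepts
 * any device number below 65536, conmode_setup() the strings matched
 * above.
 */
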
static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (MACHINE_IS_VM) {
		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else if (MACHINE_IS_KVM) {
		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
			SET_CONSOLE_VT220;
		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
			SET_CONSOLE_SCLP;
		else
			SET_CONSOLE_HVC;
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
	if (!is_ipl_type_dump())
		return;
	if (oldmem_data.start)
		return;
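	/*
	 * Limit device recognition in the dump kernel to the IPL and
	 * console devices to keep the dump kernel small (the parameter is
	 * handled by the common I/O layer).
	 */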
	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_restart(command);
}

void machine_halt(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_halt();
}

void machine_power_off(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_power_off();
}

/*
 * Default power off function, used unless a driver overrides
 * pm_power_off.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
	return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
			THREADINFO_GFP, NUMA_NO_NODE,
			__builtin_return_address(0));
#else
	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
	vfree((void *) stack);
#else
	free_pages(stack, THREAD_SIZE_ORDER);
#endif
}

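/*
 * Both variants hand back a THREAD_SIZE aligned area; a typical round
 * trip (sketch only) is:
 *
 *	unsigned long stack = stack_alloc();
 *
 *	if (!stack)
 *		return -ENOMEM;
 *	...
 *	stack_free(stack);
 */
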
int __init arch_early_irq_init(void)
{
	unsigned long stack;

	stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
	if (!stack)
		panic("Couldn't allocate async stack");
	S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
	return 0;
}

void __init arch_call_rest_init(void)
{
	unsigned long stack;

	stack = stack_alloc();
	if (!stack)
		panic("Couldn't allocate kernel stack");
	current->stack = (void *) stack;
#ifdef CONFIG_VMAP_STACK
	current->stack_vm_area = (void *) stack;
#endif
	set_task_stack_end_magic(current);
	stack += STACK_INIT_OFFSET;
	S390_lowcore.kernel_stack = stack;
	call_on_stack_noreturn(rest_init, stack);
}

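/*
 * A note on STACK_INIT_OFFSET (assuming the definition in
 * asm/stacktrace.h): it equals THREAD_SIZE - STACK_FRAME_OVERHEAD -
 * sizeof(struct pt_regs), so the initial stack pointer leaves room for
 * one register save area plus a pt_regs slot at the very top of the
 * stack; the same value is open-coded for the nodat_stack in
 * setup_lowcore_dat_off() below.
 */
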
static void __init setup_lowcore_dat_off(void)
{
	unsigned long int_psw_mask = PSW_KERNEL_BITS;
	unsigned long mcck_stack;
	struct lowcore *lc;

	if (IS_ENABLED(CONFIG_KASAN))
		int_psw_mask |= PSW_MASK_DAT;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
	if (!lc)
		panic("%s: Failed to allocate %zu bytes align=%zx\n",
		      __func__, sizeof(*lc), sizeof(*lc));

	lc->restart_psw.mask = PSW_KERNEL_BITS;
	lc->restart_psw.addr = (unsigned long) restart_int_handler;
	lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->svc_new_psw.addr = (unsigned long) system_call;
	lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->io_new_psw.addr = (unsigned long) io_int_handler;
	lc->clock_comparator = clock_comparator_max;
	lc->nodat_stack = ((unsigned long) &init_thread_union)
		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
	lc->current_task = (unsigned long)&init_task;
	lc->lpp = LPP_MAGIC;
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->preempt_count = S390_lowcore.preempt_count;
	nmi_alloc_boot_cpu(lc);
	lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;

	/*
	 * Allocate the global restart stack which is the same for
	 * all CPUs in case *one* of them does a PSW restart.
	 */
	restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
	if (!restart_stack)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, THREAD_SIZE, THREAD_SIZE);
	restart_stack += STACK_INIT_OFFSET;

	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1U;

	mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
	if (!mcck_stack)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, THREAD_SIZE, THREAD_SIZE);
	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;

	/* Setup absolute zero lowcore */
	mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
	mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
	mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
	arch_spin_lock_setup(0);
	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
	lc->preempt_count = PREEMPT_DISABLED;

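	/*
	 * Setting the prefix register makes this lowcore appear at
	 * absolute address 0 on the boot CPU (a summary; see the
	 * Principles of Operation for the exact prefixing rules).
	 */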
	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}

static void __init setup_lowcore_dat_on(void)
{
	struct lowcore *lc = lowcore_ptr[0];

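	/*
	 * Bit 28 of control register 0 (bit 35 in POP numbering) is the
	 * low-address-protection control; it is switched off while the
	 * PSWs in the hardware lowcore are rewritten and switched back on
	 * afterwards.
	 */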
	__ctl_clear_bit(0, 28);
	S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
	__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
	__ctl_set_bit(0, 28);
	mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS);
	mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw);
	memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area,
			sizeof(S390_lowcore.cregs_save_area));
}

static struct resource code_resource = {
	.name  = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
	&bss_resource,
};

static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	phys_addr_t start, end;
	int j;
	u64 i;

	code_resource.start = (unsigned long) _text;
	code_resource.end = (unsigned long) _etext - 1;
	data_resource.start = (unsigned long) _etext;
	data_resource.end = (unsigned long) _edata - 1;
	bss_resource.start = (unsigned long) __bss_start;
	bss_resource.end = (unsigned long) __bss_stop - 1;

	for_each_mem_range(i, &start, &end) {
		res = memblock_alloc(sizeof(*res), 8);
		if (!res)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*res), 8);
		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

		res->name = "System RAM";
		res->start = start;
		/*
		 * In memblock, end points to the first byte after the
		 * range while in resources, end points to the last byte in
		 * the range.
		 */
		res->end = end - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				sub_res = memblock_alloc(sizeof(*sub_res), 8);
				if (!sub_res)
					panic("%s: Failed to allocate %zu bytes align=0x%x\n",
					      __func__, sizeof(*sub_res), 8);
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Re-add removed crash kernel memory as reserved memory. This makes
	 * sure it will be mapped with the identity mapping and struct pages
	 * will be created, so it can be resized later on.
	 * However add it later since the crash kernel resource should not be
	 * part of the System RAM resource.
	 */
	if (crashk_res.end) {
		memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
		insert_resource(&iomem_resource, &crashk_res);
	}
#endif
}

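/*
 * The result is the usual nesting visible in /proc/iomem (illustrative
 * values only):
 *
 *	00000000-6fffffff : System RAM
 *	  01000000-0190dfff : Kernel code
 *	  0190e000-01ed3dff : Kernel data
 *	  02040000-020cdfff : Kernel bss
 */
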
static void __init setup_memory_end(void)
{
	memblock_remove(ident_map_size, ULONG_MAX);
	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (action != MEM_GOING_OFFLINE)
		return NOTIFY_OK;
	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

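/* Registered from reserve_crashkernel() once an area has been reserved. */
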
#endif

/*
 * Make sure that the area above identity mapping is protected
 */
static void __init reserve_above_ident_map(void)
{
	memblock_reserve(ident_map_size, ULONG_MAX);
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
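/*
 * Typical usage (sketch): "crashkernel=256M" reserves 256 MB at a
 * suitable address, "crashkernel=256M@1G" at a fixed base; see
 * Documentation/admin-guide/kdump/kdump.rst for the full syntax.
 */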
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	phys_addr_t low, high;
	int rc;

	rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
			       &crash_base);

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (rc || crash_size == 0)
		return;

	if (memblock.memory.regions[0].size < crash_size) {
		pr_info("crashkernel reservation failed: %s\n",
			"first memory chunk must be at least crashkernel size");
		return;
	}

	low = crash_base ?: oldmem_data.start;
	high = low + crash_size;
	if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
		crash_base = low;
	} else {
		/* Find suitable area in free memory */
		low = max_t(unsigned long, crash_size, sclp.hsa_size);
		high = crash_base ? crash_base + crash_size : ULONG_MAX;

		if (crash_base && crash_base < low) {
			pr_info("crashkernel reservation failed: %s\n",
				"crash_base too low");
			return;
		}
		low = crash_base ?: low;
		crash_base = memblock_phys_alloc_range(crash_size,
						       KEXEC_CRASH_MEM_ALIGN,
						       low, high);
	}

	if (!crash_base) {
		pr_info("crashkernel reservation failed: %s\n",
			"no suitable area found");
		return;
	}

	if (register_memory_notifier(&kdump_mem_nb)) {
		memblock_free(crash_base, crash_size);
		return;
	}

	if (!oldmem_data.start && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	memblock_remove(crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20,
		(unsigned long)memblock.memory.total_size >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (!initrd_data.start || !initrd_data.size)
		return;
	initrd_start = initrd_data.start;
	initrd_end = initrd_start + initrd_data.size;
	memblock_reserve(initrd_data.start, initrd_data.size);
#endif
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
	if (ipl_cert_list_addr)
		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

static void __init reserve_mem_detect_info(void)
{
	unsigned long start, size;

	get_mem_detect_reserved(&start, &size);
	if (size)
		memblock_reserve(start, size);
}

static void __init free_mem_detect_info(void)
{
	unsigned long start, size;

	get_mem_detect_reserved(&start, &size);
	if (size)
		memblock_free(start, size);
}

static const char * __init get_mem_info_source(void)
{
	switch (mem_detect.info_source) {
	case MEM_DETECT_SCLP_STOR_INFO:
		return "sclp storage info";
	case MEM_DETECT_DIAG260:
		return "diag260";
	case MEM_DETECT_SCLP_READ_INFO:
		return "sclp read info";
	case MEM_DETECT_BIN_SEARCH:
		return "binary search";
	}
	return "none";
}

static void __init memblock_add_mem_detect_info(void)
{
	unsigned long start, end;
	int i;

	pr_debug("physmem info source: %s (%hhd)\n",
		 get_mem_info_source(), mem_detect.info_source);
	/* keep memblock lists close to the kernel */
	memblock_set_bottom_up(true);
	for_each_mem_detect_block(i, &start, &end) {
		memblock_add(start, end - start);
		memblock_physmem_add(start, end - start);
	}
	memblock_set_bottom_up(false);
	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
	memblock_dump_all();
}

/*
 * Check for initrd being in usable memory
 */
static void __init check_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_data.start && initrd_data.size &&
	    !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
		pr_err("The initial RAM disk does not fit into the memory\n");
		memblock_free(initrd_data.start, initrd_data.size);
		initrd_start = initrd_end = 0;
	}
#endif
}

/*
 * Reserve memory used for lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
	unsigned long start_pfn = PFN_UP(__pa(_end));

	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
	memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP);
	memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
			 - (unsigned long)_stext);
}

static void __init setup_memory(void)
{
	phys_addr_t start, end;
	u64 i;

	/*
	 * Init storage key for present memory
	 */
	for_each_mem_range(i, &start, &end)
		storage_key_init_range(start, end);

	psw_set_key(PAGE_DEFAULT_KEY);

	/* Only cosmetics - memory above ident_map_size was already removed */
	memblock_enforce_memory_limit(memblock_end_of_DRAM());
}

static void __init relocate_amode31_section(void)
{
	unsigned long amode31_addr, amode31_size;
	long amode31_offset;
	long *ptr;

	/* Allocate a new AMODE31 capable memory region */
	amode31_size = __eamode31 - __samode31;
	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
	amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE);
	if (!amode31_addr)
		panic("Failed to allocate memory for AMODE31 section\n");
	amode31_offset = amode31_addr - __samode31;

	/* Move original AMODE31 section to the new one */
	memmove((void *)amode31_addr, (void *)__samode31, amode31_size);
	/* Zero out the old AMODE31 section to catch invalid accesses within it */
	memset((void *)__samode31, 0, amode31_size);

	/* Update all AMODE31 region references */
	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
		*ptr += amode31_offset;
}

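/*
 * Example of the fixup (illustrative numbers only): with __samode31 at
 * 0x80000000 and the replacement region at 0x01000000, amode31_offset is
 * -0x7f000000 and every pointer recorded in .amode31.refs is shifted by
 * exactly that amount.
 */
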
/* This must be called after AMODE31 relocation */
static void __init setup_cr(void)
{
	union ctlreg2 cr2;
	union ctlreg5 cr5;
	union ctlreg15 cr15;

	__ctl_duct[1] = (unsigned long)__ctl_aste;
	__ctl_duct[2] = (unsigned long)__ctl_aste;
	__ctl_duct[4] = (unsigned long)__ctl_duald;

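	/*
	 * The table origins below are stored shifted because the hardware
	 * fields hold only the high-order address bits (a summary of the
	 * POP field layouts): the DUCT and primary-ASTE origins are
	 * 64-byte aligned (hence >> 6), the linkage-stack entry address
	 * is 8-byte aligned (hence >> 3).
	 */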
	/* Update control registers CR2, CR5 and CR15 */
	__ctl_store(cr2.val, 2, 2);
	__ctl_store(cr5.val, 5, 5);
	__ctl_store(cr15.val, 15, 15);
	cr2.ducto = (unsigned long)__ctl_duct >> 6;
	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
	__ctl_load(cr2.val, 2, 2);
	__ctl_load(cr5.val, 5, 5);
	__ctl_load(cr15.val, 15, 15);
}

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
	struct sysinfo_3_2_2 *vmms;

	vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
							    PAGE_SIZE);
	if (!vmms)
		panic("Failed to allocate memory for sysinfo structure\n");

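	/*
	 * add_device_randomness() mixes the data into the pool without
	 * crediting entropy, so this is a best-effort seed only.
	 */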
	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
	memblock_free((unsigned long) vmms, PAGE_SIZE);
}

/*
 * Find the correct size for the task_struct. This depends on
 * the size of the struct fpu at the end of the thread_struct
 * which is embedded in the task_struct.
 */
static void __init setup_task_size(void)
{
	int task_size = sizeof(struct task_struct);

	if (!MACHINE_HAS_VX) {
		task_size -= sizeof(__vector128) * __NUM_VXRS;
		task_size += sizeof(freg_t) * __NUM_FPRS;
	}
	arch_task_struct_size = task_size;
}

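/*
 * Worked example (assuming the usual values __NUM_VXRS = 32,
 * __NUM_FPRS = 16, sizeof(__vector128) = 16 and sizeof(freg_t) = 8):
 * without the vector facility the task_struct shrinks by
 * 32 * 16 - 16 * 8 = 384 bytes.
 */
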
/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
	union diag318_info diag318_info = {
		.cpnc = CPNC_LINUX,
		.cpvc = 0,
	};

	if (!sclp.has_diag318)
		return;

	diag_stat_inc(DIAG_STAT_X318);
	asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

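/*
 * diag_stat_inc() above accounts the call in the diag statistics exposed
 * via debugfs (see arch/s390/kernel/diag.c).
 */
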
/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
	struct ipl_rb_component_entry *ptr, *end;
	char *str;

	if (!early_ipl_comp_list_addr)
		return;
	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
		pr_info("Linux is running with Secure-IPL enabled\n");
	else
		pr_info("Linux is running with Secure-IPL disabled\n");
	ptr = (void *) early_ipl_comp_list_addr;
	end = (void *) ptr + early_ipl_comp_list_size;
	pr_info("The IPL report contains the following components:\n");
	while (ptr < end) {
		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
				str = "signed, verified";
			else
				str = "signed, verification failed";
		} else {
			str = "not signed";
		}
		pr_info("%016llx - %016llx (%s)\n",
			ptr->addr, ptr->addr + ptr->len, str);
		ptr++;
	}
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
	/*
	 * Print what head.S has found out about the machine.
	 */
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
	else if (MACHINE_IS_KVM)
		pr_info("Linux is running under KVM in 64-bit mode\n");
	else if (MACHINE_IS_LPAR)
		pr_info("Linux is running natively in 64-bit mode\n");
	else
		pr_info("Linux is running as a guest in 64-bit mode\n");

	log_component_list();

	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has already been set up in early.c */
	*cmdline_p = boot_command_line;

	ROOT_DEV = Root_RAM0;

	setup_initial_init_mm(_text, _etext, _edata, _end);

	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
		nospec_auto_detect();

	jump_label_init();
	parse_early_param();
#ifdef CONFIG_CRASH_DUMP
	/* Deactivate elfcorehdr= kernel parameter */
	elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

	os_info_init();
	setup_ipl();
	setup_task_size();
	setup_control_program_code();

	/* Do some memory reservations *before* memory is added to memblock */
	reserve_above_ident_map();
	reserve_kernel();
	reserve_initrd();
	reserve_certificate_list();
	reserve_mem_detect_info();
	memblock_allow_resize();

	/* Get information about *all* installed memory */
	memblock_add_mem_detect_info();

	free_mem_detect_info();

	relocate_amode31_section();
	setup_cr();

	setup_uv();
	setup_memory_end();
	setup_memory();
	dma_contiguous_reserve(ident_map_size);
	vmcp_cma_reserve();
	if (MACHINE_HAS_EDAT2)
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

	check_initrd();
	reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Be aware that smp_save_dump_cpus() triggers a system reset.
	 * Therefore CPU and device initialization should be done afterwards.
	 */
	smp_save_dump_cpus();
#endif

	setup_resources();
	setup_lowcore_dat_off();
	smp_fill_possible_mask();
	cpu_detect_mhz_feature();
	cpu_init();
	numa_setup();
	smp_detect_cpus();
	topology_init_early();

	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
	paging_init();

	/*
	 * After paging_init created the kernel page table, the new PSWs
	 * in lowcore can now run with DAT enabled.
	 */
	setup_lowcore_dat_on();

	/* Setup default console */
	conmode_default();
	set_preferred_console();

	apply_alternative_instructions();
	if (IS_ENABLED(CONFIG_EXPOLINE))
		nospec_init_branches();

	/* Setup zfcp/nvme dump support */
	setup_zfcpdump();

	/* Add system specific data to the random pool */
	setup_randomness();
}