// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>

#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/*
 * Some code and data needs to stay below 2 GB, even when the kernel would be
 * relocated above 2 GB, because it has to use 31 bit addresses.
 * Such code and data is part of the .amode31 section.
 */
unsigned long __amode31_ref __samode31 = __pa(&_samode31);
unsigned long __amode31_ref __eamode31 = __pa(&_eamode31);
unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31);
unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31);
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;

/*
 * Control registers CR2, CR5 and CR15 are initialized with addresses
 * of tables that must be placed below 2G which is handled by the AMODE31
 * sections.
 * Because the AMODE31 sections are relocated below 2G at startup,
 * the content of control registers CR2, CR5 and CR15 must be updated
 * with new addresses after the relocation.
 * The initial setup of the control registers is done in head64.S; they are
 * updated again after the AMODE31 relocation. We must access the relevant
 * AMODE31 tables indirectly via pointers placed in the .amode31.refs linker
 * section. Those pointers get updated automatically during AMODE31 relocation
 * and always contain a valid address within the AMODE31 sections.
 */

static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);

static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
	[1] = 0xffffffffffffffff
};

static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0
};

static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
	0, 0, 0x89000000, 0,
	0, 0, 0x8a000000, 0
};

static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;

int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
struct initrd_data __bootdata(initrd_data);

unsigned long __bootdata_preserved(__kaslr_offset);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);

unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long vmemmap_size;

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

/*
 * The Write Back bit position in the physaddr is given by the SLPC PCI.
 * Leaving the mask zero always uses write through, which is safe.
 */
unsigned long mio_wb_bit_mask __ro_after_init;

/*
 * This is set up by the setup routine at boot time.
 * For S390 we need to find out what we have to set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameters.
 */
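
/*
 * Illustrative example (not part of the original source): booting with
 * "condev=0x0009 conmode=3215" on the kernel command line selects console
 * device number 0x0009 and, if CONFIG_TN3215_CONSOLE is enabled, the 3215
 * line-mode console.
 */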

static int __init condev_setup(char *str)
{
	int vdev;

	vdev = simple_strtoul(str, &str, 0);
	if (vdev >= 0 && vdev < 65536) {
		console_devno = vdev;
		console_irq = -1;
	}
	return 1;
}

__setup("condev=", condev_setup);

static void __init set_preferred_console(void)
{
	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		add_preferred_console("ttyS", 0, NULL);
	else if (CONSOLE_IS_3270)
		add_preferred_console("tty3270", 0, NULL);
	else if (CONSOLE_IS_VT220)
		add_preferred_console("ttysclp", 0, NULL);
	else if (CONSOLE_IS_HVC)
		add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (MACHINE_IS_VM) {
		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else if (MACHINE_IS_KVM) {
		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
			SET_CONSOLE_VT220;
		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
			SET_CONSOLE_SCLP;
		else
			SET_CONSOLE_HVC;
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}
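
/*
 * When the system was IPLed for a dump and no old memory (oldmem) region is
 * present, setup_zfcpdump() below appends "cio_ignore=all,!ipldev,!condev"
 * to the command line so that only the IPL and console devices are sensed,
 * and lowers the console loglevel.
 */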

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
	if (!is_ipl_type_dump())
		return;
	if (oldmem_data.start)
		return;
	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_restart(command);
}

void machine_halt(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_halt();
}

void machine_power_off(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_power_off();
}

/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
	return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
					     THREADINFO_GFP, NUMA_NO_NODE,
					     __builtin_return_address(0));
#else
	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
	vfree((void *) stack);
#else
	free_pages(stack, THREAD_SIZE_ORDER);
#endif
}

int __init arch_early_irq_init(void)
{
	unsigned long stack;

	stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
	if (!stack)
		panic("Couldn't allocate async stack");
	S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
	return 0;
}

void __init arch_call_rest_init(void)
{
	unsigned long stack;

	stack = stack_alloc();
	if (!stack)
		panic("Couldn't allocate kernel stack");
	current->stack = (void *) stack;
#ifdef CONFIG_VMAP_STACK
	current->stack_vm_area = (void *) stack;
#endif
	set_task_stack_end_magic(current);
	stack += STACK_INIT_OFFSET;
	S390_lowcore.kernel_stack = stack;
	call_on_stack_noreturn(rest_init, stack);
}
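
/*
 * setup_lowcore_dat_off() builds the lowcore (prefix page) for the boot CPU:
 * it is allocated from low memory, filled with the new interrupt PSWs, stacks
 * and timer state, and the restart information is mirrored into the absolute
 * zero lowcore so that a PSW restart also works on an offline CPU. Except for
 * the KASAN case, DAT is enabled in the new PSWs only later, in
 * setup_lowcore_dat_on().
 */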
static void __init setup_lowcore_dat_off(void)
{
	unsigned long int_psw_mask = PSW_KERNEL_BITS;
	unsigned long mcck_stack;
	struct lowcore *lc;

	if (IS_ENABLED(CONFIG_KASAN))
		int_psw_mask |= PSW_MASK_DAT;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
	if (!lc)
		panic("%s: Failed to allocate %zu bytes align=%zx\n",
		      __func__, sizeof(*lc), sizeof(*lc));

	lc->restart_psw.mask = PSW_KERNEL_BITS;
	lc->restart_psw.addr = (unsigned long) restart_int_handler;
	lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->svc_new_psw.addr = (unsigned long) system_call;
	lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->io_new_psw.addr = (unsigned long) io_int_handler;
	lc->clock_comparator = clock_comparator_max;
	lc->nodat_stack = ((unsigned long) &init_thread_union)
		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
	lc->current_task = (unsigned long)&init_task;
	lc->lpp = LPP_MAGIC;
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->preempt_count = S390_lowcore.preempt_count;
	nmi_alloc_boot_cpu(lc);
	lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;

	/*
	 * Allocate the global restart stack which is the same for
	 * all CPUs in case *one* of them does a PSW restart.
	 */
	restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
	if (!restart_stack)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, THREAD_SIZE, THREAD_SIZE);
	restart_stack += STACK_INIT_OFFSET;

	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1U;

	mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
	if (!mcck_stack)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, THREAD_SIZE, THREAD_SIZE);
	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;

	/* Setup absolute zero lowcore */
	mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
	mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
	mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
	arch_spin_lock_setup(0);
	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
	lc->preempt_count = PREEMPT_DISABLED;

	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}

static void __init setup_lowcore_dat_on(void)
{
	struct lowcore *lc = lowcore_ptr[0];

	__ctl_clear_bit(0, 28);
	S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
	__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
	__ctl_set_bit(0, 28);
	mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS);
	mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw);
	memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area,
			sizeof(S390_lowcore.cregs_save_area));
}

static struct resource code_resource = {
	.name = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
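
/*
 * code_resource, data_resource and bss_resource above are nested under the
 * matching "System RAM" resources created in setup_resources() below; a
 * kernel resource that crosses a memory range boundary is split so that each
 * part fits inside a single RAM resource.
 */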
static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
	&bss_resource,
};

static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	phys_addr_t start, end;
	int j;
	u64 i;

	code_resource.start = (unsigned long) _text;
	code_resource.end = (unsigned long) _etext - 1;
	data_resource.start = (unsigned long) _etext;
	data_resource.end = (unsigned long) _edata - 1;
	bss_resource.start = (unsigned long) __bss_start;
	bss_resource.end = (unsigned long) __bss_stop - 1;

	for_each_mem_range(i, &start, &end) {
		res = memblock_alloc(sizeof(*res), 8);
		if (!res)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*res), 8);
		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

		res->name = "System RAM";
		res->start = start;
		/*
		 * In memblock, end points to the first byte after the
		 * range while in resources, end points to the last byte in
		 * the range.
		 */
		res->end = end - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				sub_res = memblock_alloc(sizeof(*sub_res), 8);
				if (!sub_res)
					panic("%s: Failed to allocate %zu bytes align=0x%x\n",
					      __func__, sizeof(*sub_res), 8);
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Re-add removed crash kernel memory as reserved memory. This makes
	 * sure it will be mapped with the identity mapping and struct pages
	 * will be created, so it can be resized later on.
	 * However add it later since the crash kernel resource should not be
	 * part of the System RAM resource.
	 */
	if (crashk_res.end) {
		memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
		insert_resource(&iomem_resource, &crashk_res);
	}
#endif
}
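
/*
 * ident_map_size is handed over by the early boot code (__bootdata) and marks
 * the end of the identity mapping: setup_memory_end() below trims memblock
 * down to it, while reserve_above_ident_map() keeps everything above it
 * reserved.
 */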
static void __init setup_memory_end(void)
{
	memblock_remove(ident_map_size, ULONG_MAX);
	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (action != MEM_GOING_OFFLINE)
		return NOTIFY_OK;
	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Make sure that the area above identity mapping is protected
 */
static void __init reserve_above_ident_map(void)
{
	memblock_reserve(ident_map_size, ULONG_MAX);
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	phys_addr_t low, high;
	int rc;

	rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
			       &crash_base);

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (rc || crash_size == 0)
		return;

	if (memblock.memory.regions[0].size < crash_size) {
		pr_info("crashkernel reservation failed: %s\n",
			"first memory chunk must be at least crashkernel size");
		return;
	}

	low = crash_base ?: oldmem_data.start;
	high = low + crash_size;
	if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
		crash_base = low;
	} else {
		/* Find suitable area in free memory */
		low = max_t(unsigned long, crash_size, sclp.hsa_size);
		high = crash_base ? crash_base + crash_size : ULONG_MAX;

		if (crash_base && crash_base < low) {
			pr_info("crashkernel reservation failed: %s\n",
				"crash_base too low");
			return;
		}
		low = crash_base ?: low;
		crash_base = memblock_phys_alloc_range(crash_size,
						       KEXEC_CRASH_MEM_ALIGN,
						       low, high);
	}

	if (!crash_base) {
		pr_info("crashkernel reservation failed: %s\n",
			"no suitable area found");
		return;
	}

	if (register_memory_notifier(&kdump_mem_nb)) {
		memblock_free(crash_base, crash_size);
		return;
	}

	if (!oldmem_data.start && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	memblock_remove(crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20,
		(unsigned long)memblock.memory.total_size >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (!initrd_data.start || !initrd_data.size)
		return;
	initrd_start = initrd_data.start;
	initrd_end = initrd_start + initrd_data.size;
	memblock_reserve(initrd_data.start, initrd_data.size);
#endif
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
	if (ipl_cert_list_addr)
		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}
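
/*
 * The memory detection info may come with an extra buffer that lives outside
 * of struct mem_detect itself (get_mem_detect_reserved() reports its start
 * and size). It is reserved here, before the detected memory is added to
 * memblock, and given back via free_mem_detect_info() once
 * memblock_add_mem_detect_info() has run; see setup_arch() for the ordering.
 */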
static void __init reserve_mem_detect_info(void)
{
	unsigned long start, size;

	get_mem_detect_reserved(&start, &size);
	if (size)
		memblock_reserve(start, size);
}

static void __init free_mem_detect_info(void)
{
	unsigned long start, size;

	get_mem_detect_reserved(&start, &size);
	if (size)
		memblock_free(start, size);
}

static const char * __init get_mem_info_source(void)
{
	switch (mem_detect.info_source) {
	case MEM_DETECT_SCLP_STOR_INFO:
		return "sclp storage info";
	case MEM_DETECT_DIAG260:
		return "diag260";
	case MEM_DETECT_SCLP_READ_INFO:
		return "sclp read info";
	case MEM_DETECT_BIN_SEARCH:
		return "binary search";
	}
	return "none";
}

static void __init memblock_add_mem_detect_info(void)
{
	unsigned long start, end;
	int i;

	pr_debug("physmem info source: %s (%hhd)\n",
		 get_mem_info_source(), mem_detect.info_source);
	/* keep memblock lists close to the kernel */
	memblock_set_bottom_up(true);
	for_each_mem_detect_block(i, &start, &end) {
		memblock_add(start, end - start);
		memblock_physmem_add(start, end - start);
	}
	memblock_set_bottom_up(false);
	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
	memblock_dump_all();
}

/*
 * Check for initrd being in usable memory
 */
static void __init check_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_data.start && initrd_data.size &&
	    !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
		pr_err("The initial RAM disk does not fit into the memory\n");
		memblock_free(initrd_data.start, initrd_data.size);
		initrd_start = initrd_end = 0;
	}
#endif
}

/*
 * Reserve memory used for lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
	unsigned long start_pfn = PFN_UP(__pa(_end));

	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
	memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP);
	memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
			 - (unsigned long)_stext);
}

static void __init setup_memory(void)
{
	phys_addr_t start, end;
	u64 i;

	/*
	 * Init storage key for present memory
	 */
	for_each_mem_range(i, &start, &end)
		storage_key_init_range(start, end);

	psw_set_key(PAGE_DEFAULT_KEY);

	/* Only cosmetics */
	memblock_enforce_memory_limit(memblock_end_of_DRAM());
}

static void __init relocate_amode31_section(void)
{
	unsigned long amode31_addr, amode31_size;
	long amode31_offset;
	long *ptr;

	/* Allocate a new AMODE31 capable memory region */
	amode31_size = __eamode31 - __samode31;
	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
	amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE);
	if (!amode31_addr)
		panic("Failed to allocate memory for AMODE31 section\n");
	amode31_offset = amode31_addr - __samode31;

	/* Move original AMODE31 section to the new one */
	memmove((void *)amode31_addr, (void *)__samode31, amode31_size);
	/* Zero out the old AMODE31 section to catch invalid accesses within it */
	memset((void *)__samode31, 0, amode31_size);

	/* Update all AMODE31 region references */
	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
		*ptr += amode31_offset;
}

/* This must be called after AMODE31 relocation */
static void __init setup_cr(void)
{
	union ctlreg2 cr2;
	union ctlreg5 cr5;
	union ctlreg15 cr15;

	__ctl_duct[1] = (unsigned long)__ctl_aste;
	__ctl_duct[2] = (unsigned long)__ctl_aste;
	__ctl_duct[4] = (unsigned long)__ctl_duald;

	/* Update control registers CR2, CR5 and CR15 */
	__ctl_store(cr2.val, 2, 2);
	__ctl_store(cr5.val, 5, 5);
	__ctl_store(cr15.val, 15, 15);
	cr2.ducto = (unsigned long)__ctl_duct >> 6;
	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
	__ctl_load(cr2.val, 2, 2);
	__ctl_load(cr5.val, 5, 5);
	__ctl_load(cr15.val, 15, 15);
}

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
	struct sysinfo_3_2_2 *vmms;

	vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
							    PAGE_SIZE);
	if (!vmms)
		panic("Failed to allocate memory for sysinfo structure\n");

	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
	memblock_free((unsigned long) vmms, PAGE_SIZE);
}

/*
 * Find the correct size for the task_struct. This depends on
 * the size of the struct fpu at the end of the thread_struct
 * which is embedded in the task_struct.
 */
static void __init setup_task_size(void)
{
	int task_size = sizeof(struct task_struct);

	if (!MACHINE_HAS_VX) {
		task_size -= sizeof(__vector128) * __NUM_VXRS;
		task_size += sizeof(freg_t) * __NUM_FPRS;
	}
	arch_task_struct_size = task_size;
}

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
	union diag318_info diag318_info = {
		.cpnc = CPNC_LINUX,
		.cpvc = 0,
	};

	if (!sclp.has_diag318)
		return;

	diag_stat_inc(DIAG_STAT_X318);
	asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
	struct ipl_rb_component_entry *ptr, *end;
	char *str;

	if (!early_ipl_comp_list_addr)
		return;
	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
		pr_info("Linux is running with Secure-IPL enabled\n");
	else
		pr_info("Linux is running with Secure-IPL disabled\n");
	ptr = (void *) early_ipl_comp_list_addr;
	end = (void *) ptr + early_ipl_comp_list_size;
	pr_info("The IPL report contains the following components:\n");
	while (ptr < end) {
		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
				str = "signed, verified";
			else
				str = "signed, verification failed";
		} else {
			str = "not signed";
		}
		pr_info("%016llx - %016llx (%s)\n",
			ptr->addr, ptr->addr + ptr->len, str);
		ptr++;
	}
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */
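
/*
 * Rough order of operations below: announce the hypervisor/machine mode,
 * parse early parameters, reserve memory regions before the detected memory
 * is added to memblock, relocate the AMODE31 section, establish the memory
 * limit, reserve the crashkernel area, set up resources, the lowcore, SMP
 * and topology, and finally create the kernel page tables (paging_init())
 * and switch the new PSWs to DAT mode (setup_lowcore_dat_on()).
 */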
void __init setup_arch(char **cmdline_p)
{
	/*
	 * print what head.S has found out about the machine
	 */
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
	else if (MACHINE_IS_KVM)
		pr_info("Linux is running under KVM in 64-bit mode\n");
	else if (MACHINE_IS_LPAR)
		pr_info("Linux is running natively in 64-bit mode\n");
	else
		pr_info("Linux is running as a guest in 64-bit mode\n");

	log_component_list();

	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;

	ROOT_DEV = Root_RAM0;

	setup_initial_init_mm(_text, _etext, _edata, _end);

	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
		nospec_auto_detect();

	jump_label_init();
	parse_early_param();
#ifdef CONFIG_CRASH_DUMP
	/* Deactivate elfcorehdr= kernel parameter */
	elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

	os_info_init();
	setup_ipl();
	setup_task_size();
	setup_control_program_code();

	/* Do some memory reservations *before* memory is added to memblock */
	reserve_above_ident_map();
	reserve_kernel();
	reserve_initrd();
	reserve_certificate_list();
	reserve_mem_detect_info();
	memblock_allow_resize();

	/* Get information about *all* installed memory */
	memblock_add_mem_detect_info();

	free_mem_detect_info();

	relocate_amode31_section();
	setup_cr();

	setup_uv();
	setup_memory_end();
	setup_memory();
	dma_contiguous_reserve(ident_map_size);
	vmcp_cma_reserve();
	if (MACHINE_HAS_EDAT2)
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

	check_initrd();
	reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Be aware that smp_save_dump_cpus() triggers a system reset.
	 * Therefore CPU and device initialization should be done afterwards.
	 */
	smp_save_dump_cpus();
#endif

	setup_resources();
	setup_lowcore_dat_off();
	smp_fill_possible_mask();
	cpu_detect_mhz_feature();
	cpu_init();
	numa_setup();
	smp_detect_cpus();
	topology_init_early();

	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
	paging_init();

	/*
	 * After paging_init created the kernel page table, the new PSWs
	 * in lowcore can now run with DAT enabled.
	 */
	setup_lowcore_dat_on();

	/* Setup default console */
	conmode_default();
	set_preferred_console();

	apply_alternative_instructions();
	if (IS_ENABLED(CONFIG_EXPOLINE))
		nospec_init_branches();

	/* Setup zfcp/nvme dump support */
	setup_zfcpdump();

	/* Add system specific data to the random pool */
	setup_randomness();
}