// SPDX-License-Identifier: GPL-2.0
#include <linux/string.h>
#include <linux/elf.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
#include <asm/kasan.h>
#include <asm/kexec.h>
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
#include "compressed/decompressor.h"
#include "boot.h"

extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
struct page *__bootdata_preserved(vmemmap);
unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata(ident_map_size);

u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);

/*
 * Some code and data needs to stay below 2 GB, even when the kernel would be
 * relocated above 2 GB, because it has to use 31 bit addresses.
 * Such code and data is part of the .dma section, and its location is passed
 * over to the decompressed / relocated kernel via the .boot.preserved.data
 * section.
 */
extern char _sdma[], _edma[];
extern char _stext_dma[], _etext_dma[];
extern struct exception_table_entry _start_dma_ex_table[];
extern struct exception_table_entry _stop_dma_ex_table[];
unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma);
unsigned long __bootdata_preserved(__edma) = __pa(&_edma);
unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma);
unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma);
struct exception_table_entry *
	__bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table;
struct exception_table_entry *
	__bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table;

int _diag210_dma(struct diag210 *addr);
int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode);
int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode);
void _diag0c_dma(struct hypfs_diag0c_entry *entry);
void _diag308_reset_dma(void);
struct diag_ops __bootdata_preserved(diag_dma_ops) = {
	.diag210 = _diag210_dma,
	.diag26c = _diag26c_dma,
	.diag14 = _diag14_dma,
	.diag0c = _diag0c_dma,
	.diag308_reset = _diag308_reset_dma
};
static struct diag210 _diag210_tmp_dma __section(".dma.data");
struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;

void error(char *x)
{
	sclp_early_printk("\n\n");
	sclp_early_printk(x);
	sclp_early_printk("\n\n -- System halted");

	disabled_wait();
}

static void setup_lpp(void)
{
	S390_lowcore.current_pid = 0;
	S390_lowcore.lpp = LPP_MAGIC;
	if (test_facility(40))
		lpp(&S390_lowcore.lpp);
}

#ifdef CONFIG_KERNEL_UNCOMPRESSED
unsigned long mem_safe_offset(void)
{
	return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
}
#endif

static void rescue_initrd(unsigned long addr)
{
	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
		return;
	if (!INITRD_START || !INITRD_SIZE)
		return;
	if (addr <= INITRD_START)
		return;
	memmove((void *)addr, (void *)INITRD_START, INITRD_SIZE);
	INITRD_START = addr;
}

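/*
 * Copy the .boot.data and .boot.preserved.data sections collected by the
 * decompressor to the locations expected by the decompressed kernel. The
 * section sizes recorded in the vmlinux info block are used as a sanity
 * check before copying.
 */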
static void copy_bootdata(void)
{
	if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
		error(".boot.data section size mismatch");
	memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
	if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size)
		error(".boot.preserved.data section size mismatch");
	memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
}

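/*
 * Apply the .rela.dyn relocations of the position-independent kernel image:
 * shift each relocation target by the KASLR offset and resolve symbol
 * references via the dynamic symbol table.
 */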
static void handle_relocs(unsigned long offset)
{
	Elf64_Rela *rela_start, *rela_end, *rela;
	int r_type, r_sym, rc;
	Elf64_Addr loc, val;
	Elf64_Sym *dynsym;

	rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
	rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
	dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
	for (rela = rela_start; rela < rela_end; rela++) {
		loc = rela->r_offset + offset;
		val = rela->r_addend;
		r_sym = ELF64_R_SYM(rela->r_info);
		if (r_sym) {
			if (dynsym[r_sym].st_shndx != SHN_UNDEF)
				val += dynsym[r_sym].st_value + offset;
		} else {
			/*
			 * 0 == undefined symbol table index (STN_UNDEF),
			 * used for R_390_RELATIVE, only add KASLR offset
			 */
			val += offset;
		}
		r_type = ELF64_R_TYPE(rela->r_info);
		rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
		if (rc)
			error("Unknown relocation type");
	}
}

/*
 * Merge information from several sources into a single ident_map_size value.
 * "ident_map_size" represents the upper limit of physical memory we may ever
 * reach. It might not be all online memory, but may also include standby
 * (offline) memory. "ident_map_size" could be lower than the standby or even
 * online memory actually present, due to limiting factors. We should never go
 * above this limit. It is the size of our identity mapping.
 *
 * Consider the following factors:
 * 1. max_physmem_end - end of physical memory online or standby.
 *    Always <= end of the last online memory block (get_mem_detect_end()).
 * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
 *    kernel is able to support.
 * 3. "mem=" kernel command line option which limits physical memory usage.
 * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as
 *    crash kernel.
 * 5. "hsa" size which is a memory limit when the kernel is executed during
 *    zfcp/nvme dump.
 */
static void setup_ident_map_size(unsigned long max_physmem_end)
{
	unsigned long hsa_size;

	ident_map_size = max_physmem_end;
	if (memory_limit)
		ident_map_size = min(ident_map_size, memory_limit);
	ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS);

#ifdef CONFIG_CRASH_DUMP
	if (OLDMEM_BASE) {
		kaslr_enabled = 0;
		ident_map_size = min(ident_map_size, OLDMEM_SIZE);
	} else if (ipl_block_valid && is_ipl_block_dump()) {
		kaslr_enabled = 0;
		if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
			ident_map_size = min(ident_map_size, hsa_size);
	}
#endif
}

static void setup_kernel_memory_layout(void)
{
	bool vmalloc_size_verified = false;
	unsigned long vmemmap_off;
	unsigned long vspace_left;
	unsigned long rte_size;
	unsigned long pages;
	unsigned long vmax;

	pages = ident_map_size / PAGE_SIZE;
	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
	vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);

	/* choose kernel address space layout: 4 or 3 levels. */
	vmemmap_off = round_up(ident_map_size, _REGION3_SIZE);
	if (IS_ENABLED(CONFIG_KASAN) ||
	    vmalloc_size > _REGION2_SIZE ||
	    vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE)
		vmax = _REGION1_SIZE;
	else
		vmax = _REGION2_SIZE;

	/* keep vmemmap_off aligned to a top level region table entry */
	rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE;
	MODULES_END = vmax;
	if (is_prot_virt_host()) {
		/*
		 * force modules and the vmalloc area below the ultravisor
		 * secure storage limit, so that any vmalloc allocation
		 * we do could be used to back secure guest storage.
		 */
		adjust_to_uv_max(&MODULES_END);
	}

#ifdef CONFIG_KASAN
	if (MODULES_END < vmax) {
		/* force vmalloc and modules below kasan shadow */
		MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
	} else {
		/*
		 * leave vmalloc and modules above kasan shadow but make
		 * sure they don't overlap with it
		 */
		vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN);
		vmalloc_size_verified = true;
		vspace_left = KASAN_SHADOW_START;
	}
#endif
	MODULES_VADDR = MODULES_END - MODULES_LEN;
	VMALLOC_END = MODULES_VADDR;

	if (vmalloc_size_verified) {
		VMALLOC_START = VMALLOC_END - vmalloc_size;
	} else {
		vmemmap_off = round_up(ident_map_size, rte_size);

		if (vmemmap_off + vmemmap_size > VMALLOC_END ||
		    vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) {
			/*
			 * allow vmalloc area to occupy up to 1/2 of
			 * the remaining virtual space
			 */
			vmalloc_size = min(vmalloc_size, VMALLOC_END / 2);
		}
		VMALLOC_START = VMALLOC_END - vmalloc_size;
		vspace_left = VMALLOC_START;
	}

	pages = vspace_left / (PAGE_SIZE + sizeof(struct page));
	pages = SECTION_ALIGN_UP(pages);
	vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size);
	/* keep vmemmap leftmost, starting from a fresh region table entry */
	vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size));
	/* take care that the identity map is below vmemmap */
	ident_map_size = min(ident_map_size, vmemmap_off);
	vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
	VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START);
	vmemmap = (struct page *)vmemmap_off;
}

/*
 * This function clears the BSS section of the decompressed Linux kernel,
 * NOT the decompressor's.
 */
static void clear_bss_section(void)
{
	memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size);
}

/*
 * Set vmalloc area size to an 8th of (potential) physical memory
 * size, unless the size has been set by a kernel command line parameter.
 */
static void setup_vmalloc_size(void)
{
	unsigned long size;

	if (vmalloc_size_set)
		return;
	size = round_up(ident_map_size / 8, _SEGMENT_SIZE);
	vmalloc_size = max(size, vmalloc_size);
}

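/*
 * Main entry point of the decompressor: set up the memory layout, optionally
 * pick a random load address (KASLR), move the decompressed kernel image into
 * place, apply relocations and branch to the decompressed kernel.
 */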
void startup_kernel(void)
{
	unsigned long random_lma;
	unsigned long safe_addr;
	void *img;

	setup_lpp();
	store_ipl_parmblock();
	safe_addr = mem_safe_offset();
	safe_addr = read_ipl_report(safe_addr);
	uv_query_info();
	rescue_initrd(safe_addr);
	sclp_early_read_info();
	setup_boot_command_line();
	parse_boot_command_line();
	setup_ident_map_size(detect_memory());
	setup_vmalloc_size();
	setup_kernel_memory_layout();

	random_lma = __kaslr_offset = 0;
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
		random_lma = get_random_base(safe_addr);
		if (random_lma) {
			__kaslr_offset = random_lma - vmlinux.default_lma;
			img = (void *)vmlinux.default_lma;
			vmlinux.default_lma += __kaslr_offset;
			vmlinux.entry += __kaslr_offset;
			vmlinux.bootdata_off += __kaslr_offset;
			vmlinux.bootdata_preserved_off += __kaslr_offset;
			vmlinux.rela_dyn_start += __kaslr_offset;
			vmlinux.rela_dyn_end += __kaslr_offset;
			vmlinux.dynsym_start += __kaslr_offset;
		}
	}

	if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
		img = decompress_kernel();
		memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
	} else if (__kaslr_offset)
		memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);

	clear_bss_section();
	copy_bootdata();
	if (IS_ENABLED(CONFIG_RELOCATABLE))
		handle_relocs(__kaslr_offset);

	if (__kaslr_offset) {
		/*
		 * Save KASLR offset for early dumps, before vmcore_info is set.
		 * Mark it as odd to distinguish it from a real vmcore_info pointer.
		 */
		S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
		/* Clear non-relocated kernel */
		if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
			memset(img, 0, vmlinux.image_size);
	}
	vmlinux.entry();
}