1 /* 2 * 3 * Common boot and setup code. 4 * 5 * Copyright (C) 2001 PPC64 Team, IBM Corp 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 */ 12 13 #define DEBUG 14 15 #include <linux/export.h> 16 #include <linux/string.h> 17 #include <linux/sched.h> 18 #include <linux/init.h> 19 #include <linux/kernel.h> 20 #include <linux/reboot.h> 21 #include <linux/delay.h> 22 #include <linux/initrd.h> 23 #include <linux/seq_file.h> 24 #include <linux/ioport.h> 25 #include <linux/console.h> 26 #include <linux/utsname.h> 27 #include <linux/tty.h> 28 #include <linux/root_dev.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 #include <linux/unistd.h> 32 #include <linux/serial.h> 33 #include <linux/serial_8250.h> 34 #include <linux/bootmem.h> 35 #include <linux/pci.h> 36 #include <linux/lockdep.h> 37 #include <linux/memblock.h> 38 #include <linux/memory.h> 39 #include <linux/nmi.h> 40 41 #include <asm/io.h> 42 #include <asm/kdump.h> 43 #include <asm/prom.h> 44 #include <asm/processor.h> 45 #include <asm/pgtable.h> 46 #include <asm/smp.h> 47 #include <asm/elf.h> 48 #include <asm/machdep.h> 49 #include <asm/paca.h> 50 #include <asm/time.h> 51 #include <asm/cputable.h> 52 #include <asm/sections.h> 53 #include <asm/btext.h> 54 #include <asm/nvram.h> 55 #include <asm/setup.h> 56 #include <asm/rtas.h> 57 #include <asm/iommu.h> 58 #include <asm/serial.h> 59 #include <asm/cache.h> 60 #include <asm/page.h> 61 #include <asm/mmu.h> 62 #include <asm/firmware.h> 63 #include <asm/xmon.h> 64 #include <asm/udbg.h> 65 #include <asm/kexec.h> 66 #include <asm/code-patching.h> 67 #include <asm/livepatch.h> 68 #include <asm/opal.h> 69 #include <asm/cputhreads.h> 70 71 #ifdef DEBUG 72 #define DBG(fmt...) udbg_printf(fmt) 73 #else 74 #define DBG(fmt...) 75 #endif 76 77 int spinning_secondaries; 78 u64 ppc64_pft_size; 79 80 /* Pick defaults since we might want to patch instructions 81 * before we've read this from the device tree. 82 */ 83 struct ppc64_caches ppc64_caches = { 84 .dline_size = 0x40, 85 .log_dline_size = 6, 86 .iline_size = 0x40, 87 .log_iline_size = 6 88 }; 89 EXPORT_SYMBOL_GPL(ppc64_caches); 90 91 /* 92 * These are used in binfmt_elf.c to put aux entries on the stack 93 * for each elf executable being started. 94 */ 95 int dcache_bsize; 96 int icache_bsize; 97 int ucache_bsize; 98 99 #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) 100 void __init setup_tlb_core_data(void) 101 { 102 int cpu; 103 104 BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0); 105 106 for_each_possible_cpu(cpu) { 107 int first = cpu_first_thread_sibling(cpu); 108 109 /* 110 * If we boot via kdump on a non-primary thread, 111 * make sure we point at the thread that actually 112 * set up this TLB. 113 */ 114 if (cpu_first_thread_sibling(boot_cpuid) == first) 115 first = boot_cpuid; 116 117 paca[cpu].tcd_ptr = &paca[first].tcd; 118 119 /* 120 * If we have threads, we need either tlbsrx. 121 * or e6500 tablewalk mode, or else TLB handlers 122 * will be racy and could produce duplicate entries. 123 */ 124 if (smt_enabled_at_boot >= 2 && 125 !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && 126 book3e_htw_mode != PPC_HTW_E6500) { 127 /* Should we panic instead? */ 128 WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", 129 __func__); 130 } 131 } 132 } 133 #endif 134 135 #ifdef CONFIG_SMP 136 137 static char *smt_enabled_cmdline; 138 139 /* Look for ibm,smt-enabled OF option */ 140 void __init check_smt_enabled(void) 141 { 142 struct device_node *dn; 143 const char *smt_option; 144 145 /* Default to enabling all threads */ 146 smt_enabled_at_boot = threads_per_core; 147 148 /* Allow the command line to overrule the OF option */ 149 if (smt_enabled_cmdline) { 150 if (!strcmp(smt_enabled_cmdline, "on")) 151 smt_enabled_at_boot = threads_per_core; 152 else if (!strcmp(smt_enabled_cmdline, "off")) 153 smt_enabled_at_boot = 0; 154 else { 155 int smt; 156 int rc; 157 158 rc = kstrtoint(smt_enabled_cmdline, 10, &smt); 159 if (!rc) 160 smt_enabled_at_boot = 161 min(threads_per_core, smt); 162 } 163 } else { 164 dn = of_find_node_by_path("/options"); 165 if (dn) { 166 smt_option = of_get_property(dn, "ibm,smt-enabled", 167 NULL); 168 169 if (smt_option) { 170 if (!strcmp(smt_option, "on")) 171 smt_enabled_at_boot = threads_per_core; 172 else if (!strcmp(smt_option, "off")) 173 smt_enabled_at_boot = 0; 174 } 175 176 of_node_put(dn); 177 } 178 } 179 } 180 181 /* Look for smt-enabled= cmdline option */ 182 static int __init early_smt_enabled(char *p) 183 { 184 smt_enabled_cmdline = p; 185 return 0; 186 } 187 early_param("smt-enabled", early_smt_enabled); 188 189 #endif /* CONFIG_SMP */ 190 191 /** Fix up paca fields required for the boot cpu */ 192 static void __init fixup_boot_paca(void) 193 { 194 /* The boot cpu is started */ 195 get_paca()->cpu_start = 1; 196 /* Allow percpu accesses to work until we setup percpu data */ 197 get_paca()->data_offset = 0; 198 } 199 200 static void __init configure_exceptions(void) 201 { 202 /* 203 * Setup the trampolines from the lowmem exception vectors 204 * to the kdump kernel when not using a relocatable kernel. 205 */ 206 setup_kdump_trampoline(); 207 208 /* Under a PAPR hypervisor, we need hypercalls */ 209 if (firmware_has_feature(FW_FEATURE_SET_MODE)) { 210 /* Enable AIL if possible */ 211 pseries_enable_reloc_on_exc(); 212 213 /* 214 * Tell the hypervisor that we want our exceptions to 215 * be taken in little endian mode. 216 * 217 * We don't call this for big endian as our calling convention 218 * makes us always enter in BE, and the call may fail under 219 * some circumstances with kdump. 220 */ 221 #ifdef __LITTLE_ENDIAN__ 222 pseries_little_endian_exceptions(); 223 #endif 224 } else { 225 /* Set endian mode using OPAL */ 226 if (firmware_has_feature(FW_FEATURE_OPAL)) 227 opal_configure_cores(); 228 229 /* AIL on native is done in cpu_ready_for_interrupts() */ 230 } 231 } 232 233 static void cpu_ready_for_interrupts(void) 234 { 235 /* 236 * Enable AIL if supported, and we are in hypervisor mode. This 237 * is called once for every processor. 238 * 239 * If we are not in hypervisor mode the job is done once for 240 * the whole partition in configure_exceptions(). 241 */ 242 if (early_cpu_has_feature(CPU_FTR_HVMODE) && 243 early_cpu_has_feature(CPU_FTR_ARCH_207S)) { 244 unsigned long lpcr = mfspr(SPRN_LPCR); 245 mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); 246 } 247 248 /* Set IR and DR in PACA MSR */ 249 get_paca()->kernel_msr = MSR_KERNEL; 250 } 251 252 /* 253 * Early initialization entry point. This is called by head.S 254 * with MMU translation disabled. We rely on the "feature" of 255 * the CPU that ignores the top 2 bits of the address in real 256 * mode so we can access kernel globals normally provided we 257 * only toy with things in the RMO region. From here, we do 258 * some early parsing of the device-tree to setup out MEMBLOCK 259 * data structures, and allocate & initialize the hash table 260 * and segment tables so we can start running with translation 261 * enabled. 262 * 263 * It is this function which will call the probe() callback of 264 * the various platform types and copy the matching one to the 265 * global ppc_md structure. Your platform can eventually do 266 * some very early initializations from the probe() routine, but 267 * this is not recommended, be very careful as, for example, the 268 * device-tree is not accessible via normal means at this point. 269 */ 270 271 void __init early_setup(unsigned long dt_ptr) 272 { 273 static __initdata struct paca_struct boot_paca; 274 275 /* -------- printk is _NOT_ safe to use here ! ------- */ 276 277 /* Identify CPU type */ 278 identify_cpu(0, mfspr(SPRN_PVR)); 279 280 /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ 281 initialise_paca(&boot_paca, 0); 282 setup_paca(&boot_paca); 283 fixup_boot_paca(); 284 285 /* -------- printk is now safe to use ------- */ 286 287 /* Enable early debugging if any specified (see udbg.h) */ 288 udbg_early_init(); 289 290 DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); 291 292 /* 293 * Do early initialization using the flattened device 294 * tree, such as retrieving the physical memory map or 295 * calculating/retrieving the hash table size. 296 */ 297 early_init_devtree(__va(dt_ptr)); 298 299 /* Now we know the logical id of our boot cpu, setup the paca. */ 300 setup_paca(&paca[boot_cpuid]); 301 fixup_boot_paca(); 302 303 /* 304 * Configure exception handlers. This include setting up trampolines 305 * if needed, setting exception endian mode, etc... 306 */ 307 configure_exceptions(); 308 309 /* Apply all the dynamic patching */ 310 apply_feature_fixups(); 311 setup_feature_keys(); 312 313 /* Initialize the hash table or TLB handling */ 314 early_init_mmu(); 315 316 /* 317 * At this point, we can let interrupts switch to virtual mode 318 * (the MMU has been setup), so adjust the MSR in the PACA to 319 * have IR and DR set and enable AIL if it exists 320 */ 321 cpu_ready_for_interrupts(); 322 323 DBG(" <- early_setup()\n"); 324 325 #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX 326 /* 327 * This needs to be done *last* (after the above DBG() even) 328 * 329 * Right after we return from this function, we turn on the MMU 330 * which means the real-mode access trick that btext does will 331 * no longer work, it needs to switch to using a real MMU 332 * mapping. This call will ensure that it does 333 */ 334 btext_map(); 335 #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ 336 } 337 338 #ifdef CONFIG_SMP 339 void early_setup_secondary(void) 340 { 341 /* Mark interrupts disabled in PACA */ 342 get_paca()->soft_enabled = 0; 343 344 /* Initialize the hash table or TLB handling */ 345 early_init_mmu_secondary(); 346 347 /* 348 * At this point, we can let interrupts switch to virtual mode 349 * (the MMU has been setup), so adjust the MSR in the PACA to 350 * have IR and DR set. 351 */ 352 cpu_ready_for_interrupts(); 353 } 354 355 #endif /* CONFIG_SMP */ 356 357 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) 358 static bool use_spinloop(void) 359 { 360 if (!IS_ENABLED(CONFIG_PPC_BOOK3E)) 361 return true; 362 363 /* 364 * When book3e boots from kexec, the ePAPR spin table does 365 * not get used. 366 */ 367 return of_property_read_bool(of_chosen, "linux,booted-from-kexec"); 368 } 369 370 void smp_release_cpus(void) 371 { 372 unsigned long *ptr; 373 int i; 374 375 if (!use_spinloop()) 376 return; 377 378 DBG(" -> smp_release_cpus()\n"); 379 380 /* All secondary cpus are spinning on a common spinloop, release them 381 * all now so they can start to spin on their individual paca 382 * spinloops. For non SMP kernels, the secondary cpus never get out 383 * of the common spinloop. 384 */ 385 386 ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop 387 - PHYSICAL_START); 388 *ptr = ppc_function_entry(generic_secondary_smp_init); 389 390 /* And wait a bit for them to catch up */ 391 for (i = 0; i < 100000; i++) { 392 mb(); 393 HMT_low(); 394 if (spinning_secondaries == 0) 395 break; 396 udelay(1); 397 } 398 DBG("spinning_secondaries = %d\n", spinning_secondaries); 399 400 DBG(" <- smp_release_cpus()\n"); 401 } 402 #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */ 403 404 /* 405 * Initialize some remaining members of the ppc64_caches and systemcfg 406 * structures 407 * (at least until we get rid of them completely). This is mostly some 408 * cache informations about the CPU that will be used by cache flush 409 * routines and/or provided to userland 410 */ 411 void __init initialize_cache_info(void) 412 { 413 struct device_node *np; 414 unsigned long num_cpus = 0; 415 416 DBG(" -> initialize_cache_info()\n"); 417 418 for_each_node_by_type(np, "cpu") { 419 num_cpus += 1; 420 421 /* 422 * We're assuming *all* of the CPUs have the same 423 * d-cache and i-cache sizes... -Peter 424 */ 425 if (num_cpus == 1) { 426 const __be32 *sizep, *lsizep; 427 u32 size, lsize; 428 429 size = 0; 430 lsize = cur_cpu_spec->dcache_bsize; 431 sizep = of_get_property(np, "d-cache-size", NULL); 432 if (sizep != NULL) 433 size = be32_to_cpu(*sizep); 434 lsizep = of_get_property(np, "d-cache-block-size", 435 NULL); 436 /* fallback if block size missing */ 437 if (lsizep == NULL) 438 lsizep = of_get_property(np, 439 "d-cache-line-size", 440 NULL); 441 if (lsizep != NULL) 442 lsize = be32_to_cpu(*lsizep); 443 if (sizep == NULL || lsizep == NULL) 444 DBG("Argh, can't find dcache properties ! " 445 "sizep: %p, lsizep: %p\n", sizep, lsizep); 446 447 ppc64_caches.dsize = size; 448 ppc64_caches.dline_size = lsize; 449 ppc64_caches.log_dline_size = __ilog2(lsize); 450 ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; 451 452 size = 0; 453 lsize = cur_cpu_spec->icache_bsize; 454 sizep = of_get_property(np, "i-cache-size", NULL); 455 if (sizep != NULL) 456 size = be32_to_cpu(*sizep); 457 lsizep = of_get_property(np, "i-cache-block-size", 458 NULL); 459 if (lsizep == NULL) 460 lsizep = of_get_property(np, 461 "i-cache-line-size", 462 NULL); 463 if (lsizep != NULL) 464 lsize = be32_to_cpu(*lsizep); 465 if (sizep == NULL || lsizep == NULL) 466 DBG("Argh, can't find icache properties ! " 467 "sizep: %p, lsizep: %p\n", sizep, lsizep); 468 469 ppc64_caches.isize = size; 470 ppc64_caches.iline_size = lsize; 471 ppc64_caches.log_iline_size = __ilog2(lsize); 472 ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; 473 } 474 } 475 476 /* For use by binfmt_elf */ 477 dcache_bsize = ppc64_caches.dline_size; 478 icache_bsize = ppc64_caches.iline_size; 479 480 DBG(" <- initialize_cache_info()\n"); 481 } 482 483 /* This returns the limit below which memory accesses to the linear 484 * mapping are guarnateed not to cause a TLB or SLB miss. This is 485 * used to allocate interrupt or emergency stacks for which our 486 * exception entry path doesn't deal with being interrupted. 487 */ 488 static __init u64 safe_stack_limit(void) 489 { 490 #ifdef CONFIG_PPC_BOOK3E 491 /* Freescale BookE bolts the entire linear mapping */ 492 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) 493 return linear_map_top; 494 /* Other BookE, we assume the first GB is bolted */ 495 return 1ul << 30; 496 #else 497 /* BookS, the first segment is bolted */ 498 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 499 return 1UL << SID_SHIFT_1T; 500 return 1UL << SID_SHIFT; 501 #endif 502 } 503 504 void __init irqstack_early_init(void) 505 { 506 u64 limit = safe_stack_limit(); 507 unsigned int i; 508 509 /* 510 * Interrupt stacks must be in the first segment since we 511 * cannot afford to take SLB misses on them. 512 */ 513 for_each_possible_cpu(i) { 514 softirq_ctx[i] = (struct thread_info *) 515 __va(memblock_alloc_base(THREAD_SIZE, 516 THREAD_SIZE, limit)); 517 hardirq_ctx[i] = (struct thread_info *) 518 __va(memblock_alloc_base(THREAD_SIZE, 519 THREAD_SIZE, limit)); 520 } 521 } 522 523 #ifdef CONFIG_PPC_BOOK3E 524 void __init exc_lvl_early_init(void) 525 { 526 unsigned int i; 527 unsigned long sp; 528 529 for_each_possible_cpu(i) { 530 sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); 531 critirq_ctx[i] = (struct thread_info *)__va(sp); 532 paca[i].crit_kstack = __va(sp + THREAD_SIZE); 533 534 sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); 535 dbgirq_ctx[i] = (struct thread_info *)__va(sp); 536 paca[i].dbg_kstack = __va(sp + THREAD_SIZE); 537 538 sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); 539 mcheckirq_ctx[i] = (struct thread_info *)__va(sp); 540 paca[i].mc_kstack = __va(sp + THREAD_SIZE); 541 } 542 543 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) 544 patch_exception(0x040, exc_debug_debug_book3e); 545 } 546 #endif 547 548 /* 549 * Stack space used when we detect a bad kernel stack pointer, and 550 * early in SMP boots before relocation is enabled. Exclusive emergency 551 * stack for machine checks. 552 */ 553 void __init emergency_stack_init(void) 554 { 555 u64 limit; 556 unsigned int i; 557 558 /* 559 * Emergency stacks must be under 256MB, we cannot afford to take 560 * SLB misses on them. The ABI also requires them to be 128-byte 561 * aligned. 562 * 563 * Since we use these as temporary stacks during secondary CPU 564 * bringup, we need to get at them in real mode. This means they 565 * must also be within the RMO region. 566 */ 567 limit = min(safe_stack_limit(), ppc64_rma_size); 568 569 for_each_possible_cpu(i) { 570 struct thread_info *ti; 571 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); 572 klp_init_thread_info(ti); 573 paca[i].emergency_sp = (void *)ti + THREAD_SIZE; 574 575 #ifdef CONFIG_PPC_BOOK3S_64 576 /* emergency stack for machine check exception handling. */ 577 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); 578 klp_init_thread_info(ti); 579 paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; 580 #endif 581 } 582 } 583 584 #ifdef CONFIG_SMP 585 #define PCPU_DYN_SIZE () 586 587 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) 588 { 589 return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, 590 __pa(MAX_DMA_ADDRESS)); 591 } 592 593 static void __init pcpu_fc_free(void *ptr, size_t size) 594 { 595 free_bootmem(__pa(ptr), size); 596 } 597 598 static int pcpu_cpu_distance(unsigned int from, unsigned int to) 599 { 600 if (cpu_to_node(from) == cpu_to_node(to)) 601 return LOCAL_DISTANCE; 602 else 603 return REMOTE_DISTANCE; 604 } 605 606 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 607 EXPORT_SYMBOL(__per_cpu_offset); 608 609 void __init setup_per_cpu_areas(void) 610 { 611 const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; 612 size_t atom_size; 613 unsigned long delta; 614 unsigned int cpu; 615 int rc; 616 617 /* 618 * Linear mapping is one of 4K, 1M and 16M. For 4K, no need 619 * to group units. For larger mappings, use 1M atom which 620 * should be large enough to contain a number of units. 621 */ 622 if (mmu_linear_psize == MMU_PAGE_4K) 623 atom_size = PAGE_SIZE; 624 else 625 atom_size = 1 << 20; 626 627 rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, 628 pcpu_fc_alloc, pcpu_fc_free); 629 if (rc < 0) 630 panic("cannot initialize percpu area (err=%d)", rc); 631 632 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; 633 for_each_possible_cpu(cpu) { 634 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; 635 paca[cpu].data_offset = __per_cpu_offset[cpu]; 636 } 637 } 638 #endif 639 640 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 641 unsigned long memory_block_size_bytes(void) 642 { 643 if (ppc_md.memory_block_size) 644 return ppc_md.memory_block_size(); 645 646 return MIN_MEMORY_BLOCK_SIZE; 647 } 648 #endif 649 650 #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) 651 struct ppc_pci_io ppc_pci_io; 652 EXPORT_SYMBOL(ppc_pci_io); 653 #endif 654 655 #ifdef CONFIG_HARDLOCKUP_DETECTOR 656 u64 hw_nmi_get_sample_period(int watchdog_thresh) 657 { 658 return ppc_proc_freq * watchdog_thresh; 659 } 660 661 /* 662 * The hardlockup detector breaks PMU event based branches and is likely 663 * to get false positives in KVM guests, so disable it by default. 664 */ 665 static int __init disable_hardlockup_detector(void) 666 { 667 hardlockup_detector_disable(); 668 669 return 0; 670 } 671 early_initcall(disable_hardlockup_detector); 672 #endif 673