1 /* 2 * PowerNV setup code. 3 * 4 * Copyright 2011 IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/cpu.h> 15 #include <linux/errno.h> 16 #include <linux/sched.h> 17 #include <linux/kernel.h> 18 #include <linux/tty.h> 19 #include <linux/reboot.h> 20 #include <linux/init.h> 21 #include <linux/console.h> 22 #include <linux/delay.h> 23 #include <linux/irq.h> 24 #include <linux/seq_file.h> 25 #include <linux/of.h> 26 #include <linux/of_fdt.h> 27 #include <linux/interrupt.h> 28 #include <linux/bug.h> 29 #include <linux/pci.h> 30 #include <linux/cpufreq.h> 31 32 #include <asm/machdep.h> 33 #include <asm/firmware.h> 34 #include <asm/xics.h> 35 #include <asm/rtas.h> 36 #include <asm/opal.h> 37 #include <asm/kexec.h> 38 #include <asm/smp.h> 39 #include <asm/cputhreads.h> 40 #include <asm/cpuidle.h> 41 #include <asm/code-patching.h> 42 43 #include "powernv.h" 44 #include "subcore.h" 45 46 static void __init pnv_setup_arch(void) 47 { 48 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); 49 50 /* Initialize SMP */ 51 pnv_smp_init(); 52 53 /* Setup PCI */ 54 pnv_pci_init(); 55 56 /* Setup RTC and NVRAM callbacks */ 57 if (firmware_has_feature(FW_FEATURE_OPAL)) 58 opal_nvram_init(); 59 60 /* Enable NAP mode */ 61 powersave_nap = 1; 62 63 /* XXX PMCS */ 64 } 65 66 static void __init pnv_init_early(void) 67 { 68 /* 69 * Initialize the LPC bus now so that legacy serial 70 * ports can be found on it 71 */ 72 opal_lpc_init(); 73 74 #ifdef CONFIG_HVC_OPAL 75 if (firmware_has_feature(FW_FEATURE_OPAL)) 76 hvc_opal_init_early(); 77 else 78 #endif 79 add_preferred_console("hvc", 0, NULL); 80 } 81 82 static void __init pnv_init_IRQ(void) 83 { 84 xics_init(); 85 86 WARN_ON(!ppc_md.get_irq); 87 } 88 89 static void pnv_show_cpuinfo(struct seq_file *m) 90 { 91 struct device_node *root; 92 const char *model = ""; 93 94 root = of_find_node_by_path("/"); 95 if (root) 96 model = of_get_property(root, "model", NULL); 97 seq_printf(m, "machine\t\t: PowerNV %s\n", model); 98 if (firmware_has_feature(FW_FEATURE_OPALv3)) 99 seq_printf(m, "firmware\t: OPAL v3\n"); 100 else if (firmware_has_feature(FW_FEATURE_OPALv2)) 101 seq_printf(m, "firmware\t: OPAL v2\n"); 102 else if (firmware_has_feature(FW_FEATURE_OPAL)) 103 seq_printf(m, "firmware\t: OPAL v1\n"); 104 else 105 seq_printf(m, "firmware\t: BML\n"); 106 of_node_put(root); 107 } 108 109 static void pnv_prepare_going_down(void) 110 { 111 /* 112 * Disable all notifiers from OPAL, we can't 113 * service interrupts anymore anyway 114 */ 115 opal_notifier_disable(); 116 117 /* Soft disable interrupts */ 118 local_irq_disable(); 119 120 /* 121 * Return secondary CPUs to firwmare if a flash update 122 * is pending otherwise we will get all sort of error 123 * messages about CPU being stuck etc.. This will also 124 * have the side effect of hard disabling interrupts so 125 * past this point, the kernel is effectively dead. 126 */ 127 opal_flash_term_callback(); 128 } 129 130 static void __noreturn pnv_restart(char *cmd) 131 { 132 long rc = OPAL_BUSY; 133 134 pnv_prepare_going_down(); 135 136 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 137 rc = opal_cec_reboot(); 138 if (rc == OPAL_BUSY_EVENT) 139 opal_poll_events(NULL); 140 else 141 mdelay(10); 142 } 143 for (;;) 144 opal_poll_events(NULL); 145 } 146 147 static void __noreturn pnv_power_off(void) 148 { 149 long rc = OPAL_BUSY; 150 151 pnv_prepare_going_down(); 152 153 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 154 rc = opal_cec_power_down(0); 155 if (rc == OPAL_BUSY_EVENT) 156 opal_poll_events(NULL); 157 else 158 mdelay(10); 159 } 160 for (;;) 161 opal_poll_events(NULL); 162 } 163 164 static void __noreturn pnv_halt(void) 165 { 166 pnv_power_off(); 167 } 168 169 static void pnv_progress(char *s, unsigned short hex) 170 { 171 } 172 173 static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) 174 { 175 if (dev_is_pci(dev)) 176 return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask); 177 return __dma_set_mask(dev, dma_mask); 178 } 179 180 static u64 pnv_dma_get_required_mask(struct device *dev) 181 { 182 if (dev_is_pci(dev)) 183 return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); 184 185 return __dma_get_required_mask(dev); 186 } 187 188 static void pnv_shutdown(void) 189 { 190 /* Let the PCI code clear up IODA tables */ 191 pnv_pci_shutdown(); 192 193 /* 194 * Stop OPAL activity: Unregister all OPAL interrupts so they 195 * don't fire up while we kexec and make sure all potentially 196 * DMA'ing ops are complete (such as dump retrieval). 197 */ 198 opal_shutdown(); 199 } 200 201 #ifdef CONFIG_KEXEC 202 static void pnv_kexec_wait_secondaries_down(void) 203 { 204 int my_cpu, i, notified = -1; 205 206 my_cpu = get_cpu(); 207 208 for_each_online_cpu(i) { 209 uint8_t status; 210 int64_t rc; 211 212 if (i == my_cpu) 213 continue; 214 215 for (;;) { 216 rc = opal_query_cpu_status(get_hard_smp_processor_id(i), 217 &status); 218 if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) 219 break; 220 barrier(); 221 if (i != notified) { 222 printk(KERN_INFO "kexec: waiting for cpu %d " 223 "(physical %d) to enter OPAL\n", 224 i, paca[i].hw_cpu_id); 225 notified = i; 226 } 227 } 228 } 229 } 230 231 static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) 232 { 233 xics_kexec_teardown_cpu(secondary); 234 235 /* On OPAL v3, we return all CPUs to firmware */ 236 237 if (!firmware_has_feature(FW_FEATURE_OPALv3)) 238 return; 239 240 if (secondary) { 241 /* Return secondary CPUs to firmware on OPAL v3 */ 242 mb(); 243 get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; 244 mb(); 245 246 /* Return the CPU to OPAL */ 247 opal_return_cpu(); 248 } else if (crash_shutdown) { 249 /* 250 * On crash, we don't wait for secondaries to go 251 * down as they might be unreachable or hung, so 252 * instead we just wait a bit and move on. 253 */ 254 mdelay(1); 255 } else { 256 /* Primary waits for the secondaries to have reached OPAL */ 257 pnv_kexec_wait_secondaries_down(); 258 } 259 } 260 #endif /* CONFIG_KEXEC */ 261 262 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 263 static unsigned long pnv_memory_block_size(void) 264 { 265 return 256UL * 1024 * 1024; 266 } 267 #endif 268 269 static void __init pnv_setup_machdep_opal(void) 270 { 271 ppc_md.get_boot_time = opal_get_boot_time; 272 ppc_md.restart = pnv_restart; 273 pm_power_off = pnv_power_off; 274 ppc_md.halt = pnv_halt; 275 ppc_md.machine_check_exception = opal_machine_check; 276 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; 277 ppc_md.hmi_exception_early = opal_hmi_exception_early; 278 ppc_md.handle_hmi_exception = opal_handle_hmi_exception; 279 } 280 281 #ifdef CONFIG_PPC_POWERNV_RTAS 282 static void __init pnv_setup_machdep_rtas(void) 283 { 284 if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) { 285 ppc_md.get_boot_time = rtas_get_boot_time; 286 ppc_md.get_rtc_time = rtas_get_rtc_time; 287 ppc_md.set_rtc_time = rtas_set_rtc_time; 288 } 289 ppc_md.restart = rtas_restart; 290 pm_power_off = rtas_power_off; 291 ppc_md.halt = rtas_halt; 292 } 293 #endif /* CONFIG_PPC_POWERNV_RTAS */ 294 295 static u32 supported_cpuidle_states; 296 297 int pnv_save_sprs_for_winkle(void) 298 { 299 int cpu; 300 int rc; 301 302 /* 303 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross 304 * all cpus at boot. Get these reg values of current cpu and use the 305 * same accross all cpus. 306 */ 307 uint64_t lpcr_val = mfspr(SPRN_LPCR); 308 uint64_t hid0_val = mfspr(SPRN_HID0); 309 uint64_t hid1_val = mfspr(SPRN_HID1); 310 uint64_t hid4_val = mfspr(SPRN_HID4); 311 uint64_t hid5_val = mfspr(SPRN_HID5); 312 uint64_t hmeer_val = mfspr(SPRN_HMEER); 313 314 for_each_possible_cpu(cpu) { 315 uint64_t pir = get_hard_smp_processor_id(cpu); 316 uint64_t hsprg0_val = (uint64_t)&paca[cpu]; 317 318 /* 319 * HSPRG0 is used to store the cpu's pointer to paca. Hence last 320 * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 321 * with 63rd bit set, so that when a thread wakes up at 0x100 we 322 * can use this bit to distinguish between fastsleep and 323 * deep winkle. 324 */ 325 hsprg0_val |= 1; 326 327 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 328 if (rc != 0) 329 return rc; 330 331 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 332 if (rc != 0) 333 return rc; 334 335 /* HIDs are per core registers */ 336 if (cpu_thread_in_core(cpu) == 0) { 337 338 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); 339 if (rc != 0) 340 return rc; 341 342 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); 343 if (rc != 0) 344 return rc; 345 346 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 347 if (rc != 0) 348 return rc; 349 350 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 351 if (rc != 0) 352 return rc; 353 354 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 355 if (rc != 0) 356 return rc; 357 } 358 } 359 360 return 0; 361 } 362 363 static void pnv_alloc_idle_core_states(void) 364 { 365 int i, j; 366 int nr_cores = cpu_nr_cores(); 367 u32 *core_idle_state; 368 369 /* 370 * core_idle_state - First 8 bits track the idle state of each thread 371 * of the core. The 8th bit is the lock bit. Initially all thread bits 372 * are set. They are cleared when the thread enters deep idle state 373 * like sleep and winkle. Initially the lock bit is cleared. 374 * The lock bit has 2 purposes 375 * a. While the first thread is restoring core state, it prevents 376 * other threads in the core from switching to process context. 377 * b. While the last thread in the core is saving the core state, it 378 * prevents a different thread from waking up. 379 */ 380 for (i = 0; i < nr_cores; i++) { 381 int first_cpu = i * threads_per_core; 382 int node = cpu_to_node(first_cpu); 383 384 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); 385 *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; 386 387 for (j = 0; j < threads_per_core; j++) { 388 int cpu = first_cpu + j; 389 390 paca[cpu].core_idle_state_ptr = core_idle_state; 391 paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; 392 paca[cpu].thread_mask = 1 << j; 393 } 394 } 395 396 update_subcore_sibling_mask(); 397 398 if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) 399 pnv_save_sprs_for_winkle(); 400 } 401 402 u32 pnv_get_supported_cpuidle_states(void) 403 { 404 return supported_cpuidle_states; 405 } 406 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); 407 408 static int __init pnv_init_idle_states(void) 409 { 410 struct device_node *power_mgt; 411 int dt_idle_states; 412 const __be32 *idle_state_flags; 413 u32 len_flags, flags; 414 int i; 415 416 supported_cpuidle_states = 0; 417 418 if (cpuidle_disable != IDLE_NO_OVERRIDE) 419 return 0; 420 421 if (!firmware_has_feature(FW_FEATURE_OPALv3)) 422 return 0; 423 424 power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); 425 if (!power_mgt) { 426 pr_warn("opal: PowerMgmt Node not found\n"); 427 return 0; 428 } 429 430 idle_state_flags = of_get_property(power_mgt, 431 "ibm,cpu-idle-state-flags", &len_flags); 432 if (!idle_state_flags) { 433 pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); 434 return 0; 435 } 436 437 dt_idle_states = len_flags / sizeof(u32); 438 439 for (i = 0; i < dt_idle_states; i++) { 440 flags = be32_to_cpu(idle_state_flags[i]); 441 supported_cpuidle_states |= flags; 442 } 443 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 444 patch_instruction( 445 (unsigned int *)pnv_fastsleep_workaround_at_entry, 446 PPC_INST_NOP); 447 patch_instruction( 448 (unsigned int *)pnv_fastsleep_workaround_at_exit, 449 PPC_INST_NOP); 450 } 451 pnv_alloc_idle_core_states(); 452 return 0; 453 } 454 455 subsys_initcall(pnv_init_idle_states); 456 457 static int __init pnv_probe(void) 458 { 459 unsigned long root = of_get_flat_dt_root(); 460 461 if (!of_flat_dt_is_compatible(root, "ibm,powernv")) 462 return 0; 463 464 hpte_init_native(); 465 466 if (firmware_has_feature(FW_FEATURE_OPAL)) 467 pnv_setup_machdep_opal(); 468 #ifdef CONFIG_PPC_POWERNV_RTAS 469 else if (rtas.base) 470 pnv_setup_machdep_rtas(); 471 #endif /* CONFIG_PPC_POWERNV_RTAS */ 472 473 pr_debug("PowerNV detected !\n"); 474 475 return 1; 476 } 477 478 /* 479 * Returns the cpu frequency for 'cpu' in Hz. This is used by 480 * /proc/cpuinfo 481 */ 482 static unsigned long pnv_get_proc_freq(unsigned int cpu) 483 { 484 unsigned long ret_freq; 485 486 ret_freq = cpufreq_quick_get(cpu) * 1000ul; 487 488 /* 489 * If the backend cpufreq driver does not exist, 490 * then fallback to old way of reporting the clockrate. 491 */ 492 if (!ret_freq) 493 ret_freq = ppc_proc_freq; 494 return ret_freq; 495 } 496 497 define_machine(powernv) { 498 .name = "PowerNV", 499 .probe = pnv_probe, 500 .init_early = pnv_init_early, 501 .setup_arch = pnv_setup_arch, 502 .init_IRQ = pnv_init_IRQ, 503 .show_cpuinfo = pnv_show_cpuinfo, 504 .get_proc_freq = pnv_get_proc_freq, 505 .progress = pnv_progress, 506 .machine_shutdown = pnv_shutdown, 507 .power_save = power7_idle, 508 .calibrate_decr = generic_calibrate_decr, 509 .dma_set_mask = pnv_dma_set_mask, 510 .dma_get_required_mask = pnv_dma_get_required_mask, 511 #ifdef CONFIG_KEXEC 512 .kexec_cpu_down = pnv_kexec_cpu_down, 513 #endif 514 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 515 .memory_block_size = pnv_memory_block_size, 516 #endif 517 }; 518