/*
 * PowerNV setup code.
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/tty.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/interrupt.h>
#include <linux/bug.h>
#include <linux/pci.h>
#include <linux/cpufreq.h>

#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/xics.h>
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/code-patching.h>

#include "powernv.h"
#include "subcore.h"

static void __init pnv_setup_arch(void)
{
	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);

	/* Initialize SMP */
	pnv_smp_init();

	/* Setup PCI */
	pnv_pci_init();

	/* Setup RTC and NVRAM callbacks */
	if (firmware_has_feature(FW_FEATURE_OPAL))
		opal_nvram_init();

	/* Enable NAP mode */
	powersave_nap = 1;

	/* XXX PMCS */
}

static void __init pnv_init_early(void)
{
	/*
	 * Initialize the LPC bus now so that legacy serial
	 * ports can be found on it
	 */
	opal_lpc_init();

#ifdef CONFIG_HVC_OPAL
	if (firmware_has_feature(FW_FEATURE_OPAL))
		hvc_opal_init_early();
	else
#endif
		add_preferred_console("hvc", 0, NULL);
}

static void __init pnv_init_IRQ(void)
{
	xics_init();

	WARN_ON(!ppc_md.get_irq);
}

static void pnv_show_cpuinfo(struct seq_file *m)
{
	struct device_node *root;
	const char *model = "";

	root = of_find_node_by_path("/");
	if (root)
		model = of_get_property(root, "model", NULL);
	seq_printf(m, "machine\t\t: PowerNV %s\n", model);
	if (firmware_has_feature(FW_FEATURE_OPALv3))
		seq_printf(m, "firmware\t: OPAL v3\n");
	else if (firmware_has_feature(FW_FEATURE_OPALv2))
		seq_printf(m, "firmware\t: OPAL v2\n");
	else if (firmware_has_feature(FW_FEATURE_OPAL))
		seq_printf(m, "firmware\t: OPAL v1\n");
	else
		seq_printf(m, "firmware\t: BML\n");
	of_node_put(root);
}

static void pnv_prepare_going_down(void)
{
	/*
	 * Disable all notifiers from OPAL, we can't
	 * service interrupts anymore anyway
	 */
	opal_notifier_disable();

	/* Soft disable interrupts */
	local_irq_disable();

	/*
	 * Return secondary CPUs to firmware if a flash update
	 * is pending, otherwise we will get all sorts of error
	 * messages about CPUs being stuck etc. This will also
	 * have the side effect of hard disabling interrupts, so
	 * past this point the kernel is effectively dead.
	 */
	opal_flash_term_callback();
}
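/*
 * The restart and power-off paths below follow the usual OPAL retry
 * convention: keep reissuing the call while it returns OPAL_BUSY or
 * OPAL_BUSY_EVENT, polling outstanding events on OPAL_BUSY_EVENT and
 * backing off briefly (10ms) otherwise. Once the request has been
 * accepted, we simply poll events forever until firmware takes the
 * machine down.
 */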
static void __noreturn pnv_restart(char *cmd)
{
	long rc = OPAL_BUSY;

	pnv_prepare_going_down();

	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_cec_reboot();
		if (rc == OPAL_BUSY_EVENT)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}
	for (;;)
		opal_poll_events(NULL);
}

static void __noreturn pnv_power_off(void)
{
	long rc = OPAL_BUSY;

	pnv_prepare_going_down();

	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_cec_power_down(0);
		if (rc == OPAL_BUSY_EVENT)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}
	for (;;)
		opal_poll_events(NULL);
}

static void __noreturn pnv_halt(void)
{
	pnv_power_off();
}

static void pnv_progress(char *s, unsigned short hex)
{
}

static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
{
	if (dev_is_pci(dev))
		return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
	return __dma_set_mask(dev, dma_mask);
}

static u64 pnv_dma_get_required_mask(struct device *dev)
{
	if (dev_is_pci(dev))
		return pnv_pci_dma_get_required_mask(to_pci_dev(dev));

	return __dma_get_required_mask(dev);
}

static void pnv_shutdown(void)
{
	/* Let the PCI code clear up IODA tables */
	pnv_pci_shutdown();

	/*
	 * Stop OPAL activity: Unregister all OPAL interrupts so they
	 * don't fire up while we kexec and make sure all potentially
	 * DMA'ing ops are complete (such as dump retrieval).
	 */
	opal_shutdown();
}

#ifdef CONFIG_KEXEC
static void pnv_kexec_wait_secondaries_down(void)
{
	int my_cpu, i, notified = -1;

	my_cpu = get_cpu();

	for_each_online_cpu(i) {
		uint8_t status;
		int64_t rc;

		if (i == my_cpu)
			continue;

		for (;;) {
			rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
						   &status);
			if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
				break;
			barrier();
			if (i != notified) {
				printk(KERN_INFO "kexec: waiting for cpu %d "
				       "(physical %d) to enter OPAL\n",
				       i, paca[i].hw_cpu_id);
				notified = i;
			}
		}
	}
}

static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
	xics_kexec_teardown_cpu(secondary);

	/* On OPAL v3, we return all CPUs to firmware */

	if (!firmware_has_feature(FW_FEATURE_OPALv3))
		return;

	if (secondary) {
		/* Return secondary CPUs to firmware on OPAL v3 */
		mb();
		get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
		mb();

		/* Return the CPU to OPAL */
		opal_return_cpu();
	} else if (crash_shutdown) {
		/*
		 * On crash, we don't wait for secondaries to go
		 * down as they might be unreachable or hung, so
		 * instead we just wait a bit and move on.
		 */
		mdelay(1);
	} else {
		/* Primary waits for the secondaries to have reached OPAL */
		pnv_kexec_wait_secondaries_down();
	}
}
#endif /* CONFIG_KEXEC */
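/*
 * Memory hotplug operates on fixed-size blocks; the hardcoded 256MB
 * below presumably matches the LMB granularity firmware uses when
 * describing memory on these machines. The sysfs memory block size
 * under CONFIG_MEMORY_HOTPLUG_SPARSE is derived from this value.
 */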
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static unsigned long pnv_memory_block_size(void)
{
	return 256UL * 1024 * 1024;
}
#endif

static void __init pnv_setup_machdep_opal(void)
{
	ppc_md.get_boot_time = opal_get_boot_time;
	ppc_md.restart = pnv_restart;
	pm_power_off = pnv_power_off;
	ppc_md.halt = pnv_halt;
	ppc_md.machine_check_exception = opal_machine_check;
	ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
	ppc_md.hmi_exception_early = opal_hmi_exception_early;
	ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
}

static u32 supported_cpuidle_states;

int pnv_save_sprs_for_winkle(void)
{
	int cpu;
	int rc;

	/*
	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric
	 * across all cpus at boot. Read these registers on the current
	 * cpu and use the same values across all cpus.
	 */
	uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
	uint64_t hid0_val = mfspr(SPRN_HID0);
	uint64_t hid1_val = mfspr(SPRN_HID1);
	uint64_t hid4_val = mfspr(SPRN_HID4);
	uint64_t hid5_val = mfspr(SPRN_HID5);
	uint64_t hmeer_val = mfspr(SPRN_HMEER);

	for_each_possible_cpu(cpu) {
		uint64_t pir = get_hard_smp_processor_id(cpu);
		uint64_t hsprg0_val = (uint64_t)&paca[cpu];

		/*
		 * HSPRG0 is used to store the cpu's pointer to paca. Hence
		 * the last 3 bits are guaranteed to be 0. Program the SLW
		 * engine to restore HSPRG0 with the 63rd bit set, so that
		 * when a thread wakes up at 0x100 we can use this bit to
		 * distinguish between fastsleep and deep winkle.
		 */
		hsprg0_val |= 1;

		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
		if (rc != 0)
			return rc;

		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
		if (rc != 0)
			return rc;

		/* HIDs are per core registers */
		if (cpu_thread_in_core(cpu) == 0) {

			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
			if (rc != 0)
				return rc;
		}
	}

	return 0;
}
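/*
 * A sketch of the *core_idle_state word managed below, assuming the
 * PNV_CORE_IDLE_* definitions from asm/cpuidle.h (thread bits 0xFF,
 * lock bit 0x100):
 *
 *   bit:    8       7 ..................... 0
 *         [lock]  [one bit per thread, 1 = thread not in deep idle]
 *
 * Each thread clears its own bit (selected via paca->thread_mask) on
 * entry to sleep/winkle and sets it again on wakeup.
 */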
static void pnv_alloc_idle_core_states(void)
{
	int i, j;
	int nr_cores = cpu_nr_cores();
	u32 *core_idle_state;

	/*
	 * core_idle_state - The low 8 bits track the idle state of each
	 * thread of the core. Bit 8 is the lock bit. Initially all
	 * thread bits are set; they are cleared when the thread enters
	 * a deep idle state like sleep or winkle. The lock bit is
	 * initially clear and has two purposes:
	 * a. While the first thread is restoring core state, it prevents
	 *    other threads in the core from switching to process context.
	 * b. While the last thread in the core is saving the core state,
	 *    it prevents a different thread from waking up.
	 */
	for (i = 0; i < nr_cores; i++) {
		int first_cpu = i * threads_per_core;
		int node = cpu_to_node(first_cpu);

		core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
		*core_idle_state = PNV_CORE_IDLE_THREAD_BITS;

		for (j = 0; j < threads_per_core; j++) {
			int cpu = first_cpu + j;

			paca[cpu].core_idle_state_ptr = core_idle_state;
			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
			paca[cpu].thread_mask = 1 << j;
		}
	}

	update_subcore_sibling_mask();

	if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
		pnv_save_sprs_for_winkle();
}

u32 pnv_get_supported_cpuidle_states(void)
{
	return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);

static int __init pnv_init_idle_states(void)
{
	struct device_node *power_mgt;
	int dt_idle_states;
	u32 *flags;
	int i;

	supported_cpuidle_states = 0;

	if (cpuidle_disable != IDLE_NO_OVERRIDE)
		goto out;

	if (!firmware_has_feature(FW_FEATURE_OPALv3))
		goto out;

	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
	if (!power_mgt) {
		pr_warn("opal: PowerMgmt Node not found\n");
		goto out;
	}
	dt_idle_states = of_property_count_u32_elems(power_mgt,
			"ibm,cpu-idle-state-flags");
	if (dt_idle_states < 0) {
		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
		goto out;
	}

	flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL);
	if (!flags)
		goto out;
	if (of_property_read_u32_array(power_mgt,
			"ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
		goto out_free;
	}

	for (i = 0; i < dt_idle_states; i++)
		supported_cpuidle_states |= flags[i];

	/*
	 * Patch out the fastsleep workaround call sites if the
	 * erratum does not apply on this system.
	 */
	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
		patch_instruction(
			(unsigned int *)pnv_fastsleep_workaround_at_entry,
			PPC_INST_NOP);
		patch_instruction(
			(unsigned int *)pnv_fastsleep_workaround_at_exit,
			PPC_INST_NOP);
	}
	pnv_alloc_idle_core_states();
out_free:
	kfree(flags);
out:
	return 0;
}

subsys_initcall(pnv_init_idle_states);

static int __init pnv_probe(void)
{
	unsigned long root = of_get_flat_dt_root();

	if (!of_flat_dt_is_compatible(root, "ibm,powernv"))
		return 0;

	hpte_init_native();

	if (firmware_has_feature(FW_FEATURE_OPAL))
		pnv_setup_machdep_opal();

	pr_debug("PowerNV detected !\n");

	return 1;
}

/*
 * Returns the cpu frequency for 'cpu' in Hz. This is used by
 * /proc/cpuinfo.
 */
static unsigned long pnv_get_proc_freq(unsigned int cpu)
{
	unsigned long ret_freq;

	/* cpufreq_quick_get() reports kHz; convert to Hz */
	ret_freq = cpufreq_quick_get(cpu) * 1000ul;

	/*
	 * If the backend cpufreq driver does not exist,
	 * fall back to the old way of reporting the clock rate.
	 */
	if (!ret_freq)
		ret_freq = ppc_proc_freq;
	return ret_freq;
}
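/*
 * Machine description for the PowerNV platform. pnv_probe() runs
 * early against the flattened device tree; the remaining callbacks
 * are hooked into the generic powerpc machine framework once the
 * "ibm,powernv" compatible match succeeds.
 */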
define_machine(powernv) {
	.name			= "PowerNV",
	.probe			= pnv_probe,
	.init_early		= pnv_init_early,
	.setup_arch		= pnv_setup_arch,
	.init_IRQ		= pnv_init_IRQ,
	.show_cpuinfo		= pnv_show_cpuinfo,
	.get_proc_freq		= pnv_get_proc_freq,
	.progress		= pnv_progress,
	.machine_shutdown	= pnv_shutdown,
	.power_save		= power7_idle,
	.calibrate_decr		= generic_calibrate_decr,
	.dma_set_mask		= pnv_dma_set_mask,
	.dma_get_required_mask	= pnv_dma_get_required_mask,
#ifdef CONFIG_KEXEC
	.kexec_cpu_down		= pnv_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	.memory_block_size	= pnv_memory_block_size,
#endif
};