1 /* 2 * SMP boot-related support 3 * 4 * Copyright (C) 1998-2003 Hewlett-Packard Co 5 * David Mosberger-Tang <davidm@hpl.hp.com> 6 * 7 * 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here. 8 * 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code. 9 * 02/07/31 David Mosberger <davidm@hpl.hp.com> Switch over to hotplug-CPU boot-sequence. 10 * smp_boot_cpus()/smp_commence() is replaced by 11 * smp_prepare_cpus()/__cpu_up()/smp_cpus_done(). 12 */ 13 #include <linux/config.h> 14 15 #include <linux/module.h> 16 #include <linux/acpi.h> 17 #include <linux/bootmem.h> 18 #include <linux/cpu.h> 19 #include <linux/delay.h> 20 #include <linux/init.h> 21 #include <linux/interrupt.h> 22 #include <linux/irq.h> 23 #include <linux/kernel.h> 24 #include <linux/kernel_stat.h> 25 #include <linux/mm.h> 26 #include <linux/notifier.h> 27 #include <linux/smp.h> 28 #include <linux/smp_lock.h> 29 #include <linux/spinlock.h> 30 #include <linux/efi.h> 31 #include <linux/percpu.h> 32 #include <linux/bitops.h> 33 34 #include <asm/atomic.h> 35 #include <asm/cache.h> 36 #include <asm/current.h> 37 #include <asm/delay.h> 38 #include <asm/ia32.h> 39 #include <asm/io.h> 40 #include <asm/irq.h> 41 #include <asm/machvec.h> 42 #include <asm/mca.h> 43 #include <asm/page.h> 44 #include <asm/pgalloc.h> 45 #include <asm/pgtable.h> 46 #include <asm/processor.h> 47 #include <asm/ptrace.h> 48 #include <asm/sal.h> 49 #include <asm/system.h> 50 #include <asm/tlbflush.h> 51 #include <asm/unistd.h> 52 53 #define SMP_DEBUG 0 54 55 #if SMP_DEBUG 56 #define Dprintk(x...) printk(x) 57 #else 58 #define Dprintk(x...) 59 #endif 60 61 62 /* 63 * ITC synchronization related stuff: 64 */ 65 #define MASTER 0 66 #define SLAVE (SMP_CACHE_BYTES/8) 67 68 #define NUM_ROUNDS 64 /* magic value */ 69 #define NUM_ITERS 5 /* likewise */ 70 71 static DEFINE_SPINLOCK(itc_sync_lock); 72 static volatile unsigned long go[SLAVE + 1]; 73 74 #define DEBUG_ITC_SYNC 0 75 76 extern void __devinit calibrate_delay (void); 77 extern void start_ap (void); 78 extern unsigned long ia64_iobase; 79 80 task_t *task_for_booting_cpu; 81 82 /* 83 * State for each CPU 84 */ 85 DEFINE_PER_CPU(int, cpu_state); 86 87 /* Bitmasks of currently online, and possible CPUs */ 88 cpumask_t cpu_online_map; 89 EXPORT_SYMBOL(cpu_online_map); 90 cpumask_t cpu_possible_map; 91 EXPORT_SYMBOL(cpu_possible_map); 92 93 /* which logical CPU number maps to which CPU (physical APIC ID) */ 94 volatile int ia64_cpu_to_sapicid[NR_CPUS]; 95 EXPORT_SYMBOL(ia64_cpu_to_sapicid); 96 97 static volatile cpumask_t cpu_callin_map; 98 99 struct smp_boot_data smp_boot_data __initdata; 100 101 unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */ 102 103 char __initdata no_int_routing; 104 105 unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ 106 107 static int __init 108 nointroute (char *str) 109 { 110 no_int_routing = 1; 111 printk ("no_int_routing on\n"); 112 return 1; 113 } 114 115 __setup("nointroute", nointroute); 116 117 void 118 sync_master (void *arg) 119 { 120 unsigned long flags, i; 121 122 go[MASTER] = 0; 123 124 local_irq_save(flags); 125 { 126 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { 127 while (!go[MASTER]); 128 go[MASTER] = 0; 129 go[SLAVE] = ia64_get_itc(); 130 } 131 } 132 local_irq_restore(flags); 133 } 134 135 /* 136 * Return the number of cycles by which our itc differs from the itc on the master 137 * (time-keeper) CPU. A positive number indicates our itc is ahead of the master, 138 * negative that it is behind. 139 */ 140 static inline long 141 get_delta (long *rt, long *master) 142 { 143 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; 144 unsigned long tcenter, t0, t1, tm; 145 long i; 146 147 for (i = 0; i < NUM_ITERS; ++i) { 148 t0 = ia64_get_itc(); 149 go[MASTER] = 1; 150 while (!(tm = go[SLAVE])); 151 go[SLAVE] = 0; 152 t1 = ia64_get_itc(); 153 154 if (t1 - t0 < best_t1 - best_t0) 155 best_t0 = t0, best_t1 = t1, best_tm = tm; 156 } 157 158 *rt = best_t1 - best_t0; 159 *master = best_tm - best_t0; 160 161 /* average best_t0 and best_t1 without overflow: */ 162 tcenter = (best_t0/2 + best_t1/2); 163 if (best_t0 % 2 + best_t1 % 2 == 2) 164 ++tcenter; 165 return tcenter - best_tm; 166 } 167 168 /* 169 * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU 170 * (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of 171 * unaccounted-for errors (such as getting a machine check in the middle of a calibration 172 * step). The basic idea is for the slave to ask the master what itc value it has and to 173 * read its own itc before and after the master responds. Each iteration gives us three 174 * timestamps: 175 * 176 * slave master 177 * 178 * t0 ---\ 179 * ---\ 180 * ---> 181 * tm 182 * /--- 183 * /--- 184 * t1 <--- 185 * 186 * 187 * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0 188 * and t1. If we achieve this, the clocks are synchronized provided the interconnect 189 * between the slave and the master is symmetric. Even if the interconnect were 190 * asymmetric, we would still know that the synchronization error is smaller than the 191 * roundtrip latency (t0 - t1). 192 * 193 * When the interconnect is quiet and symmetric, this lets us synchronize the itc to 194 * within one or two cycles. However, we can only *guarantee* that the synchronization is 195 * accurate to within a round-trip time, which is typically in the range of several 196 * hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually 197 * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better 198 * than half a micro second or so. 199 */ 200 void 201 ia64_sync_itc (unsigned int master) 202 { 203 long i, delta, adj, adjust_latency = 0, done = 0; 204 unsigned long flags, rt, master_time_stamp, bound; 205 #if DEBUG_ITC_SYNC 206 struct { 207 long rt; /* roundtrip time */ 208 long master; /* master's timestamp */ 209 long diff; /* difference between midpoint and master's timestamp */ 210 long lat; /* estimate of itc adjustment latency */ 211 } t[NUM_ROUNDS]; 212 #endif 213 214 /* 215 * Make sure local timer ticks are disabled while we sync. If 216 * they were enabled, we'd have to worry about nasty issues 217 * like setting the ITC ahead of (or a long time before) the 218 * next scheduled tick. 219 */ 220 BUG_ON((ia64_get_itv() & (1 << 16)) == 0); 221 222 go[MASTER] = 1; 223 224 if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) { 225 printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master); 226 return; 227 } 228 229 while (go[MASTER]); /* wait for master to be ready */ 230 231 spin_lock_irqsave(&itc_sync_lock, flags); 232 { 233 for (i = 0; i < NUM_ROUNDS; ++i) { 234 delta = get_delta(&rt, &master_time_stamp); 235 if (delta == 0) { 236 done = 1; /* let's lock on to this... */ 237 bound = rt; 238 } 239 240 if (!done) { 241 if (i > 0) { 242 adjust_latency += -delta; 243 adj = -delta + adjust_latency/4; 244 } else 245 adj = -delta; 246 247 ia64_set_itc(ia64_get_itc() + adj); 248 } 249 #if DEBUG_ITC_SYNC 250 t[i].rt = rt; 251 t[i].master = master_time_stamp; 252 t[i].diff = delta; 253 t[i].lat = adjust_latency/4; 254 #endif 255 } 256 } 257 spin_unlock_irqrestore(&itc_sync_lock, flags); 258 259 #if DEBUG_ITC_SYNC 260 for (i = 0; i < NUM_ROUNDS; ++i) 261 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", 262 t[i].rt, t[i].master, t[i].diff, t[i].lat); 263 #endif 264 265 printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, " 266 "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt); 267 } 268 269 /* 270 * Ideally sets up per-cpu profiling hooks. Doesn't do much now... 271 */ 272 static inline void __devinit 273 smp_setup_percpu_timer (void) 274 { 275 } 276 277 static void __devinit 278 smp_callin (void) 279 { 280 int cpuid, phys_id; 281 extern void ia64_init_itm(void); 282 283 #ifdef CONFIG_PERFMON 284 extern void pfm_init_percpu(void); 285 #endif 286 287 cpuid = smp_processor_id(); 288 phys_id = hard_smp_processor_id(); 289 290 if (cpu_online(cpuid)) { 291 printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", 292 phys_id, cpuid); 293 BUG(); 294 } 295 296 lock_ipi_calllock(); 297 cpu_set(cpuid, cpu_online_map); 298 unlock_ipi_calllock(); 299 300 smp_setup_percpu_timer(); 301 302 ia64_mca_cmc_vector_setup(); /* Setup vector on AP */ 303 304 #ifdef CONFIG_PERFMON 305 pfm_init_percpu(); 306 #endif 307 308 local_irq_enable(); 309 310 if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { 311 /* 312 * Synchronize the ITC with the BP. Need to do this after irqs are 313 * enabled because ia64_sync_itc() calls smp_call_function_single(), which 314 * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls 315 * local_bh_enable(), which bugs out if irqs are not enabled... 316 */ 317 Dprintk("Going to syncup ITC with BP.\n"); 318 ia64_sync_itc(0); 319 } 320 321 /* 322 * Get our bogomips. 323 */ 324 ia64_init_itm(); 325 calibrate_delay(); 326 local_cpu_data->loops_per_jiffy = loops_per_jiffy; 327 328 #ifdef CONFIG_IA32_SUPPORT 329 ia32_gdt_init(); 330 #endif 331 332 /* 333 * Allow the master to continue. 334 */ 335 cpu_set(cpuid, cpu_callin_map); 336 Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid); 337 } 338 339 340 /* 341 * Activate a secondary processor. head.S calls this. 342 */ 343 int __devinit 344 start_secondary (void *unused) 345 { 346 /* Early console may use I/O ports */ 347 ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); 348 349 Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); 350 efi_map_pal_code(); 351 cpu_init(); 352 smp_callin(); 353 354 cpu_idle(); 355 return 0; 356 } 357 358 struct pt_regs * __devinit idle_regs(struct pt_regs *regs) 359 { 360 return NULL; 361 } 362 363 struct create_idle { 364 struct task_struct *idle; 365 struct completion done; 366 int cpu; 367 }; 368 369 void 370 do_fork_idle(void *_c_idle) 371 { 372 struct create_idle *c_idle = _c_idle; 373 374 c_idle->idle = fork_idle(c_idle->cpu); 375 complete(&c_idle->done); 376 } 377 378 static int __devinit 379 do_boot_cpu (int sapicid, int cpu) 380 { 381 int timeout; 382 struct create_idle c_idle = { 383 .cpu = cpu, 384 .done = COMPLETION_INITIALIZER(c_idle.done), 385 }; 386 DECLARE_WORK(work, do_fork_idle, &c_idle); 387 /* 388 * We can't use kernel_thread since we must avoid to reschedule the child. 389 */ 390 if (!keventd_up() || current_is_keventd()) 391 work.func(work.data); 392 else { 393 schedule_work(&work); 394 wait_for_completion(&c_idle.done); 395 } 396 397 if (IS_ERR(c_idle.idle)) 398 panic("failed fork for CPU %d", cpu); 399 task_for_booting_cpu = c_idle.idle; 400 401 Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); 402 403 platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); 404 405 /* 406 * Wait 10s total for the AP to start 407 */ 408 Dprintk("Waiting on callin_map ..."); 409 for (timeout = 0; timeout < 100000; timeout++) { 410 if (cpu_isset(cpu, cpu_callin_map)) 411 break; /* It has booted */ 412 udelay(100); 413 } 414 Dprintk("\n"); 415 416 if (!cpu_isset(cpu, cpu_callin_map)) { 417 printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid); 418 ia64_cpu_to_sapicid[cpu] = -1; 419 cpu_clear(cpu, cpu_online_map); /* was set in smp_callin() */ 420 return -EINVAL; 421 } 422 return 0; 423 } 424 425 static int __init 426 decay (char *str) 427 { 428 int ticks; 429 get_option (&str, &ticks); 430 return 1; 431 } 432 433 __setup("decay=", decay); 434 435 /* 436 * Initialize the logical CPU number to SAPICID mapping 437 */ 438 void __init 439 smp_build_cpu_map (void) 440 { 441 int sapicid, cpu, i; 442 int boot_cpu_id = hard_smp_processor_id(); 443 444 for (cpu = 0; cpu < NR_CPUS; cpu++) { 445 ia64_cpu_to_sapicid[cpu] = -1; 446 #ifdef CONFIG_HOTPLUG_CPU 447 cpu_set(cpu, cpu_possible_map); 448 #endif 449 } 450 451 ia64_cpu_to_sapicid[0] = boot_cpu_id; 452 cpus_clear(cpu_present_map); 453 cpu_set(0, cpu_present_map); 454 cpu_set(0, cpu_possible_map); 455 for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) { 456 sapicid = smp_boot_data.cpu_phys_id[i]; 457 if (sapicid == boot_cpu_id) 458 continue; 459 cpu_set(cpu, cpu_present_map); 460 cpu_set(cpu, cpu_possible_map); 461 ia64_cpu_to_sapicid[cpu] = sapicid; 462 cpu++; 463 } 464 } 465 466 #ifdef CONFIG_NUMA 467 468 /* on which node is each logical CPU (one cacheline even for 64 CPUs) */ 469 u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; 470 EXPORT_SYMBOL(cpu_to_node_map); 471 /* which logical CPUs are on which nodes */ 472 cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; 473 474 /* 475 * Build cpu to node mapping and initialize the per node cpu masks. 476 */ 477 void __init 478 build_cpu_to_node_map (void) 479 { 480 int cpu, i, node; 481 482 for(node=0; node<MAX_NUMNODES; node++) 483 cpus_clear(node_to_cpu_mask[node]); 484 for(cpu = 0; cpu < NR_CPUS; ++cpu) { 485 /* 486 * All Itanium NUMA platforms I know use ACPI, so maybe we 487 * can drop this ifdef completely. [EF] 488 */ 489 #ifdef CONFIG_ACPI_NUMA 490 node = -1; 491 for (i = 0; i < NR_CPUS; ++i) 492 if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { 493 node = node_cpuid[i].nid; 494 break; 495 } 496 #else 497 # error Fixme: Dunno how to build CPU-to-node map. 498 #endif 499 cpu_to_node_map[cpu] = (node >= 0) ? node : 0; 500 if (node >= 0) 501 cpu_set(cpu, node_to_cpu_mask[node]); 502 } 503 } 504 505 #endif /* CONFIG_NUMA */ 506 507 /* 508 * Cycle through the APs sending Wakeup IPIs to boot each. 509 */ 510 void __init 511 smp_prepare_cpus (unsigned int max_cpus) 512 { 513 int boot_cpu_id = hard_smp_processor_id(); 514 515 /* 516 * Initialize the per-CPU profiling counter/multiplier 517 */ 518 519 smp_setup_percpu_timer(); 520 521 /* 522 * We have the boot CPU online for sure. 523 */ 524 cpu_set(0, cpu_online_map); 525 cpu_set(0, cpu_callin_map); 526 527 local_cpu_data->loops_per_jiffy = loops_per_jiffy; 528 ia64_cpu_to_sapicid[0] = boot_cpu_id; 529 530 printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id); 531 532 current_thread_info()->cpu = 0; 533 534 /* 535 * If SMP should be disabled, then really disable it! 536 */ 537 if (!max_cpus) { 538 printk(KERN_INFO "SMP mode deactivated.\n"); 539 cpus_clear(cpu_online_map); 540 cpus_clear(cpu_present_map); 541 cpus_clear(cpu_possible_map); 542 cpu_set(0, cpu_online_map); 543 cpu_set(0, cpu_present_map); 544 cpu_set(0, cpu_possible_map); 545 return; 546 } 547 } 548 549 void __devinit smp_prepare_boot_cpu(void) 550 { 551 cpu_set(smp_processor_id(), cpu_online_map); 552 cpu_set(smp_processor_id(), cpu_callin_map); 553 } 554 555 #ifdef CONFIG_HOTPLUG_CPU 556 extern void fixup_irqs(void); 557 /* must be called with cpucontrol mutex held */ 558 static int __devinit cpu_enable(unsigned int cpu) 559 { 560 per_cpu(cpu_state,cpu) = CPU_UP_PREPARE; 561 wmb(); 562 563 while (!cpu_online(cpu)) 564 cpu_relax(); 565 return 0; 566 } 567 568 int __cpu_disable(void) 569 { 570 int cpu = smp_processor_id(); 571 572 /* 573 * dont permit boot processor for now 574 */ 575 if (cpu == 0) 576 return -EBUSY; 577 578 fixup_irqs(); 579 local_flush_tlb_all(); 580 printk ("Disabled cpu %u\n", smp_processor_id()); 581 return 0; 582 } 583 584 void __cpu_die(unsigned int cpu) 585 { 586 unsigned int i; 587 588 for (i = 0; i < 100; i++) { 589 /* They ack this in play_dead by setting CPU_DEAD */ 590 if (per_cpu(cpu_state, cpu) == CPU_DEAD) 591 { 592 /* 593 * TBD: Enable this when physical removal 594 * or when we put the processor is put in 595 * SAL_BOOT_RENDEZ mode 596 * cpu_clear(cpu, cpu_callin_map); 597 */ 598 return; 599 } 600 msleep(100); 601 } 602 printk(KERN_ERR "CPU %u didn't die...\n", cpu); 603 } 604 #else /* !CONFIG_HOTPLUG_CPU */ 605 static int __devinit cpu_enable(unsigned int cpu) 606 { 607 return 0; 608 } 609 610 int __cpu_disable(void) 611 { 612 return -ENOSYS; 613 } 614 615 void __cpu_die(unsigned int cpu) 616 { 617 /* We said "no" in __cpu_disable */ 618 BUG(); 619 } 620 #endif /* CONFIG_HOTPLUG_CPU */ 621 622 void 623 smp_cpus_done (unsigned int dummy) 624 { 625 int cpu; 626 unsigned long bogosum = 0; 627 628 /* 629 * Allow the user to impress friends. 630 */ 631 632 for (cpu = 0; cpu < NR_CPUS; cpu++) 633 if (cpu_online(cpu)) 634 bogosum += cpu_data(cpu)->loops_per_jiffy; 635 636 printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", 637 (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); 638 } 639 640 int __devinit 641 __cpu_up (unsigned int cpu) 642 { 643 int ret; 644 int sapicid; 645 646 sapicid = ia64_cpu_to_sapicid[cpu]; 647 if (sapicid == -1) 648 return -EINVAL; 649 650 /* 651 * Already booted.. just enable and get outa idle lool 652 */ 653 if (cpu_isset(cpu, cpu_callin_map)) 654 { 655 cpu_enable(cpu); 656 local_irq_enable(); 657 while (!cpu_isset(cpu, cpu_online_map)) 658 mb(); 659 return 0; 660 } 661 /* Processor goes to start_secondary(), sets online flag */ 662 ret = do_boot_cpu(sapicid, cpu); 663 if (ret < 0) 664 return ret; 665 666 return 0; 667 } 668 669 /* 670 * Assume that CPU's have been discovered by some platform-dependent interface. For 671 * SoftSDV/Lion, that would be ACPI. 672 * 673 * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). 674 */ 675 void __init 676 init_smp_config(void) 677 { 678 struct fptr { 679 unsigned long fp; 680 unsigned long gp; 681 } *ap_startup; 682 long sal_ret; 683 684 /* Tell SAL where to drop the AP's. */ 685 ap_startup = (struct fptr *) start_ap; 686 sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, 687 ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0); 688 if (sal_ret < 0) 689 printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n", 690 ia64_sal_strerror(sal_ret)); 691 } 692 693