/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 *
 * This does not handle HOTPLUG_CPU yet.
 */
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/err.h>
#include <linux/smp.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "mmu.h"

static void __cpuinit xen_init_lock_cpu(int cpu);

cpumask_t xen_cpu_initialized_map;

/* Initialize to -1 so the failure path in xen_smp_intr_init() can tell
   which irqs were actually bound. */
static DEFINE_PER_CPU(int, resched_irq) = -1;
static DEFINE_PER_CPU(int, callfunc_irq) = -1;
static DEFINE_PER_CPU(int, callfuncsingle_irq) = -1;
static DEFINE_PER_CPU(int, debug_irq) = -1;

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);

/*
 * Reschedule call back.  Nothing to do, all the work is done
 * automatically when we return from the interrupt.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
#ifdef CONFIG_X86_32
	__get_cpu_var(irq_stat).irq_resched_count++;
#else
	add_pda(irq_resched_count, 1);
#endif

	return IRQ_HANDLED;
}

static __cpuinit void cpu_bringup_and_idle(void)
{
	int cpu = smp_processor_id();

	cpu_init();
	preempt_disable();

	xen_enable_sysenter();
	xen_enable_syscall();

	cpu = smp_processor_id();
	smp_store_cpu_info(cpu);
	cpu_data(cpu).x86_max_cores = 1;
	set_cpu_sibling_map(cpu);

	xen_setup_cpu_clockevents();

	cpu_set(cpu, cpu_online_map);
	x86_write_percpu(cpu_state, CPU_ONLINE);
	wmb();

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();			/* make sure everything is out */
	cpu_idle();
}
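/*
 * Per-cpu interrupt setup.  Each IPI is backed by a per-cpu Xen event
 * channel bound to a dynamically allocated irq; the irq numbers are
 * remembered in the per-cpu variables above so that a partially
 * completed setup can be unwound if a later binding fails.
 */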
static int xen_smp_intr_init(unsigned int cpu)
{
	int rc;
	const char *resched_name, *callfunc_name, *debug_name;

	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
				    cpu,
				    xen_reschedule_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    resched_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(resched_irq, cpu) = rc;

	callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
				    cpu,
				    xen_call_function_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    callfunc_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(callfunc_irq, cpu) = rc;

	debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
	rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
				     IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
				     debug_name, NULL);
	if (rc < 0)
		goto fail;
	per_cpu(debug_irq, cpu) = rc;

	callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
				    cpu,
				    xen_call_function_single_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    callfunc_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(callfuncsingle_irq, cpu) = rc;

	return 0;

fail:
	if (per_cpu(resched_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
	if (per_cpu(callfunc_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
	if (per_cpu(debug_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
	if (per_cpu(callfuncsingle_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);

	return rc;
}

static void __init xen_fill_possible_map(void)
{
	int i, rc;

	for (i = 0; i < NR_CPUS; i++) {
		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
		if (rc >= 0) {
			num_processors++;
			cpu_set(i, cpu_possible_map);
		}
	}
}

static void __init xen_smp_prepare_boot_cpu(void)
{
	BUG_ON(smp_processor_id() != 0);
	native_smp_prepare_boot_cpu();

	/* We've switched to the "real" per-cpu gdt, so make sure the
	   old memory can be recycled */
	make_lowmem_page_readwrite(&per_cpu_var(gdt_page));

	xen_setup_vcpu_info_placement();
}

static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned cpu;

	xen_init_lock_cpu(0);

	smp_store_cpu_info(0);
	cpu_data(0).x86_max_cores = 1;
	set_cpu_sibling_map(0);

	if (xen_smp_intr_init(0))
		BUG();

	xen_cpu_initialized_map = cpumask_of_cpu(0);

	/* Restrict the possible_map according to max_cpus. */
	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
		for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
			continue;
		cpu_clear(cpu, cpu_possible_map);
	}

	for_each_possible_cpu(cpu) {
		struct task_struct *idle;

		if (cpu == 0)
			continue;

		idle = fork_idle(cpu);
		if (IS_ERR(idle))
			panic("failed fork for CPU %d", cpu);

		cpu_set(cpu, cpu_present_map);
	}

	/* init_xenbus_allowed_cpumask(); */
}
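/*
 * Bringing a secondary vcpu up is a two-step conversation with the
 * hypervisor: cpu_initialize_context() hands Xen a vcpu_guest_context
 * describing the initial register state (entry point, stack, GDT and
 * pagetable), and xen_cpu_up() then starts the vcpu with VCPUOP_up and
 * waits for it to mark itself CPU_ONLINE.
 */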
static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;

	if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_table(cpu);

	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.ds = __USER_DS;
	ctxt->user_regs.es = __USER_DS;
	ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
	ctxt->user_regs.fs = __KERNEL_PERCPU;
#endif
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	BUG_ON((unsigned long)gdt & ~PAGE_MASK);
	make_lowmem_page_readonly(gdt);

	ctxt->gdt_frames[0] = virt_to_mfn(gdt);
	ctxt->gdt_ents = GDT_ENTRIES;

	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
	ctxt->event_callback_cs = __KERNEL_CS;
	ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
	ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}

static int __cpuinit xen_cpu_up(unsigned int cpu)
{
	struct task_struct *idle = idle_task(cpu);
	int rc;

#if 0
	rc = cpu_up_check(cpu);
	if (rc)
		return rc;
#endif

#ifdef CONFIG_X86_64
	/* Allocate node local memory for AP pdas */
	WARN_ON(cpu == 0);
	if (cpu > 0) {
		rc = get_local_pda(cpu);
		if (rc)
			return rc;
	}
#endif

#ifdef CONFIG_X86_32
	init_gdt(cpu);
	per_cpu(current_task, cpu) = idle;
	irq_ctx_init(cpu);
#else
	cpu_pda(cpu)->pcurrent = idle;
	clear_tsk_thread_flag(idle, TIF_FORK);
#endif
	xen_setup_timer(cpu);
	xen_init_lock_cpu(cpu);

	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

	/* make sure interrupts start blocked */
	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

	rc = cpu_initialize_context(cpu, idle);
	if (rc)
		return rc;

	if (num_online_cpus() == 1)
		alternatives_smp_switch(1);

	rc = xen_smp_intr_init(cpu);
	if (rc)
		return rc;

	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
	BUG_ON(rc);

	while (per_cpu(cpu_state, cpu) != CPU_ONLINE) {
		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
		barrier();
	}

	return 0;
}

static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
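/*
 * Stopping a cpu is likewise just a hypercall: stop_self() runs on the
 * target vcpu, switches to swapper_pg_dir so no process pagetable stays
 * pinned, and then takes the vcpu offline with VCPUOP_down.
 */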
static void stop_self(void *v)
{
	int cpu = smp_processor_id();

	/* make sure we're not pinning something down */
	load_cr3(swapper_pg_dir);
	/* should set up a minimal gdt */

	HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
	BUG();
}

static void xen_smp_send_stop(void)
{
	smp_call_function(stop_self, NULL, 0);
}

static void xen_smp_send_reschedule(int cpu)
{
	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}

static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
	unsigned cpu;

	cpus_and(mask, mask, cpu_online_map);

	for_each_cpu_mask_nr(cpu, mask)
		xen_send_IPI_one(cpu, vector);
}

static void xen_smp_send_call_function_ipi(cpumask_t mask)
{
	int cpu;

	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);

	/* Make sure other vcpus get a chance to run if they need to. */
	for_each_cpu_mask_nr(cpu, mask) {
		if (xen_vcpu_stolen(cpu)) {
			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
			break;
		}
	}
}

static void xen_smp_send_call_function_single_ipi(int cpu)
{
	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
}

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
	irq_enter();
	generic_smp_call_function_interrupt();
#ifdef CONFIG_X86_32
	__get_cpu_var(irq_stat).irq_call_count++;
#else
	add_pda(irq_call_count, 1);
#endif
	irq_exit();

	return IRQ_HANDLED;
}

static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
	irq_enter();
	generic_smp_call_function_single_interrupt();
#ifdef CONFIG_X86_32
	__get_cpu_var(irq_stat).irq_call_count++;
#else
	add_pda(irq_call_count, 1);
#endif
	irq_exit();

	return IRQ_HANDLED;
}
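/*
 * Paravirtualized spinlocks.  The lock word is a single byte; a cpu
 * that fails to take the lock after a short spin registers itself in
 * 'spinners' and in its per-cpu lock_spinners pointer, then blocks by
 * polling a per-cpu "kicker" event channel (xen_poll_irq).  The
 * unlocker checks 'spinners' and sends the kicker IPI to one waiting
 * cpu, so a vcpu that cannot get the lock yields its physical cpu
 * instead of burning it.
 */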
struct xen_spinlock {
	unsigned char lock;		/* 0 -> free; 1 -> locked */
	unsigned short spinners;	/* count of waiting cpus */
};

static int xen_spin_is_locked(struct raw_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	return xl->lock != 0;
}

static int xen_spin_is_contended(struct raw_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	/* Not strictly true; this is only the count of contended
	   lock-takers entering the slow path. */
	return xl->spinners != 0;
}

static int xen_spin_trylock(struct raw_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	u8 old = 1;

	asm("xchgb %b0,%1"
	    : "+q" (old), "+m" (xl->lock) : : "memory");

	return old == 0;
}

static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);

static inline void spinning_lock(struct xen_spinlock *xl)
{
	__get_cpu_var(lock_spinners) = xl;
	wmb();			/* set lock of interest before count */
	asm(LOCK_PREFIX " incw %0"
	    : "+m" (xl->spinners) : : "memory");
}

static inline void unspinning_lock(struct xen_spinlock *xl)
{
	asm(LOCK_PREFIX " decw %0"
	    : "+m" (xl->spinners) : : "memory");
	wmb();			/* decrement count before clearing lock */
	__get_cpu_var(lock_spinners) = NULL;
}

static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	int irq = __get_cpu_var(lock_kicker_irq);
	int ret;

	/* If kicker interrupts not initialized yet, just spin */
	if (irq == -1)
		return 0;

	/* announce we're spinning */
	spinning_lock(xl);

	/* clear pending */
	xen_clear_irq_pending(irq);

	/* check again make sure it didn't become free while
	   we weren't looking */
	ret = xen_spin_trylock(lock);
	if (ret)
		goto out;

	/* block until irq becomes pending */
	xen_poll_irq(irq);
	kstat_this_cpu.irqs[irq]++;

out:
	unspinning_lock(xl);
	return ret;
}

static void xen_spin_lock(struct raw_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	int timeout;
	u8 oldval;

	do {
		timeout = 1 << 10;

		asm("1: xchgb %1,%0\n"
		    "   testb %1,%1\n"
		    "   jz 3f\n"
		    "2: rep;nop\n"
		    "   cmpb $0,%0\n"
		    "   je 1b\n"
		    "   dec %2\n"
		    "   jnz 2b\n"
		    "3:\n"
		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
		    : "1" (1)
		    : "memory");

	} while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
}

static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
{
	int cpu;

	for_each_online_cpu(cpu) {
		/* XXX should mix up next cpu selection */
		if (per_cpu(lock_spinners, cpu) == xl) {
			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
			break;
		}
	}
}

static void xen_spin_unlock(struct raw_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	smp_wmb();		/* make sure no writes get moved after unlock */
	xl->lock = 0;		/* release lock */

	/* make sure unlock happens before kick */
	barrier();

	if (unlikely(xl->spinners))
		xen_spin_unlock_slow(xl);
}
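/*
 * The kicker irq is never handled as a normal interrupt: it is bound so
 * that the underlying event channel exists and can be polled, then
 * immediately disabled.  xen_reschedule_interrupt serves only as a
 * placeholder handler here.
 */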
static __cpuinit void xen_init_lock_cpu(int cpu)
{
	int irq;
	const char *name;

	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
				     cpu,
				     xen_reschedule_interrupt,
				     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				     name,
				     NULL);

	if (irq >= 0) {
		disable_irq(irq); /* make sure it's never delivered */
		per_cpu(lock_kicker_irq, cpu) = irq;
	}

	printk("cpu %d spinlock event irq %d\n", cpu, irq);
}

static void __init xen_init_spinlocks(void)
{
	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
	pv_lock_ops.spin_lock = xen_spin_lock;
	pv_lock_ops.spin_trylock = xen_spin_trylock;
	pv_lock_ops.spin_unlock = xen_spin_unlock;
}

static const struct smp_ops xen_smp_ops __initdata = {
	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
	.smp_prepare_cpus = xen_smp_prepare_cpus,
	.cpu_up = xen_cpu_up,
	.smp_cpus_done = xen_smp_cpus_done,

	.smp_send_stop = xen_smp_send_stop,
	.smp_send_reschedule = xen_smp_send_reschedule,

	.send_call_func_ipi = xen_smp_send_call_function_ipi,
	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};

void __init xen_smp_init(void)
{
	smp_ops = xen_smp_ops;
	xen_fill_possible_map();
	xen_init_spinlocks();
}