/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops. SMP under Xen is
 * very straightforward. Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of. As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 */
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "mmu.h"

cpumask_var_t xen_cpu_initialized_map;

static DEFINE_PER_CPU(int, xen_resched_irq);
static DEFINE_PER_CPU(int, xen_callfunc_irq);
static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
static DEFINE_PER_CPU(int, xen_debug_irq) = -1;

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);

/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
        inc_irq_stat(irq_resched_count);

        return IRQ_HANDLED;
}

static __cpuinit void cpu_bringup(void)
{
        int cpu = smp_processor_id();

        cpu_init();
        touch_softlockup_watchdog();
        preempt_disable();

        xen_enable_sysenter();
        xen_enable_syscall();

        cpu = smp_processor_id();
        smp_store_cpu_info(cpu);
        cpu_data(cpu).x86_max_cores = 1;
        set_cpu_sibling_map(cpu);

        xen_setup_cpu_clockevents();

        set_cpu_online(cpu, true);
        percpu_write(cpu_state, CPU_ONLINE);
        wmb();

        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();

        wmb();                  /* make sure everything is out */
}

static __cpuinit void cpu_bringup_and_idle(void)
{
        cpu_bringup();
        cpu_idle();
}

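/*
 * Bind this CPU's IPI event channels (reschedule, call-function and
 * call-function-single) plus the per-cpu VIRQ_DEBUG interrupt.  On
 * failure, unbind whatever was already set up for this CPU and return
 * the error.
 */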
static int xen_smp_intr_init(unsigned int cpu)
{
        int rc;
        const char *resched_name, *callfunc_name, *debug_name;

        resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
                                    cpu,
                                    xen_reschedule_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    resched_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_resched_irq, cpu) = rc;

        callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
                                    cpu,
                                    xen_call_function_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_callfunc_irq, cpu) = rc;

        debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
        rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
                                     IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
                                     debug_name, NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_debug_irq, cpu) = rc;

        callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
                                    cpu,
                                    xen_call_function_single_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_callfuncsingle_irq, cpu) = rc;

        return 0;

 fail:
        if (per_cpu(xen_resched_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
        if (per_cpu(xen_callfunc_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
        if (per_cpu(xen_debug_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
        if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
                                       NULL);

        return rc;
}

static void __init xen_fill_possible_map(void)
{
        int i, rc;

        if (xen_initial_domain())
                return;

        for (i = 0; i < nr_cpu_ids; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0) {
                        num_processors++;
                        set_cpu_possible(i, true);
                }
        }
}

static void __init xen_filter_cpu_maps(void)
{
        int i, rc;

        if (!xen_initial_domain())
                return;

        num_processors = 0;
        disabled_cpus = 0;
        for (i = 0; i < nr_cpu_ids; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0) {
                        num_processors++;
                        set_cpu_possible(i, true);
                } else {
                        set_cpu_possible(i, false);
                        set_cpu_present(i, false);
                }
        }
}

static void __init xen_smp_prepare_boot_cpu(void)
{
        BUG_ON(smp_processor_id() != 0);
        native_smp_prepare_boot_cpu();

        /* We've switched to the "real" per-cpu gdt, so make sure the
           old memory can be recycled */
        make_lowmem_page_readwrite(xen_initial_gdt);

        xen_filter_cpu_maps();
        xen_setup_vcpu_info_placement();
}

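/*
 * Prepare the boot CPU (per-cpu data, sibling map and IPI event
 * channels), clamp the possible map to max_cpus and fork an idle task
 * for each secondary CPU we may bring up later.
 */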
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned cpu;

        xen_init_lock_cpu(0);

        smp_store_cpu_info(0);
        cpu_data(0).x86_max_cores = 1;
        set_cpu_sibling_map(0);

        if (xen_smp_intr_init(0))
                BUG();

        if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
                panic("could not allocate xen_cpu_initialized_map\n");

        cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
                        continue;
                set_cpu_possible(cpu, false);
        }

        for_each_possible_cpu(cpu) {
                struct task_struct *idle;

                if (cpu == 0)
                        continue;

                idle = fork_idle(cpu);
                if (IS_ERR(idle))
                        panic("failed fork for CPU %d", cpu);

                set_cpu_present(cpu, true);
        }
}

static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
        struct vcpu_guest_context *ctxt;
        struct desc_struct *gdt;
        unsigned long gdt_mfn;

        if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
                return 0;

        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
        if (ctxt == NULL)
                return -ENOMEM;

        gdt = get_cpu_gdt_table(cpu);

        ctxt->flags = VGCF_IN_KERNEL;
        ctxt->user_regs.ds = __USER_DS;
        ctxt->user_regs.es = __USER_DS;
        ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
        ctxt->user_regs.fs = __KERNEL_PERCPU;
        ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#else
        ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
        ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
        ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

        memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

        xen_copy_trap_info(ctxt->trap_ctxt);

        ctxt->ldt_ents = 0;

        BUG_ON((unsigned long)gdt & ~PAGE_MASK);

        gdt_mfn = arbitrary_virt_to_mfn(gdt);
        make_lowmem_page_readonly(gdt);
        make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

        ctxt->gdt_frames[0] = gdt_mfn;
        ctxt->gdt_ents = GDT_ENTRIES;

        ctxt->user_regs.cs = __KERNEL_CS;
        ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

        ctxt->kernel_ss = __KERNEL_DS;
        ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
        ctxt->event_callback_cs = __KERNEL_CS;
        ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
        ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
        ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

        per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
        ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
                BUG();

        kfree(ctxt);
        return 0;
}

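/*
 * Bring up a secondary VCPU: set up its per-cpu state, timer and
 * spinlock interrupt, hand Xen its initial context via
 * VCPUOP_initialise, kick it with VCPUOP_up and then wait until
 * cpu_bringup() marks it CPU_ONLINE.
 */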
static int __cpuinit xen_cpu_up(unsigned int cpu)
{
        struct task_struct *idle = idle_task(cpu);
        int rc;

        per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
        irq_ctx_init(cpu);
#else
        clear_tsk_thread_flag(idle, TIF_FORK);
        per_cpu(kernel_stack, cpu) =
                (unsigned long)task_stack_page(idle) -
                KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
        xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
        xen_init_lock_cpu(cpu);

        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

        /* make sure interrupts start blocked */
        per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

        rc = cpu_initialize_context(cpu, idle);
        if (rc)
                return rc;

        if (num_online_cpus() == 1)
                alternatives_smp_switch(1);

        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;

        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
        BUG_ON(rc);

        while (per_cpu(cpu_state, cpu) != CPU_ONLINE) {
                HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
                barrier();
        }

        return 0;
}

static void xen_smp_cpus_done(unsigned int max_cpus)
{
}

#ifdef CONFIG_HOTPLUG_CPU
static int xen_cpu_disable(void)
{
        unsigned int cpu = smp_processor_id();
        if (cpu == 0)
                return -EBUSY;

        cpu_disable_common();

        load_cr3(swapper_pg_dir);
        return 0;
}

static void xen_cpu_die(unsigned int cpu)
{
        while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
                current->state = TASK_UNINTERRUPTIBLE;
                schedule_timeout(HZ/10);
        }
        unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
        xen_uninit_lock_cpu(cpu);
        xen_teardown_timer(cpu);

        if (num_online_cpus() == 1)
                alternatives_smp_switch(0);
}

static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
{
        play_dead_common();
        HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
        cpu_bringup();
}

#else /* !CONFIG_HOTPLUG_CPU */
static int xen_cpu_disable(void)
{
        return -ENOSYS;
}

static void xen_cpu_die(unsigned int cpu)
{
        BUG();
}

static void xen_play_dead(void)
{
        BUG();
}

#endif

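/*
 * Called on each of the other CPUs via smp_call_function(): drop back
 * to the reference page tables, mark the CPU offline and ask Xen to
 * take the VCPU down.
 */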
static void stop_self(void *v)
{
        int cpu = smp_processor_id();

        /* make sure we're not pinning something down */
        load_cr3(swapper_pg_dir);
        /* should set up a minimal gdt */

        set_cpu_online(cpu, false);

        HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
        BUG();
}

static void xen_stop_other_cpus(int wait)
{
        smp_call_function(stop_self, NULL, wait);
}

static void xen_smp_send_reschedule(int cpu)
{
        xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}

static void xen_send_IPI_mask(const struct cpumask *mask,
                              enum ipi_vector vector)
{
        unsigned cpu;

        for_each_cpu_and(cpu, mask, cpu_online_mask)
                xen_send_IPI_one(cpu, vector);
}

static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
{
        int cpu;

        xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);

        /* Make sure other vcpus get a chance to run if they need to. */
        for_each_cpu(cpu, mask) {
                if (xen_vcpu_stolen(cpu)) {
                        HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
                        break;
                }
        }
}

static void xen_smp_send_call_function_single_ipi(int cpu)
{
        xen_send_IPI_mask(cpumask_of(cpu),
                          XEN_CALL_FUNCTION_SINGLE_VECTOR);
}

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
        irq_enter();
        generic_smp_call_function_interrupt();
        inc_irq_stat(irq_call_count);
        irq_exit();

        return IRQ_HANDLED;
}

static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
        irq_enter();
        generic_smp_call_function_single_interrupt();
        inc_irq_stat(irq_call_count);
        irq_exit();

        return IRQ_HANDLED;
}

static const struct smp_ops xen_smp_ops __initdata = {
        .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
        .smp_prepare_cpus = xen_smp_prepare_cpus,
        .smp_cpus_done = xen_smp_cpus_done,

        .cpu_up = xen_cpu_up,
        .cpu_die = xen_cpu_die,
        .cpu_disable = xen_cpu_disable,
        .play_dead = xen_play_dead,

        .stop_other_cpus = xen_stop_other_cpus,
        .smp_send_reschedule = xen_smp_send_reschedule,

        .send_call_func_ipi = xen_smp_send_call_function_ipi,
        .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};

void __init xen_smp_init(void)
{
        smp_ops = xen_smp_ops;
        xen_fill_possible_map();
        xen_init_spinlocks();
}