/*
 * KVM paravirt_ops implementation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright IBM Corporation, 2007
 *   Authors: Anthony Liguori <aliguori@us.ibm.com>
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kprobes.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/idle.h>

static int kvmapf = 1;

static int parse_no_kvmapf(char *arg)
{
	kvmapf = 0;
	return 0;
}

early_param("no-kvmapf", parse_no_kvmapf);

static int steal_acc = 1;
static int parse_no_stealacc(char *arg)
{
	steal_acc = 0;
	return 0;
}

early_param("no-steal-acc", parse_no_stealacc);

static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;

/*
 * No need for any "IO delay" on KVM
 */
static void kvm_io_delay(void)
{
}

#define KVM_TASK_SLEEP_HASHBITS 8
#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)

struct kvm_task_sleep_node {
	struct hlist_node link;
	wait_queue_head_t wq;
	u32 token;
	int cpu;
	bool halted;
};

static struct kvm_task_sleep_head {
	spinlock_t lock;
	struct hlist_head list;
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];

static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
						  u32 token)
{
	struct hlist_node *p;

	hlist_for_each(p, &b->list) {
		struct kvm_task_sleep_node *n =
			hlist_entry(p, typeof(*n), link);
		if (n->token == token)
			return n;
	}

	return NULL;
}

/*
 * Wait until the host signals that the faulting page has been brought in:
 * sleep on a per-token wait queue, or halt in place when the current
 * context cannot be rescheduled (idle task or nested preempt count).
 */
void kvm_async_pf_task_wait(u32 token)
{
	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
	struct kvm_task_sleep_node n, *e;
	DEFINE_WAIT(wait);
	int cpu, idle;

	cpu = get_cpu();
	idle = idle_cpu(cpu);
	put_cpu();

	spin_lock(&b->lock);
	e = _find_apf_task(b, token);
	if (e) {
		/* dummy entry exists -> wake up was delivered ahead of PF */
		hlist_del(&e->link);
		kfree(e);
		spin_unlock(&b->lock);
		return;
	}

	n.token = token;
	n.cpu = smp_processor_id();
	n.halted = idle || preempt_count() > 1;
	init_waitqueue_head(&n.wq);
	hlist_add_head(&n.link, &b->list);
	spin_unlock(&b->lock);

	for (;;) {
		if (!n.halted)
			prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
		if (hlist_unhashed(&n.link))
			break;

		if (!n.halted) {
			local_irq_enable();
			schedule();
			local_irq_disable();
		} else {
			/*
			 * We cannot reschedule. So halt.
			 */
			native_safe_halt();
			local_irq_disable();
		}
	}
	if (!n.halted)
		finish_wait(&n.wq, &wait);

	return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);

static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
	hlist_del_init(&n->link);
	if (n->halted)
		smp_send_reschedule(n->cpu);
	else if (waitqueue_active(&n->wq))
		wake_up(&n->wq);
}

static void apf_task_wake_all(void)
{
	int i;

	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
		struct hlist_node *p, *next;
		struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
		spin_lock(&b->lock);
		hlist_for_each_safe(p, next, &b->list) {
			struct kvm_task_sleep_node *n =
				hlist_entry(p, typeof(*n), link);
			if (n->cpu == smp_processor_id())
				apf_task_wake_one(n);
		}
		spin_unlock(&b->lock);
	}
}

void kvm_async_pf_task_wake(u32 token)
{
	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
	struct kvm_task_sleep_node *n;

	if (token == ~0) {
		apf_task_wake_all();
		return;
	}

again:
	spin_lock(&b->lock);
	n = _find_apf_task(b, token);
	if (!n) {
		/*
		 * async PF was not yet handled.
		 * Add dummy entry for the token.
		 */
		n = kzalloc(sizeof(*n), GFP_ATOMIC);
		if (!n) {
			/*
			 * Allocation failed! Busy wait while other cpu
			 * handles async PF.
			 */
			spin_unlock(&b->lock);
			cpu_relax();
			goto again;
		}
		n->token = token;
		n->cpu = smp_processor_id();
		init_waitqueue_head(&n->wq);
		hlist_add_head(&n->link, &b->list);
	} else
		apf_task_wake_one(n);
	spin_unlock(&b->lock);
	return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);

u32 kvm_read_and_reset_pf_reason(void)
{
	u32 reason = 0;

	if (__get_cpu_var(apf_reason).enabled) {
		reason = __get_cpu_var(apf_reason).reason;
		__get_cpu_var(apf_reason).reason = 0;
	}

	return reason;
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);

dotraplinkage void __kprobes
do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
{
	switch (kvm_read_and_reset_pf_reason()) {
	default:
		do_page_fault(regs, error_code);
		break;
	case KVM_PV_REASON_PAGE_NOT_PRESENT:
		/* page is swapped out by the host. */
		kvm_async_pf_task_wait((u32)read_cr2());
		break;
	case KVM_PV_REASON_PAGE_READY:
		rcu_irq_enter();
		exit_idle();
		kvm_async_pf_task_wake((u32)read_cr2());
		rcu_irq_exit();
		break;
	}
}

static void __init paravirt_ops_setup(void)
{
	pv_info.name = "KVM";
	pv_info.paravirt_enabled = 1;

	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
		pv_cpu_ops.io_delay = kvm_io_delay;

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif
}

static void kvm_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct kvm_steal_time *st = &per_cpu(steal_time, cpu);

	if (!has_steal_clock)
		return;

	memset(st, 0, sizeof(*st));

	wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED));
	printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n",
		cpu, __pa(st));
}

void __cpuinit kvm_guest_cpu_init(void)
{
	if (!kvm_para_available())
		return;

	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
		u64 pa = __pa(&__get_cpu_var(apf_reason));

#ifdef CONFIG_PREEMPT
		pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif
		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
		__get_cpu_var(apf_reason).enabled = 1;
		printk(KERN_INFO"KVM setup async PF for cpu %d\n",
		       smp_processor_id());
	}

	if (has_steal_clock)
		kvm_register_steal_time();
}

static void kvm_pv_disable_apf(void *unused)
{
	if (!__get_cpu_var(apf_reason).enabled)
		return;

	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
	__get_cpu_var(apf_reason).enabled = 0;

	printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
	       smp_processor_id());
}

static int kvm_pv_reboot_notify(struct notifier_block *nb,
				unsigned long code, void *unused)
{
	if (code == SYS_RESTART)
		on_each_cpu(kvm_pv_disable_apf, NULL, 1);
	return NOTIFY_DONE;
}

static struct notifier_block kvm_pv_reboot_nb = {
	.notifier_call = kvm_pv_reboot_notify,
};

static u64 kvm_steal_clock(int cpu)
{
	u64 steal;
	struct kvm_steal_time *src;
	int version;

	src = &per_cpu(steal_time, cpu);
	/*
	 * Retry while the host is updating the record: an odd version
	 * means an update is in progress (seqcount-style protocol).
	 */
	do {
		version = src->version;
		rmb();
		steal = src->steal;
		rmb();
	} while ((version & 1) || (version != src->version));

	return steal;
}

void kvm_disable_steal_time(void)
{
	if (!has_steal_clock)
		return;

	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}

#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
#ifdef CONFIG_KVM_CLOCK
	WARN_ON(kvm_register_clock("primary cpu clock"));
#endif
	kvm_guest_cpu_init();
	native_smp_prepare_boot_cpu();
}

static void __cpuinit kvm_guest_cpu_online(void *dummy)
{
	kvm_guest_cpu_init();
}

static void kvm_guest_cpu_offline(void *dummy)
{
	kvm_disable_steal_time();
	kvm_pv_disable_apf(NULL);
	apf_task_wake_all();
}

static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	int cpu = (unsigned long)hcpu;
	switch (action) {
	case CPU_ONLINE:
	case CPU_DOWN_FAILED:
	case CPU_ONLINE_FROZEN:
		smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata kvm_cpu_notifier = {
	.notifier_call = kvm_cpu_notify,
};
#endif

static void __init kvm_apf_trap_init(void)
{
	set_intr_gate(14, &async_page_fault);
}

void __init kvm_guest_init(void)
{
	int i;

	if (!kvm_para_available())
		return;

	paravirt_ops_setup();
	register_reboot_notifier(&kvm_pv_reboot_nb);
	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
		spin_lock_init(&async_pf_sleepers[i].lock);
	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
		x86_init.irqs.trap_init = kvm_apf_trap_init;

	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
		has_steal_clock = 1;
		pv_time_ops.steal_clock = kvm_steal_clock;
	}

#ifdef CONFIG_SMP
	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
	register_cpu_notifier(&kvm_cpu_notifier);
#else
	kvm_guest_cpu_init();
#endif
}

static __init int activate_jump_labels(void)
{
	if (has_steal_clock) {
		static_key_slow_inc(&paravirt_steal_enabled);
		if (steal_acc)
			static_key_slow_inc(&paravirt_steal_rq_enabled);
	}

	return 0;
}
arch_initcall(activate_jump_labels);