1 /* 2 * This program is free software; you can redistribute it and/or modify 3 * it under the terms of the GNU General Public License, version 2, as 4 * published by the Free Software Foundation. 5 * 6 * This program is distributed in the hope that it will be useful, 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 * GNU General Public License for more details. 10 * 11 * You should have received a copy of the GNU General Public License 12 * along with this program; if not, write to the Free Software 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 * 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 17 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> 18 */ 19 20 #include <linux/types.h> 21 #include <linux/string.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/highmem.h> 25 #include <linux/gfp.h> 26 #include <linux/slab.h> 27 #include <linux/sched/signal.h> 28 #include <linux/hugetlb.h> 29 #include <linux/list.h> 30 #include <linux/anon_inodes.h> 31 #include <linux/iommu.h> 32 #include <linux/file.h> 33 34 #include <asm/tlbflush.h> 35 #include <asm/kvm_ppc.h> 36 #include <asm/kvm_book3s.h> 37 #include <asm/book3s/64/mmu-hash.h> 38 #include <asm/hvcall.h> 39 #include <asm/synch.h> 40 #include <asm/ppc-opcode.h> 41 #include <asm/kvm_host.h> 42 #include <asm/udbg.h> 43 #include <asm/iommu.h> 44 #include <asm/tce.h> 45 #include <asm/mmu_context.h> 46 47 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) 48 { 49 return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; 50 } 51 52 static unsigned long kvmppc_stt_pages(unsigned long tce_pages) 53 { 54 unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + 55 (tce_pages * sizeof(struct page *)); 56 57 return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; 58 } 59 60 static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) 61 { 62 long ret = 0; 63 64 if (!current || !current->mm) 65 return ret; /* process exited */ 66 67 down_write(¤t->mm->mmap_sem); 68 69 if (inc) { 70 unsigned long locked, lock_limit; 71 72 locked = current->mm->locked_vm + stt_pages; 73 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 74 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) 75 ret = -ENOMEM; 76 else 77 current->mm->locked_vm += stt_pages; 78 } else { 79 if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) 80 stt_pages = current->mm->locked_vm; 81 82 current->mm->locked_vm -= stt_pages; 83 } 84 85 pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, 86 inc ? '+' : '-', 87 stt_pages << PAGE_SHIFT, 88 current->mm->locked_vm << PAGE_SHIFT, 89 rlimit(RLIMIT_MEMLOCK), 90 ret ? " - exceeded" : ""); 91 92 up_write(¤t->mm->mmap_sem); 93 94 return ret; 95 } 96 97 static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) 98 { 99 struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, 100 struct kvmppc_spapr_tce_iommu_table, rcu); 101 102 iommu_tce_table_put(stit->tbl); 103 104 kfree(stit); 105 } 106 107 static void kvm_spapr_tce_liobn_put(struct kref *kref) 108 { 109 struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref, 110 struct kvmppc_spapr_tce_iommu_table, kref); 111 112 list_del_rcu(&stit->next); 113 114 call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); 115 } 116 117 extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, 118 struct iommu_group *grp) 119 { 120 int i; 121 struct kvmppc_spapr_tce_table *stt; 122 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 123 struct iommu_table_group *table_group = NULL; 124 125 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 126 127 table_group = iommu_group_get_iommudata(grp); 128 if (WARN_ON(!table_group)) 129 continue; 130 131 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 132 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 133 if (table_group->tables[i] != stit->tbl) 134 continue; 135 136 kref_put(&stit->kref, kvm_spapr_tce_liobn_put); 137 return; 138 } 139 } 140 } 141 } 142 143 extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, 144 struct iommu_group *grp) 145 { 146 struct kvmppc_spapr_tce_table *stt = NULL; 147 bool found = false; 148 struct iommu_table *tbl = NULL; 149 struct iommu_table_group *table_group; 150 long i; 151 struct kvmppc_spapr_tce_iommu_table *stit; 152 struct fd f; 153 154 f = fdget(tablefd); 155 if (!f.file) 156 return -EBADF; 157 158 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 159 if (stt == f.file->private_data) { 160 found = true; 161 break; 162 } 163 } 164 165 fdput(f); 166 167 if (!found) 168 return -EINVAL; 169 170 table_group = iommu_group_get_iommudata(grp); 171 if (WARN_ON(!table_group)) 172 return -EFAULT; 173 174 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 175 struct iommu_table *tbltmp = table_group->tables[i]; 176 177 if (!tbltmp) 178 continue; 179 /* Make sure hardware table parameters are compatible */ 180 if ((tbltmp->it_page_shift <= stt->page_shift) && 181 (tbltmp->it_offset << tbltmp->it_page_shift == 182 stt->offset << stt->page_shift) && 183 (tbltmp->it_size << tbltmp->it_page_shift == 184 stt->size << stt->page_shift)) { 185 /* 186 * Reference the table to avoid races with 187 * add/remove DMA windows. 188 */ 189 tbl = iommu_tce_table_get(tbltmp); 190 break; 191 } 192 } 193 if (!tbl) 194 return -EINVAL; 195 196 list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { 197 if (tbl != stit->tbl) 198 continue; 199 200 if (!kref_get_unless_zero(&stit->kref)) { 201 /* stit is being destroyed */ 202 iommu_tce_table_put(tbl); 203 return -ENOTTY; 204 } 205 /* 206 * The table is already known to this KVM, we just increased 207 * its KVM reference counter and can return. 208 */ 209 return 0; 210 } 211 212 stit = kzalloc(sizeof(*stit), GFP_KERNEL); 213 if (!stit) { 214 iommu_tce_table_put(tbl); 215 return -ENOMEM; 216 } 217 218 stit->tbl = tbl; 219 kref_init(&stit->kref); 220 221 list_add_rcu(&stit->next, &stt->iommu_tables); 222 223 return 0; 224 } 225 226 static void release_spapr_tce_table(struct rcu_head *head) 227 { 228 struct kvmppc_spapr_tce_table *stt = container_of(head, 229 struct kvmppc_spapr_tce_table, rcu); 230 unsigned long i, npages = kvmppc_tce_pages(stt->size); 231 232 for (i = 0; i < npages; i++) 233 __free_page(stt->pages[i]); 234 235 kfree(stt); 236 } 237 238 static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) 239 { 240 struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; 241 struct page *page; 242 243 if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) 244 return VM_FAULT_SIGBUS; 245 246 page = stt->pages[vmf->pgoff]; 247 get_page(page); 248 vmf->page = page; 249 return 0; 250 } 251 252 static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { 253 .fault = kvm_spapr_tce_fault, 254 }; 255 256 static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) 257 { 258 vma->vm_ops = &kvm_spapr_tce_vm_ops; 259 return 0; 260 } 261 262 static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) 263 { 264 struct kvmppc_spapr_tce_table *stt = filp->private_data; 265 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 266 struct kvm *kvm = stt->kvm; 267 268 mutex_lock(&kvm->lock); 269 list_del_rcu(&stt->list); 270 mutex_unlock(&kvm->lock); 271 272 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 273 WARN_ON(!kref_read(&stit->kref)); 274 while (1) { 275 if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put)) 276 break; 277 } 278 } 279 280 kvm_put_kvm(stt->kvm); 281 282 kvmppc_account_memlimit( 283 kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); 284 call_rcu(&stt->rcu, release_spapr_tce_table); 285 286 return 0; 287 } 288 289 static const struct file_operations kvm_spapr_tce_fops = { 290 .mmap = kvm_spapr_tce_mmap, 291 .release = kvm_spapr_tce_release, 292 }; 293 294 long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 295 struct kvm_create_spapr_tce_64 *args) 296 { 297 struct kvmppc_spapr_tce_table *stt = NULL; 298 struct kvmppc_spapr_tce_table *siter; 299 unsigned long npages, size; 300 int ret = -ENOMEM; 301 int i; 302 303 if (!args->size || args->page_shift < 12 || args->page_shift > 34 || 304 (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) 305 return -EINVAL; 306 307 size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); 308 npages = kvmppc_tce_pages(size); 309 ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); 310 if (ret) 311 return ret; 312 313 ret = -ENOMEM; 314 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), 315 GFP_KERNEL); 316 if (!stt) 317 goto fail_acct; 318 319 stt->liobn = args->liobn; 320 stt->page_shift = args->page_shift; 321 stt->offset = args->offset; 322 stt->size = size; 323 stt->kvm = kvm; 324 INIT_LIST_HEAD_RCU(&stt->iommu_tables); 325 326 for (i = 0; i < npages; i++) { 327 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); 328 if (!stt->pages[i]) 329 goto fail; 330 } 331 332 mutex_lock(&kvm->lock); 333 334 /* Check this LIOBN hasn't been previously allocated */ 335 ret = 0; 336 list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { 337 if (siter->liobn == args->liobn) { 338 ret = -EBUSY; 339 break; 340 } 341 } 342 343 if (!ret) 344 ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, 345 stt, O_RDWR | O_CLOEXEC); 346 347 if (ret >= 0) { 348 list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); 349 kvm_get_kvm(kvm); 350 } 351 352 mutex_unlock(&kvm->lock); 353 354 if (ret >= 0) 355 return ret; 356 357 fail: 358 for (i = 0; i < npages; i++) 359 if (stt->pages[i]) 360 __free_page(stt->pages[i]); 361 362 kfree(stt); 363 fail_acct: 364 kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); 365 return ret; 366 } 367 368 static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) 369 { 370 unsigned long hpa = 0; 371 enum dma_data_direction dir = DMA_NONE; 372 373 iommu_tce_xchg(tbl, entry, &hpa, &dir); 374 } 375 376 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, 377 struct iommu_table *tbl, unsigned long entry) 378 { 379 struct mm_iommu_table_group_mem_t *mem = NULL; 380 const unsigned long pgsize = 1ULL << tbl->it_page_shift; 381 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 382 383 if (!pua) 384 /* it_userspace allocation might be delayed */ 385 return H_TOO_HARD; 386 387 mem = mm_iommu_lookup(kvm->mm, *pua, pgsize); 388 if (!mem) 389 return H_TOO_HARD; 390 391 mm_iommu_mapped_dec(mem); 392 393 *pua = 0; 394 395 return H_SUCCESS; 396 } 397 398 static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, 399 struct iommu_table *tbl, unsigned long entry) 400 { 401 enum dma_data_direction dir = DMA_NONE; 402 unsigned long hpa = 0; 403 long ret; 404 405 if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) 406 return H_HARDWARE; 407 408 if (dir == DMA_NONE) 409 return H_SUCCESS; 410 411 ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 412 if (ret != H_SUCCESS) 413 iommu_tce_xchg(tbl, entry, &hpa, &dir); 414 415 return ret; 416 } 417 418 static long kvmppc_tce_iommu_unmap(struct kvm *kvm, 419 struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, 420 unsigned long entry) 421 { 422 unsigned long i, ret = H_SUCCESS; 423 unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); 424 unsigned long io_entry = entry * subpages; 425 426 for (i = 0; i < subpages; ++i) { 427 ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i); 428 if (ret != H_SUCCESS) 429 break; 430 } 431 432 return ret; 433 } 434 435 long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, 436 unsigned long entry, unsigned long ua, 437 enum dma_data_direction dir) 438 { 439 long ret; 440 unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 441 struct mm_iommu_table_group_mem_t *mem; 442 443 if (!pua) 444 /* it_userspace allocation might be delayed */ 445 return H_TOO_HARD; 446 447 mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift); 448 if (!mem) 449 /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ 450 return H_TOO_HARD; 451 452 if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) 453 return H_HARDWARE; 454 455 if (mm_iommu_mapped_inc(mem)) 456 return H_CLOSED; 457 458 ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); 459 if (WARN_ON_ONCE(ret)) { 460 mm_iommu_mapped_dec(mem); 461 return H_HARDWARE; 462 } 463 464 if (dir != DMA_NONE) 465 kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 466 467 *pua = ua; 468 469 return 0; 470 } 471 472 static long kvmppc_tce_iommu_map(struct kvm *kvm, 473 struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, 474 unsigned long entry, unsigned long ua, 475 enum dma_data_direction dir) 476 { 477 unsigned long i, pgoff, ret = H_SUCCESS; 478 unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); 479 unsigned long io_entry = entry * subpages; 480 481 for (i = 0, pgoff = 0; i < subpages; 482 ++i, pgoff += IOMMU_PAGE_SIZE(tbl)) { 483 484 ret = kvmppc_tce_iommu_do_map(kvm, tbl, 485 io_entry + i, ua + pgoff, dir); 486 if (ret != H_SUCCESS) 487 break; 488 } 489 490 return ret; 491 } 492 493 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 494 unsigned long ioba, unsigned long tce) 495 { 496 struct kvmppc_spapr_tce_table *stt; 497 long ret, idx; 498 struct kvmppc_spapr_tce_iommu_table *stit; 499 unsigned long entry, ua = 0; 500 enum dma_data_direction dir; 501 502 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ 503 /* liobn, ioba, tce); */ 504 505 stt = kvmppc_find_table(vcpu->kvm, liobn); 506 if (!stt) 507 return H_TOO_HARD; 508 509 ret = kvmppc_ioba_validate(stt, ioba, 1); 510 if (ret != H_SUCCESS) 511 return ret; 512 513 ret = kvmppc_tce_validate(stt, tce); 514 if (ret != H_SUCCESS) 515 return ret; 516 517 dir = iommu_tce_direction(tce); 518 519 idx = srcu_read_lock(&vcpu->kvm->srcu); 520 521 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, 522 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) { 523 ret = H_PARAMETER; 524 goto unlock_exit; 525 } 526 527 entry = ioba >> stt->page_shift; 528 529 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 530 if (dir == DMA_NONE) 531 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt, 532 stit->tbl, entry); 533 else 534 ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, 535 entry, ua, dir); 536 537 if (ret == H_SUCCESS) 538 continue; 539 540 if (ret == H_TOO_HARD) 541 goto unlock_exit; 542 543 WARN_ON_ONCE(1); 544 kvmppc_clear_tce(stit->tbl, entry); 545 } 546 547 kvmppc_tce_put(stt, entry, tce); 548 549 unlock_exit: 550 srcu_read_unlock(&vcpu->kvm->srcu, idx); 551 552 return ret; 553 } 554 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); 555 556 long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, 557 unsigned long liobn, unsigned long ioba, 558 unsigned long tce_list, unsigned long npages) 559 { 560 struct kvmppc_spapr_tce_table *stt; 561 long i, ret = H_SUCCESS, idx; 562 unsigned long entry, ua = 0; 563 u64 __user *tces; 564 u64 tce; 565 struct kvmppc_spapr_tce_iommu_table *stit; 566 567 stt = kvmppc_find_table(vcpu->kvm, liobn); 568 if (!stt) 569 return H_TOO_HARD; 570 571 entry = ioba >> stt->page_shift; 572 /* 573 * SPAPR spec says that the maximum size of the list is 512 TCEs 574 * so the whole table fits in 4K page 575 */ 576 if (npages > 512) 577 return H_PARAMETER; 578 579 if (tce_list & (SZ_4K - 1)) 580 return H_PARAMETER; 581 582 ret = kvmppc_ioba_validate(stt, ioba, npages); 583 if (ret != H_SUCCESS) 584 return ret; 585 586 idx = srcu_read_lock(&vcpu->kvm->srcu); 587 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { 588 ret = H_TOO_HARD; 589 goto unlock_exit; 590 } 591 tces = (u64 __user *) ua; 592 593 for (i = 0; i < npages; ++i) { 594 if (get_user(tce, tces + i)) { 595 ret = H_TOO_HARD; 596 goto unlock_exit; 597 } 598 tce = be64_to_cpu(tce); 599 600 ret = kvmppc_tce_validate(stt, tce); 601 if (ret != H_SUCCESS) 602 goto unlock_exit; 603 604 if (kvmppc_gpa_to_ua(vcpu->kvm, 605 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), 606 &ua, NULL)) 607 return H_PARAMETER; 608 609 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 610 ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, 611 stit->tbl, entry + i, ua, 612 iommu_tce_direction(tce)); 613 614 if (ret == H_SUCCESS) 615 continue; 616 617 if (ret == H_TOO_HARD) 618 goto unlock_exit; 619 620 WARN_ON_ONCE(1); 621 kvmppc_clear_tce(stit->tbl, entry); 622 } 623 624 kvmppc_tce_put(stt, entry + i, tce); 625 } 626 627 unlock_exit: 628 srcu_read_unlock(&vcpu->kvm->srcu, idx); 629 630 return ret; 631 } 632 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); 633 634 long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, 635 unsigned long liobn, unsigned long ioba, 636 unsigned long tce_value, unsigned long npages) 637 { 638 struct kvmppc_spapr_tce_table *stt; 639 long i, ret; 640 struct kvmppc_spapr_tce_iommu_table *stit; 641 642 stt = kvmppc_find_table(vcpu->kvm, liobn); 643 if (!stt) 644 return H_TOO_HARD; 645 646 ret = kvmppc_ioba_validate(stt, ioba, npages); 647 if (ret != H_SUCCESS) 648 return ret; 649 650 /* Check permission bits only to allow userspace poison TCE for debug */ 651 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) 652 return H_PARAMETER; 653 654 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 655 unsigned long entry = ioba >> stt->page_shift; 656 657 for (i = 0; i < npages; ++i) { 658 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt, 659 stit->tbl, entry + i); 660 661 if (ret == H_SUCCESS) 662 continue; 663 664 if (ret == H_TOO_HARD) 665 return ret; 666 667 WARN_ON_ONCE(1); 668 kvmppc_clear_tce(stit->tbl, entry); 669 } 670 } 671 672 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) 673 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); 674 675 return H_SUCCESS; 676 } 677 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); 678