1 /* 2 * This program is free software; you can redistribute it and/or modify 3 * it under the terms of the GNU General Public License, version 2, as 4 * published by the Free Software Foundation. 5 * 6 * This program is distributed in the hope that it will be useful, 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 * GNU General Public License for more details. 10 * 11 * You should have received a copy of the GNU General Public License 12 * along with this program; if not, write to the Free Software 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 * 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 17 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> 18 */ 19 20 #include <linux/types.h> 21 #include <linux/string.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/highmem.h> 25 #include <linux/gfp.h> 26 #include <linux/slab.h> 27 #include <linux/sched/signal.h> 28 #include <linux/hugetlb.h> 29 #include <linux/list.h> 30 #include <linux/anon_inodes.h> 31 #include <linux/iommu.h> 32 #include <linux/file.h> 33 34 #include <asm/kvm_ppc.h> 35 #include <asm/kvm_book3s.h> 36 #include <asm/book3s/64/mmu-hash.h> 37 #include <asm/hvcall.h> 38 #include <asm/synch.h> 39 #include <asm/ppc-opcode.h> 40 #include <asm/kvm_host.h> 41 #include <asm/udbg.h> 42 #include <asm/iommu.h> 43 #include <asm/tce.h> 44 #include <asm/mmu_context.h> 45 46 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) 47 { 48 return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; 49 } 50 51 static unsigned long kvmppc_stt_pages(unsigned long tce_pages) 52 { 53 unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + 54 (tce_pages * sizeof(struct page *)); 55 56 return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; 57 } 58 59 static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) 60 { 61 long ret = 0; 62 63 if (!current || !current->mm) 64 return ret; /* process exited */ 65 66 down_write(¤t->mm->mmap_sem); 67 68 if (inc) { 69 unsigned long locked, lock_limit; 70 71 locked = current->mm->locked_vm + stt_pages; 72 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 73 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) 74 ret = -ENOMEM; 75 else 76 current->mm->locked_vm += stt_pages; 77 } else { 78 if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) 79 stt_pages = current->mm->locked_vm; 80 81 current->mm->locked_vm -= stt_pages; 82 } 83 84 pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, 85 inc ? '+' : '-', 86 stt_pages << PAGE_SHIFT, 87 current->mm->locked_vm << PAGE_SHIFT, 88 rlimit(RLIMIT_MEMLOCK), 89 ret ? " - exceeded" : ""); 90 91 up_write(¤t->mm->mmap_sem); 92 93 return ret; 94 } 95 96 static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) 97 { 98 struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, 99 struct kvmppc_spapr_tce_iommu_table, rcu); 100 101 iommu_tce_table_put(stit->tbl); 102 103 kfree(stit); 104 } 105 106 static void kvm_spapr_tce_liobn_put(struct kref *kref) 107 { 108 struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref, 109 struct kvmppc_spapr_tce_iommu_table, kref); 110 111 list_del_rcu(&stit->next); 112 113 call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); 114 } 115 116 extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, 117 struct iommu_group *grp) 118 { 119 int i; 120 struct kvmppc_spapr_tce_table *stt; 121 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 122 struct iommu_table_group *table_group = NULL; 123 124 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 125 126 table_group = iommu_group_get_iommudata(grp); 127 if (WARN_ON(!table_group)) 128 continue; 129 130 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 131 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 132 if (table_group->tables[i] != stit->tbl) 133 continue; 134 135 kref_put(&stit->kref, kvm_spapr_tce_liobn_put); 136 return; 137 } 138 } 139 } 140 } 141 142 extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, 143 struct iommu_group *grp) 144 { 145 struct kvmppc_spapr_tce_table *stt = NULL; 146 bool found = false; 147 struct iommu_table *tbl = NULL; 148 struct iommu_table_group *table_group; 149 long i; 150 struct kvmppc_spapr_tce_iommu_table *stit; 151 struct fd f; 152 153 f = fdget(tablefd); 154 if (!f.file) 155 return -EBADF; 156 157 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 158 if (stt == f.file->private_data) { 159 found = true; 160 break; 161 } 162 } 163 164 fdput(f); 165 166 if (!found) 167 return -EINVAL; 168 169 table_group = iommu_group_get_iommudata(grp); 170 if (WARN_ON(!table_group)) 171 return -EFAULT; 172 173 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 174 struct iommu_table *tbltmp = table_group->tables[i]; 175 176 if (!tbltmp) 177 continue; 178 /* Make sure hardware table parameters are compatible */ 179 if ((tbltmp->it_page_shift <= stt->page_shift) && 180 (tbltmp->it_offset << tbltmp->it_page_shift == 181 stt->offset << stt->page_shift) && 182 (tbltmp->it_size << tbltmp->it_page_shift >= 183 stt->size << stt->page_shift)) { 184 /* 185 * Reference the table to avoid races with 186 * add/remove DMA windows. 187 */ 188 tbl = iommu_tce_table_get(tbltmp); 189 break; 190 } 191 } 192 if (!tbl) 193 return -EINVAL; 194 195 list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { 196 if (tbl != stit->tbl) 197 continue; 198 199 if (!kref_get_unless_zero(&stit->kref)) { 200 /* stit is being destroyed */ 201 iommu_tce_table_put(tbl); 202 return -ENOTTY; 203 } 204 /* 205 * The table is already known to this KVM, we just increased 206 * its KVM reference counter and can return. 207 */ 208 return 0; 209 } 210 211 stit = kzalloc(sizeof(*stit), GFP_KERNEL); 212 if (!stit) { 213 iommu_tce_table_put(tbl); 214 return -ENOMEM; 215 } 216 217 stit->tbl = tbl; 218 kref_init(&stit->kref); 219 220 list_add_rcu(&stit->next, &stt->iommu_tables); 221 222 return 0; 223 } 224 225 static void release_spapr_tce_table(struct rcu_head *head) 226 { 227 struct kvmppc_spapr_tce_table *stt = container_of(head, 228 struct kvmppc_spapr_tce_table, rcu); 229 unsigned long i, npages = kvmppc_tce_pages(stt->size); 230 231 for (i = 0; i < npages; i++) 232 __free_page(stt->pages[i]); 233 234 kfree(stt); 235 } 236 237 static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) 238 { 239 struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; 240 struct page *page; 241 242 if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) 243 return VM_FAULT_SIGBUS; 244 245 page = stt->pages[vmf->pgoff]; 246 get_page(page); 247 vmf->page = page; 248 return 0; 249 } 250 251 static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { 252 .fault = kvm_spapr_tce_fault, 253 }; 254 255 static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) 256 { 257 vma->vm_ops = &kvm_spapr_tce_vm_ops; 258 return 0; 259 } 260 261 static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) 262 { 263 struct kvmppc_spapr_tce_table *stt = filp->private_data; 264 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 265 struct kvm *kvm = stt->kvm; 266 267 mutex_lock(&kvm->lock); 268 list_del_rcu(&stt->list); 269 mutex_unlock(&kvm->lock); 270 271 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 272 WARN_ON(!kref_read(&stit->kref)); 273 while (1) { 274 if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put)) 275 break; 276 } 277 } 278 279 kvm_put_kvm(stt->kvm); 280 281 kvmppc_account_memlimit( 282 kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); 283 call_rcu(&stt->rcu, release_spapr_tce_table); 284 285 return 0; 286 } 287 288 static const struct file_operations kvm_spapr_tce_fops = { 289 .mmap = kvm_spapr_tce_mmap, 290 .release = kvm_spapr_tce_release, 291 }; 292 293 long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 294 struct kvm_create_spapr_tce_64 *args) 295 { 296 struct kvmppc_spapr_tce_table *stt = NULL; 297 struct kvmppc_spapr_tce_table *siter; 298 unsigned long npages, size = args->size; 299 int ret = -ENOMEM; 300 int i; 301 302 if (!args->size || args->page_shift < 12 || args->page_shift > 34 || 303 (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) 304 return -EINVAL; 305 306 npages = kvmppc_tce_pages(size); 307 ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); 308 if (ret) 309 return ret; 310 311 ret = -ENOMEM; 312 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), 313 GFP_KERNEL); 314 if (!stt) 315 goto fail_acct; 316 317 stt->liobn = args->liobn; 318 stt->page_shift = args->page_shift; 319 stt->offset = args->offset; 320 stt->size = size; 321 stt->kvm = kvm; 322 INIT_LIST_HEAD_RCU(&stt->iommu_tables); 323 324 for (i = 0; i < npages; i++) { 325 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); 326 if (!stt->pages[i]) 327 goto fail; 328 } 329 330 mutex_lock(&kvm->lock); 331 332 /* Check this LIOBN hasn't been previously allocated */ 333 ret = 0; 334 list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { 335 if (siter->liobn == args->liobn) { 336 ret = -EBUSY; 337 break; 338 } 339 } 340 341 if (!ret) 342 ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, 343 stt, O_RDWR | O_CLOEXEC); 344 345 if (ret >= 0) { 346 list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); 347 kvm_get_kvm(kvm); 348 } 349 350 mutex_unlock(&kvm->lock); 351 352 if (ret >= 0) 353 return ret; 354 355 fail: 356 for (i = 0; i < npages; i++) 357 if (stt->pages[i]) 358 __free_page(stt->pages[i]); 359 360 kfree(stt); 361 fail_acct: 362 kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); 363 return ret; 364 } 365 366 static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) 367 { 368 unsigned long hpa = 0; 369 enum dma_data_direction dir = DMA_NONE; 370 371 iommu_tce_xchg(tbl, entry, &hpa, &dir); 372 } 373 374 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, 375 struct iommu_table *tbl, unsigned long entry) 376 { 377 struct mm_iommu_table_group_mem_t *mem = NULL; 378 const unsigned long pgsize = 1ULL << tbl->it_page_shift; 379 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 380 381 if (!pua) 382 /* it_userspace allocation might be delayed */ 383 return H_TOO_HARD; 384 385 mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize); 386 if (!mem) 387 return H_TOO_HARD; 388 389 mm_iommu_mapped_dec(mem); 390 391 *pua = cpu_to_be64(0); 392 393 return H_SUCCESS; 394 } 395 396 static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, 397 struct iommu_table *tbl, unsigned long entry) 398 { 399 enum dma_data_direction dir = DMA_NONE; 400 unsigned long hpa = 0; 401 long ret; 402 403 if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) 404 return H_HARDWARE; 405 406 if (dir == DMA_NONE) 407 return H_SUCCESS; 408 409 ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 410 if (ret != H_SUCCESS) 411 iommu_tce_xchg(tbl, entry, &hpa, &dir); 412 413 return ret; 414 } 415 416 static long kvmppc_tce_iommu_unmap(struct kvm *kvm, 417 struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, 418 unsigned long entry) 419 { 420 unsigned long i, ret = H_SUCCESS; 421 unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); 422 unsigned long io_entry = entry * subpages; 423 424 for (i = 0; i < subpages; ++i) { 425 ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i); 426 if (ret != H_SUCCESS) 427 break; 428 } 429 430 return ret; 431 } 432 433 long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, 434 unsigned long entry, unsigned long ua, 435 enum dma_data_direction dir) 436 { 437 long ret; 438 unsigned long hpa; 439 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 440 struct mm_iommu_table_group_mem_t *mem; 441 442 if (!pua) 443 /* it_userspace allocation might be delayed */ 444 return H_TOO_HARD; 445 446 mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift); 447 if (!mem) 448 /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ 449 return H_TOO_HARD; 450 451 if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) 452 return H_HARDWARE; 453 454 if (mm_iommu_mapped_inc(mem)) 455 return H_CLOSED; 456 457 ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); 458 if (WARN_ON_ONCE(ret)) { 459 mm_iommu_mapped_dec(mem); 460 return H_HARDWARE; 461 } 462 463 if (dir != DMA_NONE) 464 kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 465 466 *pua = cpu_to_be64(ua); 467 468 return 0; 469 } 470 471 static long kvmppc_tce_iommu_map(struct kvm *kvm, 472 struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl, 473 unsigned long entry, unsigned long ua, 474 enum dma_data_direction dir) 475 { 476 unsigned long i, pgoff, ret = H_SUCCESS; 477 unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); 478 unsigned long io_entry = entry * subpages; 479 480 for (i = 0, pgoff = 0; i < subpages; 481 ++i, pgoff += IOMMU_PAGE_SIZE(tbl)) { 482 483 ret = kvmppc_tce_iommu_do_map(kvm, tbl, 484 io_entry + i, ua + pgoff, dir); 485 if (ret != H_SUCCESS) 486 break; 487 } 488 489 return ret; 490 } 491 492 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 493 unsigned long ioba, unsigned long tce) 494 { 495 struct kvmppc_spapr_tce_table *stt; 496 long ret, idx; 497 struct kvmppc_spapr_tce_iommu_table *stit; 498 unsigned long entry, ua = 0; 499 enum dma_data_direction dir; 500 501 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ 502 /* liobn, ioba, tce); */ 503 504 stt = kvmppc_find_table(vcpu->kvm, liobn); 505 if (!stt) 506 return H_TOO_HARD; 507 508 ret = kvmppc_ioba_validate(stt, ioba, 1); 509 if (ret != H_SUCCESS) 510 return ret; 511 512 ret = kvmppc_tce_validate(stt, tce); 513 if (ret != H_SUCCESS) 514 return ret; 515 516 dir = iommu_tce_direction(tce); 517 518 idx = srcu_read_lock(&vcpu->kvm->srcu); 519 520 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, 521 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) { 522 ret = H_PARAMETER; 523 goto unlock_exit; 524 } 525 526 entry = ioba >> stt->page_shift; 527 528 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 529 if (dir == DMA_NONE) 530 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt, 531 stit->tbl, entry); 532 else 533 ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, 534 entry, ua, dir); 535 536 if (ret == H_SUCCESS) 537 continue; 538 539 if (ret == H_TOO_HARD) 540 goto unlock_exit; 541 542 WARN_ON_ONCE(1); 543 kvmppc_clear_tce(stit->tbl, entry); 544 } 545 546 kvmppc_tce_put(stt, entry, tce); 547 548 unlock_exit: 549 srcu_read_unlock(&vcpu->kvm->srcu, idx); 550 551 return ret; 552 } 553 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); 554 555 long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, 556 unsigned long liobn, unsigned long ioba, 557 unsigned long tce_list, unsigned long npages) 558 { 559 struct kvmppc_spapr_tce_table *stt; 560 long i, ret = H_SUCCESS, idx; 561 unsigned long entry, ua = 0; 562 u64 __user *tces; 563 u64 tce; 564 struct kvmppc_spapr_tce_iommu_table *stit; 565 566 stt = kvmppc_find_table(vcpu->kvm, liobn); 567 if (!stt) 568 return H_TOO_HARD; 569 570 entry = ioba >> stt->page_shift; 571 /* 572 * SPAPR spec says that the maximum size of the list is 512 TCEs 573 * so the whole table fits in 4K page 574 */ 575 if (npages > 512) 576 return H_PARAMETER; 577 578 if (tce_list & (SZ_4K - 1)) 579 return H_PARAMETER; 580 581 ret = kvmppc_ioba_validate(stt, ioba, npages); 582 if (ret != H_SUCCESS) 583 return ret; 584 585 idx = srcu_read_lock(&vcpu->kvm->srcu); 586 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { 587 ret = H_TOO_HARD; 588 goto unlock_exit; 589 } 590 tces = (u64 __user *) ua; 591 592 for (i = 0; i < npages; ++i) { 593 if (get_user(tce, tces + i)) { 594 ret = H_TOO_HARD; 595 goto unlock_exit; 596 } 597 tce = be64_to_cpu(tce); 598 599 ret = kvmppc_tce_validate(stt, tce); 600 if (ret != H_SUCCESS) 601 goto unlock_exit; 602 603 if (kvmppc_gpa_to_ua(vcpu->kvm, 604 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), 605 &ua, NULL)) 606 return H_PARAMETER; 607 608 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 609 ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, 610 stit->tbl, entry + i, ua, 611 iommu_tce_direction(tce)); 612 613 if (ret == H_SUCCESS) 614 continue; 615 616 if (ret == H_TOO_HARD) 617 goto unlock_exit; 618 619 WARN_ON_ONCE(1); 620 kvmppc_clear_tce(stit->tbl, entry); 621 } 622 623 kvmppc_tce_put(stt, entry + i, tce); 624 } 625 626 unlock_exit: 627 srcu_read_unlock(&vcpu->kvm->srcu, idx); 628 629 return ret; 630 } 631 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); 632 633 long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, 634 unsigned long liobn, unsigned long ioba, 635 unsigned long tce_value, unsigned long npages) 636 { 637 struct kvmppc_spapr_tce_table *stt; 638 long i, ret; 639 struct kvmppc_spapr_tce_iommu_table *stit; 640 641 stt = kvmppc_find_table(vcpu->kvm, liobn); 642 if (!stt) 643 return H_TOO_HARD; 644 645 ret = kvmppc_ioba_validate(stt, ioba, npages); 646 if (ret != H_SUCCESS) 647 return ret; 648 649 /* Check permission bits only to allow userspace poison TCE for debug */ 650 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) 651 return H_PARAMETER; 652 653 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 654 unsigned long entry = ioba >> stt->page_shift; 655 656 for (i = 0; i < npages; ++i) { 657 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt, 658 stit->tbl, entry + i); 659 660 if (ret == H_SUCCESS) 661 continue; 662 663 if (ret == H_TOO_HARD) 664 return ret; 665 666 WARN_ON_ONCE(1); 667 kvmppc_clear_tce(stit->tbl, entry); 668 } 669 } 670 671 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) 672 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); 673 674 return H_SUCCESS; 675 } 676 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); 677