1 /* 2 * This program is free software; you can redistribute it and/or modify 3 * it under the terms of the GNU General Public License, version 2, as 4 * published by the Free Software Foundation. 5 * 6 * This program is distributed in the hope that it will be useful, 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 * GNU General Public License for more details. 10 * 11 * You should have received a copy of the GNU General Public License 12 * along with this program; if not, write to the Free Software 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 * 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 17 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> 18 */ 19 20 #include <linux/types.h> 21 #include <linux/string.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/highmem.h> 25 #include <linux/gfp.h> 26 #include <linux/slab.h> 27 #include <linux/sched/signal.h> 28 #include <linux/hugetlb.h> 29 #include <linux/list.h> 30 #include <linux/anon_inodes.h> 31 #include <linux/iommu.h> 32 #include <linux/file.h> 33 34 #include <asm/tlbflush.h> 35 #include <asm/kvm_ppc.h> 36 #include <asm/kvm_book3s.h> 37 #include <asm/book3s/64/mmu-hash.h> 38 #include <asm/hvcall.h> 39 #include <asm/synch.h> 40 #include <asm/ppc-opcode.h> 41 #include <asm/kvm_host.h> 42 #include <asm/udbg.h> 43 #include <asm/iommu.h> 44 #include <asm/tce.h> 45 #include <asm/mmu_context.h> 46 47 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) 48 { 49 return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; 50 } 51 52 static unsigned long kvmppc_stt_pages(unsigned long tce_pages) 53 { 54 unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + 55 (tce_pages * sizeof(struct page *)); 56 57 return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; 58 } 59 60 static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) 61 { 62 long ret = 0; 63 64 if (!current || !current->mm) 65 return ret; /* process exited */ 66 67 down_write(¤t->mm->mmap_sem); 68 69 if (inc) { 70 unsigned long locked, lock_limit; 71 72 locked = current->mm->locked_vm + stt_pages; 73 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 74 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) 75 ret = -ENOMEM; 76 else 77 current->mm->locked_vm += stt_pages; 78 } else { 79 if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) 80 stt_pages = current->mm->locked_vm; 81 82 current->mm->locked_vm -= stt_pages; 83 } 84 85 pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, 86 inc ? '+' : '-', 87 stt_pages << PAGE_SHIFT, 88 current->mm->locked_vm << PAGE_SHIFT, 89 rlimit(RLIMIT_MEMLOCK), 90 ret ? " - exceeded" : ""); 91 92 up_write(¤t->mm->mmap_sem); 93 94 return ret; 95 } 96 97 static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) 98 { 99 struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, 100 struct kvmppc_spapr_tce_iommu_table, rcu); 101 102 iommu_tce_table_put(stit->tbl); 103 104 kfree(stit); 105 } 106 107 static void kvm_spapr_tce_liobn_put(struct kref *kref) 108 { 109 struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref, 110 struct kvmppc_spapr_tce_iommu_table, kref); 111 112 list_del_rcu(&stit->next); 113 114 call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); 115 } 116 117 extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, 118 struct iommu_group *grp) 119 { 120 int i; 121 struct kvmppc_spapr_tce_table *stt; 122 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 123 struct iommu_table_group *table_group = NULL; 124 125 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 126 127 table_group = iommu_group_get_iommudata(grp); 128 if (WARN_ON(!table_group)) 129 continue; 130 131 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 132 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 133 if (table_group->tables[i] != stit->tbl) 134 continue; 135 136 kref_put(&stit->kref, kvm_spapr_tce_liobn_put); 137 return; 138 } 139 } 140 } 141 } 142 143 extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, 144 struct iommu_group *grp) 145 { 146 struct kvmppc_spapr_tce_table *stt = NULL; 147 bool found = false; 148 struct iommu_table *tbl = NULL; 149 struct iommu_table_group *table_group; 150 long i; 151 struct kvmppc_spapr_tce_iommu_table *stit; 152 struct fd f; 153 154 f = fdget(tablefd); 155 if (!f.file) 156 return -EBADF; 157 158 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 159 if (stt == f.file->private_data) { 160 found = true; 161 break; 162 } 163 } 164 165 fdput(f); 166 167 if (!found) 168 return -EINVAL; 169 170 table_group = iommu_group_get_iommudata(grp); 171 if (WARN_ON(!table_group)) 172 return -EFAULT; 173 174 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 175 struct iommu_table *tbltmp = table_group->tables[i]; 176 177 if (!tbltmp) 178 continue; 179 /* 180 * Make sure hardware table parameters are exactly the same; 181 * this is used in the TCE handlers where boundary checks 182 * use only the first attached table. 183 */ 184 if ((tbltmp->it_page_shift == stt->page_shift) && 185 (tbltmp->it_offset == stt->offset) && 186 (tbltmp->it_size == stt->size)) { 187 /* 188 * Reference the table to avoid races with 189 * add/remove DMA windows. 190 */ 191 tbl = iommu_tce_table_get(tbltmp); 192 break; 193 } 194 } 195 if (!tbl) 196 return -EINVAL; 197 198 list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { 199 if (tbl != stit->tbl) 200 continue; 201 202 if (!kref_get_unless_zero(&stit->kref)) { 203 /* stit is being destroyed */ 204 iommu_tce_table_put(tbl); 205 return -ENOTTY; 206 } 207 /* 208 * The table is already known to this KVM, we just increased 209 * its KVM reference counter and can return. 210 */ 211 return 0; 212 } 213 214 stit = kzalloc(sizeof(*stit), GFP_KERNEL); 215 if (!stit) { 216 iommu_tce_table_put(tbl); 217 return -ENOMEM; 218 } 219 220 stit->tbl = tbl; 221 kref_init(&stit->kref); 222 223 list_add_rcu(&stit->next, &stt->iommu_tables); 224 225 return 0; 226 } 227 228 static void release_spapr_tce_table(struct rcu_head *head) 229 { 230 struct kvmppc_spapr_tce_table *stt = container_of(head, 231 struct kvmppc_spapr_tce_table, rcu); 232 unsigned long i, npages = kvmppc_tce_pages(stt->size); 233 234 for (i = 0; i < npages; i++) 235 __free_page(stt->pages[i]); 236 237 kfree(stt); 238 } 239 240 static int kvm_spapr_tce_fault(struct vm_fault *vmf) 241 { 242 struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; 243 struct page *page; 244 245 if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) 246 return VM_FAULT_SIGBUS; 247 248 page = stt->pages[vmf->pgoff]; 249 get_page(page); 250 vmf->page = page; 251 return 0; 252 } 253 254 static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { 255 .fault = kvm_spapr_tce_fault, 256 }; 257 258 static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) 259 { 260 vma->vm_ops = &kvm_spapr_tce_vm_ops; 261 return 0; 262 } 263 264 static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) 265 { 266 struct kvmppc_spapr_tce_table *stt = filp->private_data; 267 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 268 269 list_del_rcu(&stt->list); 270 271 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 272 WARN_ON(!kref_read(&stit->kref)); 273 while (1) { 274 if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put)) 275 break; 276 } 277 } 278 279 kvm_put_kvm(stt->kvm); 280 281 kvmppc_account_memlimit( 282 kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); 283 call_rcu(&stt->rcu, release_spapr_tce_table); 284 285 return 0; 286 } 287 288 static const struct file_operations kvm_spapr_tce_fops = { 289 .mmap = kvm_spapr_tce_mmap, 290 .release = kvm_spapr_tce_release, 291 }; 292 293 long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 294 struct kvm_create_spapr_tce_64 *args) 295 { 296 struct kvmppc_spapr_tce_table *stt = NULL; 297 struct kvmppc_spapr_tce_table *siter; 298 unsigned long npages, size; 299 int ret = -ENOMEM; 300 int i; 301 int fd = -1; 302 303 if (!args->size) 304 return -EINVAL; 305 306 size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); 307 npages = kvmppc_tce_pages(size); 308 ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); 309 if (ret) 310 return ret; 311 312 ret = -ENOMEM; 313 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), 314 GFP_KERNEL); 315 if (!stt) 316 goto fail_acct; 317 318 stt->liobn = args->liobn; 319 stt->page_shift = args->page_shift; 320 stt->offset = args->offset; 321 stt->size = size; 322 stt->kvm = kvm; 323 INIT_LIST_HEAD_RCU(&stt->iommu_tables); 324 325 for (i = 0; i < npages; i++) { 326 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); 327 if (!stt->pages[i]) 328 goto fail; 329 } 330 331 ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, 332 stt, O_RDWR | O_CLOEXEC); 333 if (ret < 0) 334 goto fail; 335 336 mutex_lock(&kvm->lock); 337 338 /* Check this LIOBN hasn't been previously allocated */ 339 ret = 0; 340 list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { 341 if (siter->liobn == args->liobn) { 342 ret = -EBUSY; 343 break; 344 } 345 } 346 347 if (!ret) { 348 list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); 349 kvm_get_kvm(kvm); 350 } 351 352 mutex_unlock(&kvm->lock); 353 354 if (!ret) 355 return fd; 356 357 put_unused_fd(fd); 358 359 fail: 360 for (i = 0; i < npages; i++) 361 if (stt->pages[i]) 362 __free_page(stt->pages[i]); 363 364 kfree(stt); 365 fail_acct: 366 kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); 367 return ret; 368 } 369 370 static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) 371 { 372 unsigned long hpa = 0; 373 enum dma_data_direction dir = DMA_NONE; 374 375 iommu_tce_xchg(tbl, entry, &hpa, &dir); 376 } 377 378 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, 379 struct iommu_table *tbl, unsigned long entry) 380 { 381 struct mm_iommu_table_group_mem_t *mem = NULL; 382 const unsigned long pgsize = 1ULL << tbl->it_page_shift; 383 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 384 385 if (!pua) 386 /* it_userspace allocation might be delayed */ 387 return H_TOO_HARD; 388 389 mem = mm_iommu_lookup(kvm->mm, *pua, pgsize); 390 if (!mem) 391 return H_TOO_HARD; 392 393 mm_iommu_mapped_dec(mem); 394 395 *pua = 0; 396 397 return H_SUCCESS; 398 } 399 400 static long kvmppc_tce_iommu_unmap(struct kvm *kvm, 401 struct iommu_table *tbl, unsigned long entry) 402 { 403 enum dma_data_direction dir = DMA_NONE; 404 unsigned long hpa = 0; 405 long ret; 406 407 if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) 408 return H_HARDWARE; 409 410 if (dir == DMA_NONE) 411 return H_SUCCESS; 412 413 ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 414 if (ret != H_SUCCESS) 415 iommu_tce_xchg(tbl, entry, &hpa, &dir); 416 417 return ret; 418 } 419 420 long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, 421 unsigned long entry, unsigned long ua, 422 enum dma_data_direction dir) 423 { 424 long ret; 425 unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 426 struct mm_iommu_table_group_mem_t *mem; 427 428 if (!pua) 429 /* it_userspace allocation might be delayed */ 430 return H_TOO_HARD; 431 432 mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift); 433 if (!mem) 434 /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ 435 return H_TOO_HARD; 436 437 if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) 438 return H_HARDWARE; 439 440 if (mm_iommu_mapped_inc(mem)) 441 return H_CLOSED; 442 443 ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); 444 if (WARN_ON_ONCE(ret)) { 445 mm_iommu_mapped_dec(mem); 446 return H_HARDWARE; 447 } 448 449 if (dir != DMA_NONE) 450 kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 451 452 *pua = ua; 453 454 return 0; 455 } 456 457 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 458 unsigned long ioba, unsigned long tce) 459 { 460 struct kvmppc_spapr_tce_table *stt; 461 long ret, idx; 462 struct kvmppc_spapr_tce_iommu_table *stit; 463 unsigned long entry, ua = 0; 464 enum dma_data_direction dir; 465 466 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ 467 /* liobn, ioba, tce); */ 468 469 stt = kvmppc_find_table(vcpu->kvm, liobn); 470 if (!stt) 471 return H_TOO_HARD; 472 473 ret = kvmppc_ioba_validate(stt, ioba, 1); 474 if (ret != H_SUCCESS) 475 return ret; 476 477 ret = kvmppc_tce_validate(stt, tce); 478 if (ret != H_SUCCESS) 479 return ret; 480 481 dir = iommu_tce_direction(tce); 482 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, 483 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) 484 return H_PARAMETER; 485 486 entry = ioba >> stt->page_shift; 487 488 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 489 if (dir == DMA_NONE) { 490 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, 491 stit->tbl, entry); 492 } else { 493 idx = srcu_read_lock(&vcpu->kvm->srcu); 494 ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, 495 entry, ua, dir); 496 srcu_read_unlock(&vcpu->kvm->srcu, idx); 497 } 498 499 if (ret == H_SUCCESS) 500 continue; 501 502 if (ret == H_TOO_HARD) 503 return ret; 504 505 WARN_ON_ONCE(1); 506 kvmppc_clear_tce(stit->tbl, entry); 507 } 508 509 kvmppc_tce_put(stt, entry, tce); 510 511 return H_SUCCESS; 512 } 513 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); 514 515 long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, 516 unsigned long liobn, unsigned long ioba, 517 unsigned long tce_list, unsigned long npages) 518 { 519 struct kvmppc_spapr_tce_table *stt; 520 long i, ret = H_SUCCESS, idx; 521 unsigned long entry, ua = 0; 522 u64 __user *tces; 523 u64 tce; 524 struct kvmppc_spapr_tce_iommu_table *stit; 525 526 stt = kvmppc_find_table(vcpu->kvm, liobn); 527 if (!stt) 528 return H_TOO_HARD; 529 530 entry = ioba >> stt->page_shift; 531 /* 532 * SPAPR spec says that the maximum size of the list is 512 TCEs 533 * so the whole table fits in 4K page 534 */ 535 if (npages > 512) 536 return H_PARAMETER; 537 538 if (tce_list & (SZ_4K - 1)) 539 return H_PARAMETER; 540 541 ret = kvmppc_ioba_validate(stt, ioba, npages); 542 if (ret != H_SUCCESS) 543 return ret; 544 545 idx = srcu_read_lock(&vcpu->kvm->srcu); 546 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { 547 ret = H_TOO_HARD; 548 goto unlock_exit; 549 } 550 tces = (u64 __user *) ua; 551 552 for (i = 0; i < npages; ++i) { 553 if (get_user(tce, tces + i)) { 554 ret = H_TOO_HARD; 555 goto unlock_exit; 556 } 557 tce = be64_to_cpu(tce); 558 559 ret = kvmppc_tce_validate(stt, tce); 560 if (ret != H_SUCCESS) 561 goto unlock_exit; 562 563 if (kvmppc_gpa_to_ua(vcpu->kvm, 564 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), 565 &ua, NULL)) 566 return H_PARAMETER; 567 568 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 569 ret = kvmppc_tce_iommu_map(vcpu->kvm, 570 stit->tbl, entry + i, ua, 571 iommu_tce_direction(tce)); 572 573 if (ret == H_SUCCESS) 574 continue; 575 576 if (ret == H_TOO_HARD) 577 goto unlock_exit; 578 579 WARN_ON_ONCE(1); 580 kvmppc_clear_tce(stit->tbl, entry); 581 } 582 583 kvmppc_tce_put(stt, entry + i, tce); 584 } 585 586 unlock_exit: 587 srcu_read_unlock(&vcpu->kvm->srcu, idx); 588 589 return ret; 590 } 591 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); 592 593 long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, 594 unsigned long liobn, unsigned long ioba, 595 unsigned long tce_value, unsigned long npages) 596 { 597 struct kvmppc_spapr_tce_table *stt; 598 long i, ret; 599 struct kvmppc_spapr_tce_iommu_table *stit; 600 601 stt = kvmppc_find_table(vcpu->kvm, liobn); 602 if (!stt) 603 return H_TOO_HARD; 604 605 ret = kvmppc_ioba_validate(stt, ioba, npages); 606 if (ret != H_SUCCESS) 607 return ret; 608 609 /* Check permission bits only to allow userspace poison TCE for debug */ 610 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) 611 return H_PARAMETER; 612 613 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 614 unsigned long entry = ioba >> stit->tbl->it_page_shift; 615 616 for (i = 0; i < npages; ++i) { 617 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, 618 stit->tbl, entry + i); 619 620 if (ret == H_SUCCESS) 621 continue; 622 623 if (ret == H_TOO_HARD) 624 return ret; 625 626 WARN_ON_ONCE(1); 627 kvmppc_clear_tce(stit->tbl, entry); 628 } 629 } 630 631 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) 632 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); 633 634 return H_SUCCESS; 635 } 636 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); 637