1 /* 2 * KVMGT - the implementation of Intel mediated pass-through framework for KVM 3 * 4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 *    Eddie Dong <eddie.dong@intel.com>
 *
 * Contributors:
 *    Niu Bing <bing.niu@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/sched/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/mdev.h>
#include <linux/debugfs.h>

#include <linux/nospec.h>

#include <drm/drm_edid.h>

#include "i915_drv.h"
#include "intel_gvt.h"
#include "gvt.h"

MODULE_IMPORT_NS(DMA_BUF);
MODULE_IMPORT_NS(I915_GVT);

/*
 * Helper macros copied from vfio-pci: the VFIO region index lives in the
 * top bits of the file offset, the low 40 bits address within the region.
 */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

/* Offset of the EDID blob within the EDID region (registers sit in front). */
#define EDID_BLOB_OFFSET (PAGE_SIZE/2)

#define OPREGION_SIGNATURE "IntelGraphicsMem"

struct vfio_region;
/* Access/teardown callbacks for a device-specific VFIO region. */
struct intel_vgpu_regops {
	size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
			size_t count, loff_t *ppos, bool iswrite);
	void (*release)(struct intel_vgpu *vgpu,
			struct vfio_region *region);
};

/* One device-specific VFIO region exposed by a vGPU. */
struct vfio_region {
	u32				type;
	u32				subtype;
	size_t				size;
	u32				flags;
	const struct intel_vgpu_regops	*ops;
	void				*data;
};

/* Per-vGPU EDID region state: the register block plus the blob storage. */
struct vfio_edid_region {
	struct vfio_region_gfx_edid vfio_edid_regs;
	void *edid_blob;
};

/* Hash-table node marking one write-protected guest frame number. */
struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

/*
 * One pinned-and-DMA-mapped chunk of guest memory.  The same node is
 * linked into two rbtrees so it can be found by gfn or by dma address.
 */
struct gvt_dma {
	struct intel_vgpu *vgpu;
	struct rb_node gfn_node;
	struct rb_node dma_addr_node;
	gfn_t gfn;
	dma_addr_t dma_addr;
	unsigned long size;
	struct kref ref;
};

#define vfio_dev_to_vgpu(vfio_dev) \
	container_of((vfio_dev), struct intel_vgpu, vfio_device)

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node);
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node);

/* sysfs: how many more vGPUs of this type can be created. */
static ssize_t available_instances_show(struct mdev_type *mtype,
					struct mdev_type_attribute *attr,
					char *buf)
{
	struct intel_vgpu_type *type;
	unsigned int num = 0;
	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;

	/*
	 * NOTE(review): &gvt->types[i] can never be NULL (address of an
	 * array element), so the NULL check below is dead code — it was
	 * presumably meant to validate the type group id; verify.
	 */
	type = &gvt->types[mtype_get_type_group_id(mtype)];
	if (!type)
		num = 0;
	else
		num = type->avail_instance;

	return sprintf(buf, "%u\n", num);
}

/* sysfs: the device API exported by a vGPU is vfio-pci. */
static ssize_t device_api_show(struct mdev_type *mtype,
		struct mdev_type_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

/* sysfs: human-readable resource summary of this vGPU type. */
static ssize_t description_show(struct mdev_type *mtype,
		struct mdev_type_attribute *attr, char *buf)
{
	struct intel_vgpu_type *type;
	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;

	type = &gvt->types[mtype_get_type_group_id(mtype)];
	if (!type)	/* NOTE(review): dead check, see available_instances_show() */
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\nresolution: %s\n"
		       "weight: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence, vgpu_edid_str(type->resolution),
		       type->weight);
}

/* sysfs: the vGPU type name. */
static ssize_t name_show(struct mdev_type *mtype,
			 struct mdev_type_attribute *attr, char *buf)
{
	struct intel_vgpu_type *type;
	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;

	type = &gvt->types[mtype_get_type_group_id(mtype)];
	if (!type)	/* NOTE(review): dead check, see available_instances_show() */
		return 0;

	return sprintf(buf, "%s\n", type->name);
}

static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);
static MDEV_TYPE_ATTR_RO(name);

static struct attribute *gvt_type_attrs[] = {
	&mdev_type_attr_available_instances.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	&mdev_type_attr_name.attr,
	NULL,
};

/* One attribute_group slot per possible vGPU type, filled at init time. */
static struct attribute_group *gvt_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

/*
 * Allocate an attribute group for each supported vGPU type.
 * Returns 0 on success or -ENOMEM; already-allocated groups are freed on
 * the unwind path.
 */
static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (!group)
			goto unwind;

		group->name = type->name;
		group->attrs = gvt_type_attrs;
		gvt_vgpu_type_groups[i] = group;
	}

	return 0;

unwind:
	for (j = 0; j < i; j++) {
		group = gvt_vgpu_type_groups[j];
		kfree(group);
	}

	return -ENOMEM;
}

/* Free every type attribute group allocated above. */
static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = gvt_vgpu_type_groups[i];
		gvt_vgpu_type_groups[i] = NULL;
		kfree(group);
	}
}

/*
 * Unpin @size bytes of guest memory starting at @gfn, one page at a time
 * (mirrors the page-by-page pinning in gvt_pin_guest_page()).
 */
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	int total_pages;
	int npage;
	int ret;

	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;

	for (npage = 0; npage < total_pages; npage++) {
		unsigned long cur_gfn = gfn + npage;

		ret = vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1);
		drm_WARN_ON(&i915->drm, ret != 1);
	}
}

/* Pin a normal or compound guest page for dma.
*/ 250 static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, 251 unsigned long size, struct page **page) 252 { 253 unsigned long base_pfn = 0; 254 int total_pages; 255 int npage; 256 int ret; 257 258 total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; 259 /* 260 * We pin the pages one-by-one to avoid allocating a big arrary 261 * on stack to hold pfns. 262 */ 263 for (npage = 0; npage < total_pages; npage++) { 264 unsigned long cur_gfn = gfn + npage; 265 unsigned long pfn; 266 267 ret = vfio_pin_pages(&vgpu->vfio_device, &cur_gfn, 1, 268 IOMMU_READ | IOMMU_WRITE, &pfn); 269 if (ret != 1) { 270 gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", 271 cur_gfn, ret); 272 goto err; 273 } 274 275 if (!pfn_valid(pfn)) { 276 gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); 277 npage++; 278 ret = -EFAULT; 279 goto err; 280 } 281 282 if (npage == 0) 283 base_pfn = pfn; 284 else if (base_pfn + npage != pfn) { 285 gvt_vgpu_err("The pages are not continuous\n"); 286 ret = -EINVAL; 287 npage++; 288 goto err; 289 } 290 } 291 292 *page = pfn_to_page(base_pfn); 293 return 0; 294 err: 295 gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); 296 return ret; 297 } 298 299 static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, 300 dma_addr_t *dma_addr, unsigned long size) 301 { 302 struct device *dev = vgpu->gvt->gt->i915->drm.dev; 303 struct page *page = NULL; 304 int ret; 305 306 ret = gvt_pin_guest_page(vgpu, gfn, size, &page); 307 if (ret) 308 return ret; 309 310 /* Setup DMA mapping. 
*/ 311 *dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL); 312 if (dma_mapping_error(dev, *dma_addr)) { 313 gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n", 314 page_to_pfn(page), ret); 315 gvt_unpin_guest_page(vgpu, gfn, size); 316 return -ENOMEM; 317 } 318 319 return 0; 320 } 321 322 static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, 323 dma_addr_t dma_addr, unsigned long size) 324 { 325 struct device *dev = vgpu->gvt->gt->i915->drm.dev; 326 327 dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL); 328 gvt_unpin_guest_page(vgpu, gfn, size); 329 } 330 331 static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, 332 dma_addr_t dma_addr) 333 { 334 struct rb_node *node = vgpu->dma_addr_cache.rb_node; 335 struct gvt_dma *itr; 336 337 while (node) { 338 itr = rb_entry(node, struct gvt_dma, dma_addr_node); 339 340 if (dma_addr < itr->dma_addr) 341 node = node->rb_left; 342 else if (dma_addr > itr->dma_addr) 343 node = node->rb_right; 344 else 345 return itr; 346 } 347 return NULL; 348 } 349 350 static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) 351 { 352 struct rb_node *node = vgpu->gfn_cache.rb_node; 353 struct gvt_dma *itr; 354 355 while (node) { 356 itr = rb_entry(node, struct gvt_dma, gfn_node); 357 358 if (gfn < itr->gfn) 359 node = node->rb_left; 360 else if (gfn > itr->gfn) 361 node = node->rb_right; 362 else 363 return itr; 364 } 365 return NULL; 366 } 367 368 static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, 369 dma_addr_t dma_addr, unsigned long size) 370 { 371 struct gvt_dma *new, *itr; 372 struct rb_node **link, *parent = NULL; 373 374 new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); 375 if (!new) 376 return -ENOMEM; 377 378 new->vgpu = vgpu; 379 new->gfn = gfn; 380 new->dma_addr = dma_addr; 381 new->size = size; 382 kref_init(&new->ref); 383 384 /* gfn_cache maps gfn to struct gvt_dma. 
*/ 385 link = &vgpu->gfn_cache.rb_node; 386 while (*link) { 387 parent = *link; 388 itr = rb_entry(parent, struct gvt_dma, gfn_node); 389 390 if (gfn < itr->gfn) 391 link = &parent->rb_left; 392 else 393 link = &parent->rb_right; 394 } 395 rb_link_node(&new->gfn_node, parent, link); 396 rb_insert_color(&new->gfn_node, &vgpu->gfn_cache); 397 398 /* dma_addr_cache maps dma addr to struct gvt_dma. */ 399 parent = NULL; 400 link = &vgpu->dma_addr_cache.rb_node; 401 while (*link) { 402 parent = *link; 403 itr = rb_entry(parent, struct gvt_dma, dma_addr_node); 404 405 if (dma_addr < itr->dma_addr) 406 link = &parent->rb_left; 407 else 408 link = &parent->rb_right; 409 } 410 rb_link_node(&new->dma_addr_node, parent, link); 411 rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache); 412 413 vgpu->nr_cache_entries++; 414 return 0; 415 } 416 417 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, 418 struct gvt_dma *entry) 419 { 420 rb_erase(&entry->gfn_node, &vgpu->gfn_cache); 421 rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache); 422 kfree(entry); 423 vgpu->nr_cache_entries--; 424 } 425 426 static void gvt_cache_destroy(struct intel_vgpu *vgpu) 427 { 428 struct gvt_dma *dma; 429 struct rb_node *node = NULL; 430 431 for (;;) { 432 mutex_lock(&vgpu->cache_lock); 433 node = rb_first(&vgpu->gfn_cache); 434 if (!node) { 435 mutex_unlock(&vgpu->cache_lock); 436 break; 437 } 438 dma = rb_entry(node, struct gvt_dma, gfn_node); 439 gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size); 440 __gvt_cache_remove_entry(vgpu, dma); 441 mutex_unlock(&vgpu->cache_lock); 442 } 443 } 444 445 static void gvt_cache_init(struct intel_vgpu *vgpu) 446 { 447 vgpu->gfn_cache = RB_ROOT; 448 vgpu->dma_addr_cache = RB_ROOT; 449 vgpu->nr_cache_entries = 0; 450 mutex_init(&vgpu->cache_lock); 451 } 452 453 static void kvmgt_protect_table_init(struct intel_vgpu *info) 454 { 455 hash_init(info->ptable); 456 } 457 458 static void kvmgt_protect_table_destroy(struct intel_vgpu 
*info) 459 { 460 struct kvmgt_pgfn *p; 461 struct hlist_node *tmp; 462 int i; 463 464 hash_for_each_safe(info->ptable, i, tmp, p, hnode) { 465 hash_del(&p->hnode); 466 kfree(p); 467 } 468 } 469 470 static struct kvmgt_pgfn * 471 __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn) 472 { 473 struct kvmgt_pgfn *p, *res = NULL; 474 475 hash_for_each_possible(info->ptable, p, hnode, gfn) { 476 if (gfn == p->gfn) { 477 res = p; 478 break; 479 } 480 } 481 482 return res; 483 } 484 485 static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn) 486 { 487 struct kvmgt_pgfn *p; 488 489 p = __kvmgt_protect_table_find(info, gfn); 490 return !!p; 491 } 492 493 static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn) 494 { 495 struct kvmgt_pgfn *p; 496 497 if (kvmgt_gfn_is_write_protected(info, gfn)) 498 return; 499 500 p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC); 501 if (WARN(!p, "gfn: 0x%llx\n", gfn)) 502 return; 503 504 p->gfn = gfn; 505 hash_add(info->ptable, &p->hnode, gfn); 506 } 507 508 static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn) 509 { 510 struct kvmgt_pgfn *p; 511 512 p = __kvmgt_protect_table_find(info, gfn); 513 if (p) { 514 hash_del(&p->hnode); 515 kfree(p); 516 } 517 } 518 519 static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf, 520 size_t count, loff_t *ppos, bool iswrite) 521 { 522 unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - 523 VFIO_PCI_NUM_REGIONS; 524 void *base = vgpu->region[i].data; 525 loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 526 527 528 if (pos >= vgpu->region[i].size || iswrite) { 529 gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n"); 530 return -EINVAL; 531 } 532 count = min(count, (size_t)(vgpu->region[i].size - pos)); 533 memcpy(buf, base + pos, count); 534 535 return count; 536 } 537 538 static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu, 539 struct vfio_region *region) 540 { 541 } 542 543 static const struct 
intel_vgpu_regops intel_vgpu_regops_opregion = { 544 .rw = intel_vgpu_reg_rw_opregion, 545 .release = intel_vgpu_reg_release_opregion, 546 }; 547 548 static int handle_edid_regs(struct intel_vgpu *vgpu, 549 struct vfio_edid_region *region, char *buf, 550 size_t count, u16 offset, bool is_write) 551 { 552 struct vfio_region_gfx_edid *regs = ®ion->vfio_edid_regs; 553 unsigned int data; 554 555 if (offset + count > sizeof(*regs)) 556 return -EINVAL; 557 558 if (count != 4) 559 return -EINVAL; 560 561 if (is_write) { 562 data = *((unsigned int *)buf); 563 switch (offset) { 564 case offsetof(struct vfio_region_gfx_edid, link_state): 565 if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) { 566 if (!drm_edid_block_valid( 567 (u8 *)region->edid_blob, 568 0, 569 true, 570 NULL)) { 571 gvt_vgpu_err("invalid EDID blob\n"); 572 return -EINVAL; 573 } 574 intel_vgpu_emulate_hotplug(vgpu, true); 575 } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) 576 intel_vgpu_emulate_hotplug(vgpu, false); 577 else { 578 gvt_vgpu_err("invalid EDID link state %d\n", 579 regs->link_state); 580 return -EINVAL; 581 } 582 regs->link_state = data; 583 break; 584 case offsetof(struct vfio_region_gfx_edid, edid_size): 585 if (data > regs->edid_max_size) { 586 gvt_vgpu_err("EDID size is bigger than %d!\n", 587 regs->edid_max_size); 588 return -EINVAL; 589 } 590 regs->edid_size = data; 591 break; 592 default: 593 /* read-only regs */ 594 gvt_vgpu_err("write read-only EDID region at offset %d\n", 595 offset); 596 return -EPERM; 597 } 598 } else { 599 memcpy(buf, (char *)regs + offset, count); 600 } 601 602 return count; 603 } 604 605 static int handle_edid_blob(struct vfio_edid_region *region, char *buf, 606 size_t count, u16 offset, bool is_write) 607 { 608 if (offset + count > region->vfio_edid_regs.edid_size) 609 return -EINVAL; 610 611 if (is_write) 612 memcpy(region->edid_blob + offset, buf, count); 613 else 614 memcpy(buf, region->edid_blob + offset, count); 615 616 return count; 617 } 618 619 static 
size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, 620 size_t count, loff_t *ppos, bool iswrite) 621 { 622 int ret; 623 unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - 624 VFIO_PCI_NUM_REGIONS; 625 struct vfio_edid_region *region = vgpu->region[i].data; 626 loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; 627 628 if (pos < region->vfio_edid_regs.edid_offset) { 629 ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite); 630 } else { 631 pos -= EDID_BLOB_OFFSET; 632 ret = handle_edid_blob(region, buf, count, pos, iswrite); 633 } 634 635 if (ret < 0) 636 gvt_vgpu_err("failed to access EDID region\n"); 637 638 return ret; 639 } 640 641 static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu, 642 struct vfio_region *region) 643 { 644 kfree(region->data); 645 } 646 647 static const struct intel_vgpu_regops intel_vgpu_regops_edid = { 648 .rw = intel_vgpu_reg_rw_edid, 649 .release = intel_vgpu_reg_release_edid, 650 }; 651 652 static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, 653 unsigned int type, unsigned int subtype, 654 const struct intel_vgpu_regops *ops, 655 size_t size, u32 flags, void *data) 656 { 657 struct vfio_region *region; 658 659 region = krealloc(vgpu->region, 660 (vgpu->num_regions + 1) * sizeof(*region), 661 GFP_KERNEL); 662 if (!region) 663 return -ENOMEM; 664 665 vgpu->region = region; 666 vgpu->region[vgpu->num_regions].type = type; 667 vgpu->region[vgpu->num_regions].subtype = subtype; 668 vgpu->region[vgpu->num_regions].ops = ops; 669 vgpu->region[vgpu->num_regions].size = size; 670 vgpu->region[vgpu->num_regions].flags = flags; 671 vgpu->region[vgpu->num_regions].data = data; 672 vgpu->num_regions++; 673 return 0; 674 } 675 676 int intel_gvt_set_opregion(struct intel_vgpu *vgpu) 677 { 678 void *base; 679 int ret; 680 681 /* Each vgpu has its own opregion, although VFIO would create another 682 * one later. This one is used to expose opregion to VFIO. 
And the 683 * other one created by VFIO later, is used by guest actually. 684 */ 685 base = vgpu_opregion(vgpu)->va; 686 if (!base) 687 return -ENOMEM; 688 689 if (memcmp(base, OPREGION_SIGNATURE, 16)) { 690 memunmap(base); 691 return -EINVAL; 692 } 693 694 ret = intel_vgpu_register_reg(vgpu, 695 PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, 696 VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, 697 &intel_vgpu_regops_opregion, OPREGION_SIZE, 698 VFIO_REGION_INFO_FLAG_READ, base); 699 700 return ret; 701 } 702 703 int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num) 704 { 705 struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); 706 struct vfio_edid_region *base; 707 int ret; 708 709 base = kzalloc(sizeof(*base), GFP_KERNEL); 710 if (!base) 711 return -ENOMEM; 712 713 /* TODO: Add multi-port and EDID extension block support */ 714 base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET; 715 base->vfio_edid_regs.edid_max_size = EDID_SIZE; 716 base->vfio_edid_regs.edid_size = EDID_SIZE; 717 base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id); 718 base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id); 719 base->edid_blob = port->edid->edid_block; 720 721 ret = intel_vgpu_register_reg(vgpu, 722 VFIO_REGION_TYPE_GFX, 723 VFIO_REGION_SUBTYPE_GFX_EDID, 724 &intel_vgpu_regops_edid, EDID_SIZE, 725 VFIO_REGION_INFO_FLAG_READ | 726 VFIO_REGION_INFO_FLAG_WRITE | 727 VFIO_REGION_INFO_FLAG_CAPS, base); 728 729 return ret; 730 } 731 732 static int intel_vgpu_iommu_notifier(struct notifier_block *nb, 733 unsigned long action, void *data) 734 { 735 struct intel_vgpu *vgpu = 736 container_of(nb, struct intel_vgpu, iommu_notifier); 737 738 if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { 739 struct vfio_iommu_type1_dma_unmap *unmap = data; 740 struct gvt_dma *entry; 741 unsigned long iov_pfn, end_iov_pfn; 742 743 iov_pfn = unmap->iova >> PAGE_SHIFT; 744 end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; 745 746 mutex_lock(&vgpu->cache_lock); 747 for (; iov_pfn < 
end_iov_pfn; iov_pfn++) { 748 entry = __gvt_cache_find_gfn(vgpu, iov_pfn); 749 if (!entry) 750 continue; 751 752 gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, 753 entry->size); 754 __gvt_cache_remove_entry(vgpu, entry); 755 } 756 mutex_unlock(&vgpu->cache_lock); 757 } 758 759 return NOTIFY_OK; 760 } 761 762 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) 763 { 764 struct intel_vgpu *itr; 765 int id; 766 bool ret = false; 767 768 mutex_lock(&vgpu->gvt->lock); 769 for_each_active_vgpu(vgpu->gvt, itr, id) { 770 if (!itr->attached) 771 continue; 772 773 if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) { 774 ret = true; 775 goto out; 776 } 777 } 778 out: 779 mutex_unlock(&vgpu->gvt->lock); 780 return ret; 781 } 782 783 static int intel_vgpu_open_device(struct vfio_device *vfio_dev) 784 { 785 struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); 786 unsigned long events; 787 int ret; 788 789 vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; 790 791 events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; 792 ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events, 793 &vgpu->iommu_notifier); 794 if (ret != 0) { 795 gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", 796 ret); 797 goto out; 798 } 799 800 ret = -EEXIST; 801 if (vgpu->attached) 802 goto undo_iommu; 803 804 ret = -ESRCH; 805 if (!vgpu->vfio_device.kvm || 806 vgpu->vfio_device.kvm->mm != current->mm) { 807 gvt_vgpu_err("KVM is required to use Intel vGPU\n"); 808 goto undo_iommu; 809 } 810 811 kvm_get_kvm(vgpu->vfio_device.kvm); 812 813 ret = -EEXIST; 814 if (__kvmgt_vgpu_exist(vgpu)) 815 goto undo_iommu; 816 817 vgpu->attached = true; 818 819 kvmgt_protect_table_init(vgpu); 820 gvt_cache_init(vgpu); 821 822 vgpu->track_node.track_write = kvmgt_page_track_write; 823 vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; 824 kvm_page_track_register_notifier(vgpu->vfio_device.kvm, 825 &vgpu->track_node); 826 827 debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, 
vgpu->debugfs, 828 &vgpu->nr_cache_entries); 829 830 intel_gvt_activate_vgpu(vgpu); 831 832 atomic_set(&vgpu->released, 0); 833 return 0; 834 835 undo_iommu: 836 vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, 837 &vgpu->iommu_notifier); 838 out: 839 return ret; 840 } 841 842 static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) 843 { 844 struct eventfd_ctx *trigger; 845 846 trigger = vgpu->msi_trigger; 847 if (trigger) { 848 eventfd_ctx_put(trigger); 849 vgpu->msi_trigger = NULL; 850 } 851 } 852 853 static void intel_vgpu_close_device(struct vfio_device *vfio_dev) 854 { 855 struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); 856 struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 857 int ret; 858 859 if (!vgpu->attached) 860 return; 861 862 if (atomic_cmpxchg(&vgpu->released, 0, 1)) 863 return; 864 865 intel_gvt_release_vgpu(vgpu); 866 867 ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY, 868 &vgpu->iommu_notifier); 869 drm_WARN(&i915->drm, ret, 870 "vfio_unregister_notifier for iommu failed: %d\n", ret); 871 872 debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); 873 874 kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, 875 &vgpu->track_node); 876 kvmgt_protect_table_destroy(vgpu); 877 gvt_cache_destroy(vgpu); 878 879 intel_vgpu_release_msi_eventfd_ctx(vgpu); 880 881 vgpu->attached = false; 882 883 if (vgpu->vfio_device.kvm) 884 kvm_put_kvm(vgpu->vfio_device.kvm); 885 } 886 887 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) 888 { 889 u32 start_lo, start_hi; 890 u32 mem_type; 891 892 start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) & 893 PCI_BASE_ADDRESS_MEM_MASK; 894 mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) & 895 PCI_BASE_ADDRESS_MEM_TYPE_MASK; 896 897 switch (mem_type) { 898 case PCI_BASE_ADDRESS_MEM_TYPE_64: 899 start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space 900 + bar + 4)); 901 break; 902 case PCI_BASE_ADDRESS_MEM_TYPE_32: 
903 case PCI_BASE_ADDRESS_MEM_TYPE_1M: 904 /* 1M mem BAR treated as 32-bit BAR */ 905 default: 906 /* mem unknown type treated as 32-bit BAR */ 907 start_hi = 0; 908 break; 909 } 910 911 return ((u64)start_hi << 32) | start_lo; 912 } 913 914 static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off, 915 void *buf, unsigned int count, bool is_write) 916 { 917 u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar); 918 int ret; 919 920 if (is_write) 921 ret = intel_vgpu_emulate_mmio_write(vgpu, 922 bar_start + off, buf, count); 923 else 924 ret = intel_vgpu_emulate_mmio_read(vgpu, 925 bar_start + off, buf, count); 926 return ret; 927 } 928 929 static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off) 930 { 931 return off >= vgpu_aperture_offset(vgpu) && 932 off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu); 933 } 934 935 static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, 936 void *buf, unsigned long count, bool is_write) 937 { 938 void __iomem *aperture_va; 939 940 if (!intel_vgpu_in_aperture(vgpu, off) || 941 !intel_vgpu_in_aperture(vgpu, off + count)) { 942 gvt_vgpu_err("Invalid aperture offset %llu\n", off); 943 return -EINVAL; 944 } 945 946 aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap, 947 ALIGN_DOWN(off, PAGE_SIZE), 948 count + offset_in_page(off)); 949 if (!aperture_va) 950 return -EIO; 951 952 if (is_write) 953 memcpy_toio(aperture_va + offset_in_page(off), buf, count); 954 else 955 memcpy_fromio(buf, aperture_va + offset_in_page(off), count); 956 957 io_mapping_unmap(aperture_va); 958 959 return 0; 960 } 961 962 static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf, 963 size_t count, loff_t *ppos, bool is_write) 964 { 965 unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); 966 u64 pos = *ppos & VFIO_PCI_OFFSET_MASK; 967 int ret = -EINVAL; 968 969 970 if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) { 971 gvt_vgpu_err("invalid index: %u\n", index); 972 return -EINVAL; 973 } 
974 975 switch (index) { 976 case VFIO_PCI_CONFIG_REGION_INDEX: 977 if (is_write) 978 ret = intel_vgpu_emulate_cfg_write(vgpu, pos, 979 buf, count); 980 else 981 ret = intel_vgpu_emulate_cfg_read(vgpu, pos, 982 buf, count); 983 break; 984 case VFIO_PCI_BAR0_REGION_INDEX: 985 ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos, 986 buf, count, is_write); 987 break; 988 case VFIO_PCI_BAR2_REGION_INDEX: 989 ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write); 990 break; 991 case VFIO_PCI_BAR1_REGION_INDEX: 992 case VFIO_PCI_BAR3_REGION_INDEX: 993 case VFIO_PCI_BAR4_REGION_INDEX: 994 case VFIO_PCI_BAR5_REGION_INDEX: 995 case VFIO_PCI_VGA_REGION_INDEX: 996 case VFIO_PCI_ROM_REGION_INDEX: 997 break; 998 default: 999 if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) 1000 return -EINVAL; 1001 1002 index -= VFIO_PCI_NUM_REGIONS; 1003 return vgpu->region[index].ops->rw(vgpu, buf, count, 1004 ppos, is_write); 1005 } 1006 1007 return ret == 0 ? count : ret; 1008 } 1009 1010 static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos) 1011 { 1012 unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); 1013 struct intel_gvt *gvt = vgpu->gvt; 1014 int offset; 1015 1016 /* Only allow MMIO GGTT entry access */ 1017 if (index != PCI_BASE_ADDRESS_0) 1018 return false; 1019 1020 offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) - 1021 intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0); 1022 1023 return (offset >= gvt->device_info.gtt_start_offset && 1024 offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ? 
		true : false;
}

/*
 * Read from a vGPU region on behalf of userspace.  The request is split
 * into the widest naturally-aligned accesses the device model supports:
 * 8 bytes only for GGTT entries (see gtt_entry() above), otherwise 4/2/1.
 */
static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf,
		size_t count, loff_t *ppos)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes read */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(vgpu, ppos)) {
			u64 val;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	/*
	 * NOTE(review): every failure, including an error return from
	 * intel_vgpu_rw(), is collapsed to -EFAULT here, hiding the
	 * original error code from userspace.
	 */
	return -EFAULT;
}

/*
 * Write to a vGPU region on behalf of userspace; mirror image of
 * intel_vgpu_read() with the same access-width splitting rules, but the
 * user copy happens before the device-model access.
 */
static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes write */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(vgpu, ppos)) {
			u64 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(vgpu, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(vgpu, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}

/*
 * mmap handler: only BAR2 (the aperture) may be mapped, only VM_SHARED,
 * and the requested range must lie entirely inside this vGPU's aperture
 * slice, which is then remapped onto the physical aperture pages.
 */
static int intel_vgpu_mmap(struct vfio_device *vfio_dev,
			   struct vm_area_struct *vma)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff, req_start;
	pgprot_t pg_prot;

	/* high bits of vm_pgoff encode the VFIO region index */
	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	/* low bits of vm_pgoff are the page offset within the region */
	pgoff = vma->vm_pgoff &
		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	req_start = pgoff << PAGE_SHIFT;

	if (!intel_vgpu_in_aperture(vgpu, req_start))
		return -EINVAL;
	if (req_start + req_size >
	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
		return -EINVAL;

	/* translate region offset to a physical aperture page frame */
	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

/* Only INTx and MSI are exposed, one vector each; everything else is 0. */
static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

/* INTx mask/unmask/trigger are accepted but intentionally not emulated. */
static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, u32 flags,
			void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, u32 flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		u32 flags, void *data)
{
	return 0;
}

/*
 * Install (DATA_EVENTFD) or tear down (DATA_NONE with count == 0) the
 * eventfd context used to inject MSIs into the guest.
 */
static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		u32 flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		/*
		 * NOTE(review): a previously installed msi_trigger is
		 * overwritten without eventfd_ctx_put(); looks like a
		 * reference leak if userspace re-arms the eventfd —
		 * confirm against the device release path.
		 */
		vgpu->msi_trigger = trigger;
	} else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
		intel_vgpu_release_msi_eventfd_ctx(vgpu);

	return 0;
}

/*
 * Dispatch a validated VFIO_DEVICE_SET_IRQS request to the handler
 * matching the (irq index, action) pair.
 */
static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	/* handler selected by (index, action); NULL means unsupported */
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, u32 flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}

/*
 * Main VFIO ioctl dispatcher for a vGPU: device/region/irq queries,
 * SET_IRQS, reset, and the GVT-specific gfx-plane/dmabuf queries.
 */
static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		/* standard PCI regions plus any device-specific regions */
		info.num_regions = VFIO_PCI_NUM_REGIONS +
				vgpu->num_regions;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		unsigned int i;
		int ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->gvt->device_info.cfg_space_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			/*
			 * BAR2 reports the whole aperture, but only this
			 * vGPU's slice is mmap-able — advertised via a
			 * sparse-mmap capability.
			 */
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			sparse = kzalloc(struct_size(sparse, areas, nr_areas),
					 GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->header.version = 1;
			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				/* device-specific region (e.g. opregion/EDID) */
				struct vfio_region_info_cap_type cap_type = {
					.header.id = VFIO_REGION_INFO_CAP_TYPE,
					.header.version = 1 };

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->num_regions)
					return -EINVAL;
				/* clamp under speculation after bounds check */
				info.index =
					array_index_nospec(info.index,
							VFIO_PCI_NUM_REGIONS +
							vgpu->num_regions);

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->region[i].size;
				info.flags = vgpu->region[i].flags;

				cap_type.type = vgpu->region[i].type;
				cap_type.subtype = vgpu->region[i].subtype;

				ret = vfio_info_add_capability(&caps,
							&cap_type.header,
							sizeof(cap_type));
				if (ret)
					return ret;
			}
		}

		/* sparse is only non-NULL for BAR2, where cap_type_id is set */
		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					&sparse->header,
					struct_size(sparse, areas,
						    sparse->nr_areas));
				if (ret) {
					kfree(sparse);
					return ret;
				}
				break;
			default:
				kfree(sparse);
				return -EINVAL;
			}
		}

		if (caps.size) {
			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
			/* caps don't fit: report required argsz instead */
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					kfree(sparse);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		kfree(sparse);
		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				/*
				 * NOTE(review): the validated helper's error
				 * code is replaced by -EINVAL here.
				 */
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_reset_vgpu(vgpu);
		return 0;
	} else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
		struct vfio_device_gfx_plane_info dmabuf;
		int ret = 0;

		minsz = offsetofend(struct vfio_device_gfx_plane_info,
				    dmabuf_id);
		if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
			return -EFAULT;
		if (dmabuf.argsz < minsz)
			return -EINVAL;

		ret = intel_vgpu_query_plane(vgpu, &dmabuf);
		if (ret != 0)
			return ret;

		return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
				-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
		__u32 dmabuf_id;

		if (get_user(dmabuf_id, (__u32 __user *)arg))
			return -EFAULT;
		return intel_vgpu_get_dmabuf(vgpu, dmabuf_id);
	}

	return -ENOTTY;
}

/* sysfs: expose the vGPU id under the "intel_vgpu" attribute group. */
static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct intel_vgpu *vgpu = dev_get_drvdata(dev);

	return sprintf(buf, "%d\n", vgpu->id);
}

static DEVICE_ATTR_RO(vgpu_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};

static const struct vfio_device_ops intel_vgpu_dev_ops = {
	.open_device = intel_vgpu_open_device,
	.close_device = intel_vgpu_close_device,
	.read = intel_vgpu_read,
	.write = intel_vgpu_write,
	.mmap = intel_vgpu_mmap,
	.ioctl = intel_vgpu_ioctl,
};

/*
 * mdev probe: create a vGPU of the requested type and register it as an
 * emulated-IOMMU VFIO device.
 */
static int intel_vgpu_probe(struct mdev_device *mdev)
{
	struct device *pdev = mdev_parent_dev(mdev);
	struct intel_gvt *gvt = kdev_to_i915(pdev)->gvt;
	struct intel_vgpu_type *type;
	struct intel_vgpu *vgpu;
	int ret;

	type = &gvt->types[mdev_get_type_group_id(mdev)];
	/*
	 * NOTE(review): &gvt->types[i] can never be NULL, so this check is
	 * dead code; verify whether gvt->types itself was meant.
	 */
	if (!type)
		return -EINVAL;

	vgpu = intel_gvt_create_vgpu(gvt, type);
	if (IS_ERR(vgpu)) {
		gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu));
		return PTR_ERR(vgpu);
	}

	vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
			    &intel_vgpu_dev_ops);

	dev_set_drvdata(&mdev->dev, vgpu);
	ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device);
	if (ret) {
		intel_gvt_destroy_vgpu(vgpu);
		return ret;
	}

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	return 0;
}

/* mdev remove: refuse while still attached, then destroy the vGPU. */
static void intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev);

	if (WARN_ON_ONCE(vgpu->attached))
		return;
	intel_gvt_destroy_vgpu(vgpu);
}

static struct mdev_driver intel_vgpu_mdev_driver = {
	.driver = {
		.name = "intel_vgpu_mdev",
		.owner = THIS_MODULE,
		.dev_groups = intel_vgpu_groups,
	},
	.probe = intel_vgpu_probe,
	.remove = intel_vgpu_remove,
	.supported_type_groups = gvt_vgpu_type_groups,
};

/*
 * Start write-protecting a guest page so guest writes to it are trapped
 * and forwarded to the page-track notifier.  Idempotent: a gfn already in
 * the protect table is left alone.  Returns -ESRCH if the vGPU is not
 * attached, -EINVAL if the gfn has no memslot.
 */
int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
{
	struct kvm *kvm = info->vfio_device.kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!info->attached)
		return -ESRCH;

	/* srcu protects the memslot lookup; mmu_lock the track update */
	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	write_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

/*
 * Stop write-protecting a guest page; mirror of intel_gvt_page_track_add().
 * A detached vGPU or an untracked gfn is treated as success.
 */
int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
{
	struct kvm *kvm = info->vfio_device.kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!info->attached)
		return 0;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	write_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

/*
 * KVM page-track notifier: a guest write hit a tracked gpa — forward it
 * to the GVT page-track handler.
 */
static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct intel_vgpu *info =
		container_of(node, struct intel_vgpu, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_vgpu_page_track_handler(info, gpa,
					(void *)val, len);
}

/*
 * KVM page-track notifier: a memslot is going away — drop tracking for
 * every protected gfn that falls inside it.
 */
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct intel_vgpu *info =
		container_of(node, struct intel_vgpu, track_node);

	write_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	write_unlock(&kvm->mmu_lock);
}

/* Release all device-specific VFIO regions and free the region array. */
void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
{
	int i;

	if (!vgpu->region)
		return;

	for (i = 0; i < vgpu->num_regions; i++)
		if (vgpu->region[i].ops->release)
			vgpu->region[i].ops->release(vgpu,
					&vgpu->region[i]);
	vgpu->num_regions = 0;
	kfree(vgpu->region);
	vgpu->region = NULL;
}

/*
 * Map a guest page for DMA (or reuse an existing mapping), caching the
 * result keyed by gfn.  A cache hit with a different size is remapped;
 * a hit with the same size just takes another reference.
 */
int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size, dma_addr_t *dma_addr)
{
	struct gvt_dma *entry;
	int ret;

	if (!vgpu->attached)
		return -EINVAL;

	mutex_lock(&vgpu->cache_lock);

	entry = __gvt_cache_find_gfn(vgpu, gfn);
	if (!entry) {
		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
		if (ret)
			goto err_unmap;
	} else if (entry->size != size) {
		/* the same gfn with different size: unmap and re-map */
		gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
		__gvt_cache_remove_entry(vgpu, entry);

		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
		if (ret)
			goto err_unmap;
	} else {
		kref_get(&entry->ref);
		*dma_addr = entry->dma_addr;
	}

	mutex_unlock(&vgpu->cache_lock);
	return 0;

err_unmap:
	gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
err_unlock:
	mutex_unlock(&vgpu->cache_lock);
	return ret;
}

/*
 * Take an extra reference on an existing DMA mapping, looked up by its
 * dma address.  NOTE(review): a missing entry yields -ENOMEM — looks
 * more like a "not found" condition; confirm callers rely on this code.
 */
int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr)
{
	struct gvt_dma *entry;
	int ret = 0;

	if (!vgpu->attached)
		return -ENODEV;

	mutex_lock(&vgpu->cache_lock);
	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
	if (entry)
		kref_get(&entry->ref);
	else
		ret = -ENOMEM;
	mutex_unlock(&vgpu->cache_lock);

	return ret;
}

/* kref release: undo the DMA mapping and drop the cache entry. */
static void __gvt_dma_release(struct kref *ref)
{
	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);

	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
			   entry->size);
	__gvt_cache_remove_entry(entry->vgpu, entry);
}

/* Drop one reference on a DMA mapping; last ref unmaps it. */
void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
		dma_addr_t dma_addr)
{
	struct gvt_dma *entry;

	if (!vgpu->attached)
		return;

	mutex_lock(&vgpu->cache_lock);
	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
	if (entry)
		kref_put(&entry->ref, __gvt_dma_release);
	mutex_unlock(&vgpu->cache_lock);
}

/* Fill in the static per-device emulation parameters. */
static void init_device_info(struct intel_gvt *gvt)
{
	struct intel_gvt_device_info *info = &gvt->device_info;
	struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);

	info->max_support_vgpus = 8;
	info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE;
	info->mmio_size = 2 * 1024 * 1024;
	info->mmio_bar = 0;
	info->gtt_start_offset = 8 * 1024 * 1024;
	info->gtt_entry_size = 8;
	info->gtt_entry_size_shift = 3;
	info->gmadr_bytes_in_cmd = 8;
	info->max_surface_size = 36 * 1024 * 1024;
	info->msi_cap_offset = pdev->msi_cap;
}

/*
 * For each active vGPU whose per-id vblank request bit is set, clear the
 * bit and emulate a vblank.
 */
static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt)
{
	struct intel_vgpu *vgpu;
	int id;

	mutex_lock(&gvt->lock);
	idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) {
		if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id,
				       (void *)&gvt->service_request)) {
			if (vgpu->active)
				intel_vgpu_emulate_vblank(vgpu);
		}
	}
	mutex_unlock(&gvt->lock);
}

/*
 * Service kthread: sleeps until a service_request bit is raised, then
 * handles vblank emulation and scheduling requests until asked to stop.
 */
static int gvt_service_thread(void *data)
{
	struct intel_gvt *gvt = (struct intel_gvt *)data;
	int ret;

	gvt_dbg_core("service thread start\n");

	while (!kthread_should_stop()) {
		ret = wait_event_interruptible(gvt->service_thread_wq,
				kthread_should_stop() || gvt->service_request);

		if (kthread_should_stop())
			break;

		if (WARN_ONCE(ret, "service thread is waken up by signal.\n"))
			continue;

		intel_gvt_test_and_emulate_vblank(gvt);

		if (test_bit(INTEL_GVT_REQUEST_SCHED,
				(void *)&gvt->service_request) ||
			test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
					(void *)&gvt->service_request)) {
			intel_gvt_schedule(gvt);
		}
	}

	return 0;
}

static void clean_service_thread(struct intel_gvt *gvt)
{
	kthread_stop(gvt->service_thread);
}

static int init_service_thread(struct intel_gvt *gvt)
{
	init_waitqueue_head(&gvt->service_thread_wq);

	gvt->service_thread = kthread_run(gvt_service_thread,
			gvt, "gvt_service_thread");
1941 if (IS_ERR(gvt->service_thread)) { 1942 gvt_err("fail to start service thread.\n"); 1943 return PTR_ERR(gvt->service_thread); 1944 } 1945 return 0; 1946 } 1947 1948 /** 1949 * intel_gvt_clean_device - clean a GVT device 1950 * @i915: i915 private 1951 * 1952 * This function is called at the driver unloading stage, to free the 1953 * resources owned by a GVT device. 1954 * 1955 */ 1956 static void intel_gvt_clean_device(struct drm_i915_private *i915) 1957 { 1958 struct intel_gvt *gvt = fetch_and_zero(&i915->gvt); 1959 1960 if (drm_WARN_ON(&i915->drm, !gvt)) 1961 return; 1962 1963 mdev_unregister_device(i915->drm.dev); 1964 intel_gvt_cleanup_vgpu_type_groups(gvt); 1965 intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); 1966 intel_gvt_clean_vgpu_types(gvt); 1967 1968 intel_gvt_debugfs_clean(gvt); 1969 clean_service_thread(gvt); 1970 intel_gvt_clean_cmd_parser(gvt); 1971 intel_gvt_clean_sched_policy(gvt); 1972 intel_gvt_clean_workload_scheduler(gvt); 1973 intel_gvt_clean_gtt(gvt); 1974 intel_gvt_free_firmware(gvt); 1975 intel_gvt_clean_mmio_info(gvt); 1976 idr_destroy(&gvt->vgpu_idr); 1977 1978 kfree(i915->gvt); 1979 } 1980 1981 /** 1982 * intel_gvt_init_device - initialize a GVT device 1983 * @i915: drm i915 private data 1984 * 1985 * This function is called at the initialization stage, to initialize 1986 * necessary GVT components. 1987 * 1988 * Returns: 1989 * Zero on success, negative error code if failed. 
1990 * 1991 */ 1992 static int intel_gvt_init_device(struct drm_i915_private *i915) 1993 { 1994 struct intel_gvt *gvt; 1995 struct intel_vgpu *vgpu; 1996 int ret; 1997 1998 if (drm_WARN_ON(&i915->drm, i915->gvt)) 1999 return -EEXIST; 2000 2001 gvt = kzalloc(sizeof(struct intel_gvt), GFP_KERNEL); 2002 if (!gvt) 2003 return -ENOMEM; 2004 2005 gvt_dbg_core("init gvt device\n"); 2006 2007 idr_init_base(&gvt->vgpu_idr, 1); 2008 spin_lock_init(&gvt->scheduler.mmio_context_lock); 2009 mutex_init(&gvt->lock); 2010 mutex_init(&gvt->sched_lock); 2011 gvt->gt = to_gt(i915); 2012 i915->gvt = gvt; 2013 2014 init_device_info(gvt); 2015 2016 ret = intel_gvt_setup_mmio_info(gvt); 2017 if (ret) 2018 goto out_clean_idr; 2019 2020 intel_gvt_init_engine_mmio_context(gvt); 2021 2022 ret = intel_gvt_load_firmware(gvt); 2023 if (ret) 2024 goto out_clean_mmio_info; 2025 2026 ret = intel_gvt_init_irq(gvt); 2027 if (ret) 2028 goto out_free_firmware; 2029 2030 ret = intel_gvt_init_gtt(gvt); 2031 if (ret) 2032 goto out_free_firmware; 2033 2034 ret = intel_gvt_init_workload_scheduler(gvt); 2035 if (ret) 2036 goto out_clean_gtt; 2037 2038 ret = intel_gvt_init_sched_policy(gvt); 2039 if (ret) 2040 goto out_clean_workload_scheduler; 2041 2042 ret = intel_gvt_init_cmd_parser(gvt); 2043 if (ret) 2044 goto out_clean_sched_policy; 2045 2046 ret = init_service_thread(gvt); 2047 if (ret) 2048 goto out_clean_cmd_parser; 2049 2050 ret = intel_gvt_init_vgpu_types(gvt); 2051 if (ret) 2052 goto out_clean_thread; 2053 2054 vgpu = intel_gvt_create_idle_vgpu(gvt); 2055 if (IS_ERR(vgpu)) { 2056 ret = PTR_ERR(vgpu); 2057 gvt_err("failed to create idle vgpu\n"); 2058 goto out_clean_types; 2059 } 2060 gvt->idle_vgpu = vgpu; 2061 2062 intel_gvt_debugfs_init(gvt); 2063 2064 ret = intel_gvt_init_vgpu_type_groups(gvt); 2065 if (ret) 2066 goto out_destroy_idle_vgpu; 2067 2068 ret = mdev_register_device(i915->drm.dev, &intel_vgpu_mdev_driver); 2069 if (ret) 2070 goto out_cleanup_vgpu_type_groups; 2071 2072 
gvt_dbg_core("gvt device initialization is done\n"); 2073 return 0; 2074 2075 out_cleanup_vgpu_type_groups: 2076 intel_gvt_cleanup_vgpu_type_groups(gvt); 2077 out_destroy_idle_vgpu: 2078 intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); 2079 intel_gvt_debugfs_clean(gvt); 2080 out_clean_types: 2081 intel_gvt_clean_vgpu_types(gvt); 2082 out_clean_thread: 2083 clean_service_thread(gvt); 2084 out_clean_cmd_parser: 2085 intel_gvt_clean_cmd_parser(gvt); 2086 out_clean_sched_policy: 2087 intel_gvt_clean_sched_policy(gvt); 2088 out_clean_workload_scheduler: 2089 intel_gvt_clean_workload_scheduler(gvt); 2090 out_clean_gtt: 2091 intel_gvt_clean_gtt(gvt); 2092 out_free_firmware: 2093 intel_gvt_free_firmware(gvt); 2094 out_clean_mmio_info: 2095 intel_gvt_clean_mmio_info(gvt); 2096 out_clean_idr: 2097 idr_destroy(&gvt->vgpu_idr); 2098 kfree(gvt); 2099 i915->gvt = NULL; 2100 return ret; 2101 } 2102 2103 static void intel_gvt_pm_resume(struct drm_i915_private *i915) 2104 { 2105 struct intel_gvt *gvt = i915->gvt; 2106 2107 intel_gvt_restore_fence(gvt); 2108 intel_gvt_restore_mmio(gvt); 2109 intel_gvt_restore_ggtt(gvt); 2110 } 2111 2112 static const struct intel_vgpu_ops intel_gvt_vgpu_ops = { 2113 .init_device = intel_gvt_init_device, 2114 .clean_device = intel_gvt_clean_device, 2115 .pm_resume = intel_gvt_pm_resume, 2116 }; 2117 2118 static int __init kvmgt_init(void) 2119 { 2120 int ret; 2121 2122 ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops); 2123 if (ret) 2124 return ret; 2125 2126 ret = mdev_register_driver(&intel_vgpu_mdev_driver); 2127 if (ret) 2128 intel_gvt_clear_ops(&intel_gvt_vgpu_ops); 2129 return ret; 2130 } 2131 2132 static void __exit kvmgt_exit(void) 2133 { 2134 mdev_unregister_driver(&intel_vgpu_mdev_driver); 2135 intel_gvt_clear_ops(&intel_gvt_vgpu_ops); 2136 } 2137 2138 module_init(kvmgt_init); 2139 module_exit(kvmgt_exit); 2140 2141 MODULE_LICENSE("GPL and additional rights"); 2142 MODULE_AUTHOR("Intel Corporation"); 2143