1 /* 2 * KVMGT - the implementation of Intel mediated pass-through framework for KVM 3 * 4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 *    Eddie Dong <eddie.dong@intel.com>
 *
 * Contributors:
 *    Niu Bing <bing.niu@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/sched/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/mdev.h>
#include <linux/debugfs.h>

#include <linux/nospec.h>

#include <drm/drm_edid.h>

#include "i915_drv.h"
#include "intel_gvt.h"
#include "gvt.h"

MODULE_IMPORT_NS(DMA_BUF);
MODULE_IMPORT_NS(I915_GVT);

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

/* The EDID blob lives in the second half of the EDID region page. */
#define EDID_BLOB_OFFSET (PAGE_SIZE/2)

#define OPREGION_SIGNATURE "IntelGraphicsMem"

struct vfio_region;
/*
 * Accessors for a device-specific VFIO region: rw() services guest
 * reads/writes, release() tears down the region's private data.
 */
struct intel_vgpu_regops {
	size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
			size_t count, loff_t *ppos, bool iswrite);
	void (*release)(struct intel_vgpu *vgpu,
			struct vfio_region *region);
};

/* One device-specific VFIO region (OpRegion, EDID, ...) exposed by a vGPU. */
struct vfio_region {
	u32				type;
	u32				subtype;
	size_t				size;
	u32				flags;
	const struct intel_vgpu_regops	*ops;
	void				*data;
};

/* Backing data for a VFIO_REGION_SUBTYPE_GFX_EDID region: regs + blob. */
struct vfio_edid_region {
	struct vfio_region_gfx_edid vfio_edid_regs;
	void *edid_blob;
};

/* A write-protected guest frame number, hashed in the vGPU's ptable. */
struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

/*
 * One pinned + DMA-mapped guest range.  Each entry is indexed twice:
 * by gfn (gfn_cache) and by dma address (dma_addr_cache).
 */
struct gvt_dma {
	struct intel_vgpu *vgpu;
	struct rb_node gfn_node;
	struct rb_node dma_addr_node;
	gfn_t gfn;
	dma_addr_t dma_addr;
	unsigned
long size;
	struct kref ref;	/* refcount: entry freed when last user drops it */
};

#define vfio_dev_to_vgpu(vfio_dev) \
	container_of((vfio_dev), struct intel_vgpu, vfio_device)

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node);
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node);

/* sysfs: how many more vGPUs of this mdev type can be created. */
static ssize_t available_instances_show(struct mdev_type *mtype,
					struct mdev_type_attribute *attr,
					char *buf)
{
	struct intel_vgpu_type *type;
	unsigned int num = 0;
	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;

	/* NOTE(review): &gvt->types[i] is never NULL; the !type check is dead. */
	type = &gvt->types[mtype_get_type_group_id(mtype)];
	if (!type)
		num = 0;
	else
		num = type->avail_instance;

	return sprintf(buf, "%u\n", num);
}

/* sysfs: all GVT mdev types expose the vfio-pci device API. */
static ssize_t device_api_show(struct mdev_type *mtype,
		struct mdev_type_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

/* sysfs: human-readable resource summary for this vGPU type. */
static ssize_t description_show(struct mdev_type *mtype,
		struct mdev_type_attribute *attr, char *buf)
{
	struct intel_vgpu_type *type;
	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;

	/* NOTE(review): same dead !type check as available_instances_show(). */
	type = &gvt->types[mtype_get_type_group_id(mtype)];
	if (!type)
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\nresolution: %s\n"
		       "weight: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence, vgpu_edid_str(type->resolution),
		       type->weight);
}

/* sysfs: the vGPU type's name. */
static ssize_t name_show(struct mdev_type *mtype,
			 struct mdev_type_attribute *attr, char *buf)
{
	struct intel_vgpu_type *type;
	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;

	type = &gvt->types[mtype_get_type_group_id(mtype)];
	if (!type)
		return 0;

	return
sprintf(buf, "%s\n", type->name);
}

static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);
static MDEV_TYPE_ATTR_RO(name);

static struct attribute *gvt_type_attrs[] = {
	&mdev_type_attr_available_instances.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	&mdev_type_attr_name.attr,
	NULL,
};

/* One attribute_group slot per possible vGPU type; filled at init. */
static struct attribute_group *gvt_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

/*
 * Allocate one attribute group per discovered vGPU type so mdev can
 * expose them in sysfs.  On allocation failure, free what was built.
 */
static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (!group)
			goto unwind;

		group->name = type->name;
		group->attrs = gvt_type_attrs;
		gvt_vgpu_type_groups[i] = group;
	}

	return 0;

unwind:
	for (j = 0; j < i; j++) {
		group = gvt_vgpu_type_groups[j];
		kfree(group);
	}

	return -ENOMEM;
}

/* Free every attribute group built by intel_gvt_init_vgpu_type_groups(). */
static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = gvt_vgpu_type_groups[i];
		gvt_vgpu_type_groups[i] = NULL;
		kfree(group);
	}
}

static void intel_vgpu_release_work(struct work_struct *work);

/*
 * Unpin @size bytes of guest memory starting at @gfn, one page at a
 * time (mirrors the page-by-page pinning in gvt_pin_guest_page()).
 */
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	int total_pages;
	int npage;
	int ret;

	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;

	for (npage = 0; npage < total_pages; npage++) {
		unsigned long cur_gfn = gfn + npage;

		ret = vfio_group_unpin_pages(vgpu->vfio_group, &cur_gfn, 1);
		drm_WARN_ON(&i915->drm, ret != 
1);
	}
}

/* Pin a normal or compound guest page for dma. */
static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size, struct page **page)
{
	unsigned long base_pfn = 0;
	int total_pages;
	int npage;
	int ret;

	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
	/*
	 * We pin the pages one-by-one to avoid allocating a big arrary
	 * on stack to hold pfns.
	 */
	for (npage = 0; npage < total_pages; npage++) {
		unsigned long cur_gfn = gfn + npage;
		unsigned long pfn;

		ret = vfio_group_pin_pages(vgpu->vfio_group, &cur_gfn, 1,
					   IOMMU_READ | IOMMU_WRITE, &pfn);
		if (ret != 1) {
			gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
				     cur_gfn, ret);
			goto err;
		}

		if (!pfn_valid(pfn)) {
			gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
			/* this page was pinned; include it in the unpin count */
			npage++;
			ret = -EFAULT;
			goto err;
		}

		/* the whole range must be physically contiguous */
		if (npage == 0)
			base_pfn = pfn;
		else if (base_pfn + npage != pfn) {
			gvt_vgpu_err("The pages are not continuous\n");
			ret = -EINVAL;
			npage++;
			goto err;
		}
	}

	*page = pfn_to_page(base_pfn);
	return 0;
err:
	/* undo the pages pinned so far */
	gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
	return ret;
}

/*
 * Pin @size bytes at @gfn and create a DMA mapping for the device.
 * On success *dma_addr holds the bus address; on failure the pages
 * are unpinned again.
 */
static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
		dma_addr_t *dma_addr, unsigned long size)
{
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
	struct page *page = NULL;
	int ret;

	ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
	if (ret)
		return ret;

	/* Setup DMA mapping. */
	*dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, *dma_addr)) {
		/* NOTE(review): ret is 0 here, so the printed value is stale. */
		gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
			     page_to_pfn(page), ret);
		gvt_unpin_guest_page(vgpu, gfn, size);
		return -ENOMEM;
	}

	return 0;
}

/* Tear down the DMA mapping and unpin the backing guest pages. */
static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
		dma_addr_t dma_addr, unsigned long size)
{
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;

	dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL);
	gvt_unpin_guest_page(vgpu, gfn, size);
}

/* rb-tree lookup of a cache entry by DMA address; NULL if absent. */
static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
		dma_addr_t dma_addr)
{
	struct rb_node *node = vgpu->dma_addr_cache.rb_node;
	struct gvt_dma *itr;

	while (node) {
		itr = rb_entry(node, struct gvt_dma, dma_addr_node);

		if (dma_addr < itr->dma_addr)
			node = node->rb_left;
		else if (dma_addr > itr->dma_addr)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}

/* rb-tree lookup of a cache entry by guest frame number; NULL if absent. */
static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->gfn_cache.rb_node;
	struct gvt_dma *itr;

	while (node) {
		itr = rb_entry(node, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}

/*
 * Insert a new pinned-range entry into both lookup trees.
 * Caller holds vgpu->cache_lock.
 */
static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		dma_addr_t dma_addr, unsigned long size)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->vgpu = vgpu;
	new->gfn = gfn;
	new->dma_addr = dma_addr;
	new->size = size;
	kref_init(&new->ref);

	/* gfn_cache maps gfn to struct gvt_dma. 
*/
	link = &vgpu->gfn_cache.rb_node;
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	rb_link_node(&new->gfn_node, parent, link);
	rb_insert_color(&new->gfn_node, &vgpu->gfn_cache);

	/* dma_addr_cache maps dma addr to struct gvt_dma. */
	parent = NULL;
	link = &vgpu->dma_addr_cache.rb_node;
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, dma_addr_node);

		if (dma_addr < itr->dma_addr)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	rb_link_node(&new->dma_addr_node, parent, link);
	rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache);

	vgpu->nr_cache_entries++;
	return 0;
}

/* Remove an entry from both trees and free it.  Caller holds cache_lock. */
static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->gfn_node, &vgpu->gfn_cache);
	rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache);
	kfree(entry);
	vgpu->nr_cache_entries--;
}

/*
 * Unmap, unpin and free every cached entry.  The lock is re-taken per
 * entry rather than held across the whole walk.
 */
static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;

	for (;;) {
		mutex_lock(&vgpu->cache_lock);
		node = rb_first(&vgpu->gfn_cache);
		if (!node) {
			mutex_unlock(&vgpu->cache_lock);
			break;
		}
		dma = rb_entry(node, struct gvt_dma, gfn_node);
		gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
		__gvt_cache_remove_entry(vgpu, dma);
		mutex_unlock(&vgpu->cache_lock);
	}
}

/* Initialize the empty pin/DMA cache for a vGPU. */
static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->gfn_cache = RB_ROOT;
	vgpu->dma_addr_cache = RB_ROOT;
	vgpu->nr_cache_entries = 0;
	mutex_init(&vgpu->cache_lock);
}

/* Initialize the hash table of write-protected gfns. */
static void kvmgt_protect_table_init(struct intel_vgpu *info)
{
	hash_init(info->ptable);
}

/* Free every entry in the write-protect hash table. */
static void kvmgt_protect_table_destroy(struct intel_vgpu 
*info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

/* Look up a write-protected gfn entry; NULL if not protected. */
static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

/* Is @gfn currently write-protected for this vGPU? */
static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

/* Mark @gfn write-protected (no-op if already present). */
static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	/* GFP_ATOMIC: may be called from a non-sleeping context */
	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

/* Drop the write-protection record for @gfn, if any. */
static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

/*
 * Read handler for the OpRegion VFIO region.  The region is read-only;
 * any write (or out-of-range offset) is rejected.
 */
static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
		size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
			VFIO_PCI_NUM_REGIONS;
	void *base = vgpu->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;


	if (pos >= vgpu->region[i].size || iswrite) {
		gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
		return -EINVAL;
	}
	count = min(count, (size_t)(vgpu->region[i].size - pos));
	memcpy(buf, base + pos, count);

	return count;
}

/* Nothing to free for the OpRegion region data. */
static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
		struct vfio_region *region)
{
}

static const struct 
intel_vgpu_regops intel_vgpu_regops_opregion = {
	.rw = intel_vgpu_reg_rw_opregion,
	.release = intel_vgpu_reg_release_opregion,
};

/*
 * Service a 4-byte access to the fixed vfio_region_gfx_edid register
 * block.  Writes to link_state validate the EDID blob and emulate a
 * hotplug; writes to edid_size are bounds-checked; all other registers
 * are read-only.  Returns @count on success or a negative errno.
 */
static int handle_edid_regs(struct intel_vgpu *vgpu,
			struct vfio_edid_region *region, char *buf,
			size_t count, u16 offset, bool is_write)
{
	struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
	unsigned int data;

	if (offset + count > sizeof(*regs))
		return -EINVAL;

	/* only aligned 4-byte accesses are supported */
	if (count != 4)
		return -EINVAL;

	if (is_write) {
		data = *((unsigned int *)buf);
		switch (offset) {
		case offsetof(struct vfio_region_gfx_edid, link_state):
			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
				if (!drm_edid_block_valid(
					(u8 *)region->edid_blob,
					0,
					true,
					NULL)) {
					gvt_vgpu_err("invalid EDID blob\n");
					return -EINVAL;
				}
				intel_vgpu_emulate_hotplug(vgpu, true);
			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
				intel_vgpu_emulate_hotplug(vgpu, false);
			else {
				gvt_vgpu_err("invalid EDID link state %d\n",
					regs->link_state);
				return -EINVAL;
			}
			regs->link_state = data;
			break;
		case offsetof(struct vfio_region_gfx_edid, edid_size):
			if (data > regs->edid_max_size) {
				gvt_vgpu_err("EDID size is bigger than %d!\n",
					regs->edid_max_size);
				return -EINVAL;
			}
			regs->edid_size = data;
			break;
		default:
			/* read-only regs */
			gvt_vgpu_err("write read-only EDID region at offset %d\n",
				offset);
			return -EPERM;
		}
	} else {
		memcpy(buf, (char *)regs + offset, count);
	}

	return count;
}

/* Copy into/out of the EDID blob; @offset is relative to the blob start. */
static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
			size_t count, u16 offset, bool is_write)
{
	if (offset + count > region->vfio_edid_regs.edid_size)
		return -EINVAL;

	if (is_write)
		memcpy(region->edid_blob + offset, buf, count);
	else
		memcpy(buf, region->edid_blob + offset, count);

	return count;
}

/*
 * Dispatch an EDID-region access: offsets below edid_offset hit the
 * register block, everything else hits the blob.
 */
static 
size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
		size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
			VFIO_PCI_NUM_REGIONS;
	struct vfio_edid_region *region = vgpu->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (pos < region->vfio_edid_regs.edid_offset) {
		ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
	} else {
		pos -= EDID_BLOB_OFFSET;
		ret = handle_edid_blob(region, buf, count, pos, iswrite);
	}

	if (ret < 0)
		gvt_vgpu_err("failed to access EDID region\n");

	return ret;
}

/* Free the vfio_edid_region allocated in intel_gvt_set_edid(). */
static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
					struct vfio_region *region)
{
	kfree(region->data);
}

static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
	.rw = intel_vgpu_reg_rw_edid,
	.release = intel_vgpu_reg_release_edid,
};

/*
 * Append a device-specific VFIO region to the vGPU's region array
 * (grown with krealloc; old contents preserved).
 */
static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
		unsigned int type, unsigned int subtype,
		const struct intel_vgpu_regops *ops,
		size_t size, u32 flags, void *data)
{
	struct vfio_region *region;

	region = krealloc(vgpu->region,
			(vgpu->num_regions + 1) * sizeof(*region),
			GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	vgpu->region = region;
	vgpu->region[vgpu->num_regions].type = type;
	vgpu->region[vgpu->num_regions].subtype = subtype;
	vgpu->region[vgpu->num_regions].ops = ops;
	vgpu->region[vgpu->num_regions].size = size;
	vgpu->region[vgpu->num_regions].flags = flags;
	vgpu->region[vgpu->num_regions].data = data;
	vgpu->num_regions++;
	return 0;
}

/* Expose the vGPU's emulated OpRegion to userspace as a read-only region. */
int intel_gvt_set_opregion(struct intel_vgpu *vgpu)
{
	void *base;
	int ret;

	/* Each vgpu has its own opregion, although VFIO would create another
	 * one later. This one is used to expose opregion to VFIO. And the
	 * other one created by VFIO later, is used by guest actually.
	 */
	base = vgpu_opregion(vgpu)->va;
	if (!base)
		return -ENOMEM;

	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
		memunmap(base);
		return -EINVAL;
	}

	ret = intel_vgpu_register_reg(vgpu,
			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
			VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
			&intel_vgpu_regops_opregion, OPREGION_SIZE,
			VFIO_REGION_INFO_FLAG_READ, base);

	return ret;
}

/* Register the per-port EDID region (regs + blob) for @port_num. */
int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
{
	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
	struct vfio_edid_region *base;
	int ret;

	base = kzalloc(sizeof(*base), GFP_KERNEL);
	if (!base)
		return -ENOMEM;

	/* TODO: Add multi-port and EDID extension block support */
	base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
	base->vfio_edid_regs.edid_max_size = EDID_SIZE;
	base->vfio_edid_regs.edid_size = EDID_SIZE;
	base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
	base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
	base->edid_blob = port->edid->edid_block;

	ret = intel_vgpu_register_reg(vgpu,
			VFIO_REGION_TYPE_GFX,
			VFIO_REGION_SUBTYPE_GFX_EDID,
			&intel_vgpu_regops_edid, EDID_SIZE,
			VFIO_REGION_INFO_FLAG_READ |
			VFIO_REGION_INFO_FLAG_WRITE |
			VFIO_REGION_INFO_FLAG_CAPS, base);

	return ret;
}

/*
 * VFIO IOMMU notifier: on DMA_UNMAP, drop every cached pin/mapping
 * that falls inside the unmapped iova range.
 */
static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu =
		container_of(nb, struct intel_vgpu, iommu_notifier);

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		struct gvt_dma *entry;
		unsigned long iov_pfn, end_iov_pfn;

		iov_pfn = unmap->iova >> PAGE_SHIFT;
		end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;

		mutex_lock(&vgpu->cache_lock);
		for (; iov_pfn < 
end_iov_pfn; iov_pfn++) {
			entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
			if (!entry)
				continue;

			gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
					   entry->size);
			__gvt_cache_remove_entry(vgpu, entry);
		}
		mutex_unlock(&vgpu->cache_lock);
	}

	return NOTIFY_OK;
}

/*
 * VFIO group notifier: records the KVM instance associated with the
 * group; a NULL KVM means the group is going away, so schedule release.
 */
static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu =
		container_of(nb, struct intel_vgpu, group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->kvm = data;

		if (!data)
			schedule_work(&vgpu->release_work);
	}

	return NOTIFY_OK;
}

/* Is another attached vGPU already bound to the same KVM instance? */
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
{
	struct intel_vgpu *itr;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!itr->attached)
			continue;

		if (vgpu->kvm == itr->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}

/*
 * vfio_device open: register iommu/group notifiers, take a reference
 * on the associated KVM, set up the pin cache, protect table and page
 * tracking, then activate the vGPU.  Unwinds in reverse on failure.
 */
static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned long events;
	int ret;
	struct vfio_group *vfio_group;

	vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY, &events,
				     &vgpu->iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			     ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY, &events,
				     &vgpu->group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			     ret);
		goto 
undo_iommu;
	}

	vfio_group =
		vfio_group_get_external_user_from_dev(vgpu->vfio_device.dev);
	if (IS_ERR_OR_NULL(vfio_group)) {
		ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group);
		gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n");
		goto undo_register;
	}
	vgpu->vfio_group = vfio_group;

	ret = -EEXIST;
	if (vgpu->attached)
		goto undo_group;

	/* the group notifier must have delivered a usable KVM by now */
	ret = -ESRCH;
	if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		goto undo_group;
	}

	ret = -EEXIST;
	if (__kvmgt_vgpu_exist(vgpu))
		goto undo_group;

	vgpu->attached = true;
	kvm_get_kvm(vgpu->kvm);

	kvmgt_protect_table_init(vgpu);
	gvt_cache_init(vgpu);

	vgpu->track_node.track_write = kvmgt_page_track_write;
	vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);

	debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
			     &vgpu->nr_cache_entries);

	intel_gvt_activate_vgpu(vgpu);

	atomic_set(&vgpu->released, 0);
	return 0;

undo_group:
	vfio_group_put_external_user(vgpu->vfio_group);
	vgpu->vfio_group = NULL;

undo_register:
	vfio_unregister_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY,
				 &vgpu->group_notifier);

undo_iommu:
	vfio_unregister_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY,
				 &vgpu->iommu_notifier);
out:
	return ret;
}

/* Drop the MSI eventfd trigger, if one was set up. */
static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
{
	struct eventfd_ctx *trigger;

	trigger = vgpu->msi_trigger;
	if (trigger) {
		eventfd_ctx_put(trigger);
		vgpu->msi_trigger = NULL;
	}
}

/*
 * Common release path (from close_device or the release work item).
 * The atomic_cmpxchg on vgpu->released makes it run at most once.
 */
static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	int ret;

	if (!vgpu->attached)
		return;

	if (atomic_cmpxchg(&vgpu->released, 0, 1)) 
return;

	intel_gvt_release_vgpu(vgpu);

	ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_IOMMU_NOTIFY,
				       &vgpu->iommu_notifier);
	drm_WARN(&i915->drm, ret,
		 "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_GROUP_NOTIFY,
				       &vgpu->group_notifier);
	drm_WARN(&i915->drm, ret,
		 "vfio_unregister_notifier for group failed: %d\n", ret);

	debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));

	kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
	kvm_put_kvm(vgpu->kvm);
	kvmgt_protect_table_destroy(vgpu);
	gvt_cache_destroy(vgpu);

	intel_vgpu_release_msi_eventfd_ctx(vgpu);
	vfio_group_put_external_user(vgpu->vfio_group);

	vgpu->kvm = NULL;
	vgpu->attached = false;
}

/* vfio_device close: run the common release path. */
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
	__intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
}

/* Deferred release scheduled by the group notifier when KVM goes away. */
static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu =
		container_of(work, struct intel_vgpu, release_work);

	__intel_vgpu_release(vgpu);
}

/*
 * Decode the guest-programmed base address of a BAR from the virtual
 * config space; for a 64-bit memory BAR the high dword is read too.
 */
static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
{
	u32 start_lo, start_hi;
	u32 mem_type;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ bar + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}

/* Route a BAR access into the MMIO emulation at the BAR's guest address. */
static int 
intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
			void *buf, unsigned int count, bool is_write)
{
	u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
	int ret;

	if (is_write)
		ret = intel_vgpu_emulate_mmio_write(vgpu,
					bar_start + off, buf, count);
	else
		ret = intel_vgpu_emulate_mmio_read(vgpu,
					bar_start + off, buf, count);
	return ret;
}

/* Does @off fall inside this vGPU's slice of the aperture? */
static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
{
	return off >= vgpu_aperture_offset(vgpu) &&
	       off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
}

/*
 * Access the vGPU aperture via a temporary WC mapping of the GGTT
 * iomap, after range-checking the request against the vGPU's slice.
 */
static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
		void *buf, unsigned long count, bool is_write)
{
	void __iomem *aperture_va;

	if (!intel_vgpu_in_aperture(vgpu, off) ||
	    !intel_vgpu_in_aperture(vgpu, off + count)) {
		gvt_vgpu_err("Invalid aperture offset %llu\n", off);
		return -EINVAL;
	}

	aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
					ALIGN_DOWN(off, PAGE_SIZE),
					count + offset_in_page(off));
	if (!aperture_va)
		return -EIO;

	if (is_write)
		memcpy_toio(aperture_va + offset_in_page(off), buf, count);
	else
		memcpy_fromio(buf, aperture_va + offset_in_page(off), count);

	io_mapping_unmap(aperture_va);

	return 0;
}

/*
 * Central dispatcher for VFIO accesses: decode the region index from
 * *ppos and forward to config-space, BAR, aperture or device-specific
 * region handlers.
 */
static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;


	if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_vgpu_emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_vgpu_emulate_cfg_read(vgpu, pos,
						
buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
					buf, count, is_write);
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
		ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
		break;
	case VFIO_PCI_BAR1_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
		break;
	default:
		if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions)
			return -EINVAL;

		index -= VFIO_PCI_NUM_REGIONS;
		return vgpu->region[index].ops->rw(vgpu, buf, count,
				ppos, is_write);
	}

	return ret == 0 ? count : ret;
}

/* Does *ppos address a GGTT entry inside BAR0's MMIO window? */
static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos)
{
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	struct intel_gvt *gvt = vgpu->gvt;
	int offset;

	/* Only allow MMIO GGTT entry access */
	if (index != PCI_BASE_ADDRESS_0)
		return false;

	offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
		intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);

	return (offset >= gvt->device_info.gtt_start_offset &&
		offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ? 
true : false;
}

/*
 * VFIO read entry point: split the request into naturally-aligned
 * 8/4/2/1-byte chunks (8 bytes only for GGTT entries) and copy each
 * chunk out to userspace.
 */
static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf,
			size_t count, loff_t *ppos)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes read */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(vgpu, ppos)) {
			u64 val;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

/*
 * VFIO write entry point: mirror of intel_vgpu_read(), copying each
 * naturally-aligned chunk in from userspace before emulating it.
 */
static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes write */
		if (count >= 8 && !(*ppos % 8) &&
			
gtt_entry(vgpu, ppos)) {
			u64 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(vgpu, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(vgpu, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}

/*
 * mmap handler: only the aperture BAR (BAR2) may be mapped, and only
 * shared mappings fully contained in this vGPU's aperture slice.
 */
static int intel_vgpu_mmap(struct vfio_device *vfio_dev,
		struct vm_area_struct *vma)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff, req_start;
	pgprot_t pg_prot;

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	pgoff = vma->vm_pgoff &
		((1U << 
(VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	req_start = pgoff << PAGE_SHIFT;

	if (!intel_vgpu_in_aperture(vgpu, req_start))
		return -EINVAL;
	if (req_start + req_size >
	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
		return -EINVAL;

	/* rebase onto the physical aperture of the host GPU */
	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

/* INTx and MSI each expose exactly one interrupt; everything else none. */
static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

/* INTx mask: accepted but not emulated. */
static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, u32 flags,
			void *data)
{
	return 0;
}

/* INTx unmask: accepted but not emulated. */
static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, u32 flags, void *data)
{
	return 0;
}

/* INTx trigger: accepted but not emulated. */
static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		u32 flags, void *data)
{
	return 0;
}

/*
 * Install (eventfd) or tear down (DATA_NONE, count == 0) the MSI
 * trigger used to inject interrupts into the guest.
 */
static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		u32 flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		vgpu->msi_trigger = trigger;
	} else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
		intel_vgpu_release_msi_eventfd_ctx(vgpu);

	return 0;
}

static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, u32 flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	/* Unsupported index/action combination. */
	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}

/*
 * VFIO .ioctl op: implements the vfio-pci-style device ioctls --
 * GET_INFO, GET_REGION_INFO (with sparse-mmap and type capabilities),
 * GET_IRQ_INFO, SET_IRQS, RESET, and the GVT dmabuf plane queries.
 * Each command copies in the fixed-size header (minsz), validates argsz,
 * and copies back only minsz bytes of result.
 */
static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		/* Standard PCI regions plus any device-specific ones. */
		info.num_regions = VFIO_PCI_NUM_REGIONS +
				vgpu->num_regions;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		unsigned int i;
		int ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->gvt->device_info.cfg_space_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			/* BAR2 (aperture) is mmap-able; advertise the
			 * mmap-able sub-range via a sparse-mmap capability.
			 */
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			sparse = kzalloc(struct_size(sparse, areas, nr_areas),
					 GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->header.version = 1;
			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type = {
					.header.id = VFIO_REGION_INFO_CAP_TYPE,
					.header.version = 1 };

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->num_regions)
					return -EINVAL;
				/* Clamp the user-controlled index against
				 * speculative out-of-bounds access.
				 */
				info.index =
					array_index_nospec(info.index,
							VFIO_PCI_NUM_REGIONS +
							vgpu->num_regions);

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->region[i].size;
				info.flags = vgpu->region[i].flags;

				cap_type.type = vgpu->region[i].type;
				cap_type.subtype = vgpu->region[i].subtype;

				ret = vfio_info_add_capability(&caps,
							&cap_type.header,
							sizeof(cap_type));
				if (ret)
					return ret;
			}
		}

		/* Append the sparse-mmap capability built for BAR2. */
		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					&sparse->header,
					struct_size(sparse, areas,
						    sparse->nr_areas));
				if (ret) {
					kfree(sparse);
					return ret;
				}
				break;
			default:
				kfree(sparse);
				return -EINVAL;
			}
		}

		if (caps.size) {
			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
			/* If the user buffer is too small, report the needed
			 * argsz and no cap chain; otherwise copy the chain
			 * right after the fixed info struct.
			 */
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					kfree(sparse);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		kfree(sparse);
		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			/* Validate header and compute the trailing data size. */
			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_reset_vgpu(vgpu);
		return 0;
	} else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
		struct vfio_device_gfx_plane_info dmabuf;
		int ret = 0;

		minsz = offsetofend(struct vfio_device_gfx_plane_info,
				    dmabuf_id);
		if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
			return -EFAULT;
		if (dmabuf.argsz < minsz)
			return -EINVAL;

		ret = intel_vgpu_query_plane(vgpu, &dmabuf);
		if (ret != 0)
			return ret;

		return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
				-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
		__u32 dmabuf_id;

		if (get_user(dmabuf_id, (__u32 __user *)arg))
			return -EFAULT;
		return intel_vgpu_get_dmabuf(vgpu, dmabuf_id);
	}

	return -ENOTTY;
}

/* sysfs: expose the vGPU id under the "intel_vgpu" attribute group. */
static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct intel_vgpu *vgpu = dev_get_drvdata(dev);

	return sprintf(buf, "%d\n", vgpu->id);
}

static DEVICE_ATTR_RO(vgpu_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};

/* VFIO device callbacks for a KVMGT vGPU. */
static const struct vfio_device_ops intel_vgpu_dev_ops = {
	.open_device	= intel_vgpu_open_device,
	.close_device	= intel_vgpu_close_device,
	.read		= intel_vgpu_read,
	.write		= intel_vgpu_write,
	.mmap		= intel_vgpu_mmap,
	.ioctl		= intel_vgpu_ioctl,
};

/*
 * mdev .probe: create a vGPU of the requested type, initialize its VFIO
 * device, and register it as an emulated-IOMMU VFIO device.
 */
static int intel_vgpu_probe(struct mdev_device *mdev)
{
	struct device *pdev = mdev_parent_dev(mdev);
	struct intel_gvt *gvt = kdev_to_i915(pdev)->gvt;
	struct intel_vgpu_type *type;
	struct intel_vgpu *vgpu;
	int ret;

	type = &gvt->types[mdev_get_type_group_id(mdev)];
	if (!type)
		return -EINVAL;

	vgpu = intel_gvt_create_vgpu(gvt, type);
	if (IS_ERR(vgpu)) {
		gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu));
		return PTR_ERR(vgpu);
	}

	INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
	vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
			    &intel_vgpu_dev_ops);

	dev_set_drvdata(&mdev->dev, vgpu);
	ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device);
	if (ret) {
		/* Registration failed: tear the vGPU back down. */
		intel_gvt_destroy_vgpu(vgpu);
		return ret;
	}

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	return 0;
}

/* mdev .remove: destroy the vGPU; must not run while it is attached. */
static void intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev);

	if (WARN_ON_ONCE(vgpu->attached))
		return;
	intel_gvt_destroy_vgpu(vgpu);
}

static struct mdev_driver intel_vgpu_mdev_driver = {
	.driver = {
		.name		= "intel_vgpu_mdev",
		.owner		= THIS_MODULE,
		.dev_groups	= intel_vgpu_groups,
	},
	.probe		= intel_vgpu_probe,
	.remove		= intel_vgpu_remove,
	.supported_type_groups	= gvt_vgpu_type_groups,
};

/*
 * Start write-protecting a guest page so guest writes are forwarded to
 * the page-track notifier.  Idempotent: already-protected gfns are left
 * alone.  Returns -ESRCH when not attached, -EINVAL for an unmapped gfn.
 *
 * Locking: KVM SRCU for the memslot lookup, then mmu_lock (write) for
 * the page-track update.
 */
int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
{
	struct kvm *kvm = info->kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!info->attached)
		return -ESRCH;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	write_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

/*
 * Stop write-protecting a guest page.  Mirror of
 * intel_gvt_page_track_add(); a gfn that is not currently protected is
 * left alone.  Returns 0 when not attached (nothing to undo).
 */
int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
{
	struct kvm *kvm = info->kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!info->attached)
		return 0;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	write_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn,
					KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

/*
 * KVM page-track notifier: a guest wrote to a tracked gpa.  Forward the
 * write to the GVT page-track handler when the gfn is one we protect.
 */
static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct intel_vgpu *info =
		container_of(node, struct intel_vgpu, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_vgpu_page_track_handler(info, gpa,
						     (void *)val, len);
}

/*
 * KVM page-track notifier: a memslot is going away.  Drop the write
 * protection and our bookkeeping for every tracked gfn in that slot.
 */
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct intel_vgpu *info =
		container_of(node, struct intel_vgpu, track_node);

	write_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	write_unlock(&kvm->mmu_lock);
}

/*
 * Release every device-specific VFIO region this vGPU registered,
 * calling each region's release op first, then freeing the array.
 */
void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
{
	int i;

	if (!vgpu->region)
		return;

	for (i = 0; i < vgpu->num_regions; i++)
		if (vgpu->region[i].ops->release)
			vgpu->region[i].ops->release(vgpu,
					&vgpu->region[i]);
	vgpu->num_regions = 0;
	kfree(vgpu->region);
	vgpu->region = NULL;
}

/*
 * Map a guest page for DMA and cache the mapping.  Three cases under
 * cache_lock:
 *  - gfn not cached: map and insert;
 *  - gfn cached with a different size: unmap/remove, then re-map/insert;
 *  - exact hit: take a reference and reuse the cached dma_addr.
 */
int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size, dma_addr_t *dma_addr)
{
	struct gvt_dma *entry;
	int ret;

	if (!vgpu->attached)
		return -EINVAL;

	mutex_lock(&vgpu->cache_lock);

	entry = __gvt_cache_find_gfn(vgpu, gfn);
	if (!entry) {
		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
		if (ret)
			goto err_unmap;
	} else if (entry->size != size) {
		/* the same gfn with different size: unmap and re-map */
		gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
		__gvt_cache_remove_entry(vgpu, entry);

		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
		if (ret)
			goto err_unmap;
	} else {
		kref_get(&entry->ref);
		*dma_addr = entry->dma_addr;
	}

	mutex_unlock(&vgpu->cache_lock);
	return 0;

err_unmap:
	gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
err_unlock:
	mutex_unlock(&vgpu->cache_lock);
	return ret;
}

/*
 * Take an extra reference on an existing DMA mapping looked up by
 * dma_addr.  Returns -ENODEV when not attached, -ENOMEM when the
 * address is not in the cache.
 */
int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr)
{
	struct gvt_dma *entry;
	int ret = 0;

	if (!vgpu->attached)
		return -ENODEV;

	mutex_lock(&vgpu->cache_lock);
	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
	if (entry)
		kref_get(&entry->ref);
	else
		ret = -ENOMEM;
	mutex_unlock(&vgpu->cache_lock);

	return ret;
}

/* kref release: unmap the page and drop the cache entry.
 * Runs with cache_lock held by the kref_put() caller.
 */
static void __gvt_dma_release(struct kref *ref)
{
	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);

	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
			   entry->size);
	__gvt_cache_remove_entry(entry->vgpu, entry);
}

/*
 * Drop one reference on the mapping for dma_addr; the final put unmaps
 * the page via __gvt_dma_release().
 */
void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
		dma_addr_t dma_addr)
{
	struct gvt_dma *entry;

	if (!vgpu->attached)
		return;

	mutex_lock(&vgpu->cache_lock);
	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
	if (entry)
		kref_put(&entry->ref, __gvt_dma_release);
	mutex_unlock(&vgpu->cache_lock);
}

/* Fill in the static per-device parameters used by the emulation. */
static void init_device_info(struct intel_gvt *gvt)
{
	struct intel_gvt_device_info *info = &gvt->device_info;
	struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);

	info->max_support_vgpus = 8;
	info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE;
	info->mmio_size = 2 * 1024 * 1024;
	info->mmio_bar = 0;
	info->gtt_start_offset = 8 * 1024 * 1024;
	info->gtt_entry_size = 8;
	info->gtt_entry_size_shift = 3;
	info->gmadr_bytes_in_cmd = 8;
	info->max_surface_size = 36 * 1024 * 1024;
	info->msi_cap_offset = pdev->msi_cap;
}

/*
 * For each vGPU whose EMULATE_VBLANK request bit is set, clear the bit
 * and emulate a vblank if the vGPU is active.  Serialized by gvt->lock.
 */
static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt)
{
	struct intel_vgpu *vgpu;
	int id;

	mutex_lock(&gvt->lock);
	idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) {
		if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id,
				       (void *)&gvt->service_request)) {
			if (vgpu->active)
				intel_vgpu_emulate_vblank(vgpu);
		}
	}
	mutex_unlock(&gvt->lock);
}

/*
 * Service kthread: sleeps until a service request (or stop) arrives,
 * then handles vblank emulation and scheduling requests.
 */
static int gvt_service_thread(void *data)
{
	struct intel_gvt *gvt = (struct intel_gvt *)data;
	int ret;

	gvt_dbg_core("service thread start\n");

	while (!kthread_should_stop()) {
		ret = wait_event_interruptible(gvt->service_thread_wq,
				kthread_should_stop() || gvt->service_request);

		if (kthread_should_stop())
			break;

		if (WARN_ONCE(ret, "service thread is waken up by signal.\n"))
			continue;

		intel_gvt_test_and_emulate_vblank(gvt);

		if (test_bit(INTEL_GVT_REQUEST_SCHED,
				(void *)&gvt->service_request) ||
			test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
					(void *)&gvt->service_request)) {
			intel_gvt_schedule(gvt);
		}
	}

	return 0;
}

static void clean_service_thread(struct intel_gvt *gvt)
{
	kthread_stop(gvt->service_thread);
}

/* Create the waitqueue and start the gvt_service_thread kthread. */
static int init_service_thread(struct intel_gvt *gvt)
{
	init_waitqueue_head(&gvt->service_thread_wq);

	gvt->service_thread = kthread_run(gvt_service_thread,
			gvt, "gvt_service_thread");
	if (IS_ERR(gvt->service_thread)) {
		gvt_err("fail to start service thread.\n");
		return PTR_ERR(gvt->service_thread);
	}
	return 0;
}

/**
 * intel_gvt_clean_device - clean a GVT device
 * @i915: i915 private
 *
 * This function is called at the driver unloading stage, to free the
 * resources owned by a GVT device.
 *
 */
static void intel_gvt_clean_device(struct drm_i915_private *i915)
{
	struct intel_gvt *gvt = fetch_and_zero(&i915->gvt);

	if (drm_WARN_ON(&i915->drm, !gvt))
		return;

	/* Teardown is the reverse of intel_gvt_init_device(). */
	mdev_unregister_device(i915->drm.dev);
	intel_gvt_cleanup_vgpu_type_groups(gvt);
	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
	intel_gvt_clean_vgpu_types(gvt);

	intel_gvt_debugfs_clean(gvt);
	clean_service_thread(gvt);
	intel_gvt_clean_cmd_parser(gvt);
	intel_gvt_clean_sched_policy(gvt);
	intel_gvt_clean_workload_scheduler(gvt);
	intel_gvt_clean_gtt(gvt);
	intel_gvt_free_firmware(gvt);
	intel_gvt_clean_mmio_info(gvt);
	idr_destroy(&gvt->vgpu_idr);

	kfree(i915->gvt);
}

/**
 * intel_gvt_init_device - initialize a GVT device
 * @i915: drm i915 private data
 *
 * This function is called at the initialization stage, to initialize
 * necessary GVT components.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 *
 */
static int intel_gvt_init_device(struct drm_i915_private *i915)
{
	struct intel_gvt *gvt;
	struct intel_vgpu *vgpu;
	int ret;

	if (drm_WARN_ON(&i915->drm, i915->gvt))
		return -EEXIST;

	gvt = kzalloc(sizeof(struct intel_gvt), GFP_KERNEL);
	if (!gvt)
		return -ENOMEM;

	gvt_dbg_core("init gvt device\n");

	/* vGPU ids start from 1; 0 is reserved. */
	idr_init_base(&gvt->vgpu_idr, 1);
	spin_lock_init(&gvt->scheduler.mmio_context_lock);
	mutex_init(&gvt->lock);
	mutex_init(&gvt->sched_lock);
	gvt->gt = to_gt(i915);
	i915->gvt = gvt;

	init_device_info(gvt);

	/* Each step below is undone by the matching out_* label on error. */
	ret = intel_gvt_setup_mmio_info(gvt);
	if (ret)
		goto out_clean_idr;

	intel_gvt_init_engine_mmio_context(gvt);

	ret = intel_gvt_load_firmware(gvt);
	if (ret)
		goto out_clean_mmio_info;

	ret = intel_gvt_init_irq(gvt);
	if (ret)
		goto out_free_firmware;

	ret = intel_gvt_init_gtt(gvt);
	if (ret)
		goto out_free_firmware;

	ret = intel_gvt_init_workload_scheduler(gvt);
	if (ret)
		goto out_clean_gtt;

	ret = intel_gvt_init_sched_policy(gvt);
	if (ret)
		goto out_clean_workload_scheduler;

	ret = intel_gvt_init_cmd_parser(gvt);
	if (ret)
		goto out_clean_sched_policy;

	ret = init_service_thread(gvt);
	if (ret)
		goto out_clean_cmd_parser;

	ret = intel_gvt_init_vgpu_types(gvt);
	if (ret)
		goto out_clean_thread;

	vgpu = intel_gvt_create_idle_vgpu(gvt);
	if (IS_ERR(vgpu)) {
		ret = PTR_ERR(vgpu);
		gvt_err("failed to create idle vgpu\n");
		goto out_clean_types;
	}
	gvt->idle_vgpu = vgpu;

	intel_gvt_debugfs_init(gvt);

	ret = intel_gvt_init_vgpu_type_groups(gvt);
	if (ret)
		goto out_destroy_idle_vgpu;

	ret = mdev_register_device(i915->drm.dev, &intel_vgpu_mdev_driver);
	if (ret)
		goto out_cleanup_vgpu_type_groups;

	gvt_dbg_core("gvt device initialization is done\n");
	return 0;

out_cleanup_vgpu_type_groups:
	intel_gvt_cleanup_vgpu_type_groups(gvt);
out_destroy_idle_vgpu:
	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
	intel_gvt_debugfs_clean(gvt);
out_clean_types:
	intel_gvt_clean_vgpu_types(gvt);
out_clean_thread:
	clean_service_thread(gvt);
out_clean_cmd_parser:
	intel_gvt_clean_cmd_parser(gvt);
out_clean_sched_policy:
	intel_gvt_clean_sched_policy(gvt);
out_clean_workload_scheduler:
	intel_gvt_clean_workload_scheduler(gvt);
out_clean_gtt:
	intel_gvt_clean_gtt(gvt);
out_free_firmware:
	intel_gvt_free_firmware(gvt);
out_clean_mmio_info:
	intel_gvt_clean_mmio_info(gvt);
out_clean_idr:
	idr_destroy(&gvt->vgpu_idr);
	kfree(gvt);
	i915->gvt = NULL;
	return ret;
}

/* Restore GVT state (fences, MMIO, GGTT) after a system resume. */
static void intel_gvt_pm_resume(struct drm_i915_private *i915)
{
	struct intel_gvt *gvt = i915->gvt;

	intel_gvt_restore_fence(gvt);
	intel_gvt_restore_mmio(gvt);
	intel_gvt_restore_ggtt(gvt);
}

/* Ops handed to the i915 core via intel_gvt_set_ops(). */
static const struct intel_vgpu_ops intel_gvt_vgpu_ops = {
	.init_device	= intel_gvt_init_device,
	.clean_device	= intel_gvt_clean_device,
	.pm_resume	= intel_gvt_pm_resume,
};

static int __init kvmgt_init(void)
{
	int ret;

	ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops);
	if (ret)
		return ret;

	ret = mdev_register_driver(&intel_vgpu_mdev_driver);
	if (ret)
		intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
	return ret;
}

static void __exit kvmgt_exit(void)
{
	mdev_unregister_driver(&intel_vgpu_mdev_driver);
	intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");